import datetime

import numpy as np
import pytest
import pytz

import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    Index,
    Timedelta,
    merge_asof,
    option_context,
    to_datetime,
)
import pandas._testing as tm
from pandas.core.reshape.merge import MergeError


@pytest.fixture(params=["s", "ms", "us", "ns"])
def unit(request):
    """
    Resolution for datetimelike dtypes.
    """
    return request.param


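# A minimal illustrative sketch (not part of the test suite) of the behaviour
# exercised below: by default merge_asof performs a backward as-of join,
# matching each left row to the last right row whose "on" key is less than or
# equal to the left key.  The toy frames mirror the doc-string examples.
def _merge_asof_sketch():
    left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
    right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]})
    # expected matches: a=1 -> 1, a=5 -> 3, a=10 -> 7
    return merge_asof(left, right, on="a")

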
class TestAsOfMerge:
    def prep_data(self, df, dedupe=False):
        # parse the "time" column and optionally drop duplicate (time, ticker) rows
        if dedupe:
            df = df.drop_duplicates(["time", "ticker"], keep="last").reset_index(
                drop=True
            )
        df.time = to_datetime(df.time)
        return df

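    # The fixtures below all describe the same slice of 2016-05-25 tick data:
    # ``trades`` and ``quotes`` are the raw inputs, while ``asof``,
    # ``tolerance``, ``allow_exact_matches`` and
    # ``allow_exact_matches_and_tolerance`` hold the frames expected from
    # merge_asof under the corresponding keyword arguments.
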
    @pytest.fixture
    def trades(self):
        df = pd.DataFrame(
            [
                ["20160525 13:30:00.023", "MSFT", "51.9500", "75", "NASDAQ"],
                ["20160525 13:30:00.038", "MSFT", "51.9500", "155", "NASDAQ"],
                ["20160525 13:30:00.048", "GOOG", "720.7700", "100", "NASDAQ"],
                ["20160525 13:30:00.048", "GOOG", "720.9200", "100", "NASDAQ"],
                ["20160525 13:30:00.048", "GOOG", "720.9300", "200", "NASDAQ"],
                ["20160525 13:30:00.048", "GOOG", "720.9300", "300", "NASDAQ"],
                ["20160525 13:30:00.048", "GOOG", "720.9300", "600", "NASDAQ"],
                ["20160525 13:30:00.048", "GOOG", "720.9300", "44", "NASDAQ"],
                ["20160525 13:30:00.074", "AAPL", "98.6700", "478343", "NASDAQ"],
                ["20160525 13:30:00.075", "AAPL", "98.6700", "478343", "NASDAQ"],
                ["20160525 13:30:00.075", "AAPL", "98.6600", "6", "NASDAQ"],
                ["20160525 13:30:00.075", "AAPL", "98.6500", "30", "NASDAQ"],
                ["20160525 13:30:00.075", "AAPL", "98.6500", "75", "NASDAQ"],
                ["20160525 13:30:00.075", "AAPL", "98.6500", "20", "NASDAQ"],
                ["20160525 13:30:00.075", "AAPL", "98.6500", "35", "NASDAQ"],
                ["20160525 13:30:00.075", "AAPL", "98.6500", "10", "NASDAQ"],
                ["20160525 13:30:00.075", "AAPL", "98.5500", "6", "ARCA"],
                ["20160525 13:30:00.075", "AAPL", "98.5500", "6", "ARCA"],
                ["20160525 13:30:00.076", "AAPL", "98.5600", "1000", "ARCA"],
                ["20160525 13:30:00.076", "AAPL", "98.5600", "200", "ARCA"],
                ["20160525 13:30:00.076", "AAPL", "98.5600", "300", "ARCA"],
                ["20160525 13:30:00.076", "AAPL", "98.5600", "400", "ARCA"],
                ["20160525 13:30:00.076", "AAPL", "98.5600", "600", "ARCA"],
                ["20160525 13:30:00.076", "AAPL", "98.5600", "200", "ARCA"],
                ["20160525 13:30:00.078", "MSFT", "51.9500", "783", "NASDAQ"],
                ["20160525 13:30:00.078", "MSFT", "51.9500", "100", "NASDAQ"],
                ["20160525 13:30:00.078", "MSFT", "51.9500", "100", "NASDAQ"],
            ],
            columns="time,ticker,price,quantity,marketCenter".split(","),
        )
        df["price"] = df["price"].astype("float64")
        df["quantity"] = df["quantity"].astype("int64")
        return self.prep_data(df)

    @pytest.fixture
    def quotes(self):
        df = pd.DataFrame(
            [
                ["20160525 13:30:00.023", "GOOG", "720.50", "720.93"],
                ["20160525 13:30:00.023", "MSFT", "51.95", "51.95"],
                ["20160525 13:30:00.041", "MSFT", "51.95", "51.95"],
                ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"],
                ["20160525 13:30:00.072", "GOOG", "720.50", "720.88"],
                ["20160525 13:30:00.075", "AAPL", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "51.95"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "51.95"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "51.95"],
                ["20160525 13:30:00.078", "MSFT", "51.92", "51.95"],
            ],
            columns="time,ticker,bid,ask".split(","),
        )
        df["bid"] = df["bid"].astype("float64")
        df["ask"] = df["ask"].astype("float64")
        return self.prep_data(df, dedupe=True)

    @pytest.fixture
    def asof(self):
        df = pd.DataFrame(
            [
                ["20160525 13:30:00.023", "MSFT", "51.95", "75", "NASDAQ", "51.95", "51.95"],
                ["20160525 13:30:00.038", "MSFT", "51.95", "155", "NASDAQ", "51.95", "51.95"],
                ["20160525 13:30:00.048", "GOOG", "720.77", "100", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.92", "100", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "200", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "300", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "600", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "44", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.074", "AAPL", "98.67", "478343", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.67", "478343", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.66", "6", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.65", "30", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.65", "75", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.65", "20", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.65", "35", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.65", "10", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.55", "6", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.55", "6", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "1000", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "200", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "300", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "400", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "600", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "200", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "783", "NASDAQ", "51.92", "51.95"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "100", "NASDAQ", "51.92", "51.95"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "100", "NASDAQ", "51.92", "51.95"],
            ],
            columns="time,ticker,price,quantity,marketCenter,bid,ask".split(","),
        )
        df["price"] = df["price"].astype("float64")
        df["quantity"] = df["quantity"].astype("int64")
        df["bid"] = df["bid"].astype("float64")
        df["ask"] = df["ask"].astype("float64")
        return self.prep_data(df)

    @pytest.fixture
    def tolerance(self):
        df = pd.DataFrame(
            [
                ["20160525 13:30:00.023", "MSFT", "51.95", "75", "NASDAQ", "51.95", "51.95"],
                ["20160525 13:30:00.038", "MSFT", "51.95", "155", "NASDAQ", "51.95", "51.95"],
                ["20160525 13:30:00.048", "GOOG", "720.77", "100", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.92", "100", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "200", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "300", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "600", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "44", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.074", "AAPL", "98.67", "478343", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.67", "478343", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.66", "6", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.65", "30", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.65", "75", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.65", "20", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.65", "35", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.65", "10", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.55", "6", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.55", "6", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "1000", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "200", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "300", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "400", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "600", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "200", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "783", "NASDAQ", "51.92", "51.95"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "100", "NASDAQ", "51.92", "51.95"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "100", "NASDAQ", "51.92", "51.95"],
            ],
            columns="time,ticker,price,quantity,marketCenter,bid,ask".split(","),
        )
        df["price"] = df["price"].astype("float64")
        df["quantity"] = df["quantity"].astype("int64")
        df["bid"] = df["bid"].astype("float64")
        df["ask"] = df["ask"].astype("float64")
        return self.prep_data(df)

    @pytest.fixture
    def allow_exact_matches(self, datapath):
        df = pd.DataFrame(
            [
                ["20160525 13:30:00.023", "MSFT", "51.95", "75", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.038", "MSFT", "51.95", "155", "NASDAQ", "51.95", "51.95"],
                ["20160525 13:30:00.048", "GOOG", "720.77", "100", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.92", "100", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "200", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "300", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "600", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "44", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.074", "AAPL", "98.67", "478343", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.67", "478343", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.66", "6", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.65", "30", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.65", "75", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.65", "20", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.65", "35", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.65", "10", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.55", "6", "ARCA", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.55", "6", "ARCA", np.nan, np.nan],
                ["20160525 13:30:00.076", "AAPL", "98.56", "1000", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "200", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "300", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "400", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "600", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "200", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "783", "NASDAQ", "51.95", "51.95"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "100", "NASDAQ", "51.95", "51.95"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "100", "NASDAQ", "51.95", "51.95"],
            ],
            columns="time,ticker,price,quantity,marketCenter,bid,ask".split(","),
        )
        df["price"] = df["price"].astype("float64")
        df["quantity"] = df["quantity"].astype("int64")
        df["bid"] = df["bid"].astype("float64")
        df["ask"] = df["ask"].astype("float64")
        return self.prep_data(df)

    @pytest.fixture
    def allow_exact_matches_and_tolerance(self):
        df = pd.DataFrame(
            [
                ["20160525 13:30:00.023", "MSFT", "51.95", "75", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.038", "MSFT", "51.95", "155", "NASDAQ", "51.95", "51.95"],
                ["20160525 13:30:00.048", "GOOG", "720.77", "100", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.92", "100", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "200", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "300", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "600", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "44", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.074", "AAPL", "98.67", "478343", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.67", "478343", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.66", "6", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.65", "30", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.65", "75", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.65", "20", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.65", "35", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.65", "10", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.55", "6", "ARCA", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.55", "6", "ARCA", np.nan, np.nan],
                ["20160525 13:30:00.076", "AAPL", "98.56", "1000", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "200", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "300", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "400", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "600", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "200", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "783", "NASDAQ", "51.95", "51.95"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "100", "NASDAQ", "51.95", "51.95"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "100", "NASDAQ", "51.95", "51.95"],
            ],
            columns="time,ticker,price,quantity,marketCenter,bid,ask".split(","),
        )
        df["price"] = df["price"].astype("float64")
        df["quantity"] = df["quantity"].astype("int64")
        df["bid"] = df["bid"].astype("float64")
        df["ask"] = df["ask"].astype("float64")
        return self.prep_data(df)

    def test_examples1(self):
        """doc-string examples"""
        left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
        right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]})

        expected = pd.DataFrame(
            {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, 3, 7]}
        )

        result = merge_asof(left, right, on="a")
        tm.assert_frame_equal(result, expected)

    def test_examples2(self, unit):
        """doc-string examples"""
        if unit == "s":
            pytest.skip(
                "This test is invalid for unit='s' because that would "
                "round the trades['time']]"
            )
        trades = pd.DataFrame(
            {
                "time": to_datetime(
                    [
                        "20160525 13:30:00.023",
                        "20160525 13:30:00.038",
                        "20160525 13:30:00.048",
                        "20160525 13:30:00.048",
                        "20160525 13:30:00.048",
                    ]
                ).astype(f"M8[{unit}]"),
                "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
                "price": [51.95, 51.95, 720.77, 720.92, 98.00],
                "quantity": [75, 155, 100, 100, 100],
            },
            columns=["time", "ticker", "price", "quantity"],
        )

        quotes = pd.DataFrame(
            {
                "time": to_datetime(
                    [
                        "20160525 13:30:00.023",
                        "20160525 13:30:00.023",
                        "20160525 13:30:00.030",
                        "20160525 13:30:00.041",
                        "20160525 13:30:00.048",
                        "20160525 13:30:00.049",
                        "20160525 13:30:00.072",
                        "20160525 13:30:00.075",
                    ]
                ).astype(f"M8[{unit}]"),
                "ticker": ["GOOG", "MSFT", "MSFT", "MSFT", "GOOG", "AAPL", "GOOG", "MSFT"],
                "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
                "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03],
            },
            columns=["time", "ticker", "bid", "ask"],
        )

        merge_asof(trades, quotes, on="time", by="ticker")

        merge_asof(trades, quotes, on="time", by="ticker", tolerance=Timedelta("2ms"))

        expected = pd.DataFrame(
            {
                "time": to_datetime(
                    [
                        "20160525 13:30:00.023",
                        "20160525 13:30:00.038",
                        "20160525 13:30:00.048",
                        "20160525 13:30:00.048",
                        "20160525 13:30:00.048",
                    ]
                ).astype(f"M8[{unit}]"),
                "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
                "price": [51.95, 51.95, 720.77, 720.92, 98.00],
                "quantity": [75, 155, 100, 100, 100],
                "bid": [np.nan, 51.97, np.nan, np.nan, np.nan],
                "ask": [np.nan, 51.98, np.nan, np.nan, np.nan],
            },
            columns=["time", "ticker", "price", "quantity", "bid", "ask"],
        )

        result = merge_asof(
            trades,
            quotes,
            on="time",
            by="ticker",
            tolerance=Timedelta("10ms"),
            allow_exact_matches=False,
        )
        tm.assert_frame_equal(result, expected)

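    # test_examples3/test_examples4 below cover the non-default search
    # directions: direction="forward" matches the first right key >= the left
    # key, and direction="nearest" picks whichever candidate is closer.
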
    def test_examples3(self):
        """doc-string examples"""
        # GH14887

        left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
        right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]})

        expected = pd.DataFrame(
            {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, 6, np.nan]}
        )

        result = merge_asof(left, right, on="a", direction="forward")
        tm.assert_frame_equal(result, expected)

    def test_examples4(self):
        """doc-string examples"""
        # GH14887

        left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
        right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]})

        expected = pd.DataFrame(
            {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, 6, 7]}
        )

        result = merge_asof(left, right, on="a", direction="nearest")
        tm.assert_frame_equal(result, expected)

    def test_basic(self, trades, asof, quotes):
        expected = asof

        result = merge_asof(trades, quotes, on="time", by="ticker")
        tm.assert_frame_equal(result, expected)

    def test_basic_categorical(self, trades, asof, quotes):
        expected = asof
        trades.ticker = trades.ticker.astype("category")
        quotes.ticker = quotes.ticker.astype("category")
        expected.ticker = expected.ticker.astype("category")

        result = merge_asof(trades, quotes, on="time", by="ticker")
        tm.assert_frame_equal(result, expected)

    def test_basic_left_index(self, trades, asof, quotes):
        # GH14253
        expected = asof
        trades = trades.set_index("time")

        result = merge_asof(
            trades, quotes, left_index=True, right_on="time", by="ticker"
        )
        # left-only index uses right's index, oddly
        expected.index = result.index
        # time column appears after left's columns
        expected = expected[result.columns]
        tm.assert_frame_equal(result, expected)

    def test_basic_right_index(self, trades, asof, quotes):
        expected = asof
        quotes = quotes.set_index("time")

        result = merge_asof(
            trades, quotes, left_on="time", right_index=True, by="ticker"
        )
        tm.assert_frame_equal(result, expected)

    def test_basic_left_index_right_index(self, trades, asof, quotes):
        expected = asof.set_index("time")
        trades = trades.set_index("time")
        quotes = quotes.set_index("time")

        result = merge_asof(
            trades, quotes, left_index=True, right_index=True, by="ticker"
        )
        tm.assert_frame_equal(result, expected)

    def test_multi_index_left(self, trades, quotes):
        # MultiIndex is prohibited
        trades = trades.set_index(["time", "price"])
        quotes = quotes.set_index("time")
        with pytest.raises(MergeError, match="left can only have one index"):
            merge_asof(trades, quotes, left_index=True, right_index=True)

    def test_multi_index_right(self, trades, quotes):
        # MultiIndex is prohibited
        trades = trades.set_index("time")
        quotes = quotes.set_index(["time", "bid"])
        with pytest.raises(MergeError, match="right can only have one index"):
            merge_asof(trades, quotes, left_index=True, right_index=True)

    def test_on_and_index_left_on(self, trades, quotes):
        # "on" parameter and index together is prohibited
        trades = trades.set_index("time")
        quotes = quotes.set_index("time")
        msg = 'Can only pass argument "left_on" OR "left_index" not both.'
        with pytest.raises(MergeError, match=msg):
            merge_asof(
                trades, quotes, left_on="price", left_index=True, right_index=True
            )

    def test_on_and_index_right_on(self, trades, quotes):
        trades = trades.set_index("time")
        quotes = quotes.set_index("time")
        msg = 'Can only pass argument "right_on" OR "right_index" not both.'
        with pytest.raises(MergeError, match=msg):
            merge_asof(
                trades, quotes, right_on="bid", left_index=True, right_index=True
            )

    def test_basic_left_by_right_by(self, trades, asof, quotes):
        # GH14253
        expected = asof

        result = merge_asof(
            trades, quotes, on="time", left_by="ticker", right_by="ticker"
        )
        tm.assert_frame_equal(result, expected)

    def test_missing_right_by(self, trades, asof, quotes):
        expected = asof

        q = quotes[quotes.ticker != "MSFT"]
        result = merge_asof(trades, q, on="time", by="ticker")
        expected.loc[expected.ticker == "MSFT", ["bid", "ask"]] = np.nan
        tm.assert_frame_equal(result, expected)

    def test_multiby(self):
        # GH13936
        trades = pd.DataFrame(
            {
                "time": to_datetime(
                    [
                        "20160525 13:30:00.023",
                        "20160525 13:30:00.023",
                        "20160525 13:30:00.046",
                        "20160525 13:30:00.048",
                        "20160525 13:30:00.050",
                    ]
                ),
                "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
                "exch": ["ARCA", "NSDQ", "NSDQ", "BATS", "NSDQ"],
                "price": [51.95, 51.95, 720.77, 720.92, 98.00],
                "quantity": [75, 155, 100, 100, 100],
            },
            columns=["time", "ticker", "exch", "price", "quantity"],
        )

        quotes = pd.DataFrame(
            {
                "time": to_datetime(
                    [
                        "20160525 13:30:00.023",
                        "20160525 13:30:00.023",
                        "20160525 13:30:00.030",
                        "20160525 13:30:00.041",
                        "20160525 13:30:00.045",
                        "20160525 13:30:00.049",
                    ]
                ),
                "ticker": ["GOOG", "MSFT", "MSFT", "MSFT", "GOOG", "AAPL"],
                "exch": ["BATS", "NSDQ", "ARCA", "ARCA", "NSDQ", "ARCA"],
                "bid": [720.51, 51.95, 51.97, 51.99, 720.50, 97.99],
                "ask": [720.92, 51.96, 51.98, 52.00, 720.93, 98.01],
            },
            columns=["time", "ticker", "exch", "bid", "ask"],
        )

        expected = pd.DataFrame(
            {
                "time": to_datetime(
                    [
                        "20160525 13:30:00.023",
                        "20160525 13:30:00.023",
                        "20160525 13:30:00.046",
                        "20160525 13:30:00.048",
                        "20160525 13:30:00.050",
                    ]
                ),
                "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
                "exch": ["ARCA", "NSDQ", "NSDQ", "BATS", "NSDQ"],
                "price": [51.95, 51.95, 720.77, 720.92, 98.00],
                "quantity": [75, 155, 100, 100, 100],
                "bid": [np.nan, 51.95, 720.50, 720.51, np.nan],
                "ask": [np.nan, 51.96, 720.93, 720.92, np.nan],
            },
            columns=["time", "ticker", "exch", "price", "quantity", "bid", "ask"],
        )

        result = merge_asof(trades, quotes, on="time", by=["ticker", "exch"])
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("dtype", ["object", "string"])
    def test_multiby_heterogeneous_types(self, dtype):
        # GH13936
        trades = pd.DataFrame(
            {
                "time": to_datetime(
                    [
                        "20160525 13:30:00.023",
                        "20160525 13:30:00.023",
                        "20160525 13:30:00.046",
                        "20160525 13:30:00.048",
                        "20160525 13:30:00.050",
                    ]
                ),
                "ticker": [0, 0, 1, 1, 2],
                "exch": ["ARCA", "NSDQ", "NSDQ", "BATS", "NSDQ"],
                "price": [51.95, 51.95, 720.77, 720.92, 98.00],
                "quantity": [75, 155, 100, 100, 100],
            },
            columns=["time", "ticker", "exch", "price", "quantity"],
        )
        trades = trades.astype({"ticker": dtype, "exch": dtype})

        quotes = pd.DataFrame(
            {
                "time": to_datetime(
                    [
                        "20160525 13:30:00.023",
                        "20160525 13:30:00.023",
                        "20160525 13:30:00.030",
                        "20160525 13:30:00.041",
                        "20160525 13:30:00.045",
                        "20160525 13:30:00.049",
                    ]
                ),
                "ticker": [1, 0, 0, 0, 1, 2],
                "exch": ["BATS", "NSDQ", "ARCA", "ARCA", "NSDQ", "ARCA"],
                "bid": [720.51, 51.95, 51.97, 51.99, 720.50, 97.99],
                "ask": [720.92, 51.96, 51.98, 52.00, 720.93, 98.01],
            },
            columns=["time", "ticker", "exch", "bid", "ask"],
        )
        quotes = quotes.astype({"ticker": dtype, "exch": dtype})

        expected = pd.DataFrame(
            {
                "time": to_datetime(
                    [
                        "20160525 13:30:00.023",
                        "20160525 13:30:00.023",
                        "20160525 13:30:00.046",
                        "20160525 13:30:00.048",
                        "20160525 13:30:00.050",
                    ]
                ),
                "ticker": [0, 0, 1, 1, 2],
                "exch": ["ARCA", "NSDQ", "NSDQ", "BATS", "NSDQ"],
                "price": [51.95, 51.95, 720.77, 720.92, 98.00],
                "quantity": [75, 155, 100, 100, 100],
                "bid": [np.nan, 51.95, 720.50, 720.51, np.nan],
                "ask": [np.nan, 51.96, 720.93, 720.92, np.nan],
            },
            columns=["time", "ticker", "exch", "price", "quantity", "bid", "ask"],
        )
        expected = expected.astype({"ticker": dtype, "exch": dtype})

        result = merge_asof(trades, quotes, on="time", by=["ticker", "exch"])
        tm.assert_frame_equal(result, expected)

    def test_mismatched_index_dtype(self):
        # similar to test_multiby_indexed, but we change the dtype on left.index
        left = pd.DataFrame(
            [
                [to_datetime("20160602"), 1, "a"],
                [to_datetime("20160602"), 2, "a"],
                [to_datetime("20160603"), 1, "b"],
                [to_datetime("20160603"), 2, "b"],
            ],
            columns=["time", "k1", "k2"],
        ).set_index("time")
        # different dtype for the index
        left.index = left.index - pd.Timestamp(0)

        right = pd.DataFrame(
            [
                [to_datetime("20160502"), 1, "a", 1.0],
                [to_datetime("20160502"), 2, "a", 2.0],
                [to_datetime("20160503"), 1, "b", 3.0],
                [to_datetime("20160503"), 2, "b", 4.0],
            ],
            columns=["time", "k1", "k2", "value"],
        ).set_index("time")

        msg = "incompatible merge keys"
        with pytest.raises(MergeError, match=msg):
            merge_asof(left, right, left_index=True, right_index=True, by=["k1", "k2"])

    def test_multiby_indexed(self):
        # GH15676
        left = pd.DataFrame(
            [
                [to_datetime("20160602"), 1, "a"],
                [to_datetime("20160602"), 2, "a"],
                [to_datetime("20160603"), 1, "b"],
                [to_datetime("20160603"), 2, "b"],
            ],
            columns=["time", "k1", "k2"],
        ).set_index("time")

        right = pd.DataFrame(
            [
                [to_datetime("20160502"), 1, "a", 1.0],
                [to_datetime("20160502"), 2, "a", 2.0],
                [to_datetime("20160503"), 1, "b", 3.0],
                [to_datetime("20160503"), 2, "b", 4.0],
            ],
            columns=["time", "k1", "k2", "value"],
        ).set_index("time")

        expected = pd.DataFrame(
            [
                [to_datetime("20160602"), 1, "a", 1.0],
                [to_datetime("20160602"), 2, "a", 2.0],
                [to_datetime("20160603"), 1, "b", 3.0],
                [to_datetime("20160603"), 2, "b", 4.0],
            ],
            columns=["time", "k1", "k2", "value"],
        ).set_index("time")

        result = merge_asof(
            left, right, left_index=True, right_index=True, by=["k1", "k2"]
        )

        tm.assert_frame_equal(expected, result)

        with pytest.raises(
            MergeError, match="left_by and right_by must be the same length"
        ):
            merge_asof(
                left,
                right,
                left_index=True,
                right_index=True,
                left_by=["k1", "k2"],
                right_by=["k1"],
            )

    def test_basic2(self, datapath):
        expected = pd.DataFrame(
            [
                ["20160525 13:30:00.023", "MSFT", "51.95", "75", "NASDAQ", "51.95", "51.95"],
                ["20160525 13:30:00.038", "MSFT", "51.95", "155", "NASDAQ", "51.95", "51.95"],
                ["20160525 13:30:00.048", "GOOG", "720.77", "100", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.92", "100", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "200", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "300", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "600", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.93", "44", "NASDAQ", "720.5", "720.93"],
                ["20160525 13:30:00.074", "AAPL", "98.67", "478343", "NASDAQ", np.nan, np.nan],
                ["20160525 13:30:00.075", "AAPL", "98.67", "478343", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.66", "6", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.65", "30", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.65", "75", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.65", "20", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.65", "35", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.65", "10", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.55", "6", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.075", "AAPL", "98.55", "6", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "1000", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "200", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "300", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "400", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "600", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.56", "200", "ARCA", "98.55", "98.56"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "783", "NASDAQ", "51.92", "51.95"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "100", "NASDAQ", "51.92", "51.95"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "100", "NASDAQ", "51.92", "51.95"],
                ["20160525 13:30:00.084", "AAPL", "98.64", "40", "NASDAQ", "98.55", "98.56"],
                ["20160525 13:30:00.084", "AAPL", "98.55", "149", "EDGX", "98.55", "98.56"],
                ["20160525 13:30:00.086", "AAPL", "98.56", "500", "ARCA", "98.55", "98.63"],
                ["20160525 13:30:00.104", "AAPL", "98.63", "647", "EDGX", "98.62", "98.63"],
                ["20160525 13:30:00.104", "AAPL", "98.63", "300", "EDGX", "98.62", "98.63"],
                ["20160525 13:30:00.104", "AAPL", "98.63", "50", "NASDAQ", "98.62", "98.63"],
                ["20160525 13:30:00.104", "AAPL", "98.63", "50", "NASDAQ", "98.62", "98.63"],
                ["20160525 13:30:00.104", "AAPL", "98.63", "70", "NASDAQ", "98.62", "98.63"],
                ["20160525 13:30:00.104", "AAPL", "98.63", "70", "NASDAQ", "98.62", "98.63"],
                ["20160525 13:30:00.104", "AAPL", "98.63", "1", "NASDAQ", "98.62", "98.63"],
                ["20160525 13:30:00.104", "AAPL", "98.63", "62", "NASDAQ", "98.62", "98.63"],
                ["20160525 13:30:00.104", "AAPL", "98.63", "10", "NASDAQ", "98.62", "98.63"],
                ["20160525 13:30:00.104", "AAPL", "98.63", "100", "ARCA", "98.62", "98.63"],
                ["20160525 13:30:00.105", "AAPL", "98.63", "100", "ARCA", "98.62", "98.63"],
                ["20160525 13:30:00.105", "AAPL", "98.63", "700", "ARCA", "98.62", "98.63"],
                ["20160525 13:30:00.106", "AAPL", "98.63", "61", "EDGX", "98.62", "98.63"],
                ["20160525 13:30:00.107", "AAPL", "98.63", "100", "ARCA", "98.62", "98.63"],
                ["20160525 13:30:00.107", "AAPL", "98.63", "53", "ARCA", "98.62", "98.63"],
                ["20160525 13:30:00.108", "AAPL", "98.63", "100", "ARCA", "98.62", "98.63"],
                ["20160525 13:30:00.108", "AAPL", "98.63", "839", "ARCA", "98.62", "98.63"],
                ["20160525 13:30:00.115", "AAPL", "98.63", "5", "EDGX", "98.62", "98.63"],
                ["20160525 13:30:00.118", "AAPL", "98.63", "295", "EDGX", "98.62", "98.63"],
                ["20160525 13:30:00.118", "AAPL", "98.63", "5", "EDGX", "98.62", "98.63"],
                ["20160525 13:30:00.128", "AAPL", "98.63", "100", "NASDAQ", "98.62", "98.63"],
                ["20160525 13:30:00.128", "AAPL", "98.63", "100", "NASDAQ", "98.62", "98.63"],
                ["20160525 13:30:00.128", "MSFT", "51.92", "100", "ARCA", "51.92", "51.95"],
                ["20160525 13:30:00.129", "AAPL", "98.62", "100", "NASDAQ", "98.61", "98.63"],
                ["20160525 13:30:00.129", "AAPL", "98.62", "10", "NASDAQ", "98.61", "98.63"],
                ["20160525 13:30:00.129", "AAPL", "98.62", "59", "NASDAQ", "98.61", "98.63"],
                ["20160525 13:30:00.129", "AAPL", "98.62", "31", "NASDAQ", "98.61", "98.63"],
                ["20160525 13:30:00.129", "AAPL", "98.62", "69", "NASDAQ", "98.61", "98.63"],
                ["20160525 13:30:00.129", "AAPL", "98.62", "12", "NASDAQ", "98.61", "98.63"],
                ["20160525 13:30:00.129", "AAPL", "98.62", "12", "EDGX", "98.61", "98.63"],
                ["20160525 13:30:00.129", "AAPL", "98.62", "100", "ARCA", "98.61", "98.63"],
                ["20160525 13:30:00.129", "AAPL", "98.62", "100", "ARCA", "98.61", "98.63"],
                ["20160525 13:30:00.130", "MSFT", "51.95", "317", "ARCA", "51.93", "51.95"],
                ["20160525 13:30:00.130", "MSFT", "51.95", "283", "ARCA", "51.93", "51.95"],
                ["20160525 13:30:00.135", "MSFT", "51.93", "100", "EDGX", "51.92", "51.95"],
                ["20160525 13:30:00.135", "AAPL", "98.62", "100", "ARCA", "98.61", "98.62"],
                ["20160525 13:30:00.144", "AAPL", "98.62", "12", "NASDAQ", "98.61", "98.62"],
                ["20160525 13:30:00.144", "AAPL", "98.62", "88", "NASDAQ", "98.61", "98.62"],
                ["20160525 13:30:00.144", "AAPL", "98.62", "162", "NASDAQ", "98.61", "98.62"],
                ["20160525 13:30:00.144", "AAPL", "98.61", "100", "BATS", "98.61", "98.62"],
                ["20160525 13:30:00.144", "AAPL", "98.62", "61", "ARCA", "98.61", "98.62"],
                ["20160525 13:30:00.144", "AAPL", "98.62", "25", "ARCA", "98.61", "98.62"],
                ["20160525 13:30:00.144", "AAPL", "98.62", "14", "ARCA", "98.61", "98.62"],
                ["20160525 13:30:00.145", "AAPL", "98.62", "12", "ARCA", "98.6", "98.63"],
                ["20160525 13:30:00.145", "AAPL", "98.62", "100", "ARCA", "98.6", "98.63"],
                ["20160525 13:30:00.145", "AAPL", "98.63", "100", "NASDAQ", "98.6", "98.63"],
                ["20160525 13:30:00.145", "AAPL", "98.63", "100", "NASDAQ", "98.6", "98.63"],
            ],
            columns="time,ticker,price,quantity,marketCenter,bid,ask".split(","),
        )
        expected["price"] = expected["price"].astype("float64")
        expected["quantity"] = expected["quantity"].astype("int64")
        expected["bid"] = expected["bid"].astype("float64")
        expected["ask"] = expected["ask"].astype("float64")
        expected = self.prep_data(expected)

        trades = pd.DataFrame(
            [
                ["20160525 13:30:00.023", "MSFT", "51.9500", "75", "NASDAQ"],
                ["20160525 13:30:00.038", "MSFT", "51.9500", "155", "NASDAQ"],
                ["20160525 13:30:00.048", "GOOG", "720.7700", "100", "NASDAQ"],
                ["20160525 13:30:00.048", "GOOG", "720.9200", "100", "NASDAQ"],
                ["20160525 13:30:00.048", "GOOG", "720.9300", "200", "NASDAQ"],
                ["20160525 13:30:00.048", "GOOG", "720.9300", "300", "NASDAQ"],
                ["20160525 13:30:00.048", "GOOG", "720.9300", "600", "NASDAQ"],
                ["20160525 13:30:00.048", "GOOG", "720.9300", "44", "NASDAQ"],
                ["20160525 13:30:00.074", "AAPL", "98.6700", "478343", "NASDAQ"],
                ["20160525 13:30:00.075", "AAPL", "98.6700", "478343", "NASDAQ"],
                ["20160525 13:30:00.075", "AAPL", "98.6600", "6", "NASDAQ"],
                ["20160525 13:30:00.075", "AAPL", "98.6500", "30", "NASDAQ"],
                ["20160525 13:30:00.075", "AAPL", "98.6500", "75", "NASDAQ"],
                ["20160525 13:30:00.075", "AAPL", "98.6500", "20", "NASDAQ"],
                ["20160525 13:30:00.075", "AAPL", "98.6500", "35", "NASDAQ"],
                ["20160525 13:30:00.075", "AAPL", "98.6500", "10", "NASDAQ"],
                ["20160525 13:30:00.075", "AAPL", "98.5500", "6", "ARCA"],
                ["20160525 13:30:00.075", "AAPL", "98.5500", "6", "ARCA"],
                ["20160525 13:30:00.076", "AAPL", "98.5600", "1000", "ARCA"],
                ["20160525 13:30:00.076", "AAPL", "98.5600", "200", "ARCA"],
                ["20160525 13:30:00.076", "AAPL", "98.5600", "300", "ARCA"],
                ["20160525 13:30:00.076", "AAPL", "98.5600", "400", "ARCA"],
                ["20160525 13:30:00.076", "AAPL", "98.5600", "600", "ARCA"],
                ["20160525 13:30:00.076", "AAPL", "98.5600", "200", "ARCA"],
                ["20160525 13:30:00.078", "MSFT", "51.9500", "783", "NASDAQ"],
                ["20160525 13:30:00.078", "MSFT", "51.9500", "100", "NASDAQ"],
                ["20160525 13:30:00.078", "MSFT", "51.9500", "100", "NASDAQ"],
                ["20160525 13:30:00.084", "AAPL", "98.6400", "40", "NASDAQ"],
                ["20160525 13:30:00.084", "AAPL", "98.5500", "149", "EDGX"],
                ["20160525 13:30:00.086", "AAPL", "98.5600", "500", "ARCA"],
                ["20160525 13:30:00.104", "AAPL", "98.6300", "647", "EDGX"],
                ["20160525 13:30:00.104", "AAPL", "98.6300", "300", "EDGX"],
                ["20160525 13:30:00.104", "AAPL", "98.6300", "50", "NASDAQ"],
                ["20160525 13:30:00.104", "AAPL", "98.6300", "50", "NASDAQ"],
                ["20160525 13:30:00.104", "AAPL", "98.6300", "70", "NASDAQ"],
                ["20160525 13:30:00.104", "AAPL", "98.6300", "70", "NASDAQ"],
                ["20160525 13:30:00.104", "AAPL", "98.6300", "1", "NASDAQ"],
                ["20160525 13:30:00.104", "AAPL", "98.6300", "62", "NASDAQ"],
                ["20160525 13:30:00.104", "AAPL", "98.6300", "10", "NASDAQ"],
                ["20160525 13:30:00.104", "AAPL", "98.6300", "100", "ARCA"],
                ["20160525 13:30:00.105", "AAPL", "98.6300", "100", "ARCA"],
                ["20160525 13:30:00.105", "AAPL", "98.6300", "700", "ARCA"],
                ["20160525 13:30:00.106", "AAPL", "98.6300", "61", "EDGX"],
                ["20160525 13:30:00.107", "AAPL", "98.6300", "100", "ARCA"],
                ["20160525 13:30:00.107", "AAPL", "98.6300", "53", "ARCA"],
                ["20160525 13:30:00.108", "AAPL", "98.6300", "100", "ARCA"],
                ["20160525 13:30:00.108", "AAPL", "98.6300", "839", "ARCA"],
                ["20160525 13:30:00.115", "AAPL", "98.6300", "5", "EDGX"],
                ["20160525 13:30:00.118", "AAPL", "98.6300", "295", "EDGX"],
                ["20160525 13:30:00.118", "AAPL", "98.6300", "5", "EDGX"],
                ["20160525 13:30:00.128", "AAPL", "98.6300", "100", "NASDAQ"],
                ["20160525 13:30:00.128", "AAPL", "98.6300", "100", "NASDAQ"],
                ["20160525 13:30:00.128", "MSFT", "51.9200", "100", "ARCA"],
                ["20160525 13:30:00.129", "AAPL", "98.6200", "100", "NASDAQ"],
                ["20160525 13:30:00.129", "AAPL", "98.6200", "10", "NASDAQ"],
                ["20160525 13:30:00.129", "AAPL", "98.6200", "59", "NASDAQ"],
                ["20160525 13:30:00.129", "AAPL", "98.6200", "31", "NASDAQ"],
                ["20160525 13:30:00.129", "AAPL", "98.6200", "69", "NASDAQ"],
                ["20160525 13:30:00.129", "AAPL", "98.6200", "12", "NASDAQ"],
                ["20160525 13:30:00.129", "AAPL", "98.6200", "12", "EDGX"],
                ["20160525 13:30:00.129", "AAPL", "98.6200", "100", "ARCA"],
                ["20160525 13:30:00.129", "AAPL", "98.6200", "100", "ARCA"],
                ["20160525 13:30:00.130", "MSFT", "51.9500", "317", "ARCA"],
                ["20160525 13:30:00.130", "MSFT", "51.9500", "283", "ARCA"],
                ["20160525 13:30:00.135", "MSFT", "51.9300", "100", "EDGX"],
                ["20160525 13:30:00.135", "AAPL", "98.6200", "100", "ARCA"],
                ["20160525 13:30:00.144", "AAPL", "98.6200", "12", "NASDAQ"],
                ["20160525 13:30:00.144", "AAPL", "98.6200", "88", "NASDAQ"],
                ["20160525 13:30:00.144", "AAPL", "98.6200", "162", "NASDAQ"],
                ["20160525 13:30:00.144", "AAPL", "98.6100", "100", "BATS"],
                ["20160525 13:30:00.144", "AAPL", "98.6200", "61", "ARCA"],
                ["20160525 13:30:00.144", "AAPL", "98.6200", "25", "ARCA"],
                ["20160525 13:30:00.144", "AAPL", "98.6200", "14", "ARCA"],
                ["20160525 13:30:00.145", "AAPL", "98.6200", "12", "ARCA"],
                ["20160525 13:30:00.145", "AAPL", "98.6200", "100", "ARCA"],
                ["20160525 13:30:00.145", "AAPL", "98.6300", "100", "NASDAQ"],
                ["20160525 13:30:00.145", "AAPL", "98.6300", "100", "NASDAQ"],
            ],
            columns="time,ticker,price,quantity,marketCenter".split(","),
        )
        trades["price"] = trades["price"].astype("float64")
        trades["quantity"] = trades["quantity"].astype("int64")
        trades = self.prep_data(trades)

        quotes = pd.DataFrame(
            [
                ["20160525 13:30:00.023", "GOOG", "720.50", "720.93"],
                ["20160525 13:30:00.023", "MSFT", "51.95", "51.95"],
                ["20160525 13:30:00.041", "MSFT", "51.95", "51.95"],
                ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"],
                ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"],
                ["20160525 13:30:00.072", "GOOG", "720.50", "720.88"],
                ["20160525 13:30:00.075", "AAPL", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"],
                ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "51.95"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "51.95"],
                ["20160525 13:30:00.078", "MSFT", "51.95", "51.95"],
                ["20160525 13:30:00.078", "MSFT", "51.92", "51.95"],
                ["20160525 13:30:00.079", "MSFT", "51.92", "51.95"],
                ["20160525 13:30:00.080", "AAPL", "98.55", "98.56"],
                ["20160525 13:30:00.084", "AAPL", "98.55", "98.56"],
                ["20160525 13:30:00.086", "AAPL", "98.55", "98.63"],
                ["20160525 13:30:00.088", "AAPL", "98.65", "98.63"],
                ["20160525 13:30:00.089", "AAPL", "98.63", "98.63"],
                ["20160525 13:30:00.104", "AAPL", "98.63", "98.63"],
                ["20160525 13:30:00.104", "AAPL", "98.63", "98.63"],
                ["20160525 13:30:00.104", "AAPL", "98.63", "98.63"],
                ["20160525 13:30:00.104", "AAPL", "98.63", "98.63"],
                ["20160525 13:30:00.104", "AAPL", "98.62", "98.63"],
                ["20160525 13:30:00.105", "AAPL", "98.62", "98.63"],
                ["20160525 13:30:00.107", "AAPL", "98.62", "98.63"],
                ["20160525 13:30:00.115", "AAPL", "98.62", "98.63"],
                ["20160525 13:30:00.115", "AAPL", "98.62", "98.63"],
                ["20160525 13:30:00.118", "AAPL", "98.62", "98.63"],
                ["20160525 13:30:00.128", "AAPL", "98.62", "98.63"],
                ["20160525 13:30:00.128", "AAPL", "98.62", "98.63"],
                ["20160525 13:30:00.129", "AAPL", "98.62", "98.63"],
                ["20160525 13:30:00.129", "AAPL", "98.61", "98.63"],
                ["20160525 13:30:00.129", "AAPL", "98.62", "98.63"],
                ["20160525 13:30:00.129", "AAPL", "98.62", "98.63"],
                ["20160525 13:30:00.129", "AAPL", "98.61", "98.63"],
                ["20160525 13:30:00.130", "MSFT", "51.93", "51.95"],
                ["20160525 13:30:00.130", "MSFT", "51.93", "51.95"],
                ["20160525 13:30:00.130", "AAPL", "98.61", "98.63"],
                ["20160525 13:30:00.131", "AAPL", "98.61", "98.62"],
                ["20160525 13:30:00.131", "AAPL", "98.61", "98.62"],
                ["20160525 13:30:00.135", "MSFT", "51.92", "51.95"],
                ["20160525 13:30:00.135", "AAPL", "98.61", "98.62"],
                ["20160525 13:30:00.136", "AAPL", "98.61", "98.62"],
                ["20160525 13:30:00.136", "AAPL", "98.61", "98.62"],
                ["20160525 13:30:00.144", "AAPL", "98.61", "98.62"],
                ["20160525 13:30:00.144", "AAPL", "98.61", "98.62"],
                ["20160525 13:30:00.145", "AAPL", "98.61", "98.62"],
                ["20160525 13:30:00.145", "AAPL", "98.61", "98.63"],
                ["20160525 13:30:00.145", "AAPL", "98.61", "98.63"],
                ["20160525 13:30:00.145", "AAPL", "98.60", "98.63"],
                ["20160525 13:30:00.145", "AAPL", "98.61", "98.63"],
                ["20160525 13:30:00.145", "AAPL", "98.60", "98.63"],
            ],
            columns="time,ticker,bid,ask".split(","),
        )
        quotes["bid"] = quotes["bid"].astype("float64")
        quotes["ask"] = quotes["ask"].astype("float64")
        quotes = self.prep_data(quotes, dedupe=True)

        result = merge_asof(trades, quotes, on="time", by="ticker")
        tm.assert_frame_equal(result, expected)

    def test_basic_no_by(self, trades, asof, quotes):
        f = (
            lambda x: x[x.ticker == "MSFT"]
            .drop("ticker", axis=1)
            .reset_index(drop=True)
        )

        # just use a single ticker
        expected = f(asof)
        trades = f(trades)
        quotes = f(quotes)

        result = merge_asof(trades, quotes, on="time")
        tm.assert_frame_equal(result, expected)

def test_valid_join_keys(self, trades, quotes):
|
||
|
msg = r"incompatible merge keys \[1\] .* must be the same type"
|
||
|
|
||
|
with pytest.raises(MergeError, match=msg):
|
||
|
merge_asof(trades, quotes, left_on="time", right_on="bid", by="ticker")
|
||
|
|
||
|
with pytest.raises(MergeError, match="can only asof on a key for left"):
|
||
|
merge_asof(trades, quotes, on=["time", "ticker"], by="ticker")
|
||
|
|
||
|
with pytest.raises(MergeError, match="can only asof on a key for left"):
|
||
|
merge_asof(trades, quotes, by="ticker")
|
||
|
|
||
|
def test_with_duplicates(self, datapath, trades, quotes, asof):
|
||
|
q = (
|
||
|
pd.concat([quotes, quotes])
|
||
|
.sort_values(["time", "ticker"])
|
||
|
.reset_index(drop=True)
|
||
|
)
|
||
|
result = merge_asof(trades, q, on="time", by="ticker")
|
||
|
expected = self.prep_data(asof)
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_with_duplicates_no_on(self):
|
||
|
df1 = pd.DataFrame({"key": [1, 1, 3], "left_val": [1, 2, 3]})
|
||
|
df2 = pd.DataFrame({"key": [1, 2, 2], "right_val": [1, 2, 3]})
|
||
|
result = merge_asof(df1, df2, on="key")
|
||
|
expected = pd.DataFrame(
|
||
|
{"key": [1, 1, 3], "left_val": [1, 2, 3], "right_val": [1, 1, 3]}
|
||
|
)
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_valid_allow_exact_matches(self, trades, quotes):
|
||
|
msg = "allow_exact_matches must be boolean, passed foo"
|
||
|
|
||
|
with pytest.raises(MergeError, match=msg):
|
||
|
merge_asof(
|
||
|
trades, quotes, on="time", by="ticker", allow_exact_matches="foo"
|
||
|
)
|
||
|
|
||
|
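    # The tolerance argument has to be compatible with the dtype of the "on"
    # key (a Timedelta for datetime keys, an int for integer keys) and must
    # be positive; the cases below exercise both the valid and invalid paths.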
    def test_valid_tolerance(self, trades, quotes):
        # dti
        merge_asof(trades, quotes, on="time", by="ticker", tolerance=Timedelta("1s"))

        # integer
        merge_asof(
            trades.reset_index(),
            quotes.reset_index(),
            on="index",
            by="ticker",
            tolerance=1,
        )

        msg = r"incompatible tolerance .*, must be compat with type .*"

        # incompat
        with pytest.raises(MergeError, match=msg):
            merge_asof(trades, quotes, on="time", by="ticker", tolerance=1)

        # invalid
        with pytest.raises(MergeError, match=msg):
            merge_asof(
                trades.reset_index(),
                quotes.reset_index(),
                on="index",
                by="ticker",
                tolerance=1.0,
            )

        msg = "tolerance must be positive"

        # invalid negative
        with pytest.raises(MergeError, match=msg):
            merge_asof(
                trades, quotes, on="time", by="ticker", tolerance=-Timedelta("1s")
            )

        with pytest.raises(MergeError, match=msg):
            merge_asof(
                trades.reset_index(),
                quotes.reset_index(),
                on="index",
                by="ticker",
                tolerance=-1,
            )

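    # merge_asof requires both inputs to already be sorted on the "on" key;
    # unsorted left or right input should raise a ValueError.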
    def test_non_sorted(self, trades, quotes):
        trades = trades.sort_values("time", ascending=False)
        quotes = quotes.sort_values("time", ascending=False)

        # we require that both trades and quotes are already sorted on time
        assert not trades.time.is_monotonic_increasing
        assert not quotes.time.is_monotonic_increasing
        with pytest.raises(ValueError, match="left keys must be sorted"):
            merge_asof(trades, quotes, on="time", by="ticker")

        trades = trades.sort_values("time")
        assert trades.time.is_monotonic_increasing
        assert not quotes.time.is_monotonic_increasing
        with pytest.raises(ValueError, match="right keys must be sorted"):
            merge_asof(trades, quotes, on="time", by="ticker")

        quotes = quotes.sort_values("time")
        assert trades.time.is_monotonic_increasing
        assert quotes.time.is_monotonic_increasing

        # ok, though has dupes
        merge_asof(trades, quotes, on="time", by="ticker")

    @pytest.mark.parametrize(
        "tolerance_ts",
        [Timedelta("1day"), datetime.timedelta(days=1)],
        ids=["Timedelta", "datetime.timedelta"],
    )
    def test_tolerance(self, tolerance_ts, trades, quotes, tolerance):
        result = merge_asof(
            trades, quotes, on="time", by="ticker", tolerance=tolerance_ts
        )
        expected = tolerance
        tm.assert_frame_equal(result, expected)

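    # With direction="forward" the match is the first right row whose key is
    # greater than or equal to the left key, provided it lies within the
    # tolerance (here 5 -> 7 is 2 away, so it falls back to NaN).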
    def test_tolerance_forward(self):
        # GH14887

        left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
        right = pd.DataFrame({"a": [1, 2, 3, 7, 11], "right_val": [1, 2, 3, 7, 11]})

        expected = pd.DataFrame(
            {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, np.nan, 11]}
        )

        result = merge_asof(left, right, on="a", direction="forward", tolerance=1)
        tm.assert_frame_equal(result, expected)

    def test_tolerance_nearest(self):
        # GH14887

        left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
        right = pd.DataFrame({"a": [1, 2, 3, 7, 11], "right_val": [1, 2, 3, 7, 11]})

        expected = pd.DataFrame(
            {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, np.nan, 11]}
        )

        result = merge_asof(left, right, on="a", direction="nearest", tolerance=1)
        tm.assert_frame_equal(result, expected)

    def test_tolerance_tz(self, unit):
        # GH 14844
        left = pd.DataFrame(
            {
                "date": pd.date_range(
                    start=to_datetime("2016-01-02"),
                    freq="D",
                    periods=5,
                    tz=pytz.timezone("UTC"),
                    unit=unit,
                ),
                "value1": np.arange(5),
            }
        )
        right = pd.DataFrame(
            {
                "date": pd.date_range(
                    start=to_datetime("2016-01-01"),
                    freq="D",
                    periods=5,
                    tz=pytz.timezone("UTC"),
                    unit=unit,
                ),
                "value2": list("ABCDE"),
            }
        )
        result = merge_asof(left, right, on="date", tolerance=Timedelta("1 day"))

        expected = pd.DataFrame(
            {
                "date": pd.date_range(
                    start=to_datetime("2016-01-02"),
                    freq="D",
                    periods=5,
                    tz=pytz.timezone("UTC"),
                    unit=unit,
                ),
                "value1": np.arange(5),
                "value2": list("BCDEE"),
            }
        )
        tm.assert_frame_equal(result, expected)

    def test_tolerance_float(self):
        # GH22981
        left = pd.DataFrame({"a": [1.1, 3.5, 10.9], "left_val": ["a", "b", "c"]})
        right = pd.DataFrame(
            {"a": [1.0, 2.5, 3.3, 7.5, 11.5], "right_val": [1.0, 2.5, 3.3, 7.5, 11.5]}
        )

        expected = pd.DataFrame(
            {
                "a": [1.1, 3.5, 10.9],
                "left_val": ["a", "b", "c"],
                "right_val": [1, 3.3, np.nan],
            }
        )

        result = merge_asof(left, right, on="a", direction="nearest", tolerance=0.5)
        tm.assert_frame_equal(result, expected)

    def test_index_tolerance(self, trades, quotes, tolerance):
        # GH 15135
        expected = tolerance.set_index("time")
        trades = trades.set_index("time")
        quotes = quotes.set_index("time")

        result = merge_asof(
            trades,
            quotes,
            left_index=True,
            right_index=True,
            by="ticker",
            tolerance=Timedelta("1day"),
        )
        tm.assert_frame_equal(result, expected)

    def test_allow_exact_matches(self, trades, quotes, allow_exact_matches):
        result = merge_asof(
            trades, quotes, on="time", by="ticker", allow_exact_matches=False
        )
        expected = allow_exact_matches
        tm.assert_frame_equal(result, expected)

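    # With allow_exact_matches=False a forward search skips equal keys and
    # takes the next strictly greater one (1 -> 2, 5 -> 7, 10 -> 11).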
    def test_allow_exact_matches_forward(self):
        # GH14887

        left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
        right = pd.DataFrame({"a": [1, 2, 3, 7, 11], "right_val": [1, 2, 3, 7, 11]})

        expected = pd.DataFrame(
            {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [2, 7, 11]}
        )

        result = merge_asof(
            left, right, on="a", direction="forward", allow_exact_matches=False
        )
        tm.assert_frame_equal(result, expected)

    def test_allow_exact_matches_nearest(self):
        # GH14887

        left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
        right = pd.DataFrame({"a": [1, 2, 3, 7, 11], "right_val": [1, 2, 3, 7, 11]})

        expected = pd.DataFrame(
            {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [2, 3, 11]}
        )

        result = merge_asof(
            left, right, on="a", direction="nearest", allow_exact_matches=False
        )
        tm.assert_frame_equal(result, expected)

    def test_allow_exact_matches_and_tolerance(
        self, trades, quotes, allow_exact_matches_and_tolerance
    ):
        result = merge_asof(
            trades,
            quotes,
            on="time",
            by="ticker",
            tolerance=Timedelta("100ms"),
            allow_exact_matches=False,
        )
        expected = allow_exact_matches_and_tolerance
        tm.assert_frame_equal(result, expected)

    def test_allow_exact_matches_and_tolerance2(self):
        # GH 13695
        df1 = pd.DataFrame(
            {"time": to_datetime(["2016-07-15 13:30:00.030"]), "username": ["bob"]}
        )
        df2 = pd.DataFrame(
            {
                "time": to_datetime(
                    ["2016-07-15 13:30:00.000", "2016-07-15 13:30:00.030"]
                ),
                "version": [1, 2],
            }
        )

        result = merge_asof(df1, df2, on="time")
        expected = pd.DataFrame(
            {
                "time": to_datetime(["2016-07-15 13:30:00.030"]),
                "username": ["bob"],
                "version": [2],
            }
        )
        tm.assert_frame_equal(result, expected)

        result = merge_asof(df1, df2, on="time", allow_exact_matches=False)
        expected = pd.DataFrame(
            {
                "time": to_datetime(["2016-07-15 13:30:00.030"]),
                "username": ["bob"],
                "version": [1],
            }
        )
        tm.assert_frame_equal(result, expected)

        result = merge_asof(
            df1,
            df2,
            on="time",
            allow_exact_matches=False,
            tolerance=Timedelta("10ms"),
        )
        expected = pd.DataFrame(
            {
                "time": to_datetime(["2016-07-15 13:30:00.030"]),
                "username": ["bob"],
                "version": [np.nan],
            }
        )
        tm.assert_frame_equal(result, expected)

    def test_allow_exact_matches_and_tolerance3(self):
        # GH 13709
        df1 = pd.DataFrame(
            {
                "time": to_datetime(
                    ["2016-07-15 13:30:00.030", "2016-07-15 13:30:00.030"]
                ),
                "username": ["bob", "charlie"],
            }
        )
        df2 = pd.DataFrame(
            {
                "time": to_datetime(
                    ["2016-07-15 13:30:00.000", "2016-07-15 13:30:00.030"]
                ),
                "version": [1, 2],
            }
        )

        result = merge_asof(
            df1,
            df2,
            on="time",
            allow_exact_matches=False,
            tolerance=Timedelta("10ms"),
        )
        expected = pd.DataFrame(
            {
                "time": to_datetime(
                    ["2016-07-15 13:30:00.030", "2016-07-15 13:30:00.030"]
                ),
                "username": ["bob", "charlie"],
                "version": [np.nan, np.nan],
            }
        )
        tm.assert_frame_equal(result, expected)

    def test_allow_exact_matches_and_tolerance_forward(self):
        # GH14887

        left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
        right = pd.DataFrame({"a": [1, 3, 4, 6, 11], "right_val": [1, 3, 4, 6, 11]})

        expected = pd.DataFrame(
            {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [np.nan, 6, 11]}
        )

        result = merge_asof(
            left,
            right,
            on="a",
            direction="forward",
            allow_exact_matches=False,
            tolerance=1,
        )
        tm.assert_frame_equal(result, expected)

    def test_allow_exact_matches_and_tolerance_nearest(self):
        # GH14887

        left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
        right = pd.DataFrame({"a": [1, 3, 4, 6, 11], "right_val": [1, 3, 4, 7, 11]})

        expected = pd.DataFrame(
            {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [np.nan, 4, 11]}
        )

        result = merge_asof(
            left,
            right,
            on="a",
            direction="nearest",
            allow_exact_matches=False,
            tolerance=1,
        )
        tm.assert_frame_equal(result, expected)

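    # Forward matching combined with "by": candidates are restricted to rows
    # in the same "b" group before the forward match is chosen.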
    def test_forward_by(self):
        # GH14887

        left = pd.DataFrame(
            {
                "a": [1, 5, 10, 12, 15],
                "b": ["X", "X", "Y", "Z", "Y"],
                "left_val": ["a", "b", "c", "d", "e"],
            }
        )
        right = pd.DataFrame(
            {
                "a": [1, 6, 11, 15, 16],
                "b": ["X", "Z", "Y", "Z", "Y"],
                "right_val": [1, 6, 11, 15, 16],
            }
        )

        expected = pd.DataFrame(
            {
                "a": [1, 5, 10, 12, 15],
                "b": ["X", "X", "Y", "Z", "Y"],
                "left_val": ["a", "b", "c", "d", "e"],
                "right_val": [1, np.nan, 11, 15, 16],
            }
        )

        result = merge_asof(left, right, on="a", by="b", direction="forward")
        tm.assert_frame_equal(result, expected)

    def test_nearest_by(self):
        # GH14887

        left = pd.DataFrame(
            {
                "a": [1, 5, 10, 12, 15],
                "b": ["X", "X", "Z", "Z", "Y"],
                "left_val": ["a", "b", "c", "d", "e"],
            }
        )
        right = pd.DataFrame(
            {
                "a": [1, 6, 11, 15, 16],
                "b": ["X", "Z", "Z", "Z", "Y"],
                "right_val": [1, 6, 11, 15, 16],
            }
        )

        expected = pd.DataFrame(
            {
                "a": [1, 5, 10, 12, 15],
                "b": ["X", "X", "Z", "Z", "Y"],
                "left_val": ["a", "b", "c", "d", "e"],
                "right_val": [1, 1, 11, 11, 16],
            }
        )

        result = merge_asof(left, right, on="a", by="b", direction="nearest")
        tm.assert_frame_equal(result, expected)

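    # Backward asof join on "time", matching only within each integer "key"
    # group; the integer "by" key exercises the type-specialized code path.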
    def test_by_int(self):
        # we specialize by type, so test that this is correct
        df1 = pd.DataFrame(
            {
                "time": to_datetime(
                    [
                        "20160525 13:30:00.020",
                        "20160525 13:30:00.030",
                        "20160525 13:30:00.040",
                        "20160525 13:30:00.050",
                        "20160525 13:30:00.060",
                    ]
                ),
                "key": [1, 2, 1, 3, 2],
                "value1": [1.1, 1.2, 1.3, 1.4, 1.5],
            },
            columns=["time", "key", "value1"],
        )

        df2 = pd.DataFrame(
            {
                "time": to_datetime(
                    [
                        "20160525 13:30:00.015",
                        "20160525 13:30:00.020",
                        "20160525 13:30:00.025",
                        "20160525 13:30:00.035",
                        "20160525 13:30:00.040",
                        "20160525 13:30:00.055",
                        "20160525 13:30:00.060",
                        "20160525 13:30:00.065",
                    ]
                ),
                "key": [2, 1, 1, 3, 2, 1, 2, 3],
                "value2": [2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8],
            },
            columns=["time", "key", "value2"],
        )

        result = merge_asof(df1, df2, on="time", by="key")

        expected = pd.DataFrame(
            {
                "time": to_datetime(
                    [
                        "20160525 13:30:00.020",
                        "20160525 13:30:00.030",
                        "20160525 13:30:00.040",
                        "20160525 13:30:00.050",
                        "20160525 13:30:00.060",
                    ]
                ),
                "key": [1, 2, 1, 3, 2],
                "value1": [1.1, 1.2, 1.3, 1.4, 1.5],
                "value2": [2.2, 2.1, 2.3, 2.4, 2.7],
            },
            columns=["time", "key", "value1", "value2"],
        )

        tm.assert_frame_equal(result, expected)

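    # Default backward matching on a float "on" key: each price picks up the
    # "mpv" of the largest right-hand price that does not exceed it.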
    def test_on_float(self):
        # mimics how to determine the minimum-price variation
        df1 = pd.DataFrame(
            {
                "price": [5.01, 0.0023, 25.13, 340.05, 30.78, 1040.90, 0.0078],
                "symbol": list("ABCDEFG"),
            },
            columns=["symbol", "price"],
        )

        df2 = pd.DataFrame(
            {"price": [0.0, 1.0, 100.0], "mpv": [0.0001, 0.01, 0.05]},
            columns=["price", "mpv"],
        )

        df1 = df1.sort_values("price").reset_index(drop=True)

        result = merge_asof(df1, df2, on="price")

        expected = pd.DataFrame(
            {
                "symbol": list("BGACEDF"),
                "price": [0.0023, 0.0078, 5.01, 25.13, 30.78, 340.05, 1040.90],
                "mpv": [0.0001, 0.0001, 0.01, 0.01, 0.01, 0.05, 0.05],
            },
            columns=["symbol", "price", "mpv"],
        )

        tm.assert_frame_equal(result, expected)

    def test_on_specialized_type(self, any_real_numpy_dtype):
        # see gh-13936
        dtype = np.dtype(any_real_numpy_dtype).type

        df1 = pd.DataFrame(
            {"value": [5, 2, 25, 100, 78, 120, 79], "symbol": list("ABCDEFG")},
            columns=["symbol", "value"],
        )
        df1.value = dtype(df1.value)

        df2 = pd.DataFrame(
            {"value": [0, 80, 120, 125], "result": list("xyzw")},
            columns=["value", "result"],
        )
        df2.value = dtype(df2.value)

        df1 = df1.sort_values("value").reset_index(drop=True)
        result = merge_asof(df1, df2, on="value")

        expected = pd.DataFrame(
            {
                "symbol": list("BACEGDF"),
                "value": [2, 5, 25, 78, 79, 100, 120],
                "result": list("xxxxxyz"),
            },
            columns=["symbol", "value", "result"],
        )
        expected.value = dtype(expected.value)

        tm.assert_frame_equal(result, expected)

    def test_on_specialized_type_by_int(self, any_real_numpy_dtype):
        # see gh-13936
        dtype = np.dtype(any_real_numpy_dtype).type

        df1 = pd.DataFrame(
            {
                "value": [5, 2, 25, 100, 78, 120, 79],
                "key": [1, 2, 3, 2, 3, 1, 2],
                "symbol": list("ABCDEFG"),
            },
            columns=["symbol", "key", "value"],
        )
        df1.value = dtype(df1.value)

        df2 = pd.DataFrame(
            {"value": [0, 80, 120, 125], "key": [1, 2, 2, 3], "result": list("xyzw")},
            columns=["value", "key", "result"],
        )
        df2.value = dtype(df2.value)

        df1 = df1.sort_values("value").reset_index(drop=True)
        result = merge_asof(df1, df2, on="value", by="key")

        expected = pd.DataFrame(
            {
                "symbol": list("BACEGDF"),
                "key": [2, 1, 3, 3, 2, 2, 1],
                "value": [2, 5, 25, 78, 79, 100, 120],
                "result": [np.nan, "x", np.nan, np.nan, np.nan, "y", "x"],
            },
            columns=["symbol", "key", "value", "result"],
        )
        expected.value = dtype(expected.value)

        tm.assert_frame_equal(result, expected)

    def test_on_float_by_int(self):
        # type specialize both "by" and "on" parameters
        df1 = pd.DataFrame(
            {
                "symbol": list("AAABBBCCC"),
                "exch": [1, 2, 3, 1, 2, 3, 1, 2, 3],
                "price": [
                    3.26,
                    3.2599,
                    3.2598,
                    12.58,
                    12.59,
                    12.5,
                    378.15,
                    378.2,
                    378.25,
                ],
            },
            columns=["symbol", "exch", "price"],
        )

        df2 = pd.DataFrame(
            {
                "exch": [1, 1, 1, 2, 2, 2, 3, 3, 3],
                "price": [0.0, 1.0, 100.0, 0.0, 5.0, 100.0, 0.0, 5.0, 1000.0],
                "mpv": [0.0001, 0.01, 0.05, 0.0001, 0.01, 0.1, 0.0001, 0.25, 1.0],
            },
            columns=["exch", "price", "mpv"],
        )

        df1 = df1.sort_values("price").reset_index(drop=True)
        df2 = df2.sort_values("price").reset_index(drop=True)

        result = merge_asof(df1, df2, on="price", by="exch")

        expected = pd.DataFrame(
            {
                "symbol": list("AAABBBCCC"),
                "exch": [3, 2, 1, 3, 1, 2, 1, 2, 3],
                "price": [
                    3.2598,
                    3.2599,
                    3.26,
                    12.5,
                    12.58,
                    12.59,
                    378.15,
                    378.2,
                    378.25,
                ],
                "mpv": [0.0001, 0.0001, 0.01, 0.25, 0.01, 0.01, 0.05, 0.1, 0.25],
            },
            columns=["symbol", "exch", "price", "mpv"],
        )

        tm.assert_frame_equal(result, expected)

    def test_merge_datatype_error_raises(self, using_infer_string):
        if using_infer_string:
            msg = "incompatible merge keys"
        else:
            msg = r"Incompatible merge dtype, .*, both sides must have numeric dtype"

        left = pd.DataFrame({"left_val": [1, 5, 10], "a": ["a", "b", "c"]})
        right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7], "a": [1, 2, 3, 6, 7]})

        with pytest.raises(MergeError, match=msg):
            merge_asof(left, right, on="a")

    def test_merge_datatype_categorical_error_raises(self):
        msg = (
            r"incompatible merge keys \[0\] .* both sides category, "
            "but not equal ones"
        )

        left = pd.DataFrame(
            {"left_val": [1, 5, 10], "a": pd.Categorical(["a", "b", "c"])}
        )
        right = pd.DataFrame(
            {
                "right_val": [1, 2, 3, 6, 7],
                "a": pd.Categorical(["a", "X", "c", "X", "b"]),
            }
        )

        with pytest.raises(MergeError, match=msg):
            merge_asof(left, right, on="a")

    def test_merge_groupby_multiple_column_with_categorical_column(self):
        # GH 16454
        df = pd.DataFrame({"x": [0], "y": [0], "z": pd.Categorical([0])})
        result = merge_asof(df, df, on="x", by=["y", "z"])
        expected = pd.DataFrame({"x": [0], "y": [0], "z": pd.Categorical([0])})
        tm.assert_frame_equal(result, expected)

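    # Null values in the merge key are rejected on whichever side they
    # appear, for both numeric and datetime keys.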
    @pytest.mark.parametrize(
        "func", [lambda x: x, lambda x: to_datetime(x)], ids=["numeric", "datetime"]
    )
    @pytest.mark.parametrize("side", ["left", "right"])
    def test_merge_on_nans(self, func, side):
        # GH 23189
        msg = f"Merge keys contain null values on {side} side"
        nulls = func([1.0, 5.0, np.nan])
        non_nulls = func([1.0, 5.0, 10.0])
        df_null = pd.DataFrame({"a": nulls, "left_val": ["a", "b", "c"]})
        df = pd.DataFrame({"a": non_nulls, "right_val": [1, 6, 11]})

        with pytest.raises(ValueError, match=msg):
            if side == "left":
                merge_asof(df_null, df, on="a")
            else:
                merge_asof(df, df_null, on="a")

    def test_by_nullable(self, any_numeric_ea_dtype, using_infer_string):
        # Note: this test passes if instead of using pd.array we use
        # np.array([np.nan, 1]). Other than that, I (@jbrockmendel)
        # have NO IDEA what the expected behavior is.
        # TODO(GH#32306): may be relevant to the expected behavior here.

        arr = pd.array([pd.NA, 0, 1], dtype=any_numeric_ea_dtype)
        if arr.dtype.kind in ["i", "u"]:
            max_val = np.iinfo(arr.dtype.numpy_dtype).max
        else:
            max_val = np.finfo(arr.dtype.numpy_dtype).max
        # set value s.t. (at least for integer dtypes) arr._values_for_argsort
        # is not an injection
        arr[2] = max_val

        left = pd.DataFrame(
            {
                "by_col1": arr,
                "by_col2": ["HELLO", "To", "You"],
                "on_col": [2, 4, 6],
                "value": ["a", "c", "e"],
            }
        )
        right = pd.DataFrame(
            {
                "by_col1": arr,
                "by_col2": ["WORLD", "Wide", "Web"],
                "on_col": [1, 2, 6],
                "value": ["b", "d", "f"],
            }
        )

        result = merge_asof(left, right, by=["by_col1", "by_col2"], on="on_col")
        expected = pd.DataFrame(
            {
                "by_col1": arr,
                "by_col2": ["HELLO", "To", "You"],
                "on_col": [2, 4, 6],
                "value_x": ["a", "c", "e"],
            }
        )
        expected["value_y"] = np.array([np.nan, np.nan, np.nan], dtype=object)
        if using_infer_string:
            expected["value_y"] = expected["value_y"].astype("string[pyarrow_numpy]")
        tm.assert_frame_equal(result, expected)

    def test_merge_by_col_tz_aware(self):
        # GH 21184
        left = pd.DataFrame(
            {
                "by_col": pd.DatetimeIndex(["2018-01-01"]).tz_localize("UTC"),
                "on_col": [2],
                "values": ["a"],
            }
        )
        right = pd.DataFrame(
            {
                "by_col": pd.DatetimeIndex(["2018-01-01"]).tz_localize("UTC"),
                "on_col": [1],
                "values": ["b"],
            }
        )
        result = merge_asof(left, right, by="by_col", on="on_col")
        expected = pd.DataFrame(
            [[pd.Timestamp("2018-01-01", tz="UTC"), 2, "a", "b"]],
            columns=["by_col", "on_col", "values_x", "values_y"],
        )
        tm.assert_frame_equal(result, expected)

    def test_by_mixed_tz_aware(self, using_infer_string):
        # GH 26649
        left = pd.DataFrame(
            {
                "by_col1": pd.DatetimeIndex(["2018-01-01"]).tz_localize("UTC"),
                "by_col2": ["HELLO"],
                "on_col": [2],
                "value": ["a"],
            }
        )
        right = pd.DataFrame(
            {
                "by_col1": pd.DatetimeIndex(["2018-01-01"]).tz_localize("UTC"),
                "by_col2": ["WORLD"],
                "on_col": [1],
                "value": ["b"],
            }
        )
        result = merge_asof(left, right, by=["by_col1", "by_col2"], on="on_col")
        expected = pd.DataFrame(
            [[pd.Timestamp("2018-01-01", tz="UTC"), "HELLO", 2, "a"]],
            columns=["by_col1", "by_col2", "on_col", "value_x"],
        )
        expected["value_y"] = np.array([np.nan], dtype=object)
        if using_infer_string:
            expected["value_y"] = expected["value_y"].astype("string[pyarrow_numpy]")
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("dtype", ["float64", "int16", "m8[ns]", "M8[us]"])
    def test_by_dtype(self, dtype):
        # GH 55453, GH 22794
        left = pd.DataFrame(
            {
                "by_col": np.array([1], dtype=dtype),
                "on_col": [2],
                "value": ["a"],
            }
        )
        right = pd.DataFrame(
            {
                "by_col": np.array([1], dtype=dtype),
                "on_col": [1],
                "value": ["b"],
            }
        )
        result = merge_asof(left, right, by="by_col", on="on_col")
        expected = pd.DataFrame(
            {
                "by_col": np.array([1], dtype=dtype),
                "on_col": [2],
                "value_x": ["a"],
                "value_y": ["b"],
            }
        )
        tm.assert_frame_equal(result, expected)

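    # timedelta64 "on" keys with direction="nearest": a match counts only if
    # the nearest right key lies within the 1ms tolerance.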
    def test_timedelta_tolerance_nearest(self, unit):
        # GH 27642
        if unit == "s":
            pytest.skip(
                "This test is invalid with unit='s' because that would "
                "round left['time']"
            )

        left = pd.DataFrame(
            list(zip([0, 5, 10, 15, 20, 25], [0, 1, 2, 3, 4, 5])),
            columns=["time", "left"],
        )

        left["time"] = pd.to_timedelta(left["time"], "ms").astype(f"m8[{unit}]")

        right = pd.DataFrame(
            list(zip([0, 3, 9, 12, 15, 18], [0, 1, 2, 3, 4, 5])),
            columns=["time", "right"],
        )

        right["time"] = pd.to_timedelta(right["time"], "ms").astype(f"m8[{unit}]")

        expected = pd.DataFrame(
            list(
                zip(
                    [0, 5, 10, 15, 20, 25],
                    [0, 1, 2, 3, 4, 5],
                    [0, np.nan, 2, 4, np.nan, np.nan],
                )
            ),
            columns=["time", "left", "right"],
        )

        expected["time"] = pd.to_timedelta(expected["time"], "ms").astype(f"m8[{unit}]")

        result = merge_asof(
            left, right, on="time", tolerance=Timedelta("1ms"), direction="nearest"
        )

        tm.assert_frame_equal(result, expected)

    def test_int_type_tolerance(self, any_int_dtype):
        # GH #28870

        left = pd.DataFrame({"a": [0, 10, 20], "left_val": [1, 2, 3]})
        right = pd.DataFrame({"a": [5, 15, 25], "right_val": [1, 2, 3]})
        left["a"] = left["a"].astype(any_int_dtype)
        right["a"] = right["a"].astype(any_int_dtype)

        expected = pd.DataFrame(
            {"a": [0, 10, 20], "left_val": [1, 2, 3], "right_val": [np.nan, 1.0, 2.0]}
        )
        expected["a"] = expected["a"].astype(any_int_dtype)

        result = merge_asof(left, right, on="a", tolerance=10)
        tm.assert_frame_equal(result, expected)

    def test_merge_index_column_tz(self):
        # GH 29864
        index = pd.date_range("2019-10-01", freq="30min", periods=5, tz="UTC")
        left = pd.DataFrame([0.9, 0.8, 0.7, 0.6], columns=["xyz"], index=index[1:])
        right = pd.DataFrame({"from_date": index, "abc": [2.46] * 4 + [2.19]})
        result = merge_asof(
            left=left, right=right, left_index=True, right_on=["from_date"]
        )
        expected = pd.DataFrame(
            {
                "xyz": [0.9, 0.8, 0.7, 0.6],
                "from_date": index[1:],
                "abc": [2.46] * 3 + [2.19],
            },
            index=pd.date_range(
                "2019-10-01 00:30:00", freq="30min", periods=4, tz="UTC"
            ),
        )
        tm.assert_frame_equal(result, expected)

        result = merge_asof(
            left=right, right=left, right_index=True, left_on=["from_date"]
        )
        expected = pd.DataFrame(
            {
                "from_date": index,
                "abc": [2.46] * 4 + [2.19],
                "xyz": [np.nan, 0.9, 0.8, 0.7, 0.6],
            },
            index=Index([0, 1, 2, 3, 4]),
        )
        tm.assert_frame_equal(result, expected)

    def test_left_index_right_index_tolerance(self, unit):
        # https://github.com/pandas-dev/pandas/issues/35558
        if unit == "s":
            pytest.skip(
                "This test is invalid with unit='s' because that would round dr1"
            )

        dr1 = pd.date_range(
            start="1/1/2020", end="1/20/2020", freq="2D", unit=unit
        ) + Timedelta(seconds=0.4).as_unit(unit)
        dr2 = pd.date_range(start="1/1/2020", end="2/1/2020", unit=unit)

        df1 = pd.DataFrame({"val1": "foo"}, index=pd.DatetimeIndex(dr1))
        df2 = pd.DataFrame({"val2": "bar"}, index=pd.DatetimeIndex(dr2))

        expected = pd.DataFrame(
            {"val1": "foo", "val2": "bar"}, index=pd.DatetimeIndex(dr1)
        )
        result = merge_asof(
            df1,
            df2,
            left_index=True,
            right_index=True,
            tolerance=Timedelta(seconds=0.5),
        )
        tm.assert_frame_equal(result, expected)


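# Object-dtype "on" keys (date strings or mixed values) are rejected with a
# MergeError, whether joining on a column or on the index.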
@pytest.mark.parametrize(
    "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
)
@pytest.mark.parametrize(
    "kwargs", [{"on": "x"}, {"left_index": True, "right_index": True}]
)
@pytest.mark.parametrize(
    "data",
    [["2019-06-01 00:09:12", "2019-06-01 00:10:29"], [1.0, "2019-06-01 00:10:29"]],
)
def test_merge_asof_non_numerical_dtype(kwargs, data, infer_string):
    # GH#29130
    with option_context("future.infer_string", infer_string):
        left = pd.DataFrame({"x": data}, index=data)
        right = pd.DataFrame({"x": data}, index=data)
        with pytest.raises(
            MergeError,
            match=r"Incompatible merge dtype, .*, both sides must have numeric dtype",
        ):
            merge_asof(left, right, **kwargs)


def test_merge_asof_non_numerical_dtype_object():
    # GH#29130
    left = pd.DataFrame({"a": ["12", "13", "15"], "left_val1": ["a", "b", "c"]})
    right = pd.DataFrame({"a": ["a", "b", "c"], "left_val": ["d", "e", "f"]})
    with pytest.raises(
        MergeError,
        match=r"Incompatible merge dtype, .*, both sides must have numeric dtype",
    ):
        merge_asof(
            left,
            right,
            left_on="left_val1",
            right_on="a",
            left_by="a",
            right_by="left_val",
        )


@pytest.mark.parametrize(
    "kwargs",
    [
        {"right_index": True, "left_index": True},
        {"left_on": "left_time", "right_index": True},
        {"left_index": True, "right_on": "right"},
    ],
)
def test_merge_asof_index_behavior(kwargs):
    # GH 33463
    index = Index([1, 5, 10], name="test")
    left = pd.DataFrame(
        {"left": ["a", "b", "c"], "left_time": [1, 4, 10]}, index=index
    )
    right = pd.DataFrame({"right": [1, 2, 3, 6, 7]}, index=[1, 2, 3, 6, 7])
    result = merge_asof(left, right, **kwargs)

    expected = pd.DataFrame(
        {"left": ["a", "b", "c"], "left_time": [1, 4, 10], "right": [1, 3, 7]},
        index=index,
    )
    tm.assert_frame_equal(result, expected)


def test_merge_asof_numeric_column_in_index():
    # GH#34488
    left = pd.DataFrame({"b": [10, 11, 12]}, index=Index([1, 2, 3], name="a"))
    right = pd.DataFrame({"c": [20, 21, 22]}, index=Index([0, 2, 3], name="a"))

    result = merge_asof(left, right, left_on="a", right_on="a")
    expected = pd.DataFrame({"a": [1, 2, 3], "b": [10, 11, 12], "c": [20, 21, 22]})
    tm.assert_frame_equal(result, expected)


def test_merge_asof_numeric_column_in_multiindex():
    # GH#34488
    left = pd.DataFrame(
        {"b": [10, 11, 12]},
        index=pd.MultiIndex.from_arrays([[1, 2, 3], ["a", "b", "c"]], names=["a", "z"]),
    )
    right = pd.DataFrame(
        {"c": [20, 21, 22]},
        index=pd.MultiIndex.from_arrays([[1, 2, 3], ["x", "y", "z"]], names=["a", "y"]),
    )

    result = merge_asof(left, right, left_on="a", right_on="a")
    expected = pd.DataFrame({"a": [1, 2, 3], "b": [10, 11, 12], "c": [20, 21, 22]})
    tm.assert_frame_equal(result, expected)


def test_merge_asof_numeric_column_in_index_object_dtype():
    # GH#34488
    left = pd.DataFrame({"b": [10, 11, 12]}, index=Index(["1", "2", "3"], name="a"))
    right = pd.DataFrame({"c": [20, 21, 22]}, index=Index(["m", "n", "o"], name="a"))

    with pytest.raises(
        MergeError,
        match=r"Incompatible merge dtype, .*, both sides must have numeric dtype",
    ):
        merge_asof(left, right, left_on="a", right_on="a")

    left = left.reset_index().set_index(["a", "b"])
    right = right.reset_index().set_index(["a", "c"])

    with pytest.raises(
        MergeError,
        match=r"Incompatible merge dtype, .*, both sides must have numeric dtype",
    ):
        merge_asof(left, right, left_on="a", right_on="a")


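# left_on/right_on may be an array of timestamps rather than a column label;
# the array acts as the join key for that side.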
def test_merge_asof_array_as_on(unit):
    # GH#42844
    dti = pd.DatetimeIndex(
        ["2021/01/01 00:37", "2021/01/01 01:40"], dtype=f"M8[{unit}]"
    )
    right = pd.DataFrame(
        {
            "a": [2, 6],
            "ts": dti,
        }
    )
    ts_merge = pd.date_range(
        start=pd.Timestamp("2021/01/01 00:00"), periods=3, freq="1h", unit=unit
    )
    left = pd.DataFrame({"b": [4, 8, 7]})
    result = merge_asof(
        left,
        right,
        left_on=ts_merge,
        right_on="ts",
        allow_exact_matches=False,
        direction="backward",
    )
    expected = pd.DataFrame({"b": [4, 8, 7], "a": [np.nan, 2, 6], "ts": ts_merge})
    tm.assert_frame_equal(result, expected)

    result = merge_asof(
        right,
        left,
        left_on="ts",
        right_on=ts_merge,
        allow_exact_matches=False,
        direction="backward",
    )
    expected = pd.DataFrame(
        {
            "a": [2, 6],
            "ts": dti,
            "b": [4, 8],
        }
    )
    tm.assert_frame_equal(result, expected)


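# Duplicate column labels on either side should raise a clear ValueError
# naming the offending label.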
def test_merge_asof_raise_for_duplicate_columns():
    # GH#50102
    left = pd.DataFrame([[1, 2, "a"]], columns=["a", "a", "left_val"])
    right = pd.DataFrame([[1, 1, 1]], columns=["a", "a", "right_val"])

    with pytest.raises(ValueError, match="column label 'a'"):
        merge_asof(left, right, on="a")

    with pytest.raises(ValueError, match="column label 'a'"):
        merge_asof(left, right, left_on="a", right_on="right_val")

    with pytest.raises(ValueError, match="column label 'a'"):
        merge_asof(left, right, left_on="left_val", right_on="a")


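# The "on" key may be a masked (Int64) or pyarrow-backed extension dtype;
# the key keeps its extension dtype in the result.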
@pytest.mark.parametrize(
    "dtype",
    [
        "Int64",
        pytest.param("int64[pyarrow]", marks=td.skip_if_no("pyarrow")),
        pytest.param("timestamp[s][pyarrow]", marks=td.skip_if_no("pyarrow")),
    ],
)
def test_merge_asof_extension_dtype(dtype):
    # GH 52904
    left = pd.DataFrame(
        {
            "join_col": [1, 3, 5],
            "left_val": [1, 2, 3],
        }
    )
    right = pd.DataFrame(
        {
            "join_col": [2, 3, 4],
            "right_val": [1, 2, 3],
        }
    )
    left = left.astype({"join_col": dtype})
    right = right.astype({"join_col": dtype})
    result = merge_asof(left, right, on="join_col")
    expected = pd.DataFrame(
        {
            "join_col": [1, 3, 5],
            "left_val": [1, 2, 3],
            "right_val": [np.nan, 2.0, 3.0],
        }
    )
    expected = expected.astype({"join_col": dtype})
    tm.assert_frame_equal(result, expected)


@td.skip_if_no("pyarrow")
def test_merge_asof_pyarrow_td_tolerance():
    # GH 56486
    ser = pd.Series(
        [datetime.datetime(2023, 1, 1)], dtype="timestamp[us, UTC][pyarrow]"
    )
    df = pd.DataFrame(
        {
            "timestamp": ser,
            "value": [1],
        }
    )
    result = merge_asof(df, df, on="timestamp", tolerance=Timedelta("1s"))
    expected = pd.DataFrame(
        {
            "timestamp": ser,
            "value_x": [1],
            "value_y": [1],
        }
    )
    tm.assert_frame_equal(result, expected)


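# Read-only (non-writeable) index buffers must not break the merge.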
def test_merge_asof_read_only_ndarray():
    # GH 53513
    left = pd.Series([2], index=[2], name="left")
    right = pd.Series([1], index=[1], name="right")
    # set to read-only
    left.index.values.flags.writeable = False
    right.index.values.flags.writeable = False
    result = merge_asof(left, right, left_index=True, right_index=True)
    expected = pd.DataFrame({"left": [2], "right": [1]}, index=[2])
    tm.assert_frame_equal(result, expected)


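# Multiple "by" columns where one is categorical with differently ordered
# categories on each side; rows still match on the category values.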
def test_merge_asof_multiby_with_categorical():
    # GH 43541
    left = pd.DataFrame(
        {
            "c1": pd.Categorical(["a", "a", "b", "b"], categories=["a", "b"]),
            "c2": ["x"] * 4,
            "t": [1] * 4,
            "v": range(4),
        }
    )
    right = pd.DataFrame(
        {
            "c1": pd.Categorical(["b", "b"], categories=["b", "a"]),
            "c2": ["x"] * 2,
            "t": [1, 2],
            "v": range(2),
        }
    )
    result = merge_asof(
        left,
        right,
        by=["c1", "c2"],
        on="t",
        direction="forward",
        suffixes=["_left", "_right"],
    )
    expected = pd.DataFrame(
        {
            "c1": pd.Categorical(["a", "a", "b", "b"], categories=["a", "b"]),
            "c2": ["x"] * 4,
            "t": [1] * 4,
            "v_left": range(4),
            "v_right": [np.nan, np.nan, 0.0, 0.0],
        }
    )
    tm.assert_frame_equal(result, expected)