from datetime import datetime import numpy as np import pandas as pd from pandas import ( Period, Series, date_range, period_range, to_datetime, ) import pandas._testing as tm class TestCombineFirst: def test_combine_first_period_datetime(self): # GH#3367 didx = date_range(start="1950-01-31", end="1950-07-31", freq="M") pidx = period_range(start=Period("1950-1"), end=Period("1950-7"), freq="M") # check to be consistent with DatetimeIndex for idx in [didx, pidx]: a = Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx) b = Series([9, 9, 9, 9, 9, 9, 9], index=idx) result = a.combine_first(b) expected = Series([1, 9, 9, 4, 5, 9, 7], index=idx, dtype=np.float64) tm.assert_series_equal(result, expected) def test_combine_first_name(self, datetime_series): result = datetime_series.combine_first(datetime_series[:5]) assert result.name == datetime_series.name def test_combine_first(self): values = tm.makeIntIndex(20).values.astype(float) series = Series(values, index=tm.makeIntIndex(20)) series_copy = series * 2 series_copy[::2] = np.NaN # nothing used from the input combined = series.combine_first(series_copy) tm.assert_series_equal(combined, series) # Holes filled from input combined = series_copy.combine_first(series) assert np.isfinite(combined).all() tm.assert_series_equal(combined[::2], series[::2]) tm.assert_series_equal(combined[1::2], series_copy[1::2]) # mixed types index = tm.makeStringIndex(20) floats = Series(np.random.randn(20), index=index) strings = Series(tm.makeStringIndex(10), index=index[::2]) combined = strings.combine_first(floats) tm.assert_series_equal(strings, combined.loc[index[::2]]) tm.assert_series_equal(floats[1::2].astype(object), combined.loc[index[1::2]]) # corner case ser = Series([1.0, 2, 3], index=[0, 1, 2]) empty = Series([], index=[], dtype=object) result = ser.combine_first(empty) ser.index = ser.index.astype("O") tm.assert_series_equal(ser, result) def test_combine_first_dt64(self): s0 = to_datetime(Series(["2010", np.NaN])) s1 = to_datetime(Series([np.NaN, "2011"])) rs = s0.combine_first(s1) xp = to_datetime(Series(["2010", "2011"])) tm.assert_series_equal(rs, xp) s0 = to_datetime(Series(["2010", np.NaN])) s1 = Series([np.NaN, "2011"]) rs = s0.combine_first(s1) xp = Series([datetime(2010, 1, 1), "2011"], dtype="datetime64[ns]") tm.assert_series_equal(rs, xp) def test_combine_first_dt_tz_values(self, tz_naive_fixture): ser1 = Series( pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture), name="ser1", ) ser2 = Series( pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture), index=[2, 3, 4], name="ser2", ) result = ser1.combine_first(ser2) exp_vals = pd.DatetimeIndex( ["20150101", "20150102", "20150103", "20160515", "20160516"], tz=tz_naive_fixture, ) exp = Series(exp_vals, name="ser1") tm.assert_series_equal(exp, result) def test_combine_first_timezone_series_with_empty_series(self): # GH 41800 time_index = date_range( datetime(2021, 1, 1, 1), datetime(2021, 1, 1, 10), freq="H", tz="Europe/Rome", ) s1 = Series(range(10), index=time_index) s2 = Series(index=time_index) result = s1.combine_first(s2) tm.assert_series_equal(result, s1)