projektAI/venv/Lib/site-packages/pandas/tests/plotting/frame/test_frame_subplots.py

665 lines
26 KiB
Python
Raw Normal View History

2021-06-06 22:13:05 +02:00
""" Test cases for DataFrame.plot """
import string
import warnings
import numpy as np
import pytest
import pandas.util._test_decorators as td
import pandas as pd
from pandas import DataFrame, Series, date_range
import pandas._testing as tm
from pandas.tests.plotting.common import TestPlotBase
from pandas.io.formats.printing import pprint_thing
pytestmark = pytest.mark.slow
@td.skip_if_no_mpl
class TestDataFramePlotsSubplots(TestPlotBase):
def setup_method(self, method):
TestPlotBase.setup_method(self, method)
import matplotlib as mpl
mpl.rcdefaults()
self.tdf = tm.makeTimeDataFrame()
self.hexbin_df = DataFrame(
{
"A": np.random.uniform(size=20),
"B": np.random.uniform(size=20),
"C": np.arange(20) + np.random.uniform(size=20),
}
)
def test_subplots(self):
df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10]))
for kind in ["bar", "barh", "line", "area"]:
axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True)
self._check_axes_shape(axes, axes_num=3, layout=(3, 1))
assert axes.shape == (3,)
for ax, column in zip(axes, df.columns):
self._check_legend_labels(ax, labels=[pprint_thing(column)])
for ax in axes[:-2]:
self._check_visible(ax.xaxis) # xaxis must be visible for grid
self._check_visible(ax.get_xticklabels(), visible=False)
if not (kind == "bar" and self.mpl_ge_3_1_0):
# change https://github.com/pandas-dev/pandas/issues/26714
self._check_visible(ax.get_xticklabels(minor=True), visible=False)
self._check_visible(ax.xaxis.get_label(), visible=False)
self._check_visible(ax.get_yticklabels())
self._check_visible(axes[-1].xaxis)
self._check_visible(axes[-1].get_xticklabels())
self._check_visible(axes[-1].get_xticklabels(minor=True))
self._check_visible(axes[-1].xaxis.get_label())
self._check_visible(axes[-1].get_yticklabels())
axes = df.plot(kind=kind, subplots=True, sharex=False)
for ax in axes:
self._check_visible(ax.xaxis)
self._check_visible(ax.get_xticklabels())
self._check_visible(ax.get_xticklabels(minor=True))
self._check_visible(ax.xaxis.get_label())
self._check_visible(ax.get_yticklabels())
axes = df.plot(kind=kind, subplots=True, legend=False)
for ax in axes:
assert ax.get_legend() is None
def test_subplots_timeseries(self):
idx = date_range(start="2014-07-01", freq="M", periods=10)
df = DataFrame(np.random.rand(10, 3), index=idx)
for kind in ["line", "area"]:
axes = df.plot(kind=kind, subplots=True, sharex=True)
self._check_axes_shape(axes, axes_num=3, layout=(3, 1))
for ax in axes[:-2]:
# GH 7801
self._check_visible(ax.xaxis) # xaxis must be visible for grid
self._check_visible(ax.get_xticklabels(), visible=False)
self._check_visible(ax.get_xticklabels(minor=True), visible=False)
self._check_visible(ax.xaxis.get_label(), visible=False)
self._check_visible(ax.get_yticklabels())
self._check_visible(axes[-1].xaxis)
self._check_visible(axes[-1].get_xticklabels())
self._check_visible(axes[-1].get_xticklabels(minor=True))
self._check_visible(axes[-1].xaxis.get_label())
self._check_visible(axes[-1].get_yticklabels())
self._check_ticks_props(axes, xrot=0)
axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7)
for ax in axes:
self._check_visible(ax.xaxis)
self._check_visible(ax.get_xticklabels())
self._check_visible(ax.get_xticklabels(minor=True))
self._check_visible(ax.xaxis.get_label())
self._check_visible(ax.get_yticklabels())
self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7)
def test_subplots_timeseries_y_axis(self):
# GH16953
data = {
"numeric": np.array([1, 2, 5]),
"timedelta": [
pd.Timedelta(-10, unit="s"),
pd.Timedelta(10, unit="m"),
pd.Timedelta(10, unit="h"),
],
"datetime_no_tz": [
pd.to_datetime("2017-08-01 00:00:00"),
pd.to_datetime("2017-08-01 02:00:00"),
pd.to_datetime("2017-08-02 00:00:00"),
],
"datetime_all_tz": [
pd.to_datetime("2017-08-01 00:00:00", utc=True),
pd.to_datetime("2017-08-01 02:00:00", utc=True),
pd.to_datetime("2017-08-02 00:00:00", utc=True),
],
"text": ["This", "should", "fail"],
}
testdata = DataFrame(data)
y_cols = ["numeric", "timedelta", "datetime_no_tz", "datetime_all_tz"]
for col in y_cols:
ax = testdata.plot(y=col)
result = ax.get_lines()[0].get_data()[1]
expected = testdata[col].values
assert (result == expected).all()
msg = "no numeric data to plot"
with pytest.raises(TypeError, match=msg):
testdata.plot(y="text")
@pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz")
def test_subplots_timeseries_y_axis_not_supported(self):
"""
This test will fail for:
period:
since period isn't yet implemented in ``select_dtypes``
and because it will need a custom value converter +
tick formatter (as was done for x-axis plots)
categorical:
because it will need a custom value converter +
tick formatter (also doesn't work for x-axis, as of now)
datetime_mixed_tz:
because of the way how pandas handles ``Series`` of
``datetime`` objects with different timezone,
generally converting ``datetime`` objects in a tz-aware
form could help with this problem
"""
data = {
"numeric": np.array([1, 2, 5]),
"period": [
pd.Period("2017-08-01 00:00:00", freq="H"),
pd.Period("2017-08-01 02:00", freq="H"),
pd.Period("2017-08-02 00:00:00", freq="H"),
],
"categorical": pd.Categorical(
["c", "b", "a"], categories=["a", "b", "c"], ordered=False
),
"datetime_mixed_tz": [
pd.to_datetime("2017-08-01 00:00:00", utc=True),
pd.to_datetime("2017-08-01 02:00:00"),
pd.to_datetime("2017-08-02 00:00:00"),
],
}
testdata = DataFrame(data)
ax_period = testdata.plot(x="numeric", y="period")
assert (
ax_period.get_lines()[0].get_data()[1] == testdata["period"].values
).all()
ax_categorical = testdata.plot(x="numeric", y="categorical")
assert (
ax_categorical.get_lines()[0].get_data()[1]
== testdata["categorical"].values
).all()
ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz")
assert (
ax_datetime_mixed_tz.get_lines()[0].get_data()[1]
== testdata["datetime_mixed_tz"].values
).all()
def test_subplots_layout_multi_column(self):
# GH 6667
df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10]))
axes = df.plot(subplots=True, layout=(2, 2))
self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
assert axes.shape == (2, 2)
axes = df.plot(subplots=True, layout=(-1, 2))
self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
assert axes.shape == (2, 2)
axes = df.plot(subplots=True, layout=(2, -1))
self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
assert axes.shape == (2, 2)
axes = df.plot(subplots=True, layout=(1, 4))
self._check_axes_shape(axes, axes_num=3, layout=(1, 4))
assert axes.shape == (1, 4)
axes = df.plot(subplots=True, layout=(-1, 4))
self._check_axes_shape(axes, axes_num=3, layout=(1, 4))
assert axes.shape == (1, 4)
axes = df.plot(subplots=True, layout=(4, -1))
self._check_axes_shape(axes, axes_num=3, layout=(4, 1))
assert axes.shape == (4, 1)
with pytest.raises(ValueError):
df.plot(subplots=True, layout=(1, 1))
with pytest.raises(ValueError):
df.plot(subplots=True, layout=(-1, -1))
@pytest.mark.parametrize(
"kwargs, expected_axes_num, expected_layout, expected_shape",
[
({}, 1, (1, 1), (1,)),
({"layout": (3, 3)}, 1, (3, 3), (3, 3)),
],
)
def test_subplots_layout_single_column(
self, kwargs, expected_axes_num, expected_layout, expected_shape
):
# GH 6667
df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10]))
axes = df.plot(subplots=True, **kwargs)
self._check_axes_shape(
axes,
axes_num=expected_axes_num,
layout=expected_layout,
)
assert axes.shape == expected_shape
def test_subplots_warnings(self):
# GH 9464
with tm.assert_produces_warning(None):
df = DataFrame(np.random.randn(100, 4))
df.plot(subplots=True, layout=(3, 2))
df = DataFrame(
np.random.randn(100, 4), index=date_range("1/1/2000", periods=100)
)
df.plot(subplots=True, layout=(3, 2))
def test_subplots_multiple_axes(self):
# GH 5353, 6970, GH 7069
fig, axes = self.plt.subplots(2, 3)
df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10]))
returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False)
self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
assert returned.shape == (3,)
assert returned[0].figure is fig
# draw on second row
returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False)
self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
assert returned.shape == (3,)
assert returned[0].figure is fig
self._check_axes_shape(axes, axes_num=6, layout=(2, 3))
tm.close()
with pytest.raises(ValueError):
fig, axes = self.plt.subplots(2, 3)
# pass different number of axes from required
df.plot(subplots=True, ax=axes)
# pass 2-dim axes and invalid layout
# invalid lauout should not affect to input and return value
# (show warning is tested in
# TestDataFrameGroupByPlots.test_grouped_box_multiple_axes
fig, axes = self.plt.subplots(2, 2)
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10]))
returned = df.plot(
subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False
)
self._check_axes_shape(returned, axes_num=4, layout=(2, 2))
assert returned.shape == (4,)
returned = df.plot(
subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False
)
self._check_axes_shape(returned, axes_num=4, layout=(2, 2))
assert returned.shape == (4,)
returned = df.plot(
subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False
)
self._check_axes_shape(returned, axes_num=4, layout=(2, 2))
assert returned.shape == (4,)
# single column
fig, axes = self.plt.subplots(1, 1)
df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10]))
axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False)
self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
assert axes.shape == (1,)
def test_subplots_ts_share_axes(self):
# GH 3964
fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True)
self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3)
df = DataFrame(
np.random.randn(10, 9),
index=date_range(start="2014-07-01", freq="M", periods=10),
)
for i, ax in enumerate(axes.ravel()):
df[i].plot(ax=ax, fontsize=5)
# Rows other than bottom should not be visible
for ax in axes[0:-1].ravel():
self._check_visible(ax.get_xticklabels(), visible=False)
# Bottom row should be visible
for ax in axes[-1].ravel():
self._check_visible(ax.get_xticklabels(), visible=True)
# First column should be visible
for ax in axes[[0, 1, 2], [0]].ravel():
self._check_visible(ax.get_yticklabels(), visible=True)
# Other columns should not be visible
for ax in axes[[0, 1, 2], [1]].ravel():
self._check_visible(ax.get_yticklabels(), visible=False)
for ax in axes[[0, 1, 2], [2]].ravel():
self._check_visible(ax.get_yticklabels(), visible=False)
def test_subplots_sharex_axes_existing_axes(self):
# GH 9158
d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]}
df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14"))
axes = df[["A", "B"]].plot(subplots=True)
df["C"].plot(ax=axes[0], secondary_y=True)
self._check_visible(axes[0].get_xticklabels(), visible=False)
self._check_visible(axes[1].get_xticklabels(), visible=True)
for ax in axes.ravel():
self._check_visible(ax.get_yticklabels(), visible=True)
def test_subplots_dup_columns(self):
# GH 10962
df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa"))
axes = df.plot(subplots=True)
for ax in axes:
self._check_legend_labels(ax, labels=["a"])
assert len(ax.lines) == 1
tm.close()
axes = df.plot(subplots=True, secondary_y="a")
for ax in axes:
# (right) is only attached when subplots=False
self._check_legend_labels(ax, labels=["a"])
assert len(ax.lines) == 1
tm.close()
ax = df.plot(secondary_y="a")
self._check_legend_labels(ax, labels=["a (right)"] * 5)
assert len(ax.lines) == 0
assert len(ax.right_ax.lines) == 5
def test_bar_log_no_subplots(self):
# GH3254, GH3298 matplotlib/matplotlib#1882, #1892
# regressions in 1.2.1
expected = np.array([0.1, 1.0, 10.0, 100])
# no subplots
df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5))
ax = df.plot.bar(grid=True, log=True)
tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected)
def test_bar_log_subplots(self):
expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4])
ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar(
log=True, subplots=True
)
tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected)
tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected)
def test_boxplot_subplots_return_type(self):
df = self.hist_df
# normal style: return_type=None
result = df.plot.box(subplots=True)
assert isinstance(result, Series)
self._check_box_return_type(
result, None, expected_keys=["height", "weight", "category"]
)
for t in ["dict", "axes", "both"]:
returned = df.plot.box(return_type=t, subplots=True)
self._check_box_return_type(
returned,
t,
expected_keys=["height", "weight", "category"],
check_ax_title=False,
)
def test_df_subplots_patterns_minorticks(self):
# GH 10657
import matplotlib.pyplot as plt
df = DataFrame(
np.random.randn(10, 2),
index=date_range("1/1/2000", periods=10),
columns=list("AB"),
)
# shared subplots
fig, axes = plt.subplots(2, 1, sharex=True)
axes = df.plot(subplots=True, ax=axes)
for ax in axes:
assert len(ax.lines) == 1
self._check_visible(ax.get_yticklabels(), visible=True)
# xaxis of 1st ax must be hidden
self._check_visible(axes[0].get_xticklabels(), visible=False)
self._check_visible(axes[0].get_xticklabels(minor=True), visible=False)
self._check_visible(axes[1].get_xticklabels(), visible=True)
self._check_visible(axes[1].get_xticklabels(minor=True), visible=True)
tm.close()
fig, axes = plt.subplots(2, 1)
with tm.assert_produces_warning(UserWarning):
axes = df.plot(subplots=True, ax=axes, sharex=True)
for ax in axes:
assert len(ax.lines) == 1
self._check_visible(ax.get_yticklabels(), visible=True)
# xaxis of 1st ax must be hidden
self._check_visible(axes[0].get_xticklabels(), visible=False)
self._check_visible(axes[0].get_xticklabels(minor=True), visible=False)
self._check_visible(axes[1].get_xticklabels(), visible=True)
self._check_visible(axes[1].get_xticklabels(minor=True), visible=True)
tm.close()
# not shared
fig, axes = plt.subplots(2, 1)
axes = df.plot(subplots=True, ax=axes)
for ax in axes:
assert len(ax.lines) == 1
self._check_visible(ax.get_yticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(minor=True), visible=True)
tm.close()
def test_subplots_sharex_false(self):
# test when sharex is set to False, two plots should have different
# labels, GH 25160
df = DataFrame(np.random.rand(10, 2))
df.iloc[5:, 1] = np.nan
df.iloc[:5, 0] = np.nan
figs, axs = self.plt.subplots(2, 1)
df.plot.line(ax=axs, subplots=True, sharex=False)
expected_ax1 = np.arange(4.5, 10, 0.5)
expected_ax2 = np.arange(-0.5, 5, 0.5)
tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1)
tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2)
@pytest.mark.parametrize(
"index_name, old_label, new_label",
[
(None, "", "new"),
("old", "old", "new"),
(None, "", ""),
(None, "", 1),
(None, "", [1, 2]),
],
)
@pytest.mark.parametrize("kind", ["line", "area", "bar"])
def test_xlabel_ylabel_dataframe_subplots(
self, kind, index_name, old_label, new_label
):
# GH 9093
df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"])
df.index.name = index_name
# default is the ylabel is not shown and xlabel is index name
axes = df.plot(kind=kind, subplots=True)
assert all(ax.get_ylabel() == "" for ax in axes)
assert all(ax.get_xlabel() == old_label for ax in axes)
# old xlabel will be overriden and assigned ylabel will be used as ylabel
axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True)
assert all(ax.get_ylabel() == str(new_label) for ax in axes)
assert all(ax.get_xlabel() == str(new_label) for ax in axes)
@pytest.mark.parametrize(
"kwargs",
[
# stacked center
{"kind": "bar", "stacked": True},
{"kind": "bar", "stacked": True, "width": 0.9},
{"kind": "barh", "stacked": True},
{"kind": "barh", "stacked": True, "width": 0.9},
# center
{"kind": "bar", "stacked": False},
{"kind": "bar", "stacked": False, "width": 0.9},
{"kind": "barh", "stacked": False},
{"kind": "barh", "stacked": False, "width": 0.9},
# subplots center
{"kind": "bar", "subplots": True},
{"kind": "bar", "subplots": True, "width": 0.9},
{"kind": "barh", "subplots": True},
{"kind": "barh", "subplots": True, "width": 0.9},
# align edge
{"kind": "bar", "stacked": True, "align": "edge"},
{"kind": "bar", "stacked": True, "width": 0.9, "align": "edge"},
{"kind": "barh", "stacked": True, "align": "edge"},
{"kind": "barh", "stacked": True, "width": 0.9, "align": "edge"},
{"kind": "bar", "stacked": False, "align": "edge"},
{"kind": "bar", "stacked": False, "width": 0.9, "align": "edge"},
{"kind": "barh", "stacked": False, "align": "edge"},
{"kind": "barh", "stacked": False, "width": 0.9, "align": "edge"},
{"kind": "bar", "subplots": True, "align": "edge"},
{"kind": "bar", "subplots": True, "width": 0.9, "align": "edge"},
{"kind": "barh", "subplots": True, "align": "edge"},
{"kind": "barh", "subplots": True, "width": 0.9, "align": "edge"},
],
)
def test_bar_align_multiple_columns(self, kwargs):
# GH2157
df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5))
self._check_bar_alignment(df, **kwargs)
@pytest.mark.parametrize(
"kwargs",
[
{"kind": "bar", "stacked": False},
{"kind": "bar", "stacked": True},
{"kind": "barh", "stacked": False},
{"kind": "barh", "stacked": True},
{"kind": "bar", "subplots": True},
{"kind": "barh", "subplots": True},
],
)
def test_bar_align_single_column(self, kwargs):
df = DataFrame(np.random.randn(5))
self._check_bar_alignment(df, **kwargs)
@pytest.mark.parametrize(
"kwargs",
[
{"kind": "bar", "stacked": False},
{"kind": "bar", "stacked": True},
{"kind": "barh", "stacked": False},
{"kind": "barh", "stacked": True},
{"kind": "bar", "subplots": True},
{"kind": "barh", "subplots": True},
],
)
def test_bar_barwidth_position(self, kwargs):
df = DataFrame(np.random.randn(5, 5))
self._check_bar_alignment(df, width=0.9, position=0.2, **kwargs)
def test_bar_barwidth_position_int(self):
# GH 12979
df = DataFrame(np.random.randn(5, 5))
for w in [1, 1.0]:
ax = df.plot.bar(stacked=True, width=w)
ticks = ax.xaxis.get_ticklocs()
tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4]))
assert ax.get_xlim() == (-0.75, 4.75)
# check left-edge of bars
assert ax.patches[0].get_x() == -0.5
assert ax.patches[-1].get_x() == 3.5
self._check_bar_alignment(df, kind="bar", stacked=True, width=1)
self._check_bar_alignment(df, kind="barh", stacked=False, width=1)
self._check_bar_alignment(df, kind="barh", stacked=True, width=1)
self._check_bar_alignment(df, kind="bar", subplots=True, width=1)
self._check_bar_alignment(df, kind="barh", subplots=True, width=1)
def _check_bar_alignment(
self,
df,
kind="bar",
stacked=False,
subplots=False,
align="center",
width=0.5,
position=0.5,
):
axes = df.plot(
kind=kind,
stacked=stacked,
subplots=subplots,
align=align,
width=width,
position=position,
grid=True,
)
axes = self._flatten_visible(axes)
for ax in axes:
if kind == "bar":
axis = ax.xaxis
ax_min, ax_max = ax.get_xlim()
min_edge = min(p.get_x() for p in ax.patches)
max_edge = max(p.get_x() + p.get_width() for p in ax.patches)
elif kind == "barh":
axis = ax.yaxis
ax_min, ax_max = ax.get_ylim()
min_edge = min(p.get_y() for p in ax.patches)
max_edge = max(p.get_y() + p.get_height() for p in ax.patches)
else:
raise ValueError
# GH 7498
# compare margins between lim and bar edges
tm.assert_almost_equal(ax_min, min_edge - 0.25)
tm.assert_almost_equal(ax_max, max_edge + 0.25)
p = ax.patches[0]
if kind == "bar" and (stacked is True or subplots is True):
edge = p.get_x()
center = edge + p.get_width() * position
elif kind == "bar" and stacked is False:
center = p.get_x() + p.get_width() * len(df.columns) * position
edge = p.get_x()
elif kind == "barh" and (stacked is True or subplots is True):
center = p.get_y() + p.get_height() * position
edge = p.get_y()
elif kind == "barh" and stacked is False:
center = p.get_y() + p.get_height() * len(df.columns) * position
edge = p.get_y()
else:
raise ValueError
# Check the ticks locates on integer
assert (axis.get_ticklocs() == np.arange(len(df))).all()
if align == "center":
# Check whether the bar locates on center
tm.assert_almost_equal(axis.get_ticklocs()[0], center)
elif align == "edge":
# Check whether the bar's edge starts from the tick
tm.assert_almost_equal(axis.get_ticklocs()[0], edge)
else:
raise ValueError
return axes