first commit

This commit is contained in:
Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,664 @@
""" Test cases for DataFrame.plot """
import re
import warnings
import numpy as np
import pytest
import pandas.util._test_decorators as td
import pandas as pd
from pandas import DataFrame
import pandas._testing as tm
from pandas.tests.plotting.common import (
TestPlotBase,
_check_plot_works,
)
# Module-level pytest marker: tags every test collected from this module as "slow".
pytestmark = pytest.mark.slow
@td.skip_if_no_mpl
class TestDataFrameColor(TestPlotBase):
    """Tests for how ``DataFrame.plot`` resolves ``color``, ``colormap`` and ``style``.

    Each test draws a plot and inspects the resulting matplotlib artists
    (lines, patches, collections, legend handles) to confirm the colors that
    were actually applied.
    """

    def setup_method(self, method):
        """Run the base-class setup, restore matplotlib rc defaults, build fixtures."""
        TestPlotBase.setup_method(self, method)
        import matplotlib as mpl

        # Start every test from matplotlib's default rc configuration.
        mpl.rcdefaults()

        self.tdf = tm.makeTimeDataFrame()
        self.hexbin_df = DataFrame(
            {
                "A": np.random.uniform(size=20),
                "B": np.random.uniform(size=20),
                "C": np.arange(20) + np.random.uniform(size=20),
            }
        )

    def test_mpl2_color_cycle_str(self):
        """Plotting with "CN" cycle colors must not warn about single-letter colors."""
        # GH 15516
        # The filter category must be the warning *class*; a string would make
        # the warnings machinery raise TypeError as soon as any warning fires.
        from matplotlib import MatplotlibDeprecationWarning

        df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"])
        colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"]
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always", MatplotlibDeprecationWarning)

            for color in colors:
                _check_plot_works(df.plot, color=color)

            # if warnings were raised, none of them may be the problematic one
            # GH 36972
            match = "Support for uppercase single-letter colors is deprecated"
            msg = "MatplotlibDeprecationWarning related to CN colors was raised"
            for recorded in w:
                assert match not in str(recorded.message), msg

    def test_color_single_series_list(self):
        """A one-element color list works for a single-column frame."""
        # GH 3486
        df = DataFrame({"A": [1, 2, 3]})
        _check_plot_works(df.plot, color=["red"])

    @pytest.mark.parametrize("color", [(1, 0, 0), (1, 0, 0, 0.5)])
    def test_rgb_tuple_color(self, color):
        """RGB and RGBA tuples are accepted as ``color``."""
        # GH 16695
        df = DataFrame({"x": [1, 2], "y": [3, 4]})
        _check_plot_works(df.plot, x="x", y="y", color=color)

    def test_color_empty_string(self):
        """An empty string is rejected as a color."""
        df = DataFrame(np.random.randn(10, 2))
        with pytest.raises(ValueError, match="Invalid color argument:"):
            df.plot(color="")

    def test_color_and_style_arguments(self):
        """``color`` and ``style`` combine unless ``style`` carries a color symbol."""
        df = DataFrame({"x": [1, 2], "y": [3, 4]})

        # Allowed: the style strings contain no color symbol.
        ax = df.plot(color=["red", "black"], style=["-", "--"])
        assert [line.get_linestyle() for line in ax.lines] == ["-", "--"]
        assert [line.get_color() for line in ax.lines] == ["red", "black"]

        # Not allowed: the style strings also specify a color.
        msg = (
            "Cannot pass 'style' string with a color symbol and 'color' keyword "
            "argument. Please use one or the other or pass 'style' without a color "
            "symbol"
        )
        with pytest.raises(ValueError, match=msg):
            df.plot(color=["red", "black"], style=["k-", "r--"])

    @pytest.mark.parametrize(
        "color, expected",
        [
            ("green", ["green"] * 4),
            (["yellow", "red", "green", "blue"], ["yellow", "red", "green", "blue"]),
        ],
    )
    def test_color_and_marker(self, color, expected):
        """``color`` is honored together with a marker/linestyle ``style``."""
        # GH 21003
        df = DataFrame(np.random.random((7, 4)))
        ax = df.plot(color=color, style="d--")

        assert [line.get_color() for line in ax.lines] == expected
        # marker and linestyle come from the style string
        assert all(line.get_linestyle() == "--" for line in ax.lines)
        assert all(line.get_marker() == "d" for line in ax.lines)

    def test_bar_colors(self):
        """Bar face colors follow the default cycle, ``color`` and ``colormap``."""
        import matplotlib.pyplot as plt
        from matplotlib import cm

        default_colors = self._unpack_cycler(plt.rcParams)
        df = DataFrame(np.random.randn(5, 5))

        ax = df.plot.bar()
        self._check_colors(ax.patches[::5], facecolors=default_colors[:5])
        tm.close()

        custom_colors = "rgcby"
        ax = df.plot.bar(color=custom_colors)
        self._check_colors(ax.patches[::5], facecolors=custom_colors)
        tm.close()

        # The colormap may be given by name or as a colormap object.
        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)]
        for cmap in ("jet", cm.jet):
            ax = df.plot.bar(colormap=cmap)
            self._check_colors(ax.patches[::5], facecolors=rgba_colors)
            tm.close()

        ax = df.loc[:, [0]].plot.bar(color="DodgerBlue")
        self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"])
        tm.close()

        ax = df.plot(kind="bar", color="green")
        self._check_colors(ax.patches[::5], facecolors=["green"] * 5)
        tm.close()

    def test_bar_user_colors(self):
        """A per-row color column is applied bar by bar when ``y`` is given."""
        df = DataFrame(
            {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]}
        )
        # This should *only* work when `y` is specified, else
        # we use one color per column
        ax = df.plot.bar(y="A", color=df["color"])

        red = (1.0, 0.0, 0.0, 1.0)
        blue = (0.0, 0.0, 1.0, 1.0)
        result = [patch.get_facecolor() for patch in ax.patches]
        assert result == [red, blue, blue, red]

    def test_if_scatterplot_colorbar_affects_xaxis_visibility(self):
        """Adding a colorbar to a scatter plot leaves x-axis visibility unchanged."""
        # addressing issue #10611, to ensure colorbar does not
        # interfere with x-axis label and ticklabels with
        # ipython inline backend.
        df = DataFrame(
            np.random.random((1000, 3)), columns=["A label", "B label", "C label"]
        )

        ax1 = df.plot.scatter(x="A label", y="B label")
        ax2 = df.plot.scatter(x="A label", y="B label", c="C label")

        def _visibility(labels):
            return [label.get_visible() for label in labels]

        assert _visibility(ax1.xaxis.get_minorticklabels()) == _visibility(
            ax2.xaxis.get_minorticklabels()
        )
        assert _visibility(ax1.xaxis.get_majorticklabels()) == _visibility(
            ax2.xaxis.get_majorticklabels()
        )
        assert (
            ax1.xaxis.get_label().get_visible() == ax2.xaxis.get_label().get_visible()
        )

    def test_if_hexbin_xaxis_label_is_visible(self):
        """The x-axis label and tick labels of a hexbin plot stay visible."""
        # addressing issue #10678, to ensure colorbar does not
        # interfere with x-axis label and ticklabels with
        # ipython inline backend.
        df = DataFrame(
            np.random.random((1000, 3)), columns=["A label", "B label", "C label"]
        )

        ax = df.plot.hexbin("A label", "B label", gridsize=12)
        assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels())
        assert all(vis.get_visible() for vis in ax.xaxis.get_majorticklabels())
        assert ax.xaxis.get_label().get_visible()

    def test_if_scatterplot_colorbars_are_next_to_parent_axes(self):
        """Colorbar axes are spaced the same as the scatter axes they belong to."""
        import matplotlib.pyplot as plt

        df = DataFrame(
            np.random.random((1000, 3)), columns=["A label", "B label", "C label"]
        )

        fig, axes = plt.subplots(1, 2)
        for target in (axes[0], axes[1]):
            df.plot.scatter("A label", "B label", c="C label", ax=target)
        plt.tight_layout()

        # fig.axes holds the two parent axes followed by the two colorbar axes.
        points = np.array([ax.get_position().get_points() for ax in fig.axes])
        axes_x_coords = points[:, :, 0]
        parent_distance = axes_x_coords[1, :] - axes_x_coords[0, :]
        colorbar_distance = axes_x_coords[3, :] - axes_x_coords[2, :]
        assert np.isclose(parent_distance, colorbar_distance, atol=1e-7).all()

    @pytest.mark.parametrize("cmap", [None, "Greys"])
    def test_scatter_with_c_column_name_with_colors(self, cmap):
        """A ``c`` column of color names produces no colorbar."""
        # https://github.com/pandas-dev/pandas/issues/34316
        df = DataFrame(
            [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]],
            columns=["length", "width"],
        )
        df["species"] = ["r", "r", "g", "g", "b"]
        ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap)
        assert ax.collections[0].colorbar is None

    def test_scatter_colors(self):
        """``c`` and ``color`` are mutually exclusive; each sets the face color."""
        df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]})
        with pytest.raises(TypeError, match="Specify exactly one of `c` and `color`"):
            df.plot.scatter(x="a", y="b", c="c", color="green")

        default_colors = self._unpack_cycler(self.plt.rcParams)

        ax = df.plot.scatter(x="a", y="b", c="c")
        tm.assert_numpy_array_equal(
            ax.collections[0].get_facecolor()[0],
            np.array(self.colorconverter.to_rgba(default_colors[0])),
        )

        ax = df.plot.scatter(x="a", y="b", color="white")
        tm.assert_numpy_array_equal(
            ax.collections[0].get_facecolor()[0],
            np.array([1, 1, 1, 1], dtype=np.float64),
        )

    def test_scatter_colorbar_different_cmap(self):
        """Two scatter plots on one axes keep their own colormaps."""
        # GH 33389
        import matplotlib.pyplot as plt

        df = DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]})
        df["x2"] = df["x"] + 1

        fig, ax = plt.subplots()
        df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax)
        df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax)

        assert ax.collections[0].cmap.name == "cividis"
        assert ax.collections[1].cmap.name == "magma"

    def test_line_colors(self):
        """Line colors follow ``color`` (string, list, hex) and ``colormap``."""
        from matplotlib import cm

        custom_colors = "rgcby"
        df = DataFrame(np.random.randn(5, 5))

        ax = df.plot(color=custom_colors)
        self._check_colors(ax.get_lines(), linecolors=custom_colors)
        tm.close()

        # Plotting a second time with the same colors gives the same line colors.
        ax2 = df.plot(color=custom_colors)
        for first, second in zip(ax.get_lines(), ax2.get_lines()):
            assert first.get_color() == second.get_color()
        tm.close()

        # The colormap may be given by name or as a colormap object.
        for cmap in ("jet", cm.jet):
            ax = df.plot(colormap=cmap)
            rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))]
            self._check_colors(ax.get_lines(), linecolors=rgba_colors)
            tm.close()

        # make color a list if plotting one column frame
        # handles cases like df.plot(color='DodgerBlue')
        ax = df.loc[:, [0]].plot(color="DodgerBlue")
        self._check_colors(ax.lines, linecolors=["DodgerBlue"])

        ax = df.plot(color="red")
        self._check_colors(ax.get_lines(), linecolors=["red"] * 5)
        tm.close()

        # GH 10299
        custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"]
        ax = df.plot(color=custom_colors)
        self._check_colors(ax.get_lines(), linecolors=custom_colors)
        tm.close()

    def test_dont_modify_colors(self):
        """The caller's color list is not mutated by plotting."""
        colors = ["r", "g", "b"]
        DataFrame(np.random.rand(10, 2)).plot(color=colors)
        assert len(colors) == 3

    def test_line_colors_and_styles_subplots(self):
        """With ``subplots=True`` each axes gets the color meant for its column."""
        # GH 9894
        from matplotlib import cm

        default_colors = self._unpack_cycler(self.plt.rcParams)
        df = DataFrame(np.random.randn(5, 5))

        axes = df.plot(subplots=True)
        for ax, c in zip(axes, list(default_colors)):
            self._check_colors(ax.get_lines(), linecolors=[c])
        tm.close()

        # a single color, given as a one-letter code and as a full name
        for single in ("k", "green"):
            axes = df.plot(subplots=True, color=single)
            for ax in axes:
                self._check_colors(ax.get_lines(), linecolors=[single])
            tm.close()

        # one color per column, given as a string and as a list
        custom_colors = "rgcby"
        for color_arg in (custom_colors, list(custom_colors)):
            axes = df.plot(color=color_arg, subplots=True)
            for ax, c in zip(axes, list(custom_colors)):
                self._check_colors(ax.get_lines(), linecolors=[c])
            tm.close()

        # GH 10299
        custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"]
        axes = df.plot(color=custom_colors, subplots=True)
        for ax, c in zip(axes, list(custom_colors)):
            self._check_colors(ax.get_lines(), linecolors=[c])
        tm.close()

        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))]
        for cmap in ["jet", cm.jet]:
            axes = df.plot(colormap=cmap, subplots=True)
            for ax, c in zip(axes, rgba_colors):
                self._check_colors(ax.get_lines(), linecolors=[c])
            tm.close()

        # make color a list if plotting one column frame
        # handles cases like df.plot(color='DodgerBlue')
        axes = df.loc[:, [0]].plot(color="DodgerBlue", subplots=True)
        self._check_colors(axes[0].lines, linecolors=["DodgerBlue"])

        # single character style
        axes = df.plot(style="r", subplots=True)
        for ax in axes:
            self._check_colors(ax.get_lines(), linecolors=["r"])
        tm.close()

        # list of styles
        styles = list("rgcby")
        axes = df.plot(style=styles, subplots=True)
        for ax, c in zip(axes, styles):
            self._check_colors(ax.get_lines(), linecolors=[c])
        tm.close()

    def test_area_colors(self):
        """Area plots color lines, filled polygons and legend handles consistently."""
        from matplotlib import cm
        from matplotlib.collections import PolyCollection

        def _polys(ax):
            # the filled regions of an area plot
            return [o for o in ax.get_children() if isinstance(o, PolyCollection)]

        custom_colors = "rgcby"
        df = DataFrame(np.random.rand(5, 5))

        ax = df.plot.area(color=custom_colors)
        self._check_colors(ax.get_lines(), linecolors=custom_colors)
        self._check_colors(_polys(ax), facecolors=custom_colors)

        handles, labels = ax.get_legend_handles_labels()
        self._check_colors(handles, facecolors=custom_colors)
        for h in handles:
            assert h.get_alpha() is None
        tm.close()

        ax = df.plot.area(colormap="jet")
        jet_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))]
        self._check_colors(ax.get_lines(), linecolors=jet_colors)
        self._check_colors(_polys(ax), facecolors=jet_colors)

        handles, labels = ax.get_legend_handles_labels()
        self._check_colors(handles, facecolors=jet_colors)
        for h in handles:
            assert h.get_alpha() is None
        tm.close()

        # When stacked=False, alpha is set to 0.5
        ax = df.plot.area(colormap=cm.jet, stacked=False)
        self._check_colors(ax.get_lines(), linecolors=jet_colors)
        jet_with_alpha = [(c[0], c[1], c[2], 0.5) for c in jet_colors]
        self._check_colors(_polys(ax), facecolors=jet_with_alpha)

        handles, labels = ax.get_legend_handles_labels()
        self._check_colors(handles[: len(jet_colors)], linecolors=jet_with_alpha)
        for h in handles:
            assert h.get_alpha() == 0.5

    def test_hist_colors(self):
        """Histogram face colors follow the default cycle, ``color`` and ``colormap``."""
        from matplotlib import cm

        default_colors = self._unpack_cycler(self.plt.rcParams)
        df = DataFrame(np.random.randn(5, 5))

        ax = df.plot.hist()
        self._check_colors(ax.patches[::10], facecolors=default_colors[:5])
        tm.close()

        custom_colors = "rgcby"
        ax = df.plot.hist(color=custom_colors)
        self._check_colors(ax.patches[::10], facecolors=custom_colors)
        tm.close()

        # The colormap may be given by name or as a colormap object.
        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)]
        for cmap in ("jet", cm.jet):
            ax = df.plot.hist(colormap=cmap)
            self._check_colors(ax.patches[::10], facecolors=rgba_colors)
            tm.close()

        ax = df.loc[:, [0]].plot.hist(color="DodgerBlue")
        self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"])

        ax = df.plot(kind="hist", color="green")
        self._check_colors(ax.patches[::10], facecolors=["green"] * 5)
        tm.close()

    @td.skip_if_no_scipy
    def test_kde_colors(self):
        """KDE line colors follow ``color`` and ``colormap``."""
        from matplotlib import cm

        custom_colors = "rgcby"
        df = DataFrame(np.random.rand(5, 5))

        ax = df.plot.kde(color=custom_colors)
        self._check_colors(ax.get_lines(), linecolors=custom_colors)
        tm.close()

        ax = df.plot.kde(colormap="jet")
        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))]
        self._check_colors(ax.get_lines(), linecolors=rgba_colors)
        tm.close()

        ax = df.plot.kde(colormap=cm.jet)
        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))]
        self._check_colors(ax.get_lines(), linecolors=rgba_colors)

    @td.skip_if_no_scipy
    def test_kde_colors_and_styles_subplots(self):
        """KDE subplots get the color meant for their column."""
        from matplotlib import cm

        default_colors = self._unpack_cycler(self.plt.rcParams)
        df = DataFrame(np.random.randn(5, 5))

        axes = df.plot(kind="kde", subplots=True)
        for ax, c in zip(axes, list(default_colors)):
            self._check_colors(ax.get_lines(), linecolors=[c])
        tm.close()

        # a single color, given as a one-letter code and as a full name
        for single in ("k", "red"):
            axes = df.plot(kind="kde", color=single, subplots=True)
            for ax in axes:
                self._check_colors(ax.get_lines(), linecolors=[single])
            tm.close()

        custom_colors = "rgcby"
        axes = df.plot(kind="kde", color=custom_colors, subplots=True)
        for ax, c in zip(axes, list(custom_colors)):
            self._check_colors(ax.get_lines(), linecolors=[c])
        tm.close()

        rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))]
        for cmap in ["jet", cm.jet]:
            axes = df.plot(kind="kde", colormap=cmap, subplots=True)
            for ax, c in zip(axes, rgba_colors):
                self._check_colors(ax.get_lines(), linecolors=[c])
            tm.close()

        # make color a list if plotting one column frame
        # handles cases like df.plot(color='DodgerBlue')
        axes = df.loc[:, [0]].plot(kind="kde", color="DodgerBlue", subplots=True)
        self._check_colors(axes[0].lines, linecolors=["DodgerBlue"])

        # single character style
        axes = df.plot(kind="kde", style="r", subplots=True)
        for ax in axes:
            self._check_colors(ax.get_lines(), linecolors=["r"])
        tm.close()

        # list of styles
        styles = list("rgcby")
        axes = df.plot(kind="kde", style=styles, subplots=True)
        for ax, c in zip(axes, styles):
            self._check_colors(ax.get_lines(), linecolors=[c])
        tm.close()

    def test_boxplot_colors(self):
        """Box plot artists take colors from defaults, a dict, a colormap or a value."""
        from matplotlib import cm

        def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None):
            # TODO: outside this func?
            if fliers_c is None:
                fliers_c = "k"
            expected_by_artist = {
                "boxes": box_c,
                "whiskers": whiskers_c,
                "medians": medians_c,
                "fliers": fliers_c,
                "caps": caps_c,
            }
            # every artist in a group must carry that group's color
            for key, color in expected_by_artist.items():
                artists = bp[key]
                self._check_colors(artists, linecolors=[color] * len(artists))

        default_colors = self._unpack_cycler(self.plt.rcParams)
        df = DataFrame(np.random.randn(5, 5))

        bp = df.plot.box(return_type="dict")
        _check_colors(
            bp,
            default_colors[0],
            default_colors[0],
            default_colors[2],
            default_colors[0],
        )
        tm.close()

        dict_colors = {
            "boxes": "#572923",
            "whiskers": "#982042",
            "medians": "#804823",
            "caps": "#123456",
        }
        bp = df.plot.box(color=dict_colors, sym="r+", return_type="dict")
        _check_colors(
            bp,
            dict_colors["boxes"],
            dict_colors["whiskers"],
            dict_colors["medians"],
            dict_colors["caps"],
            "r",
        )
        tm.close()

        # partial colors
        dict_colors = {"whiskers": "c", "medians": "m"}
        bp = df.plot.box(color=dict_colors, return_type="dict")
        _check_colors(bp, default_colors[0], "c", "m", default_colors[0])
        tm.close()

        # The colormap may be given by name or as a colormap object.
        jet_colors = [cm.jet(n) for n in np.linspace(0, 1, 3)]
        for cmap in ("jet", cm.jet):
            bp = df.plot.box(colormap=cmap, return_type="dict")
            _check_colors(
                bp, jet_colors[0], jet_colors[0], jet_colors[2], jet_colors[0]
            )
            tm.close()

        # string color is applied to all artists except fliers
        bp = df.plot.box(color="DodgerBlue", return_type="dict")
        _check_colors(bp, "DodgerBlue", "DodgerBlue", "DodgerBlue", "DodgerBlue")

        # tuple is also applied to all artists except fliers
        bp = df.plot.box(color=(0, 1, 0), sym="#123456", return_type="dict")
        _check_colors(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), (0, 1, 0), "#123456")

        msg = re.escape(
            "color dict contains invalid key 'xxxx'. The key must be either "
            "['boxes', 'whiskers', 'medians', 'caps']"
        )
        with pytest.raises(ValueError, match=msg):
            # Color contains invalid key results in ValueError
            df.plot.box(color={"boxes": "red", "xxxx": "blue"})

    def test_default_color_cycle(self):
        """Lines use the colors of the active ``axes.prop_cycle`` rc setting."""
        import cycler
        import matplotlib.pyplot as plt

        colors = list("rgbk")
        # Scope the rc change to this block so it is restored afterwards
        # instead of being left in the global rcParams.
        with plt.rc_context(rc={"axes.prop_cycle": cycler.cycler("color", colors)}):
            df = DataFrame(np.random.randn(5, 3))
            ax = df.plot()

            expected = self._unpack_cycler(plt.rcParams)[:3]
            self._check_colors(ax.get_lines(), linecolors=expected)

    def test_no_color_bar(self):
        """``colorbar=None`` on a hexbin plot produces no colorbar."""
        ax = self.hexbin_df.plot.hexbin(x="A", y="B", colorbar=None)
        assert ax.collections[0].colorbar is None

    def test_mixing_cmap_and_colormap_raises(self):
        """Passing both ``cmap`` and ``colormap`` raises ``TypeError``."""
        msg = "Only specify one of `cmap` and `colormap`"
        with pytest.raises(TypeError, match=msg):
            self.hexbin_df.plot.hexbin(x="A", y="B", cmap="YlGn", colormap="BuGn")

    def test_passed_bar_colors(self):
        """Bars take their face colors from a ``ListedColormap`` passed as ``cmap``."""
        import matplotlib as mpl

        color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)]
        colormap = mpl.colors.ListedColormap(color_tuples)
        barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap)
        assert color_tuples == [c.get_facecolor() for c in barplot.patches]

    def test_rcParams_bar_colors(self):
        """Bars take their face colors from ``axes.prop_cycle`` set via rc_context."""
        import matplotlib as mpl

        color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)]
        with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}):
            barplot = DataFrame([[1, 2, 3]]).plot(kind="bar")
        assert color_tuples == [c.get_facecolor() for c in barplot.patches]

    def test_colors_of_columns_with_same_name(self):
        """Legend handles match line colors when column labels are duplicated."""
        # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136
        # Creating a DataFrame with duplicate column labels and testing colors of them.
        df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]})
        df1 = DataFrame({"a": [2, 4, 6]})
        df_concat = pd.concat([df, df1], axis=1)
        result = df_concat.plot()
        for legend, line in zip(result.get_legend().legendHandles, result.lines):
            assert legend.get_color() == line.get_color()

    def test_invalid_colormap(self):
        """An unknown colormap name raises ``ValueError``."""
        df = DataFrame(np.random.randn(3, 2), columns=["A", "B"])
        msg = "'invalid_colormap' is not a valid value for name; supported values are "
        with pytest.raises(ValueError, match=msg):
            df.plot(colormap="invalid_colormap")

View File

@ -0,0 +1,92 @@
""" Test cases for DataFrame.plot """
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import DataFrame
import pandas._testing as tm
from pandas.tests.plotting.common import TestPlotBase
# Module-level pytest marker: tags every test collected from this module as "slow".
pytestmark = pytest.mark.slow
@td.skip_if_no_mpl
class TestDataFramePlotsGroupby(TestPlotBase):
    """Tick-label visibility tests for ``DataFrame.groupby(...).boxplot``."""

    def setup_method(self, method):
        """Run the base-class setup, restore matplotlib rc defaults, build fixtures."""
        TestPlotBase.setup_method(self, method)
        import matplotlib as mpl

        mpl.rcdefaults()

        self.tdf = tm.makeTimeDataFrame()
        col_a = np.random.uniform(size=20)
        col_b = np.random.uniform(size=20)
        col_c = np.arange(20) + np.random.uniform(size=20)
        self.hexbin_df = DataFrame({"A": col_a, "B": col_b, "C": col_c})

    def _assert_ytickslabels_visibility(self, axes, expected):
        """Check each axes' y tick labels against the paired expected visibility."""
        for axis, should_show in zip(axes, expected):
            self._check_visible(axis.get_yticklabels(), visible=should_show)

    def _assert_xtickslabels_visibility(self, axes, expected):
        """Check each axes' x tick labels against the paired expected visibility."""
        for axis, should_show in zip(axes, expected):
            self._check_visible(axis.get_xticklabels(), visible=should_show)

    @pytest.mark.parametrize(
        "kwargs, expected",
        [
            # behavior without keyword
            ({}, [True, False, True, False]),
            # set sharey=True should be identical
            ({"sharey": True}, [True, False, True, False]),
            # sharey=False, all yticklabels should be visible
            ({"sharey": False}, [True, True, True, True]),
        ],
    )
    def test_groupby_boxplot_sharey(self, kwargs, expected):
        """``sharey`` controls which grouped box plot axes show y tick labels."""
        # https://github.com/pandas-dev/pandas/issues/20968
        # sharey can now be switched check whether the right
        # pair of axes is turned on or off
        frame = DataFrame(
            {
                "a": [-1.43, -0.15, -3.70, -1.43, -0.14],
                "b": [0.56, 0.84, 0.29, 0.56, 0.85],
                "c": [0, 1, 2, 3, 1],
            },
            index=[0, 1, 2, 3, 4],
        )
        grouped_axes = frame.groupby("c").boxplot(**kwargs)
        self._assert_ytickslabels_visibility(grouped_axes, expected)

    @pytest.mark.parametrize(
        "kwargs, expected",
        [
            # behavior without keyword
            ({}, [True, True, True, True]),
            # set sharex=False should be identical
            ({"sharex": False}, [True, True, True, True]),
            # sharex=True, xticklabels should be visible
            # only for bottom plots
            ({"sharex": True}, [False, False, True, True]),
        ],
    )
    def test_groupby_boxplot_sharex(self, kwargs, expected):
        """``sharex`` controls which grouped box plot axes show x tick labels."""
        # https://github.com/pandas-dev/pandas/issues/20968
        # sharex can now be switched check whether the right
        # pair of axes is turned on or off
        frame = DataFrame(
            {
                "a": [-1.43, -0.15, -3.70, -1.43, -0.14],
                "b": [0.56, 0.84, 0.29, 0.56, 0.85],
                "c": [0, 1, 2, 3, 1],
            },
            index=[0, 1, 2, 3, 4],
        )
        grouped_axes = frame.groupby("c").boxplot(**kwargs)
        self._assert_xtickslabels_visibility(grouped_axes, expected)

View File

@ -0,0 +1,195 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
date_range,
)
from pandas.tests.plotting.common import TestPlotBase
# Module-level pytest marker: tags every test collected from this module as "slow".
pytestmark = pytest.mark.slow
class TestFrameLegend(TestPlotBase):
    """Legend tests for ``DataFrame.plot``: labels, handles, markers and titles."""

    @pytest.mark.xfail(
        reason=(
            "Open bug in matplotlib "
            "https://github.com/matplotlib/matplotlib/issues/11357"
        )
    )
    def test_mixed_yerr(self):
        """Legend handle types differ for series drawn with and without ``yerr``."""
        # https://github.com/pandas-dev/pandas/issues/39522
        from matplotlib.collections import LineCollection
        from matplotlib.lines import Line2D

        frame = DataFrame([{"x": 1, "a": 1, "b": 1}, {"x": 2, "a": 2, "b": 3}])

        ax = frame.plot("x", "a", c="orange", yerr=0.1, label="orange")
        frame.plot("x", "b", c="blue", yerr=None, ax=ax, label="blue")

        first_handle, second_handle = ax.get_legend().legendHandles[:2]
        assert isinstance(first_handle, LineCollection)
        assert isinstance(second_handle, Line2D)

    def test_legend_false(self):
        """Legend handles keep per-column colors across a secondary axis and a second frame."""
        # https://github.com/pandas-dev/pandas/issues/40044
        primary = DataFrame({"a": [1, 1], "b": [2, 3]})
        extra = DataFrame({"d": [2.5, 2.5]})

        ax = primary.plot(
            legend=True, color={"a": "blue", "b": "green"}, secondary_y="b"
        )
        extra.plot(legend=True, color={"d": "red"}, ax=ax)

        handle_colors = [h.get_color() for h in ax.get_legend().legendHandles]
        assert handle_colors == ["blue", "green", "red"]

    def test_df_legend_labels(self):
        """Legend labels accumulate correctly when several frames share one axes."""
        kinds = ["line", "bar", "barh", "kde", "area", "hist"]
        df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"])
        df2 = DataFrame(np.random.rand(3, 3), columns=["d", "e", "f"])
        df3 = DataFrame(np.random.rand(3, 3), columns=["g", "h", "i"])
        df4 = DataFrame(np.random.rand(3, 3), columns=["j", "k", "l"])

        for kind in kinds:
            ax = df.plot(kind=kind, legend=True)
            self._check_legend_labels(ax, labels=df.columns)

            # legend=False adds no entries
            ax = df2.plot(kind=kind, legend=False, ax=ax)
            self._check_legend_labels(ax, labels=df.columns)

            ax = df3.plot(kind=kind, legend=True, ax=ax)
            self._check_legend_labels(ax, labels=df.columns.union(df3.columns))

            # legend="reverse" appends the new columns in reversed order
            ax = df4.plot(kind=kind, legend="reverse", ax=ax)
            forward_part = list(df.columns.union(df3.columns))
            reversed_part = list(reversed(df4.columns))
            self._check_legend_labels(ax, labels=forward_part + reversed_part)

        # Secondary Y
        right_axis_labels = ["a", "b (right)", "c"]
        ax = df.plot(legend=True, secondary_y="b")
        self._check_legend_labels(ax, labels=right_axis_labels)
        ax = df2.plot(legend=False, ax=ax)
        self._check_legend_labels(ax, labels=right_axis_labels)
        ax = df3.plot(kind="bar", legend=True, secondary_y="h", ax=ax)
        self._check_legend_labels(
            ax, labels=["a", "b (right)", "c", "g", "h (right)", "i"]
        )

        # Time Series
        ind = date_range("1/1/2014", periods=3)
        df = DataFrame(np.random.randn(3, 3), columns=["a", "b", "c"], index=ind)
        df2 = DataFrame(np.random.randn(3, 3), columns=["d", "e", "f"], index=ind)
        df3 = DataFrame(np.random.randn(3, 3), columns=["g", "h", "i"], index=ind)
        ax = df.plot(legend=True, secondary_y="b")
        self._check_legend_labels(ax, labels=right_axis_labels)
        ax = df2.plot(legend=False, ax=ax)
        self._check_legend_labels(ax, labels=right_axis_labels)
        ax = df3.plot(legend=True, ax=ax)
        self._check_legend_labels(ax, labels=["a", "b (right)", "c", "g", "h", "i"])

        # scatter
        ax = df.plot.scatter(x="a", y="b", label="data1")
        self._check_legend_labels(ax, labels=["data1"])
        ax = df2.plot.scatter(x="d", y="e", legend=False, label="data2", ax=ax)
        self._check_legend_labels(ax, labels=["data1"])
        ax = df3.plot.scatter(x="g", y="h", label="data3", ax=ax)
        self._check_legend_labels(ax, labels=["data1", "data3"])

        # ensure label args pass through and
        # index name does not mutate
        # column names don't mutate
        df5 = df.set_index("a")
        ax = df5.plot(y="b")
        self._check_legend_labels(ax, labels=["b"])
        ax = df5.plot(y="b", label="LABEL_b")
        self._check_legend_labels(ax, labels=["LABEL_b"])
        self._check_text_labels(ax.xaxis.get_label(), "a")
        ax = df5.plot(y="c", label="LABEL_c", ax=ax)
        self._check_legend_labels(ax, labels=["LABEL_b", "LABEL_c"])
        assert df5.columns.tolist() == ["b", "c"]

    def test_missing_marker_multi_plots_on_same_ax(self):
        """Legend markers stay correct whatever order the series are drawn in."""
        # GH 18222
        df = DataFrame(data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"])
        fig, ax = self.plt.subplots(nrows=1, ncols=3)

        # Plot keywords used for each column, whichever panel it is drawn on.
        line_props = {
            "r": {"linewidth": 0, "marker": "o", "color": "r"},
            "g": {"linewidth": 1, "marker": "x", "color": "g"},
            "b": {"linewidth": 1, "marker": "o", "color": "b"},
        }
        # Left, center and right panels draw the same columns in different orders.
        draw_orders = ["rgb", "brg", "gbr"]

        for panel, order in zip(ax, draw_orders):
            for column in order:
                df.plot(x="x", y=column, ax=panel, **line_props[column])
            self._check_legend_labels(panel, labels=list(order))
            self._check_legend_marker(
                panel,
                expected_markers=[line_props[column]["marker"] for column in order],
            )

    def test_legend_name(self):
        """The legend title follows the column index names of the frame that draws it."""
        multi = DataFrame(
            np.random.randn(4, 4),
            columns=[np.array(["a", "a", "b", "b"]), np.array(["x", "y", "x", "y"])],
        )
        multi.columns.names = ["group", "individual"]

        ax = multi.plot()
        self._check_text_labels(ax.legend_.get_title(), "group,individual")

        df = DataFrame(np.random.randn(5, 5))
        ax = df.plot(legend=True, ax=ax)
        self._check_text_labels(ax.legend_.get_title(), "group,individual")

        df.columns.name = "new"
        # legend=False leaves the existing title in place
        ax = df.plot(legend=False, ax=ax)
        self._check_text_labels(ax.legend_.get_title(), "group,individual")

        ax = df.plot(legend=True, ax=ax)
        self._check_text_labels(ax.legend_.get_title(), "new")

    def test_no_legend(self):
        """``legend=False`` yields no visible legend for each plot kind."""
        df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"])

        for kind in ("line", "bar", "barh", "kde", "area", "hist"):
            ax = df.plot(kind=kind, legend=False)
            self._check_legend_labels(ax, visible=False)

    def test_missing_markers_legend(self):
        """Markers passed via ``marker=`` appear in the legend."""
        # 14958
        df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"])

        ax = df.plot(y=["A"], marker="x", linestyle="solid")
        df.plot(y=["B"], marker="o", linestyle="dotted", ax=ax)
        df.plot(y=["C"], marker="<", linestyle="dotted", ax=ax)

        self._check_legend_labels(ax, labels=["A", "B", "C"])
        self._check_legend_marker(ax, expected_markers=["x", "o", "<"])

    def test_missing_markers_legend_using_style(self):
        """Markers passed via ``style=`` appear in the legend."""
        # 14563
        xs = [1, 2, 3, 4, 5, 6]
        df = DataFrame(
            {
                "A": [1, 2, 3, 4, 5, 6],
                "B": [2, 4, 1, 3, 2, 4],
                "C": [3, 3, 2, 6, 4, 2],
                "X": xs,
            }
        )

        fig, ax = self.plt.subplots()
        for column in "ABC":
            df.plot("X", column, label=column, ax=ax, style=".")

        self._check_legend_labels(ax, labels=["A", "B", "C"])
        self._check_legend_marker(ax, expected_markers=[".", ".", "."])

View File

@ -0,0 +1,687 @@
""" Test cases for DataFrame.plot """
import string
import warnings
import numpy as np
import pytest
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
DataFrame,
Series,
date_range,
)
import pandas._testing as tm
from pandas.tests.plotting.common import TestPlotBase
from pandas.io.formats.printing import pprint_thing
# Module-level pytest marker: tags every test collected from this module as "slow".
pytestmark = pytest.mark.slow
@td.skip_if_no_mpl
class TestDataFramePlotsSubplots(TestPlotBase):
def setup_method(self, method):
    """Run the base-class setup, restore matplotlib rc defaults, build fixtures."""
    TestPlotBase.setup_method(self, method)
    import matplotlib as mpl

    mpl.rcdefaults()

    self.tdf = tm.makeTimeDataFrame()
    col_a = np.random.uniform(size=20)
    col_b = np.random.uniform(size=20)
    col_c = np.arange(20) + np.random.uniform(size=20)
    self.hexbin_df = DataFrame({"A": col_a, "B": col_b, "C": col_c})
def test_subplots(self):
    """Check layout, legends and axis visibility of ``subplots=True`` plots."""
    df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10]))

    for kind in ("bar", "barh", "line", "area"):
        axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True)
        self._check_axes_shape(axes, axes_num=3, layout=(3, 1))
        assert axes.shape == (3,)

        # each subplot's legend carries only its own column
        for ax, column in zip(axes, df.columns):
            self._check_legend_labels(ax, labels=[pprint_thing(column)])

        # NOTE(review): the [:-2] slice only covers the first of the three
        # axes; confirm whether the middle one was meant to be checked too.
        for upper in axes[:-2]:
            self._check_visible(upper.xaxis)  # xaxis must be visible for grid
            self._check_visible(upper.get_xticklabels(), visible=False)
            if kind != "bar" or not self.mpl_ge_3_1_0:
                # change https://github.com/pandas-dev/pandas/issues/26714
                self._check_visible(upper.get_xticklabels(minor=True), visible=False)
            self._check_visible(upper.xaxis.get_label(), visible=False)
            self._check_visible(upper.get_yticklabels())

        # with sharex=True the last axes keeps everything visible
        bottom = axes[-1]
        self._check_visible(bottom.xaxis)
        self._check_visible(bottom.get_xticklabels())
        self._check_visible(bottom.get_xticklabels(minor=True))
        self._check_visible(bottom.xaxis.get_label())
        self._check_visible(bottom.get_yticklabels())

        # with sharex=False every axes keeps everything visible
        axes = df.plot(kind=kind, subplots=True, sharex=False)
        for ax in axes:
            self._check_visible(ax.xaxis)
            self._check_visible(ax.get_xticklabels())
            self._check_visible(ax.get_xticklabels(minor=True))
            self._check_visible(ax.xaxis.get_label())
            self._check_visible(ax.get_yticklabels())

        axes = df.plot(kind=kind, subplots=True, legend=False)
        for ax in axes:
            assert ax.get_legend() is None
def test_subplots_timeseries(self):
idx = date_range(start="2014-07-01", freq="M", periods=10)
df = DataFrame(np.random.rand(10, 3), index=idx)
for kind in ["line", "area"]:
axes = df.plot(kind=kind, subplots=True, sharex=True)
self._check_axes_shape(axes, axes_num=3, layout=(3, 1))
for ax in axes[:-2]:
# GH 7801
self._check_visible(ax.xaxis) # xaxis must be visible for grid
self._check_visible(ax.get_xticklabels(), visible=False)
self._check_visible(ax.get_xticklabels(minor=True), visible=False)
self._check_visible(ax.xaxis.get_label(), visible=False)
self._check_visible(ax.get_yticklabels())
self._check_visible(axes[-1].xaxis)
self._check_visible(axes[-1].get_xticklabels())
self._check_visible(axes[-1].get_xticklabels(minor=True))
self._check_visible(axes[-1].xaxis.get_label())
self._check_visible(axes[-1].get_yticklabels())
self._check_ticks_props(axes, xrot=0)
axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7)
for ax in axes:
self._check_visible(ax.xaxis)
self._check_visible(ax.get_xticklabels())
self._check_visible(ax.get_xticklabels(minor=True))
self._check_visible(ax.xaxis.get_label())
self._check_visible(ax.get_yticklabels())
self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7)
def test_subplots_timeseries_y_axis(self):
# GH16953
data = {
"numeric": np.array([1, 2, 5]),
"timedelta": [
pd.Timedelta(-10, unit="s"),
pd.Timedelta(10, unit="m"),
pd.Timedelta(10, unit="h"),
],
"datetime_no_tz": [
pd.to_datetime("2017-08-01 00:00:00"),
pd.to_datetime("2017-08-01 02:00:00"),
pd.to_datetime("2017-08-02 00:00:00"),
],
"datetime_all_tz": [
pd.to_datetime("2017-08-01 00:00:00", utc=True),
pd.to_datetime("2017-08-01 02:00:00", utc=True),
pd.to_datetime("2017-08-02 00:00:00", utc=True),
],
"text": ["This", "should", "fail"],
}
testdata = DataFrame(data)
y_cols = ["numeric", "timedelta", "datetime_no_tz", "datetime_all_tz"]
for col in y_cols:
ax = testdata.plot(y=col)
result = ax.get_lines()[0].get_data()[1]
expected = testdata[col].values
assert (result == expected).all()
msg = "no numeric data to plot"
with pytest.raises(TypeError, match=msg):
testdata.plot(y="text")
@pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz")
def test_subplots_timeseries_y_axis_not_supported(self):
"""
This test will fail for:
period:
since period isn't yet implemented in ``select_dtypes``
and because it will need a custom value converter +
tick formatter (as was done for x-axis plots)
categorical:
because it will need a custom value converter +
tick formatter (also doesn't work for x-axis, as of now)
datetime_mixed_tz:
because of the way how pandas handles ``Series`` of
``datetime`` objects with different timezone,
generally converting ``datetime`` objects in a tz-aware
form could help with this problem
"""
data = {
"numeric": np.array([1, 2, 5]),
"period": [
pd.Period("2017-08-01 00:00:00", freq="H"),
pd.Period("2017-08-01 02:00", freq="H"),
pd.Period("2017-08-02 00:00:00", freq="H"),
],
"categorical": pd.Categorical(
["c", "b", "a"], categories=["a", "b", "c"], ordered=False
),
"datetime_mixed_tz": [
pd.to_datetime("2017-08-01 00:00:00", utc=True),
pd.to_datetime("2017-08-01 02:00:00"),
pd.to_datetime("2017-08-02 00:00:00"),
],
}
testdata = DataFrame(data)
ax_period = testdata.plot(x="numeric", y="period")
assert (
ax_period.get_lines()[0].get_data()[1] == testdata["period"].values
).all()
ax_categorical = testdata.plot(x="numeric", y="categorical")
assert (
ax_categorical.get_lines()[0].get_data()[1]
== testdata["categorical"].values
).all()
ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz")
assert (
ax_datetime_mixed_tz.get_lines()[0].get_data()[1]
== testdata["datetime_mixed_tz"].values
).all()
def test_subplots_layout_multi_column(self):
# GH 6667
df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10]))
axes = df.plot(subplots=True, layout=(2, 2))
self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
assert axes.shape == (2, 2)
axes = df.plot(subplots=True, layout=(-1, 2))
self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
assert axes.shape == (2, 2)
axes = df.plot(subplots=True, layout=(2, -1))
self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
assert axes.shape == (2, 2)
axes = df.plot(subplots=True, layout=(1, 4))
self._check_axes_shape(axes, axes_num=3, layout=(1, 4))
assert axes.shape == (1, 4)
axes = df.plot(subplots=True, layout=(-1, 4))
self._check_axes_shape(axes, axes_num=3, layout=(1, 4))
assert axes.shape == (1, 4)
axes = df.plot(subplots=True, layout=(4, -1))
self._check_axes_shape(axes, axes_num=3, layout=(4, 1))
assert axes.shape == (4, 1)
msg = "Layout of 1x1 must be larger than required size 3"
with pytest.raises(ValueError, match=msg):
df.plot(subplots=True, layout=(1, 1))
msg = "At least one dimension of layout must be positive"
with pytest.raises(ValueError, match=msg):
df.plot(subplots=True, layout=(-1, -1))
@pytest.mark.parametrize(
"kwargs, expected_axes_num, expected_layout, expected_shape",
[
({}, 1, (1, 1), (1,)),
({"layout": (3, 3)}, 1, (3, 3), (3, 3)),
],
)
def test_subplots_layout_single_column(
self, kwargs, expected_axes_num, expected_layout, expected_shape
):
# GH 6667
df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10]))
axes = df.plot(subplots=True, **kwargs)
self._check_axes_shape(
axes,
axes_num=expected_axes_num,
layout=expected_layout,
)
assert axes.shape == expected_shape
def test_subplots_warnings(self):
# GH 9464
with tm.assert_produces_warning(None):
df = DataFrame(np.random.randn(100, 4))
df.plot(subplots=True, layout=(3, 2))
df = DataFrame(
np.random.randn(100, 4), index=date_range("1/1/2000", periods=100)
)
df.plot(subplots=True, layout=(3, 2))
def test_subplots_multiple_axes(self):
# GH 5353, 6970, GH 7069
fig, axes = self.plt.subplots(2, 3)
df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10]))
returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False)
self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
assert returned.shape == (3,)
assert returned[0].figure is fig
# draw on second row
returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False)
self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
assert returned.shape == (3,)
assert returned[0].figure is fig
self._check_axes_shape(axes, axes_num=6, layout=(2, 3))
tm.close()
msg = "The number of passed axes must be 3, the same as the output plot"
with pytest.raises(ValueError, match=msg):
fig, axes = self.plt.subplots(2, 3)
# pass different number of axes from required
df.plot(subplots=True, ax=axes)
# pass 2-dim axes and invalid layout
# invalid lauout should not affect to input and return value
# (show warning is tested in
# TestDataFrameGroupByPlots.test_grouped_box_multiple_axes
fig, axes = self.plt.subplots(2, 2)
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10]))
returned = df.plot(
subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False
)
self._check_axes_shape(returned, axes_num=4, layout=(2, 2))
assert returned.shape == (4,)
returned = df.plot(
subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False
)
self._check_axes_shape(returned, axes_num=4, layout=(2, 2))
assert returned.shape == (4,)
returned = df.plot(
subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False
)
self._check_axes_shape(returned, axes_num=4, layout=(2, 2))
assert returned.shape == (4,)
# single column
fig, axes = self.plt.subplots(1, 1)
df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10]))
axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False)
self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
assert axes.shape == (1,)
def test_subplots_ts_share_axes(self):
# GH 3964
fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True)
self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3)
df = DataFrame(
np.random.randn(10, 9),
index=date_range(start="2014-07-01", freq="M", periods=10),
)
for i, ax in enumerate(axes.ravel()):
df[i].plot(ax=ax, fontsize=5)
# Rows other than bottom should not be visible
for ax in axes[0:-1].ravel():
self._check_visible(ax.get_xticklabels(), visible=False)
# Bottom row should be visible
for ax in axes[-1].ravel():
self._check_visible(ax.get_xticklabels(), visible=True)
# First column should be visible
for ax in axes[[0, 1, 2], [0]].ravel():
self._check_visible(ax.get_yticklabels(), visible=True)
# Other columns should not be visible
for ax in axes[[0, 1, 2], [1]].ravel():
self._check_visible(ax.get_yticklabels(), visible=False)
for ax in axes[[0, 1, 2], [2]].ravel():
self._check_visible(ax.get_yticklabels(), visible=False)
def test_subplots_sharex_axes_existing_axes(self):
# GH 9158
d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]}
df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14"))
axes = df[["A", "B"]].plot(subplots=True)
df["C"].plot(ax=axes[0], secondary_y=True)
self._check_visible(axes[0].get_xticklabels(), visible=False)
self._check_visible(axes[1].get_xticklabels(), visible=True)
for ax in axes.ravel():
self._check_visible(ax.get_yticklabels(), visible=True)
def test_subplots_dup_columns(self):
# GH 10962
df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa"))
axes = df.plot(subplots=True)
for ax in axes:
self._check_legend_labels(ax, labels=["a"])
assert len(ax.lines) == 1
tm.close()
axes = df.plot(subplots=True, secondary_y="a")
for ax in axes:
# (right) is only attached when subplots=False
self._check_legend_labels(ax, labels=["a"])
assert len(ax.lines) == 1
tm.close()
ax = df.plot(secondary_y="a")
self._check_legend_labels(ax, labels=["a (right)"] * 5)
assert len(ax.lines) == 0
assert len(ax.right_ax.lines) == 5
def test_bar_log_no_subplots(self):
# GH3254, GH3298 matplotlib/matplotlib#1882, #1892
# regressions in 1.2.1
expected = np.array([0.1, 1.0, 10.0, 100])
# no subplots
df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5))
ax = df.plot.bar(grid=True, log=True)
tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected)
def test_bar_log_subplots(self):
expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4])
ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar(
log=True, subplots=True
)
tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected)
tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected)
def test_boxplot_subplots_return_type(self):
df = self.hist_df
# normal style: return_type=None
result = df.plot.box(subplots=True)
assert isinstance(result, Series)
self._check_box_return_type(
result, None, expected_keys=["height", "weight", "category"]
)
for t in ["dict", "axes", "both"]:
returned = df.plot.box(return_type=t, subplots=True)
self._check_box_return_type(
returned,
t,
expected_keys=["height", "weight", "category"],
check_ax_title=False,
)
def test_df_subplots_patterns_minorticks(self):
# GH 10657
import matplotlib.pyplot as plt
df = DataFrame(
np.random.randn(10, 2),
index=date_range("1/1/2000", periods=10),
columns=list("AB"),
)
# shared subplots
fig, axes = plt.subplots(2, 1, sharex=True)
axes = df.plot(subplots=True, ax=axes)
for ax in axes:
assert len(ax.lines) == 1
self._check_visible(ax.get_yticklabels(), visible=True)
# xaxis of 1st ax must be hidden
self._check_visible(axes[0].get_xticklabels(), visible=False)
self._check_visible(axes[0].get_xticklabels(minor=True), visible=False)
self._check_visible(axes[1].get_xticklabels(), visible=True)
self._check_visible(axes[1].get_xticklabels(minor=True), visible=True)
tm.close()
fig, axes = plt.subplots(2, 1)
with tm.assert_produces_warning(UserWarning):
axes = df.plot(subplots=True, ax=axes, sharex=True)
for ax in axes:
assert len(ax.lines) == 1
self._check_visible(ax.get_yticklabels(), visible=True)
# xaxis of 1st ax must be hidden
self._check_visible(axes[0].get_xticklabels(), visible=False)
self._check_visible(axes[0].get_xticklabels(minor=True), visible=False)
self._check_visible(axes[1].get_xticklabels(), visible=True)
self._check_visible(axes[1].get_xticklabels(minor=True), visible=True)
tm.close()
# not shared
fig, axes = plt.subplots(2, 1)
axes = df.plot(subplots=True, ax=axes)
for ax in axes:
assert len(ax.lines) == 1
self._check_visible(ax.get_yticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(minor=True), visible=True)
tm.close()
def test_subplots_sharex_false(self):
# test when sharex is set to False, two plots should have different
# labels, GH 25160
df = DataFrame(np.random.rand(10, 2))
df.iloc[5:, 1] = np.nan
df.iloc[:5, 0] = np.nan
figs, axs = self.plt.subplots(2, 1)
df.plot.line(ax=axs, subplots=True, sharex=False)
expected_ax1 = np.arange(4.5, 10, 0.5)
expected_ax2 = np.arange(-0.5, 5, 0.5)
tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1)
tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2)
def test_subplots_constrained_layout(self):
# GH 25261
idx = date_range(start="now", periods=10)
df = DataFrame(np.random.rand(10, 3), index=idx)
kwargs = {}
if hasattr(self.plt.Figure, "get_constrained_layout"):
kwargs["constrained_layout"] = True
fig, axes = self.plt.subplots(2, **kwargs)
with tm.assert_produces_warning(None):
df.plot(ax=axes[0])
with tm.ensure_clean(return_filelike=True) as path:
self.plt.savefig(path)
@pytest.mark.parametrize(
"index_name, old_label, new_label",
[
(None, "", "new"),
("old", "old", "new"),
(None, "", ""),
(None, "", 1),
(None, "", [1, 2]),
],
)
@pytest.mark.parametrize("kind", ["line", "area", "bar"])
def test_xlabel_ylabel_dataframe_subplots(
self, kind, index_name, old_label, new_label
):
# GH 9093
df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"])
df.index.name = index_name
# default is the ylabel is not shown and xlabel is index name
axes = df.plot(kind=kind, subplots=True)
assert all(ax.get_ylabel() == "" for ax in axes)
assert all(ax.get_xlabel() == old_label for ax in axes)
# old xlabel will be overridden and assigned ylabel will be used as ylabel
axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True)
assert all(ax.get_ylabel() == str(new_label) for ax in axes)
assert all(ax.get_xlabel() == str(new_label) for ax in axes)
@pytest.mark.parametrize(
"kwargs",
[
# stacked center
{"kind": "bar", "stacked": True},
{"kind": "bar", "stacked": True, "width": 0.9},
{"kind": "barh", "stacked": True},
{"kind": "barh", "stacked": True, "width": 0.9},
# center
{"kind": "bar", "stacked": False},
{"kind": "bar", "stacked": False, "width": 0.9},
{"kind": "barh", "stacked": False},
{"kind": "barh", "stacked": False, "width": 0.9},
# subplots center
{"kind": "bar", "subplots": True},
{"kind": "bar", "subplots": True, "width": 0.9},
{"kind": "barh", "subplots": True},
{"kind": "barh", "subplots": True, "width": 0.9},
# align edge
{"kind": "bar", "stacked": True, "align": "edge"},
{"kind": "bar", "stacked": True, "width": 0.9, "align": "edge"},
{"kind": "barh", "stacked": True, "align": "edge"},
{"kind": "barh", "stacked": True, "width": 0.9, "align": "edge"},
{"kind": "bar", "stacked": False, "align": "edge"},
{"kind": "bar", "stacked": False, "width": 0.9, "align": "edge"},
{"kind": "barh", "stacked": False, "align": "edge"},
{"kind": "barh", "stacked": False, "width": 0.9, "align": "edge"},
{"kind": "bar", "subplots": True, "align": "edge"},
{"kind": "bar", "subplots": True, "width": 0.9, "align": "edge"},
{"kind": "barh", "subplots": True, "align": "edge"},
{"kind": "barh", "subplots": True, "width": 0.9, "align": "edge"},
],
)
def test_bar_align_multiple_columns(self, kwargs):
# GH2157
df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5))
self._check_bar_alignment(df, **kwargs)
@pytest.mark.parametrize(
"kwargs",
[
{"kind": "bar", "stacked": False},
{"kind": "bar", "stacked": True},
{"kind": "barh", "stacked": False},
{"kind": "barh", "stacked": True},
{"kind": "bar", "subplots": True},
{"kind": "barh", "subplots": True},
],
)
def test_bar_align_single_column(self, kwargs):
df = DataFrame(np.random.randn(5))
self._check_bar_alignment(df, **kwargs)
@pytest.mark.parametrize(
"kwargs",
[
{"kind": "bar", "stacked": False},
{"kind": "bar", "stacked": True},
{"kind": "barh", "stacked": False},
{"kind": "barh", "stacked": True},
{"kind": "bar", "subplots": True},
{"kind": "barh", "subplots": True},
],
)
def test_bar_barwidth_position(self, kwargs):
df = DataFrame(np.random.randn(5, 5))
self._check_bar_alignment(df, width=0.9, position=0.2, **kwargs)
def test_bar_barwidth_position_int(self):
# GH 12979
df = DataFrame(np.random.randn(5, 5))
for w in [1, 1.0]:
ax = df.plot.bar(stacked=True, width=w)
ticks = ax.xaxis.get_ticklocs()
tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4]))
assert ax.get_xlim() == (-0.75, 4.75)
# check left-edge of bars
assert ax.patches[0].get_x() == -0.5
assert ax.patches[-1].get_x() == 3.5
self._check_bar_alignment(df, kind="bar", stacked=True, width=1)
self._check_bar_alignment(df, kind="barh", stacked=False, width=1)
self._check_bar_alignment(df, kind="barh", stacked=True, width=1)
self._check_bar_alignment(df, kind="bar", subplots=True, width=1)
self._check_bar_alignment(df, kind="barh", subplots=True, width=1)
def _check_bar_alignment(
self,
df,
kind="bar",
stacked=False,
subplots=False,
align="center",
width=0.5,
position=0.5,
):
axes = df.plot(
kind=kind,
stacked=stacked,
subplots=subplots,
align=align,
width=width,
position=position,
grid=True,
)
axes = self._flatten_visible(axes)
for ax in axes:
if kind == "bar":
axis = ax.xaxis
ax_min, ax_max = ax.get_xlim()
min_edge = min(p.get_x() for p in ax.patches)
max_edge = max(p.get_x() + p.get_width() for p in ax.patches)
elif kind == "barh":
axis = ax.yaxis
ax_min, ax_max = ax.get_ylim()
min_edge = min(p.get_y() for p in ax.patches)
max_edge = max(p.get_y() + p.get_height() for p in ax.patches)
else:
raise ValueError
# GH 7498
# compare margins between lim and bar edges
tm.assert_almost_equal(ax_min, min_edge - 0.25)
tm.assert_almost_equal(ax_max, max_edge + 0.25)
p = ax.patches[0]
if kind == "bar" and (stacked is True or subplots is True):
edge = p.get_x()
center = edge + p.get_width() * position
elif kind == "bar" and stacked is False:
center = p.get_x() + p.get_width() * len(df.columns) * position
edge = p.get_x()
elif kind == "barh" and (stacked is True or subplots is True):
center = p.get_y() + p.get_height() * position
edge = p.get_y()
elif kind == "barh" and stacked is False:
center = p.get_y() + p.get_height() * len(df.columns) * position
edge = p.get_y()
else:
raise ValueError
# Check the ticks locates on integer
assert (axis.get_ticklocs() == np.arange(len(df))).all()
if align == "center":
# Check whether the bar locates on center
tm.assert_almost_equal(axis.get_ticklocs()[0], center)
elif align == "edge":
# Check whether the bar's edge starts from the tick
tm.assert_almost_equal(axis.get_ticklocs()[0], edge)
else:
raise ValueError
return axes

View File

@ -0,0 +1,389 @@
import re
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import DataFrame
import pandas._testing as tm
from pandas.tests.plotting.common import (
TestPlotBase,
_check_plot_works,
)
def _create_hist_box_with_by_df():
np.random.seed(0)
df = DataFrame(np.random.randn(30, 2), columns=["A", "B"])
df["C"] = np.random.choice(["a", "b", "c"], 30)
df["D"] = np.random.choice(["a", "b", "c"], 30)
return df
@td.skip_if_no_mpl
class TestHistWithBy(TestPlotBase):
    """Tests for ``DataFrame.plot.hist`` when grouping with ``by``."""

    # Expected subplot titles when grouping by two columns that each hold
    # the labels "a", "b", "c": "(a, a)", "(a, b)", ..., "(c, c)".
    _PAIR_TITLES = [f"({first}, {second})" for first in "abc" for second in "abc"]

    def setup_method(self, method):
        """Run the base setup, reset matplotlib rc params, build the fixture."""
        super().setup_method(method)

        import matplotlib as mpl

        mpl.rcdefaults()
        self.hist_df = _create_hist_box_with_by_df()

    @pytest.mark.parametrize(
        "by, column, titles, legends",
        [
            ("C", "A", ["a", "b", "c"], [["A"]] * 3),
            ("C", ["A", "B"], ["a", "b", "c"], [["A", "B"]] * 3),
            ("C", None, ["a", "b", "c"], [["A", "B"]] * 3),
            (["C", "D"], "A", _PAIR_TITLES, [["A"]] * 9),
            (["C", "D"], ["A", "B"], _PAIR_TITLES, [["A", "B"]] * 9),
            (["C", "D"], None, _PAIR_TITLES, [["A", "B"]] * 9),
        ],
    )
    def test_hist_plot_by_argument(self, by, column, titles, legends):
        """One axes per group, titled by group and with the columns as legend."""
        # GH 15079
        axes = _check_plot_works(self.hist_df.plot.hist, column=column, by=by)

        found_titles = [ax.get_title() for ax in axes]
        found_legends = [
            [entry.get_text() for entry in ax.get_legend().texts] for ax in axes
        ]

        assert found_legends == legends
        assert found_titles == titles

    @pytest.mark.parametrize(
        "by, column, titles, legends",
        [
            (0, "A", ["a", "b", "c"], [["A"]] * 3),
            (0, None, ["a", "b", "c"], [["A", "B"]] * 3),
            ([0, "D"], "A", _PAIR_TITLES, [["A"]] * 9),
        ],
    )
    def test_hist_plot_by_0(self, by, column, titles, legends):
        """Grouping also works when the grouping column is named ``0``."""
        # GH 15079
        frame = self.hist_df.copy().rename(columns={"C": 0})

        axes = _check_plot_works(frame.plot.hist, column=column, by=by)

        found_titles = [ax.get_title() for ax in axes]
        found_legends = [
            [entry.get_text() for entry in ax.get_legend().texts] for ax in axes
        ]

        assert found_legends == legends
        assert found_titles == titles

    @pytest.mark.parametrize(
        "by, column",
        [
            ([], ["A"]),
            ([], ["A", "B"]),
            ((), None),
            ((), ["A", "B"]),
        ],
    )
    def test_hist_plot_empty_list_string_tuple_by(self, by, column):
        """An empty ``by`` list or tuple raises ``ValueError``."""
        # GH 15079
        with pytest.raises(ValueError, match="No group keys passed"):
            _check_plot_works(self.hist_df.plot.hist, column=column, by=by)

    @pytest.mark.slow
    @pytest.mark.parametrize(
        "by, column, layout, axes_num",
        [
            (["C"], "A", (2, 2), 3),
            ("C", "A", (2, 2), 3),
            (["C"], ["A"], (1, 3), 3),
            ("C", None, (3, 1), 3),
            ("C", ["A", "B"], (3, 1), 3),
            (["C", "D"], "A", (9, 1), 9),
            (["C", "D"], "A", (3, 3), 9),
            (["C", "D"], ["A"], (5, 2), 9),
            (["C", "D"], ["A", "B"], (9, 1), 9),
            (["C", "D"], None, (9, 1), 9),
            (["C", "D"], ["A", "B"], (5, 2), 9),
        ],
    )
    def test_hist_plot_layout_with_by(self, by, column, layout, axes_num):
        """The requested ``layout`` is applied to the grouped axes."""
        # GH 15079
        # _check_plot_works adds an ax so catch warning. see GH #13188
        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(
                self.hist_df.plot.hist, column=column, by=by, layout=layout
            )
        self._check_axes_shape(axes, axes_num=axes_num, layout=layout)

    @pytest.mark.parametrize(
        "msg, by, layout",
        [
            ("larger than required size", ["C", "D"], (1, 1)),
            (re.escape("Layout must be a tuple of (rows, columns)"), "C", (1,)),
            ("At least one dimension of layout must be positive", "C", (-1, -1)),
        ],
    )
    def test_hist_plot_invalid_layout_with_by_raises(self, msg, by, layout):
        """Invalid layouts raise ``ValueError`` with the matching message."""
        # GH 15079, test if error is raised when invalid layout is given
        with pytest.raises(ValueError, match=msg):
            self.hist_df.plot.hist(column=["A", "B"], by=by, layout=layout)

    @pytest.mark.slow
    def test_axis_share_x_with_by(self):
        """``sharex=True`` joins the x axes and leaves the y axes separate."""
        # GH 15079
        ax1, ax2, ax3 = self.hist_df.plot.hist(column="A", by="C", sharex=True)

        # (axes whose grouper is queried, pair of axes tested for sharing)
        checks = [(ax1, ax1, ax2), (ax2, ax1, ax2), (ax3, ax1, ax3), (ax3, ax2, ax3)]

        # share x
        for owner, left, right in checks:
            assert self.get_x_axis(owner).joined(left, right)

        # don't share y
        for owner, left, right in checks:
            assert not self.get_y_axis(owner).joined(left, right)

    @pytest.mark.slow
    def test_axis_share_y_with_by(self):
        """``sharey=True`` joins the y axes and leaves the x axes separate."""
        # GH 15079
        ax1, ax2, ax3 = self.hist_df.plot.hist(column="A", by="C", sharey=True)

        # (axes whose grouper is queried, pair of axes tested for sharing)
        checks = [(ax1, ax1, ax2), (ax2, ax1, ax2), (ax3, ax1, ax3), (ax3, ax2, ax3)]

        # share y
        for owner, left, right in checks:
            assert self.get_y_axis(owner).joined(left, right)

        # don't share x
        for owner, left, right in checks:
            assert not self.get_x_axis(owner).joined(left, right)

    @pytest.mark.parametrize("figsize", [(12, 8), (20, 10)])
    def test_figure_shape_hist_with_by(self, figsize):
        """``figsize`` is applied to the figure holding the grouped axes."""
        # GH 15079
        axes = self.hist_df.plot.hist(column="A", by="C", figsize=figsize)
        self._check_axes_shape(axes, axes_num=3, figsize=figsize)
@td.skip_if_no_mpl
class TestBoxWithBy(TestPlotBase):
    """Tests for ``DataFrame.plot.box`` when grouping with ``by``."""

    # Expected x tick labels when grouping by two columns that each hold
    # the labels "a", "b", "c": "(a, a)", "(a, b)", ..., "(c, c)".
    _PAIR_LABELS = [f"({first}, {second})" for first in "abc" for second in "abc"]

    def setup_method(self, method):
        """Run the base setup, reset matplotlib rc params, build the fixture."""
        TestPlotBase.setup_method(self, method)

        import matplotlib as mpl

        mpl.rcdefaults()
        self.box_df = _create_hist_box_with_by_df()

    @pytest.mark.parametrize(
        "by, column, titles, xticklabels",
        [
            ("C", "A", ["A"], [["a", "b", "c"]]),
            (["C", "D"], "A", ["A"], [_PAIR_LABELS]),
            ("C", ["A", "B"], ["A", "B"], [["a", "b", "c"]] * 2),
            (["C", "D"], ["A", "B"], ["A", "B"], [_PAIR_LABELS] * 2),
            (["C"], None, ["A", "B"], [["a", "b", "c"]] * 2),
        ],
    )
    def test_box_plot_by_argument(self, by, column, titles, xticklabels):
        """One axes per column, titled by column, with groups as x ticks."""
        # GH 15079
        axes = _check_plot_works(self.box_df.plot.box, column=column, by=by)
        result_titles = [ax.get_title() for ax in axes]
        result_xticklabels = [
            [label.get_text() for label in ax.get_xticklabels()] for ax in axes
        ]

        assert result_xticklabels == xticklabels
        assert result_titles == titles

    @pytest.mark.parametrize(
        "by, column, titles, xticklabels",
        [
            (0, "A", ["A"], [["a", "b", "c"]]),
            ([0, "D"], "A", ["A"], [_PAIR_LABELS]),
            (0, None, ["A", "B"], [["a", "b", "c"]] * 2),
        ],
    )
    def test_box_plot_by_0(self, by, column, titles, xticklabels):
        """Grouping also works when the grouping column is named ``0``."""
        # GH 15079
        df = self.box_df.copy()
        df = df.rename(columns={"C": 0})

        axes = _check_plot_works(df.plot.box, column=column, by=by)
        result_titles = [ax.get_title() for ax in axes]
        result_xticklabels = [
            [label.get_text() for label in ax.get_xticklabels()] for ax in axes
        ]

        assert result_xticklabels == xticklabels
        assert result_titles == titles

    @pytest.mark.parametrize(
        "by, column",
        [
            ([], ["A"]),
            ((), "A"),
            ([], None),
            ((), ["A", "B"]),
        ],
    )
    def test_box_plot_with_none_empty_list_by(self, by, column):
        """An empty ``by`` list or tuple raises ``ValueError``."""
        # GH 15079
        msg = "No group keys passed"
        with pytest.raises(ValueError, match=msg):
            _check_plot_works(self.box_df.plot.box, column=column, by=by)

    @pytest.mark.slow
    @pytest.mark.parametrize(
        "by, column, layout, axes_num",
        [
            (["C"], "A", (1, 1), 1),
            ("C", "A", (1, 1), 1),
            ("C", None, (2, 1), 2),
            ("C", ["A", "B"], (1, 2), 2),
            (["C", "D"], "A", (1, 1), 1),
            (["C", "D"], None, (1, 2), 2),
        ],
    )
    def test_box_plot_layout_with_by(self, by, column, layout, axes_num):
        """The requested ``layout`` is applied to the grouped axes."""
        # GH 15079
        axes = _check_plot_works(
            self.box_df.plot.box, column=column, by=by, layout=layout
        )
        self._check_axes_shape(axes, axes_num=axes_num, layout=layout)

    @pytest.mark.parametrize(
        "msg, by, layout",
        [
            ("larger than required size", ["C", "D"], (1, 1)),
            (re.escape("Layout must be a tuple of (rows, columns)"), "C", (1,)),
            ("At least one dimension of layout must be positive", "C", (-1, -1)),
        ],
    )
    def test_box_plot_invalid_layout_with_by_raises(self, msg, by, layout):
        """Invalid layouts raise ``ValueError`` with the matching message."""
        # GH 15079, test if error is raised when invalid layout is given
        with pytest.raises(ValueError, match=msg):
            self.box_df.plot.box(column=["A", "B"], by=by, layout=layout)

    @pytest.mark.parametrize("figsize", [(12, 8), (20, 10)])
    def test_figure_shape_box_with_by(self, figsize):
        """``figsize`` is applied to the figure holding the grouped box plot.

        Previously named ``test_figure_shape_hist_with_by`` (copied from the
        histogram suite); renamed so failure reports name the plot kind that
        is actually exercised here.
        """
        # GH 15079
        axes = self.box_df.plot.box(column="A", by="C", figsize=figsize)
        self._check_axes_shape(axes, axes_num=1, figsize=figsize)