first commit

This commit is contained in:
Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

View File

@@ -0,0 +1,307 @@
import numpy as np
import pytest
from pandas import DataFrame
pytest.importorskip("jinja2")
def bar_grad(a=None, b=None, c=None, d=None):
"""Used in multiple tests to simplify formatting of expected result"""
ret = [("width", "10em")]
if all(x is None for x in [a, b, c, d]):
return ret
return ret + [
(
"background",
f"linear-gradient(90deg,{','.join([x for x in [a, b, c, d] if x])})",
)
]
def no_bar():
return bar_grad()
def bar_to(x, color="#d65f5f"):
return bar_grad(f" {color} {x:.1f}%", f" transparent {x:.1f}%")
def bar_from_to(x, y, color="#d65f5f"):
return bar_grad(
f" transparent {x:.1f}%",
f" {color} {x:.1f}%",
f" {color} {y:.1f}%",
f" transparent {y:.1f}%",
)
@pytest.fixture
def df_pos():
return DataFrame([[1], [2], [3]])
@pytest.fixture
def df_neg():
return DataFrame([[-1], [-2], [-3]])
@pytest.fixture
def df_mix():
return DataFrame([[-3], [1], [2]])
@pytest.mark.parametrize(
"align, exp",
[
("left", [no_bar(), bar_to(50), bar_to(100)]),
("right", [bar_to(100), bar_from_to(50, 100), no_bar()]),
("mid", [bar_to(33.33), bar_to(66.66), bar_to(100)]),
("zero", [bar_from_to(50, 66.7), bar_from_to(50, 83.3), bar_from_to(50, 100)]),
("mean", [bar_to(50), no_bar(), bar_from_to(50, 100)]),
(2.0, [bar_to(50), no_bar(), bar_from_to(50, 100)]),
(np.median, [bar_to(50), no_bar(), bar_from_to(50, 100)]),
],
)
def test_align_positive_cases(df_pos, align, exp):
# test different align cases for all positive values
result = df_pos.style.bar(align=align)._compute().ctx
expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]}
assert result == expected
@pytest.mark.parametrize(
"align, exp",
[
("left", [bar_to(100), bar_to(50), no_bar()]),
("right", [no_bar(), bar_from_to(50, 100), bar_to(100)]),
("mid", [bar_from_to(66.66, 100), bar_from_to(33.33, 100), bar_to(100)]),
("zero", [bar_from_to(33.33, 50), bar_from_to(16.66, 50), bar_to(50)]),
("mean", [bar_from_to(50, 100), no_bar(), bar_to(50)]),
(-2.0, [bar_from_to(50, 100), no_bar(), bar_to(50)]),
(np.median, [bar_from_to(50, 100), no_bar(), bar_to(50)]),
],
)
def test_align_negative_cases(df_neg, align, exp):
# test different align cases for all negative values
result = df_neg.style.bar(align=align)._compute().ctx
expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]}
assert result == expected
@pytest.mark.parametrize(
"align, exp",
[
("left", [no_bar(), bar_to(80), bar_to(100)]),
("right", [bar_to(100), bar_from_to(80, 100), no_bar()]),
("mid", [bar_to(60), bar_from_to(60, 80), bar_from_to(60, 100)]),
("zero", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
("mean", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
(-0.0, [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
(np.nanmedian, [bar_to(50), no_bar(), bar_from_to(50, 62.5)]),
],
)
@pytest.mark.parametrize("nans", [True, False])
def test_align_mixed_cases(df_mix, align, exp, nans):
# test different align cases for mixed positive and negative values
# also test no impact of NaNs and no_bar
expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]}
if nans:
df_mix.loc[3, :] = np.nan
expected.update({(3, 0): no_bar()})
result = df_mix.style.bar(align=align)._compute().ctx
assert result == expected
@pytest.mark.parametrize(
"align, exp",
[
(
"left",
{
"index": [[no_bar(), no_bar()], [bar_to(100), bar_to(100)]],
"columns": [[no_bar(), bar_to(100)], [no_bar(), bar_to(100)]],
"none": [[no_bar(), bar_to(33.33)], [bar_to(66.66), bar_to(100)]],
},
),
(
"mid",
{
"index": [[bar_to(33.33), bar_to(50)], [bar_to(100), bar_to(100)]],
"columns": [[bar_to(50), bar_to(100)], [bar_to(75), bar_to(100)]],
"none": [[bar_to(25), bar_to(50)], [bar_to(75), bar_to(100)]],
},
),
(
"zero",
{
"index": [
[bar_from_to(50, 66.66), bar_from_to(50, 75)],
[bar_from_to(50, 100), bar_from_to(50, 100)],
],
"columns": [
[bar_from_to(50, 75), bar_from_to(50, 100)],
[bar_from_to(50, 87.5), bar_from_to(50, 100)],
],
"none": [
[bar_from_to(50, 62.5), bar_from_to(50, 75)],
[bar_from_to(50, 87.5), bar_from_to(50, 100)],
],
},
),
(
2,
{
"index": [
[bar_to(50), no_bar()],
[bar_from_to(50, 100), bar_from_to(50, 100)],
],
"columns": [
[bar_to(50), no_bar()],
[bar_from_to(50, 75), bar_from_to(50, 100)],
],
"none": [
[bar_from_to(25, 50), no_bar()],
[bar_from_to(50, 75), bar_from_to(50, 100)],
],
},
),
],
)
@pytest.mark.parametrize("axis", ["index", "columns", "none"])
def test_align_axis(align, exp, axis):
# test all axis combinations with positive values and different aligns
data = DataFrame([[1, 2], [3, 4]])
result = (
data.style.bar(align=align, axis=None if axis == "none" else axis)
._compute()
.ctx
)
expected = {
(0, 0): exp[axis][0][0],
(0, 1): exp[axis][0][1],
(1, 0): exp[axis][1][0],
(1, 1): exp[axis][1][1],
}
assert result == expected
@pytest.mark.parametrize(
"values, vmin, vmax",
[
("positive", 1.5, 2.5),
("negative", -2.5, -1.5),
("mixed", -2.5, 1.5),
],
)
@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"]) # test min/max separately
@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"])
def test_vmin_vmax_clipping(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align):
# test that clipping occurs if any vmin > data_values or vmax < data_values
if align == "mid": # mid acts as left or right in each case
if values == "positive":
align = "left"
elif values == "negative":
align = "right"
df = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}[values]
vmin = None if nullify == "vmin" else vmin
vmax = None if nullify == "vmax" else vmax
clip_df = df.where(df <= (vmax if vmax else 999), other=vmax)
clip_df = clip_df.where(clip_df >= (vmin if vmin else -999), other=vmin)
result = (
df.style.bar(align=align, vmin=vmin, vmax=vmax, color=["red", "green"])
._compute()
.ctx
)
expected = clip_df.style.bar(align=align, color=["red", "green"])._compute().ctx
assert result == expected
@pytest.mark.parametrize(
"values, vmin, vmax",
[
("positive", 0.5, 4.5),
("negative", -4.5, -0.5),
("mixed", -4.5, 4.5),
],
)
@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"]) # test min/max separately
@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"])
def test_vmin_vmax_widening(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align):
# test that widening occurs if any vmax > data_values or vmin < data_values
if align == "mid": # mid acts as left or right in each case
if values == "positive":
align = "left"
elif values == "negative":
align = "right"
df = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}[values]
vmin = None if nullify == "vmin" else vmin
vmax = None if nullify == "vmax" else vmax
expand_df = df.copy()
expand_df.loc[3, :], expand_df.loc[4, :] = vmin, vmax
result = (
df.style.bar(align=align, vmin=vmin, vmax=vmax, color=["red", "green"])
._compute()
.ctx
)
expected = expand_df.style.bar(align=align, color=["red", "green"])._compute().ctx
assert result.items() <= expected.items()
def test_numerics():
# test data is pre-selected for numeric values
data = DataFrame([[1, "a"], [2, "b"]])
result = data.style.bar()._compute().ctx
assert (0, 1) not in result
assert (1, 1) not in result
@pytest.mark.parametrize(
"align, exp",
[
("left", [no_bar(), bar_to(100, "green")]),
("right", [bar_to(100, "red"), no_bar()]),
("mid", [bar_to(25, "red"), bar_from_to(25, 100, "green")]),
("zero", [bar_from_to(33.33, 50, "red"), bar_from_to(50, 100, "green")]),
],
)
def test_colors_mixed(align, exp):
data = DataFrame([[-1], [3]])
result = data.style.bar(align=align, color=["red", "green"])._compute().ctx
assert result == {(0, 0): exp[0], (1, 0): exp[1]}
def test_bar_align_height():
# test when keyword height is used 'no-repeat center' and 'background-size' present
data = DataFrame([[1], [2]])
result = data.style.bar(align="left", height=50)._compute().ctx
bg_s = "linear-gradient(90deg, #d65f5f 100.0%, transparent 100.0%) no-repeat center"
expected = {
(0, 0): [("width", "10em")],
(1, 0): [
("width", "10em"),
("background", bg_s),
("background-size", "100% 50.0%"),
],
}
assert result == expected
def test_bar_value_error_raises():
df = DataFrame({"A": [-100, -60, -30, -20]})
msg = "`align` should be in {'left', 'right', 'mid', 'mean', 'zero'} or"
with pytest.raises(ValueError, match=msg):
df.style.bar(align="poorly", color=["#d65f5f", "#5fba7d"]).to_html()
msg = r"`width` must be a value in \[0, 100\]"
with pytest.raises(ValueError, match=msg):
df.style.bar(width=200).to_html()
msg = r"`height` must be a value in \[0, 100\]"
with pytest.raises(ValueError, match=msg):
df.style.bar(height=200).to_html()

View File

@@ -0,0 +1,165 @@
"""
modules collects tests for Styler methods which have been deprecated
"""
import numpy as np
import pytest
jinja2 = pytest.importorskip("jinja2")
from pandas import (
DataFrame,
IndexSlice,
NaT,
Timestamp,
)
import pandas._testing as tm
@pytest.fixture
def df():
return DataFrame({"A": [0, 1], "B": np.random.randn(2)})
@pytest.mark.parametrize("axis", ["index", "columns"])
def test_hide_index_columns(df, axis):
with tm.assert_produces_warning(FutureWarning):
getattr(df.style, "hide_" + axis)()
def test_set_non_numeric_na():
# GH 21527 28358
df = DataFrame(
{
"object": [None, np.nan, "foo"],
"datetime": [None, NaT, Timestamp("20120101")],
}
)
with tm.assert_produces_warning(FutureWarning):
ctx = df.style.set_na_rep("NA")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "NA"
assert ctx["body"][0][2]["display_value"] == "NA"
assert ctx["body"][1][1]["display_value"] == "NA"
assert ctx["body"][1][2]["display_value"] == "NA"
def test_where_with_one_style(df):
# GH 17474
def f(x):
return x > 0.5
style1 = "foo: bar"
with tm.assert_produces_warning(FutureWarning):
result = df.style.where(f, style1)._compute().ctx
expected = {
(r, c): [("foo", "bar")]
for r, row in enumerate(df.index)
for c, col in enumerate(df.columns)
if f(df.loc[row, col])
}
assert result == expected
@pytest.mark.parametrize(
"slice_",
[
IndexSlice[:],
IndexSlice[:, ["A"]],
IndexSlice[[1], :],
IndexSlice[[1], ["A"]],
IndexSlice[:2, ["A", "B"]],
],
)
def test_where_subset(df, slice_):
# GH 17474
def f(x):
return x > 0.5
style1 = "foo: bar"
style2 = "baz: foo"
with tm.assert_produces_warning(FutureWarning):
res = df.style.where(f, style1, style2, subset=slice_)._compute().ctx
expected = {
(r, c): [("foo", "bar") if f(df.loc[row, col]) else ("baz", "foo")]
for r, row in enumerate(df.index)
for c, col in enumerate(df.columns)
if row in df.loc[slice_].index and col in df.loc[slice_].columns
}
assert res == expected
def test_where_subset_compare_with_applymap(df):
# GH 17474
def f(x):
return x > 0.5
style1 = "foo: bar"
style2 = "baz: foo"
def g(x):
return style1 if f(x) else style2
slices = [
IndexSlice[:],
IndexSlice[:, ["A"]],
IndexSlice[[1], :],
IndexSlice[[1], ["A"]],
IndexSlice[:2, ["A", "B"]],
]
for slice_ in slices:
with tm.assert_produces_warning(FutureWarning):
result = df.style.where(f, style1, style2, subset=slice_)._compute().ctx
expected = df.style.applymap(g, subset=slice_)._compute().ctx
assert result == expected
def test_where_kwargs():
df = DataFrame([[1, 2], [3, 4]])
def f(x, val):
return x > val
with tm.assert_produces_warning(FutureWarning):
res = df.style.where(f, "color:green;", "color:red;", val=2)._compute().ctx
expected = {
(0, 0): [("color", "red")],
(0, 1): [("color", "red")],
(1, 0): [("color", "green")],
(1, 1): [("color", "green")],
}
assert res == expected
def test_set_na_rep():
# GH 21527 28358
df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"])
with tm.assert_produces_warning(FutureWarning):
ctx = df.style.set_na_rep("NA")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "NA"
assert ctx["body"][0][2]["display_value"] == "NA"
with tm.assert_produces_warning(FutureWarning):
ctx = (
df.style.set_na_rep("NA")
.format(None, na_rep="-", subset=["B"])
._translate(True, True)
)
assert ctx["body"][0][1]["display_value"] == "NA"
assert ctx["body"][0][2]["display_value"] == "-"
def test_precision(df):
styler = df.style
with tm.assert_produces_warning(FutureWarning):
s2 = styler.set_precision(1)
assert styler is s2
assert styler.precision == 1
def test_render(df):
with tm.assert_produces_warning(FutureWarning):
df.style.render()

View File

@@ -0,0 +1,444 @@
import numpy as np
import pytest
from pandas import (
NA,
DataFrame,
IndexSlice,
MultiIndex,
NaT,
Timestamp,
option_context,
)
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
from pandas.io.formats.style_render import _str_escape
@pytest.fixture
def df():
return DataFrame(
data=[[0, -0.609], [1, -1.228]],
columns=["A", "B"],
index=["x", "y"],
)
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0)
def test_display_format(styler):
ctx = styler.format("{:0.1f}")._translate(True, True)
assert all(["display_value" in c for c in row] for row in ctx["body"])
assert all([len(c["display_value"]) <= 3 for c in row[1:]] for row in ctx["body"])
assert len(ctx["body"][0][1]["display_value"].lstrip("-")) <= 3
@pytest.mark.parametrize("index", [True, False])
@pytest.mark.parametrize("columns", [True, False])
def test_display_format_index(styler, index, columns):
exp_index = ["x", "y"]
if index:
styler.format_index(lambda v: v.upper(), axis=0) # test callable
exp_index = ["X", "Y"]
exp_columns = ["A", "B"]
if columns:
styler.format_index("*{}*", axis=1) # test string
exp_columns = ["*A*", "*B*"]
ctx = styler._translate(True, True)
for r, row in enumerate(ctx["body"]):
assert row[0]["display_value"] == exp_index[r]
for c, col in enumerate(ctx["head"][1:]):
assert col["display_value"] == exp_columns[c]
def test_format_dict(styler):
ctx = styler.format({"A": "{:0.1f}", "B": "{0:.2%}"})._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "0.0"
assert ctx["body"][0][2]["display_value"] == "-60.90%"
def test_format_index_dict(styler):
ctx = styler.format_index({0: lambda v: v.upper()})._translate(True, True)
for i, val in enumerate(["X", "Y"]):
assert ctx["body"][i][0]["display_value"] == val
def test_format_string(styler):
ctx = styler.format("{:.2f}")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "0.00"
assert ctx["body"][0][2]["display_value"] == "-0.61"
assert ctx["body"][1][1]["display_value"] == "1.00"
assert ctx["body"][1][2]["display_value"] == "-1.23"
def test_format_callable(styler):
ctx = styler.format(lambda v: "neg" if v < 0 else "pos")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "pos"
assert ctx["body"][0][2]["display_value"] == "neg"
assert ctx["body"][1][1]["display_value"] == "pos"
assert ctx["body"][1][2]["display_value"] == "neg"
def test_format_with_na_rep():
# GH 21527 28358
df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"])
ctx = df.style.format(None, na_rep="-")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "-"
assert ctx["body"][0][2]["display_value"] == "-"
ctx = df.style.format("{:.2%}", na_rep="-")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "-"
assert ctx["body"][0][2]["display_value"] == "-"
assert ctx["body"][1][1]["display_value"] == "110.00%"
assert ctx["body"][1][2]["display_value"] == "120.00%"
ctx = df.style.format("{:.2%}", na_rep="-", subset=["B"])._translate(True, True)
assert ctx["body"][0][2]["display_value"] == "-"
assert ctx["body"][1][2]["display_value"] == "120.00%"
def test_format_index_with_na_rep():
df = DataFrame([[1, 2, 3, 4, 5]], columns=["A", None, np.nan, NaT, NA])
ctx = df.style.format_index(None, na_rep="--", axis=1)._translate(True, True)
assert ctx["head"][0][1]["display_value"] == "A"
for i in [2, 3, 4, 5]:
assert ctx["head"][0][i]["display_value"] == "--"
def test_format_non_numeric_na():
# GH 21527 28358
df = DataFrame(
{
"object": [None, np.nan, "foo"],
"datetime": [None, NaT, Timestamp("20120101")],
}
)
ctx = df.style.format(None, na_rep="-")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "-"
assert ctx["body"][0][2]["display_value"] == "-"
assert ctx["body"][1][1]["display_value"] == "-"
assert ctx["body"][1][2]["display_value"] == "-"
@pytest.mark.parametrize(
"func, attr, kwargs",
[
("format", "_display_funcs", {}),
("format_index", "_display_funcs_index", {"axis": 0}),
("format_index", "_display_funcs_columns", {"axis": 1}),
],
)
def test_format_clear(styler, func, attr, kwargs):
assert (0, 0) not in getattr(styler, attr) # using default
getattr(styler, func)("{:.2f}", **kwargs)
assert (0, 0) in getattr(styler, attr) # formatter is specified
getattr(styler, func)(**kwargs)
assert (0, 0) not in getattr(styler, attr) # formatter cleared to default
@pytest.mark.parametrize(
"escape, exp",
[
("html", "&lt;&gt;&amp;&#34;%$#_{}~^\\~ ^ \\ "),
(
"latex",
'<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum '
"\\textbackslash \\textasciitilde \\space \\textasciicircum \\space "
"\\textbackslash \\space ",
),
],
)
def test_format_escape_html(escape, exp):
chars = '<>&"%$#_{}~^\\~ ^ \\ '
df = DataFrame([[chars]])
s = Styler(df, uuid_len=0).format("&{0}&", escape=None)
expected = f'<td id="T__row0_col0" class="data row0 col0" >&{chars}&</td>'
assert expected in s.to_html()
# only the value should be escaped before passing to the formatter
s = Styler(df, uuid_len=0).format("&{0}&", escape=escape)
expected = f'<td id="T__row0_col0" class="data row0 col0" >&{exp}&</td>'
assert expected in s.to_html()
# also test format_index()
styler = Styler(DataFrame(columns=[chars]), uuid_len=0)
styler.format_index("&{0}&", escape=None, axis=1)
assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{chars}&"
styler.format_index("&{0}&", escape=escape, axis=1)
assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{exp}&"
def test_format_escape_na_rep():
# tests the na_rep is not escaped
df = DataFrame([['<>&"', None]])
s = Styler(df, uuid_len=0).format("X&{0}>X", escape="html", na_rep="&")
ex = '<td id="T__row0_col0" class="data row0 col0" >X&&lt;&gt;&amp;&#34;>X</td>'
expected2 = '<td id="T__row0_col1" class="data row0 col1" >&</td>'
assert ex in s.to_html()
assert expected2 in s.to_html()
# also test for format_index()
df = DataFrame(columns=['<>&"', None])
styler = Styler(df, uuid_len=0)
styler.format_index("X&{0}>X", escape="html", na_rep="&", axis=1)
ctx = styler._translate(True, True)
assert ctx["head"][0][1]["display_value"] == "X&&lt;&gt;&amp;&#34;>X"
assert ctx["head"][0][2]["display_value"] == "&"
def test_format_escape_floats(styler):
# test given formatter for number format is not impacted by escape
s = styler.format("{:.1f}", escape="html")
for expected in [">0.0<", ">1.0<", ">-1.2<", ">-0.6<"]:
assert expected in s.to_html()
# tests precision of floats is not impacted by escape
s = styler.format(precision=1, escape="html")
for expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]:
assert expected in s.to_html()
@pytest.mark.parametrize("formatter", [5, True, [2.0]])
@pytest.mark.parametrize("func", ["format", "format_index"])
def test_format_raises(styler, formatter, func):
with pytest.raises(TypeError, match="expected str or callable"):
getattr(styler, func)(formatter)
@pytest.mark.parametrize(
"precision, expected",
[
(1, ["1.0", "2.0", "3.2", "4.6"]),
(2, ["1.00", "2.01", "3.21", "4.57"]),
(3, ["1.000", "2.009", "3.212", "4.566"]),
],
)
def test_format_with_precision(precision, expected):
# Issue #13257
df = DataFrame([[1.0, 2.0090, 3.2121, 4.566]], columns=[1.0, 2.0090, 3.2121, 4.566])
styler = Styler(df)
styler.format(precision=precision)
styler.format_index(precision=precision, axis=1)
ctx = styler._translate(True, True)
for col, exp in enumerate(expected):
assert ctx["body"][0][col + 1]["display_value"] == exp # format test
assert ctx["head"][0][col + 1]["display_value"] == exp # format_index test
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize(
"level, expected",
[
(0, ["X", "X", "_", "_"]), # level int
("zero", ["X", "X", "_", "_"]), # level name
(1, ["_", "_", "X", "X"]), # other level int
("one", ["_", "_", "X", "X"]), # other level name
([0, 1], ["X", "X", "X", "X"]), # both levels
([0, "zero"], ["X", "X", "_", "_"]), # level int and name simultaneous
([0, "one"], ["X", "X", "X", "X"]), # both levels as int and name
(["one", "zero"], ["X", "X", "X", "X"]), # both level names, reversed
],
)
def test_format_index_level(axis, level, expected):
midx = MultiIndex.from_arrays([["_", "_"], ["_", "_"]], names=["zero", "one"])
df = DataFrame([[1, 2], [3, 4]])
if axis == 0:
df.index = midx
else:
df.columns = midx
styler = df.style.format_index(lambda v: "X", level=level, axis=axis)
ctx = styler._translate(True, True)
if axis == 0: # compare index
result = [ctx["body"][s][0]["display_value"] for s in range(2)]
result += [ctx["body"][s][1]["display_value"] for s in range(2)]
else: # compare columns
result = [ctx["head"][0][s + 1]["display_value"] for s in range(2)]
result += [ctx["head"][1][s + 1]["display_value"] for s in range(2)]
assert expected == result
def test_format_subset():
df = DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"])
ctx = df.style.format(
{"a": "{:0.1f}", "b": "{0:.2%}"}, subset=IndexSlice[0, :]
)._translate(True, True)
expected = "0.1"
raw_11 = "1.123400"
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][1][1]["display_value"] == raw_11
assert ctx["body"][0][2]["display_value"] == "12.34%"
ctx = df.style.format("{:0.1f}", subset=IndexSlice[0, :])._translate(True, True)
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][1][1]["display_value"] == raw_11
ctx = df.style.format("{:0.1f}", subset=IndexSlice["a"])._translate(True, True)
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][0][2]["display_value"] == "0.123400"
ctx = df.style.format("{:0.1f}", subset=IndexSlice[0, "a"])._translate(True, True)
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][1][1]["display_value"] == raw_11
ctx = df.style.format("{:0.1f}", subset=IndexSlice[[0, 1], ["a"]])._translate(
True, True
)
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][1][1]["display_value"] == "1.1"
assert ctx["body"][0][2]["display_value"] == "0.123400"
assert ctx["body"][1][2]["display_value"] == raw_11
@pytest.mark.parametrize("formatter", [None, "{:,.1f}"])
@pytest.mark.parametrize("decimal", [".", "*"])
@pytest.mark.parametrize("precision", [None, 2])
@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)])
def test_format_thousands(formatter, decimal, precision, func, col):
styler = DataFrame([[1000000.123456789]], index=[1000000.123456789]).style
result = getattr(styler, func)( # testing float
thousands="_", formatter=formatter, decimal=decimal, precision=precision
)._translate(True, True)
assert "1_000_000" in result["body"][0][col]["display_value"]
styler = DataFrame([[1000000]], index=[1000000]).style
result = getattr(styler, func)( # testing int
thousands="_", formatter=formatter, decimal=decimal, precision=precision
)._translate(True, True)
assert "1_000_000" in result["body"][0][col]["display_value"]
styler = DataFrame([[1 + 1000000.123456789j]], index=[1 + 1000000.123456789j]).style
result = getattr(styler, func)( # testing complex
thousands="_", formatter=formatter, decimal=decimal, precision=precision
)._translate(True, True)
assert "1_000_000" in result["body"][0][col]["display_value"]
@pytest.mark.parametrize("formatter", [None, "{:,.4f}"])
@pytest.mark.parametrize("thousands", [None, ",", "*"])
@pytest.mark.parametrize("precision", [None, 4])
@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)])
def test_format_decimal(formatter, thousands, precision, func, col):
styler = DataFrame([[1000000.123456789]], index=[1000000.123456789]).style
result = getattr(styler, func)( # testing float
decimal="_", formatter=formatter, thousands=thousands, precision=precision
)._translate(True, True)
assert "000_123" in result["body"][0][col]["display_value"]
styler = DataFrame([[1 + 1000000.123456789j]], index=[1 + 1000000.123456789j]).style
result = getattr(styler, func)( # testing complex
decimal="_", formatter=formatter, thousands=thousands, precision=precision
)._translate(True, True)
assert "000_123" in result["body"][0][col]["display_value"]
def test_str_escape_error():
msg = "`escape` only permitted in {'html', 'latex'}, got "
with pytest.raises(ValueError, match=msg):
_str_escape("text", "bad_escape")
with pytest.raises(ValueError, match=msg):
_str_escape("text", [])
_str_escape(2.00, "bad_escape") # OK since dtype is float
def test_format_options():
df = DataFrame({"int": [2000, 1], "float": [1.009, None], "str": ["&<", "&~"]})
ctx = df.style._translate(True, True)
# test option: na_rep
assert ctx["body"][1][2]["display_value"] == "nan"
with option_context("styler.format.na_rep", "MISSING"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][1][2]["display_value"] == "MISSING"
# test option: decimal and precision
assert ctx["body"][0][2]["display_value"] == "1.009000"
with option_context("styler.format.decimal", "_"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][2]["display_value"] == "1_009000"
with option_context("styler.format.precision", 2):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][2]["display_value"] == "1.01"
# test option: thousands
assert ctx["body"][0][1]["display_value"] == "2000"
with option_context("styler.format.thousands", "_"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][1]["display_value"] == "2_000"
# test option: escape
assert ctx["body"][0][3]["display_value"] == "&<"
assert ctx["body"][1][3]["display_value"] == "&~"
with option_context("styler.format.escape", "html"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][3]["display_value"] == "&amp;&lt;"
with option_context("styler.format.escape", "latex"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][1][3]["display_value"] == "\\&\\textasciitilde "
# test option: formatter
with option_context("styler.format.formatter", {"int": "{:,.2f}"}):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][1]["display_value"] == "2,000.00"
def test_precision_zero(df):
styler = Styler(df, precision=0)
ctx = styler._translate(True, True)
assert ctx["body"][0][2]["display_value"] == "-1"
assert ctx["body"][1][2]["display_value"] == "-1"
@pytest.mark.parametrize(
"formatter, exp",
[
(lambda x: f"{x:.3f}", "9.000"),
("{:.2f}", "9.00"),
({0: "{:.1f}"}, "9.0"),
(None, "9"),
],
)
def test_formatter_options_validator(formatter, exp):
df = DataFrame([[9]])
with option_context("styler.format.formatter", formatter):
assert f" {exp} " in df.style.to_latex()
def test_formatter_options_raises():
msg = "Value must be an instance of"
with pytest.raises(ValueError, match=msg):
with option_context("styler.format.formatter", ["bad", "type"]):
DataFrame().style.to_latex()
def test_1level_multiindex():
# GH 43383
midx = MultiIndex.from_product([[1, 2]], names=[""])
df = DataFrame(-1, index=midx, columns=[0, 1])
ctx = df.style._translate(True, True)
assert ctx["body"][0][0]["display_value"] == "1"
assert ctx["body"][0][0]["is_visible"] is True
assert ctx["body"][1][0]["display_value"] == "2"
assert ctx["body"][1][0]["is_visible"] is True
def test_boolean_format():
# gh 46384: booleans do not collapse to integer representation on display
df = DataFrame([[True, False]])
ctx = df.style._translate(True, True)
assert ctx["body"][0][1]["display_value"] is True
assert ctx["body"][0][2]["display_value"] is False

View File

@@ -0,0 +1,219 @@
import numpy as np
import pytest
from pandas import (
NA,
DataFrame,
IndexSlice,
)
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
@pytest.fixture(params=[(None, "float64"), (NA, "Int64")])
def df(request):
# GH 45804
return DataFrame(
{"A": [0, np.nan, 10], "B": [1, request.param[0], 2]}, dtype=request.param[1]
)
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0)
def test_highlight_null(styler):
result = styler.highlight_null()._compute().ctx
expected = {
(1, 0): [("background-color", "red")],
(1, 1): [("background-color", "red")],
}
assert result == expected
def test_highlight_null_subset(styler):
# GH 31345
result = (
styler.highlight_null(null_color="red", subset=["A"])
.highlight_null(null_color="green", subset=["B"])
._compute()
.ctx
)
expected = {
(1, 0): [("background-color", "red")],
(1, 1): [("background-color", "green")],
}
assert result == expected
@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"])
def test_highlight_minmax_basic(df, f):
expected = {
(0, 1): [("background-color", "red")],
# ignores NaN row,
(2, 0): [("background-color", "red")],
}
if f == "highlight_min":
df = -df
result = getattr(df.style, f)(axis=1, color="red")._compute().ctx
assert result == expected
@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"])
@pytest.mark.parametrize(
"kwargs",
[
{"axis": None, "color": "red"}, # test axis
{"axis": 0, "subset": ["A"], "color": "red"}, # test subset and ignores NaN
{"axis": None, "props": "background-color: red"}, # test props
],
)
def test_highlight_minmax_ext(df, f, kwargs):
expected = {(2, 0): [("background-color", "red")]}
if f == "highlight_min":
df = -df
result = getattr(df.style, f)(**kwargs)._compute().ctx
assert result == expected
@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"])
@pytest.mark.parametrize("axis", [None, 0, 1])
def test_highlight_minmax_nulls(f, axis):
# GH 42750
expected = {
(1, 0): [("background-color", "yellow")],
(1, 1): [("background-color", "yellow")],
}
if axis == 1:
expected.update({(2, 1): [("background-color", "yellow")]})
if f == "highlight_max":
df = DataFrame({"a": [NA, 1, None], "b": [np.nan, 1, -1]})
else:
df = DataFrame({"a": [NA, -1, None], "b": [np.nan, -1, 1]})
result = getattr(df.style, f)(axis=axis)._compute().ctx
assert result == expected
@pytest.mark.parametrize(
"kwargs",
[
{"left": 0, "right": 1}, # test basic range
{"left": 0, "right": 1, "props": "background-color: yellow"}, # test props
{"left": -100, "right": 100, "subset": IndexSlice[[0, 1], :]}, # test subset
{"left": 0, "subset": IndexSlice[[0, 1], :]}, # test no right
{"right": 1}, # test no left
{"left": [0, 0, 11], "axis": 0}, # test left as sequence
{"left": DataFrame({"A": [0, 0, 11], "B": [1, 1, 11]}), "axis": None}, # axis
{"left": 0, "right": [0, 1], "axis": 1}, # test sequence right
],
)
def test_highlight_between(styler, kwargs):
expected = {
(0, 0): [("background-color", "yellow")],
(0, 1): [("background-color", "yellow")],
}
result = styler.highlight_between(**kwargs)._compute().ctx
assert result == expected
@pytest.mark.parametrize(
"arg, map, axis",
[
("left", [1, 2], 0), # 0 axis has 3 elements not 2
("left", [1, 2, 3], 1), # 1 axis has 2 elements not 3
("left", np.array([[1, 2], [1, 2]]), None), # df is (2,3) not (2,2)
("right", [1, 2], 0), # same tests as above for 'right' not 'left'
("right", [1, 2, 3], 1), # ..
("right", np.array([[1, 2], [1, 2]]), None), # ..
],
)
def test_highlight_between_raises(arg, styler, map, axis):
msg = f"supplied '{arg}' is not correct shape"
with pytest.raises(ValueError, match=msg):
styler.highlight_between(**{arg: map, "axis": axis})._compute()
def test_highlight_between_raises2(styler):
msg = "values can be 'both', 'left', 'right', or 'neither'"
with pytest.raises(ValueError, match=msg):
styler.highlight_between(inclusive="badstring")._compute()
with pytest.raises(ValueError, match=msg):
styler.highlight_between(inclusive=1)._compute()
@pytest.mark.parametrize(
"inclusive, expected",
[
(
"both",
{
(0, 0): [("background-color", "yellow")],
(0, 1): [("background-color", "yellow")],
},
),
("neither", {}),
("left", {(0, 0): [("background-color", "yellow")]}),
("right", {(0, 1): [("background-color", "yellow")]}),
],
)
def test_highlight_between_inclusive(styler, inclusive, expected):
kwargs = {"left": 0, "right": 1, "subset": IndexSlice[[0, 1], :]}
result = styler.highlight_between(**kwargs, inclusive=inclusive)._compute()
assert result.ctx == expected
@pytest.mark.parametrize(
"kwargs",
[
{"q_left": 0.5, "q_right": 1, "axis": 0}, # base case
{"q_left": 0.5, "q_right": 1, "axis": None}, # test axis
{"q_left": 0, "q_right": 1, "subset": IndexSlice[2, :]}, # test subset
{"q_left": 0.5, "axis": 0}, # test no high
{"q_right": 1, "subset": IndexSlice[2, :], "axis": 1}, # test no low
{"q_left": 0.5, "axis": 0, "props": "background-color: yellow"}, # tst prop
],
)
def test_highlight_quantile(styler, kwargs):
expected = {
(2, 0): [("background-color", "yellow")],
(2, 1): [("background-color", "yellow")],
}
result = styler.highlight_quantile(**kwargs)._compute().ctx
assert result == expected
@pytest.mark.skipif(np.__version__[:4] in ["1.16", "1.17"], reason="Numpy Issue #14831")
@pytest.mark.parametrize(
"f,kwargs",
[
("highlight_min", {"axis": 1, "subset": IndexSlice[1, :]}),
("highlight_max", {"axis": 0, "subset": [0]}),
("highlight_quantile", {"axis": None, "q_left": 0.6, "q_right": 0.8}),
("highlight_between", {"subset": [0]}),
],
)
@pytest.mark.parametrize(
"df",
[
DataFrame([[0, 10], [20, 30]], dtype=int),
DataFrame([[0, 10], [20, 30]], dtype=float),
DataFrame([[0, 10], [20, 30]], dtype="datetime64[ns]"),
DataFrame([[0, 10], [20, 30]], dtype=str),
DataFrame([[0, 10], [20, 30]], dtype="timedelta64[ns]"),
],
)
def test_all_highlight_dtypes(f, kwargs, df):
if f == "highlight_quantile" and isinstance(df.iloc[0, 0], (str)):
return None # quantile incompatible with str
if f == "highlight_between":
kwargs["left"] = df.iloc[1, 0] # set the range low for testing
expected = {(1, 0): [("background-color", "yellow")]}
result = getattr(df.style, f)(**kwargs)._compute().ctx
assert result == expected

View File

@@ -0,0 +1,818 @@
from textwrap import dedent
import numpy as np
import pytest
from pandas import (
DataFrame,
MultiIndex,
option_context,
)
jinja2 = pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
loader = jinja2.PackageLoader("pandas", "io/formats/templates")
env = jinja2.Environment(loader=loader, trim_blocks=True)
@pytest.fixture
def styler():
return Styler(DataFrame([[2.61], [2.69]], index=["a", "b"], columns=["A"]))
@pytest.fixture
def styler_mi():
midx = MultiIndex.from_product([["a", "b"], ["c", "d"]])
return Styler(DataFrame(np.arange(16).reshape(4, 4), index=midx, columns=midx))
@pytest.fixture
def tpl_style():
return env.get_template("html_style.tpl")
@pytest.fixture
def tpl_table():
return env.get_template("html_table.tpl")
def test_html_template_extends_options():
# make sure if templates are edited tests are updated as are setup fixtures
# to understand the dependency
with open("pandas/io/formats/templates/html.tpl") as file:
result = file.read()
assert "{% include html_style_tpl %}" in result
assert "{% include html_table_tpl %}" in result
def test_exclude_styles(styler):
result = styler.to_html(exclude_styles=True, doctype_html=True)
expected = dedent(
"""\
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
</head>
<body>
<table>
<thead>
<tr>
<th >&nbsp;</th>
<th >A</th>
</tr>
</thead>
<tbody>
<tr>
<th >a</th>
<td >2.610000</td>
</tr>
<tr>
<th >b</th>
<td >2.690000</td>
</tr>
</tbody>
</table>
</body>
</html>
"""
)
assert result == expected
def test_w3_html_format(styler):
styler.set_uuid("").set_table_styles(
[{"selector": "th", "props": "att2:v2;"}]
).applymap(lambda x: "att1:v1;").set_table_attributes(
'class="my-cls1" style="attr3:v3;"'
).set_td_classes(
DataFrame(["my-cls2"], index=["a"], columns=["A"])
).format(
"{:.1f}"
).set_caption(
"A comprehensive test"
)
expected = dedent(
"""\
<style type="text/css">
#T_ th {
att2: v2;
}
#T__row0_col0, #T__row1_col0 {
att1: v1;
}
</style>
<table id="T_" class="my-cls1" style="attr3:v3;">
<caption>A comprehensive test</caption>
<thead>
<tr>
<th class="blank level0" >&nbsp;</th>
<th id="T__level0_col0" class="col_heading level0 col0" >A</th>
</tr>
</thead>
<tbody>
<tr>
<th id="T__level0_row0" class="row_heading level0 row0" >a</th>
<td id="T__row0_col0" class="data row0 col0 my-cls2" >2.6</td>
</tr>
<tr>
<th id="T__level0_row1" class="row_heading level0 row1" >b</th>
<td id="T__row1_col0" class="data row1 col0" >2.7</td>
</tr>
</tbody>
</table>
"""
)
assert expected == styler.to_html()
def test_colspan_w3():
# GH 36223
df = DataFrame(data=[[1, 2]], columns=[["l0", "l0"], ["l1a", "l1b"]])
styler = Styler(df, uuid="_", cell_ids=False)
assert '<th class="col_heading level0 col0" colspan="2">l0</th>' in styler.to_html()
def test_rowspan_w3():
# GH 38533
df = DataFrame(data=[[1, 2]], index=[["l0", "l0"], ["l1a", "l1b"]])
styler = Styler(df, uuid="_", cell_ids=False)
assert '<th class="row_heading level0 row0" rowspan="2">l0</th>' in styler.to_html()
def test_styles(styler):
styler.set_uuid("abc")
styler.set_table_styles([{"selector": "td", "props": "color: red;"}])
result = styler.to_html(doctype_html=True)
expected = dedent(
"""\
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<style type="text/css">
#T_abc td {
color: red;
}
</style>
</head>
<body>
<table id="T_abc">
<thead>
<tr>
<th class="blank level0" >&nbsp;</th>
<th id="T_abc_level0_col0" class="col_heading level0 col0" >A</th>
</tr>
</thead>
<tbody>
<tr>
<th id="T_abc_level0_row0" class="row_heading level0 row0" >a</th>
<td id="T_abc_row0_col0" class="data row0 col0" >2.610000</td>
</tr>
<tr>
<th id="T_abc_level0_row1" class="row_heading level0 row1" >b</th>
<td id="T_abc_row1_col0" class="data row1 col0" >2.690000</td>
</tr>
</tbody>
</table>
</body>
</html>
"""
)
assert result == expected
def test_doctype(styler):
result = styler.to_html(doctype_html=False)
assert "<html>" not in result
assert "<body>" not in result
assert "<!DOCTYPE html>" not in result
assert "<head>" not in result
def test_doctype_encoding(styler):
with option_context("styler.render.encoding", "ASCII"):
result = styler.to_html(doctype_html=True)
assert '<meta charset="ASCII">' in result
result = styler.to_html(doctype_html=True, encoding="ANSI")
assert '<meta charset="ANSI">' in result
def test_bold_headers_arg(styler):
result = styler.to_html(bold_headers=True)
assert "th {\n font-weight: bold;\n}" in result
result = styler.to_html()
assert "th {\n font-weight: bold;\n}" not in result
def test_caption_arg(styler):
result = styler.to_html(caption="foo bar")
assert "<caption>foo bar</caption>" in result
result = styler.to_html()
assert "<caption>foo bar</caption>" not in result
def test_block_names(tpl_style, tpl_table):
# catch accidental removal of a block
expected_style = {
"before_style",
"style",
"table_styles",
"before_cellstyle",
"cellstyle",
}
expected_table = {
"before_table",
"table",
"caption",
"thead",
"tbody",
"after_table",
"before_head_rows",
"head_tr",
"after_head_rows",
"before_rows",
"tr",
"after_rows",
}
result1 = set(tpl_style.blocks)
assert result1 == expected_style
result2 = set(tpl_table.blocks)
assert result2 == expected_table
def test_from_custom_template_table(tmpdir):
p = tmpdir.mkdir("tpl").join("myhtml_table.tpl")
p.write(
dedent(
"""\
{% extends "html_table.tpl" %}
{% block table %}
<h1>{{custom_title}}</h1>
{{ super() }}
{% endblock table %}"""
)
)
result = Styler.from_custom_template(str(tmpdir.join("tpl")), "myhtml_table.tpl")
assert issubclass(result, Styler)
assert result.env is not Styler.env
assert result.template_html_table is not Styler.template_html_table
styler = result(DataFrame({"A": [1, 2]}))
assert "<h1>My Title</h1>\n\n\n<table" in styler.to_html(custom_title="My Title")
def test_from_custom_template_style(tmpdir):
p = tmpdir.mkdir("tpl").join("myhtml_style.tpl")
p.write(
dedent(
"""\
{% extends "html_style.tpl" %}
{% block style %}
<link rel="stylesheet" href="mystyle.css">
{{ super() }}
{% endblock style %}"""
)
)
result = Styler.from_custom_template(
str(tmpdir.join("tpl")), html_style="myhtml_style.tpl"
)
assert issubclass(result, Styler)
assert result.env is not Styler.env
assert result.template_html_style is not Styler.template_html_style
styler = result(DataFrame({"A": [1, 2]}))
assert '<link rel="stylesheet" href="mystyle.css">\n\n<style' in styler.to_html()
def test_caption_as_sequence(styler):
styler.set_caption(("full cap", "short cap"))
assert "<caption>full cap</caption>" in styler.to_html()
@pytest.mark.parametrize("index", [False, True])
@pytest.mark.parametrize("columns", [False, True])
@pytest.mark.parametrize("index_name", [True, False])
def test_sticky_basic(styler, index, columns, index_name):
if index_name:
styler.index.name = "some text"
if index:
styler.set_sticky(axis=0)
if columns:
styler.set_sticky(axis=1)
left_css = (
"#T_ {0} {{\n position: sticky;\n background-color: white;\n"
" left: 0px;\n z-index: {1};\n}}"
)
top_css = (
"#T_ {0} {{\n position: sticky;\n background-color: white;\n"
" top: {1}px;\n z-index: {2};\n{3}}}"
)
res = styler.set_uuid("").to_html()
# test index stickys over thead and tbody
assert (left_css.format("thead tr th:nth-child(1)", "3 !important") in res) is index
assert (left_css.format("tbody tr th:nth-child(1)", "1") in res) is index
# test column stickys including if name row
assert (
top_css.format("thead tr:nth-child(1) th", "0", "2", " height: 25px;\n") in res
) is (columns and index_name)
assert (
top_css.format("thead tr:nth-child(2) th", "25", "2", " height: 25px;\n")
in res
) is (columns and index_name)
assert (top_css.format("thead tr:nth-child(1) th", "0", "2", "") in res) is (
columns and not index_name
)
@pytest.mark.parametrize("index", [False, True])
@pytest.mark.parametrize("columns", [False, True])
def test_sticky_mi(styler_mi, index, columns):
if index:
styler_mi.set_sticky(axis=0)
if columns:
styler_mi.set_sticky(axis=1)
left_css = (
"#T_ {0} {{\n position: sticky;\n background-color: white;\n"
" left: {1}px;\n min-width: 75px;\n max-width: 75px;\n z-index: {2};\n}}"
)
top_css = (
"#T_ {0} {{\n position: sticky;\n background-color: white;\n"
" top: {1}px;\n height: 25px;\n z-index: {2};\n}}"
)
res = styler_mi.set_uuid("").to_html()
# test the index stickys for thead and tbody over both levels
assert (
left_css.format("thead tr th:nth-child(1)", "0", "3 !important") in res
) is index
assert (left_css.format("tbody tr th.level0", "0", "1") in res) is index
assert (
left_css.format("thead tr th:nth-child(2)", "75", "3 !important") in res
) is index
assert (left_css.format("tbody tr th.level1", "75", "1") in res) is index
# test the column stickys for each level row
assert (top_css.format("thead tr:nth-child(1) th", "0", "2") in res) is columns
assert (top_css.format("thead tr:nth-child(2) th", "25", "2") in res) is columns
@pytest.mark.parametrize("index", [False, True])
@pytest.mark.parametrize("columns", [False, True])
@pytest.mark.parametrize("levels", [[1], ["one"], "one"])
def test_sticky_levels(styler_mi, index, columns, levels):
styler_mi.index.names, styler_mi.columns.names = ["zero", "one"], ["zero", "one"]
if index:
styler_mi.set_sticky(axis=0, levels=levels)
if columns:
styler_mi.set_sticky(axis=1, levels=levels)
left_css = (
"#T_ {0} {{\n position: sticky;\n background-color: white;\n"
" left: {1}px;\n min-width: 75px;\n max-width: 75px;\n z-index: {2};\n}}"
)
top_css = (
"#T_ {0} {{\n position: sticky;\n background-color: white;\n"
" top: {1}px;\n height: 25px;\n z-index: {2};\n}}"
)
res = styler_mi.set_uuid("").to_html()
# test no sticking of level0
assert "#T_ thead tr th:nth-child(1)" not in res
assert "#T_ tbody tr th.level0" not in res
assert "#T_ thead tr:nth-child(1) th" not in res
# test sticking level1
assert (
left_css.format("thead tr th:nth-child(2)", "0", "3 !important") in res
) is index
assert (left_css.format("tbody tr th.level1", "0", "1") in res) is index
assert (top_css.format("thead tr:nth-child(2) th", "0", "2") in res) is columns
def test_sticky_raises(styler):
with pytest.raises(ValueError, match="No axis named bad for object type DataFrame"):
styler.set_sticky(axis="bad")
@pytest.mark.parametrize(
"sparse_index, sparse_columns",
[(True, True), (True, False), (False, True), (False, False)],
)
def test_sparse_options(sparse_index, sparse_columns):
cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")])
ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=ridx, columns=cidx)
styler = df.style
default_html = styler.to_html() # defaults under pd.options to (True , True)
with option_context(
"styler.sparse.index", sparse_index, "styler.sparse.columns", sparse_columns
):
html1 = styler.to_html()
assert (html1 == default_html) is (sparse_index and sparse_columns)
html2 = styler.to_html(sparse_index=sparse_index, sparse_columns=sparse_columns)
assert html1 == html2
@pytest.mark.parametrize("index", [True, False])
@pytest.mark.parametrize("columns", [True, False])
def test_applymap_header_cell_ids(styler, index, columns):
# GH 41893
func = lambda v: "attr: val;"
styler.uuid, styler.cell_ids = "", False
if index:
styler.applymap_index(func, axis="index")
if columns:
styler.applymap_index(func, axis="columns")
result = styler.to_html()
# test no data cell ids
assert '<td class="data row0 col0" >2.610000</td>' in result
assert '<td class="data row1 col0" >2.690000</td>' in result
# test index header ids where needed and css styles
assert (
'<th id="T__level0_row0" class="row_heading level0 row0" >a</th>' in result
) is index
assert (
'<th id="T__level0_row1" class="row_heading level0 row1" >b</th>' in result
) is index
assert ("#T__level0_row0, #T__level0_row1 {\n attr: val;\n}" in result) is index
# test column header ids where needed and css styles
assert (
'<th id="T__level0_col0" class="col_heading level0 col0" >A</th>' in result
) is columns
assert ("#T__level0_col0 {\n attr: val;\n}" in result) is columns
@pytest.mark.parametrize("rows", [True, False])
@pytest.mark.parametrize("cols", [True, False])
def test_maximums(styler_mi, rows, cols):
result = styler_mi.to_html(
max_rows=2 if rows else None,
max_columns=2 if cols else None,
)
assert ">5</td>" in result # [[0,1], [4,5]] always visible
assert (">8</td>" in result) is not rows # first trimmed vertical element
assert (">2</td>" in result) is not cols # first trimmed horizontal element
def test_replaced_css_class_names(styler_mi):
css = {
"row_heading": "ROWHEAD",
# "col_heading": "COLHEAD",
"index_name": "IDXNAME",
# "col": "COL",
"row": "ROW",
# "col_trim": "COLTRIM",
"row_trim": "ROWTRIM",
"level": "LEVEL",
"data": "DATA",
"blank": "BLANK",
}
midx = MultiIndex.from_product([["a", "b"], ["c", "d"]])
styler_mi = Styler(
DataFrame(np.arange(16).reshape(4, 4), index=midx, columns=midx),
uuid_len=0,
).set_table_styles(css_class_names=css)
styler_mi.index.names = ["n1", "n2"]
styler_mi.hide(styler_mi.index[1:], axis=0)
styler_mi.hide(styler_mi.columns[1:], axis=1)
styler_mi.applymap_index(lambda v: "color: red;", axis=0)
styler_mi.applymap_index(lambda v: "color: green;", axis=1)
styler_mi.applymap(lambda v: "color: blue;")
expected = dedent(
"""\
<style type="text/css">
#T__ROW0_col0 {
color: blue;
}
#T__LEVEL0_ROW0, #T__LEVEL1_ROW0 {
color: red;
}
#T__LEVEL0_col0, #T__LEVEL1_col0 {
color: green;
}
</style>
<table id="T_">
<thead>
<tr>
<th class="BLANK" >&nbsp;</th>
<th class="IDXNAME LEVEL0" >n1</th>
<th id="T__LEVEL0_col0" class="col_heading LEVEL0 col0" >a</th>
</tr>
<tr>
<th class="BLANK" >&nbsp;</th>
<th class="IDXNAME LEVEL1" >n2</th>
<th id="T__LEVEL1_col0" class="col_heading LEVEL1 col0" >c</th>
</tr>
<tr>
<th class="IDXNAME LEVEL0" >n1</th>
<th class="IDXNAME LEVEL1" >n2</th>
<th class="BLANK col0" >&nbsp;</th>
</tr>
</thead>
<tbody>
<tr>
<th id="T__LEVEL0_ROW0" class="ROWHEAD LEVEL0 ROW0" >a</th>
<th id="T__LEVEL1_ROW0" class="ROWHEAD LEVEL1 ROW0" >c</th>
<td id="T__ROW0_col0" class="DATA ROW0 col0" >0</td>
</tr>
</tbody>
</table>
"""
)
result = styler_mi.to_html()
assert result == expected
def test_include_css_style_rules_only_for_visible_cells(styler_mi):
# GH 43619
result = (
styler_mi.set_uuid("")
.applymap(lambda v: "color: blue;")
.hide(styler_mi.data.columns[1:], axis="columns")
.hide(styler_mi.data.index[1:], axis="index")
.to_html()
)
expected_styles = dedent(
"""\
<style type="text/css">
#T__row0_col0 {
color: blue;
}
</style>
"""
)
assert expected_styles in result
def test_include_css_style_rules_only_for_visible_index_labels(styler_mi):
# GH 43619
result = (
styler_mi.set_uuid("")
.applymap_index(lambda v: "color: blue;", axis="index")
.hide(styler_mi.data.columns, axis="columns")
.hide(styler_mi.data.index[1:], axis="index")
.to_html()
)
expected_styles = dedent(
"""\
<style type="text/css">
#T__level0_row0, #T__level1_row0 {
color: blue;
}
</style>
"""
)
assert expected_styles in result
def test_include_css_style_rules_only_for_visible_column_labels(styler_mi):
# GH 43619
result = (
styler_mi.set_uuid("")
.applymap_index(lambda v: "color: blue;", axis="columns")
.hide(styler_mi.data.columns[1:], axis="columns")
.hide(styler_mi.data.index, axis="index")
.to_html()
)
expected_styles = dedent(
"""\
<style type="text/css">
#T__level0_col0, #T__level1_col0 {
color: blue;
}
</style>
"""
)
assert expected_styles in result
def test_hiding_index_columns_multiindex_alignment():
# gh 43644
midx = MultiIndex.from_product(
[["i0", "j0"], ["i1"], ["i2", "j2"]], names=["i-0", "i-1", "i-2"]
)
cidx = MultiIndex.from_product(
[["c0"], ["c1", "d1"], ["c2", "d2"]], names=["c-0", "c-1", "c-2"]
)
df = DataFrame(np.arange(16).reshape(4, 4), index=midx, columns=cidx)
styler = Styler(df, uuid_len=0)
styler.hide(level=1, axis=0).hide(level=0, axis=1)
styler.hide([("j0", "i1", "j2")], axis=0)
styler.hide([("c0", "d1", "d2")], axis=1)
result = styler.to_html()
expected = dedent(
"""\
<style type="text/css">
</style>
<table id="T_">
<thead>
<tr>
<th class="blank" >&nbsp;</th>
<th class="index_name level1" >c-1</th>
<th id="T__level1_col0" class="col_heading level1 col0" colspan="2">c1</th>
<th id="T__level1_col2" class="col_heading level1 col2" >d1</th>
</tr>
<tr>
<th class="blank" >&nbsp;</th>
<th class="index_name level2" >c-2</th>
<th id="T__level2_col0" class="col_heading level2 col0" >c2</th>
<th id="T__level2_col1" class="col_heading level2 col1" >d2</th>
<th id="T__level2_col2" class="col_heading level2 col2" >c2</th>
</tr>
<tr>
<th class="index_name level0" >i-0</th>
<th class="index_name level2" >i-2</th>
<th class="blank col0" >&nbsp;</th>
<th class="blank col1" >&nbsp;</th>
<th class="blank col2" >&nbsp;</th>
</tr>
</thead>
<tbody>
<tr>
<th id="T__level0_row0" class="row_heading level0 row0" rowspan="2">i0</th>
<th id="T__level2_row0" class="row_heading level2 row0" >i2</th>
<td id="T__row0_col0" class="data row0 col0" >0</td>
<td id="T__row0_col1" class="data row0 col1" >1</td>
<td id="T__row0_col2" class="data row0 col2" >2</td>
</tr>
<tr>
<th id="T__level2_row1" class="row_heading level2 row1" >j2</th>
<td id="T__row1_col0" class="data row1 col0" >4</td>
<td id="T__row1_col1" class="data row1 col1" >5</td>
<td id="T__row1_col2" class="data row1 col2" >6</td>
</tr>
<tr>
<th id="T__level0_row2" class="row_heading level0 row2" >j0</th>
<th id="T__level2_row2" class="row_heading level2 row2" >i2</th>
<td id="T__row2_col0" class="data row2 col0" >8</td>
<td id="T__row2_col1" class="data row2 col1" >9</td>
<td id="T__row2_col2" class="data row2 col2" >10</td>
</tr>
</tbody>
</table>
"""
)
assert result == expected
def test_hiding_index_columns_multiindex_trimming():
# gh 44272
df = DataFrame(np.arange(64).reshape(8, 8))
df.columns = MultiIndex.from_product([[0, 1, 2, 3], [0, 1]])
df.index = MultiIndex.from_product([[0, 1, 2, 3], [0, 1]])
df.index.names, df.columns.names = ["a", "b"], ["c", "d"]
styler = Styler(df, cell_ids=False, uuid_len=0)
styler.hide([(0, 0), (0, 1), (1, 0)], axis=1).hide([(0, 0), (0, 1), (1, 0)], axis=0)
with option_context("styler.render.max_rows", 4, "styler.render.max_columns", 4):
result = styler.to_html()
expected = dedent(
"""\
<style type="text/css">
</style>
<table id="T_">
<thead>
<tr>
<th class="blank" >&nbsp;</th>
<th class="index_name level0" >c</th>
<th class="col_heading level0 col3" >1</th>
<th class="col_heading level0 col4" colspan="2">2</th>
<th class="col_heading level0 col6" >3</th>
</tr>
<tr>
<th class="blank" >&nbsp;</th>
<th class="index_name level1" >d</th>
<th class="col_heading level1 col3" >1</th>
<th class="col_heading level1 col4" >0</th>
<th class="col_heading level1 col5" >1</th>
<th class="col_heading level1 col6" >0</th>
<th class="col_heading level1 col_trim" >...</th>
</tr>
<tr>
<th class="index_name level0" >a</th>
<th class="index_name level1" >b</th>
<th class="blank col3" >&nbsp;</th>
<th class="blank col4" >&nbsp;</th>
<th class="blank col5" >&nbsp;</th>
<th class="blank col6" >&nbsp;</th>
<th class="blank col7 col_trim" >&nbsp;</th>
</tr>
</thead>
<tbody>
<tr>
<th class="row_heading level0 row3" >1</th>
<th class="row_heading level1 row3" >1</th>
<td class="data row3 col3" >27</td>
<td class="data row3 col4" >28</td>
<td class="data row3 col5" >29</td>
<td class="data row3 col6" >30</td>
<td class="data row3 col_trim" >...</td>
</tr>
<tr>
<th class="row_heading level0 row4" rowspan="2">2</th>
<th class="row_heading level1 row4" >0</th>
<td class="data row4 col3" >35</td>
<td class="data row4 col4" >36</td>
<td class="data row4 col5" >37</td>
<td class="data row4 col6" >38</td>
<td class="data row4 col_trim" >...</td>
</tr>
<tr>
<th class="row_heading level1 row5" >1</th>
<td class="data row5 col3" >43</td>
<td class="data row5 col4" >44</td>
<td class="data row5 col5" >45</td>
<td class="data row5 col6" >46</td>
<td class="data row5 col_trim" >...</td>
</tr>
<tr>
<th class="row_heading level0 row6" >3</th>
<th class="row_heading level1 row6" >0</th>
<td class="data row6 col3" >51</td>
<td class="data row6 col4" >52</td>
<td class="data row6 col5" >53</td>
<td class="data row6 col6" >54</td>
<td class="data row6 col_trim" >...</td>
</tr>
<tr>
<th class="row_heading level0 row_trim" >...</th>
<th class="row_heading level1 row_trim" >...</th>
<td class="data col3 row_trim" >...</td>
<td class="data col4 row_trim" >...</td>
<td class="data col5 row_trim" >...</td>
<td class="data col6 row_trim" >...</td>
<td class="data row_trim col_trim" >...</td>
</tr>
</tbody>
</table>
"""
)
assert result == expected
@pytest.mark.parametrize("type", ["data", "index"])
@pytest.mark.parametrize(
"text, exp, found",
[
("no link, just text", False, ""),
("subdomain not www: sub.web.com", False, ""),
("www subdomain: www.web.com other", True, "www.web.com"),
("scheme full structure: http://www.web.com", True, "http://www.web.com"),
("scheme no top-level: http://www.web", True, "http://www.web"),
("no scheme, no top-level: www.web", False, "www.web"),
("https scheme: https://www.web.com", True, "https://www.web.com"),
("ftp scheme: ftp://www.web", True, "ftp://www.web"),
("ftps scheme: ftps://www.web", True, "ftps://www.web"),
("subdirectories: www.web.com/directory", True, "www.web.com/directory"),
("Multiple domains: www.1.2.3.4", True, "www.1.2.3.4"),
("with port: http://web.com:80", True, "http://web.com:80"),
(
"full net_loc scheme: http://user:pass@web.com",
True,
"http://user:pass@web.com",
),
(
"with valid special chars: http://web.com/,.':;~!@#$*()[]",
True,
"http://web.com/,.':;~!@#$*()[]",
),
],
)
def test_rendered_links(type, text, exp, found):
if type == "data":
df = DataFrame([text])
styler = df.style.format(hyperlinks="html")
else:
df = DataFrame([0], index=[text])
styler = df.style.format_index(hyperlinks="html")
rendered = '<a href="{0}" target="_blank">{0}</a>'.format(found)
result = styler.to_html()
assert (rendered in result) is exp
assert (text in result) is not exp # test conversion done when expected and not
def test_multiple_rendered_links():
links = ("www.a.b", "http://a.c", "https://a.d", "ftp://a.e")
df = DataFrame(["text {} {} text {} {}".format(*links)])
result = df.style.format(hyperlinks="html").to_html()
href = '<a href="{0}" target="_blank">{0}</a>'
for link in links:
assert href.format(link) in result
assert href.format("text") not in result

View File

@@ -0,0 +1,286 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
IndexSlice,
Series,
)
pytest.importorskip("matplotlib")
pytest.importorskip("jinja2")
import matplotlib as mpl
from pandas.io.formats.style import Styler
@pytest.fixture
def df():
return DataFrame([[1, 2], [2, 4]], columns=["A", "B"])
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0)
@pytest.fixture
def df_blank():
return DataFrame([[0, 0], [0, 0]], columns=["A", "B"], index=["X", "Y"])
@pytest.fixture
def styler_blank(df_blank):
return Styler(df_blank, uuid_len=0)
@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"])
def test_function_gradient(styler, f):
for c_map in [None, "YlOrRd"]:
result = getattr(styler, f)(cmap=c_map)._compute().ctx
assert all("#" in x[0][1] for x in result.values())
assert result[(0, 0)] == result[(0, 1)]
assert result[(1, 0)] == result[(1, 1)]
@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"])
def test_background_gradient_color(styler, f):
result = getattr(styler, f)(subset=IndexSlice[1, "A"])._compute().ctx
if f == "background_gradient":
assert result[(1, 0)] == [("background-color", "#fff7fb"), ("color", "#000000")]
elif f == "text_gradient":
assert result[(1, 0)] == [("color", "#fff7fb")]
@pytest.mark.parametrize(
"axis, expected",
[
(0, ["low", "low", "high", "high"]),
(1, ["low", "high", "low", "high"]),
(None, ["low", "mid", "mid", "high"]),
],
)
@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"])
def test_background_gradient_axis(styler, axis, expected, f):
if f == "background_gradient":
colors = {
"low": [("background-color", "#f7fbff"), ("color", "#000000")],
"mid": [("background-color", "#abd0e6"), ("color", "#000000")],
"high": [("background-color", "#08306b"), ("color", "#f1f1f1")],
}
elif f == "text_gradient":
colors = {
"low": [("color", "#f7fbff")],
"mid": [("color", "#abd0e6")],
"high": [("color", "#08306b")],
}
result = getattr(styler, f)(cmap="Blues", axis=axis)._compute().ctx
for i, cell in enumerate([(0, 0), (0, 1), (1, 0), (1, 1)]):
assert result[cell] == colors[expected[i]]
@pytest.mark.parametrize(
"cmap, expected",
[
(
"PuBu",
{
(4, 5): [("background-color", "#86b0d3"), ("color", "#000000")],
(4, 6): [("background-color", "#83afd3"), ("color", "#f1f1f1")],
},
),
(
"YlOrRd",
{
(4, 8): [("background-color", "#fd913e"), ("color", "#000000")],
(4, 9): [("background-color", "#fd8f3d"), ("color", "#f1f1f1")],
},
),
(
None,
{
(7, 0): [("background-color", "#48c16e"), ("color", "#f1f1f1")],
(7, 1): [("background-color", "#4cc26c"), ("color", "#000000")],
},
),
],
)
def test_text_color_threshold(cmap, expected):
# GH 39888
df = DataFrame(np.arange(100).reshape(10, 10))
result = df.style.background_gradient(cmap=cmap, axis=None)._compute().ctx
for k in expected.keys():
assert result[k] == expected[k]
def test_background_gradient_vmin_vmax():
# GH 12145
df = DataFrame(range(5))
ctx = df.style.background_gradient(vmin=1, vmax=3)._compute().ctx
assert ctx[(0, 0)] == ctx[(1, 0)]
assert ctx[(4, 0)] == ctx[(3, 0)]
def test_background_gradient_int64():
# GH 28869
df1 = Series(range(3)).to_frame()
df2 = Series(range(3), dtype="Int64").to_frame()
ctx1 = df1.style.background_gradient()._compute().ctx
ctx2 = df2.style.background_gradient()._compute().ctx
assert ctx2[(0, 0)] == ctx1[(0, 0)]
assert ctx2[(1, 0)] == ctx1[(1, 0)]
assert ctx2[(2, 0)] == ctx1[(2, 0)]
@pytest.mark.parametrize(
"axis, gmap, expected",
[
(
0,
[1, 2],
{
(0, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
(1, 0): [("background-color", "#023858"), ("color", "#f1f1f1")],
(0, 1): [("background-color", "#fff7fb"), ("color", "#000000")],
(1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
},
),
(
1,
[1, 2],
{
(0, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
(1, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
(0, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
(1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
},
),
(
None,
np.array([[2, 1], [1, 2]]),
{
(0, 0): [("background-color", "#023858"), ("color", "#f1f1f1")],
(1, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
(0, 1): [("background-color", "#fff7fb"), ("color", "#000000")],
(1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
},
),
],
)
def test_background_gradient_gmap_array(styler_blank, axis, gmap, expected):
# tests when gmap is given as a sequence and converted to ndarray
result = styler_blank.background_gradient(axis=axis, gmap=gmap)._compute().ctx
assert result == expected
@pytest.mark.parametrize(
"gmap, axis", [([1, 2, 3], 0), ([1, 2], 1), (np.array([[1, 2], [1, 2]]), None)]
)
def test_background_gradient_gmap_array_raises(gmap, axis):
# test when gmap as converted ndarray is bad shape
df = DataFrame([[0, 0, 0], [0, 0, 0]])
msg = "supplied 'gmap' is not correct shape"
with pytest.raises(ValueError, match=msg):
df.style.background_gradient(gmap=gmap, axis=axis)._compute()
@pytest.mark.parametrize(
"gmap",
[
DataFrame( # reverse the columns
[[2, 1], [1, 2]], columns=["B", "A"], index=["X", "Y"]
),
DataFrame( # reverse the index
[[2, 1], [1, 2]], columns=["A", "B"], index=["Y", "X"]
),
DataFrame( # reverse the index and columns
[[1, 2], [2, 1]], columns=["B", "A"], index=["Y", "X"]
),
DataFrame( # add unnecessary columns
[[1, 2, 3], [2, 1, 3]], columns=["A", "B", "C"], index=["X", "Y"]
),
DataFrame( # add unnecessary index
[[1, 2], [2, 1], [3, 3]], columns=["A", "B"], index=["X", "Y", "Z"]
),
],
)
@pytest.mark.parametrize(
"subset, exp_gmap", # exp_gmap is underlying map DataFrame should conform to
[
(None, [[1, 2], [2, 1]]),
(["A"], [[1], [2]]), # slice only column "A" in data and gmap
(["B", "A"], [[2, 1], [1, 2]]), # reverse the columns in data
(IndexSlice["X", :], [[1, 2]]), # slice only index "X" in data and gmap
(IndexSlice[["Y", "X"], :], [[2, 1], [1, 2]]), # reverse the index in data
],
)
def test_background_gradient_gmap_dataframe_align(styler_blank, gmap, subset, exp_gmap):
# test gmap given as DataFrame that it aligns to the the data including subset
expected = styler_blank.background_gradient(axis=None, gmap=exp_gmap, subset=subset)
result = styler_blank.background_gradient(axis=None, gmap=gmap, subset=subset)
assert expected._compute().ctx == result._compute().ctx
@pytest.mark.parametrize(
"gmap, axis, exp_gmap",
[
(Series([2, 1], index=["Y", "X"]), 0, [[1, 1], [2, 2]]), # revrse the index
(Series([2, 1], index=["B", "A"]), 1, [[1, 2], [1, 2]]), # revrse the cols
(Series([1, 2, 3], index=["X", "Y", "Z"]), 0, [[1, 1], [2, 2]]), # add idx
(Series([1, 2, 3], index=["A", "B", "C"]), 1, [[1, 2], [1, 2]]), # add col
],
)
def test_background_gradient_gmap_series_align(styler_blank, gmap, axis, exp_gmap):
# test gmap given as Series that it aligns to the the data including subset
expected = styler_blank.background_gradient(axis=None, gmap=exp_gmap)._compute()
result = styler_blank.background_gradient(axis=axis, gmap=gmap)._compute()
assert expected.ctx == result.ctx
@pytest.mark.parametrize(
"gmap, axis",
[
(DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 1),
(DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 0),
],
)
def test_background_gradient_gmap_wrong_dataframe(styler_blank, gmap, axis):
# test giving a gmap in DataFrame but with wrong axis
msg = "'gmap' is a DataFrame but underlying data for operations is a Series"
with pytest.raises(ValueError, match=msg):
styler_blank.background_gradient(gmap=gmap, axis=axis)._compute()
def test_background_gradient_gmap_wrong_series(styler_blank):
# test giving a gmap in Series form but with wrong axis
msg = "'gmap' is a Series but underlying data for operations is a DataFrame"
gmap = Series([1, 2], index=["X", "Y"])
with pytest.raises(ValueError, match=msg):
styler_blank.background_gradient(gmap=gmap, axis=None)._compute()
@pytest.mark.parametrize("cmap", ["PuBu", mpl.cm.get_cmap("PuBu")])
def test_bar_colormap(cmap):
data = DataFrame([[1, 2], [3, 4]])
ctx = data.style.bar(cmap=cmap, axis=None)._compute().ctx
pubu_colors = {
(0, 0): "#d0d1e6",
(1, 0): "#056faf",
(0, 1): "#73a9cf",
(1, 1): "#023858",
}
for k, v in pubu_colors.items():
assert v in ctx[k][1][1]
def test_bar_color_raises(df):
msg = "`color` must be string or list or tuple of 2 strings"
with pytest.raises(ValueError, match=msg):
df.style.bar(color={"a", "b"}).to_html()
with pytest.raises(ValueError, match=msg):
df.style.bar(color=["a", "b", "c"]).to_html()
msg = "`color` and `cmap` cannot both be given"
with pytest.raises(ValueError, match=msg):
df.style.bar(color="something", cmap="something else").to_html()

View File

@@ -0,0 +1,140 @@
from textwrap import dedent
import pytest
from pandas import (
DataFrame,
IndexSlice,
)
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
@pytest.fixture
def df():
return DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
index=["i", "j", "j"],
columns=["c", "d", "d"],
dtype=float,
)
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0)
def test_format_non_unique(df):
# GH 41269
# test dict
html = df.style.format({"d": "{:.1f}"}).to_html()
for val in ["1.000000<", "4.000000<", "7.000000<"]:
assert val in html
for val in ["2.0<", "3.0<", "5.0<", "6.0<", "8.0<", "9.0<"]:
assert val in html
# test subset
html = df.style.format(precision=1, subset=IndexSlice["j", "d"]).to_html()
for val in ["1.000000<", "4.000000<", "7.000000<", "2.000000<", "3.000000<"]:
assert val in html
for val in ["5.0<", "6.0<", "8.0<", "9.0<"]:
assert val in html
@pytest.mark.parametrize("func", ["apply", "applymap"])
def test_apply_applymap_non_unique_raises(df, func):
# GH 41269
if func == "apply":
op = lambda s: ["color: red;"] * len(s)
else:
op = lambda v: "color: red;"
with pytest.raises(KeyError, match="`Styler.apply` and `.applymap` are not"):
getattr(df.style, func)(op)._compute()
def test_table_styles_dict_non_unique_index(styler):
styles = styler.set_table_styles(
{"j": [{"selector": "td", "props": "a: v;"}]}, axis=1
).table_styles
assert styles == [
{"selector": "td.row1", "props": [("a", "v")]},
{"selector": "td.row2", "props": [("a", "v")]},
]
def test_table_styles_dict_non_unique_columns(styler):
styles = styler.set_table_styles(
{"d": [{"selector": "td", "props": "a: v;"}]}, axis=0
).table_styles
assert styles == [
{"selector": "td.col1", "props": [("a", "v")]},
{"selector": "td.col2", "props": [("a", "v")]},
]
def test_tooltips_non_unique_raises(styler):
# ttips has unique keys
ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "b"])
styler.set_tooltips(ttips=ttips) # OK
# ttips has non-unique columns
ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "c"], index=["a", "b"])
with pytest.raises(KeyError, match="Tooltips render only if `ttips` has unique"):
styler.set_tooltips(ttips=ttips)
# ttips has non-unique index
ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"])
with pytest.raises(KeyError, match="Tooltips render only if `ttips` has unique"):
styler.set_tooltips(ttips=ttips)
def test_set_td_classes_non_unique_raises(styler):
# classes has unique keys
classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "b"])
styler.set_td_classes(classes=classes) # OK
# classes has non-unique columns
classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "c"], index=["a", "b"])
with pytest.raises(KeyError, match="Classes render only if `classes` has unique"):
styler.set_td_classes(classes=classes)
# classes has non-unique index
classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"])
with pytest.raises(KeyError, match="Classes render only if `classes` has unique"):
styler.set_td_classes(classes=classes)
def test_hide_columns_non_unique(styler):
ctx = styler.hide(["d"], axis="columns")._translate(True, True)
assert ctx["head"][0][1]["display_value"] == "c"
assert ctx["head"][0][1]["is_visible"] is True
assert ctx["head"][0][2]["display_value"] == "d"
assert ctx["head"][0][2]["is_visible"] is False
assert ctx["head"][0][3]["display_value"] == "d"
assert ctx["head"][0][3]["is_visible"] is False
assert ctx["body"][0][1]["is_visible"] is True
assert ctx["body"][0][2]["is_visible"] is False
assert ctx["body"][0][3]["is_visible"] is False
def test_latex_non_unique(styler):
result = styler.to_latex()
assert result == dedent(
"""\
\\begin{tabular}{lrrr}
& c & d & d \\\\
i & 1.000000 & 2.000000 & 3.000000 \\\\
j & 4.000000 & 5.000000 & 6.000000 \\\\
j & 7.000000 & 8.000000 & 9.000000 \\\\
\\end{tabular}
"""
)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,999 @@
from textwrap import dedent
import numpy as np
import pytest
from pandas import (
DataFrame,
MultiIndex,
option_context,
)
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
from pandas.io.formats.style_render import (
_parse_latex_cell_styles,
_parse_latex_css_conversion,
_parse_latex_header_span,
_parse_latex_table_styles,
_parse_latex_table_wrapping,
)
@pytest.fixture
def df():
return DataFrame({"A": [0, 1], "B": [-0.61, -1.22], "C": ["ab", "cd"]})
@pytest.fixture
def df_ext():
return DataFrame(
{"A": [0, 1, 2], "B": [-0.61, -1.22, -2.22], "C": ["ab", "cd", "de"]}
)
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0, precision=2)
def test_minimal_latex_tabular(styler):
expected = dedent(
"""\
\\begin{tabular}{lrrl}
& A & B & C \\\\
0 & 0 & -0.61 & ab \\\\
1 & 1 & -1.22 & cd \\\\
\\end{tabular}
"""
)
assert styler.to_latex() == expected
def test_tabular_hrules(styler):
expected = dedent(
"""\
\\begin{tabular}{lrrl}
\\toprule
& A & B & C \\\\
\\midrule
0 & 0 & -0.61 & ab \\\\
1 & 1 & -1.22 & cd \\\\
\\bottomrule
\\end{tabular}
"""
)
assert styler.to_latex(hrules=True) == expected
def test_tabular_custom_hrules(styler):
styler.set_table_styles(
[
{"selector": "toprule", "props": ":hline"},
{"selector": "bottomrule", "props": ":otherline"},
]
) # no midrule
expected = dedent(
"""\
\\begin{tabular}{lrrl}
\\hline
& A & B & C \\\\
0 & 0 & -0.61 & ab \\\\
1 & 1 & -1.22 & cd \\\\
\\otherline
\\end{tabular}
"""
)
assert styler.to_latex() == expected
def test_column_format(styler):
# default setting is already tested in `test_latex_minimal_tabular`
styler.set_table_styles([{"selector": "column_format", "props": ":cccc"}])
assert "\\begin{tabular}{rrrr}" in styler.to_latex(column_format="rrrr")
styler.set_table_styles([{"selector": "column_format", "props": ":r|r|cc"}])
assert "\\begin{tabular}{r|r|cc}" in styler.to_latex()
def test_siunitx_cols(styler):
expected = dedent(
"""\
\\begin{tabular}{lSSl}
{} & {A} & {B} & {C} \\\\
0 & 0 & -0.61 & ab \\\\
1 & 1 & -1.22 & cd \\\\
\\end{tabular}
"""
)
assert styler.to_latex(siunitx=True) == expected
def test_position(styler):
assert "\\begin{table}[h!]" in styler.to_latex(position="h!")
assert "\\end{table}" in styler.to_latex(position="h!")
styler.set_table_styles([{"selector": "position", "props": ":b!"}])
assert "\\begin{table}[b!]" in styler.to_latex()
assert "\\end{table}" in styler.to_latex()
@pytest.mark.parametrize("env", [None, "longtable"])
def test_label(styler, env):
assert "\n\\label{text}" in styler.to_latex(label="text", environment=env)
styler.set_table_styles([{"selector": "label", "props": ":{more §text}"}])
assert "\n\\label{more :text}" in styler.to_latex(environment=env)
def test_position_float_raises(styler):
msg = "`position_float` should be one of 'raggedright', 'raggedleft', 'centering',"
with pytest.raises(ValueError, match=msg):
styler.to_latex(position_float="bad_string")
msg = "`position_float` cannot be used in 'longtable' `environment`"
with pytest.raises(ValueError, match=msg):
styler.to_latex(position_float="centering", environment="longtable")
@pytest.mark.parametrize("label", [(None, ""), ("text", "\\label{text}")])
@pytest.mark.parametrize("position", [(None, ""), ("h!", "{table}[h!]")])
@pytest.mark.parametrize("caption", [(None, ""), ("text", "\\caption{text}")])
@pytest.mark.parametrize("column_format", [(None, ""), ("rcrl", "{tabular}{rcrl}")])
@pytest.mark.parametrize("position_float", [(None, ""), ("centering", "\\centering")])
def test_kwargs_combinations(
styler, label, position, caption, column_format, position_float
):
result = styler.to_latex(
label=label[0],
position=position[0],
caption=caption[0],
column_format=column_format[0],
position_float=position_float[0],
)
assert label[1] in result
assert position[1] in result
assert caption[1] in result
assert column_format[1] in result
assert position_float[1] in result
def test_custom_table_styles(styler):
styler.set_table_styles(
[
{"selector": "mycommand", "props": ":{myoptions}"},
{"selector": "mycommand2", "props": ":{myoptions2}"},
]
)
expected = dedent(
"""\
\\begin{table}
\\mycommand{myoptions}
\\mycommand2{myoptions2}
"""
)
assert expected in styler.to_latex()
def test_cell_styling(styler):
styler.highlight_max(props="itshape:;Huge:--wrap;")
expected = dedent(
"""\
\\begin{tabular}{lrrl}
& A & B & C \\\\
0 & 0 & \\itshape {\\Huge -0.61} & ab \\\\
1 & \\itshape {\\Huge 1} & -1.22 & \\itshape {\\Huge cd} \\\\
\\end{tabular}
"""
)
assert expected == styler.to_latex()
def test_multiindex_columns(df):
cidx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
df.columns = cidx
expected = dedent(
"""\
\\begin{tabular}{lrrl}
& \\multicolumn{2}{r}{A} & B \\\\
& a & b & c \\\\
0 & 0 & -0.61 & ab \\\\
1 & 1 & -1.22 & cd \\\\
\\end{tabular}
"""
)
s = df.style.format(precision=2)
assert expected == s.to_latex()
# non-sparse
expected = dedent(
"""\
\\begin{tabular}{lrrl}
& A & A & B \\\\
& a & b & c \\\\
0 & 0 & -0.61 & ab \\\\
1 & 1 & -1.22 & cd \\\\
\\end{tabular}
"""
)
s = df.style.format(precision=2)
assert expected == s.to_latex(sparse_columns=False)
def test_multiindex_row(df_ext):
ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
df_ext.index = ridx
expected = dedent(
"""\
\\begin{tabular}{llrrl}
& & A & B & C \\\\
\\multirow[c]{2}{*}{A} & a & 0 & -0.61 & ab \\\\
& b & 1 & -1.22 & cd \\\\
B & c & 2 & -2.22 & de \\\\
\\end{tabular}
"""
)
styler = df_ext.style.format(precision=2)
result = styler.to_latex()
assert expected == result
# non-sparse
expected = dedent(
"""\
\\begin{tabular}{llrrl}
& & A & B & C \\\\
A & a & 0 & -0.61 & ab \\\\
A & b & 1 & -1.22 & cd \\\\
B & c & 2 & -2.22 & de \\\\
\\end{tabular}
"""
)
result = styler.to_latex(sparse_index=False)
assert expected == result
def test_multirow_naive(df_ext):
ridx = MultiIndex.from_tuples([("X", "x"), ("X", "y"), ("Y", "z")])
df_ext.index = ridx
expected = dedent(
"""\
\\begin{tabular}{llrrl}
& & A & B & C \\\\
X & x & 0 & -0.61 & ab \\\\
& y & 1 & -1.22 & cd \\\\
Y & z & 2 & -2.22 & de \\\\
\\end{tabular}
"""
)
styler = df_ext.style.format(precision=2)
result = styler.to_latex(multirow_align="naive")
assert expected == result
def test_multiindex_row_and_col(df_ext):
cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")])
ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
df_ext.index, df_ext.columns = ridx, cidx
expected = dedent(
"""\
\\begin{tabular}{llrrl}
& & \\multicolumn{2}{l}{Z} & Y \\\\
& & a & b & c \\\\
\\multirow[b]{2}{*}{A} & a & 0 & -0.61 & ab \\\\
& b & 1 & -1.22 & cd \\\\
B & c & 2 & -2.22 & de \\\\
\\end{tabular}
"""
)
styler = df_ext.style.format(precision=2)
result = styler.to_latex(multirow_align="b", multicol_align="l")
assert result == expected
# non-sparse
expected = dedent(
"""\
\\begin{tabular}{llrrl}
& & Z & Z & Y \\\\
& & a & b & c \\\\
A & a & 0 & -0.61 & ab \\\\
A & b & 1 & -1.22 & cd \\\\
B & c & 2 & -2.22 & de \\\\
\\end{tabular}
"""
)
result = styler.to_latex(sparse_index=False, sparse_columns=False)
assert result == expected
@pytest.mark.parametrize(
"multicol_align, siunitx, header",
[
("naive-l", False, " & A & &"),
("naive-r", False, " & & & A"),
("naive-l", True, "{} & {A} & {} & {}"),
("naive-r", True, "{} & {} & {} & {A}"),
],
)
def test_multicol_naive(df, multicol_align, siunitx, header):
ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("A", "c")])
df.columns = ridx
level1 = " & a & b & c" if not siunitx else "{} & {a} & {b} & {c}"
col_format = "lrrl" if not siunitx else "lSSl"
expected = dedent(
f"""\
\\begin{{tabular}}{{{col_format}}}
{header} \\\\
{level1} \\\\
0 & 0 & -0.61 & ab \\\\
1 & 1 & -1.22 & cd \\\\
\\end{{tabular}}
"""
)
styler = df.style.format(precision=2)
result = styler.to_latex(multicol_align=multicol_align, siunitx=siunitx)
assert expected == result
def test_multi_options(df_ext):
cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")])
ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
df_ext.index, df_ext.columns = ridx, cidx
styler = df_ext.style.format(precision=2)
expected = dedent(
"""\
& & \\multicolumn{2}{r}{Z} & Y \\\\
& & a & b & c \\\\
\\multirow[c]{2}{*}{A} & a & 0 & -0.61 & ab \\\\
"""
)
result = styler.to_latex()
assert expected in result
with option_context("styler.latex.multicol_align", "l"):
assert " & & \\multicolumn{2}{l}{Z} & Y \\\\" in styler.to_latex()
with option_context("styler.latex.multirow_align", "b"):
assert "\\multirow[b]{2}{*}{A} & a & 0 & -0.61 & ab \\\\" in styler.to_latex()
def test_multiindex_columns_hidden():
df = DataFrame([[1, 2, 3, 4]])
df.columns = MultiIndex.from_tuples([("A", 1), ("A", 2), ("A", 3), ("B", 1)])
s = df.style
assert "{tabular}{lrrrr}" in s.to_latex()
s.set_table_styles([]) # reset the position command
s.hide([("A", 2)], axis="columns")
assert "{tabular}{lrrr}" in s.to_latex()
@pytest.mark.parametrize(
"option, value",
[
("styler.sparse.index", True),
("styler.sparse.index", False),
("styler.sparse.columns", True),
("styler.sparse.columns", False),
],
)
def test_sparse_options(df_ext, option, value):
cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")])
ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
df_ext.index, df_ext.columns = ridx, cidx
styler = df_ext.style
latex1 = styler.to_latex()
with option_context(option, value):
latex2 = styler.to_latex()
assert (latex1 == latex2) is value
def test_hidden_index(styler):
styler.hide(axis="index")
expected = dedent(
"""\
\\begin{tabular}{rrl}
A & B & C \\\\
0 & -0.61 & ab \\\\
1 & -1.22 & cd \\\\
\\end{tabular}
"""
)
assert styler.to_latex() == expected
@pytest.mark.parametrize("environment", ["table", "figure*", None])
def test_comprehensive(df_ext, environment):
# test as many low level features simultaneously as possible
cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")])
ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
df_ext.index, df_ext.columns = ridx, cidx
stlr = df_ext.style
stlr.set_caption("mycap")
stlr.set_table_styles(
[
{"selector": "label", "props": ":{fig§item}"},
{"selector": "position", "props": ":h!"},
{"selector": "position_float", "props": ":centering"},
{"selector": "column_format", "props": ":rlrlr"},
{"selector": "toprule", "props": ":toprule"},
{"selector": "midrule", "props": ":midrule"},
{"selector": "bottomrule", "props": ":bottomrule"},
{"selector": "rowcolors", "props": ":{3}{pink}{}"}, # custom command
]
)
stlr.highlight_max(axis=0, props="textbf:--rwrap;cellcolor:[rgb]{1,1,0.6}--rwrap")
stlr.highlight_max(axis=None, props="Huge:--wrap;", subset=[("Z", "a"), ("Z", "b")])
expected = (
"""\
\\begin{table}[h!]
\\centering
\\caption{mycap}
\\label{fig:item}
\\rowcolors{3}{pink}{}
\\begin{tabular}{rlrlr}
\\toprule
& & \\multicolumn{2}{r}{Z} & Y \\\\
& & a & b & c \\\\
\\midrule
\\multirow[c]{2}{*}{A} & a & 0 & \\textbf{\\cellcolor[rgb]{1,1,0.6}{-0.61}} & ab \\\\
& b & 1 & -1.22 & cd \\\\
B & c & \\textbf{\\cellcolor[rgb]{1,1,0.6}{{\\Huge 2}}} & -2.22 & """
"""\
\\textbf{\\cellcolor[rgb]{1,1,0.6}{de}} \\\\
\\bottomrule
\\end{tabular}
\\end{table}
"""
).replace("table", environment if environment else "table")
result = stlr.format(precision=2).to_latex(environment=environment)
assert result == expected
def test_environment_option(styler):
with option_context("styler.latex.environment", "bar-env"):
assert "\\begin{bar-env}" in styler.to_latex()
assert "\\begin{foo-env}" in styler.to_latex(environment="foo-env")
def test_parse_latex_table_styles(styler):
styler.set_table_styles(
[
{"selector": "foo", "props": [("attr", "value")]},
{"selector": "bar", "props": [("attr", "overwritten")]},
{"selector": "bar", "props": [("attr", "baz"), ("attr2", "ignored")]},
{"selector": "label", "props": [("", "{fig§item}")]},
]
)
assert _parse_latex_table_styles(styler.table_styles, "bar") == "baz"
# test '§' replaced by ':' [for CSS compatibility]
assert _parse_latex_table_styles(styler.table_styles, "label") == "{fig:item}"
def test_parse_latex_cell_styles_basic(): # test nesting
cell_style = [("itshape", "--rwrap"), ("cellcolor", "[rgb]{0,1,1}--rwrap")]
expected = "\\itshape{\\cellcolor[rgb]{0,1,1}{text}}"
assert _parse_latex_cell_styles(cell_style, "text") == expected
@pytest.mark.parametrize(
"wrap_arg, expected",
[ # test wrapping
("", "\\<command><options> <display_value>"),
("--wrap", "{\\<command><options> <display_value>}"),
("--nowrap", "\\<command><options> <display_value>"),
("--lwrap", "{\\<command><options>} <display_value>"),
("--dwrap", "{\\<command><options>}{<display_value>}"),
("--rwrap", "\\<command><options>{<display_value>}"),
],
)
def test_parse_latex_cell_styles_braces(wrap_arg, expected):
cell_style = [("<command>", f"<options>{wrap_arg}")]
assert _parse_latex_cell_styles(cell_style, "<display_value>") == expected
def test_parse_latex_header_span():
cell = {"attributes": 'colspan="3"', "display_value": "text", "cellstyle": []}
expected = "\\multicolumn{3}{Y}{text}"
assert _parse_latex_header_span(cell, "X", "Y") == expected
cell = {"attributes": 'rowspan="5"', "display_value": "text", "cellstyle": []}
expected = "\\multirow[X]{5}{*}{text}"
assert _parse_latex_header_span(cell, "X", "Y") == expected
cell = {"display_value": "text", "cellstyle": []}
assert _parse_latex_header_span(cell, "X", "Y") == "text"
cell = {"display_value": "text", "cellstyle": [("bfseries", "--rwrap")]}
assert _parse_latex_header_span(cell, "X", "Y") == "\\bfseries{text}"
def test_parse_latex_table_wrapping(styler):
styler.set_table_styles(
[
{"selector": "toprule", "props": ":value"},
{"selector": "bottomrule", "props": ":value"},
{"selector": "midrule", "props": ":value"},
{"selector": "column_format", "props": ":value"},
]
)
assert _parse_latex_table_wrapping(styler.table_styles, styler.caption) is False
assert _parse_latex_table_wrapping(styler.table_styles, "some caption") is True
styler.set_table_styles(
[
{"selector": "not-ignored", "props": ":value"},
],
overwrite=False,
)
assert _parse_latex_table_wrapping(styler.table_styles, None) is True
def test_short_caption(styler):
result = styler.to_latex(caption=("full cap", "short cap"))
assert "\\caption[short cap]{full cap}" in result
@pytest.mark.parametrize(
"css, expected",
[
([("color", "red")], [("color", "{red}")]), # test color and input format types
(
[("color", "rgb(128, 128, 128 )")],
[("color", "[rgb]{0.502, 0.502, 0.502}")],
),
(
[("color", "rgb(128, 50%, 25% )")],
[("color", "[rgb]{0.502, 0.500, 0.250}")],
),
(
[("color", "rgba(128,128,128,1)")],
[("color", "[rgb]{0.502, 0.502, 0.502}")],
),
([("color", "#FF00FF")], [("color", "[HTML]{FF00FF}")]),
([("color", "#F0F")], [("color", "[HTML]{FF00FF}")]),
([("font-weight", "bold")], [("bfseries", "")]), # test font-weight and types
([("font-weight", "bolder")], [("bfseries", "")]),
([("font-weight", "normal")], []),
([("background-color", "red")], [("cellcolor", "{red}--lwrap")]),
(
[("background-color", "#FF00FF")], # test background-color command and wrap
[("cellcolor", "[HTML]{FF00FF}--lwrap")],
),
([("font-style", "italic")], [("itshape", "")]), # test font-style and types
([("font-style", "oblique")], [("slshape", "")]),
([("font-style", "normal")], []),
([("color", "red /*--dwrap*/")], [("color", "{red}--dwrap")]), # css comments
([("background-color", "red /* --dwrap */")], [("cellcolor", "{red}--dwrap")]),
],
)
def test_parse_latex_css_conversion(css, expected):
result = _parse_latex_css_conversion(css)
assert result == expected
@pytest.mark.parametrize(
"env, inner_env",
[
(None, "tabular"),
("table", "tabular"),
("longtable", "longtable"),
],
)
@pytest.mark.parametrize(
"convert, exp", [(True, "bfseries"), (False, "font-weightbold")]
)
def test_parse_latex_css_convert_minimal(styler, env, inner_env, convert, exp):
# parameters ensure longtable template is also tested
styler.highlight_max(props="font-weight:bold;")
result = styler.to_latex(convert_css=convert, environment=env)
expected = dedent(
f"""\
0 & 0 & \\{exp} -0.61 & ab \\\\
1 & \\{exp} 1 & -1.22 & \\{exp} cd \\\\
\\end{{{inner_env}}}
"""
)
assert expected in result
def test_parse_latex_css_conversion_option():
css = [("command", "option--latex--wrap")]
expected = [("command", "option--wrap")]
result = _parse_latex_css_conversion(css)
assert result == expected
def test_styler_object_after_render(styler):
# GH 42320
pre_render = styler._copy(deepcopy=True)
styler.to_latex(
column_format="rllr",
position="h",
position_float="centering",
hrules=True,
label="my lab",
caption="my cap",
)
assert pre_render.table_styles == styler.table_styles
assert pre_render.caption == styler.caption
def test_longtable_comprehensive(styler):
result = styler.to_latex(
environment="longtable", hrules=True, label="fig:A", caption=("full", "short")
)
expected = dedent(
"""\
\\begin{longtable}{lrrl}
\\caption[short]{full} \\label{fig:A} \\\\
\\toprule
& A & B & C \\\\
\\midrule
\\endfirsthead
\\caption[]{full} \\\\
\\toprule
& A & B & C \\\\
\\midrule
\\endhead
\\midrule
\\multicolumn{4}{r}{Continued on next page} \\\\
\\midrule
\\endfoot
\\bottomrule
\\endlastfoot
0 & 0 & -0.61 & ab \\\\
1 & 1 & -1.22 & cd \\\\
\\end{longtable}
"""
)
assert result == expected
def test_longtable_minimal(styler):
result = styler.to_latex(environment="longtable")
expected = dedent(
"""\
\\begin{longtable}{lrrl}
& A & B & C \\\\
\\endfirsthead
& A & B & C \\\\
\\endhead
\\multicolumn{4}{r}{Continued on next page} \\\\
\\endfoot
\\endlastfoot
0 & 0 & -0.61 & ab \\\\
1 & 1 & -1.22 & cd \\\\
\\end{longtable}
"""
)
assert result == expected
@pytest.mark.parametrize(
"sparse, exp, siunitx",
[
(True, "{} & \\multicolumn{2}{r}{A} & {B}", True),
(False, "{} & {A} & {A} & {B}", True),
(True, " & \\multicolumn{2}{r}{A} & B", False),
(False, " & A & A & B", False),
],
)
def test_longtable_multiindex_columns(df, sparse, exp, siunitx):
cidx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
df.columns = cidx
with_si = "{} & {a} & {b} & {c} \\\\"
without_si = " & a & b & c \\\\"
expected = dedent(
f"""\
\\begin{{longtable}}{{l{"SS" if siunitx else "rr"}l}}
{exp} \\\\
{with_si if siunitx else without_si}
\\endfirsthead
{exp} \\\\
{with_si if siunitx else without_si}
\\endhead
"""
)
result = df.style.to_latex(
environment="longtable", sparse_columns=sparse, siunitx=siunitx
)
assert expected in result
@pytest.mark.parametrize(
"caption, cap_exp",
[
("full", ("{full}", "")),
(("full", "short"), ("{full}", "[short]")),
],
)
@pytest.mark.parametrize("label, lab_exp", [(None, ""), ("tab:A", " \\label{tab:A}")])
def test_longtable_caption_label(styler, caption, cap_exp, label, lab_exp):
cap_exp1 = f"\\caption{cap_exp[1]}{cap_exp[0]}"
cap_exp2 = f"\\caption[]{cap_exp[0]}"
expected = dedent(
f"""\
{cap_exp1}{lab_exp} \\\\
& A & B & C \\\\
\\endfirsthead
{cap_exp2} \\\\
"""
)
assert expected in styler.to_latex(
environment="longtable", caption=caption, label=label
)
@pytest.mark.parametrize("index", [True, False])
@pytest.mark.parametrize(
"columns, siunitx",
[
(True, True),
(True, False),
(False, False),
],
)
def test_apply_map_header_render_mi(df_ext, index, columns, siunitx):
cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")])
ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
df_ext.index, df_ext.columns = ridx, cidx
styler = df_ext.style
func = lambda v: "bfseries: --rwrap" if "A" in v or "Z" in v or "c" in v else None
if index:
styler.applymap_index(func, axis="index")
if columns:
styler.applymap_index(func, axis="columns")
result = styler.to_latex(siunitx=siunitx)
expected_index = dedent(
"""\
\\multirow[c]{2}{*}{\\bfseries{A}} & a & 0 & -0.610000 & ab \\\\
\\bfseries{} & b & 1 & -1.220000 & cd \\\\
B & \\bfseries{c} & 2 & -2.220000 & de \\\\
"""
)
assert (expected_index in result) is index
exp_cols_si = dedent(
"""\
{} & {} & \\multicolumn{2}{r}{\\bfseries{Z}} & {Y} \\\\
{} & {} & {a} & {b} & {\\bfseries{c}} \\\\
"""
)
exp_cols_no_si = """\
& & \\multicolumn{2}{r}{\\bfseries{Z}} & Y \\\\
& & a & b & \\bfseries{c} \\\\
"""
assert ((exp_cols_si if siunitx else exp_cols_no_si) in result) is columns
def test_repr_option(styler):
assert "<style" in styler._repr_html_()[:6]
assert styler._repr_latex_() is None
with option_context("styler.render.repr", "latex"):
assert "\\begin{tabular}" in styler._repr_latex_()[:15]
assert styler._repr_html_() is None
@pytest.mark.parametrize("option", ["hrules"])
def test_bool_options(styler, option):
with option_context(f"styler.latex.{option}", False):
latex_false = styler.to_latex()
with option_context(f"styler.latex.{option}", True):
latex_true = styler.to_latex()
assert latex_false != latex_true # options are reactive under to_latex(*no_args)
def test_siunitx_basic_headers(styler):
assert "{} & {A} & {B} & {C} \\\\" in styler.to_latex(siunitx=True)
assert " & A & B & C \\\\" in styler.to_latex() # default siunitx=False
@pytest.mark.parametrize("axis", ["index", "columns"])
def test_css_convert_apply_index(styler, axis):
styler.applymap_index(lambda x: "font-weight: bold;", axis=axis)
for label in getattr(styler, axis):
assert f"\\bfseries {label}" in styler.to_latex(convert_css=True)
def test_hide_index_latex(styler):
# GH 43637
styler.hide([0], axis=0)
result = styler.to_latex()
expected = dedent(
"""\
\\begin{tabular}{lrrl}
& A & B & C \\\\
1 & 1 & -1.22 & cd \\\\
\\end{tabular}
"""
)
assert expected == result
def test_latex_hiding_index_columns_multiindex_alignment():
# gh 43644
midx = MultiIndex.from_product(
[["i0", "j0"], ["i1"], ["i2", "j2"]], names=["i-0", "i-1", "i-2"]
)
cidx = MultiIndex.from_product(
[["c0"], ["c1", "d1"], ["c2", "d2"]], names=["c-0", "c-1", "c-2"]
)
df = DataFrame(np.arange(16).reshape(4, 4), index=midx, columns=cidx)
styler = Styler(df, uuid_len=0)
styler.hide(level=1, axis=0).hide(level=0, axis=1)
styler.hide([("i0", "i1", "i2")], axis=0)
styler.hide([("c0", "c1", "c2")], axis=1)
styler.applymap(lambda x: "color:{red};" if x == 5 else "")
styler.applymap_index(lambda x: "color:{blue};" if "j" in x else "")
result = styler.to_latex()
expected = dedent(
"""\
\\begin{tabular}{llrrr}
& c-1 & c1 & \\multicolumn{2}{r}{d1} \\\\
& c-2 & d2 & c2 & d2 \\\\
i-0 & i-2 & & & \\\\
i0 & \\color{blue} j2 & \\color{red} 5 & 6 & 7 \\\\
\\multirow[c]{2}{*}{\\color{blue} j0} & i2 & 9 & 10 & 11 \\\\
\\color{blue} & \\color{blue} j2 & 13 & 14 & 15 \\\\
\\end{tabular}
"""
)
assert result == expected
def test_rendered_links():
# note the majority of testing is done in test_html.py: test_rendered_links
# these test only the alternative latex format is functional
df = DataFrame(["text www.domain.com text"])
result = df.style.format(hyperlinks="latex").to_latex()
assert r"text \href{www.domain.com}{www.domain.com} text" in result
def test_apply_index_hidden_levels():
# gh 45156
styler = DataFrame(
[[1]],
index=MultiIndex.from_tuples([(0, 1)], names=["l0", "l1"]),
columns=MultiIndex.from_tuples([(0, 1)], names=["c0", "c1"]),
).style
styler.hide(level=1)
styler.applymap_index(lambda v: "color: red;", level=0, axis=1)
result = styler.to_latex(convert_css=True)
expected = dedent(
"""\
\\begin{tabular}{lr}
c0 & \\color{red} 0 \\\\
c1 & 1 \\\\
l0 & \\\\
0 & 1 \\\\
\\end{tabular}
"""
)
assert result == expected
@pytest.mark.parametrize("clines", ["bad", "index", "skip-last", "all", "data"])
def test_clines_validation(clines, styler):
msg = f"`clines` value of {clines} is invalid."
with pytest.raises(ValueError, match=msg):
styler.to_latex(clines=clines)
@pytest.mark.parametrize(
"clines, exp",
[
("all;index", "\n\\cline{1-1}"),
("all;data", "\n\\cline{1-2}"),
("skip-last;index", ""),
("skip-last;data", ""),
(None, ""),
],
)
@pytest.mark.parametrize("env", ["table", "longtable"])
def test_clines_index(clines, exp, env):
df = DataFrame([[1], [2], [3], [4]])
result = df.style.to_latex(clines=clines, environment=env)
expected = f"""\
0 & 1 \\\\{exp}
1 & 2 \\\\{exp}
2 & 3 \\\\{exp}
3 & 4 \\\\{exp}
"""
assert expected in result
@pytest.mark.parametrize(
"clines, expected",
[
(
None,
dedent(
"""\
\\multirow[c]{2}{*}{A} & X & 1 \\\\
& Y & 2 \\\\
\\multirow[c]{2}{*}{B} & X & 3 \\\\
& Y & 4 \\\\
"""
),
),
(
"skip-last;index",
dedent(
"""\
\\multirow[c]{2}{*}{A} & X & 1 \\\\
& Y & 2 \\\\
\\cline{1-2}
\\multirow[c]{2}{*}{B} & X & 3 \\\\
& Y & 4 \\\\
\\cline{1-2}
"""
),
),
(
"skip-last;data",
dedent(
"""\
\\multirow[c]{2}{*}{A} & X & 1 \\\\
& Y & 2 \\\\
\\cline{1-3}
\\multirow[c]{2}{*}{B} & X & 3 \\\\
& Y & 4 \\\\
\\cline{1-3}
"""
),
),
(
"all;index",
dedent(
"""\
\\multirow[c]{2}{*}{A} & X & 1 \\\\
\\cline{2-2}
& Y & 2 \\\\
\\cline{1-2} \\cline{2-2}
\\multirow[c]{2}{*}{B} & X & 3 \\\\
\\cline{2-2}
& Y & 4 \\\\
\\cline{1-2} \\cline{2-2}
"""
),
),
(
"all;data",
dedent(
"""\
\\multirow[c]{2}{*}{A} & X & 1 \\\\
\\cline{2-3}
& Y & 2 \\\\
\\cline{1-3} \\cline{2-3}
\\multirow[c]{2}{*}{B} & X & 3 \\\\
\\cline{2-3}
& Y & 4 \\\\
\\cline{1-3} \\cline{2-3}
"""
),
),
],
)
@pytest.mark.parametrize("env", ["table"])
def test_clines_multiindex(clines, expected, env):
# also tests simultaneously with hidden rows and a hidden multiindex level
midx = MultiIndex.from_product([["A", "-", "B"], [0], ["X", "Y"]])
df = DataFrame([[1], [2], [99], [99], [3], [4]], index=midx)
styler = df.style
styler.hide([("-", 0, "X"), ("-", 0, "Y")])
styler.hide(level=1)
result = styler.to_latex(clines=clines, environment=env)
assert expected in result
def test_col_format_len(styler):
# gh 46037
result = styler.to_latex(environment="longtable", column_format="lrr{10cm}")
expected = r"\multicolumn{4}{r}{Continued on next page} \\"
assert expected in result

View File

@@ -0,0 +1,85 @@
import numpy as np
import pytest
from pandas import DataFrame
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
@pytest.fixture
def df():
return DataFrame(
data=[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
columns=["A", "B", "C"],
index=["x", "y", "z"],
)
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0)
@pytest.mark.parametrize(
"ttips",
[
DataFrame( # Test basic reindex and ignoring blank
data=[["Min", "Max"], [np.nan, ""]],
columns=["A", "C"],
index=["x", "y"],
),
DataFrame( # Test non-referenced columns, reversed col names, short index
data=[["Max", "Min", "Bad-Col"]], columns=["C", "A", "D"], index=["x"]
),
],
)
def test_tooltip_render(ttips, styler):
# GH 21266
result = styler.set_tooltips(ttips).to_html()
# test tooltip table level class
assert "#T_ .pd-t {\n visibility: hidden;\n" in result
# test 'Min' tooltip added
assert "#T_ #T__row0_col0:hover .pd-t {\n visibility: visible;\n}" in result
assert '#T_ #T__row0_col0 .pd-t::after {\n content: "Min";\n}' in result
assert 'class="data row0 col0" >0<span class="pd-t"></span></td>' in result
# test 'Max' tooltip added
assert "#T_ #T__row0_col2:hover .pd-t {\n visibility: visible;\n}" in result
assert '#T_ #T__row0_col2 .pd-t::after {\n content: "Max";\n}' in result
assert 'class="data row0 col2" >2<span class="pd-t"></span></td>' in result
# test Nan, empty string and bad column ignored
assert "#T_ #T__row1_col0:hover .pd-t {\n visibility: visible;\n}" not in result
assert "#T_ #T__row1_col1:hover .pd-t {\n visibility: visible;\n}" not in result
assert "#T_ #T__row0_col1:hover .pd-t {\n visibility: visible;\n}" not in result
assert "#T_ #T__row1_col2:hover .pd-t {\n visibility: visible;\n}" not in result
assert "Bad-Col" not in result
def test_tooltip_ignored(styler):
# GH 21266
result = styler.to_html() # no set_tooltips() creates no <span>
assert '<style type="text/css">\n</style>' in result
assert '<span class="pd-t"></span>' not in result
def test_tooltip_css_class(styler):
# GH 21266
result = styler.set_tooltips(
DataFrame([["tooltip"]], index=["x"], columns=["A"]),
css_class="other-class",
props=[("color", "green")],
).to_html()
assert "#T_ .other-class {\n color: green;\n" in result
assert '#T_ #T__row0_col0 .other-class::after {\n content: "tooltip";\n' in result
# GH 39563
result = styler.set_tooltips( # set_tooltips overwrites previous
DataFrame([["tooltip"]], index=["x"], columns=["A"]),
css_class="another-class",
props="color:green;color:red;",
).to_html()
assert "#T_ .another-class {\n color: green;\n color: red;\n}" in result

View File

@@ -0,0 +1,72 @@
import locale
import pytest
from pandas._config import detect_console_encoding
class MockEncoding:
"""
Used to add a side effect when accessing the 'encoding' property. If the
side effect is a str in nature, the value will be returned. Otherwise, the
side effect should be an exception that will be raised.
"""
def __init__(self, encoding):
super().__init__()
self.val = encoding
@property
def encoding(self):
return self.raise_or_return(self.val)
@staticmethod
def raise_or_return(val):
if isinstance(val, str):
return val
else:
raise val
@pytest.mark.parametrize("empty,filled", [["stdin", "stdout"], ["stdout", "stdin"]])
def test_detect_console_encoding_from_stdout_stdin(monkeypatch, empty, filled):
# Ensures that when sys.stdout.encoding or sys.stdin.encoding is used when
# they have values filled.
# GH 21552
with monkeypatch.context() as context:
context.setattr(f"sys.{empty}", MockEncoding(""))
context.setattr(f"sys.{filled}", MockEncoding(filled))
assert detect_console_encoding() == filled
@pytest.mark.parametrize("encoding", [AttributeError, OSError, "ascii"])
def test_detect_console_encoding_fallback_to_locale(monkeypatch, encoding):
# GH 21552
with monkeypatch.context() as context:
context.setattr("locale.getpreferredencoding", lambda: "foo")
context.setattr("sys.stdout", MockEncoding(encoding))
assert detect_console_encoding() == "foo"
@pytest.mark.parametrize(
"std,locale",
[
["ascii", "ascii"],
["ascii", locale.Error],
[AttributeError, "ascii"],
[AttributeError, locale.Error],
[OSError, "ascii"],
[OSError, locale.Error],
],
)
def test_detect_console_encoding_fallback_to_default(monkeypatch, std, locale):
# When both the stdout/stdin encoding and locale preferred encoding checks
# fail (or return 'ascii', we should default to the sys default encoding.
# GH 21552
with monkeypatch.context() as context:
context.setattr(
"locale.getpreferredencoding", lambda: MockEncoding.raise_or_return(locale)
)
context.setattr("sys.stdout", MockEncoding(std))
context.setattr("sys.getdefaultencoding", lambda: "sysDefaultEncoding")
assert detect_console_encoding() == "sysDefaultEncoding"

View File

@@ -0,0 +1,229 @@
import pytest
import pandas._testing as tm
from pandas.io.formats.css import (
CSSResolver,
CSSWarning,
)
def assert_resolves(css, props, inherited=None):
resolve = CSSResolver()
actual = resolve(css, inherited=inherited)
assert props == actual
def assert_same_resolution(css1, css2, inherited=None):
resolve = CSSResolver()
resolved1 = resolve(css1, inherited=inherited)
resolved2 = resolve(css2, inherited=inherited)
assert resolved1 == resolved2
@pytest.mark.parametrize(
"name,norm,abnorm",
[
(
"whitespace",
"hello: world; foo: bar",
" \t hello \t :\n world \n ; \n foo: \tbar\n\n",
),
("case", "hello: world; foo: bar", "Hello: WORLD; foO: bar"),
("empty-decl", "hello: world; foo: bar", "; hello: world;; foo: bar;\n; ;"),
("empty-list", "", ";"),
],
)
def test_css_parse_normalisation(name, norm, abnorm):
assert_same_resolution(norm, abnorm)
@pytest.mark.parametrize(
"invalid_css,remainder",
[
# No colon
("hello-world", ""),
("border-style: solid; hello-world", "border-style: solid"),
(
"border-style: solid; hello-world; font-weight: bold",
"border-style: solid; font-weight: bold",
),
# Unclosed string fail
# Invalid size
("font-size: blah", "font-size: 1em"),
("font-size: 1a2b", "font-size: 1em"),
("font-size: 1e5pt", "font-size: 1em"),
("font-size: 1+6pt", "font-size: 1em"),
("font-size: 1unknownunit", "font-size: 1em"),
("font-size: 10", "font-size: 1em"),
("font-size: 10 pt", "font-size: 1em"),
],
)
def test_css_parse_invalid(invalid_css, remainder):
with tm.assert_produces_warning(CSSWarning):
assert_same_resolution(invalid_css, remainder)
@pytest.mark.parametrize(
"shorthand,expansions",
[
("margin", ["margin-top", "margin-right", "margin-bottom", "margin-left"]),
("padding", ["padding-top", "padding-right", "padding-bottom", "padding-left"]),
(
"border-width",
[
"border-top-width",
"border-right-width",
"border-bottom-width",
"border-left-width",
],
),
(
"border-color",
[
"border-top-color",
"border-right-color",
"border-bottom-color",
"border-left-color",
],
),
(
"border-style",
[
"border-top-style",
"border-right-style",
"border-bottom-style",
"border-left-style",
],
),
],
)
def test_css_side_shorthands(shorthand, expansions):
top, right, bottom, left = expansions
assert_resolves(
f"{shorthand}: 1pt", {top: "1pt", right: "1pt", bottom: "1pt", left: "1pt"}
)
assert_resolves(
f"{shorthand}: 1pt 4pt", {top: "1pt", right: "4pt", bottom: "1pt", left: "4pt"}
)
assert_resolves(
f"{shorthand}: 1pt 4pt 2pt",
{top: "1pt", right: "4pt", bottom: "2pt", left: "4pt"},
)
assert_resolves(
f"{shorthand}: 1pt 4pt 2pt 0pt",
{top: "1pt", right: "4pt", bottom: "2pt", left: "0pt"},
)
with tm.assert_produces_warning(CSSWarning):
assert_resolves(f"{shorthand}: 1pt 1pt 1pt 1pt 1pt", {})
@pytest.mark.parametrize(
"style,inherited,equiv",
[
("margin: 1px; margin: 2px", "", "margin: 2px"),
("margin: 1px", "margin: 2px", "margin: 1px"),
("margin: 1px; margin: inherit", "margin: 2px", "margin: 2px"),
(
"margin: 1px; margin-top: 2px",
"",
"margin-left: 1px; margin-right: 1px; "
+ "margin-bottom: 1px; margin-top: 2px",
),
("margin-top: 2px", "margin: 1px", "margin: 1px; margin-top: 2px"),
("margin: 1px", "margin-top: 2px", "margin: 1px"),
(
"margin: 1px; margin-top: inherit",
"margin: 2px",
"margin: 1px; margin-top: 2px",
),
],
)
def test_css_precedence(style, inherited, equiv):
resolve = CSSResolver()
inherited_props = resolve(inherited)
style_props = resolve(style, inherited=inherited_props)
equiv_props = resolve(equiv)
assert style_props == equiv_props
@pytest.mark.parametrize(
"style,equiv",
[
(
"margin: 1px; margin-top: inherit",
"margin-bottom: 1px; margin-right: 1px; margin-left: 1px",
),
("margin-top: inherit", ""),
("margin-top: initial", ""),
],
)
def test_css_none_absent(style, equiv):
assert_same_resolution(style, equiv)
@pytest.mark.parametrize(
"size,resolved",
[
("xx-small", "6pt"),
("x-small", f"{7.5:f}pt"),
("small", f"{9.6:f}pt"),
("medium", "12pt"),
("large", f"{13.5:f}pt"),
("x-large", "18pt"),
("xx-large", "24pt"),
("8px", "6pt"),
("1.25pc", "15pt"),
(".25in", "18pt"),
("02.54cm", "72pt"),
("25.4mm", "72pt"),
("101.6q", "72pt"),
("101.6q", "72pt"),
],
)
@pytest.mark.parametrize("relative_to", [None, "16pt"]) # invariant to inherited size
def test_css_absolute_font_size(size, relative_to, resolved):
if relative_to is None:
inherited = None
else:
inherited = {"font-size": relative_to}
assert_resolves(f"font-size: {size}", {"font-size": resolved}, inherited=inherited)
@pytest.mark.parametrize(
"size,relative_to,resolved",
[
("1em", None, "12pt"),
("1.0em", None, "12pt"),
("1.25em", None, "15pt"),
("1em", "16pt", "16pt"),
("1.0em", "16pt", "16pt"),
("1.25em", "16pt", "20pt"),
("1rem", "16pt", "12pt"),
("1.0rem", "16pt", "12pt"),
("1.25rem", "16pt", "15pt"),
("100%", None, "12pt"),
("125%", None, "15pt"),
("100%", "16pt", "16pt"),
("125%", "16pt", "20pt"),
("2ex", None, "12pt"),
("2.0ex", None, "12pt"),
("2.50ex", None, "15pt"),
("inherit", "16pt", "16pt"),
("smaller", None, "10pt"),
("smaller", "18pt", "15pt"),
("larger", None, f"{14.4:f}pt"),
("larger", "15pt", "18pt"),
],
)
def test_css_relative_font_size(size, relative_to, resolved):
if relative_to is None:
inherited = None
else:
inherited = {"font-size": relative_to}
assert_resolves(f"font-size: {size}", {"font-size": resolved}, inherited=inherited)

View File

@@ -0,0 +1,234 @@
import numpy as np
from pandas import DataFrame
import pandas._testing as tm
import pandas.io.formats.format as fmt
class TestEngFormatter:
def test_eng_float_formatter(self):
df = DataFrame({"A": [1.41, 141.0, 14100, 1410000.0]})
fmt.set_eng_float_format()
result = df.to_string()
expected = (
" A\n"
"0 1.410E+00\n"
"1 141.000E+00\n"
"2 14.100E+03\n"
"3 1.410E+06"
)
assert result == expected
fmt.set_eng_float_format(use_eng_prefix=True)
result = df.to_string()
expected = " A\n0 1.410\n1 141.000\n2 14.100k\n3 1.410M"
assert result == expected
fmt.set_eng_float_format(accuracy=0)
result = df.to_string()
expected = " A\n0 1E+00\n1 141E+00\n2 14E+03\n3 1E+06"
assert result == expected
tm.reset_display_options()
def compare(self, formatter, input, output):
formatted_input = formatter(input)
assert formatted_input == output
def compare_all(self, formatter, in_out):
"""
Parameters:
-----------
formatter: EngFormatter under test
in_out: list of tuples. Each tuple = (number, expected_formatting)
It is tested if 'formatter(number) == expected_formatting'.
*number* should be >= 0 because formatter(-number) == fmt is also
tested. *fmt* is derived from *expected_formatting*
"""
for input, output in in_out:
self.compare(formatter, input, output)
self.compare(formatter, -input, "-" + output[1:])
def test_exponents_with_eng_prefix(self):
formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True)
f = np.sqrt(2)
in_out = [
(f * 10**-24, " 1.414y"),
(f * 10**-23, " 14.142y"),
(f * 10**-22, " 141.421y"),
(f * 10**-21, " 1.414z"),
(f * 10**-20, " 14.142z"),
(f * 10**-19, " 141.421z"),
(f * 10**-18, " 1.414a"),
(f * 10**-17, " 14.142a"),
(f * 10**-16, " 141.421a"),
(f * 10**-15, " 1.414f"),
(f * 10**-14, " 14.142f"),
(f * 10**-13, " 141.421f"),
(f * 10**-12, " 1.414p"),
(f * 10**-11, " 14.142p"),
(f * 10**-10, " 141.421p"),
(f * 10**-9, " 1.414n"),
(f * 10**-8, " 14.142n"),
(f * 10**-7, " 141.421n"),
(f * 10**-6, " 1.414u"),
(f * 10**-5, " 14.142u"),
(f * 10**-4, " 141.421u"),
(f * 10**-3, " 1.414m"),
(f * 10**-2, " 14.142m"),
(f * 10**-1, " 141.421m"),
(f * 10**0, " 1.414"),
(f * 10**1, " 14.142"),
(f * 10**2, " 141.421"),
(f * 10**3, " 1.414k"),
(f * 10**4, " 14.142k"),
(f * 10**5, " 141.421k"),
(f * 10**6, " 1.414M"),
(f * 10**7, " 14.142M"),
(f * 10**8, " 141.421M"),
(f * 10**9, " 1.414G"),
(f * 10**10, " 14.142G"),
(f * 10**11, " 141.421G"),
(f * 10**12, " 1.414T"),
(f * 10**13, " 14.142T"),
(f * 10**14, " 141.421T"),
(f * 10**15, " 1.414P"),
(f * 10**16, " 14.142P"),
(f * 10**17, " 141.421P"),
(f * 10**18, " 1.414E"),
(f * 10**19, " 14.142E"),
(f * 10**20, " 141.421E"),
(f * 10**21, " 1.414Z"),
(f * 10**22, " 14.142Z"),
(f * 10**23, " 141.421Z"),
(f * 10**24, " 1.414Y"),
(f * 10**25, " 14.142Y"),
(f * 10**26, " 141.421Y"),
]
self.compare_all(formatter, in_out)
def test_exponents_without_eng_prefix(self):
formatter = fmt.EngFormatter(accuracy=4, use_eng_prefix=False)
f = np.pi
in_out = [
(f * 10**-24, " 3.1416E-24"),
(f * 10**-23, " 31.4159E-24"),
(f * 10**-22, " 314.1593E-24"),
(f * 10**-21, " 3.1416E-21"),
(f * 10**-20, " 31.4159E-21"),
(f * 10**-19, " 314.1593E-21"),
(f * 10**-18, " 3.1416E-18"),
(f * 10**-17, " 31.4159E-18"),
(f * 10**-16, " 314.1593E-18"),
(f * 10**-15, " 3.1416E-15"),
(f * 10**-14, " 31.4159E-15"),
(f * 10**-13, " 314.1593E-15"),
(f * 10**-12, " 3.1416E-12"),
(f * 10**-11, " 31.4159E-12"),
(f * 10**-10, " 314.1593E-12"),
(f * 10**-9, " 3.1416E-09"),
(f * 10**-8, " 31.4159E-09"),
(f * 10**-7, " 314.1593E-09"),
(f * 10**-6, " 3.1416E-06"),
(f * 10**-5, " 31.4159E-06"),
(f * 10**-4, " 314.1593E-06"),
(f * 10**-3, " 3.1416E-03"),
(f * 10**-2, " 31.4159E-03"),
(f * 10**-1, " 314.1593E-03"),
(f * 10**0, " 3.1416E+00"),
(f * 10**1, " 31.4159E+00"),
(f * 10**2, " 314.1593E+00"),
(f * 10**3, " 3.1416E+03"),
(f * 10**4, " 31.4159E+03"),
(f * 10**5, " 314.1593E+03"),
(f * 10**6, " 3.1416E+06"),
(f * 10**7, " 31.4159E+06"),
(f * 10**8, " 314.1593E+06"),
(f * 10**9, " 3.1416E+09"),
(f * 10**10, " 31.4159E+09"),
(f * 10**11, " 314.1593E+09"),
(f * 10**12, " 3.1416E+12"),
(f * 10**13, " 31.4159E+12"),
(f * 10**14, " 314.1593E+12"),
(f * 10**15, " 3.1416E+15"),
(f * 10**16, " 31.4159E+15"),
(f * 10**17, " 314.1593E+15"),
(f * 10**18, " 3.1416E+18"),
(f * 10**19, " 31.4159E+18"),
(f * 10**20, " 314.1593E+18"),
(f * 10**21, " 3.1416E+21"),
(f * 10**22, " 31.4159E+21"),
(f * 10**23, " 314.1593E+21"),
(f * 10**24, " 3.1416E+24"),
(f * 10**25, " 31.4159E+24"),
(f * 10**26, " 314.1593E+24"),
]
self.compare_all(formatter, in_out)
def test_rounding(self):
formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True)
in_out = [
(5.55555, " 5.556"),
(55.5555, " 55.556"),
(555.555, " 555.555"),
(5555.55, " 5.556k"),
(55555.5, " 55.556k"),
(555555, " 555.555k"),
]
self.compare_all(formatter, in_out)
formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True)
in_out = [
(5.55555, " 5.6"),
(55.5555, " 55.6"),
(555.555, " 555.6"),
(5555.55, " 5.6k"),
(55555.5, " 55.6k"),
(555555, " 555.6k"),
]
self.compare_all(formatter, in_out)
formatter = fmt.EngFormatter(accuracy=0, use_eng_prefix=True)
in_out = [
(5.55555, " 6"),
(55.5555, " 56"),
(555.555, " 556"),
(5555.55, " 6k"),
(55555.5, " 56k"),
(555555, " 556k"),
]
self.compare_all(formatter, in_out)
formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True)
result = formatter(0)
assert result == " 0.000"
def test_nan(self):
# Issue #11981
formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True)
result = formatter(np.nan)
assert result == "NaN"
df = DataFrame(
{
"a": [1.5, 10.3, 20.5],
"b": [50.3, 60.67, 70.12],
"c": [100.2, 101.33, 120.33],
}
)
pt = df.pivot_table(values="a", index="b", columns="c")
fmt.set_eng_float_format(accuracy=1)
result = pt.to_string()
assert "NaN" in result
tm.reset_display_options()
def test_inf(self):
# Issue #11981
formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True)
result = formatter(np.inf)
assert result == "inf"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,492 @@
from io import StringIO
import re
from string import ascii_uppercase as uppercase
import sys
import textwrap
import numpy as np
import pytest
from pandas.compat import (
IS64,
PYPY,
)
from pandas import (
CategoricalIndex,
DataFrame,
MultiIndex,
Series,
date_range,
option_context,
)
@pytest.fixture
def duplicate_columns_frame():
"""Dataframe with duplicate column names."""
return DataFrame(np.random.randn(1500, 4), columns=["a", "a", "b", "b"])
def test_info_empty():
df = DataFrame()
buf = StringIO()
df.info(buf=buf)
result = buf.getvalue()
expected = textwrap.dedent(
"""\
<class 'pandas.core.frame.DataFrame'>
Index: 0 entries
Empty DataFrame"""
)
assert result == expected
def test_info_categorical_column_smoke_test():
n = 2500
df = DataFrame({"int64": np.random.randint(100, size=n)})
df["category"] = Series(
np.array(list("abcdefghij")).take(np.random.randint(0, 10, size=n))
).astype("category")
df.isna()
buf = StringIO()
df.info(buf=buf)
df2 = df[df["category"] == "d"]
buf = StringIO()
df2.info(buf=buf)
@pytest.mark.parametrize(
"fixture_func_name",
[
"int_frame",
"float_frame",
"datetime_frame",
"duplicate_columns_frame",
],
)
def test_info_smoke_test(fixture_func_name, request):
frame = request.getfixturevalue(fixture_func_name)
buf = StringIO()
frame.info(buf=buf)
result = buf.getvalue().splitlines()
assert len(result) > 10
@pytest.mark.parametrize(
"num_columns, max_info_columns, verbose",
[
(10, 100, True),
(10, 11, True),
(10, 10, True),
(10, 9, False),
(10, 1, False),
],
)
def test_info_default_verbose_selection(num_columns, max_info_columns, verbose):
frame = DataFrame(np.random.randn(5, num_columns))
with option_context("display.max_info_columns", max_info_columns):
io_default = StringIO()
frame.info(buf=io_default)
result = io_default.getvalue()
io_explicit = StringIO()
frame.info(buf=io_explicit, verbose=verbose)
expected = io_explicit.getvalue()
assert result == expected
def test_info_verbose_check_header_separator_body():
buf = StringIO()
size = 1001
start = 5
frame = DataFrame(np.random.randn(3, size))
frame.info(verbose=True, buf=buf)
res = buf.getvalue()
header = " # Column Dtype \n--- ------ ----- "
assert header in res
frame.info(verbose=True, buf=buf)
buf.seek(0)
lines = buf.readlines()
assert len(lines) > 0
for i, line in enumerate(lines):
if i >= start and i < start + size:
line_nr = f" {i - start} "
assert line.startswith(line_nr)
@pytest.mark.parametrize(
"size, header_exp, separator_exp, first_line_exp, last_line_exp",
[
(
4,
" # Column Non-Null Count Dtype ",
"--- ------ -------------- ----- ",
" 0 0 3 non-null float64",
" 3 3 3 non-null float64",
),
(
11,
" # Column Non-Null Count Dtype ",
"--- ------ -------------- ----- ",
" 0 0 3 non-null float64",
" 10 10 3 non-null float64",
),
(
101,
" # Column Non-Null Count Dtype ",
"--- ------ -------------- ----- ",
" 0 0 3 non-null float64",
" 100 100 3 non-null float64",
),
(
1001,
" # Column Non-Null Count Dtype ",
"--- ------ -------------- ----- ",
" 0 0 3 non-null float64",
" 1000 1000 3 non-null float64",
),
(
10001,
" # Column Non-Null Count Dtype ",
"--- ------ -------------- ----- ",
" 0 0 3 non-null float64",
" 10000 10000 3 non-null float64",
),
],
)
def test_info_verbose_with_counts_spacing(
size, header_exp, separator_exp, first_line_exp, last_line_exp
):
"""Test header column, spacer, first line and last line in verbose mode."""
frame = DataFrame(np.random.randn(3, size))
with StringIO() as buf:
frame.info(verbose=True, show_counts=True, buf=buf)
all_lines = buf.getvalue().splitlines()
# Here table would contain only header, separator and table lines
# dframe repr, index summary, memory usage and dtypes are excluded
table = all_lines[3:-2]
header, separator, first_line, *rest, last_line = table
assert header == header_exp
assert separator == separator_exp
assert first_line == first_line_exp
assert last_line == last_line_exp
def test_info_memory():
# https://github.com/pandas-dev/pandas/issues/21056
df = DataFrame({"a": Series([1, 2], dtype="i8")})
buf = StringIO()
df.info(buf=buf)
result = buf.getvalue()
bytes = float(df.memory_usage().sum())
expected = textwrap.dedent(
f"""\
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 1 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 a 2 non-null int64
dtypes: int64(1)
memory usage: {bytes} bytes
"""
)
assert result == expected
def test_info_wide():
io = StringIO()
df = DataFrame(np.random.randn(5, 101))
df.info(buf=io)
io = StringIO()
df.info(buf=io, max_cols=101)
result = io.getvalue()
assert len(result.splitlines()) > 100
expected = result
with option_context("display.max_info_columns", 101):
io = StringIO()
df.info(buf=io)
result = io.getvalue()
assert result == expected
def test_info_duplicate_columns_shows_correct_dtypes():
# GH11761
io = StringIO()
frame = DataFrame([[1, 2.0]], columns=["a", "a"])
frame.info(buf=io)
lines = io.getvalue().splitlines(True)
assert " 0 a 1 non-null int64 \n" == lines[5]
assert " 1 a 1 non-null float64\n" == lines[6]
def test_info_shows_column_dtypes():
dtypes = [
"int64",
"float64",
"datetime64[ns]",
"timedelta64[ns]",
"complex128",
"object",
"bool",
]
data = {}
n = 10
for i, dtype in enumerate(dtypes):
data[i] = np.random.randint(2, size=n).astype(dtype)
df = DataFrame(data)
buf = StringIO()
df.info(buf=buf)
res = buf.getvalue()
header = (
" # Column Non-Null Count Dtype \n"
"--- ------ -------------- ----- "
)
assert header in res
for i, dtype in enumerate(dtypes):
name = f" {i:d} {i:d} {n:d} non-null {dtype}"
assert name in res
def test_info_max_cols():
df = DataFrame(np.random.randn(10, 5))
for len_, verbose in [(5, None), (5, False), (12, True)]:
# For verbose always ^ setting ^ summarize ^ full output
with option_context("max_info_columns", 4):
buf = StringIO()
df.info(buf=buf, verbose=verbose)
res = buf.getvalue()
assert len(res.strip().split("\n")) == len_
for len_, verbose in [(12, None), (5, False), (12, True)]:
# max_cols not exceeded
with option_context("max_info_columns", 5):
buf = StringIO()
df.info(buf=buf, verbose=verbose)
res = buf.getvalue()
assert len(res.strip().split("\n")) == len_
for len_, max_cols in [(12, 5), (5, 4)]:
# setting truncates
with option_context("max_info_columns", 4):
buf = StringIO()
df.info(buf=buf, max_cols=max_cols)
res = buf.getvalue()
assert len(res.strip().split("\n")) == len_
# setting wouldn't truncate
with option_context("max_info_columns", 5):
buf = StringIO()
df.info(buf=buf, max_cols=max_cols)
res = buf.getvalue()
assert len(res.strip().split("\n")) == len_
def test_info_memory_usage():
# Ensure memory usage is displayed, when asserted, on the last line
dtypes = [
"int64",
"float64",
"datetime64[ns]",
"timedelta64[ns]",
"complex128",
"object",
"bool",
]
data = {}
n = 10
for i, dtype in enumerate(dtypes):
data[i] = np.random.randint(2, size=n).astype(dtype)
df = DataFrame(data)
buf = StringIO()
# display memory usage case
df.info(buf=buf, memory_usage=True)
res = buf.getvalue().splitlines()
assert "memory usage: " in res[-1]
# do not display memory usage case
df.info(buf=buf, memory_usage=False)
res = buf.getvalue().splitlines()
assert "memory usage: " not in res[-1]
df.info(buf=buf, memory_usage=True)
res = buf.getvalue().splitlines()
# memory usage is a lower bound, so print it as XYZ+ MB
assert re.match(r"memory usage: [^+]+\+", res[-1])
df.iloc[:, :5].info(buf=buf, memory_usage=True)
res = buf.getvalue().splitlines()
# excluded column with object dtype, so estimate is accurate
assert not re.match(r"memory usage: [^+]+\+", res[-1])
# Test a DataFrame with duplicate columns
dtypes = ["int64", "int64", "int64", "float64"]
data = {}
n = 100
for i, dtype in enumerate(dtypes):
data[i] = np.random.randint(2, size=n).astype(dtype)
df = DataFrame(data)
df.columns = dtypes
df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
df_with_object_index.info(buf=buf, memory_usage=True)
res = buf.getvalue().splitlines()
assert re.match(r"memory usage: [^+]+\+", res[-1])
df_with_object_index.info(buf=buf, memory_usage="deep")
res = buf.getvalue().splitlines()
assert re.match(r"memory usage: [^+]+$", res[-1])
# Ensure df size is as expected
# (cols * rows * bytes) + index size
df_size = df.memory_usage().sum()
exp_size = len(dtypes) * n * 8 + df.index.nbytes
assert df_size == exp_size
# Ensure number of cols in memory_usage is the same as df
size_df = np.size(df.columns.values) + 1 # index=True; default
assert size_df == np.size(df.memory_usage())
# assert deep works only on object
assert df.memory_usage().sum() == df.memory_usage(deep=True).sum()
# test for validity
DataFrame(1, index=["a"], columns=["A"]).memory_usage(index=True)
DataFrame(1, index=["a"], columns=["A"]).index.nbytes
df = DataFrame(
data=1, index=MultiIndex.from_product([["a"], range(1000)]), columns=["A"]
)
df.index.nbytes
df.memory_usage(index=True)
df.index.values.nbytes
mem = df.memory_usage(deep=True).sum()
assert mem > 0
@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result")
def test_info_memory_usage_deep_not_pypy():
df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
assert (
df_with_object_index.memory_usage(index=True, deep=True).sum()
> df_with_object_index.memory_usage(index=True).sum()
)
df_object = DataFrame({"a": ["a"]})
assert df_object.memory_usage(deep=True).sum() > df_object.memory_usage().sum()
@pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result")
def test_info_memory_usage_deep_pypy():
df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
assert (
df_with_object_index.memory_usage(index=True, deep=True).sum()
== df_with_object_index.memory_usage(index=True).sum()
)
df_object = DataFrame({"a": ["a"]})
assert df_object.memory_usage(deep=True).sum() == df_object.memory_usage().sum()
@pytest.mark.skipif(PYPY, reason="PyPy getsizeof() fails by design")
def test_usage_via_getsizeof():
df = DataFrame(
data=1, index=MultiIndex.from_product([["a"], range(1000)]), columns=["A"]
)
mem = df.memory_usage(deep=True).sum()
# sys.getsizeof will call the .memory_usage with
# deep=True, and add on some GC overhead
diff = mem - sys.getsizeof(df)
assert abs(diff) < 100
def test_info_memory_usage_qualified():
buf = StringIO()
df = DataFrame(1, columns=list("ab"), index=[1, 2, 3])
df.info(buf=buf)
assert "+" not in buf.getvalue()
buf = StringIO()
df = DataFrame(1, columns=list("ab"), index=list("ABC"))
df.info(buf=buf)
assert "+" in buf.getvalue()
buf = StringIO()
df = DataFrame(
1, columns=list("ab"), index=MultiIndex.from_product([range(3), range(3)])
)
df.info(buf=buf)
assert "+" not in buf.getvalue()
buf = StringIO()
df = DataFrame(
1, columns=list("ab"), index=MultiIndex.from_product([range(3), ["foo", "bar"]])
)
df.info(buf=buf)
assert "+" in buf.getvalue()
def test_info_memory_usage_bug_on_multiindex():
# GH 14308
# memory usage introspection should not materialize .values
def memory_usage(f):
return f.memory_usage(deep=True).sum()
N = 100
M = len(uppercase)
index = MultiIndex.from_product(
[list(uppercase), date_range("20160101", periods=N)],
names=["id", "date"],
)
df = DataFrame({"value": np.random.randn(N * M)}, index=index)
unstacked = df.unstack("id")
assert df.values.nbytes == unstacked.values.nbytes
assert memory_usage(df) > memory_usage(unstacked)
# high upper bound
assert memory_usage(unstacked) - memory_usage(df) < 2000
def test_info_categorical():
# GH14298
idx = CategoricalIndex(["a", "b"])
df = DataFrame(np.zeros((2, 2)), index=idx, columns=idx)
buf = StringIO()
df.info(buf=buf)
@pytest.mark.xfail(not IS64, reason="GH 36579: fail on 32-bit system")
def test_info_int_columns():
# GH#37245
df = DataFrame({1: [1, 2], 2: [2, 3]}, index=["A", "B"])
buf = StringIO()
df.info(show_counts=True, buf=buf)
result = buf.getvalue()
expected = textwrap.dedent(
"""\
<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, A to B
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 1 2 non-null int64
1 2 2 non-null int64
dtypes: int64(2)
memory usage: 48.0+ bytes
"""
)
assert result == expected

View File

@@ -0,0 +1,200 @@
import numpy as np
import pytest
import pandas._config.config as cf
import pandas as pd
import pandas.io.formats.format as fmt
import pandas.io.formats.printing as printing
def test_adjoin():
data = [["a", "b", "c"], ["dd", "ee", "ff"], ["ggg", "hhh", "iii"]]
expected = "a dd ggg\nb ee hhh\nc ff iii"
adjoined = printing.adjoin(2, *data)
assert adjoined == expected
def test_repr_binary_type():
import string
letters = string.ascii_letters
try:
raw = bytes(letters, encoding=cf.get_option("display.encoding"))
except TypeError:
raw = bytes(letters)
b = str(raw.decode("utf-8"))
res = printing.pprint_thing(b, quote_strings=True)
assert res == repr(b)
res = printing.pprint_thing(b, quote_strings=False)
assert res == b
class TestFormattBase:
def test_adjoin(self):
data = [["a", "b", "c"], ["dd", "ee", "ff"], ["ggg", "hhh", "iii"]]
expected = "a dd ggg\nb ee hhh\nc ff iii"
adjoined = printing.adjoin(2, *data)
assert adjoined == expected
def test_adjoin_unicode(self):
data = [["", "b", "c"], ["dd", "ええ", "ff"], ["ggg", "hhh", "いいい"]]
expected = "あ dd ggg\nb ええ hhh\nc ff いいい"
adjoined = printing.adjoin(2, *data)
assert adjoined == expected
adj = fmt.EastAsianTextAdjustment()
expected = """あ dd ggg
b ええ hhh
c ff いいい"""
adjoined = adj.adjoin(2, *data)
assert adjoined == expected
cols = adjoined.split("\n")
assert adj.len(cols[0]) == 13
assert adj.len(cols[1]) == 13
assert adj.len(cols[2]) == 16
expected = """あ dd ggg
b ええ hhh
c ff いいい"""
adjoined = adj.adjoin(7, *data)
assert adjoined == expected
cols = adjoined.split("\n")
assert adj.len(cols[0]) == 23
assert adj.len(cols[1]) == 23
assert adj.len(cols[2]) == 26
def test_justify(self):
adj = fmt.EastAsianTextAdjustment()
def just(x, *args, **kwargs):
# wrapper to test single str
return adj.justify([x], *args, **kwargs)[0]
assert just("abc", 5, mode="left") == "abc "
assert just("abc", 5, mode="center") == " abc "
assert just("abc", 5, mode="right") == " abc"
assert just("abc", 5, mode="left") == "abc "
assert just("abc", 5, mode="center") == " abc "
assert just("abc", 5, mode="right") == " abc"
assert just("パンダ", 5, mode="left") == "パンダ"
assert just("パンダ", 5, mode="center") == "パンダ"
assert just("パンダ", 5, mode="right") == "パンダ"
assert just("パンダ", 10, mode="left") == "パンダ "
assert just("パンダ", 10, mode="center") == " パンダ "
assert just("パンダ", 10, mode="right") == " パンダ"
def test_east_asian_len(self):
adj = fmt.EastAsianTextAdjustment()
assert adj.len("abc") == 3
assert adj.len("abc") == 3
assert adj.len("パンダ") == 6
assert adj.len("パンダ") == 5
assert adj.len("パンダpanda") == 11
assert adj.len("パンダpanda") == 10
def test_ambiguous_width(self):
adj = fmt.EastAsianTextAdjustment()
assert adj.len("¡¡ab") == 4
with cf.option_context("display.unicode.ambiguous_as_wide", True):
adj = fmt.EastAsianTextAdjustment()
assert adj.len("¡¡ab") == 6
data = [["", "b", "c"], ["dd", "ええ", "ff"], ["ggg", "¡¡ab", "いいい"]]
expected = "あ dd ggg \nb ええ ¡¡ab\nc ff いいい"
adjoined = adj.adjoin(2, *data)
assert adjoined == expected
class TestTableSchemaRepr:
@pytest.mark.filterwarnings(
"ignore:.*signature may therefore change.*:FutureWarning"
)
def test_publishes(self, ip):
ipython = ip.instance(config=ip.config)
df = pd.DataFrame({"A": [1, 2]})
objects = [df["A"], df, df] # dataframe / series
expected_keys = [
{"text/plain", "application/vnd.dataresource+json"},
{"text/plain", "text/html", "application/vnd.dataresource+json"},
]
opt = pd.option_context("display.html.table_schema", True)
for obj, expected in zip(objects, expected_keys):
with opt:
formatted = ipython.display_formatter.format(obj)
assert set(formatted[0].keys()) == expected
with_latex = pd.option_context("display.latex.repr", True)
with opt, with_latex:
formatted = ipython.display_formatter.format(obj)
expected = {
"text/plain",
"text/html",
"text/latex",
"application/vnd.dataresource+json",
}
assert set(formatted[0].keys()) == expected
def test_publishes_not_implemented(self, ip):
# column MultiIndex
# GH 15996
midx = pd.MultiIndex.from_product([["A", "B"], ["a", "b", "c"]])
df = pd.DataFrame(np.random.randn(5, len(midx)), columns=midx)
opt = pd.option_context("display.html.table_schema", True)
with opt:
formatted = ip.instance(config=ip.config).display_formatter.format(df)
expected = {"text/plain", "text/html"}
assert set(formatted[0].keys()) == expected
def test_config_on(self):
df = pd.DataFrame({"A": [1, 2]})
with pd.option_context("display.html.table_schema", True):
result = df._repr_data_resource_()
assert result is not None
def test_config_default_off(self):
df = pd.DataFrame({"A": [1, 2]})
with pd.option_context("display.html.table_schema", False):
result = df._repr_data_resource_()
assert result is None
def test_enable_data_resource_formatter(self, ip):
# GH 10491
formatters = ip.instance(config=ip.config).display_formatter.formatters
mimetype = "application/vnd.dataresource+json"
with pd.option_context("display.html.table_schema", True):
assert "application/vnd.dataresource+json" in formatters
assert formatters[mimetype].enabled
# still there, just disabled
assert "application/vnd.dataresource+json" in formatters
assert not formatters[mimetype].enabled
# able to re-set
with pd.option_context("display.html.table_schema", True):
assert "application/vnd.dataresource+json" in formatters
assert formatters[mimetype].enabled
# smoke test that it works
ip.instance(config=ip.config).display_formatter.format(cf)

View File

@@ -0,0 +1,179 @@
from io import StringIO
from string import ascii_uppercase as uppercase
import textwrap
import numpy as np
import pytest
from pandas.compat import PYPY
from pandas import (
CategoricalIndex,
MultiIndex,
Series,
date_range,
)
def test_info_categorical_column_just_works():
n = 2500
data = np.array(list("abcdefghij")).take(np.random.randint(0, 10, size=n))
s = Series(data).astype("category")
s.isna()
buf = StringIO()
s.info(buf=buf)
s2 = s[s == "d"]
buf = StringIO()
s2.info(buf=buf)
def test_info_categorical():
# GH14298
idx = CategoricalIndex(["a", "b"])
s = Series(np.zeros(2), index=idx)
buf = StringIO()
s.info(buf=buf)
@pytest.mark.parametrize("verbose", [True, False])
def test_info_series(lexsorted_two_level_string_multiindex, verbose):
index = lexsorted_two_level_string_multiindex
ser = Series(range(len(index)), index=index, name="sth")
buf = StringIO()
ser.info(verbose=verbose, buf=buf)
result = buf.getvalue()
expected = textwrap.dedent(
"""\
<class 'pandas.core.series.Series'>
MultiIndex: 10 entries, ('foo', 'one') to ('qux', 'three')
"""
)
if verbose:
expected += textwrap.dedent(
"""\
Series name: sth
Non-Null Count Dtype
-------------- -----
10 non-null int64
"""
)
expected += textwrap.dedent(
f"""\
dtypes: int64(1)
memory usage: {ser.memory_usage()}.0+ bytes
"""
)
assert result == expected
def test_info_memory():
s = Series([1, 2], dtype="i8")
buf = StringIO()
s.info(buf=buf)
result = buf.getvalue()
memory_bytes = float(s.memory_usage())
expected = textwrap.dedent(
f"""\
<class 'pandas.core.series.Series'>
RangeIndex: 2 entries, 0 to 1
Series name: None
Non-Null Count Dtype
-------------- -----
2 non-null int64
dtypes: int64(1)
memory usage: {memory_bytes} bytes
"""
)
assert result == expected
def test_info_wide():
s = Series(np.random.randn(101))
msg = "Argument `max_cols` can only be passed in DataFrame.info, not Series.info"
with pytest.raises(ValueError, match=msg):
s.info(max_cols=1)
def test_info_shows_dtypes():
dtypes = [
"int64",
"float64",
"datetime64[ns]",
"timedelta64[ns]",
"complex128",
"object",
"bool",
]
n = 10
for dtype in dtypes:
s = Series(np.random.randint(2, size=n).astype(dtype))
buf = StringIO()
s.info(buf=buf)
res = buf.getvalue()
name = f"{n:d} non-null {dtype}"
assert name in res
@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result")
def test_info_memory_usage_deep_not_pypy():
s_with_object_index = Series({"a": [1]}, index=["foo"])
assert s_with_object_index.memory_usage(
index=True, deep=True
) > s_with_object_index.memory_usage(index=True)
s_object = Series({"a": ["a"]})
assert s_object.memory_usage(deep=True) > s_object.memory_usage()
@pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result")
def test_info_memory_usage_deep_pypy():
s_with_object_index = Series({"a": [1]}, index=["foo"])
assert s_with_object_index.memory_usage(
index=True, deep=True
) == s_with_object_index.memory_usage(index=True)
s_object = Series({"a": ["a"]})
assert s_object.memory_usage(deep=True) == s_object.memory_usage()
@pytest.mark.parametrize(
"series, plus",
[
(Series(1, index=[1, 2, 3]), False),
(Series(1, index=list("ABC")), True),
(Series(1, index=MultiIndex.from_product([range(3), range(3)])), False),
(
Series(1, index=MultiIndex.from_product([range(3), ["foo", "bar"]])),
True,
),
],
)
def test_info_memory_usage_qualified(series, plus):
buf = StringIO()
series.info(buf=buf)
if plus:
assert "+" in buf.getvalue()
else:
assert "+" not in buf.getvalue()
def test_info_memory_usage_bug_on_multiindex():
# GH 14308
# memory usage introspection should not materialize .values
N = 100
M = len(uppercase)
index = MultiIndex.from_product(
[list(uppercase), date_range("20160101", periods=N)],
names=["id", "date"],
)
s = Series(np.random.randn(N * M), index=index)
unstacked = s.unstack("id")
assert s.values.nbytes == unstacked.values.nbytes
assert s.memory_usage(deep=True) > unstacked.memory_usage(deep=True).sum()
# high upper bound
diff = unstacked.memory_usage(deep=True).sum() - s.memory_usage(deep=True)
assert diff < 2000

View File

@@ -0,0 +1,717 @@
import io
import os
from pathlib import Path
import sys
from zipfile import ZipFile
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
compat,
)
import pandas._testing as tm
import pandas.io.common as icom
class TestToCSV:
def test_to_csv_with_single_column(self):
# see gh-18676, https://bugs.python.org/issue32255
#
# Python's CSV library adds an extraneous '""'
# before the newline when the NaN-value is in
# the first row. Otherwise, only the newline
# character is added. This behavior is inconsistent
# and was patched in https://bugs.python.org/pull_request4672.
df1 = DataFrame([None, 1])
expected1 = """\
""
1.0
"""
with tm.ensure_clean("test.csv") as path:
df1.to_csv(path, header=None, index=None)
with open(path) as f:
assert f.read() == expected1
df2 = DataFrame([1, None])
expected2 = """\
1.0
""
"""
with tm.ensure_clean("test.csv") as path:
df2.to_csv(path, header=None, index=None)
with open(path) as f:
assert f.read() == expected2
def test_to_csv_defualt_encoding(self):
# GH17097
df = DataFrame({"col": ["AAAAA", "ÄÄÄÄÄ", "ßßßßß", "聞聞聞聞聞"]})
with tm.ensure_clean("test.csv") as path:
# the default to_csv encoding is uft-8.
df.to_csv(path)
tm.assert_frame_equal(pd.read_csv(path, index_col=0), df)
def test_to_csv_quotechar(self):
df = DataFrame({"col": [1, 2]})
expected = """\
"","col"
"0","1"
"1","2"
"""
with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=1) # 1=QUOTE_ALL
with open(path) as f:
assert f.read() == expected
expected = """\
$$,$col$
$0$,$1$
$1$,$2$
"""
with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=1, quotechar="$")
with open(path) as f:
assert f.read() == expected
with tm.ensure_clean("test.csv") as path:
with pytest.raises(TypeError, match="quotechar"):
df.to_csv(path, quoting=1, quotechar=None)
def test_to_csv_doublequote(self):
df = DataFrame({"col": ['a"a', '"bb"']})
expected = '''\
"","col"
"0","a""a"
"1","""bb"""
'''
with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL
with open(path) as f:
assert f.read() == expected
from _csv import Error
with tm.ensure_clean("test.csv") as path:
with pytest.raises(Error, match="escapechar"):
df.to_csv(path, doublequote=False) # no escapechar set
def test_to_csv_escapechar(self):
df = DataFrame({"col": ['a"a', '"bb"']})
expected = """\
"","col"
"0","a\\"a"
"1","\\"bb\\""
"""
with tm.ensure_clean("test.csv") as path: # QUOTE_ALL
df.to_csv(path, quoting=1, doublequote=False, escapechar="\\")
with open(path) as f:
assert f.read() == expected
df = DataFrame({"col": ["a,a", ",bb,"]})
expected = """\
,col
0,a\\,a
1,\\,bb\\,
"""
with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=3, escapechar="\\") # QUOTE_NONE
with open(path) as f:
assert f.read() == expected
def test_csv_to_string(self):
df = DataFrame({"col": [1, 2]})
expected_rows = [",col", "0,1", "1,2"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv() == expected
def test_to_csv_decimal(self):
# see gh-781
df = DataFrame({"col1": [1], "col2": ["a"], "col3": [10.1]})
expected_rows = [",col1,col2,col3", "0,1,a,10.1"]
expected_default = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv() == expected_default
expected_rows = [";col1;col2;col3", "0;1;a;10,1"]
expected_european_excel = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(decimal=",", sep=";") == expected_european_excel
expected_rows = [",col1,col2,col3", "0,1,a,10.10"]
expected_float_format_default = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(float_format="%.2f") == expected_float_format_default
expected_rows = [";col1;col2;col3", "0;1;a;10,10"]
expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows)
assert (
df.to_csv(decimal=",", sep=";", float_format="%.2f")
== expected_float_format
)
# see gh-11553: testing if decimal is taken into account for '0.0'
df = DataFrame({"a": [0, 1.1], "b": [2.2, 3.3], "c": 1})
expected_rows = ["a,b,c", "0^0,2^2,1", "1^1,3^3,1"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(index=False, decimal="^") == expected
# same but for an index
assert df.set_index("a").to_csv(decimal="^") == expected
# same for a multi-index
assert df.set_index(["a", "b"]).to_csv(decimal="^") == expected
def test_to_csv_float_format(self):
# testing if float_format is taken into account for the index
# GH 11553
df = DataFrame({"a": [0, 1], "b": [2.2, 3.3], "c": 1})
expected_rows = ["a,b,c", "0,2.20,1", "1,3.30,1"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(float_format="%.2f") == expected
# same for a multi-index
assert df.set_index(["a", "b"]).to_csv(float_format="%.2f") == expected
def test_to_csv_na_rep(self):
# see gh-11553
#
# Testing if NaN values are correctly represented in the index.
df = DataFrame({"a": [0, np.NaN], "b": [0, 1], "c": [2, 3]})
expected_rows = ["a,b,c", "0.0,0,2", "_,1,3"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(na_rep="_") == expected
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
# now with an index containing only NaNs
df = DataFrame({"a": np.NaN, "b": [0, 1], "c": [2, 3]})
expected_rows = ["a,b,c", "_,0,2", "_,1,3"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(na_rep="_") == expected
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
# check if na_rep parameter does not break anything when no NaN
df = DataFrame({"a": 0, "b": [0, 1], "c": [2, 3]})
expected_rows = ["a,b,c", "0,0,2", "0,1,3"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(na_rep="_") == expected
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
csv = pd.Series(["a", pd.NA, "c"]).to_csv(na_rep="ZZZZZ")
expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"])
assert expected == csv
def test_to_csv_na_rep_nullable_string(self, nullable_string_dtype):
# GH 29975
# Make sure full na_rep shows up when a dtype is provided
expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"])
csv = pd.Series(["a", pd.NA, "c"], dtype=nullable_string_dtype).to_csv(
na_rep="ZZZZZ"
)
assert expected == csv
def test_to_csv_date_format(self):
# GH 10209
df_sec = DataFrame({"A": pd.date_range("20130101", periods=5, freq="s")})
df_day = DataFrame({"A": pd.date_range("20130101", periods=5, freq="d")})
expected_rows = [
",A",
"0,2013-01-01 00:00:00",
"1,2013-01-01 00:00:01",
"2,2013-01-01 00:00:02",
"3,2013-01-01 00:00:03",
"4,2013-01-01 00:00:04",
]
expected_default_sec = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_sec.to_csv() == expected_default_sec
expected_rows = [
",A",
"0,2013-01-01 00:00:00",
"1,2013-01-02 00:00:00",
"2,2013-01-03 00:00:00",
"3,2013-01-04 00:00:00",
"4,2013-01-05 00:00:00",
]
expected_ymdhms_day = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_day.to_csv(date_format="%Y-%m-%d %H:%M:%S") == expected_ymdhms_day
expected_rows = [
",A",
"0,2013-01-01",
"1,2013-01-01",
"2,2013-01-01",
"3,2013-01-01",
"4,2013-01-01",
]
expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_sec.to_csv(date_format="%Y-%m-%d") == expected_ymd_sec
expected_rows = [
",A",
"0,2013-01-01",
"1,2013-01-02",
"2,2013-01-03",
"3,2013-01-04",
"4,2013-01-05",
]
expected_default_day = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_day.to_csv() == expected_default_day
assert df_day.to_csv(date_format="%Y-%m-%d") == expected_default_day
# see gh-7791
#
# Testing if date_format parameter is taken into account
# for multi-indexed DataFrames.
df_sec["B"] = 0
df_sec["C"] = 1
expected_rows = ["A,B,C", "2013-01-01,0,1.0"]
expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)
df_sec_grouped = df_sec.groupby([pd.Grouper(key="A", freq="1h"), "B"])
assert df_sec_grouped.mean().to_csv(date_format="%Y-%m-%d") == expected_ymd_sec
def test_to_csv_different_datetime_formats(self):
# GH#21734
df = DataFrame(
{
"date": pd.to_datetime("1970-01-01"),
"datetime": pd.date_range("1970-01-01", periods=2, freq="H"),
}
)
expected_rows = [
"date,datetime",
"1970-01-01,1970-01-01 00:00:00",
"1970-01-01,1970-01-01 01:00:00",
]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(index=False) == expected
def test_to_csv_date_format_in_categorical(self):
# GH#40754
ser = pd.Series(pd.to_datetime(["2021-03-27", pd.NaT], format="%Y-%m-%d"))
ser = ser.astype("category")
expected = tm.convert_rows_list_to_csv_str(["0", "2021-03-27", '""'])
assert ser.to_csv(index=False) == expected
ser = pd.Series(
pd.date_range(
start="2021-03-27", freq="D", periods=1, tz="Europe/Berlin"
).append(pd.DatetimeIndex([pd.NaT]))
)
ser = ser.astype("category")
assert ser.to_csv(index=False, date_format="%Y-%m-%d") == expected
def test_to_csv_multi_index(self):
# see gh-6618
df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]))
exp_rows = [",1", ",2", "0,1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ["1", "2", "1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
df = DataFrame(
[1],
columns=pd.MultiIndex.from_arrays([[1], [2]]),
index=pd.MultiIndex.from_arrays([[1], [2]]),
)
exp_rows = [",,1", ",,2", "1,2,1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ["1", "2", "1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
df = DataFrame([1], columns=pd.MultiIndex.from_arrays([["foo"], ["bar"]]))
exp_rows = [",foo", ",bar", "0,1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ["foo", "bar", "1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
@pytest.mark.parametrize(
"ind,expected",
[
(
pd.MultiIndex(levels=[[1.0]], codes=[[0]], names=["x"]),
"x,data\n1.0,1\n",
),
(
pd.MultiIndex(
levels=[[1.0], [2.0]], codes=[[0], [0]], names=["x", "y"]
),
"x,y,data\n1.0,2.0,1\n",
),
],
)
@pytest.mark.parametrize("klass", [DataFrame, pd.Series])
def test_to_csv_single_level_multi_index(self, ind, expected, klass):
# see gh-19589
result = klass(pd.Series([1], ind, name="data")).to_csv(
line_terminator="\n", header=True
)
assert result == expected
def test_to_csv_string_array_ascii(self):
# GH 10813
str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}]
df = DataFrame(str_array)
expected_ascii = """\
,names
0,"['foo', 'bar']"
1,"['baz', 'qux']"
"""
with tm.ensure_clean("str_test.csv") as path:
df.to_csv(path, encoding="ascii")
with open(path) as f:
assert f.read() == expected_ascii
def test_to_csv_string_array_utf8(self):
# GH 10813
str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}]
df = DataFrame(str_array)
expected_utf8 = """\
,names
0,"['foo', 'bar']"
1,"['baz', 'qux']"
"""
with tm.ensure_clean("unicode_test.csv") as path:
df.to_csv(path, encoding="utf-8")
with open(path) as f:
assert f.read() == expected_utf8
def test_to_csv_string_with_lf(self):
# GH 20353
data = {"int": [1, 2, 3], "str_lf": ["abc", "d\nef", "g\nh\n\ni"]}
df = DataFrame(data)
with tm.ensure_clean("lf_test.csv") as path:
# case 1: The default line terminator(=os.linesep)(PR 21406)
os_linesep = os.linesep.encode("utf-8")
expected_noarg = (
b"int,str_lf"
+ os_linesep
+ b"1,abc"
+ os_linesep
+ b'2,"d\nef"'
+ os_linesep
+ b'3,"g\nh\n\ni"'
+ os_linesep
)
df.to_csv(path, index=False)
with open(path, "rb") as f:
assert f.read() == expected_noarg
with tm.ensure_clean("lf_test.csv") as path:
# case 2: LF as line terminator
expected_lf = b'int,str_lf\n1,abc\n2,"d\nef"\n3,"g\nh\n\ni"\n'
df.to_csv(path, line_terminator="\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_lf
with tm.ensure_clean("lf_test.csv") as path:
# case 3: CRLF as line terminator
# 'line_terminator' should not change inner element
expected_crlf = b'int,str_lf\r\n1,abc\r\n2,"d\nef"\r\n3,"g\nh\n\ni"\r\n'
df.to_csv(path, line_terminator="\r\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_crlf
def test_to_csv_string_with_crlf(self):
# GH 20353
data = {"int": [1, 2, 3], "str_crlf": ["abc", "d\r\nef", "g\r\nh\r\n\r\ni"]}
df = DataFrame(data)
with tm.ensure_clean("crlf_test.csv") as path:
# case 1: The default line terminator(=os.linesep)(PR 21406)
os_linesep = os.linesep.encode("utf-8")
expected_noarg = (
b"int,str_crlf"
+ os_linesep
+ b"1,abc"
+ os_linesep
+ b'2,"d\r\nef"'
+ os_linesep
+ b'3,"g\r\nh\r\n\r\ni"'
+ os_linesep
)
df.to_csv(path, index=False)
with open(path, "rb") as f:
assert f.read() == expected_noarg
with tm.ensure_clean("crlf_test.csv") as path:
# case 2: LF as line terminator
expected_lf = b'int,str_crlf\n1,abc\n2,"d\r\nef"\n3,"g\r\nh\r\n\r\ni"\n'
df.to_csv(path, line_terminator="\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_lf
with tm.ensure_clean("crlf_test.csv") as path:
# case 3: CRLF as line terminator
# 'line_terminator' should not change inner element
expected_crlf = (
b"int,str_crlf\r\n"
b"1,abc\r\n"
b'2,"d\r\nef"\r\n'
b'3,"g\r\nh\r\n\r\ni"\r\n'
)
df.to_csv(path, line_terminator="\r\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_crlf
def test_to_csv_stdout_file(self, capsys):
# GH 21561
df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["name_1", "name_2"])
expected_rows = [",name_1,name_2", "0,foo,bar", "1,baz,qux"]
expected_ascii = tm.convert_rows_list_to_csv_str(expected_rows)
df.to_csv(sys.stdout, encoding="ascii")
captured = capsys.readouterr()
assert captured.out == expected_ascii
assert not sys.stdout.closed
@pytest.mark.xfail(
compat.is_platform_windows(),
reason=(
"Especially in Windows, file stream should not be passed"
"to csv writer without newline='' option."
"(https://docs.python.org/3.6/library/csv.html#csv.writer)"
),
)
def test_to_csv_write_to_open_file(self):
# GH 21696
df = DataFrame({"a": ["x", "y", "z"]})
expected = """\
manual header
x
y
z
"""
with tm.ensure_clean("test.txt") as path:
with open(path, "w") as f:
f.write("manual header\n")
df.to_csv(f, header=None, index=None)
with open(path) as f:
assert f.read() == expected
def test_to_csv_write_to_open_file_with_newline_py3(self):
# see gh-21696
# see gh-20353
df = DataFrame({"a": ["x", "y", "z"]})
expected_rows = ["x", "y", "z"]
expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows)
with tm.ensure_clean("test.txt") as path:
with open(path, "w", newline="") as f:
f.write("manual header\n")
df.to_csv(f, header=None, index=None)
with open(path, "rb") as f:
assert f.read() == bytes(expected, "utf-8")
@pytest.mark.parametrize("to_infer", [True, False])
@pytest.mark.parametrize("read_infer", [True, False])
def test_to_csv_compression(self, compression_only, read_infer, to_infer):
# see gh-15008
compression = compression_only
# We'll complete file extension subsequently.
filename = "test."
filename += icom._compression_to_extension[compression]
df = DataFrame({"A": [1]})
to_compression = "infer" if to_infer else compression
read_compression = "infer" if read_infer else compression
with tm.ensure_clean(filename) as path:
df.to_csv(path, compression=to_compression)
result = pd.read_csv(path, index_col=0, compression=read_compression)
tm.assert_frame_equal(result, df)
def test_to_csv_compression_dict(self, compression_only):
# GH 26023
method = compression_only
df = DataFrame({"ABC": [1]})
filename = "to_csv_compress_as_dict."
extension = {
"gzip": "gz",
"zstd": "zst",
}.get(method, method)
filename += extension
with tm.ensure_clean(filename) as path:
df.to_csv(path, compression={"method": method})
read_df = pd.read_csv(path, index_col=0)
tm.assert_frame_equal(read_df, df)
def test_to_csv_compression_dict_no_method_raises(self):
# GH 26023
df = DataFrame({"ABC": [1]})
compression = {"some_option": True}
msg = "must have key 'method'"
with tm.ensure_clean("out.zip") as path:
with pytest.raises(ValueError, match=msg):
df.to_csv(path, compression=compression)
@pytest.mark.parametrize("compression", ["zip", "infer"])
@pytest.mark.parametrize("archive_name", ["test_to_csv.csv", "test_to_csv.zip"])
def test_to_csv_zip_arguments(self, compression, archive_name):
# GH 26023
df = DataFrame({"ABC": [1]})
with tm.ensure_clean("to_csv_archive_name.zip") as path:
df.to_csv(
path, compression={"method": compression, "archive_name": archive_name}
)
with ZipFile(path) as zp:
assert len(zp.filelist) == 1
archived_file = zp.filelist[0].filename
assert archived_file == archive_name
@pytest.mark.parametrize(
"filename,expected_arcname",
[
("archive.csv", "archive.csv"),
("archive.tsv", "archive.tsv"),
("archive.csv.zip", "archive.csv"),
("archive.tsv.zip", "archive.tsv"),
("archive.zip", "archive"),
],
)
def test_to_csv_zip_infer_name(self, filename, expected_arcname):
# GH 39465
df = DataFrame({"ABC": [1]})
with tm.ensure_clean_dir() as dir:
path = Path(dir, filename)
df.to_csv(path, compression="zip")
with ZipFile(path) as zp:
assert len(zp.filelist) == 1
archived_file = zp.filelist[0].filename
assert archived_file == expected_arcname
@pytest.mark.parametrize("df_new_type", ["Int64"])
def test_to_csv_na_rep_long_string(self, df_new_type):
# see gh-25099
df = DataFrame({"c": [float("nan")] * 3})
df = df.astype(df_new_type)
expected_rows = ["c", "mynull", "mynull", "mynull"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
result = df.to_csv(index=False, na_rep="mynull", encoding="ascii")
assert expected == result
def test_to_csv_timedelta_precision(self):
# GH 6783
s = pd.Series([1, 1]).astype("timedelta64[ns]")
buf = io.StringIO()
s.to_csv(buf)
result = buf.getvalue()
expected_rows = [
",0",
"0,0 days 00:00:00.000000001",
"1,0 days 00:00:00.000000001",
]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert result == expected
def test_na_rep_truncated(self):
# https://github.com/pandas-dev/pandas/issues/31447
result = pd.Series(range(8, 12)).to_csv(na_rep="-")
expected = tm.convert_rows_list_to_csv_str([",0", "0,8", "1,9", "2,10", "3,11"])
assert result == expected
result = pd.Series([True, False]).to_csv(na_rep="nan")
expected = tm.convert_rows_list_to_csv_str([",0", "0,True", "1,False"])
assert result == expected
result = pd.Series([1.1, 2.2]).to_csv(na_rep=".")
expected = tm.convert_rows_list_to_csv_str([",0", "0,1.1", "1,2.2"])
assert result == expected
@pytest.mark.parametrize("errors", ["surrogatepass", "ignore", "replace"])
def test_to_csv_errors(self, errors):
# GH 22610
data = ["\ud800foo"]
ser = pd.Series(data, index=pd.Index(data))
with tm.ensure_clean("test.csv") as path:
ser.to_csv(path, errors=errors)
# No use in reading back the data as it is not the same anymore
# due to the error handling
@pytest.mark.parametrize("mode", ["wb", "w"])
def test_to_csv_binary_handle(self, mode):
"""
Binary file objects should work (if 'mode' contains a 'b') or even without
it in most cases.
GH 35058 and GH 19827
"""
df = tm.makeDataFrame()
with tm.ensure_clean() as path:
with open(path, mode="w+b") as handle:
df.to_csv(handle, mode=mode)
tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))
@pytest.mark.parametrize("mode", ["wb", "w"])
def test_to_csv_encoding_binary_handle(self, mode):
"""
Binary file objects should honor a specified encoding.
GH 23854 and GH 13068 with binary handles
"""
# example from GH 23854
content = "a, b, 🐟".encode("utf-8-sig")
buffer = io.BytesIO(content)
df = pd.read_csv(buffer, encoding="utf-8-sig")
buffer = io.BytesIO()
df.to_csv(buffer, mode=mode, encoding="utf-8-sig", index=False)
buffer.seek(0) # tests whether file handle wasn't closed
assert buffer.getvalue().startswith(content)
# example from GH 13068
with tm.ensure_clean() as path:
with open(path, "w+b") as handle:
DataFrame().to_csv(handle, mode=mode, encoding="utf-8-sig")
handle.seek(0)
assert handle.read().startswith(b'\xef\xbb\xbf""')
def test_to_csv_iterative_compression_name(compression):
# GH 38714
df = tm.makeDataFrame()
with tm.ensure_clean() as path:
df.to_csv(path, compression=compression, chunksize=1)
tm.assert_frame_equal(
pd.read_csv(path, compression=compression, index_col=0), df
)
def test_to_csv_iterative_compression_buffer(compression):
# GH 38714
df = tm.makeDataFrame()
with io.BytesIO() as buffer:
df.to_csv(buffer, compression=compression, chunksize=1)
buffer.seek(0)
tm.assert_frame_equal(
pd.read_csv(buffer, compression=compression, index_col=0), df
)
assert not buffer.closed

View File

@@ -0,0 +1,337 @@
"""Tests formatting as writer-agnostic ExcelCells
ExcelFormatter is tested implicitly in pandas/tests/io/excel
"""
import string
import pytest
import pandas.util._test_decorators as td
import pandas._testing as tm
from pandas.io.formats.css import CSSWarning
from pandas.io.formats.excel import CSSToExcelConverter
@pytest.mark.parametrize(
"css,expected",
[
# FONT
# - name
("font-family: foo,bar", {"font": {"name": "foo"}}),
('font-family: "foo bar",baz', {"font": {"name": "foo bar"}}),
("font-family: foo,\nbar", {"font": {"name": "foo"}}),
("font-family: foo, bar, baz", {"font": {"name": "foo"}}),
("font-family: bar, foo", {"font": {"name": "bar"}}),
("font-family: 'foo bar', baz", {"font": {"name": "foo bar"}}),
("font-family: 'foo \\'bar', baz", {"font": {"name": "foo 'bar"}}),
('font-family: "foo \\"bar", baz', {"font": {"name": 'foo "bar'}}),
('font-family: "foo ,bar", baz', {"font": {"name": "foo ,bar"}}),
# - family
("font-family: serif", {"font": {"name": "serif", "family": 1}}),
("font-family: Serif", {"font": {"name": "serif", "family": 1}}),
("font-family: roman, serif", {"font": {"name": "roman", "family": 1}}),
("font-family: roman, sans-serif", {"font": {"name": "roman", "family": 2}}),
("font-family: roman, sans serif", {"font": {"name": "roman"}}),
("font-family: roman, sansserif", {"font": {"name": "roman"}}),
("font-family: roman, cursive", {"font": {"name": "roman", "family": 4}}),
("font-family: roman, fantasy", {"font": {"name": "roman", "family": 5}}),
# - size
("font-size: 1em", {"font": {"size": 12}}),
("font-size: xx-small", {"font": {"size": 6}}),
("font-size: x-small", {"font": {"size": 7.5}}),
("font-size: small", {"font": {"size": 9.6}}),
("font-size: medium", {"font": {"size": 12}}),
("font-size: large", {"font": {"size": 13.5}}),
("font-size: x-large", {"font": {"size": 18}}),
("font-size: xx-large", {"font": {"size": 24}}),
("font-size: 50%", {"font": {"size": 6}}),
# - bold
("font-weight: 100", {"font": {"bold": False}}),
("font-weight: 200", {"font": {"bold": False}}),
("font-weight: 300", {"font": {"bold": False}}),
("font-weight: 400", {"font": {"bold": False}}),
("font-weight: normal", {"font": {"bold": False}}),
("font-weight: lighter", {"font": {"bold": False}}),
("font-weight: bold", {"font": {"bold": True}}),
("font-weight: bolder", {"font": {"bold": True}}),
("font-weight: 700", {"font": {"bold": True}}),
("font-weight: 800", {"font": {"bold": True}}),
("font-weight: 900", {"font": {"bold": True}}),
# - italic
("font-style: italic", {"font": {"italic": True}}),
("font-style: oblique", {"font": {"italic": True}}),
# - underline
("text-decoration: underline", {"font": {"underline": "single"}}),
("text-decoration: overline", {}),
("text-decoration: none", {}),
# - strike
("text-decoration: line-through", {"font": {"strike": True}}),
(
"text-decoration: underline line-through",
{"font": {"strike": True, "underline": "single"}},
),
(
"text-decoration: underline; text-decoration: line-through",
{"font": {"strike": True}},
),
# - color
("color: red", {"font": {"color": "FF0000"}}),
("color: #ff0000", {"font": {"color": "FF0000"}}),
("color: #f0a", {"font": {"color": "FF00AA"}}),
# - shadow
("text-shadow: none", {"font": {"shadow": False}}),
("text-shadow: 0px -0em 0px #CCC", {"font": {"shadow": False}}),
("text-shadow: 0px -0em 0px #999", {"font": {"shadow": False}}),
("text-shadow: 0px -0em 0px", {"font": {"shadow": False}}),
("text-shadow: 2px -0em 0px #CCC", {"font": {"shadow": True}}),
("text-shadow: 0px -2em 0px #CCC", {"font": {"shadow": True}}),
("text-shadow: 0px -0em 2px #CCC", {"font": {"shadow": True}}),
("text-shadow: 0px -0em 2px", {"font": {"shadow": True}}),
("text-shadow: 0px -2em", {"font": {"shadow": True}}),
# FILL
# - color, fillType
(
"background-color: red",
{"fill": {"fgColor": "FF0000", "patternType": "solid"}},
),
(
"background-color: #ff0000",
{"fill": {"fgColor": "FF0000", "patternType": "solid"}},
),
(
"background-color: #f0a",
{"fill": {"fgColor": "FF00AA", "patternType": "solid"}},
),
# BORDER
# - style
(
"border-style: solid",
{
"border": {
"top": {"style": "medium"},
"bottom": {"style": "medium"},
"left": {"style": "medium"},
"right": {"style": "medium"},
}
},
),
(
"border-style: solid; border-width: thin",
{
"border": {
"top": {"style": "thin"},
"bottom": {"style": "thin"},
"left": {"style": "thin"},
"right": {"style": "thin"},
}
},
),
(
"border-top-style: solid; border-top-width: thin",
{"border": {"top": {"style": "thin"}}},
),
(
"border-top-style: solid; border-top-width: 1pt",
{"border": {"top": {"style": "thin"}}},
),
("border-top-style: solid", {"border": {"top": {"style": "medium"}}}),
(
"border-top-style: solid; border-top-width: medium",
{"border": {"top": {"style": "medium"}}},
),
(
"border-top-style: solid; border-top-width: 2pt",
{"border": {"top": {"style": "medium"}}},
),
(
"border-top-style: solid; border-top-width: thick",
{"border": {"top": {"style": "thick"}}},
),
(
"border-top-style: solid; border-top-width: 4pt",
{"border": {"top": {"style": "thick"}}},
),
(
"border-top-style: dotted",
{"border": {"top": {"style": "mediumDashDotDot"}}},
),
(
"border-top-style: dotted; border-top-width: thin",
{"border": {"top": {"style": "dotted"}}},
),
("border-top-style: dashed", {"border": {"top": {"style": "mediumDashed"}}}),
(
"border-top-style: dashed; border-top-width: thin",
{"border": {"top": {"style": "dashed"}}},
),
("border-top-style: double", {"border": {"top": {"style": "double"}}}),
# - color
(
"border-style: solid; border-color: #0000ff",
{
"border": {
"top": {"style": "medium", "color": "0000FF"},
"right": {"style": "medium", "color": "0000FF"},
"bottom": {"style": "medium", "color": "0000FF"},
"left": {"style": "medium", "color": "0000FF"},
}
},
),
(
"border-top-style: double; border-top-color: blue",
{"border": {"top": {"style": "double", "color": "0000FF"}}},
),
(
"border-top-style: solid; border-top-color: #06c",
{"border": {"top": {"style": "medium", "color": "0066CC"}}},
),
# ALIGNMENT
# - horizontal
("text-align: center", {"alignment": {"horizontal": "center"}}),
("text-align: left", {"alignment": {"horizontal": "left"}}),
("text-align: right", {"alignment": {"horizontal": "right"}}),
("text-align: justify", {"alignment": {"horizontal": "justify"}}),
# - vertical
("vertical-align: top", {"alignment": {"vertical": "top"}}),
("vertical-align: text-top", {"alignment": {"vertical": "top"}}),
("vertical-align: middle", {"alignment": {"vertical": "center"}}),
("vertical-align: bottom", {"alignment": {"vertical": "bottom"}}),
("vertical-align: text-bottom", {"alignment": {"vertical": "bottom"}}),
# - wrap_text
("white-space: nowrap", {"alignment": {"wrap_text": False}}),
("white-space: pre", {"alignment": {"wrap_text": False}}),
("white-space: pre-line", {"alignment": {"wrap_text": False}}),
("white-space: normal", {"alignment": {"wrap_text": True}}),
# NUMBER FORMAT
("number-format: 0%", {"number_format": {"format_code": "0%"}}),
(
"number-format: 0§[Red](0)§-§@;",
{"number_format": {"format_code": "0;[red](0);-;@"}}, # GH 46152
),
],
)
def test_css_to_excel(css, expected):
convert = CSSToExcelConverter()
assert expected == convert(css)
def test_css_to_excel_multiple():
convert = CSSToExcelConverter()
actual = convert(
"""
font-weight: bold;
text-decoration: underline;
color: red;
border-width: thin;
text-align: center;
vertical-align: top;
unused: something;
"""
)
assert {
"font": {"bold": True, "underline": "single", "color": "FF0000"},
"border": {
"top": {"style": "thin"},
"right": {"style": "thin"},
"bottom": {"style": "thin"},
"left": {"style": "thin"},
},
"alignment": {"horizontal": "center", "vertical": "top"},
} == actual
@pytest.mark.parametrize(
"css,inherited,expected",
[
("font-weight: bold", "", {"font": {"bold": True}}),
("", "font-weight: bold", {"font": {"bold": True}}),
(
"font-weight: bold",
"font-style: italic",
{"font": {"bold": True, "italic": True}},
),
("font-style: normal", "font-style: italic", {"font": {"italic": False}}),
("font-style: inherit", "", {}),
(
"font-style: normal; font-style: inherit",
"font-style: italic",
{"font": {"italic": True}},
),
],
)
def test_css_to_excel_inherited(css, inherited, expected):
convert = CSSToExcelConverter(inherited)
assert expected == convert(css)
@pytest.mark.parametrize(
"input_color,output_color",
(
list(CSSToExcelConverter.NAMED_COLORS.items())
+ [("#" + rgb, rgb) for rgb in CSSToExcelConverter.NAMED_COLORS.values()]
+ [("#F0F", "FF00FF"), ("#ABC", "AABBCC")]
),
)
def test_css_to_excel_good_colors(input_color, output_color):
# see gh-18392
css = (
f"border-top-color: {input_color}; "
f"border-right-color: {input_color}; "
f"border-bottom-color: {input_color}; "
f"border-left-color: {input_color}; "
f"background-color: {input_color}; "
f"color: {input_color}"
)
expected = {}
expected["fill"] = {"patternType": "solid", "fgColor": output_color}
expected["font"] = {"color": output_color}
expected["border"] = {
k: {"color": output_color} for k in ("top", "right", "bottom", "left")
}
with tm.assert_produces_warning(None):
convert = CSSToExcelConverter()
assert expected == convert(css)
@pytest.mark.parametrize("input_color", [None, "not-a-color"])
def test_css_to_excel_bad_colors(input_color):
# see gh-18392
css = (
f"border-top-color: {input_color}; "
f"border-right-color: {input_color}; "
f"border-bottom-color: {input_color}; "
f"border-left-color: {input_color}; "
f"background-color: {input_color}; "
f"color: {input_color}"
)
expected = {}
if input_color is not None:
expected["fill"] = {"patternType": "solid"}
with tm.assert_produces_warning(CSSWarning):
convert = CSSToExcelConverter()
assert expected == convert(css)
def tests_css_named_colors_valid():
upper_hexs = set(map(str.upper, string.hexdigits))
for color in CSSToExcelConverter.NAMED_COLORS.values():
assert len(color) == 6 and all(c in upper_hexs for c in color)
@td.skip_if_no_mpl
def test_css_named_colors_from_mpl_present():
from matplotlib.colors import CSS4_COLORS as mpl_colors
pd_colors = CSSToExcelConverter.NAMED_COLORS
for name, color in mpl_colors.items():
assert name in pd_colors and pd_colors[name] == color[1:]

View File

@@ -0,0 +1,890 @@
from datetime import datetime
from io import StringIO
import re
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
option_context,
)
import pandas._testing as tm
import pandas.io.formats.format as fmt
lorem_ipsum = (
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod "
"tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim "
"veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex "
"ea commodo consequat. Duis aute irure dolor in reprehenderit in "
"voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur "
"sint occaecat cupidatat non proident, sunt in culpa qui officia "
"deserunt mollit anim id est laborum."
)
def expected_html(datapath, name):
"""
Read HTML file from formats data directory.
Parameters
----------
datapath : pytest fixture
The datapath fixture injected into a test by pytest.
name : str
The name of the HTML file without the suffix.
Returns
-------
str : contents of HTML file.
"""
filename = ".".join([name, "html"])
filepath = datapath("io", "formats", "data", "html", filename)
with open(filepath, encoding="utf-8") as f:
html = f.read()
return html.rstrip()
@pytest.fixture(params=["mixed", "empty"])
def biggie_df_fixture(request):
"""Fixture for a big mixed Dataframe and an empty Dataframe"""
if request.param == "mixed":
df = DataFrame(
{"A": np.random.randn(200), "B": tm.makeStringIndex(200)},
index=np.arange(200),
)
df.loc[:20, "A"] = np.nan
df.loc[:20, "B"] = np.nan
return df
elif request.param == "empty":
df = DataFrame(index=np.arange(200))
return df
@pytest.fixture(params=fmt._VALID_JUSTIFY_PARAMETERS)
def justify(request):
return request.param
@pytest.mark.parametrize("col_space", [30, 50])
def test_to_html_with_col_space(col_space):
df = DataFrame(np.random.random(size=(1, 3)))
# check that col_space affects HTML generation
# and be very brittle about it.
result = df.to_html(col_space=col_space)
hdrs = [x for x in result.split(r"\n") if re.search(r"<th[>\s]", x)]
assert len(hdrs) > 0
for h in hdrs:
assert "min-width" in h
assert str(col_space) in h
def test_to_html_with_column_specific_col_space_raises():
df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"])
msg = (
"Col_space length\\(\\d+\\) should match "
"DataFrame number of columns\\(\\d+\\)"
)
with pytest.raises(ValueError, match=msg):
df.to_html(col_space=[30, 40])
with pytest.raises(ValueError, match=msg):
df.to_html(col_space=[30, 40, 50, 60])
msg = "unknown column"
with pytest.raises(ValueError, match=msg):
df.to_html(col_space={"a": "foo", "b": 23, "d": 34})
def test_to_html_with_column_specific_col_space():
df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"])
result = df.to_html(col_space={"a": "2em", "b": 23})
hdrs = [x for x in result.split("\n") if re.search(r"<th[>\s]", x)]
assert 'min-width: 2em;">a</th>' in hdrs[1]
assert 'min-width: 23px;">b</th>' in hdrs[2]
assert "<th>c</th>" in hdrs[3]
result = df.to_html(col_space=["1em", 2, 3])
hdrs = [x for x in result.split("\n") if re.search(r"<th[>\s]", x)]
assert 'min-width: 1em;">a</th>' in hdrs[1]
assert 'min-width: 2px;">b</th>' in hdrs[2]
assert 'min-width: 3px;">c</th>' in hdrs[3]
def test_to_html_with_empty_string_label():
# GH 3547, to_html regards empty string labels as repeated labels
data = {"c1": ["a", "b"], "c2": ["a", ""], "data": [1, 2]}
df = DataFrame(data).set_index(["c1", "c2"])
result = df.to_html()
assert "rowspan" not in result
@pytest.mark.parametrize(
"df,expected",
[
(DataFrame({"\u03c3": np.arange(10.0)}), "unicode_1"),
(DataFrame({"A": ["\u03c3"]}), "unicode_2"),
],
)
def test_to_html_unicode(df, expected, datapath):
expected = expected_html(datapath, expected)
result = df.to_html()
assert result == expected
def test_to_html_encoding(float_frame, tmp_path):
# GH 28663
path = tmp_path / "test.html"
float_frame.to_html(path, encoding="gbk")
with open(str(path), encoding="gbk") as f:
assert float_frame.to_html() == f.read()
def test_to_html_decimal(datapath):
# GH 12031
df = DataFrame({"A": [6.0, 3.1, 2.2]})
result = df.to_html(decimal=",")
expected = expected_html(datapath, "gh12031_expected_output")
assert result == expected
@pytest.mark.parametrize(
"kwargs,string,expected",
[
({}, "<type 'str'>", "escaped"),
({"escape": False}, "<b>bold</b>", "escape_disabled"),
],
)
def test_to_html_escaped(kwargs, string, expected, datapath):
a = "str<ing1 &amp;"
b = "stri>ng2 &amp;"
test_dict = {"co<l1": {a: string, b: string}, "co>l2": {a: string, b: string}}
result = DataFrame(test_dict).to_html(**kwargs)
expected = expected_html(datapath, expected)
assert result == expected
@pytest.mark.parametrize("index_is_named", [True, False])
def test_to_html_multiindex_index_false(index_is_named, datapath):
# GH 8452
df = DataFrame(
{"a": range(2), "b": range(3, 5), "c": range(5, 7), "d": range(3, 5)}
)
df.columns = MultiIndex.from_product([["a", "b"], ["c", "d"]])
if index_is_named:
df.index = Index(df.index.values, name="idx")
result = df.to_html(index=False)
expected = expected_html(datapath, "gh8452_expected_output")
assert result == expected
@pytest.mark.parametrize(
"multi_sparse,expected",
[
(False, "multiindex_sparsify_false_multi_sparse_1"),
(False, "multiindex_sparsify_false_multi_sparse_2"),
(True, "multiindex_sparsify_1"),
(True, "multiindex_sparsify_2"),
],
)
def test_to_html_multiindex_sparsify(multi_sparse, expected, datapath):
index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], names=["foo", None])
df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index)
if expected.endswith("2"):
df.columns = index[::2]
with option_context("display.multi_sparse", multi_sparse):
result = df.to_html()
expected = expected_html(datapath, expected)
assert result == expected
@pytest.mark.parametrize(
"max_rows,expected",
[
(60, "gh14882_expected_output_1"),
# Test that ... appears in a middle level
(56, "gh14882_expected_output_2"),
],
)
def test_to_html_multiindex_odd_even_truncate(max_rows, expected, datapath):
# GH 14882 - Issue on truncation with odd length DataFrame
index = MultiIndex.from_product(
[[100, 200, 300], [10, 20, 30], [1, 2, 3, 4, 5, 6, 7]], names=["a", "b", "c"]
)
df = DataFrame({"n": range(len(index))}, index=index)
result = df.to_html(max_rows=max_rows)
expected = expected_html(datapath, expected)
assert result == expected
@pytest.mark.parametrize(
"df,formatters,expected",
[
(
DataFrame(
[[0, 1], [2, 3], [4, 5], [6, 7]],
columns=["foo", None],
index=np.arange(4),
),
{"__index__": lambda x: "abcd"[x]},
"index_formatter",
),
(
DataFrame({"months": [datetime(2016, 1, 1), datetime(2016, 2, 2)]}),
{"months": lambda x: x.strftime("%Y-%m")},
"datetime64_monthformatter",
),
(
DataFrame(
{
"hod": pd.to_datetime(
["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f"
)
}
),
{"hod": lambda x: x.strftime("%H:%M")},
"datetime64_hourformatter",
),
(
DataFrame(
{
"i": pd.Series([1, 2], dtype="int64"),
"f": pd.Series([1, 2], dtype="float64"),
"I": pd.Series([1, 2], dtype="Int64"),
"s": pd.Series([1, 2], dtype="string"),
"b": pd.Series([True, False], dtype="boolean"),
"c": pd.Series(["a", "b"], dtype=pd.CategoricalDtype(["a", "b"])),
"o": pd.Series([1, "2"], dtype=object),
}
),
[lambda x: "formatted"] * 7,
"various_dtypes_formatted",
),
],
)
def test_to_html_formatters(df, formatters, expected, datapath):
expected = expected_html(datapath, expected)
result = df.to_html(formatters=formatters)
assert result == expected
def test_to_html_regression_GH6098():
df = DataFrame(
{
"clé1": ["a", "a", "b", "b", "a"],
"clé2": ["1er", "2ème", "1er", "2ème", "1er"],
"données1": np.random.randn(5),
"données2": np.random.randn(5),
}
)
# it works
df.pivot_table(index=["clé1"], columns=["clé2"])._repr_html_()
def test_to_html_truncate(datapath):
index = pd.date_range(start="20010101", freq="D", periods=20)
df = DataFrame(index=index, columns=range(20))
result = df.to_html(max_rows=8, max_cols=4)
expected = expected_html(datapath, "truncate")
assert result == expected
@pytest.mark.parametrize("size", [1, 5])
def test_html_invalid_formatters_arg_raises(size):
# issue-28469
df = DataFrame(columns=["a", "b", "c"])
msg = "Formatters length({}) should match DataFrame number of columns(3)"
with pytest.raises(ValueError, match=re.escape(msg.format(size))):
df.to_html(formatters=["{}".format] * size)
def test_to_html_truncate_formatter(datapath):
# issue-25955
data = [
{"A": 1, "B": 2, "C": 3, "D": 4},
{"A": 5, "B": 6, "C": 7, "D": 8},
{"A": 9, "B": 10, "C": 11, "D": 12},
{"A": 13, "B": 14, "C": 15, "D": 16},
]
df = DataFrame(data)
fmt = lambda x: str(x) + "_mod"
formatters = [fmt, fmt, None, None]
result = df.to_html(formatters=formatters, max_cols=3)
expected = expected_html(datapath, "truncate_formatter")
assert result == expected
@pytest.mark.parametrize(
"sparsify,expected",
[(True, "truncate_multi_index"), (False, "truncate_multi_index_sparse_off")],
)
def test_to_html_truncate_multi_index(sparsify, expected, datapath):
arrays = [
["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
["one", "two", "one", "two", "one", "two", "one", "two"],
]
df = DataFrame(index=arrays, columns=arrays)
result = df.to_html(max_rows=7, max_cols=7, sparsify=sparsify)
expected = expected_html(datapath, expected)
assert result == expected
@pytest.mark.parametrize(
"option,result,expected",
[
(None, lambda df: df.to_html(), "1"),
(None, lambda df: df.to_html(border=0), "0"),
(0, lambda df: df.to_html(), "0"),
(0, lambda df: df._repr_html_(), "0"),
],
)
def test_to_html_border(option, result, expected):
df = DataFrame({"A": [1, 2]})
if option is None:
result = result(df)
else:
with option_context("display.html.border", option):
result = result(df)
expected = f'border="{expected}"'
assert expected in result
@pytest.mark.parametrize("biggie_df_fixture", ["mixed"], indirect=True)
def test_to_html(biggie_df_fixture):
# TODO: split this test
df = biggie_df_fixture
s = df.to_html()
buf = StringIO()
retval = df.to_html(buf=buf)
assert retval is None
assert buf.getvalue() == s
assert isinstance(s, str)
df.to_html(columns=["B", "A"], col_space=17)
df.to_html(columns=["B", "A"], formatters={"A": lambda x: f"{x:.1f}"})
df.to_html(columns=["B", "A"], float_format=str)
df.to_html(columns=["B", "A"], col_space=12, float_format=str)
@pytest.mark.parametrize("biggie_df_fixture", ["empty"], indirect=True)
def test_to_html_empty_dataframe(biggie_df_fixture):
df = biggie_df_fixture
df.to_html()
def test_to_html_filename(biggie_df_fixture, tmpdir):
df = biggie_df_fixture
expected = df.to_html()
path = tmpdir.join("test.html")
df.to_html(path)
result = path.read()
assert result == expected
def test_to_html_with_no_bold():
df = DataFrame({"x": np.random.randn(5)})
html = df.to_html(bold_rows=False)
result = html[html.find("</thead>")]
assert "<strong" not in result
def test_to_html_columns_arg(float_frame):
result = float_frame.to_html(columns=["A"])
assert "<th>B</th>" not in result
@pytest.mark.parametrize(
"columns,justify,expected",
[
(
MultiIndex.from_tuples(
list(zip(np.arange(2).repeat(2), np.mod(range(4), 2))),
names=["CL0", "CL1"],
),
"left",
"multiindex_1",
),
(
MultiIndex.from_tuples(list(zip(range(4), np.mod(range(4), 2)))),
"right",
"multiindex_2",
),
],
)
def test_to_html_multiindex(columns, justify, expected, datapath):
df = DataFrame([list("abcd"), list("efgh")], columns=columns)
result = df.to_html(justify=justify)
expected = expected_html(datapath, expected)
assert result == expected
def test_to_html_justify(justify, datapath):
df = DataFrame(
{"A": [6, 30000, 2], "B": [1, 2, 70000], "C": [223442, 0, 1]},
columns=["A", "B", "C"],
)
result = df.to_html(justify=justify)
expected = expected_html(datapath, "justify").format(justify=justify)
assert result == expected
@pytest.mark.parametrize(
"justify", ["super-right", "small-left", "noinherit", "tiny", "pandas"]
)
def test_to_html_invalid_justify(justify):
# GH 17527
df = DataFrame()
msg = "Invalid value for justify parameter"
with pytest.raises(ValueError, match=msg):
df.to_html(justify=justify)
class TestHTMLIndex:
@pytest.fixture
def df(self):
index = ["foo", "bar", "baz"]
df = DataFrame(
{"A": [1, 2, 3], "B": [1.2, 3.4, 5.6], "C": ["one", "two", np.nan]},
columns=["A", "B", "C"],
index=index,
)
return df
@pytest.fixture
def expected_without_index(self, datapath):
return expected_html(datapath, "index_2")
def test_to_html_flat_index_without_name(
self, datapath, df, expected_without_index
):
expected_with_index = expected_html(datapath, "index_1")
assert df.to_html() == expected_with_index
result = df.to_html(index=False)
for i in df.index:
assert i not in result
assert result == expected_without_index
def test_to_html_flat_index_with_name(self, datapath, df, expected_without_index):
df.index = Index(["foo", "bar", "baz"], name="idx")
expected_with_index = expected_html(datapath, "index_3")
assert df.to_html() == expected_with_index
assert df.to_html(index=False) == expected_without_index
def test_to_html_multiindex_without_names(
self, datapath, df, expected_without_index
):
tuples = [("foo", "car"), ("foo", "bike"), ("bar", "car")]
df.index = MultiIndex.from_tuples(tuples)
expected_with_index = expected_html(datapath, "index_4")
assert df.to_html() == expected_with_index
result = df.to_html(index=False)
for i in ["foo", "bar", "car", "bike"]:
assert i not in result
# must be the same result as normal index
assert result == expected_without_index
def test_to_html_multiindex_with_names(self, datapath, df, expected_without_index):
tuples = [("foo", "car"), ("foo", "bike"), ("bar", "car")]
df.index = MultiIndex.from_tuples(tuples, names=["idx1", "idx2"])
expected_with_index = expected_html(datapath, "index_5")
assert df.to_html() == expected_with_index
assert df.to_html(index=False) == expected_without_index
@pytest.mark.parametrize("classes", ["sortable draggable", ["sortable", "draggable"]])
def test_to_html_with_classes(classes, datapath):
df = DataFrame()
expected = expected_html(datapath, "with_classes")
result = df.to_html(classes=classes)
assert result == expected
def test_to_html_no_index_max_rows(datapath):
# GH 14998
df = DataFrame({"A": [1, 2, 3, 4]})
result = df.to_html(index=False, max_rows=1)
expected = expected_html(datapath, "gh14998_expected_output")
assert result == expected
def test_to_html_multiindex_max_cols(datapath):
# GH 6131
index = MultiIndex(
levels=[["ba", "bb", "bc"], ["ca", "cb", "cc"]],
codes=[[0, 1, 2], [0, 1, 2]],
names=["b", "c"],
)
columns = MultiIndex(
levels=[["d"], ["aa", "ab", "ac"]],
codes=[[0, 0, 0], [0, 1, 2]],
names=[None, "a"],
)
data = np.array(
[[1.0, np.nan, np.nan], [np.nan, 2.0, np.nan], [np.nan, np.nan, 3.0]]
)
df = DataFrame(data, index, columns)
result = df.to_html(max_cols=2)
expected = expected_html(datapath, "gh6131_expected_output")
assert result == expected
def test_to_html_multi_indexes_index_false(datapath):
# GH 22579
df = DataFrame(
{"a": range(10), "b": range(10, 20), "c": range(10, 20), "d": range(10, 20)}
)
df.columns = MultiIndex.from_product([["a", "b"], ["c", "d"]])
df.index = MultiIndex.from_product([["a", "b"], ["c", "d", "e", "f", "g"]])
result = df.to_html(index=False)
expected = expected_html(datapath, "gh22579_expected_output")
assert result == expected
@pytest.mark.parametrize("index_names", [True, False])
@pytest.mark.parametrize("header", [True, False])
@pytest.mark.parametrize("index", [True, False])
@pytest.mark.parametrize(
"column_index, column_type",
[
(Index([0, 1]), "unnamed_standard"),
(Index([0, 1], name="columns.name"), "named_standard"),
(MultiIndex.from_product([["a"], ["b", "c"]]), "unnamed_multi"),
(
MultiIndex.from_product(
[["a"], ["b", "c"]], names=["columns.name.0", "columns.name.1"]
),
"named_multi",
),
],
)
@pytest.mark.parametrize(
"row_index, row_type",
[
(Index([0, 1]), "unnamed_standard"),
(Index([0, 1], name="index.name"), "named_standard"),
(MultiIndex.from_product([["a"], ["b", "c"]]), "unnamed_multi"),
(
MultiIndex.from_product(
[["a"], ["b", "c"]], names=["index.name.0", "index.name.1"]
),
"named_multi",
),
],
)
def test_to_html_basic_alignment(
datapath, row_index, row_type, column_index, column_type, index, header, index_names
):
# GH 22747, GH 22579
df = DataFrame(np.zeros((2, 2), dtype=int), index=row_index, columns=column_index)
result = df.to_html(index=index, header=header, index_names=index_names)
if not index:
row_type = "none"
elif not index_names and row_type.startswith("named"):
row_type = "un" + row_type
if not header:
column_type = "none"
elif not index_names and column_type.startswith("named"):
column_type = "un" + column_type
filename = "index_" + row_type + "_columns_" + column_type
expected = expected_html(datapath, filename)
assert result == expected
@pytest.mark.parametrize("index_names", [True, False])
@pytest.mark.parametrize("header", [True, False])
@pytest.mark.parametrize("index", [True, False])
@pytest.mark.parametrize(
"column_index, column_type",
[
(Index(np.arange(8)), "unnamed_standard"),
(Index(np.arange(8), name="columns.name"), "named_standard"),
(
MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]),
"unnamed_multi",
),
(
MultiIndex.from_product(
[["a", "b"], ["c", "d"], ["e", "f"]], names=["foo", None, "baz"]
),
"named_multi",
),
],
)
@pytest.mark.parametrize(
"row_index, row_type",
[
(Index(np.arange(8)), "unnamed_standard"),
(Index(np.arange(8), name="index.name"), "named_standard"),
(
MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]),
"unnamed_multi",
),
(
MultiIndex.from_product(
[["a", "b"], ["c", "d"], ["e", "f"]], names=["foo", None, "baz"]
),
"named_multi",
),
],
)
def test_to_html_alignment_with_truncation(
datapath, row_index, row_type, column_index, column_type, index, header, index_names
):
# GH 22747, GH 22579
df = DataFrame(np.arange(64).reshape(8, 8), index=row_index, columns=column_index)
result = df.to_html(
max_rows=4, max_cols=4, index=index, header=header, index_names=index_names
)
if not index:
row_type = "none"
elif not index_names and row_type.startswith("named"):
row_type = "un" + row_type
if not header:
column_type = "none"
elif not index_names and column_type.startswith("named"):
column_type = "un" + column_type
filename = "trunc_df_index_" + row_type + "_columns_" + column_type
expected = expected_html(datapath, filename)
assert result == expected
@pytest.mark.parametrize("index", [False, 0])
def test_to_html_truncation_index_false_max_rows(datapath, index):
# GH 15019
data = [
[1.764052, 0.400157],
[0.978738, 2.240893],
[1.867558, -0.977278],
[0.950088, -0.151357],
[-0.103219, 0.410599],
]
df = DataFrame(data)
result = df.to_html(max_rows=4, index=index)
expected = expected_html(datapath, "gh15019_expected_output")
assert result == expected
@pytest.mark.parametrize("index", [False, 0])
@pytest.mark.parametrize(
"col_index_named, expected_output",
[(False, "gh22783_expected_output"), (True, "gh22783_named_columns_index")],
)
def test_to_html_truncation_index_false_max_cols(
datapath, index, col_index_named, expected_output
):
# GH 22783
data = [
[1.764052, 0.400157, 0.978738, 2.240893, 1.867558],
[-0.977278, 0.950088, -0.151357, -0.103219, 0.410599],
]
df = DataFrame(data)
if col_index_named:
df.columns.rename("columns.name", inplace=True)
result = df.to_html(max_cols=4, index=index)
expected = expected_html(datapath, expected_output)
assert result == expected
@pytest.mark.parametrize("notebook", [True, False])
def test_to_html_notebook_has_style(notebook):
df = DataFrame({"A": [1, 2, 3]})
result = df.to_html(notebook=notebook)
if notebook:
assert "tbody tr th:only-of-type" in result
assert "vertical-align: middle;" in result
assert "thead th" in result
else:
assert "tbody tr th:only-of-type" not in result
assert "vertical-align: middle;" not in result
assert "thead th" not in result
def test_to_html_with_index_names_false():
# GH 16493
df = DataFrame({"A": [1, 2]}, index=Index(["a", "b"], name="myindexname"))
result = df.to_html(index_names=False)
assert "myindexname" not in result
def test_to_html_with_id():
# GH 8496
df = DataFrame({"A": [1, 2]}, index=Index(["a", "b"], name="myindexname"))
result = df.to_html(index_names=False, table_id="TEST_ID")
assert ' id="TEST_ID"' in result
@pytest.mark.parametrize(
"value,float_format,expected",
[
(0.19999, "%.3f", "gh21625_expected_output"),
(100.0, "%.0f", "gh22270_expected_output"),
],
)
def test_to_html_float_format_no_fixed_width(value, float_format, expected, datapath):
# GH 21625, GH 22270
df = DataFrame({"x": [value]})
expected = expected_html(datapath, expected)
result = df.to_html(float_format=float_format)
assert result == expected
@pytest.mark.parametrize(
"render_links,expected",
[(True, "render_links_true"), (False, "render_links_false")],
)
def test_to_html_render_links(render_links, expected, datapath):
# GH 2679
data = [
[0, "https://pandas.pydata.org/?q1=a&q2=b", "pydata.org"],
[0, "www.pydata.org", "pydata.org"],
]
df = DataFrame(data, columns=["foo", "bar", None])
result = df.to_html(render_links=render_links)
expected = expected_html(datapath, expected)
assert result == expected
@pytest.mark.parametrize(
"method,expected",
[
("to_html", lambda x: lorem_ipsum),
("_repr_html_", lambda x: lorem_ipsum[: x - 4] + "..."), # regression case
],
)
@pytest.mark.parametrize("max_colwidth", [10, 20, 50, 100])
def test_ignore_display_max_colwidth(method, expected, max_colwidth):
# see gh-17004
df = DataFrame([lorem_ipsum])
with option_context("display.max_colwidth", max_colwidth):
result = getattr(df, method)()
expected = expected(max_colwidth)
assert expected in result
@pytest.mark.parametrize("classes", [True, 0])
def test_to_html_invalid_classes_type(classes):
# GH 25608
df = DataFrame()
msg = "classes must be a string, list, or tuple"
with pytest.raises(TypeError, match=msg):
df.to_html(classes=classes)
def test_to_html_round_column_headers():
# GH 17280
df = DataFrame([1], columns=[0.55555])
with option_context("display.precision", 3):
html = df.to_html(notebook=False)
notebook = df.to_html(notebook=True)
assert "0.55555" in html
assert "0.556" in notebook
@pytest.mark.parametrize("unit", ["100px", "10%", "5em", 150])
def test_to_html_with_col_space_units(unit):
# GH 25941
df = DataFrame(np.random.random(size=(1, 3)))
result = df.to_html(col_space=unit)
result = result.split("tbody")[0]
hdrs = [x for x in result.split("\n") if re.search(r"<th[>\s]", x)]
if isinstance(unit, int):
unit = str(unit) + "px"
for h in hdrs:
expected = f'<th style="min-width: {unit};">'
assert expected in h
def test_html_repr_min_rows_default(datapath):
# gh-27991
# default setting no truncation even if above min_rows
df = DataFrame({"a": range(20)})
result = df._repr_html_()
expected = expected_html(datapath, "html_repr_min_rows_default_no_truncation")
assert result == expected
# default of max_rows 60 triggers truncation if above
df = DataFrame({"a": range(61)})
result = df._repr_html_()
expected = expected_html(datapath, "html_repr_min_rows_default_truncated")
assert result == expected
@pytest.mark.parametrize(
"max_rows,min_rows,expected",
[
# truncated after first two rows
(10, 4, "html_repr_max_rows_10_min_rows_4"),
# when set to None, follow value of max_rows
(12, None, "html_repr_max_rows_12_min_rows_None"),
# when set value higher as max_rows, use the minimum
(10, 12, "html_repr_max_rows_10_min_rows_12"),
# max_rows of None -> never truncate
(None, 12, "html_repr_max_rows_None_min_rows_12"),
],
)
def test_html_repr_min_rows(datapath, max_rows, min_rows, expected):
# gh-27991
df = DataFrame({"a": range(61)})
expected = expected_html(datapath, expected)
with option_context("display.max_rows", max_rows, "display.min_rows", min_rows):
result = df._repr_html_()
assert result == expected
def test_to_html_multilevel(multiindex_year_month_day_dataframe_random_data):
ymd = multiindex_year_month_day_dataframe_random_data
ymd.columns.name = "foo"
ymd.to_html()
ymd.T.to_html()
@pytest.mark.parametrize("na_rep", ["NaN", "Ted"])
def test_to_html_na_rep_and_float_format(na_rep, datapath):
# https://github.com/pandas-dev/pandas/issues/13828
df = DataFrame(
[
["A", 1.2225],
["A", None],
],
columns=["Group", "Data"],
)
result = df.to_html(na_rep=na_rep, float_format="{:.2f}".format)
expected = expected_html(datapath, "gh13828_expected_output")
expected = expected.format(na_rep=na_rep)
assert result == expected
def test_to_html_float_format_object_col(datapath):
# GH#40024
df = DataFrame(data={"x": [1000.0, "test"]})
result = df.to_html(float_format=lambda x: f"{x:,.0f}")
expected = expected_html(datapath, "gh40024_expected_output")
assert result == expected

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,101 @@
from io import StringIO
import pytest
import pandas as pd
import pandas._testing as tm
pytest.importorskip("tabulate")
def test_simple():
buf = StringIO()
df = pd.DataFrame([1, 2, 3])
df.to_markdown(buf=buf)
result = buf.getvalue()
assert (
result == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |"
)
def test_empty_frame():
buf = StringIO()
df = pd.DataFrame({"id": [], "first_name": [], "last_name": []}).set_index("id")
df.to_markdown(buf=buf)
result = buf.getvalue()
assert result == (
"| id | first_name | last_name |\n"
"|------|--------------|-------------|"
)
def test_other_tablefmt():
buf = StringIO()
df = pd.DataFrame([1, 2, 3])
df.to_markdown(buf=buf, tablefmt="jira")
result = buf.getvalue()
assert result == "|| || 0 ||\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |"
def test_other_headers():
buf = StringIO()
df = pd.DataFrame([1, 2, 3])
df.to_markdown(buf=buf, headers=["foo", "bar"])
result = buf.getvalue()
assert result == (
"| foo | bar |\n|------:|------:|\n| 0 "
"| 1 |\n| 1 | 2 |\n| 2 | 3 |"
)
def test_series():
buf = StringIO()
s = pd.Series([1, 2, 3], name="foo")
s.to_markdown(buf=buf)
result = buf.getvalue()
assert result == (
"| | foo |\n|---:|------:|\n| 0 | 1 "
"|\n| 1 | 2 |\n| 2 | 3 |"
)
def test_no_buf(capsys):
df = pd.DataFrame([1, 2, 3])
result = df.to_markdown()
assert (
result == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |"
)
@pytest.mark.parametrize("index", [True, False, None])
@pytest.mark.parametrize("showindex", [True, False, None])
def test_index(index, showindex):
# GH 32667
kwargs = {}
if index is not None:
kwargs["index"] = index
if showindex is not None:
kwargs["showindex"] = showindex
df = pd.DataFrame([1, 2, 3])
yes_index_result = (
"| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |"
)
no_index_result = "| 0 |\n|----:|\n| 1 |\n| 2 |\n| 3 |"
warning = FutureWarning if "showindex" in kwargs else None
with tm.assert_produces_warning(warning):
result = df.to_markdown(**kwargs)
if "showindex" in kwargs:
# give showindex higher priority if specified
if showindex:
expected = yes_index_result
else:
expected = no_index_result
else:
if index in [True, None]:
expected = yes_index_result
else:
expected = no_index_result
assert result == expected

View File

@@ -0,0 +1,340 @@
from datetime import datetime
from io import StringIO
from textwrap import dedent
import numpy as np
import pytest
from pandas import (
DataFrame,
Series,
option_context,
to_datetime,
)
def test_repr_embedded_ndarray():
arr = np.empty(10, dtype=[("err", object)])
for i in range(len(arr)):
arr["err"][i] = np.random.randn(i)
df = DataFrame(arr)
repr(df["err"])
repr(df)
df.to_string()
def test_repr_tuples():
buf = StringIO()
df = DataFrame({"tups": list(zip(range(10), range(10)))})
repr(df)
df.to_string(col_space=10, buf=buf)
def test_to_string_truncate():
# GH 9784 - dont truncate when calling DataFrame.to_string
df = DataFrame(
[
{
"a": "foo",
"b": "bar",
"c": "let's make this a very VERY long line that is longer "
"than the default 50 character limit",
"d": 1,
},
{"a": "foo", "b": "bar", "c": "stuff", "d": 1},
]
)
df.set_index(["a", "b", "c"])
assert df.to_string() == (
" a b "
" c d\n"
"0 foo bar let's make this a very VERY long line t"
"hat is longer than the default 50 character limit 1\n"
"1 foo bar "
" stuff 1"
)
with option_context("max_colwidth", 20):
# the display option has no effect on the to_string method
assert df.to_string() == (
" a b "
" c d\n"
"0 foo bar let's make this a very VERY long line t"
"hat is longer than the default 50 character limit 1\n"
"1 foo bar "
" stuff 1"
)
assert df.to_string(max_colwidth=20) == (
" a b c d\n"
"0 foo bar let's make this ... 1\n"
"1 foo bar stuff 1"
)
@pytest.mark.parametrize(
"input_array, expected",
[
("a", "a"),
(["a", "b"], "a\nb"),
([1, "a"], "1\na"),
(1, "1"),
([0, -1], " 0\n-1"),
(1.0, "1.0"),
([" a", " b"], " a\n b"),
([".1", "1"], ".1\n 1"),
(["10", "-10"], " 10\n-10"),
],
)
def test_format_remove_leading_space_series(input_array, expected):
# GH: 24980
s = Series(input_array).to_string(index=False)
assert s == expected
@pytest.mark.parametrize(
"input_array, expected",
[
({"A": ["a"]}, "A\na"),
({"A": ["a", "b"], "B": ["c", "dd"]}, "A B\na c\nb dd"),
({"A": ["a", 1], "B": ["aa", 1]}, "A B\na aa\n1 1"),
],
)
def test_format_remove_leading_space_dataframe(input_array, expected):
# GH: 24980
df = DataFrame(input_array).to_string(index=False)
assert df == expected
@pytest.mark.parametrize(
"max_cols, max_rows, expected",
[
(
10,
None,
" 0 1 2 3 4 ... 6 7 8 9 10\n"
" 0 0 0 0 0 ... 0 0 0 0 0\n"
" 0 0 0 0 0 ... 0 0 0 0 0\n"
" 0 0 0 0 0 ... 0 0 0 0 0\n"
" 0 0 0 0 0 ... 0 0 0 0 0",
),
(
None,
2,
" 0 1 2 3 4 5 6 7 8 9 10\n"
" 0 0 0 0 0 0 0 0 0 0 0\n"
" .. .. .. .. .. .. .. .. .. .. ..\n"
" 0 0 0 0 0 0 0 0 0 0 0",
),
(
10,
2,
" 0 1 2 3 4 ... 6 7 8 9 10\n"
" 0 0 0 0 0 ... 0 0 0 0 0\n"
" .. .. .. .. .. ... .. .. .. .. ..\n"
" 0 0 0 0 0 ... 0 0 0 0 0",
),
(
9,
2,
" 0 1 2 3 ... 7 8 9 10\n"
" 0 0 0 0 ... 0 0 0 0\n"
" .. .. .. .. ... .. .. .. ..\n"
" 0 0 0 0 ... 0 0 0 0",
),
(
1,
1,
" 0 ...\n 0 ...\n.. ...",
),
],
)
def test_truncation_no_index(max_cols, max_rows, expected):
df = DataFrame([[0] * 11] * 4)
assert df.to_string(index=False, max_cols=max_cols, max_rows=max_rows) == expected
def test_to_string_unicode_columns(float_frame):
df = DataFrame({"\u03c3": np.arange(10.0)})
buf = StringIO()
df.to_string(buf=buf)
buf.getvalue()
buf = StringIO()
df.info(buf=buf)
buf.getvalue()
result = float_frame.to_string()
assert isinstance(result, str)
def test_to_string_utf8_columns():
n = "\u05d0".encode()
with option_context("display.max_rows", 1):
df = DataFrame([1, 2], columns=[n])
repr(df)
def test_to_string_unicode_two():
dm = DataFrame({"c/\u03c3": []})
buf = StringIO()
dm.to_string(buf)
def test_to_string_unicode_three():
dm = DataFrame(["\xc2"])
buf = StringIO()
dm.to_string(buf)
def test_to_string_with_formatters():
df = DataFrame(
{
"int": [1, 2, 3],
"float": [1.0, 2.0, 3.0],
"object": [(1, 2), True, False],
},
columns=["int", "float", "object"],
)
formatters = [
("int", lambda x: f"0x{x:x}"),
("float", lambda x: f"[{x: 4.1f}]"),
("object", lambda x: f"-{x!s}-"),
]
result = df.to_string(formatters=dict(formatters))
result2 = df.to_string(formatters=list(zip(*formatters))[1])
assert result == (
" int float object\n"
"0 0x1 [ 1.0] -(1, 2)-\n"
"1 0x2 [ 2.0] -True-\n"
"2 0x3 [ 3.0] -False-"
)
assert result == result2
def test_to_string_with_datetime64_monthformatter():
months = [datetime(2016, 1, 1), datetime(2016, 2, 2)]
x = DataFrame({"months": months})
def format_func(x):
return x.strftime("%Y-%m")
result = x.to_string(formatters={"months": format_func})
expected = dedent(
"""\
months
0 2016-01
1 2016-02"""
)
assert result.strip() == expected
def test_to_string_with_datetime64_hourformatter():
x = DataFrame(
{"hod": to_datetime(["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f")}
)
def format_func(x):
return x.strftime("%H:%M")
result = x.to_string(formatters={"hod": format_func})
expected = dedent(
"""\
hod
0 10:10
1 12:12"""
)
assert result.strip() == expected
def test_to_string_with_formatters_unicode():
df = DataFrame({"c/\u03c3": [1, 2, 3]})
result = df.to_string(formatters={"c/\u03c3": str})
expected = dedent(
"""\
c/\u03c3
0 1
1 2
2 3"""
)
assert result == expected
def test_to_string_complex_number_trims_zeros():
s = Series([1.000000 + 1.000000j, 1.0 + 1.0j, 1.05 + 1.0j])
result = s.to_string()
expected = dedent(
"""\
0 1.00+1.00j
1 1.00+1.00j
2 1.05+1.00j"""
)
assert result == expected
def test_nullable_float_to_string(float_ea_dtype):
# https://github.com/pandas-dev/pandas/issues/36775
dtype = float_ea_dtype
s = Series([0.0, 1.0, None], dtype=dtype)
result = s.to_string()
expected = dedent(
"""\
0 0.0
1 1.0
2 <NA>"""
)
assert result == expected
def test_nullable_int_to_string(any_int_ea_dtype):
# https://github.com/pandas-dev/pandas/issues/36775
dtype = any_int_ea_dtype
s = Series([0, 1, None], dtype=dtype)
result = s.to_string()
expected = dedent(
"""\
0 0
1 1
2 <NA>"""
)
assert result == expected
@pytest.mark.parametrize("na_rep", ["NaN", "Ted"])
def test_to_string_na_rep_and_float_format(na_rep):
# GH 13828
df = DataFrame([["A", 1.2225], ["A", None]], columns=["Group", "Data"])
result = df.to_string(na_rep=na_rep, float_format="{:.2f}".format)
expected = dedent(
f"""\
Group Data
0 A 1.22
1 A {na_rep}"""
)
assert result == expected
@pytest.mark.parametrize(
"data,expected",
[
(
{"col1": [1, 2], "col2": [3, 4]},
" col1 col2\n0 1 3\n1 2 4",
),
(
{"col1": ["Abc", 0.756], "col2": [np.nan, 4.5435]},
" col1 col2\n0 Abc NaN\n1 0.756 4.5435",
),
(
{"col1": [np.nan, "a"], "col2": [0.009, 3.543], "col3": ["Abc", 23]},
" col1 col2 col3\n0 NaN 0.009 Abc\n1 a 3.543 23",
),
],
)
def test_to_string_max_rows_zero(data, expected):
# GH35394
result = DataFrame(data=data).to_string(max_rows=0)
assert result == expected