first commit

This commit is contained in:
Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

View File

@ -0,0 +1,35 @@
import pytest
from pandas.compat._optional import (
get_version,
import_optional_dependency,
)
from pandas.util.version import Version
# Warning filters applied to every test module in this package.  Each spec
# uses pytest's "action:message:category" form.
pytestmark = [
    pytest.mark.filterwarnings(spec)
    for spec in (
        # Looks like tree.getiterator is deprecated in favor of tree.iter
        "ignore:This method will be removed in future versions:"
        "PendingDeprecationWarning",
        "ignore:This method will be removed in future versions:DeprecationWarning",
        # GH 26552
        "ignore:As the xlwt package is no longer maintained:FutureWarning",
        # GH 38571
        "ignore:.*In xlrd >= 2.0, only the xls format is supported:FutureWarning",
    )
]
# ``xlrd_version`` is the parsed version of the installed xlrd package, or
# ``None`` when xlrd cannot be imported.
if import_optional_dependency("xlrd", errors="ignore") is not None:
    import xlrd

    xlrd_version = Version(get_version(xlrd))
else:
    xlrd_version = None

View File

@ -0,0 +1,67 @@
import pytest
from pandas.compat import is_platform_windows
import pandas.util._test_decorators as td
import pandas._testing as tm
from pandas.io.parsers import read_csv
@pytest.fixture
def frame(float_frame):
    """
    Provide the ``[:10]`` slice of the "float_frame" fixture.
    """
    leading = float_frame[:10]
    return leading
@pytest.fixture
def tsframe():
    """
    Provide the ``[:5]`` slice of ``tm.makeTimeDataFrame()``.
    """
    full = tm.makeTimeDataFrame()
    return full[:5]
@pytest.fixture(params=[True, False])
def merge_cells(request):
    """
    Parametrized fixture producing ``True`` and ``False`` in turn.
    """
    flag = request.param
    return flag
@pytest.fixture
def df_ref(datapath):
    """
    Load the reference frame from ``test1.csv`` using the Python CSV engine.

    The first column becomes the index and dates are parsed.
    """
    csv_path = datapath("io", "data", "csv", "test1.csv")
    return read_csv(csv_path, index_col=0, parse_dates=True, engine="python")
@pytest.fixture(params=[".xls", ".xlsx", ".xlsm", ".ods", ".xlsb"])
def read_ext(request):
    """
    File extensions that are valid for reading Excel files.
    """
    extension = request.param
    return extension
# Checking for file leaks can hang on Windows CI
@pytest.fixture(autouse=not is_platform_windows())
def check_for_file_leaks():
    """
    Wrap a test and assert that it does not change the set of open files.

    The open-file list of the current process is captured before and after
    the test body and the two snapshots must compare equal.  When ``psutil``
    cannot be imported the fixture performs no check.

    See also
    --------
    _test_decorators.check_file_leaks
    """
    # GH#30162
    psutil = td.safe_import("psutil")
    if not psutil:
        # Nothing to compare without psutil; just run the test
        yield
        return

    process = psutil.Process()
    before = process.open_files()
    yield
    after = process.open_files()
    assert before == after

View File

@ -0,0 +1,38 @@
import functools
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
# Skip this whole module when the optional "odf" package cannot be imported
pytest.importorskip("odf")
@pytest.fixture(autouse=True)
def cd_and_set_engine(monkeypatch, datapath):
    """
    Pin ``pd.read_excel`` to the "odf" engine and chdir into the Excel data dir.

    Both changes go through ``monkeypatch``, so pytest undoes them after
    each test.
    """
    odf_read_excel = functools.partial(pd.read_excel, engine="odf")
    monkeypatch.setattr(pd, "read_excel", odf_read_excel)
    excel_dir = datapath("io", "data", "excel")
    monkeypatch.chdir(excel_dir)
def test_read_invalid_types_raises():
    """Reading a cell with an unknown value type raises ``ValueError``."""
    # invalid_value_type.ods was produced by manually editing the
    # content.xml file it contains
    expected_msg = "Unrecognized type awesome_new_type"
    with pytest.raises(ValueError, match=expected_msg):
        pd.read_excel("invalid_value_type.ods")
def test_read_writer_table():
    """A table inside a text document (.odt) can be read like a sheet."""
    # Also test reading tables from a text OpenDocument file
    # (.odt)
    result = pd.read_excel("writertable.odt", sheet_name="Table1", index_col=0)

    rows = [[1, np.nan, 7], [2, np.nan, 8], [3, np.nan, 9]]
    row_labels = pd.Index(["Row 1", "Row 2", "Row 3"], name="Header")
    expected = pd.DataFrame(
        rows,
        index=row_labels,
        columns=["Column 1", "Unnamed: 2", "Column 3"],
    )
    tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,58 @@
import re
import pytest
import pandas._testing as tm
from pandas.io.excel import ExcelWriter
# Skip this whole module when the optional "odf" package cannot be imported
odf = pytest.importorskip("odf")
# Every test in this module receives ``ext`` parametrized over these extensions
pytestmark = pytest.mark.parametrize("ext", [".ods"])
def test_write_append_mode_raises(ext):
    """Opening an odf writer with ``mode="a"`` raises ``ValueError``."""
    expected_msg = "Append mode is not supported with odf!"
    with tm.ensure_clean(ext) as f, pytest.raises(ValueError, match=expected_msg):
        ExcelWriter(f, engine="odf", mode="a")
def test_kwargs(ext):
    """Extra ``**kwargs`` reach OpenDocumentSpreadsheet and are rejected."""
    # GH 42286
    # GH 43445
    # test for error: OpenDocumentSpreadsheet does not accept any arguments
    warn_msg = re.escape("Use of **kwargs is deprecated")
    type_err = re.escape(
        "OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'"
    )
    extra = {"kwarg": 1}
    with tm.ensure_clean(ext) as f:
        with pytest.raises(TypeError, match=type_err):
            with tm.assert_produces_warning(FutureWarning, match=warn_msg):
                with ExcelWriter(f, engine="odf", **extra) as _:
                    pass
@pytest.mark.parametrize("engine_kwargs", [None, {"kwarg": 1}])
def test_engine_kwargs(ext, engine_kwargs):
    """``engine_kwargs=None`` works; an unknown engine kwarg raises ``TypeError``."""
    # GH 42286
    # GH 43445
    # test for error: OpenDocumentSpreadsheet does not accept any arguments
    with tm.ensure_clean(ext) as f:
        if engine_kwargs is None:
            with ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs) as _:
                pass
        else:
            type_err = re.escape(
                "OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'"
            )
            with pytest.raises(TypeError, match=type_err):
                ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs)

View File

@ -0,0 +1,377 @@
from pathlib import Path
import re
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame
import pandas._testing as tm
from pandas.io.excel import (
ExcelWriter,
_OpenpyxlWriter,
)
# Skip this whole module when the optional "openpyxl" package cannot be imported
openpyxl = pytest.importorskip("openpyxl")
# Every test in this module receives ``ext`` parametrized over these extensions
pytestmark = pytest.mark.parametrize("ext", [".xlsx"])
def test_to_excel_styleconverter(ext):
    """``_convert_to_style_kwargs`` yields the matching openpyxl style objects."""
    from openpyxl import styles

    hstyle = {
        "font": {"color": "00FF0000", "bold": True},
        "borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"},
        "alignment": {"horizontal": "center", "vertical": "top"},
        "fill": {"patternType": "solid", "fgColor": {"rgb": "006666FF", "tint": 0.3}},
        "number_format": {"format_code": "0.00"},
        "protection": {"locked": True, "hidden": False},
    }

    # Build the openpyxl objects that the conversion is expected to return,
    # keyed by the name used in the converted kwargs.
    thin = styles.Side(style=styles.borders.BORDER_THIN)
    expected = {
        "font": styles.Font(bold=True, color=styles.Color("00FF0000")),
        "border": styles.Border(top=thin, right=thin, bottom=thin, left=thin),
        "alignment": styles.Alignment(horizontal="center", vertical="top"),
        "fill": styles.PatternFill(
            patternType="solid", fgColor=styles.Color(rgb="006666FF", tint=0.3)
        ),
        "number_format": "0.00",
        "protection": styles.Protection(locked=True, hidden=False),
    }

    converted = _OpenpyxlWriter._convert_to_style_kwargs(hstyle)
    for key, wanted in expected.items():
        assert converted[key] == wanted
def test_write_cells_merge_styled(ext):
    """After writing a styled merged cell, B1 and A2 carry the merged font."""
    from pandas.io.formats.excel import ExcelCell

    sheet_name = "merge_styled"

    # Cells written first, each with its own font colour
    style_b1 = {"font": {"color": "00FF0000"}}
    style_a2 = {"font": {"color": "0000FF00"}}
    initial_cells = [
        ExcelCell(col=1, row=0, val=42, style=style_b1),
        ExcelCell(col=0, row=1, val=99, style=style_a2),
    ]

    # Merged cell written afterwards, with the font we expect to find
    style_merged = {"font": {"color": "000000FF", "bold": True}}
    merged_font = _OpenpyxlWriter._convert_to_style_kwargs(style_merged)["font"]
    merge_cells = [
        ExcelCell(
            col=0, row=0, val="pandas", mergestart=1, mergeend=1, style=style_merged
        )
    ]

    with tm.ensure_clean(ext) as path:
        with _OpenpyxlWriter(path) as writer:
            writer.write_cells(initial_cells, sheet_name=sheet_name)
            writer.write_cells(merge_cells, sheet_name=sheet_name)

            worksheet = writer.sheets[sheet_name]
        for address in ("B1", "A2"):
            assert worksheet[address].font == merged_font
@pytest.mark.parametrize("iso_dates", [True, False])
def test_kwargs(ext, iso_dates):
    """Deprecated ``**kwargs`` still reach the workbook, with a FutureWarning."""
    # GH 42286 GH 43445
    warn_msg = re.escape("Use of **kwargs is deprecated")
    extra = {"iso_dates": iso_dates}
    with tm.ensure_clean(ext) as f:
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            with ExcelWriter(f, engine="openpyxl", **extra) as writer:
                assert writer.book.iso_dates == iso_dates
                # ExcelWriter won't allow us to close without writing something
                DataFrame().to_excel(writer)
@pytest.mark.parametrize("iso_dates", [True, False])
def test_engine_kwargs_write(ext, iso_dates):
    """``engine_kwargs`` are applied to the workbook created for writing."""
    # GH 42286 GH 43445
    options = {"iso_dates": iso_dates}
    with tm.ensure_clean(ext) as f:
        writer_cm = ExcelWriter(f, engine="openpyxl", engine_kwargs=options)
        with writer_cm as writer:
            assert writer.book.iso_dates == iso_dates
            # ExcelWriter won't allow us to close without writing something
            DataFrame().to_excel(writer)
def test_engine_kwargs_append_invalid(ext):
    """An unknown engine kwarg in append mode surfaces as ``TypeError``."""
    # GH 43445
    # test whether an invalid engine kwargs actually raises
    type_err = re.escape(
        "load_workbook() got an unexpected keyword argument 'apple_banana'"
    )
    bad_kwargs = {"apple_banana": "fruit"}
    with tm.ensure_clean(ext) as f:
        DataFrame(["hello", "world"]).to_excel(f)
        with pytest.raises(TypeError, match=type_err):
            with ExcelWriter(
                f, engine="openpyxl", mode="a", engine_kwargs=bad_kwargs
            ) as writer:
                # ExcelWriter needs us to write something to close properly
                DataFrame(["good"]).to_excel(writer, sheet_name="Sheet2")
@pytest.mark.parametrize("data_only, expected", [(True, 0), (False, "=1+1")])
def test_engine_kwargs_append_data_only(ext, data_only, expected):
    """``data_only`` is forwarded to openpyxl when appending to a workbook."""
    # GH 43445
    # tests whether the data_only engine_kwarg actually works well for
    # openpyxl's load_workbook
    options = {"data_only": data_only}
    with tm.ensure_clean(ext) as f:
        DataFrame(["=1+1"]).to_excel(f)
        with ExcelWriter(
            f, engine="openpyxl", mode="a", engine_kwargs=options
        ) as writer:
            formula_cell = writer.sheets["Sheet1"]["B2"]
            assert formula_cell.value == expected
            # ExcelWriter needs us to write something to close properly?
            DataFrame().to_excel(writer, sheet_name="Sheet2")
@pytest.mark.parametrize(
    "mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])]
)
def test_write_append_mode(ext, mode, expected):
    """Mode "w" replaces the existing sheets, mode "a" keeps them."""
    frame = DataFrame([1], columns=["baz"])

    with tm.ensure_clean(ext) as f:
        # Seed the file with two sheets whose A1 cell holds the sheet title
        seed = openpyxl.Workbook()
        first_sheet = seed.worksheets[0]
        first_sheet.title = "foo"
        first_sheet["A1"].value = "foo"
        seed.create_sheet("bar")
        seed.worksheets[1]["A1"].value = "bar"
        seed.save(f)

        with ExcelWriter(f, engine="openpyxl", mode=mode) as writer:
            frame.to_excel(writer, sheet_name="baz", index=False)

        reloaded = openpyxl.load_workbook(f)
        titles = [sheet.title for sheet in reloaded.worksheets]
        assert titles == expected

        for position, wanted in enumerate(expected):
            assert reloaded.worksheets[position]["A1"].value == wanted
@pytest.mark.parametrize(
    "if_sheet_exists,num_sheets,expected",
    [
        ("new", 2, ["apple", "banana"]),
        ("replace", 1, ["pear"]),
        ("overlay", 1, ["pear", "banana"]),
    ],
)
def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected):
    """
    Check the result of appending to an existing sheet "foo" for each mode.

    Parameters
    ----------
    ext : str
        File extension supplied by the module-level parametrization.
    if_sheet_exists : str
        Mode passed to ``ExcelWriter``.
    num_sheets : int
        Number of sheets expected in the file afterwards.
    expected : list of str
        Expected content of the "fruit" column of sheet "foo".
    """
    # GH 40230
    df1 = DataFrame({"fruit": ["apple", "banana"]})
    df2 = DataFrame({"fruit": ["pear"]})

    with tm.ensure_clean(ext) as f:
        df1.to_excel(f, engine="openpyxl", sheet_name="foo", index=False)
        with ExcelWriter(
            f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
        ) as writer:
            df2.to_excel(writer, sheet_name="foo", index=False)

        wb = openpyxl.load_workbook(f)
        try:
            assert len(wb.sheetnames) == num_sheets
            assert wb.sheetnames[0] == "foo"
            result = pd.read_excel(wb, "foo", engine="openpyxl")
            assert list(result["fruit"]) == expected
            if len(wb.sheetnames) == 2:
                # A second sheet only exists when a new one was created for df2
                result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
                tm.assert_frame_equal(result, df2)
        finally:
            # Close the workbook even when one of the checks above fails
            wb.close()
@pytest.mark.parametrize(
    "startrow, startcol, greeting, goodbye",
    [
        (0, 0, ["poop", "world"], ["goodbye", "people"]),
        (0, 1, ["hello", "world"], ["poop", "people"]),
        (1, 0, ["hello", "poop"], ["goodbye", "people"]),
        (1, 1, ["hello", "world"], ["goodbye", "poop"]),
    ],
)
def test_append_overlay_startrow_startcol(ext, startrow, startcol, greeting, goodbye):
    """Overlaying one value at (startrow, startcol) changes only that cell."""
    base = DataFrame({"greeting": ["hello", "world"], "goodbye": ["goodbye", "people"]})
    patch = DataFrame(["poop"])

    with tm.ensure_clean(ext) as f:
        base.to_excel(f, engine="openpyxl", sheet_name="poo", index=False)
        with ExcelWriter(
            f, engine="openpyxl", mode="a", if_sheet_exists="overlay"
        ) as writer:
            # use startrow+1 because we don't have a header
            patch.to_excel(
                writer,
                index=False,
                header=False,
                startrow=startrow + 1,
                startcol=startcol,
                sheet_name="poo",
            )

        expected = DataFrame({"greeting": greeting, "goodbye": goodbye})
        result = pd.read_excel(f, sheet_name="poo", engine="openpyxl")
        tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "if_sheet_exists,msg",
    [
        (
            "invalid",
            "'invalid' is not valid for if_sheet_exists. Valid options "
            "are 'error', 'new', 'replace' and 'overlay'.",
        ),
        (
            "error",
            "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
        ),
        (
            None,
            "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
        ),
    ],
)
def test_if_sheet_exists_raises(ext, if_sheet_exists, msg):
    """
    Appending to an existing sheet raises ``ValueError`` for these modes.

    Parameters
    ----------
    ext : str
        File extension supplied by the module-level parametrization.
    if_sheet_exists : str or None
        Mode passed to ``ExcelWriter``.
    msg : str
        Exact error message expected (escaped before matching).
    """
    # GH 40230
    df = DataFrame({"fruit": ["pear"]})
    with tm.ensure_clean(ext) as f:
        # Create the file with sheet "foo" first.  This setup stays outside
        # pytest.raises so that only the append step can satisfy the check.
        df.to_excel(f, sheet_name="foo", engine="openpyxl")
        with pytest.raises(ValueError, match=re.escape(msg)):
            with ExcelWriter(
                f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
            ) as writer:
                df.to_excel(writer, sheet_name="foo")
def test_to_excel_with_openpyxl_engine(ext):
    """A styled frame can be written with the openpyxl engine without error."""
    # GH 29854

    def font_colour(val):
        # Negative values are coloured red, everything else black
        colour = "red" if val < 0 else "black"
        return f"color: {colour}"

    with tm.ensure_clean(ext) as filename:
        left = DataFrame({"A": np.linspace(1, 10, 10)})
        right = DataFrame({"B": np.linspace(1, 20, 10)})
        combined = pd.concat([left, right], axis=1)

        styled = combined.style.applymap(font_colour).highlight_max()
        styled.to_excel(filename, engine="openpyxl")
@pytest.mark.parametrize("read_only", [True, False])
def test_read_workbook(datapath, ext, read_only):
    """
    ``read_excel`` accepts an already loaded openpyxl workbook.

    The frame read from the workbook must equal the one read from the path.
    """
    # GH 39528
    filename = datapath("io", "data", "excel", "test1" + ext)
    wb = openpyxl.load_workbook(filename, read_only=read_only)
    try:
        result = pd.read_excel(wb, engine="openpyxl")
    finally:
        # Close the workbook even if reading from it raises
        wb.close()
    expected = pd.read_excel(filename)
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "header, expected_data",
    [
        (
            0,
            {
                "Title": [np.nan, "A", 1, 2, 3],
                "Unnamed: 1": [np.nan, "B", 4, 5, 6],
                "Unnamed: 2": [np.nan, "C", 7, 8, 9],
            },
        ),
        (2, {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}),
    ],
)
@pytest.mark.parametrize(
    "filename", ["dimension_missing", "dimension_small", "dimension_large"]
)
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_with_bad_dimension(
    datapath, ext, header, expected_data, filename, read_only, request
):
    """
    Files with missing or incorrect dimension information read correctly.

    With ``read_only=None`` the file is read by path; otherwise it is first
    loaded into an openpyxl workbook with that ``read_only`` setting.
    """
    # GH 38956, 39001 - no/incorrect dimension information
    path = datapath("io", "data", "excel", f"{filename}{ext}")
    if read_only is None:
        result = pd.read_excel(path, header=header)
    else:
        wb = openpyxl.load_workbook(path, read_only=read_only)
        try:
            result = pd.read_excel(wb, engine="openpyxl", header=header)
        finally:
            # Close the workbook even if reading from it raises
            wb.close()
    expected = DataFrame(expected_data)
    tm.assert_frame_equal(result, expected)
def test_append_mode_file(ext):
    """Appending rewrites the archive instead of concatenating a second one."""
    # GH 39576
    empty = DataFrame()

    with tm.ensure_clean(ext) as f:
        empty.to_excel(f, engine="openpyxl")

        with ExcelWriter(
            f, mode="a", engine="openpyxl", if_sheet_exists="new"
        ) as writer:
            empty.to_excel(writer)

        # make sure that zip files are not concatenated by making sure that
        # "docProps/app.xml" only occurs twice in the file
        marker = b"docProps/app.xml"
        raw = Path(f).read_bytes()
        # The marker cannot overlap itself, so counting occurrences is the
        # same as searching for a second hit and the absence of a third.
        assert raw.count(marker) == 2
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_with_empty_trailing_rows(datapath, ext, read_only, request):
    """
    Reading ``empty_trailing_rows`` yields exactly the expected five rows.

    With ``read_only=None`` the file is read by path; otherwise it is first
    loaded into an openpyxl workbook with that ``read_only`` setting.
    """
    # GH 39181
    path = datapath("io", "data", "excel", f"empty_trailing_rows{ext}")
    if read_only is None:
        result = pd.read_excel(path)
    else:
        wb = openpyxl.load_workbook(path, read_only=read_only)
        try:
            result = pd.read_excel(wb, engine="openpyxl")
        finally:
            # Close the workbook even if reading from it raises
            wb.close()
    expected = DataFrame(
        {
            "Title": [np.nan, "A", 1, 2, 3],
            "Unnamed: 1": [np.nan, "B", 4, 5, 6],
            "Unnamed: 2": [np.nan, "C", 7, 8, 9],
        }
    )
    tm.assert_frame_equal(result, expected)
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_empty_with_blank_row(datapath, ext, read_only):
    """
    An empty file containing one data-less row reads as an empty frame.

    With ``read_only=None`` the file is read by path; otherwise it is first
    loaded into an openpyxl workbook with that ``read_only`` setting.
    """
    # GH 39547 - empty excel file with a row that has no data
    path = datapath("io", "data", "excel", f"empty_with_blank_row{ext}")
    if read_only is None:
        result = pd.read_excel(path)
    else:
        wb = openpyxl.load_workbook(path, read_only=read_only)
        try:
            result = pd.read_excel(wb, engine="openpyxl")
        finally:
            # Close the workbook even if reading from it raises
            wb.close()
    expected = DataFrame()
    tm.assert_frame_equal(result, expected)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,167 @@
import numpy as np
import pytest
from pandas import DataFrame
import pandas._testing as tm
from pandas.io.excel import ExcelWriter
from pandas.io.formats.excel import ExcelFormatter
pytest.importorskip("jinja2")
# jinja2 is currently required for Styler.__init__(). Technically Styler.to_excel
# could compute styles and render to excel without jinja2, since there is no
# 'template' file, but this needs the import error to be delayed until render time.
def assert_equal_cell_styles(cell1, cell2):
    """
    Assert that two cells carry equal style information.

    ``alignment``, ``border``, ``fill``, ``font`` and ``protection`` are
    compared through their ``__dict__``; ``number_format`` is compared
    directly.
    """
    # TODO: should find a better way to check equality
    for part in ("alignment", "border", "fill", "font"):
        assert getattr(cell1, part).__dict__ == getattr(cell2, part).__dict__
    assert cell1.number_format == cell2.number_format
    assert cell1.protection.__dict__ == cell2.protection.__dict__
@pytest.mark.parametrize(
    "engine",
    ["xlsxwriter", "openpyxl"],
)
def test_styler_to_excel_unstyled(engine):
    """An unstyled Styler writes the same cells as ``DataFrame.to_excel``."""
    # compare DataFrame.to_excel and Styler.to_excel when no styles applied
    pytest.importorskip(engine)
    df = DataFrame(np.random.randn(2, 2))
    with tm.ensure_clean(".xlsx") as path:
        with ExcelWriter(path, engine=engine) as writer:
            df.to_excel(writer, sheet_name="dataframe")
            df.style.to_excel(writer, sheet_name="unstyled")

        openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
        wb = openpyxl.load_workbook(path)

        plain_columns = wb["dataframe"].columns
        styler_columns = wb["unstyled"].columns
        for plain_col, styler_col in zip(plain_columns, styler_columns):
            assert len(plain_col) == len(styler_col)
            for plain_cell, styler_cell in zip(plain_col, styler_col):
                assert plain_cell.value == styler_cell.value
                assert_equal_cell_styles(plain_cell, styler_cell)
# Each entry is (css declaration, attribute path followed from the loaded
# cell, expected value).  The expected value is either a single value or a
# dict keyed by writer engine.
shared_style_params = [
    (
        "background-color: #111222",
        ["fill", "fgColor", "rgb"],
        {"xlsxwriter": "FF111222", "openpyxl": "00111222"},
    ),
    (
        "color: #111222",
        ["font", "color", "value"],
        {"xlsxwriter": "FF111222", "openpyxl": "00111222"},
    ),
    ("font-family: Arial;", ["font", "name"], "arial"),
    ("font-weight: bold;", ["font", "b"], True),
    ("font-style: italic;", ["font", "i"], True),
    ("text-decoration: underline;", ["font", "u"], "single"),
    ("number-format: $??,???.00;", ["number_format"], "$??,???.00"),
    ("text-align: left;", ["alignment", "horizontal"], "left"),
    (
        "vertical-align: bottom;",
        ["alignment", "vertical"],
        # xlsxwriter fails here
        {"xlsxwriter": None, "openpyxl": "bottom"},
    ),
]
@pytest.mark.parametrize(
    "engine",
    ["xlsxwriter", "openpyxl"],
)
@pytest.mark.parametrize("css, attrs, expected", shared_style_params)
def test_styler_to_excel_basic(engine, css, attrs, expected):
    """A css declaration applied to the data cell appears in the written file."""
    pytest.importorskip(engine)
    df = DataFrame(np.random.randn(1, 1))
    styler = df.style.applymap(lambda x: css)

    with tm.ensure_clean(".xlsx") as path:
        with ExcelWriter(path, engine=engine) as writer:
            df.to_excel(writer, sheet_name="dataframe")
            styler.to_excel(writer, sheet_name="styled")

        openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
        wb = openpyxl.load_workbook(path)

        # test unstyled data cell does not have expected styles
        # test styled cell has expected styles
        plain, styled = wb["dataframe"].cell(2, 2), wb["styled"].cell(2, 2)

    # Walk the attribute path on both cells in lockstep
    for name in attrs:
        plain, styled = getattr(plain, name), getattr(styled, name)

    # ``expected`` is either one value or a per-engine mapping
    wanted = expected[engine] if isinstance(expected, dict) else expected
    assert plain is None or plain != wanted
    assert styled == wanted
@pytest.mark.parametrize(
    "engine",
    ["xlsxwriter", "openpyxl"],
)
@pytest.mark.parametrize("css, attrs, expected", shared_style_params)
def test_styler_to_excel_basic_indexes(engine, css, attrs, expected):
    """A css declaration applied to index/column headers appears in the file."""
    pytest.importorskip(engine)
    df = DataFrame(np.random.randn(1, 1))

    styler = df.style
    styler.applymap_index(lambda x: css, axis=0)
    styler.applymap_index(lambda x: css, axis=1)

    null_styler = df.style
    null_styler.applymap(lambda x: "null: css;")
    null_styler.applymap_index(lambda x: "null: css;", axis=0)
    null_styler.applymap_index(lambda x: "null: css;", axis=1)

    with tm.ensure_clean(".xlsx") as path:
        with ExcelWriter(path, engine=engine) as writer:
            null_styler.to_excel(writer, sheet_name="null_styled")
            styler.to_excel(writer, sheet_name="styled")

        openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
        wb = openpyxl.load_workbook(path)

        # test null styled index cells does not have expected styles
        # test styled cell has expected styles
        null_sheet, styled_sheet = wb["null_styled"], wb["styled"]
        null_idx, styled_idx = null_sheet.cell(2, 1), styled_sheet.cell(2, 1)
        null_col, styled_col = null_sheet.cell(1, 2), styled_sheet.cell(1, 2)

    # Walk the attribute path on all four cells in lockstep
    for name in attrs:
        null_idx, styled_idx = getattr(null_idx, name), getattr(styled_idx, name)
        null_col, styled_col = getattr(null_col, name), getattr(styled_col, name)

    # ``expected`` is either one value or a per-engine mapping
    wanted = expected[engine] if isinstance(expected, dict) else expected
    assert null_idx is None or null_idx != wanted
    assert styled_idx == wanted
    assert null_col is None or null_col != wanted
    assert styled_col == wanted
def test_styler_custom_converter():
    """A custom ``style_converter`` overrides the css-derived font colour."""
    openpyxl = pytest.importorskip("openpyxl")

    def custom_converter(css):
        # Ignore the css and always return the same font colour
        return {"font": {"color": {"rgb": "111222"}}}

    df = DataFrame(np.random.randn(1, 1))
    styler = df.style.applymap(lambda x: "color: #888999")
    with tm.ensure_clean(".xlsx") as path:
        with ExcelWriter(path, engine="openpyxl") as writer:
            formatter = ExcelFormatter(styler, style_converter=custom_converter)
            formatter.write(writer, sheet_name="custom")

        wb = openpyxl.load_workbook(path)
        data_cell = wb["custom"].cell(2, 2)
        assert data_cell.font.color.value == "00111222"

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,109 @@
import io
import pytest
from pandas.compat._optional import import_optional_dependency
import pandas as pd
import pandas._testing as tm
from pandas.tests.io.excel import xlrd_version
from pandas.util.version import Version
from pandas.io.excel import ExcelFile
from pandas.io.excel._base import inspect_excel_format
# Both xlrd and xlwt must be importable for this module to run
xlrd = pytest.importorskip("xlrd")
xlwt = pytest.importorskip("xlwt")

pytestmark = pytest.mark.filterwarnings(
    "ignore:As the xlwt package is no longer maintained:FutureWarning"
)

# Extensions used by the ``read_ext_xlrd`` fixture; for xlrd >= 2 only .xls
# error: Unsupported operand types for <= ("Version" and "None")
exts = (
    [".xls"]
    if xlrd_version >= Version("2")  # type: ignore[operator]
    else [".xls", ".xlsx", ".xlsm"]
)
@pytest.fixture(params=exts)
def read_ext_xlrd(request):
    """
    File extensions that are valid for reading Excel files with xlrd.

    Similar to read_ext, but excludes .ods, .xlsb, and for xlrd>2 .xlsx, .xlsm
    """
    extension = request.param
    return extension
def test_read_xlrd_book(read_ext_xlrd, frame):
    """An ``xlrd`` workbook can be read via ``ExcelFile`` and ``read_excel``."""
    engine = "xlrd"
    sheet_name = "SheetA"

    with tm.ensure_clean(read_ext_xlrd) as pth:
        frame.to_excel(pth, sheet_name)
        book = xlrd.open_workbook(pth)

        # First through an ExcelFile wrapping the workbook ...
        with ExcelFile(book, engine=engine) as xl:
            via_excel_file = pd.read_excel(xl, sheet_name=sheet_name, index_col=0)
            tm.assert_frame_equal(frame, via_excel_file)

        # ... then by handing the workbook to read_excel directly
        direct = pd.read_excel(book, sheet_name=sheet_name, engine=engine, index_col=0)
        tm.assert_frame_equal(frame, direct)
def test_excel_file_warning_with_xlsx_file(datapath):
    """Opening an .xlsx file warns only when openpyxl is unavailable."""
    # GH 29375
    path = datapath("io", "data", "excel", "test1.xlsx")
    openpyxl_missing = (
        import_optional_dependency("openpyxl", errors="ignore") is None
    )
    if openpyxl_missing:
        with tm.assert_produces_warning(
            FutureWarning,
            raise_on_extra_warnings=False,
            match="The xlrd engine is no longer maintained",
        ):
            ExcelFile(path, engine=None)
    else:
        with tm.assert_produces_warning(None):
            pd.read_excel(path, "Sheet1", engine=None)
def test_read_excel_warning_with_xlsx_file(datapath):
    """
    ``read_excel`` on an .xlsx file with ``engine=None``.

    With openpyxl importable no warning is expected.  Without it, xlrd >= 2
    must raise ``ValueError`` and older xlrd must emit a ``FutureWarning``.
    """
    # GH 29375
    path = datapath("io", "data", "excel", "test1.xlsx")
    has_openpyxl = import_optional_dependency("openpyxl", errors="ignore") is not None
    if has_openpyxl:
        with tm.assert_produces_warning(None):
            pd.read_excel(path, "Sheet1", engine=None)
    elif xlrd_version >= Version("2"):
        with pytest.raises(
            ValueError,
            match="Your version of xlrd is ",
        ):
            pd.read_excel(path, "Sheet1", engine=None)
    else:
        with tm.assert_produces_warning(
            FutureWarning,
            raise_on_extra_warnings=False,
            match="The xlrd engine is no longer maintained",
        ):
            pd.read_excel(path, "Sheet1", engine=None)
@pytest.mark.parametrize(
    "file_header",
    [
        b"\x09\x00\x04\x00\x07\x00\x10\x00",
        b"\x09\x02\x06\x00\x00\x00\x10\x00",
        b"\x09\x04\x06\x00\x00\x00\x10\x00",
        b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1",
    ],
)
def test_read_old_xls_files(file_header):
    """Each of these leading byte sequences is detected as the "xls" format."""
    # GH 41226
    stream = io.BytesIO(file_header)
    detected = inspect_excel_format(stream)
    assert detected == "xls"

View File

@ -0,0 +1,84 @@
import re
import warnings
import pytest
from pandas import DataFrame
import pandas._testing as tm
from pandas.io.excel import ExcelWriter
# Skip this whole module when the optional "xlsxwriter" package cannot be imported
xlsxwriter = pytest.importorskip("xlsxwriter")
# Every test in this module receives ``ext`` parametrized over these extensions
pytestmark = pytest.mark.parametrize("ext", [".xlsx"])
def test_column_format(ext):
    """
    A column number format set through xlsxwriter is applied to the cells.

    The frame is written with the xlsxwriter engine, a number format is
    attached to column B, and the file is read back with openpyxl to check
    the format stored on cell B2.
    """
    # Test that column formats are applied to cells. Test for issue #9167.
    # Applicable to xlsxwriter only.
    with warnings.catch_warnings():
        # Ignore the openpyxl lxml warning.
        warnings.simplefilter("ignore")
        openpyxl = pytest.importorskip("openpyxl")

    with tm.ensure_clean(ext) as path:
        frame = DataFrame({"A": [123456, 123456], "B": [123456, 123456]})

        # Request xlsxwriter explicitly: the workbook calls below
        # (worksheets(), add_format, set_column) are xlsxwriter APIs, so the
        # test must not depend on which engine is the configured default.
        with ExcelWriter(path, engine="xlsxwriter") as writer:
            frame.to_excel(writer)

            # Add a number format to col B and ensure it is applied to cells.
            num_format = "#,##0"
            write_workbook = writer.book
            write_worksheet = write_workbook.worksheets()[0]
            col_format = write_workbook.add_format({"num_format": num_format})
            write_worksheet.set_column("B:B", None, col_format)

        read_workbook = openpyxl.load_workbook(path)
        try:
            read_worksheet = read_workbook["Sheet1"]
        except TypeError:
            # compat
            read_worksheet = read_workbook.get_sheet_by_name(name="Sheet1")

        # Get the number format from the cell.
        try:
            cell = read_worksheet["B2"]
        except TypeError:
            # compat
            cell = read_worksheet.cell("B2")

        try:
            read_num_format = cell.number_format
        except AttributeError:
            read_num_format = cell.style.number_format._format_code

        assert read_num_format == num_format
def test_write_append_mode_raises(ext):
    """Opening an xlsxwriter writer with ``mode="a"`` raises ``ValueError``."""
    expected_msg = "Append mode is not supported with xlsxwriter!"
    with tm.ensure_clean(ext) as f, pytest.raises(ValueError, match=expected_msg):
        ExcelWriter(f, engine="xlsxwriter", mode="a")
@pytest.mark.parametrize("nan_inf_to_errors", [True, False])
def test_kwargs(ext, nan_inf_to_errors):
    """Deprecated ``**kwargs`` still reach the workbook, with a FutureWarning."""
    # GH 42286
    warn_msg = re.escape("Use of **kwargs is deprecated")
    extra = {"options": {"nan_inf_to_errors": nan_inf_to_errors}}
    with tm.ensure_clean(ext) as f:
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            with ExcelWriter(f, engine="xlsxwriter", **extra) as writer:
                assert writer.book.nan_inf_to_errors == nan_inf_to_errors
@pytest.mark.parametrize("nan_inf_to_errors", [True, False])
def test_engine_kwargs(ext, nan_inf_to_errors):
    """``engine_kwargs`` options are applied to the xlsxwriter workbook."""
    # GH 42286
    options = {"options": {"nan_inf_to_errors": nan_inf_to_errors}}
    with tm.ensure_clean(ext) as f:
        writer_cm = ExcelWriter(f, engine="xlsxwriter", engine_kwargs=options)
        with writer_cm as writer:
            assert writer.book.nan_inf_to_errors == nan_inf_to_errors

View File

@ -0,0 +1,127 @@
import re
import numpy as np
import pytest
from pandas import (
DataFrame,
MultiIndex,
options,
)
import pandas._testing as tm
from pandas.io.excel import (
ExcelWriter,
_XlwtWriter,
)
# Skip this whole module when the optional "xlwt" package cannot be imported
xlwt = pytest.importorskip("xlwt")
# Every test in this module receives ``ext`` parametrized over these extensions.
# NOTE(review): the trailing comma in "ext," looks unintentional, since the
# tests below take a single ``ext`` argument -- confirm before normalizing.
pytestmark = pytest.mark.parametrize("ext,", [".xls"])
def test_excel_raise_error_on_multiindex_columns_and_no_index(ext):
    """MultiIndex columns with ``index=False`` raise ``NotImplementedError``."""
    # MultiIndex as columns is not yet implemented 9794
    columns = MultiIndex.from_tuples(
        [("site", ""), ("2014", "height"), ("2014", "weight")]
    )
    df = DataFrame(np.random.randn(10, 3), columns=columns)

    msg = (
        "Writing to Excel with MultiIndex columns and no index "
        "\\('index'=False\\) is not yet implemented."
    )
    with pytest.raises(NotImplementedError, match=msg), tm.ensure_clean(ext) as path:
        df.to_excel(path, index=False)
def test_excel_multiindex_columns_and_index_true(ext):
    """MultiIndex columns can be written when ``index=True``."""
    column_tuples = [("site", ""), ("2014", "height"), ("2014", "weight")]
    columns = MultiIndex.from_tuples(column_tuples)
    df = DataFrame(np.random.randn(10, 3), columns=columns)
    with tm.ensure_clean(ext) as path:
        df.to_excel(path, index=True)
def test_excel_multiindex_index(ext):
    """A frame with a MultiIndex row index is written without error."""
    # MultiIndex as index works so assert no error #9794
    index_tuples = [("site", ""), ("2014", "height"), ("2014", "weight")]
    row_index = MultiIndex.from_tuples(index_tuples)
    df = DataFrame(np.random.randn(3, 10), index=row_index)
    with tm.ensure_clean(ext) as path:
        df.to_excel(path, index=False)
def test_to_excel_styleconverter(ext):
    """``_convert_to_style`` maps font, border and alignment onto xlwt values."""
    hstyle = {
        "font": {"bold": True},
        "borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"},
        "alignment": {"horizontal": "center", "vertical": "top"},
    }

    converted = _XlwtWriter._convert_to_style(hstyle)

    assert converted.font.bold
    for side in ("top", "right", "bottom", "left"):
        assert xlwt.Borders.THIN == getattr(converted.borders, side)
    assert xlwt.Alignment.HORZ_CENTER == converted.alignment.horz
    assert xlwt.Alignment.VERT_TOP == converted.alignment.vert
def test_write_append_mode_raises(ext):
    """Opening an xlwt writer with ``mode="a"`` raises ``ValueError``."""
    expected_msg = "Append mode is not supported with xlwt!"
    with tm.ensure_clean(ext) as f, pytest.raises(ValueError, match=expected_msg):
        ExcelWriter(f, engine="xlwt", mode="a")
def test_to_excel_xlwt_warning(ext):
    """Writing to this extension emits the xlwt ``FutureWarning``."""
    # GH 26552
    df = DataFrame(np.random.randn(3, 10))
    warn_msg = "As the xlwt package is no longer maintained"
    with tm.ensure_clean(ext) as path:
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            df.to_excel(path)
def test_option_xls_writer_deprecated(ext):
    """Setting ``io.excel.xls.writer`` emits the xlwt ``FutureWarning``."""
    # GH 26552
    warn_msg = "As the xlwt package is no longer maintained"
    with tm.assert_produces_warning(
        FutureWarning, match=warn_msg, check_stacklevel=False
    ):
        options.io.excel.xls.writer = "xlwt"
@pytest.mark.parametrize("style_compression", [0, 2])
def test_kwargs(ext, style_compression):
    """Deprecated ``**kwargs`` still reach the workbook, with a FutureWarning."""
    # GH 42286
    warn_msg = re.escape("Use of **kwargs is deprecated")
    extra = {"style_compression": style_compression}
    with tm.ensure_clean(ext) as f:
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            with ExcelWriter(f, engine="xlwt", **extra) as writer:
                workbook_styles = writer.book._Workbook__styles
                assert workbook_styles.style_compression == style_compression
                # xlwt won't allow us to close without writing something
                DataFrame().to_excel(writer)
@pytest.mark.parametrize("style_compression", [0, 2])
def test_engine_kwargs(ext, style_compression):
    """``engine_kwargs`` are applied to the xlwt workbook."""
    # GH 42286
    options = {"style_compression": style_compression}
    with tm.ensure_clean(ext) as f:
        writer_cm = ExcelWriter(f, engine="xlwt", engine_kwargs=options)
        with writer_cm as writer:
            workbook_styles = writer.book._Workbook__styles
            assert workbook_styles.style_compression == style_compression
            # xlwt won't allow us to close without writing something
            DataFrame().to_excel(writer)