433 lines
15 KiB
Python
433 lines
15 KiB
Python
import contextlib
|
|
from pathlib import Path
|
|
import re
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
from pandas.compat import is_platform_windows
|
|
|
|
import pandas as pd
|
|
from pandas import DataFrame
|
|
import pandas._testing as tm
|
|
|
|
from pandas.io.excel import (
|
|
ExcelWriter,
|
|
_OpenpyxlWriter,
|
|
)
|
|
from pandas.io.excel._openpyxl import OpenpyxlReader
|
|
|
|
openpyxl = pytest.importorskip("openpyxl")
|
|
|
|
if is_platform_windows():
|
|
pytestmark = pytest.mark.single_cpu
|
|
|
|
|
|
@pytest.fixture
|
|
def ext():
|
|
return ".xlsx"
|
|
|
|
|
|
def test_to_excel_styleconverter():
|
|
from openpyxl import styles
|
|
|
|
hstyle = {
|
|
"font": {"color": "00FF0000", "bold": True},
|
|
"borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"},
|
|
"alignment": {"horizontal": "center", "vertical": "top"},
|
|
"fill": {"patternType": "solid", "fgColor": {"rgb": "006666FF", "tint": 0.3}},
|
|
"number_format": {"format_code": "0.00"},
|
|
"protection": {"locked": True, "hidden": False},
|
|
}
|
|
|
|
font_color = styles.Color("00FF0000")
|
|
font = styles.Font(bold=True, color=font_color)
|
|
side = styles.Side(style=styles.borders.BORDER_THIN)
|
|
border = styles.Border(top=side, right=side, bottom=side, left=side)
|
|
alignment = styles.Alignment(horizontal="center", vertical="top")
|
|
fill_color = styles.Color(rgb="006666FF", tint=0.3)
|
|
fill = styles.PatternFill(patternType="solid", fgColor=fill_color)
|
|
|
|
number_format = "0.00"
|
|
|
|
protection = styles.Protection(locked=True, hidden=False)
|
|
|
|
kw = _OpenpyxlWriter._convert_to_style_kwargs(hstyle)
|
|
assert kw["font"] == font
|
|
assert kw["border"] == border
|
|
assert kw["alignment"] == alignment
|
|
assert kw["fill"] == fill
|
|
assert kw["number_format"] == number_format
|
|
assert kw["protection"] == protection
|
|
|
|
|
|
def test_write_cells_merge_styled(ext):
|
|
from pandas.io.formats.excel import ExcelCell
|
|
|
|
sheet_name = "merge_styled"
|
|
|
|
sty_b1 = {"font": {"color": "00FF0000"}}
|
|
sty_a2 = {"font": {"color": "0000FF00"}}
|
|
|
|
initial_cells = [
|
|
ExcelCell(col=1, row=0, val=42, style=sty_b1),
|
|
ExcelCell(col=0, row=1, val=99, style=sty_a2),
|
|
]
|
|
|
|
sty_merged = {"font": {"color": "000000FF", "bold": True}}
|
|
sty_kwargs = _OpenpyxlWriter._convert_to_style_kwargs(sty_merged)
|
|
openpyxl_sty_merged = sty_kwargs["font"]
|
|
merge_cells = [
|
|
ExcelCell(
|
|
col=0, row=0, val="pandas", mergestart=1, mergeend=1, style=sty_merged
|
|
)
|
|
]
|
|
|
|
with tm.ensure_clean(ext) as path:
|
|
with _OpenpyxlWriter(path) as writer:
|
|
writer._write_cells(initial_cells, sheet_name=sheet_name)
|
|
writer._write_cells(merge_cells, sheet_name=sheet_name)
|
|
|
|
wks = writer.sheets[sheet_name]
|
|
xcell_b1 = wks["B1"]
|
|
xcell_a2 = wks["A2"]
|
|
assert xcell_b1.font == openpyxl_sty_merged
|
|
assert xcell_a2.font == openpyxl_sty_merged
|
|
|
|
|
|
@pytest.mark.parametrize("iso_dates", [True, False])
|
|
def test_engine_kwargs_write(ext, iso_dates):
|
|
# GH 42286 GH 43445
|
|
engine_kwargs = {"iso_dates": iso_dates}
|
|
with tm.ensure_clean(ext) as f:
|
|
with ExcelWriter(f, engine="openpyxl", engine_kwargs=engine_kwargs) as writer:
|
|
assert writer.book.iso_dates == iso_dates
|
|
# ExcelWriter won't allow us to close without writing something
|
|
DataFrame().to_excel(writer)
|
|
|
|
|
|
def test_engine_kwargs_append_invalid(ext):
|
|
# GH 43445
|
|
# test whether an invalid engine kwargs actually raises
|
|
with tm.ensure_clean(ext) as f:
|
|
DataFrame(["hello", "world"]).to_excel(f)
|
|
with pytest.raises(
|
|
TypeError,
|
|
match=re.escape(
|
|
"load_workbook() got an unexpected keyword argument 'apple_banana'"
|
|
),
|
|
):
|
|
with ExcelWriter(
|
|
f, engine="openpyxl", mode="a", engine_kwargs={"apple_banana": "fruit"}
|
|
) as writer:
|
|
# ExcelWriter needs us to write something to close properly
|
|
DataFrame(["good"]).to_excel(writer, sheet_name="Sheet2")
|
|
|
|
|
|
@pytest.mark.parametrize("data_only, expected", [(True, 0), (False, "=1+1")])
|
|
def test_engine_kwargs_append_data_only(ext, data_only, expected):
|
|
# GH 43445
|
|
# tests whether the data_only engine_kwarg actually works well for
|
|
# openpyxl's load_workbook
|
|
with tm.ensure_clean(ext) as f:
|
|
DataFrame(["=1+1"]).to_excel(f)
|
|
with ExcelWriter(
|
|
f, engine="openpyxl", mode="a", engine_kwargs={"data_only": data_only}
|
|
) as writer:
|
|
assert writer.sheets["Sheet1"]["B2"].value == expected
|
|
# ExcelWriter needs us to writer something to close properly?
|
|
DataFrame().to_excel(writer, sheet_name="Sheet2")
|
|
|
|
# ensure that data_only also works for reading
|
|
# and that formulas/values roundtrip
|
|
assert (
|
|
pd.read_excel(
|
|
f,
|
|
sheet_name="Sheet1",
|
|
engine="openpyxl",
|
|
engine_kwargs={"data_only": data_only},
|
|
).iloc[0, 1]
|
|
== expected
|
|
)
|
|
|
|
|
|
@pytest.mark.parametrize("kwarg_name", ["read_only", "data_only"])
|
|
@pytest.mark.parametrize("kwarg_value", [True, False])
|
|
def test_engine_kwargs_append_reader(datapath, ext, kwarg_name, kwarg_value):
|
|
# GH 55027
|
|
# test that `read_only` and `data_only` can be passed to
|
|
# `openpyxl.reader.excel.load_workbook` via `engine_kwargs`
|
|
filename = datapath("io", "data", "excel", "test1" + ext)
|
|
with contextlib.closing(
|
|
OpenpyxlReader(filename, engine_kwargs={kwarg_name: kwarg_value})
|
|
) as reader:
|
|
assert getattr(reader.book, kwarg_name) == kwarg_value
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])]
|
|
)
|
|
def test_write_append_mode(ext, mode, expected):
|
|
df = DataFrame([1], columns=["baz"])
|
|
|
|
with tm.ensure_clean(ext) as f:
|
|
wb = openpyxl.Workbook()
|
|
wb.worksheets[0].title = "foo"
|
|
wb.worksheets[0]["A1"].value = "foo"
|
|
wb.create_sheet("bar")
|
|
wb.worksheets[1]["A1"].value = "bar"
|
|
wb.save(f)
|
|
|
|
with ExcelWriter(f, engine="openpyxl", mode=mode) as writer:
|
|
df.to_excel(writer, sheet_name="baz", index=False)
|
|
|
|
with contextlib.closing(openpyxl.load_workbook(f)) as wb2:
|
|
result = [sheet.title for sheet in wb2.worksheets]
|
|
assert result == expected
|
|
|
|
for index, cell_value in enumerate(expected):
|
|
assert wb2.worksheets[index]["A1"].value == cell_value
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"if_sheet_exists,num_sheets,expected",
|
|
[
|
|
("new", 2, ["apple", "banana"]),
|
|
("replace", 1, ["pear"]),
|
|
("overlay", 1, ["pear", "banana"]),
|
|
],
|
|
)
|
|
def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected):
|
|
# GH 40230
|
|
df1 = DataFrame({"fruit": ["apple", "banana"]})
|
|
df2 = DataFrame({"fruit": ["pear"]})
|
|
|
|
with tm.ensure_clean(ext) as f:
|
|
df1.to_excel(f, engine="openpyxl", sheet_name="foo", index=False)
|
|
with ExcelWriter(
|
|
f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
|
|
) as writer:
|
|
df2.to_excel(writer, sheet_name="foo", index=False)
|
|
|
|
with contextlib.closing(openpyxl.load_workbook(f)) as wb:
|
|
assert len(wb.sheetnames) == num_sheets
|
|
assert wb.sheetnames[0] == "foo"
|
|
result = pd.read_excel(wb, "foo", engine="openpyxl")
|
|
assert list(result["fruit"]) == expected
|
|
if len(wb.sheetnames) == 2:
|
|
result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
|
|
tm.assert_frame_equal(result, df2)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"startrow, startcol, greeting, goodbye",
|
|
[
|
|
(0, 0, ["poop", "world"], ["goodbye", "people"]),
|
|
(0, 1, ["hello", "world"], ["poop", "people"]),
|
|
(1, 0, ["hello", "poop"], ["goodbye", "people"]),
|
|
(1, 1, ["hello", "world"], ["goodbye", "poop"]),
|
|
],
|
|
)
|
|
def test_append_overlay_startrow_startcol(ext, startrow, startcol, greeting, goodbye):
|
|
df1 = DataFrame({"greeting": ["hello", "world"], "goodbye": ["goodbye", "people"]})
|
|
df2 = DataFrame(["poop"])
|
|
|
|
with tm.ensure_clean(ext) as f:
|
|
df1.to_excel(f, engine="openpyxl", sheet_name="poo", index=False)
|
|
with ExcelWriter(
|
|
f, engine="openpyxl", mode="a", if_sheet_exists="overlay"
|
|
) as writer:
|
|
# use startrow+1 because we don't have a header
|
|
df2.to_excel(
|
|
writer,
|
|
index=False,
|
|
header=False,
|
|
startrow=startrow + 1,
|
|
startcol=startcol,
|
|
sheet_name="poo",
|
|
)
|
|
|
|
result = pd.read_excel(f, sheet_name="poo", engine="openpyxl")
|
|
expected = DataFrame({"greeting": greeting, "goodbye": goodbye})
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"if_sheet_exists,msg",
|
|
[
|
|
(
|
|
"invalid",
|
|
"'invalid' is not valid for if_sheet_exists. Valid options "
|
|
"are 'error', 'new', 'replace' and 'overlay'.",
|
|
),
|
|
(
|
|
"error",
|
|
"Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
|
|
),
|
|
(
|
|
None,
|
|
"Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
|
|
),
|
|
],
|
|
)
|
|
def test_if_sheet_exists_raises(ext, if_sheet_exists, msg):
|
|
# GH 40230
|
|
df = DataFrame({"fruit": ["pear"]})
|
|
with tm.ensure_clean(ext) as f:
|
|
with pytest.raises(ValueError, match=re.escape(msg)):
|
|
df.to_excel(f, sheet_name="foo", engine="openpyxl")
|
|
with ExcelWriter(
|
|
f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
|
|
) as writer:
|
|
df.to_excel(writer, sheet_name="foo")
|
|
|
|
|
|
def test_to_excel_with_openpyxl_engine(ext):
|
|
# GH 29854
|
|
with tm.ensure_clean(ext) as filename:
|
|
df1 = DataFrame({"A": np.linspace(1, 10, 10)})
|
|
df2 = DataFrame({"B": np.linspace(1, 20, 10)})
|
|
df = pd.concat([df1, df2], axis=1)
|
|
styled = df.style.map(
|
|
lambda val: f"color: {'red' if val < 0 else 'black'}"
|
|
).highlight_max()
|
|
|
|
styled.to_excel(filename, engine="openpyxl")
|
|
|
|
|
|
@pytest.mark.parametrize("read_only", [True, False])
|
|
def test_read_workbook(datapath, ext, read_only):
|
|
# GH 39528
|
|
filename = datapath("io", "data", "excel", "test1" + ext)
|
|
with contextlib.closing(
|
|
openpyxl.load_workbook(filename, read_only=read_only)
|
|
) as wb:
|
|
result = pd.read_excel(wb, engine="openpyxl")
|
|
expected = pd.read_excel(filename)
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"header, expected_data",
|
|
[
|
|
(
|
|
0,
|
|
{
|
|
"Title": [np.nan, "A", 1, 2, 3],
|
|
"Unnamed: 1": [np.nan, "B", 4, 5, 6],
|
|
"Unnamed: 2": [np.nan, "C", 7, 8, 9],
|
|
},
|
|
),
|
|
(2, {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}),
|
|
],
|
|
)
|
|
@pytest.mark.parametrize(
|
|
"filename", ["dimension_missing", "dimension_small", "dimension_large"]
|
|
)
|
|
# When read_only is None, use read_excel instead of a workbook
|
|
@pytest.mark.parametrize("read_only", [True, False, None])
|
|
def test_read_with_bad_dimension(
|
|
datapath, ext, header, expected_data, filename, read_only
|
|
):
|
|
# GH 38956, 39001 - no/incorrect dimension information
|
|
path = datapath("io", "data", "excel", f"{filename}{ext}")
|
|
if read_only is None:
|
|
result = pd.read_excel(path, header=header)
|
|
else:
|
|
with contextlib.closing(
|
|
openpyxl.load_workbook(path, read_only=read_only)
|
|
) as wb:
|
|
result = pd.read_excel(wb, engine="openpyxl", header=header)
|
|
expected = DataFrame(expected_data)
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
def test_append_mode_file(ext):
|
|
# GH 39576
|
|
df = DataFrame()
|
|
|
|
with tm.ensure_clean(ext) as f:
|
|
df.to_excel(f, engine="openpyxl")
|
|
|
|
with ExcelWriter(
|
|
f, mode="a", engine="openpyxl", if_sheet_exists="new"
|
|
) as writer:
|
|
df.to_excel(writer)
|
|
|
|
# make sure that zip files are not concatenated by making sure that
|
|
# "docProps/app.xml" only occurs twice in the file
|
|
data = Path(f).read_bytes()
|
|
first = data.find(b"docProps/app.xml")
|
|
second = data.find(b"docProps/app.xml", first + 1)
|
|
third = data.find(b"docProps/app.xml", second + 1)
|
|
assert second != -1 and third == -1
|
|
|
|
|
|
# When read_only is None, use read_excel instead of a workbook
|
|
@pytest.mark.parametrize("read_only", [True, False, None])
|
|
def test_read_with_empty_trailing_rows(datapath, ext, read_only):
|
|
# GH 39181
|
|
path = datapath("io", "data", "excel", f"empty_trailing_rows{ext}")
|
|
if read_only is None:
|
|
result = pd.read_excel(path)
|
|
else:
|
|
with contextlib.closing(
|
|
openpyxl.load_workbook(path, read_only=read_only)
|
|
) as wb:
|
|
result = pd.read_excel(wb, engine="openpyxl")
|
|
expected = DataFrame(
|
|
{
|
|
"Title": [np.nan, "A", 1, 2, 3],
|
|
"Unnamed: 1": [np.nan, "B", 4, 5, 6],
|
|
"Unnamed: 2": [np.nan, "C", 7, 8, 9],
|
|
}
|
|
)
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
# When read_only is None, use read_excel instead of a workbook
|
|
@pytest.mark.parametrize("read_only", [True, False, None])
|
|
def test_read_empty_with_blank_row(datapath, ext, read_only):
|
|
# GH 39547 - empty excel file with a row that has no data
|
|
path = datapath("io", "data", "excel", f"empty_with_blank_row{ext}")
|
|
if read_only is None:
|
|
result = pd.read_excel(path)
|
|
else:
|
|
with contextlib.closing(
|
|
openpyxl.load_workbook(path, read_only=read_only)
|
|
) as wb:
|
|
result = pd.read_excel(wb, engine="openpyxl")
|
|
expected = DataFrame()
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
def test_book_and_sheets_consistent(ext):
|
|
# GH#45687 - Ensure sheets is updated if user modifies book
|
|
with tm.ensure_clean(ext) as f:
|
|
with ExcelWriter(f, engine="openpyxl") as writer:
|
|
assert writer.sheets == {}
|
|
sheet = writer.book.create_sheet("test_name", 0)
|
|
assert writer.sheets == {"test_name": sheet}
|
|
|
|
|
|
def test_ints_spelled_with_decimals(datapath, ext):
|
|
# GH 46988 - openpyxl returns this sheet with floats
|
|
path = datapath("io", "data", "excel", f"ints_spelled_with_decimals{ext}")
|
|
result = pd.read_excel(path)
|
|
expected = DataFrame(range(2, 12), columns=[1])
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
def test_read_multiindex_header_no_index_names(datapath, ext):
|
|
# GH#47487
|
|
path = datapath("io", "data", "excel", f"multiindex_no_index_names{ext}")
|
|
result = pd.read_excel(path, index_col=[0, 1, 2], header=[0, 1, 2])
|
|
expected = DataFrame(
|
|
[[np.nan, "x", "x", "x"], ["x", np.nan, np.nan, np.nan]],
|
|
columns=pd.MultiIndex.from_tuples(
|
|
[("X", "Y", "A1"), ("X", "Y", "A2"), ("XX", "YY", "B1"), ("XX", "YY", "B2")]
|
|
),
|
|
index=pd.MultiIndex.from_tuples([("A", "AA", "AAA"), ("A", "BB", "BBB")]),
|
|
)
|
|
tm.assert_frame_equal(result, expected)
|