640 lines
19 KiB
Python
640 lines
19 KiB
Python
from __future__ import annotations
|
|
|
|
import mmap
|
|
from typing import (
|
|
TYPE_CHECKING,
|
|
Any,
|
|
cast,
|
|
)
|
|
|
|
import numpy as np
|
|
|
|
from pandas.compat._optional import import_optional_dependency
|
|
from pandas.util._decorators import doc
|
|
|
|
from pandas.core.shared_docs import _shared_docs
|
|
|
|
from pandas.io.excel._base import (
|
|
BaseExcelReader,
|
|
ExcelWriter,
|
|
)
|
|
from pandas.io.excel._util import (
|
|
combine_kwargs,
|
|
validate_freeze_panes,
|
|
)
|
|
|
|
if TYPE_CHECKING:
|
|
from openpyxl import Workbook
|
|
from openpyxl.descriptors.serialisable import Serialisable
|
|
|
|
from pandas._typing import (
|
|
ExcelWriterIfSheetExists,
|
|
FilePath,
|
|
ReadBuffer,
|
|
Scalar,
|
|
StorageOptions,
|
|
WriteExcelBuffer,
|
|
)
|
|
|
|
|
|
class OpenpyxlWriter(ExcelWriter):
|
|
_engine = "openpyxl"
|
|
_supported_extensions = (".xlsx", ".xlsm")
|
|
|
|
def __init__(
|
|
self,
|
|
path: FilePath | WriteExcelBuffer | ExcelWriter,
|
|
engine: str | None = None,
|
|
date_format: str | None = None,
|
|
datetime_format: str | None = None,
|
|
mode: str = "w",
|
|
storage_options: StorageOptions | None = None,
|
|
if_sheet_exists: ExcelWriterIfSheetExists | None = None,
|
|
engine_kwargs: dict[str, Any] | None = None,
|
|
**kwargs,
|
|
) -> None:
|
|
# Use the openpyxl module as the Excel writer.
|
|
from openpyxl.workbook import Workbook
|
|
|
|
engine_kwargs = combine_kwargs(engine_kwargs, kwargs)
|
|
|
|
super().__init__(
|
|
path,
|
|
mode=mode,
|
|
storage_options=storage_options,
|
|
if_sheet_exists=if_sheet_exists,
|
|
engine_kwargs=engine_kwargs,
|
|
)
|
|
|
|
# ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from
|
|
# the file and later write to it
|
|
if "r+" in self._mode: # Load from existing workbook
|
|
from openpyxl import load_workbook
|
|
|
|
try:
|
|
self._book = load_workbook(self._handles.handle, **engine_kwargs)
|
|
except TypeError:
|
|
self._handles.handle.close()
|
|
raise
|
|
self._handles.handle.seek(0)
|
|
else:
|
|
# Create workbook object with default optimized_write=True.
|
|
try:
|
|
self._book = Workbook(**engine_kwargs)
|
|
except TypeError:
|
|
self._handles.handle.close()
|
|
raise
|
|
|
|
if self.book.worksheets:
|
|
self.book.remove(self.book.worksheets[0])
|
|
|
|
@property
|
|
def book(self) -> Workbook:
|
|
"""
|
|
Book instance of class openpyxl.workbook.Workbook.
|
|
|
|
This attribute can be used to access engine-specific features.
|
|
"""
|
|
return self._book
|
|
|
|
@property
|
|
def sheets(self) -> dict[str, Any]:
|
|
"""Mapping of sheet names to sheet objects."""
|
|
result = {name: self.book[name] for name in self.book.sheetnames}
|
|
return result
|
|
|
|
def _save(self) -> None:
|
|
"""
|
|
Save workbook to disk.
|
|
"""
|
|
self.book.save(self._handles.handle)
|
|
if "r+" in self._mode and not isinstance(self._handles.handle, mmap.mmap):
|
|
# truncate file to the written content
|
|
self._handles.handle.truncate()
|
|
|
|
@classmethod
|
|
def _convert_to_style_kwargs(cls, style_dict: dict) -> dict[str, Serialisable]:
|
|
"""
|
|
Convert a style_dict to a set of kwargs suitable for initializing
|
|
or updating-on-copy an openpyxl v2 style object.
|
|
|
|
Parameters
|
|
----------
|
|
style_dict : dict
|
|
A dict with zero or more of the following keys (or their synonyms).
|
|
'font'
|
|
'fill'
|
|
'border' ('borders')
|
|
'alignment'
|
|
'number_format'
|
|
'protection'
|
|
|
|
Returns
|
|
-------
|
|
style_kwargs : dict
|
|
A dict with the same, normalized keys as ``style_dict`` but each
|
|
value has been replaced with a native openpyxl style object of the
|
|
appropriate class.
|
|
"""
|
|
_style_key_map = {"borders": "border"}
|
|
|
|
style_kwargs: dict[str, Serialisable] = {}
|
|
for k, v in style_dict.items():
|
|
k = _style_key_map.get(k, k)
|
|
_conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None)
|
|
new_v = _conv_to_x(v)
|
|
if new_v:
|
|
style_kwargs[k] = new_v
|
|
|
|
return style_kwargs
|
|
|
|
@classmethod
|
|
def _convert_to_color(cls, color_spec):
|
|
"""
|
|
Convert ``color_spec`` to an openpyxl v2 Color object.
|
|
|
|
Parameters
|
|
----------
|
|
color_spec : str, dict
|
|
A 32-bit ARGB hex string, or a dict with zero or more of the
|
|
following keys.
|
|
'rgb'
|
|
'indexed'
|
|
'auto'
|
|
'theme'
|
|
'tint'
|
|
'index'
|
|
'type'
|
|
|
|
Returns
|
|
-------
|
|
color : openpyxl.styles.Color
|
|
"""
|
|
from openpyxl.styles import Color
|
|
|
|
if isinstance(color_spec, str):
|
|
return Color(color_spec)
|
|
else:
|
|
return Color(**color_spec)
|
|
|
|
@classmethod
|
|
def _convert_to_font(cls, font_dict):
|
|
"""
|
|
Convert ``font_dict`` to an openpyxl v2 Font object.
|
|
|
|
Parameters
|
|
----------
|
|
font_dict : dict
|
|
A dict with zero or more of the following keys (or their synonyms).
|
|
'name'
|
|
'size' ('sz')
|
|
'bold' ('b')
|
|
'italic' ('i')
|
|
'underline' ('u')
|
|
'strikethrough' ('strike')
|
|
'color'
|
|
'vertAlign' ('vertalign')
|
|
'charset'
|
|
'scheme'
|
|
'family'
|
|
'outline'
|
|
'shadow'
|
|
'condense'
|
|
|
|
Returns
|
|
-------
|
|
font : openpyxl.styles.Font
|
|
"""
|
|
from openpyxl.styles import Font
|
|
|
|
_font_key_map = {
|
|
"sz": "size",
|
|
"b": "bold",
|
|
"i": "italic",
|
|
"u": "underline",
|
|
"strike": "strikethrough",
|
|
"vertalign": "vertAlign",
|
|
}
|
|
|
|
font_kwargs = {}
|
|
for k, v in font_dict.items():
|
|
k = _font_key_map.get(k, k)
|
|
if k == "color":
|
|
v = cls._convert_to_color(v)
|
|
font_kwargs[k] = v
|
|
|
|
return Font(**font_kwargs)
|
|
|
|
@classmethod
|
|
def _convert_to_stop(cls, stop_seq):
|
|
"""
|
|
Convert ``stop_seq`` to a list of openpyxl v2 Color objects,
|
|
suitable for initializing the ``GradientFill`` ``stop`` parameter.
|
|
|
|
Parameters
|
|
----------
|
|
stop_seq : iterable
|
|
An iterable that yields objects suitable for consumption by
|
|
``_convert_to_color``.
|
|
|
|
Returns
|
|
-------
|
|
stop : list of openpyxl.styles.Color
|
|
"""
|
|
return map(cls._convert_to_color, stop_seq)
|
|
|
|
@classmethod
|
|
def _convert_to_fill(cls, fill_dict: dict[str, Any]):
|
|
"""
|
|
Convert ``fill_dict`` to an openpyxl v2 Fill object.
|
|
|
|
Parameters
|
|
----------
|
|
fill_dict : dict
|
|
A dict with one or more of the following keys (or their synonyms),
|
|
'fill_type' ('patternType', 'patterntype')
|
|
'start_color' ('fgColor', 'fgcolor')
|
|
'end_color' ('bgColor', 'bgcolor')
|
|
or one or more of the following keys (or their synonyms).
|
|
'type' ('fill_type')
|
|
'degree'
|
|
'left'
|
|
'right'
|
|
'top'
|
|
'bottom'
|
|
'stop'
|
|
|
|
Returns
|
|
-------
|
|
fill : openpyxl.styles.Fill
|
|
"""
|
|
from openpyxl.styles import (
|
|
GradientFill,
|
|
PatternFill,
|
|
)
|
|
|
|
_pattern_fill_key_map = {
|
|
"patternType": "fill_type",
|
|
"patterntype": "fill_type",
|
|
"fgColor": "start_color",
|
|
"fgcolor": "start_color",
|
|
"bgColor": "end_color",
|
|
"bgcolor": "end_color",
|
|
}
|
|
|
|
_gradient_fill_key_map = {"fill_type": "type"}
|
|
|
|
pfill_kwargs = {}
|
|
gfill_kwargs = {}
|
|
for k, v in fill_dict.items():
|
|
pk = _pattern_fill_key_map.get(k)
|
|
gk = _gradient_fill_key_map.get(k)
|
|
if pk in ["start_color", "end_color"]:
|
|
v = cls._convert_to_color(v)
|
|
if gk == "stop":
|
|
v = cls._convert_to_stop(v)
|
|
if pk:
|
|
pfill_kwargs[pk] = v
|
|
elif gk:
|
|
gfill_kwargs[gk] = v
|
|
else:
|
|
pfill_kwargs[k] = v
|
|
gfill_kwargs[k] = v
|
|
|
|
try:
|
|
return PatternFill(**pfill_kwargs)
|
|
except TypeError:
|
|
return GradientFill(**gfill_kwargs)
|
|
|
|
@classmethod
|
|
def _convert_to_side(cls, side_spec):
|
|
"""
|
|
Convert ``side_spec`` to an openpyxl v2 Side object.
|
|
|
|
Parameters
|
|
----------
|
|
side_spec : str, dict
|
|
A string specifying the border style, or a dict with zero or more
|
|
of the following keys (or their synonyms).
|
|
'style' ('border_style')
|
|
'color'
|
|
|
|
Returns
|
|
-------
|
|
side : openpyxl.styles.Side
|
|
"""
|
|
from openpyxl.styles import Side
|
|
|
|
_side_key_map = {"border_style": "style"}
|
|
|
|
if isinstance(side_spec, str):
|
|
return Side(style=side_spec)
|
|
|
|
side_kwargs = {}
|
|
for k, v in side_spec.items():
|
|
k = _side_key_map.get(k, k)
|
|
if k == "color":
|
|
v = cls._convert_to_color(v)
|
|
side_kwargs[k] = v
|
|
|
|
return Side(**side_kwargs)
|
|
|
|
@classmethod
|
|
def _convert_to_border(cls, border_dict):
|
|
"""
|
|
Convert ``border_dict`` to an openpyxl v2 Border object.
|
|
|
|
Parameters
|
|
----------
|
|
border_dict : dict
|
|
A dict with zero or more of the following keys (or their synonyms).
|
|
'left'
|
|
'right'
|
|
'top'
|
|
'bottom'
|
|
'diagonal'
|
|
'diagonal_direction'
|
|
'vertical'
|
|
'horizontal'
|
|
'diagonalUp' ('diagonalup')
|
|
'diagonalDown' ('diagonaldown')
|
|
'outline'
|
|
|
|
Returns
|
|
-------
|
|
border : openpyxl.styles.Border
|
|
"""
|
|
from openpyxl.styles import Border
|
|
|
|
_border_key_map = {"diagonalup": "diagonalUp", "diagonaldown": "diagonalDown"}
|
|
|
|
border_kwargs = {}
|
|
for k, v in border_dict.items():
|
|
k = _border_key_map.get(k, k)
|
|
if k == "color":
|
|
v = cls._convert_to_color(v)
|
|
if k in ["left", "right", "top", "bottom", "diagonal"]:
|
|
v = cls._convert_to_side(v)
|
|
border_kwargs[k] = v
|
|
|
|
return Border(**border_kwargs)
|
|
|
|
@classmethod
|
|
def _convert_to_alignment(cls, alignment_dict):
|
|
"""
|
|
Convert ``alignment_dict`` to an openpyxl v2 Alignment object.
|
|
|
|
Parameters
|
|
----------
|
|
alignment_dict : dict
|
|
A dict with zero or more of the following keys (or their synonyms).
|
|
'horizontal'
|
|
'vertical'
|
|
'text_rotation'
|
|
'wrap_text'
|
|
'shrink_to_fit'
|
|
'indent'
|
|
Returns
|
|
-------
|
|
alignment : openpyxl.styles.Alignment
|
|
"""
|
|
from openpyxl.styles import Alignment
|
|
|
|
return Alignment(**alignment_dict)
|
|
|
|
@classmethod
|
|
def _convert_to_number_format(cls, number_format_dict):
|
|
"""
|
|
Convert ``number_format_dict`` to an openpyxl v2.1.0 number format
|
|
initializer.
|
|
|
|
Parameters
|
|
----------
|
|
number_format_dict : dict
|
|
A dict with zero or more of the following keys.
|
|
'format_code' : str
|
|
|
|
Returns
|
|
-------
|
|
number_format : str
|
|
"""
|
|
return number_format_dict["format_code"]
|
|
|
|
@classmethod
|
|
def _convert_to_protection(cls, protection_dict):
|
|
"""
|
|
Convert ``protection_dict`` to an openpyxl v2 Protection object.
|
|
|
|
Parameters
|
|
----------
|
|
protection_dict : dict
|
|
A dict with zero or more of the following keys.
|
|
'locked'
|
|
'hidden'
|
|
|
|
Returns
|
|
-------
|
|
"""
|
|
from openpyxl.styles import Protection
|
|
|
|
return Protection(**protection_dict)
|
|
|
|
def _write_cells(
|
|
self,
|
|
cells,
|
|
sheet_name: str | None = None,
|
|
startrow: int = 0,
|
|
startcol: int = 0,
|
|
freeze_panes: tuple[int, int] | None = None,
|
|
) -> None:
|
|
# Write the frame cells using openpyxl.
|
|
sheet_name = self._get_sheet_name(sheet_name)
|
|
|
|
_style_cache: dict[str, dict[str, Serialisable]] = {}
|
|
|
|
if sheet_name in self.sheets and self._if_sheet_exists != "new":
|
|
if "r+" in self._mode:
|
|
if self._if_sheet_exists == "replace":
|
|
old_wks = self.sheets[sheet_name]
|
|
target_index = self.book.index(old_wks)
|
|
del self.book[sheet_name]
|
|
wks = self.book.create_sheet(sheet_name, target_index)
|
|
elif self._if_sheet_exists == "error":
|
|
raise ValueError(
|
|
f"Sheet '{sheet_name}' already exists and "
|
|
f"if_sheet_exists is set to 'error'."
|
|
)
|
|
elif self._if_sheet_exists == "overlay":
|
|
wks = self.sheets[sheet_name]
|
|
else:
|
|
raise ValueError(
|
|
f"'{self._if_sheet_exists}' is not valid for if_sheet_exists. "
|
|
"Valid options are 'error', 'new', 'replace' and 'overlay'."
|
|
)
|
|
else:
|
|
wks = self.sheets[sheet_name]
|
|
else:
|
|
wks = self.book.create_sheet()
|
|
wks.title = sheet_name
|
|
|
|
if validate_freeze_panes(freeze_panes):
|
|
freeze_panes = cast(tuple[int, int], freeze_panes)
|
|
wks.freeze_panes = wks.cell(
|
|
row=freeze_panes[0] + 1, column=freeze_panes[1] + 1
|
|
)
|
|
|
|
for cell in cells:
|
|
xcell = wks.cell(
|
|
row=startrow + cell.row + 1, column=startcol + cell.col + 1
|
|
)
|
|
xcell.value, fmt = self._value_with_fmt(cell.val)
|
|
if fmt:
|
|
xcell.number_format = fmt
|
|
|
|
style_kwargs: dict[str, Serialisable] | None = {}
|
|
if cell.style:
|
|
key = str(cell.style)
|
|
style_kwargs = _style_cache.get(key)
|
|
if style_kwargs is None:
|
|
style_kwargs = self._convert_to_style_kwargs(cell.style)
|
|
_style_cache[key] = style_kwargs
|
|
|
|
if style_kwargs:
|
|
for k, v in style_kwargs.items():
|
|
setattr(xcell, k, v)
|
|
|
|
if cell.mergestart is not None and cell.mergeend is not None:
|
|
wks.merge_cells(
|
|
start_row=startrow + cell.row + 1,
|
|
start_column=startcol + cell.col + 1,
|
|
end_column=startcol + cell.mergeend + 1,
|
|
end_row=startrow + cell.mergestart + 1,
|
|
)
|
|
|
|
# When cells are merged only the top-left cell is preserved
|
|
# The behaviour of the other cells in a merged range is
|
|
# undefined
|
|
if style_kwargs:
|
|
first_row = startrow + cell.row + 1
|
|
last_row = startrow + cell.mergestart + 1
|
|
first_col = startcol + cell.col + 1
|
|
last_col = startcol + cell.mergeend + 1
|
|
|
|
for row in range(first_row, last_row + 1):
|
|
for col in range(first_col, last_col + 1):
|
|
if row == first_row and col == first_col:
|
|
# Ignore first cell. It is already handled.
|
|
continue
|
|
xcell = wks.cell(column=col, row=row)
|
|
for k, v in style_kwargs.items():
|
|
setattr(xcell, k, v)
|
|
|
|
|
|
class OpenpyxlReader(BaseExcelReader["Workbook"]):
|
|
@doc(storage_options=_shared_docs["storage_options"])
|
|
def __init__(
|
|
self,
|
|
filepath_or_buffer: FilePath | ReadBuffer[bytes],
|
|
storage_options: StorageOptions | None = None,
|
|
engine_kwargs: dict | None = None,
|
|
) -> None:
|
|
"""
|
|
Reader using openpyxl engine.
|
|
|
|
Parameters
|
|
----------
|
|
filepath_or_buffer : str, path object or Workbook
|
|
Object to be parsed.
|
|
{storage_options}
|
|
engine_kwargs : dict, optional
|
|
Arbitrary keyword arguments passed to excel engine.
|
|
"""
|
|
import_optional_dependency("openpyxl")
|
|
super().__init__(
|
|
filepath_or_buffer,
|
|
storage_options=storage_options,
|
|
engine_kwargs=engine_kwargs,
|
|
)
|
|
|
|
@property
|
|
def _workbook_class(self) -> type[Workbook]:
|
|
from openpyxl import Workbook
|
|
|
|
return Workbook
|
|
|
|
def load_workbook(
|
|
self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs
|
|
) -> Workbook:
|
|
from openpyxl import load_workbook
|
|
|
|
default_kwargs = {"read_only": True, "data_only": True, "keep_links": False}
|
|
|
|
return load_workbook(
|
|
filepath_or_buffer,
|
|
**(default_kwargs | engine_kwargs),
|
|
)
|
|
|
|
@property
|
|
def sheet_names(self) -> list[str]:
|
|
return [sheet.title for sheet in self.book.worksheets]
|
|
|
|
def get_sheet_by_name(self, name: str):
|
|
self.raise_if_bad_sheet_by_name(name)
|
|
return self.book[name]
|
|
|
|
def get_sheet_by_index(self, index: int):
|
|
self.raise_if_bad_sheet_by_index(index)
|
|
return self.book.worksheets[index]
|
|
|
|
def _convert_cell(self, cell) -> Scalar:
|
|
from openpyxl.cell.cell import (
|
|
TYPE_ERROR,
|
|
TYPE_NUMERIC,
|
|
)
|
|
|
|
if cell.value is None:
|
|
return "" # compat with xlrd
|
|
elif cell.data_type == TYPE_ERROR:
|
|
return np.nan
|
|
elif cell.data_type == TYPE_NUMERIC:
|
|
val = int(cell.value)
|
|
if val == cell.value:
|
|
return val
|
|
return float(cell.value)
|
|
|
|
return cell.value
|
|
|
|
def get_sheet_data(
|
|
self, sheet, file_rows_needed: int | None = None
|
|
) -> list[list[Scalar]]:
|
|
if self.book.read_only:
|
|
sheet.reset_dimensions()
|
|
|
|
data: list[list[Scalar]] = []
|
|
last_row_with_data = -1
|
|
for row_number, row in enumerate(sheet.rows):
|
|
converted_row = [self._convert_cell(cell) for cell in row]
|
|
while converted_row and converted_row[-1] == "":
|
|
# trim trailing empty elements
|
|
converted_row.pop()
|
|
if converted_row:
|
|
last_row_with_data = row_number
|
|
data.append(converted_row)
|
|
if file_rows_needed is not None and len(data) >= file_rows_needed:
|
|
break
|
|
|
|
# Trim trailing empty rows
|
|
data = data[: last_row_with_data + 1]
|
|
|
|
if len(data) > 0:
|
|
# extend rows to max width
|
|
max_width = max(len(data_row) for data_row in data)
|
|
if min(len(data_row) for data_row in data) < max_width:
|
|
empty_cell: list[Scalar] = [""]
|
|
data = [
|
|
data_row + (max_width - len(data_row)) * empty_cell
|
|
for data_row in data
|
|
]
|
|
|
|
return data
|