projektAI/venv/Lib/site-packages/pandas/io/excel/_openpyxl.py
2021-06-06 22:13:05 +02:00

568 lines
17 KiB
Python

from distutils.version import LooseVersion
import mmap
from typing import TYPE_CHECKING, Dict, List, Optional
import numpy as np
from pandas._typing import FilePathOrBuffer, Scalar, StorageOptions
from pandas.compat._optional import get_version, import_optional_dependency
from pandas.io.excel._base import BaseExcelReader, ExcelWriter
from pandas.io.excel._util import validate_freeze_panes
if TYPE_CHECKING:
from openpyxl.descriptors.serialisable import Serialisable
class OpenpyxlWriter(ExcelWriter):
engine = "openpyxl"
supported_extensions = (".xlsx", ".xlsm")
def __init__(
self,
path,
engine=None,
mode: str = "w",
storage_options: StorageOptions = None,
**engine_kwargs,
):
# Use the openpyxl module as the Excel writer.
from openpyxl.workbook import Workbook
super().__init__(
path, mode=mode, storage_options=storage_options, **engine_kwargs
)
# ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from
# the file and later write to it
if "r+" in self.mode: # Load from existing workbook
from openpyxl import load_workbook
self.book = load_workbook(self.handles.handle)
self.handles.handle.seek(0)
else:
# Create workbook object with default optimized_write=True.
self.book = Workbook()
if self.book.worksheets:
self.book.remove(self.book.worksheets[0])
def save(self):
"""
Save workbook to disk.
"""
self.book.save(self.handles.handle)
if "r+" in self.mode and not isinstance(self.handles.handle, mmap.mmap):
# truncate file to the written content
self.handles.handle.truncate()
@classmethod
def _convert_to_style_kwargs(cls, style_dict: dict) -> Dict[str, "Serialisable"]:
"""
Convert a style_dict to a set of kwargs suitable for initializing
or updating-on-copy an openpyxl v2 style object.
Parameters
----------
style_dict : dict
A dict with zero or more of the following keys (or their synonyms).
'font'
'fill'
'border' ('borders')
'alignment'
'number_format'
'protection'
Returns
-------
style_kwargs : dict
A dict with the same, normalized keys as ``style_dict`` but each
value has been replaced with a native openpyxl style object of the
appropriate class.
"""
_style_key_map = {"borders": "border"}
style_kwargs: Dict[str, Serialisable] = {}
for k, v in style_dict.items():
if k in _style_key_map:
k = _style_key_map[k]
_conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None)
new_v = _conv_to_x(v)
if new_v:
style_kwargs[k] = new_v
return style_kwargs
@classmethod
def _convert_to_color(cls, color_spec):
"""
Convert ``color_spec`` to an openpyxl v2 Color object.
Parameters
----------
color_spec : str, dict
A 32-bit ARGB hex string, or a dict with zero or more of the
following keys.
'rgb'
'indexed'
'auto'
'theme'
'tint'
'index'
'type'
Returns
-------
color : openpyxl.styles.Color
"""
from openpyxl.styles import Color
if isinstance(color_spec, str):
return Color(color_spec)
else:
return Color(**color_spec)
@classmethod
def _convert_to_font(cls, font_dict):
"""
Convert ``font_dict`` to an openpyxl v2 Font object.
Parameters
----------
font_dict : dict
A dict with zero or more of the following keys (or their synonyms).
'name'
'size' ('sz')
'bold' ('b')
'italic' ('i')
'underline' ('u')
'strikethrough' ('strike')
'color'
'vertAlign' ('vertalign')
'charset'
'scheme'
'family'
'outline'
'shadow'
'condense'
Returns
-------
font : openpyxl.styles.Font
"""
from openpyxl.styles import Font
_font_key_map = {
"sz": "size",
"b": "bold",
"i": "italic",
"u": "underline",
"strike": "strikethrough",
"vertalign": "vertAlign",
}
font_kwargs = {}
for k, v in font_dict.items():
if k in _font_key_map:
k = _font_key_map[k]
if k == "color":
v = cls._convert_to_color(v)
font_kwargs[k] = v
return Font(**font_kwargs)
@classmethod
def _convert_to_stop(cls, stop_seq):
"""
Convert ``stop_seq`` to a list of openpyxl v2 Color objects,
suitable for initializing the ``GradientFill`` ``stop`` parameter.
Parameters
----------
stop_seq : iterable
An iterable that yields objects suitable for consumption by
``_convert_to_color``.
Returns
-------
stop : list of openpyxl.styles.Color
"""
return map(cls._convert_to_color, stop_seq)
@classmethod
def _convert_to_fill(cls, fill_dict):
"""
Convert ``fill_dict`` to an openpyxl v2 Fill object.
Parameters
----------
fill_dict : dict
A dict with one or more of the following keys (or their synonyms),
'fill_type' ('patternType', 'patterntype')
'start_color' ('fgColor', 'fgcolor')
'end_color' ('bgColor', 'bgcolor')
or one or more of the following keys (or their synonyms).
'type' ('fill_type')
'degree'
'left'
'right'
'top'
'bottom'
'stop'
Returns
-------
fill : openpyxl.styles.Fill
"""
from openpyxl.styles import GradientFill, PatternFill
_pattern_fill_key_map = {
"patternType": "fill_type",
"patterntype": "fill_type",
"fgColor": "start_color",
"fgcolor": "start_color",
"bgColor": "end_color",
"bgcolor": "end_color",
}
_gradient_fill_key_map = {"fill_type": "type"}
pfill_kwargs = {}
gfill_kwargs = {}
for k, v in fill_dict.items():
pk = gk = None
if k in _pattern_fill_key_map:
pk = _pattern_fill_key_map[k]
if k in _gradient_fill_key_map:
gk = _gradient_fill_key_map[k]
if pk in ["start_color", "end_color"]:
v = cls._convert_to_color(v)
if gk == "stop":
v = cls._convert_to_stop(v)
if pk:
pfill_kwargs[pk] = v
elif gk:
gfill_kwargs[gk] = v
else:
pfill_kwargs[k] = v
gfill_kwargs[k] = v
try:
return PatternFill(**pfill_kwargs)
except TypeError:
return GradientFill(**gfill_kwargs)
@classmethod
def _convert_to_side(cls, side_spec):
"""
Convert ``side_spec`` to an openpyxl v2 Side object.
Parameters
----------
side_spec : str, dict
A string specifying the border style, or a dict with zero or more
of the following keys (or their synonyms).
'style' ('border_style')
'color'
Returns
-------
side : openpyxl.styles.Side
"""
from openpyxl.styles import Side
_side_key_map = {"border_style": "style"}
if isinstance(side_spec, str):
return Side(style=side_spec)
side_kwargs = {}
for k, v in side_spec.items():
if k in _side_key_map:
k = _side_key_map[k]
if k == "color":
v = cls._convert_to_color(v)
side_kwargs[k] = v
return Side(**side_kwargs)
@classmethod
def _convert_to_border(cls, border_dict):
"""
Convert ``border_dict`` to an openpyxl v2 Border object.
Parameters
----------
border_dict : dict
A dict with zero or more of the following keys (or their synonyms).
'left'
'right'
'top'
'bottom'
'diagonal'
'diagonal_direction'
'vertical'
'horizontal'
'diagonalUp' ('diagonalup')
'diagonalDown' ('diagonaldown')
'outline'
Returns
-------
border : openpyxl.styles.Border
"""
from openpyxl.styles import Border
_border_key_map = {"diagonalup": "diagonalUp", "diagonaldown": "diagonalDown"}
border_kwargs = {}
for k, v in border_dict.items():
if k in _border_key_map:
k = _border_key_map[k]
if k == "color":
v = cls._convert_to_color(v)
if k in ["left", "right", "top", "bottom", "diagonal"]:
v = cls._convert_to_side(v)
border_kwargs[k] = v
return Border(**border_kwargs)
@classmethod
def _convert_to_alignment(cls, alignment_dict):
"""
Convert ``alignment_dict`` to an openpyxl v2 Alignment object.
Parameters
----------
alignment_dict : dict
A dict with zero or more of the following keys (or their synonyms).
'horizontal'
'vertical'
'text_rotation'
'wrap_text'
'shrink_to_fit'
'indent'
Returns
-------
alignment : openpyxl.styles.Alignment
"""
from openpyxl.styles import Alignment
return Alignment(**alignment_dict)
@classmethod
def _convert_to_number_format(cls, number_format_dict):
"""
Convert ``number_format_dict`` to an openpyxl v2.1.0 number format
initializer.
Parameters
----------
number_format_dict : dict
A dict with zero or more of the following keys.
'format_code' : str
Returns
-------
number_format : str
"""
return number_format_dict["format_code"]
@classmethod
def _convert_to_protection(cls, protection_dict):
"""
Convert ``protection_dict`` to an openpyxl v2 Protection object.
Parameters
----------
protection_dict : dict
A dict with zero or more of the following keys.
'locked'
'hidden'
Returns
-------
"""
from openpyxl.styles import Protection
return Protection(**protection_dict)
def write_cells(
self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None
):
# Write the frame cells using openpyxl.
sheet_name = self._get_sheet_name(sheet_name)
_style_cache: Dict[str, Dict[str, Serialisable]] = {}
if sheet_name in self.sheets:
wks = self.sheets[sheet_name]
else:
wks = self.book.create_sheet()
wks.title = sheet_name
self.sheets[sheet_name] = wks
if validate_freeze_panes(freeze_panes):
wks.freeze_panes = wks.cell(
row=freeze_panes[0] + 1, column=freeze_panes[1] + 1
)
for cell in cells:
xcell = wks.cell(
row=startrow + cell.row + 1, column=startcol + cell.col + 1
)
xcell.value, fmt = self._value_with_fmt(cell.val)
if fmt:
xcell.number_format = fmt
style_kwargs: Optional[Dict[str, Serialisable]] = {}
if cell.style:
key = str(cell.style)
style_kwargs = _style_cache.get(key)
if style_kwargs is None:
style_kwargs = self._convert_to_style_kwargs(cell.style)
_style_cache[key] = style_kwargs
if style_kwargs:
for k, v in style_kwargs.items():
setattr(xcell, k, v)
if cell.mergestart is not None and cell.mergeend is not None:
wks.merge_cells(
start_row=startrow + cell.row + 1,
start_column=startcol + cell.col + 1,
end_column=startcol + cell.mergeend + 1,
end_row=startrow + cell.mergestart + 1,
)
# When cells are merged only the top-left cell is preserved
# The behaviour of the other cells in a merged range is
# undefined
if style_kwargs:
first_row = startrow + cell.row + 1
last_row = startrow + cell.mergestart + 1
first_col = startcol + cell.col + 1
last_col = startcol + cell.mergeend + 1
for row in range(first_row, last_row + 1):
for col in range(first_col, last_col + 1):
if row == first_row and col == first_col:
# Ignore first cell. It is already handled.
continue
xcell = wks.cell(column=col, row=row)
for k, v in style_kwargs.items():
setattr(xcell, k, v)
class OpenpyxlReader(BaseExcelReader):
def __init__(
self,
filepath_or_buffer: FilePathOrBuffer,
storage_options: StorageOptions = None,
) -> None:
"""
Reader using openpyxl engine.
Parameters
----------
filepath_or_buffer : string, path object or Workbook
Object to be parsed.
storage_options : dict, optional
passed to fsspec for appropriate URLs (see ``_get_filepath_or_buffer``)
"""
import_optional_dependency("openpyxl")
super().__init__(filepath_or_buffer, storage_options=storage_options)
@property
def _workbook_class(self):
from openpyxl import Workbook
return Workbook
def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
from openpyxl import load_workbook
return load_workbook(
filepath_or_buffer, read_only=True, data_only=True, keep_links=False
)
def close(self):
# https://stackoverflow.com/questions/31416842/
# openpyxl-does-not-close-excel-workbook-in-read-only-mode
self.book.close()
super().close()
@property
def sheet_names(self) -> List[str]:
return self.book.sheetnames
def get_sheet_by_name(self, name: str):
self.raise_if_bad_sheet_by_name(name)
return self.book[name]
def get_sheet_by_index(self, index: int):
self.raise_if_bad_sheet_by_index(index)
return self.book.worksheets[index]
def _convert_cell(self, cell, convert_float: bool) -> Scalar:
from openpyxl.cell.cell import TYPE_BOOL, TYPE_ERROR, TYPE_NUMERIC
if cell.value is None:
return "" # compat with xlrd
elif cell.is_date:
return cell.value
elif cell.data_type == TYPE_ERROR:
return np.nan
elif cell.data_type == TYPE_BOOL:
return bool(cell.value)
elif cell.data_type == TYPE_NUMERIC:
# GH5394
if convert_float:
val = int(cell.value)
if val == cell.value:
return val
else:
return float(cell.value)
return cell.value
def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]:
# GH 39001
# Reading of excel file depends on dimension data being correct but
# writers sometimes omit or get it wrong
import openpyxl
version = LooseVersion(get_version(openpyxl))
# There is no good way of determining if a sheet is read-only
# https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1605
is_readonly = hasattr(sheet, "reset_dimensions")
if version >= "3.0.0" and is_readonly:
sheet.reset_dimensions()
data: List[List[Scalar]] = []
last_row_with_data = -1
for row_number, row in enumerate(sheet.rows):
converted_row = [self._convert_cell(cell, convert_float) for cell in row]
if not all(cell == "" for cell in converted_row):
last_row_with_data = row_number
data.append(converted_row)
# Trim trailing empty rows
data = data[: last_row_with_data + 1]
if version >= "3.0.0" and is_readonly and len(data) > 0:
# With dimension reset, openpyxl no longer pads rows
max_width = max(len(data_row) for data_row in data)
if min(len(data_row) for data_row in data) < max_width:
empty_cell: List[Scalar] = [""]
data = [
data_row + (max_width - len(data_row)) * empty_cell
for data_row in data
]
return data