PCQRSCANER/venv/Lib/site-packages/xlsxwriter/sharedstrings.py
2019-12-22 21:51:47 +01:00

166 lines
4.8 KiB
Python

###############################################################################
#
# SharedStrings - A class for writing the Excel XLSX sharedStrings file.
#
# Copyright 2013-2019, John McNamara, jmcnamara@cpan.org
#
# Standard packages.
import re
import sys
# Package imports.
from . import xmlwriter
class SharedStrings(xmlwriter.XMLwriter):
    """
    Writer for the sharedStrings part of an Excel XLSX file.

    The strings to emit are read from the ``string_table`` attribute, which
    starts out as ``None`` and must be assigned before the file is assembled
    (presumably with a SharedStringTable instance - confirm against caller).

    """

    ###########################################################################
    #
    # Public API.
    #
    ###########################################################################

    def __init__(self):
        """
        Set up the underlying XML writer with no string table attached.

        """
        super(SharedStrings, self).__init__()

        # Supplied later by the owner of this writer.
        self.string_table = None

    ###########################################################################
    #
    # Private API.
    #
    ###########################################################################

    def _assemble_xml_file(self):
        # Emit the complete document, strictly in this order:
        # declaration, opening <sst>, every string, closing </sst>.
        self._xml_declaration()
        self._write_sst()
        self._write_sst_strings()
        self._xml_end_tag('sst')

        # Finish with the output file.
        self._xml_close()

    ###########################################################################
    #
    # XML methods.
    #
    ###########################################################################

    def _write_sst(self):
        # Open the <sst> element, carrying the namespace and the total and
        # unique string counters taken from the string table.
        table = self.string_table
        namespace = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'

        self._xml_start_tag('sst', [
            ('xmlns', namespace),
            ('count', table.count),
            ('uniqueCount', table.unique_count),
        ])

    def _write_sst_strings(self):
        # Emit one <si> element per string, in the order the table returns.
        emit = self._write_si

        for entry in self.string_table._get_strings():
            emit(entry)

    def _write_si(self, string):
        # Write a single <si> element for the given string.

        def _hex_escape(match):
            # Render the matched character as an _xHHHH_ sequence.
            return "_x%04X_" % ord(match.group(1))

        # Excel stores control characters as _xHHHH_ sequences, so any text
        # that already looks like such a sequence has to be protected first
        # by encoding its leading underscore:
        #     "\0"      -> _x0000_
        #     "_x0000_" -> _x005F_x0000_
        # The order of the two substitutions below therefore matters.
        string = re.sub('(_x[0-9a-fA-F]{4}_)', r'_x005F\1', string)
        string = re.sub(r'([\x00-\x08\x0B-\x1F])', _hex_escape, string)

        # The Unicode non-characters FFFE and FFFF get the same treatment.
        # Python 2 needs unichr() to build them; Python 3 uses chr().
        to_char = unichr if sys.version_info[0] == 2 else chr
        for code_point, escaped in ((0xFFFE, '_xFFFE_'), (0xFFFF, '_xFFFF_')):
            string = re.sub(to_char(code_point), escaped, string)

        # Ask for leading or trailing whitespace to be preserved.
        has_edge_space = re.search(r'^\s', string) or re.search(r'\s$', string)
        attributes = [('xml:space', 'preserve')] if has_edge_space else []

        # A string already wrapped in <r>...</r> is treated as a rich string
        # and is written without the attributes.
        if re.search('^<r>', string) and re.search('</r>$', string):
            self._xml_rich_si_element(string)
            return

        self._xml_si_element(string, attributes)
# A metadata class to store Excel strings between worksheets.
class SharedStringTable(object):
    """
    Bookkeeping for the strings shared between worksheets.

    Strings are first collected in a dict that maps each string to the index
    it was given. ``_sort_string_data()`` then turns that dict into a list
    ordered by index, which is what the index-based accessors read from.

    """

    def __init__(self):
        # Number of lookups made, repeats included.
        self.count = 0
        # Number of distinct strings registered.
        self.unique_count = 0
        # string -> index, filled while strings are being collected.
        self.string_table = {}
        # Strings ordered by index, filled by _sort_string_data().
        self.string_array = []

    def _get_shared_string_index(self, string):
        """Return the index of the string, registering it when unseen."""
        table = self.string_table
        position = table.get(string)

        if position is None:
            # First sighting: the next free index is the unique counter.
            position = self.unique_count
            table[string] = position
            self.unique_count = position + 1

        # Every lookup counts, whether or not the string was already known.
        self.count += 1
        return position

    def _get_shared_string(self, index):
        """Return the string stored at the index in the sorted list."""
        return self.string_array[index]

    def _sort_string_data(self):
        """Build the index-ordered string list and empty the lookup dict."""
        index_of = self.string_table

        ordered = list(index_of)
        ordered.sort(key=index_of.__getitem__)

        self.string_array = ordered
        self.string_table = {}

    def _get_strings(self):
        """Return the index-ordered string list."""
        return self.string_array