505 lines
15 KiB
Python
505 lines
15 KiB
Python
|
"""
|
||
|
Printing tools.
|
||
|
"""
|
||
|
from __future__ import annotations
|
||
|
|
||
|
import sys
|
||
|
from typing import (
|
||
|
Any,
|
||
|
Callable,
|
||
|
Dict,
|
||
|
Iterable,
|
||
|
Mapping,
|
||
|
Sequence,
|
||
|
TypeVar,
|
||
|
Union,
|
||
|
)
|
||
|
|
||
|
from pandas._config import get_option
|
||
|
|
||
|
from pandas.core.dtypes.inference import is_sequence
|
||
|
|
||
|
EscapeChars = Union[Mapping[str, str], Iterable[str]]
|
||
|
_KT = TypeVar("_KT")
|
||
|
_VT = TypeVar("_VT")
|
||
|
|
||
|
|
||
|
def adjoin(space: int, *lists: list[str], **kwargs) -> str:
|
||
|
"""
|
||
|
Glues together two sets of strings using the amount of space requested.
|
||
|
The idea is to prettify.
|
||
|
|
||
|
----------
|
||
|
space : int
|
||
|
number of spaces for padding
|
||
|
lists : str
|
||
|
list of str which being joined
|
||
|
strlen : callable
|
||
|
function used to calculate the length of each str. Needed for unicode
|
||
|
handling.
|
||
|
justfunc : callable
|
||
|
function used to justify str. Needed for unicode handling.
|
||
|
"""
|
||
|
strlen = kwargs.pop("strlen", len)
|
||
|
justfunc = kwargs.pop("justfunc", justify)
|
||
|
|
||
|
out_lines = []
|
||
|
newLists = []
|
||
|
lengths = [max(map(strlen, x)) + space for x in lists[:-1]]
|
||
|
# not the last one
|
||
|
lengths.append(max(map(len, lists[-1])))
|
||
|
maxLen = max(map(len, lists))
|
||
|
for i, lst in enumerate(lists):
|
||
|
nl = justfunc(lst, lengths[i], mode="left")
|
||
|
nl = ([" " * lengths[i]] * (maxLen - len(lst))) + nl
|
||
|
newLists.append(nl)
|
||
|
toJoin = zip(*newLists)
|
||
|
for lines in toJoin:
|
||
|
out_lines.append("".join(lines))
|
||
|
return "\n".join(out_lines)
|
||
|
|
||
|
|
||
|
def justify(texts: Iterable[str], max_len: int, mode: str = "right") -> list[str]:
|
||
|
"""
|
||
|
Perform ljust, center, rjust against string or list-like
|
||
|
"""
|
||
|
if mode == "left":
|
||
|
return [x.ljust(max_len) for x in texts]
|
||
|
elif mode == "center":
|
||
|
return [x.center(max_len) for x in texts]
|
||
|
else:
|
||
|
return [x.rjust(max_len) for x in texts]
|
||
|
|
||
|
|
||
|
# Unicode consolidation
|
||
|
# ---------------------
|
||
|
#
|
||
|
# pprinting utility functions for generating Unicode text or
|
||
|
# bytes(3.x)/str(2.x) representations of objects.
|
||
|
# Try to use these as much as possible rather than rolling your own.
|
||
|
#
|
||
|
# When to use
|
||
|
# -----------
|
||
|
#
|
||
|
# 1) If you're writing code internal to pandas (no I/O directly involved),
|
||
|
# use pprint_thing().
|
||
|
#
|
||
|
# It will always return unicode text which can handled by other
|
||
|
# parts of the package without breakage.
|
||
|
#
|
||
|
# 2) if you need to write something out to file, use
|
||
|
# pprint_thing_encoded(encoding).
|
||
|
#
|
||
|
# If no encoding is specified, it defaults to utf-8. Since encoding pure
|
||
|
# ascii with utf-8 is a no-op you can safely use the default utf-8 if you're
|
||
|
# working with straight ascii.
|
||
|
|
||
|
|
||
|
def _pprint_seq(
|
||
|
seq: Sequence, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds
|
||
|
) -> str:
|
||
|
"""
|
||
|
internal. pprinter for iterables. you should probably use pprint_thing()
|
||
|
rather than calling this directly.
|
||
|
|
||
|
bounds length of printed sequence, depending on options
|
||
|
"""
|
||
|
if isinstance(seq, set):
|
||
|
fmt = "{{{body}}}"
|
||
|
else:
|
||
|
fmt = "[{body}]" if hasattr(seq, "__setitem__") else "({body})"
|
||
|
|
||
|
if max_seq_items is False:
|
||
|
nitems = len(seq)
|
||
|
else:
|
||
|
nitems = max_seq_items or get_option("max_seq_items") or len(seq)
|
||
|
|
||
|
s = iter(seq)
|
||
|
# handle sets, no slicing
|
||
|
r = [
|
||
|
pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
|
||
|
for i in range(min(nitems, len(seq)))
|
||
|
]
|
||
|
body = ", ".join(r)
|
||
|
|
||
|
if nitems < len(seq):
|
||
|
body += ", ..."
|
||
|
elif isinstance(seq, tuple) and len(seq) == 1:
|
||
|
body += ","
|
||
|
|
||
|
return fmt.format(body=body)
|
||
|
|
||
|
|
||
|
def _pprint_dict(
|
||
|
seq: Mapping, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds
|
||
|
) -> str:
|
||
|
"""
|
||
|
internal. pprinter for iterables. you should probably use pprint_thing()
|
||
|
rather than calling this directly.
|
||
|
"""
|
||
|
fmt = "{{{things}}}"
|
||
|
pairs = []
|
||
|
|
||
|
pfmt = "{key}: {val}"
|
||
|
|
||
|
if max_seq_items is False:
|
||
|
nitems = len(seq)
|
||
|
else:
|
||
|
nitems = max_seq_items or get_option("max_seq_items") or len(seq)
|
||
|
|
||
|
for k, v in list(seq.items())[:nitems]:
|
||
|
pairs.append(
|
||
|
pfmt.format(
|
||
|
key=pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
|
||
|
val=pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
|
||
|
)
|
||
|
)
|
||
|
|
||
|
if nitems < len(seq):
|
||
|
return fmt.format(things=", ".join(pairs) + ", ...")
|
||
|
else:
|
||
|
return fmt.format(things=", ".join(pairs))
|
||
|
|
||
|
|
||
|
def pprint_thing(
|
||
|
thing: Any,
|
||
|
_nest_lvl: int = 0,
|
||
|
escape_chars: EscapeChars | None = None,
|
||
|
default_escapes: bool = False,
|
||
|
quote_strings: bool = False,
|
||
|
max_seq_items: int | None = None,
|
||
|
) -> str:
|
||
|
"""
|
||
|
This function is the sanctioned way of converting objects
|
||
|
to a string representation and properly handles nested sequences.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
thing : anything to be formatted
|
||
|
_nest_lvl : internal use only. pprint_thing() is mutually-recursive
|
||
|
with pprint_sequence, this argument is used to keep track of the
|
||
|
current nesting level, and limit it.
|
||
|
escape_chars : list or dict, optional
|
||
|
Characters to escape. If a dict is passed the values are the
|
||
|
replacements
|
||
|
default_escapes : bool, default False
|
||
|
Whether the input escape characters replaces or adds to the defaults
|
||
|
max_seq_items : int or None, default None
|
||
|
Pass through to other pretty printers to limit sequence printing
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
str
|
||
|
"""
|
||
|
|
||
|
def as_escaped_string(
|
||
|
thing: Any, escape_chars: EscapeChars | None = escape_chars
|
||
|
) -> str:
|
||
|
translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"}
|
||
|
if isinstance(escape_chars, dict):
|
||
|
if default_escapes:
|
||
|
translate.update(escape_chars)
|
||
|
else:
|
||
|
translate = escape_chars
|
||
|
escape_chars = list(escape_chars.keys())
|
||
|
else:
|
||
|
escape_chars = escape_chars or ()
|
||
|
|
||
|
result = str(thing)
|
||
|
for c in escape_chars:
|
||
|
result = result.replace(c, translate[c])
|
||
|
return result
|
||
|
|
||
|
if hasattr(thing, "__next__"):
|
||
|
return str(thing)
|
||
|
elif isinstance(thing, dict) and _nest_lvl < get_option(
|
||
|
"display.pprint_nest_depth"
|
||
|
):
|
||
|
result = _pprint_dict(
|
||
|
thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
|
||
|
)
|
||
|
elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
|
||
|
result = _pprint_seq(
|
||
|
thing,
|
||
|
_nest_lvl,
|
||
|
escape_chars=escape_chars,
|
||
|
quote_strings=quote_strings,
|
||
|
max_seq_items=max_seq_items,
|
||
|
)
|
||
|
elif isinstance(thing, str) and quote_strings:
|
||
|
result = f"'{as_escaped_string(thing)}'"
|
||
|
else:
|
||
|
result = as_escaped_string(thing)
|
||
|
|
||
|
return result
|
||
|
|
||
|
|
||
|
def pprint_thing_encoded(
|
||
|
object, encoding: str = "utf-8", errors: str = "replace"
|
||
|
) -> bytes:
|
||
|
value = pprint_thing(object) # get unicode representation of object
|
||
|
return value.encode(encoding, errors)
|
||
|
|
||
|
|
||
|
def enable_data_resource_formatter(enable: bool) -> None:
|
||
|
if "IPython" not in sys.modules:
|
||
|
# definitely not in IPython
|
||
|
return
|
||
|
from IPython import get_ipython
|
||
|
|
||
|
ip = get_ipython()
|
||
|
if ip is None:
|
||
|
# still not in IPython
|
||
|
return
|
||
|
|
||
|
formatters = ip.display_formatter.formatters
|
||
|
mimetype = "application/vnd.dataresource+json"
|
||
|
|
||
|
if enable:
|
||
|
if mimetype not in formatters:
|
||
|
# define tableschema formatter
|
||
|
from IPython.core.formatters import BaseFormatter
|
||
|
from traitlets import ObjectName
|
||
|
|
||
|
class TableSchemaFormatter(BaseFormatter):
|
||
|
print_method = ObjectName("_repr_data_resource_")
|
||
|
_return_type = (dict,)
|
||
|
|
||
|
# register it:
|
||
|
formatters[mimetype] = TableSchemaFormatter()
|
||
|
# enable it if it's been disabled:
|
||
|
formatters[mimetype].enabled = True
|
||
|
else:
|
||
|
# unregister tableschema mime-type
|
||
|
if mimetype in formatters:
|
||
|
formatters[mimetype].enabled = False
|
||
|
|
||
|
|
||
|
def default_pprint(thing: Any, max_seq_items: int | None = None) -> str:
|
||
|
return pprint_thing(
|
||
|
thing,
|
||
|
escape_chars=("\t", "\r", "\n"),
|
||
|
quote_strings=True,
|
||
|
max_seq_items=max_seq_items,
|
||
|
)
|
||
|
|
||
|
|
||
|
def format_object_summary(
|
||
|
obj,
|
||
|
formatter: Callable,
|
||
|
is_justify: bool = True,
|
||
|
name: str | None = None,
|
||
|
indent_for_name: bool = True,
|
||
|
line_break_each_value: bool = False,
|
||
|
) -> str:
|
||
|
"""
|
||
|
Return the formatted obj as a unicode string
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
obj : object
|
||
|
must be iterable and support __getitem__
|
||
|
formatter : callable
|
||
|
string formatter for an element
|
||
|
is_justify : bool
|
||
|
should justify the display
|
||
|
name : name, optional
|
||
|
defaults to the class name of the obj
|
||
|
indent_for_name : bool, default True
|
||
|
Whether subsequent lines should be indented to
|
||
|
align with the name.
|
||
|
line_break_each_value : bool, default False
|
||
|
If True, inserts a line break for each value of ``obj``.
|
||
|
If False, only break lines when the a line of values gets wider
|
||
|
than the display width.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
summary string
|
||
|
"""
|
||
|
from pandas.io.formats.console import get_console_size
|
||
|
from pandas.io.formats.format import get_adjustment
|
||
|
|
||
|
display_width, _ = get_console_size()
|
||
|
if display_width is None:
|
||
|
display_width = get_option("display.width") or 80
|
||
|
if name is None:
|
||
|
name = type(obj).__name__
|
||
|
|
||
|
if indent_for_name:
|
||
|
name_len = len(name)
|
||
|
space1 = f'\n{(" " * (name_len + 1))}'
|
||
|
space2 = f'\n{(" " * (name_len + 2))}'
|
||
|
else:
|
||
|
space1 = "\n"
|
||
|
space2 = "\n " # space for the opening '['
|
||
|
|
||
|
n = len(obj)
|
||
|
if line_break_each_value:
|
||
|
# If we want to vertically align on each value of obj, we need to
|
||
|
# separate values by a line break and indent the values
|
||
|
sep = ",\n " + " " * len(name)
|
||
|
else:
|
||
|
sep = ","
|
||
|
max_seq_items = get_option("display.max_seq_items") or n
|
||
|
|
||
|
# are we a truncated display
|
||
|
is_truncated = n > max_seq_items
|
||
|
|
||
|
# adj can optionally handle unicode eastern asian width
|
||
|
adj = get_adjustment()
|
||
|
|
||
|
def _extend_line(
|
||
|
s: str, line: str, value: str, display_width: int, next_line_prefix: str
|
||
|
) -> tuple[str, str]:
|
||
|
if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width:
|
||
|
s += line.rstrip()
|
||
|
line = next_line_prefix
|
||
|
line += value
|
||
|
return s, line
|
||
|
|
||
|
def best_len(values: list[str]) -> int:
|
||
|
if values:
|
||
|
return max(adj.len(x) for x in values)
|
||
|
else:
|
||
|
return 0
|
||
|
|
||
|
close = ", "
|
||
|
|
||
|
if n == 0:
|
||
|
summary = f"[]{close}"
|
||
|
elif n == 1 and not line_break_each_value:
|
||
|
first = formatter(obj[0])
|
||
|
summary = f"[{first}]{close}"
|
||
|
elif n == 2 and not line_break_each_value:
|
||
|
first = formatter(obj[0])
|
||
|
last = formatter(obj[-1])
|
||
|
summary = f"[{first}, {last}]{close}"
|
||
|
else:
|
||
|
if max_seq_items == 1:
|
||
|
# If max_seq_items=1 show only last element
|
||
|
head = []
|
||
|
tail = [formatter(x) for x in obj[-1:]]
|
||
|
elif n > max_seq_items:
|
||
|
n = min(max_seq_items // 2, 10)
|
||
|
head = [formatter(x) for x in obj[:n]]
|
||
|
tail = [formatter(x) for x in obj[-n:]]
|
||
|
else:
|
||
|
head = []
|
||
|
tail = [formatter(x) for x in obj]
|
||
|
|
||
|
# adjust all values to max length if needed
|
||
|
if is_justify:
|
||
|
if line_break_each_value:
|
||
|
# Justify each string in the values of head and tail, so the
|
||
|
# strings will right align when head and tail are stacked
|
||
|
# vertically.
|
||
|
head, tail = _justify(head, tail)
|
||
|
elif is_truncated or not (
|
||
|
len(", ".join(head)) < display_width
|
||
|
and len(", ".join(tail)) < display_width
|
||
|
):
|
||
|
# Each string in head and tail should align with each other
|
||
|
max_length = max(best_len(head), best_len(tail))
|
||
|
head = [x.rjust(max_length) for x in head]
|
||
|
tail = [x.rjust(max_length) for x in tail]
|
||
|
# If we are not truncated and we are only a single
|
||
|
# line, then don't justify
|
||
|
|
||
|
if line_break_each_value:
|
||
|
# Now head and tail are of type List[Tuple[str]]. Below we
|
||
|
# convert them into List[str], so there will be one string per
|
||
|
# value. Also truncate items horizontally if wider than
|
||
|
# max_space
|
||
|
max_space = display_width - len(space2)
|
||
|
value = tail[0]
|
||
|
for max_items in reversed(range(1, len(value) + 1)):
|
||
|
pprinted_seq = _pprint_seq(value, max_seq_items=max_items)
|
||
|
if len(pprinted_seq) < max_space:
|
||
|
head = [_pprint_seq(x, max_seq_items=max_items) for x in head]
|
||
|
tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail]
|
||
|
break
|
||
|
|
||
|
summary = ""
|
||
|
line = space2
|
||
|
|
||
|
for head_value in head:
|
||
|
word = head_value + sep + " "
|
||
|
summary, line = _extend_line(summary, line, word, display_width, space2)
|
||
|
|
||
|
if is_truncated:
|
||
|
# remove trailing space of last line
|
||
|
summary += line.rstrip() + space2 + "..."
|
||
|
line = space2
|
||
|
|
||
|
for tail_item in tail[:-1]:
|
||
|
word = tail_item + sep + " "
|
||
|
summary, line = _extend_line(summary, line, word, display_width, space2)
|
||
|
|
||
|
# last value: no sep added + 1 space of width used for trailing ','
|
||
|
summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2)
|
||
|
summary += line
|
||
|
|
||
|
# right now close is either '' or ', '
|
||
|
# Now we want to include the ']', but not the maybe space.
|
||
|
close = "]" + close.rstrip(" ")
|
||
|
summary += close
|
||
|
|
||
|
if len(summary) > (display_width) or line_break_each_value:
|
||
|
summary += space1
|
||
|
else: # one row
|
||
|
summary += " "
|
||
|
|
||
|
# remove initial space
|
||
|
summary = "[" + summary[len(space2) :]
|
||
|
|
||
|
return summary
|
||
|
|
||
|
|
||
|
def _justify(
|
||
|
head: list[Sequence[str]], tail: list[Sequence[str]]
|
||
|
) -> tuple[list[tuple[str, ...]], list[tuple[str, ...]]]:
|
||
|
"""
|
||
|
Justify items in head and tail, so they are right-aligned when stacked.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
head : list-like of list-likes of strings
|
||
|
tail : list-like of list-likes of strings
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
tuple of list of tuples of strings
|
||
|
Same as head and tail, but items are right aligned when stacked
|
||
|
vertically.
|
||
|
|
||
|
Examples
|
||
|
--------
|
||
|
>>> _justify([['a', 'b']], [['abc', 'abcd']])
|
||
|
([(' a', ' b')], [('abc', 'abcd')])
|
||
|
"""
|
||
|
combined = head + tail
|
||
|
|
||
|
# For each position for the sequences in ``combined``,
|
||
|
# find the length of the largest string.
|
||
|
max_length = [0] * len(combined[0])
|
||
|
for inner_seq in combined:
|
||
|
length = [len(item) for item in inner_seq]
|
||
|
max_length = [max(x, y) for x, y in zip(max_length, length)]
|
||
|
|
||
|
# justify each item in each list-like in head and tail using max_length
|
||
|
head_tuples = [
|
||
|
tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in head
|
||
|
]
|
||
|
tail_tuples = [
|
||
|
tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in tail
|
||
|
]
|
||
|
return head_tuples, tail_tuples
|
||
|
|
||
|
|
||
|
class PrettyDict(Dict[_KT, _VT]):
|
||
|
"""Dict extension to support abbreviated __repr__"""
|
||
|
|
||
|
def __repr__(self) -> str:
|
||
|
return pprint_thing(self)
|