# Source path: projektAI/venv/Lib/site-packages/pandas/tests/io/formats/test_to_latex.py
# Repository view metadata: 1476 lines, 43 KiB, Python
# Last change: 2021-06-06 22:13:05 +02:00

import codecs
from datetime import datetime
from textwrap import dedent
import pytest
import pandas as pd
from pandas import DataFrame, Series
import pandas._testing as tm
from pandas.io.formats.format import DataFrameFormatter
from pandas.io.formats.latex import (
RegularTableBuilder,
RowBodyIterator,
RowHeaderIterator,
RowStringConverter,
)
def _dedent(string):
"""Dedent without new line in the beginning.
Built-in textwrap.dedent would keep new line character in the beginning
of multi-line string starting from the new line.
This version drops the leading new line character.
"""
return dedent(string).lstrip()
@pytest.fixture
def df_short():
    """Short dataframe for testing table/tabular/longtable LaTeX env."""
    return DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
class TestToLatex:
    """Tests of ``to_latex`` for the plain ``tabular`` environment.

    Covers writing to a file, index on/off, ``column_format`` validation,
    ``float_format`` on object columns, empty frames and Series input.

    NOTE(review): the expected LaTeX literals below use single-space column
    separators only; if ``to_latex`` pads columns, that padding was
    presumably lost when this copy of the file was extracted -- confirm
    against real output.
    """

    def test_to_latex_to_file(self, float_frame):
        with tm.ensure_clean("test.tex") as path:
            float_frame.to_latex(path)
            with open(path) as f:
                assert float_frame.to_latex() == f.read()

    def test_to_latex_to_file_utf8_with_encoding(self):
        # test with utf-8 and encoding option (GH 7061)
        df = DataFrame([["au\xdfgangen"]])
        with tm.ensure_clean("test.tex") as path:
            df.to_latex(path, encoding="utf-8")
            with codecs.open(path, "r", encoding="utf-8") as f:
                assert df.to_latex() == f.read()

    def test_to_latex_to_file_utf8_without_encoding(self):
        # test with utf-8 without encoding option
        df = DataFrame([["au\xdfgangen"]])
        with tm.ensure_clean("test.tex") as path:
            df.to_latex(path)
            with codecs.open(path, "r", encoding="utf-8") as f:
                assert df.to_latex() == f.read()

    def test_to_latex_tabular_with_index(self):
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        result = df.to_latex()
        expected = _dedent(
            r"""
            \begin{tabular}{lrl}
            \toprule
            {} & a & b \\
            \midrule
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_tabular_without_index(self):
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        result = df.to_latex(index=False)
        expected = _dedent(
            r"""
            \begin{tabular}{rl}
            \toprule
            a & b \\
            \midrule
            1 & b1 \\
            2 & b2 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    @pytest.mark.parametrize(
        "bad_column_format",
        [5, 1.2, ["l", "r"], ("r", "c"), {"r", "c", "l"}, {"a": "r", "b": "l"}],
    )
    def test_to_latex_bad_column_format(self, bad_column_format):
        # Every non-string column_format is expected to raise ValueError.
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        msg = r"column_format must be str or unicode"
        with pytest.raises(ValueError, match=msg):
            df.to_latex(column_format=bad_column_format)

    def test_to_latex_column_format_just_works(self, float_frame):
        # GH Bug #9402
        float_frame.to_latex(column_format="lcr")

    def test_to_latex_column_format(self):
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        result = df.to_latex(column_format="lcr")
        expected = _dedent(
            r"""
            \begin{tabular}{lcr}
            \toprule
            {} & a & b \\
            \midrule
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_float_format_object_col(self):
        # GH#40024
        ser = Series([1000.0, "test"])
        result = ser.to_latex(float_format="{:,.0f}".format)
        expected = _dedent(
            r"""
            \begin{tabular}{ll}
            \toprule
            {} & 0 \\
            \midrule
            0 & 1,000 \\
            1 & test \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_empty_tabular(self):
        df = DataFrame()
        result = df.to_latex()
        expected = _dedent(
            r"""
            \begin{tabular}{l}
            \toprule
            Empty DataFrame
            Columns: Index([], dtype='object')
            Index: Index([], dtype='object') \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_series(self):
        s = Series(["a", "b", "c"])
        result = s.to_latex()
        expected = _dedent(
            r"""
            \begin{tabular}{ll}
            \toprule
            {} & 0 \\
            \midrule
            0 & a \\
            1 & b \\
            2 & c \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_midrule_location(self):
        # GH 18326
        df = DataFrame({"a": [1, 2]})
        df.index.name = "foo"
        result = df.to_latex(index_names=False)
        expected = _dedent(
            r"""
            \begin{tabular}{lr}
            \toprule
            {} & a \\
            \midrule
            0 & 1 \\
            1 & 2 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected
class TestToLatexLongtable:
    """Tests of ``to_latex(longtable=True)`` output.

    NOTE(review): the expected LaTeX literals below use single-space column
    separators only; if ``to_latex`` pads columns, that padding was
    presumably lost when this copy of the file was extracted -- confirm
    against real output.
    """

    def test_to_latex_empty_longtable(self):
        df = DataFrame()
        result = df.to_latex(longtable=True)
        expected = _dedent(
            r"""
            \begin{longtable}{l}
            \toprule
            Empty DataFrame
            Columns: Index([], dtype='object')
            Index: Index([], dtype='object') \\
            \end{longtable}
            """
        )
        assert result == expected

    def test_to_latex_longtable_with_index(self):
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        result = df.to_latex(longtable=True)
        expected = _dedent(
            r"""
            \begin{longtable}{lrl}
            \toprule
            {} & a & b \\
            \midrule
            \endfirsthead
            \toprule
            {} & a & b \\
            \midrule
            \endhead
            \midrule
            \multicolumn{3}{r}{{Continued on next page}} \\
            \midrule
            \endfoot
            \bottomrule
            \endlastfoot
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \end{longtable}
            """
        )
        assert result == expected

    def test_to_latex_longtable_without_index(self):
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        result = df.to_latex(index=False, longtable=True)
        expected = _dedent(
            r"""
            \begin{longtable}{rl}
            \toprule
            a & b \\
            \midrule
            \endfirsthead
            \toprule
            a & b \\
            \midrule
            \endhead
            \midrule
            \multicolumn{2}{r}{{Continued on next page}} \\
            \midrule
            \endfoot
            \bottomrule
            \endlastfoot
            1 & b1 \\
            2 & b2 \\
            \end{longtable}
            """
        )
        assert result == expected

    @pytest.mark.parametrize(
        "df, expected_number",
        [
            (DataFrame({"a": [1, 2]}), 1),
            (DataFrame({"a": [1, 2], "b": [3, 4]}), 2),
            (DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}), 3),
        ],
    )
    def test_to_latex_longtable_continued_on_next_page(self, df, expected_number):
        # The multicolumn span is checked against the number of data columns
        # (the index is not written here).
        result = df.to_latex(index=False, longtable=True)
        assert fr"\multicolumn{{{expected_number}}}" in result
class TestToLatexHeader:
    """Tests of the ``header`` (and ``decimal``) options of ``to_latex``.

    NOTE(review): the expected LaTeX literals below use single-space column
    separators only; if ``to_latex`` pads columns, that padding was
    presumably lost when this copy of the file was extracted -- confirm
    against real output.
    """

    def test_to_latex_no_header_with_index(self):
        # GH 7124
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        result = df.to_latex(header=False)
        expected = _dedent(
            r"""
            \begin{tabular}{lrl}
            \toprule
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_no_header_without_index(self):
        # GH 7124
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        result = df.to_latex(index=False, header=False)
        expected = _dedent(
            r"""
            \begin{tabular}{rl}
            \toprule
            1 & b1 \\
            2 & b2 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_specified_header_with_index(self):
        # GH 7124
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        result = df.to_latex(header=["AA", "BB"])
        expected = _dedent(
            r"""
            \begin{tabular}{lrl}
            \toprule
            {} & AA & BB \\
            \midrule
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_specified_header_without_index(self):
        # GH 7124
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        result = df.to_latex(header=["AA", "BB"], index=False)
        expected = _dedent(
            r"""
            \begin{tabular}{rl}
            \toprule
            AA & BB \\
            \midrule
            1 & b1 \\
            2 & b2 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    @pytest.mark.parametrize(
        "header, num_aliases",
        [
            (["A"], 1),
            (("B",), 1),
            (("Col1", "Col2", "Col3"), 3),
            (("Col1", "Col2", "Col3", "Col4"), 4),
        ],
    )
    def test_to_latex_number_of_items_in_header_missmatch_raises(
        self,
        header,
        num_aliases,
    ):
        # GH 7124
        # The frame has two columns, so any other alias count must raise.
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        msg = f"Writing 2 cols but got {num_aliases} aliases"
        with pytest.raises(ValueError, match=msg):
            df.to_latex(header=header)

    def test_to_latex_decimal(self):
        # GH 12031
        df = DataFrame({"a": [1.0, 2.1], "b": ["b1", "b2"]})
        result = df.to_latex(decimal=",")
        expected = _dedent(
            r"""
            \begin{tabular}{lrl}
            \toprule
            {} & a & b \\
            \midrule
            0 & 1,0 & b1 \\
            1 & 2,1 & b2 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected
class TestToLatexBold:
    """Tests of the ``bold_rows`` option of ``to_latex``.

    NOTE(review): the expected LaTeX literals below use single-space column
    separators only; if ``to_latex`` pads columns, that padding was
    presumably lost when this copy of the file was extracted -- confirm
    against real output.
    """

    def test_to_latex_bold_rows(self):
        # GH 16707
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        result = df.to_latex(bold_rows=True)
        expected = _dedent(
            r"""
            \begin{tabular}{lrl}
            \toprule
            {} & a & b \\
            \midrule
            \textbf{0} & 1 & b1 \\
            \textbf{1} & 2 & b2 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_no_bold_rows(self):
        # GH 16707
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        result = df.to_latex(bold_rows=False)
        expected = _dedent(
            r"""
            \begin{tabular}{lrl}
            \toprule
            {} & a & b \\
            \midrule
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected
class TestToLatexCaptionLabel:
    """Tests of the ``caption`` and ``label`` options of ``to_latex``.

    Both the ``table``/``tabular`` and the ``longtable`` environments are
    exercised, including the ``(full_caption, short_caption)`` form.

    NOTE(review): the expected LaTeX literals below use single-space column
    separators only; if ``to_latex`` pads columns, that padding was
    presumably lost when this copy of the file was extracted -- confirm
    against real output.
    """

    @pytest.fixture
    def caption_table(self):
        """Caption for table/tabular LaTeX environment."""
        return "a table in a \\texttt{table/tabular} environment"

    @pytest.fixture
    def short_caption(self):
        """Short caption for testing \\caption[short_caption]{full_caption}."""
        return "a table"

    @pytest.fixture
    def label_table(self):
        """Label for table/tabular LaTeX environment."""
        return "tab:table_tabular"

    @pytest.fixture
    def caption_longtable(self):
        """Caption for longtable LaTeX environment."""
        return "a table in a \\texttt{longtable} environment"

    @pytest.fixture
    def label_longtable(self):
        """Label for longtable LaTeX environment."""
        return "tab:longtable"

    def test_to_latex_caption_only(self, df_short, caption_table):
        # GH 25436
        result = df_short.to_latex(caption=caption_table)
        expected = _dedent(
            r"""
            \begin{table}
            \centering
            \caption{a table in a \texttt{table/tabular} environment}
            \begin{tabular}{lrl}
            \toprule
            {} & a & b \\
            \midrule
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \bottomrule
            \end{tabular}
            \end{table}
            """
        )
        assert result == expected

    def test_to_latex_label_only(self, df_short, label_table):
        # GH 25436
        result = df_short.to_latex(label=label_table)
        expected = _dedent(
            r"""
            \begin{table}
            \centering
            \label{tab:table_tabular}
            \begin{tabular}{lrl}
            \toprule
            {} & a & b \\
            \midrule
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \bottomrule
            \end{tabular}
            \end{table}
            """
        )
        assert result == expected

    def test_to_latex_caption_and_label(self, df_short, caption_table, label_table):
        # GH 25436
        result = df_short.to_latex(caption=caption_table, label=label_table)
        expected = _dedent(
            r"""
            \begin{table}
            \centering
            \caption{a table in a \texttt{table/tabular} environment}
            \label{tab:table_tabular}
            \begin{tabular}{lrl}
            \toprule
            {} & a & b \\
            \midrule
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \bottomrule
            \end{tabular}
            \end{table}
            """
        )
        assert result == expected

    def test_to_latex_caption_and_shortcaption(
        self,
        df_short,
        caption_table,
        short_caption,
    ):
        result = df_short.to_latex(caption=(caption_table, short_caption))
        expected = _dedent(
            r"""
            \begin{table}
            \centering
            \caption[a table]{a table in a \texttt{table/tabular} environment}
            \begin{tabular}{lrl}
            \toprule
            {} & a & b \\
            \midrule
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \bottomrule
            \end{tabular}
            \end{table}
            """
        )
        assert result == expected

    def test_to_latex_caption_and_shortcaption_list_is_ok(self, df_short):
        # A list caption must render the same as the equivalent tuple.
        caption = ("Long-long-caption", "Short")
        result_tuple = df_short.to_latex(caption=caption)
        result_list = df_short.to_latex(caption=list(caption))
        assert result_tuple == result_list

    def test_to_latex_caption_shortcaption_and_label(
        self,
        df_short,
        caption_table,
        short_caption,
        label_table,
    ):
        # test when the short_caption is provided alongside caption and label
        result = df_short.to_latex(
            caption=(caption_table, short_caption),
            label=label_table,
        )
        expected = _dedent(
            r"""
            \begin{table}
            \centering
            \caption[a table]{a table in a \texttt{table/tabular} environment}
            \label{tab:table_tabular}
            \begin{tabular}{lrl}
            \toprule
            {} & a & b \\
            \midrule
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \bottomrule
            \end{tabular}
            \end{table}
            """
        )
        assert result == expected

    @pytest.mark.parametrize(
        "bad_caption",
        [
            ("full_caption", "short_caption", "extra_string"),
            ("full_caption", "short_caption", 1),
            ("full_caption", "short_caption", None),
            ("full_caption",),
            (None,),
        ],
    )
    def test_to_latex_bad_caption_raises(self, bad_caption):
        # test that wrong number of params is raised
        df = DataFrame({"a": [1]})
        msg = "caption must be either a string or a tuple of two strings"
        with pytest.raises(ValueError, match=msg):
            df.to_latex(caption=bad_caption)

    def test_to_latex_two_chars_caption(self, df_short):
        # test that two chars caption is handled correctly
        # it must not be unpacked into long_caption, short_caption.
        result = df_short.to_latex(caption="xy")
        expected = _dedent(
            r"""
            \begin{table}
            \centering
            \caption{xy}
            \begin{tabular}{lrl}
            \toprule
            {} & a & b \\
            \midrule
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \bottomrule
            \end{tabular}
            \end{table}
            """
        )
        assert result == expected

    def test_to_latex_longtable_caption_only(self, df_short, caption_longtable):
        # GH 25436
        # test when no caption and no label is provided
        # is performed by test_to_latex_longtable()
        result = df_short.to_latex(longtable=True, caption=caption_longtable)
        expected = _dedent(
            r"""
            \begin{longtable}{lrl}
            \caption{a table in a \texttt{longtable} environment}\\
            \toprule
            {} & a & b \\
            \midrule
            \endfirsthead
            \caption[]{a table in a \texttt{longtable} environment} \\
            \toprule
            {} & a & b \\
            \midrule
            \endhead
            \midrule
            \multicolumn{3}{r}{{Continued on next page}} \\
            \midrule
            \endfoot
            \bottomrule
            \endlastfoot
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \end{longtable}
            """
        )
        assert result == expected

    def test_to_latex_longtable_label_only(self, df_short, label_longtable):
        # GH 25436
        result = df_short.to_latex(longtable=True, label=label_longtable)
        expected = _dedent(
            r"""
            \begin{longtable}{lrl}
            \label{tab:longtable}\\
            \toprule
            {} & a & b \\
            \midrule
            \endfirsthead
            \toprule
            {} & a & b \\
            \midrule
            \endhead
            \midrule
            \multicolumn{3}{r}{{Continued on next page}} \\
            \midrule
            \endfoot
            \bottomrule
            \endlastfoot
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \end{longtable}
            """
        )
        assert result == expected

    def test_to_latex_longtable_caption_and_label(
        self,
        df_short,
        caption_longtable,
        label_longtable,
    ):
        # GH 25436
        result = df_short.to_latex(
            longtable=True,
            caption=caption_longtable,
            label=label_longtable,
        )
        expected = _dedent(
            r"""
            \begin{longtable}{lrl}
            \caption{a table in a \texttt{longtable} environment}
            \label{tab:longtable}\\
            \toprule
            {} & a & b \\
            \midrule
            \endfirsthead
            \caption[]{a table in a \texttt{longtable} environment} \\
            \toprule
            {} & a & b \\
            \midrule
            \endhead
            \midrule
            \multicolumn{3}{r}{{Continued on next page}} \\
            \midrule
            \endfoot
            \bottomrule
            \endlastfoot
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \end{longtable}
            """
        )
        assert result == expected

    def test_to_latex_longtable_caption_shortcaption_and_label(
        self,
        df_short,
        caption_longtable,
        short_caption,
        label_longtable,
    ):
        # test when the caption, the short_caption and the label are provided
        result = df_short.to_latex(
            longtable=True,
            caption=(caption_longtable, short_caption),
            label=label_longtable,
        )
        expected = _dedent(
            r"""
            \begin{longtable}{lrl}
            \caption[a table]{a table in a \texttt{longtable} environment}
            \label{tab:longtable}\\
            \toprule
            {} & a & b \\
            \midrule
            \endfirsthead
            \caption[]{a table in a \texttt{longtable} environment} \\
            \toprule
            {} & a & b \\
            \midrule
            \endhead
            \midrule
            \multicolumn{3}{r}{{Continued on next page}} \\
            \midrule
            \endfoot
            \bottomrule
            \endlastfoot
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \end{longtable}
            """
        )
        assert result == expected
class TestToLatexEscape:
    """Tests of special-character escaping in ``to_latex`` (``escape``).

    NOTE(review): the expected LaTeX literals below use single-space column
    separators only; if ``to_latex`` pads columns, that padding was
    presumably lost when this copy of the file was extracted -- confirm
    against real output.
    """

    @pytest.fixture
    def df_with_symbols(self):
        """Dataframe with special characters for testing chars escaping."""
        a = "a"
        b = "b"
        yield DataFrame({"co$e^x$": {a: "a", b: "b"}, "co^l1": {a: "a", b: "b"}})

    def test_to_latex_escape_false(self, df_with_symbols):
        result = df_with_symbols.to_latex(escape=False)
        expected = _dedent(
            r"""
            \begin{tabular}{lll}
            \toprule
            {} & co$e^x$ & co^l1 \\
            \midrule
            a & a & a \\
            b & b & b \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_escape_default(self, df_with_symbols):
        result = df_with_symbols.to_latex()  # default: escape=True
        expected = _dedent(
            r"""
            \begin{tabular}{lll}
            \toprule
            {} & co\$e\textasciicircum x\$ & co\textasciicircum l1 \\
            \midrule
            a & a & a \\
            b & b & b \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_special_escape(self):
        df = DataFrame([r"a\b\c", r"^a^b^c", r"~a~b~c"])
        result = df.to_latex()
        expected = _dedent(
            r"""
            \begin{tabular}{ll}
            \toprule
            {} & 0 \\
            \midrule
            0 & a\textbackslash b\textbackslash c \\
            1 & \textasciicircum a\textasciicircum b\textasciicircum c \\
            2 & \textasciitilde a\textasciitilde b\textasciitilde c \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_escape_special_chars(self):
        special_characters = ["&", "%", "$", "#", "_", "{", "}", "~", "^", "\\"]
        df = DataFrame(data=special_characters)
        result = df.to_latex()
        expected = _dedent(
            r"""
            \begin{tabular}{ll}
            \toprule
            {} & 0 \\
            \midrule
            0 & \& \\
            1 & \% \\
            2 & \$ \\
            3 & \# \\
            4 & \_ \\
            5 & \{ \\
            6 & \} \\
            7 & \textasciitilde \\
            8 & \textasciicircum \\
            9 & \textbackslash \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_specified_header_special_chars_without_escape(self):
        # GH 7124
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        result = df.to_latex(header=["$A$", "$B$"], escape=False)
        expected = _dedent(
            r"""
            \begin{tabular}{lrl}
            \toprule
            {} & $A$ & $B$ \\
            \midrule
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected
class TestToLatexPosition:
    """Tests of the ``position`` option of ``to_latex``.

    NOTE(review): the expected LaTeX literals below use single-space column
    separators only; if ``to_latex`` pads columns, that padding was
    presumably lost when this copy of the file was extracted -- confirm
    against real output.
    """

    def test_to_latex_position(self):
        the_position = "h"
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        result = df.to_latex(position=the_position)
        expected = _dedent(
            r"""
            \begin{table}[h]
            \centering
            \begin{tabular}{lrl}
            \toprule
            {} & a & b \\
            \midrule
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \bottomrule
            \end{tabular}
            \end{table}
            """
        )
        assert result == expected

    def test_to_latex_longtable_position(self):
        the_position = "t"
        df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
        result = df.to_latex(longtable=True, position=the_position)
        expected = _dedent(
            r"""
            \begin{longtable}[t]{lrl}
            \toprule
            {} & a & b \\
            \midrule
            \endfirsthead
            \toprule
            {} & a & b \\
            \midrule
            \endhead
            \midrule
            \multicolumn{3}{r}{{Continued on next page}} \\
            \midrule
            \endfoot
            \bottomrule
            \endlastfoot
            0 & 1 & b1 \\
            1 & 2 & b2 \\
            \end{longtable}
            """
        )
        assert result == expected
class TestToLatexFormatters:
    """Tests of ``formatters``, ``float_format`` and ``na_rep`` in ``to_latex``.

    NOTE(review): the expected LaTeX literals below use single-space column
    separators only; if ``to_latex`` pads columns, that padding was
    presumably lost when this copy of the file was extracted -- confirm
    against real output.
    """

    def test_to_latex_with_formatters(self):
        df = DataFrame(
            {
                "datetime64": [
                    datetime(2016, 1, 1),
                    datetime(2016, 2, 5),
                    datetime(2016, 3, 3),
                ],
                "float": [1.0, 2.0, 3.0],
                "int": [1, 2, 3],
                "object": [(1, 2), True, False],
            }
        )
        # One formatter per column, plus "__index__" for the index values.
        formatters = {
            "datetime64": lambda x: x.strftime("%Y-%m"),
            "float": lambda x: f"[{x: 4.1f}]",
            "int": lambda x: f"0x{x:x}",
            "object": lambda x: f"-{x!s}-",
            "__index__": lambda x: f"index: {x}",
        }
        result = df.to_latex(formatters=dict(formatters))
        expected = _dedent(
            r"""
            \begin{tabular}{llrrl}
            \toprule
            {} & datetime64 & float & int & object \\
            \midrule
            index: 0 & 2016-01 & [ 1.0] & 0x1 & -(1, 2)- \\
            index: 1 & 2016-02 & [ 2.0] & 0x2 & -True- \\
            index: 2 & 2016-03 & [ 3.0] & 0x3 & -False- \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_float_format_no_fixed_width_3decimals(self):
        # GH 21625
        df = DataFrame({"x": [0.19999]})
        result = df.to_latex(float_format="%.3f")
        expected = _dedent(
            r"""
            \begin{tabular}{lr}
            \toprule
            {} & x \\
            \midrule
            0 & 0.200 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_float_format_no_fixed_width_integer(self):
        # GH 22270
        df = DataFrame({"x": [100.0]})
        result = df.to_latex(float_format="%.0f")
        expected = _dedent(
            r"""
            \begin{tabular}{lr}
            \toprule
            {} & x \\
            \midrule
            0 & 100 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    @pytest.mark.parametrize("na_rep", ["NaN", "Ted"])
    def test_to_latex_na_rep_and_float_format(self, na_rep):
        df = DataFrame(
            [
                ["A", 1.2225],
                ["A", None],
            ],
            columns=["Group", "Data"],
        )
        result = df.to_latex(na_rep=na_rep, float_format="{:.2f}".format)
        # Raw f-string: literal LaTeX braces are doubled, {na_rep} is filled in.
        expected = _dedent(
            fr"""
            \begin{{tabular}}{{llr}}
            \toprule
            {{}} & Group & Data \\
            \midrule
            0 & A & 1.22 \\
            1 & A & {na_rep} \\
            \bottomrule
            \end{{tabular}}
            """
        )
        assert result == expected
class TestToLatexMultiindex:
    """Tests of ``to_latex`` with MultiIndex rows and/or columns.

    Covers the ``multicolumn``, ``multicolumn_format`` and ``multirow``
    options as well as index/column names and NaN index values.

    NOTE(review): the expected LaTeX literals below use single-space column
    separators only; if ``to_latex`` pads columns, that padding was
    presumably lost when this copy of the file was extracted -- confirm
    against real output.
    """

    @pytest.fixture
    def multiindex_frame(self):
        """Multiindex dataframe for testing multirow LaTeX macros."""
        yield DataFrame.from_dict(
            {
                ("c1", 0): Series({x: x for x in range(4)}),
                ("c1", 1): Series({x: x + 4 for x in range(4)}),
                ("c2", 0): Series({x: x for x in range(4)}),
                ("c2", 1): Series({x: x + 4 for x in range(4)}),
                ("c3", 0): Series({x: x for x in range(4)}),
            }
        ).T

    @pytest.fixture
    def multicolumn_frame(self):
        """Multicolumn dataframe for testing multicolumn LaTeX macros."""
        yield DataFrame(
            {
                ("c1", 0): {x: x for x in range(5)},
                ("c1", 1): {x: x + 5 for x in range(5)},
                ("c2", 0): {x: x for x in range(5)},
                ("c2", 1): {x: x + 5 for x in range(5)},
                ("c3", 0): {x: x for x in range(5)},
            }
        )

    def test_to_latex_multindex_header(self):
        # GH 16718
        df = DataFrame({"a": [0], "b": [1], "c": [2], "d": [3]})
        df = df.set_index(["a", "b"])
        observed = df.to_latex(header=["r1", "r2"])
        expected = _dedent(
            r"""
            \begin{tabular}{llrr}
            \toprule
            & & r1 & r2 \\
            a & b & & \\
            \midrule
            0 & 1 & 2 & 3 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert observed == expected

    def test_to_latex_multiindex_empty_name(self):
        # GH 18669
        mi = pd.MultiIndex.from_product([[1, 2]], names=[""])
        df = DataFrame(-1, index=mi, columns=range(4))
        observed = df.to_latex()
        expected = _dedent(
            r"""
            \begin{tabular}{lrrrr}
            \toprule
            & 0 & 1 & 2 & 3 \\
            {} & & & & \\
            \midrule
            1 & -1 & -1 & -1 & -1 \\
            2 & -1 & -1 & -1 & -1 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert observed == expected

    def test_to_latex_multiindex_column_tabular(self):
        df = DataFrame({("x", "y"): ["a"]})
        result = df.to_latex()
        expected = _dedent(
            r"""
            \begin{tabular}{ll}
            \toprule
            {} & x \\
            {} & y \\
            \midrule
            0 & a \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_multiindex_small_tabular(self):
        df = DataFrame({("x", "y"): ["a"]}).T
        result = df.to_latex()
        expected = _dedent(
            r"""
            \begin{tabular}{lll}
            \toprule
            & & 0 \\
            \midrule
            x & y & a \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_multiindex_tabular(self, multiindex_frame):
        result = multiindex_frame.to_latex()
        expected = _dedent(
            r"""
            \begin{tabular}{llrrrr}
            \toprule
            & & 0 & 1 & 2 & 3 \\
            \midrule
            c1 & 0 & 0 & 1 & 2 & 3 \\
            & 1 & 4 & 5 & 6 & 7 \\
            c2 & 0 & 0 & 1 & 2 & 3 \\
            & 1 & 4 & 5 & 6 & 7 \\
            c3 & 0 & 0 & 1 & 2 & 3 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_multicolumn_tabular(self, multiindex_frame):
        # GH 14184
        df = multiindex_frame.T
        df.columns.names = ["a", "b"]
        result = df.to_latex()
        expected = _dedent(
            r"""
            \begin{tabular}{lrrrrr}
            \toprule
            a & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\
            b & 0 & 1 & 0 & 1 & 0 \\
            \midrule
            0 & 0 & 4 & 0 & 4 & 0 \\
            1 & 1 & 5 & 1 & 5 & 1 \\
            2 & 2 & 6 & 2 & 6 & 2 \\
            3 & 3 & 7 & 3 & 7 & 3 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_index_has_name_tabular(self):
        # GH 10660
        df = DataFrame({"a": [0, 0, 1, 1], "b": list("abab"), "c": [1, 2, 3, 4]})
        result = df.set_index(["a", "b"]).to_latex()
        expected = _dedent(
            r"""
            \begin{tabular}{llr}
            \toprule
            & & c \\
            a & b & \\
            \midrule
            0 & a & 1 \\
            & b & 2 \\
            1 & a & 3 \\
            & b & 4 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_groupby_tabular(self):
        # GH 10660
        df = DataFrame({"a": [0, 0, 1, 1], "b": list("abab"), "c": [1, 2, 3, 4]})
        result = df.groupby("a").describe().to_latex()
        expected = _dedent(
            r"""
            \begin{tabular}{lrrrrrrrr}
            \toprule
            {} & \multicolumn{8}{l}{c} \\
            {} & count & mean & std & min & 25\% & 50\% & 75\% & max \\
            a & & & & & & & & \\
            \midrule
            0 & 2.0 & 1.5 & 0.707107 & 1.0 & 1.25 & 1.5 & 1.75 & 2.0 \\
            1 & 2.0 & 3.5 & 0.707107 & 3.0 & 3.25 & 3.5 & 3.75 & 4.0 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_multiindex_dupe_level(self):
        # see gh-14484
        #
        # If an index is repeated in subsequent rows, it should be
        # replaced with a blank in the created table. This should
        # ONLY happen if all higher order indices (to the left) are
        # equal too. In this test, 'c' has to be printed both times
        # because the higher order index 'A' != 'B'.
        df = DataFrame(
            index=pd.MultiIndex.from_tuples([("A", "c"), ("B", "c")]), columns=["col"]
        )
        result = df.to_latex()
        expected = _dedent(
            r"""
            \begin{tabular}{lll}
            \toprule
            & & col \\
            \midrule
            A & c & NaN \\
            B & c & NaN \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_multicolumn_default(self, multicolumn_frame):
        result = multicolumn_frame.to_latex()
        expected = _dedent(
            r"""
            \begin{tabular}{lrrrrr}
            \toprule
            {} & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\
            {} & 0 & 1 & 0 & 1 & 0 \\
            \midrule
            0 & 0 & 5 & 0 & 5 & 0 \\
            1 & 1 & 6 & 1 & 6 & 1 \\
            2 & 2 & 7 & 2 & 7 & 2 \\
            3 & 3 & 8 & 3 & 8 & 3 \\
            4 & 4 & 9 & 4 & 9 & 4 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_multicolumn_false(self, multicolumn_frame):
        result = multicolumn_frame.to_latex(multicolumn=False)
        expected = _dedent(
            r"""
            \begin{tabular}{lrrrrr}
            \toprule
            {} & c1 & & c2 & & c3 \\
            {} & 0 & 1 & 0 & 1 & 0 \\
            \midrule
            0 & 0 & 5 & 0 & 5 & 0 \\
            1 & 1 & 6 & 1 & 6 & 1 \\
            2 & 2 & 7 & 2 & 7 & 2 \\
            3 & 3 & 8 & 3 & 8 & 3 \\
            4 & 4 & 9 & 4 & 9 & 4 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_multirow_true(self, multicolumn_frame):
        result = multicolumn_frame.T.to_latex(multirow=True)
        expected = _dedent(
            r"""
            \begin{tabular}{llrrrrr}
            \toprule
            & & 0 & 1 & 2 & 3 & 4 \\
            \midrule
            \multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\
            & 1 & 5 & 6 & 7 & 8 & 9 \\
            \cline{1-7}
            \multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\
            & 1 & 5 & 6 & 7 & 8 & 9 \\
            \cline{1-7}
            c3 & 0 & 0 & 1 & 2 & 3 & 4 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    def test_to_latex_multicolumnrow_with_multicol_format(self, multicolumn_frame):
        # Reuse the column MultiIndex as the row index, then transpose.
        multicolumn_frame.index = multicolumn_frame.T.index
        result = multicolumn_frame.T.to_latex(
            multirow=True,
            multicolumn=True,
            multicolumn_format="c",
        )
        expected = _dedent(
            r"""
            \begin{tabular}{llrrrrr}
            \toprule
            & & \multicolumn{2}{c}{c1} & \multicolumn{2}{c}{c2} & c3 \\
            & & 0 & 1 & 0 & 1 & 0 \\
            \midrule
            \multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\
            & 1 & 5 & 6 & 7 & 8 & 9 \\
            \cline{1-7}
            \multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\
            & 1 & 5 & 6 & 7 & 8 & 9 \\
            \cline{1-7}
            c3 & 0 & 0 & 1 & 2 & 3 & 4 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected

    @pytest.mark.parametrize("name0", [None, "named0"])
    @pytest.mark.parametrize("name1", [None, "named1"])
    @pytest.mark.parametrize("axes", [[0], [1], [0, 1]])
    def test_to_latex_multiindex_names(self, name0, name1, axes):
        # GH 18667
        names = [name0, name1]
        mi = pd.MultiIndex.from_product([[1, 2], [3, 4]])
        df = DataFrame(-1, index=mi.copy(), columns=mi.copy())
        for idx in axes:
            df.axes[idx].names = names

        # The extra row of index names is only expected when the row index
        # (axis 0) was given at least one non-empty name.
        idx_names = tuple(n or "{}" for n in names)
        idx_names_row = (
            f"{idx_names[0]} & {idx_names[1]} & & & & \\\\\n"
            if (0 in axes and any(names))
            else ""
        )
        placeholder = "{}" if any(names) and 1 in axes else " "
        col_names = [n if (bool(n) and 1 in axes) else placeholder for n in names]
        observed = df.to_latex()
        # This literal is not dedented, so its lines must stay at column 0.
        expected = r"""\begin{tabular}{llrrrr}
\toprule
& %s & \multicolumn{2}{l}{1} & \multicolumn{2}{l}{2} \\
& %s & 3 & 4 & 3 & 4 \\
%s\midrule
1 & 3 & -1 & -1 & -1 & -1 \\
& 4 & -1 & -1 & -1 & -1 \\
2 & 3 & -1 & -1 & -1 & -1 \\
& 4 & -1 & -1 & -1 & -1 \\
\bottomrule
\end{tabular}
""" % tuple(
            list(col_names) + [idx_names_row]
        )
        assert observed == expected

    @pytest.mark.parametrize("one_row", [True, False])
    def test_to_latex_multiindex_nans(self, one_row):
        # GH 14249
        df = DataFrame({"a": [None, 1], "b": [2, 3], "c": [4, 5]})
        if one_row:
            df = df.iloc[[0]]
        observed = df.set_index(["a", "b"]).to_latex()
        expected = _dedent(
            r"""
            \begin{tabular}{llr}
            \toprule
            & & c \\
            a & b & \\
            \midrule
            NaN & 2 & 4 \\
            """
        )
        # The pieces appended below are not dedented; keep them at column 0.
        if not one_row:
            expected += r"""1.0 & 3 & 5 \\
"""
        expected += r"""\bottomrule
\end{tabular}
"""
        assert observed == expected

    def test_to_latex_non_string_index(self):
        # GH 19981
        df = DataFrame([[1, 2, 3]] * 2).set_index([0, 1])
        result = df.to_latex()
        expected = _dedent(
            r"""
            \begin{tabular}{llr}
            \toprule
            & & 2 \\
            0 & 1 & \\
            \midrule
            1 & 2 & 3 \\
            & 2 & 3 \\
            \bottomrule
            \end{tabular}
            """
        )
        assert result == expected
class TestTableBuilder:
    """Tests of ``RegularTableBuilder._create_row_iterator``."""

    @pytest.fixture
    def dataframe(self):
        """Two-column frame used to build the formatter."""
        return DataFrame({"a": [1, 2], "b": ["b1", "b2"]})

    @pytest.fixture
    def table_builder(self, dataframe):
        """``RegularTableBuilder`` wrapping a formatter for ``dataframe``."""
        return RegularTableBuilder(formatter=DataFrameFormatter(dataframe))

    def test_create_row_iterator(self, table_builder):
        iterator = table_builder._create_row_iterator(over="header")
        assert isinstance(iterator, RowHeaderIterator)

    def test_create_body_iterator(self, table_builder):
        iterator = table_builder._create_row_iterator(over="body")
        assert isinstance(iterator, RowBodyIterator)

    def test_create_body_wrong_kwarg_raises(self, table_builder):
        with pytest.raises(ValueError, match="must be either 'header' or 'body'"):
            table_builder._create_row_iterator(over="SOMETHING BAD")
class TestRowStringConverter:
    """Tests of ``RowStringConverter.get_strrow`` for individual rows.

    NOTE(review): the expected row literals below use single-space column
    separators only; if the converter pads columns, that padding was
    presumably lost when this copy of the file was extracted -- confirm
    against real output.
    """

    @pytest.mark.parametrize(
        "row_num, expected",
        [
            (0, r"{} & Design & ratio & xy \\"),
            (1, r"0 & 1 & 4 & 10 \\"),
            (2, r"1 & 2 & 5 & 11 \\"),
        ],
    )
    def test_get_strrow_normal_without_escape(self, row_num, expected):
        # The column names contain no special characters, so escape=True
        # leaves the expected header row unchanged.
        df = DataFrame({r"Design": [1, 2, 3], r"ratio": [4, 5, 6], r"xy": [10, 11, 12]})
        row_string_converter = RowStringConverter(
            formatter=DataFrameFormatter(df, escape=True),
        )
        assert row_string_converter.get_strrow(row_num=row_num) == expected

    @pytest.mark.parametrize(
        "row_num, expected",
        [
            (0, r"{} & Design \# & ratio, \% & x\&y \\"),
            (1, r"0 & 1 & 4 & 10 \\"),
            (2, r"1 & 2 & 5 & 11 \\"),
        ],
    )
    def test_get_strrow_normal_with_escape(self, row_num, expected):
        df = DataFrame(
            {r"Design #": [1, 2, 3], r"ratio, %": [4, 5, 6], r"x&y": [10, 11, 12]}
        )
        row_string_converter = RowStringConverter(
            formatter=DataFrameFormatter(df, escape=True),
        )
        assert row_string_converter.get_strrow(row_num=row_num) == expected

    @pytest.mark.parametrize(
        "row_num, expected",
        [
            (0, r"{} & \multicolumn{2}{r}{c1} & \multicolumn{2}{r}{c2} & c3 \\"),
            (1, r"{} & 0 & 1 & 0 & 1 & 0 \\"),
            (2, r"0 & 0 & 5 & 0 & 5 & 0 \\"),
        ],
    )
    def test_get_strrow_multindex_multicolumn(self, row_num, expected):
        df = DataFrame(
            {
                ("c1", 0): {x: x for x in range(5)},
                ("c1", 1): {x: x + 5 for x in range(5)},
                ("c2", 0): {x: x for x in range(5)},
                ("c2", 1): {x: x + 5 for x in range(5)},
                ("c3", 0): {x: x for x in range(5)},
            }
        )
        row_string_converter = RowStringConverter(
            formatter=DataFrameFormatter(df),
            multicolumn=True,
            multicolumn_format="r",
            multirow=True,
        )
        assert row_string_converter.get_strrow(row_num=row_num) == expected