from datetime import ( datetime, timedelta, ) from io import StringIO import numpy as np import pytest from pandas import ( NA, Categorical, DataFrame, MultiIndex, NaT, PeriodIndex, Series, Timestamp, date_range, option_context, period_range, ) import pandas._testing as tm import pandas.io.formats.format as fmt class TestDataFrameReprInfoEtc: def test_repr_bytes_61_lines(self): # GH#12857 lets = list("ACDEFGHIJKLMNOP") slen = 50 nseqs = 1000 words = [[np.random.choice(lets) for x in range(slen)] for _ in range(nseqs)] df = DataFrame(words).astype("U1") assert (df.dtypes == object).all() # smoke tests; at one point this raised with 61 but not 60 repr(df) repr(df.iloc[:60, :]) repr(df.iloc[:61, :]) def test_repr_unicode_level_names(self, frame_or_series): index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=["\u0394", "i1"]) obj = DataFrame(np.random.randn(2, 4), index=index) obj = tm.get_obj(obj, frame_or_series) repr(obj) def test_assign_index_sequences(self): # GH#2200 df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}).set_index( ["a", "b"] ) index = list(df.index) index[0] = ("faz", "boo") df.index = index repr(df) # this travels an improper code path index[0] = ["faz", "boo"] df.index = index repr(df) def test_repr_with_mi_nat(self): df = DataFrame({"X": [1, 2]}, index=[[NaT, Timestamp("20130101")], ["a", "b"]]) result = repr(df) expected = " X\nNaT a 1\n2013-01-01 b 2" assert result == expected def test_repr_with_different_nulls(self): # GH45263 df = DataFrame([1, 2, 3, 4], [True, None, np.nan, NaT]) result = repr(df) expected = """ 0 True 1 None 2 NaN 3 NaT 4""" assert result == expected def test_repr_with_different_nulls_cols(self): # GH45263 d = {np.nan: [1, 2], None: [3, 4], NaT: [6, 7], True: [8, 9]} df = DataFrame(data=d) result = repr(df) expected = """ NaN None NaT True 0 1 3 6 8 1 2 4 7 9""" assert result == expected def test_multiindex_na_repr(self): # only an issue with long columns df3 = DataFrame( { "A" * 30: {("A", "A0006000", "nuit"): "A0006000"}, "B" * 30: {("A", "A0006000", "nuit"): np.nan}, "C" * 30: {("A", "A0006000", "nuit"): np.nan}, "D" * 30: {("A", "A0006000", "nuit"): np.nan}, "E" * 30: {("A", "A0006000", "nuit"): "A"}, "F" * 30: {("A", "A0006000", "nuit"): np.nan}, } ) idf = df3.set_index(["A" * 30, "C" * 30]) repr(idf) def test_repr_name_coincide(self): index = MultiIndex.from_tuples( [("a", 0, "foo"), ("b", 1, "bar")], names=["a", "b", "c"] ) df = DataFrame({"value": [0, 1]}, index=index) lines = repr(df).split("\n") assert lines[2].startswith("a 0 foo") def test_repr_to_string( self, multiindex_year_month_day_dataframe_random_data, multiindex_dataframe_random_data, ): ymd = multiindex_year_month_day_dataframe_random_data frame = multiindex_dataframe_random_data repr(frame) repr(ymd) repr(frame.T) repr(ymd.T) buf = StringIO() frame.to_string(buf=buf) ymd.to_string(buf=buf) frame.T.to_string(buf=buf) ymd.T.to_string(buf=buf) def test_repr_empty(self): # empty repr(DataFrame()) # empty with index frame = DataFrame(index=np.arange(1000)) repr(frame) def test_repr_mixed(self, float_string_frame): buf = StringIO() # mixed repr(float_string_frame) float_string_frame.info(verbose=False, buf=buf) @pytest.mark.slow def test_repr_mixed_big(self): # big mixed biggie = DataFrame( {"A": np.random.randn(200), "B": tm.makeStringIndex(200)}, index=range(200) ) biggie.loc[:20, "A"] = np.nan biggie.loc[:20, "B"] = np.nan repr(biggie) def test_repr(self, float_frame): buf = StringIO() # small one repr(float_frame) float_frame.info(verbose=False, buf=buf) # even smaller float_frame.reindex(columns=["A"]).info(verbose=False, buf=buf) float_frame.reindex(columns=["A", "B"]).info(verbose=False, buf=buf) # exhausting cases in DataFrame.info # columns but no index no_index = DataFrame(columns=[0, 1, 3]) repr(no_index) # no columns or index DataFrame().info(buf=buf) df = DataFrame(["a\n\r\tb"], columns=["a\n\r\td"], index=["a\n\r\tf"]) assert "\t" not in repr(df) assert "\r" not in repr(df) assert "a\n" not in repr(df) def test_repr_dimensions(self): df = DataFrame([[1, 2], [3, 4]]) with option_context("display.show_dimensions", True): assert "2 rows x 2 columns" in repr(df) with option_context("display.show_dimensions", False): assert "2 rows x 2 columns" not in repr(df) with option_context("display.show_dimensions", "truncate"): assert "2 rows x 2 columns" not in repr(df) @pytest.mark.slow def test_repr_big(self): # big one biggie = DataFrame(np.zeros((200, 4)), columns=range(4), index=range(200)) repr(biggie) def test_repr_unsortable(self, float_frame): # columns are not sortable unsortable = DataFrame( { "foo": [1] * 50, datetime.today(): [1] * 50, "bar": ["bar"] * 50, datetime.today() + timedelta(1): ["bar"] * 50, }, index=np.arange(50), ) repr(unsortable) fmt.set_option("display.precision", 3) repr(float_frame) fmt.set_option("display.max_rows", 10, "display.max_columns", 2) repr(float_frame) fmt.set_option("display.max_rows", 1000, "display.max_columns", 1000) repr(float_frame) tm.reset_display_options() def test_repr_unicode(self): uval = "\u03c3\u03c3\u03c3\u03c3" df = DataFrame({"A": [uval, uval]}) result = repr(df) ex_top = " A" assert result.split("\n")[0].rstrip() == ex_top df = DataFrame({"A": [uval, uval]}) result = repr(df) assert result.split("\n")[0].rstrip() == ex_top def test_unicode_string_with_unicode(self): df = DataFrame({"A": ["\u05d0"]}) str(df) def test_repr_unicode_columns(self): df = DataFrame({"\u05d0": [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) repr(df.columns) # should not raise UnicodeDecodeError def test_str_to_bytes_raises(self): # GH 26447 df = DataFrame({"A": ["abc"]}) msg = "^'str' object cannot be interpreted as an integer$" with pytest.raises(TypeError, match=msg): bytes(df) def test_very_wide_info_repr(self): df = DataFrame(np.random.randn(10, 20), columns=tm.rands_array(10, 20)) repr(df) def test_repr_column_name_unicode_truncation_bug(self): # #1906 df = DataFrame( { "Id": [7117434], "StringCol": ( "Is it possible to modify drop plot code" "so that the output graph is displayed " "in iphone simulator, Is it possible to " "modify drop plot code so that the " "output graph is \xe2\x80\xa8displayed " "in iphone simulator.Now we are adding " "the CSV file externally. I want to Call " "the File through the code.." ), } ) with option_context("display.max_columns", 20): assert "StringCol" in repr(df) def test_latex_repr(self): pytest.importorskip("jinja2") expected = r"""\begin{tabular}{llll} \toprule & 0 & 1 & 2 \\ \midrule 0 & $\alpha$ & b & c \\ 1 & 1 & 2 & 3 \\ \bottomrule \end{tabular} """ with option_context( "styler.format.escape", None, "styler.render.repr", "latex" ): df = DataFrame([[r"$\alpha$", "b", "c"], [1, 2, 3]]) result = df._repr_latex_() assert result == expected # GH 12182 assert df._repr_latex_() is None def test_repr_categorical_dates_periods(self): # normal DataFrame dt = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") p = period_range("2011-01", freq="M", periods=5) df = DataFrame({"dt": dt, "p": p}) exp = """ dt p 0 2011-01-01 09:00:00-05:00 2011-01 1 2011-01-01 10:00:00-05:00 2011-02 2 2011-01-01 11:00:00-05:00 2011-03 3 2011-01-01 12:00:00-05:00 2011-04 4 2011-01-01 13:00:00-05:00 2011-05""" assert repr(df) == exp df2 = DataFrame({"dt": Categorical(dt), "p": Categorical(p)}) assert repr(df2) == exp @pytest.mark.parametrize("arg", [np.datetime64, np.timedelta64]) @pytest.mark.parametrize( "box, expected", [[Series, "0 NaT\ndtype: object"], [DataFrame, " 0\n0 NaT"]], ) def test_repr_np_nat_with_object(self, arg, box, expected): # GH 25445 result = repr(box([arg("NaT")], dtype=object)) assert result == expected def test_frame_datetime64_pre1900_repr(self): df = DataFrame({"year": date_range("1/1/1700", periods=50, freq="A-DEC")}) # it works! repr(df) def test_frame_to_string_with_periodindex(self): index = PeriodIndex(["2011-1", "2011-2", "2011-3"], freq="M") frame = DataFrame(np.random.randn(3, 4), index=index) # it works! frame.to_string() def test_to_string_ea_na_in_multiindex(self): # GH#47986 df = DataFrame( {"a": [1, 2]}, index=MultiIndex.from_arrays([Series([NA, 1], dtype="Int64")]), ) result = df.to_string() expected = """ a 1 1 2""" assert result == expected def test_datetime64tz_slice_non_truncate(self): # GH 30263 df = DataFrame({"x": date_range("2019", periods=10, tz="UTC")}) expected = repr(df) df = df.iloc[:, :5] result = repr(df) assert result == expected def test_masked_ea_with_formatter(self): # GH#39336 df = DataFrame( { "a": Series([0.123456789, 1.123456789], dtype="Float64"), "b": Series([1, 2], dtype="Int64"), } ) result = df.to_string(formatters=["{:.2f}".format, "{:.2f}".format]) expected = """ a b 0 0.12 1.00 1 1.12 2.00""" assert result == expected