from __future__ import annotations from abc import ( ABC, abstractmethod, ) import sys from textwrap import dedent from typing import ( TYPE_CHECKING, Iterable, Iterator, Mapping, Sequence, ) from pandas._config import get_option from pandas._typing import ( Dtype, WriteBuffer, ) from pandas.io.formats import format as fmt from pandas.io.formats.printing import pprint_thing if TYPE_CHECKING: from pandas import ( DataFrame, Index, Series, ) frame_max_cols_sub = dedent( """\ max_cols : int, optional When to switch from the verbose to the truncated output. If the DataFrame has more than `max_cols` columns, the truncated output is used. By default, the setting in ``pandas.options.display.max_info_columns`` is used.""" ) show_counts_sub = dedent( """\ show_counts : bool, optional Whether to show the non-null counts. By default, this is shown only if the DataFrame is smaller than ``pandas.options.display.max_info_rows`` and ``pandas.options.display.max_info_columns``. A value of True always shows the counts, and False never shows the counts.""" ) frame_examples_sub = dedent( """\ >>> int_values = [1, 2, 3, 4, 5] >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0] >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values, ... "float_col": float_values}) >>> df int_col text_col float_col 0 1 alpha 0.00 1 2 beta 0.25 2 3 gamma 0.50 3 4 delta 0.75 4 5 epsilon 1.00 Prints information of all columns: >>> df.info(verbose=True) RangeIndex: 5 entries, 0 to 4 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 int_col 5 non-null int64 1 text_col 5 non-null object 2 float_col 5 non-null float64 dtypes: float64(1), int64(1), object(1) memory usage: 248.0+ bytes Prints a summary of columns count and its dtypes but not per column information: >>> df.info(verbose=False) RangeIndex: 5 entries, 0 to 4 Columns: 3 entries, int_col to float_col dtypes: float64(1), int64(1), object(1) memory usage: 248.0+ bytes Pipe output of DataFrame.info to buffer instead of sys.stdout, get buffer content and writes to a text file: >>> import io >>> buffer = io.StringIO() >>> df.info(buf=buffer) >>> s = buffer.getvalue() >>> with open("df_info.txt", "w", ... encoding="utf-8") as f: # doctest: +SKIP ... f.write(s) 260 The `memory_usage` parameter allows deep introspection mode, specially useful for big DataFrames and fine-tune memory optimization: >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) >>> df = pd.DataFrame({ ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6), ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6), ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6) ... }) >>> df.info() RangeIndex: 1000000 entries, 0 to 999999 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 column_1 1000000 non-null object 1 column_2 1000000 non-null object 2 column_3 1000000 non-null object dtypes: object(3) memory usage: 22.9+ MB >>> df.info(memory_usage='deep') RangeIndex: 1000000 entries, 0 to 999999 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 column_1 1000000 non-null object 1 column_2 1000000 non-null object 2 column_3 1000000 non-null object dtypes: object(3) memory usage: 165.9 MB""" ) frame_see_also_sub = dedent( """\ DataFrame.describe: Generate descriptive statistics of DataFrame columns. DataFrame.memory_usage: Memory usage of DataFrame columns.""" ) frame_sub_kwargs = { "klass": "DataFrame", "type_sub": " and columns", "max_cols_sub": frame_max_cols_sub, "show_counts_sub": show_counts_sub, "examples_sub": frame_examples_sub, "see_also_sub": frame_see_also_sub, "version_added_sub": "", } series_examples_sub = dedent( """\ >>> int_values = [1, 2, 3, 4, 5] >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] >>> s = pd.Series(text_values, index=int_values) >>> s.info() Index: 5 entries, 1 to 5 Series name: None Non-Null Count Dtype -------------- ----- 5 non-null object dtypes: object(1) memory usage: 80.0+ bytes Prints a summary excluding information about its values: >>> s.info(verbose=False) Index: 5 entries, 1 to 5 dtypes: object(1) memory usage: 80.0+ bytes Pipe output of Series.info to buffer instead of sys.stdout, get buffer content and writes to a text file: >>> import io >>> buffer = io.StringIO() >>> s.info(buf=buffer) >>> s = buffer.getvalue() >>> with open("df_info.txt", "w", ... encoding="utf-8") as f: # doctest: +SKIP ... f.write(s) 260 The `memory_usage` parameter allows deep introspection mode, specially useful for big Series and fine-tune memory optimization: >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) >>> s = pd.Series(np.random.choice(['a', 'b', 'c'], 10 ** 6)) >>> s.info() RangeIndex: 1000000 entries, 0 to 999999 Series name: None Non-Null Count Dtype -------------- ----- 1000000 non-null object dtypes: object(1) memory usage: 7.6+ MB >>> s.info(memory_usage='deep') RangeIndex: 1000000 entries, 0 to 999999 Series name: None Non-Null Count Dtype -------------- ----- 1000000 non-null object dtypes: object(1) memory usage: 55.3 MB""" ) series_see_also_sub = dedent( """\ Series.describe: Generate descriptive statistics of Series. Series.memory_usage: Memory usage of Series.""" ) series_sub_kwargs = { "klass": "Series", "type_sub": "", "max_cols_sub": "", "show_counts_sub": show_counts_sub, "examples_sub": series_examples_sub, "see_also_sub": series_see_also_sub, "version_added_sub": "\n.. versionadded:: 1.4.0\n", } INFO_DOCSTRING = dedent( """ Print a concise summary of a {klass}. This method prints information about a {klass} including the index dtype{type_sub}, non-null values and memory usage. {version_added_sub}\ Parameters ---------- verbose : bool, optional Whether to print the full summary. By default, the setting in ``pandas.options.display.max_info_columns`` is followed. buf : writable buffer, defaults to sys.stdout Where to send the output. By default, the output is printed to sys.stdout. Pass a writable buffer if you need to further process the output. {max_cols_sub} memory_usage : bool, str, optional Specifies whether total memory usage of the {klass} elements (including the index) should be displayed. By default, this follows the ``pandas.options.display.memory_usage`` setting. True always show memory usage. False never shows memory usage. A value of 'deep' is equivalent to "True with deep introspection". Memory usage is shown in human-readable units (base-2 representation). Without deep introspection a memory estimation is made based in column dtype and number of rows assuming values consume the same memory amount for corresponding dtypes. With deep memory introspection, a real memory usage calculation is performed at the cost of computational resources. See the :ref:`Frequently Asked Questions ` for more details. {show_counts_sub} Returns ------- None This method prints a summary of a {klass} and returns None. See Also -------- {see_also_sub} Examples -------- {examples_sub} """ ) def _put_str(s: str | Dtype, space: int) -> str: """ Make string of specified length, padding to the right if necessary. Parameters ---------- s : Union[str, Dtype] String to be formatted. space : int Length to force string to be of. Returns ------- str String coerced to given length. Examples -------- >>> pd.io.formats.info._put_str("panda", 6) 'panda ' >>> pd.io.formats.info._put_str("panda", 4) 'pand' """ return str(s)[:space].ljust(space) def _sizeof_fmt(num: float, size_qualifier: str) -> str: """ Return size in human readable format. Parameters ---------- num : int Size in bytes. size_qualifier : str Either empty, or '+' (if lower bound). Returns ------- str Size in human readable format. Examples -------- >>> _sizeof_fmt(23028, '') '22.5 KB' >>> _sizeof_fmt(23028, '+') '22.5+ KB' """ for x in ["bytes", "KB", "MB", "GB", "TB"]: if num < 1024.0: return f"{num:3.1f}{size_qualifier} {x}" num /= 1024.0 return f"{num:3.1f}{size_qualifier} PB" def _initialize_memory_usage( memory_usage: bool | str | None = None, ) -> bool | str: """Get memory usage based on inputs and display options.""" if memory_usage is None: memory_usage = get_option("display.memory_usage") return memory_usage class BaseInfo(ABC): """ Base class for DataFrameInfo and SeriesInfo. Parameters ---------- data : DataFrame or Series Either dataframe or series. memory_usage : bool or str, optional If "deep", introspect the data deeply by interrogating object dtypes for system-level memory consumption, and include it in the returned values. """ data: DataFrame | Series memory_usage: bool | str @property @abstractmethod def dtypes(self) -> Iterable[Dtype]: """ Dtypes. Returns ------- dtypes : sequence Dtype of each of the DataFrame's columns (or one series column). """ @property @abstractmethod def dtype_counts(self) -> Mapping[str, int]: """Mapping dtype - number of counts.""" @property @abstractmethod def non_null_counts(self) -> Sequence[int]: """Sequence of non-null counts for all columns or column (if series).""" @property @abstractmethod def memory_usage_bytes(self) -> int: """ Memory usage in bytes. Returns ------- memory_usage_bytes : int Object's total memory usage in bytes. """ @property def memory_usage_string(self) -> str: """Memory usage in a form of human readable string.""" return f"{_sizeof_fmt(self.memory_usage_bytes, self.size_qualifier)}\n" @property def size_qualifier(self) -> str: size_qualifier = "" if self.memory_usage: if self.memory_usage != "deep": # size_qualifier is just a best effort; not guaranteed to catch # all cases (e.g., it misses categorical data even with object # categories) if ( "object" in self.dtype_counts or self.data.index._is_memory_usage_qualified() ): size_qualifier = "+" return size_qualifier @abstractmethod def render( self, *, buf: WriteBuffer[str] | None, max_cols: int | None, verbose: bool | None, show_counts: bool | None, ) -> None: pass class DataFrameInfo(BaseInfo): """ Class storing dataframe-specific info. """ def __init__( self, data: DataFrame, memory_usage: bool | str | None = None, ) -> None: self.data: DataFrame = data self.memory_usage = _initialize_memory_usage(memory_usage) @property def dtype_counts(self) -> Mapping[str, int]: return _get_dataframe_dtype_counts(self.data) @property def dtypes(self) -> Iterable[Dtype]: """ Dtypes. Returns ------- dtypes Dtype of each of the DataFrame's columns. """ return self.data.dtypes @property def ids(self) -> Index: """ Column names. Returns ------- ids : Index DataFrame's column names. """ return self.data.columns @property def col_count(self) -> int: """Number of columns to be summarized.""" return len(self.ids) @property def non_null_counts(self) -> Sequence[int]: """Sequence of non-null counts for all columns or column (if series).""" return self.data.count() @property def memory_usage_bytes(self) -> int: deep = self.memory_usage == "deep" return self.data.memory_usage(index=True, deep=deep).sum() def render( self, *, buf: WriteBuffer[str] | None, max_cols: int | None, verbose: bool | None, show_counts: bool | None, ) -> None: printer = DataFrameInfoPrinter( info=self, max_cols=max_cols, verbose=verbose, show_counts=show_counts, ) printer.to_buffer(buf) class SeriesInfo(BaseInfo): """ Class storing series-specific info. """ def __init__( self, data: Series, memory_usage: bool | str | None = None, ) -> None: self.data: Series = data self.memory_usage = _initialize_memory_usage(memory_usage) def render( self, *, buf: WriteBuffer[str] | None = None, max_cols: int | None = None, verbose: bool | None = None, show_counts: bool | None = None, ) -> None: if max_cols is not None: raise ValueError( "Argument `max_cols` can only be passed " "in DataFrame.info, not Series.info" ) printer = SeriesInfoPrinter( info=self, verbose=verbose, show_counts=show_counts, ) printer.to_buffer(buf) @property def non_null_counts(self) -> Sequence[int]: return [self.data.count()] @property def dtypes(self) -> Iterable[Dtype]: return [self.data.dtypes] @property def dtype_counts(self) -> Mapping[str, int]: from pandas.core.frame import DataFrame return _get_dataframe_dtype_counts(DataFrame(self.data)) @property def memory_usage_bytes(self) -> int: """Memory usage in bytes. Returns ------- memory_usage_bytes : int Object's total memory usage in bytes. """ deep = self.memory_usage == "deep" return self.data.memory_usage(index=True, deep=deep) class InfoPrinterAbstract: """ Class for printing dataframe or series info. """ def to_buffer(self, buf: WriteBuffer[str] | None = None) -> None: """Save dataframe info into buffer.""" table_builder = self._create_table_builder() lines = table_builder.get_lines() if buf is None: # pragma: no cover buf = sys.stdout fmt.buffer_put_lines(buf, lines) @abstractmethod def _create_table_builder(self) -> TableBuilderAbstract: """Create instance of table builder.""" class DataFrameInfoPrinter(InfoPrinterAbstract): """ Class for printing dataframe info. Parameters ---------- info : DataFrameInfo Instance of DataFrameInfo. max_cols : int, optional When to switch from the verbose to the truncated output. verbose : bool, optional Whether to print the full summary. show_counts : bool, optional Whether to show the non-null counts. """ def __init__( self, info: DataFrameInfo, max_cols: int | None = None, verbose: bool | None = None, show_counts: bool | None = None, ) -> None: self.info = info self.data = info.data self.verbose = verbose self.max_cols = self._initialize_max_cols(max_cols) self.show_counts = self._initialize_show_counts(show_counts) @property def max_rows(self) -> int: """Maximum info rows to be displayed.""" return get_option("display.max_info_rows", len(self.data) + 1) @property def exceeds_info_cols(self) -> bool: """Check if number of columns to be summarized does not exceed maximum.""" return bool(self.col_count > self.max_cols) @property def exceeds_info_rows(self) -> bool: """Check if number of rows to be summarized does not exceed maximum.""" return bool(len(self.data) > self.max_rows) @property def col_count(self) -> int: """Number of columns to be summarized.""" return self.info.col_count def _initialize_max_cols(self, max_cols: int | None) -> int: if max_cols is None: return get_option("display.max_info_columns", self.col_count + 1) return max_cols def _initialize_show_counts(self, show_counts: bool | None) -> bool: if show_counts is None: return bool(not self.exceeds_info_cols and not self.exceeds_info_rows) else: return show_counts def _create_table_builder(self) -> DataFrameTableBuilder: """ Create instance of table builder based on verbosity and display settings. """ if self.verbose: return DataFrameTableBuilderVerbose( info=self.info, with_counts=self.show_counts, ) elif self.verbose is False: # specifically set to False, not necessarily None return DataFrameTableBuilderNonVerbose(info=self.info) else: if self.exceeds_info_cols: return DataFrameTableBuilderNonVerbose(info=self.info) else: return DataFrameTableBuilderVerbose( info=self.info, with_counts=self.show_counts, ) class SeriesInfoPrinter(InfoPrinterAbstract): """Class for printing series info. Parameters ---------- info : SeriesInfo Instance of SeriesInfo. verbose : bool, optional Whether to print the full summary. show_counts : bool, optional Whether to show the non-null counts. """ def __init__( self, info: SeriesInfo, verbose: bool | None = None, show_counts: bool | None = None, ) -> None: self.info = info self.data = info.data self.verbose = verbose self.show_counts = self._initialize_show_counts(show_counts) def _create_table_builder(self) -> SeriesTableBuilder: """ Create instance of table builder based on verbosity. """ if self.verbose or self.verbose is None: return SeriesTableBuilderVerbose( info=self.info, with_counts=self.show_counts, ) else: return SeriesTableBuilderNonVerbose(info=self.info) def _initialize_show_counts(self, show_counts: bool | None) -> bool: if show_counts is None: return True else: return show_counts class TableBuilderAbstract(ABC): """ Abstract builder for info table. """ _lines: list[str] info: BaseInfo @abstractmethod def get_lines(self) -> list[str]: """Product in a form of list of lines (strings).""" @property def data(self) -> DataFrame | Series: return self.info.data @property def dtypes(self) -> Iterable[Dtype]: """Dtypes of each of the DataFrame's columns.""" return self.info.dtypes @property def dtype_counts(self) -> Mapping[str, int]: """Mapping dtype - number of counts.""" return self.info.dtype_counts @property def display_memory_usage(self) -> bool: """Whether to display memory usage.""" return bool(self.info.memory_usage) @property def memory_usage_string(self) -> str: """Memory usage string with proper size qualifier.""" return self.info.memory_usage_string @property def non_null_counts(self) -> Sequence[int]: return self.info.non_null_counts def add_object_type_line(self) -> None: """Add line with string representation of dataframe to the table.""" self._lines.append(str(type(self.data))) def add_index_range_line(self) -> None: """Add line with range of indices to the table.""" self._lines.append(self.data.index._summary()) def add_dtypes_line(self) -> None: """Add summary line with dtypes present in dataframe.""" collected_dtypes = [ f"{key}({val:d})" for key, val in sorted(self.dtype_counts.items()) ] self._lines.append(f"dtypes: {', '.join(collected_dtypes)}") class DataFrameTableBuilder(TableBuilderAbstract): """ Abstract builder for dataframe info table. Parameters ---------- info : DataFrameInfo. Instance of DataFrameInfo. """ def __init__(self, *, info: DataFrameInfo) -> None: self.info: DataFrameInfo = info def get_lines(self) -> list[str]: self._lines = [] if self.col_count == 0: self._fill_empty_info() else: self._fill_non_empty_info() return self._lines def _fill_empty_info(self) -> None: """Add lines to the info table, pertaining to empty dataframe.""" self.add_object_type_line() self.add_index_range_line() self._lines.append(f"Empty {type(self.data).__name__}\n") @abstractmethod def _fill_non_empty_info(self) -> None: """Add lines to the info table, pertaining to non-empty dataframe.""" @property def data(self) -> DataFrame: """DataFrame.""" return self.info.data @property def ids(self) -> Index: """Dataframe columns.""" return self.info.ids @property def col_count(self) -> int: """Number of dataframe columns to be summarized.""" return self.info.col_count def add_memory_usage_line(self) -> None: """Add line containing memory usage.""" self._lines.append(f"memory usage: {self.memory_usage_string}") class DataFrameTableBuilderNonVerbose(DataFrameTableBuilder): """ Dataframe info table builder for non-verbose output. """ def _fill_non_empty_info(self) -> None: """Add lines to the info table, pertaining to non-empty dataframe.""" self.add_object_type_line() self.add_index_range_line() self.add_columns_summary_line() self.add_dtypes_line() if self.display_memory_usage: self.add_memory_usage_line() def add_columns_summary_line(self) -> None: self._lines.append(self.ids._summary(name="Columns")) class TableBuilderVerboseMixin(TableBuilderAbstract): """ Mixin for verbose info output. """ SPACING: str = " " * 2 strrows: Sequence[Sequence[str]] gross_column_widths: Sequence[int] with_counts: bool @property @abstractmethod def headers(self) -> Sequence[str]: """Headers names of the columns in verbose table.""" @property def header_column_widths(self) -> Sequence[int]: """Widths of header columns (only titles).""" return [len(col) for col in self.headers] def _get_gross_column_widths(self) -> Sequence[int]: """Get widths of columns containing both headers and actual content.""" body_column_widths = self._get_body_column_widths() return [ max(*widths) for widths in zip(self.header_column_widths, body_column_widths) ] def _get_body_column_widths(self) -> Sequence[int]: """Get widths of table content columns.""" strcols: Sequence[Sequence[str]] = list(zip(*self.strrows)) return [max(len(x) for x in col) for col in strcols] def _gen_rows(self) -> Iterator[Sequence[str]]: """ Generator function yielding rows content. Each element represents a row comprising a sequence of strings. """ if self.with_counts: return self._gen_rows_with_counts() else: return self._gen_rows_without_counts() @abstractmethod def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]: """Iterator with string representation of body data with counts.""" @abstractmethod def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]: """Iterator with string representation of body data without counts.""" def add_header_line(self) -> None: header_line = self.SPACING.join( [ _put_str(header, col_width) for header, col_width in zip(self.headers, self.gross_column_widths) ] ) self._lines.append(header_line) def add_separator_line(self) -> None: separator_line = self.SPACING.join( [ _put_str("-" * header_colwidth, gross_colwidth) for header_colwidth, gross_colwidth in zip( self.header_column_widths, self.gross_column_widths ) ] ) self._lines.append(separator_line) def add_body_lines(self) -> None: for row in self.strrows: body_line = self.SPACING.join( [ _put_str(col, gross_colwidth) for col, gross_colwidth in zip(row, self.gross_column_widths) ] ) self._lines.append(body_line) def _gen_non_null_counts(self) -> Iterator[str]: """Iterator with string representation of non-null counts.""" for count in self.non_null_counts: yield f"{count} non-null" def _gen_dtypes(self) -> Iterator[str]: """Iterator with string representation of column dtypes.""" for dtype in self.dtypes: yield pprint_thing(dtype) class DataFrameTableBuilderVerbose(DataFrameTableBuilder, TableBuilderVerboseMixin): """ Dataframe info table builder for verbose output. """ def __init__( self, *, info: DataFrameInfo, with_counts: bool, ) -> None: self.info = info self.with_counts = with_counts self.strrows: Sequence[Sequence[str]] = list(self._gen_rows()) self.gross_column_widths: Sequence[int] = self._get_gross_column_widths() def _fill_non_empty_info(self) -> None: """Add lines to the info table, pertaining to non-empty dataframe.""" self.add_object_type_line() self.add_index_range_line() self.add_columns_summary_line() self.add_header_line() self.add_separator_line() self.add_body_lines() self.add_dtypes_line() if self.display_memory_usage: self.add_memory_usage_line() @property def headers(self) -> Sequence[str]: """Headers names of the columns in verbose table.""" if self.with_counts: return [" # ", "Column", "Non-Null Count", "Dtype"] return [" # ", "Column", "Dtype"] def add_columns_summary_line(self) -> None: self._lines.append(f"Data columns (total {self.col_count} columns):") def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]: """Iterator with string representation of body data without counts.""" yield from zip( self._gen_line_numbers(), self._gen_columns(), self._gen_dtypes(), ) def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]: """Iterator with string representation of body data with counts.""" yield from zip( self._gen_line_numbers(), self._gen_columns(), self._gen_non_null_counts(), self._gen_dtypes(), ) def _gen_line_numbers(self) -> Iterator[str]: """Iterator with string representation of column numbers.""" for i, _ in enumerate(self.ids): yield f" {i}" def _gen_columns(self) -> Iterator[str]: """Iterator with string representation of column names.""" for col in self.ids: yield pprint_thing(col) class SeriesTableBuilder(TableBuilderAbstract): """ Abstract builder for series info table. Parameters ---------- info : SeriesInfo. Instance of SeriesInfo. """ def __init__(self, *, info: SeriesInfo) -> None: self.info: SeriesInfo = info def get_lines(self) -> list[str]: self._lines = [] self._fill_non_empty_info() return self._lines @property def data(self) -> Series: """Series.""" return self.info.data def add_memory_usage_line(self) -> None: """Add line containing memory usage.""" self._lines.append(f"memory usage: {self.memory_usage_string}") @abstractmethod def _fill_non_empty_info(self) -> None: """Add lines to the info table, pertaining to non-empty series.""" class SeriesTableBuilderNonVerbose(SeriesTableBuilder): """ Series info table builder for non-verbose output. """ def _fill_non_empty_info(self) -> None: """Add lines to the info table, pertaining to non-empty series.""" self.add_object_type_line() self.add_index_range_line() self.add_dtypes_line() if self.display_memory_usage: self.add_memory_usage_line() class SeriesTableBuilderVerbose(SeriesTableBuilder, TableBuilderVerboseMixin): """ Series info table builder for verbose output. """ def __init__( self, *, info: SeriesInfo, with_counts: bool, ) -> None: self.info = info self.with_counts = with_counts self.strrows: Sequence[Sequence[str]] = list(self._gen_rows()) self.gross_column_widths: Sequence[int] = self._get_gross_column_widths() def _fill_non_empty_info(self) -> None: """Add lines to the info table, pertaining to non-empty series.""" self.add_object_type_line() self.add_index_range_line() self.add_series_name_line() self.add_header_line() self.add_separator_line() self.add_body_lines() self.add_dtypes_line() if self.display_memory_usage: self.add_memory_usage_line() def add_series_name_line(self) -> None: self._lines.append(f"Series name: {self.data.name}") @property def headers(self) -> Sequence[str]: """Headers names of the columns in verbose table.""" if self.with_counts: return ["Non-Null Count", "Dtype"] return ["Dtype"] def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]: """Iterator with string representation of body data without counts.""" yield from self._gen_dtypes() def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]: """Iterator with string representation of body data with counts.""" yield from zip( self._gen_non_null_counts(), self._gen_dtypes(), ) def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]: """ Create mapping between datatypes and their number of occurrences. """ # groupby dtype.name to collect e.g. Categorical columns return df.dtypes.value_counts().groupby(lambda x: x.name).sum()