projektAI/venv/Lib/site-packages/pandas/io/parquet.py

""" parquet compat """

from distutils.version import LooseVersion
import io
import os
from typing import Any, AnyStr, Dict, List, Optional, Tuple
from warnings import catch_warnings

from pandas._typing import FilePathOrBuffer, StorageOptions
from pandas.compat._optional import import_optional_dependency
from pandas.errors import AbstractMethodError
from pandas.util._decorators import doc

from pandas import DataFrame, MultiIndex, get_option
from pandas.core import generic

from pandas.io.common import IOHandles, get_handle, is_fsspec_url, stringify_path


def get_engine(engine: str) -> "BaseImpl":
    """ return our implementation """
    if engine == "auto":
        engine = get_option("io.parquet.engine")

    if engine == "auto":
        # try engines in this order
        engine_classes = [PyArrowImpl, FastParquetImpl]

        error_msgs = ""
        for engine_class in engine_classes:
            try:
                return engine_class()
            except ImportError as err:
                error_msgs += "\n - " + str(err)

        raise ImportError(
            "Unable to find a usable engine; "
            "tried using: 'pyarrow', 'fastparquet'.\n"
            "A suitable version of "
            "pyarrow or fastparquet is required for parquet "
            "support.\n"
            "Trying to import the above resulted in these errors:"
            f"{error_msgs}"
        )

    if engine == "pyarrow":
        return PyArrowImpl()
    elif engine == "fastparquet":
        return FastParquetImpl()

    raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")


def _get_path_or_handle(
    path: FilePathOrBuffer,
    fs: Any,
    storage_options: StorageOptions = None,
    mode: str = "rb",
    is_dir: bool = False,
) -> Tuple[FilePathOrBuffer, Optional[IOHandles], Any]:
    """File handling for PyArrow."""
    path_or_handle = stringify_path(path)
    if is_fsspec_url(path_or_handle) and fs is None:
        fsspec = import_optional_dependency("fsspec")

        fs, path_or_handle = fsspec.core.url_to_fs(
            path_or_handle, **(storage_options or {})
        )
    elif storage_options:
        raise ValueError("storage_options passed with buffer or non-fsspec filepath")

    handles = None
    if (
        not fs
        and not is_dir
        and isinstance(path_or_handle, str)
        and not os.path.isdir(path_or_handle)
    ):
        # use get_handle only when we are very certain that it is not a directory
        # fsspec resources can also point to directories
        # this branch is used for example when reading from non-fsspec URLs
        handles = get_handle(path_or_handle, mode, is_text=False)
        fs = None
        path_or_handle = handles.handle
    return path_or_handle, handles, fs


class BaseImpl:
    @staticmethod
    def validate_dataframe(df: DataFrame):

        if not isinstance(df, DataFrame):
            raise ValueError("to_parquet only supports IO with DataFrames")

        # must have value column names for all index levels (strings only)
        if isinstance(df.columns, MultiIndex):
            if not all(
                x.inferred_type in {"string", "empty"} for x in df.columns.levels
            ):
                raise ValueError(
                    """
                    parquet must have string column names for all values in
                     each level of the MultiIndex
                    """
                )
        else:
            if df.columns.inferred_type not in {"string", "empty"}:
                raise ValueError("parquet must have string column names")

        # index level names must be strings
        valid_names = all(
            isinstance(name, str) for name in df.index.names if name is not None
        )
        if not valid_names:
            raise ValueError("Index level names must be strings")

    def write(self, df: DataFrame, path, compression, **kwargs):
        raise AbstractMethodError(self)

    def read(self, path, columns=None, **kwargs):
        raise AbstractMethodError(self)


class PyArrowImpl(BaseImpl):
    def __init__(self):
        import_optional_dependency(
            "pyarrow", extra="pyarrow is required for parquet support."
        )
        import pyarrow.parquet

        # import utils to register the pyarrow extension types
        import pandas.core.arrays._arrow_utils  # noqa

        self.api = pyarrow

    def write(
        self,
        df: DataFrame,
        path: FilePathOrBuffer[AnyStr],
        compression: Optional[str] = "snappy",
        index: Optional[bool] = None,
        storage_options: StorageOptions = None,
        partition_cols: Optional[List[str]] = None,
        **kwargs,
    ):
        self.validate_dataframe(df)

        from_pandas_kwargs: Dict[str, Any] = {"schema": kwargs.pop("schema", None)}
        if index is not None:
            from_pandas_kwargs["preserve_index"] = index

        table = self.api.Table.from_pandas(df, **from_pandas_kwargs)

        path_or_handle, handles, kwargs["filesystem"] = _get_path_or_handle(
            path,
            kwargs.pop("filesystem", None),
            storage_options=storage_options,
            mode="wb",
            is_dir=partition_cols is not None,
        )
        try:
            if partition_cols is not None:
                # writes to multiple files under the given path
                self.api.parquet.write_to_dataset(
                    table,
                    path_or_handle,
                    compression=compression,
                    partition_cols=partition_cols,
                    **kwargs,
                )
            else:
                # write to single output file
                self.api.parquet.write_table(
                    table, path_or_handle, compression=compression, **kwargs
                )
        finally:
            if handles is not None:
                handles.close()

    def read(
        self,
        path,
        columns=None,
        use_nullable_dtypes=False,
        storage_options: StorageOptions = None,
        **kwargs,
    ):
        kwargs["use_pandas_metadata"] = True

        to_pandas_kwargs = {}
        if use_nullable_dtypes:
            if LooseVersion(self.api.__version__) >= "0.16":
                import pandas as pd

                mapping = {
                    self.api.int8(): pd.Int8Dtype(),
                    self.api.int16(): pd.Int16Dtype(),
                    self.api.int32(): pd.Int32Dtype(),
                    self.api.int64(): pd.Int64Dtype(),
                    self.api.uint8(): pd.UInt8Dtype(),
                    self.api.uint16(): pd.UInt16Dtype(),
                    self.api.uint32(): pd.UInt32Dtype(),
                    self.api.uint64(): pd.UInt64Dtype(),
                    self.api.bool_(): pd.BooleanDtype(),
                    self.api.string(): pd.StringDtype(),
                }
                to_pandas_kwargs["types_mapper"] = mapping.get
            else:
                raise ValueError(
                    "'use_nullable_dtypes=True' is only supported for pyarrow >= 0.16 "
                    f"({self.api.__version__} is installed"
                )

        path_or_handle, handles, kwargs["filesystem"] = _get_path_or_handle(
            path,
            kwargs.pop("filesystem", None),
            storage_options=storage_options,
            mode="rb",
        )
        try:
            return self.api.parquet.read_table(
                path_or_handle, columns=columns, **kwargs
            ).to_pandas(**to_pandas_kwargs)
        finally:
            if handles is not None:
                handles.close()


class FastParquetImpl(BaseImpl):
    def __init__(self):
        # since pandas is a dependency of fastparquet
        # we need to import on first use
        fastparquet = import_optional_dependency(
            "fastparquet", extra="fastparquet is required for parquet support."
        )
        self.api = fastparquet

    def write(
        self,
        df: DataFrame,
        path,
        compression="snappy",
        index=None,
        partition_cols=None,
        storage_options: StorageOptions = None,
        **kwargs,
    ):
        self.validate_dataframe(df)
        # thriftpy/protocol/compact.py:339:
        # DeprecationWarning: tostring() is deprecated.
        # Use tobytes() instead.

        if "partition_on" in kwargs and partition_cols is not None:
            raise ValueError(
                "Cannot use both partition_on and "
                "partition_cols. Use partition_cols for partitioning data"
            )
        elif "partition_on" in kwargs:
            partition_cols = kwargs.pop("partition_on")

        if partition_cols is not None:
            kwargs["file_scheme"] = "hive"

        # cannot use get_handle as write() does not accept file buffers
        path = stringify_path(path)
        if is_fsspec_url(path):
            fsspec = import_optional_dependency("fsspec")

            # if filesystem is provided by fsspec, file must be opened in 'wb' mode.
            kwargs["open_with"] = lambda path, _: fsspec.open(
                path, "wb", **(storage_options or {})
            ).open()
        elif storage_options:
            raise ValueError(
                "storage_options passed with file object or non-fsspec file path"
            )

        with catch_warnings(record=True):
            self.api.write(
                path,
                df,
                compression=compression,
                write_index=index,
                partition_on=partition_cols,
                **kwargs,
            )

    def read(
        self, path, columns=None, storage_options: StorageOptions = None, **kwargs
    ):
        use_nullable_dtypes = kwargs.pop("use_nullable_dtypes", False)
        if use_nullable_dtypes:
            raise ValueError(
                "The 'use_nullable_dtypes' argument is not supported for the "
                "fastparquet engine"
            )
        path = stringify_path(path)
        parquet_kwargs = {}
        handles = None
        if is_fsspec_url(path):
            fsspec = import_optional_dependency("fsspec")

            parquet_kwargs["open_with"] = lambda path, _: fsspec.open(
                path, "rb", **(storage_options or {})
            ).open()
        elif isinstance(path, str) and not os.path.isdir(path):
            # use get_handle only when we are very certain that it is not a directory
            # fsspec resources can also point to directories
            # this branch is used for example when reading from non-fsspec URLs
            handles = get_handle(path, "rb", is_text=False)
            path = handles.handle
        parquet_file = self.api.ParquetFile(path, **parquet_kwargs)

        result = parquet_file.to_pandas(columns=columns, **kwargs)

        if handles is not None:
            handles.close()
        return result


@doc(storage_options=generic._shared_docs["storage_options"])
def to_parquet(
    df: DataFrame,
    path: Optional[FilePathOrBuffer] = None,
    engine: str = "auto",
    compression: Optional[str] = "snappy",
    index: Optional[bool] = None,
    storage_options: StorageOptions = None,
    partition_cols: Optional[List[str]] = None,
    **kwargs,
) -> Optional[bytes]:
    """
    Write a DataFrame to the parquet format.

    Parameters
    ----------
    df : DataFrame
    path : str or file-like object, default None
        If a string, it will be used as Root Directory path
        when writing a partitioned dataset. By file-like object,
        we refer to objects with a write() method, such as a file handle
        (e.g. via builtin open function) or io.BytesIO. The engine
        fastparquet does not accept file-like objects. If path is None,
        a bytes object is returned.

        .. versionchanged:: 1.2.0

    engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.
    compression : {{'snappy', 'gzip', 'brotli', None}}, default 'snappy'
        Name of the compression to use. Use ``None`` for no compression.
    index : bool, default None
        If ``True``, include the dataframe's index(es) in the file output. If
        ``False``, they will not be written to the file.
        If ``None``, similar to ``True`` the dataframe's index(es)
        will be saved. However, instead of being saved as values,
        the RangeIndex will be stored as a range in the metadata so it
        doesn't require much space and is faster. Other indexes will
        be included as columns in the file output.

        .. versionadded:: 0.24.0

    partition_cols : str or list, optional, default None
        Column names by which to partition the dataset.
        Columns are partitioned in the order they are given.
        Must be None if path is not a string.

        .. versionadded:: 0.24.0

    {storage_options}

        .. versionadded:: 1.2.0

    kwargs
        Additional keyword arguments passed to the engine

    Returns
    -------
    bytes if no path argument is provided else None
    """
    if isinstance(partition_cols, str):
        partition_cols = [partition_cols]
    impl = get_engine(engine)

    path_or_buf: FilePathOrBuffer = io.BytesIO() if path is None else path

    impl.write(
        df,
        path_or_buf,
        compression=compression,
        index=index,
        partition_cols=partition_cols,
        storage_options=storage_options,
        **kwargs,
    )

    if path is None:
        assert isinstance(path_or_buf, io.BytesIO)
        return path_or_buf.getvalue()
    else:
        return None


def read_parquet(
    path,
    engine: str = "auto",
    columns=None,
    use_nullable_dtypes: bool = False,
    **kwargs,
):
    """
    Load a parquet object from the file path, returning a DataFrame.

    Parameters
    ----------
    path : str, path object or file-like object
        Any valid string path is acceptable. The string could be a URL. Valid
        URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.parquet``.
        A file URL can also be a path to a directory that contains multiple
        partitioned parquet files. Both pyarrow and fastparquet support
        paths to directories as well as file URLs. A directory path could be:
        ``file://localhost/path/to/tables`` or ``s3://bucket/partition_dir``

        If you want to pass in a path object, pandas accepts any
        ``os.PathLike``.

        By file-like object, we refer to objects with a ``read()`` method,
        such as a file handle (e.g. via builtin ``open`` function)
        or ``StringIO``.
    engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.
    columns : list, default=None
        If not None, only these columns will be read from the file.
    use_nullable_dtypes : bool, default False
        If True, use dtypes that use ``pd.NA`` as missing value indicator
        for the resulting DataFrame (only applicable for ``engine="pyarrow"``).
        As new dtypes are added that support ``pd.NA`` in the future, the
        output with this option will change to use those dtypes.
        Note: this is an experimental option, and behaviour (e.g. additional
        support dtypes) may change without notice.

        .. versionadded:: 1.2.0
    **kwargs
        Any additional kwargs are passed to the engine.

    Returns
    -------
    DataFrame
    """
    impl = get_engine(engine)
    return impl.read(
        path, columns=columns, use_nullable_dtypes=use_nullable_dtypes, **kwargs
    )
Działa 2021-06-06 22:13:05 +02:00			`""" parquet compat """`

			`from distutils.version import LooseVersion`
			`import io`
			`import os`
			`from typing import Any, AnyStr, Dict, List, Optional, Tuple`
			`from warnings import catch_warnings`

			`from pandas._typing import FilePathOrBuffer, StorageOptions`
			`from pandas.compat._optional import import_optional_dependency`
			`from pandas.errors import AbstractMethodError`
			`from pandas.util._decorators import doc`

			`from pandas import DataFrame, MultiIndex, get_option`
			`from pandas.core import generic`

			`from pandas.io.common import IOHandles, get_handle, is_fsspec_url, stringify_path`


			`def get_engine(engine: str) -> "BaseImpl":`
			`""" return our implementation """`
			`if engine == "auto":`
			`engine = get_option("io.parquet.engine")`

			`if engine == "auto":`
			`# try engines in this order`
			`engine_classes = [PyArrowImpl, FastParquetImpl]`

			`error_msgs = ""`
			`for engine_class in engine_classes:`
			`try:`
			`return engine_class()`
			`except ImportError as err:`
			`error_msgs += "\n - " + str(err)`

			`raise ImportError(`
			`"Unable to find a usable engine; "`
			`"tried using: 'pyarrow', 'fastparquet'.\n"`
			`"A suitable version of "`
			`"pyarrow or fastparquet is required for parquet "`
			`"support.\n"`
			`"Trying to import the above resulted in these errors:"`
			`f"{error_msgs}"`
			`)`

			`if engine == "pyarrow":`
			`return PyArrowImpl()`
			`elif engine == "fastparquet":`
			`return FastParquetImpl()`

			`raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")`


			`def _get_path_or_handle(`
			`path: FilePathOrBuffer,`
			`fs: Any,`
			`storage_options: StorageOptions = None,`
			`mode: str = "rb",`
			`is_dir: bool = False,`
			`) -> Tuple[FilePathOrBuffer, Optional[IOHandles], Any]:`
			`"""File handling for PyArrow."""`
			`path_or_handle = stringify_path(path)`
			`if is_fsspec_url(path_or_handle) and fs is None:`
			`fsspec = import_optional_dependency("fsspec")`

			`fs, path_or_handle = fsspec.core.url_to_fs(`
			`path_or_handle, **(storage_options or {})`
			`)`
			`elif storage_options:`
			`raise ValueError("storage_options passed with buffer or non-fsspec filepath")`

			`handles = None`
			`if (`
			`not fs`
			`and not is_dir`
			`and isinstance(path_or_handle, str)`
			`and not os.path.isdir(path_or_handle)`
			`):`
			`# use get_handle only when we are very certain that it is not a directory`
			`# fsspec resources can also point to directories`
			`# this branch is used for example when reading from non-fsspec URLs`
			`handles = get_handle(path_or_handle, mode, is_text=False)`
			`fs = None`
			`path_or_handle = handles.handle`
			`return path_or_handle, handles, fs`


			`class BaseImpl:`
			`@staticmethod`
			`def validate_dataframe(df: DataFrame):`

			`if not isinstance(df, DataFrame):`
			`raise ValueError("to_parquet only supports IO with DataFrames")`

			`# must have value column names for all index levels (strings only)`
			`if isinstance(df.columns, MultiIndex):`
			`if not all(`
			`x.inferred_type in {"string", "empty"} for x in df.columns.levels`
			`):`
			`raise ValueError(`
			`"""`
			`parquet must have string column names for all values in`
			`each level of the MultiIndex`
			`"""`
			`)`
			`else:`
			`if df.columns.inferred_type not in {"string", "empty"}:`
			`raise ValueError("parquet must have string column names")`

			`# index level names must be strings`
			`valid_names = all(`
			`isinstance(name, str) for name in df.index.names if name is not None`
			`)`
			`if not valid_names:`
			`raise ValueError("Index level names must be strings")`

			`def write(self, df: DataFrame, path, compression, **kwargs):`
			`raise AbstractMethodError(self)`

			`def read(self, path, columns=None, **kwargs):`
			`raise AbstractMethodError(self)`


			`class PyArrowImpl(BaseImpl):`
			`def __init__(self):`
			`import_optional_dependency(`
			`"pyarrow", extra="pyarrow is required for parquet support."`
			`)`
			`import pyarrow.parquet`

			`# import utils to register the pyarrow extension types`
			`import pandas.core.arrays._arrow_utils # noqa`

			`self.api = pyarrow`

			`def write(`
			`self,`
			`df: DataFrame,`
			`path: FilePathOrBuffer[AnyStr],`
			`compression: Optional[str] = "snappy",`
			`index: Optional[bool] = None,`
			`storage_options: StorageOptions = None,`
			`partition_cols: Optional[List[str]] = None,`
			`**kwargs,`
			`):`
			`self.validate_dataframe(df)`

			`from_pandas_kwargs: Dict[str, Any] = {"schema": kwargs.pop("schema", None)}`
			`if index is not None:`
			`from_pandas_kwargs["preserve_index"] = index`

			`table = self.api.Table.from_pandas(df, **from_pandas_kwargs)`

			`path_or_handle, handles, kwargs["filesystem"] = _get_path_or_handle(`
			`path,`
			`kwargs.pop("filesystem", None),`
			`storage_options=storage_options,`
			`mode="wb",`
			`is_dir=partition_cols is not None,`
			`)`
			`try:`
			`if partition_cols is not None:`
			`# writes to multiple files under the given path`
			`self.api.parquet.write_to_dataset(`
			`table,`
			`path_or_handle,`
			`compression=compression,`
			`partition_cols=partition_cols,`
			`**kwargs,`
			`)`
			`else:`
			`# write to single output file`
			`self.api.parquet.write_table(`
			`table, path_or_handle, compression=compression, **kwargs`
			`)`
			`finally:`
			`if handles is not None:`
			`handles.close()`

			`def read(`
			`self,`
			`path,`
			`columns=None,`
			`use_nullable_dtypes=False,`
			`storage_options: StorageOptions = None,`
			`**kwargs,`
			`):`
			`kwargs["use_pandas_metadata"] = True`

			`to_pandas_kwargs = {}`
			`if use_nullable_dtypes:`
			`if LooseVersion(self.api.__version__) >= "0.16":`
			`import pandas as pd`

			`mapping = {`
			`self.api.int8(): pd.Int8Dtype(),`
			`self.api.int16(): pd.Int16Dtype(),`
			`self.api.int32(): pd.Int32Dtype(),`
			`self.api.int64(): pd.Int64Dtype(),`
			`self.api.uint8(): pd.UInt8Dtype(),`
			`self.api.uint16(): pd.UInt16Dtype(),`
			`self.api.uint32(): pd.UInt32Dtype(),`
			`self.api.uint64(): pd.UInt64Dtype(),`
			`self.api.bool_(): pd.BooleanDtype(),`
			`self.api.string(): pd.StringDtype(),`
			`}`
			`to_pandas_kwargs["types_mapper"] = mapping.get`
			`else:`
			`raise ValueError(`
			`"'use_nullable_dtypes=True' is only supported for pyarrow >= 0.16 "`
			`f"({self.api.__version__} is installed"`
			`)`

			`path_or_handle, handles, kwargs["filesystem"] = _get_path_or_handle(`
			`path,`
			`kwargs.pop("filesystem", None),`
			`storage_options=storage_options,`
			`mode="rb",`
			`)`
			`try:`
			`return self.api.parquet.read_table(`
			`path_or_handle, columns=columns, **kwargs`
			`).to_pandas(**to_pandas_kwargs)`
			`finally:`
			`if handles is not None:`
			`handles.close()`


			`class FastParquetImpl(BaseImpl):`
			`def __init__(self):`
			`# since pandas is a dependency of fastparquet`
			`# we need to import on first use`
			`fastparquet = import_optional_dependency(`
			`"fastparquet", extra="fastparquet is required for parquet support."`
			`)`
			`self.api = fastparquet`

			`def write(`
			`self,`
			`df: DataFrame,`
			`path,`
			`compression="snappy",`
			`index=None,`
			`partition_cols=None,`
			`storage_options: StorageOptions = None,`
			`**kwargs,`
			`):`
			`self.validate_dataframe(df)`
			`# thriftpy/protocol/compact.py:339:`
			`# DeprecationWarning: tostring() is deprecated.`
			`# Use tobytes() instead.`

			`if "partition_on" in kwargs and partition_cols is not None:`
			`raise ValueError(`
			`"Cannot use both partition_on and "`
			`"partition_cols. Use partition_cols for partitioning data"`
			`)`
			`elif "partition_on" in kwargs:`
			`partition_cols = kwargs.pop("partition_on")`

			`if partition_cols is not None:`
			`kwargs["file_scheme"] = "hive"`

			`# cannot use get_handle as write() does not accept file buffers`
			`path = stringify_path(path)`
			`if is_fsspec_url(path):`
			`fsspec = import_optional_dependency("fsspec")`

			`# if filesystem is provided by fsspec, file must be opened in 'wb' mode.`
			`kwargs["open_with"] = lambda path, _: fsspec.open(`
			`path, "wb", **(storage_options or {})`
			`).open()`
			`elif storage_options:`
			`raise ValueError(`
			`"storage_options passed with file object or non-fsspec file path"`
			`)`

			`with catch_warnings(record=True):`
			`self.api.write(`
			`path,`
			`df,`
			`compression=compression,`
			`write_index=index,`
			`partition_on=partition_cols,`
			`**kwargs,`
			`)`

			`def read(`
			`self, path, columns=None, storage_options: StorageOptions = None, **kwargs`
			`):`
			`use_nullable_dtypes = kwargs.pop("use_nullable_dtypes", False)`
			`if use_nullable_dtypes:`
			`raise ValueError(`
			`"The 'use_nullable_dtypes' argument is not supported for the "`
			`"fastparquet engine"`
			`)`
			`path = stringify_path(path)`
			`parquet_kwargs = {}`
			`handles = None`
			`if is_fsspec_url(path):`
			`fsspec = import_optional_dependency("fsspec")`

			`parquet_kwargs["open_with"] = lambda path, _: fsspec.open(`
			`path, "rb", **(storage_options or {})`
			`).open()`
			`elif isinstance(path, str) and not os.path.isdir(path):`
			`# use get_handle only when we are very certain that it is not a directory`
			`# fsspec resources can also point to directories`
			`# this branch is used for example when reading from non-fsspec URLs`
			`handles = get_handle(path, "rb", is_text=False)`
			`path = handles.handle`
			`parquet_file = self.api.ParquetFile(path, **parquet_kwargs)`

			`result = parquet_file.to_pandas(columns=columns, **kwargs)`

			`if handles is not None:`
			`handles.close()`
			`return result`


			`@doc(storage_options=generic._shared_docs["storage_options"])`
			`def to_parquet(`
			`df: DataFrame,`
			`path: Optional[FilePathOrBuffer] = None,`
			`engine: str = "auto",`
			`compression: Optional[str] = "snappy",`
			`index: Optional[bool] = None,`
			`storage_options: StorageOptions = None,`
			`partition_cols: Optional[List[str]] = None,`
			`**kwargs,`
			`) -> Optional[bytes]:`
			`"""`
			`Write a DataFrame to the parquet format.`

			`Parameters`
			`----------`
			`df : DataFrame`
			`path : str or file-like object, default None`
			`If a string, it will be used as Root Directory path`
			`when writing a partitioned dataset. By file-like object,`
			`we refer to objects with a write() method, such as a file handle`
			`(e.g. via builtin open function) or io.BytesIO. The engine`
			`fastparquet does not accept file-like objects. If path is None,`
			`a bytes object is returned.`

			`.. versionchanged:: 1.2.0`

			`engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'`
			`Parquet library to use. If 'auto', then the option`
			``io.parquet.engine`` is used. The default ``io.parquet.engine``
			`behavior is to try 'pyarrow', falling back to 'fastparquet' if`
			`'pyarrow' is unavailable.`
			`compression : {{'snappy', 'gzip', 'brotli', None}}, default 'snappy'`
			Name of the compression to use. Use ``None`` for no compression.
			`index : bool, default None`
			If ``True``, include the dataframe's index(es) in the file output. If
			``False``, they will not be written to the file.
			If ``None``, similar to ``True`` the dataframe's index(es)
			`will be saved. However, instead of being saved as values,`
			`the RangeIndex will be stored as a range in the metadata so it`
			`doesn't require much space and is faster. Other indexes will`
			`be included as columns in the file output.`

			`.. versionadded:: 0.24.0`

			`partition_cols : str or list, optional, default None`
			`Column names by which to partition the dataset.`
			`Columns are partitioned in the order they are given.`
			`Must be None if path is not a string.`

			`.. versionadded:: 0.24.0`

			`{storage_options}`

			`.. versionadded:: 1.2.0`

			`kwargs`
			`Additional keyword arguments passed to the engine`

			`Returns`
			`-------`
			`bytes if no path argument is provided else None`
			`"""`
			`if isinstance(partition_cols, str):`
			`partition_cols = [partition_cols]`
			`impl = get_engine(engine)`

			`path_or_buf: FilePathOrBuffer = io.BytesIO() if path is None else path`

			`impl.write(`
			`df,`
			`path_or_buf,`
			`compression=compression,`
			`index=index,`
			`partition_cols=partition_cols,`
			`storage_options=storage_options,`
			`**kwargs,`
			`)`

			`if path is None:`
			`assert isinstance(path_or_buf, io.BytesIO)`
			`return path_or_buf.getvalue()`
			`else:`
			`return None`


			`def read_parquet(`
			`path,`
			`engine: str = "auto",`
			`columns=None,`
			`use_nullable_dtypes: bool = False,`
			`**kwargs,`
			`):`
			`"""`
			`Load a parquet object from the file path, returning a DataFrame.`

			`Parameters`
			`----------`
			`path : str, path object or file-like object`
			`Any valid string path is acceptable. The string could be a URL. Valid`
			`URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is`
			`expected. A local file could be:`
			``file://localhost/path/to/table.parquet``.
			`A file URL can also be a path to a directory that contains multiple`
			`partitioned parquet files. Both pyarrow and fastparquet support`
			`paths to directories as well as file URLs. A directory path could be:`
			``file://localhost/path/to/tables`` or ``s3://bucket/partition_dir``

			`If you want to pass in a path object, pandas accepts any`
			``os.PathLike``.

			By file-like object, we refer to objects with a ``read()`` method,
			such as a file handle (e.g. via builtin ``open`` function)
			or ``StringIO``.
			`engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'`
			`Parquet library to use. If 'auto', then the option`
			``io.parquet.engine`` is used. The default ``io.parquet.engine``
			`behavior is to try 'pyarrow', falling back to 'fastparquet' if`
			`'pyarrow' is unavailable.`
			`columns : list, default=None`
			`If not None, only these columns will be read from the file.`
			`use_nullable_dtypes : bool, default False`
			If True, use dtypes that use ``pd.NA`` as missing value indicator
			for the resulting DataFrame (only applicable for ``engine="pyarrow"``).
			As new dtypes are added that support ``pd.NA`` in the future, the
			`output with this option will change to use those dtypes.`
			`Note: this is an experimental option, and behaviour (e.g. additional`
			`support dtypes) may change without notice.`

			`.. versionadded:: 1.2.0`
			`**kwargs`
			`Any additional kwargs are passed to the engine.`

			`Returns`
			`-------`
			`DataFrame`
			`"""`
			`impl = get_engine(engine)`
			`return impl.read(`
			`path, columns=columns, use_nullable_dtypes=use_nullable_dtypes, **kwargs`
			`)`