""" Read SAS sas7bdat or xport files. """ from __future__ import annotations from abc import ( ABCMeta, abstractmethod, ) from types import TracebackType from typing import ( TYPE_CHECKING, Hashable, overload, ) from pandas._typing import ( CompressionOptions, FilePath, ReadBuffer, ) from pandas.util._decorators import doc from pandas.core.shared_docs import _shared_docs from pandas.io.common import stringify_path if TYPE_CHECKING: from pandas import DataFrame # TODO(PY38): replace with Protocol in Python 3.8 class ReaderBase(metaclass=ABCMeta): """ Protocol for XportReader and SAS7BDATReader classes. """ @abstractmethod def read(self, nrows: int | None = None) -> DataFrame: pass @abstractmethod def close(self) -> None: pass def __enter__(self) -> ReaderBase: return self def __exit__( self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: TracebackType | None, ) -> None: self.close() @overload def read_sas( filepath_or_buffer: FilePath | ReadBuffer[bytes], *, format: str | None = ..., index: Hashable | None = ..., encoding: str | None = ..., chunksize: int = ..., iterator: bool = ..., compression: CompressionOptions = ..., ) -> ReaderBase: ... @overload def read_sas( filepath_or_buffer: FilePath | ReadBuffer[bytes], *, format: str | None = ..., index: Hashable | None = ..., encoding: str | None = ..., chunksize: None = ..., iterator: bool = ..., compression: CompressionOptions = ..., ) -> DataFrame | ReaderBase: ... @doc(decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer") def read_sas( filepath_or_buffer: FilePath | ReadBuffer[bytes], *, format: str | None = None, index: Hashable | None = None, encoding: str | None = None, chunksize: int | None = None, iterator: bool = False, compression: CompressionOptions = "infer", ) -> DataFrame | ReaderBase: """ Read SAS files stored as either XPORT or SAS7BDAT format files. Parameters ---------- filepath_or_buffer : str, path object, or file-like object String, path object (implementing ``os.PathLike[str]``), or file-like object implementing a binary ``read()`` function. The string could be a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is expected. A local file could be: ``file://localhost/path/to/table.sas7bdat``. format : str {{'xport', 'sas7bdat'}} or None If None, file format is inferred from file extension. If 'xport' or 'sas7bdat', uses the corresponding format. index : identifier of index column, defaults to None Identifier of column that should be used as index of the DataFrame. encoding : str, default is None Encoding for text data. If None, text data are stored as raw bytes. chunksize : int Read file `chunksize` lines at a time, returns iterator. .. versionchanged:: 1.2 ``TextFileReader`` is a context manager. iterator : bool, defaults to False If True, returns an iterator for reading the file incrementally. .. versionchanged:: 1.2 ``TextFileReader`` is a context manager. {decompression_options} Returns ------- DataFrame if iterator=False and chunksize=None, else SAS7BDATReader or XportReader """ if format is None: buffer_error_msg = ( "If this is a buffer object rather " "than a string name, you must specify a format string" ) filepath_or_buffer = stringify_path(filepath_or_buffer) if not isinstance(filepath_or_buffer, str): raise ValueError(buffer_error_msg) fname = filepath_or_buffer.lower() if ".xpt" in fname: format = "xport" elif ".sas7bdat" in fname: format = "sas7bdat" else: raise ValueError( f"unable to infer format of SAS file from filename: {repr(fname)}" ) reader: ReaderBase if format.lower() == "xport": from pandas.io.sas.sas_xport import XportReader reader = XportReader( filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize, compression=compression, ) elif format.lower() == "sas7bdat": from pandas.io.sas.sas7bdat import SAS7BDATReader reader = SAS7BDATReader( filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize, compression=compression, ) else: raise ValueError("unknown SAS format") if iterator or chunksize: return reader with reader: return reader.read()