Inzynierka/Lib/site-packages/scipy/io/_harwell_boeing/hb.py

"""
Implementation of Harwell-Boeing read/write.

At the moment not the full Harwell-Boeing format is supported. Supported
features are:

    - assembled, non-symmetric, real matrices
    - integer for pointer/indices
    - exponential format for float values, and int format

"""
# TODO:
#   - Add more support (symmetric/complex matrices, non-assembled matrices ?)

# XXX: reading is reasonably efficient (>= 85 % is in numpy.fromstring), but
# takes a lot of memory. Being faster would require compiled code.
# write is not efficient. Although not a terribly exciting task,
# having reusable facilities to efficiently read/write fortran-formatted files
# would be useful outside this module.

import warnings

import numpy as np
from scipy.sparse import csc_matrix
from ._fortran_format_parser import FortranFormatParser, IntFormat, ExpFormat

__all__ = ["MalformedHeader", "hb_read", "hb_write", "HBInfo", "HBFile",
           "HBMatrixType"]


class MalformedHeader(Exception):
    pass


class LineOverflow(Warning):
    pass


def _nbytes_full(fmt, nlines):
    """Return the number of bytes to read to get every full lines for the
    given parsed fortran format."""
    return (fmt.repeat * fmt.width + 1) * (nlines - 1)


class HBInfo:
    @classmethod
    def from_data(cls, m, title="Default title", key="0", mxtype=None, fmt=None):
        """Create a HBInfo instance from an existing sparse matrix.

        Parameters
        ----------
        m : sparse matrix
            the HBInfo instance will derive its parameters from m
        title : str
            Title to put in the HB header
        key : str
            Key
        mxtype : HBMatrixType
            type of the input matrix
        fmt : dict
            not implemented

        Returns
        -------
        hb_info : HBInfo instance
        """
        m = m.tocsc(copy=False)

        pointer = m.indptr
        indices = m.indices
        values = m.data

        nrows, ncols = m.shape
        nnon_zeros = m.nnz

        if fmt is None:
            # +1 because HB use one-based indexing (Fortran), and we will write
            # the indices /pointer as such
            pointer_fmt = IntFormat.from_number(np.max(pointer+1))
            indices_fmt = IntFormat.from_number(np.max(indices+1))

            if values.dtype.kind in np.typecodes["AllFloat"]:
                values_fmt = ExpFormat.from_number(-np.max(np.abs(values)))
            elif values.dtype.kind in np.typecodes["AllInteger"]:
                values_fmt = IntFormat.from_number(-np.max(np.abs(values)))
            else:
                raise NotImplementedError("type %s not implemented yet" % values.dtype.kind)
        else:
            raise NotImplementedError("fmt argument not supported yet.")

        if mxtype is None:
            if not np.isrealobj(values):
                raise ValueError("Complex values not supported yet")
            if values.dtype.kind in np.typecodes["AllInteger"]:
                tp = "integer"
            elif values.dtype.kind in np.typecodes["AllFloat"]:
                tp = "real"
            else:
                raise NotImplementedError("type %s for values not implemented"
                                          % values.dtype)
            mxtype = HBMatrixType(tp, "unsymmetric", "assembled")
        else:
            raise ValueError("mxtype argument not handled yet.")

        def _nlines(fmt, size):
            nlines = size // fmt.repeat
            if nlines * fmt.repeat != size:
                nlines += 1
            return nlines

        pointer_nlines = _nlines(pointer_fmt, pointer.size)
        indices_nlines = _nlines(indices_fmt, indices.size)
        values_nlines = _nlines(values_fmt, values.size)

        total_nlines = pointer_nlines + indices_nlines + values_nlines

        return cls(title, key,
            total_nlines, pointer_nlines, indices_nlines, values_nlines,
            mxtype, nrows, ncols, nnon_zeros,
            pointer_fmt.fortran_format, indices_fmt.fortran_format,
            values_fmt.fortran_format)

    @classmethod
    def from_file(cls, fid):
        """Create a HBInfo instance from a file object containing a matrix in the
        HB format.

        Parameters
        ----------
        fid : file-like matrix
            File or file-like object containing a matrix in the HB format.

        Returns
        -------
        hb_info : HBInfo instance
        """
        # First line
        line = fid.readline().strip("\n")
        if not len(line) > 72:
            raise ValueError("Expected at least 72 characters for first line, "
                             "got: \n%s" % line)
        title = line[:72]
        key = line[72:]

        # Second line
        line = fid.readline().strip("\n")
        if not len(line.rstrip()) >= 56:
            raise ValueError("Expected at least 56 characters for second line, "
                             "got: \n%s" % line)
        total_nlines = _expect_int(line[:14])
        pointer_nlines = _expect_int(line[14:28])
        indices_nlines = _expect_int(line[28:42])
        values_nlines = _expect_int(line[42:56])

        rhs_nlines = line[56:72].strip()
        if rhs_nlines == '':
            rhs_nlines = 0
        else:
            rhs_nlines = _expect_int(rhs_nlines)
        if not rhs_nlines == 0:
            raise ValueError("Only files without right hand side supported for "
                             "now.")

        # Third line
        line = fid.readline().strip("\n")
        if not len(line) >= 70:
            raise ValueError("Expected at least 72 character for third line, got:\n"
                             "%s" % line)

        mxtype_s = line[:3].upper()
        if not len(mxtype_s) == 3:
            raise ValueError("mxtype expected to be 3 characters long")

        mxtype = HBMatrixType.from_fortran(mxtype_s)
        if mxtype.value_type not in ["real", "integer"]:
            raise ValueError("Only real or integer matrices supported for "
                             "now (detected %s)" % mxtype)
        if not mxtype.structure == "unsymmetric":
            raise ValueError("Only unsymmetric matrices supported for "
                             "now (detected %s)" % mxtype)
        if not mxtype.storage == "assembled":
            raise ValueError("Only assembled matrices supported for now")

        if not line[3:14] == " " * 11:
            raise ValueError("Malformed data for third line: %s" % line)

        nrows = _expect_int(line[14:28])
        ncols = _expect_int(line[28:42])
        nnon_zeros = _expect_int(line[42:56])
        nelementals = _expect_int(line[56:70])
        if not nelementals == 0:
            raise ValueError("Unexpected value %d for nltvl (last entry of line 3)"
                             % nelementals)

        # Fourth line
        line = fid.readline().strip("\n")

        ct = line.split()
        if not len(ct) == 3:
            raise ValueError("Expected 3 formats, got %s" % ct)

        return cls(title, key,
                   total_nlines, pointer_nlines, indices_nlines, values_nlines,
                   mxtype, nrows, ncols, nnon_zeros,
                   ct[0], ct[1], ct[2],
                   rhs_nlines, nelementals)

    def __init__(self, title, key,
            total_nlines, pointer_nlines, indices_nlines, values_nlines,
            mxtype, nrows, ncols, nnon_zeros,
            pointer_format_str, indices_format_str, values_format_str,
            right_hand_sides_nlines=0, nelementals=0):
        """Do not use this directly, but the class ctrs (from_* functions)."""
        self.title = title
        self.key = key
        if title is None:
            title = "No Title"
        if len(title) > 72:
            raise ValueError("title cannot be > 72 characters")

        if key is None:
            key = "|No Key"
        if len(key) > 8:
            warnings.warn("key is > 8 characters (key is %s)" % key, LineOverflow)

        self.total_nlines = total_nlines
        self.pointer_nlines = pointer_nlines
        self.indices_nlines = indices_nlines
        self.values_nlines = values_nlines

        parser = FortranFormatParser()
        pointer_format = parser.parse(pointer_format_str)
        if not isinstance(pointer_format, IntFormat):
            raise ValueError("Expected int format for pointer format, got %s"
                             % pointer_format)

        indices_format = parser.parse(indices_format_str)
        if not isinstance(indices_format, IntFormat):
            raise ValueError("Expected int format for indices format, got %s" %
                             indices_format)

        values_format = parser.parse(values_format_str)
        if isinstance(values_format, ExpFormat):
            if mxtype.value_type not in ["real", "complex"]:
                raise ValueError("Inconsistency between matrix type %s and "
                                 "value type %s" % (mxtype, values_format))
            values_dtype = np.float64
        elif isinstance(values_format, IntFormat):
            if mxtype.value_type not in ["integer"]:
                raise ValueError("Inconsistency between matrix type %s and "
                                 "value type %s" % (mxtype, values_format))
            # XXX: fortran int -> dtype association ?
            values_dtype = int
        else:
            raise ValueError("Unsupported format for values %r" % (values_format,))

        self.pointer_format = pointer_format
        self.indices_format = indices_format
        self.values_format = values_format

        self.pointer_dtype = np.int32
        self.indices_dtype = np.int32
        self.values_dtype = values_dtype

        self.pointer_nlines = pointer_nlines
        self.pointer_nbytes_full = _nbytes_full(pointer_format, pointer_nlines)

        self.indices_nlines = indices_nlines
        self.indices_nbytes_full = _nbytes_full(indices_format, indices_nlines)

        self.values_nlines = values_nlines
        self.values_nbytes_full = _nbytes_full(values_format, values_nlines)

        self.nrows = nrows
        self.ncols = ncols
        self.nnon_zeros = nnon_zeros
        self.nelementals = nelementals
        self.mxtype = mxtype

    def dump(self):
        """Gives the header corresponding to this instance as a string."""
        header = [self.title.ljust(72) + self.key.ljust(8)]

        header.append("%14d%14d%14d%14d" %
                      (self.total_nlines, self.pointer_nlines,
                       self.indices_nlines, self.values_nlines))
        header.append("%14s%14d%14d%14d%14d" %
                      (self.mxtype.fortran_format.ljust(14), self.nrows,
                       self.ncols, self.nnon_zeros, 0))

        pffmt = self.pointer_format.fortran_format
        iffmt = self.indices_format.fortran_format
        vffmt = self.values_format.fortran_format
        header.append("%16s%16s%20s" %
                      (pffmt.ljust(16), iffmt.ljust(16), vffmt.ljust(20)))
        return "\n".join(header)


def _expect_int(value, msg=None):
    try:
        return int(value)
    except ValueError as e:
        if msg is None:
            msg = "Expected an int, got %s"
        raise ValueError(msg % value) from e


def _read_hb_data(content, header):
    # XXX: look at a way to reduce memory here (big string creation)
    ptr_string = "".join([content.read(header.pointer_nbytes_full),
                           content.readline()])
    ptr = np.fromstring(ptr_string,
            dtype=int, sep=' ')

    ind_string = "".join([content.read(header.indices_nbytes_full),
                       content.readline()])
    ind = np.fromstring(ind_string,
            dtype=int, sep=' ')

    val_string = "".join([content.read(header.values_nbytes_full),
                          content.readline()])
    val = np.fromstring(val_string,
            dtype=header.values_dtype, sep=' ')

    try:
        return csc_matrix((val, ind-1, ptr-1),
                          shape=(header.nrows, header.ncols))
    except ValueError as e:
        raise e


def _write_data(m, fid, header):
    m = m.tocsc(copy=False)

    def write_array(f, ar, nlines, fmt):
        # ar_nlines is the number of full lines, n is the number of items per
        # line, ffmt the fortran format
        pyfmt = fmt.python_format
        pyfmt_full = pyfmt * fmt.repeat

        # for each array to write, we first write the full lines, and special
        # case for partial line
        full = ar[:(nlines - 1) * fmt.repeat]
        for row in full.reshape((nlines-1, fmt.repeat)):
            f.write(pyfmt_full % tuple(row) + "\n")
        nremain = ar.size - full.size
        if nremain > 0:
            f.write((pyfmt * nremain) % tuple(ar[ar.size - nremain:]) + "\n")

    fid.write(header.dump())
    fid.write("\n")
    # +1 is for Fortran one-based indexing
    write_array(fid, m.indptr+1, header.pointer_nlines,
                header.pointer_format)
    write_array(fid, m.indices+1, header.indices_nlines,
                header.indices_format)
    write_array(fid, m.data, header.values_nlines,
                header.values_format)


class HBMatrixType:
    """Class to hold the matrix type."""
    # q2f* translates qualified names to Fortran character
    _q2f_type = {
        "real": "R",
        "complex": "C",
        "pattern": "P",
        "integer": "I",
    }
    _q2f_structure = {
            "symmetric": "S",
            "unsymmetric": "U",
            "hermitian": "H",
            "skewsymmetric": "Z",
            "rectangular": "R"
    }
    _q2f_storage = {
        "assembled": "A",
        "elemental": "E",
    }

    _f2q_type = dict([(j, i) for i, j in _q2f_type.items()])
    _f2q_structure = dict([(j, i) for i, j in _q2f_structure.items()])
    _f2q_storage = dict([(j, i) for i, j in _q2f_storage.items()])

    @classmethod
    def from_fortran(cls, fmt):
        if not len(fmt) == 3:
            raise ValueError("Fortran format for matrix type should be 3 "
                             "characters long")
        try:
            value_type = cls._f2q_type[fmt[0]]
            structure = cls._f2q_structure[fmt[1]]
            storage = cls._f2q_storage[fmt[2]]
            return cls(value_type, structure, storage)
        except KeyError as e:
            raise ValueError("Unrecognized format %s" % fmt) from e

    def __init__(self, value_type, structure, storage="assembled"):
        self.value_type = value_type
        self.structure = structure
        self.storage = storage

        if value_type not in self._q2f_type:
            raise ValueError("Unrecognized type %s" % value_type)
        if structure not in self._q2f_structure:
            raise ValueError("Unrecognized structure %s" % structure)
        if storage not in self._q2f_storage:
            raise ValueError("Unrecognized storage %s" % storage)

    @property
    def fortran_format(self):
        return self._q2f_type[self.value_type] + \
               self._q2f_structure[self.structure] + \
               self._q2f_storage[self.storage]

    def __repr__(self):
        return "HBMatrixType(%s, %s, %s)" % \
               (self.value_type, self.structure, self.storage)


class HBFile:
    def __init__(self, file, hb_info=None):
        """Create a HBFile instance.

        Parameters
        ----------
        file : file-object
            StringIO work as well
        hb_info : HBInfo, optional
            Should be given as an argument for writing, in which case the file
            should be writable.
        """
        self._fid = file
        if hb_info is None:
            self._hb_info = HBInfo.from_file(file)
        else:
            #raise OSError("file %s is not writable, and hb_info "
            #              "was given." % file)
            self._hb_info = hb_info

    @property
    def title(self):
        return self._hb_info.title

    @property
    def key(self):
        return self._hb_info.key

    @property
    def type(self):
        return self._hb_info.mxtype.value_type

    @property
    def structure(self):
        return self._hb_info.mxtype.structure

    @property
    def storage(self):
        return self._hb_info.mxtype.storage

    def read_matrix(self):
        return _read_hb_data(self._fid, self._hb_info)

    def write_matrix(self, m):
        return _write_data(m, self._fid, self._hb_info)


def hb_read(path_or_open_file):
    """Read HB-format file.

    Parameters
    ----------
    path_or_open_file : path-like or file-like
        If a file-like object, it is used as-is. Otherwise, it is opened
        before reading.

    Returns
    -------
    data : scipy.sparse.csc_matrix instance
        The data read from the HB file as a sparse matrix.

    Notes
    -----
    At the moment not the full Harwell-Boeing format is supported. Supported
    features are:

        - assembled, non-symmetric, real matrices
        - integer for pointer/indices
        - exponential format for float values, and int format

    Examples
    --------
    We can read and write a harwell-boeing format file:

    >>> from scipy.io import hb_read, hb_write
    >>> from scipy.sparse import csr_matrix, eye
    >>> data = csr_matrix(eye(3))  # create a sparse matrix
    >>> hb_write("data.hb", data)  # write a hb file
    >>> print(hb_read("data.hb"))  # read a hb file
      (0, 0)	1.0
      (1, 1)	1.0
      (2, 2)	1.0

    """
    def _get_matrix(fid):
        hb = HBFile(fid)
        return hb.read_matrix()

    if hasattr(path_or_open_file, 'read'):
        return _get_matrix(path_or_open_file)
    else:
        with open(path_or_open_file) as f:
            return _get_matrix(f)


def hb_write(path_or_open_file, m, hb_info=None):
    """Write HB-format file.

    Parameters
    ----------
    path_or_open_file : path-like or file-like
        If a file-like object, it is used as-is. Otherwise, it is opened
        before writing.
    m : sparse-matrix
        the sparse matrix to write
    hb_info : HBInfo
        contains the meta-data for write

    Returns
    -------
    None

    Notes
    -----
    At the moment not the full Harwell-Boeing format is supported. Supported
    features are:

        - assembled, non-symmetric, real matrices
        - integer for pointer/indices
        - exponential format for float values, and int format

    Examples
    --------
    We can read and write a harwell-boeing format file:

    >>> from scipy.io import hb_read, hb_write
    >>> from scipy.sparse import csr_matrix, eye
    >>> data = csr_matrix(eye(3))  # create a sparse matrix
    >>> hb_write("data.hb", data)  # write a hb file
    >>> print(hb_read("data.hb"))  # read a hb file
      (0, 0)	1.0
      (1, 1)	1.0
      (2, 2)	1.0

    """
    m = m.tocsc(copy=False)

    if hb_info is None:
        hb_info = HBInfo.from_data(m)

    def _set_matrix(fid):
        hb = HBFile(fid, hb_info)
        return hb.write_matrix(m)

    if hasattr(path_or_open_file, 'write'):
        return _set_matrix(path_or_open_file)
    else:
        with open(path_or_open_file, 'w') as f:
            return _set_matrix(f)