571 lines
19 KiB
Python
571 lines
19 KiB
Python
|
"""
|
||
|
Implementation of Harwell-Boeing read/write.

At the moment, the full Harwell-Boeing format is not supported. Supported
features are:

    - assembled, non-symmetric, real matrices
    - integer for pointer/indices
    - exponential format for float values, and int format
|
||
|
|
||
|
"""
|
||
|
# TODO:
#   - Add more support (symmetric/complex matrices, non-assembled matrices ?)

# XXX: reading is reasonably efficient (>= 85 % is in numpy.fromstring), but
# takes a lot of memory. Being faster would require compiled code.
# Writing is not efficient. Although not a terribly exciting task,
# having reusable facilities to efficiently read/write Fortran-formatted files
# would be useful outside this module.
|
||
|
|
||
|
import warnings
|
||
|
|
||
|
import numpy as np
|
||
|
from scipy.sparse import csc_matrix
|
||
|
from ._fortran_format_parser import FortranFormatParser, IntFormat, ExpFormat
|
||
|
|
||
|
# Names exported by ``from <module> import *``.
__all__ = [
    "MalformedHeader",
    "hb_read",
    "hb_write",
    "HBInfo",
    "HBFile",
    "HBMatrixType",
]
|
||
|
|
||
|
|
||
|
class MalformedHeader(Exception):
    """Error type for a malformed Harwell-Boeing header."""
|
||
|
|
||
|
|
||
|
class LineOverflow(Warning):
    """Warning category used when a header field exceeds its column width."""
|
||
|
|
||
|
|
||
|
def _nbytes_full(fmt, nlines):
|
||
|
"""Return the number of bytes to read to get every full lines for the
|
||
|
given parsed fortran format."""
|
||
|
return (fmt.repeat * fmt.width + 1) * (nlines - 1)
|
||
|
|
||
|
|
||
|
class HBInfo:
    """Header information of a Harwell-Boeing file.

    Instances are meant to be created through the ``from_data`` and
    ``from_file`` class constructors rather than by calling ``__init__``
    directly.
    """

    @classmethod
    def from_data(cls, m, title="Default title", key="0", mxtype=None, fmt=None):
        """Create a HBInfo instance from an existing sparse matrix.

        Parameters
        ----------
        m : sparse matrix
            the HBInfo instance will derive its parameters from m
        title : str
            Title to put in the HB header
        key : str
            Key
        mxtype : HBMatrixType
            type of the input matrix (not handled yet: must be None)
        fmt : dict
            not implemented (must be None)

        Returns
        -------
        hb_info : HBInfo instance

        Raises
        ------
        NotImplementedError
            If `fmt` is given, or if the dtype of the values is not handled.
        ValueError
            If `mxtype` is given, or if the values are complex.
        """
        if fmt is not None:
            raise NotImplementedError("fmt argument not supported yet.")

        m = m.tocsc(copy=False)

        pointer = m.indptr
        indices = m.indices
        values = m.data

        nrows, ncols = m.shape
        nnon_zeros = m.nnz

        # +1 because HB use one-based indexing (Fortran), and we will write
        # the indices /pointer as such
        pointer_fmt = IntFormat.from_number(np.max(pointer+1))
        indices_fmt = IntFormat.from_number(np.max(indices+1))

        # The largest magnitude is passed negated so that the derived format
        # is sized for a value carrying a sign.
        if values.dtype.kind in np.typecodes["AllFloat"]:
            values_fmt = ExpFormat.from_number(-np.max(np.abs(values)))
        elif values.dtype.kind in np.typecodes["AllInteger"]:
            values_fmt = IntFormat.from_number(-np.max(np.abs(values)))
        else:
            raise NotImplementedError("type %s not implemented yet" % values.dtype.kind)

        if mxtype is not None:
            raise ValueError("mxtype argument not handled yet.")

        if not np.isrealobj(values):
            raise ValueError("Complex values not supported yet")
        if values.dtype.kind in np.typecodes["AllInteger"]:
            tp = "integer"
        elif values.dtype.kind in np.typecodes["AllFloat"]:
            tp = "real"
        else:
            raise NotImplementedError("type %s for values not implemented"
                                      % values.dtype)
        mxtype = HBMatrixType(tp, "unsymmetric", "assembled")

        def _nlines(fmt, size):
            # Number of lines needed for `size` items at fmt.repeat per line.
            nlines, remainder = divmod(size, fmt.repeat)
            if remainder:
                nlines += 1
            return nlines

        pointer_nlines = _nlines(pointer_fmt, pointer.size)
        indices_nlines = _nlines(indices_fmt, indices.size)
        values_nlines = _nlines(values_fmt, values.size)

        total_nlines = pointer_nlines + indices_nlines + values_nlines

        return cls(title, key,
                   total_nlines, pointer_nlines, indices_nlines, values_nlines,
                   mxtype, nrows, ncols, nnon_zeros,
                   pointer_fmt.fortran_format, indices_fmt.fortran_format,
                   values_fmt.fortran_format)

    @classmethod
    def from_file(cls, fid):
        """Create a HBInfo instance from a file object containing a matrix in the
        HB format.

        The four header lines are consumed from `fid`; the data section is
        left unread.

        Parameters
        ----------
        fid : file-like
            File or file-like object containing a matrix in the HB format.

        Returns
        -------
        hb_info : HBInfo instance

        Raises
        ------
        ValueError
            If a header line is too short or malformed, or if the file uses
            a feature that is not supported (right-hand sides, non-real or
            non-integer values, symmetric or elemental matrices).
        """
        # First line: 72-character title followed by the key.
        line = fid.readline().strip("\n")
        # The key field may be blank, so exactly 72 characters is acceptable.
        if not len(line) >= 72:
            raise ValueError("Expected at least 72 characters for first line, "
                             "got: \n%s" % line)
        title = line[:72]
        key = line[72:]

        # Second line: line counts, 14 characters each.
        line = fid.readline().strip("\n")
        if not len(line.rstrip()) >= 56:
            raise ValueError("Expected at least 56 characters for second line, "
                             "got: \n%s" % line)
        total_nlines = _expect_int(line[:14])
        pointer_nlines = _expect_int(line[14:28])
        indices_nlines = _expect_int(line[28:42])
        values_nlines = _expect_int(line[42:56])

        # The right-hand-side line count is optional; blank means none.
        rhs_nlines = line[56:72].strip()
        if rhs_nlines == '':
            rhs_nlines = 0
        else:
            rhs_nlines = _expect_int(rhs_nlines)
        if not rhs_nlines == 0:
            raise ValueError("Only files without right hand side supported for "
                             "now.")

        # Third line: matrix type (3 characters), 11 blanks, four 14-character
        # integers, i.e. 70 characters are read.
        line = fid.readline().strip("\n")
        if not len(line) >= 70:
            raise ValueError("Expected at least 70 characters for third line, "
                             "got:\n%s" % line)

        mxtype_s = line[:3].upper()
        if not len(mxtype_s) == 3:
            raise ValueError("mxtype expected to be 3 characters long")

        mxtype = HBMatrixType.from_fortran(mxtype_s)
        if mxtype.value_type not in ["real", "integer"]:
            raise ValueError("Only real or integer matrices supported for "
                             "now (detected %s)" % mxtype)
        if not mxtype.structure == "unsymmetric":
            raise ValueError("Only unsymmetric matrices supported for "
                             "now (detected %s)" % mxtype)
        if not mxtype.storage == "assembled":
            raise ValueError("Only assembled matrices supported for now")

        if not line[3:14] == " " * 11:
            raise ValueError("Malformed data for third line: %s" % line)

        nrows = _expect_int(line[14:28])
        ncols = _expect_int(line[28:42])
        nnon_zeros = _expect_int(line[42:56])
        nelementals = _expect_int(line[56:70])
        if not nelementals == 0:
            raise ValueError("Unexpected value %d for nltvl (last entry of line 3)"
                             % nelementals)

        # Fourth line: the three whitespace-separated Fortran formats.
        line = fid.readline().strip("\n")

        ct = line.split()
        if not len(ct) == 3:
            raise ValueError("Expected 3 formats, got %s" % ct)

        return cls(title, key,
                   total_nlines, pointer_nlines, indices_nlines, values_nlines,
                   mxtype, nrows, ncols, nnon_zeros,
                   ct[0], ct[1], ct[2],
                   rhs_nlines, nelementals)

    def __init__(self, title, key,
            total_nlines, pointer_nlines, indices_nlines, values_nlines,
            mxtype, nrows, ncols, nnon_zeros,
            pointer_format_str, indices_format_str, values_format_str,
            right_hand_sides_nlines=0, nelementals=0):
        """Do not use this directly, but the class ctrs (from_* functions).

        Raises
        ------
        ValueError
            If the title is longer than 72 characters, if the pointer or
            indices format is not an integer format, or if the values format
            is unsupported or inconsistent with `mxtype`.

        Warns
        -----
        LineOverflow
            If the key is longer than 8 characters.
        """
        # Apply the defaults *before* storing the attributes, so that dump()
        # never sees None for the title or the key.
        if title is None:
            title = "No Title"
        if len(title) > 72:
            raise ValueError("title cannot be > 72 characters")

        if key is None:
            key = "|No Key"
        if len(key) > 8:
            warnings.warn("key is > 8 characters (key is %s)" % key, LineOverflow)

        self.title = title
        self.key = key

        self.total_nlines = total_nlines

        parser = FortranFormatParser()
        pointer_format = parser.parse(pointer_format_str)
        if not isinstance(pointer_format, IntFormat):
            raise ValueError("Expected int format for pointer format, got %s"
                             % pointer_format)

        indices_format = parser.parse(indices_format_str)
        if not isinstance(indices_format, IntFormat):
            raise ValueError("Expected int format for indices format, got %s" %
                             indices_format)

        values_format = parser.parse(values_format_str)
        if isinstance(values_format, ExpFormat):
            if mxtype.value_type not in ["real", "complex"]:
                raise ValueError("Inconsistency between matrix type %s and "
                                 "value type %s" % (mxtype, values_format))
            values_dtype = np.float64
        elif isinstance(values_format, IntFormat):
            if mxtype.value_type not in ["integer"]:
                raise ValueError("Inconsistency between matrix type %s and "
                                 "value type %s" % (mxtype, values_format))
            # XXX: fortran int -> dtype association ?
            values_dtype = int
        else:
            raise ValueError("Unsupported format for values %r" % (values_format,))

        self.pointer_format = pointer_format
        self.indices_format = indices_format
        self.values_format = values_format

        self.pointer_dtype = np.int32
        self.indices_dtype = np.int32
        self.values_dtype = values_dtype

        # For each section: its line count and the byte size of all of its
        # lines but the last one (see _nbytes_full).
        self.pointer_nlines = pointer_nlines
        self.pointer_nbytes_full = _nbytes_full(pointer_format, pointer_nlines)

        self.indices_nlines = indices_nlines
        self.indices_nbytes_full = _nbytes_full(indices_format, indices_nlines)

        self.values_nlines = values_nlines
        self.values_nbytes_full = _nbytes_full(values_format, values_nlines)

        self.nrows = nrows
        self.ncols = ncols
        self.nnon_zeros = nnon_zeros
        self.nelementals = nelementals
        self.mxtype = mxtype

    def dump(self):
        """Gives the header corresponding to this instance as a string.

        The result is made of four lines joined by newlines, without a
        trailing newline. No right-hand-side line count is written, and the
        last entry of the third line is always written as 0.
        """
        header = [self.title.ljust(72) + self.key.ljust(8)]

        header.append("%14d%14d%14d%14d" %
                      (self.total_nlines, self.pointer_nlines,
                       self.indices_nlines, self.values_nlines))
        header.append("%14s%14d%14d%14d%14d" %
                      (self.mxtype.fortran_format.ljust(14), self.nrows,
                       self.ncols, self.nnon_zeros, 0))

        pffmt = self.pointer_format.fortran_format
        iffmt = self.indices_format.fortran_format
        vffmt = self.values_format.fortran_format
        header.append("%16s%16s%20s" %
                      (pffmt.ljust(16), iffmt.ljust(16), vffmt.ljust(20)))
        return "\n".join(header)
|
||
|
|
||
|
|
||
|
def _expect_int(value, msg=None):
|
||
|
try:
|
||
|
return int(value)
|
||
|
except ValueError as e:
|
||
|
if msg is None:
|
||
|
msg = "Expected an int, got %s"
|
||
|
raise ValueError(msg % value) from e
|
||
|
|
||
|
|
||
|
def _read_hb_data(content, header):
|
||
|
# XXX: look at a way to reduce memory here (big string creation)
|
||
|
ptr_string = "".join([content.read(header.pointer_nbytes_full),
|
||
|
content.readline()])
|
||
|
ptr = np.fromstring(ptr_string,
|
||
|
dtype=int, sep=' ')
|
||
|
|
||
|
ind_string = "".join([content.read(header.indices_nbytes_full),
|
||
|
content.readline()])
|
||
|
ind = np.fromstring(ind_string,
|
||
|
dtype=int, sep=' ')
|
||
|
|
||
|
val_string = "".join([content.read(header.values_nbytes_full),
|
||
|
content.readline()])
|
||
|
val = np.fromstring(val_string,
|
||
|
dtype=header.values_dtype, sep=' ')
|
||
|
|
||
|
try:
|
||
|
return csc_matrix((val, ind-1, ptr-1),
|
||
|
shape=(header.nrows, header.ncols))
|
||
|
except ValueError as e:
|
||
|
raise e
|
||
|
|
||
|
|
||
|
def _write_data(m, fid, header):
|
||
|
m = m.tocsc(copy=False)
|
||
|
|
||
|
def write_array(f, ar, nlines, fmt):
|
||
|
# ar_nlines is the number of full lines, n is the number of items per
|
||
|
# line, ffmt the fortran format
|
||
|
pyfmt = fmt.python_format
|
||
|
pyfmt_full = pyfmt * fmt.repeat
|
||
|
|
||
|
# for each array to write, we first write the full lines, and special
|
||
|
# case for partial line
|
||
|
full = ar[:(nlines - 1) * fmt.repeat]
|
||
|
for row in full.reshape((nlines-1, fmt.repeat)):
|
||
|
f.write(pyfmt_full % tuple(row) + "\n")
|
||
|
nremain = ar.size - full.size
|
||
|
if nremain > 0:
|
||
|
f.write((pyfmt * nremain) % tuple(ar[ar.size - nremain:]) + "\n")
|
||
|
|
||
|
fid.write(header.dump())
|
||
|
fid.write("\n")
|
||
|
# +1 is for Fortran one-based indexing
|
||
|
write_array(fid, m.indptr+1, header.pointer_nlines,
|
||
|
header.pointer_format)
|
||
|
write_array(fid, m.indices+1, header.indices_nlines,
|
||
|
header.indices_format)
|
||
|
write_array(fid, m.data, header.values_nlines,
|
||
|
header.values_format)
|
||
|
|
||
|
|
||
|
class HBMatrixType:
    """Matrix type descriptor: value type, structure and storage scheme."""

    # Qualified name -> one-letter Fortran code, one table per position of
    # the three-character type string.
    _q2f_type = {
        "real": "R",
        "complex": "C",
        "pattern": "P",
        "integer": "I",
    }
    _q2f_structure = {
        "symmetric": "S",
        "unsymmetric": "U",
        "hermitian": "H",
        "skewsymmetric": "Z",
        "rectangular": "R"
    }
    _q2f_storage = {
        "assembled": "A",
        "elemental": "E",
    }

    # Reverse tables: Fortran code -> qualified name.
    _f2q_type = {code: name for name, code in _q2f_type.items()}
    _f2q_structure = {code: name for name, code in _q2f_structure.items()}
    _f2q_storage = {code: name for name, code in _q2f_storage.items()}

    @classmethod
    def from_fortran(cls, fmt):
        """Build an instance from a three-character Fortran type code.

        Raises
        ------
        ValueError
            If `fmt` is not 3 characters long or holds an unknown code.
        """
        if len(fmt) != 3:
            raise ValueError("Fortran format for matrix type should be 3 "
                             "characters long")
        try:
            return cls(cls._f2q_type[fmt[0]],
                       cls._f2q_structure[fmt[1]],
                       cls._f2q_storage[fmt[2]])
        except KeyError as e:
            raise ValueError("Unrecognized format %s" % fmt) from e

    def __init__(self, value_type, structure, storage="assembled"):
        """Store the three qualified names after checking each is known.

        Raises
        ------
        ValueError
            If any of the three names is not in the matching table.
        """
        if value_type not in self._q2f_type:
            raise ValueError("Unrecognized type %s" % value_type)
        if structure not in self._q2f_structure:
            raise ValueError("Unrecognized structure %s" % structure)
        if storage not in self._q2f_storage:
            raise ValueError("Unrecognized storage %s" % storage)

        self.value_type = value_type
        self.structure = structure
        self.storage = storage

    @property
    def fortran_format(self):
        """Three-character Fortran code for this matrix type."""
        codes = (
            self._q2f_type[self.value_type],
            self._q2f_structure[self.structure],
            self._q2f_storage[self.storage],
        )
        return "".join(codes)

    def __repr__(self):
        parts = (self.value_type, self.structure, self.storage)
        return "HBMatrixType(%s, %s, %s)" % parts
|
||
|
|
||
|
|
||
|
class HBFile:
    """Pairs a file object with the HB header information describing it."""

    def __init__(self, file, hb_info=None):
        """Create a HBFile instance.

        Parameters
        ----------
        file : file-object
            StringIO work as well
        hb_info : HBInfo, optional
            Should be given as an argument for writing, in which case the file
            should be writable. When omitted, the header is read from `file`.
        """
        self._fid = file
        # Note: no check is made that `file` is writable when hb_info is given.
        self._hb_info = HBInfo.from_file(file) if hb_info is None else hb_info

    @property
    def title(self):
        """Title stored in the header."""
        return self._hb_info.title

    @property
    def key(self):
        """Key stored in the header."""
        return self._hb_info.key

    @property
    def type(self):
        """Value type of the matrix type stored in the header."""
        return self._hb_info.mxtype.value_type

    @property
    def structure(self):
        """Structure of the matrix type stored in the header."""
        return self._hb_info.mxtype.structure

    @property
    def storage(self):
        """Storage scheme of the matrix type stored in the header."""
        return self._hb_info.mxtype.storage

    def read_matrix(self):
        """Read the data section of the file and return the matrix."""
        return _read_hb_data(self._fid, self._hb_info)

    def write_matrix(self, m):
        """Write the header and the matrix `m` to the file."""
        return _write_data(m, self._fid, self._hb_info)
|
||
|
|
||
|
|
||
|
def hb_read(path_or_open_file):
    """Read HB-format file.

    Parameters
    ----------
    path_or_open_file : path-like or file-like
        An object with a ``read`` attribute is used as-is. Anything else is
        passed to ``open`` and the resulting file is closed after reading.

    Returns
    -------
    data : scipy.sparse.csc_matrix instance
        The data read from the HB file as a sparse matrix.

    Notes
    -----
    At the moment, the full Harwell-Boeing format is not supported. Supported
    features are:

        - assembled, non-symmetric, real matrices
        - integer for pointer/indices
        - exponential format for float values, and int format

    Examples
    --------
    We can read and write a harwell-boeing format file:

    >>> from scipy.io import hb_read, hb_write
    >>> from scipy.sparse import csr_matrix, eye
    >>> data = csr_matrix(eye(3))  # create a sparse matrix
    >>> hb_write("data.hb", data)  # write a hb file
    >>> print(hb_read("data.hb"))  # read a hb file
      (0, 0)	1.0
      (1, 1)	1.0
      (2, 2)	1.0

    """
    def _load(stream):
        return HBFile(stream).read_matrix()

    if not hasattr(path_or_open_file, 'read'):
        with open(path_or_open_file) as f:
            return _load(f)
    return _load(path_or_open_file)
|
||
|
|
||
|
|
||
|
def hb_write(path_or_open_file, m, hb_info=None):
    """Write HB-format file.

    Parameters
    ----------
    path_or_open_file : path-like or file-like
        An object with a ``write`` attribute is used as-is. Anything else is
        passed to ``open`` in ``'w'`` mode and closed after writing.
    m : sparse-matrix
        the sparse matrix to write
    hb_info : HBInfo
        contains the meta-data for write; derived from `m` when omitted

    Returns
    -------
    None

    Notes
    -----
    At the moment, the full Harwell-Boeing format is not supported. Supported
    features are:

        - assembled, non-symmetric, real matrices
        - integer for pointer/indices
        - exponential format for float values, and int format

    Examples
    --------
    We can read and write a harwell-boeing format file:

    >>> from scipy.io import hb_read, hb_write
    >>> from scipy.sparse import csr_matrix, eye
    >>> data = csr_matrix(eye(3))  # create a sparse matrix
    >>> hb_write("data.hb", data)  # write a hb file
    >>> print(hb_read("data.hb"))  # read a hb file
      (0, 0)	1.0
      (1, 1)	1.0
      (2, 2)	1.0

    """
    csc = m.tocsc(copy=False)
    info = HBInfo.from_data(csc) if hb_info is None else hb_info

    def _dump(stream):
        return HBFile(stream, info).write_matrix(csc)

    if not hasattr(path_or_open_file, 'write'):
        with open(path_or_open_file, 'w') as f:
            return _dump(f)
    return _dump(path_or_open_file)
|