625 lines
20 KiB
Python
625 lines
20 KiB
Python
''' Classes for read / write of matlab (TM) 4 files
|
|
'''
|
|
import sys
|
|
import warnings
|
|
|
|
import numpy as np
|
|
|
|
import scipy.sparse
|
|
|
|
from ._miobase import (MatFileReader, docfiller, matdims, read_dtype,
|
|
convert_dtypes, arr_to_chars, arr_dtype_number)
|
|
|
|
from ._mio_utils import squeeze_element, chars_to_strings
|
|
from functools import reduce
|
|
|
|
|
|
# Public names of this module, i.e. what ``from <module> import *`` exports.
__all__ = [
    'MatFile4Reader',
    'MatFile4Writer',
    'SYS_LITTLE_ENDIAN',
    'VarHeader4',
    'VarReader4',
    'VarWriter4',
    'arr_to_2d',
    'mclass_info',
    'mdtypes_template',
    'miDOUBLE',
    'miINT16',
    'miINT32',
    'miSINGLE',
    'miUINT16',
    'miUINT8',
    'mxCHAR_CLASS',
    'mxFULL_CLASS',
    'mxSPARSE_CLASS',
    'np_to_mtypes',
    'order_codes',
]
|
|
|
|
|
|
# True when the running interpreter reports little-endian byte order.
SYS_LITTLE_ENDIAN = sys.byteorder == 'little'

# Mat4 data type codes (the ``P`` digit of the MOPT header integer).
miDOUBLE, miSINGLE, miINT32, miINT16, miUINT16, miUINT8 = range(6)

# Byte-order-free dtype strings keyed by data type code, plus the layout of
# the fixed five-integer variable header and the single-character dtype.
mdtypes_template = {
    miDOUBLE: 'f8',
    miSINGLE: 'f4',
    miINT32: 'i4',
    miINT16: 'i2',
    miUINT16: 'u2',
    miUINT8: 'u1',
    'header': [(field, 'i4')
               for field in ('mopt', 'mrows', 'ncols', 'imagf', 'namlen')],
    'U1': 'U1',
}

# NumPy dtype strings (without byte-order character) -> Mat4 data type code.
np_to_mtypes = {
    'f8': miDOUBLE,
    'c32': miDOUBLE,
    'c24': miDOUBLE,
    'c16': miDOUBLE,
    'f4': miSINGLE,
    'c8': miSINGLE,
    'i4': miINT32,
    'i2': miINT16,
    'u2': miUINT16,
    'u1': miUINT8,
    'S1': miUINT8,
}

# matrix classes (the ``T`` digit of the MOPT header integer)
mxFULL_CLASS, mxCHAR_CLASS, mxSPARSE_CLASS = range(3)

# Order codes (the ``M`` digit of the MOPT header integer).  Only the first
# two are NumPy byte-order characters; the rest are descriptive labels.
order_codes = dict(enumerate((
    '<',
    '>',
    'VAX D-float',
    'VAX G-float',
    'Cray',
)))

# Human-readable label for each matrix class code.
mclass_info = {
    mxFULL_CLASS: 'double',
    mxCHAR_CLASS: 'char',
    mxSPARSE_CLASS: 'sparse',
}
|
|
|
|
|
|
class VarHeader4:
    ''' Plain record describing one variable stored in a Mat4 file

    Attributes
    ----------
    name : object
        variable name as supplied by the caller
    dtype : object
        storage dtype of the variable data
    mclass : int
        matrix class code
    dims : sequence
        dimensions of the stored matrix
    is_complex : bool
        whether an imaginary part is stored
    '''
    # Mat4 variables never logical or global
    is_logical = False
    is_global = False

    def __init__(self, name, dtype, mclass, dims, is_complex):
        # Values are stored untouched; no validation or conversion happens.
        self.name = name
        self.dtype = dtype
        self.mclass = mclass
        self.dims = dims
        self.is_complex = is_complex
|
|
|
|
|
|
class VarReader4:
    ''' Class to read matlab 4 variables

    Reads variable headers and variable data from the stream held by a
    file reader object.
    '''

    def __init__(self, file_reader):
        ''' Copy the stream and read options from `file_reader`

        Parameters
        ----------
        file_reader : object
            object with attributes ``mat_stream``, ``dtypes``,
            ``chars_as_strings`` and ``squeeze_me``
        '''
        self.file_reader = file_reader
        self.mat_stream = file_reader.mat_stream
        self.dtypes = file_reader.dtypes
        self.chars_as_strings = file_reader.chars_as_strings
        self.squeeze_me = file_reader.squeeze_me

    def read_header(self):
        ''' Read and return header for variable

        Returns
        -------
        hdr : ``VarHeader4`` instance
            ``dims`` is a tuple of Python ints and ``mclass`` a Python int

        Raises
        ------
        ValueError
            if the MOPT integer is out of range, has a non-zero ``O`` digit
            or names a data type code this reader has no dtype for
        '''
        data = read_dtype(self.mat_stream, self.dtypes['header'])
        name = self.mat_stream.read(int(data['namlen'])).strip(b'\x00')
        # Work with Python ints from here on: the header fields are 32-bit
        # NumPy values and arithmetic on them can silently wrap around.
        mopt = int(data['mopt'])
        if mopt < 0 or mopt > 5000:
            raise ValueError('Mat 4 mopt wrong format, byteswapping problem?')
        M, rest = divmod(mopt, 1000)  # order code
        if M not in (0, 1):
            # ``get`` because mopt == 5000 yields M == 5, which has no entry
            # in ``order_codes``; fall back to showing the raw code.
            warnings.warn("We do not support byte ordering '%s'; returned "
                          "data may be corrupt" % order_codes.get(M, M),
                          UserWarning, stacklevel=3)
        O, rest = divmod(rest, 100)  # unused, should be 0
        if O != 0:
            raise ValueError('O in MOPT integer should be 0, wrong format?')
        P, rest = divmod(rest, 10)  # data type code e.g miDOUBLE (see above)
        T = rest  # matrix type code e.g., mxFULL_CLASS (see above)
        dims = (int(data['mrows']), int(data['ncols']))
        is_complex = bool(data['imagf'] == 1)
        try:
            dtype = self.dtypes[P]
        except KeyError:
            raise ValueError('Unknown data type code %d in MOPT integer, '
                             'wrong format?' % P) from None
        return VarHeader4(
            name,
            dtype,
            T,
            dims,
            is_complex)

    def array_from_header(self, hdr, process=True):
        ''' Read and return the array described by `hdr`

        Parameters
        ----------
        hdr : ``VarHeader4`` instance
        process : bool, optional
            If True, apply the ``chars_as_strings`` and ``squeeze_me``
            options to full and char arrays.  Sparse matrices are always
            returned unprocessed.

        Returns
        -------
        arr : ndarray or ``scipy.sparse.coo_matrix``

        Raises
        ------
        TypeError
            if ``hdr.mclass`` is not a known matrix class code
        '''
        mclass = hdr.mclass
        if mclass == mxFULL_CLASS:
            arr = self.read_full_array(hdr)
        elif mclass == mxCHAR_CLASS:
            arr = self.read_char_array(hdr)
            if process and self.chars_as_strings:
                arr = chars_to_strings(arr)
        elif mclass == mxSPARSE_CLASS:
            # no current processing (below) makes sense for sparse
            return self.read_sparse_array(hdr)
        else:
            raise TypeError('No reader for class code %s' % mclass)
        if process and self.squeeze_me:
            return squeeze_element(arr)
        return arr

    def read_sub_array(self, hdr, copy=True):
        ''' Mat4 read using header `hdr` dtype and dims

        Parameters
        ----------
        hdr : object
           object with attributes ``dtype``, ``dims``. dtype is assumed to be
           the correct endianness
        copy : bool, optional
           copies array before return if True (default True)
           (buffer is usually read only)

        Returns
        -------
        arr : ndarray
            of dtype given by `hdr` ``dtype`` and shape given by `hdr` ``dims``

        Raises
        ------
        ValueError
            if the stream holds fewer bytes than the header promises
        '''
        dt = hdr.dtype
        # Python ints have arbitrary precision, so the byte count below
        # cannot overflow even when ``hdr.dims`` holds 32-bit NumPy values.
        dims = tuple(int(d) for d in hdr.dims)
        num_bytes = dt.itemsize
        for d in dims:
            num_bytes *= d
        buffer = self.mat_stream.read(num_bytes)
        if len(buffer) != num_bytes:
            raise ValueError("Not enough bytes to read matrix '%s'; is this "
                             "a badly-formed file? Consider listing matrices "
                             "with `whosmat` and loading named matrices with "
                             "`variable_names` kwarg to `loadmat`" % hdr.name)
        arr = np.ndarray(shape=dims,
                         dtype=dt,
                         buffer=buffer,
                         order='F')
        if copy:
            arr = arr.copy()
        return arr

    def read_full_array(self, hdr):
        ''' Full (rather than sparse) matrix getter

        Read matrix (array) can be real or complex

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ndarray
            complex array if ``hdr.is_complex`` is True, otherwise a real
            numeric array
        '''
        if hdr.is_complex:
            # avoid array copy to save memory; the real block is stored
            # first, immediately followed by the imaginary block
            res = self.read_sub_array(hdr, copy=False)
            res_j = self.read_sub_array(hdr, copy=False)
            return res + (res_j * 1j)
        return self.read_sub_array(hdr)

    def read_char_array(self, hdr):
        ''' latin-1 text matrix (char matrix) reader

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ndarray
            with dtype 'U1', shape given by `hdr` ``dims``
        '''
        arr = self.read_sub_array(hdr).astype(np.uint8)
        # ``tobytes`` emits C order, matching the C-ordered array built below
        S = arr.tobytes().decode('latin-1')
        return np.ndarray(shape=hdr.dims,
                          dtype=np.dtype('U1'),
                          buffer=np.array(S)).copy()

    def read_sparse_array(self, hdr):
        ''' Read and return sparse matrix type

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ``scipy.sparse.coo_matrix``
            with dtype ``float`` and shape read from the sparse matrix data

        Notes
        -----
        MATLAB 4 real sparse arrays are saved in a N+1 by 3 array format, where
        N is the number of non-zero values. Column 1 values [0:N] are the
        (1-based) row indices of the each non-zero value, column 2 [0:N] are the
        column indices, column 3 [0:N] are the (real) values. The last values
        [-1,0:2] of the rows, column indices are shape[0] and shape[1]
        respectively of the output matrix. The last value for the values column
        is a padding 0. mrows and ncols values from the header give the shape of
        the stored matrix, here [N+1, 3]. Complex data are saved as a 4 column
        matrix, where the fourth column contains the imaginary component; the
        last value is again 0. Complex sparse data do *not* have the header
        ``imagf`` field set to True; the fact that the data are complex is only
        detectable because there are 4 storage columns.
        '''
        res = self.read_sub_array(hdr)
        tmp = res[:-1, :]
        # All numbers are float64 in Matlab, but SciPy sparse expects int shape
        dims = (int(res[-1, 0]), int(res[-1, 1]))
        I = np.ascontiguousarray(tmp[:, 0], dtype='intc')  # fixes byte order also
        J = np.ascontiguousarray(tmp[:, 1], dtype='intc')
        I -= 1  # for 1-based indexing
        J -= 1
        if res.shape[1] == 3:
            V = np.ascontiguousarray(tmp[:, 2], dtype='float')
        else:
            V = np.ascontiguousarray(tmp[:, 2], dtype='complex')
            V.imag = tmp[:, 3]
        return scipy.sparse.coo_matrix((V, (I, J)), dims)

    def shape_from_header(self, hdr):
        '''Read the shape of the array described by the header.
        The file position after this call is unspecified.

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        shape : tuple of int
            empty tuple for a sparse variable whose stored matrix is not
            two dimensional with at least one row and one column

        Raises
        ------
        TypeError
            if ``hdr.mclass`` is not a known matrix class code
        '''
        mclass = hdr.mclass
        if mclass == mxFULL_CLASS:
            shape = tuple(map(int, hdr.dims))
        elif mclass == mxCHAR_CLASS:
            shape = tuple(map(int, hdr.dims))
            if self.chars_as_strings:
                shape = shape[:-1]
        elif mclass == mxSPARSE_CLASS:
            dt = hdr.dtype
            # Python ints keep the seek offsets below free of 32-bit overflow
            dims = tuple(map(int, hdr.dims))

            if not (len(dims) == 2 and dims[0] >= 1 and dims[1] >= 1):
                return ()

            # Read only the row and column counts: these are the last entry
            # of the first and of the second stored column respectively
            self.mat_stream.seek(dt.itemsize * (dims[0] - 1), 1)
            rows = np.ndarray(shape=(), dtype=dt,
                              buffer=self.mat_stream.read(dt.itemsize))
            self.mat_stream.seek(dt.itemsize * (dims[0] - 1), 1)
            cols = np.ndarray(shape=(), dtype=dt,
                              buffer=self.mat_stream.read(dt.itemsize))

            shape = (int(rows), int(cols))
        else:
            raise TypeError('No reader for class code %s' % mclass)

        if self.squeeze_me:
            shape = tuple([x for x in shape if x != 1])
        return shape
|
|
|
|
|
|
class MatFile4Reader(MatFileReader):
    ''' Reader for Mat4 files '''
    @docfiller
    def __init__(self, mat_stream, *args, **kwargs):
        ''' Initialize matlab 4 file reader

        %(matstream_arg)s
        %(load_args)s
        '''
        super().__init__(mat_stream, *args, **kwargs)
        # Created by ``initialize_read`` once the dtypes are known.
        self._matrix_reader = None

    def guess_byte_order(self):
        ''' Guess the file byte order from the first header integer

        The first four bytes are read as a native-order 32-bit integer and
        the stream is rewound to position 0 afterwards.

        Returns
        -------
        byte_order : {'<', '>'}
        '''
        self.mat_stream.seek(0)
        mopt = read_dtype(self.mat_stream, np.dtype('i4'))
        self.mat_stream.seek(0)
        if mopt == 0:
            # zero reads the same in either byte order
            return '<'
        native, swapped = ('<', '>') if SYS_LITTLE_ENDIAN else ('>', '<')
        if mopt < 0 or mopt > 5000:
            # Number must have been byteswapped
            return swapped
        # Not byteswapped
        return native

    def initialize_read(self):
        ''' Run when beginning read of variables

        Sets up readers from parameters in `self`
        '''
        self.dtypes = convert_dtypes(mdtypes_template, self.byte_order)
        self._matrix_reader = VarReader4(self)

    def read_var_header(self):
        ''' Read and return header, next position

        Parameters
        ----------
        None

        Returns
        -------
        header : object
           object that can be passed to self.read_var_array, and that
           has attributes ``name`` and ``is_global``
        next_position : int
           position in stream of next variable
        '''
        hdr = self._matrix_reader.read_header()
        # Multiply as Python ints: the header dimensions may be 32-bit
        # NumPy values, whose product can wrap around for large matrices
        # and would then send the caller to the wrong stream position.
        n_elements = 1
        for dim in hdr.dims:
            n_elements *= int(dim)
        remaining_bytes = hdr.dtype.itemsize * n_elements
        # Full complex arrays store a second, imaginary block of equal size;
        # sparse data keep any imaginary part inside the single stored matrix.
        if hdr.is_complex and not hdr.mclass == mxSPARSE_CLASS:
            remaining_bytes *= 2
        next_position = self.mat_stream.tell() + remaining_bytes
        return hdr, next_position

    def read_var_array(self, header, process=True):
        ''' Read array, given `header`

        Parameters
        ----------
        header : header object
           object with fields defining variable header
        process : {True, False}, optional
           If True, apply recursive post-processing during loading of array.

        Returns
        -------
        arr : array
           array with post-processing applied or not according to
           `process`.
        '''
        return self._matrix_reader.array_from_header(header, process)

    def get_variables(self, variable_names=None):
        ''' get variables from stream as dictionary

        Parameters
        ----------
        variable_names : None or str or sequence of str, optional
            variable name, or sequence of variable names to get from Mat file /
            file stream. If None, then get all variables in file.

        Returns
        -------
        mdict : dict
            maps variable name to the array read for it
        '''
        if isinstance(variable_names, str):
            variable_names = [variable_names]
        elif variable_names is not None:
            # private copy: names are removed from it as they are found
            variable_names = list(variable_names)
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        mdict = {}
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            if variable_names is not None and name not in variable_names:
                self.mat_stream.seek(next_position)
                continue
            mdict[name] = self.read_var_array(hdr)
            self.mat_stream.seek(next_position)
            if variable_names is not None:
                variable_names.remove(name)
                if not variable_names:
                    # everything requested has been read
                    break
        return mdict

    def list_variables(self):
        ''' list variables from stream

        Returns
        -------
        variables : list of tuple
            one ``(name, shape, info)`` entry per variable, where ``info``
            is the ``mclass_info`` label for the matrix class, or
            'unknown'
        '''
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        variables = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            shape = self._matrix_reader.shape_from_header(hdr)
            info = mclass_info.get(hdr.mclass, 'unknown')
            variables.append((name, shape, info))

            # shape_from_header leaves the position unspecified
            self.mat_stream.seek(next_position)
        return variables
|
|
|
|
|
|
def arr_to_2d(arr, oned_as='row'):
    ''' Return `arr` reshaped to the dimensions given by ``matdims``

    A ValueError is raised when ``matdims`` reports more than 2 dimensions.

    Parameters
    ----------
    arr : array
    oned_as : {'row', 'column'}, optional
       Passed through to ``matdims``; selects whether 1-D vectors become
       row vectors or column vectors. See documentation for ``matdims``
       for more detail

    Returns
    -------
    arr2d : array
       `arr` reshaped to the dimensions from ``matdims``

    Raises
    ------
    ValueError
       if ``matdims`` returns more than 2 dimensions
    '''
    target_shape = matdims(arr, oned_as)
    if len(target_shape) <= 2:
        return arr.reshape(target_shape)
    raise ValueError('Matlab 4 files cannot save arrays with more than '
                     '2 dimensions')
|
|
|
|
|
|
class VarWriter4:
    ''' Class to write matlab 4 variables

    Writes variable headers and data to the stream held by a file writer
    object.
    '''

    def __init__(self, file_writer):
        ''' Copy the stream and ``oned_as`` option from `file_writer`

        Parameters
        ----------
        file_writer : object
            object with attributes ``file_stream`` and ``oned_as``
        '''
        self.file_stream = file_writer.file_stream
        self.oned_as = file_writer.oned_as

    def write_bytes(self, arr):
        ''' Write the bytes of array `arr` in Fortran (column-major) order '''
        self.file_stream.write(arr.tobytes(order='F'))

    def write_string(self, s):
        ''' Write the already-encoded bytes `s` to the stream unchanged '''
        self.file_stream.write(s)

    def write_header(self, name, shape, P=miDOUBLE, T=mxFULL_CLASS, imagf=0):
        ''' Write header for given data options

        Parameters
        ----------
        name : str
            name of variable
        shape : sequence
            Shape of array as it will be read in matlab
        P : int, optional
            code for mat4 data type, one of ``miDOUBLE, miSINGLE, miINT32,
            miINT16, miUINT16, miUINT8``
        T : int, optional
            code for mat4 matrix class, one of ``mxFULL_CLASS, mxCHAR_CLASS,
            mxSPARSE_CLASS``
        imagf : int, optional
            flag indicating complex

        Raises
        ------
        UnicodeEncodeError
            if `name` cannot be encoded as latin-1; nothing has been
            written to the stream in that case
        '''
        # Encode before touching the stream so that a name which is not
        # representable in latin-1 does not leave a half-written record.
        name_bytes = (name + '\0').encode('latin1')
        header = np.empty((), mdtypes_template['header'])
        M = not SYS_LITTLE_ENDIAN
        O = 0
        header['mopt'] = (M * 1000 +
                          O * 100 +
                          P * 10 +
                          T)
        header['mrows'] = shape[0]
        header['ncols'] = shape[1]
        header['imagf'] = imagf
        # latin-1 is one byte per character, so this equals len(name) + 1
        header['namlen'] = len(name_bytes)
        self.write_bytes(header)
        self.write_string(name_bytes)

    def write(self, arr, name):
        ''' Write matrix `arr`, with name `name`

        Parameters
        ----------
        arr : array_like
            array to write
        name : str
            name in matlab workspace

        Raises
        ------
        TypeError
            for object and void (structured) arrays
        '''
        # we need to catch sparse first, because np.asarray returns an
        # an object array for scipy.sparse
        if scipy.sparse.issparse(arr):
            self.write_sparse(arr, name)
            return
        arr = np.asarray(arr)
        dt = arr.dtype
        if not dt.isnative:
            arr = arr.astype(dt.newbyteorder('='))
        dtt = dt.type
        if dtt is np.object_:
            raise TypeError('Cannot save object arrays in Mat4')
        elif dtt is np.void:
            raise TypeError('Cannot save void type arrays')
        elif dtt in (np.str_, np.bytes_):
            self.write_char(arr, name)
            return
        self.write_numeric(arr, name)

    def write_numeric(self, arr, name):
        ''' Write numeric array `arr` as a full matrix named `name`

        Arrays whose dtype has no entry in ``np_to_mtypes`` are converted
        to double precision. Complex arrays are written as the real block
        followed by the imaginary block.

        Parameters
        ----------
        arr : ndarray
            numeric array of at most 2 dimensions
        name : str
            name in matlab workspace
        '''
        arr = arr_to_2d(arr, self.oned_as)
        imagf = arr.dtype.kind == 'c'
        try:
            P = np_to_mtypes[arr.dtype.str[1:]]
        except KeyError:
            # No direct Mat4 storage type: fall back to double precision.
            # (complex128 is 'c16' as a dtype string; 'c128' is not valid.)
            if imagf:
                arr = arr.astype(np.complex128)
            else:
                arr = arr.astype('f8')
            P = miDOUBLE
        self.write_header(name,
                          arr.shape,
                          P=P,
                          T=mxFULL_CLASS,
                          imagf=imagf)
        if imagf:
            # The header promises 4- or 8-byte elements.  Extended precision
            # complex input maps to miDOUBLE, so its parts must be narrowed
            # to match; for complex64/complex128 this cast is a no-op.
            part_dtype = 'f4' if P == miSINGLE else 'f8'
            self.write_bytes(arr.real.astype(part_dtype, copy=False))
            self.write_bytes(arr.imag.astype(part_dtype, copy=False))
        else:
            self.write_bytes(arr)

    def write_char(self, arr, name):
        ''' Write string array `arr` as a char matrix named `name`

        Parameters
        ----------
        arr : ndarray
            array of str or bytes dtype
        name : str
            name in matlab workspace
        '''
        # NOTE(review): only str arrays wider than one character go through
        # arr_to_chars; bytes arrays with itemsize > 1 are written as-is --
        # confirm that is intended.
        if arr.dtype.type == np.str_ and arr.dtype.itemsize != np.dtype('U1').itemsize:
            arr = arr_to_chars(arr)
        arr = arr_to_2d(arr, self.oned_as)
        dims = arr.shape
        self.write_header(
            name,
            dims,
            P=miUINT8,
            T=mxCHAR_CLASS)
        if arr.dtype.kind == 'U':
            # Recode unicode to latin1: view the whole array as one string,
            # encode it, then view the encoded bytes as single characters
            n_chars = np.prod(dims)
            st_arr = np.ndarray(shape=(),
                                dtype=arr_dtype_number(arr, n_chars),
                                buffer=arr)
            st = st_arr.item().encode('latin-1')
            arr = np.ndarray(shape=dims, dtype='S1', buffer=st)
        self.write_bytes(arr)

    def write_sparse(self, arr, name):
        ''' Sparse matrices are 2-D

        See docstring for VarReader4.read_sparse_array

        Parameters
        ----------
        arr : sparse matrix
            object with a ``tocoo`` method
        name : str
            name in matlab workspace
        '''
        A = arr.tocoo()  # convert to sparse COO format (ijv)
        imagf = A.dtype.kind == 'c'
        # one extra row for the matrix shape, one extra column if complex
        ijv = np.zeros((A.nnz + 1, 3+imagf), dtype='f8')
        ijv[:-1, 0] = A.row
        ijv[:-1, 1] = A.col
        ijv[:-1, 0:2] += 1  # 1 based indexing
        if imagf:
            ijv[:-1, 2] = A.data.real
            ijv[:-1, 3] = A.data.imag
        else:
            ijv[:-1, 2] = A.data
        ijv[-1, 0:2] = A.shape
        # the header ``imagf`` flag stays 0 even for complex sparse data
        self.write_header(
            name,
            ijv.shape,
            P=miDOUBLE,
            T=mxSPARSE_CLASS)
        self.write_bytes(ijv)
|
|
|
|
|
|
class MatFile4Writer:
    ''' Class for writing matlab 4 format files '''

    def __init__(self, file_stream, oned_as=None):
        ''' Store the output stream and the 1-D handling option

        Parameters
        ----------
        file_stream : object
            stream that variables are written to
        oned_as : {None, 'row', 'column'}, optional
            how 1-D arrays are reshaped on write; None means 'row'
        '''
        self.file_stream = file_stream
        self.oned_as = 'row' if oned_as is None else oned_as
        # Created on each call to ``put_variables``.
        self._matrix_writer = None

    def put_variables(self, mdict, write_header=None):
        ''' Write variables in `mdict` to stream

        Parameters
        ----------
        mdict : mapping
           mapping with method ``items`` return name, contents pairs
           where ``name`` which will appear in the matlab workspace in
           file load, and ``contents`` is something writeable to a
           matlab file, such as a NumPy array.
        write_header : {None, True, False}
           Ignored: a matlab 4 mat file has no file header. The argument
           exists only for compatibility with the matlab 5 version of
           this method.
        '''
        writer = VarWriter4(self)
        self._matrix_writer = writer
        for var_name, contents in mdict.items():
            writer.write(contents, var_name)
|