3RNN/Lib/site-packages/scipy/sparse/_dia.py

566 lines
18 KiB
Python
Raw Normal View History

2024-05-26 19:49:15 +02:00
"""Sparse DIAgonal format"""
__docformat__ = "restructuredtext en"
__all__ = ['dia_array', 'dia_matrix', 'isspmatrix_dia']
import numpy as np
from .._lib._util import copy_if_needed
from ._matrix import spmatrix
from ._base import issparse, _formats, _spbase, sparray
from ._data import _data_matrix
from ._sputils import (
isshape, upcast_char, getdtype, get_sum_dtype, validateaxis, check_shape
)
from ._sparsetools import dia_matvec
class _dia_base(_data_matrix):
_format = 'dia'
def __init__(self, arg1, shape=None, dtype=None, copy=False):
_data_matrix.__init__(self)
if issparse(arg1):
if arg1.format == "dia":
if copy:
arg1 = arg1.copy()
self.data = arg1.data
self.offsets = arg1.offsets
self._shape = check_shape(arg1.shape)
else:
if arg1.format == self.format and copy:
A = arg1.copy()
else:
A = arg1.todia()
self.data = A.data
self.offsets = A.offsets
self._shape = check_shape(A.shape)
elif isinstance(arg1, tuple):
if isshape(arg1):
# It's a tuple of matrix dimensions (M, N)
# create empty matrix
self._shape = check_shape(arg1)
self.data = np.zeros((0,0), getdtype(dtype, default=float))
idx_dtype = self._get_index_dtype(maxval=max(self.shape))
self.offsets = np.zeros((0), dtype=idx_dtype)
else:
try:
# Try interpreting it as (data, offsets)
data, offsets = arg1
except Exception as e:
message = 'unrecognized form for dia_array constructor'
raise ValueError(message) from e
else:
if shape is None:
raise ValueError('expected a shape argument')
if not copy:
copy = copy_if_needed
self.data = np.atleast_2d(np.array(arg1[0], dtype=dtype, copy=copy))
offsets = np.array(arg1[1],
dtype=self._get_index_dtype(maxval=max(shape)),
copy=copy)
self.offsets = np.atleast_1d(offsets)
self._shape = check_shape(shape)
else:
#must be dense, convert to COO first, then to DIA
try:
arg1 = np.asarray(arg1)
except Exception as e:
raise ValueError("unrecognized form for"
" %s_matrix constructor" % self.format) from e
if isinstance(self, sparray) and arg1.ndim != 2:
raise ValueError(f"DIA arrays don't support {arg1.ndim}D input. Use 2D")
A = self._coo_container(arg1, dtype=dtype, shape=shape).todia()
self.data = A.data
self.offsets = A.offsets
self._shape = check_shape(A.shape)
if dtype is not None:
self.data = self.data.astype(dtype)
#check format
if self.offsets.ndim != 1:
raise ValueError('offsets array must have rank 1')
if self.data.ndim != 2:
raise ValueError('data array must have rank 2')
if self.data.shape[0] != len(self.offsets):
raise ValueError('number of diagonals (%d) '
'does not match the number of offsets (%d)'
% (self.data.shape[0], len(self.offsets)))
if len(np.unique(self.offsets)) != len(self.offsets):
raise ValueError('offset array contains duplicate values')
def __repr__(self):
_, fmt = _formats[self.format]
sparse_cls = 'array' if isinstance(self, sparray) else 'matrix'
shape_str = 'x'.join(str(x) for x in self.shape)
ndiag = self.data.shape[0]
return (
f"<{shape_str} sparse {sparse_cls} of type '{self.dtype.type}'\n"
f"\twith {self.nnz} stored elements ({ndiag} diagonals) in {fmt} format>"
)
def _data_mask(self):
"""Returns a mask of the same shape as self.data, where
mask[i,j] is True when data[i,j] corresponds to a stored element."""
num_rows, num_cols = self.shape
offset_inds = np.arange(self.data.shape[1])
row = offset_inds - self.offsets[:,None]
mask = (row >= 0)
mask &= (row < num_rows)
mask &= (offset_inds < num_cols)
return mask
def count_nonzero(self):
mask = self._data_mask()
return np.count_nonzero(self.data[mask])
def _getnnz(self, axis=None):
if axis is not None:
raise NotImplementedError("_getnnz over an axis is not implemented "
"for DIA format")
M,N = self.shape
nnz = 0
for k in self.offsets:
if k > 0:
nnz += min(M,N-k)
else:
nnz += min(M+k,N)
return int(nnz)
_getnnz.__doc__ = _spbase._getnnz.__doc__
count_nonzero.__doc__ = _spbase.count_nonzero.__doc__
def sum(self, axis=None, dtype=None, out=None):
validateaxis(axis)
if axis is not None and axis < 0:
axis += 2
res_dtype = get_sum_dtype(self.dtype)
num_rows, num_cols = self.shape
ret = None
if axis == 0:
mask = self._data_mask()
x = (self.data * mask).sum(axis=0)
if x.shape[0] == num_cols:
res = x
else:
res = np.zeros(num_cols, dtype=x.dtype)
res[:x.shape[0]] = x
ret = self._ascontainer(res, dtype=res_dtype)
else:
row_sums = np.zeros((num_rows, 1), dtype=res_dtype)
one = np.ones(num_cols, dtype=res_dtype)
dia_matvec(num_rows, num_cols, len(self.offsets),
self.data.shape[1], self.offsets, self.data, one, row_sums)
row_sums = self._ascontainer(row_sums)
if axis is None:
return row_sums.sum(dtype=dtype, out=out)
ret = self._ascontainer(row_sums.sum(axis=axis))
if out is not None and out.shape != ret.shape:
raise ValueError("dimensions do not match")
return ret.sum(axis=(), dtype=dtype, out=out)
sum.__doc__ = _spbase.sum.__doc__
def _add_sparse(self, other):
# Check if other is also of type dia_array
if not isinstance(other, type(self)):
# If other is not of type dia_array, default to
# converting to csr_matrix, as is done in the _add_sparse
# method of parent class _spbase
return self.tocsr()._add_sparse(other)
# The task is to compute m = self + other
# Start by making a copy of self, of the datatype
# that should result from adding self and other
dtype = np.promote_types(self.dtype, other.dtype)
m = self.astype(dtype, copy=True)
# Then, add all the stored diagonals of other.
for d in other.offsets:
# Check if the diagonal has already been added.
if d in m.offsets:
# If the diagonal is already there, we need to take
# the sum of the existing and the new
m.setdiag(m.diagonal(d) + other.diagonal(d), d)
else:
m.setdiag(other.diagonal(d), d)
return m
def _matmul_vector(self, other):
x = other
y = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char,
x.dtype.char))
L = self.data.shape[1]
M,N = self.shape
dia_matvec(M,N, len(self.offsets), L, self.offsets, self.data,
x.ravel(), y.ravel())
return y
def _setdiag(self, values, k=0):
M, N = self.shape
if values.ndim == 0:
# broadcast
values_n = np.inf
else:
values_n = len(values)
if k < 0:
n = min(M + k, N, values_n)
min_index = 0
max_index = n
else:
n = min(M, N - k, values_n)
min_index = k
max_index = k + n
if values.ndim != 0:
# allow also longer sequences
values = values[:n]
data_rows, data_cols = self.data.shape
if k in self.offsets:
if max_index > data_cols:
data = np.zeros((data_rows, max_index), dtype=self.data.dtype)
data[:, :data_cols] = self.data
self.data = data
self.data[self.offsets == k, min_index:max_index] = values
else:
self.offsets = np.append(self.offsets, self.offsets.dtype.type(k))
m = max(max_index, data_cols)
data = np.zeros((data_rows + 1, m), dtype=self.data.dtype)
data[:-1, :data_cols] = self.data
data[-1, min_index:max_index] = values
self.data = data
def todia(self, copy=False):
if copy:
return self.copy()
else:
return self
todia.__doc__ = _spbase.todia.__doc__
def transpose(self, axes=None, copy=False):
if axes is not None and axes != (1, 0):
raise ValueError("Sparse arrays/matrices do not support "
"an 'axes' parameter because swapping "
"dimensions is the only logical permutation.")
num_rows, num_cols = self.shape
max_dim = max(self.shape)
# flip diagonal offsets
offsets = -self.offsets
# re-align the data matrix
r = np.arange(len(offsets), dtype=np.intc)[:, None]
c = np.arange(num_rows, dtype=np.intc) - (offsets % max_dim)[:, None]
pad_amount = max(0, max_dim-self.data.shape[1])
data = np.hstack((self.data, np.zeros((self.data.shape[0], pad_amount),
dtype=self.data.dtype)))
data = data[r, c]
return self._dia_container((data, offsets), shape=(
num_cols, num_rows), copy=copy)
transpose.__doc__ = _spbase.transpose.__doc__
def diagonal(self, k=0):
rows, cols = self.shape
if k <= -rows or k >= cols:
return np.empty(0, dtype=self.data.dtype)
idx, = np.nonzero(self.offsets == k)
first_col = max(0, k)
last_col = min(rows + k, cols)
result_size = last_col - first_col
if idx.size == 0:
return np.zeros(result_size, dtype=self.data.dtype)
result = self.data[idx[0], first_col:last_col]
padding = result_size - len(result)
if padding > 0:
result = np.pad(result, (0, padding), mode='constant')
return result
diagonal.__doc__ = _spbase.diagonal.__doc__
def tocsc(self, copy=False):
if self.nnz == 0:
return self._csc_container(self.shape, dtype=self.dtype)
num_rows, num_cols = self.shape
num_offsets, offset_len = self.data.shape
offset_inds = np.arange(offset_len)
row = offset_inds - self.offsets[:,None]
mask = (row >= 0)
mask &= (row < num_rows)
mask &= (offset_inds < num_cols)
mask &= (self.data != 0)
idx_dtype = self._get_index_dtype(maxval=max(self.shape))
indptr = np.zeros(num_cols + 1, dtype=idx_dtype)
indptr[1:offset_len+1] = np.cumsum(mask.sum(axis=0)[:num_cols])
if offset_len < num_cols:
indptr[offset_len+1:] = indptr[offset_len]
indices = row.T[mask.T].astype(idx_dtype, copy=False)
data = self.data.T[mask.T]
return self._csc_container((data, indices, indptr), shape=self.shape,
dtype=self.dtype)
tocsc.__doc__ = _spbase.tocsc.__doc__
def tocoo(self, copy=False):
num_rows, num_cols = self.shape
num_offsets, offset_len = self.data.shape
offset_inds = np.arange(offset_len)
row = offset_inds - self.offsets[:,None]
mask = (row >= 0)
mask &= (row < num_rows)
mask &= (offset_inds < num_cols)
mask &= (self.data != 0)
row = row[mask]
col = np.tile(offset_inds, num_offsets)[mask.ravel()]
idx_dtype = self._get_index_dtype(
arrays=(self.offsets,), maxval=max(self.shape)
)
row = row.astype(idx_dtype, copy=False)
col = col.astype(idx_dtype, copy=False)
data = self.data[mask]
# Note: this cannot set has_canonical_format=True, because despite the
# lack of duplicates, we do not generate sorted indices.
return self._coo_container(
(data, (row, col)), shape=self.shape, dtype=self.dtype, copy=False
)
tocoo.__doc__ = _spbase.tocoo.__doc__
# needed by _data_matrix
def _with_data(self, data, copy=True):
"""Returns a matrix with the same sparsity structure as self,
but with different data. By default the structure arrays are copied.
"""
if copy:
return self._dia_container(
(data, self.offsets.copy()), shape=self.shape
)
else:
return self._dia_container(
(data, self.offsets), shape=self.shape
)
def resize(self, *shape):
shape = check_shape(shape)
M, N = shape
# we do not need to handle the case of expanding N
self.data = self.data[:, :N]
if (M > self.shape[0] and
np.any(self.offsets + self.shape[0] < self.data.shape[1])):
# explicitly clear values that were previously hidden
mask = (self.offsets[:, None] + self.shape[0] <=
np.arange(self.data.shape[1]))
self.data[mask] = 0
self._shape = shape
resize.__doc__ = _spbase.resize.__doc__
def isspmatrix_dia(x):
"""Is `x` of dia_matrix type?
Parameters
----------
x
object to check for being a dia matrix
Returns
-------
bool
True if `x` is a dia matrix, False otherwise
Examples
--------
>>> from scipy.sparse import dia_array, dia_matrix, coo_matrix, isspmatrix_dia
>>> isspmatrix_dia(dia_matrix([[5]]))
True
>>> isspmatrix_dia(dia_array([[5]]))
False
>>> isspmatrix_dia(coo_matrix([[5]]))
False
"""
return isinstance(x, dia_matrix)
# This namespace class separates array from matrix with isinstance
class dia_array(_dia_base, sparray):
"""
Sparse array with DIAgonal storage.
This can be instantiated in several ways:
dia_array(D)
where D is a 2-D ndarray
dia_array(S)
with another sparse array or matrix S (equivalent to S.todia())
dia_array((M, N), [dtype])
to construct an empty array with shape (M, N),
dtype is optional, defaulting to dtype='d'.
dia_array((data, offsets), shape=(M, N))
where the ``data[k,:]`` stores the diagonal entries for
diagonal ``offsets[k]`` (See example below)
Attributes
----------
dtype : dtype
Data type of the array
shape : 2-tuple
Shape of the array
ndim : int
Number of dimensions (this is always 2)
nnz
size
data
DIA format data array of the array
offsets
DIA format offset array of the array
T
Notes
-----
Sparse arrays can be used in arithmetic operations: they support
addition, subtraction, multiplication, division, and matrix power.
Examples
--------
>>> import numpy as np
>>> from scipy.sparse import dia_array
>>> dia_array((3, 4), dtype=np.int8).toarray()
array([[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]], dtype=int8)
>>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0)
>>> offsets = np.array([0, -1, 2])
>>> dia_array((data, offsets), shape=(4, 4)).toarray()
array([[1, 0, 3, 0],
[1, 2, 0, 4],
[0, 2, 3, 0],
[0, 0, 3, 4]])
>>> from scipy.sparse import dia_array
>>> n = 10
>>> ex = np.ones(n)
>>> data = np.array([ex, 2 * ex, ex])
>>> offsets = np.array([-1, 0, 1])
>>> dia_array((data, offsets), shape=(n, n)).toarray()
array([[2., 1., 0., ..., 0., 0., 0.],
[1., 2., 1., ..., 0., 0., 0.],
[0., 1., 2., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 2., 1., 0.],
[0., 0., 0., ..., 1., 2., 1.],
[0., 0., 0., ..., 0., 1., 2.]])
"""
class dia_matrix(spmatrix, _dia_base):
"""
Sparse matrix with DIAgonal storage.
This can be instantiated in several ways:
dia_matrix(D)
where D is a 2-D ndarray
dia_matrix(S)
with another sparse array or matrix S (equivalent to S.todia())
dia_matrix((M, N), [dtype])
to construct an empty matrix with shape (M, N),
dtype is optional, defaulting to dtype='d'.
dia_matrix((data, offsets), shape=(M, N))
where the ``data[k,:]`` stores the diagonal entries for
diagonal ``offsets[k]`` (See example below)
Attributes
----------
dtype : dtype
Data type of the matrix
shape : 2-tuple
Shape of the matrix
ndim : int
Number of dimensions (this is always 2)
nnz
size
data
DIA format data array of the matrix
offsets
DIA format offset array of the matrix
T
Notes
-----
Sparse matrices can be used in arithmetic operations: they support
addition, subtraction, multiplication, division, and matrix power.
Examples
--------
>>> import numpy as np
>>> from scipy.sparse import dia_matrix
>>> dia_matrix((3, 4), dtype=np.int8).toarray()
array([[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]], dtype=int8)
>>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0)
>>> offsets = np.array([0, -1, 2])
>>> dia_matrix((data, offsets), shape=(4, 4)).toarray()
array([[1, 0, 3, 0],
[1, 2, 0, 4],
[0, 2, 3, 0],
[0, 0, 3, 4]])
>>> from scipy.sparse import dia_matrix
>>> n = 10
>>> ex = np.ones(n)
>>> data = np.array([ex, 2 * ex, ex])
>>> offsets = np.array([-1, 0, 1])
>>> dia_matrix((data, offsets), shape=(n, n)).toarray()
array([[2., 1., 0., ..., 0., 0., 0.],
[1., 2., 1., ..., 0., 0., 0.],
[0., 1., 2., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 2., 1., 0.],
[0., 0., 0., ..., 1., 2., 1.],
[0., 0., 0., ..., 0., 1., 2.]])
"""