Inzynierka_Gwiazdy/machine_learning/Lib/site-packages/scipy/sparse/_construct.py

948 lines
30 KiB
Python
Raw Normal View History

2023-09-20 19:46:58 +02:00
"""Functions to construct sparse matrices
"""
__docformat__ = "restructuredtext en"
__all__ = ['spdiags', 'eye', 'identity', 'kron', 'kronsum',
'hstack', 'vstack', 'bmat', 'rand', 'random', 'diags', 'block_diag']
import numbers
from functools import partial
import numpy as np
from scipy._lib._util import check_random_state, rng_integers
from ._sputils import upcast, get_index_dtype, isscalarlike
from ._sparsetools import csr_hstack
from ._csr import csr_matrix
from ._csc import csc_matrix
from ._bsr import bsr_matrix
from ._coo import coo_matrix
from ._dia import dia_matrix
from ._base import issparse
def spdiags(data, diags, m=None, n=None, format=None):
"""
Return a sparse matrix from diagonals.
Parameters
----------
data : array_like
Matrix diagonals stored row-wise
diags : sequence of int or an int
Diagonals to set:
* k = 0 the main diagonal
* k > 0 the kth upper diagonal
* k < 0 the kth lower diagonal
m, n : int, tuple, optional
Shape of the result. If `n` is None and `m` is a given tuple,
the shape is this tuple. If omitted, the matrix is square and
its shape is len(data[0]).
format : str, optional
Format of the result. By default (format=None) an appropriate sparse
matrix format is returned. This choice is subject to change.
See Also
--------
diags : more convenient form of this function
dia_matrix : the sparse DIAgonal format.
Examples
--------
>>> import numpy as np
>>> from scipy.sparse import spdiags
>>> data = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]])
>>> diags = np.array([0, -1, 2])
>>> spdiags(data, diags, 4, 4).toarray()
array([[1, 0, 3, 0],
[1, 2, 0, 4],
[0, 2, 3, 0],
[0, 0, 3, 4]])
"""
if m is None and n is None:
m = n = len(data[0])
elif n is None:
m, n = m
return dia_matrix((data, diags), shape=(m, n)).asformat(format)
def diags(diagonals, offsets=0, shape=None, format=None, dtype=None):
"""
Construct a sparse matrix from diagonals.
Parameters
----------
diagonals : sequence of array_like
Sequence of arrays containing the matrix diagonals,
corresponding to `offsets`.
offsets : sequence of int or an int, optional
Diagonals to set:
- k = 0 the main diagonal (default)
- k > 0 the kth upper diagonal
- k < 0 the kth lower diagonal
shape : tuple of int, optional
Shape of the result. If omitted, a square matrix large enough
to contain the diagonals is returned.
format : {"dia", "csr", "csc", "lil", ...}, optional
Matrix format of the result. By default (format=None) an
appropriate sparse matrix format is returned. This choice is
subject to change.
dtype : dtype, optional
Data type of the matrix.
See Also
--------
spdiags : construct matrix from diagonals
Notes
-----
This function differs from `spdiags` in the way it handles
off-diagonals.
The result from `diags` is the sparse equivalent of::
np.diag(diagonals[0], offsets[0])
+ ...
+ np.diag(diagonals[k], offsets[k])
Repeated diagonal offsets are disallowed.
.. versionadded:: 0.11
Examples
--------
>>> from scipy.sparse import diags
>>> diagonals = [[1, 2, 3, 4], [1, 2, 3], [1, 2]]
>>> diags(diagonals, [0, -1, 2]).toarray()
array([[1, 0, 1, 0],
[1, 2, 0, 2],
[0, 2, 3, 0],
[0, 0, 3, 4]])
Broadcasting of scalars is supported (but shape needs to be
specified):
>>> diags([1, -2, 1], [-1, 0, 1], shape=(4, 4)).toarray()
array([[-2., 1., 0., 0.],
[ 1., -2., 1., 0.],
[ 0., 1., -2., 1.],
[ 0., 0., 1., -2.]])
If only one diagonal is wanted (as in `numpy.diag`), the following
works as well:
>>> diags([1, 2, 3], 1).toarray()
array([[ 0., 1., 0., 0.],
[ 0., 0., 2., 0.],
[ 0., 0., 0., 3.],
[ 0., 0., 0., 0.]])
"""
# if offsets is not a sequence, assume that there's only one diagonal
if isscalarlike(offsets):
# now check that there's actually only one diagonal
if len(diagonals) == 0 or isscalarlike(diagonals[0]):
diagonals = [np.atleast_1d(diagonals)]
else:
raise ValueError("Different number of diagonals and offsets.")
else:
diagonals = list(map(np.atleast_1d, diagonals))
offsets = np.atleast_1d(offsets)
# Basic check
if len(diagonals) != len(offsets):
raise ValueError("Different number of diagonals and offsets.")
# Determine shape, if omitted
if shape is None:
m = len(diagonals[0]) + abs(int(offsets[0]))
shape = (m, m)
# Determine data type, if omitted
if dtype is None:
dtype = np.common_type(*diagonals)
# Construct data array
m, n = shape
M = max([min(m + offset, n - offset) + max(0, offset)
for offset in offsets])
M = max(0, M)
data_arr = np.zeros((len(offsets), M), dtype=dtype)
K = min(m, n)
for j, diagonal in enumerate(diagonals):
offset = offsets[j]
k = max(0, offset)
length = min(m + offset, n - offset, K)
if length < 0:
raise ValueError("Offset %d (index %d) out of bounds" % (offset, j))
try:
data_arr[j, k:k+length] = diagonal[...,:length]
except ValueError as e:
if len(diagonal) != length and len(diagonal) != 1:
raise ValueError(
"Diagonal length (index %d: %d at offset %d) does not "
"agree with matrix size (%d, %d)." % (
j, len(diagonal), offset, m, n)) from e
raise
return dia_matrix((data_arr, offsets), shape=(m, n)).asformat(format)
def identity(n, dtype='d', format=None):
"""Identity matrix in sparse format
Returns an identity matrix with shape (n,n) using a given
sparse format and dtype.
Parameters
----------
n : int
Shape of the identity matrix.
dtype : dtype, optional
Data type of the matrix
format : str, optional
Sparse format of the result, e.g., format="csr", etc.
Examples
--------
>>> from scipy.sparse import identity
>>> identity(3).toarray()
array([[ 1., 0., 0.],
[ 0., 1., 0.],
[ 0., 0., 1.]])
>>> identity(3, dtype='int8', format='dia')
<3x3 sparse matrix of type '<class 'numpy.int8'>'
with 3 stored elements (1 diagonals) in DIAgonal format>
"""
return eye(n, n, dtype=dtype, format=format)
def eye(m, n=None, k=0, dtype=float, format=None):
"""Sparse matrix with ones on diagonal
Returns a sparse (m x n) matrix where the kth diagonal
is all ones and everything else is zeros.
Parameters
----------
m : int
Number of rows in the matrix.
n : int, optional
Number of columns. Default: `m`.
k : int, optional
Diagonal to place ones on. Default: 0 (main diagonal).
dtype : dtype, optional
Data type of the matrix.
format : str, optional
Sparse format of the result, e.g., format="csr", etc.
Examples
--------
>>> import numpy as np
>>> from scipy import sparse
>>> sparse.eye(3).toarray()
array([[ 1., 0., 0.],
[ 0., 1., 0.],
[ 0., 0., 1.]])
>>> sparse.eye(3, dtype=np.int8)
<3x3 sparse matrix of type '<class 'numpy.int8'>'
with 3 stored elements (1 diagonals) in DIAgonal format>
"""
if n is None:
n = m
m,n = int(m),int(n)
if m == n and k == 0:
# fast branch for special formats
if format in ['csr', 'csc']:
idx_dtype = get_index_dtype(maxval=n)
indptr = np.arange(n+1, dtype=idx_dtype)
indices = np.arange(n, dtype=idx_dtype)
data = np.ones(n, dtype=dtype)
cls = {'csr': csr_matrix, 'csc': csc_matrix}[format]
return cls((data,indices,indptr),(n,n))
elif format == 'coo':
idx_dtype = get_index_dtype(maxval=n)
row = np.arange(n, dtype=idx_dtype)
col = np.arange(n, dtype=idx_dtype)
data = np.ones(n, dtype=dtype)
return coo_matrix((data, (row, col)), (n, n))
diags = np.ones((1, max(0, min(m + k, n))), dtype=dtype)
return spdiags(diags, k, m, n).asformat(format)
def kron(A, B, format=None):
"""kronecker product of sparse matrices A and B
Parameters
----------
A : sparse or dense matrix
first matrix of the product
B : sparse or dense matrix
second matrix of the product
format : str, optional
format of the result (e.g. "csr")
Returns
-------
kronecker product in a sparse matrix format
Examples
--------
>>> import numpy as np
>>> from scipy import sparse
>>> A = sparse.csr_matrix(np.array([[0, 2], [5, 0]]))
>>> B = sparse.csr_matrix(np.array([[1, 2], [3, 4]]))
>>> sparse.kron(A, B).toarray()
array([[ 0, 0, 2, 4],
[ 0, 0, 6, 8],
[ 5, 10, 0, 0],
[15, 20, 0, 0]])
>>> sparse.kron(A, [[1, 2], [3, 4]]).toarray()
array([[ 0, 0, 2, 4],
[ 0, 0, 6, 8],
[ 5, 10, 0, 0],
[15, 20, 0, 0]])
"""
B = coo_matrix(B)
if (format is None or format == "bsr") and 2*B.nnz >= B.shape[0] * B.shape[1]:
# B is fairly dense, use BSR
A = csr_matrix(A,copy=True)
output_shape = (A.shape[0]*B.shape[0], A.shape[1]*B.shape[1])
if A.nnz == 0 or B.nnz == 0:
# kronecker product is the zero matrix
return coo_matrix(output_shape).asformat(format)
B = B.toarray()
data = A.data.repeat(B.size).reshape(-1,B.shape[0],B.shape[1])
data = data * B
return bsr_matrix((data,A.indices,A.indptr), shape=output_shape)
else:
# use COO
A = coo_matrix(A)
output_shape = (A.shape[0]*B.shape[0], A.shape[1]*B.shape[1])
if A.nnz == 0 or B.nnz == 0:
# kronecker product is the zero matrix
return coo_matrix(output_shape).asformat(format)
# expand entries of a into blocks
row = A.row.repeat(B.nnz)
col = A.col.repeat(B.nnz)
data = A.data.repeat(B.nnz)
if max(A.shape[0]*B.shape[0], A.shape[1]*B.shape[1]) > np.iinfo('int32').max:
row = row.astype(np.int64)
col = col.astype(np.int64)
row *= B.shape[0]
col *= B.shape[1]
# increment block indices
row,col = row.reshape(-1,B.nnz),col.reshape(-1,B.nnz)
row += B.row
col += B.col
row,col = row.reshape(-1),col.reshape(-1)
# compute block entries
data = data.reshape(-1,B.nnz) * B.data
data = data.reshape(-1)
return coo_matrix((data,(row,col)), shape=output_shape).asformat(format)
def kronsum(A, B, format=None):
"""kronecker sum of sparse matrices A and B
Kronecker sum of two sparse matrices is a sum of two Kronecker
products kron(I_n,A) + kron(B,I_m) where A has shape (m,m)
and B has shape (n,n) and I_m and I_n are identity matrices
of shape (m,m) and (n,n), respectively.
Parameters
----------
A
square matrix
B
square matrix
format : str
format of the result (e.g. "csr")
Returns
-------
kronecker sum in a sparse matrix format
Examples
--------
"""
A = coo_matrix(A)
B = coo_matrix(B)
if A.shape[0] != A.shape[1]:
raise ValueError('A is not square')
if B.shape[0] != B.shape[1]:
raise ValueError('B is not square')
dtype = upcast(A.dtype, B.dtype)
L = kron(eye(B.shape[0],dtype=dtype), A, format=format)
R = kron(B, eye(A.shape[0],dtype=dtype), format=format)
return (L+R).asformat(format) # since L + R is not always same format
def _compressed_sparse_stack(blocks, axis):
"""
Stacking fast path for CSR/CSC matrices
(i) vstack for CSR, (ii) hstack for CSC.
"""
other_axis = 1 if axis == 0 else 0
data = np.concatenate([b.data for b in blocks])
constant_dim = blocks[0].shape[other_axis]
idx_dtype = get_index_dtype(arrays=[b.indptr for b in blocks],
maxval=max(data.size, constant_dim))
indices = np.empty(data.size, dtype=idx_dtype)
indptr = np.empty(sum(b.shape[axis] for b in blocks) + 1, dtype=idx_dtype)
last_indptr = idx_dtype(0)
sum_dim = 0
sum_indices = 0
for b in blocks:
if b.shape[other_axis] != constant_dim:
raise ValueError(f'incompatible dimensions for axis {other_axis}')
indices[sum_indices:sum_indices+b.indices.size] = b.indices
sum_indices += b.indices.size
idxs = slice(sum_dim, sum_dim + b.shape[axis])
indptr[idxs] = b.indptr[:-1]
indptr[idxs] += last_indptr
sum_dim += b.shape[axis]
last_indptr += b.indptr[-1]
indptr[-1] = last_indptr
if axis == 0:
return csr_matrix((data, indices, indptr),
shape=(sum_dim, constant_dim))
else:
return csc_matrix((data, indices, indptr),
shape=(constant_dim, sum_dim))
def _stack_along_minor_axis(blocks, axis):
"""
Stacking fast path for CSR/CSC matrices along the minor axis
(i) hstack for CSR, (ii) vstack for CSC.
"""
n_blocks = len(blocks)
if n_blocks == 0:
raise ValueError('Missing block matrices')
if n_blocks == 1:
return blocks[0]
# check for incompatible dimensions
other_axis = 1 if axis == 0 else 0
other_axis_dims = set(b.shape[other_axis] for b in blocks)
if len(other_axis_dims) > 1:
raise ValueError(f'Mismatching dimensions along axis {other_axis}: '
f'{other_axis_dims}')
constant_dim, = other_axis_dims
# Do the stacking
indptr_list = [b.indptr for b in blocks]
data_cat = np.concatenate([b.data for b in blocks])
# Need to check if any indices/indptr, would be too large post-
# concatenation for np.int32:
# - The max value of indices is the output array's stacking-axis length - 1
# - The max value in indptr is the number of non-zero entries. This is
# exceedingly unlikely to require int64, but is checked out of an
# abundance of caution.
sum_dim = sum(b.shape[axis] for b in blocks)
nnz = sum(len(b.indices) for b in blocks)
idx_dtype = get_index_dtype(maxval=max(sum_dim - 1, nnz))
stack_dim_cat = np.array([b.shape[axis] for b in blocks], dtype=idx_dtype)
if data_cat.size > 0:
indptr_cat = np.concatenate(indptr_list).astype(idx_dtype)
indices_cat = (np.concatenate([b.indices for b in blocks])
.astype(idx_dtype))
indptr = np.empty(constant_dim + 1, dtype=idx_dtype)
indices = np.empty_like(indices_cat)
data = np.empty_like(data_cat)
csr_hstack(n_blocks, constant_dim, stack_dim_cat,
indptr_cat, indices_cat, data_cat,
indptr, indices, data)
else:
indptr = np.zeros(constant_dim + 1, dtype=idx_dtype)
indices = np.empty(0, dtype=idx_dtype)
data = np.empty(0, dtype=data_cat.dtype)
if axis == 0:
return csc_matrix((data, indices, indptr),
shape=(sum_dim, constant_dim))
else:
return csr_matrix((data, indices, indptr),
shape=(constant_dim, sum_dim))
def hstack(blocks, format=None, dtype=None):
"""
Stack sparse matrices horizontally (column wise)
Parameters
----------
blocks
sequence of sparse matrices with compatible shapes
format : str
sparse format of the result (e.g., "csr")
by default an appropriate sparse matrix format is returned.
This choice is subject to change.
dtype : dtype, optional
The data-type of the output matrix. If not given, the dtype is
determined from that of `blocks`.
See Also
--------
vstack : stack sparse matrices vertically (row wise)
Examples
--------
>>> from scipy.sparse import coo_matrix, hstack
>>> A = coo_matrix([[1, 2], [3, 4]])
>>> B = coo_matrix([[5], [6]])
>>> hstack([A,B]).toarray()
array([[1, 2, 5],
[3, 4, 6]])
"""
return bmat([blocks], format=format, dtype=dtype)
def vstack(blocks, format=None, dtype=None):
"""
Stack sparse matrices vertically (row wise)
Parameters
----------
blocks
sequence of sparse matrices with compatible shapes
format : str, optional
sparse format of the result (e.g., "csr")
by default an appropriate sparse matrix format is returned.
This choice is subject to change.
dtype : dtype, optional
The data-type of the output matrix. If not given, the dtype is
determined from that of `blocks`.
See Also
--------
hstack : stack sparse matrices horizontally (column wise)
Examples
--------
>>> from scipy.sparse import coo_matrix, vstack
>>> A = coo_matrix([[1, 2], [3, 4]])
>>> B = coo_matrix([[5, 6]])
>>> vstack([A, B]).toarray()
array([[1, 2],
[3, 4],
[5, 6]])
"""
return bmat([[b] for b in blocks], format=format, dtype=dtype)
def bmat(blocks, format=None, dtype=None):
"""
Build a sparse matrix from sparse sub-blocks
Parameters
----------
blocks : array_like
Grid of sparse matrices with compatible shapes.
An entry of None implies an all-zero matrix.
format : {'bsr', 'coo', 'csc', 'csr', 'dia', 'dok', 'lil'}, optional
The sparse format of the result (e.g. "csr"). By default an
appropriate sparse matrix format is returned.
This choice is subject to change.
dtype : dtype, optional
The data-type of the output matrix. If not given, the dtype is
determined from that of `blocks`.
Returns
-------
bmat : sparse matrix
See Also
--------
block_diag, diags
Examples
--------
>>> from scipy.sparse import coo_matrix, bmat
>>> A = coo_matrix([[1, 2], [3, 4]])
>>> B = coo_matrix([[5], [6]])
>>> C = coo_matrix([[7]])
>>> bmat([[A, B], [None, C]]).toarray()
array([[1, 2, 5],
[3, 4, 6],
[0, 0, 7]])
>>> bmat([[A, None], [None, C]]).toarray()
array([[1, 2, 0],
[3, 4, 0],
[0, 0, 7]])
"""
blocks = np.asarray(blocks, dtype='object')
if blocks.ndim != 2:
raise ValueError('blocks must be 2-D')
M,N = blocks.shape
# check for fast path cases
if (format in (None, 'csr') and all(isinstance(b, csr_matrix)
for b in blocks.flat)):
if N > 1:
# stack along columns (axis 1):
blocks = [[_stack_along_minor_axis(blocks[b, :], 1)]
for b in range(M)] # must have shape: (M, 1)
blocks = np.asarray(blocks, dtype='object')
# stack along rows (axis 0):
A = _compressed_sparse_stack(blocks[:, 0], 0)
if dtype is not None:
A = A.astype(dtype)
return A
elif (format in (None, 'csc') and all(isinstance(b, csc_matrix)
for b in blocks.flat)):
if M > 1:
# stack along rows (axis 0):
blocks = [[_stack_along_minor_axis(blocks[:, b], 0)
for b in range(N)]] # must have shape: (1, N)
blocks = np.asarray(blocks, dtype='object')
# stack along columns (axis 1):
A = _compressed_sparse_stack(blocks[0, :], 1)
if dtype is not None:
A = A.astype(dtype)
return A
block_mask = np.zeros(blocks.shape, dtype=bool)
brow_lengths = np.zeros(M, dtype=np.int64)
bcol_lengths = np.zeros(N, dtype=np.int64)
# convert everything to COO format
for i in range(M):
for j in range(N):
if blocks[i,j] is not None:
A = coo_matrix(blocks[i,j])
blocks[i,j] = A
block_mask[i,j] = True
if brow_lengths[i] == 0:
brow_lengths[i] = A.shape[0]
elif brow_lengths[i] != A.shape[0]:
msg = (f'blocks[{i},:] has incompatible row dimensions. '
f'Got blocks[{i},{j}].shape[0] == {A.shape[0]}, '
f'expected {brow_lengths[i]}.')
raise ValueError(msg)
if bcol_lengths[j] == 0:
bcol_lengths[j] = A.shape[1]
elif bcol_lengths[j] != A.shape[1]:
msg = (f'blocks[:,{j}] has incompatible column '
f'dimensions. '
f'Got blocks[{i},{j}].shape[1] == {A.shape[1]}, '
f'expected {bcol_lengths[j]}.')
raise ValueError(msg)
nnz = sum(block.nnz for block in blocks[block_mask])
if dtype is None:
all_dtypes = [blk.dtype for blk in blocks[block_mask]]
dtype = upcast(*all_dtypes) if all_dtypes else None
row_offsets = np.append(0, np.cumsum(brow_lengths))
col_offsets = np.append(0, np.cumsum(bcol_lengths))
shape = (row_offsets[-1], col_offsets[-1])
data = np.empty(nnz, dtype=dtype)
idx_dtype = get_index_dtype(maxval=max(shape))
row = np.empty(nnz, dtype=idx_dtype)
col = np.empty(nnz, dtype=idx_dtype)
nnz = 0
ii, jj = np.nonzero(block_mask)
for i, j in zip(ii, jj):
B = blocks[i, j]
idx = slice(nnz, nnz + B.nnz)
data[idx] = B.data
np.add(B.row, row_offsets[i], out=row[idx], dtype=idx_dtype)
np.add(B.col, col_offsets[j], out=col[idx], dtype=idx_dtype)
nnz += B.nnz
return coo_matrix((data, (row, col)), shape=shape).asformat(format)
def block_diag(mats, format=None, dtype=None):
"""
Build a block diagonal sparse matrix from provided matrices.
Parameters
----------
mats : sequence of matrices
Input matrices.
format : str, optional
The sparse format of the result (e.g., "csr"). If not given, the matrix
is returned in "coo" format.
dtype : dtype specifier, optional
The data-type of the output matrix. If not given, the dtype is
determined from that of `blocks`.
Returns
-------
res : sparse matrix
Notes
-----
.. versionadded:: 0.11.0
See Also
--------
bmat, diags
Examples
--------
>>> from scipy.sparse import coo_matrix, block_diag
>>> A = coo_matrix([[1, 2], [3, 4]])
>>> B = coo_matrix([[5], [6]])
>>> C = coo_matrix([[7]])
>>> block_diag((A, B, C)).toarray()
array([[1, 2, 0, 0],
[3, 4, 0, 0],
[0, 0, 5, 0],
[0, 0, 6, 0],
[0, 0, 0, 7]])
"""
row = []
col = []
data = []
r_idx = 0
c_idx = 0
for a in mats:
if isinstance(a, (list, numbers.Number)):
a = coo_matrix(a)
nrows, ncols = a.shape
if issparse(a):
a = a.tocoo()
row.append(a.row + r_idx)
col.append(a.col + c_idx)
data.append(a.data)
else:
a_row, a_col = np.divmod(np.arange(nrows*ncols), ncols)
row.append(a_row + r_idx)
col.append(a_col + c_idx)
data.append(a.ravel())
r_idx += nrows
c_idx += ncols
row = np.concatenate(row)
col = np.concatenate(col)
data = np.concatenate(data)
return coo_matrix((data, (row, col)),
shape=(r_idx, c_idx),
dtype=dtype).asformat(format)
def random(m, n, density=0.01, format='coo', dtype=None,
random_state=None, data_rvs=None):
"""Generate a sparse matrix of the given shape and density with randomly
distributed values.
Parameters
----------
m, n : int
shape of the matrix
density : real, optional
density of the generated matrix: density equal to one means a full
matrix, density of 0 means a matrix with no non-zero items.
format : str, optional
sparse matrix format.
dtype : dtype, optional
type of the returned matrix values.
random_state : {None, int, `numpy.random.Generator`,
`numpy.random.RandomState`}, optional
If `seed` is None (or `np.random`), the `numpy.random.RandomState`
singleton is used.
If `seed` is an int, a new ``RandomState`` instance is used,
seeded with `seed`.
If `seed` is already a ``Generator`` or ``RandomState`` instance then
that instance is used.
This random state will be used
for sampling the sparsity structure, but not necessarily for sampling
the values of the structurally nonzero entries of the matrix.
data_rvs : callable, optional
Samples a requested number of random values.
This function should take a single argument specifying the length
of the ndarray that it will return. The structurally nonzero entries
of the sparse random matrix will be taken from the array sampled
by this function. By default, uniform [0, 1) random values will be
sampled using the same random state as is used for sampling
the sparsity structure.
Returns
-------
res : sparse matrix
Notes
-----
Only float types are supported for now.
Examples
--------
>>> from scipy.sparse import random
>>> from scipy import stats
>>> from numpy.random import default_rng
>>> rng = default_rng()
>>> rvs = stats.poisson(25, loc=10).rvs
>>> S = random(3, 4, density=0.25, random_state=rng, data_rvs=rvs)
>>> S.A
array([[ 36., 0., 33., 0.], # random
[ 0., 0., 0., 0.],
[ 0., 0., 36., 0.]])
>>> from scipy.sparse import random
>>> from scipy.stats import rv_continuous
>>> class CustomDistribution(rv_continuous):
... def _rvs(self, size=None, random_state=None):
... return random_state.standard_normal(size)
>>> X = CustomDistribution(seed=rng)
>>> Y = X() # get a frozen version of the distribution
>>> S = random(3, 4, density=0.25, random_state=rng, data_rvs=Y.rvs)
>>> S.A
array([[ 0. , 0. , 0. , 0. ], # random
[ 0.13569738, 1.9467163 , -0.81205367, 0. ],
[ 0. , 0. , 0. , 0. ]])
"""
if density < 0 or density > 1:
raise ValueError("density expected to be 0 <= density <= 1")
dtype = np.dtype(dtype)
mn = m * n
tp = np.intc
if mn > np.iinfo(tp).max:
tp = np.int64
if mn > np.iinfo(tp).max:
msg = """\
Trying to generate a random sparse matrix such as the product of dimensions is
greater than %d - this is not supported on this machine
"""
raise ValueError(msg % np.iinfo(tp).max)
# Number of non zero values
k = int(round(density * m * n))
random_state = check_random_state(random_state)
if data_rvs is None:
if np.issubdtype(dtype, np.integer):
def data_rvs(n):
return rng_integers(random_state,
np.iinfo(dtype).min,
np.iinfo(dtype).max,
n,
dtype=dtype)
elif np.issubdtype(dtype, np.complexfloating):
def data_rvs(n):
return (random_state.uniform(size=n) +
random_state.uniform(size=n) * 1j)
else:
data_rvs = partial(random_state.uniform, 0., 1.)
ind = random_state.choice(mn, size=k, replace=False)
j = np.floor(ind * 1. / m).astype(tp, copy=False)
i = (ind - j * m).astype(tp, copy=False)
vals = data_rvs(k).astype(dtype, copy=False)
return coo_matrix((vals, (i, j)), shape=(m, n)).asformat(format,
copy=False)
def rand(m, n, density=0.01, format="coo", dtype=None, random_state=None):
"""Generate a sparse matrix of the given shape and density with uniformly
distributed values.
Parameters
----------
m, n : int
shape of the matrix
density : real, optional
density of the generated matrix: density equal to one means a full
matrix, density of 0 means a matrix with no non-zero items.
format : str, optional
sparse matrix format.
dtype : dtype, optional
type of the returned matrix values.
random_state : {None, int, `numpy.random.Generator`,
`numpy.random.RandomState`}, optional
If `seed` is None (or `np.random`), the `numpy.random.RandomState`
singleton is used.
If `seed` is an int, a new ``RandomState`` instance is used,
seeded with `seed`.
If `seed` is already a ``Generator`` or ``RandomState`` instance then
that instance is used.
Returns
-------
res : sparse matrix
Notes
-----
Only float types are supported for now.
See Also
--------
scipy.sparse.random : Similar function that allows a user-specified random
data source.
Examples
--------
>>> from scipy.sparse import rand
>>> matrix = rand(3, 4, density=0.25, format="csr", random_state=42)
>>> matrix
<3x4 sparse matrix of type '<class 'numpy.float64'>'
with 3 stored elements in Compressed Sparse Row format>
>>> matrix.toarray()
array([[0.05641158, 0. , 0. , 0.65088847],
[0. , 0. , 0. , 0.14286682],
[0. , 0. , 0. , 0. ]])
"""
return random(m, n, density, format, dtype, random_state)