import numpy as np
from numpy.testing import assert_array_almost_equal, assert_
from scipy.sparse import csr_matrix, hstack
import pytest


def _random_dense_and_csr(n):
    """Return a reproducible ``(n, n)`` dense array and its CSR equivalent.

    The global NumPy RNG is seeded with 0, uniform samples are drawn, and
    every entry above 0.7 is zeroed so the matrix has explicit sparsity.
    """
    np.random.seed(0)
    X = np.random.random((n, n))
    X[X > 0.7] = 0
    return X, csr_matrix(X)


def _check_csr_rowslice(i, sl, X, Xcsr):
    """Check that ``Xcsr[i, sl]`` matches ``X[i, sl]`` and stays CSR.

    Parameters
    ----------
    i : int
        Row index.
    sl : slice
        Column slice applied to row ``i``.
    X : ndarray
        Dense reference array.
    Xcsr : csr_matrix
        Sparse matrix built from ``X``.
    """
    np_slice = X[i, sl]
    csr_slice = Xcsr[i, sl]
    # The sparse result is 2-D with a single row; compare that row against
    # the 1-D dense slice.
    assert_array_almost_equal(np_slice, csr_slice.toarray()[0])
    # Exact type check on purpose: slicing must not change the container.
    assert_(type(csr_slice) is csr_matrix)


def test_csr_rowslice():
    """Row slicing with forward, reversed and strided column slices."""
    N = 10
    X, Xcsr = _random_dense_and_csr(N)

    slices = [
        slice(None, None, None),
        slice(None, None, -1),
        slice(1, -2, 2),
        slice(-2, 1, -2),
    ]

    for i in range(N):
        for sl in slices:
            _check_csr_rowslice(i, sl, X, Xcsr)


def test_csr_getrow():
    """``getrow(i)`` returns row ``i`` as a ``(1, N)`` csr_matrix."""
    N = 10
    X, Xcsr = _random_dense_and_csr(N)

    for i in range(N):
        arr_row = X[i:i + 1, :]
        csr_row = Xcsr.getrow(i)

        assert_array_almost_equal(arr_row, csr_row.toarray())
        assert_(type(csr_row) is csr_matrix)


def test_csr_getcol():
    """``getcol(i)`` returns column ``i`` as an ``(N, 1)`` csr_matrix."""
    N = 10
    X, Xcsr = _random_dense_and_csr(N)

    for i in range(N):
        arr_col = X[:, i:i + 1]
        csr_col = Xcsr.getcol(i)

        assert_array_almost_equal(arr_col, csr_col.toarray())
        assert_(type(csr_col) is csr_matrix)


@pytest.mark.parametrize(
    "matrix_input, axis, expected_shape",
    [
        (csr_matrix([[1, 0, 0, 0],
                     [0, 0, 0, 0],
                     [0, 2, 3, 0]]),
         0, (0, 4)),
        (csr_matrix([[1, 0, 0, 0],
                     [0, 0, 0, 0],
                     [0, 2, 3, 0]]),
         1, (3, 0)),
        (csr_matrix([[1, 0, 0, 0],
                     [0, 0, 0, 0],
                     [0, 2, 3, 0]]),
         'both', (0, 0)),
        (csr_matrix([[0, 1, 0, 0, 0],
                     [0, 0, 0, 0, 0],
                     [0, 0, 2, 3, 0]]),
         0, (0, 5)),
    ],
)
def test_csr_empty_slices(matrix_input, axis, expected_shape):
    """Empty slices can be densified and have the expected shape.

    Two kinds of empty slice are exercised along the requested axis (or
    both axes): ``start == stop`` and ``stop < start``. Both must produce
    the same dense shape.
    """
    # see gh-11127 for related discussion
    # Read the row count from ``.shape`` directly; there is no need to
    # densify the whole matrix just to learn its dimensions.
    start = matrix_input.shape[0] - 1
    stop_equal = start
    stop_before = stop_equal - 1

    # ``.toarray()`` is used instead of the ``.A`` shorthand, which SciPy
    # has deprecated.
    if axis == 0:
        actual_shape_1 = matrix_input[start:stop_equal, :].toarray().shape
        actual_shape_2 = matrix_input[start:stop_before, :].toarray().shape
    elif axis == 1:
        actual_shape_1 = matrix_input[:, start:stop_equal].toarray().shape
        actual_shape_2 = matrix_input[:, start:stop_before].toarray().shape
    elif axis == 'both':
        actual_shape_1 = matrix_input[start:stop_equal,
                                      start:stop_equal].toarray().shape
        actual_shape_2 = matrix_input[start:stop_before,
                                      start:stop_before].toarray().shape
    else:
        # Guard against a mistyped parametrization; without this the
        # failure would be an opaque UnboundLocalError below.
        raise ValueError(f"unsupported axis parameter: {axis!r}")

    assert actual_shape_1 == expected_shape
    assert actual_shape_1 == actual_shape_2


def test_csr_bool_indexing():
    """Boolean masks given as lists or as arrays index identically."""
    data = csr_matrix([[0, 1, 2], [3, 4, 5], [6, 7, 8]])

    # 1-D mask
    list_indices1 = [False, True, False]
    array_indices1 = np.array(list_indices1)
    # 2-D mask with the same shape as ``data``
    list_indices2 = [[False, True, False],
                     [False, True, False],
                     [False, True, False]]
    array_indices2 = np.array(list_indices2)
    # tuple of two 1-D masks, one per axis
    list_indices3 = ([False, True, False], [False, True, False])
    array_indices3 = (np.array(list_indices3[0]),
                      np.array(list_indices3[1]))

    slice_list1 = data[list_indices1].toarray()
    slice_array1 = data[array_indices1].toarray()
    slice_list2 = data[list_indices2]
    slice_array2 = data[array_indices2]
    slice_list3 = data[list_indices3]
    slice_array3 = data[array_indices3]

    assert (slice_list1 == slice_array1).all()
    assert (slice_list2 == slice_array2).all()
    assert (slice_list3 == slice_array3).all()


def test_csr_hstack_int64():
    """
    Tests if hstack properly promotes the indices and indptr arrays to
    np.int64 when using np.int32 during concatenation would result in
    either array overflowing.
    """
    max_int32 = np.iinfo(np.int32).max

    # First case: indices would overflow with int32
    data = [1.0]
    row = [0]

    max_indices_1 = max_int32 - 1
    max_indices_2 = 3

    # Individual indices arrays are representable with int32
    col_1 = [max_indices_1 - 1]
    col_2 = [max_indices_2 - 1]

    X_1 = csr_matrix((data, (row, col_1)))
    X_2 = csr_matrix((data, (row, col_2)))

    assert max(max_indices_1 - 1, max_indices_2 - 1) < max_int32
    assert X_1.indices.dtype == X_1.indptr.dtype == np.int32
    assert X_2.indices.dtype == X_2.indptr.dtype == np.int32

    # ... but when concatenating their CSR matrices, the resulting indices
    # array can't be represented with int32 and must be promoted to int64.
    X_hs = hstack([X_1, X_2], format="csr")

    assert X_hs.indices.max() == max_indices_1 + max_indices_2 - 1
    assert max_indices_1 + max_indices_2 - 1 > max_int32
    assert X_hs.indices.dtype == X_hs.indptr.dtype == np.int64

    # Even if the matrices are empty, we must account for their size
    # contribution so that we may safely set the final elements.
    X_1_empty = csr_matrix(X_1.shape)
    X_2_empty = csr_matrix(X_2.shape)
    X_hs_empty = hstack([X_1_empty, X_2_empty], format="csr")

    assert X_hs_empty.shape == X_hs.shape
    assert X_hs_empty.indices.dtype == np.int64

    # Should be just small enough to stay in int32 after stack. Note that
    # we theoretically could support indices.max() == max_int32, but due to
    # an edge-case in the underlying sparsetools code
    # (namely the `coo_tocsr` routine),
    # we require that max(X_hs_32.shape) < max_int32 as well.
    # Hence we can only support max_int32 - 1.
    col_3 = [max_int32 - max_indices_1 - 1]
    X_3 = csr_matrix((data, (row, col_3)))
    X_hs_32 = hstack([X_1, X_3], format="csr")
    assert X_hs_32.indices.dtype == np.int32
    assert X_hs_32.indices.max() == max_int32 - 1