# Inzynierka/Lib/site-packages/sklearn/utils/_mask.py
# Repository viewer metadata: 63 lines, 1.8 KiB, Python,
# 2023-06-02 12:51:02 +02:00
import numpy as np
from scipy import sparse as sp
from contextlib import suppress
from . import is_scalar_nan
from .fixes import _object_dtype_isnan
def _get_dense_mask(X, value_to_mask):
    """Compute the boolean mask ``X == value_to_mask`` for dense data.

    Three cases are distinguished, in this order:

    1. ``value_to_mask`` is ``pandas.NA`` (only checked when pandas can be
       imported and exposes ``NA``): the mask is ``pandas.isna(X)``.
    2. ``value_to_mask`` is a scalar NaN: NaN never compares equal to
       itself, so a NaN-aware test chosen from ``X.dtype.kind`` is used.
    3. Anything else: a plain ``X == value_to_mask`` comparison.

    Parameters
    ----------
    X : ndarray
        Data to inspect. Must expose ``dtype`` and ``shape`` when
        ``value_to_mask`` is NaN.

    value_to_mask : scalar
        The value to look for in ``X``.

    Returns
    -------
    Xt : ndarray of bool
        Mask that is True where ``X`` matches ``value_to_mask``.
    """
    with suppress(ImportError, AttributeError):
        # We also suppress `AttributeError` because older versions of pandas
        # do not have `NA`.
        import pandas

        if value_to_mask is pandas.NA:
            return pandas.isna(X)

    if is_scalar_nan(value_to_mask):
        if X.dtype.kind == "f":
            Xt = np.isnan(X)
        elif X.dtype.kind in ("i", "u"):
            # Integer arrays cannot hold NaN, so nothing is masked.
            Xt = np.zeros(X.shape, dtype=bool)
        else:
            # np.isnan does not work on object dtypes.
            Xt = _object_dtype_isnan(X)
    else:
        Xt = X == value_to_mask

    return Xt
def _get_mask(X, value_to_mask):
    """Compute the boolean mask X == value_to_mask.

    Dense input is delegated to ``_get_dense_mask``. For sparse input only
    the explicitly stored entries (``X.data``) are compared, and the result
    is rebuilt as a sparse boolean matrix that reuses copies of ``X``'s
    ``indices`` and ``indptr``.

    Parameters
    ----------
    X : {ndarray, sparse matrix} of shape (n_samples, n_features)
        Input data, where ``n_samples`` is the number of samples and
        ``n_features`` is the number of features. Sparse input is expected
        to be CSR or CSC: any format other than ``"csr"`` is rebuilt with
        the CSC constructor.

    value_to_mask : scalar
        The value which is to be masked in X, e.g. an int, a float
        (including NaN) or ``pandas.NA``.

    Returns
    -------
    X_mask : {ndarray, sparse matrix} of shape (n_samples, n_features)
        Missing mask.
    """
    if not sp.issparse(X):
        # Dense input needs no reconstruction; only sparse input has to be
        # rebuilt as a sparse output.
        return _get_dense_mask(X, value_to_mask)

    # Mask the stored values only; implicit zeros are not examined.
    Xt = _get_dense_mask(X.data, value_to_mask)

    sparse_constructor = sp.csr_matrix if X.format == "csr" else sp.csc_matrix
    # Copy the index arrays so the mask does not share them with X.
    Xt_sparse = sparse_constructor(
        (Xt, X.indices.copy(), X.indptr.copy()), shape=X.shape, dtype=bool
    )

    return Xt_sparse