"""Tools to support array_api.""" import numpy from .._config import get_config import scipy.special as special class _ArrayAPIWrapper: """sklearn specific Array API compatibility wrapper This wrapper makes it possible for scikit-learn maintainers to deal with discrepancies between different implementations of the Python array API standard and its evolution over time. The Python array API standard specification: https://data-apis.org/array-api/latest/ Documentation of the NumPy implementation: https://numpy.org/neps/nep-0047-array-api-standard.html """ def __init__(self, array_namespace): self._namespace = array_namespace def __getattr__(self, name): return getattr(self._namespace, name) def take(self, X, indices, *, axis): # When array_api supports `take` we can use this directly # https://github.com/data-apis/array-api/issues/177 if self._namespace.__name__ == "numpy.array_api": X_np = numpy.take(X, indices, axis=axis) return self._namespace.asarray(X_np) # We only support axis in (0, 1) and ndim in (1, 2) because that is all we need # in scikit-learn if axis not in {0, 1}: raise ValueError(f"Only axis in (0, 1) is supported. Got {axis}") if X.ndim not in {1, 2}: raise ValueError(f"Only X.ndim in (1, 2) is supported. Got {X.ndim}") if axis == 0: if X.ndim == 1: selected = [X[i] for i in indices] else: # X.ndim == 2 selected = [X[i, :] for i in indices] else: # axis == 1 selected = [X[:, i] for i in indices] return self._namespace.stack(selected, axis=axis) class _NumPyApiWrapper: """Array API compat wrapper for any numpy version NumPy < 1.22 does not expose the numpy.array_api namespace. This wrapper makes it possible to write code that uses the standard Array API while working with any version of NumPy supported by scikit-learn. See the `get_namespace()` public function for more details. """ def __getattr__(self, name): return getattr(numpy, name) def astype(self, x, dtype, *, copy=True, casting="unsafe"): # astype is not defined in the top level NumPy namespace return x.astype(dtype, copy=copy, casting=casting) def asarray(self, x, *, dtype=None, device=None, copy=None): # Support copy in NumPy namespace if copy is True: return numpy.array(x, copy=True, dtype=dtype) else: return numpy.asarray(x, dtype=dtype) def unique_inverse(self, x): return numpy.unique(x, return_inverse=True) def unique_counts(self, x): return numpy.unique(x, return_counts=True) def unique_values(self, x): return numpy.unique(x) def concat(self, arrays, *, axis=None): return numpy.concatenate(arrays, axis=axis) def get_namespace(*arrays): """Get namespace of arrays. Introspect `arrays` arguments and return their common Array API compatible namespace object, if any. NumPy 1.22 and later can construct such containers using the `numpy.array_api` namespace for instance. See: https://numpy.org/neps/nep-0047-array-api-standard.html If `arrays` are regular numpy arrays, an instance of the `_NumPyApiWrapper` compatibility wrapper is returned instead. Namespace support is not enabled by default. To enabled it call: sklearn.set_config(array_api_dispatch=True) or: with sklearn.config_context(array_api_dispatch=True): # your code here Otherwise an instance of the `_NumPyApiWrapper` compatibility wrapper is always returned irrespective of the fact that arrays implement the `__array_namespace__` protocol or not. Parameters ---------- *arrays : array objects Array objects. Returns ------- namespace : module Namespace shared by array objects. is_array_api : bool True of the arrays are containers that implement the Array API spec. """ # `arrays` contains one or more arrays, or possibly Python scalars (accepting # those is a matter of taste, but doesn't seem unreasonable). # Returns a tuple: (array_namespace, is_array_api) if not get_config()["array_api_dispatch"]: return _NumPyApiWrapper(), False namespaces = { x.__array_namespace__() if hasattr(x, "__array_namespace__") else None for x in arrays if not isinstance(x, (bool, int, float, complex)) } if not namespaces: # one could special-case np.ndarray above or use np.asarray here if # older numpy versions need to be supported. raise ValueError("Unrecognized array input") if len(namespaces) != 1: raise ValueError(f"Multiple namespaces for array inputs: {namespaces}") (xp,) = namespaces if xp is None: # Use numpy as default return _NumPyApiWrapper(), False return _ArrayAPIWrapper(xp), True def _expit(X): xp, _ = get_namespace(X) if xp.__name__ in {"numpy", "numpy.array_api"}: return xp.asarray(special.expit(numpy.asarray(X))) return 1.0 / (1.0 + xp.exp(-X)) def _asarray_with_order(array, dtype=None, order=None, copy=None, xp=None): """Helper to support the order kwarg only for NumPy-backed arrays Memory layout parameter `order` is not exposed in the Array API standard, however some input validation code in scikit-learn needs to work both for classes and functions that will leverage Array API only operations and for code that inherently relies on NumPy backed data containers with specific memory layout constraints (e.g. our own Cython code). The purpose of this helper is to make it possible to share code for data container validation without memory copies for both downstream use cases: the `order` parameter is only enforced if the input array implementation is NumPy based, otherwise `order` is just silently ignored. """ if xp is None: xp, _ = get_namespace(array) if xp.__name__ in {"numpy", "numpy.array_api"}: # Use NumPy API to support order array = numpy.asarray(array, order=order, dtype=dtype) return xp.asarray(array, copy=copy) else: return xp.asarray(array, dtype=dtype, copy=copy) def _convert_to_numpy(array, xp): """Convert X into a NumPy ndarray. Only works on cupy.array_api and numpy.array_api and is used for testing. """ supported_array_api = ["numpy.array_api", "cupy.array_api"] if xp.__name__ not in supported_array_api: support_array_api_str = ", ".join(supported_array_api) raise ValueError(f"Supported namespaces are: {support_array_api_str}") if xp.__name__ == "cupy.array_api": return array._array.get() else: return numpy.asarray(array) def _estimator_with_converted_arrays(estimator, converter): """Create new estimator which converting all attributes that are arrays. Parameters ---------- estimator : Estimator Estimator to convert converter : callable Callable that takes an array attribute and returns the converted array. Returns ------- new_estimator : Estimator Convert estimator """ from sklearn.base import clone new_estimator = clone(estimator) for key, attribute in vars(estimator).items(): if hasattr(attribute, "__array_namespace__") or isinstance( attribute, numpy.ndarray ): attribute = converter(attribute) setattr(new_estimator, key, attribute) return new_estimator