from __future__ import annotations

import inspect
from dataclasses import dataclass
from typing import (
    Callable, Literal, Protocol, TYPE_CHECKING
)

import numpy as np

from scipy.stats._common import ConfidenceInterval
from scipy.stats._qmc import check_random_state
from scipy.stats._resampling import BootstrapResult
from scipy.stats import qmc, bootstrap


if TYPE_CHECKING:
    import numpy.typing as npt
    from scipy._lib._util import DecimalNumber, IntNumber, SeedType


__all__ = [
    'sobol_indices'
]


def f_ishigami(x: npt.ArrayLike) -> np.ndarray:
    r"""Ishigami function.

    .. math::

        Y(\mathbf{x}) = \sin x_1 + 7 \sin^2 x_2 + 0.1 x_3^4 \sin x_1

    with :math:`\mathbf{x} \in [-\pi, \pi]^3`.

    Parameters
    ----------
    x : array_like ([x1, x2, x3], n)

    Returns
    -------
    f : array_like (n,)
        Function evaluation.

    References
    ----------
    .. [1] Ishigami, T. and T. Homma. "An importance quantification technique
       in uncertainty analysis for computer models." IEEE,
       :doi:`10.1109/ISUMA.1990.151285`, 1990.
    """
    x = np.atleast_2d(x)
    f_eval = (
        np.sin(x[0])
        + 7 * np.sin(x[1])**2
        + 0.1 * (x[2]**4) * np.sin(x[0])
    )
    return f_eval


def sample_A_B(
    n: IntNumber,
    dists: list[PPFDist],
    random_state: SeedType = None
) -> np.ndarray:
    """Sample two matrices A and B.

    Uses a single Sobol' sequence with ``2*d`` columns so that the two
    matrices are uncorrelated. This is more efficient than using two
    independent Sobol' draws. See sec. 5 from [1]_.

    Output shape is (2, d, n), i.e. two matrices of shape (d, n).

    References
    ----------
    .. [1] Saltelli, A., P. Annoni, I. Azzini, F. Campolongo, M. Ratto, and
       S. Tarantola. "Variance based sensitivity analysis of model
       output. Design and estimator for the total sensitivity index."
       Computer Physics Communications, 181(2):259-270,
       :doi:`10.1016/j.cpc.2009.09.018`, 2010.
    """
    d = len(dists)
    A_B = qmc.Sobol(d=2*d, seed=random_state, bits=64).random(n).T
    A_B = A_B.reshape(2, d, -1)
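    # Map the uniform Sobol' samples in [0, 1) onto each parameter's range by
    # applying the corresponding marginal's inverse CDF (``ppf``), one input
    # dimension at a time.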
    try:
        for d_, dist in enumerate(dists):
            A_B[:, d_] = dist.ppf(A_B[:, d_])
    except AttributeError as exc:
        message = "Each distribution in `dists` must have method `ppf`."
        raise ValueError(message) from exc
    return A_B


def sample_AB(A: np.ndarray, B: np.ndarray) -> np.ndarray:
    """AB matrix.

    AB: rows of B into A. Shape (d, d, n).
    - Copy A into d "pages".
    - In the first page, replace the 1st row of A with the 1st row of B.
    ...
    - In the d-th page, replace the d-th row of A with the d-th row of B.
    - Return the stack of pages.
    """
    d, n = A.shape
    AB = np.tile(A, (d, 1, 1))
    i = np.arange(d)
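    # In page ``i`` (``AB[i]``), overwrite row ``i`` with the i-th row of B.
    # For instance, with d = 2: AB[0] = [B[0], A[1]] and AB[1] = [A[0], B[1]].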
    AB[i, i] = B[i]
    return AB


def saltelli_2010(
    f_A: np.ndarray, f_B: np.ndarray, f_AB: np.ndarray
) -> tuple[np.ndarray, np.ndarray]:
    r"""Saltelli2010 formulation.

    .. math::

        S_i = \frac{1}{N} \sum_{j=1}^N
        f(\mathbf{B})_j (f(\mathbf{AB}^{(i)})_j - f(\mathbf{A})_j)

    .. math::

        S_{T_i} = \frac{1}{2N} \sum_{j=1}^N
        (f(\mathbf{A})_j - f(\mathbf{AB}^{(i)})_j)^2

    Parameters
    ----------
    f_A, f_B : array_like (s, n)
        Function values at A and B, respectively.
    f_AB : array_like (d, s, n)
        Function values at each of the AB pages.

    Returns
    -------
    s, st : array_like (s, d)
        First order and total order Sobol' indices.

    References
    ----------
    .. [1] Saltelli, A., P. Annoni, I. Azzini, F. Campolongo, M. Ratto, and
       S. Tarantola. "Variance based sensitivity analysis of model
       output. Design and estimator for the total sensitivity index."
       Computer Physics Communications, 181(2):259-270,
       :doi:`10.1016/j.cpc.2009.09.018`, 2010.
    """
    # Empirical variance calculated using output from A and B which are
    # independent. Output of AB is not independent and cannot be used.
    var = np.var([f_A, f_B], axis=(0, -1))

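    # Shapes: f_A and f_B are (s, n) and f_AB is (d, s, n), so ``var`` has
    # shape (s,) and the means below broadcast to (d, s) before the final
    # transpose.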
    # We divide by the variance to obtain a ratio of variances;
    # this leads to eq. 2.
    s = np.mean(f_B * (f_AB - f_A), axis=-1) / var  # Table 2 (b)
    st = 0.5 * np.mean((f_A - f_AB) ** 2, axis=-1) / var  # Table 2 (f)

    return s.T, st.T


@dataclass
class BootstrapSobolResult:
    first_order: BootstrapResult
    total_order: BootstrapResult


@dataclass
class SobolResult:
    first_order: np.ndarray
    total_order: np.ndarray
    _indices_method: Callable
    _f_A: np.ndarray
    _f_B: np.ndarray
    _f_AB: np.ndarray
    _A: np.ndarray | None = None
    _B: np.ndarray | None = None
    _AB: np.ndarray | None = None
    _bootstrap_result: BootstrapResult | None = None

    def bootstrap(
        self,
        confidence_level: DecimalNumber = 0.95,
        n_resamples: IntNumber = 999
    ) -> BootstrapSobolResult:
        """Bootstrap Sobol' indices to provide confidence intervals.

        Parameters
        ----------
        confidence_level : float, default: ``0.95``
            The confidence level of the confidence intervals.
        n_resamples : int, default: ``999``
            The number of resamples performed to form the bootstrap
            distribution of the indices.

        Returns
        -------
        res : BootstrapSobolResult
            Bootstrap result containing the confidence intervals and the
            bootstrap distribution of the indices.

            An object with attributes:

            first_order : BootstrapResult
                Bootstrap result of the first order indices.
            total_order : BootstrapResult
                Bootstrap result of the total order indices.
            See `BootstrapResult` for more details.

        """
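        # Resample the sample indices and recompute both indices from the
        # corresponding columns of f_A, f_B and f_AB, so that first and total
        # order indices are bootstrapped jointly on the same resamples.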
        def statistic(idx):
            f_A_ = self._f_A[:, idx]
            f_B_ = self._f_B[:, idx]
            f_AB_ = self._f_AB[..., idx]
            return self._indices_method(f_A_, f_B_, f_AB_)

        n = self._f_A.shape[1]

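        # Passing the previous `BootstrapResult` (if any) lets `bootstrap`
        # include the already computed bootstrap distribution, so repeated
        # calls add resamples instead of starting from scratch.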
        res = bootstrap(
            [np.arange(n)], statistic=statistic, method="BCa",
            n_resamples=n_resamples,
            confidence_level=confidence_level,
            bootstrap_result=self._bootstrap_result
        )
        self._bootstrap_result = res

        first_order = BootstrapResult(
            confidence_interval=ConfidenceInterval(
                res.confidence_interval.low[0], res.confidence_interval.high[0]
            ),
            bootstrap_distribution=res.bootstrap_distribution[0],
            standard_error=res.standard_error[0],
        )
        total_order = BootstrapResult(
            confidence_interval=ConfidenceInterval(
                res.confidence_interval.low[1], res.confidence_interval.high[1]
            ),
            bootstrap_distribution=res.bootstrap_distribution[1],
            standard_error=res.standard_error[1],
        )

        return BootstrapSobolResult(
            first_order=first_order, total_order=total_order
        )


class PPFDist(Protocol):
    @property
    def ppf(self) -> Callable[..., float]:
        ...

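# Any object exposing a ``ppf`` method satisfies this protocol, e.g. frozen
# `scipy.stats` distributions such as ``uniform(loc=-np.pi, scale=2*np.pi)``.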

def sobol_indices(
    *,
    func: Callable[[np.ndarray], npt.ArrayLike] |
          dict[Literal['f_A', 'f_B', 'f_AB'], np.ndarray],
    n: IntNumber,
    dists: list[PPFDist] | None = None,
    method: Callable | Literal['saltelli_2010'] = 'saltelli_2010',
    random_state: SeedType = None
) -> SobolResult:
    r"""Global sensitivity indices of Sobol'.

    Parameters
    ----------
    func : callable or dict(str, array_like)
        If `func` is a callable, function to compute the Sobol' indices from.
        Its signature must be::

            func(x: ArrayLike) -> ArrayLike

        with ``x`` of shape ``(d, n)`` and output of shape ``(s, n)`` where:

        - ``d`` is the input dimensionality of `func`
          (number of input variables),
        - ``s`` is the output dimensionality of `func`
          (number of output variables), and
        - ``n`` is the number of samples (see `n` below).

        Function evaluation values must be finite.

        If `func` is a dictionary, contains the function evaluations from
        three different arrays. Keys must be: ``f_A``, ``f_B`` and ``f_AB``.
        ``f_A`` and ``f_B`` should have a shape ``(s, n)`` and ``f_AB``
        should have a shape ``(d, s, n)``.
        This is an advanced feature and misuse can lead to wrong analysis.
    n : int
        Number of samples used to generate the matrices ``A`` and ``B``.
        Must be a power of 2. The total number of points at which `func` is
        evaluated will be ``n*(d+2)``.
    dists : list(distributions), optional
        List of each parameter's distribution. The distribution of parameters
        depends on the application and should be carefully chosen.
        Parameters are assumed to be independently distributed, meaning there
        is no constraint nor relationship between their values.

        Distributions must be an instance of a class with a ``ppf``
        method.

        Must be specified if `func` is a callable, and ignored otherwise.
    method : Callable or str, default: 'saltelli_2010'
        Method used to compute the first and total Sobol' indices.

        If a callable, its signature must be::

            func(f_A: np.ndarray, f_B: np.ndarray, f_AB: np.ndarray)
            -> Tuple[np.ndarray, np.ndarray]

        with ``f_A, f_B`` of shape ``(s, n)`` and ``f_AB`` of shape
        ``(d, s, n)``.
        These arrays contain the function evaluations from three different
        sets of samples.
        The output is a tuple of the first and total indices with
        shape ``(s, d)``.
        This is an advanced feature and misuse can lead to wrong analysis.
    random_state : {None, int, `numpy.random.Generator`}, optional
        If `random_state` is an int or None, a new `numpy.random.Generator`
        is created using ``np.random.default_rng(random_state)``.
        If `random_state` is already a ``Generator`` instance, then the
        provided instance is used.

    Returns
    -------
    res : SobolResult
        An object with attributes:

        first_order : ndarray of shape (s, d)
            First order Sobol' indices.
        total_order : ndarray of shape (s, d)
            Total order Sobol' indices.

        And method:

        bootstrap(confidence_level: float, n_resamples: int)
        -> BootstrapSobolResult

            A method providing confidence intervals on the indices.
            See `scipy.stats.bootstrap` for more details.

            The bootstrapping is done on both first and total order indices,
            and they are available in `BootstrapSobolResult` as attributes
            ``first_order`` and ``total_order``.

    Notes
    -----
    The Sobol' method [1]_, [2]_ is a variance-based Sensitivity Analysis
    which obtains the contribution of each parameter to the variance of the
    quantities of interest (QoIs; i.e., the outputs of `func`).
    Respective contributions can be used to rank the parameters and
    also gauge the complexity of the model by computing the
    model's effective (or mean) dimension.

    .. note::

        Parameters are assumed to be independently distributed. Each
        parameter can still follow any distribution. In fact, the
        distribution is very important and should match the real
        distribution of the parameters.

    It uses a functional decomposition of the variance of the function to
    explore

    .. math::

        \mathbb{V}(Y) = \sum_{i}^{d} \mathbb{V}_i (Y) + \sum_{i<j}^{d}
        \mathbb{V}_{ij}(Y) + ... + \mathbb{V}_{1,2,...,d}(Y),

    introducing conditional variances:

    .. math::

        \mathbb{V}_i(Y) = \mathbb{V}[\mathbb{E}(Y|x_i)]
        \qquad
        \mathbb{V}_{ij}(Y) = \mathbb{V}[\mathbb{E}(Y|x_i x_j)]
        - \mathbb{V}_i(Y) - \mathbb{V}_j(Y),

    Sobol' indices are expressed as

    .. math::

        S_i = \frac{\mathbb{V}_i(Y)}{\mathbb{V}[Y]}
        \qquad
        S_{ij} = \frac{\mathbb{V}_{ij}(Y)}{\mathbb{V}[Y]}.

    :math:`S_{i}` corresponds to the first-order term which apprises the
    contribution of the i-th parameter, while :math:`S_{ij}` corresponds to
    the second-order term which informs about the contribution of
    interactions between the i-th and the j-th parameters. These equations
    can be generalized to compute higher order terms; however, they are
    expensive to compute and their interpretation is complex.
    This is why only first order indices are provided.

    Total order indices represent the global contribution of the parameters
    to the variance of the QoI and are defined as:

    .. math::

        S_{T_i} = S_i + \sum_j S_{ij} + \sum_{j,k} S_{ijk} + ...
        = 1 - \frac{\mathbb{V}[\mathbb{E}(Y|x_{\sim i})]}{\mathbb{V}[Y]}.

    First order indices sum to at most 1, while total order indices sum to
    at least 1. If there are no interactions, then first and total order
    indices are equal, and both first and total order indices sum to 1.

    .. warning::

        Negative Sobol' values are due to numerical errors. Increasing the
        number of points `n` should help.

        The number of samples required for a good analysis increases with
        the dimensionality of the problem. For example, for a 3-dimensional
        problem, consider at least ``n >= 2**12``. The more complex the
        model is, the more samples will be needed.

        Even for a purely additive model, the indices may not sum to 1 due
        to numerical noise.

    References
    ----------
    .. [1] Sobol, I. M. "Sensitivity analysis for nonlinear mathematical
       models." Mathematical Modeling and Computational Experiment,
       1:407-414, 1993.
    .. [2] Sobol, I. M. "Global sensitivity indices for nonlinear
       mathematical models and their Monte Carlo estimates." Mathematics
       and Computers in Simulation, 55(1-3):271-280,
       :doi:`10.1016/S0378-4754(00)00270-6`, 2001.
    .. [3] Saltelli, A. "Making best use of model evaluations to
       compute sensitivity indices." Computer Physics Communications,
       145(2):280-297, :doi:`10.1016/S0010-4655(02)00280-1`, 2002.
    .. [4] Saltelli, A., M. Ratto, T. Andres, F. Campolongo, J. Cariboni,
       D. Gatelli, M. Saisana, and S. Tarantola. "Global Sensitivity
       Analysis. The Primer." 2007.
    .. [5] Saltelli, A., P. Annoni, I. Azzini, F. Campolongo, M. Ratto, and
       S. Tarantola. "Variance based sensitivity analysis of model
       output. Design and estimator for the total sensitivity index."
       Computer Physics Communications, 181(2):259-270,
       :doi:`10.1016/j.cpc.2009.09.018`, 2010.
    .. [6] Ishigami, T. and T. Homma. "An importance quantification technique
       in uncertainty analysis for computer models." IEEE,
       :doi:`10.1109/ISUMA.1990.151285`, 1990.

    Examples
    --------
    The following is an example with the Ishigami function [6]_

    .. math::

        Y(\mathbf{x}) = \sin x_1 + 7 \sin^2 x_2 + 0.1 x_3^4 \sin x_1,

    with :math:`\mathbf{x} \in [-\pi, \pi]^3`. This function exhibits strong
    non-linearity and non-monotonicity.

    Remember, Sobol' indices assume that samples are independently
    distributed. In this case we use a uniform distribution on each marginal.

    >>> import numpy as np
    >>> from scipy.stats import sobol_indices, uniform
    >>> rng = np.random.default_rng()
    >>> def f_ishigami(x):
    ...     f_eval = (
    ...         np.sin(x[0])
    ...         + 7 * np.sin(x[1])**2
    ...         + 0.1 * (x[2]**4) * np.sin(x[0])
    ...     )
    ...     return f_eval
    >>> indices = sobol_indices(
    ...     func=f_ishigami, n=1024,
    ...     dists=[
    ...         uniform(loc=-np.pi, scale=2*np.pi),
    ...         uniform(loc=-np.pi, scale=2*np.pi),
    ...         uniform(loc=-np.pi, scale=2*np.pi)
    ...     ],
    ...     random_state=rng
    ... )
    >>> indices.first_order
    array([0.31637954, 0.43781162, 0.00318825])
    >>> indices.total_order
    array([0.56122127, 0.44287857, 0.24229595])

    Confidence intervals can be obtained using bootstrapping.

    >>> boot = indices.bootstrap()

    Then, this information can be easily visualized.

    >>> import matplotlib.pyplot as plt
    >>> fig, axs = plt.subplots(1, 2, figsize=(9, 4))
    >>> _ = axs[0].errorbar(
    ...     [1, 2, 3], indices.first_order, fmt='o',
    ...     yerr=[
    ...         indices.first_order - boot.first_order.confidence_interval.low,
    ...         boot.first_order.confidence_interval.high - indices.first_order
    ...     ],
    ... )
    >>> axs[0].set_ylabel("First order Sobol' indices")
    >>> axs[0].set_xlabel('Input parameters')
    >>> axs[0].set_xticks([1, 2, 3])
    >>> _ = axs[1].errorbar(
    ...     [1, 2, 3], indices.total_order, fmt='o',
    ...     yerr=[
    ...         indices.total_order - boot.total_order.confidence_interval.low,
    ...         boot.total_order.confidence_interval.high - indices.total_order
    ...     ],
    ... )
    >>> axs[1].set_ylabel("Total order Sobol' indices")
    >>> axs[1].set_xlabel('Input parameters')
    >>> axs[1].set_xticks([1, 2, 3])
    >>> plt.tight_layout()
    >>> plt.show()

    .. note::

        By default, `scipy.stats.uniform` has support ``[0, 1]``.
        Using the parameters ``loc`` and ``scale``, one obtains the uniform
        distribution on ``[loc, loc + scale]``.

    This result is particularly interesting because the first order index
    :math:`S_{x_3} = 0` whereas its total order is
    :math:`S_{T_{x_3}} = 0.244`.
    This means that higher order interactions with :math:`x_3` are
    responsible for the difference. Almost 25% of the observed variance
    on the QoI is due to the interactions between :math:`x_3` and
    :math:`x_1`, although :math:`x_3` by itself has no impact on the QoI.

    The following gives a visual explanation of Sobol' indices on this
    function. Let's generate 1024 samples in :math:`[-\pi, \pi]^3` and
    calculate the value of the output.

    >>> from scipy.stats import qmc
    >>> n_dim = 3
    >>> p_labels = ['$x_1$', '$x_2$', '$x_3$']
    >>> sample = qmc.Sobol(d=n_dim, seed=rng).random(1024)
    >>> sample = qmc.scale(
    ...     sample=sample,
    ...     l_bounds=[-np.pi, -np.pi, -np.pi],
    ...     u_bounds=[np.pi, np.pi, np.pi]
    ... )
    >>> output = f_ishigami(sample.T)

    Now we can do scatter plots of the output with respect to each parameter.
    This gives a visual way to understand how each parameter impacts the
    output of the function.

    >>> fig, ax = plt.subplots(1, n_dim, figsize=(12, 4))
    >>> for i in range(n_dim):
    ...     xi = sample[:, i]
    ...     ax[i].scatter(xi, output, marker='+')
    ...     ax[i].set_xlabel(p_labels[i])
    >>> ax[0].set_ylabel('Y')
    >>> plt.tight_layout()
    >>> plt.show()

    Now Sobol' goes a step further:
    by conditioning the output value on given values of the parameter
    (black lines), the conditional output mean is computed. It corresponds to
    the term :math:`\mathbb{E}(Y|x_i)`. Taking the variance of this term gives
    the numerator of the Sobol' indices.

    >>> mini = np.min(output)
    >>> maxi = np.max(output)
    >>> n_bins = 10
    >>> bins = np.linspace(-np.pi, np.pi, num=n_bins, endpoint=False)
    >>> dx = bins[1] - bins[0]
    >>> fig, ax = plt.subplots(1, n_dim, figsize=(12, 4))
    >>> for i in range(n_dim):
    ...     xi = sample[:, i]
    ...     ax[i].scatter(xi, output, marker='+')
    ...     ax[i].set_xlabel(p_labels[i])
    ...     for bin_ in bins:
    ...         idx = np.where((bin_ <= xi) & (xi <= bin_ + dx))
    ...         xi_ = xi[idx]
    ...         y_ = output[idx]
    ...         ave_y_ = np.mean(y_)
    ...         ax[i].plot([bin_ + dx/2] * 2, [mini, maxi], c='k')
    ...         ax[i].scatter(bin_ + dx/2, ave_y_, c='r')
    >>> ax[0].set_ylabel('Y')
    >>> plt.tight_layout()
    >>> plt.show()

    Looking at :math:`x_3`, the variance
    of the mean is zero, leading to :math:`S_{x_3} = 0`. But we can further
    observe that the variance of the output is not constant along the
    parameter values of :math:`x_3`. This heteroscedasticity is explained by
    higher order interactions. Moreover, heteroscedasticity is also
    noticeable on :math:`x_1`, leading to an interaction between :math:`x_3`
    and :math:`x_1`. On :math:`x_2`, the variance seems to be constant and
    thus no interaction with this parameter can be supposed.

    This case is fairly simple to analyse visually---although it is only a
    qualitative analysis. Nevertheless, when the number of input parameters
    increases such analysis becomes unrealistic as it would be difficult to
    conclude on high-order terms. Hence the benefit of using Sobol' indices.

    """
    random_state = check_random_state(random_state)

    n_ = int(n)
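    # ``n_ & (n_ - 1) == 0`` is the usual bit trick to test that an integer
    # is a power of two; ``n != n_`` rejects non-integer input.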
    if not (n_ & (n_ - 1) == 0) or n != n_:
        raise ValueError(
            "The balance properties of Sobol' points require 'n' "
            "to be a power of 2."
        )
    n = n_

    if not callable(method):
        indices_methods: dict[str, Callable] = {
            "saltelli_2010": saltelli_2010,
        }
        try:
            method = method.lower()  # type: ignore[assignment]
            indices_method_ = indices_methods[method]
        except KeyError as exc:
            message = (
                f"{method!r} is not a valid 'method'. It must be one of"
                f" {set(indices_methods)!r} or a callable."
            )
            raise ValueError(message) from exc
    else:
        indices_method_ = method
        sig = inspect.signature(indices_method_)

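        # A user-provided callable must accept exactly the keyword
        # parameters `f_A`, `f_B` and `f_AB`, like `saltelli_2010` does.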
        if set(sig.parameters) != {'f_A', 'f_B', 'f_AB'}:
            message = (
                "If 'method' is a callable, it must have the following"
                f" signature: {inspect.signature(saltelli_2010)}"
            )
            raise ValueError(message)

    def indices_method(f_A, f_B, f_AB):
        """Wrap indices method to ensure proper output dimension.

        1D when single output, 2D otherwise.
        """
        return np.squeeze(indices_method_(f_A=f_A, f_B=f_B, f_AB=f_AB))

    if callable(func):
        if dists is None:
            raise ValueError(
                "'dists' must be defined when 'func' is a callable."
            )

        def wrapped_func(x):
            return np.atleast_2d(func(x))

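        # `func` is evaluated on A, B and the d pages of AB, i.e. at
        # ``n*(d+2)`` points in total.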
        A, B = sample_A_B(n=n, dists=dists, random_state=random_state)
        AB = sample_AB(A=A, B=B)

        f_A = wrapped_func(A)

        if f_A.shape[1] != n:
            raise ValueError(
                "'func' output should have a shape ``(s, -1)`` with ``s`` "
                "the number of outputs."
            )

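        # Evaluate all d pages of AB in a single call: flatten the pages into
        # one (d, n*d) batch of points, call `func`, then restore the
        # (d, s, n) layout.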
        def funcAB(AB):
            d, d, n = AB.shape
            AB = np.moveaxis(AB, 0, -1).reshape(d, n*d)
            f_AB = wrapped_func(AB)
            return np.moveaxis(f_AB.reshape((-1, n, d)), -1, 0)

        f_B = wrapped_func(B)
        f_AB = funcAB(AB)
    else:
        message = (
            "When 'func' is a dictionary, it must contain the following "
            "keys: 'f_A', 'f_B' and 'f_AB'. "
            "'f_A' and 'f_B' should have a shape ``(s, n)`` and 'f_AB' "
            "should have a shape ``(d, s, n)``."
        )
        try:
            f_A, f_B, f_AB = np.atleast_2d(
                func['f_A'], func['f_B'], func['f_AB']
            )
        except KeyError as exc:
            raise ValueError(message) from exc

        if f_A.shape[1] != n or f_A.shape != f_B.shape or \
                f_AB.shape == f_A.shape or f_AB.shape[-1] % n != 0:
            raise ValueError(message)

    # Normalization by mean
    # Sobol', I. and Levitan, Y. L. (1999). On the use of variance reducing
    # multipliers in Monte Carlo computations of a global sensitivity index.
    # Computer Physics Communications, 117(1):52-61.
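    # Centering the outputs by their empirical mean reduces the numerical
    # error of the estimators below; the indices themselves are unchanged.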
    mean = np.mean([f_A, f_B], axis=(0, -1)).reshape(-1, 1)
    f_A -= mean
    f_B -= mean
    f_AB -= mean

    # Compute indices
    # Filter warnings for constant output as var = 0
    with np.errstate(divide='ignore', invalid='ignore'):
        first_order, total_order = indices_method(f_A=f_A, f_B=f_B, f_AB=f_AB)

    # null variance means null indices
    first_order[~np.isfinite(first_order)] = 0
    total_order[~np.isfinite(total_order)] = 0

    res = dict(
        first_order=first_order,
        total_order=total_order,
        _indices_method=indices_method,
        _f_A=f_A,
        _f_B=f_B,
        _f_AB=f_AB
    )

    if callable(func):
        res.update(
            dict(
                _A=A,
                _B=B,
                _AB=AB,
            )
        )

    return SobolResult(**res)