projektAI/venv/Lib/site-packages/mlxtend/evaluate/bootstrap.py

121 lines
3.9 KiB
Python
Raw Normal View History

2021-06-06 22:13:05 +02:00
# Sebastian Raschka 2014-2020
# mlxtend Machine Learning Library Extensions
#
# Bootstrap functions
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause
import numpy as np
def bootstrap(x, func, num_rounds=1000, ci=0.95, ddof=1, seed=None):
    """Implements the ordinary nonparametric bootstrap

    Parameters
    ----------
    x : NumPy array, shape=(n_samples, [n_columns])
        A one- or multidimensional array of data records.
        Records are resampled along the first axis.

    func : <func>
        A function which computes a statistic that is used
        to compute the bootstrap replicates (the statistic computed
        from the bootstrap samples). This function must return a
        scalar value. For example, `np.mean` or `np.median` would be
        an acceptable argument for `func` if `x` is a 1-dimensional array
        or vector.

    num_rounds : int (default=1000)
        The number of bootstrap samples to draw where each
        bootstrap sample has the same number of records as the
        original dataset.

    ci : float (default=0.95)
        A float in the range (0, 1) that represents the
        confidence level for computing the confidence interval.
        For example, `ci=0.95` (default)
        will compute the 95% confidence
        interval from the bootstrap replicates.

    ddof : int (default=1)
        The delta degrees of freedom used when computing the
        standard error.

    seed : int or None (default=None)
        Random seed for generating bootstrap samples.

    Returns
    -------
    original, standard_error, (lower_ci, upper_ci) : tuple
        Returns the statistic of the original sample (`original`),
        the standard error of the estimate, and the
        respective confidence interval bounds.

    Raises
    ------
    AttributeError
        If `ci` is not in the open interval (0, 1), or if `func`
        does not return a scalar when applied to `x`.

    Examples
    --------
    >>> from mlxtend.evaluate import bootstrap
    >>> rng = np.random.RandomState(123)
    >>> x = rng.normal(loc=5., size=100)
    >>> original, std_err, ci_bounds = bootstrap(x,
    ...                                          num_rounds=1000,
    ...                                          func=np.mean,
    ...                                          ci=0.95,
    ...                                          seed=123)
    >>> print('Mean: %.2f, SE: +/- %.2f, CI95: [%.2f, %.2f]' % (original,
    ...                                                          std_err,
    ...                                                          ci_bounds[0],
    ...                                                          ci_bounds[1]))
    Mean: 5.03, SE: +/- 0.11, CI95: [4.80, 5.26]

    For more usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/evaluate/bootstrap/

    """
    if ci <= 0 or ci >= 1:
        raise AttributeError('ci must be in range (0, 1)')

    # The statistic of the original sample doubles as a sanity check
    # that `func` produces a scalar.
    check_output = func(x)

    # Accept Python floats/ints and any object exposing an empty `shape`
    # (NumPy scalars, 0-d arrays). Objects without a `shape` attribute
    # (e.g. lists) are rejected with the same error instead of failing
    # on the attribute lookup itself.
    output_shape = getattr(check_output, 'shape', None)
    is_scalar = (isinstance(check_output, (float, int))
                 or (output_shape is not None and len(output_shape) == 0))
    if not is_scalar:
        raise AttributeError('func must return a scalar')

    rng = np.random.RandomState(seed)
    bootstrap_replicates = np.zeros(shape=num_rounds)

    # quantile function implemented due
    # to the weird behavior of the NumPy equivalent with
    # either nearest or lower interpolation
    def quantile(x, q):
        # Convert the quantile into a 0-based rank in the sorted array
        # and clamp it to the valid index range [0, n - 1].
        n = x.shape[0]
        rank = int(round(q * n)) - 1
        rank = min(max(rank, 0), n - 1)
        return x[rank]

    sample_idx = np.arange(x.shape[0])

    for i in range(num_rounds):
        # Draw as many records as the original data, with replacement.
        bootstrap_idx = rng.choice(sample_idx,
                                   size=sample_idx.shape[0],
                                   replace=True)
        bootstrap_replicates[i] = func(x[bootstrap_idx])

    original = check_output
    standard_error = np.std(bootstrap_replicates, ddof=ddof)

    # Confidence bounds are read from the sorted replicates, leaving
    # (1 - ci) / 2 of the mass in each tail.
    t = np.sort(bootstrap_replicates)
    bound = (1 - ci) / 2.
    upper_ci = quantile(t, q=(ci + bound))
    lower_ci = quantile(t, q=bound)

    return original, standard_error, (lower_ci, upper_ci)