516 lines
13 KiB
Python
516 lines
13 KiB
Python
"""
|
|
.. _statsrefmanual:
|
|
|
|
==========================================
|
|
Statistical functions (:mod:`scipy.stats`)
|
|
==========================================
|
|
|
|
.. currentmodule:: scipy.stats
|
|
|
|
This module contains a large number of probability distributions,
|
|
summary and frequency statistics, correlation functions and statistical
|
|
tests, masked statistics, kernel density estimation, quasi-Monte Carlo
|
|
functionality, and more.
|
|
|
|
Statistics is a very large area, and there are topics that are out of scope
|
|
for SciPy and are covered by other packages. Some of the most important ones
|
|
are:
|
|
|
|
- `statsmodels <https://www.statsmodels.org/stable/index.html>`__:
|
|
regression, linear models, time series analysis, extensions to topics
|
|
also covered by ``scipy.stats``.
|
|
- `Pandas <https://pandas.pydata.org/>`__: tabular data, time series
|
|
functionality, interfaces to other statistical languages.
|
|
- `PyMC <https://docs.pymc.io/>`__: Bayesian statistical
|
|
modeling, probabilistic machine learning.
|
|
- `scikit-learn <https://scikit-learn.org/>`__: classification, regression,
|
|
model selection.
|
|
- `Seaborn <https://seaborn.pydata.org/>`__: statistical data visualization.
|
|
- `rpy2 <https://rpy2.github.io/>`__: Python to R bridge.
|
|
|
|
|
|
Probability distributions
|
|
=========================
|
|
|
|
Each univariate distribution is an instance of a subclass of `rv_continuous`
|
|
(`rv_discrete` for discrete distributions):
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
rv_continuous
|
|
rv_discrete
|
|
rv_histogram
|
|
|
|
Continuous distributions
|
|
------------------------
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
alpha -- Alpha
|
|
anglit -- Anglit
|
|
arcsine -- Arcsine
|
|
argus -- Argus
|
|
beta -- Beta
|
|
betaprime -- Beta Prime
|
|
bradford -- Bradford
|
|
burr -- Burr (Type III)
|
|
burr12 -- Burr (Type XII)
|
|
cauchy -- Cauchy
|
|
chi -- Chi
|
|
chi2 -- Chi-squared
|
|
cosine -- Cosine
|
|
crystalball -- Crystalball
|
|
dgamma -- Double Gamma
|
|
dweibull -- Double Weibull
|
|
erlang -- Erlang
|
|
expon -- Exponential
|
|
exponnorm -- Exponentially Modified Normal
|
|
exponweib -- Exponentiated Weibull
|
|
exponpow -- Exponential Power
|
|
f -- F (Snedecor F)
|
|
fatiguelife -- Fatigue Life (Birnbaum-Saunders)
|
|
fisk -- Fisk
|
|
foldcauchy -- Folded Cauchy
|
|
foldnorm -- Folded Normal
|
|
genlogistic -- Generalized Logistic
|
|
gennorm -- Generalized normal
|
|
genpareto -- Generalized Pareto
|
|
genexpon -- Generalized Exponential
|
|
genextreme -- Generalized Extreme Value
|
|
gausshyper -- Gauss Hypergeometric
|
|
gamma -- Gamma
|
|
gengamma -- Generalized gamma
|
|
genhalflogistic -- Generalized Half Logistic
|
|
genhyperbolic -- Generalized Hyperbolic
|
|
geninvgauss -- Generalized Inverse Gaussian
|
|
gibrat -- Gibrat
|
|
gilbrat -- Gilbrat
|
|
gompertz -- Gompertz (Truncated Gumbel)
|
|
gumbel_r -- Right Sided Gumbel, Log-Weibull, Fisher-Tippett, Extreme Value Type I
|
|
gumbel_l -- Left Sided Gumbel, etc.
|
|
halfcauchy -- Half Cauchy
|
|
halflogistic -- Half Logistic
|
|
halfnorm -- Half Normal
|
|
halfgennorm -- Generalized Half Normal
|
|
hypsecant -- Hyperbolic Secant
|
|
invgamma -- Inverse Gamma
|
|
invgauss -- Inverse Gaussian
|
|
invweibull -- Inverse Weibull
|
|
johnsonsb -- Johnson SB
|
|
johnsonsu -- Johnson SU
|
|
kappa4 -- Kappa 4 parameter
|
|
kappa3 -- Kappa 3 parameter
|
|
ksone -- Distribution of Kolmogorov-Smirnov one-sided test statistic
|
|
kstwo -- Distribution of Kolmogorov-Smirnov two-sided test statistic
|
|
kstwobign -- Limiting Distribution of scaled Kolmogorov-Smirnov two-sided test statistic.
|
|
laplace -- Laplace
|
|
laplace_asymmetric -- Asymmetric Laplace
|
|
levy -- Levy
|
|
levy_l
|
|
levy_stable
|
|
logistic -- Logistic
|
|
loggamma -- Log-Gamma
|
|
loglaplace -- Log-Laplace (Log Double Exponential)
|
|
lognorm -- Log-Normal
|
|
loguniform -- Log-Uniform
|
|
lomax -- Lomax (Pareto of the second kind)
|
|
maxwell -- Maxwell
|
|
mielke -- Mielke's Beta-Kappa
|
|
moyal -- Moyal
|
|
nakagami -- Nakagami
|
|
ncx2 -- Non-central chi-squared
|
|
ncf -- Non-central F
|
|
nct -- Non-central Student's T
|
|
norm -- Normal (Gaussian)
|
|
norminvgauss -- Normal Inverse Gaussian
|
|
pareto -- Pareto
|
|
pearson3 -- Pearson type III
|
|
powerlaw -- Power-function
|
|
powerlognorm -- Power log normal
|
|
powernorm -- Power normal
|
|
rdist -- R-distribution
|
|
rayleigh -- Rayleigh
|
|
rice -- Rice
|
|
recipinvgauss -- Reciprocal Inverse Gaussian
|
|
semicircular -- Semicircular
|
|
skewcauchy -- Skew Cauchy
|
|
skewnorm -- Skew normal
|
|
studentized_range -- Studentized Range
|
|
t -- Student's T
|
|
trapezoid -- Trapezoidal
|
|
triang -- Triangular
|
|
truncexpon -- Truncated Exponential
|
|
truncnorm -- Truncated Normal
|
|
truncpareto -- Truncated Pareto
|
|
truncweibull_min -- Truncated minimum Weibull distribution
|
|
tukeylambda -- Tukey-Lambda
|
|
uniform -- Uniform
|
|
vonmises -- Von-Mises (Circular)
|
|
vonmises_line -- Von-Mises (Line)
|
|
wald -- Wald
|
|
weibull_min -- Minimum Weibull (see Frechet)
|
|
weibull_max -- Maximum Weibull (see Frechet)
|
|
wrapcauchy -- Wrapped Cauchy
|
|
|
|
Multivariate distributions
|
|
--------------------------
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
multivariate_normal -- Multivariate normal distribution
|
|
matrix_normal -- Matrix normal distribution
|
|
dirichlet -- Dirichlet
|
|
wishart -- Wishart
|
|
invwishart -- Inverse Wishart
|
|
multinomial -- Multinomial distribution
|
|
special_ortho_group -- SO(N) group
|
|
ortho_group -- O(N) group
|
|
unitary_group -- U(N) group
|
|
random_correlation -- random correlation matrices
|
|
multivariate_t -- Multivariate t-distribution
|
|
multivariate_hypergeom -- Multivariate hypergeometric distribution
|
|
random_table -- Distribution of random tables with given marginals
|
|
uniform_direction -- Uniform distribution on S(N-1)
|
|
|
|
`scipy.stats.multivariate_normal` methods accept instances
|
|
of the following class to represent the covariance.
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
Covariance -- Representation of a covariance matrix
|
|
|
|
|
|
Discrete distributions
|
|
----------------------
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
bernoulli -- Bernoulli
|
|
betabinom -- Beta-Binomial
|
|
binom -- Binomial
|
|
boltzmann -- Boltzmann (Truncated Discrete Exponential)
|
|
dlaplace -- Discrete Laplacian
|
|
geom -- Geometric
|
|
hypergeom -- Hypergeometric
|
|
logser -- Logarithmic (Log-Series, Series)
|
|
nbinom -- Negative Binomial
|
|
nchypergeom_fisher -- Fisher's Noncentral Hypergeometric
|
|
nchypergeom_wallenius -- Wallenius's Noncentral Hypergeometric
|
|
nhypergeom -- Negative Hypergeometric
|
|
planck -- Planck (Discrete Exponential)
|
|
poisson -- Poisson
|
|
randint -- Discrete Uniform
|
|
skellam -- Skellam
|
|
yulesimon -- Yule-Simon
|
|
zipf -- Zipf (Zeta)
|
|
zipfian -- Zipfian
|
|
|
|
An overview of statistical functions is given below. Many of these functions
|
|
have a similar version in `scipy.stats.mstats` that works for masked arrays.
|
|
|
|
Summary statistics
|
|
==================
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
describe -- Descriptive statistics
|
|
gmean -- Geometric mean
|
|
hmean -- Harmonic mean
|
|
pmean -- Power mean
|
|
kurtosis -- Fisher or Pearson kurtosis
|
|
mode -- Modal value
|
|
moment -- Central moment
|
|
expectile -- Expectile
|
|
skew -- Skewness
|
|
kstat -- n-th k-statistic
|
|
kstatvar -- Unbiased estimator of the variance of the k-statistic
|
|
tmean -- Truncated arithmetic mean
|
|
tvar -- Truncated variance
|
|
tmin -- Truncated minimum
|
|
tmax -- Truncated maximum
|
|
tstd -- Truncated standard deviation
|
|
tsem -- Truncated standard error of the mean
|
|
variation -- Coefficient of variation
|
|
find_repeats
|
|
trim_mean
|
|
gstd -- Geometric Standard Deviation
|
|
iqr
|
|
sem
|
|
bayes_mvs
|
|
mvsdist
|
|
entropy
|
|
differential_entropy
|
|
median_abs_deviation
|
|
|
|
Frequency statistics
|
|
====================
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
cumfreq
|
|
percentileofscore
|
|
scoreatpercentile
|
|
relfreq
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
binned_statistic -- Compute a binned statistic for a set of data.
|
|
binned_statistic_2d -- Compute a 2-D binned statistic for a set of data.
|
|
binned_statistic_dd -- Compute a d-D binned statistic for a set of data.
|
|
|
|
Correlation functions
|
|
=====================
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
f_oneway
|
|
alexandergovern
|
|
pearsonr
|
|
spearmanr
|
|
pointbiserialr
|
|
kendalltau
|
|
weightedtau
|
|
somersd
|
|
linregress
|
|
siegelslopes
|
|
theilslopes
|
|
multiscale_graphcorr
|
|
|
|
Statistical tests
|
|
=================
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
ttest_1samp
|
|
ttest_ind
|
|
ttest_ind_from_stats
|
|
ttest_rel
|
|
chisquare
|
|
cramervonmises
|
|
cramervonmises_2samp
|
|
power_divergence
|
|
kstest
|
|
ks_1samp
|
|
ks_2samp
|
|
epps_singleton_2samp
|
|
mannwhitneyu
|
|
tiecorrect
|
|
rankdata
|
|
ranksums
|
|
wilcoxon
|
|
kruskal
|
|
friedmanchisquare
|
|
brunnermunzel
|
|
combine_pvalues
|
|
jarque_bera
|
|
page_trend_test
|
|
tukey_hsd
|
|
poisson_means_test
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
ansari
|
|
bartlett
|
|
levene
|
|
shapiro
|
|
anderson
|
|
anderson_ksamp
|
|
binom_test
|
|
binomtest
|
|
fligner
|
|
median_test
|
|
mood
|
|
skewtest
|
|
kurtosistest
|
|
normaltest
|
|
goodness_of_fit
|
|
|
|
|
|
Quasi-Monte Carlo
|
|
=================
|
|
|
|
.. toctree::
|
|
:maxdepth: 4
|
|
|
|
stats.qmc
|
|
|
|
Resampling Methods
|
|
==================
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
bootstrap
|
|
permutation_test
|
|
monte_carlo_test
|
|
|
|
Masked statistics functions
|
|
===========================
|
|
|
|
.. toctree::
|
|
|
|
stats.mstats
|
|
|
|
|
|
Other statistical functionality
|
|
===============================
|
|
|
|
Transformations
|
|
---------------
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
boxcox
|
|
boxcox_normmax
|
|
boxcox_llf
|
|
yeojohnson
|
|
yeojohnson_normmax
|
|
yeojohnson_llf
|
|
obrientransform
|
|
sigmaclip
|
|
trimboth
|
|
trim1
|
|
zmap
|
|
zscore
|
|
gzscore
|
|
|
|
Statistical distances
|
|
---------------------
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
wasserstein_distance
|
|
energy_distance
|
|
|
|
Sampling
|
|
--------
|
|
|
|
.. toctree::
|
|
:maxdepth: 4
|
|
|
|
stats.sampling
|
|
|
|
Random variate generation / CDF Inversion
|
|
-----------------------------------------
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
rvs_ratio_uniforms
|
|
|
|
Distribution Fitting
|
|
--------------------
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
fit
|
|
|
|
Directional statistical functions
|
|
---------------------------------
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
directional_stats
|
|
circmean
|
|
circvar
|
|
circstd
|
|
|
|
Contingency table functions
|
|
---------------------------
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
chi2_contingency
|
|
contingency.crosstab
|
|
contingency.expected_freq
|
|
contingency.margins
|
|
contingency.relative_risk
|
|
contingency.association
|
|
contingency.odds_ratio
|
|
fisher_exact
|
|
barnard_exact
|
|
boschloo_exact
|
|
|
|
Plot-tests
|
|
----------
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
ppcc_max
|
|
ppcc_plot
|
|
probplot
|
|
boxcox_normplot
|
|
yeojohnson_normplot
|
|
|
|
Univariate and multivariate kernel density estimation
|
|
-----------------------------------------------------
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
gaussian_kde
|
|
|
|
Warnings / Errors used in :mod:`scipy.stats`
|
|
--------------------------------------------
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
DegenerateDataWarning
|
|
ConstantInputWarning
|
|
NearConstantInputWarning
|
|
FitError
|
|
|
|
"""
|
|
|
|
from ._warnings_errors import (ConstantInputWarning, NearConstantInputWarning,
|
|
DegenerateDataWarning, FitError)
|
|
from ._stats_py import *
|
|
from ._variation import variation
|
|
from .distributions import *
|
|
from ._morestats import *
|
|
from ._binomtest import binomtest
|
|
from ._binned_statistic import *
|
|
from ._kde import gaussian_kde
|
|
from . import mstats
|
|
from . import qmc
|
|
from ._multivariate import *
|
|
from . import contingency
|
|
from .contingency import chi2_contingency
|
|
from ._resampling import bootstrap, monte_carlo_test, permutation_test
|
|
from ._entropy import *
|
|
from ._hypotests import *
|
|
from ._rvs_sampling import rvs_ratio_uniforms
|
|
from ._page_trend_test import page_trend_test
|
|
from ._mannwhitneyu import mannwhitneyu
|
|
from ._fit import fit, goodness_of_fit
|
|
from ._covariance import Covariance
|
|
|
|
# Deprecated namespaces, to be removed in v2.0.0
|
|
from . import (
|
|
biasedurn, kde, morestats, mstats_basic, mstats_extras, mvn, statlib, stats
|
|
)
|
|
|
|
# Public API: every name bound in the module namespace at this point that
# does not begin with an underscore (private names as well as dunders are
# excluded). ``dir()`` returns the names sorted, and that order is kept.
__all__ = [name for name in dir() if name[:1] != "_"]
|
|
|
|
# Bind ``test`` to a PytestTester constructed with this module's name, then
# delete the helper class so it does not remain in the module namespace.
# These statements run after ``__all__`` has been computed, so the names
# bound here are not added to ``__all__`` by that computation.
from scipy._lib._testutils import PytestTester
|
|
test = PytestTester(__name__)
|
|
del PytestTester
|