Merge pull request 'Rewritten to proper bootstrap' (#3) from fixes into main

Reviewed-on: #3
Marcin Kostrzewski 2022-05-17 22:43:37 +02:00
commit f4f61b0876


@@ -25,7 +25,7 @@
},
{
"cell_type": "code",
"execution_count": 62,
"execution_count": 68,
"metadata": {
"pycharm": {
"name": "#%%\n"
@@ -35,18 +35,14 @@
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from math import sqrt\n",
"from scipy import stats\n",
"from scipy.stats import sem\n",
"from scipy.stats import t\n",
"import matplotlib.pyplot as plt\n",
"from statistics import mean, stdev\n",
"from scipy.stats import ttest_ind, ttest_1samp, ttest_rel"
"from enum import Enum\n",
"from scipy.stats import ttest_ind, ttest_1samp, ttest_rel, shapiro"
]
},
{
"cell_type": "code",
"execution_count": 86,
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
@@ -55,29 +51,39 @@
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
"def calculate_p(t_stat, df):\n",
" \"\"\"Funkcja oblicza wartość *p* na podstawie statystyki testowej i stopni swobody\"\"\"\n",
" return (1.0 - t.cdf(abs(t_stat), df)) * 2.0"
"class Alternatives(Enum):\n",
" LESS = 'less'\n",
" GREATER = 'greater'"
]
},
{
"cell_type": "code",
"execution_count": 51,
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
"def calculate_cv(df, alpha=0.05):\n",
" \"\"\"Funkcja oblicza wartość krytyczną (critical value)\"\"\"\n",
" return t.ppf(1.0 - alpha, df)"
"def calculate_t_difference(t_stat_sample, t_stat_list, alternative):\n",
" \"\"\"\n",
" Funkcja oblicza procent statystyk testowych powstałych z prób bootstrapowych, \n",
" które róznią się od statystyki testowej powstałej ze zbioru według hipotezy alternatywnej.\n",
" \"\"\"\n",
" all_stats = len(t_stat_list)\n",
" stats_different_count = 0\n",
" for t_stat_boot in t_stat_list:\n",
" if alternative is Alternatives.LESS and t_stat_boot < t_stat_sample:\n",
" stats_different_count += 1 \n",
" elif alternative is Alternatives.GREATER and t_stat_boot > t_stat_sample:\n",
" stats_different_count += 1\n",
" return stats_different_count / all_stats"
]
},
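calculate_t_difference is what turns the bootstrap distribution into a one-sided p-value: it counts the share of bootstrap statistics that fall on the alternative's side of the sample statistic. A minimal standalone check of that behaviour, with made-up toy numbers (a sketch, not part of this commit):

# Toy check of calculate_t_difference; the values below are illustrative only.
boot_stats = [-2.0, -0.5, 0.3, 1.1]   # pretend bootstrap t statistics
sample_stat = 0.0                      # pretend t statistic from the original sample
# Under Alternatives.LESS, two of the four bootstrap statistics lie below 0.0,
# so the returned fraction (the bootstrap p-value) is 0.5.
assert calculate_t_difference(sample_stat, boot_stats, Alternatives.LESS) == 0.5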
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 72,
"metadata": {
"pycharm": {
"name": "#%%\n"
@@ -85,57 +91,112 @@
},
"outputs": [],
"source": [
"def t_test(sample_1, sample_2=None, df_fn=df_single, t_stat_fn=t_stat_single, population_mean=None, alpha=0.05):\n",
"def t_test_1_samp(sample_1, population_mean=None, alternative=Alternatives.LESS):\n",
" \"\"\"\n",
" Funkcja przeprowadza test T-studenta dla dwóch zmiennych.\n",
" liczba kolumn wynosi 1, test jest przeprowadzany dla jednej zmiennej.\n",
" @param df_fn - funkcja obliczająca stopnie swobody\n",
" @param t_stat_fn - funkcja obliczająca statystykę T\n",
" Funkcja przeprowadza test T-studenta dla jednej zmiennej.\n",
" \"\"\"\n",
" t_stat_list = get_t_stats(sample_1, sample_2, t_stat_fn, population_mean=population_mean)\n",
" t_stat_sum = sum(t_stat_list)\n",
" t_stat_from_sample, _ = ttest_1samp(a=sample_1, popmean=population_mean, alternative=alternative.value)\n",
" t_stat_list = get_t_stats(sample_1, t_stat_fn=ttest_1samp, alternative=alternative, population_mean=population_mean)\n",
"\n",
" data_size = sample_1.shape[0]\n",
" p = calculate_t_difference(t_stat_from_sample, t_stat_list, alternative)\n",
"\n",
" t_stat = t_stat_sum / data_size\n",
" # TODO: dolna i górna opcja dają inne wyniki z jakiegoś powodu (???)\n",
" t_stat = mean(t_stat_list)\n",
"\n",
" if sample_2 is None:\n",
" df = df_fn(sample_1)\n",
" else:\n",
" df = df_fn(sample_1, sample_2)\n",
" cv = calculate_cv(df, alpha)\n",
" p = calculate_p(t_stat, df)\n",
" return t_stat, df, cv, p, t_stat_list"
" return p, t_stat_from_sample, t_stat_list"
]
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"def get_t_stats(sample_1, sample_2=None, t_stat_fn=t_stat_single, population_mean=None):\n",
"def t_test_ind(sample_1, sample_2, alternative=Alternatives.LESS):\n",
" \"\"\"\n",
" Funkcja przeprowadza test T-studenta dla dwóch zmiennych niezależnych.\n",
" \"\"\"\n",
" t_stat_from_sample, _ = ttest_ind(sample_1, sample_2, alternative=alternative.value)\n",
" t_stat_list = get_t_stats(sample_1, sample_2, alternative=alternative, t_stat_fn=ttest_ind)\n",
"\n",
" p = calculate_t_difference(t_stat_from_sample, t_stat_list, alternative)\n",
"\n",
" return p, t_stat_from_sample, t_stat_list"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [],
"source": [
"def t_test_dep(sample_1, sample_2, alternative=Alternatives.LESS):\n",
" \"\"\"\n",
" Funkcja przeprowadza test T-studenta dla dwóch zmiennych zależnych.\n",
" \"\"\"\n",
" t_stat_list = get_t_stats(sample_1, sample_2, alternative=alternative, t_stat_fn=ttest_rel)\n",
" t_stat_from_sample, _ = ttest_rel(sample_1, sample_2, alternative=alternative.value)\n",
"\n",
" p = calculate_t_difference(t_stat_from_sample, t_stat_list, alternative)\n",
"\n",
" return p, t_stat_from_sample, t_stat_list"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"def get_t_stats(sample_1, sample_2=None, t_stat_fn=ttest_1samp, alternative=Alternatives.LESS, population_mean=None):\n",
" \"\"\"Funkcja oblicza listę statystyk testowych dla każdej próbki bootstrapowej wybranej na podstawie danych sample_1 i sample_2\"\"\"\n",
" t_stat_list = []\n",
"\n",
" # One sample test\n",
" if t_stat_fn==t_stat_single:\n",
" if t_stat_fn is ttest_1samp and sample_2 is None:\n",
" if not population_mean:\n",
" raise Exception(\"population_mean not provided\")\n",
" for bootstrap in generate_bootstraps(sample_1):\n",
" stat = t_stat_fn(bootstrap, population_mean)\n",
" stat, _ = t_stat_fn(bootstrap, population_mean, alternative=alternative.value)\n",
" t_stat_list.append(stat)\n",
" return t_stat_list\n",
"\n",
" # Two sample test\n",
" for bootstrap_1, bootstrap_2 in zip(generate_bootstraps(sample_1), generate_bootstraps(sample_2)):\n",
" stat = t_stat_fn(bootstrap_1, bootstrap_2)\n",
" for bootstrap_sample in generate_bootstraps(pd.concat((sample_1, sample_2), ignore_index=True)):\n",
" bootstrap_1 = bootstrap_sample.iloc[: len(bootstrap_sample) // 2]\n",
" bootstrap_2 = bootstrap_sample.iloc[len(bootstrap_sample) // 2 :]\n",
" stat, _ = t_stat_fn(bootstrap_1, bootstrap_2, alternative=alternative.value)\n",
" t_stat_list.append(stat)\n",
" return t_stat_list"
]
},
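get_t_stats relies on generate_bootstraps, which is defined in an earlier cell that this diff does not touch. A minimal sketch of what it is assumed to do (resample rows with replacement, same size as the input; the default of 100 resamples is an assumption inferred from the "... (and 95 more)" outputs further down):

def generate_bootstraps(sample, n_bootstraps=100):
    """Yield bootstrap resamples: rows drawn with replacement, same size as the input."""
    for _ in range(n_bootstraps):
        yield sample.sample(n=len(sample), replace=True)

Note that for the two-sample case the new code pools both samples with pd.concat, resamples from the pooled data, and splits each resample in half, so the bootstrap statistics are generated under the null hypothesis that both samples come from the same distribution.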
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
"def pretty_print_test(p, t_stat_from_sample, t_stat_list, thesis, alternative, max_print=5):\n",
" print('Wyniki bootstrapowej wersji testu T-studenta')\n",
" print()\n",
" print(f'Hipoteza: {thesis}')\n",
" if alternative is Alternatives.LESS:\n",
" print(f'Hipoteza alternatywna: średnia jest mniejsza')\n",
" else:\n",
" print(f'Hipoteza alternatywna: średnia jest większa')\n",
" print()\n",
" print(f'p: {p}')\n",
" print(f'Wartość statystyki testowej z próby: {t_stat_from_sample}')\n",
" print(f'Wartości statystyk z prób boostrapowych:')\n",
"\n",
" t_stat_list_len = len(t_stat_list)\n",
" for i in range(min(max_print, t_stat_list_len)):\n",
" print(f'{t_stat_list[i]}, ', end='')\n",
" if max_print < t_stat_list_len:\n",
" remaining = t_stat_list_len - max_print\n",
" print(f'... (i {remaining} pozostałych)')\n",
"\n",
" print()\n",
" print()"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -147,7 +208,7 @@
},
{
"cell_type": "code",
"execution_count": 87,
"execution_count": 77,
"metadata": {},
"outputs": [
{
@@ -164,7 +225,7 @@
"source": [
"ALPHA = 0.05\n",
"female_heights = dataset['Female height'].to_numpy()\n",
"shapiro_test = stats.shapiro(female_heights)\n",
"shapiro_test = shapiro(female_heights)\n",
"\n",
"if shapiro_test.pvalue > ALPHA:\n",
" print(\"Female height: Dane mają rozkład normalny.\")\n",
@ -172,7 +233,7 @@
" print(\"Female height: Dane nie mają rozkładu normalnego.\")\n",
"\n",
"male_heights = dataset['Male height'].to_numpy()\n",
"shapiro_test = stats.shapiro(male_heights)\n",
"shapiro_test = shapiro(male_heights)\n",
"\n",
"if shapiro_test.pvalue > ALPHA:\n",
" print(\"Male height: Dane mają rozkład normalny.\")\n",
@ -180,7 +241,7 @@
" print(\"Male height: Dane nie mają rozkładu normalnego.\")\n",
"\n",
"weights_before = dataset['Weight before'].to_numpy()\n",
"shapiro_test = stats.shapiro(weights_before)\n",
"shapiro_test = shapiro(weights_before)\n",
"\n",
"if shapiro_test.pvalue > ALPHA:\n",
" print(\"Weight before: Dane mają rozkład normalny.\")\n",
@ -188,7 +249,7 @@
" print(\"Weight before: Dane nie mają rozkładu normalnego.\")\n",
"\n",
"weights_after = dataset['Weight after'].to_numpy()\n",
"shapiro_test = stats.shapiro(weights_after)\n",
"shapiro_test = shapiro(weights_after)\n",
"\n",
"if shapiro_test.pvalue > ALPHA:\n",
" print(\"Weight after: Dane mają rozkład normalny.\")\n",
@@ -211,7 +272,7 @@
},
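The four Shapiro-Wilk checks above repeat the same pattern once per column; a compact equivalent (a sketch only, not what the commit actually contains) would loop over the column names:

# Sketch: the same normality checks as above, written as a loop over the columns.
for column in ['Female height', 'Male height', 'Weight before', 'Weight after']:
    shapiro_test = shapiro(dataset[column].to_numpy())
    if shapiro_test.pvalue > ALPHA:
        print(f"{column}: The data are normally distributed.")
    else:
        print(f"{column}: The data are not normally distributed.")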
{
"cell_type": "code",
"execution_count": 55,
"execution_count": 78,
"metadata": {
"pycharm": {
"name": "#%%\n"
@@ -239,7 +300,7 @@
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": 79,
"metadata": {
"collapsed": false,
"pycharm": {
@@ -248,45 +309,16 @@
},
"outputs": [],
"source": [
"def t_stat_single(sample, population_mean):\n",
" \"\"\"Funkcja oblicza wartość statystyki testowej dla jednej próbki\"\"\"\n",
" if sample.empty:\n",
" raise Exception(\"Empty sample\")\n",
" sample = sample['Height'].values.tolist()\n",
" sample_size = len(sample)\n",
" return (mean(sample) - population_mean) / (stdev(sample) / sqrt(sample_size))"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"def df_single(sample_1):\n",
" \"\"\"Funkcja oblicza stopnie swobody dla jednej próbki\"\"\"\n",
" # TODO: I have no clue what to return from here\n",
" return len(sample_1)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def bootstrap_one_sample(sample, population_mean):\n",
" return t_test(\n",
"def bootstrap_one_sample(sample, population_mean, alternative=Alternatives.LESS):\n",
" p, t, ts = t_test_1_samp(\n",
" sample_1=sample,\n",
" df_fn=df_single,\n",
" t_stat_fn=t_stat_single,\n",
" population_mean=population_mean\n",
" )"
" population_mean=population_mean,\n",
" alternative=alternative,\n",
" )\n",
" \n",
" pretty_print_test(p, t, ts, f'średnia jest równa {population_mean}', alternative)\n",
" print()\n",
" return p, t, ts"
]
},
{
@@ -298,7 +330,18 @@
},
{
"cell_type": "code",
"execution_count": 61,
"execution_count": 80,
"metadata": {},
"outputs": [],
"source": [
"dummy = pd.DataFrame([1, 2, 3, 4, 5])\n",
"dummy2 = pd.DataFrame([4, 5, 6, 7, 8])\n",
"dummy3 = pd.DataFrame([1, 3 , 3, 4, 6])"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {
"collapsed": false,
"pycharm": {
@@ -310,7 +353,17 @@
"name": "stdout",
"output_type": "stream",
"text": [
"t: 6.854929920812628, df: 500, cv: 1.6479068539295045, p: 2.1091128843409024e-11\n",
"Wyniki bootstrapowej wersji testu T-studenta\n",
"\n",
"Hipoteza: średnia jest równa 165\n",
"Hipoteza alternatywna: średnia jest mniejsza\n",
"\n",
"p: 0.72\n",
"Wartość statystyki testowej z próby: [-229.1025971]\n",
"Wartości statystyk z prób boostrapowych:\n",
"[-239.4457368], [-201.5], [-176.97470898], [-256.14449047], [-436.1703468], ... (i 95 pozostałych)\n",
"\n",
"\n",
"\n"
]
}
@@ -318,8 +371,7 @@
"source": [
"#TODO: poprawić kod aby można było podawać kolumny\n",
"\n",
"t_stat, df, cv, p, _ = bootstrap_one_sample(dataset, 165)\n",
"pretty_print_full_stats(t_stat, df, cv, p)"
"p, t, ts = bootstrap_one_sample(dummy, 165)"
]
},
{
@@ -343,7 +395,7 @@
},
{
"cell_type": "code",
"execution_count": 159,
"execution_count": 82,
"metadata": {
"collapsed": false,
"pycharm": {
@@ -352,56 +404,15 @@
},
"outputs": [],
"source": [
"def t_stat_ind(sample_1, sample_2):\n",
" \"\"\"Funkcja oblicza wartość statystyki testowej dla dwóch próbek niezależnych\"\"\"\n",
" if sample_1.empty or sample_2.empty:\n",
" raise Exception(\"Empty sample\")\n",
" sample_1 = sample_1[0].values.tolist()\n",
" sample_2 = sample_2[0].values.tolist()\n",
" sed = sqrt(sem(sample_1)**2 + sem(sample_2)**2)\n",
" return (mean(sample_1) - mean(sample_2)) / sed"
]
},
{
"cell_type": "code",
"execution_count": 162,
"metadata": {},
"outputs": [],
"source": [
"def df_ind(sample_1, sample_2):\n",
" \"\"\"Funkcja oblicza stopnie swobody dla dwóch próbek niezależnych\"\"\"\n",
" return len(sample_1) + len(sample_2) - 2"
]
},
{
"cell_type": "code",
"execution_count": 167,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def bootstrap_independent(sample_1, sample_2):\n",
" return t_test(\n",
"def bootstrap_independent(sample_1, sample_2, alternative=Alternatives.LESS):\n",
" p, t, ts = t_test_ind(\n",
" sample_1=sample_1,\n",
" sample_2=sample_2,\n",
" df_fn=df_ind,\n",
" t_stat_fn=t_stat_ind\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#TODO: Wyciągnąć wysokości kobiet i mężczyzn oraz poprawić kod aby można było podawać kolumny\n",
"t_stat, df, cv, p, _ = bootstrap_independent(dataset, dataset)\n",
"pretty_print_full_stats(t_stat, df, cv, p)"
" alternative=alternative,\n",
" )\n",
" \n",
" pretty_print_test(p, t, ts, 'średnie są takie same', alternative)\n",
" return p, t, ts"
]
},
{
@@ -424,7 +435,7 @@
},
{
"cell_type": "code",
"execution_count": 160,
"execution_count": 83,
"metadata": {
"collapsed": false,
"pycharm": {
@@ -433,49 +444,15 @@
},
"outputs": [],
"source": [
"def t_stat_dep(sample_1, sample_2, mu=0):\n",
" \"\"\"Funkcja oblicza wartość statystyki testowej dla dwóch próbek zależnych\"\"\"\n",
" if sample_1.empty or sample_2.empty:\n",
" raise Exception(\"Empty sample\")\n",
" sample_1 = sample_1[0].values.tolist()\n",
" sample_2 = sample_2[0].values.tolist()\n",
" differences = [x_1 - x_2 for x_1, x_2 in zip(sample_1, sample_2)]\n",
" sample_size = len(sample_1)\n",
" return (mean(differences) - mu) / (stdev(differences) / sqrt(sample_size))"
]
},
{
"cell_type": "code",
"execution_count": 161,
"metadata": {},
"outputs": [],
"source": [
"def df_dep(sample_1, sample_2):\n",
" \"\"\"Funkcja oblicza stopnie swobody dla dwóch próbek zależnych\"\"\"\n",
" l1, l2 = len(sample_1), len(sample_2)\n",
" if l1 != l2:\n",
" raise Exception(\"Samples aren't of equal length\")\n",
" return l1"
]
},
{
"cell_type": "code",
"execution_count": 168,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def bootstrap_dependent(sample_1, sample_2):\n",
" return t_test(\n",
"def bootstrap_dependent(sample_1, sample_2, alternative=Alternatives.LESS):\n",
" p, t, ts = t_test_dep(\n",
" sample_1=sample_1,\n",
" sample_2=sample_2,\n",
" df_fn=df_dep,\n",
" t_stat_fn=t_stat_dep\n",
" )"
" alternative=alternative,\n",
" )\n",
" \n",
" pretty_print_test(p, t, ts, 'średnie są takie same', alternative)\n",
" return p, t, ts"
]
},
{
@@ -503,7 +480,7 @@
},
{
"cell_type": "code",
"execution_count": 171,
"execution_count": 84,
"metadata": {
"collapsed": false,
"pycharm": {
@@ -532,76 +509,25 @@
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Statystyka testowa dla jednej próby:\n",
"1.414213562373095 - z naszej funkcji\n",
"[1.41421356] - z gotowej biblioteki\n",
"\n",
"Statystyka testowa dla dwóch prób niezależnych:\n",
"-3.0 - z naszej funkcji\n",
"[-3.] - z gotowej biblioteki\n",
"\n",
"Statystyka testowa dla dwóch prób zależnych:\n",
"-1.6329931618554525 - z naszej funkcji\n",
"[-1.63299316] - z gotowej biblioteki\n",
"\n"
]
}
],
"source": [
"# Testy dla samych statystyk testowych\n",
"def pretty_print_stats(t_stat_selfmade, t_stat_lib, suffix):\n",
" print(f'Statystyka testowa dla {suffix}:')\n",
" print(t_stat_selfmade, '- z naszej funkcji')\n",
" print(t_stat_lib, '- z gotowej biblioteki')\n",
" print()\n",
" \n",
"dummy = pd.DataFrame([1, 2, 3, 4, 5])\n",
"dummy2 = pd.DataFrame([4, 5, 6, 7, 8])\n",
"dummy3 = pd.DataFrame([1, 3 , 3, 4, 6])\n",
"\n",
"t_stat_selfmade = t_stat_single(dummy, 2)\n",
"t_stat_lib, _ = ttest_1samp(dummy, 2)\n",
"pretty_print_stats(t_stat_selfmade, t_stat_lib, 'jednej próby')\n",
"\n",
"t_stat_selfmade = t_stat_ind(dummy, dummy2)\n",
"t_stat_lib, _ = ttest_ind(dummy, dummy2)\n",
"pretty_print_stats(t_stat_selfmade, t_stat_lib, 'dwóch prób niezależnych')\n",
"\n",
"t_stat_selfmade = t_stat_dep(dummy, dummy3)\n",
"t_stat_lib, _ = ttest_rel(dummy, dummy3)\n",
"pretty_print_stats(t_stat_selfmade, t_stat_lib, 'dwóch prób zależnych')"
]
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 85,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Statystyki dla jednej próby:\n",
"t: 1.8073147056683616, df: 5, cv: 2.015048372669157, p: 0.13052275003443325\n",
"Wyniki bootstrapowej wersji testu T-studenta\n",
"\n",
"Hipoteza: średnia jest równa 2\n",
"Hipoteza alternatywna: średnia jest mniejsza\n",
"\n",
"p: 0.35\n",
"Wartość statystyki testowej z próby: [1.41421356]\n",
"Wartości statystyk z prób boostrapowych:\n",
"[2.44948974], [3.13785816], [1.72328087], [0.27216553], [1.17669681], ... (i 95 pozostałych)\n",
"\n",
"Statystyki dla dwóch prób zależnych:\n",
"t: 3.0790273716290404, df: 5, cv: 2.015048372669157, p: 0.027500015466573435\n",
"\n",
"Statystyki dla dwóch prób niezależnych:\n",
"t: 2.8109511013364576, df: 8, cv: 1.8595480375228421, p: 0.02280961069987497\n",
"\n"
]
}
@@ -609,22 +535,66 @@
"source": [
"# Testy z bootstrappowaniem\n",
"\n",
"def pretty_print_full_stats(t_stat, df, cv, p):\n",
" print(f't: {t_stat}, df: {df}, cv: {cv}, p: {p}\\n')\n",
"\n",
"print(type(dummy))\n",
"\n",
"print('Statystyki dla jednej próby:')\n",
"t_stat, df, cv, p, _ = bootstrap_one_sample(dummy, 2)\n",
"pretty_print_full_stats(t_stat, df, cv, p)\n",
"\n",
"p, t, ts = bootstrap_one_sample(dummy, 2)"
]
},
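For reference, the bootstrap p above (0.35) can be set against scipy's parametric one-sided test on the same toy data; a sketch for comparison only, not part of the commit:

# Parametric counterpart of the bootstrap one-sample test above.
parametric = ttest_1samp(dummy, 2, alternative=Alternatives.LESS.value)
print(f'parametric p: {parametric.pvalue}')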
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Statystyki dla dwóch prób zależnych:\n",
"Wyniki bootstrapowej wersji testu T-studenta\n",
"\n",
"Hipoteza: średnie są takie same\n",
"Hipoteza alternatywna: średnia jest mniejsza\n",
"\n",
"p: 1.0\n",
"Wartość statystyki testowej z próby: [10.61445555]\n",
"Wartości statystyk z prób boostrapowych:\n",
"[-2.66666667], [-0.14359163], [0.21199958], [0.11470787], [0.76696499], ... (i 95 pozostałych)\n",
"\n",
"\n"
]
}
],
"source": [
"print('Statystyki dla dwóch prób zależnych:')\n",
"t_stat, df, cv, p, _ = bootstrap_dependent(dummy2, dummy3)\n",
"pretty_print_full_stats(t_stat, df, cv, p)\n",
"\n",
"p, t, ts = bootstrap_dependent(dummy2, dummy3)"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Statystyki dla dwóch prób niezależnych:\n",
"Wyniki bootstrapowej wersji testu T-studenta\n",
"\n",
"Hipoteza: średnie są takie same\n",
"Hipoteza alternatywna: średnia jest mniejsza\n",
"\n",
"p: 0.95\n",
"Wartość statystyki testowej z próby: [2.4140394]\n",
"Wartości statystyk z prób boostrapowych:\n",
"[-2.20937908], [0.13187609], [-0.81110711], [-0.94280904], [-0.77151675], ... (i 95 pozostałych)\n",
"\n",
"\n"
]
}
],
"source": [
"print('Statystyki dla dwóch prób niezależnych:')\n",
"t_stat, df, cv, p, _ = bootstrap_independent(dummy2, dummy3)\n",
"pretty_print_full_stats(t_stat, df, cv, p)"
"p, t, ts = bootstrap_independent(dummy2, dummy3)"
]
}
],
@@ -633,8 +603,12 @@
"hash": "11938c6bc6919ae2720b4d5011047913343b08a43b18698fd82dedb0d4417594"
},
"kernelspec": {
"display_name": "Python 3.9.1 64-bit",
"language": "python",
"display_name": "Python 3.8.10 64-bit",
"metadata": {
"interpreter": {
"hash": "767d51c1340bd893661ea55ea3124f6de3c7a262a8b4abca0554b478b1e2ff90"
}
},
"name": "python3"
},
"language_info": {
@@ -648,8 +622,7 @@
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
},
"orig_nbformat": 4
}
},
"nbformat": 4,
"nbformat_minor": 2