diff --git a/bootstrap-t.ipynb b/bootstrap-t.ipynb index 8f5b40c..feee38d 100644 --- a/bootstrap-t.ipynb +++ b/bootstrap-t.ipynb @@ -25,7 +25,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 68, "metadata": { "pycharm": { "name": "#%%\n" @@ -35,18 +35,14 @@ "source": [ "import numpy as np\n", "import pandas as pd\n", - "from math import sqrt\n", - "from scipy import stats\n", - "from scipy.stats import sem\n", - "from scipy.stats import t\n", "import matplotlib.pyplot as plt\n", - "from statistics import mean, stdev\n", - "from scipy.stats import ttest_ind, ttest_1samp, ttest_rel" + "from enum import Enum\n", + "from scipy.stats import ttest_ind, ttest_1samp, ttest_rel, shapiro" ] }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 69, "metadata": {}, "outputs": [], "source": [ @@ -55,29 +51,39 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 70, "metadata": {}, "outputs": [], "source": [ - "def calculate_p(t_stat, df):\n", - " \"\"\"Funkcja oblicza wartość *p* na podstawie statystyki testowej i stopni swobody\"\"\"\n", - " return (1.0 - t.cdf(abs(t_stat), df)) * 2.0" + "class Alternatives(Enum):\n", + " LESS = 'less'\n", + " GREATER = 'greater'" ] }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 71, "metadata": {}, "outputs": [], "source": [ - "def calculate_cv(df, alpha=0.05):\n", - " \"\"\"Funkcja oblicza wartość krytyczną (critical value)\"\"\"\n", - " return t.ppf(1.0 - alpha, df)" + "def calculate_t_difference(t_stat_sample, t_stat_list, alternative):\n", + " \"\"\"\n", + " Funkcja oblicza procent statystyk testowych powstałych z prób bootstrapowych, \n", + " które róznią się od statystyki testowej powstałej ze zbioru według hipotezy alternatywnej.\n", + " \"\"\"\n", + " all_stats = len(t_stat_list)\n", + " stats_different_count = 0\n", + " for t_stat_boot in t_stat_list:\n", + " if alternative is Alternatives.LESS and t_stat_boot < t_stat_sample:\n", + " stats_different_count += 1 \n", + " elif alternative is Alternatives.GREATER and t_stat_boot > t_stat_sample:\n", + " stats_different_count += 1\n", + " return stats_different_count / all_stats" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 72, "metadata": { "pycharm": { "name": "#%%\n" @@ -85,57 +91,112 @@ }, "outputs": [], "source": [ - "def t_test(sample_1, sample_2=None, df_fn=df_single, t_stat_fn=t_stat_single, population_mean=None, alpha=0.05):\n", + "def t_test_1_samp(sample_1, population_mean=None, alternative=Alternatives.LESS):\n", " \"\"\"\n", - " Funkcja przeprowadza test T-studenta dla dwóch zmiennych.\n", - " liczba kolumn wynosi 1, test jest przeprowadzany dla jednej zmiennej.\n", - " @param df_fn - funkcja obliczająca stopnie swobody\n", - " @param t_stat_fn - funkcja obliczająca statystykę T\n", + " Funkcja przeprowadza test T-studenta dla jednej zmiennej.\n", " \"\"\"\n", - " t_stat_list = get_t_stats(sample_1, sample_2, t_stat_fn, population_mean=population_mean)\n", - " t_stat_sum = sum(t_stat_list)\n", + " t_stat_from_sample, _ = ttest_1samp(a=sample_1, popmean=population_mean, alternative=alternative.value)\n", + " t_stat_list = get_t_stats(sample_1, t_stat_fn=ttest_1samp, alternative=alternative, population_mean=population_mean)\n", "\n", - " data_size = sample_1.shape[0]\n", + " p = calculate_t_difference(t_stat_from_sample, t_stat_list, alternative)\n", "\n", - " t_stat = t_stat_sum / data_size\n", - " # TODO: dolna i górna opcja dają inne wyniki z jakiegoś powodu (???)\n", - " t_stat 
= mean(t_stat_list)\n", - "\n", - " if sample_2 is None:\n", - " df = df_fn(sample_1)\n", - " else:\n", - " df = df_fn(sample_1, sample_2)\n", - " cv = calculate_cv(df, alpha)\n", - " p = calculate_p(t_stat, df)\n", - " return t_stat, df, cv, p, t_stat_list" + " return p, t_stat_from_sample, t_stat_list" ] }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 73, "metadata": {}, "outputs": [], "source": [ - "def get_t_stats(sample_1, sample_2=None, t_stat_fn=t_stat_single, population_mean=None):\n", + "def t_test_ind(sample_1, sample_2, alternative=Alternatives.LESS):\n", + " \"\"\"\n", + " Funkcja przeprowadza test T-studenta dla dwóch zmiennych niezależnych.\n", + " \"\"\"\n", + " t_stat_from_sample, _ = ttest_ind(sample_1, sample_2, alternative=alternative.value)\n", + " t_stat_list = get_t_stats(sample_1, sample_2, alternative=alternative, t_stat_fn=ttest_ind)\n", + "\n", + " p = calculate_t_difference(t_stat_from_sample, t_stat_list, alternative)\n", + "\n", + " return p, t_stat_from_sample, t_stat_list" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "def t_test_dep(sample_1, sample_2, alternative=Alternatives.LESS):\n", + " \"\"\"\n", + " Funkcja przeprowadza test T-studenta dla dwóch zmiennych zależnych.\n", + " \"\"\"\n", + " t_stat_list = get_t_stats(sample_1, sample_2, alternative=alternative, t_stat_fn=ttest_rel)\n", + " t_stat_from_sample, _ = ttest_rel(sample_1, sample_2, alternative=alternative.value)\n", + "\n", + " p = calculate_t_difference(t_stat_from_sample, t_stat_list, alternative)\n", + "\n", + " return p, t_stat_from_sample, t_stat_list" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "def get_t_stats(sample_1, sample_2=None, t_stat_fn=ttest_1samp, alternative=Alternatives.LESS, population_mean=None):\n", " \"\"\"Funkcja oblicza listę statystyk testowych dla każdej próbki bootstrapowej wybranej na podstawie danych sample_1 i sample_2\"\"\"\n", " t_stat_list = []\n", "\n", " # One sample test\n", - " if t_stat_fn==t_stat_single:\n", + " if t_stat_fn is ttest_1samp and sample_2 is None:\n", " if not population_mean:\n", " raise Exception(\"population_mean not provided\")\n", " for bootstrap in generate_bootstraps(sample_1):\n", - " stat = t_stat_fn(bootstrap, population_mean)\n", + " stat, _ = t_stat_fn(bootstrap, population_mean, alternative=alternative.value)\n", " t_stat_list.append(stat)\n", " return t_stat_list\n", "\n", " # Two sample test\n", - " for bootstrap_1, bootstrap_2 in zip(generate_bootstraps(sample_1), generate_bootstraps(sample_2)):\n", - " stat = t_stat_fn(bootstrap_1, bootstrap_2)\n", + " for bootstrap_sample in generate_bootstraps(pd.concat((sample_1, sample_2), ignore_index=True)):\n", + " bootstrap_1 = bootstrap_sample.iloc[: len(bootstrap_sample) // 2]\n", + " bootstrap_2 = bootstrap_sample.iloc[len(bootstrap_sample) // 2 :]\n", + " stat, _ = t_stat_fn(bootstrap_1, bootstrap_2, alternative=alternative.value)\n", " t_stat_list.append(stat)\n", " return t_stat_list" ] }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "def pretty_print_test(p, t_stat_from_sample, t_stat_list, thesis, alternative, max_print=5):\n", + " print('Wyniki bootstrapowej wersji testu T-studenta')\n", + " print()\n", + " print(f'Hipoteza: {thesis}')\n", + " if alternative is Alternatives.LESS:\n", + " print(f'Hipoteza alternatywna: średnia jest mniejsza')\n", + " else:\n", + 
" print(f'Hipoteza alternatywna: średnia jest większa')\n", + " print()\n", + " print(f'p: {p}')\n", + " print(f'Wartość statystyki testowej z próby: {t_stat_from_sample}')\n", + " print(f'Wartości statystyk z prób boostrapowych:')\n", + "\n", + " t_stat_list_len = len(t_stat_list)\n", + " for i in range(min(max_print, t_stat_list_len)):\n", + " print(f'{t_stat_list[i]}, ', end='')\n", + " if max_print < t_stat_list_len:\n", + " remaining = t_stat_list_len - max_print\n", + " print(f'... (i {remaining} pozostałych)')\n", + "\n", + " print()\n", + " print()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -147,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 77, "metadata": {}, "outputs": [ { @@ -164,7 +225,7 @@ "source": [ "ALPHA = 0.05\n", "female_heights = dataset['Female height'].to_numpy()\n", - "shapiro_test = stats.shapiro(female_heights)\n", + "shapiro_test = shapiro(female_heights)\n", "\n", "if shapiro_test.pvalue > ALPHA:\n", " print(\"Female height: Dane mają rozkład normalny.\")\n", @@ -172,7 +233,7 @@ " print(\"Female height: Dane nie mają rozkładu normalnego.\")\n", "\n", "male_heights = dataset['Male height'].to_numpy()\n", - "shapiro_test = stats.shapiro(male_heights)\n", + "shapiro_test = shapiro(male_heights)\n", "\n", "if shapiro_test.pvalue > ALPHA:\n", " print(\"Male height: Dane mają rozkład normalny.\")\n", @@ -180,7 +241,7 @@ " print(\"Male height: Dane nie mają rozkładu normalnego.\")\n", "\n", "weights_before = dataset['Weight before'].to_numpy()\n", - "shapiro_test = stats.shapiro(weights_before)\n", + "shapiro_test = shapiro(weights_before)\n", "\n", "if shapiro_test.pvalue > ALPHA:\n", " print(\"Weight before: Dane mają rozkład normalny.\")\n", @@ -188,7 +249,7 @@ " print(\"Weight before: Dane nie mają rozkładu normalnego.\")\n", "\n", "weights_after = dataset['Weight after'].to_numpy()\n", - "shapiro_test = stats.shapiro(weights_after)\n", + "shapiro_test = shapiro(weights_after)\n", "\n", "if shapiro_test.pvalue > ALPHA:\n", " print(\"Weight after: Dane mają rozkład normalny.\")\n", @@ -211,7 +272,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 78, "metadata": { "pycharm": { "name": "#%%\n" @@ -239,7 +300,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 79, "metadata": { "collapsed": false, "pycharm": { @@ -248,45 +309,16 @@ }, "outputs": [], "source": [ - "def t_stat_single(sample, population_mean):\n", - " \"\"\"Funkcja oblicza wartość statystyki testowej dla jednej próbki\"\"\"\n", - " if sample.empty:\n", - " raise Exception(\"Empty sample\")\n", - " sample = sample['Height'].values.tolist()\n", - " sample_size = len(sample)\n", - " return (mean(sample) - population_mean) / (stdev(sample) / sqrt(sample_size))" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [], - "source": [ - "def df_single(sample_1):\n", - " \"\"\"Funkcja oblicza stopnie swobody dla jednej próbki\"\"\"\n", - " # TODO: I have no clue what to return from here\n", - " return len(sample_1)" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "def bootstrap_one_sample(sample, population_mean):\n", - " return t_test(\n", + "def bootstrap_one_sample(sample, population_mean, alternative=Alternatives.LESS):\n", + " p, t, ts = t_test_1_samp(\n", " sample_1=sample,\n", - " df_fn=df_single,\n", - " t_stat_fn=t_stat_single,\n", - " 
population_mean=population_mean\n", - " )" + " population_mean=population_mean,\n", + " alternative=alternative,\n", + " )\n", + " \n", + " pretty_print_test(p, t, ts, f'średnia jest równa {population_mean}', alternative)\n", + " print()\n", + " return p, t, ts" ] }, { @@ -298,7 +330,18 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 80, + "metadata": {}, + "outputs": [], + "source": [ + "dummy = pd.DataFrame([1, 2, 3, 4, 5])\n", + "dummy2 = pd.DataFrame([4, 5, 6, 7, 8])\n", + "dummy3 = pd.DataFrame([1, 3 , 3, 4, 6])" + ] + }, + { + "cell_type": "code", + "execution_count": 81, "metadata": { "collapsed": false, "pycharm": { @@ -310,7 +353,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "t: 6.854929920812628, df: 500, cv: 1.6479068539295045, p: 2.1091128843409024e-11\n", + "Wyniki bootstrapowej wersji testu T-studenta\n", + "\n", + "Hipoteza: średnia jest równa 165\n", + "Hipoteza alternatywna: średnia jest mniejsza\n", + "\n", + "p: 0.72\n", + "Wartość statystyki testowej z próby: [-229.1025971]\n", + "Wartości statystyk z prób boostrapowych:\n", + "[-239.4457368], [-201.5], [-176.97470898], [-256.14449047], [-436.1703468], ... (i 95 pozostałych)\n", + "\n", + "\n", "\n" ] } @@ -318,8 +371,7 @@ "source": [ "#TODO: poprawić kod aby można było podawać kolumny\n", "\n", - "t_stat, df, cv, p, _ = bootstrap_one_sample(dataset, 165)\n", - "pretty_print_full_stats(t_stat, df, cv, p)" + "p, t, ts = bootstrap_one_sample(dummy, 165)" ] }, { @@ -343,7 +395,7 @@ }, { "cell_type": "code", - "execution_count": 159, + "execution_count": 82, "metadata": { "collapsed": false, "pycharm": { @@ -352,56 +404,15 @@ }, "outputs": [], "source": [ - "def t_stat_ind(sample_1, sample_2):\n", - " \"\"\"Funkcja oblicza wartość statystyki testowej dla dwóch próbek niezależnych\"\"\"\n", - " if sample_1.empty or sample_2.empty:\n", - " raise Exception(\"Empty sample\")\n", - " sample_1 = sample_1[0].values.tolist()\n", - " sample_2 = sample_2[0].values.tolist()\n", - " sed = sqrt(sem(sample_1)**2 + sem(sample_2)**2)\n", - " return (mean(sample_1) - mean(sample_2)) / sed" - ] - }, - { - "cell_type": "code", - "execution_count": 162, - "metadata": {}, - "outputs": [], - "source": [ - "def df_ind(sample_1, sample_2):\n", - " \"\"\"Funkcja oblicza stopnie swobody dla dwóch próbek niezależnych\"\"\"\n", - " return len(sample_1) + len(sample_2) - 2" - ] - }, - { - "cell_type": "code", - "execution_count": 167, - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "def bootstrap_independent(sample_1, sample_2):\n", - " return t_test(\n", + "def bootstrap_independent(sample_1, sample_2, alternative=Alternatives.LESS):\n", + " p, t, ts = t_test_ind(\n", " sample_1=sample_1,\n", " sample_2=sample_2,\n", - " df_fn=df_ind,\n", - " t_stat_fn=t_stat_ind\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#TODO: Wyciągnąć wysokości kobiet i mężczyzn oraz poprawić kod aby można było podawać kolumny\n", - "t_stat, df, cv, p, _ = bootstrap_independent(dataset, dataset)\n", - "pretty_print_full_stats(t_stat, df, cv, p)" + " alternative=alternative,\n", + " )\n", + " \n", + " pretty_print_test(p, t, ts, 'średnie są takie same', alternative)\n", + " return p, t, ts" ] }, { @@ -424,7 +435,7 @@ }, { "cell_type": "code", - "execution_count": 160, + "execution_count": 83, "metadata": { "collapsed": false, "pycharm": { @@ -433,49 +444,15 @@ }, "outputs": [], "source": [ - 
"def t_stat_dep(sample_1, sample_2, mu=0):\n", - " \"\"\"Funkcja oblicza wartość statystyki testowej dla dwóch próbek zależnych\"\"\"\n", - " if sample_1.empty or sample_2.empty:\n", - " raise Exception(\"Empty sample\")\n", - " sample_1 = sample_1[0].values.tolist()\n", - " sample_2 = sample_2[0].values.tolist()\n", - " differences = [x_1 - x_2 for x_1, x_2 in zip(sample_1, sample_2)]\n", - " sample_size = len(sample_1)\n", - " return (mean(differences) - mu) / (stdev(differences) / sqrt(sample_size))" - ] - }, - { - "cell_type": "code", - "execution_count": 161, - "metadata": {}, - "outputs": [], - "source": [ - "def df_dep(sample_1, sample_2):\n", - " \"\"\"Funkcja oblicza stopnie swobody dla dwóch próbek zależnych\"\"\"\n", - " l1, l2 = len(sample_1), len(sample_2)\n", - " if l1 != l2:\n", - " raise Exception(\"Samples aren't of equal length\")\n", - " return l1" - ] - }, - { - "cell_type": "code", - "execution_count": 168, - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "def bootstrap_dependent(sample_1, sample_2):\n", - " return t_test(\n", + "def bootstrap_dependent(sample_1, sample_2, alternative=Alternatives.LESS):\n", + " p, t, ts = t_test_dep(\n", " sample_1=sample_1,\n", " sample_2=sample_2,\n", - " df_fn=df_dep,\n", - " t_stat_fn=t_stat_dep\n", - " )" + " alternative=alternative,\n", + " )\n", + " \n", + " pretty_print_test(p, t, ts, 'średnie są takie same', alternative)\n", + " return p, t, ts" ] }, { @@ -503,7 +480,7 @@ }, { "cell_type": "code", - "execution_count": 171, + "execution_count": 84, "metadata": { "collapsed": false, "pycharm": { @@ -532,76 +509,25 @@ }, { "cell_type": "code", - "execution_count": 31, - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Statystyka testowa dla jednej próby:\n", - "1.414213562373095 - z naszej funkcji\n", - "[1.41421356] - z gotowej biblioteki\n", - "\n", - "Statystyka testowa dla dwóch prób niezależnych:\n", - "-3.0 - z naszej funkcji\n", - "[-3.] 
- z gotowej biblioteki\n", - "\n", - "Statystyka testowa dla dwóch prób zależnych:\n", - "-1.6329931618554525 - z naszej funkcji\n", - "[-1.63299316] - z gotowej biblioteki\n", - "\n" - ] - } - ], - "source": [ - "# Testy dla samych statystyk testowych\n", - "def pretty_print_stats(t_stat_selfmade, t_stat_lib, suffix):\n", - " print(f'Statystyka testowa dla {suffix}:')\n", - " print(t_stat_selfmade, '- z naszej funkcji')\n", - " print(t_stat_lib, '- z gotowej biblioteki')\n", - " print()\n", - " \n", - "dummy = pd.DataFrame([1, 2, 3, 4, 5])\n", - "dummy2 = pd.DataFrame([4, 5, 6, 7, 8])\n", - "dummy3 = pd.DataFrame([1, 3 , 3, 4, 6])\n", - "\n", - "t_stat_selfmade = t_stat_single(dummy, 2)\n", - "t_stat_lib, _ = ttest_1samp(dummy, 2)\n", - "pretty_print_stats(t_stat_selfmade, t_stat_lib, 'jednej próby')\n", - "\n", - "t_stat_selfmade = t_stat_ind(dummy, dummy2)\n", - "t_stat_lib, _ = ttest_ind(dummy, dummy2)\n", - "pretty_print_stats(t_stat_selfmade, t_stat_lib, 'dwóch prób niezależnych')\n", - "\n", - "t_stat_selfmade = t_stat_dep(dummy, dummy3)\n", - "t_stat_lib, _ = ttest_rel(dummy, dummy3)\n", - "pretty_print_stats(t_stat_selfmade, t_stat_lib, 'dwóch prób zależnych')" - ] - }, - { - "cell_type": "code", - "execution_count": 39, + "execution_count": 85, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\n", "Statystyki dla jednej próby:\n", - "t: 1.8073147056683616, df: 5, cv: 2.015048372669157, p: 0.13052275003443325\n", + "Wyniki bootstrapowej wersji testu T-studenta\n", + "\n", + "Hipoteza: średnia jest równa 2\n", + "Hipoteza alternatywna: średnia jest mniejsza\n", + "\n", + "p: 0.35\n", + "Wartość statystyki testowej z próby: [1.41421356]\n", + "Wartości statystyk z prób boostrapowych:\n", + "[2.44948974], [3.13785816], [1.72328087], [0.27216553], [1.17669681], ... (i 95 pozostałych)\n", "\n", - "Statystyki dla dwóch prób zależnych:\n", - "t: 3.0790273716290404, df: 5, cv: 2.015048372669157, p: 0.027500015466573435\n", "\n", - "Statystyki dla dwóch prób niezależnych:\n", - "t: 2.8109511013364576, df: 8, cv: 1.8595480375228421, p: 0.02280961069987497\n", "\n" ] } @@ -609,22 +535,66 @@ "source": [ "# Testy z bootstrappowaniem\n", "\n", - "def pretty_print_full_stats(t_stat, df, cv, p):\n", - " print(f't: {t_stat}, df: {df}, cv: {cv}, p: {p}\\n')\n", - "\n", - "print(type(dummy))\n", - "\n", "print('Statystyki dla jednej próby:')\n", - "t_stat, df, cv, p, _ = bootstrap_one_sample(dummy, 2)\n", - "pretty_print_full_stats(t_stat, df, cv, p)\n", - "\n", + "p, t, ts = bootstrap_one_sample(dummy, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Statystyki dla dwóch prób zależnych:\n", + "Wyniki bootstrapowej wersji testu T-studenta\n", + "\n", + "Hipoteza: średnie są takie same\n", + "Hipoteza alternatywna: średnia jest mniejsza\n", + "\n", + "p: 1.0\n", + "Wartość statystyki testowej z próby: [10.61445555]\n", + "Wartości statystyk z prób boostrapowych:\n", + "[-2.66666667], [-0.14359163], [0.21199958], [0.11470787], [0.76696499], ... 
(i 95 pozostałych)\n", + "\n", + "\n" + ] + } + ], + "source": [ "print('Statystyki dla dwóch prób zależnych:')\n", - "t_stat, df, cv, p, _ = bootstrap_dependent(dummy2, dummy3)\n", - "pretty_print_full_stats(t_stat, df, cv, p)\n", - "\n", + "p, t, ts = bootstrap_dependent(dummy2, dummy3)" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Statystyki dla dwóch prób niezależnych:\n", + "Wyniki bootstrapowej wersji testu T-studenta\n", + "\n", + "Hipoteza: średnie są takie same\n", + "Hipoteza alternatywna: średnia jest mniejsza\n", + "\n", + "p: 0.95\n", + "Wartość statystyki testowej z próby: [2.4140394]\n", + "Wartości statystyk z prób boostrapowych:\n", + "[-2.20937908], [0.13187609], [-0.81110711], [-0.94280904], [-0.77151675], ... (i 95 pozostałych)\n", + "\n", + "\n" + ] + } + ], + "source": [ "print('Statystyki dla dwóch prób niezależnych:')\n", - "t_stat, df, cv, p, _ = bootstrap_independent(dummy2, dummy3)\n", - "pretty_print_full_stats(t_stat, df, cv, p)" + "p, t, ts = bootstrap_independent(dummy2, dummy3)" ] } ], @@ -633,8 +603,12 @@ "hash": "11938c6bc6919ae2720b4d5011047913343b08a43b18698fd82dedb0d4417594" }, "kernelspec": { - "display_name": "Python 3.9.1 64-bit", - "language": "python", + "display_name": "Python 3.8.10 64-bit", + "metadata": { + "interpreter": { + "hash": "767d51c1340bd893661ea55ea3124f6de3c7a262a8b4abca0554b478b1e2ff90" + } + }, "name": "python3" }, "language_info": { @@ -648,8 +622,7 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.1" - }, - "orig_nbformat": 4 + } }, "nbformat": 4, "nbformat_minor": 2
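
Note (not part of the patch itself): the new cells implement the bootstrap variant of the one-sample t-test by resampling the data, recomputing the t statistic with ttest_1samp on every bootstrap sample, and taking the share of bootstrap statistics that fall on the alternative's side of the statistic computed from the original sample (this is what calculate_t_difference returns as p). A minimal standalone sketch of that idea follows. It assumes scipy >= 1.6 (for the alternative= keyword) and uses a plain numpy resampler, since the notebook's generate_bootstraps() helper is not visible in this diff; names such as bootstrap_t_1samp_sketch and n_bootstraps are illustrative, not the notebook's API.

import numpy as np
from scipy.stats import ttest_1samp

def bootstrap_t_1samp_sketch(sample, population_mean, alternative='less',
                             n_bootstraps=100, seed=0):
    """Empirical p-value: the share of bootstrap t statistics that are more
    extreme, in the direction of the alternative, than the t statistic of the
    original sample (the comparison calculate_t_difference performs)."""
    rng = np.random.default_rng(seed)   # assumed resampler, not the notebook's generate_bootstraps()
    sample = np.asarray(sample, dtype=float)
    t_sample = ttest_1samp(sample, population_mean, alternative=alternative).statistic

    t_boot = []
    for _ in range(n_bootstraps):
        # draw a bootstrap sample of the same size, with replacement
        resample = rng.choice(sample, size=sample.size, replace=True)
        t_boot.append(ttest_1samp(resample, population_mean,
                                  alternative=alternative).statistic)
    t_boot = np.asarray(t_boot)

    if alternative == 'less':
        p = np.mean(t_boot < t_sample)
    else:  # 'greater'
        p = np.mean(t_boot > t_sample)
    return p, t_sample, t_boot

# Example on the same dummy data the notebook uses:
# p, t, ts = bootstrap_t_1samp_sketch([1, 2, 3, 4, 5], population_mean=2)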
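
For the two-sample tests, get_t_stats pools both samples with pd.concat, draws each bootstrap sample from the pool, and splits it in half before calling ttest_ind / ttest_rel. Pooling makes the resamples consistent with the null hypothesis of equal means, though for the paired test (ttest_rel) the pooled split also discards the pairing between observations, which may be worth revisiting. A rough sketch of the independent-samples version, under the same assumptions as above (illustrative names, scipy >= 1.6):

import numpy as np
from scipy.stats import ttest_ind

def bootstrap_t_ind_sketch(sample_1, sample_2, alternative='less',
                           n_bootstraps=100, seed=0):
    """Pooled resampling: draw each bootstrap sample from both groups combined,
    split it in half, and recompute the independent-samples t statistic."""
    sample_1 = np.asarray(sample_1, dtype=float)
    sample_2 = np.asarray(sample_2, dtype=float)
    pooled = np.concatenate([sample_1, sample_2])
    t_sample = ttest_ind(sample_1, sample_2, alternative=alternative).statistic

    rng = np.random.default_rng(seed)
    half = pooled.size // 2
    t_boot = []
    for _ in range(n_bootstraps):
        resample = rng.choice(pooled, size=pooled.size, replace=True)
        # first half vs. second half of the pooled bootstrap sample
        t_boot.append(ttest_ind(resample[:half], resample[half:],
                                alternative=alternative).statistic)
    t_boot = np.asarray(t_boot)

    p = np.mean(t_boot < t_sample) if alternative == 'less' else np.mean(t_boot > t_sample)
    return p, t_sample, t_boot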