def _t_ratio(effect, standard_error):
    """Return ``effect / standard_error``, the core of every t statistic here.

    Raises:
        ZeroDivisionError: if ``standard_error`` is exactly zero (for example
            a resample whose values are all identical), because the statistic
            is undefined in that case.
    """
    # Raise explicitly so the failure does not depend on whether the operands
    # happen to be Python floats or NumPy scalars.
    if standard_error == 0:
        raise ZeroDivisionError("standard error is zero; the t statistic is undefined")
    return effect / standard_error


def t_stat_single(sample, population_mean=2):
    """Compute the one-sample t statistic.

    Args:
        sample: object indexable as ``sample[0].values`` (e.g. a DataFrame
            whose column ``0`` holds the observations).
        population_mean: hypothesised mean the sample mean is compared with.
            ``t_test`` calls this function with the sample only, so the
            default is what the bootstrap path uses.

    Returns:
        ``(mean - population_mean) / (stdev / sqrt(n))``.

    Raises:
        ZeroDivisionError: if the sample standard deviation is zero.
    """
    values = sample[0].values.tolist()
    # Standard error of the mean: s / sqrt(n).
    standard_error = stdev(values) / sqrt(len(values))
    return _t_ratio(mean(values) - population_mean, standard_error)


def t_stat_ind(sample_1, sample_2):
    """Compute the t statistic for two independent samples.

    Args:
        sample_1, sample_2: objects indexable as ``sample[0].values``.

    Returns:
        Difference of the sample means divided by
        ``sqrt(sem(sample_1)**2 + sem(sample_2)**2)``.

    Raises:
        ZeroDivisionError: if that combined standard error is zero.
    """
    values_1 = sample_1[0].values.tolist()
    values_2 = sample_2[0].values.tolist()
    # Standard error of the difference between the two means.
    sed = sqrt(sem(values_1) ** 2 + sem(values_2) ** 2)
    return _t_ratio(mean(values_1) - mean(values_2), sed)


def t_stat_dep(sample_1, sample_2):
    """Compute the t statistic for two dependent (paired) samples.

    Args:
        sample_1, sample_2: objects indexable as ``sample[0].values``; the
            i-th values of both form one pair.

    Returns:
        Mean of the pairwise differences divided by its standard error.

    Raises:
        ValueError: if the two samples differ in length.
        ZeroDivisionError: if the differences have zero standard deviation.
    """
    values_1 = sample_1[0].values.tolist()
    values_2 = sample_2[0].values.tolist()
    if len(values_1) != len(values_2):
        raise ValueError("paired samples must have the same number of observations")
    differences = [x_1 - x_2 for x_1, x_2 in zip(values_1, values_2)]
    # The constant is zero because we test whether the mean difference
    # differs from zero.
    mu = 0
    standard_error = stdev(differences) / sqrt(len(differences))
    return _t_ratio(mean(differences) - mu, standard_error)


def df_dep(sample_1, sample_2):
    """Return the degrees of freedom for a paired test: number of pairs minus one.

    Raises:
        AssertionError: if the samples differ in length.
    """
    l1, l2 = len(sample_1), len(sample_2)
    assert l1 == l2

    # One parameter (the mean difference) is estimated from the n pairs.
    return l1 - 1


def df_ind(sample_1, sample_2):
    """Return the degrees of freedom for two independent samples: n1 + n2 - 2."""
    return len(sample_1) + len(sample_2) - 2


def df_single(sample_1):
    """Return the degrees of freedom for a one-sample test: n - 1."""
    # One parameter (the mean) is estimated from the n observations.
    return len(sample_1) - 1


def calculate_p(t_stat, df):
    """Return the two-sided p-value of ``t_stat`` under Student's t with ``df`` degrees of freedom."""
    return (1.0 - t.cdf(abs(t_stat), df)) * 2.0


def calculate_cv(df, alpha=0.05):
    """Return the critical value ``t.ppf(1 - alpha, df)``.

    NOTE(review): this is the one-sided quantile, while ``calculate_p`` is
    two-sided; comparing ``abs(t_stat)`` with this value does not correspond
    to ``p < alpha``. Confirm which convention is intended.
    """
    return t.ppf(1.0 - alpha, df)


def bootstrap_one_sample(sample):
    """Run ``t_test`` in its one-sample configuration."""
    return t_test(
        sample_1=sample,
        df_fn=df_single,
        t_stat_fn=t_stat_single
    )


def bootstrap_independent(sample_1, sample_2):
    """Run ``t_test`` in its two-independent-samples configuration."""
    return t_test(
        sample_1=sample_1,
        sample_2=sample_2,
        df_fn=df_ind,
        t_stat_fn=t_stat_ind
    )


def bootstrap_dependent(sample_1, sample_2):
    """Run ``t_test`` in its paired-samples configuration."""
    return t_test(
        sample_1=sample_1,
        sample_2=sample_2,
        df_fn=df_dep,
        t_stat_fn=t_stat_dep
    )


def get_t_stats(sample_1, sample_2=None, t_stat_fn=t_stat_ind):
    """Compute the test statistic on every bootstrap resample.

    Args:
        sample_1: first data set, handed to ``generate_bootstraps``.
        sample_2: optional second data set; ``None`` selects the one-sample path.
        t_stat_fn: statistic to evaluate; called with one resample in the
            one-sample path and with two otherwise.

    Returns:
        List of statistics, one per usable resample. Resamples for which
        ``t_stat_fn`` raises ``ZeroDivisionError`` are skipped.
    """
    if sample_2 is None:
        resamples = ((bootstrap,) for bootstrap in generate_bootstraps(sample_1))
    else:
        # NOTE(review): the two data sets are resampled by separate
        # generate_bootstraps calls; if those draws are independent, the
        # pairing needed by the dependent test is lost. Confirm against
        # generate_bootstraps.
        resamples = zip(generate_bootstraps(sample_1), generate_bootstraps(sample_2))

    t_stat_list = []
    for resample in resamples:
        try:
            t_stat_list.append(t_stat_fn(*resample))
        except ZeroDivisionError:
            # Degenerate resample (zero standard error): the statistic is
            # undefined, so leave it out instead of aborting the whole run.
            continue
    return t_stat_list


def t_test(sample_1, sample_2=None, df_fn=df_ind, t_stat_fn=t_stat_ind, alpha=0.05):
    """Run the bootstrap t procedure and summarise it.

    With ``sample_2`` left as ``None`` the one-sample variant is run.

    Args:
        sample_1: first data set.
        sample_2: optional second data set.
        df_fn: function computing the degrees of freedom from the data set(s).
        t_stat_fn: function computing the t statistic of one resample.
        alpha: significance level passed to ``calculate_cv``.

    Returns:
        Tuple ``(t_stat, df, cv, p, t_stat_list)`` where ``t_stat`` is the
        mean of the bootstrap statistics in ``t_stat_list``.

    Raises:
        ValueError: if no bootstrap statistic could be computed.
    """
    t_stat_list = get_t_stats(sample_1, sample_2, t_stat_fn)
    if not t_stat_list:
        raise ValueError("no bootstrap statistics were produced")

    # Average over the number of statistics actually computed, not over the
    # number of observations in the data.
    t_stat = sum(t_stat_list) / len(t_stat_list)

    df = df_fn(sample_1) if sample_2 is None else df_fn(sample_1, sample_2)
    cv = calculate_cv(df, alpha)
    p = calculate_p(t_stat, df)

    return t_stat, df, cv, p, t_stat_list


def make_decision(data, columns):
    """Placeholder for the final accept/reject step; not implemented yet, returns ``None``."""
    # TODO
    pass


# Checks of the bare test statistics against SciPy's implementations.
def pretty_print_stats(t_stat_selfmade, t_stat_lib, suffix):
    """Print our statistic and the library's statistic under a heading naming the test."""
    print(f'Statystyka testowa dla {suffix}:')
    print(t_stat_selfmade, '- z naszej funkcji')
    print(t_stat_lib, '- z gotowej biblioteki')
    print()


dummy = pd.DataFrame([1, 2, 3, 4, 5])
dummy2 = pd.DataFrame([4, 5, 6, 7, 8])
dummy3 = pd.DataFrame([1, 3, 3, 4, 6])

# SciPy receives column 0 (a 1-D Series), so it returns a scalar that is
# directly comparable with our value instead of a one-element array.
t_stat_selfmade = t_stat_single(dummy, 2)
t_stat_lib, _ = ttest_1samp(dummy[0], 2)
pretty_print_stats(t_stat_selfmade, t_stat_lib, 'jednej próby')

t_stat_selfmade = t_stat_ind(dummy, dummy2)
t_stat_lib, _ = ttest_ind(dummy[0], dummy2[0])
pretty_print_stats(t_stat_selfmade, t_stat_lib, 'dwóch prób niezależnych')

t_stat_selfmade = t_stat_dep(dummy, dummy3)
t_stat_lib, _ = ttest_rel(dummy[0], dummy3[0])
pretty_print_stats(t_stat_selfmade, t_stat_lib, 'dwóch prób zależnych')
", - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEJCAYAAACT/UyFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAATd0lEQVR4nO3dfbRldX3f8feHh4jIg1CuZKqQawyJUpUJGWgiMWI0lUh9oBoNbQzLJk6aaI0kaZ3Yrkqy6lq4WiE1bWxwiaBBjYoSBDQi9TmtOCDPg9EmQwqMMEZawCQS4Ns/9h45ztxz59zL3Xffmd/7tdZZs/dvP3053Pu5+/zO3r+dqkKS1I59xi5AkrS6DH5JaozBL0mNMfglqTEGvyQ1xuCXpMYMFvxJjkry6SS3JLk5ya/37WcluSPJdf3rhUPVIEnaVYa6jj/JOmBdVV2b5GDgGuClwCuA+6vqPw9yYEnSovYbasdVtQ3Y1k/fl2QL8MTl7OuII46o+fn5FaxOkvZ+11xzzTeram7n9sGCf1KSeeBHgS8BJwGvS/KLwGbgN6vqnsW2n5+fZ/PmzYPXKUl7kyS3LdQ++Je7SQ4CLgbeUFX3Au8AngKsp/tE8LYp221MsjnJ5u3btw9dpiQ1Y9DgT7I/XehfVFUfAaiqu6rqoap6GHgncOJC21bVeVW1oao2zM3t8klFkrRMQ17VE+BdwJaqOmeifd3EaqcBNw1VgyRpV0P28Z8EvAq4Mcl1fdubgNOTrAcK2Ar8yoA1SJJ2MuRVPV8AssCiK4Y6piRp97xzV5IaY/BLUmMMfklqjMEvSY1ZlTt3paHNb7p8lONuPfvUUY4rPRqe8UtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGuODWKRHYawHwIAPgdHyecYvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYMFvxJjkry6SS3JLk5ya/37YcnuTLJ1/p/DxuqBknSroY8438Q+M2qOhb4ceC1SY4FNgFXVdUxwFX9vCRplQwW/FW1raqu7afvA7YATwReAlzYr3Yh8NKhapAk7WpV+viTzAM/CnwJOLKqtvWLvgEcuRo1SJI6gwd/koOAi4E3VNW9k8uqqoCast3GJJuTbN6+ffvQZUpSMwYN/iT704X+RVX1kb75riTr+uXrgLsX2raqzquqDVW1YW5ubsgyJakpQ17VE+BdwJaqOmdi0aXAGf30GcCfDFWDJGlXQz5z9yTgVcCNSa7r294EnA18MMkvAbcBrxiwBknSTgYL/qr6ApApi5831HElSYvzzl1JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUmP3GLkB7j/lNl49dgqQZeMYvSY0x+CWpMQa/JDVmsOBPcn6Su5PcNNF2VpI7klzXv1441PElSQubKfiTPGMZ+74AOGWB9nOran3/umIZ+5UkPQqznvH/QZKrk/xakkNn2aCqPgd8a/mlSZKGMFPwV9WzgX8BHAVck+R9SX5mmcd8XZIb+q6gw5a5D0nSMs3cx19VXwP+PfBG4DnA25PcmuSfLeF47wCeAqwHtgFvm7Ziko1JNifZvH379iUcQpK0mFn7+J+Z5FxgC/DTwIuq6mn99LmzHqyq7qqqh6rqYeCdwImLrHteVW2oqg1zc3OzHkKStBuznvH/PnAtcFxVvbaqrgWoqjvpPgXMJMm6idnTgJumrStJGsasQzacCvxtVT0EkGQf4ICq+puqe
u9CGyR5P3AycESS24E3AycnWQ8UsBX4lUdVvSRpyWYN/k8Bzwfu7+cPBD4JPGvaBlV1+gLN71pSdZKkFTdrV88BVbUj9OmnDxymJEnSkGYN/m8nOX7HTJIfA/52mJIkSUOatavnDcCHktwJBPh+4JVDFSVJGs5MwV9VX07yVOBH+qavVtXfD1eWJGkoS3kQywnAfL/N8UmoqvcMUpUkaTAzBX+S99LdcXsd8FDfXIDBL0l7mFnP+DcAx1ZVDVmMJGl4s17VcxPdF7qSpD3crGf8RwC3JLka+M6Oxqp68SBVSZIGM2vwnzVkEZKk1TPr5ZyfTfIDwDFV9akkBwL7DluaJGkIsw7L/Brgw8Af9k1PBC4ZqCZJ0oBm/XL3tcBJwL3w3YeyPGGooiRJw5k1+L9TVQ/smEmyH911/JKkPcyswf/ZJG8CHts/a/dDwMeGK0uSNJRZg38TsB24ke7hKVewhCdvSZLWjlmv6tnxjNx3DluOJGlos47V85cs0KdfVT+44hVJkga1lLF6djgA+Dng8JUvR5I0tJn6+Kvqryded1TV79E9gF2StIeZtavn+InZfeg+ASxlLH9J0hoxa3i/bWL6QWAr8IoVr0YrYn7T5WOXoFUw1v/nrWf7YX9PN+tVPc8duhBJ0uqYtavnNxZbXlXnrEw5kqShLeWqnhOAS/v5FwFXA18boihJ0nBmDf4nAcdX1X0ASc4CLq+qXxiqMEnSMGYdsuFI4IGJ+Qf6NknSHmbWM/73AFcn+Wg//1LgwkEqkiQNataret6S5OPAs/umV1fVV4YrS5I0lFm7egAOBO6tqv8C3J7kyQPVJEka0KyPXnwz8Ebgt/um/YE/GqooSdJwZj3jPw14MfBtgKq6Ezh4qKIkScOZNfgfqKqiH5o5yeOGK0mSNKRZg/+DSf4QeHyS1wCfwoeySNIeabdX9SQJ8MfAU4F7gR8B/kNVXTlwbZKkAew2+KuqklxRVc8AZg77JOcD/xS4u6qe3rcdTvdHZJ5+hM+qumcZdUuSlmnWrp5rk5ywxH1fAJyyU9sm4KqqOga4qp+XJK2iWYP/HwP/K8n/TnJDkhuT3LDYBlX1OeBbOzW/hEfu+L2Q7g5gSdIqWrSrJ8nRVfVXwAtW6HhHVtW2fvobON6PJK263fXxX0I3KudtSS6uqpet1IH77w5q2vIkG4GNAEcfffRKHVaSmre7rp5MTP/gChzvriTrAPp/7562YlWdV1UbqmrD3NzcChxakgS7D/6aMr1clwJn9NNnAH+yAvuUJC3B7rp6jktyL92Z/2P7afr5qqpDpm2Y5P3AycARSW4H3gycTXcz2C8Bt+ED2yVp1S0a/FW173J3XFWnT1n0vOXuU5L06C1lWGZJ0l7A4Jekxhj8ktSYWZ+5K0kAzG+6fLRjbz371NGOvTfxjF+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1Zr8xDppkK3Af8BDwYFVtGKMOSWrRKMHfe25VfXPE40tSk+zqkaTGjBX8BXwyyTVJNo5UgyQ1aayunp+sqjuSPAG4MsmtVfW5yRX6PwgbAY4++ugxapSkvdIoZ/xVdUf/793AR4ETF1jnvKraUFUb5ubmVrtESdprrXrwJ3lckoN3TAP/BLhpteuQpFaN0dVzJPDRJDuO/76q+sQIdUhSk1Y9+KvqL4DjVvu4kqSOl3NKUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNWa/sQsY2vymy0c79tazTx3t2JI0jWf8k
tQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1Zq+/jn9MY95DIO2NWvydGuJ+IM/4JakxBr8kNcbgl6TGGPyS1JhRgj/JKUm+muTrSTaNUYMktWrVgz/JvsB/A34WOBY4Pcmxq12HJLVqjDP+E4GvV9VfVNUDwAeAl4xQhyQ1aYzgfyLwfybmb+/bJEmrYM3ewJVkI7Cxn70/yVfHrGeKI4Bvjl3EMlj36ttTa7fu1bVL3Xnro9rfDyzUOEbw3wEcNTH/pL7te1TVecB5q1XUciTZXFUbxq5jqax79e2ptVv36lqtusfo6vkycEySJyf5PuDngUtHqEOSmrTqZ/xV9WCS1wF/CuwLnF9VN692HZLUqlH6+KvqCuCKMY69wtZ0V9QirHv17am1W/fqWpW6U1WrcRxJ0hrhkA2S1BiDfwZJDkhydZLrk9yc5Hf69ov6oSduSnJ+kv3HrnVni9T+rr7thiQfTnLQ2LVOmlb3xPK3J7l/rPqmWeT9viDJXya5rn+tH7nU77FI3UnyliR/nmRLktePXevOFqn98xPv951JLhm51O+xSN3PS3JtX/cXkvzQih+8qnzt5gUEOKif3h/4EvDjwAv7ZQHeD/zq2LUuofZDJtY5B9g0dq2z1N3PbwDeC9w/dp1LeL8vAF4+dn3LqPvVwHuAffplTxi71qX8rEysczHwi2PXOuN7/ufA0/r2XwMuWOlje8Y/g+rsOLvcv39VVV3RLyvgarp7EtaURWq/F7ozOuCxwJr6smda3f1YT/8J+LejFbeIaXWPWNJMFqn7V4HfraqH+/XuHqnEqXb3nic5BPhp4JLVr266Reou4JC+/VDgzpU+tsE/oyT7JrkOuBu4sqq+NLFsf+BVwCdGKm9R02pP8m7gG8BTgd8fr8KFTan7dcClVbVt1OIWscjPylv6rrVzkzxmvAoXNqXupwCvTLI5yceTHDNqkVMs9vsJvBS4asfJzloype5fBq5Icjtdrpy90sc1+GdUVQ9V1Xq6s/oTkzx9YvEfAJ+rqs+PUtxuTKu9ql4N/ENgC/DK8Spc2AJ1/xTwc6zBP1KTprzfv033B/YE4HDgjeNVuLApdT8G+Lvq7iZ9J3D+iCVOtZvfz9PpumLXnCl1nwm8sKqeBLybrit2RRn8S1RV/xf4NHAKQJI3A3PAb4xY1kx2rr1ve4huhNSXjVTWbk3U/Vzgh4CvJ9kKHJjk6yOWtqjJ97uqtvUf7b9D98t84qjFLWKnn5PbgY/0iz4KPHOksmaywO/nEXTv9Zp+SvtE3T8LHDfxieWPgWet9PEM/hkkmUvy+H76scDPALcm+WXgBcDpO/pA15optX91x5UCfR//i4FbRytyAVPqvqaqvr+q5qtqHvibqlr5Kx4ehUV+Vtb1baHrerhprBoXMq1uun7x5/arPYfui8c1ZZHaAV4OXFZVfzdSeVNNqXsLcGiSH+5X29G2otbs6JxrzDrgwv6LxX2AD1bVZUkeBG4D/mf3+8xHqup3R6xzIbvUTnf28/n+S68A19N9ibeWLPiej1zTLKb9rPyPJHN07/d1wL8ascaFTKv7C8BFSc4E7qfrf15rFvtZ+XkG6CNfIdPe89cAFyd5GLgH+JcrfWDv3JWkxtjVI0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfa0KSfzAxkuI3ktwxMf99M2x/cpKZb3RJMp/kny91vSQbkrx9pdZ/tJJ8Jske92xZjcvg15pQVX9dVev729f/O3DujvmqemCGXZzM0u5wnAd2G/w7r1dVm6tqsaGJl7q+tOoMfq1ZSX4syWeTXJPkTyfufn19klv6Ac8+kGSe7oaoM/tPCM/eaT/Pmfj08JUkB9Pd1PPsvu3M/kz98+nGQb924tPDzuudnOSyJex3cv2Dkrw7yY197S/bqc5TknxoYn5y23ekGyhtl2cTTKx//8T0y5Nc0E/PJbk4yZf710nL/X+ivcRyxnL25WvIF3AW8G+APwPm+rZXA
uf303cCj+mnHz+xzW9N2d/HgJP66YPo7lg/me5W/h3rHAgc0E8fA2zup3de77vzM+53cv23Ar83seywnercD/gr4HH9/DuAX+inD+//3Rf4DPDMfv4zwIZ++v6Jfb2cfhx34H3AT/bTRwNbxv5/7Gvcl0M2aK16DPB04Mp+OIx9gR1DMd9AN4zAJcw2xvoXgXOSXEQ3rMbt/T4n7Q/813RPxnoI+OGdV1jmfic9n24IAQCq6p7JhVX1YJJPAC9K8mHgVB557sArkmyk++OwDjiW7n2YxfOBYydqOyTJQfXIWPBqjMGvtSrAzVX1EwssOxX4KeBFwL9L8ozFdlRVZye5nO6JaV9M8oIFVjsTuAs4jq4LdLeDes2436X6AN0zB75F96njviRPBn4LOKGq7um7cA5YqKSJ6cnl+9A9kWrNDVSmcdjHr7XqO8Bckp+A7mE3Sf5Rkn2Ao6rq03Rj2h9K181yH3DwQjtK8pSqurGq3gp8mW5c/J3XPxTYVt0oq6+i+4TBCux30pXAaye2P2yBdT4LHA+8hu6PAHRPY/o28P+SHEk3dO9C7krytP49Om2i/ZPAv5447vop26sRBr/Wqofp+qnfmuR6uhEtn0UXyH+U5EbgK8DbqxvL/GPAaQt9uQu8IclNSW4A/h74OF03yUPpHnR9Jt3DdM7oj/VUuqBlgfWWut9J/xE4rN/meh4Z7vi7qns+wmV04X5Z33Z9/996K11//RenvGeb+m3+jEe6xQBeD2zov1C+hbU3MqhWmaNzSlJjPOOXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNeb/A8xCTZ43e5wtAAAAAElFTkSuQmCC\n" - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Reject the null hypothesis that the means are equal.\n", - "Reject the null hypothesis that the means are equal.\n" - ] } ], "source": [ - "dataset = pd.read_csv('experiment_data.csv')\n", - "make_decision(dataset, ['Weight', 'Age'])" - ], + "# Testy z bootstrappowaniem\n", + "\n", + "def pretty_print_full_stats(t_stat, df, cv, p):\n", + " print(f't: {t_stat}, df: {df}, cv: {cv}, p: {p}\\n')\n", + "\n", + "print('Statystyki dla jednej próby:')\n", + "t_stat, df, cv, p, _ = bootstrap_one_sample(dummy)\n", + "pretty_print_full_stats(t_stat, df, cv, p)\n", + "\n", + "print('Statystyki dla dwóch prób zależnych:')\n", + "t_stat, df, cv, p, _ = bootstrap_dependent(dummy2, dummy3)\n", + "pretty_print_full_stats(t_stat, df, cv, p)\n", + "\n", + "print('Statystyki dla dwóch prób niezależnych:')\n", + "t_stat, df, cv, p, _ = bootstrap_independent(dummy2, dummy3)\n", + "pretty_print_full_stats(t_stat, df, cv, p)" + ] + }, + { + "cell_type": "code", + "execution_count": 
1150, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "dataset = pd.read_csv('experiment_data.csv')\n", + "make_decision(dataset, ['Weight', 'Age'])" + ] } ], "metadata": { @@ -353,4 +473,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +}