{ "cells": [ { "cell_type": "markdown", "source": [ "Bootstrapowa wersja testu t.\n", "Implementacja powinna obejmować test dla jednej próby, dla dwóch prób niezależnych oraz dla dwóch prób zależnych.\n", "W każdej sytuacji oczekiwanym wejście jest zbiór danych w odpowiednim formacie, a wyjściem p-wartość oraz ostateczna decyzja.\n", "Dodatkowo powinien być rysowany odpowiedni rozkład statystyki testowej." ], "metadata": { "collapsed": false } }, { "cell_type": "markdown", "source": [ "Zbiór danych - ???\n", "Hipoteza zerowa - ???\n", "Hipoteza alternatywna - ???\n", "\n", "Dla każdego z 3 testów inne\n", "https://www.jmp.com/en_ch/statistics-knowledge-portal/t-test.html" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 15, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from math import sqrt\n", "from scipy.stats import sem\n", "from scipy.stats import t\n", "import matplotlib.pyplot as plt\n", "from statistics import mean, stdev" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "def generate_bootstraps(data, n_bootstraps=100):\n", " data_size = data.shape[0]\n", " for _ in range(n_bootstraps):\n", " indices = np.random.choice(len(data), size=data_size)\n", " yield data.iloc[indices, :]" ] }, { "cell_type": "code", "execution_count": 17, "outputs": [], "source": [ "def get_t_stat(data1, data2):\n", " mean1 = np.mean(data1)\n", " mean2 = np.mean(data2)\n", " sem1 = sem(data1)\n", " sem2 = sem(data2)\n", "\n", " sed = sqrt(sem1**2.0 + sem2**2.0)\n", " # To jest wzór chyba tylko dla jednego przypadku\n", " return (mean1 - mean2) / sed" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "def t_stat_single(sample, population_mean):\n", " \"\"\"Funkcja oblicza wartość statystyki testowej dla jednej próbki\"\"\"\n", " sample_mean = mean(sample)\n", " sample_std = stdev(sample)\n", " sample_size = len(sample)\n", " return (sample_mean - population_mean) / (sample_std / sqrt(sample_size))" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "def t_stat_indept():\n", " pass" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "def t_stat_dep():\n", " pass" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 18, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "def independent_t_test(data, columns, alpha=0.05):\n", " t_stat_sum = 0\n", " for sample in generate_bootstraps(data):\n", " t_stat_sum += get_t_stat(sample[columns[0]], sample[columns[1]])\n", "\n", " data_size = data.shape[0]\n", " t_stat = t_stat_sum / data_size\n", " df = 2 * data_size - 2\n", " cv = t.ppf(1.0 - alpha, df)\n", " p = (1.0 - t.cdf(abs(t_stat), df)) * 2.0\n", " return t_stat, df, cv, p" ] }, { "cell_type": "code", "execution_count": 19, "outputs": [], "source": [ "def make_decision(data, columns, alpha=0.05):\n", " t_stat, df, cv, p = independent_t_test(data, columns, alpha)\n", " print(f't: {t_stat}, df: {df}, cv: {cv}, p: {p}\\n')\n", " if abs(t_stat) <= cv:\n", "\t print('Accept null hypothesis that the means are equal.')\n", " else:\n", " print('Reject the null hypothesis that the means are equal.')\n", " if p > alpha:\n", " print('Accept null hypothesis that the means are equal.')\n", " else:\n", "\t print('Reject the null hypothesis that the means are equal.')" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 20, "outputs": [ { "data": { "text/plain": "
", "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQxElEQVR4nO3dfYxldX3H8fdHECxaBdyB0t1NZ6NoQk2rOKU01oeCUR6MS1tLoE3dKslGRYsPLS7YiEljsqiVatLSrEJZW4JQpIUWbUWKJU1kdUCeH2RFkF0XdgyKtiYq+u0f96zcDrO7M3Pm4e7P9yu5uef8fufc883Jnc+c+7vnnJuqQpLUlqctdwGSpIVnuEtSgwx3SWqQ4S5JDTLcJalB+y93AQArVqyo8fHx5S5DkvYpN99887eramymvpEI9/HxcSYnJ5e7DEnapyR5aHd9DstIUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDRuIKVWlvxjdcu2zbfnDjycu2bWm+PHKXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNWiv4Z7k4iQ7k9w5Q997klSSFd18knw8ydYktyc5ejGKliTt2WyO3C8BTpjemGQ18Brgm0PNJwJHdo/1wIX9S5QkzdVew72qbgQem6HrAuBsoIba1gKfqoGbgIOTHLEglUqSZm1eY+5J1gLbq+q2aV0rgYeH5rd1bTO9xvokk0kmp6am5lOGJGk35hzuSQ4CzgXe32fDVbWpqiaqamJsbKzPS0mSppnPXSGfB6wBbksCsAq4JckxwHZg9dCyq7o2SdISmvORe1XdUVWHVdV4VY0zGHo5uqoeAa4B3tidNXMs8HhV7VjYkiVJezObUyEvA74EvDDJtiRn7GHxzwIPAFuBTwBvW5AqJUlzstdhmao6fS/940PTBZzZvyxJUh9eoSpJDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1aDY/kH1xkp1J7hxq+3CSe5PcnuSfkxw81HdOkq1J7kvy2kWqW5K0B7M5cr8EOGFa23XAi6rq14CvAecAJDkKOA341W6dv02y34JVK0malb2Ge1XdCDw2re3zVfVEN3sTsKqbXgt8uqp+WFXfALYCxyxgvZKkWViIMfc3A5/rplcCDw/1bevaniLJ+iSTSSanpqYWoAxJ0i69wj3J+4AngEvnum5VbaqqiaqaGBsb61OGJGma/ee7YpI/AV4HHF9V1TVvB1YPLbaqa5MkLaF5HbknOQE4G3h9Vf1gqOsa4LQkByZZAxwJfLl/mZKkudjrkXuSy4BXASuSbAPOY3B2zIHAdUkAbqqqt1TVXUmuAO5mMFxzZlX9ZLGKlyTNbK/hXlWnz9B80R6W/yDwwT5FSZL68QpVSWqQ4S5JDTLcJalBhrskNchwl6QGzfsiJunnxfiGa5dluw9uPHlZtqs2eOQuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoL2Ge5KLk+xMcudQ26FJrktyf/d8SNeeJB9PsjXJ7UmOXsziJUkzm82R+yXACdPaNgDXV9WRwPXdPMCJwJHdYz1w4cKUKUmai72Ge1XdCDw2rXktsLmb3gycMtT+qRq4CTg4yRELVKskaZbmO+Z+eFXt6KYfAQ7vplcCDw8tt61re4ok65NMJpmcmpqaZxmSpJn0/kK1qgqoeay3qaomqmpibGysbxmSpCHzDfdHdw23dM87u/btwOqh5VZ1bZKkJTTfcL8GWNdNrwOuHmp/Y3fWzLHA40PDN5KkJbLXH8hOchnwKmBFkm3AecBG4IokZwAPAad2i38WOAnYCvwAeNMi1CxJ2ou9hntVnb6bruNnWLaAM/sWJUnqxytUJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1qFe4J3lXkruS3JnksiTPSLImyZYkW5NcnuSAhSpWkjQ78w73JCuBPwUmqupFwH7AacD5wAVV9XzgO8AZC1GoJGn29voD2bNY/xeS/Bg4CNgBHAf8Yde/GfgAcGHP7WhEjG+4drlLkDQL8z5yr6rtwEeAbzII9ceBm4HvVtUT3WLbgJV9i5QkzU2fYZlDgLXAGuCXgWcCJ8xh/fVJJpNMTk1NzbcMSdIM+nyh+mrgG1U1VVU/Bq4CXgYcnGTXcM8qYPtMK1fVpqqaqKqJsbGxHmVIkqbrE+7fBI5NclCSAMcDdwM3AG/ollkHXN2vREnSXPUZc98CXAncAtzRvdYm4L3Au5NsBZ4LXLQAdUqS5qDX2TJVdR5w3rTmB4Bj+ryuJKkfr1CVpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGtQr3JMcnOTKJPcmuSfJbyU5NMl1Se7vng9ZqGIlSbPT6weygY8B/15Vb0hyAHAQcC5wfVVtTLIB2AC8t+d2pJ874xuuXZbtPrjx5GXZrhbWvI/ckzwHeAVwEUBV/aiqvgusBTZ3i20GTulXoiRprvoMy6wBpoC/T/LVJJ9M8kzg8Kra0S3zCHD4TCsnWZ9kMsnk1NRUjzIkSdP1Cff9gaOBC6vqJcD/MhiC+ZmqKqBmWrmqNlXVRFVNjI2N9ShDkjRdn3DfBmyrqi3d/JUMwv7RJEcAdM87+5UoSZqreYd7VT0CPJzkhV3T8cDdwDXAuq5tHXB1rwolSXPW92yZdwCXdmfKPAC8icE/jCuSnAE8BJzacxuSpDnqFe5VdSswMUPX8X1eV5LUj1eoSlKDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhrU9wpVLYPlus+3pH2HR+6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGtQ73JPsl+SrSf6tm1+TZEuSrUku7348W5K0hBbiyP0s4J6h+fOBC6rq+cB3gDMWYBuSpDnoFe5JVgEnA5/s5gMcB1zZLbIZOKXPNiRJc9f3yP2vgbOBn3bzzwW+W1VPdPPbgJUzrZhkfZLJJJNTU1M9y5AkDZt3uCd5HbCzqm6ez/pVtamqJqpqYmxsbL5lSJJm0Od+7i8DXp/kJOAZwLOBjwEHJ9m/O3pfBWzvX6YkaS7mfeReVedU1aqqGgdOA/6zqv4IuAF4Q7fYOuDq3lVKkuZkMc5zfy/w7iRbGYzBX7QI25Ak7cGC/MxeVX0R+GI3/QBwzEK8riRpfrxCVZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJatCC3FtGUjvGN1y7bNt+cOPJy7bt1njkLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSg+Yd7klWJ7khyd1J7kpyVtd+aJLrktzfPR+ycOVKkmajz5H7E8B7quoo4FjgzCRHARuA66vqSOD6bl6StITmHe5VtaOqbummvw/cA6wE1gKbu8U2A6f0rFGSNEcLMuaeZBx4CbAFOLyqdnRdjwCH72ad9Ukmk0xOTU0tRBmSpE7vcE/yLOAzwDur6nvDfVVVQM20XlVtqqqJqpoYGxvrW4YkaUivcE/ydAbBfmlVXdU1P5rkiK7/CGBnvxIlSXPV52yZABcB91TVR4e6rgHWddPrgKvnX54kaT763PL3ZcAfA3ckubVrOxfYCFyR5AzgIeDUXhVKkuZs3uFeVf8NZDfdx8/3dSVJ/XmFqiQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1KA+Nw77uTe+4drlLkGSZuSRuyQ1yCN3SSNjuT4NP7jx5GXZ7mLyyF2SGmS4S1KDDHdJapDhLkkNMtwlqUGLdrZMkhOAjwH7AZ+sqo2LsR3PNZfU13LmyGKdqbMoR+5J9gP+BjgROAo4PclRi7EtSdJTLdawzDHA1qp6oKp+BHwaWLtI25IkTbNYwzIrgYeH5rcBvzm8QJL1wPpu9n+S3LdItQxbAXx7Cbaz0Kx7aVn30tkXa4YFrDvn91r9V3bXsWxXqFbVJmDTUm4zyWRVTSzlNheCdS8t6146+2LNsG/UvVjDMtuB1UPzq7o2SdISWKxw/wpwZJI1SQ4ATgOuWaRtSZKmWZRhmap6Isnbgf9gcCrkxVV112Jsa46WdBhoAVn30rLupbMv1gz7QN2pquWuQZK0wLxCVZIaZLhLUoOaCvckFyfZmeTOGfrek6SSrOjmk+TjSbYmuT3J0Utf8c9qe0rdST6QZHuSW7vHSUN953R135fktctT9e73d5J3JLk3yV1JPjTUvux172ZfXz60nx9Mcuso1dzVMVPdL05yU1f3ZJJjuvZRf2//epIvJbkjyb8mefZQ37Lv7ySrk9yQ5O7uPXxW135okuuS3N89H9K1j8z+/n+qqpkH8ArgaODOae2rGXy5+xCwoms7CfgcEOBYYMso1Q18APizGZY9CrgNOBBYA3wd2G+E6v4d4AvAgd38YaNU9+7eI0P9fwW8f5Rq3sO+/jxwYjd9EvDFoelRfm9/BXhlN/1m4C9HaX8DRwBHd9O/CHytq+1DwIaufQNw/qjt7+FHU0fuVXUj8NgMXRcAZwPD3x6vBT5VAzcBByc5YgnKfIo91D2TtcCnq+qHVfUNYCuD2z0sud3U/VZgY1X9sFtmZ9c+EnXvaV8nCXAqcFnXNBI1w27rLmDXUe9zgG9106P+3n4BcGM3fR3w+930SOzvqtpRVbd0098H7mFw1f1aYHO32GbglG56ZPb3sKbCfSZJ1gLbq+q2aV0z3SJh5ZIVNjtv7z7mXbzrIyCjX/cLgJcn2ZLkv5L8Rtc+6nUDvBx4tKru7+ZHveZ3Ah9O8jDwEeCcrn3U676LJ+819Qc8ecHjyNWdZBx4CbAFOLyqdnRdjwCHd9MjVzc0Hu5JDgLOBd6/3LXMw4XA84AXAzsYDBfsC/YHDmXw8fTPgSu6I+J9wek8edS+L3gr8K6qWg28C7homeuZrTcDb0tyM4Nhjx8tcz0zSvIs4DPAO6vqe8N9NRiPGenzyJsOdwbhuAa4LcmDDG6DcEuSX2LEb5FQVY9W1U+q6qfAJ3jy4+lI183gqOWq7iPql4GfMrjJ0kjXnWR/4PeAy4eaR7pmYB1wVTf9T+wj75GqureqXlNVL2Xwz/TrXdfI1J3k6QyC/dKq2rWPH9013NI97xpyHJm6hzUd7lV1R1UdVlXjVTXOIHiOrqpHGNwO4Y3dN93HAo8PfeRadtPG7H4X2HW2wTXAaUkOTLIGOBL48lLXtwf/wuBLVZK8ADiAwd3zRr3uVwP3VtW2obZRr/lbwCu76eOAXcNJo/7ePqx7fhrwF8DfdV0jsb+7T5oXAfdU1UeHuq5h8A+V7vnqofbR29/L/Y3uQj4YHAXsAH7MIMjPmNb/IE+eLRMGPyjydeAOYGKU6gb+oavrdgZvniOGln9fV/d9dGdLjFDdBwD/yOCf0S3AcaNU9+7eI8AlwFtmWH7Za97Dvv5t4GYGZ5hsAV7aLTvq7+2zGJyB8jVgI92V8qOyv7v9Wt3f3q3d4yTgucD1DP6JfgE4dNT29/DD2w9IUoOaHpaRpJ9XhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lq0P8B851KdtKAlogAAAAASUVORK5CYII=\n" }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "def draw_distribution():\n", " \"\"\"Funkcja rysuje rozkład statystyki testowej\"\"\"\n", " dummy = np.random.normal(170, 10, 500)\n", " plt.hist(dummy)\n", " plt.show()\n", " pass\n", "draw_distribution()" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 21, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "t: 6.940510630195086, df: 998, cv: 1.6463818766348755, p: 7.02371494298859e-12\n", "\n", "Reject the null hypothesis that the means are equal.\n", "Reject the null hypothesis that the means are equal.\n" ] } ], "source": [ "dataset = pd.read_csv('experiment_data.csv')\n", "make_decision(dataset, ['Weight', 'Age'])" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } } ], "metadata": { "interpreter": { "hash": "11938c6bc6919ae2720b4d5011047913343b08a43b18698fd82dedb0d4417594" }, "kernelspec": { "display_name": "Python 3.9.1 64-bit", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.1" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }