2022-05-11 15:02:15 +02:00
|
|
|
{
|
|
|
|
"cells": [
|
2022-05-13 22:06:56 +02:00
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"source": [
|
|
|
|
"Bootstrapowa wersja testu t.\n",
|
|
|
|
"Implementacja powinna obejmować test dla jednej próby, dla dwóch prób niezależnych oraz dla dwóch prób zależnych.\n",
|
|
|
|
"W każdej sytuacji oczekiwanym wejściem jest zbiór danych w odpowiednim formacie, a wyjściem p-wartość oraz ostateczna decyzja.\n",
|
|
|
|
"Dodatkowo powinien być rysowany odpowiedni rozkład statystyki testowej."
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"collapsed": false
|
|
|
|
}
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"source": [
|
|
|
|
"Zbiór danych - ???\n",
|
|
|
|
"Hipoteza zerowa - ???\n",
|
2022-05-13 23:43:00 +02:00
|
|
|
"Hipoteza alternatywna - ???\n",
|
|
|
|
"\n",
|
|
|
|
"Dla każdego z 3 testów inne\n",
|
|
|
|
"https://www.jmp.com/en_ch/statistics-knowledge-portal/t-test.html"
|
2022-05-13 22:06:56 +02:00
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"collapsed": false
|
|
|
|
}
|
|
|
|
},
|
2022-05-11 15:02:15 +02:00
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-14 15:31:47 +02:00
|
|
|
"execution_count": 15,
|
2022-05-11 15:02:15 +02:00
|
|
|
"metadata": {
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"import numpy as np\n",
|
|
|
|
"import pandas as pd\n",
|
|
|
|
"from math import sqrt\n",
|
|
|
|
"from scipy.stats import sem\n",
|
2022-05-13 23:43:00 +02:00
|
|
|
"from scipy.stats import t\n",
|
2022-05-14 15:31:47 +02:00
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
"from statistics import mean, stdev"
|
2022-05-11 15:02:15 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-14 15:31:47 +02:00
|
|
|
"execution_count": 16,
|
2022-05-11 15:02:15 +02:00
|
|
|
"metadata": {
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"def generate_bootstraps(data, n_bootstraps=100, rng=None):\n",
"    \"\"\"Yield bootstrap resamples of ``data``, drawn row-wise with replacement.\n",
"\n",
"    Args:\n",
"        data: pandas DataFrame to resample; each resample has as many rows\n",
"            as ``data`` itself.\n",
"        n_bootstraps: number of resamples to generate.\n",
"        rng: optional random source exposing ``choice``, for example\n",
"            ``np.random.default_rng(seed)``. When omitted, the global NumPy\n",
"            random state is used, exactly as before this parameter existed.\n",
"\n",
"    Yields:\n",
"        DataFrame made of randomly chosen rows of ``data``; rows may repeat.\n",
"    \"\"\"\n",
"    n_rows = data.shape[0]\n",
"    # Fall back to the global NumPy random state when no generator is given.\n",
"    draw_positions = np.random.choice if rng is None else rng.choice\n",
"    for _ in range(n_bootstraps):\n",
"        row_positions = draw_positions(n_rows, size=n_rows)\n",
"        yield data.iloc[row_positions, :]"
|
2022-05-11 15:02:15 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-14 15:31:47 +02:00
|
|
|
"execution_count": 17,
|
2022-05-11 15:02:15 +02:00
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"def get_t_stat(data1, data2):\n",
"    \"\"\"Return the t statistic comparing the means of two samples.\n",
"\n",
"    The difference of the sample means is divided by the standard error of\n",
"    that difference, built from the standard error of each sample (unpooled form).\n",
"\n",
"    Args:\n",
"        data1: first sample of numeric observations.\n",
"        data2: second sample of numeric observations.\n",
"\n",
"    Returns:\n",
"        (mean(data1) - mean(data2)) / sqrt(sem(data1)**2 + sem(data2)**2).\n",
"    \"\"\"\n",
"    mean_difference = np.mean(data1) - np.mean(data2)\n",
"    # NOTE(review): this formula presumably covers only one of the three\n",
"    # planned test variants - confirm before reusing it for the others.\n",
"    difference_std_error = sqrt(sem(data1) ** 2.0 + sem(data2) ** 2.0)\n",
"    return mean_difference / difference_std_error"
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-14 15:31:47 +02:00
|
|
|
"execution_count": null,
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"def t_stat_single(sample, population_mean):\n",
"    \"\"\"Compute the one-sample t statistic.\n",
"\n",
"    Args:\n",
"        sample: sequence of numeric observations.\n",
"        population_mean: mean the sample is compared against.\n",
"\n",
"    Returns:\n",
"        Difference between the sample mean and ``population_mean`` divided by\n",
"        the standard error of the mean (sample standard deviation over the\n",
"        square root of the sample size).\n",
"    \"\"\"\n",
"    deviation = mean(sample) - population_mean\n",
"    standard_error = stdev(sample) / sqrt(len(sample))\n",
"    return deviation / standard_error"
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"def t_stat_indept():\n",
"    \"\"\"Placeholder for the test statistic of two independent samples.\n",
"\n",
"    TODO: not implemented yet; takes no arguments and returns None.\n",
"    \"\"\"\n",
"    return None"
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"def t_stat_dep():\n",
"    \"\"\"Placeholder for the test statistic of two dependent (paired) samples.\n",
"\n",
"    TODO: not implemented yet; takes no arguments and returns None.\n",
"    \"\"\"\n",
"    return None"
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 18,
|
2022-05-11 15:02:15 +02:00
|
|
|
"metadata": {
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"def independent_t_test(data, columns, alpha=0.05, n_bootstraps=100):\n",
"    \"\"\"Bootstrap t-test for two independent samples stored in two columns.\n",
"\n",
"    The t statistic is computed on every bootstrap resample of ``data`` and\n",
"    the values are averaged; the average is then compared with Student's t\n",
"    distribution.\n",
"\n",
"    Args:\n",
"        data: DataFrame holding both samples.\n",
"        columns: pair of column names, one per sample.\n",
"        alpha: significance level of the two-sided test.\n",
"        n_bootstraps: number of bootstrap resamples to average over.\n",
"\n",
"    Returns:\n",
"        Tuple ``(t_stat, df, cv, p)``: averaged t statistic, degrees of\n",
"        freedom, two-sided critical value and two-sided p-value.\n",
"\n",
"    Raises:\n",
"        ValueError: if ``n_bootstraps`` is smaller than 1.\n",
"    \"\"\"\n",
"    if n_bootstraps < 1:\n",
"        raise ValueError('n_bootstraps must be at least 1')\n",
"\n",
"    first_column, second_column = columns[0], columns[1]\n",
"    t_stats = [\n",
"        get_t_stat(sample[first_column], sample[second_column])\n",
"        for sample in generate_bootstraps(data, n_bootstraps)\n",
"    ]\n",
"    # Average over the number of resamples, not over the number of rows.\n",
"    t_stat = sum(t_stats) / len(t_stats)\n",
"\n",
"    data_size = data.shape[0]\n",
"    df = 2 * data_size - 2\n",
"    # The p-value is two-sided, so the critical value puts alpha / 2 in each\n",
"    # tail; the cv-based and p-based decisions then always agree.\n",
"    cv = t.ppf(1.0 - alpha / 2.0, df)\n",
"    # sf(x) equals 1 - cdf(x) but keeps precision for very small p-values.\n",
"    p = t.sf(abs(t_stat), df) * 2.0\n",
"    return t_stat, df, cv, p"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-14 15:31:47 +02:00
|
|
|
"execution_count": 19,
|
2022-05-11 15:02:15 +02:00
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"def make_decision(data, columns, alpha=0.05):\n",
"    \"\"\"Run the bootstrap t-test and print its result and the decision.\n",
"\n",
"    The decision is printed twice: first from comparing the absolute t\n",
"    statistic with the critical value, then from comparing the p-value\n",
"    with ``alpha``.\n",
"\n",
"    Args:\n",
"        data: DataFrame holding both samples.\n",
"        columns: pair of column names, one per sample.\n",
"        alpha: significance level.\n",
"    \"\"\"\n",
"    accept_message = 'Accept null hypothesis that the means are equal.'\n",
"    reject_message = 'Reject the null hypothesis that the means are equal.'\n",
"\n",
"    t_stat, df, cv, p = independent_t_test(data, columns, alpha)\n",
"    print(f't: {t_stat}, df: {df}, cv: {cv}, p: {p}\\n')\n",
"\n",
"    # Decision based on the critical value.\n",
"    print(accept_message if abs(t_stat) <= cv else reject_message)\n",
"    # Decision based on the p-value.\n",
"    print(accept_message if p > alpha else reject_message)"
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-14 15:31:47 +02:00
|
|
|
"execution_count": 20,
|
2022-05-13 23:43:00 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": "<Figure size 432x288 with 1 Axes>",
|
2022-05-14 15:31:47 +02:00
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQxElEQVR4nO3dfYxldX3H8fdHECxaBdyB0t1NZ6NoQk2rOKU01oeCUR6MS1tLoE3dKslGRYsPLS7YiEljsqiVatLSrEJZW4JQpIUWbUWKJU1kdUCeH2RFkF0XdgyKtiYq+u0f96zcDrO7M3Pm4e7P9yu5uef8fufc883Jnc+c+7vnnJuqQpLUlqctdwGSpIVnuEtSgwx3SWqQ4S5JDTLcJalB+y93AQArVqyo8fHx5S5DkvYpN99887eramymvpEI9/HxcSYnJ5e7DEnapyR5aHd9DstIUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDRuIKVWlvxjdcu2zbfnDjycu2bWm+PHKXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNWiv4Z7k4iQ7k9w5Q997klSSFd18knw8ydYktyc5ejGKliTt2WyO3C8BTpjemGQ18Brgm0PNJwJHdo/1wIX9S5QkzdVew72qbgQem6HrAuBsoIba1gKfqoGbgIOTHLEglUqSZm1eY+5J1gLbq+q2aV0rgYeH5rd1bTO9xvokk0kmp6am5lOGJGk35hzuSQ4CzgXe32fDVbWpqiaqamJsbKzPS0mSppnPXSGfB6wBbksCsAq4JckxwHZg9dCyq7o2SdISmvORe1XdUVWHVdV4VY0zGHo5uqoeAa4B3tidNXMs8HhV7VjYkiVJezObUyEvA74EvDDJtiRn7GHxzwIPAFuBTwBvW5AqJUlzstdhmao6fS/940PTBZzZvyxJUh9eoSpJDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1aDY/kH1xkp1J7hxq+3CSe5PcnuSfkxw81HdOkq1J7kvy2kWqW5K0B7M5cr8EOGFa23XAi6rq14CvAecAJDkKOA341W6dv02y34JVK0malb2Ge1XdCDw2re3zVfVEN3sTsKqbXgt8uqp+WFXfALYCxyxgvZKkWViIMfc3A5/rplcCDw/1bevaniLJ+iSTSSanpqYWoAxJ0i69wj3J+4AngEvnum5VbaqqiaqaGBsb61OGJGma/ee7YpI/AV4HHF9V1TVvB1YPLbaqa5MkLaF5HbknOQE4G3h9Vf1gqOsa4LQkByZZAxwJfLl/mZKkudjrkXuSy4BXASuSbAPOY3B2zIHAdUkAbqqqt1TVXUmuAO5mMFxzZlX9ZLGKlyTNbK/hXlWnz9B80R6W/yDwwT5FSZL68QpVSWqQ4S5JDTLcJalBhrskNchwl6QGzfsiJunnxfiGa5dluw9uPHlZtqs2eOQuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoL2Ge5KLk+xMcudQ26FJrktyf/d8SNeeJB9PsjXJ7UmOXsziJUkzm82R+yXACdPaNgDXV9WRwPXdPMCJwJHdYz1w4cKUKUmai72Ge1XdCDw2rXktsLmb3gycMtT+qRq4CTg4yRELVKskaZbmO+Z+eFXt6KYfAQ7vplcCDw8tt61re4ok65NMJpmcmpqaZxmSpJn0/kK1qgqoeay3qaomqmpibGysbxmSpCHzDfdHdw23dM87u/btwOqh5VZ1bZKkJTTfcL8GWNdNrwOuHmp/Y3fWzLHA40PDN5KkJbLXH8hOchnwKmBFkm3AecBG4IokZwAPAad2i38WOAnYCvwAeNMi1CxJ2ou9hntVnb
6bruNnWLaAM/sWJUnqxytUJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1qFe4J3lXkruS3JnksiTPSLImyZYkW5NcnuSAhSpWkjQ78w73JCuBPwUmqupFwH7AacD5wAVV9XzgO8AZC1GoJGn29voD2bNY/xeS/Bg4CNgBHAf8Yde/GfgAcGHP7WhEjG+4drlLkDQL8z5yr6rtwEeAbzII9ceBm4HvVtUT3WLbgJV9i5QkzU2fYZlDgLXAGuCXgWcCJ8xh/fVJJpNMTk1NzbcMSdIM+nyh+mrgG1U1VVU/Bq4CXgYcnGTXcM8qYPtMK1fVpqqaqKqJsbGxHmVIkqbrE+7fBI5NclCSAMcDdwM3AG/ollkHXN2vREnSXPUZc98CXAncAtzRvdYm4L3Au5NsBZ4LXLQAdUqS5qDX2TJVdR5w3rTmB4Bj+ryuJKkfr1CVpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGtQr3JMcnOTKJPcmuSfJbyU5NMl1Se7vng9ZqGIlSbPT6weygY8B/15Vb0hyAHAQcC5wfVVtTLIB2AC8t+d2pJ874xuuXZbtPrjx5GXZrhbWvI/ckzwHeAVwEUBV/aiqvgusBTZ3i20GTulXoiRprvoMy6wBpoC/T/LVJJ9M8kzg8Kra0S3zCHD4TCsnWZ9kMsnk1NRUjzIkSdP1Cff9gaOBC6vqJcD/MhiC+ZmqKqBmWrmqNlXVRFVNjI2N9ShDkjRdn3DfBmyrqi3d/JUMwv7RJEcAdM87+5UoSZqreYd7VT0CPJzkhV3T8cDdwDXAuq5tHXB1rwolSXPW92yZdwCXdmfKPAC8icE/jCuSnAE8BJzacxuSpDnqFe5VdSswMUPX8X1eV5LUj1eoSlKDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhrU9wpVLYPlus+3pH2HR+6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGtQ73JPsl+SrSf6tm1+TZEuSrUku7348W5K0hBbiyP0s4J6h+fOBC6rq+cB3gDMWYBuSpDnoFe5JVgEnA5/s5gMcB1zZLbIZOKXPNiRJc9f3yP2vgbOBn3bzzwW+W1VPdPPbgJUzrZhkfZLJJJNTU1M9y5AkDZt3uCd5HbCzqm6ez/pVtamqJqpqYmxsbL5lSJJm0Od+7i8DXp/kJOAZwLOBjwEHJ9m/O3pfBWzvX6YkaS7mfeReVedU1aqqGgdOA/6zqv4IuAF4Q7fYOuDq3lVKkuZkMc5zfy/w7iRbGYzBX7QI25Ak7cGC/MxeVX0R+GI3/QBwzEK8riRpfrxCVZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJatCC3FtGUjvGN1y7bNt+cOPJy7bt1njkLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSg+Yd7klWJ7khyd1J7kpyVtd+aJLrktzfPR+ycOVKkmajz5H7E8B7quoo4FjgzCRHARuA66vqSOD6bl6StITmHe5VtaOqbummvw/cA6wE1gKbu8U2A6f0rFGSNEcLMuaeZBx4CbAFOLyqdnRdjwCH72ad9Ukmk0xOTU0tRBmSpE7vcE/yLOAzwDur6nvDfVVVQM20XlVtqqqJqpoYGxvrW4YkaUivcE/ydAbBfmlVXdU1P5rkiK7/CGBnvxIlSXPV52yZABcB91TVR4e6rgHWddPrgKvnX54kaT763PL3ZcAfA3ckubVrOxfYCFyR5AzgIeDUXhVKkuZs3uFeVf8NZDfdx8/3dSVJ/XmFqiQ1yHCXpA
YZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1KA+Nw77uTe+4drlLkGSZuSRuyQ1yCN3SSN
|
2022-05-13 23:43:00 +02:00
|
|
|
},
|
|
|
|
"metadata": {
|
|
|
|
"needs_background": "light"
|
|
|
|
},
|
|
|
|
"output_type": "display_data"
|
|
|
|
}
|
|
|
|
],
|
2022-05-13 22:06:56 +02:00
|
|
|
"source": [
|
|
|
"def draw_distribution(values=None, bins=None):\n",
"    \"\"\"Draw a histogram of the test-statistic distribution.\n",
"\n",
"    Args:\n",
"        values: sequence of values to plot. When omitted, placeholder data\n",
"            (500 draws from a normal distribution with mean 170 and standard\n",
"            deviation 10) is plotted, as the function did before.\n",
"        bins: passed to ``plt.hist``; None keeps matplotlib's default.\n",
"    \"\"\"\n",
"    if values is None:\n",
"        # TODO: pass the bootstrap statistics here once the tests collect them.\n",
"        values = np.random.normal(170, 10, 500)\n",
"    plt.hist(values, bins=bins)\n",
"    plt.xlabel('Value')\n",
"    plt.ylabel('Frequency')\n",
"    plt.show()\n",
"\n",
"\n",
"draw_distribution()"
|
2022-05-13 22:06:56 +02:00
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-14 15:31:47 +02:00
|
|
|
"execution_count": 21,
|
2022-05-11 15:02:15 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
2022-05-14 15:31:47 +02:00
|
|
|
"t: 6.940510630195086, df: 998, cv: 1.6463818766348755, p: 7.02371494298859e-12\n",
|
2022-05-11 15:02:15 +02:00
|
|
|
"\n",
|
|
|
|
"Reject the null hypothesis that the means are equal.\n",
|
|
|
|
"Reject the null hypothesis that the means are equal.\n"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
"# Load the experiment data and compare the means of two of its columns.\n",
"dataset = pd.read_csv('experiment_data.csv')\n",
"make_decision(data=dataset, columns=['Weight', 'Age'])"
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"interpreter": {
|
|
|
|
"hash": "11938c6bc6919ae2720b4d5011047913343b08a43b18698fd82dedb0d4417594"
|
|
|
|
},
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python 3.9.1 64-bit",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
"language_info": {
|
|
|
|
"codemirror_mode": {
|
|
|
|
"name": "ipython",
|
|
|
|
"version": 3
|
|
|
|
},
|
|
|
|
"file_extension": ".py",
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"version": "3.9.1"
|
|
|
|
},
|
|
|
|
"orig_nbformat": 4
|
|
|
|
},
|
|
|
|
"nbformat": 4,
|
|
|
|
"nbformat_minor": 2
|
|
|
|
}
|