2022-05-11 15:02:15 +02:00
|
|
|
{
|
|
|
|
"cells": [
|
2022-05-13 22:06:56 +02:00
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"source": [
|
|
|
|
"Bootstrapowa wersja testu t.\n",
|
|
|
|
"Implementacja powinna obejmować test dla jednej próby, dla dwóch prób niezależnych oraz dla dwóch prób zależnych.\n",
|
|
|
|
"W każdej sytuacji oczekiwanym wejściem jest zbiór danych w odpowiednim formacie, a wyjściem p-wartość oraz ostateczna decyzja.\n",
|
|
|
|
"Dodatkowo powinien być rysowany odpowiedni rozkład statystyki testowej."
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"collapsed": false
|
|
|
|
}
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"source": [
|
|
|
|
"Zbiór danych - ???\n",
|
|
|
|
"Hipoteza zerowa - ???\n",
|
2022-05-13 23:43:00 +02:00
|
|
|
"Hipoteza alternatywna - ???\n",
|
|
|
|
"\n",
|
|
|
|
"Dla każdego z 3 testów inne\n",
|
|
|
|
"https://www.jmp.com/en_ch/statistics-knowledge-portal/t-test.html"
|
2022-05-13 22:06:56 +02:00
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"collapsed": false
|
|
|
|
}
|
|
|
|
},
|
2022-05-11 15:02:15 +02:00
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-13 23:43:00 +02:00
|
|
|
"execution_count": 52,
|
2022-05-11 15:02:15 +02:00
|
|
|
"metadata": {
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"import numpy as np\n",
|
|
|
|
"import pandas as pd\n",
|
|
|
|
"from math import sqrt\n",
|
|
|
|
"from scipy.stats import sem\n",
|
2022-05-13 23:43:00 +02:00
|
|
|
"from scipy.stats import t\n",
|
|
|
|
"import matplotlib.pyplot as plt"
|
2022-05-11 15:02:15 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-13 23:43:00 +02:00
|
|
|
"execution_count": 53,
|
2022-05-11 15:02:15 +02:00
|
|
|
"metadata": {
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"def generate_bootstraps(data, n_bootstraps=100, rng=None):\n",
"    \"\"\"Yield bootstrap resamples of the rows of ``data``.\n",
"\n",
"    Every resample has as many rows as ``data`` and is drawn with\n",
"    replacement by positional index, so ``data`` must support ``.iloc``\n",
"    with a (rows, columns) indexer, e.g. a pandas DataFrame.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    data : pandas.DataFrame\n",
"        Rows to resample.\n",
"    n_bootstraps : int, default 100\n",
"        Number of resamples to yield.\n",
"    rng : None, int or numpy.random.Generator, optional\n",
"        Seed or generator for reproducible resampling. When omitted, the\n",
"        global ``np.random`` state is used, as in the original version.\n",
"\n",
"    Yields\n",
"    ------\n",
"    pandas.DataFrame\n",
"        One resample of ``data`` per iteration.\n",
"    \"\"\"\n",
"    n_rows = len(data)\n",
"    # Build the generator once: consecutive resamples differ, but the whole\n",
"    # sequence is reproducible for a given seed.\n",
"    generator = None if rng is None else np.random.default_rng(rng)\n",
"    for _ in range(n_bootstraps):\n",
"        if generator is None:\n",
"            indices = np.random.choice(n_rows, size=n_rows)\n",
"        else:\n",
"            indices = generator.choice(n_rows, size=n_rows)\n",
"        yield data.iloc[indices, :]"
|
2022-05-11 15:02:15 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-13 23:43:00 +02:00
|
|
|
"execution_count": 54,
|
2022-05-11 15:02:15 +02:00
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"def get_t_stat(data1, data2):\n",
"    \"\"\"Return the t statistic for the difference of two sample means.\n",
"\n",
"    The difference ``mean(data1) - mean(data2)`` is divided by the standard\n",
"    error of the difference, which is built from each sample's own standard\n",
"    error (``scipy.stats.sem``) without pooling the variances.\n",
"    \"\"\"\n",
"    mean_gap = np.mean(data1) - np.mean(data2)\n",
"    first_error, second_error = sem(data1), sem(data2)\n",
"    # Original author's note: this formula probably covers only one of the\n",
"    # test variants.\n",
"    std_error_of_gap = sqrt(first_error ** 2.0 + second_error ** 2.0)\n",
"    return mean_gap / std_error_of_gap"
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-13 23:43:00 +02:00
|
|
|
"execution_count": 55,
|
2022-05-11 15:02:15 +02:00
|
|
|
"metadata": {
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"def independent_t_test(data, columns, alpha=0.05, n_bootstraps=100):\n",
"    \"\"\"Bootstrap-averaged two-sided t-test comparing the means of two columns.\n",
"\n",
"    The t statistic is computed on every bootstrap resample of ``data`` and\n",
"    the results are averaged; the averaged statistic is then compared with\n",
"    Student's t distribution.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    data : pandas.DataFrame\n",
"        Data set containing both columns.\n",
"    columns : sequence of two column labels\n",
"        ``columns[0]`` and ``columns[1]`` select the samples to compare.\n",
"    alpha : float, default 0.05\n",
"        Significance level of the two-sided test.\n",
"    n_bootstraps : int, default 100\n",
"        Number of bootstrap resamples to average over.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple\n",
"        ``(t_stat, df, cv, p)``: averaged t statistic, degrees of freedom,\n",
"        two-sided critical value and two-sided p-value.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If ``n_bootstraps`` is smaller than 1.\n",
"    \"\"\"\n",
"    if n_bootstraps < 1:\n",
"        raise ValueError('n_bootstraps must be at least 1')\n",
"\n",
"    t_stats = [\n",
"        get_t_stat(sample[columns[0]], sample[columns[1]])\n",
"        for sample in generate_bootstraps(data, n_bootstraps)\n",
"    ]\n",
"    # Average over the number of resamples drawn, not over the number of rows\n",
"    # in the data set.\n",
"    t_stat = sum(t_stats) / len(t_stats)\n",
"\n",
"    data_size = data.shape[0]\n",
"    # NOTE(review): 2n - 2 is the equal-variance degrees-of-freedom formula,\n",
"    # while get_t_stat does not pool the variances - confirm this is intended.\n",
"    df = 2 * data_size - 2\n",
"    # Callers compare abs(t_stat) with cv, so the critical value has to be the\n",
"    # two-sided quantile; it then agrees with the two-sided p-value below.\n",
"    cv = t.ppf(1.0 - alpha / 2.0, df)\n",
"    # sf() is the upper tail (1 - cdf) without cancellation for large |t|.\n",
"    p = 2.0 * t.sf(abs(t_stat), df)\n",
"    return t_stat, df, cv, p"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-13 23:43:00 +02:00
|
|
|
"execution_count": 56,
|
2022-05-11 15:02:15 +02:00
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"def make_decision(data, columns, alpha=0.05):\n",
"    \"\"\"Run the bootstrap t-test on two columns and print both verdicts.\n",
"\n",
"    Prints the statistic, degrees of freedom, critical value and p-value\n",
"    returned by ``independent_t_test``, followed by one verdict based on the\n",
"    critical value and one based on the p-value.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    data : pandas.DataFrame\n",
"        Data set passed through to ``independent_t_test``.\n",
"    columns : sequence of two column labels\n",
"        The two columns whose means are compared.\n",
"    alpha : float, default 0.05\n",
"        Significance level.\n",
"    \"\"\"\n",
"    t_stat, df, cv, p = independent_t_test(data, columns, alpha)\n",
"    print(f't: {t_stat}, df: {df}, cv: {cv}, p: {p}\\n')\n",
"    # Verdict from the critical value.\n",
"    if abs(t_stat) <= cv:\n",
"        print('Accept null hypothesis that the means are equal.')\n",
"    else:\n",
"        print('Reject the null hypothesis that the means are equal.')\n",
"    # Verdict from the p-value.\n",
"    if p > alpha:\n",
"        print('Accept null hypothesis that the means are equal.')\n",
"    else:\n",
"        print('Reject the null hypothesis that the means are equal.')"
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-13 23:43:00 +02:00
|
|
|
"execution_count": 69,
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": "<Figure size 432x288 with 1 Axes>",
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD4CAYAAADvsV2wAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAP20lEQVR4nO3df4xlZX3H8fdHVrBqK+COW7pLOpuKbaipilNK48+CqQjGpa0lkKZulWSjVYs/Wl20EZPGZFGr1aS12Qp1bQlCkRZaaitSLGlSVgdEfiMrLrK4sGP82Zqo6Ld/3LPlZp1lZ++5w527z/uVTO45zznnnu+TM/cz5z73njOpKiRJh77HTboASdJjw8CXpEYY+JLUCANfkhph4EtSI1ZNugCA1atX1+zs7KTLkKSpcuONN369qmaWuv6KCPzZ2Vnm5+cnXYYkTZUk9x3M+g7pSFIjDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSI1bElbbSSja7+eqJ7HfnltMnsl8dujzDl6RGGPiS1IgDBn6Si5LsSXLbUNv7ktyV5JYk/5jkyKFl5yXZkeTuJC9dprolSQdpKWf4HwNO3aftGuCZVfUrwJeA8wCSHA+cBfxyt81fJTlsbNVKkkZ2wMCvquuBb+zT9umqeribvQFY101vAD5RVd+vqq8AO4ATx1ivJGlE4xjDfw3wqW56LXD/0LJdXdtPSLIpyXyS+YWFhTGUIUl6NL0CP8k7gYeBiw9226raWlVzVTU3M7Pkf9giSRrRyN/DT/IHwMuBU6qquuYHgGOHVlvXtUmSJmykM/wkpwJvA15RVd8bWnQVcFaSI5KsB44DPte/TElSXwc8w09yCfBiYHWSXcD5DL6VcwRwTRKAG6rqtVV1e5LLgDsYDPW8vqp+tFzFS5KW7oCBX1VnL9J84aOs/x7gPX2KkiSNn1faSlIjDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RGGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDhj4SS5KsifJbUNtRye5Jsk93eNRXXuSfDjJjiS3JDlhOYuXJC3dUs7wPwacuk/bZuDaqjoOuLabB3gZcFz3swn4yHjKlCT1dcDAr6rrgW/s07wB2NZNbwPOGGr/eA3cAByZ5Jgx1SpJ6mHUMfw1VbW7m34QWNNNrwXuH1pvV9f2E5JsSjKfZH5hYWHEMiRJS9X7Q9uqKqBG2G5rVc1V1dzMzEzfMiRJBzBq4D+0d6ime9zTtT8AHDu03rquTZI0YaMG/lXAxm56I3DlUPurum/rnAR8e2joR5I0QasOtEKSS4AXA6uT7ALOB7YAlyU5B7gPOLNb/V+B04AdwPeAVy9DzZKkERww8Kvq7P0sOmWRdQt4fd+iJEnj55W2ktQIA1+SGmHgS1IjDHxJasQBP7SVVoLZzVdPugRp6nmGL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RGGPiS1AgDX5IaYeBLUiMMfElqhIEvSY3wP15JK9Sk/svXzi2nT2S/Wn6e4UtSI3oFfpI3J7k9yW1JLknyhCTrk2xPsiPJpUkOH1exkqTRjRz4SdYCfwTMVdUzgcOAs4ALgA9W1dOBbwLnjKNQSVI/fYd0VgE/lWQV8ERgN3AycHm3fBtwRs99SJLGYOTAr6oHgPcDX2UQ9N8GbgS+VVUPd6vtAtYutn2STUnmk8wvLCyMWoYkaYn6DOkcBWwA1gM/BzwJOHWp21fV1qqaq6q5mZmZUcuQJC1RnyGdlwBfqaqFqvohcAXwPODIbogHYB3wQM8aJUlj0CfwvwqclOSJSQKcAtwBXAe8sltnI3BlvxIlSePQZwx/O4MPZ2
8Cbu2eayvwduAtSXYATwUuHEOdkqSeel1pW1XnA+fv03wvcGKf55UkjZ9X2kpSI7yXjg7KpO7vIqk/z/AlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RGGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjegV+kiOTXJ7kriR3Jvn1JEcnuSbJPd3jUeMqVpI0ur5n+B8C/q2qfgl4FnAnsBm4tqqOA67t5iVJEzZy4Cd5CvBC4EKAqvpBVX0L2ABs61bbBpzRr0RJ0jj0OcNfDywAf5vkC0k+muRJwJqq2t2t8yCwpm+RkqT++gT+KuAE4CNV9Rzgf9ln+KaqCqjFNk6yKcl8kvmFhYUeZUiSlqJP4O8CdlXV9m7+cgZ/AB5KcgxA97hnsY2ramtVzVXV3MzMTI8yJElLMXLgV9WDwP1JfrFrOgW4A7gK2Ni1bQSu7FWhJGksVvXc/o3AxUkOB+4FXs3gj8hlSc4B7gPO7LkPSdIY9Ar8qroZmFtk0Sl9nleSNH5eaStJjTDwJakRBr4kNcLAl6RGGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqEgS9Jjegd+EkOS/KFJP/Sza9Psj3JjiSXJjm8f5mSpL7GcYZ/LnDn0PwFwAer6unAN4FzxrAPSVJPvQI/yTrgdOCj3XyAk4HLu1W2AWf02YckaTz6nuH/BfA24Mfd/FOBb1XVw938LmDtYhsm2ZRkPsn8wsJCzzIkSQcycuAneTmwp6puHGX7qtpaVXNVNTczMzNqGZKkJVrVY9vnAa9IchrwBOBngA8BRyZZ1Z3lrwMe6F+mhs1uvnrSJegQNsnfr51bTp/Yvlsw8hl+VZ1XVeuqahY4C/iPqvo94Drgld1qG4Ere1cpSeptOb6H/3bgLUl2MBjTv3AZ9iFJOkh9hnT+X1V9FvhsN30vcOI4nleSND5eaStJjTDwJakRBr4kNcLAl6RGGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUiFWTLkCS9prdfPVE9rtzy+kT2e9jzTN8SWqEgS9JjRg58JMcm+S6JHckuT3JuV370UmuSXJP93jU+MqVJI2qzxn+w8Bbq+p44CTg9UmOBzYD11bVccC13bwkacJGDvyq2l1VN3XT3wXuBNYCG4Bt3WrbgDN61ihJGoOxjOEnmQWeA2wH1lTV7m7Rg8Ca/WyzKcl8kvmFhYVxlCFJehS9Az/Jk4FPAm+qqu8ML6uqAmqx7apqa1XNVdXczMxM3zIkSQfQK/CTPJ5B2F9cVVd0zQ8lOaZbfgywp1+JkqRx6PMtnQAXAndW1QeGFl0FbOymNwJXjl6eJGlc+lxp+zzg94Fbk9zctb0D2AJcluQc4D7gzF4VSpLGYuTAr6r/ArKfxaeM+rySpOXhlbaS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSBL0mN6HN75ObNbr560iVIGoNJvpZ3bjn9MduXZ/iS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktSIqb/S1qtdJWlpPMOXpEYsW+AnOTXJ3Ul2JNm8XPuRJC3NsgR+ksOAvwReBhwPnJ3k+OXYlyRpaZbrDP9EYEdV3VtVPwA+AWxYpn1JkpZguT60XQvcPzS/C/i14RWSbAI2dbP/k+TuHvtbDXy9x/YrzaHWHzj0+nSo9Qfs00TkgoNafd/+/PzBbDyxb+lU1VZg6zieK8l8Vc2N47lWgk
OtP3Do9elQ6w/Yp2nQtz/LNaTzAHDs0Py6rk2SNCHLFfifB45Lsj7J4cBZwFXLtC9J0hIsy5BOVT2c5A3AvwOHARd
|
|
|
|
},
|
|
|
|
"metadata": {
|
|
|
|
"needs_background": "light"
|
|
|
|
},
|
|
|
|
"output_type": "display_data"
|
|
|
|
}
|
|
|
|
],
|
2022-05-13 22:06:56 +02:00
|
|
|
"source": [
|
|
|
"def draw_distribution(values=None):\n",
"    \"\"\"Draw a histogram of the test-statistic distribution.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    values : array-like, optional\n",
"        Values to plot, e.g. the t statistics of the bootstrap resamples.\n",
"        When omitted, a placeholder sample of 500 draws from a normal\n",
"        distribution with mean 170 and standard deviation 10 is plotted,\n",
"        which is what the function always did before.\n",
"    \"\"\"\n",
"    if values is None:\n",
"        # Placeholder data kept so that existing no-argument calls still work.\n",
"        values = np.random.normal(170, 10, 500)\n",
"    plt.hist(values)\n",
"    plt.show()\n",
"\n",
"\n",
"draw_distribution()"
|
2022-05-13 22:06:56 +02:00
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-13 23:43:00 +02:00
|
|
|
"execution_count": 60,
|
2022-05-11 15:02:15 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
2022-05-13 23:43:00 +02:00
|
|
|
"t: 6.893215520199072, df: 998, cv: 1.6463818766348755, p: 9.657386002004387e-12\n",
|
2022-05-11 15:02:15 +02:00
|
|
|
"\n",
|
|
|
|
"Reject the null hypothesis that the means are equal.\n",
|
|
|
|
"Reject the null hypothesis that the means are equal.\n"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"# Load the experiment data from experiment_data.csv and run the decision\n",
"# procedure on its 'Weight' and 'Age' columns.\n",
"# NOTE(review): these two columns presumably hold different quantities -\n",
"# confirm that comparing their means is the intended demonstration.\n",
"dataset = pd.read_csv('experiment_data.csv')\n",
|
|
|
|
"make_decision(dataset, ['Weight', 'Age'])"
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"interpreter": {
|
|
|
|
"hash": "11938c6bc6919ae2720b4d5011047913343b08a43b18698fd82dedb0d4417594"
|
|
|
|
},
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python 3.9.1 64-bit",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
"language_info": {
|
|
|
|
"codemirror_mode": {
|
|
|
|
"name": "ipython",
|
|
|
|
"version": 3
|
|
|
|
},
|
|
|
|
"file_extension": ".py",
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"version": "3.9.1"
|
|
|
|
},
|
|
|
|
"orig_nbformat": 4
|
|
|
|
},
|
|
|
|
"nbformat": 4,
|
|
|
|
"nbformat_minor": 2
|
|
|
|
}
|