Bootstrap-t-student/bootstrap-t.ipynb

196 lines
4.7 KiB
Plaintext
Raw Normal View History

2022-05-11 15:02:15 +02:00
{
"cells": [
2022-05-13 22:06:56 +02:00
{
"cell_type": "markdown",
"source": [
"Bootstrapowa wersja testu t.\n",
"Implementacja powinna obejmować test dla jednej próby, dla dwóch prób niezależnych oraz dla dwóch prób zależnych.\n",
"W każdej sytuacji oczekiwanym wejściem jest zbiór danych w odpowiednim formacie, a wyjściem p-wartość oraz ostateczna decyzja.\n",
"Dodatkowo powinien być rysowany odpowiedni rozkład statystyki testowej."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"Zbiór danych - ???\n",
"Hipoteza zerowa - ???\n",
"Hipoteza alternatywna - ???"
],
"metadata": {
"collapsed": false
}
},
2022-05-11 15:02:15 +02:00
{
"cell_type": "code",
2022-05-13 22:06:56 +02:00
"execution_count": 50,
2022-05-11 15:02:15 +02:00
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from math import sqrt\n",
"from scipy.stats import sem\n",
"from scipy.stats import t"
]
},
{
"cell_type": "code",
2022-05-13 22:06:56 +02:00
"execution_count": 51,
2022-05-11 15:02:15 +02:00
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def generate_bootstraps(data, n_bootstraps=100, random_state=None):\n",
"    \"\"\"Yield bootstrap resamples of ``data``.\n",
"\n",
"    Every resample draws ``data.shape[0]`` row positions with replacement and\n",
"    selects them through ``data.iloc``, so values of one row stay together.\n",
"\n",
"    Args:\n",
"        data: Object exposing ``shape`` and ``iloc`` (e.g. a pandas DataFrame).\n",
"        n_bootstraps: Number of resamples to generate.\n",
"        random_state: Optional seed (anything ``np.random.default_rng``\n",
"            accepts) that makes the resamples reproducible. With ``None`` the\n",
"            positions are drawn from NumPy's global random state.\n",
"\n",
"    Yields:\n",
"        One resampled object per iteration with as many rows as ``data``.\n",
"    \"\"\"\n",
"    data_size = data.shape[0]\n",
"    if random_state is None:\n",
"        # Keep using the global state so an earlier np.random.seed() still applies.\n",
"        draw = np.random.choice\n",
"    else:\n",
"        draw = np.random.default_rng(random_state).choice\n",
"    for _ in range(n_bootstraps):\n",
"        indices = draw(data_size, size=data_size)\n",
"        yield data.iloc[indices, :]"
2022-05-11 15:02:15 +02:00
]
},
{
"cell_type": "code",
2022-05-13 22:06:56 +02:00
"execution_count": 52,
2022-05-11 15:02:15 +02:00
"outputs": [],
"source": [
"def get_t_stat(data1, data2):\n",
"    \"\"\"Return the two-sample t statistic for ``data1`` versus ``data2``.\n",
"\n",
"    The statistic is the difference of the sample means divided by the\n",
"    square root of the sum of the squared standard errors (``scipy.stats.sem``)\n",
"    of both samples.\n",
"\n",
"    Args:\n",
"        data1: First sample of observations.\n",
"        data2: Second sample of observations.\n",
"\n",
"    Returns:\n",
"        ``(mean(data1) - mean(data2)) / sqrt(sem(data1)**2 + sem(data2)**2)``.\n",
"    \"\"\"\n",
"    std_err_first, std_err_second = sem(data1), sem(data2)\n",
"    # Standard error of the difference between the two means.\n",
"    std_err_diff = sqrt(std_err_first**2.0 + std_err_second**2.0)\n",
"    mean_diff = np.mean(data1) - np.mean(data2)\n",
"    return mean_diff / std_err_diff"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
2022-05-13 22:06:56 +02:00
"execution_count": 53,
2022-05-11 15:02:15 +02:00
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def independent_t_test(data, columns, alpha=0.05, n_bootstraps=100):\n",
"    \"\"\"Average a two-sample t statistic over bootstrap resamples of ``data``.\n",
"\n",
"    For every resample produced by ``generate_bootstraps`` the t statistic of\n",
"    ``columns[0]`` versus ``columns[1]`` is computed with ``get_t_stat``; the\n",
"    mean of these statistics is then referred to a Student t distribution\n",
"    with ``2 * n - 2`` degrees of freedom, where ``n`` is the number of rows.\n",
"\n",
"    NOTE(review): averaging bootstrap t statistics and using the theoretical\n",
"    t distribution may not be the intended bootstrap-t procedure - confirm.\n",
"\n",
"    Args:\n",
"        data: Table with one row per observation (needs ``shape`` and\n",
"            column access by name).\n",
"        columns: Sequence whose first two items name the compared columns.\n",
"        alpha: Significance level of the two-sided test.\n",
"        n_bootstraps: Number of bootstrap resamples to average over.\n",
"\n",
"    Returns:\n",
"        Tuple ``(t_stat, df, cv, p)``: mean bootstrap t statistic, degrees of\n",
"        freedom, two-sided critical value and two-sided p-value.\n",
"\n",
"    Raises:\n",
"        ValueError: If ``data`` has fewer than two rows or no resample was\n",
"            generated.\n",
"    \"\"\"\n",
"    data_size = data.shape[0]\n",
"    if data_size < 2:\n",
"        raise ValueError('at least two observations are required')\n",
"\n",
"    t_stat_sum = 0\n",
"    n_samples = 0\n",
"    for sample in generate_bootstraps(data, n_bootstraps):\n",
"        t_stat_sum += get_t_stat(sample[columns[0]], sample[columns[1]])\n",
"        n_samples += 1\n",
"    if n_samples == 0:\n",
"        raise ValueError('n_bootstraps must be positive')\n",
"\n",
"    # Average over the number of resamples, not over the number of rows.\n",
"    t_stat = t_stat_sum / n_samples\n",
"    df = 2 * data_size - 2\n",
"    # Two-sided critical value, consistent with the two-sided p-value below.\n",
"    cv = t.ppf(1.0 - alpha / 2.0, df)\n",
"    # sf() avoids the precision loss of 1 - cdf() for very small tails.\n",
"    p = t.sf(abs(t_stat), df) * 2.0\n",
"    return t_stat, df, cv, p"
]
},
{
"cell_type": "code",
2022-05-13 22:06:56 +02:00
"execution_count": 54,
2022-05-11 15:02:15 +02:00
"outputs": [],
"source": [
"def make_decision(data, columns, alpha=0.05):\n",
"    \"\"\"Run ``independent_t_test`` and print its result with both decisions.\n",
"\n",
"    The first decision compares ``abs(t_stat)`` with the critical value, the\n",
"    second compares the p-value with ``alpha``. Nothing is returned.\n",
"\n",
"    Args:\n",
"        data: Data set passed through to ``independent_t_test``.\n",
"        columns: Names of the two compared columns.\n",
"        alpha: Significance level.\n",
"    \"\"\"\n",
"    t_stat, df, cv, p = independent_t_test(data, columns, alpha)\n",
"    print(f't: {t_stat}, df: {df}, cv: {cv}, p: {p}\\n')\n",
"    # Decision based on the critical value.\n",
"    if abs(t_stat) <= cv:\n",
"        print('Accept null hypothesis that the means are equal.')\n",
"    else:\n",
"        print('Reject the null hypothesis that the means are equal.')\n",
"    # Decision based on the p-value.\n",
"    if p > alpha:\n",
"        print('Accept null hypothesis that the means are equal.')\n",
"    else:\n",
"        print('Reject the null hypothesis that the means are equal.')"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
2022-05-13 22:06:56 +02:00
"execution_count": 55,
"outputs": [],
"source": [
"def draw_distribution():\n",
"    \"\"\"Draw the distribution of the test statistic (placeholder, does nothing yet).\"\"\"\n",
"    return None"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 56,
2022-05-11 15:02:15 +02:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2022-05-13 22:06:56 +02:00
"t: 6.891235313595221, df: 998, cv: 1.6463818766348755, p: 9.78683800667568e-12\n",
2022-05-11 15:02:15 +02:00
"\n",
"Reject the null hypothesis that the means are equal.\n",
"Reject the null hypothesis that the means are equal.\n"
]
}
],
"source": [
"# Load the experiment data and print the test decision for the two columns.\n",
"dataset = pd.read_csv('experiment_data.csv')\n",
"make_decision(data=dataset, columns=['Weight', 'Age'])"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"interpreter": {
"hash": "11938c6bc6919ae2720b4d5011047913343b08a43b18698fd82dedb0d4417594"
},
"kernelspec": {
"display_name": "Python 3.9.1 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}