2022-05-11 15:02:15 +02:00
|
|
|
{
|
|
|
|
"cells": [
|
2022-05-13 22:06:56 +02:00
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
2022-05-16 23:34:31 +02:00
|
|
|
"metadata": {
|
|
|
|
"collapsed": false
|
|
|
|
},
|
2022-05-13 22:06:56 +02:00
|
|
|
"source": [
|
|
|
|
"Bootstrapowa wersja testu t.\n",
|
|
|
|
"Implementacja powinna obejmować test dla jednej próby, dla dwóch prób niezależnych oraz dla dwóch prób zależnych.\n",
|
|
|
|
"W każdej sytuacji oczekiwanym wejściem jest zbiór danych w odpowiednim formacie, a wyjściem p-wartość oraz ostateczna decyzja.\n",
|
|
|
|
"Dodatkowo powinien być rysowany odpowiedni rozkład statystyki testowej."
|
2022-05-16 23:34:31 +02:00
|
|
|
]
|
2022-05-13 22:06:56 +02:00
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
2022-05-16 23:34:31 +02:00
|
|
|
"metadata": {
|
|
|
|
"collapsed": false
|
|
|
|
},
|
2022-05-13 22:06:56 +02:00
|
|
|
"source": [
|
|
|
|
"Zbiór danych - ???\n",
|
|
|
|
"Hipoteza zerowa - ???\n",
|
2022-05-13 23:43:00 +02:00
|
|
|
"Hipoteza alternatywna - ???\n",
|
|
|
|
"\n",
|
|
|
|
"Dla każdego z 3 testów inne\n",
|
|
|
|
"https://www.jmp.com/en_ch/statistics-knowledge-portal/t-test.html"
|
2022-05-16 23:34:31 +02:00
|
|
|
]
|
2022-05-13 22:06:56 +02:00
|
|
|
},
|
2022-05-11 15:02:15 +02:00
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-16 23:34:31 +02:00
|
|
|
"execution_count": 1131,
|
2022-05-11 15:02:15 +02:00
|
|
|
"metadata": {
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"import numpy as np\n",
|
|
|
|
"import pandas as pd\n",
|
|
|
|
"from math import sqrt\n",
|
|
|
|
"from scipy.stats import sem\n",
|
2022-05-13 23:43:00 +02:00
|
|
|
"from scipy.stats import t\n",
|
2022-05-14 15:31:47 +02:00
|
|
|
"import matplotlib.pyplot as plt\n",
|
2022-05-14 16:40:40 +02:00
|
|
|
"from statistics import mean, stdev\n",
|
|
|
|
"from scipy.stats import ttest_ind, ttest_1samp, ttest_rel"
|
2022-05-11 15:02:15 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-16 23:34:31 +02:00
|
|
|
"execution_count": 1132,
|
2022-05-11 15:02:15 +02:00
|
|
|
"metadata": {
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"def generate_bootstraps(data, n_bootstraps=100):\n",
|
|
|
|
" data_size = data.shape[0]\n",
|
2022-05-13 22:06:56 +02:00
|
|
|
" for _ in range(n_bootstraps):\n",
|
|
|
|
" indices = np.random.choice(len(data), size=data_size)\n",
|
|
|
|
" yield data.iloc[indices, :]"
|
2022-05-11 15:02:15 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-16 23:34:31 +02:00
|
|
|
"execution_count": 1133,
|
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
},
|
2022-05-14 15:31:47 +02:00
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2022-05-16 23:34:31 +02:00
|
|
|
"def t_stat_single(sample, population_mean=2):\n",
|
2022-05-14 15:31:47 +02:00
|
|
|
" \"\"\"Funkcja oblicza wartość statystyki testowej dla jednej próbki\"\"\"\n",
|
2022-05-16 23:34:31 +02:00
|
|
|
" sample = sample[0].values.tolist()\n",
|
2022-05-14 15:31:47 +02:00
|
|
|
" sample_size = len(sample)\n",
|
2022-05-16 23:34:31 +02:00
|
|
|
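"    # FIXME: min(0.00000001, sqrt(sample_size)) always evaluates to 0.00000001, so sqrt(sample_size) is never used; the standard error should be stdev(sample) / sqrt(sample_size)\n",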
|
|
|
|
" return (mean(sample) - population_mean) / (stdev(sample) / min(0.00000001, sqrt(sample_size)))"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 1134,
|
2022-05-14 15:31:47 +02:00
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
2022-05-16 23:34:31 +02:00
|
|
|
},
|
2022-05-14 15:31:47 +02:00
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2022-05-14 16:47:42 +02:00
|
|
|
"def t_stat_ind(sample_1, sample_2):\n",
|
2022-05-14 16:40:40 +02:00
|
|
|
" \"\"\"Funkcja oblicza wartość statystyki testowej dla dwóch próbek niezależnych\"\"\"\n",
|
2022-05-16 23:34:31 +02:00
|
|
|
" sample_1 = sample_1[0].values.tolist()\n",
|
|
|
|
" sample_2 = sample_2[0].values.tolist()\n",
|
2022-05-14 16:47:42 +02:00
|
|
|
" sed = sqrt(sem(sample_1)**2 + sem(sample_2)**2)\n",
|
|
|
|
" return (mean(sample_1) - mean(sample_2)) / sed"
|
2022-05-16 23:34:31 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 1135,
|
2022-05-14 15:31:47 +02:00
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
2022-05-16 23:34:31 +02:00
|
|
|
},
|
2022-05-14 15:31:47 +02:00
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2022-05-14 16:40:40 +02:00
|
|
|
"def t_stat_dep(sample_1, sample_2):\n",
|
|
|
|
" \"\"\"Funkcja oblicza wartość statystyki testowej dla dwóch próbek zależnych\"\"\"\n",
|
2022-05-16 23:34:31 +02:00
|
|
|
" sample_1 = sample_1[0].values.tolist()\n",
|
|
|
|
" sample_2 = sample_2[0].values.tolist()\n",
|
2022-05-14 16:40:40 +02:00
|
|
|
" differences = [x_1 - x_2 for x_1, x_2 in zip(sample_1, sample_2)]\n",
|
|
|
|
" sample_size = len(sample_1)\n",
|
2022-05-14 17:09:29 +02:00
|
|
|
"    mu = 0 # The constant is zero because we test whether the mean of the differences is significantly different from zero.\n",
|
2022-05-16 23:34:31 +02:00
|
|
|
" return (mean(differences) - mu) / (stdev(differences) / min(0.00000001, sqrt(sample_size)))"
|
|
|
|
]
|
2022-05-14 15:31:47 +02:00
|
|
|
},
|
2022-05-14 17:09:29 +02:00
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-16 23:34:31 +02:00
|
|
|
"execution_count": 1136,
|
|
|
|
"metadata": {},
|
2022-05-14 17:09:29 +02:00
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2022-05-16 23:34:31 +02:00
|
|
|
"def df_dep(sample_1, sample_2):\n",
|
|
|
|
" \"\"\"Funkcja oblicza stopnie swobody dla dwóch próbek zależnych\"\"\"\n",
|
|
|
|
" l1, l2 = len(sample_1), len(sample_2)\n",
|
|
|
|
" assert l1 == l2 \n",
|
|
|
|
"\n",
|
|
|
|
" return l1"
|
|
|
|
]
|
2022-05-14 17:09:29 +02:00
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-16 23:34:31 +02:00
|
|
|
"execution_count": 1137,
|
|
|
|
"metadata": {},
|
2022-05-14 17:09:29 +02:00
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2022-05-16 23:34:31 +02:00
|
|
|
"def df_ind(sample_1, sample_2):\n",
|
|
|
|
" \"\"\"Funkcja oblicza stopnie swobody dla dwóch próbek niezależnych\"\"\"\n",
|
|
|
|
" return len(sample_1) + len(sample_2) - 2"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 1138,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"def df_single(sample_1):\n",
|
|
|
|
" \"\"\"Funkcja oblicza stopnie swobody dla jednej próbki\"\"\"\n",
|
|
|
|
"    # TODO: a one-sample t-test has len(sample_1) - 1 degrees of freedom; this currently returns len(sample_1)\n",
|
|
|
|
" return len(sample_1)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 1139,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"def calculate_p(t_stat, df):\n",
|
|
|
|
" \"\"\"Funkcja oblicza wartość *p* na podstawie statystyki testowej i stopni swobody\"\"\"\n",
|
|
|
|
" return (1.0 - t.cdf(abs(t_stat), df)) * 2.0"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 1140,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"def calculate_cv(df, alpha=0.05):\n",
|
|
|
|
" \"\"\"Funkcja oblicza wartość krytyczną (critical value)\"\"\"\n",
|
|
|
|
" return t.ppf(1.0 - alpha, df)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 1141,
|
2022-05-14 17:09:29 +02:00
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
2022-05-16 23:34:31 +02:00
|
|
|
},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"def bootstrap_one_sample(sample):\n",
|
|
|
|
" return t_test(\n",
|
|
|
|
" sample_1=sample,\n",
|
|
|
|
" df_fn=df_single,\n",
|
|
|
|
" t_stat_fn=t_stat_single\n",
|
|
|
|
" )"
|
|
|
|
]
|
2022-05-14 17:09:29 +02:00
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-16 23:34:31 +02:00
|
|
|
"execution_count": 1142,
|
2022-05-14 17:09:29 +02:00
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
2022-05-16 23:34:31 +02:00
|
|
|
},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"def bootstrap_independent(sample_1, sample_2):\n",
|
|
|
|
" return t_test(\n",
|
|
|
|
" sample_1=sample_1,\n",
|
|
|
|
" sample_2=sample_2,\n",
|
|
|
|
" df_fn=df_ind,\n",
|
|
|
|
" t_stat_fn=t_stat_ind\n",
|
|
|
|
" )"
|
|
|
|
]
|
2022-05-14 17:09:29 +02:00
|
|
|
},
|
2022-05-14 15:31:47 +02:00
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-16 23:34:31 +02:00
|
|
|
"execution_count": 1143,
|
2022-05-11 15:02:15 +02:00
|
|
|
"metadata": {
|
2022-05-16 23:34:31 +02:00
|
|
|
"collapsed": false,
|
2022-05-11 15:02:15 +02:00
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2022-05-16 23:34:31 +02:00
|
|
|
"def bootstrap_dependent(sample_1, sample_2):\n",
|
|
|
|
" return t_test(\n",
|
|
|
|
" sample_1=sample_1,\n",
|
|
|
|
" sample_2=sample_2,\n",
|
|
|
|
" df_fn=df_dep,\n",
|
|
|
|
" t_stat_fn=t_stat_dep\n",
|
|
|
|
" )"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 1144,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"def get_t_stats(sample_1, sample_2=None, t_stat_fn=t_stat_ind):\n",
|
|
|
|
" \"\"\"Funkcja oblicza listę statystyk testowych dla każdej próbki bootstrapowej wybranej na podstawie danych sample_1 i sample_2\"\"\"\n",
|
2022-05-16 18:52:49 +02:00
|
|
|
" t_stat_list = []\n",
|
2022-05-16 23:34:31 +02:00
|
|
|
"\n",
|
|
|
|
" # Separate case for single tests\n",
|
|
|
|
" if sample_2 is None:\n",
|
|
|
|
" for bootstrap in generate_bootstraps(sample_1):\n",
|
|
|
|
" stat = t_stat_fn(bootstrap)\n",
|
|
|
|
" t_stat_list.append(stat)\n",
|
|
|
|
" return t_stat_list\n",
|
|
|
|
" \n",
|
|
|
|
" for bootstrap_1, bootstrap_2 in zip(generate_bootstraps(sample_1), generate_bootstraps(sample_2)):\n",
|
|
|
|
" stat = t_stat_fn(bootstrap_1, bootstrap_2)\n",
|
2022-05-16 18:52:49 +02:00
|
|
|
" t_stat_list.append(stat)\n",
|
2022-05-16 23:34:31 +02:00
|
|
|
" \n",
|
|
|
|
" return t_stat_list"
|
2022-05-11 15:02:15 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-16 23:34:31 +02:00
|
|
|
"execution_count": 1145,
|
|
|
|
"metadata": {
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
},
|
2022-05-11 15:02:15 +02:00
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2022-05-16 23:34:31 +02:00
|
|
|
"def t_test(sample_1, sample_2=None, df_fn=df_ind, t_stat_fn=t_stat_ind, alpha=0.05):\n",
|
|
|
|
" \"\"\"\n",
|
|
|
|
" Funkcja przeprowadza test T-studenta dla dwóch zmiennych.\n",
|
|
|
|
"    Jeśli sample_2 nie zostanie podane, test jest przeprowadzany dla jednej zmiennej.\n",
|
|
|
|
" @param df_fn - funkcja obliczająca stopnie swobody\n",
|
|
|
|
" @param t_stat_fn - funkcja obliczająca statystykę T\n",
|
|
|
|
" \"\"\"\n",
|
|
|
|
" t_stat_list = get_t_stats(sample_1, sample_2, t_stat_fn)\n",
|
|
|
|
" t_stat_sum = sum(t_stat_list)\n",
|
|
|
|
"\n",
|
|
|
|
" data_size = sample_1.shape[0]\n",
|
|
|
|
"\n",
|
|
|
|
" t_stat = t_stat_sum / data_size\n",
|
|
|
|
"\n",
|
|
|
|
" df = 0.0\n",
|
|
|
|
" if sample_2 is None:\n",
|
|
|
|
" df = df_fn(sample_1)\n",
|
2022-05-11 15:02:15 +02:00
|
|
|
" else:\n",
|
2022-05-16 23:34:31 +02:00
|
|
|
" df = df_fn(sample_1, sample_2)\n",
|
|
|
|
" cv = calculate_cv(df, alpha)\n",
|
|
|
|
" p = calculate_p(t_stat, df)\n",
|
|
|
|
" \n",
|
|
|
|
" return t_stat, df, cv, p, t_stat_list"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 1146,
|
2022-05-11 15:02:15 +02:00
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
2022-05-16 23:34:31 +02:00
|
|
|
},
|
2022-05-16 18:52:49 +02:00
|
|
|
"outputs": [],
|
2022-05-13 22:06:56 +02:00
|
|
|
"source": [
|
2022-05-16 23:34:31 +02:00
|
|
|
"def draw_distribution(stats): \n",
|
|
|
|
" # To powinno być zdefiniowane przed make decision w sumie\n",
|
2022-05-16 18:52:49 +02:00
|
|
|
" \"\"\"\n",
|
|
|
|
" Funkcja rysuje rozkład statystyki testowej\n",
|
|
|
|
" stats: lista statystyk testowych\n",
|
|
|
|
" \"\"\"\n",
|
|
|
|
" plt.hist(stats)\n",
|
|
|
|
" plt.xlabel('Test statistic value')\n",
|
|
|
|
" plt.ylabel('Frequency')\n",
|
|
|
|
" plt.show()"
|
2022-05-16 23:34:31 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 1147,
|
2022-05-13 22:06:56 +02:00
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
2022-05-16 23:34:31 +02:00
|
|
|
},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"def make_decision(data, columns):\n",
|
|
|
|
" # TODO\n",
|
|
|
|
" pass"
|
|
|
|
]
|
2022-05-13 22:06:56 +02:00
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-16 23:34:31 +02:00
|
|
|
"execution_count": 1148,
|
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
|
|
|
},
|
2022-05-11 15:02:15 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
2022-05-14 16:40:40 +02:00
|
|
|
"Statystyka testowa dla jednej próby:\n",
|
2022-05-16 23:34:31 +02:00
|
|
|
"6.324555320336758e-09 - z naszej funkcji\n",
|
|
|
|
"[1.41421356] - z gotowej biblioteki\n",
|
2022-05-14 16:40:40 +02:00
|
|
|
"\n",
|
|
|
|
"Statystyka testowa dla dwóch prób niezależnych:\n",
|
|
|
|
"-3.0 - z naszej funkcji\n",
|
2022-05-16 23:34:31 +02:00
|
|
|
"[-3.] - z gotowej biblioteki\n",
|
2022-05-14 16:40:40 +02:00
|
|
|
"\n",
|
|
|
|
"Statystyka testowa dla dwóch prób zależnych:\n",
|
2022-05-16 23:34:31 +02:00
|
|
|
"-7.302967433402215e-09 - z naszej funkcji\n",
|
|
|
|
"[-1.63299316] - z gotowej biblioteki\n",
|
|
|
|
"\n"
|
2022-05-14 16:40:40 +02:00
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
2022-05-16 23:34:31 +02:00
|
|
|
"# Testy dla samych statystyk testowych\n",
|
|
|
|
"def pretty_print_stats(t_stat_selfmade, t_stat_lib, suffix):\n",
|
|
|
|
" print(f'Statystyka testowa dla {suffix}:')\n",
|
|
|
|
" print(t_stat_selfmade, '- z naszej funkcji')\n",
|
|
|
|
" print(t_stat_lib, '- z gotowej biblioteki')\n",
|
|
|
|
" print()\n",
|
|
|
|
" \n",
|
|
|
|
"dummy = pd.DataFrame([1, 2, 3, 4, 5])\n",
|
|
|
|
"dummy2 = pd.DataFrame([4, 5, 6, 7, 8])\n",
|
|
|
|
"dummy3 = pd.DataFrame([1, 3 , 3, 4, 6])\n",
|
|
|
|
"\n",
|
2022-05-14 16:40:40 +02:00
|
|
|
"t_stat_selfmade = t_stat_single(dummy, 2)\n",
|
|
|
|
"t_stat_lib, _ = ttest_1samp(dummy, 2)\n",
|
2022-05-16 23:34:31 +02:00
|
|
|
"pretty_print_stats(t_stat_selfmade, t_stat_lib, 'jednej próby')\n",
|
|
|
|
"\n",
|
2022-05-14 16:47:42 +02:00
|
|
|
"t_stat_selfmade = t_stat_ind(dummy, dummy2)\n",
|
2022-05-14 16:40:40 +02:00
|
|
|
"t_stat_lib, _ = ttest_ind(dummy, dummy2)\n",
|
2022-05-16 23:34:31 +02:00
|
|
|
"pretty_print_stats(t_stat_selfmade, t_stat_lib, 'dwóch prób niezależnych')\n",
|
|
|
|
"\n",
|
2022-05-14 16:40:40 +02:00
|
|
|
"t_stat_selfmade = t_stat_dep(dummy, dummy3)\n",
|
|
|
|
"t_stat_lib, _ = ttest_rel(dummy, dummy3)\n",
|
2022-05-16 23:34:31 +02:00
|
|
|
"pretty_print_stats(t_stat_selfmade, t_stat_lib, 'dwóch prób zależnych')"
|
|
|
|
]
|
2022-05-14 16:40:40 +02:00
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-05-16 23:34:31 +02:00
|
|
|
"execution_count": 1149,
|
|
|
|
"metadata": {},
|
2022-05-14 16:40:40 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
2022-05-16 23:34:31 +02:00
|
|
|
"Statystyki dla jednej próby:\n",
|
|
|
|
"t: 1.6371853975970775e-07, df: 5, cv: 2.015048372669157, p: 0.9999998757026942\n",
|
|
|
|
"\n",
|
|
|
|
"Statystyki dla dwóch prób zależnych:\n",
|
|
|
|
"t: 2.721731710913334e-07, df: 5, cv: 2.015048372669157, p: 0.9999997933624869\n",
|
|
|
|
"\n",
|
|
|
|
"Statystyki dla dwóch prób niezależnych:\n",
|
|
|
|
"t: 56.011644110212046, df: 8, cv: 1.8595480375228421, p: 1.145550321268729e-11\n",
|
2022-05-16 18:52:49 +02:00
|
|
|
"\n"
|
|
|
|
]
|
2022-05-11 15:02:15 +02:00
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
2022-05-16 23:34:31 +02:00
|
|
|
"# Testy z bootstrappowaniem\n",
|
|
|
|
"\n",
|
|
|
|
"def pretty_print_full_stats(t_stat, df, cv, p):\n",
|
|
|
|
" print(f't: {t_stat}, df: {df}, cv: {cv}, p: {p}\\n')\n",
|
|
|
|
"\n",
|
|
|
|
"print('Statystyki dla jednej próby:')\n",
|
|
|
|
"t_stat, df, cv, p, _ = bootstrap_one_sample(dummy)\n",
|
|
|
|
"pretty_print_full_stats(t_stat, df, cv, p)\n",
|
|
|
|
"\n",
|
|
|
|
"print('Statystyki dla dwóch prób zależnych:')\n",
|
|
|
|
"t_stat, df, cv, p, _ = bootstrap_dependent(dummy2, dummy3)\n",
|
|
|
|
"pretty_print_full_stats(t_stat, df, cv, p)\n",
|
|
|
|
"\n",
|
|
|
|
"print('Statystyki dla dwóch prób niezależnych:')\n",
|
|
|
|
"t_stat, df, cv, p, _ = bootstrap_independent(dummy2, dummy3)\n",
|
|
|
|
"pretty_print_full_stats(t_stat, df, cv, p)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 1150,
|
2022-05-11 15:02:15 +02:00
|
|
|
"metadata": {
|
|
|
|
"collapsed": false,
|
|
|
|
"pycharm": {
|
|
|
|
"name": "#%%\n"
|
|
|
|
}
|
2022-05-16 23:34:31 +02:00
|
|
|
},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"dataset = pd.read_csv('experiment_data.csv')\n",
|
|
|
|
"make_decision(dataset, ['Weight', 'Age'])"
|
|
|
|
]
|
2022-05-11 15:02:15 +02:00
|
|
|
}
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"interpreter": {
|
|
|
|
"hash": "11938c6bc6919ae2720b4d5011047913343b08a43b18698fd82dedb0d4417594"
|
|
|
|
},
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python 3.9.1 64-bit",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
"language_info": {
|
|
|
|
"codemirror_mode": {
|
|
|
|
"name": "ipython",
|
|
|
|
"version": 3
|
|
|
|
},
|
|
|
|
"file_extension": ".py",
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"version": "3.9.1"
|
|
|
|
},
|
|
|
|
"orig_nbformat": 4
|
|
|
|
},
|
|
|
|
"nbformat": 4,
|
|
|
|
"nbformat_minor": 2
|
2022-05-16 23:34:31 +02:00
|
|
|
}
|