Bootstrap-t-student/bootstrap-t.ipynb at 64aa809906635f3bc4bfa90fe82892a0cf4bcee1

Marcin Kostrzewski 7e0cf14302 Initial implementation

2022-05-11 15:02:15 +02:00

3.7 KiB

Raw Blame History

import numpy as np
import pandas as pd
from math import sqrt
from scipy.stats import sem
from scipy.stats import t

def generate_bootstraps(data, n_bootstraps=100):
    """Lazily yield bootstrap resamples of ``data``.

    Each resample has as many rows as ``data`` itself; the row positions are
    drawn with ``np.random.choice``, so the same row may appear several times.

    Args:
        data: Two-dimensional table supporting ``.shape`` and ``.iloc``
            (the caller in this file passes a pandas DataFrame).
        n_bootstraps: How many resamples to produce.

    Yields:
        ``data.iloc[positions, :]`` for each freshly drawn set of positions.
    """
    n_rows = data.shape[0]
    for _ in range(n_bootstraps):
        # Global NumPy RNG: seed it with np.random.seed for repeatable draws.
        positions = np.random.choice(n_rows, size=n_rows)
        yield data.iloc[positions, :]

def get_t_stat(data1, data2):
    """Return the t statistic for the difference between two sample means.

    The statistic is ``(mean(data1) - mean(data2))`` divided by the standard
    error of that difference, where the latter is the square root of the sum
    of the two squared standard errors of the mean (``scipy.stats.sem``).

    Args:
        data1: First sample of numeric observations.
        data2: Second sample of numeric observations.

    Returns:
        The t statistic; positive when ``data1`` has the larger mean.
    """
    mean_gap = np.mean(data1) - np.mean(data2)
    # Variance of the difference is the sum of the two squared standard errors.
    gap_variance = sem(data1) ** 2.0 + sem(data2) ** 2.0
    return mean_gap / sqrt(gap_variance)

def independent_t_test(data, columns, alpha=0.05, n_bootstraps=100):
    """Run a bootstrap-averaged, two-tailed t-test on two columns of ``data``.

    A t statistic is computed on every bootstrap resample produced by
    ``generate_bootstraps`` and the resample statistics are averaged. The
    averaged statistic is then compared against Student's t distribution.

    Args:
        data: Table with a ``.shape`` attribute whose rows are resampled
            (the caller in this file passes a pandas DataFrame).
        columns: Sequence whose first two items are the labels of the two
            columns to compare.
        alpha: Significance level of the two-tailed test.
        n_bootstraps: Number of bootstrap resamples to average over.

    Returns:
        Tuple ``(t_stat, df, cv, p)``: the mean bootstrap t statistic, the
        degrees of freedom (``2 * rows - 2``), the two-tailed critical value
        to compare with ``abs(t_stat)``, and the two-tailed p-value.

    Raises:
        ValueError: If no bootstrap resample was produced, so there is
            nothing to average.
    """
    first_col, second_col = columns[0], columns[1]
    t_stats = [
        get_t_stat(sample[first_col], sample[second_col])
        for sample in generate_bootstraps(data, n_bootstraps)
    ]
    if not t_stats:
        raise ValueError('n_bootstraps must be at least 1')

    # Average over the number of resamples actually drawn. Dividing by the
    # number of rows in ``data`` would scale the statistic by an unrelated
    # factor whenever the row count differs from the resample count.
    t_stat = sum(t_stats) / len(t_stats)

    data_size = data.shape[0]
    df = 2 * data_size - 2
    # Callers compare abs(t_stat) with cv, i.e. a two-tailed test, so alpha
    # is split between both tails to stay consistent with the p-value below.
    cv = t.ppf(1.0 - alpha / 2.0, df)
    # sf(x) == 1 - cdf(x) but keeps precision for very small tail areas.
    p = 2.0 * t.sf(abs(t_stat), df)
    return t_stat, df, cv, p

def make_decision(data, columns, alpha=0.05):
    """Run ``independent_t_test`` and print its result and two verdicts.

    The first verdict compares ``abs(t)`` with the critical value; the second
    compares the p-value with ``alpha``. Both are printed, one per line.

    Args:
        data: Table forwarded to ``independent_t_test``.
        columns: Labels of the two columns to compare, forwarded unchanged.
        alpha: Significance level, forwarded and used for the p-value check.

    Returns:
        None. All output goes to stdout.
    """
    statistic, dof, critical, p_value = independent_t_test(data, columns, alpha)
    print(f't: {statistic}, df: {dof}, cv: {critical}, p: {p_value}\n')

    keep_null = 'Accept null hypothesis that the means are equal.'
    drop_null = 'Reject the null hypothesis that the means are equal.'

    # Verdict from the critical value.
    print(keep_null if abs(statistic) <= critical else drop_null)
    # Verdict from the p-value.
    print(keep_null if p_value > alpha else drop_null)

# Load the experiment data from 'experiment_data.csv' (path is relative to the
# working directory) and print the t-test verdicts for the difference between
# the means of the 'Weight' and 'Age' columns at the default alpha.
dataset = pd.read_csv('experiment_data.csv')
make_decision(dataset, ['Weight', 'Age'])

t: 6.903407918031469, df: 998, cv: 1.6463818766348755, p: 9.018563673635072e-12

Reject the null hypothesis that the means are equal.
Reject the null hypothesis that the means are equal.

3.7 KiB Raw Blame History

3.7 KiB

Raw Blame History