{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from math import sqrt\n",
    "from scipy.stats import sem\n",
    "from scipy.stats import t"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "def generate_bootstraps(data, n_bootstraps=100, random_state=None):\n",
    "    \"\"\"Yield bootstrap resamples of the rows of `data`.\n",
    "\n",
    "    Each resample draws `data.shape[0]` row positions uniformly with\n",
    "    replacement and selects those rows with `.iloc`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : pandas.DataFrame\n",
    "        Table whose rows are resampled.\n",
    "    n_bootstraps : int, default 100\n",
    "        Number of resamples to yield.\n",
    "    random_state : int or numpy.random.Generator, optional\n",
    "        Seed (or generator) for reproducible draws. When None, the global\n",
    "        `np.random` state is used.\n",
    "\n",
    "    Yields\n",
    "    ------\n",
    "    pandas.DataFrame\n",
    "        A resample with as many rows as `data`.\n",
    "    \"\"\"\n",
    "    n_rows = data.shape[0]\n",
    "    # The np.random module and a Generator both expose `.choice`.\n",
    "    sampler = np.random if random_state is None else np.random.default_rng(random_state)\n",
    "    for _ in range(n_bootstraps):\n",
    "        indices = sampler.choice(n_rows, size=n_rows)\n",
    "        yield data.iloc[indices, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "def get_t_stat(data1, data2):\n",
    "    \"\"\"Return the t statistic for the difference between two sample means.\n",
    "\n",
    "    The statistic is `(mean1 - mean2) / sed`, where `sed` is the square root\n",
    "    of the sum of the squared standard errors of the two samples.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data1, data2 : array-like\n",
    "        Numeric samples to compare.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    float\n",
    "        Difference of the sample means in units of `sed`.\n",
    "    \"\"\"\n",
    "    # Standard error of the difference between the two means.\n",
    "    sed = sqrt(sem(data1) ** 2.0 + sem(data2) ** 2.0)\n",
    "    return (np.mean(data1) - np.mean(data2)) / sed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "def independent_t_test(data, columns, alpha=0.05, n_bootstraps=100, random_state=None):\n",
    "    \"\"\"Two-sided t-test on two columns using a bootstrap-averaged t statistic.\n",
    "\n",
    "    The t statistic from `get_t_stat` is computed on every bootstrap resample\n",
    "    and averaged over the number of resamples.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : pandas.DataFrame\n",
    "        Table holding both samples.\n",
    "    columns : sequence of str\n",
    "        `columns[0]` and `columns[1]` name the two columns to compare.\n",
    "    alpha : float, default 0.05\n",
    "        Significance level of the two-sided test.\n",
    "    n_bootstraps : int, default 100\n",
    "        Number of bootstrap resamples to average over.\n",
    "    random_state : int or numpy.random.Generator, optional\n",
    "        Forwarded to `generate_bootstraps` for reproducible resampling.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple\n",
    "        `(t_stat, df, cv, p)`: mean t statistic, degrees of freedom\n",
    "        (`2 * n_rows - 2`), two-sided critical value and two-sided p-value.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If `n_bootstraps` is smaller than 1.\n",
    "    \"\"\"\n",
    "    if n_bootstraps < 1:\n",
    "        raise ValueError('n_bootstraps must be at least 1')\n",
    "\n",
    "    t_stats = [\n",
    "        get_t_stat(sample[columns[0]], sample[columns[1]])\n",
    "        for sample in generate_bootstraps(data, n_bootstraps, random_state)\n",
    "    ]\n",
    "    # Average over the number of resamples, not over the number of rows.\n",
    "    t_stat = sum(t_stats) / len(t_stats)\n",
    "\n",
    "    data_size = data.shape[0]\n",
    "    df = 2 * data_size - 2\n",
    "    # abs(t_stat) is compared with cv, so the critical value must be\n",
    "    # two-sided to agree with the two-sided p-value below.\n",
    "    cv = t.ppf(1.0 - alpha / 2.0, df)\n",
    "    # The survival function keeps precision for very small tail probabilities.\n",
    "    p = 2.0 * t.sf(abs(t_stat), df)\n",
    "    return t_stat, df, cv, p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "def make_decision(data, columns, alpha=0.05, n_bootstraps=100, random_state=None):\n",
    "    \"\"\"Run `independent_t_test` and print the test decision.\n",
    "\n",
    "    The statistics are printed first, followed by one verdict based on the\n",
    "    critical value and one based on the p-value.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : pandas.DataFrame\n",
    "        Table holding both samples.\n",
    "    columns : sequence of str\n",
    "        Names of the two columns to compare.\n",
    "    alpha : float, default 0.05\n",
    "        Significance level.\n",
    "    n_bootstraps : int, default 100\n",
    "        Number of bootstrap resamples, forwarded to `independent_t_test`.\n",
    "    random_state : int or numpy.random.Generator, optional\n",
    "        Seed for reproducible resampling, forwarded to `independent_t_test`.\n",
    "    \"\"\"\n",
    "    t_stat, df, cv, p = independent_t_test(\n",
    "        data, columns, alpha, n_bootstraps=n_bootstraps, random_state=random_state\n",
    "    )\n",
    "    print(f't: {t_stat}, df: {df}, cv: {cv}, p: {p}\\n')\n",
    "\n",
    "    # Verdict from the critical value.\n",
    "    if abs(t_stat) <= cv:\n",
    "        print('Accept null hypothesis that the means are equal.')\n",
    "    else:\n",
    "        print('Reject the null hypothesis that the means are equal.')\n",
    "\n",
    "    # Verdict from the p-value.\n",
    "    if p > alpha:\n",
    "        print('Accept null hypothesis that the means are equal.')\n",
    "    else:\n",
    "        print('Reject the null hypothesis that the means are equal.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "SEED = 42  # fixes the bootstrap draws so Restart & Run All reproduces the result\n",
    "\n",
    "dataset = pd.read_csv('experiment_data.csv')\n",
    "make_decision(dataset, ['Weight', 'Age'], random_state=SEED)"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "11938c6bc6919ae2720b4d5011047913343b08a43b18698fd82dedb0d4417594"
  },
  "kernelspec": {
   "display_name": "Python 3.9.1 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.1"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}