{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from math import sqrt\n",
    "from scipy.stats import sem\n",
    "from scipy.stats import t"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "def generate_bootstraps(data, n_bootstraps=100):\n",
    "    data_size = data.shape[0]\n",
    "    for b in range(n_bootstraps):\n",
    "        indicies =  np.random.choice(len(data), size=data_size)\n",
    "        yield data.iloc[indicies, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "outputs": [],
   "source": [
    "def get_t_stat(data1, data2):\n",
    "    mean1 = np.mean(data1)\n",
    "    mean2 = np.mean(data2)\n",
    "    sem1 = sem(data1)\n",
    "    sem2 = sem(data2)\n",
    "\n",
    "    sed = sqrt(sem1**2.0 + sem2**2.0)\n",
    "    return (mean1 - mean2) / sed"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "def independent_t_test(data, columns, alpha=0.05, n_bootstraps=100):\n",
    "    \"\"\"Bootstrap-averaged t-test comparing the means of two columns.\n",
    "\n",
    "    The t statistic is computed on every bootstrap resample of ``data`` and\n",
    "    the mean of those statistics is then tested two-sidedly.\n",
    "\n",
    "    Args:\n",
    "        data: pandas DataFrame holding both columns.\n",
    "        columns: Sequence whose first two items name the columns to compare.\n",
    "        alpha: Significance level of the two-sided test.\n",
    "        n_bootstraps: Number of bootstrap resamples to average over.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(t_stat, df, cv, p)``: the averaged t statistic, the degrees\n",
    "        of freedom ``2 * len(data) - 2``, the two-sided critical value and\n",
    "        the two-sided p-value.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If ``n_bootstraps`` is smaller than 1.\n",
    "    \"\"\"\n",
    "    if n_bootstraps < 1:\n",
    "        raise ValueError('n_bootstraps must be at least 1')\n",
    "\n",
    "    first, second = columns[0], columns[1]\n",
    "    t_stats = [\n",
    "        get_t_stat(sample[first], sample[second])\n",
    "        for sample in generate_bootstraps(data, n_bootstraps)\n",
    "    ]\n",
    "    # Average over the number of resamples drawn; dividing by the number of\n",
    "    # rows instead would scale the statistic by n_bootstraps / len(data).\n",
    "    t_stat = sum(t_stats) / len(t_stats)\n",
    "\n",
    "    data_size = data.shape[0]\n",
    "    df = 2 * data_size - 2\n",
    "    # Two-sided critical value, consistent with the two-sided p-value below\n",
    "    # and with comparisons made against abs(t_stat).\n",
    "    cv = t.ppf(1.0 - alpha / 2.0, df)\n",
    "    # sf() keeps precision for tiny tail probabilities where 1 - cdf() cancels.\n",
    "    p = 2.0 * t.sf(abs(t_stat), df)\n",
    "    return t_stat, df, cv, p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "outputs": [],
   "source": [
    "def make_decision(data, columns, alpha=0.05):\n",
    "    \"\"\"Run the t-test on two columns and print the resulting verdicts.\n",
    "\n",
    "    Prints the statistics returned by ``independent_t_test`` followed by two\n",
    "    verdicts: the first compares ``abs(t_stat)`` with the critical value, the\n",
    "    second compares the p-value with ``alpha``.\n",
    "\n",
    "    Args:\n",
    "        data: pandas DataFrame holding both columns.\n",
    "        columns: Sequence whose first two items name the columns to compare.\n",
    "        alpha: Significance level passed on to ``independent_t_test``.\n",
    "    \"\"\"\n",
    "    accept = 'Accept null hypothesis that the means are equal.'\n",
    "    reject = 'Reject the null hypothesis that the means are equal.'\n",
    "\n",
    "    t_stat, df, cv, p = independent_t_test(data, columns, alpha)\n",
    "    print(f't: {t_stat}, df: {df}, cv: {cv}, p: {p}\\n')\n",
    "    # Verdict from the critical value.\n",
    "    print(accept if abs(t_stat) <= cv else reject)\n",
    "    # Verdict from the p-value.\n",
    "    print(accept if p > alpha else reject)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "t: 6.903407918031469, df: 998, cv: 1.6463818766348755, p: 9.018563673635072e-12\n",
      "\n",
      "Reject the null hypothesis that the means are equal.\n",
      "Reject the null hypothesis that the means are equal.\n"
     ]
    }
   ],
   "source": [
    "# Seed NumPy's global generator so the bootstrap resamples, and therefore\n",
    "# the printed statistics, are identical on every Restart & Run All.\n",
    "np.random.seed(42)\n",
    "\n",
    "dataset = pd.read_csv('experiment_data.csv')\n",
    "make_decision(dataset, ['Weight', 'Age'])"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "11938c6bc6919ae2720b4d5011047913343b08a43b18698fd82dedb0d4417594"
  },
  "kernelspec": {
   "display_name": "Python 3.9.1 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.1"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}