TODOs
This commit is contained in:
parent
ea30480590
commit
0dea9ae3fd
@ -28,7 +28,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1131,
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# TODO: Poprzestawiać kolejność definicji funkcji?"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 252,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
@ -48,7 +62,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1132,
|
||||
"execution_count": 253,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
@ -65,7 +79,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1133,
|
||||
"execution_count": 254,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
@ -75,6 +89,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def t_stat_single(sample, population_mean=2):\n",
|
||||
" # TODO: Wywalić min, funkcja nie powinna działać dla pustej próbki\n",
|
||||
" # TODO: population mean nie powinien mieć defaultowego argumentu\n",
|
||||
" \"\"\"Funkcja oblicza wartość statystyki testowej dla jednej próbki\"\"\"\n",
|
||||
" sample = sample[0].values.tolist()\n",
|
||||
" sample_size = len(sample)\n",
|
||||
@ -84,7 +100,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1134,
|
||||
"execution_count": 255,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
@ -103,7 +119,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1135,
|
||||
"execution_count": 256,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
@ -114,6 +130,8 @@
|
||||
"source": [
|
||||
"def t_stat_dep(sample_1, sample_2):\n",
|
||||
" \"\"\"Funkcja oblicza wartość statystyki testowej dla dwóch próbek zależnych\"\"\"\n",
|
||||
" # TODO: Wywalić min\n",
|
||||
" # TODO: Przenieść mu jako opcjonalny argument?\n",
|
||||
" sample_1 = sample_1[0].values.tolist()\n",
|
||||
" sample_2 = sample_2[0].values.tolist()\n",
|
||||
" differences = [x_1 - x_2 for x_1, x_2 in zip(sample_1, sample_2)]\n",
|
||||
@ -124,12 +142,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1136,
|
||||
"execution_count": 257,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def df_dep(sample_1, sample_2):\n",
|
||||
" \"\"\"Funkcja oblicza stopnie swobody dla dwóch próbek zależnych\"\"\"\n",
|
||||
" # TODO: Assert działa chyba tylko w trybie debugowania\n",
|
||||
" l1, l2 = len(sample_1), len(sample_2)\n",
|
||||
" assert l1 == l2 \n",
|
||||
"\n",
|
||||
@ -138,7 +157,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1137,
|
||||
"execution_count": 258,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -149,7 +168,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1138,
|
||||
"execution_count": 259,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -161,7 +180,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1139,
|
||||
"execution_count": 260,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -172,7 +191,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1140,
|
||||
"execution_count": 261,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -183,7 +202,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1141,
|
||||
"execution_count": 262,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
@ -202,7 +221,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1142,
|
||||
"execution_count": 263,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
@ -222,7 +241,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1143,
|
||||
"execution_count": 264,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
@ -242,7 +261,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1144,
|
||||
"execution_count": 265,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -266,7 +285,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1145,
|
||||
"execution_count": 266,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
@ -301,7 +320,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1146,
|
||||
"execution_count": 267,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
@ -311,7 +330,6 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def draw_distribution(stats):\n",
|
||||
" # To powinno być zdefiniowane przed make decision w sumie\n",
|
||||
" \"\"\"\n",
|
||||
" Funkcja rysuje rozkład statystyki testowej\n",
|
||||
" stats: lista statystyk testowych\n",
|
||||
@ -324,7 +342,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1147,
|
||||
"execution_count": 268,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
@ -334,13 +352,13 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def make_decision(data, columns):\n",
|
||||
" # TODO\n",
|
||||
" # TODO: Potrzebna ta funkcja w ogóle? Decyzja jest zależna od wybranych hipotez chyba.\n",
|
||||
" pass"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1148,
|
||||
"execution_count": 269,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
@ -394,22 +412,28 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1149,
|
||||
"execution_count": 270,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Statystyki dla jednej próby:\n",
|
||||
"t: 1.6371853975970775e-07, df: 5, cv: 2.015048372669157, p: 0.9999998757026942\n",
|
||||
"\n",
|
||||
"Statystyki dla dwóch prób zależnych:\n",
|
||||
"t: 2.721731710913334e-07, df: 5, cv: 2.015048372669157, p: 0.9999997933624869\n",
|
||||
"\n",
|
||||
"Statystyki dla dwóch prób niezależnych:\n",
|
||||
"t: 56.011644110212046, df: 8, cv: 1.8595480375228421, p: 1.145550321268729e-11\n",
|
||||
"\n"
|
||||
"Statystyki dla jednej próby:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "TypeError",
|
||||
"evalue": "t_stat_single() missing 1 required positional argument: 'population_mean'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
|
||||
"\u001B[1;31mTypeError\u001B[0m Traceback (most recent call last)",
|
||||
"Input \u001B[1;32mIn [270]\u001B[0m, in \u001B[0;36m<cell line: 7>\u001B[1;34m()\u001B[0m\n\u001B[0;32m 4\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mt: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mt_stat\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m, df: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mdf\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m, cv: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mcv\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m, p: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mp\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m'\u001B[39m)\n\u001B[0;32m 6\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mStatystyki dla jednej próby:\u001B[39m\u001B[38;5;124m'\u001B[39m)\n\u001B[1;32m----> 7\u001B[0m t_stat, df, cv, p, _ \u001B[38;5;241m=\u001B[39m \u001B[43mbootstrap_one_sample\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdummy\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 8\u001B[0m pretty_print_full_stats(t_stat, df, cv, p)\n\u001B[0;32m 10\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mStatystyki dla dwóch prób zależnych:\u001B[39m\u001B[38;5;124m'\u001B[39m)\n",
|
||||
"Input \u001B[1;32mIn [262]\u001B[0m, in \u001B[0;36mbootstrap_one_sample\u001B[1;34m(sample)\u001B[0m\n\u001B[0;32m 1\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mbootstrap_one_sample\u001B[39m(sample):\n\u001B[1;32m----> 2\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mt_test\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 3\u001B[0m \u001B[43m \u001B[49m\u001B[43msample_1\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43msample\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 4\u001B[0m \u001B[43m \u001B[49m\u001B[43mdf_fn\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mdf_single\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 5\u001B[0m \u001B[43m \u001B[49m\u001B[43mt_stat_fn\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mt_stat_single\u001B[49m\n\u001B[0;32m 6\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n",
|
||||
"Input \u001B[1;32mIn [266]\u001B[0m, in \u001B[0;36mt_test\u001B[1;34m(sample_1, sample_2, df_fn, t_stat_fn, alpha)\u001B[0m\n\u001B[0;32m 1\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mt_test\u001B[39m(sample_1, sample_2\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m, df_fn\u001B[38;5;241m=\u001B[39mdf_ind, t_stat_fn\u001B[38;5;241m=\u001B[39mt_stat_ind, alpha\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m0.05\u001B[39m):\n\u001B[0;32m 2\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[0;32m 3\u001B[0m \u001B[38;5;124;03m Funkcja przeprowadza test T-studenta dla dwóch zmiennych.\u001B[39;00m\n\u001B[0;32m 4\u001B[0m \u001B[38;5;124;03m liczba kolumn wynosi 1, test jest przeprowadzany dla jednej zmiennej.\u001B[39;00m\n\u001B[0;32m 5\u001B[0m \u001B[38;5;124;03m @param df_fn - funkcja obliczająca stopnie swobody\u001B[39;00m\n\u001B[0;32m 6\u001B[0m \u001B[38;5;124;03m @param t_stat_fn - funkcja obliczająca statystykę T\u001B[39;00m\n\u001B[0;32m 7\u001B[0m \u001B[38;5;124;03m \"\"\"\u001B[39;00m\n\u001B[1;32m----> 8\u001B[0m t_stat_list \u001B[38;5;241m=\u001B[39m \u001B[43mget_t_stats\u001B[49m\u001B[43m(\u001B[49m\u001B[43msample_1\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43msample_2\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mt_stat_fn\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 9\u001B[0m t_stat_sum \u001B[38;5;241m=\u001B[39m \u001B[38;5;28msum\u001B[39m(t_stat_list)\n\u001B[0;32m 11\u001B[0m data_size \u001B[38;5;241m=\u001B[39m sample_1\u001B[38;5;241m.\u001B[39mshape[\u001B[38;5;241m0\u001B[39m]\n",
|
||||
"Input \u001B[1;32mIn [265]\u001B[0m, in \u001B[0;36mget_t_stats\u001B[1;34m(sample_1, sample_2, t_stat_fn)\u001B[0m\n\u001B[0;32m 6\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m sample_2 \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m 7\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m bootstrap \u001B[38;5;129;01min\u001B[39;00m generate_bootstraps(sample_1):\n\u001B[1;32m----> 8\u001B[0m stat \u001B[38;5;241m=\u001B[39m \u001B[43mt_stat_fn\u001B[49m\u001B[43m(\u001B[49m\u001B[43mbootstrap\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 9\u001B[0m t_stat_list\u001B[38;5;241m.\u001B[39mappend(stat)\n\u001B[0;32m 10\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m t_stat_list\n",
|
||||
"\u001B[1;31mTypeError\u001B[0m: t_stat_single() missing 1 required positional argument: 'population_mean'"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -434,7 +458,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1150,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
|
Loading…
Reference in New Issue
Block a user