diff --git a/.ipynb_checkpoints/body_performance-checkpoint.ipynb b/.ipynb_checkpoints/body_performance-checkpoint.ipynb index 32c090a..ebc6af9 100644 --- a/.ipynb_checkpoints/body_performance-checkpoint.ipynb +++ b/.ipynb_checkpoints/body_performance-checkpoint.ipynb @@ -2,37 +2,10 @@ "cells": [ { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "74524ede", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " age gender height_cm weight_kg body fat_% diastolic systolic \\\n", - "0 27.0 M 172.3 75.24 21.3 80.0 130.0 \n", - "1 25.0 M 165.0 55.80 15.7 77.0 126.0 \n", - "2 31.0 M 179.6 78.00 20.1 92.0 152.0 \n", - "3 32.0 M 174.5 71.10 18.4 76.0 147.0 \n", - "4 28.0 M 173.8 67.70 17.1 70.0 127.0 \n", - "\n", - " gripForce sit and bend forward_cm sit-ups counts broad jump_cm class \\\n", - "0 54.9 18.4 60.0 217.0 C \n", - "1 36.4 16.3 53.0 229.0 A \n", - "2 44.8 12.0 49.0 181.0 C \n", - "3 41.4 15.2 53.0 219.0 B \n", - "4 43.5 27.1 45.0 217.0 B \n", - "\n", - " BMI \n", - "0 25.344179 \n", - "1 20.495868 \n", - "2 24.181428 \n", - "3 23.349562 \n", - "4 22.412439 \n" - ] - } - ], + "outputs": [], "source": [ "import pandas as pd\n", "import plotly.express as px\n", @@ -62,21 +35,10 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "05f9442a", + "execution_count": null, + "id": "8abefe6e", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "number of elements in data frame: 13393\n", - "train: 10715\n", - "test: 1339\n", - "valid: 1339\n" - ] - } - ], + "outputs": [], "source": [ "body_train, body_test = train_test_split(df, test_size=int(df[\"age\"].count()*0.2), random_state=1)\n", "body_test, body_valid = train_test_split(body_test, test_size=int(body_test[\"age\"].count()*0.5), random_state=1)\n", @@ -89,55 +51,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "0f3ad57a", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " age gender height_cm weight_kg body fat_% \\\n", - "count 13393.000000 13393 13393.000000 13393.000000 13393.000000 \n", - "unique NaN 2 NaN NaN NaN \n", - "top NaN M NaN NaN NaN \n", - "freq NaN 8467 NaN NaN NaN \n", - "mean 36.775106 NaN 168.559807 67.447316 23.240165 \n", - "std 13.625639 NaN 8.426583 11.949666 7.256844 \n", - "min 21.000000 NaN 125.000000 26.300000 3.000000 \n", - "25% 25.000000 NaN 162.400000 58.200000 18.000000 \n", - "50% 32.000000 NaN 169.200000 67.400000 22.800000 \n", - "75% 48.000000 NaN 174.800000 75.300000 28.000000 \n", - "max 64.000000 NaN 193.800000 138.100000 78.400000 \n", - "\n", - " diastolic systolic gripForce sit and bend forward_cm \\\n", - "count 13393.000000 13393.000000 13393.000000 13393.000000 \n", - "unique NaN NaN NaN NaN \n", - "top NaN NaN NaN NaN \n", - "freq NaN NaN NaN NaN \n", - "mean 78.796842 130.234817 36.963877 15.209268 \n", - "std 10.742033 14.713954 10.624864 8.456677 \n", - "min 0.000000 0.000000 0.000000 -25.000000 \n", - "25% 71.000000 120.000000 27.500000 10.900000 \n", - "50% 79.000000 130.000000 37.900000 16.200000 \n", - "75% 86.000000 141.000000 45.200000 20.700000 \n", - "max 156.200000 201.000000 70.500000 213.000000 \n", - "\n", - " sit-ups counts broad jump_cm class BMI \n", - "count 13393.000000 13393.000000 13393 13393.000000 \n", - "unique NaN NaN 4 NaN \n", - "top NaN NaN C NaN \n", - "freq NaN NaN 3349 NaN \n", - "mean 39.771224 190.129627 NaN 23.606014 \n", - "std 14.276698 39.868000 NaN 2.940936 \n", - "min 0.000000 0.000000 NaN 11.103976 \n", - "25% 30.000000 162.000000 NaN 21.612812 \n", - "50% 41.000000 193.000000 NaN 23.463513 \n", - "75% 50.000000 221.000000 NaN 25.341367 \n", - "max 80.000000 303.000000 NaN 42.906509 \n" - ] - } - ], + "outputs": [], "source": [ "print(df.describe(include='all'))\n", "#sit and bend forward_cm jest na minusie!!!" @@ -145,273 +62,10 @@ }, { "cell_type": "code", - "execution_count": 15, - "id": "dacdd816", + "execution_count": null, + "id": "b694be50", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
agegenderheight_cmweight_kgbody fat_%diastolicsystolicgripForcesit and bend forward_cmsit-ups countsbroad jump_cmclassBMI
count13393.0000001339313393.00000013393.00000013393.00000013393.00000013393.00000013393.00000013393.00000013393.00000013393.0000001339313393.000000
uniqueNaN2NaNNaNNaNNaNNaNNaNNaNNaNNaN4NaN
topNaNMNaNNaNNaNNaNNaNNaNNaNNaNNaNCNaN
freqNaN8467NaNNaNNaNNaNNaNNaNNaNNaNNaN3349NaN
mean0.366863NaN0.6331370.3680440.2684370.5044610.6479340.524310-0.6621070.4971400.627491NaN0.393115
std0.316875NaN0.1224790.1068840.0962450.0687710.0732040.1507070.0710650.1784590.131578NaN0.092475
min0.000000NaN0.0000000.0000000.0000000.0000000.0000000.000000-1.0000000.0000000.000000NaN0.000000
25%0.093023NaN0.5436050.2853310.1989390.4545450.5970150.390071-0.6983190.3750000.534653NaN0.330440
50%0.255814NaN0.6424420.3676210.2625990.5057620.6467660.537589-0.6537820.5125000.636964NaN0.388634
75%0.627907NaN0.7238370.4382830.3315650.5505760.7014930.641135-0.6159660.6250000.729373NaN0.447681
max1.000000NaN1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000NaN1.000000
\n", - "
" - ], - "text/plain": [ - " age gender height_cm weight_kg body fat_% \\\n", - "count 13393.000000 13393 13393.000000 13393.000000 13393.000000 \n", - "unique NaN 2 NaN NaN NaN \n", - "top NaN M NaN NaN NaN \n", - "freq NaN 8467 NaN NaN NaN \n", - "mean 0.366863 NaN 0.633137 0.368044 0.268437 \n", - "std 0.316875 NaN 0.122479 0.106884 0.096245 \n", - "min 0.000000 NaN 0.000000 0.000000 0.000000 \n", - "25% 0.093023 NaN 0.543605 0.285331 0.198939 \n", - "50% 0.255814 NaN 0.642442 0.367621 0.262599 \n", - "75% 0.627907 NaN 0.723837 0.438283 0.331565 \n", - "max 1.000000 NaN 1.000000 1.000000 1.000000 \n", - "\n", - " diastolic systolic gripForce sit and bend forward_cm \\\n", - "count 13393.000000 13393.000000 13393.000000 13393.000000 \n", - "unique NaN NaN NaN NaN \n", - "top NaN NaN NaN NaN \n", - "freq NaN NaN NaN NaN \n", - "mean 0.504461 0.647934 0.524310 -0.662107 \n", - "std 0.068771 0.073204 0.150707 0.071065 \n", - "min 0.000000 0.000000 0.000000 -1.000000 \n", - "25% 0.454545 0.597015 0.390071 -0.698319 \n", - "50% 0.505762 0.646766 0.537589 -0.653782 \n", - "75% 0.550576 0.701493 0.641135 -0.615966 \n", - "max 1.000000 1.000000 1.000000 1.000000 \n", - "\n", - " sit-ups counts broad jump_cm class BMI \n", - "count 13393.000000 13393.000000 13393 13393.000000 \n", - "unique NaN NaN 4 NaN \n", - "top NaN NaN C NaN \n", - "freq NaN NaN 3349 NaN \n", - "mean 0.497140 0.627491 NaN 0.393115 \n", - "std 0.178459 0.131578 NaN 0.092475 \n", - "min 0.000000 0.000000 NaN 0.000000 \n", - "25% 0.375000 0.534653 NaN 0.330440 \n", - "50% 0.512500 0.636964 NaN 0.388634 \n", - "75% 0.625000 0.729373 NaN 0.447681 \n", - "max 1.000000 1.000000 NaN 1.000000 " - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "scaler = MinMaxScaler()\n", "df[['age', 'height_cm', 'weight_kg','body fat_%',\n", @@ -429,78 +83,20 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "5cd376cf", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 13393 entries, 0 to 13392\n", - "Data columns (total 13 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 age 13393 non-null float64\n", - " 1 gender 13393 non-null object \n", - " 2 height_cm 13393 non-null float64\n", - " 3 weight_kg 13393 non-null float64\n", - " 4 body fat_% 13393 non-null float64\n", - " 5 diastolic 13393 non-null float64\n", - " 6 systolic 13393 non-null float64\n", - " 7 gripForce 13393 non-null float64\n", - " 8 sit and bend forward_cm 13393 non-null float64\n", - " 9 sit-ups counts 13393 non-null float64\n", - " 10 broad jump_cm 13393 non-null float64\n", - " 11 class 13393 non-null object \n", - " 12 BMI 13393 non-null float64\n", - "dtypes: float64(11), object(2)\n", - "memory usage: 1.3+ MB\n" - ] - } - ], + "outputs": [], "source": [ "df.info()" ] }, { "cell_type": "code", - "execution_count": 10, - "id": "93dcf330", + "execution_count": null, + "id": "2375b677", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Each class in data frame: \n", - "C 3349\n", - "D 3349\n", - "A 3348\n", - "B 3347\n", - "Name: class, dtype: int64\n", - "Each class in train data: \n", - "A 2703\n", - "B 2681\n", - "C 2671\n", - "D 2660\n", - "Name: class, dtype: int64\n", - "Each class in test data: \n", - "D 353\n", - "C 332\n", - "B 328\n", - "A 326\n", - "Name: class, dtype: int64\n", - "Each class in valid data: \n", - "C 346\n", - "B 338\n", - "D 336\n", - "A 319\n", - "Name: class, dtype: int64\n" - ] - } - ], + "outputs": [], "source": [ "print('Each class in data frame: \\n{}'.format(df['class'].value_counts()))\n", "print('Each class in train data: \\n{}'.format(body_train['class'].value_counts()))\n", @@ -511,7 +107,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b5620509", + "id": "781b7e0b", "metadata": {}, "outputs": [], "source": [] @@ -519,7 +115,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3e9bbbe7", + "id": "225a3cd0", "metadata": {}, "outputs": [], "source": [ diff --git a/body_performance.ipynb b/body_performance.ipynb index 1b237c0..ebc6af9 100644 --- a/body_performance.ipynb +++ b/body_performance.ipynb @@ -36,7 +36,7 @@ { "cell_type": "code", "execution_count": null, - "id": "56212519", + "id": "8abefe6e", "metadata": {}, "outputs": [], "source": [ @@ -63,7 +63,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b7df6cfc", + "id": "b694be50", "metadata": {}, "outputs": [], "source": [ @@ -94,7 +94,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3be91a92", + "id": "2375b677", "metadata": {}, "outputs": [], "source": [ @@ -107,7 +107,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ab230559", + "id": "781b7e0b", "metadata": {}, "outputs": [], "source": [] @@ -115,7 +115,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b7cb3ebf", + "id": "225a3cd0", "metadata": {}, "outputs": [], "source": [ diff --git a/dockerfile3 b/dockerfile3 new file mode 100644 index 0000000..c86a156 --- /dev/null +++ b/dockerfile3 @@ -0,0 +1,19 @@ +FROM ubuntu:latest + +RUN apt-get update \ + && apt-get install -y git python3 python3-pip curl \ + && curl -O https://bootstrap.pypa.io/get-pip.py \ + && python3 get-pip.py --user \ + && rm get-pip.py \ + && pip install --user kaggle \ + && pip install --user pandas \ + && pip install --user plotly \ + && pip install --user seaborn + +ENV PATH="/root/.local/bin:$PATH" + +WORKDIR /app + +RUN git clone https://git.wmi.amu.edu.pl/s487178/ium_s487178.git /app + +CMD ["python3", "body_performance.py"] \ No newline at end of file