new dockerfile

This commit is contained in:
s487178 2023-04-05 19:36:18 +02:00
parent 6ffa08f749
commit e3e20473d4
3 changed files with 41 additions and 426 deletions

View File

@ -2,37 +2,10 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": null,
"id": "74524ede", "id": "74524ede",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
" age gender height_cm weight_kg body fat_% diastolic systolic \\\n",
"0 27.0 M 172.3 75.24 21.3 80.0 130.0 \n",
"1 25.0 M 165.0 55.80 15.7 77.0 126.0 \n",
"2 31.0 M 179.6 78.00 20.1 92.0 152.0 \n",
"3 32.0 M 174.5 71.10 18.4 76.0 147.0 \n",
"4 28.0 M 173.8 67.70 17.1 70.0 127.0 \n",
"\n",
" gripForce sit and bend forward_cm sit-ups counts broad jump_cm class \\\n",
"0 54.9 18.4 60.0 217.0 C \n",
"1 36.4 16.3 53.0 229.0 A \n",
"2 44.8 12.0 49.0 181.0 C \n",
"3 41.4 15.2 53.0 219.0 B \n",
"4 43.5 27.1 45.0 217.0 B \n",
"\n",
" BMI \n",
"0 25.344179 \n",
"1 20.495868 \n",
"2 24.181428 \n",
"3 23.349562 \n",
"4 22.412439 \n"
]
}
],
"source": [ "source": [
"import pandas as pd\n", "import pandas as pd\n",
"import plotly.express as px\n", "import plotly.express as px\n",
@ -62,21 +35,10 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": null,
"id": "05f9442a", "id": "8abefe6e",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"number of elements in data frame: 13393\n",
"train: 10715\n",
"test: 1339\n",
"valid: 1339\n"
]
}
],
"source": [ "source": [
"body_train, body_test = train_test_split(df, test_size=int(df[\"age\"].count()*0.2), random_state=1)\n", "body_train, body_test = train_test_split(df, test_size=int(df[\"age\"].count()*0.2), random_state=1)\n",
"body_test, body_valid = train_test_split(body_test, test_size=int(body_test[\"age\"].count()*0.5), random_state=1)\n", "body_test, body_valid = train_test_split(body_test, test_size=int(body_test[\"age\"].count()*0.5), random_state=1)\n",
@ -89,55 +51,10 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": null,
"id": "0f3ad57a", "id": "0f3ad57a",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
" age gender height_cm weight_kg body fat_% \\\n",
"count 13393.000000 13393 13393.000000 13393.000000 13393.000000 \n",
"unique NaN 2 NaN NaN NaN \n",
"top NaN M NaN NaN NaN \n",
"freq NaN 8467 NaN NaN NaN \n",
"mean 36.775106 NaN 168.559807 67.447316 23.240165 \n",
"std 13.625639 NaN 8.426583 11.949666 7.256844 \n",
"min 21.000000 NaN 125.000000 26.300000 3.000000 \n",
"25% 25.000000 NaN 162.400000 58.200000 18.000000 \n",
"50% 32.000000 NaN 169.200000 67.400000 22.800000 \n",
"75% 48.000000 NaN 174.800000 75.300000 28.000000 \n",
"max 64.000000 NaN 193.800000 138.100000 78.400000 \n",
"\n",
" diastolic systolic gripForce sit and bend forward_cm \\\n",
"count 13393.000000 13393.000000 13393.000000 13393.000000 \n",
"unique NaN NaN NaN NaN \n",
"top NaN NaN NaN NaN \n",
"freq NaN NaN NaN NaN \n",
"mean 78.796842 130.234817 36.963877 15.209268 \n",
"std 10.742033 14.713954 10.624864 8.456677 \n",
"min 0.000000 0.000000 0.000000 -25.000000 \n",
"25% 71.000000 120.000000 27.500000 10.900000 \n",
"50% 79.000000 130.000000 37.900000 16.200000 \n",
"75% 86.000000 141.000000 45.200000 20.700000 \n",
"max 156.200000 201.000000 70.500000 213.000000 \n",
"\n",
" sit-ups counts broad jump_cm class BMI \n",
"count 13393.000000 13393.000000 13393 13393.000000 \n",
"unique NaN NaN 4 NaN \n",
"top NaN NaN C NaN \n",
"freq NaN NaN 3349 NaN \n",
"mean 39.771224 190.129627 NaN 23.606014 \n",
"std 14.276698 39.868000 NaN 2.940936 \n",
"min 0.000000 0.000000 NaN 11.103976 \n",
"25% 30.000000 162.000000 NaN 21.612812 \n",
"50% 41.000000 193.000000 NaN 23.463513 \n",
"75% 50.000000 221.000000 NaN 25.341367 \n",
"max 80.000000 303.000000 NaN 42.906509 \n"
]
}
],
"source": [ "source": [
"print(df.describe(include='all'))\n", "print(df.describe(include='all'))\n",
"#sit and bend forward_cm jest na minusie!!!" "#sit and bend forward_cm jest na minusie!!!"
@ -145,273 +62,10 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": null,
"id": "dacdd816", "id": "b694be50",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>gender</th>\n",
" <th>height_cm</th>\n",
" <th>weight_kg</th>\n",
" <th>body fat_%</th>\n",
" <th>diastolic</th>\n",
" <th>systolic</th>\n",
" <th>gripForce</th>\n",
" <th>sit and bend forward_cm</th>\n",
" <th>sit-ups counts</th>\n",
" <th>broad jump_cm</th>\n",
" <th>class</th>\n",
" <th>BMI</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>13393.000000</td>\n",
" <td>13393</td>\n",
" <td>13393.000000</td>\n",
" <td>13393.000000</td>\n",
" <td>13393.000000</td>\n",
" <td>13393.000000</td>\n",
" <td>13393.000000</td>\n",
" <td>13393.000000</td>\n",
" <td>13393.000000</td>\n",
" <td>13393.000000</td>\n",
" <td>13393.000000</td>\n",
" <td>13393</td>\n",
" <td>13393.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>NaN</td>\n",
" <td>M</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>NaN</td>\n",
" <td>8467</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3349</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>0.366863</td>\n",
" <td>NaN</td>\n",
" <td>0.633137</td>\n",
" <td>0.368044</td>\n",
" <td>0.268437</td>\n",
" <td>0.504461</td>\n",
" <td>0.647934</td>\n",
" <td>0.524310</td>\n",
" <td>-0.662107</td>\n",
" <td>0.497140</td>\n",
" <td>0.627491</td>\n",
" <td>NaN</td>\n",
" <td>0.393115</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.316875</td>\n",
" <td>NaN</td>\n",
" <td>0.122479</td>\n",
" <td>0.106884</td>\n",
" <td>0.096245</td>\n",
" <td>0.068771</td>\n",
" <td>0.073204</td>\n",
" <td>0.150707</td>\n",
" <td>0.071065</td>\n",
" <td>0.178459</td>\n",
" <td>0.131578</td>\n",
" <td>NaN</td>\n",
" <td>0.092475</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.093023</td>\n",
" <td>NaN</td>\n",
" <td>0.543605</td>\n",
" <td>0.285331</td>\n",
" <td>0.198939</td>\n",
" <td>0.454545</td>\n",
" <td>0.597015</td>\n",
" <td>0.390071</td>\n",
" <td>-0.698319</td>\n",
" <td>0.375000</td>\n",
" <td>0.534653</td>\n",
" <td>NaN</td>\n",
" <td>0.330440</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.255814</td>\n",
" <td>NaN</td>\n",
" <td>0.642442</td>\n",
" <td>0.367621</td>\n",
" <td>0.262599</td>\n",
" <td>0.505762</td>\n",
" <td>0.646766</td>\n",
" <td>0.537589</td>\n",
" <td>-0.653782</td>\n",
" <td>0.512500</td>\n",
" <td>0.636964</td>\n",
" <td>NaN</td>\n",
" <td>0.388634</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>0.627907</td>\n",
" <td>NaN</td>\n",
" <td>0.723837</td>\n",
" <td>0.438283</td>\n",
" <td>0.331565</td>\n",
" <td>0.550576</td>\n",
" <td>0.701493</td>\n",
" <td>0.641135</td>\n",
" <td>-0.615966</td>\n",
" <td>0.625000</td>\n",
" <td>0.729373</td>\n",
" <td>NaN</td>\n",
" <td>0.447681</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1.000000</td>\n",
" <td>NaN</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>NaN</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age gender height_cm weight_kg body fat_% \\\n",
"count 13393.000000 13393 13393.000000 13393.000000 13393.000000 \n",
"unique NaN 2 NaN NaN NaN \n",
"top NaN M NaN NaN NaN \n",
"freq NaN 8467 NaN NaN NaN \n",
"mean 0.366863 NaN 0.633137 0.368044 0.268437 \n",
"std 0.316875 NaN 0.122479 0.106884 0.096245 \n",
"min 0.000000 NaN 0.000000 0.000000 0.000000 \n",
"25% 0.093023 NaN 0.543605 0.285331 0.198939 \n",
"50% 0.255814 NaN 0.642442 0.367621 0.262599 \n",
"75% 0.627907 NaN 0.723837 0.438283 0.331565 \n",
"max 1.000000 NaN 1.000000 1.000000 1.000000 \n",
"\n",
" diastolic systolic gripForce sit and bend forward_cm \\\n",
"count 13393.000000 13393.000000 13393.000000 13393.000000 \n",
"unique NaN NaN NaN NaN \n",
"top NaN NaN NaN NaN \n",
"freq NaN NaN NaN NaN \n",
"mean 0.504461 0.647934 0.524310 -0.662107 \n",
"std 0.068771 0.073204 0.150707 0.071065 \n",
"min 0.000000 0.000000 0.000000 -1.000000 \n",
"25% 0.454545 0.597015 0.390071 -0.698319 \n",
"50% 0.505762 0.646766 0.537589 -0.653782 \n",
"75% 0.550576 0.701493 0.641135 -0.615966 \n",
"max 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" sit-ups counts broad jump_cm class BMI \n",
"count 13393.000000 13393.000000 13393 13393.000000 \n",
"unique NaN NaN 4 NaN \n",
"top NaN NaN C NaN \n",
"freq NaN NaN 3349 NaN \n",
"mean 0.497140 0.627491 NaN 0.393115 \n",
"std 0.178459 0.131578 NaN 0.092475 \n",
"min 0.000000 0.000000 NaN 0.000000 \n",
"25% 0.375000 0.534653 NaN 0.330440 \n",
"50% 0.512500 0.636964 NaN 0.388634 \n",
"75% 0.625000 0.729373 NaN 0.447681 \n",
"max 1.000000 1.000000 NaN 1.000000 "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"scaler = MinMaxScaler()\n", "scaler = MinMaxScaler()\n",
"df[['age', 'height_cm', 'weight_kg','body fat_%',\n", "df[['age', 'height_cm', 'weight_kg','body fat_%',\n",
@ -429,78 +83,20 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": null,
"id": "5cd376cf", "id": "5cd376cf",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 13393 entries, 0 to 13392\n",
"Data columns (total 13 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 age 13393 non-null float64\n",
" 1 gender 13393 non-null object \n",
" 2 height_cm 13393 non-null float64\n",
" 3 weight_kg 13393 non-null float64\n",
" 4 body fat_% 13393 non-null float64\n",
" 5 diastolic 13393 non-null float64\n",
" 6 systolic 13393 non-null float64\n",
" 7 gripForce 13393 non-null float64\n",
" 8 sit and bend forward_cm 13393 non-null float64\n",
" 9 sit-ups counts 13393 non-null float64\n",
" 10 broad jump_cm 13393 non-null float64\n",
" 11 class 13393 non-null object \n",
" 12 BMI 13393 non-null float64\n",
"dtypes: float64(11), object(2)\n",
"memory usage: 1.3+ MB\n"
]
}
],
"source": [ "source": [
"df.info()" "df.info()"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": null,
"id": "93dcf330", "id": "2375b677",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"Each class in data frame: \n",
"C 3349\n",
"D 3349\n",
"A 3348\n",
"B 3347\n",
"Name: class, dtype: int64\n",
"Each class in train data: \n",
"A 2703\n",
"B 2681\n",
"C 2671\n",
"D 2660\n",
"Name: class, dtype: int64\n",
"Each class in test data: \n",
"D 353\n",
"C 332\n",
"B 328\n",
"A 326\n",
"Name: class, dtype: int64\n",
"Each class in valid data: \n",
"C 346\n",
"B 338\n",
"D 336\n",
"A 319\n",
"Name: class, dtype: int64\n"
]
}
],
"source": [ "source": [
"print('Each class in data frame: \\n{}'.format(df['class'].value_counts()))\n", "print('Each class in data frame: \\n{}'.format(df['class'].value_counts()))\n",
"print('Each class in train data: \\n{}'.format(body_train['class'].value_counts()))\n", "print('Each class in train data: \\n{}'.format(body_train['class'].value_counts()))\n",
@ -511,7 +107,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "b5620509", "id": "781b7e0b",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": []
@ -519,7 +115,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "3e9bbbe7", "id": "225a3cd0",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [

View File

@ -36,7 +36,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "56212519", "id": "8abefe6e",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -63,7 +63,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "b7df6cfc", "id": "b694be50",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -94,7 +94,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "3be91a92", "id": "2375b677",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -107,7 +107,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "ab230559", "id": "781b7e0b",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": []
@ -115,7 +115,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "b7cb3ebf", "id": "225a3cd0",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [

19
dockerfile3 Normal file
View File

@ -0,0 +1,19 @@
# Image that clones the ium_s487178 repository and runs body_performance.py
# with the Python data-analysis packages it needs.

# Pinned to a fixed LTS release instead of `latest` so builds are reproducible.
# Newer Ubuntu releases mark the system Python as "externally managed"
# (PEP 668), which makes the `pip install --user` steps below fail.
FROM ubuntu:22.04

# OS-level tooling. `update` and `install` share a layer so the package index
# is never stale, recommended-only packages are skipped, and the apt lists are
# removed in the same layer so they do not end up in the image.
# ca-certificates is listed explicitly because --no-install-recommends would
# otherwise drop it and HTTPS (git clone, curl) would fail.
# DEBIAN_FRONTEND is set inline so it does not leak into the runtime env.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies, installed into /root/.local with the distribution's pip
# (python3-pip); the former unverified get-pip.py download was redundant.
# --no-cache-dir keeps pip's download cache out of the layer.
# TODO: pin package versions (e.g. via a requirements.txt) for reproducibility.
# NOTE(review): the notebooks in this repository call train_test_split and
# MinMaxScaler; if body_performance.py does too, scikit-learn must be added
# here - confirm against the script.
RUN python3 -m pip install --no-cache-dir --user \
        kaggle \
        pandas \
        plotly \
        seaborn

# Expose console scripts installed by `pip install --user` (e.g. `kaggle`).
ENV PATH="/root/.local/bin:$PATH"

WORKDIR /app

# NOTE: this layer is cached by Docker; rebuild with --no-cache (or change an
# earlier layer) to pick up new commits from the repository.
RUN git clone https://git.wmi.amu.edu.pl/s487178/ium_s487178.git /app

# NOTE(review): the container still runs as root because the packages live in
# /root/.local; switching to a dedicated USER would require relocating them.
CMD ["python3", "body_performance.py"]