Docker zadanie 2
This commit is contained in:
parent
c9b575549c
commit
d2e0211ef2
18
Dockerfile
Normal file
18
Dockerfile
Normal file
@ -0,0 +1,18 @@
|
||||
# Image that runs main_docker.py against the bundled heart_2020_cleaned.csv.

# Pinned release instead of the moving `latest` tag, so rebuilds are
# reproducible. Newer Ubuntu releases mark the system Python as externally
# managed (PEP 668), which makes the bare `pip install` below fail.
FROM ubuntu:22.04

# Refresh the package index and install in the SAME layer so a cached, stale
# index is never reused; drop the index afterwards to keep the layer small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# One layer for all Python dependencies; --no-cache-dir keeps pip's download
# cache out of the image. --user is kept so packages land where they did before.
RUN python3 -m pip install --no-cache-dir --user \
        kaggle \
        pandas \
        scikit-learn

# Absolute path: same directory the relative `WORKDIR app` resolved to.
WORKDIR /app

# Build-time value (docker build --build-arg CUTOFF=...) exported to the
# container environment; empty when the build argument is not supplied.
ARG CUTOFF
ENV CUTOFF=${CUTOFF}

# Copy the large, rarely changing dataset first so editing the script does not
# invalidate the dataset layer.
COPY heart_2020_cleaned.csv ./
COPY main_docker.py ./

CMD ["python3", "./main_docker.py"]
|
319796
heart_2020_cleaned.csv
Normal file
319796
heart_2020_cleaned.csv
Normal file
File diff suppressed because it is too large
Load Diff
35
main_docker.py
Normal file
35
main_docker.py
Normal file
@ -0,0 +1,35 @@
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
import os
|
||||
|
||||
# Load the CSV, print summary statistics, drop rows with missing values,
# split into train/test sets and scale BMI by the training-set maximum.
dataset = pd.read_csv("heart_2020_cleaned.csv")
print(dataset.describe(include='all'))

# Remove every row that has at least one missing value, then summarise again.
dataset = dataset.dropna()
print(dataset.describe(include='all'))

# 80/20 split with a fixed seed so the partition is repeatable.
dataset_train, dataset_test = train_test_split(dataset, test_size=.2, train_size=.8, random_state=1)
# Take explicit copies: the BMI column is overwritten further down, and
# assigning into the frames returned by the split would otherwise write to
# slice-derived objects (SettingWithCopyWarning).
dataset_train = dataset_train.copy()
dataset_test = dataset_test.copy()

print(dataset_train.describe(include='all'))

# Row counts of each partition and of the cleaned dataset.
print("Wielkości:")
print("Zbiór uczący:", dataset_train.shape[0])
print("Zbiór testowy:", dataset_test.shape[0])
print("Łącznie: ", dataset.shape[0])

# Class frequencies of GenHealth in the whole dataset and in the training set.
print(dataset["GenHealth"].value_counts())
print(dataset_train["GenHealth"].value_counts())

# BMI statistics; note the median is taken over the test set.
print("Średnia BMI -łącznie: ", dataset["BMI"].mean())
print("Odchylenie standardowe BMI - uczący:", dataset_train["BMI"].std())
print("Odchylenie standardowe BMI - łącznie:", dataset["BMI"].std())
print("Mediana BMI:", dataset_test["BMI"].median())

# Scale BMI by the maximum seen in the TRAINING set only; the same divisor is
# applied to the test set, so test values may exceed 1.
max_bmi = dataset_train["BMI"].max()
print(max_bmi)

# Vectorised division replaces the per-element apply(lambda ...) call.
dataset_train["BMI"] = dataset_train["BMI"] / max_bmi
dataset_test["BMI"] = dataset_test["BMI"] / max_bmi

print(dataset_train["AgeCategory"].value_counts())
print(dataset_train["BMI"])
|
Loading…
Reference in New Issue
Block a user