From a70a6ebc9620f763fc22de8c0bc4c0a682c67051 Mon Sep 17 00:00:00 2001
From: Jakub Henyk
Date: Tue, 4 Apr 2023 13:58:30 +0200
Subject: [PATCH] changed .ipynb to .py

---
Review note: zadanie1.py below was edited after this patch was generated
(shebang, comments, zero-range guard), so the blob id on its index line is
stale; the line counts in the diffstat and hunk header have been updated.

 Dockerfile  |  6 +++---
 zadanie1.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 3 deletions(-)
 create mode 100644 zadanie1.py

diff --git a/Dockerfile b/Dockerfile
index 9c26b8b..5f900ab 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,8 +11,8 @@ RUN apt-get install -y python3
 RUN apt-get install -y python3-pip
 RUN python3 -m pip install pandas
 
-COPY ./zadanie1.ipynb ./
+COPY ./zadanie1.py ./
 
-RUN chmod +x ./zadanie1.ipynb
+RUN chmod +x ./zadanie1.py
 
-CMD ./zadanie1.ipynb
\ No newline at end of file
+CMD ./zadanie1.py
\ No newline at end of file
diff --git a/zadanie1.py b/zadanie1.py
new file mode 100644
index 0000000..7bfd817
--- /dev/null
+++ b/zadanie1.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+"""Encode, min-max normalize and split the Customers.csv data set.
+
+Run as a script: reads Customers.csv from the working directory and prints
+a preview, the split sizes and summary statistics to stdout.
+"""
+import pandas as pd
+import numpy as np
+
+data = pd.read_csv("Customers.csv")
+print(data[:10])
+
+# Integer codes for the categorical columns. read_csv() loads a missing
+# profession as a real NaN, never as the string 'NaN', so code 0 for a
+# missing value is assigned by fillna() below rather than by this mapping.
+mapping = {
+    'Healthcare': 1, 'Engineer': 2, 'Lawyer': 3,
+    'Entertainment': 4, 'Artist': 5, 'Executive': 6,
+    'Doctor': 7, 'Homemaker': 8, 'Marketing': 9,
+}
+
+mapping2 = {'Male': 0, 'Female': 1}
+
+dataF = data.replace({'Profession': mapping, 'Gender': mapping2})
+
+# Drop CustomerID so it is not normalized along with the other columns.
+dataF = dataF.drop(columns=['CustomerID'])
+
+dataF['Profession'] = dataF['Profession'].fillna(0)
+
+# Min-max scale every column to [0, 1]. A constant column has a zero range;
+# dividing by 1 instead maps it to 0 rather than to NaN (0/0).
+value_range = (dataF.max() - dataF.min()).replace(0, 1)
+normalized_dataF = (dataF - dataF.min()) / value_range
+
+print(normalized_dataF[:10])
+
+# Positional split: first 1600 rows train, next 200 dev, remainder test.
+train_data = normalized_dataF[0:1600]
+dev_data = normalized_dataF[1600:1800]
+test_data = normalized_dataF[1800:]
+
+print(f"Wielkość zbioru Customers: {len(data)} elementów")
+print(f"Wielkość zbioru trenującego: {len(train_data)} elementów")
+print(f"Wielkość zbioru walidującego: {len(dev_data)} elementów")
+print(f"Wielkość zbioru testującego: {len(test_data)} elementów")
+
+print(f" \nDane i wartości na temat zbioru: \n \n {normalized_dataF.describe()}")