Zadanie frameworki ML
This commit is contained in:
parent
0b95b22cfa
commit
12fcd199fa
@ -7,10 +7,13 @@ RUN apt install -y curl
|
||||
# Python dependencies for preprocessing and training.
RUN pip3 install --user pandas
RUN pip3 install --user matplotlib
RUN pip3 install --user numpy
RUN pip3 install --user tensorflow
# The PyPI project is named "scikit-learn"; the "sklearn" alias is deprecated
# and can no longer be installed. The import name is still `sklearn`.
RUN pip3 install --user scikit-learn
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY ./preparations.sh ./
|
||||
COPY ./preprocesing_python.py ./
|
||||
COPY ./training.py ./
|
||||
|
||||
# CMD ./preparations.sh
|
||||
|
7213
results.csv
Normal file
7213
results.csv
Normal file
File diff suppressed because it is too large
Load Diff
103
training.py
103
training.py
@ -3,60 +3,87 @@ import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import os
|
||||
import tensorflow as tf
|
||||
from countries_map import countries
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras.layers import Input, Dense, Activation, Dropout
|
||||
from tensorflow.keras.models import Model
|
||||
from tensorflow.keras.callbacks import EarlyStopping
|
||||
from keras.models import Sequential
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from tensorflow import keras
|
||||
from tensorflow.keras import layers
|
||||
from tensorflow.keras.layers.experimental import preprocessing
|
||||
|
||||
|
||||
def mapSet(set, country_map=None):
    """Replace the categorical values of a data set with integer codes, in place.

    The "age" and "sex" columns are encoded with fixed tables; the "country"
    column is encoded by replacing every value of ``country_map`` with its key.
    Values that do not appear in a table are left unchanged.

    Args:
        set: DataFrame with "age", "sex" and "country" columns. It is modified
            in place. (The name shadows the builtin ``set``; it is kept so
            that existing callers keep working.)
        country_map: Optional mapping whose values are the entries found in
            the "country" column and whose keys are their codes. Defaults to
            the module-level ``countries`` mapping.

    Returns:
        The same DataFrame object that was passed in.

    Raises:
        KeyError: If one of the three columns is missing.
    """
    if country_map is None:
        country_map = countries

    age = {"5-14 years": 0, "15-24 years": 1, "25-34 years": 2,
           "35-54 years": 3, "55-74 years": 4, "75+ years": 5}
    sex = {"male": 0, "female": 1}
    country = {name: code for code, name in country_map.items()}

    for column, mapping in (("age", age), ("sex", sex), ("country", country)):
        # Assign the result back instead of calling replace(inplace=True) on
        # the selected column: with pandas Copy-on-Write the selected column
        # is a separate object, so an in-place call would leave `set` as is.
        set[column] = set[column].replace(mapping)

    return set
|
||||
|
||||
|
||||
# Column names passed to the CSV dataset reader.
column_names = ["country", "year", "sex", "age", "suicides_no", "population"]
# The label is named explicitly rather than picked by position, and the
# feature list is derived from column_names so the two cannot drift apart.
label_name = "suicides_no"
feature_names = [name for name in column_names if name != label_name]
|
||||
|
||||
# Load the data and report its size.
sc = pd.read_csv('who_suicide_statistics.csv')
print(sc.shape)

# Drop incomplete rows, then one-hot encode the categorical columns. The empty
# prefix and separator mean each new column is named after the bare category
# value.
sc = pd.get_dummies(
    sc.dropna(),
    columns=['age', 'sex', 'country'],
    prefix='',
    prefix_sep='',
)
|
||||
|
||||
|
||||
# Shuffle deterministically, then split into train / validation / test sets
# (60% / 20% / 20%) by position.
shuffled = sc.sample(frac=1, random_state=42)
train_end = int(.6 * len(sc))
validate_end = int(.8 * len(sc))
# .copy() gives every subset its own data, so the in-place operations below
# act on independent frames rather than on slices of `shuffled`.
train = shuffled.iloc[:train_end].copy()
validate = shuffled.iloc[train_end:validate_end].copy()
test = shuffled.iloc[validate_end:].copy()

train.dropna(inplace=True)
validate.dropna(inplace=True)
test.dropna(inplace=True)

# NOTE(review): mapSet reads the raw "age", "sex" and "country" columns;
# confirm they have not been one-hot encoded before this point.
train_n = mapSet(train)
validate_n = mapSet(validate)
# Encode the test set itself (this line previously encoded `validate` again).
test_n = mapSet(test)
|
||||
# Separate the input features from the label for the train and test sets.
X_train = train.loc[:, train.columns != 'suicides_no']
y_train = train[['suicides_no']]
# The column mask is built from the test set's own columns (it was previously
# built from train.columns, which only works while both frames line up).
X_test = test.loc[:, test.columns != 'suicides_no']
y_test = test[['suicides_no']]
|
||||
|
||||
# make_csv_dataset reads CSV *files*, so the encoded training set is written
# to disk and the file path is passed on. (to_csv without a path returns the
# CSV text, which is not a valid file pattern.)
train_csv = 'train_encoded.csv'
train_n.to_csv(train_csv, index=False)

# Normalization layer whose statistics are computed from the training features.
normalizer = preprocessing.Normalization()
normalizer.adapt(np.array(X_train))

train_dataset = tf.data.experimental.make_csv_dataset(
    train_csv,
    1000,  # batch size
    column_names=column_names,
    label_name=label_name,
    num_epochs=1)
|
||||
# Show the first training example before and after normalization.
first = np.array(X_train[:1])
with np.printoptions(precision=2, suppress=True):
    # end='\n\n' produces the blank line between the two printouts.
    print('First example:', first, end='\n\n')
    print('Normalized:', normalizer(first).numpy())

# Pull a single batch from the CSV dataset and show its features.
batch_iterator = iter(train_dataset)
features, labels = next(batch_iterator)
print(features)
|
||||
# Linear model: the normalization layer followed by a single dense output unit.
model = tf.keras.Sequential()
model.add(normalizer)
model.add(layers.Dense(units=1))

# Run the untrained model on the first ten training rows; the result is not
# stored.
model.predict(X_train[:10])
|
||||
|
||||
# Scatter plot of one batch: year against age, coloured by the label.
# 'viridis' replaces 'sex', which is not a Matplotlib colormap name and made
# plt.scatter raise ValueError.
plt.scatter(features['year'],
            features['age'],
            c=labels,
            cmap='viridis')
plt.xlabel("year")
plt.ylabel("age")
plt.show()

# Compile model
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error')
|
||||
# Train model, holding back 20% of the training data for validation.
history = model.fit(X_train, y_train, epochs=30, validation_split=0.2)

print("Features: {}".format(feature_names))
print("Label: {}".format(label_name))

# Evaluate on the test set and keep the result under the 'model' key.
test_results = {'model': model.evaluate(X_test, y_test, verbose=0)}

# Flattened test-set predictions.
test_predictions = model.predict(X_test).flatten()
|
||||
|
||||
# a = plt.axes(aspect='equal')
|
||||
# plt.scatter(y_test, test_predictions)
|
||||
# plt.xlabel('True values [suicides_no]')
|
||||
# plt.ylabel('Predictions values [suicides_no]')
|
||||
# lims = [0, 5000]
|
||||
# plt.xlim(lims)
|
||||
# plt.ylim(lims)
|
||||
# _ = plt.plot(lims, lims)
|
||||
|
||||
# plt.show()
|
||||
|
||||
# Predict on the test set and save the predictions to results.csv.
predictions = model.predict(X_test)
results_frame = pd.DataFrame(predictions)
results_frame.to_csv('results.csv')
|
||||
|
Loading…
Reference in New Issue
Block a user