From 83361bdf43f3ac82c65558565a60a924b9c6b02c Mon Sep 17 00:00:00 2001 From: Karolina Oparczyk Date: Mon, 17 May 2021 19:24:30 +0200 Subject: [PATCH] nan error fix v2 --- get_data.sh | 2 +- neural_network.py | 37 +++++++++++++++++++++++++++++-------- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/get_data.sh b/get_data.sh index 9bf4dd7..e6d03bf 100644 --- a/get_data.sh +++ b/get_data.sh @@ -7,6 +7,6 @@ if kaggle datasets download -d sgonkaggle/youtube-trend-with-subscriber && unzip head -n -1 "USvideos_modified.csv" | shuf > "data_shuf" head -n 544 "data_shuf" > "data_test" head -n 1088 "data_shuf" | tail -n 544 > "data_dev" - head -n +1089 "data_shuf" > "data_train" + head -n 5441 "data_shuf" | tail -n 4352 > "data_train" python3 get_data.py USvideos_modified.csv fi \ No newline at end of file diff --git a/neural_network.py b/neural_network.py index 71d2253..5ecd857 100644 --- a/neural_network.py +++ b/neural_network.py @@ -1,19 +1,30 @@ import pandas as pd import numpy as np +from keras import optimizers from tensorflow import keras + +def normalize_data(data): + return (data - np.min(data)) / (np.max(data) - np.min(data)) + + data = pd.read_csv("data_train", sep=',', error_bad_lines=False).dropna() X = data.loc[:,data.columns == "2805317"].astype(int) y = data.loc[:,data.columns == "198909"].astype(int) +min_val_sub = np.min(X) +max_val_sub = np.max(X) +X = (X - min_val_sub) / (max_val_sub - min_val_sub) +print(min_val_sub) +print(max_val_sub) -def NormalizeData(data): - return (data - np.min(data)) / (np.max(data) - np.min(data)) +min_val_like = np.min(y) +max_val_like = np.max(y) +y = (y - min_val_like) / (max_val_like - min_val_like) - -X = NormalizeData(X) -y = NormalizeData(y) +print(min_val_like) +print(max_val_like) model = keras.Sequential([ @@ -22,7 +33,7 @@ model = keras.Sequential([ keras.layers.Dense(1,activation='relu'), ]) -model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error']) +model.compile(loss='mean_absolute_error', optimizer="Adam", metrics=['mean_absolute_error']) model.fit(X, y, epochs=15, validation_split = 0.3) @@ -30,8 +41,18 @@ data = pd.read_csv("data_test", sep=',', error_bad_lines=False).dropna() X_test = data.loc[:,data.columns == "2805317"].astype(int) y_test = data.loc[:,data.columns == "198909"].astype(int) -X_test = NormalizeData(X_test) -y_test = NormalizeData(y_test) +min_val_sub = np.min(X_test) +max_val_sub = np.max(X_test) +X_test = (X_test - min_val_sub) / (max_val_sub - min_val_sub) +print(min_val_sub) +print(max_val_sub) + +min_val_like = np.min(y_test) +max_val_like = np.max(y_test) +y_test = (y_test - min_val_like) / (max_val_like - min_val_like) + +print(min_val_like) +print(max_val_like) prediction = model.predict(X_test)