ium_434765/neural_network.py
Karolina Oparczyk 4ed875434b
All checks were successful
s434765-training/pipeline/head This commit looks good
trigger other projects
2021-05-20 19:03:48 +02:00

83 lines
3.0 KiB
Python

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from tensorflow import keras
import sys
def normalize_data(data):
    """Min-max scale *data* into the range [0, 1].

    The minimum and maximum are obtained with ``np.min`` / ``np.max``, so
    anything those functions accept (NumPy arrays, sequences of numbers)
    can be passed in.

    When the data has zero range (every value identical) the result is all
    zeros instead of the NaN/inf values a plain division by zero would give.

    Returns:
        ``(data - min) / (max - min)``, with a zero range treated as 1.
    """
    low = np.min(data)
    span = np.max(data) - low
    # A zero range would divide by zero; dividing by 1 keeps the (already
    # zero) numerator and yields a clean 0 for constant input.
    span = np.where(span == 0, 1, span)
    return (data - low) / span
# Column names assigned to the training CSV, in file order. They are passed
# explicitly via ``names=`` rather than taken from the file.
train_columns = [
    "video_id", "last_trending_date", "publish_date", "publish_hour",
    "category_id", "channel_title", "views", "likes", "dislikes",
    "comment_count", "comments_disabled", "ratings_disabled",
    "tag_appeared_in_title_count", "tag_appeared_in_title", "title", "tags",
    "description", "trend_day_count", "trend_publish_diff",
    "trend_tag_highest", "trend_tag_total", "tags_count", "subscriber",
]

# Read at most the first 1087 rows of "data_train", skipping blank lines and
# tolerating malformed ones.
# NOTE(review): ``error_bad_lines`` was deprecated in pandas 1.3 and removed in
# 2.0 (replacement: ``on_bad_lines="skip"``) - confirm the pinned pandas version.
data = pd.read_csv(
    "data_train",
    sep=',',
    names=train_columns,
    nrows=1087,
    skip_blank_lines=True,
    error_bad_lines=False,
)
# Rows with any missing value are discarded.
data = data.dropna()
# Single-column frames cast to int: "views" is the model input, "likes" the
# regression target.
X = data[["views"]].astype(int)
y = data[["likes"]].astype(int)

# Min-max bounds of the training views; printed, then used to scale X.
min_val_sub, max_val_sub = np.min(X), np.max(X)
X = (X - min_val_sub) / (max_val_sub - min_val_sub)
print(min_val_sub)
print(max_val_sub)

# Same treatment for the training likes.
min_val_like, max_val_like = np.min(y), np.max(y)
y = (y - min_val_like) / (max_val_like - min_val_like)
print(min_val_like)
print(max_val_like)
# Fully connected regression network: 512 -> 256 -> 256 -> 128 ReLU units
# followed by a single linear output.
model = keras.Sequential()
model.add(keras.layers.Dense(512, input_dim=X.shape[1], activation='relu'))
for units in (256, 256, 128):
    model.add(keras.layers.Dense(units, activation='relu'))
model.add(keras.layers.Dense(1, activation='linear'))

# Mean absolute error is both the optimised loss and the reported metric.
model.compile(
    loss='mean_absolute_error',
    optimizer="Adam",
    metrics=['mean_absolute_error'],
)

# The number of epochs is taken from the first command-line argument;
# 30% of the training rows are held out for validation.
n_epochs = int(sys.argv[1])
model.fit(X, y, epochs=n_epochs, validation_split=0.3)
# Column names assigned to the dev CSV, in file order (only the first ten
# names of the training layout are supplied here).
dev_columns = [
    "video_id", "last_trending_date", "publish_date", "publish_hour",
    "category_id", "channel_title", "views", "likes", "dislikes",
    "comment_count",
]

# Read at most the first 527 rows of "data_dev", skipping blank lines and
# tolerating malformed ones.
# NOTE(review): ``error_bad_lines`` was deprecated in pandas 1.3 and removed in
# 2.0 (replacement: ``on_bad_lines="skip"``) - confirm the pinned pandas version.
data = pd.read_csv(
    "data_dev",
    sep=',',
    names=dev_columns,
    nrows=527,
    skip_blank_lines=True,
    error_bad_lines=False,
)
# Rows with any missing value are discarded.
data = data.dropna()
# Dev-set input ("views") and expected output ("likes") as single-column
# integer frames.
X_test = data.loc[:, data.columns == "views"].astype(int)
y_test = data.loc[:, data.columns == "likes"].astype(int)

# Report the dev-set value ranges (informational only). They are kept in
# their own names so the training bounds computed earlier are NOT overwritten.
dev_min_views = np.min(X_test)
dev_max_views = np.max(X_test)
print(dev_min_views)
print(dev_max_views)
dev_min_likes = np.min(y_test)
dev_max_likes = np.max(y_test)
print(dev_min_likes)
print(dev_max_likes)

# Scale the dev inputs with the TRAINING min/max (min_val_sub / max_val_sub):
# the network was fitted on inputs scaled that way, so applying a different
# range here would feed it features on an inconsistent scale.
X_test = (X_test - min_val_sub) / (max_val_sub - min_val_sub)

prediction = model.predict(X_test)

# Map predictions back to "likes" units with the TRAINING target bounds.
# Using the dev labels' range instead would leak ground-truth information
# into the predictions being evaluated.
# np.min/np.max on a DataFrame may return a one-element Series or a scalar
# depending on the pandas version, so the scalar is extracted generically.
like_min = float(np.asarray(min_val_like).ravel()[0])
like_span = float(np.asarray(max_val_like).ravel()[0]) - like_min
prediction_denormalized = [
    float(pred[0]) * like_span + like_min for pred in prediction
]
# Write one "predicted / expected" line per dev sample. The context manager
# guarantees the file is flushed and closed even if a write fails.
with open("predictions.txt", "w") as f:
    for pred, test in zip(prediction_denormalized, y_test.values):
        f.write("predicted: %s expected: %s\n" % (str(pred), str(test[0])))
# Mean squared error between the expected likes and the denormalised
# predictions, printed to stdout.
error = mean_squared_error(y_true=y_test, y_pred=prediction_denormalized)
print(error)

# Persist the trained network under the path 'model'.
model.save('model')