This commit is contained in:
parent
9feb02a3b8
commit
83361bdf43
@ -7,6 +7,6 @@ if kaggle datasets download -d sgonkaggle/youtube-trend-with-subscriber && unzip
|
||||
# Drop the last line of the CSV (GNU `head -n -1`), shuffle the remaining lines, and write them to data_shuf.
# NOTE(review): the first line is kept, so a header row (if the CSV has one) is shuffled in with the data — confirm this is intended.
head -n -1 "USvideos_modified.csv" | shuf > "data_shuf"
|
||||
head -n 544 "data_shuf" > "data_test"
|
||||
head -n 1088 "data_shuf" | tail -n 544 > "data_dev"
|
||||
head -n +1089 "data_shuf" > "data_train"
|
||||
head -n 5441 "data_shuf" | tail -n 4352 > "data_train"
|
||||
python3 get_data.py USvideos_modified.csv
|
||||
fi
|
@ -1,19 +1,30 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from keras import optimizers
|
||||
|
||||
from tensorflow import keras
|
||||
|
||||
|
||||
def normalize_data(data):
    """Min-max scale *data* using its own minimum and maximum.

    Every value is shifted by ``np.min(data)`` and divided by the spread
    ``np.max(data) - np.min(data)``.

    Args:
        data: Numeric array-like accepted by ``np.min``/``np.max`` that also
            supports element-wise subtraction and division (for example a
            NumPy array or a pandas object).

    Returns:
        The rescaled data, with the minimum mapped to 0 and the maximum to 1.
        When the spread is zero (all values equal) the result is 0 instead
        of NaN.
    """
    lowest = np.min(data)
    spread = np.max(data) - lowest
    # A constant input has zero spread; divide by 1 there so the result is 0
    # rather than the NaN produced by a 0/0 division.
    safe_spread = np.where(spread == 0, 1, spread)
    return (data - lowest) / safe_spread
|
||||
|
||||
|
||||
# Load the training split as comma-separated values and drop rows containing missing values.
# NOTE(review): `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0 (replacement: `on_bad_lines="skip"`) — confirm the pandas version this script targets.
data = pd.read_csv("data_train", sep=',', error_bad_lines=False).dropna()
|
||||
X = data.loc[:,data.columns == "2805317"].astype(int)
|
||||
y = data.loc[:,data.columns == "198909"].astype(int)
|
||||
|
||||
min_val_sub = np.min(X)
|
||||
max_val_sub = np.max(X)
|
||||
X = (X - min_val_sub) / (max_val_sub - min_val_sub)
|
||||
print(min_val_sub)
|
||||
print(max_val_sub)
|
||||
|
||||
def NormalizeData(data):
    """Min-max scale *data* using its own minimum and maximum.

    Every value is shifted by ``np.min(data)`` and divided by the spread
    ``np.max(data) - np.min(data)``.

    Args:
        data: Numeric array-like accepted by ``np.min``/``np.max`` that also
            supports element-wise subtraction and division (for example a
            NumPy array or a pandas object).

    Returns:
        The rescaled data, with the minimum mapped to 0 and the maximum to 1.
        When the spread is zero (all values equal) the result is 0 instead
        of NaN.
    """
    lowest = np.min(data)
    spread = np.max(data) - lowest
    # A constant input has zero spread; divide by 1 there so the result is 0
    # rather than the NaN produced by a 0/0 division.
    safe_spread = np.where(spread == 0, 1, spread)
    return (data - lowest) / safe_spread
|
||||
min_val_like = np.min(y)
|
||||
max_val_like = np.max(y)
|
||||
y = (y - min_val_like) / (max_val_like - min_val_like)
|
||||
|
||||
|
||||
X = NormalizeData(X)
|
||||
y = NormalizeData(y)
|
||||
print(min_val_like)
|
||||
print(max_val_like)
|
||||
|
||||
|
||||
model = keras.Sequential([
|
||||
@ -22,7 +33,7 @@ model = keras.Sequential([
|
||||
keras.layers.Dense(1,activation='relu'),
|
||||
])
|
||||
|
||||
model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])
|
||||
model.compile(loss='mean_absolute_error', optimizer="Adam", metrics=['mean_absolute_error'])
|
||||
|
||||
model.fit(X, y, epochs=15, validation_split = 0.3)
|
||||
|
||||
@ -30,8 +41,18 @@ data = pd.read_csv("data_test", sep=',', error_bad_lines=False).dropna()
|
||||
X_test = data.loc[:,data.columns == "2805317"].astype(int)
|
||||
y_test = data.loc[:,data.columns == "198909"].astype(int)
|
||||
|
||||
X_test = NormalizeData(X_test)
|
||||
y_test = NormalizeData(y_test)
|
||||
# NOTE(review): this reassigns min_val_sub (and max_val_sub just below) from the test features,
# replacing the values computed from the training features earlier in the script, so the test set
# is scaled with its own min/max rather than the training statistics — confirm this is intended.
min_val_sub = np.min(X_test)
|
||||
max_val_sub = np.max(X_test)
|
||||
X_test = (X_test - min_val_sub) / (max_val_sub - min_val_sub)
|
||||
print(min_val_sub)
|
||||
print(max_val_sub)
|
||||
|
||||
min_val_like = np.min(y_test)
|
||||
max_val_like = np.max(y_test)
|
||||
y_test = (y_test - min_val_like) / (max_val_like - min_val_like)
|
||||
|
||||
print(min_val_like)
|
||||
print(max_val_like)
|
||||
|
||||
prediction = model.predict(X_test)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user