Add script for zadanie 5
This commit is contained in:
parent
aa0e85f270
commit
997481e85a
6
.ipynb_checkpoints/Untitled-checkpoint.ipynb
Normal file
6
.ipynb_checkpoints/Untitled-checkpoint.ipynb
Normal file
@ -0,0 +1,6 @@
|
||||
{
|
||||
"cells": [],
|
||||
"metadata": {},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@ -15,5 +15,6 @@ RUN pip3 install sklearn
|
||||
RUN pip3 install pandas
|
||||
RUN pip3 install seaborn
|
||||
RUN pip3 install matplotlib
|
||||
RUN pip3 install tensorflow
|
||||
|
||||
CMD ./run.sh
|
||||
|
@ -18,6 +18,7 @@ netflix_cleaned.date_added = netflix_cleaned.date_added.dropna().apply(lambda x:
|
||||
netflix_cleaned.update(netflix_cleaned.select_dtypes(include = 'object').apply(lambda col: col.str.lower()))
|
||||
|
||||
|
||||
|
||||
movies = netflix_cleaned[netflix_cleaned.type == 'movie']
|
||||
series = netflix_cleaned[netflix_cleaned.type == 'tv show']
|
||||
|
||||
@ -32,6 +33,15 @@ movies = movies.join(pd.DataFrame(mlb.fit_transform(movies.pop('listed_in').str.
|
||||
index=movies.index))
|
||||
# NOTE(review): DataFrame.drop returns a new frame and the result is not
# assigned here, so this statement has no effect -- the 'movies' genre column
# stays in `movies` and is still selected in the column list below.
movies.drop(['movies'], axis = 1)
|
||||
|
||||
movies = movies[['release_year', 'duration',
|
||||
'rottentomatoes_audience_score',
|
||||
'action & adventure', 'anime features', 'children & family movies',
|
||||
'classic movies', 'comedies', 'cult movies', 'documentaries', 'dramas',
|
||||
'faith & spirituality', 'horror movies', 'independent movies',
|
||||
'international movies', 'lgbtq movies', 'movies', 'music & musicals',
|
||||
'romantic movies', 'sci-fi & fantasy', 'sports movies',
|
||||
'stand-up comedy', 'thrillers']]
|
||||
|
||||
import sklearn
|
||||
from sklearn.model_selection import train_test_split
|
||||
# 80/20 train/test split with a fixed seed so the split is reproducible.
movies_train, movies_test = train_test_split(
    movies,
    test_size=0.20,
    random_state=42,
)
|
||||
@ -75,3 +85,9 @@ series_subsets = series_subsets.reset_index()
|
||||
ax = sns.boxplot(data = series_subsets, x = 'level_0', y = 'rottentomatoes_audience_score')
|
||||
ax.set(title = 'Audience score distribution between subsets', ylabel = 'Audience score on Rotten Tomatoes', xlabel = 'SUBSET')
|
||||
#plt.show(ax)
|
||||
|
||||
|
||||
|
||||
movies_train.to_csv('movies_train.csv')
|
||||
movies_test.to_csv('movies_test.csv')
|
||||
movies_val.to_csv('movies_val.csv')
|
43
ium_zadanie5.py
Normal file
43
ium_zadanie5.py
Normal file
@ -0,0 +1,43 @@
|
||||
import tensorflow as tf
|
||||
from keras.models import Sequential
|
||||
from keras import layers
|
||||
# from keras.layers import Flatten,Dense,Dropout, GlobalAveragePooling2D
|
||||
from keras.optimizers import Adam
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.metrics import mean_squared_error
|
||||
|
||||
# Load the train/test splits from the CSV files in the working directory.
movies_train = pd.read_csv('movies_train.csv')
movies_test = pd.read_csv('movies_test.csv')

# Targets: the Rotten Tomatoes audience score column.
y_train = movies_train['rottentomatoes_audience_score']
y_test = movies_test['rottentomatoes_audience_score']

# Features: everything except the target and the 'Unnamed: 0' column
# (presumably the DataFrame index written out by to_csv -- not a feature).
x_train = movies_train.drop(
    columns=['rottentomatoes_audience_score', 'Unnamed: 0'])
x_test = movies_test.drop(
    columns=['rottentomatoes_audience_score', 'Unnamed: 0'])
|
||||
|
||||
|
||||
|
||||
# Feed-forward regressor predicting the audience score from the feature frame.
model = Sequential()
# Input width follows the feature frame (previously hard-coded to 22), so the
# model keeps matching the data if the preprocessing column list changes.
model.add(layers.Input(shape=(x_train.shape[1],)))
# Hidden layers need a non-linearity: Dense defaults to activation=None, and a
# stack of purely linear layers collapses into a single linear map.
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(32, activation='relu'))
# Single linear output unit for the regression target.
model.add(layers.Dense(1))

# Optimise mean absolute error with Adam at learning rate 0.001.
model.compile(loss='mean_absolute_error', optimizer=Adam(0.001))
|
||||
|
||||
# Cast the feature frame to a float32 tensor before handing it to Keras.
train_features = tf.convert_to_tensor(x_train, np.float32)

# Train for 99 epochs without progress output; keep the History object.
history = model.fit(x=train_features, y=y_train, epochs=99, verbose=0)
|
||||
|
||||
# Cast the test features the same way as the training features so predict()
# receives the same float32 tensor input type the model was fitted on
# (previously the raw DataFrame was passed here).
x_test_tensor = tf.convert_to_tensor(x_test, np.float32)
y_predicted = model.predict(x_test_tensor, batch_size=64)

# NOTE: the model is optimised for mean absolute error but is evaluated with
# mean squared error here.
error = mean_squared_error(y_test, y_predicted)
|
||||
|
||||
# Persist the raw test-set predictions as comma-delimited text.
np.savetxt("test_predictions.csv", y_predicted, delimiter=",")

# Write the evaluation metric. %f keeps the fractional part of the float MSE;
# the previous %d silently truncated it to an integer.
with open('evaluation.txt', 'w') as f:
    f.write('Mean square error: %f' % error)
|
||||
|
Loading…
Reference in New Issue
Block a user