Add script for zadanie 5
This commit is contained in:
parent
aa0e85f270
commit
997481e85a
6
.ipynb_checkpoints/Untitled-checkpoint.ipynb
Normal file
6
.ipynb_checkpoints/Untitled-checkpoint.ipynb
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"cells": [],
|
||||||
|
"metadata": {},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
@ -15,5 +15,6 @@ RUN pip3 install sklearn
|
|||||||
RUN pip3 install pandas
|
RUN pip3 install pandas
|
||||||
RUN pip3 install seaborn
|
RUN pip3 install seaborn
|
||||||
RUN pip3 install matplotlib
|
RUN pip3 install matplotlib
|
||||||
|
RUN pip3 install tensorflow
|
||||||
|
|
||||||
CMD ./run.sh
|
CMD ./run.sh
|
||||||
|
@ -18,6 +18,7 @@ netflix_cleaned.date_added = netflix_cleaned.date_added.dropna().apply(lambda x:
|
|||||||
netflix_cleaned.update(netflix_cleaned.select_dtypes(include = 'object').apply(lambda col: col.str.lower()))
|
netflix_cleaned.update(netflix_cleaned.select_dtypes(include = 'object').apply(lambda col: col.str.lower()))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
movies = netflix_cleaned[netflix_cleaned.type == 'movie']
|
movies = netflix_cleaned[netflix_cleaned.type == 'movie']
|
||||||
series = netflix_cleaned[netflix_cleaned.type == 'tv show']
|
series = netflix_cleaned[netflix_cleaned.type == 'tv show']
|
||||||
|
|
||||||
@ -32,6 +33,15 @@ movies = movies.join(pd.DataFrame(mlb.fit_transform(movies.pop('listed_in').str.
|
|||||||
index=movies.index))
|
index=movies.index))
|
||||||
movies.drop(['movies'], axis = 1)
|
movies.drop(['movies'], axis = 1)
|
||||||
|
|
||||||
|
movies = movies[['release_year', 'duration',
|
||||||
|
'rottentomatoes_audience_score',
|
||||||
|
'action & adventure', 'anime features', 'children & family movies',
|
||||||
|
'classic movies', 'comedies', 'cult movies', 'documentaries', 'dramas',
|
||||||
|
'faith & spirituality', 'horror movies', 'independent movies',
|
||||||
|
'international movies', 'lgbtq movies', 'movies', 'music & musicals',
|
||||||
|
'romantic movies', 'sci-fi & fantasy', 'sports movies',
|
||||||
|
'stand-up comedy', 'thrillers']]
|
||||||
|
|
||||||
import sklearn
|
import sklearn
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
movies_train, movies_test = sklearn.model_selection.train_test_split(movies,test_size=0.20, random_state=42)
|
movies_train, movies_test = sklearn.model_selection.train_test_split(movies,test_size=0.20, random_state=42)
|
||||||
@ -75,3 +85,9 @@ series_subsets = series_subsets.reset_index()
|
|||||||
ax = sns.boxplot(data = series_subsets, x = 'level_0', y = 'rottentomatoes_audience_score')
|
ax = sns.boxplot(data = series_subsets, x = 'level_0', y = 'rottentomatoes_audience_score')
|
||||||
ax.set(title = 'Audience score distribution between subsets', ylabel = 'Audience score on Rotten Tomatoes', xlabel = 'SUBSET')
|
ax.set(title = 'Audience score distribution between subsets', ylabel = 'Audience score on Rotten Tomatoes', xlabel = 'SUBSET')
|
||||||
#plt.show(ax)
|
#plt.show(ax)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
movies_train.to_csv('movies_train.csv')
|
||||||
|
movies_test.to_csv('movies_test.csv')
|
||||||
|
movies_val.to_csv('movies_val.csv')
|
43
ium_zadanie5.py
Normal file
43
ium_zadanie5.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
import tensorflow as tf
|
||||||
|
from keras.models import Sequential
|
||||||
|
from keras import layers
|
||||||
|
# from keras.layers import Flatten,Dense,Dropout, GlobalAveragePooling2D
|
||||||
|
from keras.optimizers import Adam
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
|
||||||
|
# Train a small dense regression network that predicts the Rotten Tomatoes
# audience score from the remaining feature columns, then write the test-set
# predictions and the test error to disk.

TARGET_COLUMN = 'rottentomatoes_audience_score'
# Presumably the DataFrame index that to_csv() wrote without a header name;
# read_csv() then exposes it under this label -- verify against the producer.
INDEX_COLUMN = 'Unnamed: 0'

movies_train = pd.read_csv('movies_train.csv')
movies_test = pd.read_csv('movies_test.csv')

# Work on copies so the frames as loaded stay untouched.
x_train = movies_train.copy()
x_test = movies_test.copy()

# Split the regression target off from the features.
y_train = x_train.pop(TARGET_COLUMN)
y_test = x_test.pop(TARGET_COLUMN)

# The stored index column is not a feature.
x_train.pop(INDEX_COLUMN)
x_test.pop(INDEX_COLUMN)

# Convert both feature sets the same way so that training and inference see
# identical dtypes (the original only converted the training features).
x_train_tensor = tf.convert_to_tensor(x_train, np.float32)
x_test_tensor = tf.convert_to_tensor(x_test, np.float32)

model = Sequential()
# Derive the input width from the data instead of hard-coding it, so the
# script keeps working if the feature list in the preprocessing step changes.
model.add(layers.Input(shape=(x_train.shape[1],)))
# NOTE(review): these Dense layers have no activation, so the stack is a
# composition of linear maps -- confirm whether a non-linearity was intended.
model.add(layers.Dense(64))
model.add(layers.Dense(64))
model.add(layers.Dense(32))
model.add(layers.Dense(1))

# NOTE(review): the model is optimised for mean absolute error while the
# evaluation below reports mean squared error -- confirm this is intended.
model.compile(loss='mean_absolute_error', optimizer=Adam(0.001))

history = model.fit(
    x=x_train_tensor,
    y=y_train,
    verbose=0,
    epochs=99,
)

y_predicted = model.predict(x_test_tensor, batch_size=64)

error = mean_squared_error(y_test, y_predicted)

np.savetxt("test_predictions.csv", y_predicted, delimiter=",")

with open('evaluation.txt', 'w') as f:
    # '%f' keeps the fractional part; the previous '%d' truncated the error
    # to an integer.
    f.write('Mean square error: %f' % error)
|
||||||
|
|
1
run.sh
1
run.sh
@ -1,3 +1,4 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
kaggle kernels output 'eugenioscionti/scraping-rotten-tomatoes-to-enrich-netflix-dataset'
|
kaggle kernels output 'eugenioscionti/scraping-rotten-tomatoes-to-enrich-netflix-dataset'
|
||||||
python3 ium_zadanie1.py
|
python3 ium_zadanie1.py
|
||||||
|
python3 ium_zadanie5.py
|
||||||
|
Loading…
Reference in New Issue
Block a user