NOTE(review): the leading whitespace of the Jenkinsfile-lab5 context lines
below is reconstructed; confirm it against the real file before applying.

diff --git a/Jenkinsfile-lab5 b/Jenkinsfile-lab5
index 02b8094..631bb73 100644
--- a/Jenkinsfile-lab5
+++ b/Jenkinsfile-lab5
@@ -39,9 +39,9 @@ pipeline {
                 echo("run data script")
                 //sh "source docker_ium/bin/activate"
                 sh "ls -a"
-                sh "chmod u+x script5.py"
+                sh "chmod u+x script5_1.py"
                 //sh "pip3 show pandas"
-                sh "python3 script5.py"
+                sh "python3 script5_1.py"
             }
         }
 
diff --git a/script5_1.py b/script5_1.py
new file mode 100644
--- /dev/null
+++ b/script5_1.py
@@ -0,0 +1,96 @@
+"""Train a small dense network that predicts a movie 'Rating' from data.csv."""
+import ast
+
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import MultiLabelBinarizer
+from tensorflow.keras.layers import Dense
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.optimizers import Adam
+
+DATA_PATH = 'data.csv'
+TARGET_COLUMN = 'Rating'
+NUMERIC_COLUMNS = ['User Rating', 'year']
+MULTI_LABEL_COLUMNS = ['Generes', 'Plot Kyeword', 'Top 5 Casts']
+
+
+def parse_labels(cell):
+    """Return the labels stored in one CSV cell as a list of strings.
+
+    MultiLabelBinarizer expects one iterable of labels per row; given the raw
+    cell text it would treat every character as a label, so the text is split
+    into real labels first.
+    """
+    if isinstance(cell, (list, tuple, set)):
+        return [str(item).strip() for item in cell]
+    if pd.isna(cell):
+        return []
+    text = str(cell).strip()
+    if text.startswith('['):
+        # NOTE(review): assumes list-like cells are Python list literals such
+        # as "['a', 'b']" - confirm against data.csv.
+        try:
+            parsed = ast.literal_eval(text)
+        except (ValueError, SyntaxError):
+            parsed = None
+        if isinstance(parsed, (list, tuple)):
+            return [str(item).strip() for item in parsed]
+    # Fallback: plain comma-separated text (also covers malformed literals).
+    pieces = (part.strip(" '\"") for part in text.strip('[]').split(','))
+    return [piece for piece in pieces if piece]
+
+
+def build_features(data):
+    """Return a 2-D float32 feature matrix built from the columns of data."""
+    blocks = []
+    for column in NUMERIC_COLUMNS:
+        values = pd.to_numeric(data[column], errors='coerce')
+        # Cells that do not parse become NaN; fill them with the column median
+        # (0.0 when nothing parses) so the network never receives NaN.
+        # NOTE(review): confirm these columns hold plain numbers in data.csv.
+        median = values.median()
+        values = values.fillna(0.0 if pd.isna(median) else median)
+        blocks.append(values.to_numpy(dtype='float32').reshape(-1, 1))
+    for column in MULTI_LABEL_COLUMNS:
+        labels = data[column].map(parse_labels)
+        encoded = MultiLabelBinarizer().fit_transform(labels)
+        blocks.append(encoded.astype('float32'))
+    return np.hstack(blocks)
+
+
+def main():
+    """Load data.csv, train the regressor and print its test-set loss."""
+    data = pd.read_csv(DATA_PATH, on_bad_lines='skip', engine='python')
+
+    # Rows whose target is not numeric cannot be used for regression.
+    target = pd.to_numeric(data[TARGET_COLUMN], errors='coerce')
+    usable = target.notna()
+    data = data[usable]
+    y = target[usable].to_numpy(dtype='float32')
+
+    # NOTE(review): 'movie title', 'Director' and 'Writer' are free-text
+    # columns; raw text cannot feed a Dense layer, so they are left out until
+    # an encoding is chosen.
+    X = build_features(data)
+
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42)
+
+    # input_dim is the width of the encoded matrix, not the CSV column count.
+    model = Sequential()
+    model.add(Dense(32, activation='relu', input_dim=X.shape[1]))
+    model.add(Dense(16, activation='relu'))
+    model.add(Dense(1))
+    model.compile(optimizer=Adam(), loss='mse')
+
+    model.fit(X_train, y_train, batch_size=64, epochs=10,
+              validation_data=(X_test, y_test))
+
+    # With no extra metrics compiled, evaluate() returns the MSE loss.
+    mse = model.evaluate(X_test, y_test)
+    print("Mean Squared Error:", mse)
+
+
+if __name__ == "__main__":
+    main()