From bd09cb0dd1bc86793ce48e31bf0b43cc6d78cdd5 Mon Sep 17 00:00:00 2001 From: Yevhenii Poliakov Date: Sun, 14 May 2023 17:58:00 +0200 Subject: [PATCH] update script4 --- script4.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/script4.py b/script4.py index 6959ae8..f6a11ff 100644 --- a/script4.py +++ b/script4.py @@ -5,8 +5,7 @@ from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler # Step 1: Load the dataset -data = pd.read_csv('25k_movies.csv.shuf') -# Replace 'path_to_dataset.csv' with the actual path to your dataset file +data = pd.read_csv('25k_movies.csv.shuf', error_bad_lines=False) # Step 2: Preprocess the data features = ['Total Run Time', 'User Rating', 'Genres', 'Director Name', 'Writer Name'] @@ -16,6 +15,9 @@ data = data[features + [target]] # Handle missing values if any data = data.dropna() +# Filter out rows with a different number of columns +data = data[data.apply(lambda x: len(x) == 12, axis=1)] + # Convert categorical variables to numerical representations data = pd.get_dummies(data, columns=['Genres', 'Director Name', 'Writer Name']) @@ -46,4 +48,4 @@ model.fit(X_train, y_train, epochs=10, batch_size=32) y_pred = model.predict(X_test) mse = np.mean((y_pred - y_test)**2) -print(f"Mean Squared Error (MSE): {mse}") \ No newline at end of file +print(f"Mean Squared Error (MSE): {mse}")