Patch for script4.py: tolerate malformed CSV rows when loading the dataset.

Summary of changes:
  * Load '25k_movies.csv.shuf' with on_bad_lines='skip' so rows with too many
    fields are dropped by the parser instead of aborting the read. This
    replaces the error_bad_lines=False keyword, which was deprecated in
    pandas 1.3 and removed in pandas 2.0.
  * No post-parse "column count" filter is added. Every row of a DataFrame
    has the same length (the number of columns), and after
    data = data[features + [target]] that length is 6, so a per-row
    len(x) == 12 test would discard the entire dataset.
  * Add the missing newline at end of file.

Note: the blob hashes on the "index" line come from the original commit and
were not regenerated for this revised patch text.

diff --git a/script4.py b/script4.py
index 6959ae8..f6a11ff 100644
--- a/script4.py
+++ b/script4.py
@@ -5,8 +5,8 @@ from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler
 
 # Step 1: Load the dataset
-data = pd.read_csv('25k_movies.csv.shuf')
-# Replace 'path_to_dataset.csv' with the actual path to your dataset file
+# Skip rows with too many fields at parse time (needs pandas >= 1.3).
+data = pd.read_csv('25k_movies.csv.shuf', on_bad_lines='skip')
 
 # Step 2: Preprocess the data
 features = ['Total Run Time', 'User Rating', 'Genres', 'Director Name', 'Writer Name']
@@ -46,4 +46,4 @@ model.fit(X_train, y_train, epochs=10, batch_size=32)
 y_pred = model.predict(X_test)
 mse = np.mean((y_pred - y_test)**2)
 
-print(f"Mean Squared Error (MSE): {mse}")
\ No newline at end of file
+print(f"Mean Squared Error (MSE): {mse}")