"""Train and evaluate a binary sentiment classifier on movie reviews.

Pipeline: load a CSV with ``review`` and ``sentiment`` columns, split it into
train/validation/test sets, turn the text into TF-IDF features, train a small
fully-connected Keras network, report test accuracy, and label two sample
reviews.
"""
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical


def _predict_labels(model, features, threshold=0.5):
    """Return hard 0/1 class labels for a single-sigmoid-output model.

    ``Sequential.predict_classes`` no longer exists in current TensorFlow
    releases; thresholding the predicted probability is its documented
    replacement for a sigmoid output layer.

    Args:
        model: Fitted Keras model whose last layer is a single sigmoid unit.
        features: Feature matrix accepted by ``model.predict``.
        threshold: Probability above which a sample is labelled 1.

    Returns:
        A 1-D int32 array holding one 0/1 label per input row.
    """
    probabilities = model.predict(features)
    # ravel() flattens the (n, 1) network output so callers get scalar labels.
    return (probabilities > threshold).astype("int32").ravel()


# Step 1: Data Preprocessing
df = pd.read_csv('25k_movies.csv.shuf')  # Replace with the actual file name or path
text_data = df['review']
# NOTE(review): binary_crossentropy and the `p == 1` check below assume the
# 'sentiment' column already holds numeric 0/1 labels -- confirm against the CSV.
labels = df['sentiment']

# Step 2: Data Split
# First hold out 20% as the test set, then 20% of the remainder for validation.
X_train, X_test, y_train, y_test = train_test_split(
    text_data, labels, test_size=0.2, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# Step 3: Vectorization
# Fit the vocabulary on the training split only so no validation/test
# information leaks into the features.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_val_vec = vectorizer.transform(X_val)
X_test_vec = vectorizer.transform(X_test)
# NOTE(review): TfidfVectorizer returns SciPy sparse matrices; whether Keras
# accepts them directly depends on the installed TensorFlow/Keras version --
# confirm, and densify or cap the vocabulary size if it does not.

# Step 4: Model Architecture
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(X_train_vec.shape[1],)))
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Step 5: Training
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(
    X_train_vec,
    y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_val_vec, y_val),
)

# Step 6: Evaluation
y_pred = _predict_labels(model, X_test_vec)
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)

# Step 7: Fine-tuning and Optimization
# Adjust hyperparameters, architecture, and retrain the model as needed

# Step 8: Inference
new_reviews = ['Great movie!', 'Terrible acting.']
new_reviews_vec = vectorizer.transform(new_reviews)
predictions = _predict_labels(model, new_reviews_vec)
sentiments = ['Positive' if p == 1 else 'Negative' for p in predictions]
print("Predictions:", sentiments)