diff --git a/Dockerfile b/Dockerfile
index 110908e..dae3fa8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,13 +10,14 @@ RUN pip3 install --user kaggle pandas
 
 COPY . /app
 WORKDIR /app
-RUN apt install python3.10-venv
+RUN apt install python3.10-venv -y
 RUN python3 -m venv docker_ium
-RUN source docker_ium/bin/activate
+# Each RUN starts a fresh shell, so "source .../activate" cannot persist;
+# putting the venv's bin directory first on PATH selects it for all later steps.
+ENV PATH="/app/docker_ium/bin:$PATH"
 RUN pip3 install pandas
-RUN pip3 install -U scikit-learn
-
-
-#CMD python3 script2.py
+RUN pip3 install -U scikit-learn
+RUN pip install tensorflow==2.12.*
+RUN echo "hurra"
 
 
diff --git a/script3.py b/script3.py
new file mode 100644
index 0000000..399c8f5
--- /dev/null
+++ b/script3.py
@@ -0,0 +1,62 @@
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics import accuracy_score
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Embedding, LSTM
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from tensorflow.keras.utils import to_categorical
+
+
+def predict_labels(model, features, threshold=0.5):
+    """Return hard 0/1 labels from the model's sigmoid outputs.
+
+    Sequential.predict_classes() was removed in TensorFlow 2.6, so the
+    probabilities from predict() are thresholded explicitly instead.
+    """
+    probabilities = model.predict(features)
+    return (probabilities > threshold).astype("int32").ravel()
+
+
+# Step 1: Data Preprocessing
+df = pd.read_csv('25k_movies.csv.shuf')  # Replace with the actual file name or path
+text_data = df['review']
+# NOTE(review): assumes 'sentiment' already holds numeric 0/1 labels, as required
+# by binary_crossentropy and the `p == 1` check below - TODO confirm.
+labels = df['sentiment']
+
+# Step 2: Data Split (64% train / 16% validation / 20% test)
+X_train, X_test, y_train, y_test = train_test_split(text_data, labels, test_size=0.2, random_state=42)
+X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
+
+# Step 3: Vectorization (the vectorizer is fitted on the training split only)
+vectorizer = TfidfVectorizer()
+X_train_vec = vectorizer.fit_transform(X_train)
+X_val_vec = vectorizer.transform(X_val)
+X_test_vec = vectorizer.transform(X_test)
+
+# Step 4: Model Architecture
+model = Sequential()
+model.add(Dense(128, activation='relu', input_shape=(X_train_vec.shape[1],)))
+model.add(Dense(64, activation='relu'))
+model.add(Dense(1, activation='sigmoid'))
+
+# Step 5: Training
+model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+model.fit(X_train_vec, y_train, batch_size=32, epochs=10, validation_data=(X_val_vec, y_val))
+
+# Step 6: Evaluation
+y_pred = predict_labels(model, X_test_vec)
+accuracy = accuracy_score(y_test, y_pred)
+print("Test Accuracy:", accuracy)
+
+# Step 7: Fine-tuning and Optimization
+# Adjust hyperparameters, architecture, and retrain the model as needed
+
+# Step 8: Inference
+new_reviews = ['Great movie!', 'Terrible acting.']
+new_reviews_vec = vectorizer.transform(new_reviews)
+predictions = predict_labels(model, new_reviews_vec)
+sentiments = ['Positive' if p == 1 else 'Negative' for p in predictions]
+print("Predictions:", sentiments)