test jenkins

2023-05-12 15:40:20 +02:00 · 2023-05-12 15:40:20 +02:00 · 29933744f1
commit 29933744f1
parent ff812074bf
2 changed files with 55 additions and 6 deletions
--- a/12
+++ b/12
@ -10,13 +10,13 @@ RUN pip3 install --user kaggle pandas
 COPY . /app
 WORKDIR /app

-RUN apt install python3.10-venv
+RUN apt install python3.10-venv -y
 RUN python3 -m venv docker_ium
-RUN source docker_ium/bin/activate
+CMD source docker_ium/bin/activate

 RUN pip3 install pandas
-RUN pip3 install -U scikit-learn
-
-
-#CMD python3 script2.py
+RUN pip3 install -U scikit-learn 
+RUN pip install tensorflow==2.12.*

+CMD deactivate
+RUN echo "hurra"
--- a/script3.py
+++ b/script3.py
@ -0,0 +1,49 @@
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics import accuracy_score
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Embedding, LSTM
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from tensorflow.keras.utils import to_categorical
+
+# Step 1: Load the dataset; the CSV must provide a 'review' text column and a 'sentiment' label column
+df = pd.read_csv('25k_movies.csv.shuf')  # Replace with the actual file name or path
+text_data = df['review']
+labels = df['sentiment']  # NOTE(review): presumably encoded as 0/1 (the loss and the `== 1` check below rely on it) - confirm
+
+# Step 2: Split into 64% train / 16% validation / 20% test; the fixed seed keeps the split reproducible
+X_train, X_test, y_train, y_test = train_test_split(text_data, labels, test_size=0.2, random_state=42)
+X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
+
+# Step 3: TF-IDF vectorization; the vocabulary is fitted on the training split only and reused for val/test
+vectorizer = TfidfVectorizer()
+X_train_vec = vectorizer.fit_transform(X_train)
+X_val_vec = vectorizer.transform(X_val)
+X_test_vec = vectorizer.transform(X_test)
+
+# Step 4: Feed-forward binary classifier; the input width equals the TF-IDF vocabulary size
+model = Sequential()
+model.add(Dense(128, activation='relu', input_shape=(X_train_vec.shape[1],)))
+model.add(Dense(64, activation='relu'))
+model.add(Dense(1, activation='sigmoid'))  # single unit: probability of class 1
+
+# Step 5: Training (NOTE(review): fit() receives SciPy sparse matrices and pandas labels - confirm Keras accepts both)
+model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+model.fit(X_train_vec, y_train, batch_size=32, epochs=10, validation_data=(X_val_vec, y_val))
+
+# Step 6: Evaluation; predict_classes() was removed in TensorFlow 2.6, so threshold the sigmoid output at 0.5
+y_pred = (model.predict(X_test_vec) > 0.5).astype('int32').ravel()
+accuracy = accuracy_score(y_test, y_pred)
+print("Test Accuracy:", accuracy)
+
+# Step 7: Fine-tuning and Optimization
+# Adjust hyperparameters, architecture, and retrain the model as needed
+
+# Step 8: Inference; new texts go through the already-fitted vectorizer and the same 0.5 threshold
+new_reviews = ['Great movie!', 'Terrible acting.']
+new_reviews_vec = vectorizer.transform(new_reviews)
+predictions = (model.predict(new_reviews_vec) > 0.5).astype('int32').ravel()
+sentiments = ['Positive' if p == 1 else 'Negative' for p in predictions]
+print("Predictions:", sentiments)