From 3fef5dda7620590a87e8c1f5fe684d6e97bc7eee Mon Sep 17 00:00:00 2001
From: eugene
Date: Wed, 7 Jun 2023 00:34:30 +0200
Subject: [PATCH] update

---
 Dockerfile       |   2 +
 Jenkinsfile-lab7 |  52 ++++++++++++++++++++++++++
 script7.py       | 101 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 155 insertions(+)
 create mode 100644 Jenkinsfile-lab7
 create mode 100644 script7.py

diff --git a/Dockerfile b/Dockerfile
index 002b674..3b0913b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,6 +11,8 @@ RUN pip3 install tensorflow==2.12.*
 RUN pip3 install torch torchvision
 #RUN pip install torch==1.8.0+cpu torchvision==0.9.0+cpu torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.html
 RUN pip3 install pickle5
+RUN pip3 install sacred
+RUN pip3 install pymongo
 #RUN apt install python3.10-venv -y
 #RUN python3 -m venv docker_ium
 
diff --git a/Jenkinsfile-lab7 b/Jenkinsfile-lab7
new file mode 100644
index 0000000..61589b2
--- /dev/null
+++ b/Jenkinsfile-lab7
@@ -0,0 +1,52 @@
+pipeline {
+    agent {
+        dockerfile true
+    }
+    stages {
+        stage('do nothing') {
+            steps {
+                echo ("do nothing")
+                /*echo ("set up venv")
+
+                sh "python3 -m venv docker_ium"
+                sh "source docker_ium/bin/activate"
+                sh "pip3 install kaggle"
+                sh "pip3 install pandas"
+                sh "pip3 install -U scikit-learn"*/
+
+            }
+        }
+        stage('Run Script') {
+            steps {
+                //echo ("checkout: check out from version control")
+                //git "https://git.wmi.amu.edu.pl/s151636/ium_151636.git"
+
+                //echo ("sh: Shell Script")
+                //sh "python3 script2.py"
+
+                //echo ("copyArtifacts")
+
+
+                //echo("archiveArtifacts")
+
+                //echo ("run dockerfile")
+                //sh docker
+
+
+                echo("run data script")
+                //sh "source docker_ium/bin/activate"
+                sh "ls -a"
+                sh "chmod u+x script7.py"
+                //sh "pip3 show pandas"
+                //sh "python3 script5_3.py | tee output.txt | tar -czf output.tar.gz output.txt"
+                sh "python3 script7.py | tee metrics.txt"
+            }
+        }
+        stage('Archive Output') {
+            steps {
+                archiveArtifacts 'experiments/**/*.*'
+            }
+        }
+
+    }
+}
diff --git a/script7.py b/script7.py
new file mode 100644
index 0000000..5cde5c4
--- /dev/null
+++ b/script7.py
@@ -0,0 +1,101 @@
+import pandas as pd
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, Dataset
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+import pickle
+from sacred import Experiment
+from sacred.observers import FileStorageObserver, MongoObserver
+
+ex = Experiment("s151636", interactive=True, save_git_info=False)
+ex.observers.append(FileStorageObserver('experiments'))
+ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017', db_name='sacred'))
+
+# Define the neural network model
+class Model(nn.Module):
+    def __init__(self):
+        super(Model, self).__init__()
+        self.fc1 = nn.Linear(1, 64)
+        self.fc2 = nn.Linear(64, 1)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.relu(x)
+        x = self.fc2(x)
+        return x
+
+# Define a custom dataset
+class CustomDataset(Dataset):
+    def __init__(self, X, y):
+        self.X = torch.FloatTensor(X.values.reshape(-1, 1))
+        self.y = torch.FloatTensor(y.values.reshape(-1, 1))
+
+    def __len__(self):
+        return len(self.X)
+
+    def __getitem__(self, idx):
+        return self.X[idx], self.y[idx]
+
+@ex.main
+def train_model():
+    # Load the dataset
+    df = pd.read_csv('data.csv')
+
+    # Select the relevant columns (e.g., 'Rating' and 'Writer')
+    data = df[['Rating', 'Writer']]
+
+    # Drop rows with missing values
+    data = data.dropna()
+
+    # Convert the 'Writer' column to numeric using label encoding
+    encoder = LabelEncoder()
+    data['Writer'] = encoder.fit_transform(data['Writer'])
+
+    # Split the data into training and testing sets
+    X = data['Writer']
+    y = data['Rating']
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+    # Create the model instance
+    model = Model()
+
+    # Define the loss function and optimizer
+    criterion = nn.MSELoss()
+    optimizer = optim.Adam(model.parameters())
+
+    # Create dataloaders for training and testing
+    train_dataset = CustomDataset(X_train, y_train)
+    test_dataset = CustomDataset(X_test, y_test)
+    train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
+    test_dataloader = DataLoader(test_dataset, batch_size=64)
+
+    # Train the model
+    for epoch in range(10):
+        model.train()
+        for inputs, targets in train_dataloader:
+            optimizer.zero_grad()
+            outputs = model(inputs)
+            loss = criterion(outputs, targets)
+            loss.backward()
+            optimizer.step()
+
+    # Save the model to a file
+    torch.save(model.state_dict(), 'model.pth')
+
+    # Save the encoder to a file
+    with open('encoder.pkl', 'wb') as f:
+        pickle.dump(encoder, f)
+
+    # Make predictions on new data
+    new_writer = 'Jim Cash'
+    new_writer_encoded = torch.tensor(encoder.transform([new_writer])).float()
+
+    model.eval()
+    rating_prediction = model(new_writer_encoded)
+    print("Predicted rating for the writer 'Jim Cash':", rating_prediction.item())
+
+ex.run()
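
Review note (not part of the patch): the Jenkinsfile tees the script's stdout
into metrics.txt, but train_model() builds test_dataloader and never uses it,
so no held-out metric is ever computed. A minimal sketch of an evaluation step
that could be appended to the end of train_model(), assuming Sacred's metrics
API (ex.log_scalar) is available in the installed version:

    # Hypothetical addition: measure MSE on the held-out test set.
    model.eval()
    test_loss = 0.0
    with torch.no_grad():
        for inputs, targets in test_dataloader:
            outputs = model(inputs)
            # criterion returns the batch mean, so weight it by batch size
            test_loss += criterion(outputs, targets).item() * inputs.size(0)
    test_loss /= len(test_dataset)
    ex.log_scalar("test.mse", test_loss)  # forwarded to both Sacred observers
    print("Test MSE:", test_loss)

If metrics.txt should also be kept as a build artifact, the archive pattern
would need to include it, e.g. archiveArtifacts 'experiments/**/*.*, metrics.txt'.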
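
Review note: since the run saves model.pth and encoder.pkl, a later job could
reload both for inference. A sketch under the assumption that the Model class
definition is importable and that the queried writer was seen during fitting
(LabelEncoder.transform raises ValueError for unseen labels):

    import pickle
    import torch

    # Rebuild the architecture, then restore the trained weights.
    model = Model()
    model.load_state_dict(torch.load('model.pth'))
    model.eval()

    # Restore the fitted label encoder.
    with open('encoder.pkl', 'rb') as f:
        encoder = pickle.load(f)

    # Encode the writer name and predict a rating.
    writer_id = encoder.transform(['Jim Cash'])      # numpy array, shape (1,)
    x = torch.FloatTensor(writer_id.reshape(-1, 1))  # shape (1, 1): one sample, one feature
    with torch.no_grad():
        print("Predicted rating:", model(x).item())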