From 0d0ee8bf906bef93c53e51f45e7f0c1e68411da9 Mon Sep 17 00:00:00 2001
From: eugene
Date: Tue, 6 Jun 2023 21:28:14 +0200
Subject: [PATCH] Update Dockerfile + add script5_3

Comment out the TensorFlow install in the Dockerfile and install PyTorch
instead. Add script5_3.py, which trains a small PyTorch regressor that
predicts Rating from Writer and saves model.pth and encoder.pkl.

---
Review notes:
* `RUN pip3 install torch torchvision` looks redundant: the pinned CPU install
  on the following line replaces whatever it installs. Left as committed.
* The `index` line for script5_3.py is omitted because the file content was
  revised in review and the recorded blob id no longer matches.

 Dockerfile   |   4 ++-
 script5_3.py | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+), 1 deletion(-)
 create mode 100644 script5_3.py

diff --git a/Dockerfile b/Dockerfile
index aec7f68..9451802 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -7,7 +7,9 @@ RUN apt-get update && \
 RUN pip3 install kaggle
 RUN pip3 install pandas
 RUN pip3 install scikit-learn
-RUN pip3 install tensorflow==2.12.*
+#RUN pip3 install tensorflow==2.12.*
+RUN pip3 install torch torchvision
+RUN pip install torch==1.8.0+cpu torchvision==0.9.0+cpu torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.html
 RUN pip3 install pickle5
 
 #RUN apt install python3.10-venv -y
diff --git a/script5_3.py b/script5_3.py
new file mode 100644
--- /dev/null
+++ b/script5_3.py
@@ -0,0 +1,120 @@
+"""Train a small regression network that predicts ``Rating`` from ``Writer``.
+
+Reads ``data.csv``, label-encodes the ``Writer`` column, trains ``Model`` and
+writes two artifacts to the working directory: ``model.pth`` (the model's
+``state_dict``) and ``encoder.pkl`` (the fitted ``LabelEncoder``).
+"""
+
+import pickle
+
+import pandas as pd
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+from torch.utils.data import DataLoader, Dataset
+
+SEED = 42
+EPOCHS = 10
+BATCH_SIZE = 64
+
+
+class Model(nn.Module):
+    """Two-layer perceptron mapping one scalar feature to one scalar output."""
+
+    def __init__(self):
+        super().__init__()
+        self.fc1 = nn.Linear(1, 64)
+        self.fc2 = nn.Linear(64, 1)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        """Return predictions for ``x``, whose last dimension must have size 1."""
+        return self.fc2(self.relu(self.fc1(x)))
+
+
+class CustomDataset(Dataset):
+    """Serve two equally long pandas Series as ``(feature, target)`` tensors."""
+
+    def __init__(self, X, y):
+        # Both are stored as float32 columns of shape (n, 1): that is the input
+        # shape ``Model`` expects and it matches the model output for MSELoss.
+        self.X = torch.tensor(X.to_numpy().reshape(-1, 1), dtype=torch.float32)
+        self.y = torch.tensor(y.to_numpy().reshape(-1, 1), dtype=torch.float32)
+
+    def __len__(self):
+        return len(self.X)
+
+    def __getitem__(self, idx):
+        return self.X[idx], self.y[idx]
+
+
+def mean_loss(model, dataloader, criterion):
+    """Return the sample-weighted mean of ``criterion`` over ``dataloader``."""
+    model.eval()
+    total, count = 0.0, 0
+    with torch.no_grad():
+        for inputs, targets in dataloader:
+            total += criterion(model(inputs), targets).item() * len(inputs)
+            count += len(inputs)
+    return total / count if count else float("nan")
+
+
+def main():
+    """Train on ``data.csv``, save the artifacts and print a sample prediction."""
+    # Seed torch as well as the split so weight init and batch shuffling repeat.
+    torch.manual_seed(SEED)
+
+    df = pd.read_csv('data.csv')
+
+    # .copy() detaches the frame from ``df`` so the column assignment below
+    # cannot trigger pandas' SettingWithCopyWarning.
+    data = df[['Rating', 'Writer']].dropna().copy()
+
+    # NOTE(review): the integer label code is fed to the network as a single
+    # numeric feature, so the model sees an arbitrary ordering of writers.
+    encoder = LabelEncoder()
+    data['Writer'] = encoder.fit_transform(data['Writer'])
+
+    X_train, X_test, y_train, y_test = train_test_split(
+        data['Writer'], data['Rating'], test_size=0.2, random_state=SEED
+    )
+    train_dataloader = DataLoader(
+        CustomDataset(X_train, y_train), batch_size=BATCH_SIZE, shuffle=True
+    )
+    test_dataloader = DataLoader(CustomDataset(X_test, y_test), batch_size=BATCH_SIZE)
+
+    model = Model()
+    criterion = nn.MSELoss()
+    optimizer = optim.Adam(model.parameters())
+
+    for _ in range(EPOCHS):
+        model.train()
+        for inputs, targets in train_dataloader:
+            optimizer.zero_grad()
+            loss = criterion(model(inputs), targets)
+            loss.backward()
+            optimizer.step()
+
+    # Report the loss on the held-out split.
+    print("Test MSE:", mean_loss(model, test_dataloader, criterion))
+
+    torch.save(model.state_dict(), 'model.pth')
+    with open('encoder.pkl', 'wb') as f:
+        pickle.dump(encoder, f)
+
+    # encoder.transform raises ValueError for a writer not seen during fitting.
+    new_writer = 'Jim Cash'
+    new_writer_encoded = torch.tensor(
+        encoder.transform([new_writer]), dtype=torch.float32
+    ).reshape(-1, 1)
+
+    model.eval()
+    with torch.no_grad():
+        rating_prediction = model(new_writer_encoded).item()
+    print(f"Predicted rating for the writer '{new_writer}':", rating_prediction)
+
+
+if __name__ == "__main__":
+    main()