upd d + script5_3

2023-06-06 21:28:14 +02:00 · 2023-06-06 21:28:14 +02:00 · 0d0ee8bf90
commit 0d0ee8bf90
parent 46430423d7
2 changed files with 94 additions and 1 deletions
--- a/4
+++ b/4
@ -7,7 +7,9 @@ RUN apt-get update && \
 RUN pip3 install kaggle
 RUN pip3 install pandas
 RUN pip3 install scikit-learn
-RUN pip3 install tensorflow==2.12.*
+#RUN pip3 install tensorflow==2.12.*
+# NOTE(review): torch/torchvision are installed twice -- unpinned on the next line, then pinned two lines further down; confirm whether both installs are needed.
+RUN pip3 install torch torchvision
+# NOTE(review): this line invokes `pip` while the surrounding lines use `pip3` -- confirm both resolve to the same interpreter. The `+cpu` tags presumably select CPU-only wheels from the index passed via -f.
+RUN pip install torch==1.8.0+cpu torchvision==0.9.0+cpu torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.html
 RUN pip3 install pickle5

 #RUN apt install python3.10-venv -y
--- a/script5_3.py
+++ b/script5_3.py
@ -0,0 +1,91 @@
+import pandas as pd
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, Dataset
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+import pickle
+
+# Define the neural network model
+class Model(nn.Module):
+    """Two-layer perceptron mapping one input feature to one output value.
+
+    Layout: Linear(1 -> 64), ReLU, Linear(64 -> 1).
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Attribute names are part of the saved state_dict keys; keep them.
+        self.fc1 = nn.Linear(in_features=1, out_features=64)
+        self.fc2 = nn.Linear(in_features=64, out_features=1)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        """Return the network output for ``x`` (last dimension of size 1)."""
+        hidden = self.relu(self.fc1(x))
+        return self.fc2(hidden)
+
+# Define a custom dataset
+class CustomDataset(Dataset):
+    """Dataset wrapping a feature series and a target series as float tensors.
+
+    Both ``X`` and ``y`` are read through ``.values`` and stored as
+    column-shaped (N, 1) ``FloatTensor`` objects.
+    """
+
+    @staticmethod
+    def _as_column(series):
+        # Flatten to a single column so each sample is a length-1 vector.
+        column = series.values.reshape(-1, 1)
+        return torch.FloatTensor(column)
+
+    def __init__(self, X, y):
+        self.X = self._as_column(X)
+        self.y = self._as_column(y)
+
+    def __len__(self):
+        """Return the number of samples."""
+        return self.X.shape[0]
+
+    def __getitem__(self, idx):
+        """Return the ``(feature, target)`` pair stored at ``idx``."""
+        feature = self.X[idx]
+        target = self.y[idx]
+        return feature, target
+
+# Load the dataset
+df = pd.read_csv('data.csv')
+
+# Keep only the target ('Rating') and the predictor ('Writer') and drop
+# incomplete rows. The explicit copy makes `data` an independent frame, so the
+# column assignment below cannot trigger pandas' SettingWithCopyWarning.
+data = df[['Rating', 'Writer']].dropna().copy()
+
+# Convert the 'Writer' column to integer ids using label encoding. The fitted
+# encoder is pickled further down so the same mapping can be reused later.
+encoder = LabelEncoder()
+data['Writer'] = encoder.fit_transform(data['Writer'])
+
+# Split the data into training and testing sets (80/20, fixed seed)
+X = data['Writer']
+y = data['Rating']
+
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+# Create the model instance
+model = Model()
+
+# Define the loss function and optimizer
+criterion = nn.MSELoss()
+optimizer = optim.Adam(model.parameters())
+
+# Create dataloaders for training and testing.
+# NOTE: the test dataloader is built here but not consumed by this script.
+train_dataset = CustomDataset(X_train, y_train)
+test_dataset = CustomDataset(X_test, y_test)
+train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
+test_dataloader = DataLoader(test_dataset, batch_size=64)
+
+# Train the model for 10 passes over the training data
+for epoch in range(10):
+    model.train()
+    for inputs, targets in train_dataloader:
+        optimizer.zero_grad()
+        outputs = model(inputs)
+        loss = criterion(outputs, targets)
+        loss.backward()
+        optimizer.step()
+
+# Save the model weights to a file
+torch.save(model.state_dict(), 'model.pth')
+
+# Save the encoder to a file
+with open('encoder.pkl', 'wb') as f:
+    pickle.dump(encoder, f)
+
+# Make a prediction for a single writer.
+# encoder.transform raises ValueError if the writer was not seen during fitting.
+new_writer = 'Jim Cash'
+# Shape the input as (1, 1) to match the (batch, 1) layout used in training.
+new_writer_encoded = torch.tensor(encoder.transform([new_writer])).float().reshape(-1, 1)
+
+model.eval()
+# Inference only: skip autograd bookkeeping.
+with torch.no_grad():
+    rating_prediction = model(new_writer_encoded)
+print(f"Predicted rating for the writer '{new_writer}':", rating_prediction.item())