ium_05

2024-04-24 02:37:19 +02:00 · 2024-04-24 02:37:19 +02:00 · 3452520ae1
parent dcacf8af32
commit 3452520ae1
4 changed files with 430397 additions and 0 deletions
--- a/ium_05/learning.ipynb
+++ b/ium_05/learning.ipynb
@ -0,0 +1,228 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## PyTorch train model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Wczytanie niezbędnych bibliotek"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 233,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "from torch import nn\n",
+    "from torch.utils.data import DataLoader, TensorDataset\n",
+    "import pandas as pd\n",
+    "from sklearn.preprocessing import LabelEncoder"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Wczytanie danych z pliku"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 234,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the CSV this notebook trains on; pd.read_csv already returns a DataFrame,\n",
+    "# so no extra pd.DataFrame(...) wrapping is needed.\n",
+    "data = pd.read_csv('../data/btc_train.csv')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Przygotowanie danych\n",
+    "Powinienem był zrobić to w zadaniu 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 235,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Replace the 'date' and 'hour' values with integer labels; fit_transform refits the encoder for each column.\n",
+    "le = LabelEncoder()\n",
+    "for label_col in ('date', 'hour'):\n",
+    "    data[label_col] = le.fit_transform(data[label_col])\n",
+    "data['Volume BTC'] = data['Volume BTC'] / 10\n",
+    "\n",
+    "# Coerce every column to a numeric dtype (unparseable values become NaN) to avoid\n",
+    "# 'TypeError: can't convert np.ndarray of type numpy.object_.' when building tensors.\n",
+    "data = data.apply(pd.to_numeric, errors='coerce')\n",
+    "\n",
+    "# Replace missing values with 0 to avoid 'IndexError: Target -9223372036854775808 is out of bounds.'\n",
+    "data = data.fillna(0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Przygotowanie inputs oraz targets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 236,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Convert the data to PyTorch tensors.\n",
+    "inputs = torch.tensor(data[['date', 'hour', 'Volume BTC']].values, dtype=torch.float32)\n",
+    "targets = torch.tensor(data['Volume USD'].values, dtype=torch.float32).view(-1, 1) # float32 column vector of shape (N, 1), matching the single-output model compared by nn.MSELoss\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Utwórz DataLoader"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 237,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Pair each feature row with its target so that batches stay aligned.\n",
+    "data_set = TensorDataset(inputs, targets)\n",
+    "# shuffle=True reshuffles the samples every epoch, so mini-batches are not always\n",
+    "# the same runs of consecutive CSV rows.\n",
+    "data_loader = DataLoader(data_set, batch_size=64, shuffle=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 238,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Small MLP regressor: one hidden layer of 64 ReLU units and a single output value per row.\n",
+    "# The layer order is kept unchanged because nn.Sequential names state_dict entries by layer index.\n",
+    "n_features = inputs.shape[1]\n",
+    "layers = [\n",
+    "    nn.Flatten(),\n",
+    "    nn.Linear(n_features, 64),\n",
+    "    nn.ReLU(),\n",
+    "    nn.Linear(64, 1),\n",
+    "]\n",
+    "model = nn.Sequential(*layers)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Funkcja straty i optymalizator"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 239,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plain SGD over all model parameters, minimising the mean squared error of the regression output.\n",
+    "learning_rate = 1e-3\n",
+    "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)\n",
+    "loss_fn = nn.MSELoss()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Trenowanie modelu"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 240,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Model został wytrenowany.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Run 10 full passes over the training batches.\n",
+    "for _ in range(10):\n",
+    "    for batch_inputs, batch_targets in data_loader:\n",
+    "        # Clear gradients left over from the previous step before the new backward pass.\n",
+    "        optimizer.zero_grad()\n",
+    "        batch_loss = loss_fn(model(batch_inputs), batch_targets)\n",
+    "        batch_loss.backward()\n",
+    "        optimizer.step()\n",
+    "\n",
+    "print(\"Model został wytrenowany.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Zapis modelu do pliku"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 241,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Model został zapisany do pliku 'model.pth'.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Persist only the learned parameters (state_dict) rather than the whole module object.\n",
+    "weights = model.state_dict()\n",
+    "torch.save(weights, \"model.pth\")\n",
+    "print(\"Model został zapisany do pliku 'model.pth'.\")\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/ium_05/model.pth
+++ b/ium_05/model.pth
--- a/ium_05/predict.ipynb
+++ b/ium_05/predict.ipynb
@ -0,0 +1,227 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## PyTorch predict model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Wczytanie niezbędnych bibliotek"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 289,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import torch\n",
+    "from torch import nn\n",
+    "from sklearn.preprocessing import LabelEncoder"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Wczytanie danych z pliku"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 290,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the CSV to run predictions on; pd.read_csv already returns a DataFrame,\n",
+    "# so no extra pd.DataFrame(...) wrapping is needed.\n",
+    "data = pd.read_csv('../data/btc_test.csv')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Przygotowanie danych\n",
+    "Powinienem był zrobić to w zadaniu 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 291,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Replace the 'date' and 'hour' values with integer labels; fit_transform refits the encoder for each column.\n",
+    "# NOTE(review): the encoder is fitted on this file only, so the label codes may differ from the ones\n",
+    "# produced when the model was trained - confirm this is intended.\n",
+    "le = LabelEncoder()\n",
+    "for label_col in ('date', 'hour'):\n",
+    "    data[label_col] = le.fit_transform(data[label_col])\n",
+    "data['Volume BTC'] = data['Volume BTC'] / 10\n",
+    "\n",
+    "# Coerce every column to a numeric dtype (unparseable values become NaN) to avoid\n",
+    "# 'TypeError: can't convert np.ndarray of type numpy.object_.' when building tensors.\n",
+    "data = data.apply(pd.to_numeric, errors='coerce')\n",
+    "\n",
+    "# Replace missing values with 0 to avoid 'IndexError: Target -9223372036854775808 is out of bounds.'\n",
+    "data = data.fillna(0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Przygotowanie inputs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 292,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build the float32 feature tensor from the three feature columns.\n",
+    "feature_columns = ['date', 'hour', 'Volume BTC']\n",
+    "inputs = torch.tensor(data[feature_columns].values, dtype=torch.float32)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 293,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Rebuild the architecture whose weights are loaded from the checkpoint below.\n",
+    "# The layer order is kept unchanged because nn.Sequential names state_dict entries by layer index.\n",
+    "n_features = inputs.shape[1]\n",
+    "layers = [\n",
+    "    nn.Flatten(),\n",
+    "    nn.Linear(n_features, 64),\n",
+    "    nn.ReLU(),\n",
+    "    nn.Linear(64, 1),\n",
+    "]\n",
+    "model = nn.Sequential(*layers)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Wczytanie wytrenowanego modelu"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 294,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Sequential(\n",
+       "  (0): Flatten(start_dim=1, end_dim=-1)\n",
+       "  (1): Linear(in_features=3, out_features=64, bias=True)\n",
+       "  (2): ReLU()\n",
+       "  (3): Linear(in_features=64, out_features=1, bias=True)\n",
+       ")"
+      ]
+     },
+     "execution_count": 294,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# weights_only=True restricts unpickling to tensors and plain containers, and\n",
+    "# map_location=\"cpu\" loads the checkpoint regardless of the device it was saved from.\n",
+    "state_dict = torch.load(\"model.pth\", map_location=\"cpu\", weights_only=True)\n",
+    "model.load_state_dict(state_dict)\n",
+    "# Switch to evaluation mode; as the last expression, the model repr is displayed.\n",
+    "model.eval()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predykcja modelu"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 298,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[772837.5000],\n",
+      "        [772837.5000],\n",
+      "        [772837.5000],\n",
+      "        ...,\n",
+      "        [772837.5000],\n",
+      "        [772837.5000],\n",
+      "        [772837.5000]], grad_fn=<MulBackward0>)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Inference only: no_grad() skips building the autograd graph for the forward pass.\n",
+    "with torch.no_grad():\n",
+    "    predictions = model(inputs)\n",
+    "\n",
+    "# No rescaling here: training divides only the 'Volume BTC' input feature by 10, while the\n",
+    "# 'Volume USD' target is used unscaled, so the model output is already in target units.\n",
+    "predicted_data = predictions.float()\n",
+    "print(predicted_data)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Zapis danych do pliku csv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 300,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Detach the predictions from autograd before converting to NumPy, then write them to CSV without the index column.\n",
+    "predicted_data_df = pd.DataFrame(torch.detach(predicted_data).numpy())\n",
+    "predicted_data_df.to_csv(\"predict_result.csv\", index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/ium_05/predict_result.csv
+++ b/ium_05/predict_result.csv