ium_05
This commit is contained in:
parent
dcacf8af32
commit
3452520ae1
228
ium_05/learning.ipynb
Normal file
228
ium_05/learning.ipynb
Normal file
@ -0,0 +1,228 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## PyTorch train model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Wczytanie niezbędnych bibliotek"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 233,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import torch\n",
|
||||||
|
"from torch import nn\n",
|
||||||
|
"from torch.utils.data import DataLoader, TensorDataset\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from sklearn.preprocessing import LabelEncoder"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Wczytanie danych z pliku"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 234,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data = pd.read_csv('../data/btc_train.csv')\n",
|
||||||
|
"data = pd.DataFrame(data)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Przygotowanie danych\n",
|
||||||
|
"Powinienem był zrobić to w zadaniu 1"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 235,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"le = LabelEncoder()\n",
|
||||||
|
"data['date'] = le.fit_transform(data['date'])\n",
|
||||||
|
"data['hour'] = le.fit_transform(data['hour'])\n",
|
||||||
|
"data['Volume BTC'] = data['Volume BTC']/10\n",
|
||||||
|
"\n",
|
||||||
|
"# Przekształć łańcuchy znaków na liczby aby zapobiec 'TypeError: can't convert np.ndarray of type numpy.object_.'\n",
|
||||||
|
"for col in data.columns:\n",
|
||||||
|
" data[col] = pd.to_numeric(data[col], errors='coerce')\n",
|
||||||
|
"\n",
|
||||||
|
"# Zamień brakujące wartości na 0 aby zapobiec 'IndexError: Target -9223372036854775808 is out of bounds.'\n",
|
||||||
|
"data = data.fillna(0)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Przygotowanie inputs oraz targets"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 236,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Przekształć dane na tensory PyTorch\n",
|
||||||
|
"inputs = torch.tensor(data[['date', 'hour', 'Volume BTC']].values, dtype=torch.float32)\n",
|
||||||
|
"targets = torch.tensor(data['Volume USD'].values, dtype=torch.float32).view(-1, 1) # float32 (nie torch.long): regresja z MSELoss wymaga zmiennoprzecinkowych targets o kształcie (N, 1)\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Utwórz DataLoader"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 237,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data_set = TensorDataset(inputs, targets)\n",
|
||||||
|
"data_loader = DataLoader(data_set, batch_size=64)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 238,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"model = nn.Sequential(\n",
|
||||||
|
" nn.Flatten(),\n",
|
||||||
|
" nn.Linear(inputs.shape[1], 64),\n",
|
||||||
|
" nn.ReLU(),\n",
|
||||||
|
" nn.Linear(64, 1),\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Funkcja straty i optymalizator"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 239,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"loss_fn = nn.MSELoss()\n",
|
||||||
|
"optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Trenowanie modelu"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 240,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Model został wytrenowany.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"for epoch in range(10):\n",
|
||||||
|
" for X, y in data_loader:\n",
|
||||||
|
" pred = model(X)\n",
|
||||||
|
" loss = loss_fn(pred, y)\n",
|
||||||
|
"\n",
|
||||||
|
" optimizer.zero_grad()\n",
|
||||||
|
" loss.backward()\n",
|
||||||
|
" optimizer.step()\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Model został wytrenowany.\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Zapis modelu do pliku"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 241,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Model został zapisany do pliku 'model.pth'.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"torch.save(model.state_dict(), \"model.pth\")\n",
|
||||||
|
"print(\"Model został zapisany do pliku 'model.pth'.\")\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
BIN
ium_05/model.pth
Normal file
BIN
ium_05/model.pth
Normal file
Binary file not shown.
227
ium_05/predict.ipynb
Normal file
227
ium_05/predict.ipynb
Normal file
@ -0,0 +1,227 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## PyTorch model prediction"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Wczytanie niezbędnych bibliotek"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 289,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import torch\n",
|
||||||
|
"from torch import nn\n",
|
||||||
|
"from sklearn.preprocessing import LabelEncoder"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Wczytanie danych z pliku"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 290,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data = pd.read_csv('../data/btc_test.csv')\n",
|
||||||
|
"data = pd.DataFrame(data)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Przygotowanie danych\n",
|
||||||
|
"Powinienem był zrobić to w zadaniu 1"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 291,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"le = LabelEncoder()\n",
|
||||||
|
"data['date'] = le.fit_transform(data['date'])\n",
|
||||||
|
"data['hour'] = le.fit_transform(data['hour'])\n",
|
||||||
|
"data['Volume BTC'] = data['Volume BTC']/10\n",
|
||||||
|
"\n",
|
||||||
|
"# Przekształć łańcuchy znaków na liczby aby zapobiec 'TypeError: can't convert np.ndarray of type numpy.object_.'\n",
|
||||||
|
"for col in data.columns:\n",
|
||||||
|
" data[col] = pd.to_numeric(data[col], errors='coerce')\n",
|
||||||
|
"\n",
|
||||||
|
"# Zamień brakujące wartości na 0 aby zapobiec 'IndexError: Target -9223372036854775808 is out of bounds.'\n",
|
||||||
|
"data = data.fillna(0)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Przygotowanie inputs"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 292,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Przekształć dane na tensory PyTorch\n",
|
||||||
|
"inputs = torch.tensor(data[['date', 'hour', 'Volume BTC']].values, dtype=torch.float32)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 293,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"model = nn.Sequential(\n",
|
||||||
|
" nn.Flatten(),\n",
|
||||||
|
" nn.Linear(inputs.shape[1], 64),\n",
|
||||||
|
" nn.ReLU(),\n",
|
||||||
|
" nn.Linear(64, 1),\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Wczytanie wytrenowanego modelu"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 294,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"Sequential(\n",
|
||||||
|
" (0): Flatten(start_dim=1, end_dim=-1)\n",
|
||||||
|
" (1): Linear(in_features=3, out_features=64, bias=True)\n",
|
||||||
|
" (2): ReLU()\n",
|
||||||
|
" (3): Linear(in_features=64, out_features=1, bias=True)\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 294,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"model.load_state_dict(torch.load(\"model.pth\"))\n",
|
||||||
|
"model.eval()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Predykcja modelu"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 298,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"tensor([[772837.5000],\n",
|
||||||
|
" [772837.5000],\n",
|
||||||
|
" [772837.5000],\n",
|
||||||
|
" ...,\n",
|
||||||
|
" [772837.5000],\n",
|
||||||
|
" [772837.5000],\n",
|
||||||
|
" [772837.5000]], grad_fn=<MulBackward0>)\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"predictions = model(inputs)\n",
|
||||||
|
"# The model is trained on unscaled 'Volume USD' targets (only the 'Volume BTC' input is divided by 10),\n",
"# so its output is already in USD and must not be multiplied by 10.\n",
"predicted_data = predictions.float()\n",
|
||||||
|
"print(predicted_data)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Zapis danych do pliku csv"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 300,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"ename": "TypeError",
|
||||||
|
"evalue": "detach() missing 1 required positional arguments: \"input\"",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
|
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
|
||||||
|
"Cell \u001b[1;32mIn[300], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m predicted_data_df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame(\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdetach\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mnumpy(predicted_data))\n\u001b[0;32m 2\u001b[0m predicted_data_df\u001b[38;5;241m.\u001b[39mto_csv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpredict_result.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m, index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n",
|
||||||
|
"\u001b[1;31mTypeError\u001b[0m: detach() missing 1 required positional arguments: \"input\""
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"predicted_data_df = pd.DataFrame(torch.detach(predicted_data).numpy())\n",
|
||||||
|
"predicted_data_df.to_csv(\"predict_result.csv\", index=False)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
429942
ium_05/predict_result.csv
Normal file
429942
ium_05/predict_result.csv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user