{ "cells": [ { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", "Collecting torchtext\n", " Downloading torchtext-0.15.2-cp310-cp310-manylinux1_x86_64.whl (2.0 MB)\n", "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m:01\u001b[0m\n", "\u001b[?25hCollecting tqdm\n", " Using cached tqdm-4.65.0-py3-none-any.whl (77 kB)\n", "Requirement already satisfied: numpy in /home/gedin/.local/lib/python3.10/site-packages (from torchtext) (1.24.3)\n", "Collecting torchdata==0.6.1\n", " Downloading torchdata-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.6 MB)\n", "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.6/4.6 MB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: requests in /usr/lib/python3/dist-packages (from torchtext) (2.25.1)\n", "Collecting torch==2.0.1\n", " Downloading torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl (619.9 MB)\n", "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m619.9/619.9 MB\u001b[0m \u001b[31m1.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:09\u001b[0m\n", "\u001b[?25hCollecting sympy\n", " Downloading sympy-1.12-py3-none-any.whl (5.7 MB)\n", "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.7/5.7 MB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", "\u001b[?25hCollecting nvidia-cudnn-cu11==8.5.0.96\n", " Using cached nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl (557.1 MB)\n", "Collecting nvidia-cuda-cupti-cu11==11.7.101\n", " Using cached nvidia_cuda_cupti_cu11-11.7.101-py3-none-manylinux1_x86_64.whl (11.8 MB)\n", "Collecting nvidia-cusparse-cu11==11.7.4.91\n", " Using cached nvidia_cusparse_cu11-11.7.4.91-py3-none-manylinux1_x86_64.whl (173.2 MB)\n", "Collecting networkx\n", " Using cached networkx-3.1-py3-none-any.whl (2.1 MB)\n", "Collecting nvidia-cufft-cu11==10.9.0.58\n", " Using cached nvidia_cufft_cu11-10.9.0.58-py3-none-manylinux1_x86_64.whl (168.4 MB)\n", "Collecting filelock\n", " Downloading filelock-3.12.0-py3-none-any.whl (10 kB)\n", "Collecting nvidia-cuda-runtime-cu11==11.7.99\n", " Using cached nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl (849 kB)\n", "Collecting triton==2.0.0\n", " Downloading triton-2.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (63.3 MB)\n", "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.3/63.3 MB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:02\u001b[0m\n", "\u001b[?25hCollecting nvidia-cusolver-cu11==11.4.0.1\n", " Using cached nvidia_cusolver_cu11-11.4.0.1-2-py3-none-manylinux1_x86_64.whl (102.6 MB)\n", "Requirement already satisfied: jinja2 in /home/gedin/.local/lib/python3.10/site-packages (from torch==2.0.1->torchtext) (3.1.2)\n", "Collecting nvidia-cublas-cu11==11.10.3.66\n", " Using cached nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl (317.1 MB)\n", "Collecting typing-extensions\n", " Downloading typing_extensions-4.6.3-py3-none-any.whl (31 kB)\n", "Collecting nvidia-nccl-cu11==2.14.3\n", " Using cached nvidia_nccl_cu11-2.14.3-py3-none-manylinux1_x86_64.whl (177.1 MB)\n", "Collecting nvidia-cuda-nvrtc-cu11==11.7.99\n", " Using cached nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl (21.0 MB)\n", "Collecting nvidia-curand-cu11==10.2.10.91\n", " Using cached nvidia_curand_cu11-10.2.10.91-py3-none-manylinux1_x86_64.whl (54.6 MB)\n", "Collecting nvidia-nvtx-cu11==11.7.91\n", " Using cached nvidia_nvtx_cu11-11.7.91-py3-none-manylinux1_x86_64.whl (98 kB)\n", "Requirement already satisfied: urllib3>=1.25 in /usr/lib/python3/dist-packages (from torchdata==0.6.1->torchtext) (1.26.5)\n", "Requirement already satisfied: wheel in /usr/lib/python3/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch==2.0.1->torchtext) (0.37.1)\n", "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch==2.0.1->torchtext) (59.6.0)\n", "Collecting lit\n", " Downloading lit-16.0.5.tar.gz (138 kB)\n", "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m138.0/138.0 KB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25ldone\n", "\u001b[?25hCollecting cmake\n", " Using cached cmake-3.26.3-py2.py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (24.0 MB)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/lib/python3/dist-packages (from jinja2->torch==2.0.1->torchtext) (2.0.1)\n", "Collecting mpmath>=0.19\n", " Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)\n", "Building wheels for collected packages: lit\n", " Building wheel for lit (setup.py) ... \u001b[?25ldone\n", "\u001b[?25h Created wheel for lit: filename=lit-16.0.5-py3-none-any.whl size=88192 sha256=f6c57a31a147cbfe0af3d6bf4b856390ad14c28a9ddb38c8044ec29331b35c26\n", " Stored in directory: /home/gedin/.cache/pip/wheels/eb/02/84/d82f0b1a6098209edf7e3607be6cc592ebbc015a8a3127c68d\n", "Successfully built lit\n", "Installing collected packages: mpmath, lit, cmake, typing-extensions, tqdm, sympy, nvidia-nvtx-cu11, nvidia-nccl-cu11, nvidia-cusparse-cu11, nvidia-curand-cu11, nvidia-cufft-cu11, nvidia-cuda-runtime-cu11, nvidia-cuda-nvrtc-cu11, nvidia-cuda-cupti-cu11, nvidia-cublas-cu11, networkx, filelock, nvidia-cusolver-cu11, nvidia-cudnn-cu11, triton, torch, torchdata, torchtext\n", "Successfully installed cmake-3.26.3 filelock-3.12.0 lit-16.0.5 mpmath-1.3.0 networkx-3.1 nvidia-cublas-cu11-11.10.3.66 nvidia-cuda-cupti-cu11-11.7.101 nvidia-cuda-nvrtc-cu11-11.7.99 nvidia-cuda-runtime-cu11-11.7.99 nvidia-cudnn-cu11-8.5.0.96 nvidia-cufft-cu11-10.9.0.58 nvidia-curand-cu11-10.2.10.91 nvidia-cusolver-cu11-11.4.0.1 nvidia-cusparse-cu11-11.7.4.91 nvidia-nccl-cu11-2.14.3 nvidia-nvtx-cu11-11.7.91 sympy-1.12 torch-2.0.1 torchdata-0.6.1 torchtext-0.15.2 tqdm-4.65.0 triton-2.0.0 typing-extensions-4.6.3\n" ] } ], "source": [ "!pip install torchtext" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "train_file ='train/in.tsv.xz'\n", "test_file = 'dev-0/in.tsv.xz'\n", "out_file = 'dev-0/out.tsv'" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "from itertools import islice\n", "import regex as re\n", "import sys\n", "from torchtext.vocab import build_vocab_from_iterator\n", "import lzma\n", "import pickle\n", "import re\n", "import torch\n", "from torch import nn\n", "from torch.utils.data import IterableDataset\n", "import itertools\n", "from torch.utils.data import DataLoader\n", "import gc" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "embed_size = 300\n", "device = 'cuda'\n", "vocab_size = 25000\n", "batch_s = 3200\n", "learning_rate = 0.0001\n", "epochs = 4\n", "k = 20 #top k words\n", "wildcard_minweight = 0.001" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "scrolled": true }, "outputs": [], "source": [ "###preprocessing\n", "def preprocess(line):\n", " line = get_rid_of_header(line)\n", " line = replace_endline(line)\n", " return line\n", "\n", "def get_rid_of_header(line):\n", " line = line.split('\\t')[6:]\n", " return \"\".join(line)\n", " \n", "def replace_endline(line):\n", " line = line.replace(\"\\\\n\", \" \")\n", " return line\n", "\n", "\n", "def get_last_word(text):\n", " \"\"\"Return the last word of a string.\"\"\"\n", " last_word = \"\"\n", " for i in range(len(text)-1, -1, -1):\n", " if text[i] == ' ':\n", " return last_word[::-1].rstrip()\n", " else:\n", " last_word += text[i]\n", " return last_word[::-1].rstrip()\n", "\n", "def get_first_word(text):\n", " \"\"\"Return the first word of a string.\"\"\"\n", " word = \"\"\n", " for i in range(len(text)-1):\n", " if text[i] == ' ':\n", " return word\n", " else:\n", " word += text[i]\n", " return word\n", "\n", "\n", "def get_words_from_line(line):\n", " line = line.rstrip()\n", " yield ''\n", " line = preprocess(line)\n", " for t in line.split(' '):\n", " yield t\n", " yield ''\n", "\n", "\n", "def get_word_lines_from_file(file_name):\n", " n = 0\n", " with lzma.open(file_name, 'r') as fh:\n", " for line in fh:\n", " n+=1\n", " if n%1000==0:\n", " print(n)\n", " yield get_words_from_line(line.decode('utf-8'))" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1000\n", "2000\n", "3000\n", "4000\n", "5000\n", "6000\n", "7000\n", "8000\n", "9000\n", "10000\n", "11000\n", "12000\n", "13000\n", "14000\n", "15000\n", "16000\n", "17000\n", "18000\n", "19000\n", "20000\n", "21000\n", "22000\n", "23000\n", "24000\n", "25000\n", "26000\n", "27000\n", "28000\n", "29000\n", "30000\n", "31000\n", "32000\n", "33000\n", "34000\n", "35000\n", "36000\n", "37000\n", "38000\n", "39000\n", "40000\n", "41000\n", "42000\n", "43000\n", "44000\n", "45000\n", "46000\n", "47000\n", "48000\n", "49000\n", "50000\n", "51000\n", "52000\n", "53000\n", "54000\n", "55000\n", "56000\n", "57000\n", "58000\n", "59000\n", "60000\n", "61000\n", "62000\n", "63000\n", "64000\n", "65000\n", "66000\n", "67000\n", "68000\n", "69000\n", "70000\n", "71000\n", "72000\n", "73000\n", "74000\n", "75000\n", "76000\n", "77000\n", "78000\n", "79000\n", "80000\n", "81000\n", "82000\n", "83000\n", "84000\n", "85000\n", "86000\n", "87000\n", "88000\n", "89000\n", "90000\n", "91000\n", "92000\n", "93000\n", "94000\n", "95000\n", "96000\n", "97000\n", "98000\n", "99000\n", "100000\n", "101000\n", "102000\n", "103000\n", "104000\n", "105000\n", "106000\n", "107000\n", "108000\n", "109000\n", "110000\n", "111000\n", "112000\n", "113000\n", "114000\n", "115000\n", "116000\n", "117000\n", "118000\n", "119000\n", "120000\n", "121000\n", "122000\n", "123000\n", "124000\n", "125000\n", "126000\n", "127000\n", "128000\n", "129000\n", "130000\n", "131000\n", "132000\n", "133000\n", "134000\n", "135000\n", "136000\n", "137000\n", "138000\n", "139000\n", "140000\n", "141000\n", "142000\n", "143000\n", "144000\n", "145000\n", "146000\n", "147000\n", "148000\n", "149000\n", "150000\n", "151000\n", "152000\n", "153000\n", "154000\n", "155000\n", "156000\n", "157000\n", "158000\n", "159000\n", "160000\n", "161000\n", "162000\n", "163000\n", "164000\n", "165000\n", "166000\n", "167000\n", "168000\n", "169000\n", "170000\n", "171000\n", "172000\n", "173000\n", "174000\n", "175000\n", "176000\n", "177000\n", "178000\n", "179000\n", "180000\n", "181000\n", "182000\n", "183000\n", "184000\n", "185000\n", "186000\n", "187000\n", "188000\n", "189000\n", "190000\n", "191000\n", "192000\n", "193000\n", "194000\n", "195000\n", "196000\n", "197000\n", "198000\n", "199000\n", "200000\n", "201000\n", "202000\n", "203000\n", "204000\n", "205000\n", "206000\n", "207000\n", "208000\n", "209000\n", "210000\n", "211000\n", "212000\n", "213000\n", "214000\n", "215000\n", "216000\n", "217000\n", "218000\n", "219000\n", "220000\n", "221000\n", "222000\n", "223000\n", "224000\n", "225000\n", "226000\n", "227000\n", "228000\n", "229000\n", "230000\n", "231000\n", "232000\n", "233000\n", "234000\n", "235000\n", "236000\n", "237000\n", "238000\n", "239000\n", "240000\n", "241000\n", "242000\n", "243000\n", "244000\n", "245000\n", "246000\n", "247000\n", "248000\n", "249000\n", "250000\n", "251000\n", "252000\n", "253000\n", "254000\n", "255000\n", "256000\n", "257000\n", "258000\n", "259000\n", "260000\n", "261000\n", "262000\n", "263000\n", "264000\n", "265000\n", "266000\n", "267000\n", "268000\n", "269000\n", "270000\n", "271000\n", "272000\n", "273000\n", "274000\n", "275000\n", "276000\n", "277000\n", "278000\n", "279000\n", "280000\n", "281000\n", "282000\n", "283000\n", "284000\n", "285000\n", "286000\n", "287000\n", "288000\n", "289000\n", "290000\n", "291000\n", "292000\n", "293000\n", "294000\n", "295000\n", "296000\n", "297000\n", "298000\n", "299000\n", "300000\n", "301000\n", "302000\n", "303000\n", "304000\n", "305000\n", "306000\n", "307000\n", "308000\n", "309000\n", "310000\n", "311000\n", "312000\n", "313000\n", "314000\n", "315000\n", "316000\n", "317000\n", "318000\n", "319000\n", "320000\n", "321000\n", "322000\n", "323000\n", "324000\n", "325000\n", "326000\n", "327000\n", "328000\n", "329000\n", "330000\n", "331000\n", "332000\n", "333000\n", "334000\n", "335000\n", "336000\n", "337000\n", "338000\n", "339000\n", "340000\n", "341000\n", "342000\n", "343000\n", "344000\n", "345000\n", "346000\n", "347000\n", "348000\n", "349000\n", "350000\n", "351000\n", "352000\n", "353000\n", "354000\n", "355000\n", "356000\n", "357000\n", "358000\n", "359000\n", "360000\n", "361000\n", "362000\n", "363000\n", "364000\n", "365000\n", "366000\n", "367000\n", "368000\n", "369000\n", "370000\n", "371000\n", "372000\n", "373000\n", "374000\n", "375000\n", "376000\n", "377000\n", "378000\n", "379000\n", "380000\n", "381000\n", "382000\n", "383000\n", "384000\n", "385000\n", "386000\n", "387000\n", "388000\n", "389000\n", "390000\n", "391000\n", "392000\n", "393000\n", "394000\n", "395000\n", "396000\n", "397000\n", "398000\n", "399000\n", "400000\n", "401000\n", "402000\n", "403000\n", "404000\n", "405000\n", "406000\n", "407000\n", "408000\n", "409000\n", "410000\n", "411000\n", "412000\n", "413000\n", "414000\n", "415000\n", "416000\n", "417000\n", "418000\n", "419000\n", "420000\n", "421000\n", "422000\n", "423000\n", "424000\n", "425000\n", "426000\n", "427000\n", "428000\n", "429000\n", "430000\n", "431000\n", "432000\n" ] } ], "source": [ "vocab = build_vocab_from_iterator(\n", " get_word_lines_from_file(train_file),\n", " max_tokens = vocab_size,\n", " specials = [''])\n", "\n", "with open('filename.pickle', 'wb') as handle:\n", " pickle.dump(vocab, handle, protocol=pickle.HIGHEST_PROTOCOL)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['', 'the', 'of', 'was', 'ladies']" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vocab.lookup_tokens([0, 1, 2, 10, 2000])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Definicja sieci\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Naszą prostą sieć neuronową zaimplementujemy używając frameworku PyTorch.\n", "\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "class SimpleBigramNeuralLanguageModel(nn.Module):\n", " def __init__(self, vocabulary_size, embedding_size):\n", " super(SimpleBigramNeuralLanguageModel, self).__init__()\n", " self.model = nn.Sequential(\n", " nn.Embedding(vocabulary_size, embedding_size),\n", " nn.Linear(embedding_size, vocabulary_size),\n", " nn.Softmax()\n", " )\n", " \n", " def forward(self, x):\n", " return self.model(x)\n", "\n", "with open('filename.pickle','rb') as handle:\n", " vocab = pickle.load(handle)\n", "\n", "vocab.set_default_index(vocab[''])" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Help on Vocab in module torchtext.vocab.vocab object:\n", "\n", "class Vocab(torch.nn.modules.module.Module)\n", " | Vocab(vocab) -> None\n", " | \n", " | Base class for all neural network modules.\n", " | \n", " | Your models should also subclass this class.\n", " | \n", " | Modules can also contain other Modules, allowing to nest them in\n", " | a tree structure. You can assign the submodules as regular attributes::\n", " | \n", " | import torch.nn as nn\n", " | import torch.nn.functional as F\n", " | \n", " | class Model(nn.Module):\n", " | def __init__(self):\n", " | super().__init__()\n", " | self.conv1 = nn.Conv2d(1, 20, 5)\n", " | self.conv2 = nn.Conv2d(20, 20, 5)\n", " | \n", " | def forward(self, x):\n", " | x = F.relu(self.conv1(x))\n", " | return F.relu(self.conv2(x))\n", " | \n", " | Submodules assigned in this way will be registered, and will have their\n", " | parameters converted too when you call :meth:`to`, etc.\n", " | \n", " | .. note::\n", " | As per the example above, an ``__init__()`` call to the parent class\n", " | must be made before assignment on the child.\n", " | \n", " | :ivar training: Boolean represents whether this module is in training or\n", " | evaluation mode.\n", " | :vartype training: bool\n", " | \n", " | Method resolution order:\n", " | Vocab\n", " | torch.nn.modules.module.Module\n", " | builtins.object\n", " | \n", " | Methods defined here:\n", " | \n", " | __contains__(self, token: str) -> bool\n", " | Args:\n", " | token: The token for which to check the membership.\n", " | \n", " | Returns:\n", " | Whether the token is member of vocab or not.\n", " | \n", " | __getitem__(self, token: str) -> int\n", " | Args:\n", " | token: The token used to lookup the corresponding index.\n", " | \n", " | Returns:\n", " | The index corresponding to the associated token.\n", " | \n", " | __init__(self, vocab) -> None\n", " | Initializes internal Module state, shared by both nn.Module and ScriptModule.\n", " | \n", " | __len__(self) -> int\n", " | Returns:\n", " | The length of the vocab.\n", " | \n", " | __prepare_scriptable__(self)\n", " | Return a JITable Vocab.\n", " | \n", " | append_token(self, token: str) -> None\n", " | Args:\n", " | token: The token used to lookup the corresponding index.\n", " | \n", " | Raises:\n", " | RuntimeError: If `token` already exists in the vocab\n", " | \n", " | forward(self, tokens: List[str]) -> List[int]\n", " | Calls the `lookup_indices` method\n", " | \n", " | Args:\n", " | tokens: a list of tokens used to lookup their corresponding `indices`.\n", " | \n", " | Returns:\n", " | The indices associated with a list of `tokens`.\n", " | \n", " | get_default_index(self) -> Union[int, NoneType]\n", " | Returns:\n", " | Value of default index if it is set.\n", " | \n", " | get_itos(self) -> List[str]\n", " | Returns:\n", " | List mapping indices to tokens.\n", " | \n", " | get_stoi(self) -> Dict[str, int]\n", " | Returns:\n", " | Dictionary mapping tokens to indices.\n", " | \n", " | insert_token(self, token: str, index: int) -> None\n", " | Args:\n", " | token: The token used to lookup the corresponding index.\n", " | index: The index corresponding to the associated token.\n", " | Raises:\n", " | RuntimeError: If `index` is not in range [0, Vocab.size()] or if `token` already exists in the vocab.\n", " | \n", " | lookup_indices(self, tokens: List[str]) -> List[int]\n", " | Args:\n", " | tokens: the tokens used to lookup their corresponding `indices`.\n", " | \n", " | Returns:\n", " | The 'indices` associated with `tokens`.\n", " | \n", " | lookup_token(self, index: int) -> str\n", " | Args:\n", " | index: The index corresponding to the associated token.\n", " | \n", " | Returns:\n", " | token: The token used to lookup the corresponding index.\n", " | \n", " | Raises:\n", " | RuntimeError: If `index` not in range [0, itos.size()).\n", " | \n", " | lookup_tokens(self, indices: List[int]) -> List[str]\n", " | Args:\n", " | indices: The `indices` used to lookup their corresponding`tokens`.\n", " | \n", " | Returns:\n", " | The `tokens` associated with `indices`.\n", " | \n", " | Raises:\n", " | RuntimeError: If an index within `indices` is not int range [0, itos.size()).\n", " | \n", " | set_default_index(self, index: Union[int, NoneType]) -> None\n", " | Args:\n", " | index: Value of default index. This index will be returned when OOV token is queried.\n", " | \n", " | ----------------------------------------------------------------------\n", " | Readonly properties defined here:\n", " | \n", " | is_jitable\n", " | \n", " | ----------------------------------------------------------------------\n", " | Data and other attributes defined here:\n", " | \n", " | __jit_unused_properties__ = ['is_jitable']\n", " | \n", " | ----------------------------------------------------------------------\n", " | Methods inherited from torch.nn.modules.module.Module:\n", " | \n", " | __call__ = _call_impl(self, *args, **kwargs)\n", " | \n", " | __delattr__(self, name)\n", " | Implement delattr(self, name).\n", " | \n", " | __dir__(self)\n", " | Default dir() implementation.\n", " | \n", " | __getattr__(self, name: str) -> Union[torch.Tensor, ForwardRef('Module')]\n", " | \n", " | __repr__(self)\n", " | Return repr(self).\n", " | \n", " | __setattr__(self, name: str, value: Union[torch.Tensor, ForwardRef('Module')]) -> None\n", " | Implement setattr(self, name, value).\n", " | \n", " | __setstate__(self, state)\n", " | \n", " | add_module(self, name: str, module: Union[ForwardRef('Module'), NoneType]) -> None\n", " | Adds a child module to the current module.\n", " | \n", " | The module can be accessed as an attribute using the given name.\n", " | \n", " | Args:\n", " | name (str): name of the child module. The child module can be\n", " | accessed from this module using the given name\n", " | module (Module): child module to be added to the module.\n", " | \n", " | apply(self: ~T, fn: Callable[[ForwardRef('Module')], NoneType]) -> ~T\n", " | Applies ``fn`` recursively to every submodule (as returned by ``.children()``)\n", " | as well as self. Typical use includes initializing the parameters of a model\n", " | (see also :ref:`nn-init-doc`).\n", " | \n", " | Args:\n", " | fn (:class:`Module` -> None): function to be applied to each submodule\n", " | \n", " | Returns:\n", " | Module: self\n", " | \n", " | Example::\n", " | \n", " | >>> @torch.no_grad()\n", " | >>> def init_weights(m):\n", " | >>> print(m)\n", " | >>> if type(m) == nn.Linear:\n", " | >>> m.weight.fill_(1.0)\n", " | >>> print(m.weight)\n", " | >>> net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))\n", " | >>> net.apply(init_weights)\n", " | Linear(in_features=2, out_features=2, bias=True)\n", " | Parameter containing:\n", " | tensor([[1., 1.],\n", " | [1., 1.]], requires_grad=True)\n", " | Linear(in_features=2, out_features=2, bias=True)\n", " | Parameter containing:\n", " | tensor([[1., 1.],\n", " | [1., 1.]], requires_grad=True)\n", " | Sequential(\n", " | (0): Linear(in_features=2, out_features=2, bias=True)\n", " | (1): Linear(in_features=2, out_features=2, bias=True)\n", " | )\n", " | \n", " | bfloat16(self: ~T) -> ~T\n", " | Casts all floating point parameters and buffers to ``bfloat16`` datatype.\n", " | \n", " | .. note::\n", " | This method modifies the module in-place.\n", " | \n", " | Returns:\n", " | Module: self\n", " | \n", " | buffers(self, recurse: bool = True) -> Iterator[torch.Tensor]\n", " | Returns an iterator over module buffers.\n", " | \n", " | Args:\n", " | recurse (bool): if True, then yields buffers of this module\n", " | and all submodules. Otherwise, yields only buffers that\n", " | are direct members of this module.\n", " | \n", " | Yields:\n", " | torch.Tensor: module buffer\n", " | \n", " | Example::\n", " | \n", " | >>> # xdoctest: +SKIP(\"undefined vars\")\n", " | >>> for buf in model.buffers():\n", " | >>> print(type(buf), buf.size())\n", " | (20L,)\n", " | (20L, 1L, 5L, 5L)\n", " | \n", " | children(self) -> Iterator[ForwardRef('Module')]\n", " | Returns an iterator over immediate children modules.\n", " | \n", " | Yields:\n", " | Module: a child module\n", " | \n", " | cpu(self: ~T) -> ~T\n", " | Moves all model parameters and buffers to the CPU.\n", " | \n", " | .. note::\n", " | This method modifies the module in-place.\n", " | \n", " | Returns:\n", " | Module: self\n", " | \n", " | cuda(self: ~T, device: Union[int, torch.device, NoneType] = None) -> ~T\n", " | Moves all model parameters and buffers to the GPU.\n", " | \n", " | This also makes associated parameters and buffers different objects. So\n", " | it should be called before constructing optimizer if the module will\n", " | live on GPU while being optimized.\n", " | \n", " | .. note::\n", " | This method modifies the module in-place.\n", " | \n", " | Args:\n", " | device (int, optional): if specified, all parameters will be\n", " | copied to that device\n", " | \n", " | Returns:\n", " | Module: self\n", " | \n", " | double(self: ~T) -> ~T\n", " | Casts all floating point parameters and buffers to ``double`` datatype.\n", " | \n", " | .. note::\n", " | This method modifies the module in-place.\n", " | \n", " | Returns:\n", " | Module: self\n", " | \n", " | eval(self: ~T) -> ~T\n", " | Sets the module in evaluation mode.\n", " | \n", " | This has any effect only on certain modules. See documentations of\n", " | particular modules for details of their behaviors in training/evaluation\n", " | mode, if they are affected, e.g. :class:`Dropout`, :class:`BatchNorm`,\n", " | etc.\n", " | \n", " | This is equivalent with :meth:`self.train(False) `.\n", " | \n", " | See :ref:`locally-disable-grad-doc` for a comparison between\n", " | `.eval()` and several similar mechanisms that may be confused with it.\n", " | \n", " | Returns:\n", " | Module: self\n", " | \n", " | extra_repr(self) -> str\n", " | Set the extra representation of the module\n", " | \n", " | To print customized extra information, you should re-implement\n", " | this method in your own modules. Both single-line and multi-line\n", " | strings are acceptable.\n", " | \n", " | float(self: ~T) -> ~T\n", " | Casts all floating point parameters and buffers to ``float`` datatype.\n", " | \n", " | .. note::\n", " | This method modifies the module in-place.\n", " | \n", " | Returns:\n", " | Module: self\n", " | \n", " | get_buffer(self, target: str) -> 'Tensor'\n", " | Returns the buffer given by ``target`` if it exists,\n", " | otherwise throws an error.\n", " | \n", " | See the docstring for ``get_submodule`` for a more detailed\n", " | explanation of this method's functionality as well as how to\n", " | correctly specify ``target``.\n", " | \n", " | Args:\n", " | target: The fully-qualified string name of the buffer\n", " | to look for. (See ``get_submodule`` for how to specify a\n", " | fully-qualified string.)\n", " | \n", " | Returns:\n", " | torch.Tensor: The buffer referenced by ``target``\n", " | \n", " | Raises:\n", " | AttributeError: If the target string references an invalid\n", " | path or resolves to something that is not a\n", " | buffer\n", " | \n", " | get_extra_state(self) -> Any\n", " | Returns any extra state to include in the module's state_dict.\n", " | Implement this and a corresponding :func:`set_extra_state` for your module\n", " | if you need to store extra state. This function is called when building the\n", " | module's `state_dict()`.\n", " | \n", " | Note that extra state should be picklable to ensure working serialization\n", " | of the state_dict. We only provide provide backwards compatibility guarantees\n", " | for serializing Tensors; other objects may break backwards compatibility if\n", " | their serialized pickled form changes.\n", " | \n", " | Returns:\n", " | object: Any extra state to store in the module's state_dict\n", " | \n", " | get_parameter(self, target: str) -> 'Parameter'\n", " | Returns the parameter given by ``target`` if it exists,\n", " | otherwise throws an error.\n", " | \n", " | See the docstring for ``get_submodule`` for a more detailed\n", " | explanation of this method's functionality as well as how to\n", " | correctly specify ``target``.\n", " | \n", " | Args:\n", " | target: The fully-qualified string name of the Parameter\n", " | to look for. (See ``get_submodule`` for how to specify a\n", " | fully-qualified string.)\n", " | \n", " | Returns:\n", " | torch.nn.Parameter: The Parameter referenced by ``target``\n", " | \n", " | Raises:\n", " | AttributeError: If the target string references an invalid\n", " | path or resolves to something that is not an\n", " | ``nn.Parameter``\n", " | \n", " | get_submodule(self, target: str) -> 'Module'\n", " | Returns the submodule given by ``target`` if it exists,\n", " | otherwise throws an error.\n", " | \n", " | For example, let's say you have an ``nn.Module`` ``A`` that\n", " | looks like this:\n", " | \n", " | .. code-block:: text\n", " | \n", " | A(\n", " | (net_b): Module(\n", " | (net_c): Module(\n", " | (conv): Conv2d(16, 33, kernel_size=(3, 3), stride=(2, 2))\n", " | )\n", " | (linear): Linear(in_features=100, out_features=200, bias=True)\n", " | )\n", " | )\n", " | \n", " | (The diagram shows an ``nn.Module`` ``A``. ``A`` has a nested\n", " | submodule ``net_b``, which itself has two submodules ``net_c``\n", " | and ``linear``. ``net_c`` then has a submodule ``conv``.)\n", " | \n", " | To check whether or not we have the ``linear`` submodule, we\n", " | would call ``get_submodule(\"net_b.linear\")``. To check whether\n", " | we have the ``conv`` submodule, we would call\n", " | ``get_submodule(\"net_b.net_c.conv\")``.\n", " | \n", " | The runtime of ``get_submodule`` is bounded by the degree\n", " | of module nesting in ``target``. A query against\n", " | ``named_modules`` achieves the same result, but it is O(N) in\n", " | the number of transitive modules. So, for a simple check to see\n", " | if some submodule exists, ``get_submodule`` should always be\n", " | used.\n", " | \n", " | Args:\n", " | target: The fully-qualified string name of the submodule\n", " | to look for. (See above example for how to specify a\n", " | fully-qualified string.)\n", " | \n", " | Returns:\n", " | torch.nn.Module: The submodule referenced by ``target``\n", " | \n", " | Raises:\n", " | AttributeError: If the target string references an invalid\n", " | path or resolves to something that is not an\n", " | ``nn.Module``\n", " | \n", " | half(self: ~T) -> ~T\n", " | Casts all floating point parameters and buffers to ``half`` datatype.\n", " | \n", " | .. note::\n", " | This method modifies the module in-place.\n", " | \n", " | Returns:\n", " | Module: self\n", " | \n", " | ipu(self: ~T, device: Union[int, torch.device, NoneType] = None) -> ~T\n", " | Moves all model parameters and buffers to the IPU.\n", " | \n", " | This also makes associated parameters and buffers different objects. So\n", " | it should be called before constructing optimizer if the module will\n", " | live on IPU while being optimized.\n", " | \n", " | .. note::\n", " | This method modifies the module in-place.\n", " | \n", " | Arguments:\n", " | device (int, optional): if specified, all parameters will be\n", " | copied to that device\n", " | \n", " | Returns:\n", " | Module: self\n", " | \n", " | load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True)\n", " | Copies parameters and buffers from :attr:`state_dict` into\n", " | this module and its descendants. If :attr:`strict` is ``True``, then\n", " | the keys of :attr:`state_dict` must exactly match the keys returned\n", " | by this module's :meth:`~torch.nn.Module.state_dict` function.\n", " | \n", " | Args:\n", " | state_dict (dict): a dict containing parameters and\n", " | persistent buffers.\n", " | strict (bool, optional): whether to strictly enforce that the keys\n", " | in :attr:`state_dict` match the keys returned by this module's\n", " | :meth:`~torch.nn.Module.state_dict` function. Default: ``True``\n", " | \n", " | Returns:\n", " | ``NamedTuple`` with ``missing_keys`` and ``unexpected_keys`` fields:\n", " | * **missing_keys** is a list of str containing the missing keys\n", " | * **unexpected_keys** is a list of str containing the unexpected keys\n", " | \n", " | Note:\n", " | If a parameter or buffer is registered as ``None`` and its corresponding key\n", " | exists in :attr:`state_dict`, :meth:`load_state_dict` will raise a\n", " | ``RuntimeError``.\n", " | \n", " | modules(self) -> Iterator[ForwardRef('Module')]\n", " | Returns an iterator over all modules in the network.\n", " | \n", " | Yields:\n", " | Module: a module in the network\n", " | \n", " | Note:\n", " | Duplicate modules are returned only once. In the following\n", " | example, ``l`` will be returned only once.\n", " | \n", " | Example::\n", " | \n", " | >>> l = nn.Linear(2, 2)\n", " | >>> net = nn.Sequential(l, l)\n", " | >>> for idx, m in enumerate(net.modules()):\n", " | ... print(idx, '->', m)\n", " | \n", " | 0 -> Sequential(\n", " | (0): Linear(in_features=2, out_features=2, bias=True)\n", " | (1): Linear(in_features=2, out_features=2, bias=True)\n", " | )\n", " | 1 -> Linear(in_features=2, out_features=2, bias=True)\n", " | \n", " | named_buffers(self, prefix: str = '', recurse: bool = True, remove_duplicate: bool = True) -> Iterator[Tuple[str, torch.Tensor]]\n", " | Returns an iterator over module buffers, yielding both the\n", " | name of the buffer as well as the buffer itself.\n", " | \n", " | Args:\n", " | prefix (str): prefix to prepend to all buffer names.\n", " | recurse (bool, optional): if True, then yields buffers of this module\n", " | and all submodules. Otherwise, yields only buffers that\n", " | are direct members of this module. Defaults to True.\n", " | remove_duplicate (bool, optional): whether to remove the duplicated buffers in the result. Defaults to True.\n", " | \n", " | Yields:\n", " | (str, torch.Tensor): Tuple containing the name and buffer\n", " | \n", " | Example::\n", " | \n", " | >>> # xdoctest: +SKIP(\"undefined vars\")\n", " | >>> for name, buf in self.named_buffers():\n", " | >>> if name in ['running_var']:\n", " | >>> print(buf.size())\n", " | \n", " | named_children(self) -> Iterator[Tuple[str, ForwardRef('Module')]]\n", " | Returns an iterator over immediate children modules, yielding both\n", " | the name of the module as well as the module itself.\n", " | \n", " | Yields:\n", " | (str, Module): Tuple containing a name and child module\n", " | \n", " | Example::\n", " | \n", " | >>> # xdoctest: +SKIP(\"undefined vars\")\n", " | >>> for name, module in model.named_children():\n", " | >>> if name in ['conv4', 'conv5']:\n", " | >>> print(module)\n", " | \n", " | named_modules(self, memo: Union[Set[ForwardRef('Module')], NoneType] = None, prefix: str = '', remove_duplicate: bool = True)\n", " | Returns an iterator over all modules in the network, yielding\n", " | both the name of the module as well as the module itself.\n", " | \n", " | Args:\n", " | memo: a memo to store the set of modules already added to the result\n", " | prefix: a prefix that will be added to the name of the module\n", " | remove_duplicate: whether to remove the duplicated module instances in the result\n", " | or not\n", " | \n", " | Yields:\n", " | (str, Module): Tuple of name and module\n", " | \n", " | Note:\n", " | Duplicate modules are returned only once. In the following\n", " | example, ``l`` will be returned only once.\n", " | \n", " | Example::\n", " | \n", " | >>> l = nn.Linear(2, 2)\n", " | >>> net = nn.Sequential(l, l)\n", " | >>> for idx, m in enumerate(net.named_modules()):\n", " | ... print(idx, '->', m)\n", " | \n", " | 0 -> ('', Sequential(\n", " | (0): Linear(in_features=2, out_features=2, bias=True)\n", " | (1): Linear(in_features=2, out_features=2, bias=True)\n", " | ))\n", " | 1 -> ('0', Linear(in_features=2, out_features=2, bias=True))\n", " | \n", " | named_parameters(self, prefix: str = '', recurse: bool = True, remove_duplicate: bool = True) -> Iterator[Tuple[str, torch.nn.parameter.Parameter]]\n", " | Returns an iterator over module parameters, yielding both the\n", " | name of the parameter as well as the parameter itself.\n", " | \n", " | Args:\n", " | prefix (str): prefix to prepend to all parameter names.\n", " | recurse (bool): if True, then yields parameters of this module\n", " | and all submodules. Otherwise, yields only parameters that\n", " | are direct members of this module.\n", " | remove_duplicate (bool, optional): whether to remove the duplicated\n", " | parameters in the result. Defaults to True.\n", " | \n", " | Yields:\n", " | (str, Parameter): Tuple containing the name and parameter\n", " | \n", " | Example::\n", " | \n", " | >>> # xdoctest: +SKIP(\"undefined vars\")\n", " | >>> for name, param in self.named_parameters():\n", " | >>> if name in ['bias']:\n", " | >>> print(param.size())\n", " | \n", " | parameters(self, recurse: bool = True) -> Iterator[torch.nn.parameter.Parameter]\n", " | Returns an iterator over module parameters.\n", " | \n", " | This is typically passed to an optimizer.\n", " | \n", " | Args:\n", " | recurse (bool): if True, then yields parameters of this module\n", " | and all submodules. Otherwise, yields only parameters that\n", " | are direct members of this module.\n", " | \n", " | Yields:\n", " | Parameter: module parameter\n", " | \n", " | Example::\n", " | \n", " | >>> # xdoctest: +SKIP(\"undefined vars\")\n", " | >>> for param in model.parameters():\n", " | >>> print(type(param), param.size())\n", " | (20L,)\n", " | (20L, 1L, 5L, 5L)\n", " | \n", " | register_backward_hook(self, hook: Callable[[ForwardRef('Module'), Union[Tuple[torch.Tensor, ...], torch.Tensor], Union[Tuple[torch.Tensor, ...], torch.Tensor]], Union[NoneType, Tuple[torch.Tensor, ...], torch.Tensor]]) -> torch.utils.hooks.RemovableHandle\n", " | Registers a backward hook on the module.\n", " | \n", " | This function is deprecated in favor of :meth:`~torch.nn.Module.register_full_backward_hook` and\n", " | the behavior of this function will change in future versions.\n", " | \n", " | Returns:\n", " | :class:`torch.utils.hooks.RemovableHandle`:\n", " | a handle that can be used to remove the added hook by calling\n", " | ``handle.remove()``\n", " | \n", " | register_buffer(self, name: str, tensor: Union[torch.Tensor, NoneType], persistent: bool = True) -> None\n", " | Adds a buffer to the module.\n", " | \n", " | This is typically used to register a buffer that should not to be\n", " | considered a model parameter. For example, BatchNorm's ``running_mean``\n", " | is not a parameter, but is part of the module's state. Buffers, by\n", " | default, are persistent and will be saved alongside parameters. This\n", " | behavior can be changed by setting :attr:`persistent` to ``False``. The\n", " | only difference between a persistent buffer and a non-persistent buffer\n", " | is that the latter will not be a part of this module's\n", " | :attr:`state_dict`.\n", " | \n", " | Buffers can be accessed as attributes using given names.\n", " | \n", " | Args:\n", " | name (str): name of the buffer. The buffer can be accessed\n", " | from this module using the given name\n", " | tensor (Tensor or None): buffer to be registered. If ``None``, then operations\n", " | that run on buffers, such as :attr:`cuda`, are ignored. If ``None``,\n", " | the buffer is **not** included in the module's :attr:`state_dict`.\n", " | persistent (bool): whether the buffer is part of this module's\n", " | :attr:`state_dict`.\n", " | \n", " | Example::\n", " | \n", " | >>> # xdoctest: +SKIP(\"undefined vars\")\n", " | >>> self.register_buffer('running_mean', torch.zeros(num_features))\n", " | \n", " | register_forward_hook(self, hook: Union[Callable[[~T, Tuple[Any, ...], Any], Union[Any, NoneType]], Callable[[~T, Tuple[Any, ...], Dict[str, Any], Any], Union[Any, NoneType]]], *, prepend: bool = False, with_kwargs: bool = False) -> torch.utils.hooks.RemovableHandle\n", " | Registers a forward hook on the module.\n", " | \n", " | The hook will be called every time after :func:`forward` has computed an output.\n", " | \n", " | If ``with_kwargs`` is ``False`` or not specified, the input contains only\n", " | the positional arguments given to the module. Keyword arguments won't be\n", " | passed to the hooks and only to the ``forward``. The hook can modify the\n", " | output. It can modify the input inplace but it will not have effect on\n", " | forward since this is called after :func:`forward` is called. The hook\n", " | should have the following signature::\n", " | \n", " | hook(module, args, output) -> None or modified output\n", " | \n", " | If ``with_kwargs`` is ``True``, the forward hook will be passed the\n", " | ``kwargs`` given to the forward function and be expected to return the\n", " | output possibly modified. The hook should have the following signature::\n", " | \n", " | hook(module, args, kwargs, output) -> None or modified output\n", " | \n", " | Args:\n", " | hook (Callable): The user defined hook to be registered.\n", " | prepend (bool): If ``True``, the provided ``hook`` will be fired\n", " | before all existing ``forward`` hooks on this\n", " | :class:`torch.nn.modules.Module`. Otherwise, the provided\n", " | ``hook`` will be fired after all existing ``forward`` hooks on\n", " | this :class:`torch.nn.modules.Module`. Note that global\n", " | ``forward`` hooks registered with\n", " | :func:`register_module_forward_hook` will fire before all hooks\n", " | registered by this method.\n", " | Default: ``False``\n", " | with_kwargs (bool): If ``True``, the ``hook`` will be passed the\n", " | kwargs given to the forward function.\n", " | Default: ``False``\n", " | \n", " | Returns:\n", " | :class:`torch.utils.hooks.RemovableHandle`:\n", " | a handle that can be used to remove the added hook by calling\n", " | ``handle.remove()``\n", " | \n", " | register_forward_pre_hook(self, hook: Union[Callable[[~T, Tuple[Any, ...]], Union[Any, NoneType]], Callable[[~T, Tuple[Any, ...], Dict[str, Any]], Union[Tuple[Any, Dict[str, Any]], NoneType]]], *, prepend: bool = False, with_kwargs: bool = False) -> torch.utils.hooks.RemovableHandle\n", " | Registers a forward pre-hook on the module.\n", " | \n", " | The hook will be called every time before :func:`forward` is invoked.\n", " | \n", " | \n", " | If ``with_kwargs`` is false or not specified, the input contains only\n", " | the positional arguments given to the module. Keyword arguments won't be\n", " | passed to the hooks and only to the ``forward``. The hook can modify the\n", " | input. User can either return a tuple or a single modified value in the\n", " | hook. We will wrap the value into a tuple if a single value is returned\n", " | (unless that value is already a tuple). The hook should have the\n", " | following signature::\n", " | \n", " | hook(module, args) -> None or modified input\n", " | \n", " | If ``with_kwargs`` is true, the forward pre-hook will be passed the\n", " | kwargs given to the forward function. And if the hook modifies the\n", " | input, both the args and kwargs should be returned. The hook should have\n", " | the following signature::\n", " | \n", " | hook(module, args, kwargs) -> None or a tuple of modified input and kwargs\n", " | \n", " | Args:\n", " | hook (Callable): The user defined hook to be registered.\n", " | prepend (bool): If true, the provided ``hook`` will be fired before\n", " | all existing ``forward_pre`` hooks on this\n", " | :class:`torch.nn.modules.Module`. Otherwise, the provided\n", " | ``hook`` will be fired after all existing ``forward_pre`` hooks\n", " | on this :class:`torch.nn.modules.Module`. Note that global\n", " | ``forward_pre`` hooks registered with\n", " | :func:`register_module_forward_pre_hook` will fire before all\n", " | hooks registered by this method.\n", " | Default: ``False``\n", " | with_kwargs (bool): If true, the ``hook`` will be passed the kwargs\n", " | given to the forward function.\n", " | Default: ``False``\n", " | \n", " | Returns:\n", " | :class:`torch.utils.hooks.RemovableHandle`:\n", " | a handle that can be used to remove the added hook by calling\n", " | ``handle.remove()``\n", " | \n", " | register_full_backward_hook(self, hook: Callable[[ForwardRef('Module'), Union[Tuple[torch.Tensor, ...], torch.Tensor], Union[Tuple[torch.Tensor, ...], torch.Tensor]], Union[NoneType, Tuple[torch.Tensor, ...], torch.Tensor]], prepend: bool = False) -> torch.utils.hooks.RemovableHandle\n", " | Registers a backward hook on the module.\n", " | \n", " | The hook will be called every time the gradients with respect to a module\n", " | are computed, i.e. the hook will execute if and only if the gradients with\n", " | respect to module outputs are computed. The hook should have the following\n", " | signature::\n", " | \n", " | hook(module, grad_input, grad_output) -> tuple(Tensor) or None\n", " | \n", " | The :attr:`grad_input` and :attr:`grad_output` are tuples that contain the gradients\n", " | with respect to the inputs and outputs respectively. The hook should\n", " | not modify its arguments, but it can optionally return a new gradient with\n", " | respect to the input that will be used in place of :attr:`grad_input` in\n", " | subsequent computations. :attr:`grad_input` will only correspond to the inputs given\n", " | as positional arguments and all kwarg arguments are ignored. Entries\n", " | in :attr:`grad_input` and :attr:`grad_output` will be ``None`` for all non-Tensor\n", " | arguments.\n", " | \n", " | For technical reasons, when this hook is applied to a Module, its forward function will\n", " | receive a view of each Tensor passed to the Module. Similarly the caller will receive a view\n", " | of each Tensor returned by the Module's forward function.\n", " | \n", " | .. warning ::\n", " | Modifying inputs or outputs inplace is not allowed when using backward hooks and\n", " | will raise an error.\n", " | \n", " | Args:\n", " | hook (Callable): The user-defined hook to be registered.\n", " | prepend (bool): If true, the provided ``hook`` will be fired before\n", " | all existing ``backward`` hooks on this\n", " | :class:`torch.nn.modules.Module`. Otherwise, the provided\n", " | ``hook`` will be fired after all existing ``backward`` hooks on\n", " | this :class:`torch.nn.modules.Module`. Note that global\n", " | ``backward`` hooks registered with\n", " | :func:`register_module_full_backward_hook` will fire before\n", " | all hooks registered by this method.\n", " | \n", " | Returns:\n", " | :class:`torch.utils.hooks.RemovableHandle`:\n", " | a handle that can be used to remove the added hook by calling\n", " | ``handle.remove()``\n", " | \n", " | register_full_backward_pre_hook(self, hook: Callable[[ForwardRef('Module'), Union[Tuple[torch.Tensor, ...], torch.Tensor]], Union[NoneType, Tuple[torch.Tensor, ...], torch.Tensor]], prepend: bool = False) -> torch.utils.hooks.RemovableHandle\n", " | Registers a backward pre-hook on the module.\n", " | \n", " | The hook will be called every time the gradients for the module are computed.\n", " | The hook should have the following signature::\n", " | \n", " | hook(module, grad_output) -> Tensor or None\n", " | \n", " | The :attr:`grad_output` is a tuple. The hook should\n", " | not modify its arguments, but it can optionally return a new gradient with\n", " | respect to the output that will be used in place of :attr:`grad_output` in\n", " | subsequent computations. Entries in :attr:`grad_output` will be ``None`` for\n", " | all non-Tensor arguments.\n", " | \n", " | For technical reasons, when this hook is applied to a Module, its forward function will\n", " | receive a view of each Tensor passed to the Module. Similarly the caller will receive a view\n", " | of each Tensor returned by the Module's forward function.\n", " | \n", " | .. warning ::\n", " | Modifying inputs inplace is not allowed when using backward hooks and\n", " | will raise an error.\n", " | \n", " | Args:\n", " | hook (Callable): The user-defined hook to be registered.\n", " | prepend (bool): If true, the provided ``hook`` will be fired before\n", " | all existing ``backward_pre`` hooks on this\n", " | :class:`torch.nn.modules.Module`. Otherwise, the provided\n", " | ``hook`` will be fired after all existing ``backward_pre`` hooks\n", " | on this :class:`torch.nn.modules.Module`. Note that global\n", " | ``backward_pre`` hooks registered with\n", " | :func:`register_module_full_backward_pre_hook` will fire before\n", " | all hooks registered by this method.\n", " | \n", " | Returns:\n", " | :class:`torch.utils.hooks.RemovableHandle`:\n", " | a handle that can be used to remove the added hook by calling\n", " | ``handle.remove()``\n", " | \n", " | register_load_state_dict_post_hook(self, hook)\n", " | Registers a post hook to be run after module's ``load_state_dict``\n", " | is called.\n", " | \n", " | It should have the following signature::\n", " | hook(module, incompatible_keys) -> None\n", " | \n", " | The ``module`` argument is the current module that this hook is registered\n", " | on, and the ``incompatible_keys`` argument is a ``NamedTuple`` consisting\n", " | of attributes ``missing_keys`` and ``unexpected_keys``. ``missing_keys``\n", " | is a ``list`` of ``str`` containing the missing keys and\n", " | ``unexpected_keys`` is a ``list`` of ``str`` containing the unexpected keys.\n", " | \n", " | The given incompatible_keys can be modified inplace if needed.\n", " | \n", " | Note that the checks performed when calling :func:`load_state_dict` with\n", " | ``strict=True`` are affected by modifications the hook makes to\n", " | ``missing_keys`` or ``unexpected_keys``, as expected. Additions to either\n", " | set of keys will result in an error being thrown when ``strict=True``, and\n", " | clearing out both missing and unexpected keys will avoid an error.\n", " | \n", " | Returns:\n", " | :class:`torch.utils.hooks.RemovableHandle`:\n", " | a handle that can be used to remove the added hook by calling\n", " | ``handle.remove()``\n", " | \n", " | register_module(self, name: str, module: Union[ForwardRef('Module'), NoneType]) -> None\n", " | Alias for :func:`add_module`.\n", " | \n", " | register_parameter(self, name: str, param: Union[torch.nn.parameter.Parameter, NoneType]) -> None\n", " | Adds a parameter to the module.\n", " | \n", " | The parameter can be accessed as an attribute using given name.\n", " | \n", " | Args:\n", " | name (str): name of the parameter. The parameter can be accessed\n", " | from this module using the given name\n", " | param (Parameter or None): parameter to be added to the module. If\n", " | ``None``, then operations that run on parameters, such as :attr:`cuda`,\n", " | are ignored. If ``None``, the parameter is **not** included in the\n", " | module's :attr:`state_dict`.\n", " | \n", " | register_state_dict_pre_hook(self, hook)\n", " | These hooks will be called with arguments: ``self``, ``prefix``,\n", " | and ``keep_vars`` before calling ``state_dict`` on ``self``. The registered\n", " | hooks can be used to perform pre-processing before the ``state_dict``\n", " | call is made.\n", " | \n", " | requires_grad_(self: ~T, requires_grad: bool = True) -> ~T\n", " | Change if autograd should record operations on parameters in this\n", " | module.\n", " | \n", " | This method sets the parameters' :attr:`requires_grad` attributes\n", " | in-place.\n", " | \n", " | This method is helpful for freezing part of the module for finetuning\n", " | or training parts of a model individually (e.g., GAN training).\n", " | \n", " | See :ref:`locally-disable-grad-doc` for a comparison between\n", " | `.requires_grad_()` and several similar mechanisms that may be confused with it.\n", " | \n", " | Args:\n", " | requires_grad (bool): whether autograd should record operations on\n", " | parameters in this module. Default: ``True``.\n", " | \n", " | Returns:\n", " | Module: self\n", " | \n", " | set_extra_state(self, state: Any)\n", " | This function is called from :func:`load_state_dict` to handle any extra state\n", " | found within the `state_dict`. Implement this function and a corresponding\n", " | :func:`get_extra_state` for your module if you need to store extra state within its\n", " | `state_dict`.\n", " | \n", " | Args:\n", " | state (dict): Extra state from the `state_dict`\n", " | \n", " | share_memory(self: ~T) -> ~T\n", " | See :meth:`torch.Tensor.share_memory_`\n", " | \n", " | state_dict(self, *args, destination=None, prefix='', keep_vars=False)\n", " | Returns a dictionary containing references to the whole state of the module.\n", " | \n", " | Both parameters and persistent buffers (e.g. running averages) are\n", " | included. Keys are corresponding parameter and buffer names.\n", " | Parameters and buffers set to ``None`` are not included.\n", " | \n", " | .. note::\n", " | The returned object is a shallow copy. It contains references\n", " | to the module's parameters and buffers.\n", " | \n", " | .. warning::\n", " | Currently ``state_dict()`` also accepts positional arguments for\n", " | ``destination``, ``prefix`` and ``keep_vars`` in order. However,\n", " | this is being deprecated and keyword arguments will be enforced in\n", " | future releases.\n", " | \n", " | .. warning::\n", " | Please avoid the use of argument ``destination`` as it is not\n", " | designed for end-users.\n", " | \n", " | Args:\n", " | destination (dict, optional): If provided, the state of module will\n", " | be updated into the dict and the same object is returned.\n", " | Otherwise, an ``OrderedDict`` will be created and returned.\n", " | Default: ``None``.\n", " | prefix (str, optional): a prefix added to parameter and buffer\n", " | names to compose the keys in state_dict. Default: ``''``.\n", " | keep_vars (bool, optional): by default the :class:`~torch.Tensor` s\n", " | returned in the state dict are detached from autograd. If it's\n", " | set to ``True``, detaching will not be performed.\n", " | Default: ``False``.\n", " | \n", " | Returns:\n", " | dict:\n", " | a dictionary containing a whole state of the module\n", " | \n", " | Example::\n", " | \n", " | >>> # xdoctest: +SKIP(\"undefined vars\")\n", " | >>> module.state_dict().keys()\n", " | ['bias', 'weight']\n", " | \n", " | to(self, *args, **kwargs)\n", " | Moves and/or casts the parameters and buffers.\n", " | \n", " | This can be called as\n", " | \n", " | .. function:: to(device=None, dtype=None, non_blocking=False)\n", " | :noindex:\n", " | \n", " | .. function:: to(dtype, non_blocking=False)\n", " | :noindex:\n", " | \n", " | .. function:: to(tensor, non_blocking=False)\n", " | :noindex:\n", " | \n", " | .. function:: to(memory_format=torch.channels_last)\n", " | :noindex:\n", " | \n", " | Its signature is similar to :meth:`torch.Tensor.to`, but only accepts\n", " | floating point or complex :attr:`dtype`\\ s. In addition, this method will\n", " | only cast the floating point or complex parameters and buffers to :attr:`dtype`\n", " | (if given). The integral parameters and buffers will be moved\n", " | :attr:`device`, if that is given, but with dtypes unchanged. When\n", " | :attr:`non_blocking` is set, it tries to convert/move asynchronously\n", " | with respect to the host if possible, e.g., moving CPU Tensors with\n", " | pinned memory to CUDA devices.\n", " | \n", " | See below for examples.\n", " | \n", " | .. note::\n", " | This method modifies the module in-place.\n", " | \n", " | Args:\n", " | device (:class:`torch.device`): the desired device of the parameters\n", " | and buffers in this module\n", " | dtype (:class:`torch.dtype`): the desired floating point or complex dtype of\n", " | the parameters and buffers in this module\n", " | tensor (torch.Tensor): Tensor whose dtype and device are the desired\n", " | dtype and device for all parameters and buffers in this module\n", " | memory_format (:class:`torch.memory_format`): the desired memory\n", " | format for 4D parameters and buffers in this module (keyword\n", " | only argument)\n", " | \n", " | Returns:\n", " | Module: self\n", " | \n", " | Examples::\n", " | \n", " | >>> # xdoctest: +IGNORE_WANT(\"non-deterministic\")\n", " | >>> linear = nn.Linear(2, 2)\n", " | >>> linear.weight\n", " | Parameter containing:\n", " | tensor([[ 0.1913, -0.3420],\n", " | [-0.5113, -0.2325]])\n", " | >>> linear.to(torch.double)\n", " | Linear(in_features=2, out_features=2, bias=True)\n", " | >>> linear.weight\n", " | Parameter containing:\n", " | tensor([[ 0.1913, -0.3420],\n", " | [-0.5113, -0.2325]], dtype=torch.float64)\n", " | >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA1)\n", " | >>> gpu1 = torch.device(\"cuda:1\")\n", " | >>> linear.to(gpu1, dtype=torch.half, non_blocking=True)\n", " | Linear(in_features=2, out_features=2, bias=True)\n", " | >>> linear.weight\n", " | Parameter containing:\n", " | tensor([[ 0.1914, -0.3420],\n", " | [-0.5112, -0.2324]], dtype=torch.float16, device='cuda:1')\n", " | >>> cpu = torch.device(\"cpu\")\n", " | >>> linear.to(cpu)\n", " | Linear(in_features=2, out_features=2, bias=True)\n", " | >>> linear.weight\n", " | Parameter containing:\n", " | tensor([[ 0.1914, -0.3420],\n", " | [-0.5112, -0.2324]], dtype=torch.float16)\n", " | \n", " | >>> linear = nn.Linear(2, 2, bias=None).to(torch.cdouble)\n", " | >>> linear.weight\n", " | Parameter containing:\n", " | tensor([[ 0.3741+0.j, 0.2382+0.j],\n", " | [ 0.5593+0.j, -0.4443+0.j]], dtype=torch.complex128)\n", " | >>> linear(torch.ones(3, 2, dtype=torch.cdouble))\n", " | tensor([[0.6122+0.j, 0.1150+0.j],\n", " | [0.6122+0.j, 0.1150+0.j],\n", " | [0.6122+0.j, 0.1150+0.j]], dtype=torch.complex128)\n", " | \n", " | to_empty(self: ~T, *, device: Union[str, torch.device]) -> ~T\n", " | Moves the parameters and buffers to the specified device without copying storage.\n", " | \n", " | Args:\n", " | device (:class:`torch.device`): The desired device of the parameters\n", " | and buffers in this module.\n", " | \n", " | Returns:\n", " | Module: self\n", " | \n", " | train(self: ~T, mode: bool = True) -> ~T\n", " | Sets the module in training mode.\n", " | \n", " | This has any effect only on certain modules. See documentations of\n", " | particular modules for details of their behaviors in training/evaluation\n", " | mode, if they are affected, e.g. :class:`Dropout`, :class:`BatchNorm`,\n", " | etc.\n", " | \n", " | Args:\n", " | mode (bool): whether to set training mode (``True``) or evaluation\n", " | mode (``False``). Default: ``True``.\n", " | \n", " | Returns:\n", " | Module: self\n", " | \n", " | type(self: ~T, dst_type: Union[torch.dtype, str]) -> ~T\n", " | Casts all parameters and buffers to :attr:`dst_type`.\n", " | \n", " | .. note::\n", " | This method modifies the module in-place.\n", " | \n", " | Args:\n", " | dst_type (type or string): the desired type\n", " | \n", " | Returns:\n", " | Module: self\n", " | \n", " | xpu(self: ~T, device: Union[int, torch.device, NoneType] = None) -> ~T\n", " | Moves all model parameters and buffers to the XPU.\n", " | \n", " | This also makes associated parameters and buffers different objects. So\n", " | it should be called before constructing optimizer if the module will\n", " | live on XPU while being optimized.\n", " | \n", " | .. note::\n", " | This method modifies the module in-place.\n", " | \n", " | Arguments:\n", " | device (int, optional): if specified, all parameters will be\n", " | copied to that device\n", " | \n", " | Returns:\n", " | Module: self\n", " | \n", " | zero_grad(self, set_to_none: bool = True) -> None\n", " | Sets gradients of all model parameters to zero. See similar function\n", " | under :class:`torch.optim.Optimizer` for more context.\n", " | \n", " | Args:\n", " | set_to_none (bool): instead of setting to zero, set the grads to None.\n", " | See :meth:`torch.optim.Optimizer.zero_grad` for details.\n", " | \n", " | ----------------------------------------------------------------------\n", " | Data descriptors inherited from torch.nn.modules.module.Module:\n", " | \n", " | __dict__\n", " | dictionary for instance variables (if defined)\n", " | \n", " | __weakref__\n", " | list of weak references to the object (if defined)\n", " | \n", " | ----------------------------------------------------------------------\n", " | Data and other attributes inherited from torch.nn.modules.module.Module:\n", " | \n", " | T_destination = ~T_destination\n", " | \n", " | __annotations__ = {'__call__': typing.Callable[..., typing.Any], '_bac...\n", " | \n", " | call_super_init = False\n", " | \n", " | dump_patches = False\n", "\n" ] } ], "source": [ "help(vocab)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1000\n", "2000\n", "3000\n", "4000\n", "5000\n", "6000\n", "7000\n", "8000\n", "9000\n", "10000\n", "11000\n", "12000\n", "13000\n", "14000\n", "15000\n", "16000\n", "17000\n", "18000\n", "19000\n", "20000\n", "21000\n", "22000\n", "23000\n", "24000\n", "25000\n", "26000\n", "27000\n", "28000\n", "29000\n", "30000\n", "31000\n", "32000\n", "33000\n", "34000\n", "35000\n", "36000\n", "37000\n", "38000\n", "39000\n", "40000\n", "41000\n", "42000\n", "43000\n", "44000\n", "45000\n", "46000\n", "47000\n", "48000\n", "49000\n", "50000\n", "51000\n", "52000\n", "53000\n", "54000\n", "55000\n", "56000\n", "57000\n", "58000\n", "59000\n", "60000\n", "61000\n", "62000\n", "63000\n", "64000\n", "65000\n", "66000\n", "67000\n", "68000\n", "69000\n", "70000\n", "71000\n", "72000\n", "73000\n", "74000\n", "75000\n", "76000\n", "77000\n", "78000\n", "79000\n", "80000\n", "81000\n", "82000\n", "83000\n", "84000\n", "85000\n", "86000\n", "87000\n", "88000\n", "89000\n", "90000\n", "91000\n", "92000\n", "93000\n", "94000\n", "95000\n", "96000\n", "97000\n", "98000\n", "99000\n", "100000\n", "101000\n", "102000\n", "103000\n", "104000\n", "105000\n", "106000\n", "107000\n", "108000\n", "109000\n", "110000\n", "111000\n", "112000\n", "113000\n", "114000\n", "115000\n", "116000\n", "117000\n", "118000\n", "119000\n", "120000\n", "121000\n", "122000\n", "123000\n", "124000\n", "125000\n", "126000\n", "127000\n", "128000\n", "129000\n", "130000\n", "131000\n", "132000\n", "133000\n", "134000\n", "135000\n", "136000\n", "137000\n", "138000\n", "139000\n", "140000\n", "141000\n", "142000\n", "143000\n", "144000\n", "145000\n", "146000\n", "147000\n", "148000\n", "149000\n", "150000\n", "151000\n", "152000\n", "153000\n", "154000\n", "155000\n", "156000\n", "157000\n", "158000\n", "159000\n", "160000\n", "161000\n", "162000\n", "163000\n", "164000\n", "165000\n", "166000\n", "167000\n", "168000\n", "169000\n", "170000\n", "171000\n", "172000\n", "173000\n", "174000\n", "175000\n", "176000\n", "177000\n", "178000\n", "179000\n", "180000\n", "181000\n", "182000\n", "183000\n", "184000\n", "185000\n", "186000\n", "187000\n", "188000\n", "189000\n", "190000\n", "191000\n", "192000\n", "193000\n", "194000\n", "195000\n", "196000\n", "197000\n", "198000\n", "199000\n", "200000\n", "201000\n", "202000\n", "203000\n", "204000\n", "205000\n", "206000\n", "207000\n", "208000\n", "209000\n", "210000\n", "211000\n", "212000\n", "213000\n", "214000\n", "215000\n", "216000\n", "217000\n", "218000\n", "219000\n", "220000\n", "221000\n", "222000\n", "223000\n", "224000\n", "225000\n", "226000\n", "227000\n", "228000\n", "229000\n", "230000\n", "231000\n", "232000\n", "233000\n", "234000\n", "235000\n", "236000\n", "237000\n", "238000\n", "239000\n", "240000\n", "241000\n", "242000\n", "243000\n", "244000\n", "245000\n", "246000\n", "247000\n", "248000\n", "249000\n", "250000\n", "251000\n", "252000\n", "253000\n", "254000\n", "255000\n", "256000\n", "257000\n", "258000\n", "259000\n", "260000\n", "261000\n", "262000\n", "263000\n", "264000\n", "265000\n", "266000\n", "267000\n", "268000\n", "269000\n", "270000\n", "271000\n", "272000\n", "273000\n", "274000\n", "275000\n", "276000\n", "277000\n", "278000\n", "279000\n", "280000\n", "281000\n", "282000\n", "283000\n", "284000\n", "285000\n", "286000\n", "287000\n", "288000\n", "289000\n", "290000\n", "291000\n", "292000\n", "293000\n", "294000\n", "295000\n", "296000\n", "297000\n", "298000\n", "299000\n", "300000\n", "301000\n", "302000\n", "303000\n", "304000\n", "305000\n", "306000\n", "307000\n", "308000\n", "309000\n", "310000\n", "311000\n", "312000\n", "313000\n", "314000\n", "315000\n", "316000\n", "317000\n", "318000\n", "319000\n", "320000\n", "321000\n", "322000\n", "323000\n", "324000\n", "325000\n", "326000\n", "327000\n", "328000\n", "329000\n", "330000\n", "331000\n", "332000\n", "333000\n", "334000\n", "335000\n", "336000\n", "337000\n", "338000\n", "339000\n", "340000\n", "341000\n", "342000\n", "343000\n", "344000\n", "345000\n", "346000\n", "347000\n", "348000\n", "349000\n", "350000\n", "351000\n", "352000\n", "353000\n", "354000\n", "355000\n", "356000\n", "357000\n", "358000\n", "359000\n", "360000\n", "361000\n", "362000\n", "363000\n", "364000\n", "365000\n", "366000\n", "367000\n", "368000\n", "369000\n", "370000\n", "371000\n", "372000\n", "373000\n", "374000\n", "375000\n", "376000\n", "377000\n", "378000\n", "379000\n", "380000\n", "381000\n", "382000\n", "383000\n", "384000\n", "385000\n", "386000\n", "387000\n", "388000\n", "389000\n", "390000\n", "391000\n", "392000\n", "393000\n", "394000\n", "395000\n", "396000\n", "397000\n", "398000\n", "399000\n", "400000\n", "401000\n", "402000\n", "403000\n", "404000\n", "405000\n", "406000\n", "407000\n", "408000\n", "409000\n", "410000\n", "411000\n", "412000\n", "413000\n", "414000\n", "415000\n", "416000\n", "417000\n", "418000\n", "419000\n", "420000\n", "421000\n", "422000\n", "423000\n", "424000\n", "425000\n", "426000\n", "427000\n", "428000\n", "429000\n", "430000\n", "431000\n", "432000\n" ] } ], "source": [ "def look_ahead_iterator(gen):\n", " prev = None\n", " for item in gen:\n", " if prev is not None:\n", " yield (prev, item)\n", " prev = item\n", "\n", "class Bigrams(IterableDataset):\n", " def __init__(self, text_file, vocabulary_size):\n", " self.vocab = build_vocab_from_iterator(\n", " get_word_lines_from_file(text_file),\n", " max_tokens = vocabulary_size,\n", " specials = [''])\n", " self.vocab.set_default_index(self.vocab[''])\n", " self.vocabulary_size = vocabulary_size\n", " self.text_file = text_file\n", "\n", " def __iter__(self):\n", " return look_ahead_iterator(\n", " (self.vocab[t] for t in itertools.chain.from_iterable(get_word_lines_from_file(self.text_file))))\n", "\n", "train_dataset = Bigrams(train_file, vocab_size)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<__main__.Bigrams object at 0x7fdd26d23940>\n" ] } ], "source": [ "print(train_dataset)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/plain": [ "'|===========================================================================|\\n| PyTorch CUDA memory summary, device ID 0 |\\n|---------------------------------------------------------------------------|\\n| CUDA OOMs: 1 | cudaMalloc retries: 1 |\\n|===========================================================================|\\n| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |\\n|---------------------------------------------------------------------------|\\n| Allocated memory | 699613 KiB | 1903 MiB | 3735 MiB | 3052 MiB |\\n| from large pool | 699414 KiB | 1903 MiB | 3734 MiB | 3051 MiB |\\n| from small pool | 199 KiB | 1 MiB | 1 MiB | 1 MiB |\\n|---------------------------------------------------------------------------|\\n| Active memory | 699613 KiB | 1903 MiB | 3735 MiB | 3052 MiB |\\n| from large pool | 699414 KiB | 1903 MiB | 3734 MiB | 3051 MiB |\\n| from small pool | 199 KiB | 1 MiB | 1 MiB | 1 MiB |\\n|---------------------------------------------------------------------------|\\n| Requested memory | 699611 KiB | 1903 MiB | 3735 MiB | 3052 MiB |\\n| from large pool | 699413 KiB | 1903 MiB | 3734 MiB | 3051 MiB |\\n| from small pool | 197 KiB | 1 MiB | 1 MiB | 1 MiB |\\n|---------------------------------------------------------------------------|\\n| GPU reserved memory | 710656 KiB | 1918 MiB | 1918 MiB | 1224 MiB |\\n| from large pool | 708608 KiB | 1916 MiB | 1916 MiB | 1224 MiB |\\n| from small pool | 2048 KiB | 2 MiB | 2 MiB | 0 MiB |\\n|---------------------------------------------------------------------------|\\n| Non-releasable memory | 11043 KiB | 19364 KiB | 28939 KiB | 17896 KiB |\\n| from large pool | 9194 KiB | 17514 KiB | 25954 KiB | 16760 KiB |\\n| from small pool | 1849 KiB | 1950 KiB | 2985 KiB | 1136 KiB |\\n|---------------------------------------------------------------------------|\\n| Allocations | 10 | 17 | 38 | 28 |\\n| from large pool | 5 | 7 | 10 | 5 |\\n| from small pool | 5 | 11 | 28 | 23 |\\n|---------------------------------------------------------------------------|\\n| Active allocs | 10 | 17 | 38 | 28 |\\n| from large pool | 5 | 7 | 10 | 5 |\\n| from small pool | 5 | 11 | 28 | 23 |\\n|---------------------------------------------------------------------------|\\n| GPU reserved segments | 5 | 7 | 7 | 2 |\\n| from large pool | 4 | 6 | 6 | 2 |\\n| from small pool | 1 | 1 | 1 | 0 |\\n|---------------------------------------------------------------------------|\\n| Non-releasable allocs | 6 | 8 | 20 | 14 |\\n| from large pool | 4 | 6 | 9 | 5 |\\n| from small pool | 2 | 3 | 11 | 9 |\\n|---------------------------------------------------------------------------|\\n| Oversize allocations | 0 | 0 | 0 | 0 |\\n|---------------------------------------------------------------------------|\\n| Oversize GPU segments | 0 | 0 | 0 | 0 |\\n|===========================================================================|\\n'" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.cuda.memory_summary(device=None, abbreviated=False)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "import os\n", "os.environ[\"PYTORCH_CUDA_ALLOC_CONF\"] = \"max_split_size_mb:256\"" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "device = 'cuda'\n", "model = SimpleBigramNeuralLanguageModel(vocab_size, embed_size).to(device)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "epoch: = 1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/gedin/.local/lib/python3.8/site-packages/torch/nn/modules/container.py:217: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n", " input = module(input)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "0 tensor(5.9599, device='cuda:0', grad_fn=)\n", "1000\n", "100 tensor(6.1015, device='cuda:0', grad_fn=)\n", "200 tensor(5.9708, device='cuda:0', grad_fn=)\n", "2000\n", "300 tensor(6.2176, device='cuda:0', grad_fn=)\n", "3000\n", "400 tensor(5.9401, device='cuda:0', grad_fn=)\n", "4000\n", "500 tensor(6.2084, device='cuda:0', grad_fn=)\n", "5000\n", "600 tensor(5.9736, device='cuda:0', grad_fn=)\n", "6000\n", "700 tensor(6.1423, device='cuda:0', grad_fn=)\n", "7000\n", "800 tensor(5.7344, device='cuda:0', grad_fn=)\n", "8000\n", "900 tensor(6.0950, device='cuda:0', grad_fn=)\n", "9000\n", "1000 tensor(5.8473, device='cuda:0', grad_fn=)\n", "10000\n", "1100 tensor(6.0612, device='cuda:0', grad_fn=)\n", "11000\n", "1200 tensor(6.1509, device='cuda:0', grad_fn=)\n", "12000\n", "1300 tensor(6.0760, device='cuda:0', grad_fn=)\n", "13000\n", "1400 tensor(6.2047, device='cuda:0', grad_fn=)\n", "14000\n", "1500 tensor(6.1186, device='cuda:0', grad_fn=)\n", "15000\n", "1600 tensor(5.8722, device='cuda:0', grad_fn=)\n", "16000\n", "1700 tensor(5.8741, device='cuda:0', grad_fn=)\n", "17000\n", "1800 tensor(5.8971, device='cuda:0', grad_fn=)\n", "18000\n", "1900 tensor(5.8521, device='cuda:0', grad_fn=)\n", "19000\n", "2000 tensor(5.9434, device='cuda:0', grad_fn=)\n", "20000\n", "2100 tensor(6.0348, device='cuda:0', grad_fn=)\n", "21000\n", "2200 tensor(5.8840, device='cuda:0', grad_fn=)\n", "22000\n", "2300 tensor(5.8641, device='cuda:0', grad_fn=)\n", "23000\n", "2400 tensor(5.9068, device='cuda:0', grad_fn=)\n", "24000\n", "2500 tensor(5.9170, device='cuda:0', grad_fn=)\n", "25000\n", "2600 tensor(5.9812, device='cuda:0', grad_fn=)\n", "26000\n", "2700 tensor(5.8985, device='cuda:0', grad_fn=)\n", "27000\n", "2800 tensor(6.0008, device='cuda:0', grad_fn=)\n", "28000\n", "2900 tensor(6.1230, device='cuda:0', grad_fn=)\n", "29000\n", "3000 tensor(5.8770, device='cuda:0', grad_fn=)\n", "30000\n", "3100 tensor(5.9268, device='cuda:0', grad_fn=)\n", "31000\n", "3200 tensor(5.8530, device='cuda:0', grad_fn=)\n", "32000\n", "3300 tensor(5.8436, device='cuda:0', grad_fn=)\n", "33000\n", "3400 tensor(5.7692, device='cuda:0', grad_fn=)\n", "34000\n", "3500 tensor(5.8909, device='cuda:0', grad_fn=)\n", "35000\n", "3600 tensor(5.8325, device='cuda:0', grad_fn=)\n", "36000\n", "3700 tensor(5.8082, device='cuda:0', grad_fn=)\n", "37000\n", "3800 tensor(5.8106, device='cuda:0', grad_fn=)\n", "38000\n", "3900 tensor(5.6382, device='cuda:0', grad_fn=)\n", "39000\n", "4000 tensor(5.6596, device='cuda:0', grad_fn=)\n", "40000\n", "4100 tensor(5.9587, device='cuda:0', grad_fn=)\n", "41000\n", "4200 tensor(5.8862, device='cuda:0', grad_fn=)\n", "42000\n", "4300 tensor(5.9541, device='cuda:0', grad_fn=)\n", "43000\n", "4400 tensor(5.8681, device='cuda:0', grad_fn=)\n", "44000\n", "4500 tensor(5.6963, device='cuda:0', grad_fn=)\n", "45000\n", "4600 tensor(6.0707, device='cuda:0', grad_fn=)\n", "46000\n", "4700 tensor(5.7091, device='cuda:0', grad_fn=)\n", "47000\n", "4800 tensor(5.8139, device='cuda:0', grad_fn=)\n", "48000\n", "4900 tensor(5.8696, device='cuda:0', grad_fn=)\n", "49000\n", "5000 tensor(5.8844, device='cuda:0', grad_fn=)\n", "50000\n", "5100 tensor(5.9806, device='cuda:0', grad_fn=)\n", "51000\n", "5200 tensor(6.0075, device='cuda:0', grad_fn=)\n", "52000\n", "5300 tensor(6.0588, device='cuda:0', grad_fn=)\n", "53000\n", "5400 tensor(5.8456, device='cuda:0', grad_fn=)\n", "54000\n", "5500 tensor(5.9166, device='cuda:0', grad_fn=)\n", "55000\n", "5600 tensor(5.6528, device='cuda:0', grad_fn=)\n", "56000\n", "5700 tensor(5.8988, device='cuda:0', grad_fn=)\n", "57000\n", "5800 tensor(5.9132, device='cuda:0', grad_fn=)\n", "58000\n", "5900 tensor(5.9460, device='cuda:0', grad_fn=)\n", "59000\n", "6000 tensor(5.7543, device='cuda:0', grad_fn=)\n", "60000\n", "6100 tensor(5.8256, device='cuda:0', grad_fn=)\n", "61000\n", "6200 tensor(5.9448, device='cuda:0', grad_fn=)\n", "62000\n", "6300 tensor(5.7601, device='cuda:0', grad_fn=)\n", "63000\n", "6400 tensor(5.7091, device='cuda:0', grad_fn=)\n", "64000\n", "6500 tensor(5.5621, device='cuda:0', grad_fn=)\n", "65000\n", "6600 tensor(5.7094, device='cuda:0', grad_fn=)\n", "66000\n", "6700 tensor(5.6785, device='cuda:0', grad_fn=)\n", "67000\n", "6800 tensor(5.9249, device='cuda:0', grad_fn=)\n", "68000\n", "6900 tensor(5.8775, device='cuda:0', grad_fn=)\n", "69000\n", "7000 tensor(5.8075, device='cuda:0', grad_fn=)\n", "70000\n", "7100 tensor(5.5748, device='cuda:0', grad_fn=)\n", "71000\n", "7200 tensor(5.7217, device='cuda:0', grad_fn=)\n", "72000\n", "7300 tensor(5.9124, device='cuda:0', grad_fn=)\n", "73000\n", "7400 tensor(5.7197, device='cuda:0', grad_fn=)\n", "74000\n", "7500 tensor(5.6429, device='cuda:0', grad_fn=)\n", "75000\n", "7600 tensor(5.6847, device='cuda:0', grad_fn=)\n", "76000\n", "7700 tensor(5.7197, device='cuda:0', grad_fn=)\n", "77000\n", "7800 tensor(5.8559, device='cuda:0', grad_fn=)\n", "78000\n", "7900 tensor(5.5600, device='cuda:0', grad_fn=)\n", "79000\n", "8000 tensor(5.6288, device='cuda:0', grad_fn=)\n", "80000\n", "8100 tensor(5.7767, device='cuda:0', grad_fn=)\n", "81000\n", "8200 tensor(5.8037, device='cuda:0', grad_fn=)\n", "82000\n", "8300 tensor(5.7344, device='cuda:0', grad_fn=)\n", "83000\n", "8400 tensor(5.8092, device='cuda:0', grad_fn=)\n", "84000\n", "8500 tensor(5.8847, device='cuda:0', grad_fn=)\n", "85000\n", "8600 tensor(5.8754, device='cuda:0', grad_fn=)\n", "86000\n", "8700 tensor(5.9227, device='cuda:0', grad_fn=)\n", "87000\n", "8800 tensor(5.8028, device='cuda:0', grad_fn=)\n", "88000\n", "8900 tensor(5.6476, device='cuda:0', grad_fn=)\n", "89000\n", "9000 tensor(5.7656, device='cuda:0', grad_fn=)\n", "90000\n", "9100 tensor(5.7805, device='cuda:0', grad_fn=)\n", "91000\n", "9200 tensor(5.6879, device='cuda:0', grad_fn=)\n", "92000\n", "9300 tensor(5.7098, device='cuda:0', grad_fn=)\n", "93000\n", "9400 tensor(5.5631, device='cuda:0', grad_fn=)\n", "94000\n", "9500 tensor(5.6497, device='cuda:0', grad_fn=)\n", "95000\n", "9600 tensor(5.7500, device='cuda:0', grad_fn=)\n", "96000\n", "9700 tensor(5.6607, device='cuda:0', grad_fn=)\n", "97000\n", "9800 tensor(5.7196, device='cuda:0', grad_fn=)\n", "9900 tensor(5.5987, device='cuda:0', grad_fn=)\n", "98000\n", "10000 tensor(5.7795, device='cuda:0', grad_fn=)\n", "99000\n", "10100 tensor(5.6980, device='cuda:0', grad_fn=)\n", "100000\n", "10200 tensor(5.6093, device='cuda:0', grad_fn=)\n", "101000\n", "10300 tensor(5.6792, device='cuda:0', grad_fn=)\n", "102000\n", "10400 tensor(5.7035, device='cuda:0', grad_fn=)\n", "103000\n", "10500 tensor(5.8282, device='cuda:0', grad_fn=)\n", "104000\n", "10600 tensor(5.8605, device='cuda:0', grad_fn=)\n", "105000\n", "10700 tensor(5.7354, device='cuda:0', grad_fn=)\n", "106000\n", "10800 tensor(5.8034, device='cuda:0', grad_fn=)\n", "107000\n", "10900 tensor(5.6194, device='cuda:0', grad_fn=)\n", "108000\n", "11000 tensor(5.8502, device='cuda:0', grad_fn=)\n", "109000\n", "11100 tensor(5.4406, device='cuda:0', grad_fn=)\n", "110000\n", "11200 tensor(5.6379, device='cuda:0', grad_fn=)\n", "111000\n", "11300 tensor(5.6668, device='cuda:0', grad_fn=)\n", "112000\n", "11400 tensor(5.6140, device='cuda:0', grad_fn=)\n", "113000\n", "11500 tensor(5.6565, device='cuda:0', grad_fn=)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "114000\n", "11600 tensor(5.6308, device='cuda:0', grad_fn=)\n", "115000\n", "11700 tensor(5.5680, device='cuda:0', grad_fn=)\n", "116000\n", "11800 tensor(5.7604, device='cuda:0', grad_fn=)\n", "117000\n", "11900 tensor(5.5792, device='cuda:0', grad_fn=)\n", "118000\n", "12000 tensor(5.7329, device='cuda:0', grad_fn=)\n", "119000\n", "12100 tensor(5.7726, device='cuda:0', grad_fn=)\n", "120000\n", "12200 tensor(5.7151, device='cuda:0', grad_fn=)\n", "121000\n", "12300 tensor(5.8561, device='cuda:0', grad_fn=)\n", "122000\n", "12400 tensor(5.6791, device='cuda:0', grad_fn=)\n", "123000\n", "12500 tensor(5.5574, device='cuda:0', grad_fn=)\n", "124000\n", "12600 tensor(5.6817, device='cuda:0', grad_fn=)\n", "125000\n", "12700 tensor(5.5375, device='cuda:0', grad_fn=)\n", "126000\n", "12800 tensor(5.7270, device='cuda:0', grad_fn=)\n", "127000\n", "12900 tensor(5.6252, device='cuda:0', grad_fn=)\n", "128000\n", "13000 tensor(5.4536, device='cuda:0', grad_fn=)\n", "129000\n", "13100 tensor(5.6091, device='cuda:0', grad_fn=)\n", "130000\n", "13200 tensor(5.7324, device='cuda:0', grad_fn=)\n", "131000\n", "13300 tensor(5.5253, device='cuda:0', grad_fn=)\n", "132000\n", "13400 tensor(5.6491, device='cuda:0', grad_fn=)\n", "133000\n", "13500 tensor(5.5728, device='cuda:0', grad_fn=)\n", "134000\n", "13600 tensor(5.6632, device='cuda:0', grad_fn=)\n", "135000\n", "13700 tensor(5.6678, device='cuda:0', grad_fn=)\n", "136000\n", "13800 tensor(5.6112, device='cuda:0', grad_fn=)\n", "137000\n", "13900 tensor(5.4884, device='cuda:0', grad_fn=)\n", "138000\n", "14000 tensor(5.7304, device='cuda:0', grad_fn=)\n", "139000\n", "14100 tensor(5.4326, device='cuda:0', grad_fn=)\n", "140000\n", "14200 tensor(5.7188, device='cuda:0', grad_fn=)\n", "141000\n", "14300 tensor(5.6519, device='cuda:0', grad_fn=)\n", "142000\n", "14400 tensor(5.5892, device='cuda:0', grad_fn=)\n", "143000\n", "14500 tensor(5.7225, device='cuda:0', grad_fn=)\n", "144000\n", "14600 tensor(5.7216, device='cuda:0', grad_fn=)\n", "145000\n", "14700 tensor(5.5748, device='cuda:0', grad_fn=)\n", "146000\n", "14800 tensor(6.0184, device='cuda:0', grad_fn=)\n", "147000\n", "14900 tensor(5.6781, device='cuda:0', grad_fn=)\n", "148000\n", "15000 tensor(5.6038, device='cuda:0', grad_fn=)\n", "149000\n", "15100 tensor(5.7875, device='cuda:0', grad_fn=)\n", "150000\n", "15200 tensor(5.6485, device='cuda:0', grad_fn=)\n", "151000\n", "15300 tensor(5.5927, device='cuda:0', grad_fn=)\n", "152000\n", "15400 tensor(5.5156, device='cuda:0', grad_fn=)\n", "153000\n", "15500 tensor(5.6556, device='cuda:0', grad_fn=)\n", "154000\n", "15600 tensor(5.6485, device='cuda:0', grad_fn=)\n", "155000\n", "15700 tensor(5.5904, device='cuda:0', grad_fn=)\n", "156000\n", "15800 tensor(5.4613, device='cuda:0', grad_fn=)\n", "157000\n", "15900 tensor(5.6254, device='cuda:0', grad_fn=)\n", "158000\n", "16000 tensor(5.4349, device='cuda:0', grad_fn=)\n", "159000\n", "16100 tensor(5.5205, device='cuda:0', grad_fn=)\n", "160000\n", "16200 tensor(5.8051, device='cuda:0', grad_fn=)\n", "161000\n", "16300 tensor(5.6452, device='cuda:0', grad_fn=)\n", "162000\n", "16400 tensor(5.6071, device='cuda:0', grad_fn=)\n", "163000\n", "16500 tensor(5.7237, device='cuda:0', grad_fn=)\n", "164000\n", "16600 tensor(5.5771, device='cuda:0', grad_fn=)\n", "165000\n", "16700 tensor(5.5355, device='cuda:0', grad_fn=)\n", "166000\n", "16800 tensor(5.6363, device='cuda:0', grad_fn=)\n", "167000\n", "16900 tensor(5.3746, device='cuda:0', grad_fn=)\n", "168000\n", "17000 tensor(5.6707, device='cuda:0', grad_fn=)\n", "169000\n", "17100 tensor(5.5359, device='cuda:0', grad_fn=)\n", "170000\n", "17200 tensor(5.6118, device='cuda:0', grad_fn=)\n", "171000\n", "17300 tensor(5.6740, device='cuda:0', grad_fn=)\n", "172000\n", "17400 tensor(5.4438, device='cuda:0', grad_fn=)\n", "173000\n", "17500 tensor(5.5001, device='cuda:0', grad_fn=)\n", "174000\n", "17600 tensor(5.4953, device='cuda:0', grad_fn=)\n", "175000\n", "17700 tensor(5.5398, device='cuda:0', grad_fn=)\n", "176000\n", "17800 tensor(5.6053, device='cuda:0', grad_fn=)\n", "177000\n", "17900 tensor(5.4726, device='cuda:0', grad_fn=)\n", "178000\n", "18000 tensor(5.6747, device='cuda:0', grad_fn=)\n", "179000\n", "18100 tensor(5.6238, device='cuda:0', grad_fn=)\n", "180000\n", "18200 tensor(5.5469, device='cuda:0', grad_fn=)\n", "181000\n", "18300 tensor(5.5299, device='cuda:0', grad_fn=)\n", "182000\n", "18400 tensor(5.6323, device='cuda:0', grad_fn=)\n", "183000\n", "18500 tensor(5.5893, device='cuda:0', grad_fn=)\n", "184000\n", "18600 tensor(5.7452, device='cuda:0', grad_fn=)\n", "185000\n", "18700 tensor(5.5576, device='cuda:0', grad_fn=)\n", "186000\n", "18800 tensor(5.7439, device='cuda:0', grad_fn=)\n", "187000\n", "18900 tensor(5.6106, device='cuda:0', grad_fn=)\n", "188000\n", "19000 tensor(5.6647, device='cuda:0', grad_fn=)\n", "189000\n", "19100 tensor(5.7728, device='cuda:0', grad_fn=)\n", "190000\n", "19200 tensor(5.6169, device='cuda:0', grad_fn=)\n", "191000\n", "19300 tensor(5.7852, device='cuda:0', grad_fn=)\n", "192000\n", "19400 tensor(5.5627, device='cuda:0', grad_fn=)\n", "193000\n", "19500 tensor(5.5682, device='cuda:0', grad_fn=)\n", "194000\n", "19600 tensor(5.5978, device='cuda:0', grad_fn=)\n", "195000\n", "19700 tensor(5.6453, device='cuda:0', grad_fn=)\n", "196000\n", "19800 tensor(5.4786, device='cuda:0', grad_fn=)\n", "197000\n", "19900 tensor(5.4894, device='cuda:0', grad_fn=)\n", "198000\n", "20000 tensor(5.4999, device='cuda:0', grad_fn=)\n", "199000\n", "20100 tensor(5.4881, device='cuda:0', grad_fn=)\n", "200000\n", "20200 tensor(5.3915, device='cuda:0', grad_fn=)\n", "201000\n", "20300 tensor(5.5216, device='cuda:0', grad_fn=)\n", "20400 tensor(5.5761, device='cuda:0', grad_fn=)\n", "202000\n", "20500 tensor(5.5586, device='cuda:0', grad_fn=)\n", "203000\n", "20600 tensor(5.7870, device='cuda:0', grad_fn=)\n", "204000\n", "20700 tensor(5.5776, device='cuda:0', grad_fn=)\n", "205000\n", "20800 tensor(5.4417, device='cuda:0', grad_fn=)\n", "206000\n", "20900 tensor(5.7186, device='cuda:0', grad_fn=)\n", "207000\n", "21000 tensor(5.5415, device='cuda:0', grad_fn=)\n", "208000\n", "21100 tensor(5.5141, device='cuda:0', grad_fn=)\n", "209000\n", "21200 tensor(5.4401, device='cuda:0', grad_fn=)\n", "210000\n", "21300 tensor(5.6511, device='cuda:0', grad_fn=)\n", "211000\n", "21400 tensor(5.6474, device='cuda:0', grad_fn=)\n", "212000\n", "21500 tensor(5.3946, device='cuda:0', grad_fn=)\n", "213000\n", "21600 tensor(5.3958, device='cuda:0', grad_fn=)\n", "214000\n", "21700 tensor(5.4040, device='cuda:0', grad_fn=)\n", "215000\n", "21800 tensor(5.5745, device='cuda:0', grad_fn=)\n", "216000\n", "21900 tensor(5.4996, device='cuda:0', grad_fn=)\n", "217000\n", "22000 tensor(5.5234, device='cuda:0', grad_fn=)\n", "218000\n", "22100 tensor(5.3870, device='cuda:0', grad_fn=)\n", "219000\n", "22200 tensor(5.2661, device='cuda:0', grad_fn=)\n", "220000\n", "22300 tensor(5.7031, device='cuda:0', grad_fn=)\n", "221000\n", "22400 tensor(5.3633, device='cuda:0', grad_fn=)\n", "222000\n", "22500 tensor(5.4404, device='cuda:0', grad_fn=)\n", "223000\n", "22600 tensor(5.5951, device='cuda:0', grad_fn=)\n", "224000\n", "22700 tensor(5.3901, device='cuda:0', grad_fn=)\n", "225000\n", "22800 tensor(5.6404, device='cuda:0', grad_fn=)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "226000\n", "22900 tensor(5.6646, device='cuda:0', grad_fn=)\n", "227000\n", "23000 tensor(5.5949, device='cuda:0', grad_fn=)\n", "228000\n", "23100 tensor(5.5284, device='cuda:0', grad_fn=)\n", "229000\n", "23200 tensor(5.5617, device='cuda:0', grad_fn=)\n", "230000\n", "23300 tensor(5.6426, device='cuda:0', grad_fn=)\n", "231000\n", "23400 tensor(5.7283, device='cuda:0', grad_fn=)\n", "232000\n", "23500 tensor(5.4558, device='cuda:0', grad_fn=)\n", "233000\n", "23600 tensor(5.4600, device='cuda:0', grad_fn=)\n", "234000\n", "23700 tensor(5.4961, device='cuda:0', grad_fn=)\n", "235000\n", "23800 tensor(5.3373, device='cuda:0', grad_fn=)\n", "236000\n", "23900 tensor(5.4470, device='cuda:0', grad_fn=)\n", "237000\n", "24000 tensor(5.4346, device='cuda:0', grad_fn=)\n", "238000\n", "24100 tensor(5.5112, device='cuda:0', grad_fn=)\n", "239000\n", "24200 tensor(5.6918, device='cuda:0', grad_fn=)\n", "240000\n", "24300 tensor(5.6115, device='cuda:0', grad_fn=)\n", "241000\n", "24400 tensor(5.7404, device='cuda:0', grad_fn=)\n", "242000\n", "24500 tensor(5.4982, device='cuda:0', grad_fn=)\n", "243000\n", "24600 tensor(5.6136, device='cuda:0', grad_fn=)\n", "244000\n", "24700 tensor(5.5225, device='cuda:0', grad_fn=)\n", "245000\n", "24800 tensor(5.5563, device='cuda:0', grad_fn=)\n", "246000\n", "24900 tensor(5.6283, device='cuda:0', grad_fn=)\n", "247000\n", "25000 tensor(5.6176, device='cuda:0', grad_fn=)\n", "248000\n", "25100 tensor(5.5795, device='cuda:0', grad_fn=)\n", "249000\n", "25200 tensor(5.5831, device='cuda:0', grad_fn=)\n", "250000\n", "25300 tensor(5.5894, device='cuda:0', grad_fn=)\n", "251000\n", "25400 tensor(5.5670, device='cuda:0', grad_fn=)\n", "252000\n", "25500 tensor(5.5016, device='cuda:0', grad_fn=)\n", "253000\n", "25600 tensor(5.7909, device='cuda:0', grad_fn=)\n", "254000\n", "25700 tensor(5.5229, device='cuda:0', grad_fn=)\n", "255000\n", "25800 tensor(5.6035, device='cuda:0', grad_fn=)\n", "256000\n", "25900 tensor(5.5293, device='cuda:0', grad_fn=)\n", "257000\n", "26000 tensor(5.5553, device='cuda:0', grad_fn=)\n", "258000\n", "26100 tensor(5.4476, device='cuda:0', grad_fn=)\n", "259000\n", "26200 tensor(5.3721, device='cuda:0', grad_fn=)\n", "260000\n", "26300 tensor(5.6142, device='cuda:0', grad_fn=)\n", "261000\n", "26400 tensor(5.6202, device='cuda:0', grad_fn=)\n", "262000\n", "26500 tensor(5.3529, device='cuda:0', grad_fn=)\n", "263000\n", "26600 tensor(5.7148, device='cuda:0', grad_fn=)\n", "264000\n", "26700 tensor(5.5755, device='cuda:0', grad_fn=)\n", "265000\n", "26800 tensor(5.7480, device='cuda:0', grad_fn=)\n", "266000\n", "26900 tensor(5.5025, device='cuda:0', grad_fn=)\n", "267000\n", "27000 tensor(5.4017, device='cuda:0', grad_fn=)\n", "268000\n", "27100 tensor(5.3996, device='cuda:0', grad_fn=)\n", "269000\n", "27200 tensor(5.4862, device='cuda:0', grad_fn=)\n", "270000\n", "27300 tensor(5.6392, device='cuda:0', grad_fn=)\n", "271000\n", "27400 tensor(5.5634, device='cuda:0', grad_fn=)\n", "272000\n", "27500 tensor(5.4420, device='cuda:0', grad_fn=)\n", "273000\n", "27600 tensor(5.7835, device='cuda:0', grad_fn=)\n", "274000\n", "27700 tensor(5.5555, device='cuda:0', grad_fn=)\n", "275000\n", "27800 tensor(5.5381, device='cuda:0', grad_fn=)\n", "276000\n", "27900 tensor(5.6515, device='cuda:0', grad_fn=)\n", "277000\n", "28000 tensor(5.5254, device='cuda:0', grad_fn=)\n", "278000\n", "28100 tensor(5.4929, device='cuda:0', grad_fn=)\n", "279000\n", "28200 tensor(5.6218, device='cuda:0', grad_fn=)\n", "280000\n", "28300 tensor(5.2878, device='cuda:0', grad_fn=)\n", "281000\n", "28400 tensor(5.7112, device='cuda:0', grad_fn=)\n", "282000\n", "28500 tensor(5.5490, device='cuda:0', grad_fn=)\n", "283000\n", "28600 tensor(5.4572, device='cuda:0', grad_fn=)\n", "284000\n", "28700 tensor(5.6349, device='cuda:0', grad_fn=)\n", "285000\n", "28800 tensor(5.6607, device='cuda:0', grad_fn=)\n", "286000\n", "28900 tensor(5.5422, device='cuda:0', grad_fn=)\n", "287000\n", "29000 tensor(5.4277, device='cuda:0', grad_fn=)\n", "288000\n", "29100 tensor(5.1870, device='cuda:0', grad_fn=)\n", "289000\n", "29200 tensor(5.3593, device='cuda:0', grad_fn=)\n", "290000\n", "29300 tensor(5.6512, device='cuda:0', grad_fn=)\n", "291000\n", "29400 tensor(5.8051, device='cuda:0', grad_fn=)\n", "292000\n", "29500 tensor(5.5308, device='cuda:0', grad_fn=)\n", "293000\n", "29600 tensor(5.3791, device='cuda:0', grad_fn=)\n", "294000\n", "29700 tensor(5.6108, device='cuda:0', grad_fn=)\n", "295000\n", "29800 tensor(5.4015, device='cuda:0', grad_fn=)\n", "296000\n", "29900 tensor(5.6953, device='cuda:0', grad_fn=)\n", "297000\n", "30000 tensor(5.3925, device='cuda:0', grad_fn=)\n", "298000\n", "30100 tensor(5.4241, device='cuda:0', grad_fn=)\n", "299000\n", "30200 tensor(5.4216, device='cuda:0', grad_fn=)\n", "300000\n", "30300 tensor(5.5074, device='cuda:0', grad_fn=)\n", "301000\n", "30400 tensor(5.3631, device='cuda:0', grad_fn=)\n", "302000\n", "30500 tensor(5.5690, device='cuda:0', grad_fn=)\n", "30600 tensor(5.4734, device='cuda:0', grad_fn=)\n", "303000\n", "30700 tensor(5.5061, device='cuda:0', grad_fn=)\n", "304000\n", "30800 tensor(5.5709, device='cuda:0', grad_fn=)\n", "305000\n", "30900 tensor(5.5478, device='cuda:0', grad_fn=)\n", "306000\n", "31000 tensor(5.6687, device='cuda:0', grad_fn=)\n", "307000\n", "31100 tensor(5.2899, device='cuda:0', grad_fn=)\n", "308000\n", "31200 tensor(5.3663, device='cuda:0', grad_fn=)\n", "309000\n", "31300 tensor(5.6274, device='cuda:0', grad_fn=)\n", "310000\n", "31400 tensor(5.4358, device='cuda:0', grad_fn=)\n", "311000\n", "31500 tensor(5.5738, device='cuda:0', grad_fn=)\n", "312000\n", "31600 tensor(5.5612, device='cuda:0', grad_fn=)\n", "313000\n", "31700 tensor(5.5104, device='cuda:0', grad_fn=)\n", "314000\n", "31800 tensor(5.6343, device='cuda:0', grad_fn=)\n", "315000\n", "31900 tensor(5.2243, device='cuda:0', grad_fn=)\n", "316000\n", "32000 tensor(5.4320, device='cuda:0', grad_fn=)\n", "317000\n", "32100 tensor(5.3344, device='cuda:0', grad_fn=)\n", "318000\n", "32200 tensor(5.6543, device='cuda:0', grad_fn=)\n", "319000\n", "32300 tensor(5.6512, device='cuda:0', grad_fn=)\n", "320000\n", "32400 tensor(5.6237, device='cuda:0', grad_fn=)\n", "321000\n", "32500 tensor(5.4246, device='cuda:0', grad_fn=)\n", "322000\n", "32600 tensor(5.5469, device='cuda:0', grad_fn=)\n", "323000\n", "32700 tensor(5.5338, device='cuda:0', grad_fn=)\n", "324000\n", "32800 tensor(5.6954, device='cuda:0', grad_fn=)\n", "325000\n", "32900 tensor(5.5754, device='cuda:0', grad_fn=)\n", "326000\n", "33000 tensor(5.3334, device='cuda:0', grad_fn=)\n", "327000\n", "33100 tensor(5.5284, device='cuda:0', grad_fn=)\n", "328000\n", "33200 tensor(5.6350, device='cuda:0', grad_fn=)\n", "329000\n", "33300 tensor(5.4312, device='cuda:0', grad_fn=)\n", "330000\n", "33400 tensor(5.6854, device='cuda:0', grad_fn=)\n", "331000\n", "33500 tensor(5.4921, device='cuda:0', grad_fn=)\n", "332000\n", "33600 tensor(5.4345, device='cuda:0', grad_fn=)\n", "333000\n", "33700 tensor(5.4950, device='cuda:0', grad_fn=)\n", "334000\n", "33800 tensor(5.5757, device='cuda:0', grad_fn=)\n", "335000\n", "33900 tensor(5.3466, device='cuda:0', grad_fn=)\n", "336000\n", "34000 tensor(5.5373, device='cuda:0', grad_fn=)\n", "337000\n", "34100 tensor(5.5144, device='cuda:0', grad_fn=)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "338000\n", "34200 tensor(5.5543, device='cuda:0', grad_fn=)\n", "339000\n", "34300 tensor(5.3564, device='cuda:0', grad_fn=)\n", "340000\n", "34400 tensor(5.8091, device='cuda:0', grad_fn=)\n", "341000\n", "34500 tensor(5.6699, device='cuda:0', grad_fn=)\n", "342000\n", "34600 tensor(5.5536, device='cuda:0', grad_fn=)\n", "343000\n", "34700 tensor(5.6261, device='cuda:0', grad_fn=)\n", "344000\n", "34800 tensor(5.6504, device='cuda:0', grad_fn=)\n", "345000\n", "34900 tensor(5.7067, device='cuda:0', grad_fn=)\n", "346000\n", "35000 tensor(5.7307, device='cuda:0', grad_fn=)\n", "347000\n", "35100 tensor(5.4831, device='cuda:0', grad_fn=)\n", "348000\n", "35200 tensor(5.4367, device='cuda:0', grad_fn=)\n", "349000\n", "35300 tensor(5.6503, device='cuda:0', grad_fn=)\n", "350000\n", "35400 tensor(5.2892, device='cuda:0', grad_fn=)\n", "351000\n", "35500 tensor(5.4198, device='cuda:0', grad_fn=)\n", "352000\n", "35600 tensor(5.4870, device='cuda:0', grad_fn=)\n", "353000\n", "35700 tensor(5.4489, device='cuda:0', grad_fn=)\n", "354000\n", "35800 tensor(5.5170, device='cuda:0', grad_fn=)\n", "355000\n", "35900 tensor(5.4699, device='cuda:0', grad_fn=)\n", "356000\n", "36000 tensor(5.2451, device='cuda:0', grad_fn=)\n", "357000\n", "36100 tensor(5.6311, device='cuda:0', grad_fn=)\n", "358000\n", "36200 tensor(5.5157, device='cuda:0', grad_fn=)\n", "359000\n", "36300 tensor(5.7751, device='cuda:0', grad_fn=)\n", "360000\n", "36400 tensor(5.4740, device='cuda:0', grad_fn=)\n", "361000\n", "36500 tensor(5.4746, device='cuda:0', grad_fn=)\n", "362000\n", "36600 tensor(5.5244, device='cuda:0', grad_fn=)\n", "363000\n", "36700 tensor(5.3037, device='cuda:0', grad_fn=)\n", "364000\n", "36800 tensor(5.4238, device='cuda:0', grad_fn=)\n", "365000\n", "36900 tensor(5.5203, device='cuda:0', grad_fn=)\n", "366000\n", "37000 tensor(5.4431, device='cuda:0', grad_fn=)\n", "367000\n", "37100 tensor(5.4286, device='cuda:0', grad_fn=)\n", "368000\n", "37200 tensor(5.5108, device='cuda:0', grad_fn=)\n", "369000\n", "37300 tensor(5.4229, device='cuda:0', grad_fn=)\n", "370000\n", "37400 tensor(5.8406, device='cuda:0', grad_fn=)\n", "371000\n", "37500 tensor(5.4602, device='cuda:0', grad_fn=)\n", "372000\n", "37600 tensor(5.4417, device='cuda:0', grad_fn=)\n", "373000\n", "37700 tensor(5.6200, device='cuda:0', grad_fn=)\n", "374000\n", "37800 tensor(5.4527, device='cuda:0', grad_fn=)\n", "375000\n", "37900 tensor(5.4631, device='cuda:0', grad_fn=)\n", "376000\n", "38000 tensor(5.5196, device='cuda:0', grad_fn=)\n", "377000\n", "38100 tensor(5.5436, device='cuda:0', grad_fn=)\n", "378000\n", "38200 tensor(5.5269, device='cuda:0', grad_fn=)\n", "379000\n", "38300 tensor(5.4716, device='cuda:0', grad_fn=)\n", "380000\n", "38400 tensor(5.5081, device='cuda:0', grad_fn=)\n", "381000\n", "38500 tensor(5.5249, device='cuda:0', grad_fn=)\n", "382000\n", "38600 tensor(5.5018, device='cuda:0', grad_fn=)\n", "383000\n", "38700 tensor(5.4845, device='cuda:0', grad_fn=)\n", "384000\n", "38800 tensor(5.5505, device='cuda:0', grad_fn=)\n", "385000\n", "38900 tensor(5.6658, device='cuda:0', grad_fn=)\n", "386000\n", "39000 tensor(5.3333, device='cuda:0', grad_fn=)\n", "387000\n", "39100 tensor(5.5598, device='cuda:0', grad_fn=)\n", "388000\n", "39200 tensor(5.6624, device='cuda:0', grad_fn=)\n", "389000\n", "39300 tensor(5.4714, device='cuda:0', grad_fn=)\n", "390000\n", "39400 tensor(5.5470, device='cuda:0', grad_fn=)\n", "391000\n", "39500 tensor(5.6905, device='cuda:0', grad_fn=)\n", "392000\n", "39600 tensor(5.3592, device='cuda:0', grad_fn=)\n", "393000\n", "39700 tensor(5.3170, device='cuda:0', grad_fn=)\n", "394000\n", "39800 tensor(5.4491, device='cuda:0', grad_fn=)\n", "395000\n", "39900 tensor(5.2872, device='cuda:0', grad_fn=)\n", "396000\n", "40000 tensor(5.3865, device='cuda:0', grad_fn=)\n", "397000\n", "40100 tensor(5.4536, device='cuda:0', grad_fn=)\n", "398000\n", "40200 tensor(5.4382, device='cuda:0', grad_fn=)\n", "399000\n", "40300 tensor(5.4819, device='cuda:0', grad_fn=)\n", "40400 tensor(5.5250, device='cuda:0', grad_fn=)\n", "400000\n", "40500 tensor(5.4396, device='cuda:0', grad_fn=)\n", "401000\n", "40600 tensor(5.5062, device='cuda:0', grad_fn=)\n", "402000\n", "40700 tensor(5.5362, device='cuda:0', grad_fn=)\n", "403000\n", "40800 tensor(5.5015, device='cuda:0', grad_fn=)\n", "404000\n", "40900 tensor(5.4610, device='cuda:0', grad_fn=)\n", "405000\n", "41000 tensor(5.5083, device='cuda:0', grad_fn=)\n", "406000\n", "41100 tensor(5.4346, device='cuda:0', grad_fn=)\n", "407000\n", "41200 tensor(5.3340, device='cuda:0', grad_fn=)\n", "408000\n", "41300 tensor(5.4608, device='cuda:0', grad_fn=)\n", "409000\n", "41400 tensor(5.3758, device='cuda:0', grad_fn=)\n", "410000\n", "41500 tensor(5.5160, device='cuda:0', grad_fn=)\n", "411000\n", "41600 tensor(5.4290, device='cuda:0', grad_fn=)\n", "412000\n", "41700 tensor(5.4426, device='cuda:0', grad_fn=)\n", "413000\n", "41800 tensor(5.4764, device='cuda:0', grad_fn=)\n", "414000\n", "41900 tensor(5.4730, device='cuda:0', grad_fn=)\n", "415000\n", "42000 tensor(5.6150, device='cuda:0', grad_fn=)\n", "416000\n", "42100 tensor(5.3622, device='cuda:0', grad_fn=)\n", "417000\n", "42200 tensor(5.4380, device='cuda:0', grad_fn=)\n", "418000\n", "42300 tensor(5.5031, device='cuda:0', grad_fn=)\n", "419000\n", "42400 tensor(5.3124, device='cuda:0', grad_fn=)\n", "420000\n", "42500 tensor(5.4812, device='cuda:0', grad_fn=)\n", "421000\n", "42600 tensor(5.2723, device='cuda:0', grad_fn=)\n", "422000\n", "42700 tensor(5.5998, device='cuda:0', grad_fn=)\n", "423000\n", "42800 tensor(5.5254, device='cuda:0', grad_fn=)\n", "424000\n", "42900 tensor(5.3716, device='cuda:0', grad_fn=)\n", "425000\n", "43000 tensor(5.5020, device='cuda:0', grad_fn=)\n", "426000\n", "43100 tensor(5.5091, device='cuda:0', grad_fn=)\n", "427000\n", "43200 tensor(5.3182, device='cuda:0', grad_fn=)\n", "428000\n", "43300 tensor(5.4001, device='cuda:0', grad_fn=)\n", "429000\n", "43400 tensor(5.5150, device='cuda:0', grad_fn=)\n", "430000\n", "43500 tensor(5.2440, device='cuda:0', grad_fn=)\n", "431000\n", "43600 tensor(5.4439, device='cuda:0', grad_fn=)\n", "432000\n", "epoch: = 2\n", "0 tensor(5.3953, device='cuda:0', grad_fn=)\n", "1000\n", "100 tensor(5.4847, device='cuda:0', grad_fn=)\n", "200 tensor(5.3626, device='cuda:0', grad_fn=)\n", "2000\n", "300 tensor(5.4127, device='cuda:0', grad_fn=)\n", "3000\n", "400 tensor(5.3734, device='cuda:0', grad_fn=)\n", "4000\n", "500 tensor(5.5564, device='cuda:0', grad_fn=)\n", "5000\n", "600 tensor(5.3391, device='cuda:0', grad_fn=)\n", "6000\n", "700 tensor(5.6198, device='cuda:0', grad_fn=)\n", "7000\n", "800 tensor(5.2255, device='cuda:0', grad_fn=)\n", "8000\n", "900 tensor(5.5161, device='cuda:0', grad_fn=)\n", "9000\n", "1000 tensor(5.3517, device='cuda:0', grad_fn=)\n", "10000\n", "1100 tensor(5.5420, device='cuda:0', grad_fn=)\n", "11000\n", "1200 tensor(5.6031, device='cuda:0', grad_fn=)\n", "12000\n", "1300 tensor(5.5343, device='cuda:0', grad_fn=)\n", "13000\n", "1400 tensor(5.5547, device='cuda:0', grad_fn=)\n", "14000\n", "1500 tensor(5.6080, device='cuda:0', grad_fn=)\n", "15000\n", "1600 tensor(5.2940, device='cuda:0', grad_fn=)\n", "16000\n", "1700 tensor(5.3671, device='cuda:0', grad_fn=)\n", "17000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "1800 tensor(5.3777, device='cuda:0', grad_fn=)\n", "18000\n", "1900 tensor(5.3593, device='cuda:0', grad_fn=)\n", "19000\n", "2000 tensor(5.4348, device='cuda:0', grad_fn=)\n", "20000\n", "2100 tensor(5.5513, device='cuda:0', grad_fn=)\n", "21000\n", "2200 tensor(5.3939, device='cuda:0', grad_fn=)\n", "22000\n", "2300 tensor(5.4063, device='cuda:0', grad_fn=)\n", "23000\n", "2400 tensor(5.4092, device='cuda:0', grad_fn=)\n", "24000\n", "2500 tensor(5.4460, device='cuda:0', grad_fn=)\n", "25000\n", "2600 tensor(5.4738, device='cuda:0', grad_fn=)\n", "26000\n", "2700 tensor(5.4848, device='cuda:0', grad_fn=)\n", "27000\n", "2800 tensor(5.5244, device='cuda:0', grad_fn=)\n", "28000\n", "2900 tensor(5.6711, device='cuda:0', grad_fn=)\n", "29000\n", "3000 tensor(5.4024, device='cuda:0', grad_fn=)\n", "30000\n", "3100 tensor(5.4842, device='cuda:0', grad_fn=)\n", "31000\n", "3200 tensor(5.4863, device='cuda:0', grad_fn=)\n", "32000\n", "3300 tensor(5.4114, device='cuda:0', grad_fn=)\n", "33000\n", "3400 tensor(5.3231, device='cuda:0', grad_fn=)\n", "34000\n", "3500 tensor(5.4598, device='cuda:0', grad_fn=)\n", "35000\n", "3600 tensor(5.4579, device='cuda:0', grad_fn=)\n", "36000\n", "3700 tensor(5.3890, device='cuda:0', grad_fn=)\n", "37000\n", "3800 tensor(5.4162, device='cuda:0', grad_fn=)\n", "38000\n", "3900 tensor(5.2854, device='cuda:0', grad_fn=)\n", "39000\n", "4000 tensor(5.3370, device='cuda:0', grad_fn=)\n", "40000\n", "4100 tensor(5.5078, device='cuda:0', grad_fn=)\n", "41000\n", "4200 tensor(5.5341, device='cuda:0', grad_fn=)\n", "42000\n", "4300 tensor(5.4704, device='cuda:0', grad_fn=)\n", "43000\n", "4400 tensor(5.4990, device='cuda:0', grad_fn=)\n", "44000\n", "4500 tensor(5.3300, device='cuda:0', grad_fn=)\n", "45000\n", "4600 tensor(5.6674, device='cuda:0', grad_fn=)\n", "46000\n", "4700 tensor(5.3622, device='cuda:0', grad_fn=)\n", "47000\n", "4800 tensor(5.4762, device='cuda:0', grad_fn=)\n", "48000\n", "4900 tensor(5.5403, device='cuda:0', grad_fn=)\n", "49000\n", "5000 tensor(5.5359, device='cuda:0', grad_fn=)\n", "50000\n", "5100 tensor(5.6058, device='cuda:0', grad_fn=)\n", "51000\n", "5200 tensor(5.6209, device='cuda:0', grad_fn=)\n", "52000\n", "5300 tensor(5.6273, device='cuda:0', grad_fn=)\n", "53000\n", "5400 tensor(5.4695, device='cuda:0', grad_fn=)\n", "54000\n", "5500 tensor(5.5771, device='cuda:0', grad_fn=)\n", "55000\n", "5600 tensor(5.3552, device='cuda:0', grad_fn=)\n", "56000\n", "5700 tensor(5.5957, device='cuda:0', grad_fn=)\n", "57000\n", "5800 tensor(5.5952, device='cuda:0', grad_fn=)\n", "58000\n", "5900 tensor(5.5643, device='cuda:0', grad_fn=)\n", "59000\n", "6000 tensor(5.4346, device='cuda:0', grad_fn=)\n", "60000\n", "6100 tensor(5.4620, device='cuda:0', grad_fn=)\n", "61000\n", "6200 tensor(5.6256, device='cuda:0', grad_fn=)\n", "62000\n", "6300 tensor(5.4832, device='cuda:0', grad_fn=)\n", "63000\n", "6400 tensor(5.4063, device='cuda:0', grad_fn=)\n", "64000\n", "6500 tensor(5.2587, device='cuda:0', grad_fn=)\n", "65000\n", "6600 tensor(5.4320, device='cuda:0', grad_fn=)\n", "66000\n", "6700 tensor(5.3770, device='cuda:0', grad_fn=)\n", "67000\n", "6800 tensor(5.6077, device='cuda:0', grad_fn=)\n", "68000\n", "6900 tensor(5.5788, device='cuda:0', grad_fn=)\n", "69000\n", "7000 tensor(5.4929, device='cuda:0', grad_fn=)\n", "70000\n", "7100 tensor(5.2828, device='cuda:0', grad_fn=)\n", "71000\n", "7200 tensor(5.3992, device='cuda:0', grad_fn=)\n", "72000\n", "7300 tensor(5.6273, device='cuda:0', grad_fn=)\n", "73000\n", "7400 tensor(5.4385, device='cuda:0', grad_fn=)\n", "74000\n", "7500 tensor(5.3176, device='cuda:0', grad_fn=)\n", "75000\n", "7600 tensor(5.3834, device='cuda:0', grad_fn=)\n", "76000\n", "7700 tensor(5.4532, device='cuda:0', grad_fn=)\n", "77000\n", "7800 tensor(5.5669, device='cuda:0', grad_fn=)\n", "78000\n", "7900 tensor(5.2508, device='cuda:0', grad_fn=)\n", "79000\n", "8000 tensor(5.3027, device='cuda:0', grad_fn=)\n", "80000\n", "8100 tensor(5.4813, device='cuda:0', grad_fn=)\n", "81000\n", "8200 tensor(5.4822, device='cuda:0', grad_fn=)\n", "82000\n", "8300 tensor(5.4510, device='cuda:0', grad_fn=)\n", "83000\n", "8400 tensor(5.5712, device='cuda:0', grad_fn=)\n", "84000\n", "8500 tensor(5.5634, device='cuda:0', grad_fn=)\n", "85000\n", "8600 tensor(5.5616, device='cuda:0', grad_fn=)\n", "86000\n", "8700 tensor(5.6568, device='cuda:0', grad_fn=)\n", "87000\n", "8800 tensor(5.5397, device='cuda:0', grad_fn=)\n", "88000\n", "8900 tensor(5.3852, device='cuda:0', grad_fn=)\n", "89000\n", "9000 tensor(5.5022, device='cuda:0', grad_fn=)\n", "90000\n", "9100 tensor(5.5088, device='cuda:0', grad_fn=)\n", "91000\n", "9200 tensor(5.4214, device='cuda:0', grad_fn=)\n", "92000\n", "9300 tensor(5.4641, device='cuda:0', grad_fn=)\n", "93000\n", "9400 tensor(5.3085, device='cuda:0', grad_fn=)\n", "94000\n", "9500 tensor(5.3852, device='cuda:0', grad_fn=)\n", "95000\n", "9600 tensor(5.5097, device='cuda:0', grad_fn=)\n", "96000\n", "9700 tensor(5.4373, device='cuda:0', grad_fn=)\n", "97000\n", "9800 tensor(5.4786, device='cuda:0', grad_fn=)\n", "9900 tensor(5.3198, device='cuda:0', grad_fn=)\n", "98000\n", "10000 tensor(5.5310, device='cuda:0', grad_fn=)\n", "99000\n", "10100 tensor(5.4341, device='cuda:0', grad_fn=)\n", "100000\n", "10200 tensor(5.3571, device='cuda:0', grad_fn=)\n", "101000\n", "10300 tensor(5.4712, device='cuda:0', grad_fn=)\n", "102000\n", "10400 tensor(5.4810, device='cuda:0', grad_fn=)\n", "103000\n", "10500 tensor(5.5463, device='cuda:0', grad_fn=)\n", "104000\n", "10600 tensor(5.6233, device='cuda:0', grad_fn=)\n", "105000\n", "10700 tensor(5.4678, device='cuda:0', grad_fn=)\n", "106000\n", "10800 tensor(5.5040, device='cuda:0', grad_fn=)\n", "107000\n", "10900 tensor(5.3963, device='cuda:0', grad_fn=)\n", "108000\n", "11000 tensor(5.6295, device='cuda:0', grad_fn=)\n", "109000\n", "11100 tensor(5.2378, device='cuda:0', grad_fn=)\n", "110000\n", "11200 tensor(5.4184, device='cuda:0', grad_fn=)\n", "111000\n", "11300 tensor(5.4404, device='cuda:0', grad_fn=)\n", "112000\n", "11400 tensor(5.3875, device='cuda:0', grad_fn=)\n", "113000\n", "11500 tensor(5.4523, device='cuda:0', grad_fn=)\n", "114000\n", "11600 tensor(5.4418, device='cuda:0', grad_fn=)\n", "115000\n", "11700 tensor(5.3604, device='cuda:0', grad_fn=)\n", "116000\n", "11800 tensor(5.5647, device='cuda:0', grad_fn=)\n", "117000\n", "11900 tensor(5.3936, device='cuda:0', grad_fn=)\n", "118000\n", "12000 tensor(5.4823, device='cuda:0', grad_fn=)\n", "119000\n", "12100 tensor(5.5069, device='cuda:0', grad_fn=)\n", "120000\n", "12200 tensor(5.4983, device='cuda:0', grad_fn=)\n", "121000\n", "12300 tensor(5.6030, device='cuda:0', grad_fn=)\n", "122000\n", "12400 tensor(5.4763, device='cuda:0', grad_fn=)\n", "123000\n", "12500 tensor(5.3718, device='cuda:0', grad_fn=)\n", "124000\n", "12600 tensor(5.4416, device='cuda:0', grad_fn=)\n", "125000\n", "12700 tensor(5.3554, device='cuda:0', grad_fn=)\n", "126000\n", "12800 tensor(5.5392, device='cuda:0', grad_fn=)\n", "127000\n", "12900 tensor(5.4164, device='cuda:0', grad_fn=)\n", "128000\n", "13000 tensor(5.2286, device='cuda:0', grad_fn=)\n", "129000\n", "13100 tensor(5.4288, device='cuda:0', grad_fn=)\n", "130000\n", "13200 tensor(5.4770, device='cuda:0', grad_fn=)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "131000\n", "13300 tensor(5.3352, device='cuda:0', grad_fn=)\n", "132000\n", "13400 tensor(5.4349, device='cuda:0', grad_fn=)\n", "133000\n", "13500 tensor(5.3860, device='cuda:0', grad_fn=)\n", "134000\n", "13600 tensor(5.4648, device='cuda:0', grad_fn=)\n", "135000\n", "13700 tensor(5.4444, device='cuda:0', grad_fn=)\n", "136000\n", "13800 tensor(5.4320, device='cuda:0', grad_fn=)\n", "137000\n", "13900 tensor(5.2935, device='cuda:0', grad_fn=)\n", "138000\n", "14000 tensor(5.5387, device='cuda:0', grad_fn=)\n", "139000\n", "14100 tensor(5.2424, device='cuda:0', grad_fn=)\n", "140000\n", "14200 tensor(5.5177, device='cuda:0', grad_fn=)\n", "141000\n", "14300 tensor(5.4831, device='cuda:0', grad_fn=)\n", "142000\n", "14400 tensor(5.3877, device='cuda:0', grad_fn=)\n", "143000\n", "14500 tensor(5.4919, device='cuda:0', grad_fn=)\n", "144000\n", "14600 tensor(5.5253, device='cuda:0', grad_fn=)\n", "145000\n", "14700 tensor(5.3948, device='cuda:0', grad_fn=)\n", "146000\n", "14800 tensor(5.8442, device='cuda:0', grad_fn=)\n", "147000\n", "14900 tensor(5.4967, device='cuda:0', grad_fn=)\n", "148000\n", "15000 tensor(5.3788, device='cuda:0', grad_fn=)\n", "149000\n", "15100 tensor(5.5832, device='cuda:0', grad_fn=)\n", "150000\n", "15200 tensor(5.4482, device='cuda:0', grad_fn=)\n", "151000\n", "15300 tensor(5.4260, device='cuda:0', grad_fn=)\n", "152000\n", "15400 tensor(5.3273, device='cuda:0', grad_fn=)\n", "153000\n", "15500 tensor(5.4840, device='cuda:0', grad_fn=)\n", "154000\n", "15600 tensor(5.4851, device='cuda:0', grad_fn=)\n", "155000\n", "15700 tensor(5.3871, device='cuda:0', grad_fn=)\n", "156000\n", "15800 tensor(5.2933, device='cuda:0', grad_fn=)\n", "157000\n", "15900 tensor(5.4374, device='cuda:0', grad_fn=)\n", "158000\n", "16000 tensor(5.2555, device='cuda:0', grad_fn=)\n", "159000\n", "16100 tensor(5.3127, device='cuda:0', grad_fn=)\n", "160000\n", "16200 tensor(5.6423, device='cuda:0', grad_fn=)\n", "161000\n", "16300 tensor(5.4702, device='cuda:0', grad_fn=)\n", "162000\n", "16400 tensor(5.4419, device='cuda:0', grad_fn=)\n", "163000\n", "16500 tensor(5.5640, device='cuda:0', grad_fn=)\n", "164000\n", "16600 tensor(5.4099, device='cuda:0', grad_fn=)\n", "165000\n", "16700 tensor(5.3822, device='cuda:0', grad_fn=)\n", "166000\n", "16800 tensor(5.4643, device='cuda:0', grad_fn=)\n", "167000\n", "16900 tensor(5.2234, device='cuda:0', grad_fn=)\n", "168000\n", "17000 tensor(5.5021, device='cuda:0', grad_fn=)\n", "169000\n", "17100 tensor(5.3524, device='cuda:0', grad_fn=)\n", "170000\n", "17200 tensor(5.4725, device='cuda:0', grad_fn=)\n", "171000\n", "17300 tensor(5.5034, device='cuda:0', grad_fn=)\n", "172000\n", "17400 tensor(5.2911, device='cuda:0', grad_fn=)\n", "173000\n", "17500 tensor(5.3147, device='cuda:0', grad_fn=)\n", "174000\n", "17600 tensor(5.3426, device='cuda:0', grad_fn=)\n", "175000\n", "17700 tensor(5.3414, device='cuda:0', grad_fn=)\n", "176000\n", "17800 tensor(5.3991, device='cuda:0', grad_fn=)\n", "177000\n", "17900 tensor(5.2936, device='cuda:0', grad_fn=)\n", "178000\n", "18000 tensor(5.5238, device='cuda:0', grad_fn=)\n", "179000\n", "18100 tensor(5.4684, device='cuda:0', grad_fn=)\n", "180000\n", "18200 tensor(5.3916, device='cuda:0', grad_fn=)\n", "181000\n", "18300 tensor(5.3888, device='cuda:0', grad_fn=)\n", "182000\n", "18400 tensor(5.4299, device='cuda:0', grad_fn=)\n", "183000\n", "18500 tensor(5.4103, device='cuda:0', grad_fn=)\n", "184000\n", "18600 tensor(5.5980, device='cuda:0', grad_fn=)\n", "185000\n", "18700 tensor(5.4135, device='cuda:0', grad_fn=)\n", "186000\n", "18800 tensor(5.5855, device='cuda:0', grad_fn=)\n", "187000\n", "18900 tensor(5.4583, device='cuda:0', grad_fn=)\n", "188000\n", "19000 tensor(5.4854, device='cuda:0', grad_fn=)\n", "189000\n", "19100 tensor(5.5879, device='cuda:0', grad_fn=)\n", "190000\n", "19200 tensor(5.4675, device='cuda:0', grad_fn=)\n", "191000\n", "19300 tensor(5.5741, device='cuda:0', grad_fn=)\n", "192000\n", "19400 tensor(5.3977, device='cuda:0', grad_fn=)\n", "193000\n", "19500 tensor(5.4042, device='cuda:0', grad_fn=)\n", "194000\n", "19600 tensor(5.4364, device='cuda:0', grad_fn=)\n", "195000\n", "19700 tensor(5.4868, device='cuda:0', grad_fn=)\n", "196000\n", "19800 tensor(5.3476, device='cuda:0', grad_fn=)\n", "197000\n", "19900 tensor(5.3553, device='cuda:0', grad_fn=)\n", "198000\n", "20000 tensor(5.3707, device='cuda:0', grad_fn=)\n", "199000\n", "20100 tensor(5.3226, device='cuda:0', grad_fn=)\n", "200000\n", "20200 tensor(5.2488, device='cuda:0', grad_fn=)\n", "201000\n", "20300 tensor(5.3648, device='cuda:0', grad_fn=)\n", "20400 tensor(5.4156, device='cuda:0', grad_fn=)\n", "202000\n", "20500 tensor(5.4102, device='cuda:0', grad_fn=)\n", "203000\n", "20600 tensor(5.6109, device='cuda:0', grad_fn=)\n", "204000\n", "20700 tensor(5.4335, device='cuda:0', grad_fn=)\n", "205000\n", "20800 tensor(5.2795, device='cuda:0', grad_fn=)\n", "206000\n", "20900 tensor(5.5609, device='cuda:0', grad_fn=)\n", "207000\n", "21000 tensor(5.3918, device='cuda:0', grad_fn=)\n", "208000\n", "21100 tensor(5.3831, device='cuda:0', grad_fn=)\n", "209000\n", "21200 tensor(5.2790, device='cuda:0', grad_fn=)\n", "210000\n", "21300 tensor(5.4710, device='cuda:0', grad_fn=)\n", "211000\n", "21400 tensor(5.5050, device='cuda:0', grad_fn=)\n", "212000\n", "21500 tensor(5.2692, device='cuda:0', grad_fn=)\n", "213000\n", "21600 tensor(5.2668, device='cuda:0', grad_fn=)\n", "214000\n", "21700 tensor(5.2633, device='cuda:0', grad_fn=)\n", "215000\n", "21800 tensor(5.4067, device='cuda:0', grad_fn=)\n", "216000\n", "21900 tensor(5.3829, device='cuda:0', grad_fn=)\n", "217000\n", "22000 tensor(5.3773, device='cuda:0', grad_fn=)\n", "218000\n", "22100 tensor(5.2472, device='cuda:0', grad_fn=)\n", "219000\n", "22200 tensor(5.1171, device='cuda:0', grad_fn=)\n", "220000\n", "22300 tensor(5.5545, device='cuda:0', grad_fn=)\n", "221000\n", "22400 tensor(5.2499, device='cuda:0', grad_fn=)\n", "222000\n", "22500 tensor(5.2943, device='cuda:0', grad_fn=)\n", "223000\n", "22600 tensor(5.4748, device='cuda:0', grad_fn=)\n", "224000\n", "22700 tensor(5.2436, device='cuda:0', grad_fn=)\n", "225000\n", "22800 tensor(5.5053, device='cuda:0', grad_fn=)\n", "226000\n", "22900 tensor(5.5519, device='cuda:0', grad_fn=)\n", "227000\n", "23000 tensor(5.4541, device='cuda:0', grad_fn=)\n", "228000\n", "23100 tensor(5.4279, device='cuda:0', grad_fn=)\n", "229000\n", "23200 tensor(5.4286, device='cuda:0', grad_fn=)\n", "230000\n", "23300 tensor(5.5179, device='cuda:0', grad_fn=)\n", "231000\n", "23400 tensor(5.5355, device='cuda:0', grad_fn=)\n", "232000\n", "23500 tensor(5.3505, device='cuda:0', grad_fn=)\n", "233000\n", "23600 tensor(5.3313, device='cuda:0', grad_fn=)\n", "234000\n", "23700 tensor(5.3509, device='cuda:0', grad_fn=)\n", "235000\n", "23800 tensor(5.2170, device='cuda:0', grad_fn=)\n", "236000\n", "23900 tensor(5.3101, device='cuda:0', grad_fn=)\n", "237000\n", "24000 tensor(5.2962, device='cuda:0', grad_fn=)\n", "238000\n", "24100 tensor(5.3882, device='cuda:0', grad_fn=)\n", "239000\n", "24200 tensor(5.5633, device='cuda:0', grad_fn=)\n", "240000\n", "24300 tensor(5.4595, device='cuda:0', grad_fn=)\n", "241000\n", "24400 tensor(5.5932, device='cuda:0', grad_fn=)\n", "242000\n", "24500 tensor(5.3717, device='cuda:0', grad_fn=)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "243000\n", "24600 tensor(5.4943, device='cuda:0', grad_fn=)\n", "244000\n", "24700 tensor(5.3985, device='cuda:0', grad_fn=)\n", "245000\n", "24800 tensor(5.4347, device='cuda:0', grad_fn=)\n", "246000\n", "24900 tensor(5.5008, device='cuda:0', grad_fn=)\n", "247000\n", "25000 tensor(5.5100, device='cuda:0', grad_fn=)\n", "248000\n", "25100 tensor(5.4427, device='cuda:0', grad_fn=)\n", "249000\n", "25200 tensor(5.4508, device='cuda:0', grad_fn=)\n", "250000\n", "25300 tensor(5.4724, device='cuda:0', grad_fn=)\n", "251000\n", "25400 tensor(5.4525, device='cuda:0', grad_fn=)\n", "252000\n", "25500 tensor(5.3620, device='cuda:0', grad_fn=)\n", "253000\n", "25600 tensor(5.6446, device='cuda:0', grad_fn=)\n", "254000\n", "25700 tensor(5.3966, device='cuda:0', grad_fn=)\n", "255000\n", "25800 tensor(5.4889, device='cuda:0', grad_fn=)\n", "256000\n", "25900 tensor(5.4251, device='cuda:0', grad_fn=)\n", "257000\n", "26000 tensor(5.4346, device='cuda:0', grad_fn=)\n", "258000\n", "26100 tensor(5.3395, device='cuda:0', grad_fn=)\n", "259000\n", "26200 tensor(5.2695, device='cuda:0', grad_fn=)\n", "260000\n", "26300 tensor(5.4767, device='cuda:0', grad_fn=)\n", "261000\n", "26400 tensor(5.5083, device='cuda:0', grad_fn=)\n", "262000\n", "26500 tensor(5.2347, device='cuda:0', grad_fn=)\n", "263000\n", "26600 tensor(5.5761, device='cuda:0', grad_fn=)\n", "264000\n", "26700 tensor(5.4402, device='cuda:0', grad_fn=)\n", "265000\n", "26800 tensor(5.6173, device='cuda:0', grad_fn=)\n", "266000\n", "26900 tensor(5.3775, device='cuda:0', grad_fn=)\n", "267000\n", "27000 tensor(5.2863, device='cuda:0', grad_fn=)\n", "268000\n", "27100 tensor(5.3007, device='cuda:0', grad_fn=)\n", "269000\n", "27200 tensor(5.3551, device='cuda:0', grad_fn=)\n", "270000\n", "27300 tensor(5.5439, device='cuda:0', grad_fn=)\n", "271000\n", "27400 tensor(5.4334, device='cuda:0', grad_fn=)\n", "272000\n", "27500 tensor(5.3266, device='cuda:0', grad_fn=)\n", "273000\n", "27600 tensor(5.6412, device='cuda:0', grad_fn=)\n", "274000\n", "27700 tensor(5.4420, device='cuda:0', grad_fn=)\n", "275000\n", "27800 tensor(5.4381, device='cuda:0', grad_fn=)\n", "276000\n", "27900 tensor(5.5550, device='cuda:0', grad_fn=)\n", "277000\n", "28000 tensor(5.4154, device='cuda:0', grad_fn=)\n", "278000\n", "28100 tensor(5.3823, device='cuda:0', grad_fn=)\n", "279000\n", "28200 tensor(5.5344, device='cuda:0', grad_fn=)\n", "280000\n", "28300 tensor(5.1615, device='cuda:0', grad_fn=)\n", "281000\n", "28400 tensor(5.6069, device='cuda:0', grad_fn=)\n", "282000\n", "28500 tensor(5.4426, device='cuda:0', grad_fn=)\n", "283000\n", "28600 tensor(5.3672, device='cuda:0', grad_fn=)\n", "284000\n", "28700 tensor(5.5133, device='cuda:0', grad_fn=)\n", "285000\n", "28800 tensor(5.5556, device='cuda:0', grad_fn=)\n", "286000\n", "28900 tensor(5.4294, device='cuda:0', grad_fn=)\n", "287000\n", "29000 tensor(5.3359, device='cuda:0', grad_fn=)\n", "288000\n", "29100 tensor(5.0951, device='cuda:0', grad_fn=)\n", "289000\n", "29200 tensor(5.2511, device='cuda:0', grad_fn=)\n", "290000\n", "29300 tensor(5.5364, device='cuda:0', grad_fn=)\n", "291000\n", "29400 tensor(5.6708, device='cuda:0', grad_fn=)\n", "292000\n", "29500 tensor(5.4371, device='cuda:0', grad_fn=)\n", "293000\n", "29600 tensor(5.2942, device='cuda:0', grad_fn=)\n", "294000\n", "29700 tensor(5.4637, device='cuda:0', grad_fn=)\n", "295000\n", "29800 tensor(5.2914, device='cuda:0', grad_fn=)\n", "296000\n", "29900 tensor(5.5562, device='cuda:0', grad_fn=)\n", "297000\n", "30000 tensor(5.2833, device='cuda:0', grad_fn=)\n", "298000\n", "30100 tensor(5.3481, device='cuda:0', grad_fn=)\n", "299000\n", "30200 tensor(5.3122, device='cuda:0', grad_fn=)\n", "300000\n", "30300 tensor(5.4103, device='cuda:0', grad_fn=)\n", "301000\n", "30400 tensor(5.2480, device='cuda:0', grad_fn=)\n", "302000\n", "30500 tensor(5.4258, device='cuda:0', grad_fn=)\n", "30600 tensor(5.3835, device='cuda:0', grad_fn=)\n", "303000\n", "30700 tensor(5.4193, device='cuda:0', grad_fn=)\n", "304000\n", "30800 tensor(5.4438, device='cuda:0', grad_fn=)\n", "305000\n", "30900 tensor(5.4518, device='cuda:0', grad_fn=)\n", "306000\n", "31000 tensor(5.5607, device='cuda:0', grad_fn=)\n", "307000\n", "31100 tensor(5.2059, device='cuda:0', grad_fn=)\n", "308000\n", "31200 tensor(5.2571, device='cuda:0', grad_fn=)\n", "309000\n", "31300 tensor(5.5208, device='cuda:0', grad_fn=)\n", "310000\n", "31400 tensor(5.3061, device='cuda:0', grad_fn=)\n", "311000\n", "31500 tensor(5.4834, device='cuda:0', grad_fn=)\n", "312000\n", "31600 tensor(5.4653, device='cuda:0', grad_fn=)\n", "313000\n", "31700 tensor(5.4308, device='cuda:0', grad_fn=)\n", "314000\n", "31800 tensor(5.5400, device='cuda:0', grad_fn=)\n", "315000\n", "31900 tensor(5.1536, device='cuda:0', grad_fn=)\n", "316000\n", "32000 tensor(5.3460, device='cuda:0', grad_fn=)\n", "317000\n", "32100 tensor(5.2300, device='cuda:0', grad_fn=)\n", "318000\n", "32200 tensor(5.5511, device='cuda:0', grad_fn=)\n", "319000\n", "32300 tensor(5.5391, device='cuda:0', grad_fn=)\n", "320000\n", "32400 tensor(5.5157, device='cuda:0', grad_fn=)\n", "321000\n", "32500 tensor(5.3336, device='cuda:0', grad_fn=)\n", "322000\n", "32600 tensor(5.4475, device='cuda:0', grad_fn=)\n", "323000\n", "32700 tensor(5.3894, device='cuda:0', grad_fn=)\n", "324000\n", "32800 tensor(5.6022, device='cuda:0', grad_fn=)\n", "325000\n", "32900 tensor(5.4663, device='cuda:0', grad_fn=)\n", "326000\n", "33000 tensor(5.2387, device='cuda:0', grad_fn=)\n", "327000\n", "33100 tensor(5.4446, device='cuda:0', grad_fn=)\n", "328000\n", "33200 tensor(5.5450, device='cuda:0', grad_fn=)\n", "329000\n", "33300 tensor(5.3179, device='cuda:0', grad_fn=)\n", "330000\n", "33400 tensor(5.5905, device='cuda:0', grad_fn=)\n", "331000\n", "33500 tensor(5.4066, device='cuda:0', grad_fn=)\n", "332000\n", "33600 tensor(5.3542, device='cuda:0', grad_fn=)\n", "333000\n", "33700 tensor(5.4097, device='cuda:0', grad_fn=)\n", "334000\n", "33800 tensor(5.4912, device='cuda:0', grad_fn=)\n", "335000\n", "33900 tensor(5.2358, device='cuda:0', grad_fn=)\n", "336000\n", "34000 tensor(5.4470, device='cuda:0', grad_fn=)\n", "337000\n", "34100 tensor(5.4207, device='cuda:0', grad_fn=)\n", "338000\n", "34200 tensor(5.4651, device='cuda:0', grad_fn=)\n", "339000\n", "34300 tensor(5.2545, device='cuda:0', grad_fn=)\n", "340000\n", "34400 tensor(5.7106, device='cuda:0', grad_fn=)\n", "341000\n", "34500 tensor(5.5699, device='cuda:0', grad_fn=)\n", "342000\n", "34600 tensor(5.4638, device='cuda:0', grad_fn=)\n", "343000\n", "34700 tensor(5.5382, device='cuda:0', grad_fn=)\n", "344000\n", "34800 tensor(5.5603, device='cuda:0', grad_fn=)\n", "345000\n", "34900 tensor(5.6072, device='cuda:0', grad_fn=)\n", "346000\n", "35000 tensor(5.6037, device='cuda:0', grad_fn=)\n", "347000\n", "35100 tensor(5.4069, device='cuda:0', grad_fn=)\n", "348000\n", "35200 tensor(5.3398, device='cuda:0', grad_fn=)\n", "349000\n", "35300 tensor(5.5607, device='cuda:0', grad_fn=)\n", "350000\n", "35400 tensor(5.2068, device='cuda:0', grad_fn=)\n", "351000\n", "35500 tensor(5.3112, device='cuda:0', grad_fn=)\n", "352000\n", "35600 tensor(5.4126, device='cuda:0', grad_fn=)\n", "353000\n", "35700 tensor(5.3091, device='cuda:0', grad_fn=)\n", "354000\n", "35800 tensor(5.4252, device='cuda:0', grad_fn=)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "355000\n", "35900 tensor(5.3956, device='cuda:0', grad_fn=)\n", "356000\n", "36000 tensor(5.1705, device='cuda:0', grad_fn=)\n", "357000\n", "36100 tensor(5.5497, device='cuda:0', grad_fn=)\n", "358000\n", "36200 tensor(5.4066, device='cuda:0', grad_fn=)\n", "359000\n", "36300 tensor(5.6858, device='cuda:0', grad_fn=)\n", "360000\n", "36400 tensor(5.3812, device='cuda:0', grad_fn=)\n", "361000\n", "36500 tensor(5.3990, device='cuda:0', grad_fn=)\n", "362000\n", "36600 tensor(5.4302, device='cuda:0', grad_fn=)\n", "363000\n", "36700 tensor(5.2253, device='cuda:0', grad_fn=)\n", "364000\n", "36800 tensor(5.3347, device='cuda:0', grad_fn=)\n", "365000\n", "36900 tensor(5.4426, device='cuda:0', grad_fn=)\n", "366000\n", "37000 tensor(5.3419, device='cuda:0', grad_fn=)\n", "367000\n", "37100 tensor(5.3579, device='cuda:0', grad_fn=)\n", "368000\n", "37200 tensor(5.4332, device='cuda:0', grad_fn=)\n", "369000\n", "37300 tensor(5.3362, device='cuda:0', grad_fn=)\n", "370000\n", "37400 tensor(5.7100, device='cuda:0', grad_fn=)\n", "371000\n", "37500 tensor(5.3742, device='cuda:0', grad_fn=)\n", "372000\n", "37600 tensor(5.3615, device='cuda:0', grad_fn=)\n", "373000\n", "37700 tensor(5.5402, device='cuda:0', grad_fn=)\n", "374000\n", "37800 tensor(5.3734, device='cuda:0', grad_fn=)\n", "375000\n", "37900 tensor(5.3621, device='cuda:0', grad_fn=)\n", "376000\n", "38000 tensor(5.4380, device='cuda:0', grad_fn=)\n", "377000\n", "38100 tensor(5.4513, device='cuda:0', grad_fn=)\n", "378000\n", "38200 tensor(5.4554, device='cuda:0', grad_fn=)\n", "379000\n", "38300 tensor(5.3735, device='cuda:0', grad_fn=)\n", "380000\n", "38400 tensor(5.4297, device='cuda:0', grad_fn=)\n", "381000\n", "38500 tensor(5.4561, device='cuda:0', grad_fn=)\n", "382000\n", "38600 tensor(5.4118, device='cuda:0', grad_fn=)\n", "383000\n", "38700 tensor(5.3996, device='cuda:0', grad_fn=)\n", "384000\n", "38800 tensor(5.4825, device='cuda:0', grad_fn=)\n", "385000\n", "38900 tensor(5.5692, device='cuda:0', grad_fn=)\n", "386000\n", "39000 tensor(5.2573, device='cuda:0', grad_fn=)\n", "387000\n", "39100 tensor(5.4847, device='cuda:0', grad_fn=)\n", "388000\n", "39200 tensor(5.5802, device='cuda:0', grad_fn=)\n", "389000\n", "39300 tensor(5.3968, device='cuda:0', grad_fn=)\n", "390000\n", "39400 tensor(5.4666, device='cuda:0', grad_fn=)\n", "391000\n", "39500 tensor(5.5847, device='cuda:0', grad_fn=)\n", "392000\n", "39600 tensor(5.2648, device='cuda:0', grad_fn=)\n", "393000\n", "39700 tensor(5.2423, device='cuda:0', grad_fn=)\n", "394000\n", "39800 tensor(5.3731, device='cuda:0', grad_fn=)\n", "395000\n", "39900 tensor(5.2014, device='cuda:0', grad_fn=)\n", "396000\n", "40000 tensor(5.2903, device='cuda:0', grad_fn=)\n", "397000\n", "40100 tensor(5.3712, device='cuda:0', grad_fn=)\n", "398000\n", "40200 tensor(5.3557, device='cuda:0', grad_fn=)\n", "399000\n", "40300 tensor(5.4151, device='cuda:0', grad_fn=)\n", "40400 tensor(5.4358, device='cuda:0', grad_fn=)\n", "400000\n", "40500 tensor(5.3498, device='cuda:0', grad_fn=)\n", "401000\n", "40600 tensor(5.4152, device='cuda:0', grad_fn=)\n", "402000\n", "40700 tensor(5.4551, device='cuda:0', grad_fn=)\n", "403000\n", "40800 tensor(5.4138, device='cuda:0', grad_fn=)\n", "404000\n", "40900 tensor(5.3628, device='cuda:0', grad_fn=)\n", "405000\n", "41000 tensor(5.4124, device='cuda:0', grad_fn=)\n", "406000\n", "41100 tensor(5.3750, device='cuda:0', grad_fn=)\n", "407000\n", "41200 tensor(5.2687, device='cuda:0', grad_fn=)\n", "408000\n", "41300 tensor(5.3987, device='cuda:0', grad_fn=)\n", "409000\n", "41400 tensor(5.2976, device='cuda:0', grad_fn=)\n", "410000\n", "41500 tensor(5.4418, device='cuda:0', grad_fn=)\n", "411000\n", "41600 tensor(5.3558, device='cuda:0', grad_fn=)\n", "412000\n", "41700 tensor(5.3767, device='cuda:0', grad_fn=)\n", "413000\n", "41800 tensor(5.3836, device='cuda:0', grad_fn=)\n", "414000\n", "41900 tensor(5.3904, device='cuda:0', grad_fn=)\n", "415000\n", "42000 tensor(5.5445, device='cuda:0', grad_fn=)\n", "416000\n", "42100 tensor(5.2890, device='cuda:0', grad_fn=)\n", "417000\n", "42200 tensor(5.3691, device='cuda:0', grad_fn=)\n", "418000\n", "42300 tensor(5.4364, device='cuda:0', grad_fn=)\n", "419000\n", "42400 tensor(5.2507, device='cuda:0', grad_fn=)\n", "420000\n", "42500 tensor(5.4215, device='cuda:0', grad_fn=)\n", "421000\n", "42600 tensor(5.2136, device='cuda:0', grad_fn=)\n", "422000\n", "42700 tensor(5.5296, device='cuda:0', grad_fn=)\n", "423000\n", "42800 tensor(5.4544, device='cuda:0', grad_fn=)\n", "424000\n", "42900 tensor(5.3009, device='cuda:0', grad_fn=)\n", "425000\n", "43000 tensor(5.4403, device='cuda:0', grad_fn=)\n", "426000\n", "43100 tensor(5.4384, device='cuda:0', grad_fn=)\n", "427000\n", "43200 tensor(5.2520, device='cuda:0', grad_fn=)\n", "428000\n", "43300 tensor(5.2945, device='cuda:0', grad_fn=)\n", "429000\n", "43400 tensor(5.4455, device='cuda:0', grad_fn=)\n", "430000\n", "43500 tensor(5.1633, device='cuda:0', grad_fn=)\n", "431000\n", "43600 tensor(5.3649, device='cuda:0', grad_fn=)\n", "432000\n", "epoch: = 3\n", "0 tensor(5.3427, device='cuda:0', grad_fn=)\n", "1000\n", "100 tensor(5.4180, device='cuda:0', grad_fn=)\n", "200 tensor(5.2939, device='cuda:0', grad_fn=)\n", "2000\n", "300 tensor(5.3083, device='cuda:0', grad_fn=)\n", "3000\n", "400 tensor(5.3086, device='cuda:0', grad_fn=)\n", "4000\n", "500 tensor(5.4733, device='cuda:0', grad_fn=)\n", "5000\n", "600 tensor(5.2627, device='cuda:0', grad_fn=)\n", "6000\n", "700 tensor(5.5664, device='cuda:0', grad_fn=)\n", "7000\n", "800 tensor(5.1641, device='cuda:0', grad_fn=)\n", "8000\n", "900 tensor(5.4272, device='cuda:0', grad_fn=)\n", "9000\n", "1000 tensor(5.2926, device='cuda:0', grad_fn=)\n", "10000\n", "1100 tensor(5.4848, device='cuda:0', grad_fn=)\n", "11000\n", "1200 tensor(5.5283, device='cuda:0', grad_fn=)\n", "12000\n", "1300 tensor(5.4635, device='cuda:0', grad_fn=)\n", "13000\n", "1400 tensor(5.4590, device='cuda:0', grad_fn=)\n", "14000\n", "1500 tensor(5.5386, device='cuda:0', grad_fn=)\n", "15000\n", "1600 tensor(5.2150, device='cuda:0', grad_fn=)\n", "16000\n", "1700 tensor(5.3116, device='cuda:0', grad_fn=)\n", "17000\n", "1800 tensor(5.3130, device='cuda:0', grad_fn=)\n", "18000\n", "1900 tensor(5.2889, device='cuda:0', grad_fn=)\n", "19000\n", "2000 tensor(5.3574, device='cuda:0', grad_fn=)\n", "20000\n", "2100 tensor(5.4860, device='cuda:0', grad_fn=)\n", "21000\n", "2200 tensor(5.3206, device='cuda:0', grad_fn=)\n", "22000\n", "2300 tensor(5.3447, device='cuda:0', grad_fn=)\n", "23000\n", "2400 tensor(5.3333, device='cuda:0', grad_fn=)\n", "24000\n", "2500 tensor(5.3822, device='cuda:0', grad_fn=)\n", "25000\n", "2600 tensor(5.4039, device='cuda:0', grad_fn=)\n", "26000\n", "2700 tensor(5.4280, device='cuda:0', grad_fn=)\n", "27000\n", "2800 tensor(5.4575, device='cuda:0', grad_fn=)\n", "28000\n", "2900 tensor(5.5878, device='cuda:0', grad_fn=)\n", "29000\n", "3000 tensor(5.3311, device='cuda:0', grad_fn=)\n", "30000\n", "3100 tensor(5.4103, device='cuda:0', grad_fn=)\n", "31000\n", "3200 tensor(5.4323, device='cuda:0', grad_fn=)\n", "32000\n", "3300 tensor(5.3521, device='cuda:0', grad_fn=)\n", "33000\n", "3400 tensor(5.2512, device='cuda:0', grad_fn=)\n", "34000\n", "3500 tensor(5.3813, device='cuda:0', grad_fn=)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "35000\n", "3600 tensor(5.4000, device='cuda:0', grad_fn=)\n", "36000\n", "3700 tensor(5.3312, device='cuda:0', grad_fn=)\n", "37000\n", "3800 tensor(5.3553, device='cuda:0', grad_fn=)\n", "38000\n", "3900 tensor(5.2275, device='cuda:0', grad_fn=)\n", "39000\n", "4000 tensor(5.2883, device='cuda:0', grad_fn=)\n", "40000\n", "4100 tensor(5.4294, device='cuda:0', grad_fn=)\n", "41000\n", "4200 tensor(5.4801, device='cuda:0', grad_fn=)\n", "42000\n", "4300 tensor(5.3863, device='cuda:0', grad_fn=)\n", "43000\n", "4400 tensor(5.4470, device='cuda:0', grad_fn=)\n", "44000\n", "4500 tensor(5.2610, device='cuda:0', grad_fn=)\n", "45000\n", "4600 tensor(5.5962, device='cuda:0', grad_fn=)\n", "46000\n", "4700 tensor(5.3029, device='cuda:0', grad_fn=)\n", "47000\n", "4800 tensor(5.4265, device='cuda:0', grad_fn=)\n", "48000\n", "4900 tensor(5.4823, device='cuda:0', grad_fn=)\n", "49000\n", "5000 tensor(5.4749, device='cuda:0', grad_fn=)\n", "50000\n", "5100 tensor(5.5356, device='cuda:0', grad_fn=)\n", "51000\n", "5200 tensor(5.5513, device='cuda:0', grad_fn=)\n", "52000\n", "5300 tensor(5.5476, device='cuda:0', grad_fn=)\n", "53000\n", "5400 tensor(5.4039, device='cuda:0', grad_fn=)\n", "54000\n", "5500 tensor(5.5156, device='cuda:0', grad_fn=)\n", "55000\n", "5600 tensor(5.2975, device='cuda:0', grad_fn=)\n", "56000\n", "5700 tensor(5.5492, device='cuda:0', grad_fn=)\n", "57000\n", "5800 tensor(5.5379, device='cuda:0', grad_fn=)\n", "58000\n", "5900 tensor(5.4874, device='cuda:0', grad_fn=)\n", "59000\n", "6000 tensor(5.3808, device='cuda:0', grad_fn=)\n", "60000\n", "6100 tensor(5.3932, device='cuda:0', grad_fn=)\n", "61000\n", "6200 tensor(5.5657, device='cuda:0', grad_fn=)\n", "62000\n", "6300 tensor(5.4233, device='cuda:0', grad_fn=)\n", "63000\n", "6400 tensor(5.3438, device='cuda:0', grad_fn=)\n", "64000\n", "6500 tensor(5.2002, device='cuda:0', grad_fn=)\n", "65000\n", "6600 tensor(5.3774, device='cuda:0', grad_fn=)\n", "66000\n", "6700 tensor(5.3193, device='cuda:0', grad_fn=)\n", "67000\n", "6800 tensor(5.5394, device='cuda:0', grad_fn=)\n", "68000\n", "6900 tensor(5.5196, device='cuda:0', grad_fn=)\n", "69000\n", "7000 tensor(5.4282, device='cuda:0', grad_fn=)\n", "70000\n", "7100 tensor(5.2296, device='cuda:0', grad_fn=)\n", "71000\n", "7200 tensor(5.3175, device='cuda:0', grad_fn=)\n", "72000\n", "7300 tensor(5.5642, device='cuda:0', grad_fn=)\n", "73000\n", "7400 tensor(5.3784, device='cuda:0', grad_fn=)\n", "74000\n", "7500 tensor(5.2475, device='cuda:0', grad_fn=)\n", "75000\n", "7600 tensor(5.3194, device='cuda:0', grad_fn=)\n", "76000\n", "7700 tensor(5.3934, device='cuda:0', grad_fn=)\n", "77000\n", "7800 tensor(5.5041, device='cuda:0', grad_fn=)\n", "78000\n", "7900 tensor(5.1814, device='cuda:0', grad_fn=)\n", "79000\n", "8000 tensor(5.2426, device='cuda:0', grad_fn=)\n", "80000\n", "8100 tensor(5.4104, device='cuda:0', grad_fn=)\n", "81000\n", "8200 tensor(5.4198, device='cuda:0', grad_fn=)\n", "82000\n", "8300 tensor(5.3854, device='cuda:0', grad_fn=)\n", "83000\n", "8400 tensor(5.5128, device='cuda:0', grad_fn=)\n", "84000\n", "8500 tensor(5.4898, device='cuda:0', grad_fn=)\n", "85000\n", "8600 tensor(5.4943, device='cuda:0', grad_fn=)\n", "86000\n", "8700 tensor(5.6012, device='cuda:0', grad_fn=)\n", "87000\n", "8800 tensor(5.4790, device='cuda:0', grad_fn=)\n", "88000\n", "8900 tensor(5.3312, device='cuda:0', grad_fn=)\n", "89000\n", "9000 tensor(5.4456, device='cuda:0', grad_fn=)\n", "90000\n", "9100 tensor(5.4537, device='cuda:0', grad_fn=)\n", "91000\n", "9200 tensor(5.3643, device='cuda:0', grad_fn=)\n", "92000\n", "9300 tensor(5.4085, device='cuda:0', grad_fn=)\n", "93000\n", "9400 tensor(5.2527, device='cuda:0', grad_fn=)\n", "94000\n", "9500 tensor(5.3289, device='cuda:0', grad_fn=)\n", "95000\n", "9600 tensor(5.4516, device='cuda:0', grad_fn=)\n", "96000\n", "9700 tensor(5.3881, device='cuda:0', grad_fn=)\n", "97000\n", "9800 tensor(5.4321, device='cuda:0', grad_fn=)\n", "9900 tensor(5.2532, device='cuda:0', grad_fn=)\n", "98000\n", "10000 tensor(5.4727, device='cuda:0', grad_fn=)\n", "99000\n", "10100 tensor(5.3607, device='cuda:0', grad_fn=)\n", "100000\n", "10200 tensor(5.2989, device='cuda:0', grad_fn=)\n", "101000\n", "10300 tensor(5.4168, device='cuda:0', grad_fn=)\n", "102000\n", "10400 tensor(5.4272, device='cuda:0', grad_fn=)\n", "103000\n", "10500 tensor(5.4838, device='cuda:0', grad_fn=)\n", "104000\n", "10600 tensor(5.5675, device='cuda:0', grad_fn=)\n", "105000\n", "10700 tensor(5.4027, device='cuda:0', grad_fn=)\n", "106000\n", "10800 tensor(5.4252, device='cuda:0', grad_fn=)\n", "107000\n", "10900 tensor(5.3408, device='cuda:0', grad_fn=)\n", "108000\n", "11000 tensor(5.5754, device='cuda:0', grad_fn=)\n", "109000\n", "11100 tensor(5.1920, device='cuda:0', grad_fn=)\n", "110000\n", "11200 tensor(5.3604, device='cuda:0', grad_fn=)\n", "111000\n", "11300 tensor(5.3836, device='cuda:0', grad_fn=)\n", "112000\n", "11400 tensor(5.3330, device='cuda:0', grad_fn=)\n", "113000\n", "11500 tensor(5.4023, device='cuda:0', grad_fn=)\n", "114000\n", "11600 tensor(5.3923, device='cuda:0', grad_fn=)\n", "115000\n", "11700 tensor(5.3145, device='cuda:0', grad_fn=)\n", "116000\n", "11800 tensor(5.5174, device='cuda:0', grad_fn=)\n", "117000\n", "11900 tensor(5.3522, device='cuda:0', grad_fn=)\n", "118000\n", "12000 tensor(5.4232, device='cuda:0', grad_fn=)\n", "119000\n", "12100 tensor(5.4382, device='cuda:0', grad_fn=)\n", "120000\n", "12200 tensor(5.4488, device='cuda:0', grad_fn=)\n", "121000\n", "12300 tensor(5.5409, device='cuda:0', grad_fn=)\n", "122000\n", "12400 tensor(5.4200, device='cuda:0', grad_fn=)\n", "123000\n", "12500 tensor(5.3292, device='cuda:0', grad_fn=)\n", "124000\n", "12600 tensor(5.3788, device='cuda:0', grad_fn=)\n", "125000\n", "12700 tensor(5.3116, device='cuda:0', grad_fn=)\n", "126000\n", "12800 tensor(5.4948, device='cuda:0', grad_fn=)\n", "127000\n", "12900 tensor(5.3557, device='cuda:0', grad_fn=)\n", "128000\n", "13000 tensor(5.1732, device='cuda:0', grad_fn=)\n", "129000\n", "13100 tensor(5.3782, device='cuda:0', grad_fn=)\n", "130000\n", "13200 tensor(5.4178, device='cuda:0', grad_fn=)\n", "131000\n", "13300 tensor(5.2929, device='cuda:0', grad_fn=)\n", "132000\n", "13400 tensor(5.3806, device='cuda:0', grad_fn=)\n", "133000\n", "13500 tensor(5.3394, device='cuda:0', grad_fn=)\n", "134000\n", "13600 tensor(5.4191, device='cuda:0', grad_fn=)\n", "135000\n", "13700 tensor(5.3856, device='cuda:0', grad_fn=)\n", "136000\n", "13800 tensor(5.3839, device='cuda:0', grad_fn=)\n", "137000\n", "13900 tensor(5.2391, device='cuda:0', grad_fn=)\n", "138000\n", "14000 tensor(5.4865, device='cuda:0', grad_fn=)\n", "139000\n", "14100 tensor(5.1952, device='cuda:0', grad_fn=)\n", "140000\n", "14200 tensor(5.4670, device='cuda:0', grad_fn=)\n", "141000\n", "14300 tensor(5.4385, device='cuda:0', grad_fn=)\n", "142000\n", "14400 tensor(5.3347, device='cuda:0', grad_fn=)\n", "143000\n", "14500 tensor(5.4370, device='cuda:0', grad_fn=)\n", "144000\n", "14600 tensor(5.4695, device='cuda:0', grad_fn=)\n", "145000\n", "14700 tensor(5.3453, device='cuda:0', grad_fn=)\n", "146000\n", "14800 tensor(5.7928, device='cuda:0', grad_fn=)\n", "147000\n", "14900 tensor(5.4451, device='cuda:0', grad_fn=)\n", "148000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "15000 tensor(5.3087, device='cuda:0', grad_fn=)\n", "149000\n", "15100 tensor(5.5241, device='cuda:0', grad_fn=)\n", "150000\n", "15200 tensor(5.3894, device='cuda:0', grad_fn=)\n", "151000\n", "15300 tensor(5.3809, device='cuda:0', grad_fn=)\n", "152000\n", "15400 tensor(5.2696, device='cuda:0', grad_fn=)\n", "153000\n", "15500 tensor(5.4343, device='cuda:0', grad_fn=)\n", "154000\n", "15600 tensor(5.4322, device='cuda:0', grad_fn=)\n", "155000\n", "15700 tensor(5.3296, device='cuda:0', grad_fn=)\n", "156000\n", "15800 tensor(5.2456, device='cuda:0', grad_fn=)\n", "157000\n", "15900 tensor(5.3806, device='cuda:0', grad_fn=)\n", "158000\n", "16000 tensor(5.2008, device='cuda:0', grad_fn=)\n", "159000\n", "16100 tensor(5.2489, device='cuda:0', grad_fn=)\n", "160000\n", "16200 tensor(5.5902, device='cuda:0', grad_fn=)\n", "161000\n", "16300 tensor(5.4159, device='cuda:0', grad_fn=)\n", "162000\n", "16400 tensor(5.3966, device='cuda:0', grad_fn=)\n", "163000\n", "16500 tensor(5.5113, device='cuda:0', grad_fn=)\n", "164000\n", "16600 tensor(5.3599, device='cuda:0', grad_fn=)\n", "165000\n", "16700 tensor(5.3372, device='cuda:0', grad_fn=)\n", "166000\n", "16800 tensor(5.4158, device='cuda:0', grad_fn=)\n", "167000\n", "16900 tensor(5.1788, device='cuda:0', grad_fn=)\n", "168000\n", "17000 tensor(5.4497, device='cuda:0', grad_fn=)\n", "169000\n", "17100 tensor(5.2981, device='cuda:0', grad_fn=)\n", "170000\n", "17200 tensor(5.4330, device='cuda:0', grad_fn=)\n", "171000\n", "17300 tensor(5.4495, device='cuda:0', grad_fn=)\n", "172000\n", "17400 tensor(5.2431, device='cuda:0', grad_fn=)\n", "173000\n", "17500 tensor(5.2652, device='cuda:0', grad_fn=)\n", "174000\n", "17600 tensor(5.3007, device='cuda:0', grad_fn=)\n", "175000\n", "17700 tensor(5.2852, device='cuda:0', grad_fn=)\n", "176000\n", "17800 tensor(5.3431, device='cuda:0', grad_fn=)\n", "177000\n", "17900 tensor(5.2395, device='cuda:0', grad_fn=)\n", "178000\n", "18000 tensor(5.4841, device='cuda:0', grad_fn=)\n", "179000\n", "18100 tensor(5.4218, device='cuda:0', grad_fn=)\n", "180000\n", "18200 tensor(5.3397, device='cuda:0', grad_fn=)\n", "181000\n", "18300 tensor(5.3426, device='cuda:0', grad_fn=)\n", "182000\n", "18400 tensor(5.3654, device='cuda:0', grad_fn=)\n", "183000\n", "18500 tensor(5.3484, device='cuda:0', grad_fn=)\n", "184000\n", "18600 tensor(5.5509, device='cuda:0', grad_fn=)\n", "185000\n", "18700 tensor(5.3702, device='cuda:0', grad_fn=)\n", "186000\n", "18800 tensor(5.5361, device='cuda:0', grad_fn=)\n", "187000\n", "18900 tensor(5.4132, device='cuda:0', grad_fn=)\n", "188000\n", "19000 tensor(5.4235, device='cuda:0', grad_fn=)\n", "189000\n", "19100 tensor(5.5318, device='cuda:0', grad_fn=)\n", "190000\n", "19200 tensor(5.4136, device='cuda:0', grad_fn=)\n", "191000\n", "19300 tensor(5.5053, device='cuda:0', grad_fn=)\n", "192000\n", "19400 tensor(5.3472, device='cuda:0', grad_fn=)\n", "193000\n", "19500 tensor(5.3511, device='cuda:0', grad_fn=)\n", "194000\n", "19600 tensor(5.3861, device='cuda:0', grad_fn=)\n", "195000\n", "19700 tensor(5.4345, device='cuda:0', grad_fn=)\n", "196000\n", "19800 tensor(5.3067, device='cuda:0', grad_fn=)\n", "197000\n", "19900 tensor(5.3079, device='cuda:0', grad_fn=)\n", "198000\n", "20000 tensor(5.3268, device='cuda:0', grad_fn=)\n", "199000\n", "20100 tensor(5.2668, device='cuda:0', grad_fn=)\n", "200000\n", "20200 tensor(5.1998, device='cuda:0', grad_fn=)\n", "201000\n", "20300 tensor(5.3105, device='cuda:0', grad_fn=)\n", "20400 tensor(5.3584, device='cuda:0', grad_fn=)\n", "202000\n", "20500 tensor(5.3580, device='cuda:0', grad_fn=)\n", "203000\n", "20600 tensor(5.5528, device='cuda:0', grad_fn=)\n", "204000\n", "20700 tensor(5.3871, device='cuda:0', grad_fn=)\n", "205000\n", "20800 tensor(5.2208, device='cuda:0', grad_fn=)\n", "206000\n", "20900 tensor(5.5007, device='cuda:0', grad_fn=)\n", "207000\n", "21000 tensor(5.3396, device='cuda:0', grad_fn=)\n", "208000\n", "21100 tensor(5.3407, device='cuda:0', grad_fn=)\n", "209000\n", "21200 tensor(5.2243, device='cuda:0', grad_fn=)\n", "210000\n", "21300 tensor(5.4206, device='cuda:0', grad_fn=)\n", "211000\n", "21400 tensor(5.4574, device='cuda:0', grad_fn=)\n", "212000\n", "21500 tensor(5.2328, device='cuda:0', grad_fn=)\n", "213000\n", "21600 tensor(5.2233, device='cuda:0', grad_fn=)\n", "214000\n", "21700 tensor(5.2152, device='cuda:0', grad_fn=)\n", "215000\n", "21800 tensor(5.3497, device='cuda:0', grad_fn=)\n", "216000\n", "21900 tensor(5.3425, device='cuda:0', grad_fn=)\n", "217000\n", "22000 tensor(5.3277, device='cuda:0', grad_fn=)\n", "218000\n", "22100 tensor(5.2012, device='cuda:0', grad_fn=)\n", "219000\n", "22200 tensor(5.0736, device='cuda:0', grad_fn=)\n", "220000\n", "22300 tensor(5.5070, device='cuda:0', grad_fn=)\n", "221000\n", "22400 tensor(5.2190, device='cuda:0', grad_fn=)\n", "222000\n", "22500 tensor(5.2434, device='cuda:0', grad_fn=)\n", "223000\n", "22600 tensor(5.4325, device='cuda:0', grad_fn=)\n", "224000\n", "22700 tensor(5.1909, device='cuda:0', grad_fn=)\n", "225000\n", "22800 tensor(5.4576, device='cuda:0', grad_fn=)\n", "226000\n", "22900 tensor(5.5069, device='cuda:0', grad_fn=)\n", "227000\n", "23000 tensor(5.4041, device='cuda:0', grad_fn=)\n", "228000\n", "23100 tensor(5.3908, device='cuda:0', grad_fn=)\n", "229000\n", "23200 tensor(5.3866, device='cuda:0', grad_fn=)\n", "230000\n", "23300 tensor(5.4714, device='cuda:0', grad_fn=)\n", "231000\n", "23400 tensor(5.4781, device='cuda:0', grad_fn=)\n", "232000\n", "23500 tensor(5.3154, device='cuda:0', grad_fn=)\n", "233000\n", "23600 tensor(5.2854, device='cuda:0', grad_fn=)\n", "234000\n", "23700 tensor(5.3050, device='cuda:0', grad_fn=)\n", "235000\n", "23800 tensor(5.1721, device='cuda:0', grad_fn=)\n", "236000\n", "23900 tensor(5.2637, device='cuda:0', grad_fn=)\n", "237000\n", "24000 tensor(5.2519, device='cuda:0', grad_fn=)\n", "238000\n", "24100 tensor(5.3407, device='cuda:0', grad_fn=)\n", "239000\n", "24200 tensor(5.5137, device='cuda:0', grad_fn=)\n", "240000\n", "24300 tensor(5.4080, device='cuda:0', grad_fn=)\n", "241000\n", "24400 tensor(5.5379, device='cuda:0', grad_fn=)\n", "242000\n", "24500 tensor(5.3255, device='cuda:0', grad_fn=)\n", "243000\n", "24600 tensor(5.4515, device='cuda:0', grad_fn=)\n", "244000\n", "24700 tensor(5.3535, device='cuda:0', grad_fn=)\n", "245000\n", "24800 tensor(5.3935, device='cuda:0', grad_fn=)\n", "246000\n", "24900 tensor(5.4553, device='cuda:0', grad_fn=)\n", "247000\n", "25000 tensor(5.4708, device='cuda:0', grad_fn=)\n", "248000\n", "25100 tensor(5.3920, device='cuda:0', grad_fn=)\n", "249000\n", "25200 tensor(5.4083, device='cuda:0', grad_fn=)\n", "250000\n", "25300 tensor(5.4332, device='cuda:0', grad_fn=)\n", "251000\n", "25400 tensor(5.4136, device='cuda:0', grad_fn=)\n", "252000\n", "25500 tensor(5.3147, device='cuda:0', grad_fn=)\n", "253000\n", "25600 tensor(5.5860, device='cuda:0', grad_fn=)\n", "254000\n", "25700 tensor(5.3490, device='cuda:0', grad_fn=)\n", "255000\n", "25800 tensor(5.4464, device='cuda:0', grad_fn=)\n", "256000\n", "25900 tensor(5.3857, device='cuda:0', grad_fn=)\n", "257000\n", "26000 tensor(5.3893, device='cuda:0', grad_fn=)\n", "258000\n", "26100 tensor(5.3041, device='cuda:0', grad_fn=)\n", "259000\n", "26200 tensor(5.2321, device='cuda:0', grad_fn=)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "260000\n", "26300 tensor(5.4289, device='cuda:0', grad_fn=)\n", "261000\n", "26400 tensor(5.4663, device='cuda:0', grad_fn=)\n", "262000\n", "26500 tensor(5.1922, device='cuda:0', grad_fn=)\n", "263000\n", "26600 tensor(5.5283, device='cuda:0', grad_fn=)\n", "264000\n", "26700 tensor(5.3933, device='cuda:0', grad_fn=)\n", "265000\n", "26800 tensor(5.5680, device='cuda:0', grad_fn=)\n", "266000\n", "26900 tensor(5.3281, device='cuda:0', grad_fn=)\n", "267000\n", "27000 tensor(5.2408, device='cuda:0', grad_fn=)\n", "268000\n", "27100 tensor(5.2671, device='cuda:0', grad_fn=)\n", "269000\n", "27200 tensor(5.3099, device='cuda:0', grad_fn=)\n", "270000\n", "27300 tensor(5.5049, device='cuda:0', grad_fn=)\n", "271000\n", "27400 tensor(5.3850, device='cuda:0', grad_fn=)\n", "272000\n", "27500 tensor(5.2843, device='cuda:0', grad_fn=)\n", "273000\n", "27600 tensor(5.5777, device='cuda:0', grad_fn=)\n", "274000\n", "27700 tensor(5.4017, device='cuda:0', grad_fn=)\n", "275000\n", "27800 tensor(5.3994, device='cuda:0', grad_fn=)\n", "276000\n", "27900 tensor(5.5128, device='cuda:0', grad_fn=)\n", "277000\n", "28000 tensor(5.3708, device='cuda:0', grad_fn=)\n", "278000\n", "28100 tensor(5.3382, device='cuda:0', grad_fn=)\n", "279000\n", "28200 tensor(5.4996, device='cuda:0', grad_fn=)\n", "280000\n", "28300 tensor(5.1214, device='cuda:0', grad_fn=)\n", "281000\n", "28400 tensor(5.5647, device='cuda:0', grad_fn=)\n", "282000\n", "28500 tensor(5.3959, device='cuda:0', grad_fn=)\n", "283000\n", "28600 tensor(5.3312, device='cuda:0', grad_fn=)\n", "284000\n", "28700 tensor(5.4663, device='cuda:0', grad_fn=)\n", "285000\n", "28800 tensor(5.5155, device='cuda:0', grad_fn=)\n", "286000\n", "28900 tensor(5.3872, device='cuda:0', grad_fn=)\n", "287000\n", "29000 tensor(5.3017, device='cuda:0', grad_fn=)\n", "288000\n", "29100 tensor(5.0583, device='cuda:0', grad_fn=)\n", "289000\n", "29200 tensor(5.2099, device='cuda:0', grad_fn=)\n", "290000\n", "29300 tensor(5.4934, device='cuda:0', grad_fn=)\n", "291000\n", "29400 tensor(5.6202, device='cuda:0', grad_fn=)\n", "292000\n", "29500 tensor(5.4016, device='cuda:0', grad_fn=)\n", "293000\n", "29600 tensor(5.2601, device='cuda:0', grad_fn=)\n", "294000\n", "29700 tensor(5.4038, device='cuda:0', grad_fn=)\n", "295000\n", "29800 tensor(5.2475, device='cuda:0', grad_fn=)\n", "296000\n", "29900 tensor(5.4960, device='cuda:0', grad_fn=)\n", "297000\n", "30000 tensor(5.2438, device='cuda:0', grad_fn=)\n", "298000\n", "30100 tensor(5.3221, device='cuda:0', grad_fn=)\n", "299000\n", "30200 tensor(5.2686, device='cuda:0', grad_fn=)\n", "300000\n", "30300 tensor(5.3735, device='cuda:0', grad_fn=)\n", "301000\n", "30400 tensor(5.2057, device='cuda:0', grad_fn=)\n", "302000\n", "30500 tensor(5.3767, device='cuda:0', grad_fn=)\n", "30600 tensor(5.3515, device='cuda:0', grad_fn=)\n", "303000\n", "30700 tensor(5.3841, device='cuda:0', grad_fn=)\n", "304000\n", "30800 tensor(5.3889, device='cuda:0', grad_fn=)\n", "305000\n", "30900 tensor(5.4117, device='cuda:0', grad_fn=)\n", "306000\n", "31000 tensor(5.5205, device='cuda:0', grad_fn=)\n", "307000\n", "31100 tensor(5.1742, device='cuda:0', grad_fn=)\n", "308000\n", "31200 tensor(5.2173, device='cuda:0', grad_fn=)\n", "309000\n", "31300 tensor(5.4785, device='cuda:0', grad_fn=)\n", "310000\n", "31400 tensor(5.2577, device='cuda:0', grad_fn=)\n", "311000\n", "31500 tensor(5.4429, device='cuda:0', grad_fn=)\n", "312000\n", "31600 tensor(5.4289, device='cuda:0', grad_fn=)\n", "313000\n", "31700 tensor(5.3961, device='cuda:0', grad_fn=)\n", "314000\n", "31800 tensor(5.4999, device='cuda:0', grad_fn=)\n", "315000\n", "31900 tensor(5.1248, device='cuda:0', grad_fn=)\n", "316000\n", "32000 tensor(5.3122, device='cuda:0', grad_fn=)\n", "317000\n", "32100 tensor(5.1931, device='cuda:0', grad_fn=)\n", "318000\n", "32200 tensor(5.5096, device='cuda:0', grad_fn=)\n", "319000\n", "32300 tensor(5.4973, device='cuda:0', grad_fn=)\n", "320000\n", "32400 tensor(5.4742, device='cuda:0', grad_fn=)\n", "321000\n", "32500 tensor(5.2964, device='cuda:0', grad_fn=)\n", "322000\n", "32600 tensor(5.4063, device='cuda:0', grad_fn=)\n", "323000\n", "32700 tensor(5.3369, device='cuda:0', grad_fn=)\n", "324000\n", "32800 tensor(5.5636, device='cuda:0', grad_fn=)\n", "325000\n", "32900 tensor(5.4245, device='cuda:0', grad_fn=)\n", "326000\n", "33000 tensor(5.2032, device='cuda:0', grad_fn=)\n", "327000\n", "33100 tensor(5.4095, device='cuda:0', grad_fn=)\n", "328000\n", "33200 tensor(5.5071, device='cuda:0', grad_fn=)\n", "329000\n", "33300 tensor(5.2729, device='cuda:0', grad_fn=)\n", "330000\n", "33400 tensor(5.5492, device='cuda:0', grad_fn=)\n", "331000\n", "33500 tensor(5.3701, device='cuda:0', grad_fn=)\n", "332000\n", "33600 tensor(5.3223, device='cuda:0', grad_fn=)\n", "333000\n", "33700 tensor(5.3725, device='cuda:0', grad_fn=)\n", "334000\n", "33800 tensor(5.4572, device='cuda:0', grad_fn=)\n", "335000\n", "33900 tensor(5.1889, device='cuda:0', grad_fn=)\n", "336000\n", "34000 tensor(5.4090, device='cuda:0', grad_fn=)\n", "337000\n", "34100 tensor(5.3798, device='cuda:0', grad_fn=)\n", "338000\n", "34200 tensor(5.4259, device='cuda:0', grad_fn=)\n", "339000\n", "34300 tensor(5.2132, device='cuda:0', grad_fn=)\n", "340000\n", "34400 tensor(5.6692, device='cuda:0', grad_fn=)\n", "341000\n", "34500 tensor(5.5324, device='cuda:0', grad_fn=)\n", "342000\n", "34600 tensor(5.4271, device='cuda:0', grad_fn=)\n", "343000\n", "34700 tensor(5.4978, device='cuda:0', grad_fn=)\n", "344000\n", "34800 tensor(5.5230, device='cuda:0', grad_fn=)\n", "345000\n", "34900 tensor(5.5652, device='cuda:0', grad_fn=)\n", "346000\n", "35000 tensor(5.5478, device='cuda:0', grad_fn=)\n", "347000\n", "35100 tensor(5.3700, device='cuda:0', grad_fn=)\n", "348000\n", "35200 tensor(5.2958, device='cuda:0', grad_fn=)\n", "349000\n", "35300 tensor(5.5219, device='cuda:0', grad_fn=)\n", "350000\n", "35400 tensor(5.1702, device='cuda:0', grad_fn=)\n", "351000\n", "35500 tensor(5.2604, device='cuda:0', grad_fn=)\n", "352000\n", "35600 tensor(5.3821, device='cuda:0', grad_fn=)\n", "353000\n", "35700 tensor(5.2551, device='cuda:0', grad_fn=)\n", "354000\n", "35800 tensor(5.3840, device='cuda:0', grad_fn=)\n", "355000\n", "35900 tensor(5.3635, device='cuda:0', grad_fn=)\n", "356000\n", "36000 tensor(5.1400, device='cuda:0', grad_fn=)\n", "357000\n", "36100 tensor(5.5134, device='cuda:0', grad_fn=)\n", "358000\n", "36200 tensor(5.3632, device='cuda:0', grad_fn=)\n", "359000\n", "36300 tensor(5.6461, device='cuda:0', grad_fn=)\n", "360000\n", "36400 tensor(5.3415, device='cuda:0', grad_fn=)\n", "361000\n", "36500 tensor(5.3659, device='cuda:0', grad_fn=)\n", "362000\n", "36600 tensor(5.3874, device='cuda:0', grad_fn=)\n", "363000\n", "36700 tensor(5.1886, device='cuda:0', grad_fn=)\n", "364000\n", "36800 tensor(5.2958, device='cuda:0', grad_fn=)\n", "365000\n", "36900 tensor(5.4094, device='cuda:0', grad_fn=)\n", "366000\n", "37000 tensor(5.3023, device='cuda:0', grad_fn=)\n", "367000\n", "37100 tensor(5.3287, device='cuda:0', grad_fn=)\n", "368000\n", "37200 tensor(5.3996, device='cuda:0', grad_fn=)\n", "369000\n", "37300 tensor(5.3001, device='cuda:0', grad_fn=)\n", "370000\n", "37400 tensor(5.6516, device='cuda:0', grad_fn=)\n", "371000\n", "37500 tensor(5.3366, device='cuda:0', grad_fn=)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "372000\n", "37600 tensor(5.3282, device='cuda:0', grad_fn=)\n", "373000\n", "37700 tensor(5.5061, device='cuda:0', grad_fn=)\n", "374000\n", "37800 tensor(5.3408, device='cuda:0', grad_fn=)\n", "375000\n", "37900 tensor(5.3203, device='cuda:0', grad_fn=)\n", "376000\n", "38000 tensor(5.3996, device='cuda:0', grad_fn=)\n", "377000\n", "38100 tensor(5.4133, device='cuda:0', grad_fn=)\n", "378000\n", "38200 tensor(5.4262, device='cuda:0', grad_fn=)\n", "379000\n", "38300 tensor(5.3305, device='cuda:0', grad_fn=)\n", "380000\n", "38400 tensor(5.3983, device='cuda:0', grad_fn=)\n", "381000\n", "38500 tensor(5.4246, device='cuda:0', grad_fn=)\n", "382000\n", "38600 tensor(5.3713, device='cuda:0', grad_fn=)\n", "383000\n", "38700 tensor(5.3634, device='cuda:0', grad_fn=)\n", "384000\n", "38800 tensor(5.4504, device='cuda:0', grad_fn=)\n", "385000\n", "38900 tensor(5.5273, device='cuda:0', grad_fn=)\n", "386000\n", "39000 tensor(5.2229, device='cuda:0', grad_fn=)\n", "387000\n", "39100 tensor(5.4503, device='cuda:0', grad_fn=)\n", "388000\n", "39200 tensor(5.5406, device='cuda:0', grad_fn=)\n", "389000\n", "39300 tensor(5.3640, device='cuda:0', grad_fn=)\n", "390000\n", "39400 tensor(5.4311, device='cuda:0', grad_fn=)\n", "391000\n", "39500 tensor(5.5292, device='cuda:0', grad_fn=)\n", "392000\n", "39600 tensor(5.2217, device='cuda:0', grad_fn=)\n", "393000\n", "39700 tensor(5.2121, device='cuda:0', grad_fn=)\n", "394000\n", "39800 tensor(5.3415, device='cuda:0', grad_fn=)\n", "395000\n", "39900 tensor(5.1605, device='cuda:0', grad_fn=)\n", "396000\n", "40000 tensor(5.2472, device='cuda:0', grad_fn=)\n", "397000\n", "40100 tensor(5.3351, device='cuda:0', grad_fn=)\n", "398000\n", "40200 tensor(5.3198, device='cuda:0', grad_fn=)\n", "399000\n", "40300 tensor(5.3862, device='cuda:0', grad_fn=)\n", "40400 tensor(5.3946, device='cuda:0', grad_fn=)\n", "400000\n", "40500 tensor(5.3120, device='cuda:0', grad_fn=)\n", "401000\n", "40600 tensor(5.3741, device='cuda:0', grad_fn=)\n", "402000\n", "40700 tensor(5.4199, device='cuda:0', grad_fn=)\n", "403000\n", "40800 tensor(5.3702, device='cuda:0', grad_fn=)\n", "404000\n", "40900 tensor(5.3212, device='cuda:0', grad_fn=)\n", "405000\n", "41000 tensor(5.3683, device='cuda:0', grad_fn=)\n", "406000\n", "41100 tensor(5.3491, device='cuda:0', grad_fn=)\n", "407000\n", "41200 tensor(5.2400, device='cuda:0', grad_fn=)\n", "408000\n", "41300 tensor(5.3728, device='cuda:0', grad_fn=)\n", "409000\n", "41400 tensor(5.2643, device='cuda:0', grad_fn=)\n", "410000\n", "41500 tensor(5.4064, device='cuda:0', grad_fn=)\n", "411000\n", "41600 tensor(5.3238, device='cuda:0', grad_fn=)\n", "412000\n", "41700 tensor(5.3469, device='cuda:0', grad_fn=)\n", "413000\n", "41800 tensor(5.3432, device='cuda:0', grad_fn=)\n", "414000\n", "41900 tensor(5.3521, device='cuda:0', grad_fn=)\n", "415000\n", "42000 tensor(5.5087, device='cuda:0', grad_fn=)\n", "416000\n", "42100 tensor(5.2556, device='cuda:0', grad_fn=)\n", "417000\n", "42200 tensor(5.3407, device='cuda:0', grad_fn=)\n", "418000\n", "42300 tensor(5.4058, device='cuda:0', grad_fn=)\n", "419000\n", "42400 tensor(5.2231, device='cuda:0', grad_fn=)\n", "420000\n", "42500 tensor(5.3912, device='cuda:0', grad_fn=)\n", "421000\n", "42600 tensor(5.1878, device='cuda:0', grad_fn=)\n", "422000\n", "42700 tensor(5.4955, device='cuda:0', grad_fn=)\n", "423000\n", "42800 tensor(5.4193, device='cuda:0', grad_fn=)\n", "424000\n", "42900 tensor(5.2662, device='cuda:0', grad_fn=)\n", "425000\n", "43000 tensor(5.4093, device='cuda:0', grad_fn=)\n", "426000\n", "43100 tensor(5.4089, device='cuda:0', grad_fn=)\n", "427000\n", "43200 tensor(5.2223, device='cuda:0', grad_fn=)\n", "428000\n", "43300 tensor(5.2456, device='cuda:0', grad_fn=)\n", "429000\n", "43400 tensor(5.4129, device='cuda:0', grad_fn=)\n", "430000\n", "43500 tensor(5.1283, device='cuda:0', grad_fn=)\n", "431000\n", "43600 tensor(5.3275, device='cuda:0', grad_fn=)\n", "432000\n", "epoch: = 4\n", "0 tensor(5.3172, device='cuda:0', grad_fn=)\n", "1000\n", "100 tensor(5.3864, device='cuda:0', grad_fn=)\n", "200 tensor(5.2618, device='cuda:0', grad_fn=)\n", "2000\n", "300 tensor(5.2652, device='cuda:0', grad_fn=)\n", "3000\n", "400 tensor(5.2749, device='cuda:0', grad_fn=)\n", "4000\n", "500 tensor(5.4347, device='cuda:0', grad_fn=)\n", "5000\n", "600 tensor(5.2271, device='cuda:0', grad_fn=)\n", "6000\n", "700 tensor(5.5396, device='cuda:0', grad_fn=)\n", "7000\n", "800 tensor(5.1379, device='cuda:0', grad_fn=)\n", "8000\n", "900 tensor(5.3861, device='cuda:0', grad_fn=)\n", "9000\n", "1000 tensor(5.2629, device='cuda:0', grad_fn=)\n", "10000\n", "1100 tensor(5.4575, device='cuda:0', grad_fn=)\n", "11000\n", "1200 tensor(5.4936, device='cuda:0', grad_fn=)\n", "12000\n", "1300 tensor(5.4281, device='cuda:0', grad_fn=)\n", "13000\n", "1400 tensor(5.4186, device='cuda:0', grad_fn=)\n", "14000\n", "1500 tensor(5.5070, device='cuda:0', grad_fn=)\n", "15000\n", "1600 tensor(5.1769, device='cuda:0', grad_fn=)\n", "16000\n", "1700 tensor(5.2856, device='cuda:0', grad_fn=)\n", "17000\n", "1800 tensor(5.2827, device='cuda:0', grad_fn=)\n", "18000\n", "1900 tensor(5.2544, device='cuda:0', grad_fn=)\n", "19000\n", "2000 tensor(5.3218, device='cuda:0', grad_fn=)\n", "20000\n", "2100 tensor(5.4549, device='cuda:0', grad_fn=)\n", "21000\n", "2200 tensor(5.2864, device='cuda:0', grad_fn=)\n", "22000\n", "2300 tensor(5.3145, device='cuda:0', grad_fn=)\n", "23000\n", "2400 tensor(5.2987, device='cuda:0', grad_fn=)\n", "24000\n", "2500 tensor(5.3498, device='cuda:0', grad_fn=)\n", "25000\n", "2600 tensor(5.3730, device='cuda:0', grad_fn=)\n", "26000\n", "2700 tensor(5.4017, device='cuda:0', grad_fn=)\n", "27000\n", "2800 tensor(5.4255, device='cuda:0', grad_fn=)\n", "28000\n", "2900 tensor(5.5475, device='cuda:0', grad_fn=)\n", "29000\n", "3000 tensor(5.2988, device='cuda:0', grad_fn=)\n", "30000\n", "3100 tensor(5.3753, device='cuda:0', grad_fn=)\n", "31000\n", "3200 tensor(5.4049, device='cuda:0', grad_fn=)\n", "32000\n", "3300 tensor(5.3206, device='cuda:0', grad_fn=)\n", "33000\n", "3400 tensor(5.2159, device='cuda:0', grad_fn=)\n", "34000\n", "3500 tensor(5.3423, device='cuda:0', grad_fn=)\n", "35000\n", "3600 tensor(5.3717, device='cuda:0', grad_fn=)\n", "36000\n", "3700 tensor(5.3042, device='cuda:0', grad_fn=)\n", "37000\n", "3800 tensor(5.3258, device='cuda:0', grad_fn=)\n", "38000\n", "3900 tensor(5.1989, device='cuda:0', grad_fn=)\n", "39000\n", "4000 tensor(5.2650, device='cuda:0', grad_fn=)\n", "40000\n", "4100 tensor(5.3953, device='cuda:0', grad_fn=)\n", "41000\n", "4200 tensor(5.4542, device='cuda:0', grad_fn=)\n", "42000\n", "4300 tensor(5.3466, device='cuda:0', grad_fn=)\n", "43000\n", "4400 tensor(5.4222, device='cuda:0', grad_fn=)\n", "44000\n", "4500 tensor(5.2254, device='cuda:0', grad_fn=)\n", "45000\n", "4600 tensor(5.5610, device='cuda:0', grad_fn=)\n", "46000\n", "4700 tensor(5.2753, device='cuda:0', grad_fn=)\n", "47000\n", "4800 tensor(5.4028, device='cuda:0', grad_fn=)\n", "48000\n", "4900 tensor(5.4516, device='cuda:0', grad_fn=)\n", "49000\n", "5000 tensor(5.4464, device='cuda:0', grad_fn=)\n", "50000\n", "5100 tensor(5.5018, device='cuda:0', grad_fn=)\n", "51000\n", "5200 tensor(5.5194, device='cuda:0', grad_fn=)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "52000\n", "5300 tensor(5.5077, device='cuda:0', grad_fn=)\n", "53000\n", "5400 tensor(5.3746, device='cuda:0', grad_fn=)\n", "54000\n", "5500 tensor(5.4847, device='cuda:0', grad_fn=)\n", "55000\n", "5600 tensor(5.2664, device='cuda:0', grad_fn=)\n", "56000\n", "5700 tensor(5.5265, device='cuda:0', grad_fn=)\n", "57000\n", "5800 tensor(5.5101, device='cuda:0', grad_fn=)\n", "58000\n", "5900 tensor(5.4513, device='cuda:0', grad_fn=)\n", "59000\n", "6000 tensor(5.3554, device='cuda:0', grad_fn=)\n", "60000\n", "6100 tensor(5.3616, device='cuda:0', grad_fn=)\n", "61000\n", "6200 tensor(5.5360, device='cuda:0', grad_fn=)\n", "62000\n", "6300 tensor(5.3952, device='cuda:0', grad_fn=)\n", "63000\n", "6400 tensor(5.3132, device='cuda:0', grad_fn=)\n", "64000\n", "6500 tensor(5.1732, device='cuda:0', grad_fn=)\n", "65000\n", "6600 tensor(5.3505, device='cuda:0', grad_fn=)\n", "66000\n", "6700 tensor(5.2919, device='cuda:0', grad_fn=)\n", "67000\n", "6800 tensor(5.5064, device='cuda:0', grad_fn=)\n", "68000\n", "6900 tensor(5.4881, device='cuda:0', grad_fn=)\n", "69000\n", "7000 tensor(5.3978, device='cuda:0', grad_fn=)\n", "70000\n", "7100 tensor(5.2030, device='cuda:0', grad_fn=)\n", "71000\n", "7200 tensor(5.2738, device='cuda:0', grad_fn=)\n", "72000\n", "7300 tensor(5.5317, device='cuda:0', grad_fn=)\n", "73000\n", "7400 tensor(5.3487, device='cuda:0', grad_fn=)\n", "74000\n", "7500 tensor(5.2133, device='cuda:0', grad_fn=)\n", "75000\n", "7600 tensor(5.2878, device='cuda:0', grad_fn=)\n", "76000\n", "7700 tensor(5.3644, device='cuda:0', grad_fn=)\n", "77000\n", "7800 tensor(5.4711, device='cuda:0', grad_fn=)\n", "78000\n", "7900 tensor(5.1445, device='cuda:0', grad_fn=)\n", "79000\n", "8000 tensor(5.2138, device='cuda:0', grad_fn=)\n", "80000\n", "8100 tensor(5.3741, device='cuda:0', grad_fn=)\n", "81000\n", "8200 tensor(5.3893, device='cuda:0', grad_fn=)\n", "82000\n", "8300 tensor(5.3492, device='cuda:0', grad_fn=)\n", "83000\n", "8400 tensor(5.4797, device='cuda:0', grad_fn=)\n", "84000\n", "8500 tensor(5.4501, device='cuda:0', grad_fn=)\n", "85000\n", "8600 tensor(5.4600, device='cuda:0', grad_fn=)\n", "86000\n", "8700 tensor(5.5758, device='cuda:0', grad_fn=)\n", "87000\n", "8800 tensor(5.4493, device='cuda:0', grad_fn=)\n", "88000\n", "8900 tensor(5.3035, device='cuda:0', grad_fn=)\n", "89000\n", "9000 tensor(5.4164, device='cuda:0', grad_fn=)\n", "90000\n", "9100 tensor(5.4273, device='cuda:0', grad_fn=)\n", "91000\n", "9200 tensor(5.3343, device='cuda:0', grad_fn=)\n", "92000\n", "9300 tensor(5.3797, device='cuda:0', grad_fn=)\n", "93000\n", "9400 tensor(5.2260, device='cuda:0', grad_fn=)\n", "94000\n", "9500 tensor(5.3006, device='cuda:0', grad_fn=)\n", "95000\n", "9600 tensor(5.4211, device='cuda:0', grad_fn=)\n", "96000\n", "9700 tensor(5.3615, device='cuda:0', grad_fn=)\n", "97000\n", "9800 tensor(5.4089, device='cuda:0', grad_fn=)\n", "9900 tensor(5.2200, device='cuda:0', grad_fn=)\n", "98000\n", "10000 tensor(5.4428, device='cuda:0', grad_fn=)\n", "99000\n", "10100 tensor(5.3219, device='cuda:0', grad_fn=)\n", "100000\n", "10200 tensor(5.2692, device='cuda:0', grad_fn=)\n", "101000\n", "10300 tensor(5.3854, device='cuda:0', grad_fn=)\n", "102000\n", "10400 tensor(5.3984, device='cuda:0', grad_fn=)\n", "103000\n", "10500 tensor(5.4516, device='cuda:0', grad_fn=)\n", "104000\n", "10600 tensor(5.5380, device='cuda:0', grad_fn=)\n", "105000\n", "10700 tensor(5.3724, device='cuda:0', grad_fn=)\n", "106000\n", "10800 tensor(5.3862, device='cuda:0', grad_fn=)\n", "107000\n", "10900 tensor(5.3102, device='cuda:0', grad_fn=)\n", "108000\n", "11000 tensor(5.5487, device='cuda:0', grad_fn=)\n", "109000\n", "11100 tensor(5.1684, device='cuda:0', grad_fn=)\n", "110000\n", "11200 tensor(5.3303, device='cuda:0', grad_fn=)\n", "111000\n", "11300 tensor(5.3537, device='cuda:0', grad_fn=)\n", "112000\n", "11400 tensor(5.3064, device='cuda:0', grad_fn=)\n", "113000\n", "11500 tensor(5.3775, device='cuda:0', grad_fn=)\n", "114000\n", "11600 tensor(5.3649, device='cuda:0', grad_fn=)\n", "115000\n", "11700 tensor(5.2920, device='cuda:0', grad_fn=)\n", "116000\n", "11800 tensor(5.4908, device='cuda:0', grad_fn=)\n", "117000\n", "11900 tensor(5.3293, device='cuda:0', grad_fn=)\n", "118000\n", "12000 tensor(5.3926, device='cuda:0', grad_fn=)\n", "119000\n", "12100 tensor(5.4045, device='cuda:0', grad_fn=)\n", "120000\n", "12200 tensor(5.4246, device='cuda:0', grad_fn=)\n", "121000\n", "12300 tensor(5.5096, device='cuda:0', grad_fn=)\n", "122000\n", "12400 tensor(5.3884, device='cuda:0', grad_fn=)\n", "123000\n", "12500 tensor(5.3057, device='cuda:0', grad_fn=)\n", "124000\n", "12600 tensor(5.3466, device='cuda:0', grad_fn=)\n", "125000\n", "12700 tensor(5.2898, device='cuda:0', grad_fn=)\n", "126000\n", "12800 tensor(5.4714, device='cuda:0', grad_fn=)\n", "127000\n", "12900 tensor(5.3255, device='cuda:0', grad_fn=)\n", "128000\n", "13000 tensor(5.1438, device='cuda:0', grad_fn=)\n", "129000\n", "13100 tensor(5.3498, device='cuda:0', grad_fn=)\n", "130000\n", "13200 tensor(5.3890, device='cuda:0', grad_fn=)\n", "131000\n", "13300 tensor(5.2710, device='cuda:0', grad_fn=)\n", "132000\n", "13400 tensor(5.3541, device='cuda:0', grad_fn=)\n", "133000\n", "13500 tensor(5.3156, device='cuda:0', grad_fn=)\n", "134000\n", "13600 tensor(5.3957, device='cuda:0', grad_fn=)\n", "135000\n", "13700 tensor(5.3548, device='cuda:0', grad_fn=)\n", "136000\n", "13800 tensor(5.3577, device='cuda:0', grad_fn=)\n", "137000\n", "13900 tensor(5.2122, device='cuda:0', grad_fn=)\n", "138000\n", "14000 tensor(5.4587, device='cuda:0', grad_fn=)\n", "139000\n", "14100 tensor(5.1704, device='cuda:0', grad_fn=)\n", "140000\n", "14200 tensor(5.4419, device='cuda:0', grad_fn=)\n", "141000\n", "14300 tensor(5.4142, device='cuda:0', grad_fn=)\n", "142000\n", "14400 tensor(5.3058, device='cuda:0', grad_fn=)\n", "143000\n", "14500 tensor(5.4082, device='cuda:0', grad_fn=)\n", "144000\n", "14600 tensor(5.4414, device='cuda:0', grad_fn=)\n", "145000\n", "14700 tensor(5.3177, device='cuda:0', grad_fn=)\n", "146000\n", "14800 tensor(5.7665, device='cuda:0', grad_fn=)\n", "147000\n", "14900 tensor(5.4171, device='cuda:0', grad_fn=)\n", "148000\n", "15000 tensor(5.2698, device='cuda:0', grad_fn=)\n", "149000\n", "15100 tensor(5.4915, device='cuda:0', grad_fn=)\n", "150000\n", "15200 tensor(5.3576, device='cuda:0', grad_fn=)\n", "151000\n", "15300 tensor(5.3567, device='cuda:0', grad_fn=)\n", "152000\n", "15400 tensor(5.2379, device='cuda:0', grad_fn=)\n", "153000\n", "15500 tensor(5.4092, device='cuda:0', grad_fn=)\n", "154000\n", "15600 tensor(5.4042, device='cuda:0', grad_fn=)\n", "155000\n", "15700 tensor(5.3017, device='cuda:0', grad_fn=)\n", "156000\n", "15800 tensor(5.2188, device='cuda:0', grad_fn=)\n", "157000\n", "15900 tensor(5.3497, device='cuda:0', grad_fn=)\n", "158000\n", "16000 tensor(5.1718, device='cuda:0', grad_fn=)\n", "159000\n", "16100 tensor(5.2145, device='cuda:0', grad_fn=)\n", "160000\n", "16200 tensor(5.5591, device='cuda:0', grad_fn=)\n", "161000\n", "16300 tensor(5.3864, device='cuda:0', grad_fn=)\n", "162000\n", "16400 tensor(5.3719, device='cuda:0', grad_fn=)\n", "163000\n", "16500 tensor(5.4842, device='cuda:0', grad_fn=)\n", "164000\n", "16600 tensor(5.3329, device='cuda:0', grad_fn=)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "165000\n", "16700 tensor(5.3130, device='cuda:0', grad_fn=)\n", "166000\n", "16800 tensor(5.3903, device='cuda:0', grad_fn=)\n", "167000\n", "16900 tensor(5.1551, device='cuda:0', grad_fn=)\n", "168000\n", "17000 tensor(5.4229, device='cuda:0', grad_fn=)\n", "169000\n", "17100 tensor(5.2686, device='cuda:0', grad_fn=)\n", "170000\n", "17200 tensor(5.4099, device='cuda:0', grad_fn=)\n", "171000\n", "17300 tensor(5.4198, device='cuda:0', grad_fn=)\n", "172000\n", "17400 tensor(5.2162, device='cuda:0', grad_fn=)\n", "173000\n", "17500 tensor(5.2385, device='cuda:0', grad_fn=)\n", "174000\n", "17600 tensor(5.2786, device='cuda:0', grad_fn=)\n", "175000\n", "17700 tensor(5.2576, device='cuda:0', grad_fn=)\n", "176000\n", "17800 tensor(5.3158, device='cuda:0', grad_fn=)\n", "177000\n", "17900 tensor(5.2105, device='cuda:0', grad_fn=)\n", "178000\n", "18000 tensor(5.4627, device='cuda:0', grad_fn=)\n", "179000\n", "18100 tensor(5.3966, device='cuda:0', grad_fn=)\n", "180000\n", "18200 tensor(5.3108, device='cuda:0', grad_fn=)\n", "181000\n", "18300 tensor(5.3148, device='cuda:0', grad_fn=)\n", "182000\n", "18400 tensor(5.3321, device='cuda:0', grad_fn=)\n", "183000\n", "18500 tensor(5.3171, device='cuda:0', grad_fn=)\n", "184000\n", "18600 tensor(5.5247, device='cuda:0', grad_fn=)\n", "185000\n", "18700 tensor(5.3469, device='cuda:0', grad_fn=)\n", "186000\n", "18800 tensor(5.5092, device='cuda:0', grad_fn=)\n", "187000\n", "18900 tensor(5.3902, device='cuda:0', grad_fn=)\n", "188000\n", "19000 tensor(5.3904, device='cuda:0', grad_fn=)\n", "189000\n", "19100 tensor(5.5019, device='cuda:0', grad_fn=)\n", "190000\n", "19200 tensor(5.3838, device='cuda:0', grad_fn=)\n", "191000\n", "19300 tensor(5.4674, device='cuda:0', grad_fn=)\n", "192000\n", "19400 tensor(5.3223, device='cuda:0', grad_fn=)\n", "193000\n", "19500 tensor(5.3235, device='cuda:0', grad_fn=)\n", "194000\n", "19600 tensor(5.3589, device='cuda:0', grad_fn=)\n", "195000\n", "19700 tensor(5.4063, device='cuda:0', grad_fn=)\n", "196000\n", "19800 tensor(5.2838, device='cuda:0', grad_fn=)\n", "197000\n", "19900 tensor(5.2807, device='cuda:0', grad_fn=)\n", "198000\n", "20000 tensor(5.3038, device='cuda:0', grad_fn=)\n", "199000\n", "20100 tensor(5.2397, device='cuda:0', grad_fn=)\n", "200000\n", "20200 tensor(5.1723, device='cuda:0', grad_fn=)\n", "201000\n", "20300 tensor(5.2827, device='cuda:0', grad_fn=)\n", "20400 tensor(5.3245, device='cuda:0', grad_fn=)\n", "202000\n", "20500 tensor(5.3303, device='cuda:0', grad_fn=)\n", "203000\n", "20600 tensor(5.5211, device='cuda:0', grad_fn=)\n", "204000\n", "20700 tensor(5.3629, device='cuda:0', grad_fn=)\n", "205000\n", "20800 tensor(5.1882, device='cuda:0', grad_fn=)\n", "206000\n", "20900 tensor(5.4671, device='cuda:0', grad_fn=)\n", "207000\n", "21000 tensor(5.3110, device='cuda:0', grad_fn=)\n", "208000\n", "21100 tensor(5.3181, device='cuda:0', grad_fn=)\n", "209000\n", "21200 tensor(5.1968, device='cuda:0', grad_fn=)\n", "210000\n", "21300 tensor(5.3940, device='cuda:0', grad_fn=)\n", "211000\n", "21400 tensor(5.4308, device='cuda:0', grad_fn=)\n", "212000\n", "21500 tensor(5.2127, device='cuda:0', grad_fn=)\n", "213000\n", "21600 tensor(5.2003, device='cuda:0', grad_fn=)\n", "214000\n", "21700 tensor(5.1881, device='cuda:0', grad_fn=)\n", "215000\n", "21800 tensor(5.3180, device='cuda:0', grad_fn=)\n", "216000\n", "21900 tensor(5.3197, device='cuda:0', grad_fn=)\n", "217000\n", "22000 tensor(5.3005, device='cuda:0', grad_fn=)\n", "218000\n", "22100 tensor(5.1776, device='cuda:0', grad_fn=)\n", "219000\n", "22200 tensor(5.0509, device='cuda:0', grad_fn=)\n", "220000\n", "22300 tensor(5.4807, device='cuda:0', grad_fn=)\n", "221000\n", "22400 tensor(5.2040, device='cuda:0', grad_fn=)\n", "222000\n", "22500 tensor(5.2161, device='cuda:0', grad_fn=)\n", "223000\n", "22600 tensor(5.4083, device='cuda:0', grad_fn=)\n", "224000\n", "22700 tensor(5.1619, device='cuda:0', grad_fn=)\n", "225000\n", "22800 tensor(5.4301, device='cuda:0', grad_fn=)\n", "226000\n", "22900 tensor(5.4791, device='cuda:0', grad_fn=)\n", "227000\n", "23000 tensor(5.3785, device='cuda:0', grad_fn=)\n", "228000\n", "23100 tensor(5.3705, device='cuda:0', grad_fn=)\n", "229000\n", "23200 tensor(5.3633, device='cuda:0', grad_fn=)\n", "230000\n", "23300 tensor(5.4443, device='cuda:0', grad_fn=)\n", "231000\n", "23400 tensor(5.4496, device='cuda:0', grad_fn=)\n", "232000\n", "23500 tensor(5.2961, device='cuda:0', grad_fn=)\n", "233000\n", "23600 tensor(5.2603, device='cuda:0', grad_fn=)\n", "234000\n", "23700 tensor(5.2793, device='cuda:0', grad_fn=)\n", "235000\n", "23800 tensor(5.1461, device='cuda:0', grad_fn=)\n", "236000\n", "23900 tensor(5.2376, device='cuda:0', grad_fn=)\n", "237000\n", "24000 tensor(5.2269, device='cuda:0', grad_fn=)\n", "238000\n", "24100 tensor(5.3154, device='cuda:0', grad_fn=)\n", "239000\n", "24200 tensor(5.4852, device='cuda:0', grad_fn=)\n", "240000\n", "24300 tensor(5.3785, device='cuda:0', grad_fn=)\n", "241000\n", "24400 tensor(5.5053, device='cuda:0', grad_fn=)\n", "242000\n", "24500 tensor(5.2987, device='cuda:0', grad_fn=)\n", "243000\n", "24600 tensor(5.4275, device='cuda:0', grad_fn=)\n", "244000\n", "24700 tensor(5.3283, device='cuda:0', grad_fn=)\n", "245000\n", "24800 tensor(5.3707, device='cuda:0', grad_fn=)\n", "246000\n", "24900 tensor(5.4294, device='cuda:0', grad_fn=)\n", "247000\n", "25000 tensor(5.4479, device='cuda:0', grad_fn=)\n", "248000\n", "25100 tensor(5.3629, device='cuda:0', grad_fn=)\n", "249000\n", "25200 tensor(5.3849, device='cuda:0', grad_fn=)\n", "250000\n", "25300 tensor(5.4124, device='cuda:0', grad_fn=)\n", "251000\n", "25400 tensor(5.3932, device='cuda:0', grad_fn=)\n", "252000\n", "25500 tensor(5.2893, device='cuda:0', grad_fn=)\n", "253000\n", "25600 tensor(5.5512, device='cuda:0', grad_fn=)\n", "254000\n", "25700 tensor(5.3227, device='cuda:0', grad_fn=)\n", "255000\n", "25800 tensor(5.4217, device='cuda:0', grad_fn=)\n", "256000\n", "25900 tensor(5.3637, device='cuda:0', grad_fn=)\n", "257000\n", "26000 tensor(5.3632, device='cuda:0', grad_fn=)\n", "258000\n", "26100 tensor(5.2841, device='cuda:0', grad_fn=)\n", "259000\n", "26200 tensor(5.2107, device='cuda:0', grad_fn=)\n", "260000\n", "26300 tensor(5.4024, device='cuda:0', grad_fn=)\n", "261000\n", "26400 tensor(5.4410, device='cuda:0', grad_fn=)\n", "262000\n", "26500 tensor(5.1685, device='cuda:0', grad_fn=)\n", "263000\n", "26600 tensor(5.5023, device='cuda:0', grad_fn=)\n", "264000\n", "26700 tensor(5.3654, device='cuda:0', grad_fn=)\n", "265000\n", "26800 tensor(5.5407, device='cuda:0', grad_fn=)\n", "266000\n", "26900 tensor(5.3000, device='cuda:0', grad_fn=)\n", "267000\n", "27000 tensor(5.2141, device='cuda:0', grad_fn=)\n", "268000\n", "27100 tensor(5.2490, device='cuda:0', grad_fn=)\n", "269000\n", "27200 tensor(5.2850, device='cuda:0', grad_fn=)\n", "270000\n", "27300 tensor(5.4811, device='cuda:0', grad_fn=)\n", "271000\n", "27400 tensor(5.3561, device='cuda:0', grad_fn=)\n", "272000\n", "27500 tensor(5.2602, device='cuda:0', grad_fn=)\n", "273000\n", "27600 tensor(5.5429, device='cuda:0', grad_fn=)\n", "274000\n", "27700 tensor(5.3794, device='cuda:0', grad_fn=)\n", "275000\n", "27800 tensor(5.3792, device='cuda:0', grad_fn=)\n", "276000\n", "27900 tensor(5.4873, device='cuda:0', grad_fn=)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "277000\n", "28000 tensor(5.3454, device='cuda:0', grad_fn=)\n", "278000\n", "28100 tensor(5.3113, device='cuda:0', grad_fn=)\n", "279000\n", "28200 tensor(5.4785, device='cuda:0', grad_fn=)\n", "280000\n", "28300 tensor(5.1013, device='cuda:0', grad_fn=)\n", "281000\n", "28400 tensor(5.5403, device='cuda:0', grad_fn=)\n", "282000\n", "28500 tensor(5.3676, device='cuda:0', grad_fn=)\n", "283000\n", "28600 tensor(5.3108, device='cuda:0', grad_fn=)\n", "284000\n", "28700 tensor(5.4403, device='cuda:0', grad_fn=)\n", "285000\n", "28800 tensor(5.4926, device='cuda:0', grad_fn=)\n", "286000\n", "28900 tensor(5.3638, device='cuda:0', grad_fn=)\n", "287000\n", "29000 tensor(5.2819, device='cuda:0', grad_fn=)\n", "288000\n", "29100 tensor(5.0362, device='cuda:0', grad_fn=)\n", "289000\n", "29200 tensor(5.1871, device='cuda:0', grad_fn=)\n", "290000\n", "29300 tensor(5.4697, device='cuda:0', grad_fn=)\n", "291000\n", "29400 tensor(5.5909, device='cuda:0', grad_fn=)\n", "292000\n", "29500 tensor(5.3807, device='cuda:0', grad_fn=)\n", "293000\n", "29600 tensor(5.2398, device='cuda:0', grad_fn=)\n", "294000\n", "29700 tensor(5.3690, device='cuda:0', grad_fn=)\n", "295000\n", "29800 tensor(5.2220, device='cuda:0', grad_fn=)\n", "296000\n", "29900 tensor(5.4597, device='cuda:0', grad_fn=)\n", "297000\n", "30000 tensor(5.2205, device='cuda:0', grad_fn=)\n", "298000\n", "30100 tensor(5.3061, device='cuda:0', grad_fn=)\n", "299000\n", "30200 tensor(5.2432, device='cuda:0', grad_fn=)\n", "300000\n", "30300 tensor(5.3527, device='cuda:0', grad_fn=)\n", "301000\n", "30400 tensor(5.1823, device='cuda:0', grad_fn=)\n", "302000\n", "30500 tensor(5.3526, device='cuda:0', grad_fn=)\n", "30600 tensor(5.3318, device='cuda:0', grad_fn=)\n", "303000\n", "30700 tensor(5.3634, device='cuda:0', grad_fn=)\n", "304000\n", "30800 tensor(5.3571, device='cuda:0', grad_fn=)\n", "305000\n", "30900 tensor(5.3875, device='cuda:0', grad_fn=)\n", "306000\n", "31000 tensor(5.4983, device='cuda:0', grad_fn=)\n", "307000\n", "31100 tensor(5.1554, device='cuda:0', grad_fn=)\n", "308000\n", "31200 tensor(5.1952, device='cuda:0', grad_fn=)\n", "309000\n", "31300 tensor(5.4546, device='cuda:0', grad_fn=)\n", "310000\n", "31400 tensor(5.2307, device='cuda:0', grad_fn=)\n", "311000\n", "31500 tensor(5.4188, device='cuda:0', grad_fn=)\n", "312000\n", "31600 tensor(5.4085, device='cuda:0', grad_fn=)\n", "313000\n", "31700 tensor(5.3744, device='cuda:0', grad_fn=)\n", "314000\n", "31800 tensor(5.4766, device='cuda:0', grad_fn=)\n", "315000\n", "31900 tensor(5.1062, device='cuda:0', grad_fn=)\n", "316000\n", "32000 tensor(5.2924, device='cuda:0', grad_fn=)\n", "317000\n", "32100 tensor(5.1728, device='cuda:0', grad_fn=)\n", "318000\n", "32200 tensor(5.4863, device='cuda:0', grad_fn=)\n", "319000\n", "32300 tensor(5.4748, device='cuda:0', grad_fn=)\n", "320000\n", "32400 tensor(5.4518, device='cuda:0', grad_fn=)\n", "321000\n", "32500 tensor(5.2752, device='cuda:0', grad_fn=)\n", "322000\n", "32600 tensor(5.3822, device='cuda:0', grad_fn=)\n", "323000\n", "32700 tensor(5.3088, device='cuda:0', grad_fn=)\n", "324000\n", "32800 tensor(5.5403, device='cuda:0', grad_fn=)\n", "325000\n", "32900 tensor(5.4000, device='cuda:0', grad_fn=)\n", "326000\n", "33000 tensor(5.1837, device='cuda:0', grad_fn=)\n", "327000\n", "33100 tensor(5.3888, device='cuda:0', grad_fn=)\n", "328000\n", "33200 tensor(5.4849, device='cuda:0', grad_fn=)\n", "329000\n", "33300 tensor(5.2471, device='cuda:0', grad_fn=)\n", "330000\n", "33400 tensor(5.5246, device='cuda:0', grad_fn=)\n", "331000\n", "33500 tensor(5.3479, device='cuda:0', grad_fn=)\n", "332000\n", "33600 tensor(5.3043, device='cuda:0', grad_fn=)\n", "333000\n", "33700 tensor(5.3487, device='cuda:0', grad_fn=)\n", "334000\n", "33800 tensor(5.4368, device='cuda:0', grad_fn=)\n", "335000\n", "33900 tensor(5.1620, device='cuda:0', grad_fn=)\n", "336000\n", "34000 tensor(5.3873, device='cuda:0', grad_fn=)\n", "337000\n", "34100 tensor(5.3545, device='cuda:0', grad_fn=)\n", "338000\n", "34200 tensor(5.4001, device='cuda:0', grad_fn=)\n", "339000\n", "34300 tensor(5.1902, device='cuda:0', grad_fn=)\n", "340000\n", "34400 tensor(5.6453, device='cuda:0', grad_fn=)\n", "341000\n", "34500 tensor(5.5124, device='cuda:0', grad_fn=)\n", "342000\n", "34600 tensor(5.4069, device='cuda:0', grad_fn=)\n", "343000\n", "34700 tensor(5.4734, device='cuda:0', grad_fn=)\n", "344000\n", "34800 tensor(5.5014, device='cuda:0', grad_fn=)\n", "345000\n", "34900 tensor(5.5412, device='cuda:0', grad_fn=)\n", "346000\n", "35000 tensor(5.5132, device='cuda:0', grad_fn=)\n", "347000\n", "35100 tensor(5.3455, device='cuda:0', grad_fn=)\n", "348000\n", "35200 tensor(5.2694, device='cuda:0', grad_fn=)\n", "349000\n", "35300 tensor(5.4988, device='cuda:0', grad_fn=)\n", "350000\n", "35400 tensor(5.1485, device='cuda:0', grad_fn=)\n", "351000\n", "35500 tensor(5.2299, device='cuda:0', grad_fn=)\n", "352000\n", "35600 tensor(5.3643, device='cuda:0', grad_fn=)\n", "353000\n", "35700 tensor(5.2247, device='cuda:0', grad_fn=)\n", "354000\n", "35800 tensor(5.3615, device='cuda:0', grad_fn=)\n", "355000\n", "35900 tensor(5.3453, device='cuda:0', grad_fn=)\n", "356000\n", "36000 tensor(5.1217, device='cuda:0', grad_fn=)\n", "357000\n", "36100 tensor(5.4909, device='cuda:0', grad_fn=)\n", "358000\n", "36200 tensor(5.3382, device='cuda:0', grad_fn=)\n", "359000\n", "36300 tensor(5.6225, device='cuda:0', grad_fn=)\n", "360000\n", "36400 tensor(5.3167, device='cuda:0', grad_fn=)\n", "361000\n", "36500 tensor(5.3458, device='cuda:0', grad_fn=)\n", "362000\n", "36600 tensor(5.3608, device='cuda:0', grad_fn=)\n", "363000\n", "36700 tensor(5.1660, device='cuda:0', grad_fn=)\n", "364000\n", "36800 tensor(5.2737, device='cuda:0', grad_fn=)\n", "365000\n", "36900 tensor(5.3883, device='cuda:0', grad_fn=)\n", "366000\n", "37000 tensor(5.2783, device='cuda:0', grad_fn=)\n", "367000\n", "37100 tensor(5.3110, device='cuda:0', grad_fn=)\n", "368000\n", "37200 tensor(5.3794, device='cuda:0', grad_fn=)\n", "369000\n", "37300 tensor(5.2802, device='cuda:0', grad_fn=)\n", "370000\n", "37400 tensor(5.6133, device='cuda:0', grad_fn=)\n", "371000\n", "37500 tensor(5.3138, device='cuda:0', grad_fn=)\n", "372000\n", "37600 tensor(5.3083, device='cuda:0', grad_fn=)\n", "373000\n", "37700 tensor(5.4860, device='cuda:0', grad_fn=)\n", "374000\n", "37800 tensor(5.3216, device='cuda:0', grad_fn=)\n", "375000\n", "37900 tensor(5.2969, device='cuda:0', grad_fn=)\n", "376000\n", "38000 tensor(5.3759, device='cuda:0', grad_fn=)\n", "377000\n", "38100 tensor(5.3914, device='cuda:0', grad_fn=)\n", "378000\n", "38200 tensor(5.4089, device='cuda:0', grad_fn=)\n", "379000\n", "38300 tensor(5.3068, device='cuda:0', grad_fn=)\n", "380000\n", "38400 tensor(5.3798, device='cuda:0', grad_fn=)\n", "381000\n", "38500 tensor(5.4051, device='cuda:0', grad_fn=)\n", "382000\n", "38600 tensor(5.3471, device='cuda:0', grad_fn=)\n", "383000\n", "38700 tensor(5.3415, device='cuda:0', grad_fn=)\n", "384000\n", "38800 tensor(5.4310, device='cuda:0', grad_fn=)\n", "385000\n", "38900 tensor(5.5029, device='cuda:0', grad_fn=)\n", "386000\n", "39000 tensor(5.2021, device='cuda:0', grad_fn=)\n", "387000\n", "39100 tensor(5.4283, device='cuda:0', grad_fn=)\n", "388000\n", "39200 tensor(5.5158, device='cuda:0', grad_fn=)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "389000\n", "39300 tensor(5.3452, device='cuda:0', grad_fn=)\n", "390000\n", "39400 tensor(5.4111, device='cuda:0', grad_fn=)\n", "391000\n", "39500 tensor(5.4969, device='cuda:0', grad_fn=)\n", "392000\n", "39600 tensor(5.1952, device='cuda:0', grad_fn=)\n", "393000\n", "39700 tensor(5.1946, device='cuda:0', grad_fn=)\n", "394000\n", "39800 tensor(5.3234, device='cuda:0', grad_fn=)\n", "395000\n", "39900 tensor(5.1354, device='cuda:0', grad_fn=)\n", "396000\n", "40000 tensor(5.2210, device='cuda:0', grad_fn=)\n", "397000\n", "40100 tensor(5.3133, device='cuda:0', grad_fn=)\n", "398000\n", "40200 tensor(5.2990, device='cuda:0', grad_fn=)\n", "399000\n", "40300 tensor(5.3684, device='cuda:0', grad_fn=)\n", "40400 tensor(5.3700, device='cuda:0', grad_fn=)\n", "400000\n", "40500 tensor(5.2911, device='cuda:0', grad_fn=)\n", "401000\n", "40600 tensor(5.3497, device='cuda:0', grad_fn=)\n", "402000\n", "40700 tensor(5.3981, device='cuda:0', grad_fn=)\n", "403000\n", "40800 tensor(5.3436, device='cuda:0', grad_fn=)\n", "404000\n", "40900 tensor(5.2978, device='cuda:0', grad_fn=)\n", "405000\n", "41000 tensor(5.3420, device='cuda:0', grad_fn=)\n", "406000\n", "41100 tensor(5.3342, device='cuda:0', grad_fn=)\n", "407000\n", "41200 tensor(5.2226, device='cuda:0', grad_fn=)\n", "408000\n", "41300 tensor(5.3573, device='cuda:0', grad_fn=)\n", "409000\n", "41400 tensor(5.2448, device='cuda:0', grad_fn=)\n", "410000\n", "41500 tensor(5.3863, device='cuda:0', grad_fn=)\n", "411000\n", "41600 tensor(5.3051, device='cuda:0', grad_fn=)\n", "412000\n", "41700 tensor(5.3294, device='cuda:0', grad_fn=)\n", "413000\n", "41800 tensor(5.3191, device='cuda:0', grad_fn=)\n", "414000\n", "41900 tensor(5.3289, device='cuda:0', grad_fn=)\n", "415000\n", "42000 tensor(5.4860, device='cuda:0', grad_fn=)\n", "416000\n", "42100 tensor(5.2358, device='cuda:0', grad_fn=)\n", "417000\n", "42200 tensor(5.3253, device='cuda:0', grad_fn=)\n", "418000\n", "42300 tensor(5.3869, device='cuda:0', grad_fn=)\n", "419000\n", "42400 tensor(5.2062, device='cuda:0', grad_fn=)\n", "420000\n", "42500 tensor(5.3712, device='cuda:0', grad_fn=)\n", "421000\n", "42600 tensor(5.1718, device='cuda:0', grad_fn=)\n", "422000\n", "42700 tensor(5.4735, device='cuda:0', grad_fn=)\n", "423000\n", "42800 tensor(5.3973, device='cuda:0', grad_fn=)\n", "424000\n", "42900 tensor(5.2447, device='cuda:0', grad_fn=)\n", "425000\n", "43000 tensor(5.3896, device='cuda:0', grad_fn=)\n", "426000\n", "43100 tensor(5.3916, device='cuda:0', grad_fn=)\n", "427000\n", "43200 tensor(5.2044, device='cuda:0', grad_fn=)\n", "428000\n", "43300 tensor(5.2167, device='cuda:0', grad_fn=)\n", "429000\n", "43400 tensor(5.3933, device='cuda:0', grad_fn=)\n", "430000\n", "43500 tensor(5.1078, device='cuda:0', grad_fn=)\n", "431000\n", "43600 tensor(5.3045, device='cuda:0', grad_fn=)\n", "432000\n" ] } ], "source": [ "data = DataLoader(train_dataset, batch_size=batch_s)\n", "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n", "criterion = torch.nn.NLLLoss()\n", "torch.cuda.empty_cache()\n", "gc.collect()\n", "\n", "model.load_state_dict(torch.load('model-bigram_final.bin'))\n", "for i in range(1, epochs+1):\n", " print('epoch: =', i)\n", " model.train()\n", " step = 0\n", " for x, y in data: # prev, predicting, following words\n", " x = x.to(device)\n", " y = y.to(device)\n", " optimizer.zero_grad()\n", " ypredicted = model(x) #previous, following word\n", " loss = criterion(torch.log(ypredicted), y)\n", " if step % 100 == 0:\n", " print(step, loss)\n", " step += 1\n", " loss.backward()\n", " optimizer.step()\n", " torch.save(model.state_dict(), f'model-bigram_2nd-run{i}.bin') \n", "torch.save(model.state_dict(), f'model-bigram_final.bin') " ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/plain": [ "[('be', 11, 0.2570849657058716),\n", " ('', 0, 0.07411641627550125),\n", " ('not', 22, 0.05940083786845207),\n", " ('have', 28, 0.02751326560974121),\n", " ('bo', 167, 0.014936885796487331),\n", " ('make', 116, 0.013943656347692013),\n", " ('give', 193, 0.011286991648375988),\n", " ('take', 153, 0.011171611957252026),\n", " ('do', 86, 0.010088067501783371),\n", " ('he', 20, 0.009703895077109337)]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "device = 'cuda'\n", "torch.cuda.empty_cache()\n", "model = SimpleBigramNeuralLanguageModel(vocab_size, embed_size).to(device)\n", "model.load_state_dict(torch.load(f'model-bigram_final.bin'))\n", "model.eval()\n", "\n", "ixs = torch.tensor(vocab.forward(['will'])).to(device)\n", "\n", "out = model(ixs)\n", "top = torch.topk(out[0], 10)\n", "top_indices = top.indices.tolist()\n", "top_probs = top.values.tolist()\n", "top_words = vocab.lookup_tokens(top_indices)\n", "list(zip(top_words, top_indices, top_probs))" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/plain": [ "[('', 0, 0.19996878504753113),\n", " ('and', 3, 0.05288130044937134),\n", " ('of', 2, 0.042051784694194794),\n", " ('the', 1, 0.026572922244668007),\n", " ('to', 4, 0.022689413279294968),\n", " ('in', 6, 0.015904497355222702),\n", " ('The', 17, 0.012827681377530098),\n", " ('a', 5, 0.00961760152131319),\n", " ('for', 8, 0.008938422426581383),\n", " ('', 32, 0.00840282253921032)]" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vocab = train_dataset.vocab\n", "ixs = torch.tensor(vocab.forward(['cerned.'])).to(device)\n", "\n", "out = model(ixs)\n", "top = torch.topk(out[0], 10)\n", "top_indices = top.indices.tolist()\n", "top_probs = top.values.tolist()\n", "top_words = vocab.lookup_tokens(top_indices)\n", "list(zip(top_words, top_indices, top_probs))" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('', 0, 1.0),\n", " ('particular,', 14538, 0.24527804553508759),\n", " ('revolution.', 20446, 0.23776617646217346),\n", " ('Territory.', 14189, 0.23417341709136963),\n", " ('or-', 2261, 0.22888363897800446),\n", " ('3', 479, 0.2288265973329544),\n", " ('speak.', 13722, 0.2252315878868103),\n", " ('attend.', 19397, 0.22110989689826965),\n", " ('say,', 1455, 0.22106117010116577),\n", " ('Lee.', 15326, 0.21764159202575684)]" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cos = nn.CosineSimilarity(dim=1, eps=1e-6)\n", "\n", "embeddings = model.model[0].weight\n", "\n", "vec = embeddings[vocab['cerned.']]\n", "\n", "similarities = cos(vec, embeddings)\n", "\n", "top = torch.topk(similarities, 10)\n", "\n", "top_indices = top.indices.tolist()\n", "top_probs = top.values.tolist()\n", "top_words = vocab.lookup_tokens(top_indices)\n", "list(zip(top_words, top_indices, top_probs))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "def get_values_from_model(presc_word, model, vocab, k):\n", " ixs = torch.tensor(vocab.forward([presc_word])).to(device)\n", " out = model(ixs)\n", " top = torch.topk(out[0], k)\n", " top_indices = top.indices.tolist()\n", " top_probs = top.values.tolist()\n", " top_words = vocab.lookup_tokens(top_indices)\n", " return list(zip(top_words, top_probs))\n", "\n", "def gonito_format(dic):\n", " tab = summarize_probs_unk(dic)\n", " result = ''\n", " for element in tab[:-1]:\n", " result+=str(element[0])+':'+str(element[1])+'\\t'\n", " result+=':'+ str(tab[-1][1])+'\\n'\n", " return result\n", "\n", "def summarize_probs_unk(dic):\n", " if '' in dic.keys():\n", " probsum = sum(float(val) for key, val in dic.items())\n", " for key in dic:\n", " dic[key] = dic[key]/probsum ###leave some space for wildcard\n", " wildcard = dic['']\n", " del dic['']\n", " tab = [(key, val) for key, val in dic.items()]\n", " tab.append(('', wildcard))\n", " else:\n", " probsum = sum(float(val) for key, val in dic.items())\n", " for key in dic:\n", " dic[key] = dic[key]/(probsum*(1+wildcard_minweight)) #plus, becouse it's denominator\n", " tab = [(key, val) for key, val in dic.items()]\n", " tab.append(('', 1-1-sum([val for val in dic.values()])))\n", " return tab\n", "\n" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.load_state_dict(torch.load('model-bigram_final.bin'))" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/gedin/.local/lib/python3.10/site-packages/torch/nn/modules/container.py:217: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n", " input = module(input)\n" ] } ], "source": [ "\n", "with lzma.open(test_file, 'rt') as file:\n", " predict_words = []\n", " results = []\n", " for line in file:\n", "# print(line)\n", " line = preprocess(line) #get only relevant\n", " split = line.split('\\t')\n", " predict_words.append(get_last_word(split[0])) #get_first_word(split[1])\n", " vocab = train_dataset.vocab\n", " for presc_word in predict_words:\n", " results.append(dict(get_values_from_model(presc_word, model, vocab, k=k)))\n", " with open(out_file, 'w') as outfile:\n", " for elem in results:\n", " outfile.write(gonito_format(elem))\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.6" }, "org": null }, "nbformat": 4, "nbformat_minor": 1 }