{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Defaulting to user installation because normal site-packages is not writeable\n",
"Collecting torchtext\n",
" Downloading torchtext-0.15.2-cp310-cp310-manylinux1_x86_64.whl (2.0 MB)\n",
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m:01\u001b[0m\n",
"\u001b[?25hCollecting tqdm\n",
" Using cached tqdm-4.65.0-py3-none-any.whl (77 kB)\n",
"Requirement already satisfied: numpy in /home/gedin/.local/lib/python3.10/site-packages (from torchtext) (1.24.3)\n",
"Collecting torchdata==0.6.1\n",
" Downloading torchdata-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.6 MB)\n",
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.6/4.6 MB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: requests in /usr/lib/python3/dist-packages (from torchtext) (2.25.1)\n",
"Collecting torch==2.0.1\n",
" Downloading torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl (619.9 MB)\n",
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m619.9/619.9 MB\u001b[0m \u001b[31m1.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:09\u001b[0m\n",
"\u001b[?25hCollecting sympy\n",
" Downloading sympy-1.12-py3-none-any.whl (5.7 MB)\n",
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.7/5.7 MB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n",
"\u001b[?25hCollecting nvidia-cudnn-cu11==8.5.0.96\n",
" Using cached nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl (557.1 MB)\n",
"Collecting nvidia-cuda-cupti-cu11==11.7.101\n",
" Using cached nvidia_cuda_cupti_cu11-11.7.101-py3-none-manylinux1_x86_64.whl (11.8 MB)\n",
"Collecting nvidia-cusparse-cu11==11.7.4.91\n",
" Using cached nvidia_cusparse_cu11-11.7.4.91-py3-none-manylinux1_x86_64.whl (173.2 MB)\n",
"Collecting networkx\n",
" Using cached networkx-3.1-py3-none-any.whl (2.1 MB)\n",
"Collecting nvidia-cufft-cu11==10.9.0.58\n",
" Using cached nvidia_cufft_cu11-10.9.0.58-py3-none-manylinux1_x86_64.whl (168.4 MB)\n",
"Collecting filelock\n",
" Downloading filelock-3.12.0-py3-none-any.whl (10 kB)\n",
"Collecting nvidia-cuda-runtime-cu11==11.7.99\n",
" Using cached nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl (849 kB)\n",
"Collecting triton==2.0.0\n",
" Downloading triton-2.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (63.3 MB)\n",
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.3/63.3 MB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:02\u001b[0m\n",
"\u001b[?25hCollecting nvidia-cusolver-cu11==11.4.0.1\n",
" Using cached nvidia_cusolver_cu11-11.4.0.1-2-py3-none-manylinux1_x86_64.whl (102.6 MB)\n",
"Requirement already satisfied: jinja2 in /home/gedin/.local/lib/python3.10/site-packages (from torch==2.0.1->torchtext) (3.1.2)\n",
"Collecting nvidia-cublas-cu11==11.10.3.66\n",
" Using cached nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl (317.1 MB)\n",
"Collecting typing-extensions\n",
" Downloading typing_extensions-4.6.3-py3-none-any.whl (31 kB)\n",
"Collecting nvidia-nccl-cu11==2.14.3\n",
" Using cached nvidia_nccl_cu11-2.14.3-py3-none-manylinux1_x86_64.whl (177.1 MB)\n",
"Collecting nvidia-cuda-nvrtc-cu11==11.7.99\n",
" Using cached nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl (21.0 MB)\n",
"Collecting nvidia-curand-cu11==10.2.10.91\n",
" Using cached nvidia_curand_cu11-10.2.10.91-py3-none-manylinux1_x86_64.whl (54.6 MB)\n",
"Collecting nvidia-nvtx-cu11==11.7.91\n",
" Using cached nvidia_nvtx_cu11-11.7.91-py3-none-manylinux1_x86_64.whl (98 kB)\n",
"Requirement already satisfied: urllib3>=1.25 in /usr/lib/python3/dist-packages (from torchdata==0.6.1->torchtext) (1.26.5)\n",
"Requirement already satisfied: wheel in /usr/lib/python3/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch==2.0.1->torchtext) (0.37.1)\n",
"Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch==2.0.1->torchtext) (59.6.0)\n",
"Collecting lit\n",
" Downloading lit-16.0.5.tar.gz (138 kB)\n",
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m138.0/138.0 KB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n",
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25hCollecting cmake\n",
" Using cached cmake-3.26.3-py2.py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (24.0 MB)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/lib/python3/dist-packages (from jinja2->torch==2.0.1->torchtext) (2.0.1)\n",
"Collecting mpmath>=0.19\n",
" Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)\n",
"Building wheels for collected packages: lit\n",
" Building wheel for lit (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25h Created wheel for lit: filename=lit-16.0.5-py3-none-any.whl size=88192 sha256=f6c57a31a147cbfe0af3d6bf4b856390ad14c28a9ddb38c8044ec29331b35c26\n",
" Stored in directory: /home/gedin/.cache/pip/wheels/eb/02/84/d82f0b1a6098209edf7e3607be6cc592ebbc015a8a3127c68d\n",
"Successfully built lit\n",
"Installing collected packages: mpmath, lit, cmake, typing-extensions, tqdm, sympy, nvidia-nvtx-cu11, nvidia-nccl-cu11, nvidia-cusparse-cu11, nvidia-curand-cu11, nvidia-cufft-cu11, nvidia-cuda-runtime-cu11, nvidia-cuda-nvrtc-cu11, nvidia-cuda-cupti-cu11, nvidia-cublas-cu11, networkx, filelock, nvidia-cusolver-cu11, nvidia-cudnn-cu11, triton, torch, torchdata, torchtext\n",
"Successfully installed cmake-3.26.3 filelock-3.12.0 lit-16.0.5 mpmath-1.3.0 networkx-3.1 nvidia-cublas-cu11-11.10.3.66 nvidia-cuda-cupti-cu11-11.7.101 nvidia-cuda-nvrtc-cu11-11.7.99 nvidia-cuda-runtime-cu11-11.7.99 nvidia-cudnn-cu11-8.5.0.96 nvidia-cufft-cu11-10.9.0.58 nvidia-curand-cu11-10.2.10.91 nvidia-cusolver-cu11-11.4.0.1 nvidia-cusparse-cu11-11.7.4.91 nvidia-nccl-cu11-2.14.3 nvidia-nvtx-cu11-11.7.91 sympy-1.12 torch-2.0.1 torchdata-0.6.1 torchtext-0.15.2 tqdm-4.65.0 triton-2.0.0 typing-extensions-4.6.3\n"
]
}
],
"source": [
"# %pip (unlike !pip) installs into the environment of the running kernel.\n",
"# The version is pinned to the release recorded in this cell's saved output.\n",
"%pip install torchtext==0.15.2"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Input/output locations used by the notebook (relative to the working directory).\n",
"train_file = 'train/in.tsv.xz'  # corpus the vocabulary is built from\n",
"test_file = 'dev-0/in.tsv.xz'\n",
"out_file = 'dev-0/out.tsv'"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from itertools import islice\n",
"import regex as re\n",
"import sys\n",
"from torchtext.vocab import build_vocab_from_iterator\n",
"import lzma\n",
"import pickle\n",
"import re\n",
"import torch\n",
"from torch import nn\n",
"from torch.utils.data import IterableDataset\n",
"import itertools\n",
"from torch.utils.data import DataLoader\n",
"import gc"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Hyperparameters and run configuration.\n",
"embed_size = 300\n",
"# Use the GPU when one is usable; otherwise fall back to the CPU instead of failing later.\n",
"device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
"vocab_size = 25000  # passed as max_tokens when the vocabulary is built\n",
"batch_s = 3200\n",
"learning_rate = 0.0001\n",
"epochs = 4\n",
"k = 20  # top k words\n",
"wildcard_minweight = 0.001"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"### preprocessing\n",
"def preprocess(line):\n",
"    \"\"\"Turn one raw TSV line into plain text.\n",
"\n",
"    Drops the leading header fields (get_rid_of_header) and then replaces\n",
"    escaped newlines with spaces (replace_endline).\n",
"    \"\"\"\n",
"    return replace_endline(get_rid_of_header(line))\n",
"\n",
"\n",
"def get_rid_of_header(line):\n",
"    \"\"\"Return the line without its first six tab-separated fields.\n",
"\n",
"    The remaining fields are concatenated with no separator.\n",
"    NOTE(review): with no separator, the last word of one field and the first\n",
"    word of the next are fused into a single token -- confirm this is intended.\n",
"    \"\"\"\n",
"    return \"\".join(line.split('\\t')[6:])\n",
"\n",
"\n",
"def replace_endline(line):\n",
"    \"\"\"Replace every literal backslash-n (two characters) in the line with a space.\"\"\"\n",
"    return line.replace(\"\\\\n\", \" \")\n",
"\n",
"\n",
"def get_last_word(text):\n",
"    \"\"\"Return the last word of a string.\n",
"\n",
"    Only the part after the final ' ' is kept and trailing whitespace is\n",
"    stripped from it, so a text ending with a space yields ''.\n",
"    \"\"\"\n",
"    return text.rsplit(' ', 1)[-1].rstrip()\n",
"\n",
"\n",
"def get_first_word(text):\n",
"    \"\"\"Return the first word of a string.\n",
"\n",
"    Everything before the first ' ' is returned. A text containing no space is\n",
"    returned whole (the previous character loop stopped one index early and\n",
"    dropped its final character); a text starting with a space yields ''.\n",
"    \"\"\"\n",
"    return text.split(' ', 1)[0]\n",
"\n",
"\n",
"def get_words_from_line(line):\n",
"    \"\"\"Yield the tokens of one raw line, framed by '' markers.\n",
"\n",
"    The line is right-stripped, preprocessed and split on single spaces; an\n",
"    empty-string token is yielded before the first word and after the last.\n",
"    \"\"\"\n",
"    stripped = line.rstrip()\n",
"    yield ''\n",
"    yield from preprocess(stripped).split(' ')\n",
"    yield ''\n",
"\n",
"\n",
"def get_word_lines_from_file(file_name):\n",
"    \"\"\"Yield one token generator (see get_words_from_line) per line of an xz file.\n",
"\n",
"    Lines are decoded as UTF-8. The running line count is printed every\n",
"    1000 lines as a coarse progress indicator.\n",
"    \"\"\"\n",
"    with lzma.open(file_name, 'r') as fh:\n",
"        for line_no, raw_line in enumerate(fh, start=1):\n",
"            if line_no % 1000 == 0:\n",
"                print(line_no)\n",
"            yield get_words_from_line(raw_line.decode('utf-8'))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1000\n",
"2000\n",
"3000\n",
"4000\n",
"5000\n",
"6000\n",
"7000\n",
"8000\n",
"9000\n",
"10000\n",
"11000\n",
"12000\n",
"13000\n",
"14000\n",
"15000\n",
"16000\n",
"17000\n",
"18000\n",
"19000\n",
"20000\n",
"21000\n",
"22000\n",
"23000\n",
"24000\n",
"25000\n",
"26000\n",
"27000\n",
"28000\n",
"29000\n",
"30000\n",
"31000\n",
"32000\n",
"33000\n",
"34000\n",
"35000\n",
"36000\n",
"37000\n",
"38000\n",
"39000\n",
"40000\n",
"41000\n",
"42000\n",
"43000\n",
"44000\n",
"45000\n",
"46000\n",
"47000\n",
"48000\n",
"49000\n",
"50000\n",
"51000\n",
"52000\n",
"53000\n",
"54000\n",
"55000\n",
"56000\n",
"57000\n",
"58000\n",
"59000\n",
"60000\n",
"61000\n",
"62000\n",
"63000\n",
"64000\n",
"65000\n",
"66000\n",
"67000\n",
"68000\n",
"69000\n",
"70000\n",
"71000\n",
"72000\n",
"73000\n",
"74000\n",
"75000\n",
"76000\n",
"77000\n",
"78000\n",
"79000\n",
"80000\n",
"81000\n",
"82000\n",
"83000\n",
"84000\n",
"85000\n",
"86000\n",
"87000\n",
"88000\n",
"89000\n",
"90000\n",
"91000\n",
"92000\n",
"93000\n",
"94000\n",
"95000\n",
"96000\n",
"97000\n",
"98000\n",
"99000\n",
"100000\n",
"101000\n",
"102000\n",
"103000\n",
"104000\n",
"105000\n",
"106000\n",
"107000\n",
"108000\n",
"109000\n",
"110000\n",
"111000\n",
"112000\n",
"113000\n",
"114000\n",
"115000\n",
"116000\n",
"117000\n",
"118000\n",
"119000\n",
"120000\n",
"121000\n",
"122000\n",
"123000\n",
"124000\n",
"125000\n",
"126000\n",
"127000\n",
"128000\n",
"129000\n",
"130000\n",
"131000\n",
"132000\n",
"133000\n",
"134000\n",
"135000\n",
"136000\n",
"137000\n",
"138000\n",
"139000\n",
"140000\n",
"141000\n",
"142000\n",
"143000\n",
"144000\n",
"145000\n",
"146000\n",
"147000\n",
"148000\n",
"149000\n",
"150000\n",
"151000\n",
"152000\n",
"153000\n",
"154000\n",
"155000\n",
"156000\n",
"157000\n",
"158000\n",
"159000\n",
"160000\n",
"161000\n",
"162000\n",
"163000\n",
"164000\n",
"165000\n",
"166000\n",
"167000\n",
"168000\n",
"169000\n",
"170000\n",
"171000\n",
"172000\n",
"173000\n",
"174000\n",
"175000\n",
"176000\n",
"177000\n",
"178000\n",
"179000\n",
"180000\n",
"181000\n",
"182000\n",
"183000\n",
"184000\n",
"185000\n",
"186000\n",
"187000\n",
"188000\n",
"189000\n",
"190000\n",
"191000\n",
"192000\n",
"193000\n",
"194000\n",
"195000\n",
"196000\n",
"197000\n",
"198000\n",
"199000\n",
"200000\n",
"201000\n",
"202000\n",
"203000\n",
"204000\n",
"205000\n",
"206000\n",
"207000\n",
"208000\n",
"209000\n",
"210000\n",
"211000\n",
"212000\n",
"213000\n",
"214000\n",
"215000\n",
"216000\n",
"217000\n",
"218000\n",
"219000\n",
"220000\n",
"221000\n",
"222000\n",
"223000\n",
"224000\n",
"225000\n",
"226000\n",
"227000\n",
"228000\n",
"229000\n",
"230000\n",
"231000\n",
"232000\n",
"233000\n",
"234000\n",
"235000\n",
"236000\n",
"237000\n",
"238000\n",
"239000\n",
"240000\n",
"241000\n",
"242000\n",
"243000\n",
"244000\n",
"245000\n",
"246000\n",
"247000\n",
"248000\n",
"249000\n",
"250000\n",
"251000\n",
"252000\n",
"253000\n",
"254000\n",
"255000\n",
"256000\n",
"257000\n",
"258000\n",
"259000\n",
"260000\n",
"261000\n",
"262000\n",
"263000\n",
"264000\n",
"265000\n",
"266000\n",
"267000\n",
"268000\n",
"269000\n",
"270000\n",
"271000\n",
"272000\n",
"273000\n",
"274000\n",
"275000\n",
"276000\n",
"277000\n",
"278000\n",
"279000\n",
"280000\n",
"281000\n",
"282000\n",
"283000\n",
"284000\n",
"285000\n",
"286000\n",
"287000\n",
"288000\n",
"289000\n",
"290000\n",
"291000\n",
"292000\n",
"293000\n",
"294000\n",
"295000\n",
"296000\n",
"297000\n",
"298000\n",
"299000\n",
"300000\n",
"301000\n",
"302000\n",
"303000\n",
"304000\n",
"305000\n",
"306000\n",
"307000\n",
"308000\n",
"309000\n",
"310000\n",
"311000\n",
"312000\n",
"313000\n",
"314000\n",
"315000\n",
"316000\n",
"317000\n",
"318000\n",
"319000\n",
"320000\n",
"321000\n",
"322000\n",
"323000\n",
"324000\n",
"325000\n",
"326000\n",
"327000\n",
"328000\n",
"329000\n",
"330000\n",
"331000\n",
"332000\n",
"333000\n",
"334000\n",
"335000\n",
"336000\n",
"337000\n",
"338000\n",
"339000\n",
"340000\n",
"341000\n",
"342000\n",
"343000\n",
"344000\n",
"345000\n",
"346000\n",
"347000\n",
"348000\n",
"349000\n",
"350000\n",
"351000\n",
"352000\n",
"353000\n",
"354000\n",
"355000\n",
"356000\n",
"357000\n",
"358000\n",
"359000\n",
"360000\n",
"361000\n",
"362000\n",
"363000\n",
"364000\n",
"365000\n",
"366000\n",
"367000\n",
"368000\n",
"369000\n",
"370000\n",
"371000\n",
"372000\n",
"373000\n",
"374000\n",
"375000\n",
"376000\n",
"377000\n",
"378000\n",
"379000\n",
"380000\n",
"381000\n",
"382000\n",
"383000\n",
"384000\n",
"385000\n",
"386000\n",
"387000\n",
"388000\n",
"389000\n",
"390000\n",
"391000\n",
"392000\n",
"393000\n",
"394000\n",
"395000\n",
"396000\n",
"397000\n",
"398000\n",
"399000\n",
"400000\n",
"401000\n",
"402000\n",
"403000\n",
"404000\n",
"405000\n",
"406000\n",
"407000\n",
"408000\n",
"409000\n",
"410000\n",
"411000\n",
"412000\n",
"413000\n",
"414000\n",
"415000\n",
"416000\n",
"417000\n",
"418000\n",
"419000\n",
"420000\n",
"421000\n",
"422000\n",
"423000\n",
"424000\n",
"425000\n",
"426000\n",
"427000\n",
"428000\n",
"429000\n",
"430000\n",
"431000\n",
"432000\n"
]
}
],
"source": [
"# Build the vocabulary from the training corpus; '' is the only special token\n",
"# and vocab_size is handed over as max_tokens.\n",
"vocab = build_vocab_from_iterator(\n",
"    get_word_lines_from_file(train_file),\n",
"    specials=[''],\n",
"    max_tokens=vocab_size,\n",
")\n",
"\n",
"# Persist the vocabulary to disk with the highest available pickle protocol.\n",
"with open('filename.pickle', mode='wb') as handle:\n",
"    pickle.dump(vocab, handle, protocol=pickle.HIGHEST_PROTOCOL)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['', 'the', 'of', 'was', 'ladies']"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Spot-check the vocabulary: decode a handful of indices back to their tokens.\n",
"sample_indices = [0, 1, 2, 10, 2000]\n",
"vocab.lookup_tokens(sample_indices)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Definicja sieci\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Naszą prostą sieć neuronową zaimplementujemy używając frameworku PyTorch.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"class SimpleBigramNeuralLanguageModel(nn.Module):\n",
"    \"\"\"Bigram language model: embedding -> linear projection -> softmax.\n",
"\n",
"    Args:\n",
"        vocabulary_size: number of embedding rows and size of the output distribution.\n",
"        embedding_size: width of the embedding vectors.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, vocabulary_size, embedding_size):\n",
"        super().__init__()\n",
"        self.model = nn.Sequential(\n",
"            nn.Embedding(vocabulary_size, embedding_size),\n",
"            nn.Linear(embedding_size, vocabulary_size),\n",
"            # Normalise over the vocabulary axis explicitly. Leaving `dim` unset is\n",
"            # deprecated and, for 3-D activations, would normalise over axis 0 instead.\n",
"            nn.Softmax(dim=-1),\n",
"        )\n",
"\n",
"    def forward(self, x):\n",
"        \"\"\"Return the softmax output of the network for `x`.\n",
"\n",
"        `x` is fed straight into nn.Embedding, so it is presumably an integer\n",
"        tensor of vocabulary indices -- TODO confirm against the training loop.\n",
"        \"\"\"\n",
"        return self.model(x)\n",
"\n",
"\n",
"# Reload the vocabulary from the pickle file written when it was built.\n",
"# NOTE: pickle.load can execute arbitrary code -- only load files you created yourself.\n",
"with open('filename.pickle', 'rb') as handle:\n",
"    vocab = pickle.load(handle)\n",
"\n",
"# Out-of-vocabulary tokens are mapped to the index of the '' token.\n",
"vocab.set_default_index(vocab[''])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Help on Vocab in module torchtext.vocab.vocab object:\n",
"\n",
"class Vocab(torch.nn.modules.module.Module)\n",
" | Vocab(vocab) -> None\n",
" | \n",
" | Base class for all neural network modules.\n",
" | \n",
" | Your models should also subclass this class.\n",
" | \n",
" | Modules can also contain other Modules, allowing to nest them in\n",
" | a tree structure. You can assign the submodules as regular attributes::\n",
" | \n",
" | import torch.nn as nn\n",
" | import torch.nn.functional as F\n",
" | \n",
" | class Model(nn.Module):\n",
" | def __init__(self):\n",
" | super().__init__()\n",
" | self.conv1 = nn.Conv2d(1, 20, 5)\n",
" | self.conv2 = nn.Conv2d(20, 20, 5)\n",
" | \n",
" | def forward(self, x):\n",
" | x = F.relu(self.conv1(x))\n",
" | return F.relu(self.conv2(x))\n",
" | \n",
" | Submodules assigned in this way will be registered, and will have their\n",
" | parameters converted too when you call :meth:`to`, etc.\n",
" | \n",
" | .. note::\n",
" | As per the example above, an ``__init__()`` call to the parent class\n",
" | must be made before assignment on the child.\n",
" | \n",
" | :ivar training: Boolean represents whether this module is in training or\n",
" | evaluation mode.\n",
" | :vartype training: bool\n",
" | \n",
" | Method resolution order:\n",
" | Vocab\n",
" | torch.nn.modules.module.Module\n",
" | builtins.object\n",
" | \n",
" | Methods defined here:\n",
" | \n",
" | __contains__(self, token: str) -> bool\n",
" | Args:\n",
" | token: The token for which to check the membership.\n",
" | \n",
" | Returns:\n",
" | Whether the token is member of vocab or not.\n",
" | \n",
" | __getitem__(self, token: str) -> int\n",
" | Args:\n",
" | token: The token used to lookup the corresponding index.\n",
" | \n",
" | Returns:\n",
" | The index corresponding to the associated token.\n",
" | \n",
" | __init__(self, vocab) -> None\n",
" | Initializes internal Module state, shared by both nn.Module and ScriptModule.\n",
" | \n",
" | __len__(self) -> int\n",
" | Returns:\n",
" | The length of the vocab.\n",
" | \n",
" | __prepare_scriptable__(self)\n",
" | Return a JITable Vocab.\n",
" | \n",
" | append_token(self, token: str) -> None\n",
" | Args:\n",
" | token: The token used to lookup the corresponding index.\n",
" | \n",
" | Raises:\n",
" | RuntimeError: If `token` already exists in the vocab\n",
" | \n",
" | forward(self, tokens: List[str]) -> List[int]\n",
" | Calls the `lookup_indices` method\n",
" | \n",
" | Args:\n",
" | tokens: a list of tokens used to lookup their corresponding `indices`.\n",
" | \n",
" | Returns:\n",
" | The indices associated with a list of `tokens`.\n",
" | \n",
" | get_default_index(self) -> Union[int, NoneType]\n",
" | Returns:\n",
" | Value of default index if it is set.\n",
" | \n",
" | get_itos(self) -> List[str]\n",
" | Returns:\n",
" | List mapping indices to tokens.\n",
" | \n",
" | get_stoi(self) -> Dict[str, int]\n",
" | Returns:\n",
" | Dictionary mapping tokens to indices.\n",
" | \n",
" | insert_token(self, token: str, index: int) -> None\n",
" | Args:\n",
" | token: The token used to lookup the corresponding index.\n",
" | index: The index corresponding to the associated token.\n",
" | Raises:\n",
" | RuntimeError: If `index` is not in range [0, Vocab.size()] or if `token` already exists in the vocab.\n",
" | \n",
" | lookup_indices(self, tokens: List[str]) -> List[int]\n",
" | Args:\n",
" | tokens: the tokens used to lookup their corresponding `indices`.\n",
" | \n",
" | Returns:\n",
" | The 'indices` associated with `tokens`.\n",
" | \n",
" | lookup_token(self, index: int) -> str\n",
" | Args:\n",
" | index: The index corresponding to the associated token.\n",
" | \n",
" | Returns:\n",
" | token: The token used to lookup the corresponding index.\n",
" | \n",
" | Raises:\n",
" | RuntimeError: If `index` not in range [0, itos.size()).\n",
" | \n",
" | lookup_tokens(self, indices: List[int]) -> List[str]\n",
" | Args:\n",
" | indices: The `indices` used to lookup their corresponding`tokens`.\n",
" | \n",
" | Returns:\n",
" | The `tokens` associated with `indices`.\n",
" | \n",
" | Raises:\n",
" | RuntimeError: If an index within `indices` is not int range [0, itos.size()).\n",
" | \n",
" | set_default_index(self, index: Union[int, NoneType]) -> None\n",
" | Args:\n",
" | index: Value of default index. This index will be returned when OOV token is queried.\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Readonly properties defined here:\n",
" | \n",
" | is_jitable\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Data and other attributes defined here:\n",
" | \n",
" | __jit_unused_properties__ = ['is_jitable']\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Methods inherited from torch.nn.modules.module.Module:\n",
" | \n",
" | __call__ = _call_impl(self, *args, **kwargs)\n",
" | \n",
" | __delattr__(self, name)\n",
" | Implement delattr(self, name).\n",
" | \n",
" | __dir__(self)\n",
" | Default dir() implementation.\n",
" | \n",
" | __getattr__(self, name: str) -> Union[torch.Tensor, ForwardRef('Module')]\n",
" | \n",
" | __repr__(self)\n",
" | Return repr(self).\n",
" | \n",
" | __setattr__(self, name: str, value: Union[torch.Tensor, ForwardRef('Module')]) -> None\n",
" | Implement setattr(self, name, value).\n",
" | \n",
" | __setstate__(self, state)\n",
" | \n",
" | add_module(self, name: str, module: Union[ForwardRef('Module'), NoneType]) -> None\n",
" | Adds a child module to the current module.\n",
" | \n",
" | The module can be accessed as an attribute using the given name.\n",
" | \n",
" | Args:\n",
" | name (str): name of the child module. The child module can be\n",
" | accessed from this module using the given name\n",
" | module (Module): child module to be added to the module.\n",
" | \n",
" | apply(self: ~T, fn: Callable[[ForwardRef('Module')], NoneType]) -> ~T\n",
" | Applies ``fn`` recursively to every submodule (as returned by ``.children()``)\n",
" | as well as self. Typical use includes initializing the parameters of a model\n",
" | (see also :ref:`nn-init-doc`).\n",
" | \n",
" | Args:\n",
" | fn (:class:`Module` -> None): function to be applied to each submodule\n",
" | \n",
" | Returns:\n",
" | Module: self\n",
" | \n",
" | Example::\n",
" | \n",
" | >>> @torch.no_grad()\n",
" | >>> def init_weights(m):\n",
" | >>> print(m)\n",
" | >>> if type(m) == nn.Linear:\n",
" | >>> m.weight.fill_(1.0)\n",
" | >>> print(m.weight)\n",
" | >>> net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))\n",
" | >>> net.apply(init_weights)\n",
" | Linear(in_features=2, out_features=2, bias=True)\n",
" | Parameter containing:\n",
" | tensor([[1., 1.],\n",
" | [1., 1.]], requires_grad=True)\n",
" | Linear(in_features=2, out_features=2, bias=True)\n",
" | Parameter containing:\n",
" | tensor([[1., 1.],\n",
" | [1., 1.]], requires_grad=True)\n",
" | Sequential(\n",
" | (0): Linear(in_features=2, out_features=2, bias=True)\n",
" | (1): Linear(in_features=2, out_features=2, bias=True)\n",
" | )\n",
" | \n",
" | bfloat16(self: ~T) -> ~T\n",
" | Casts all floating point parameters and buffers to ``bfloat16`` datatype.\n",
" | \n",
" | .. note::\n",
" | This method modifies the module in-place.\n",
" | \n",
" | Returns:\n",
" | Module: self\n",
" | \n",
" | buffers(self, recurse: bool = True) -> Iterator[torch.Tensor]\n",
" | Returns an iterator over module buffers.\n",
" | \n",
" | Args:\n",
" | recurse (bool): if True, then yields buffers of this module\n",
" | and all submodules. Otherwise, yields only buffers that\n",
" | are direct members of this module.\n",
" | \n",
" | Yields:\n",
" | torch.Tensor: module buffer\n",
" | \n",
" | Example::\n",
" | \n",
" | >>> # xdoctest: +SKIP(\"undefined vars\")\n",
" | >>> for buf in model.buffers():\n",
" | >>> print(type(buf), buf.size())\n",
" | (20L,)\n",
" | (20L, 1L, 5L, 5L)\n",
" | \n",
" | children(self) -> Iterator[ForwardRef('Module')]\n",
" | Returns an iterator over immediate children modules.\n",
" | \n",
" | Yields:\n",
" | Module: a child module\n",
" | \n",
" | cpu(self: ~T) -> ~T\n",
" | Moves all model parameters and buffers to the CPU.\n",
" | \n",
" | .. note::\n",
" | This method modifies the module in-place.\n",
" | \n",
" | Returns:\n",
" | Module: self\n",
" | \n",
" | cuda(self: ~T, device: Union[int, torch.device, NoneType] = None) -> ~T\n",
" | Moves all model parameters and buffers to the GPU.\n",
" | \n",
" | This also makes associated parameters and buffers different objects. So\n",
" | it should be called before constructing optimizer if the module will\n",
" | live on GPU while being optimized.\n",
" | \n",
" | .. note::\n",
" | This method modifies the module in-place.\n",
" | \n",
" | Args:\n",
" | device (int, optional): if specified, all parameters will be\n",
" | copied to that device\n",
" | \n",
" | Returns:\n",
" | Module: self\n",
" | \n",
" | double(self: ~T) -> ~T\n",
" | Casts all floating point parameters and buffers to ``double`` datatype.\n",
" | \n",
" | .. note::\n",
" | This method modifies the module in-place.\n",
" | \n",
" | Returns:\n",
" | Module: self\n",
" | \n",
" | eval(self: ~T) -> ~T\n",
" | Sets the module in evaluation mode.\n",
" | \n",
" | This has any effect only on certain modules. See documentations of\n",
" | particular modules for details of their behaviors in training/evaluation\n",
" | mode, if they are affected, e.g. :class:`Dropout`, :class:`BatchNorm`,\n",
" | etc.\n",
" | \n",
" | This is equivalent with :meth:`self.train(False) `.\n",
" | \n",
" | See :ref:`locally-disable-grad-doc` for a comparison between\n",
" | `.eval()` and several similar mechanisms that may be confused with it.\n",
" | \n",
" | Returns:\n",
" | Module: self\n",
" | \n",
" | extra_repr(self) -> str\n",
" | Set the extra representation of the module\n",
" | \n",
" | To print customized extra information, you should re-implement\n",
" | this method in your own modules. Both single-line and multi-line\n",
" | strings are acceptable.\n",
" | \n",
" | float(self: ~T) -> ~T\n",
" | Casts all floating point parameters and buffers to ``float`` datatype.\n",
" | \n",
" | .. note::\n",
" | This method modifies the module in-place.\n",
" | \n",
" | Returns:\n",
" | Module: self\n",
" | \n",
" | get_buffer(self, target: str) -> 'Tensor'\n",
" | Returns the buffer given by ``target`` if it exists,\n",
" | otherwise throws an error.\n",
" | \n",
" | See the docstring for ``get_submodule`` for a more detailed\n",
" | explanation of this method's functionality as well as how to\n",
" | correctly specify ``target``.\n",
" | \n",
" | Args:\n",
" | target: The fully-qualified string name of the buffer\n",
" | to look for. (See ``get_submodule`` for how to specify a\n",
" | fully-qualified string.)\n",
" | \n",
" | Returns:\n",
" | torch.Tensor: The buffer referenced by ``target``\n",
" | \n",
" | Raises:\n",
" | AttributeError: If the target string references an invalid\n",
" | path or resolves to something that is not a\n",
" | buffer\n",
" | \n",
" | get_extra_state(self) -> Any\n",
" | Returns any extra state to include in the module's state_dict.\n",
" | Implement this and a corresponding :func:`set_extra_state` for your module\n",
" | if you need to store extra state. This function is called when building the\n",
" | module's `state_dict()`.\n",
" | \n",
" | Note that extra state should be picklable to ensure working serialization\n",
" | of the state_dict. We only provide provide backwards compatibility guarantees\n",
" | for serializing Tensors; other objects may break backwards compatibility if\n",
" | their serialized pickled form changes.\n",
" | \n",
" | Returns:\n",
" | object: Any extra state to store in the module's state_dict\n",
" | \n",
" | get_parameter(self, target: str) -> 'Parameter'\n",
" | Returns the parameter given by ``target`` if it exists,\n",
" | otherwise throws an error.\n",
" | \n",
" | See the docstring for ``get_submodule`` for a more detailed\n",
" | explanation of this method's functionality as well as how to\n",
" | correctly specify ``target``.\n",
" | \n",
" | Args:\n",
" | target: The fully-qualified string name of the Parameter\n",
" | to look for. (See ``get_submodule`` for how to specify a\n",
" | fully-qualified string.)\n",
" | \n",
" | Returns:\n",
" | torch.nn.Parameter: The Parameter referenced by ``target``\n",
" | \n",
" | Raises:\n",
" | AttributeError: If the target string references an invalid\n",
" | path or resolves to something that is not an\n",
" | ``nn.Parameter``\n",
" | \n",
" | get_submodule(self, target: str) -> 'Module'\n",
" | Returns the submodule given by ``target`` if it exists,\n",
" | otherwise throws an error.\n",
" | \n",
" | For example, let's say you have an ``nn.Module`` ``A`` that\n",
" | looks like this:\n",
" | \n",
" | .. code-block:: text\n",
" | \n",
" | A(\n",
" | (net_b): Module(\n",
" | (net_c): Module(\n",
" | (conv): Conv2d(16, 33, kernel_size=(3, 3), stride=(2, 2))\n",
" | )\n",
" | (linear): Linear(in_features=100, out_features=200, bias=True)\n",
" | )\n",
" | )\n",
" | \n",
" | (The diagram shows an ``nn.Module`` ``A``. ``A`` has a nested\n",
" | submodule ``net_b``, which itself has two submodules ``net_c``\n",
" | and ``linear``. ``net_c`` then has a submodule ``conv``.)\n",
" | \n",
" | To check whether or not we have the ``linear`` submodule, we\n",
" | would call ``get_submodule(\"net_b.linear\")``. To check whether\n",
" | we have the ``conv`` submodule, we would call\n",
" | ``get_submodule(\"net_b.net_c.conv\")``.\n",
" | \n",
" | The runtime of ``get_submodule`` is bounded by the degree\n",
" | of module nesting in ``target``. A query against\n",
" | ``named_modules`` achieves the same result, but it is O(N) in\n",
" | the number of transitive modules. So, for a simple check to see\n",
" | if some submodule exists, ``get_submodule`` should always be\n",
" | used.\n",
" | \n",
" | Args:\n",
" | target: The fully-qualified string name of the submodule\n",
" | to look for. (See above example for how to specify a\n",
" | fully-qualified string.)\n",
" | \n",
" | Returns:\n",
" | torch.nn.Module: The submodule referenced by ``target``\n",
" | \n",
" | Raises:\n",
" | AttributeError: If the target string references an invalid\n",
" | path or resolves to something that is not an\n",
" | ``nn.Module``\n",
" | \n",
" | half(self: ~T) -> ~T\n",
" | Casts all floating point parameters and buffers to ``half`` datatype.\n",
" | \n",
" | .. note::\n",
" | This method modifies the module in-place.\n",
" | \n",
" | Returns:\n",
" | Module: self\n",
" | \n",
" | ipu(self: ~T, device: Union[int, torch.device, NoneType] = None) -> ~T\n",
" | Moves all model parameters and buffers to the IPU.\n",
" | \n",
" | This also makes associated parameters and buffers different objects. So\n",
" | it should be called before constructing optimizer if the module will\n",
" | live on IPU while being optimized.\n",
" | \n",
" | .. note::\n",
" | This method modifies the module in-place.\n",
" | \n",
" | Arguments:\n",
" | device (int, optional): if specified, all parameters will be\n",
" | copied to that device\n",
" | \n",
" | Returns:\n",
" | Module: self\n",
" | \n",
" | load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True)\n",
" | Copies parameters and buffers from :attr:`state_dict` into\n",
" | this module and its descendants. If :attr:`strict` is ``True``, then\n",
" | the keys of :attr:`state_dict` must exactly match the keys returned\n",
" | by this module's :meth:`~torch.nn.Module.state_dict` function.\n",
" | \n",
" | Args:\n",
" | state_dict (dict): a dict containing parameters and\n",
" | persistent buffers.\n",
" | strict (bool, optional): whether to strictly enforce that the keys\n",
" | in :attr:`state_dict` match the keys returned by this module's\n",
" | :meth:`~torch.nn.Module.state_dict` function. Default: ``True``\n",
" | \n",
" | Returns:\n",
" | ``NamedTuple`` with ``missing_keys`` and ``unexpected_keys`` fields:\n",
" | * **missing_keys** is a list of str containing the missing keys\n",
" | * **unexpected_keys** is a list of str containing the unexpected keys\n",
" | \n",
" | Note:\n",
" | If a parameter or buffer is registered as ``None`` and its corresponding key\n",
" | exists in :attr:`state_dict`, :meth:`load_state_dict` will raise a\n",
" | ``RuntimeError``.\n",
" | \n",
" | modules(self) -> Iterator[ForwardRef('Module')]\n",
" | Returns an iterator over all modules in the network.\n",
" | \n",
" | Yields:\n",
" | Module: a module in the network\n",
" | \n",
" | Note:\n",
" | Duplicate modules are returned only once. In the following\n",
" | example, ``l`` will be returned only once.\n",
" | \n",
" | Example::\n",
" | \n",
" | >>> l = nn.Linear(2, 2)\n",
" | >>> net = nn.Sequential(l, l)\n",
" | >>> for idx, m in enumerate(net.modules()):\n",
" | ... print(idx, '->', m)\n",
" | \n",
" | 0 -> Sequential(\n",
" | (0): Linear(in_features=2, out_features=2, bias=True)\n",
" | (1): Linear(in_features=2, out_features=2, bias=True)\n",
" | )\n",
" | 1 -> Linear(in_features=2, out_features=2, bias=True)\n",
" | \n",
" | named_buffers(self, prefix: str = '', recurse: bool = True, remove_duplicate: bool = True) -> Iterator[Tuple[str, torch.Tensor]]\n",
" | Returns an iterator over module buffers, yielding both the\n",
" | name of the buffer as well as the buffer itself.\n",
" | \n",
" | Args:\n",
" | prefix (str): prefix to prepend to all buffer names.\n",
" | recurse (bool, optional): if True, then yields buffers of this module\n",
" | and all submodules. Otherwise, yields only buffers that\n",
" | are direct members of this module. Defaults to True.\n",
" | remove_duplicate (bool, optional): whether to remove the duplicated buffers in the result. Defaults to True.\n",
" | \n",
" | Yields:\n",
" | (str, torch.Tensor): Tuple containing the name and buffer\n",
" | \n",
" | Example::\n",
" | \n",
" | >>> # xdoctest: +SKIP(\"undefined vars\")\n",
" | >>> for name, buf in self.named_buffers():\n",
" | >>> if name in ['running_var']:\n",
" | >>> print(buf.size())\n",
" | \n",
" | named_children(self) -> Iterator[Tuple[str, ForwardRef('Module')]]\n",
" | Returns an iterator over immediate children modules, yielding both\n",
" | the name of the module as well as the module itself.\n",
" | \n",
" | Yields:\n",
" | (str, Module): Tuple containing a name and child module\n",
" | \n",
" | Example::\n",
" | \n",
" | >>> # xdoctest: +SKIP(\"undefined vars\")\n",
" | >>> for name, module in model.named_children():\n",
" | >>> if name in ['conv4', 'conv5']:\n",
" | >>> print(module)\n",
" | \n",
" | named_modules(self, memo: Union[Set[ForwardRef('Module')], NoneType] = None, prefix: str = '', remove_duplicate: bool = True)\n",
" | Returns an iterator over all modules in the network, yielding\n",
" | both the name of the module as well as the module itself.\n",
" | \n",
" | Args:\n",
" | memo: a memo to store the set of modules already added to the result\n",
" | prefix: a prefix that will be added to the name of the module\n",
" | remove_duplicate: whether to remove the duplicated module instances in the result\n",
" | or not\n",
" | \n",
" | Yields:\n",
" | (str, Module): Tuple of name and module\n",
" | \n",
" | Note:\n",
" | Duplicate modules are returned only once. In the following\n",
" | example, ``l`` will be returned only once.\n",
" | \n",
" | Example::\n",
" | \n",
" | >>> l = nn.Linear(2, 2)\n",
" | >>> net = nn.Sequential(l, l)\n",
" | >>> for idx, m in enumerate(net.named_modules()):\n",
" | ... print(idx, '->', m)\n",
" | \n",
" | 0 -> ('', Sequential(\n",
" | (0): Linear(in_features=2, out_features=2, bias=True)\n",
" | (1): Linear(in_features=2, out_features=2, bias=True)\n",
" | ))\n",
" | 1 -> ('0', Linear(in_features=2, out_features=2, bias=True))\n",
" | \n",
" | named_parameters(self, prefix: str = '', recurse: bool = True, remove_duplicate: bool = True) -> Iterator[Tuple[str, torch.nn.parameter.Parameter]]\n",
" | Returns an iterator over module parameters, yielding both the\n",
" | name of the parameter as well as the parameter itself.\n",
" | \n",
" | Args:\n",
" | prefix (str): prefix to prepend to all parameter names.\n",
" | recurse (bool): if True, then yields parameters of this module\n",
" | and all submodules. Otherwise, yields only parameters that\n",
" | are direct members of this module.\n",
" | remove_duplicate (bool, optional): whether to remove the duplicated\n",
" | parameters in the result. Defaults to True.\n",
" | \n",
" | Yields:\n",
" | (str, Parameter): Tuple containing the name and parameter\n",
" | \n",
" | Example::\n",
" | \n",
" | >>> # xdoctest: +SKIP(\"undefined vars\")\n",
" | >>> for name, param in self.named_parameters():\n",
" | >>> if name in ['bias']:\n",
" | >>> print(param.size())\n",
" | \n",
" | parameters(self, recurse: bool = True) -> Iterator[torch.nn.parameter.Parameter]\n",
" | Returns an iterator over module parameters.\n",
" | \n",
" | This is typically passed to an optimizer.\n",
" | \n",
" | Args:\n",
" | recurse (bool): if True, then yields parameters of this module\n",
" | and all submodules. Otherwise, yields only parameters that\n",
" | are direct members of this module.\n",
" | \n",
" | Yields:\n",
" | Parameter: module parameter\n",
" | \n",
" | Example::\n",
" | \n",
" | >>> # xdoctest: +SKIP(\"undefined vars\")\n",
" | >>> for param in model.parameters():\n",
" | >>> print(type(param), param.size())\n",
" | (20L,)\n",
" | (20L, 1L, 5L, 5L)\n",
" | \n",
" | register_backward_hook(self, hook: Callable[[ForwardRef('Module'), Union[Tuple[torch.Tensor, ...], torch.Tensor], Union[Tuple[torch.Tensor, ...], torch.Tensor]], Union[NoneType, Tuple[torch.Tensor, ...], torch.Tensor]]) -> torch.utils.hooks.RemovableHandle\n",
" | Registers a backward hook on the module.\n",
" | \n",
" | This function is deprecated in favor of :meth:`~torch.nn.Module.register_full_backward_hook` and\n",
" | the behavior of this function will change in future versions.\n",
" | \n",
" | Returns:\n",
" | :class:`torch.utils.hooks.RemovableHandle`:\n",
" | a handle that can be used to remove the added hook by calling\n",
" | ``handle.remove()``\n",
" | \n",
" | register_buffer(self, name: str, tensor: Union[torch.Tensor, NoneType], persistent: bool = True) -> None\n",
" | Adds a buffer to the module.\n",
" | \n",
" | This is typically used to register a buffer that should not to be\n",
" | considered a model parameter. For example, BatchNorm's ``running_mean``\n",
" | is not a parameter, but is part of the module's state. Buffers, by\n",
" | default, are persistent and will be saved alongside parameters. This\n",
" | behavior can be changed by setting :attr:`persistent` to ``False``. The\n",
" | only difference between a persistent buffer and a non-persistent buffer\n",
" | is that the latter will not be a part of this module's\n",
" | :attr:`state_dict`.\n",
" | \n",
" | Buffers can be accessed as attributes using given names.\n",
" | \n",
" | Args:\n",
" | name (str): name of the buffer. The buffer can be accessed\n",
" | from this module using the given name\n",
" | tensor (Tensor or None): buffer to be registered. If ``None``, then operations\n",
" | that run on buffers, such as :attr:`cuda`, are ignored. If ``None``,\n",
" | the buffer is **not** included in the module's :attr:`state_dict`.\n",
" | persistent (bool): whether the buffer is part of this module's\n",
" | :attr:`state_dict`.\n",
" | \n",
" | Example::\n",
" | \n",
" | >>> # xdoctest: +SKIP(\"undefined vars\")\n",
" | >>> self.register_buffer('running_mean', torch.zeros(num_features))\n",
" | \n",
" | register_forward_hook(self, hook: Union[Callable[[~T, Tuple[Any, ...], Any], Union[Any, NoneType]], Callable[[~T, Tuple[Any, ...], Dict[str, Any], Any], Union[Any, NoneType]]], *, prepend: bool = False, with_kwargs: bool = False) -> torch.utils.hooks.RemovableHandle\n",
" | Registers a forward hook on the module.\n",
" | \n",
" | The hook will be called every time after :func:`forward` has computed an output.\n",
" | \n",
" | If ``with_kwargs`` is ``False`` or not specified, the input contains only\n",
" | the positional arguments given to the module. Keyword arguments won't be\n",
" | passed to the hooks and only to the ``forward``. The hook can modify the\n",
" | output. It can modify the input inplace but it will not have effect on\n",
" | forward since this is called after :func:`forward` is called. The hook\n",
" | should have the following signature::\n",
" | \n",
" | hook(module, args, output) -> None or modified output\n",
" | \n",
" | If ``with_kwargs`` is ``True``, the forward hook will be passed the\n",
" | ``kwargs`` given to the forward function and be expected to return the\n",
" | output possibly modified. The hook should have the following signature::\n",
" | \n",
" | hook(module, args, kwargs, output) -> None or modified output\n",
" | \n",
" | Args:\n",
" | hook (Callable): The user defined hook to be registered.\n",
" | prepend (bool): If ``True``, the provided ``hook`` will be fired\n",
" | before all existing ``forward`` hooks on this\n",
" | :class:`torch.nn.modules.Module`. Otherwise, the provided\n",
" | ``hook`` will be fired after all existing ``forward`` hooks on\n",
" | this :class:`torch.nn.modules.Module`. Note that global\n",
" | ``forward`` hooks registered with\n",
" | :func:`register_module_forward_hook` will fire before all hooks\n",
" | registered by this method.\n",
" | Default: ``False``\n",
" | with_kwargs (bool): If ``True``, the ``hook`` will be passed the\n",
" | kwargs given to the forward function.\n",
" | Default: ``False``\n",
" | \n",
" | Returns:\n",
" | :class:`torch.utils.hooks.RemovableHandle`:\n",
" | a handle that can be used to remove the added hook by calling\n",
" | ``handle.remove()``\n",
" | \n",
" | register_forward_pre_hook(self, hook: Union[Callable[[~T, Tuple[Any, ...]], Union[Any, NoneType]], Callable[[~T, Tuple[Any, ...], Dict[str, Any]], Union[Tuple[Any, Dict[str, Any]], NoneType]]], *, prepend: bool = False, with_kwargs: bool = False) -> torch.utils.hooks.RemovableHandle\n",
" | Registers a forward pre-hook on the module.\n",
" | \n",
" | The hook will be called every time before :func:`forward` is invoked.\n",
" | \n",
" | \n",
" | If ``with_kwargs`` is false or not specified, the input contains only\n",
" | the positional arguments given to the module. Keyword arguments won't be\n",
" | passed to the hooks and only to the ``forward``. The hook can modify the\n",
" | input. User can either return a tuple or a single modified value in the\n",
" | hook. We will wrap the value into a tuple if a single value is returned\n",
" | (unless that value is already a tuple). The hook should have the\n",
" | following signature::\n",
" | \n",
" | hook(module, args) -> None or modified input\n",
" | \n",
" | If ``with_kwargs`` is true, the forward pre-hook will be passed the\n",
" | kwargs given to the forward function. And if the hook modifies the\n",
" | input, both the args and kwargs should be returned. The hook should have\n",
" | the following signature::\n",
" | \n",
" | hook(module, args, kwargs) -> None or a tuple of modified input and kwargs\n",
" | \n",
" | Args:\n",
" | hook (Callable): The user defined hook to be registered.\n",
" | prepend (bool): If true, the provided ``hook`` will be fired before\n",
" | all existing ``forward_pre`` hooks on this\n",
" | :class:`torch.nn.modules.Module`. Otherwise, the provided\n",
" | ``hook`` will be fired after all existing ``forward_pre`` hooks\n",
" | on this :class:`torch.nn.modules.Module`. Note that global\n",
" | ``forward_pre`` hooks registered with\n",
" | :func:`register_module_forward_pre_hook` will fire before all\n",
" | hooks registered by this method.\n",
" | Default: ``False``\n",
" | with_kwargs (bool): If true, the ``hook`` will be passed the kwargs\n",
" | given to the forward function.\n",
" | Default: ``False``\n",
" | \n",
" | Returns:\n",
" | :class:`torch.utils.hooks.RemovableHandle`:\n",
" | a handle that can be used to remove the added hook by calling\n",
" | ``handle.remove()``\n",
" | \n",
" | register_full_backward_hook(self, hook: Callable[[ForwardRef('Module'), Union[Tuple[torch.Tensor, ...], torch.Tensor], Union[Tuple[torch.Tensor, ...], torch.Tensor]], Union[NoneType, Tuple[torch.Tensor, ...], torch.Tensor]], prepend: bool = False) -> torch.utils.hooks.RemovableHandle\n",
" | Registers a backward hook on the module.\n",
" | \n",
" | The hook will be called every time the gradients with respect to a module\n",
" | are computed, i.e. the hook will execute if and only if the gradients with\n",
" | respect to module outputs are computed. The hook should have the following\n",
" | signature::\n",
" | \n",
" | hook(module, grad_input, grad_output) -> tuple(Tensor) or None\n",
" | \n",
" | The :attr:`grad_input` and :attr:`grad_output` are tuples that contain the gradients\n",
" | with respect to the inputs and outputs respectively. The hook should\n",
" | not modify its arguments, but it can optionally return a new gradient with\n",
" | respect to the input that will be used in place of :attr:`grad_input` in\n",
" | subsequent computations. :attr:`grad_input` will only correspond to the inputs given\n",
" | as positional arguments and all kwarg arguments are ignored. Entries\n",
" | in :attr:`grad_input` and :attr:`grad_output` will be ``None`` for all non-Tensor\n",
" | arguments.\n",
" | \n",
" | For technical reasons, when this hook is applied to a Module, its forward function will\n",
" | receive a view of each Tensor passed to the Module. Similarly the caller will receive a view\n",
" | of each Tensor returned by the Module's forward function.\n",
" | \n",
" | .. warning ::\n",
" | Modifying inputs or outputs inplace is not allowed when using backward hooks and\n",
" | will raise an error.\n",
" | \n",
" | Args:\n",
" | hook (Callable): The user-defined hook to be registered.\n",
" | prepend (bool): If true, the provided ``hook`` will be fired before\n",
" | all existing ``backward`` hooks on this\n",
" | :class:`torch.nn.modules.Module`. Otherwise, the provided\n",
" | ``hook`` will be fired after all existing ``backward`` hooks on\n",
" | this :class:`torch.nn.modules.Module`. Note that global\n",
" | ``backward`` hooks registered with\n",
" | :func:`register_module_full_backward_hook` will fire before\n",
" | all hooks registered by this method.\n",
" | \n",
" | Returns:\n",
" | :class:`torch.utils.hooks.RemovableHandle`:\n",
" | a handle that can be used to remove the added hook by calling\n",
" | ``handle.remove()``\n",
" | \n",
" | register_full_backward_pre_hook(self, hook: Callable[[ForwardRef('Module'), Union[Tuple[torch.Tensor, ...], torch.Tensor]], Union[NoneType, Tuple[torch.Tensor, ...], torch.Tensor]], prepend: bool = False) -> torch.utils.hooks.RemovableHandle\n",
" | Registers a backward pre-hook on the module.\n",
" | \n",
" | The hook will be called every time the gradients for the module are computed.\n",
" | The hook should have the following signature::\n",
" | \n",
" | hook(module, grad_output) -> Tensor or None\n",
" | \n",
" | The :attr:`grad_output` is a tuple. The hook should\n",
" | not modify its arguments, but it can optionally return a new gradient with\n",
" | respect to the output that will be used in place of :attr:`grad_output` in\n",
" | subsequent computations. Entries in :attr:`grad_output` will be ``None`` for\n",
" | all non-Tensor arguments.\n",
" | \n",
" | For technical reasons, when this hook is applied to a Module, its forward function will\n",
" | receive a view of each Tensor passed to the Module. Similarly the caller will receive a view\n",
" | of each Tensor returned by the Module's forward function.\n",
" | \n",
" | .. warning ::\n",
" | Modifying inputs inplace is not allowed when using backward hooks and\n",
" | will raise an error.\n",
" | \n",
" | Args:\n",
" | hook (Callable): The user-defined hook to be registered.\n",
" | prepend (bool): If true, the provided ``hook`` will be fired before\n",
" | all existing ``backward_pre`` hooks on this\n",
" | :class:`torch.nn.modules.Module`. Otherwise, the provided\n",
" | ``hook`` will be fired after all existing ``backward_pre`` hooks\n",
" | on this :class:`torch.nn.modules.Module`. Note that global\n",
" | ``backward_pre`` hooks registered with\n",
" | :func:`register_module_full_backward_pre_hook` will fire before\n",
" | all hooks registered by this method.\n",
" | \n",
" | Returns:\n",
" | :class:`torch.utils.hooks.RemovableHandle`:\n",
" | a handle that can be used to remove the added hook by calling\n",
" | ``handle.remove()``\n",
" | \n",
" | register_load_state_dict_post_hook(self, hook)\n",
" | Registers a post hook to be run after module's ``load_state_dict``\n",
" | is called.\n",
" | \n",
" | It should have the following signature::\n",
" | hook(module, incompatible_keys) -> None\n",
" | \n",
" | The ``module`` argument is the current module that this hook is registered\n",
" | on, and the ``incompatible_keys`` argument is a ``NamedTuple`` consisting\n",
" | of attributes ``missing_keys`` and ``unexpected_keys``. ``missing_keys``\n",
" | is a ``list`` of ``str`` containing the missing keys and\n",
" | ``unexpected_keys`` is a ``list`` of ``str`` containing the unexpected keys.\n",
" | \n",
" | The given incompatible_keys can be modified inplace if needed.\n",
" | \n",
" | Note that the checks performed when calling :func:`load_state_dict` with\n",
" | ``strict=True`` are affected by modifications the hook makes to\n",
" | ``missing_keys`` or ``unexpected_keys``, as expected. Additions to either\n",
" | set of keys will result in an error being thrown when ``strict=True``, and\n",
" | clearing out both missing and unexpected keys will avoid an error.\n",
" | \n",
" | Returns:\n",
" | :class:`torch.utils.hooks.RemovableHandle`:\n",
" | a handle that can be used to remove the added hook by calling\n",
" | ``handle.remove()``\n",
" | \n",
" | register_module(self, name: str, module: Union[ForwardRef('Module'), NoneType]) -> None\n",
" | Alias for :func:`add_module`.\n",
" | \n",
" | register_parameter(self, name: str, param: Union[torch.nn.parameter.Parameter, NoneType]) -> None\n",
" | Adds a parameter to the module.\n",
" | \n",
" | The parameter can be accessed as an attribute using given name.\n",
" | \n",
" | Args:\n",
" | name (str): name of the parameter. The parameter can be accessed\n",
" | from this module using the given name\n",
" | param (Parameter or None): parameter to be added to the module. If\n",
" | ``None``, then operations that run on parameters, such as :attr:`cuda`,\n",
" | are ignored. If ``None``, the parameter is **not** included in the\n",
" | module's :attr:`state_dict`.\n",
" | \n",
" | register_state_dict_pre_hook(self, hook)\n",
" | These hooks will be called with arguments: ``self``, ``prefix``,\n",
" | and ``keep_vars`` before calling ``state_dict`` on ``self``. The registered\n",
" | hooks can be used to perform pre-processing before the ``state_dict``\n",
" | call is made.\n",
" | \n",
" | requires_grad_(self: ~T, requires_grad: bool = True) -> ~T\n",
" | Change if autograd should record operations on parameters in this\n",
" | module.\n",
" | \n",
" | This method sets the parameters' :attr:`requires_grad` attributes\n",
" | in-place.\n",
" | \n",
" | This method is helpful for freezing part of the module for finetuning\n",
" | or training parts of a model individually (e.g., GAN training).\n",
" | \n",
" | See :ref:`locally-disable-grad-doc` for a comparison between\n",
" | `.requires_grad_()` and several similar mechanisms that may be confused with it.\n",
" | \n",
" | Args:\n",
" | requires_grad (bool): whether autograd should record operations on\n",
" | parameters in this module. Default: ``True``.\n",
" | \n",
" | Returns:\n",
" | Module: self\n",
" | \n",
" | set_extra_state(self, state: Any)\n",
" | This function is called from :func:`load_state_dict` to handle any extra state\n",
" | found within the `state_dict`. Implement this function and a corresponding\n",
" | :func:`get_extra_state` for your module if you need to store extra state within its\n",
" | `state_dict`.\n",
" | \n",
" | Args:\n",
" | state (dict): Extra state from the `state_dict`\n",
" | \n",
" | share_memory(self: ~T) -> ~T\n",
" | See :meth:`torch.Tensor.share_memory_`\n",
" | \n",
" | state_dict(self, *args, destination=None, prefix='', keep_vars=False)\n",
" | Returns a dictionary containing references to the whole state of the module.\n",
" | \n",
" | Both parameters and persistent buffers (e.g. running averages) are\n",
" | included. Keys are corresponding parameter and buffer names.\n",
" | Parameters and buffers set to ``None`` are not included.\n",
" | \n",
" | .. note::\n",
" | The returned object is a shallow copy. It contains references\n",
" | to the module's parameters and buffers.\n",
" | \n",
" | .. warning::\n",
" | Currently ``state_dict()`` also accepts positional arguments for\n",
" | ``destination``, ``prefix`` and ``keep_vars`` in order. However,\n",
" | this is being deprecated and keyword arguments will be enforced in\n",
" | future releases.\n",
" | \n",
" | .. warning::\n",
" | Please avoid the use of argument ``destination`` as it is not\n",
" | designed for end-users.\n",
" | \n",
" | Args:\n",
" | destination (dict, optional): If provided, the state of module will\n",
" | be updated into the dict and the same object is returned.\n",
" | Otherwise, an ``OrderedDict`` will be created and returned.\n",
" | Default: ``None``.\n",
" | prefix (str, optional): a prefix added to parameter and buffer\n",
" | names to compose the keys in state_dict. Default: ``''``.\n",
" | keep_vars (bool, optional): by default the :class:`~torch.Tensor` s\n",
" | returned in the state dict are detached from autograd. If it's\n",
" | set to ``True``, detaching will not be performed.\n",
" | Default: ``False``.\n",
" | \n",
" | Returns:\n",
" | dict:\n",
" | a dictionary containing a whole state of the module\n",
" | \n",
" | Example::\n",
" | \n",
" | >>> # xdoctest: +SKIP(\"undefined vars\")\n",
" | >>> module.state_dict().keys()\n",
" | ['bias', 'weight']\n",
" | \n",
" | to(self, *args, **kwargs)\n",
" | Moves and/or casts the parameters and buffers.\n",
" | \n",
" | This can be called as\n",
" | \n",
" | .. function:: to(device=None, dtype=None, non_blocking=False)\n",
" | :noindex:\n",
" | \n",
" | .. function:: to(dtype, non_blocking=False)\n",
" | :noindex:\n",
" | \n",
" | .. function:: to(tensor, non_blocking=False)\n",
" | :noindex:\n",
" | \n",
" | .. function:: to(memory_format=torch.channels_last)\n",
" | :noindex:\n",
" | \n",
" | Its signature is similar to :meth:`torch.Tensor.to`, but only accepts\n",
" | floating point or complex :attr:`dtype`\\ s. In addition, this method will\n",
" | only cast the floating point or complex parameters and buffers to :attr:`dtype`\n",
" | (if given). The integral parameters and buffers will be moved\n",
" | :attr:`device`, if that is given, but with dtypes unchanged. When\n",
" | :attr:`non_blocking` is set, it tries to convert/move asynchronously\n",
" | with respect to the host if possible, e.g., moving CPU Tensors with\n",
" | pinned memory to CUDA devices.\n",
" | \n",
" | See below for examples.\n",
" | \n",
" | .. note::\n",
" | This method modifies the module in-place.\n",
" | \n",
" | Args:\n",
" | device (:class:`torch.device`): the desired device of the parameters\n",
" | and buffers in this module\n",
" | dtype (:class:`torch.dtype`): the desired floating point or complex dtype of\n",
" | the parameters and buffers in this module\n",
" | tensor (torch.Tensor): Tensor whose dtype and device are the desired\n",
" | dtype and device for all parameters and buffers in this module\n",
" | memory_format (:class:`torch.memory_format`): the desired memory\n",
" | format for 4D parameters and buffers in this module (keyword\n",
" | only argument)\n",
" | \n",
" | Returns:\n",
" | Module: self\n",
" | \n",
" | Examples::\n",
" | \n",
" | >>> # xdoctest: +IGNORE_WANT(\"non-deterministic\")\n",
" | >>> linear = nn.Linear(2, 2)\n",
" | >>> linear.weight\n",
" | Parameter containing:\n",
" | tensor([[ 0.1913, -0.3420],\n",
" | [-0.5113, -0.2325]])\n",
" | >>> linear.to(torch.double)\n",
" | Linear(in_features=2, out_features=2, bias=True)\n",
" | >>> linear.weight\n",
" | Parameter containing:\n",
" | tensor([[ 0.1913, -0.3420],\n",
" | [-0.5113, -0.2325]], dtype=torch.float64)\n",
" | >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA1)\n",
" | >>> gpu1 = torch.device(\"cuda:1\")\n",
" | >>> linear.to(gpu1, dtype=torch.half, non_blocking=True)\n",
" | Linear(in_features=2, out_features=2, bias=True)\n",
" | >>> linear.weight\n",
" | Parameter containing:\n",
" | tensor([[ 0.1914, -0.3420],\n",
" | [-0.5112, -0.2324]], dtype=torch.float16, device='cuda:1')\n",
" | >>> cpu = torch.device(\"cpu\")\n",
" | >>> linear.to(cpu)\n",
" | Linear(in_features=2, out_features=2, bias=True)\n",
" | >>> linear.weight\n",
" | Parameter containing:\n",
" | tensor([[ 0.1914, -0.3420],\n",
" | [-0.5112, -0.2324]], dtype=torch.float16)\n",
" | \n",
" | >>> linear = nn.Linear(2, 2, bias=None).to(torch.cdouble)\n",
" | >>> linear.weight\n",
" | Parameter containing:\n",
" | tensor([[ 0.3741+0.j, 0.2382+0.j],\n",
" | [ 0.5593+0.j, -0.4443+0.j]], dtype=torch.complex128)\n",
" | >>> linear(torch.ones(3, 2, dtype=torch.cdouble))\n",
" | tensor([[0.6122+0.j, 0.1150+0.j],\n",
" | [0.6122+0.j, 0.1150+0.j],\n",
" | [0.6122+0.j, 0.1150+0.j]], dtype=torch.complex128)\n",
" | \n",
" | to_empty(self: ~T, *, device: Union[str, torch.device]) -> ~T\n",
" | Moves the parameters and buffers to the specified device without copying storage.\n",
" | \n",
" | Args:\n",
" | device (:class:`torch.device`): The desired device of the parameters\n",
" | and buffers in this module.\n",
" | \n",
" | Returns:\n",
" | Module: self\n",
" | \n",
" | train(self: ~T, mode: bool = True) -> ~T\n",
" | Sets the module in training mode.\n",
" | \n",
" | This has any effect only on certain modules. See documentations of\n",
" | particular modules for details of their behaviors in training/evaluation\n",
" | mode, if they are affected, e.g. :class:`Dropout`, :class:`BatchNorm`,\n",
" | etc.\n",
" | \n",
" | Args:\n",
" | mode (bool): whether to set training mode (``True``) or evaluation\n",
" | mode (``False``). Default: ``True``.\n",
" | \n",
" | Returns:\n",
" | Module: self\n",
" | \n",
" | type(self: ~T, dst_type: Union[torch.dtype, str]) -> ~T\n",
" | Casts all parameters and buffers to :attr:`dst_type`.\n",
" | \n",
" | .. note::\n",
" | This method modifies the module in-place.\n",
" | \n",
" | Args:\n",
" | dst_type (type or string): the desired type\n",
" | \n",
" | Returns:\n",
" | Module: self\n",
" | \n",
" | xpu(self: ~T, device: Union[int, torch.device, NoneType] = None) -> ~T\n",
" | Moves all model parameters and buffers to the XPU.\n",
" | \n",
" | This also makes associated parameters and buffers different objects. So\n",
" | it should be called before constructing optimizer if the module will\n",
" | live on XPU while being optimized.\n",
" | \n",
" | .. note::\n",
" | This method modifies the module in-place.\n",
" | \n",
" | Arguments:\n",
" | device (int, optional): if specified, all parameters will be\n",
" | copied to that device\n",
" | \n",
" | Returns:\n",
" | Module: self\n",
" | \n",
" | zero_grad(self, set_to_none: bool = True) -> None\n",
" | Sets gradients of all model parameters to zero. See similar function\n",
" | under :class:`torch.optim.Optimizer` for more context.\n",
" | \n",
" | Args:\n",
" | set_to_none (bool): instead of setting to zero, set the grads to None.\n",
" | See :meth:`torch.optim.Optimizer.zero_grad` for details.\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Data descriptors inherited from torch.nn.modules.module.Module:\n",
" | \n",
" | __dict__\n",
" | dictionary for instance variables (if defined)\n",
" | \n",
" | __weakref__\n",
" | list of weak references to the object (if defined)\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Data and other attributes inherited from torch.nn.modules.module.Module:\n",
" | \n",
" | T_destination = ~T_destination\n",
" | \n",
" | __annotations__ = {'__call__': typing.Callable[..., typing.Any], '_bac...\n",
" | \n",
" | call_super_init = False\n",
" | \n",
" | dump_patches = False\n",
"\n"
]
}
],
"source": [
"help(vocab)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1000\n",
"2000\n",
"3000\n",
"4000\n",
"5000\n",
"6000\n",
"7000\n",
"8000\n",
"9000\n",
"10000\n",
"11000\n",
"12000\n",
"13000\n",
"14000\n",
"15000\n",
"16000\n",
"17000\n",
"18000\n",
"19000\n",
"20000\n",
"21000\n",
"22000\n",
"23000\n",
"24000\n",
"25000\n",
"26000\n",
"27000\n",
"28000\n",
"29000\n",
"30000\n",
"31000\n",
"32000\n",
"33000\n",
"34000\n",
"35000\n",
"36000\n",
"37000\n",
"38000\n",
"39000\n",
"40000\n",
"41000\n",
"42000\n",
"43000\n",
"44000\n",
"45000\n",
"46000\n",
"47000\n",
"48000\n",
"49000\n",
"50000\n",
"51000\n",
"52000\n",
"53000\n",
"54000\n",
"55000\n",
"56000\n",
"57000\n",
"58000\n",
"59000\n",
"60000\n",
"61000\n",
"62000\n",
"63000\n",
"64000\n",
"65000\n",
"66000\n",
"67000\n",
"68000\n",
"69000\n",
"70000\n",
"71000\n",
"72000\n",
"73000\n",
"74000\n",
"75000\n",
"76000\n",
"77000\n",
"78000\n",
"79000\n",
"80000\n",
"81000\n",
"82000\n",
"83000\n",
"84000\n",
"85000\n",
"86000\n",
"87000\n",
"88000\n",
"89000\n",
"90000\n",
"91000\n",
"92000\n",
"93000\n",
"94000\n",
"95000\n",
"96000\n",
"97000\n",
"98000\n",
"99000\n",
"100000\n",
"101000\n",
"102000\n",
"103000\n",
"104000\n",
"105000\n",
"106000\n",
"107000\n",
"108000\n",
"109000\n",
"110000\n",
"111000\n",
"112000\n",
"113000\n",
"114000\n",
"115000\n",
"116000\n",
"117000\n",
"118000\n",
"119000\n",
"120000\n",
"121000\n",
"122000\n",
"123000\n",
"124000\n",
"125000\n",
"126000\n",
"127000\n",
"128000\n",
"129000\n",
"130000\n",
"131000\n",
"132000\n",
"133000\n",
"134000\n",
"135000\n",
"136000\n",
"137000\n",
"138000\n",
"139000\n",
"140000\n",
"141000\n",
"142000\n",
"143000\n",
"144000\n",
"145000\n",
"146000\n",
"147000\n",
"148000\n",
"149000\n",
"150000\n",
"151000\n",
"152000\n",
"153000\n",
"154000\n",
"155000\n",
"156000\n",
"157000\n",
"158000\n",
"159000\n",
"160000\n",
"161000\n",
"162000\n",
"163000\n",
"164000\n",
"165000\n",
"166000\n",
"167000\n",
"168000\n",
"169000\n",
"170000\n",
"171000\n",
"172000\n",
"173000\n",
"174000\n",
"175000\n",
"176000\n",
"177000\n",
"178000\n",
"179000\n",
"180000\n",
"181000\n",
"182000\n",
"183000\n",
"184000\n",
"185000\n",
"186000\n",
"187000\n",
"188000\n",
"189000\n",
"190000\n",
"191000\n",
"192000\n",
"193000\n",
"194000\n",
"195000\n",
"196000\n",
"197000\n",
"198000\n",
"199000\n",
"200000\n",
"201000\n",
"202000\n",
"203000\n",
"204000\n",
"205000\n",
"206000\n",
"207000\n",
"208000\n",
"209000\n",
"210000\n",
"211000\n",
"212000\n",
"213000\n",
"214000\n",
"215000\n",
"216000\n",
"217000\n",
"218000\n",
"219000\n",
"220000\n",
"221000\n",
"222000\n",
"223000\n",
"224000\n",
"225000\n",
"226000\n",
"227000\n",
"228000\n",
"229000\n",
"230000\n",
"231000\n",
"232000\n",
"233000\n",
"234000\n",
"235000\n",
"236000\n",
"237000\n",
"238000\n",
"239000\n",
"240000\n",
"241000\n",
"242000\n",
"243000\n",
"244000\n",
"245000\n",
"246000\n",
"247000\n",
"248000\n",
"249000\n",
"250000\n",
"251000\n",
"252000\n",
"253000\n",
"254000\n",
"255000\n",
"256000\n",
"257000\n",
"258000\n",
"259000\n",
"260000\n",
"261000\n",
"262000\n",
"263000\n",
"264000\n",
"265000\n",
"266000\n",
"267000\n",
"268000\n",
"269000\n",
"270000\n",
"271000\n",
"272000\n",
"273000\n",
"274000\n",
"275000\n",
"276000\n",
"277000\n",
"278000\n",
"279000\n",
"280000\n",
"281000\n",
"282000\n",
"283000\n",
"284000\n",
"285000\n",
"286000\n",
"287000\n",
"288000\n",
"289000\n",
"290000\n",
"291000\n",
"292000\n",
"293000\n",
"294000\n",
"295000\n",
"296000\n",
"297000\n",
"298000\n",
"299000\n",
"300000\n",
"301000\n",
"302000\n",
"303000\n",
"304000\n",
"305000\n",
"306000\n",
"307000\n",
"308000\n",
"309000\n",
"310000\n",
"311000\n",
"312000\n",
"313000\n",
"314000\n",
"315000\n",
"316000\n",
"317000\n",
"318000\n",
"319000\n",
"320000\n",
"321000\n",
"322000\n",
"323000\n",
"324000\n",
"325000\n",
"326000\n",
"327000\n",
"328000\n",
"329000\n",
"330000\n",
"331000\n",
"332000\n",
"333000\n",
"334000\n",
"335000\n",
"336000\n",
"337000\n",
"338000\n",
"339000\n",
"340000\n",
"341000\n",
"342000\n",
"343000\n",
"344000\n",
"345000\n",
"346000\n",
"347000\n",
"348000\n",
"349000\n",
"350000\n",
"351000\n",
"352000\n",
"353000\n",
"354000\n",
"355000\n",
"356000\n",
"357000\n",
"358000\n",
"359000\n",
"360000\n",
"361000\n",
"362000\n",
"363000\n",
"364000\n",
"365000\n",
"366000\n",
"367000\n",
"368000\n",
"369000\n",
"370000\n",
"371000\n",
"372000\n",
"373000\n",
"374000\n",
"375000\n",
"376000\n",
"377000\n",
"378000\n",
"379000\n",
"380000\n",
"381000\n",
"382000\n",
"383000\n",
"384000\n",
"385000\n",
"386000\n",
"387000\n",
"388000\n",
"389000\n",
"390000\n",
"391000\n",
"392000\n",
"393000\n",
"394000\n",
"395000\n",
"396000\n",
"397000\n",
"398000\n",
"399000\n",
"400000\n",
"401000\n",
"402000\n",
"403000\n",
"404000\n",
"405000\n",
"406000\n",
"407000\n",
"408000\n",
"409000\n",
"410000\n",
"411000\n",
"412000\n",
"413000\n",
"414000\n",
"415000\n",
"416000\n",
"417000\n",
"418000\n",
"419000\n",
"420000\n",
"421000\n",
"422000\n",
"423000\n",
"424000\n",
"425000\n",
"426000\n",
"427000\n",
"428000\n",
"429000\n",
"430000\n",
"431000\n",
"432000\n"
]
}
],
"source": [
"def look_ahead_iterator(gen):\n",
"    \"\"\"Yield overlapping ``(previous, current)`` pairs from an iterable.\n",
"\n",
"    For input ``a, b, c`` this yields ``(a, b)`` and then ``(b, c)``. Inputs\n",
"    with fewer than two items yield nothing.\n",
"\n",
"    Args:\n",
"        gen: Any iterable; it is consumed lazily, one item at a time.\n",
"\n",
"    Yields:\n",
"        Tuples holding two consecutive items.\n",
"    \"\"\"\n",
"    items = iter(gen)\n",
"    try:\n",
"        # Prime with the first item instead of using None as a \"no previous\"\n",
"        # marker, so that items which are themselves None are still paired.\n",
"        prev = next(items)\n",
"    except StopIteration:\n",
"        return\n",
"    for item in items:\n",
"        yield (prev, item)\n",
"        prev = item\n",
"\n",
"class Bigrams(IterableDataset):\n",
"    \"\"\"Iterable dataset of consecutive token-index pairs from a text file.\"\"\"\n",
"\n",
"    def __init__(self, text_file, vocabulary_size):\n",
"        \"\"\"Build the vocabulary from ``text_file``.\n",
"\n",
"        Args:\n",
"            text_file: Passed to ``get_word_lines_from_file`` to obtain the token lines.\n",
"            vocabulary_size: Forwarded as ``max_tokens`` when building the vocabulary.\n",
"        \"\"\"\n",
"        self.text_file = text_file\n",
"        self.vocabulary_size = vocabulary_size\n",
"        token_lines = get_word_lines_from_file(text_file)\n",
"        # NOTE(review): the special token is the empty string; presumably a\n",
"        # placeholder such as an unknown-word marker was intended - confirm.\n",
"        self.vocab = build_vocab_from_iterator(\n",
"            token_lines,\n",
"            max_tokens=vocabulary_size,\n",
"            specials=[''])\n",
"        # Lookups of tokens missing from the vocabulary resolve to the special token's index.\n",
"        self.vocab.set_default_index(self.vocab[''])\n",
"\n",
"    def __iter__(self):\n",
"        \"\"\"Return a new iterator of ``(previous_index, current_index)`` pairs.\"\"\"\n",
"        token_stream = itertools.chain.from_iterable(\n",
"            get_word_lines_from_file(self.text_file))\n",
"        index_stream = (self.vocab[token] for token in token_stream)\n",
"        return look_ahead_iterator(index_stream)\n",
"\n",
"# Instantiate the training dataset; Bigrams.__init__ builds its vocabulary from train_file.\n",
"train_dataset = Bigrams(train_file, vocab_size)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<__main__.Bigrams object at 0x7fdd26d23940>\n"
]
}
],
"source": [
"# Quick check that the dataset object exists; this prints the default object repr.\n",
"print(train_dataset)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/plain": [
"'|===========================================================================|\\n| PyTorch CUDA memory summary, device ID 0 |\\n|---------------------------------------------------------------------------|\\n| CUDA OOMs: 1 | cudaMalloc retries: 1 |\\n|===========================================================================|\\n| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |\\n|---------------------------------------------------------------------------|\\n| Allocated memory | 699613 KiB | 1903 MiB | 3735 MiB | 3052 MiB |\\n| from large pool | 699414 KiB | 1903 MiB | 3734 MiB | 3051 MiB |\\n| from small pool | 199 KiB | 1 MiB | 1 MiB | 1 MiB |\\n|---------------------------------------------------------------------------|\\n| Active memory | 699613 KiB | 1903 MiB | 3735 MiB | 3052 MiB |\\n| from large pool | 699414 KiB | 1903 MiB | 3734 MiB | 3051 MiB |\\n| from small pool | 199 KiB | 1 MiB | 1 MiB | 1 MiB |\\n|---------------------------------------------------------------------------|\\n| Requested memory | 699611 KiB | 1903 MiB | 3735 MiB | 3052 MiB |\\n| from large pool | 699413 KiB | 1903 MiB | 3734 MiB | 3051 MiB |\\n| from small pool | 197 KiB | 1 MiB | 1 MiB | 1 MiB |\\n|---------------------------------------------------------------------------|\\n| GPU reserved memory | 710656 KiB | 1918 MiB | 1918 MiB | 1224 MiB |\\n| from large pool | 708608 KiB | 1916 MiB | 1916 MiB | 1224 MiB |\\n| from small pool | 2048 KiB | 2 MiB | 2 MiB | 0 MiB |\\n|---------------------------------------------------------------------------|\\n| Non-releasable memory | 11043 KiB | 19364 KiB | 28939 KiB | 17896 KiB |\\n| from large pool | 9194 KiB | 17514 KiB | 25954 KiB | 16760 KiB |\\n| from small pool | 1849 KiB | 1950 KiB | 2985 KiB | 1136 KiB |\\n|---------------------------------------------------------------------------|\\n| Allocations | 10 | 17 | 38 | 28 |\\n| from large pool | 5 | 7 | 10 | 5 |\\n| from small pool | 5 | 11 | 28 | 23 
|\\n|---------------------------------------------------------------------------|\\n| Active allocs | 10 | 17 | 38 | 28 |\\n| from large pool | 5 | 7 | 10 | 5 |\\n| from small pool | 5 | 11 | 28 | 23 |\\n|---------------------------------------------------------------------------|\\n| GPU reserved segments | 5 | 7 | 7 | 2 |\\n| from large pool | 4 | 6 | 6 | 2 |\\n| from small pool | 1 | 1 | 1 | 0 |\\n|---------------------------------------------------------------------------|\\n| Non-releasable allocs | 6 | 8 | 20 | 14 |\\n| from large pool | 4 | 6 | 9 | 5 |\\n| from small pool | 2 | 3 | 11 | 9 |\\n|---------------------------------------------------------------------------|\\n| Oversize allocations | 0 | 0 | 0 | 0 |\\n|---------------------------------------------------------------------------|\\n| Oversize GPU segments | 0 | 0 | 0 | 0 |\\n|===========================================================================|\\n'"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Print the report rather than leaving it as the cell's last expression: the\n",
"# summary is a single multi-line string, and echoing it shows an escaped repr.\n",
"print(torch.cuda.memory_summary(device=None, abbreviated=False))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"# Tune the CUDA caching allocator through its configuration environment variable.\n",
"# NOTE(review): PyTorch presumably reads this variable when the allocator is first\n",
"# initialised, so setting it after CUDA has been used in this kernel may have no\n",
"# effect - confirm, and consider moving this to the top of the notebook.\n",
"os.environ[\"PYTORCH_CUDA_ALLOC_CONF\"] = \"max_split_size_mb:256\""
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Prefer the GPU but fall back to the CPU so the notebook still runs on\n",
"# machines without a usable CUDA device.\n",
"device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
"model = SimpleBigramNeuralLanguageModel(vocab_size, embed_size).to(device)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch: = 1\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/gedin/.local/lib/python3.8/site-packages/torch/nn/modules/container.py:217: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
" input = module(input)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 tensor(5.9599, device='cuda:0', grad_fn=)\n",
"1000\n",
"100 tensor(6.1015, device='cuda:0', grad_fn=)\n",
"200 tensor(5.9708, device='cuda:0', grad_fn=)\n",
"2000\n",
"300 tensor(6.2176, device='cuda:0', grad_fn=)\n",
"3000\n",
"400 tensor(5.9401, device='cuda:0', grad_fn=)\n",
"4000\n",
"500 tensor(6.2084, device='cuda:0', grad_fn=)\n",
"5000\n",
"600 tensor(5.9736, device='cuda:0', grad_fn=)\n",
"6000\n",
"700 tensor(6.1423, device='cuda:0', grad_fn=)\n",
"7000\n",
"800 tensor(5.7344, device='cuda:0', grad_fn=)\n",
"8000\n",
"900 tensor(6.0950, device='cuda:0', grad_fn=)\n",
"9000\n",
"1000 tensor(5.8473, device='cuda:0', grad_fn=)\n",
"10000\n",
"1100 tensor(6.0612, device='cuda:0', grad_fn=)\n",
"11000\n",
"1200 tensor(6.1509, device='cuda:0', grad_fn=)\n",
"12000\n",
"1300 tensor(6.0760, device='cuda:0', grad_fn=)\n",
"13000\n",
"1400 tensor(6.2047, device='cuda:0', grad_fn=)\n",
"14000\n",
"1500 tensor(6.1186, device='cuda:0', grad_fn=)\n",
"15000\n",
"1600 tensor(5.8722, device='cuda:0', grad_fn=)\n",
"16000\n",
"1700 tensor(5.8741, device='cuda:0', grad_fn=)\n",
"17000\n",
"1800 tensor(5.8971, device='cuda:0', grad_fn=)\n",
"18000\n",
"1900 tensor(5.8521, device='cuda:0', grad_fn=)\n",
"19000\n",
"2000 tensor(5.9434, device='cuda:0', grad_fn=)\n",
"20000\n",
"2100 tensor(6.0348, device='cuda:0', grad_fn=)\n",
"21000\n",
"2200 tensor(5.8840, device='cuda:0', grad_fn=)\n",
"22000\n",
"2300 tensor(5.8641, device='cuda:0', grad_fn=)\n",
"23000\n",
"2400 tensor(5.9068, device='cuda:0', grad_fn=)\n",
"24000\n",
"2500 tensor(5.9170, device='cuda:0', grad_fn=)\n",
"25000\n",
"2600 tensor(5.9812, device='cuda:0', grad_fn=)\n",
"26000\n",
"2700 tensor(5.8985, device='cuda:0', grad_fn=)\n",
"27000\n",
"2800 tensor(6.0008, device='cuda:0', grad_fn=)\n",
"28000\n",
"2900 tensor(6.1230, device='cuda:0', grad_fn=)\n",
"29000\n",
"3000 tensor(5.8770, device='cuda:0', grad_fn=)\n",
"30000\n",
"3100 tensor(5.9268, device='cuda:0', grad_fn=)\n",
"31000\n",
"3200 tensor(5.8530, device='cuda:0', grad_fn=)\n",
"32000\n",
"3300 tensor(5.8436, device='cuda:0', grad_fn=)\n",
"33000\n",
"3400 tensor(5.7692, device='cuda:0', grad_fn=)\n",
"34000\n",
"3500 tensor(5.8909, device='cuda:0', grad_fn=)\n",
"35000\n",
"3600 tensor(5.8325, device='cuda:0', grad_fn=)\n",
"36000\n",
"3700 tensor(5.8082, device='cuda:0', grad_fn=)\n",
"37000\n",
"3800 tensor(5.8106, device='cuda:0', grad_fn=)\n",
"38000\n",
"3900 tensor(5.6382, device='cuda:0', grad_fn=)\n",
"39000\n",
"4000 tensor(5.6596, device='cuda:0', grad_fn=)\n",
"40000\n",
"4100 tensor(5.9587, device='cuda:0', grad_fn=)\n",
"41000\n",
"4200 tensor(5.8862, device='cuda:0', grad_fn=)\n",
"42000\n",
"4300 tensor(5.9541, device='cuda:0', grad_fn=)\n",
"43000\n",
"4400 tensor(5.8681, device='cuda:0', grad_fn=)\n",
"44000\n",
"4500 tensor(5.6963, device='cuda:0', grad_fn=)\n",
"45000\n",
"4600 tensor(6.0707, device='cuda:0', grad_fn=)\n",
"46000\n",
"4700 tensor(5.7091, device='cuda:0', grad_fn=)\n",
"47000\n",
"4800 tensor(5.8139, device='cuda:0', grad_fn=)\n",
"48000\n",
"4900 tensor(5.8696, device='cuda:0', grad_fn=)\n",
"49000\n",
"5000 tensor(5.8844, device='cuda:0', grad_fn=)\n",
"50000\n",
"5100 tensor(5.9806, device='cuda:0', grad_fn=)\n",
"51000\n",
"5200 tensor(6.0075, device='cuda:0', grad_fn=)\n",
"52000\n",
"5300 tensor(6.0588, device='cuda:0', grad_fn=)\n",
"53000\n",
"5400 tensor(5.8456, device='cuda:0', grad_fn=)\n",
"54000\n",
"5500 tensor(5.9166, device='cuda:0', grad_fn=)\n",
"55000\n",
"5600 tensor(5.6528, device='cuda:0', grad_fn=)\n",
"56000\n",
"5700 tensor(5.8988, device='cuda:0', grad_fn=)\n",
"57000\n",
"5800 tensor(5.9132, device='cuda:0', grad_fn=)\n",
"58000\n",
"5900 tensor(5.9460, device='cuda:0', grad_fn=)\n",
"59000\n",
"6000 tensor(5.7543, device='cuda:0', grad_fn=)\n",
"60000\n",
"6100 tensor(5.8256, device='cuda:0', grad_fn=)\n",
"61000\n",
"6200 tensor(5.9448, device='cuda:0', grad_fn=)\n",
"62000\n",
"6300 tensor(5.7601, device='cuda:0', grad_fn=)\n",
"63000\n",
"6400 tensor(5.7091, device='cuda:0', grad_fn=)\n",
"64000\n",
"6500 tensor(5.5621, device='cuda:0', grad_fn=)\n",
"65000\n",
"6600 tensor(5.7094, device='cuda:0', grad_fn=)\n",
"66000\n",
"6700 tensor(5.6785, device='cuda:0', grad_fn=)\n",
"67000\n",
"6800 tensor(5.9249, device='cuda:0', grad_fn=)\n",
"68000\n",
"6900 tensor(5.8775, device='cuda:0', grad_fn=)\n",
"69000\n",
"7000 tensor(5.8075, device='cuda:0', grad_fn=)\n",
"70000\n",
"7100 tensor(5.5748, device='cuda:0', grad_fn=)\n",
"71000\n",
"7200 tensor(5.7217, device='cuda:0', grad_fn=)\n",
"72000\n",
"7300 tensor(5.9124, device='cuda:0', grad_fn=)\n",
"73000\n",
"7400 tensor(5.7197, device='cuda:0', grad_fn=)\n",
"74000\n",
"7500 tensor(5.6429, device='cuda:0', grad_fn=)\n",
"75000\n",
"7600 tensor(5.6847, device='cuda:0', grad_fn=)\n",
"76000\n",
"7700 tensor(5.7197, device='cuda:0', grad_fn=)\n",
"77000\n",
"7800 tensor(5.8559, device='cuda:0', grad_fn=)\n",
"78000\n",
"7900 tensor(5.5600, device='cuda:0', grad_fn=)\n",
"79000\n",
"8000 tensor(5.6288, device='cuda:0', grad_fn=)\n",
"80000\n",
"8100 tensor(5.7767, device='cuda:0', grad_fn=)\n",
"81000\n",
"8200 tensor(5.8037, device='cuda:0', grad_fn=)\n",
"82000\n",
"8300 tensor(5.7344, device='cuda:0', grad_fn=)\n",
"83000\n",
"8400 tensor(5.8092, device='cuda:0', grad_fn=)\n",
"84000\n",
"8500 tensor(5.8847, device='cuda:0', grad_fn=)\n",
"85000\n",
"8600 tensor(5.8754, device='cuda:0', grad_fn=)\n",
"86000\n",
"8700 tensor(5.9227, device='cuda:0', grad_fn=)\n",
"87000\n",
"8800 tensor(5.8028, device='cuda:0', grad_fn=)\n",
"88000\n",
"8900 tensor(5.6476, device='cuda:0', grad_fn=)\n",
"89000\n",
"9000 tensor(5.7656, device='cuda:0', grad_fn=)\n",
"90000\n",
"9100 tensor(5.7805, device='cuda:0', grad_fn=)\n",
"91000\n",
"9200 tensor(5.6879, device='cuda:0', grad_fn=)\n",
"92000\n",
"9300 tensor(5.7098, device='cuda:0', grad_fn=)\n",
"93000\n",
"9400 tensor(5.5631, device='cuda:0', grad_fn=)\n",
"94000\n",
"9500 tensor(5.6497, device='cuda:0', grad_fn=)\n",
"95000\n",
"9600 tensor(5.7500, device='cuda:0', grad_fn=)\n",
"96000\n",
"9700 tensor(5.6607, device='cuda:0', grad_fn=)\n",
"97000\n",
"9800 tensor(5.7196, device='cuda:0', grad_fn=)\n",
"9900 tensor(5.5987, device='cuda:0', grad_fn=)\n",
"98000\n",
"10000 tensor(5.7795, device='cuda:0', grad_fn=)\n",
"99000\n",
"10100 tensor(5.6980, device='cuda:0', grad_fn=)\n",
"100000\n",
"10200 tensor(5.6093, device='cuda:0', grad_fn=)\n",
"101000\n",
"10300 tensor(5.6792, device='cuda:0', grad_fn=)\n",
"102000\n",
"10400 tensor(5.7035, device='cuda:0', grad_fn=)\n",
"103000\n",
"10500 tensor(5.8282, device='cuda:0', grad_fn=)\n",
"104000\n",
"10600 tensor(5.8605, device='cuda:0', grad_fn=)\n",
"105000\n",
"10700 tensor(5.7354, device='cuda:0', grad_fn=)\n",
"106000\n",
"10800 tensor(5.8034, device='cuda:0', grad_fn=)\n",
"107000\n",
"10900 tensor(5.6194, device='cuda:0', grad_fn=)\n",
"108000\n",
"11000 tensor(5.8502, device='cuda:0', grad_fn=)\n",
"109000\n",
"11100 tensor(5.4406, device='cuda:0', grad_fn=)\n",
"110000\n",
"11200 tensor(5.6379, device='cuda:0', grad_fn=)\n",
"111000\n",
"11300 tensor(5.6668, device='cuda:0', grad_fn=)\n",
"112000\n",
"11400 tensor(5.6140, device='cuda:0', grad_fn=)\n",
"113000\n",
"11500 tensor(5.6565, device='cuda:0', grad_fn=)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"114000\n",
"11600 tensor(5.6308, device='cuda:0', grad_fn=)\n",
"115000\n",
"11700 tensor(5.5680, device='cuda:0', grad_fn=)\n",
"116000\n",
"11800 tensor(5.7604, device='cuda:0', grad_fn=)\n",
"117000\n",
"11900 tensor(5.5792, device='cuda:0', grad_fn=)\n",
"118000\n",
"12000 tensor(5.7329, device='cuda:0', grad_fn=)\n",
"119000\n",
"12100 tensor(5.7726, device='cuda:0', grad_fn=)\n",
"120000\n",
"12200 tensor(5.7151, device='cuda:0', grad_fn=)\n",
"121000\n",
"12300 tensor(5.8561, device='cuda:0', grad_fn=)\n",
"122000\n",
"12400 tensor(5.6791, device='cuda:0', grad_fn=)\n",
"123000\n",
"12500 tensor(5.5574, device='cuda:0', grad_fn=)\n",
"124000\n",
"12600 tensor(5.6817, device='cuda:0', grad_fn=)\n",
"125000\n",
"12700 tensor(5.5375, device='cuda:0', grad_fn=)\n",
"126000\n",
"12800 tensor(5.7270, device='cuda:0', grad_fn=)\n",
"127000\n",
"12900 tensor(5.6252, device='cuda:0', grad_fn=)\n",
"128000\n",
"13000 tensor(5.4536, device='cuda:0', grad_fn=)\n",
"129000\n",
"13100 tensor(5.6091, device='cuda:0', grad_fn=)\n",
"130000\n",
"13200 tensor(5.7324, device='cuda:0', grad_fn=)\n",
"131000\n",
"13300 tensor(5.5253, device='cuda:0', grad_fn=)\n",
"132000\n",
"13400 tensor(5.6491, device='cuda:0', grad_fn=)\n",
"133000\n",
"13500 tensor(5.5728, device='cuda:0', grad_fn=)\n",
"134000\n",
"13600 tensor(5.6632, device='cuda:0', grad_fn=)\n",
"135000\n",
"13700 tensor(5.6678, device='cuda:0', grad_fn=)\n",
"136000\n",
"13800 tensor(5.6112, device='cuda:0', grad_fn=)\n",
"137000\n",
"13900 tensor(5.4884, device='cuda:0', grad_fn=)\n",
"138000\n",
"14000 tensor(5.7304, device='cuda:0', grad_fn=)\n",
"139000\n",
"14100 tensor(5.4326, device='cuda:0', grad_fn=)\n",
"140000\n",
"14200 tensor(5.7188, device='cuda:0', grad_fn=)\n",
"141000\n",
"14300 tensor(5.6519, device='cuda:0', grad_fn=)\n",
"142000\n",
"14400 tensor(5.5892, device='cuda:0', grad_fn=)\n",
"143000\n",
"14500 tensor(5.7225, device='cuda:0', grad_fn=)\n",
"144000\n",
"14600 tensor(5.7216, device='cuda:0', grad_fn=)\n",
"145000\n",
"14700 tensor(5.5748, device='cuda:0', grad_fn=)\n",
"146000\n",
"14800 tensor(6.0184, device='cuda:0', grad_fn=)\n",
"147000\n",
"14900 tensor(5.6781, device='cuda:0', grad_fn=)\n",
"148000\n",
"15000 tensor(5.6038, device='cuda:0', grad_fn=)\n",
"149000\n",
"15100 tensor(5.7875, device='cuda:0', grad_fn=)\n",
"150000\n",
"15200 tensor(5.6485, device='cuda:0', grad_fn=)\n",
"151000\n",
"15300 tensor(5.5927, device='cuda:0', grad_fn=)\n",
"152000\n",
"15400 tensor(5.5156, device='cuda:0', grad_fn=)\n",
"153000\n",
"15500 tensor(5.6556, device='cuda:0', grad_fn=)\n",
"154000\n",
"15600 tensor(5.6485, device='cuda:0', grad_fn=)\n",
"155000\n",
"15700 tensor(5.5904, device='cuda:0', grad_fn=)\n",
"156000\n",
"15800 tensor(5.4613, device='cuda:0', grad_fn=)\n",
"157000\n",
"15900 tensor(5.6254, device='cuda:0', grad_fn=)\n",
"158000\n",
"16000 tensor(5.4349, device='cuda:0', grad_fn=)\n",
"159000\n",
"16100 tensor(5.5205, device='cuda:0', grad_fn=)\n",
"160000\n",
"16200 tensor(5.8051, device='cuda:0', grad_fn=)\n",
"161000\n",
"16300 tensor(5.6452, device='cuda:0', grad_fn=)\n",
"162000\n",
"16400 tensor(5.6071, device='cuda:0', grad_fn=)\n",
"163000\n",
"16500 tensor(5.7237, device='cuda:0', grad_fn=)\n",
"164000\n",
"16600 tensor(5.5771, device='cuda:0', grad_fn=)\n",
"165000\n",
"16700 tensor(5.5355, device='cuda:0', grad_fn=)\n",
"166000\n",
"16800 tensor(5.6363, device='cuda:0', grad_fn=)\n",
"167000\n",
"16900 tensor(5.3746, device='cuda:0', grad_fn=)\n",
"168000\n",
"17000 tensor(5.6707, device='cuda:0', grad_fn=)\n",
"169000\n",
"17100 tensor(5.5359, device='cuda:0', grad_fn=)\n",
"170000\n",
"17200 tensor(5.6118, device='cuda:0', grad_fn=)\n",
"171000\n",
"17300 tensor(5.6740, device='cuda:0', grad_fn=)\n",
"172000\n",
"17400 tensor(5.4438, device='cuda:0', grad_fn=)\n",
"173000\n",
"17500 tensor(5.5001, device='cuda:0', grad_fn=)\n",
"174000\n",
"17600 tensor(5.4953, device='cuda:0', grad_fn=)\n",
"175000\n",
"17700 tensor(5.5398, device='cuda:0', grad_fn=)\n",
"176000\n",
"17800 tensor(5.6053, device='cuda:0', grad_fn=)\n",
"177000\n",
"17900 tensor(5.4726, device='cuda:0', grad_fn=)\n",
"178000\n",
"18000 tensor(5.6747, device='cuda:0', grad_fn=)\n",
"179000\n",
"18100 tensor(5.6238, device='cuda:0', grad_fn=)\n",
"180000\n",
"18200 tensor(5.5469, device='cuda:0', grad_fn=)\n",
"181000\n",
"18300 tensor(5.5299, device='cuda:0', grad_fn=)\n",
"182000\n",
"18400 tensor(5.6323, device='cuda:0', grad_fn=)\n",
"183000\n",
"18500 tensor(5.5893, device='cuda:0', grad_fn=)\n",
"184000\n",
"18600 tensor(5.7452, device='cuda:0', grad_fn=)\n",
"185000\n",
"18700 tensor(5.5576, device='cuda:0', grad_fn=)\n",
"186000\n",
"18800 tensor(5.7439, device='cuda:0', grad_fn=)\n",
"187000\n",
"18900 tensor(5.6106, device='cuda:0', grad_fn=)\n",
"188000\n",
"19000 tensor(5.6647, device='cuda:0', grad_fn=)\n",
"189000\n",
"19100 tensor(5.7728, device='cuda:0', grad_fn=)\n",
"190000\n",
"19200 tensor(5.6169, device='cuda:0', grad_fn=)\n",
"191000\n",
"19300 tensor(5.7852, device='cuda:0', grad_fn=)\n",
"192000\n",
"19400 tensor(5.5627, device='cuda:0', grad_fn=)\n",
"193000\n",
"19500 tensor(5.5682, device='cuda:0', grad_fn=)\n",
"194000\n",
"19600 tensor(5.5978, device='cuda:0', grad_fn=)\n",
"195000\n",
"19700 tensor(5.6453, device='cuda:0', grad_fn=)\n",
"196000\n",
"19800 tensor(5.4786, device='cuda:0', grad_fn=)\n",
"197000\n",
"19900 tensor(5.4894, device='cuda:0', grad_fn=)\n",
"198000\n",
"20000 tensor(5.4999, device='cuda:0', grad_fn=)\n",
"199000\n",
"20100 tensor(5.4881, device='cuda:0', grad_fn=)\n",
"200000\n",
"20200 tensor(5.3915, device='cuda:0', grad_fn=)\n",
"201000\n",
"20300 tensor(5.5216, device='cuda:0', grad_fn=)\n",
"20400 tensor(5.5761, device='cuda:0', grad_fn=)\n",
"202000\n",
"20500 tensor(5.5586, device='cuda:0', grad_fn=)\n",
"203000\n",
"20600 tensor(5.7870, device='cuda:0', grad_fn=)\n",
"204000\n",
"20700 tensor(5.5776, device='cuda:0', grad_fn=)\n",
"205000\n",
"20800 tensor(5.4417, device='cuda:0', grad_fn=)\n",
"206000\n",
"20900 tensor(5.7186, device='cuda:0', grad_fn=)\n",
"207000\n",
"21000 tensor(5.5415, device='cuda:0', grad_fn=)\n",
"208000\n",
"21100 tensor(5.5141, device='cuda:0', grad_fn=)\n",
"209000\n",
"21200 tensor(5.4401, device='cuda:0', grad_fn=)\n",
"210000\n",
"21300 tensor(5.6511, device='cuda:0', grad_fn=)\n",
"211000\n",
"21400 tensor(5.6474, device='cuda:0', grad_fn=)\n",
"212000\n",
"21500 tensor(5.3946, device='cuda:0', grad_fn=)\n",
"213000\n",
"21600 tensor(5.3958, device='cuda:0', grad_fn=)\n",
"214000\n",
"21700 tensor(5.4040, device='cuda:0', grad_fn=)\n",
"215000\n",
"21800 tensor(5.5745, device='cuda:0', grad_fn=)\n",
"216000\n",
"21900 tensor(5.4996, device='cuda:0', grad_fn=)\n",
"217000\n",
"22000 tensor(5.5234, device='cuda:0', grad_fn=)\n",
"218000\n",
"22100 tensor(5.3870, device='cuda:0', grad_fn=)\n",
"219000\n",
"22200 tensor(5.2661, device='cuda:0', grad_fn=)\n",
"220000\n",
"22300 tensor(5.7031, device='cuda:0', grad_fn=)\n",
"221000\n",
"22400 tensor(5.3633, device='cuda:0', grad_fn=)\n",
"222000\n",
"22500 tensor(5.4404, device='cuda:0', grad_fn=)\n",
"223000\n",
"22600 tensor(5.5951, device='cuda:0', grad_fn=)\n",
"224000\n",
"22700 tensor(5.3901, device='cuda:0', grad_fn=)\n",
"225000\n",
"22800 tensor(5.6404, device='cuda:0', grad_fn=)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"226000\n",
"22900 tensor(5.6646, device='cuda:0', grad_fn=)\n",
"227000\n",
"23000 tensor(5.5949, device='cuda:0', grad_fn=)\n",
"228000\n",
"23100 tensor(5.5284, device='cuda:0', grad_fn=)\n",
"229000\n",
"23200 tensor(5.5617, device='cuda:0', grad_fn=)\n",
"230000\n",
"23300 tensor(5.6426, device='cuda:0', grad_fn=)\n",
"231000\n",
"23400 tensor(5.7283, device='cuda:0', grad_fn=)\n",
"232000\n",
"23500 tensor(5.4558, device='cuda:0', grad_fn=)\n",
"233000\n",
"23600 tensor(5.4600, device='cuda:0', grad_fn=)\n",
"234000\n",
"23700 tensor(5.4961, device='cuda:0', grad_fn=)\n",
"235000\n",
"23800 tensor(5.3373, device='cuda:0', grad_fn=)\n",
"236000\n",
"23900 tensor(5.4470, device='cuda:0', grad_fn=)\n",
"237000\n",
"24000 tensor(5.4346, device='cuda:0', grad_fn=)\n",
"238000\n",
"24100 tensor(5.5112, device='cuda:0', grad_fn=)\n",
"239000\n",
"24200 tensor(5.6918, device='cuda:0', grad_fn=)\n",
"240000\n",
"24300 tensor(5.6115, device='cuda:0', grad_fn=)\n",
"241000\n",
"24400 tensor(5.7404, device='cuda:0', grad_fn=