1665 lines
123 KiB
Plaintext
1665 lines
123 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {
|
|
"collapsed": true,
|
|
"id": "bAOmtZk6dzWU"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import itertools\n",
|
|
"import lzma\n",
|
|
"import numpy as np\n",
|
|
"import regex as re\n",
|
|
"import torch\n",
|
|
"from torch import nn\n",
|
|
"from torch.utils.data import IterableDataset, DataLoader\n",
|
|
"from torchtext.vocab import build_vocab_from_iterator"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {
|
|
"id": "N35v7jfAdzWV"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"vocab_size = 10000\n",
|
|
"embed_size = 200\n",
|
|
"hidden_size = 100\n",
|
|
"batch_size = 1000\n",
|
|
"device = 'cuda'\n",
|
|
"path_to_train = 'train/in.tsv.xz'\n",
|
|
"path_to_model = 'model1.bin'\n",
|
|
"\n",
|
|
"def clean_line(line: str):\n",
|
|
" # Preprocessing\n",
|
|
" separated = line.split('\\t')\n",
|
|
" prefix = separated[6].replace(r'\\n', ' ')\n",
|
|
" suffix = separated[7].replace(r'\\n', ' ')\n",
|
|
" return prefix + ' ' + suffix\n",
|
|
"\n",
|
|
"def get_words_from_line(line):\n",
|
|
" line = clean_line(line)\n",
|
|
" for word in line.split():\n",
|
|
" yield word\n",
|
|
"\n",
|
|
"def get_word_lines_from_file(file_name):\n",
|
|
" with lzma.open(file_name, mode='rt', encoding='utf-8') as fid:\n",
|
|
" for line in fid:\n",
|
|
" yield get_words_from_line(line)\n",
|
|
"\n",
|
|
"def double_look_ahead_iterator(gen):\n",
|
|
" prev_prev = None\n",
|
|
" prev = None\n",
|
|
" for item in gen:\n",
|
|
" if prev_prev is not None:\n",
|
|
" yield np.asarray((prev_prev, prev, item))\n",
|
|
" prev_prev = prev\n",
|
|
" prev = item\n",
|
|
"\n",
|
|
"def prediction(words, model, top) -> str:\n",
|
|
" words_tensor = [train_dataset.vocab.forward([word]) for word in words]\n",
|
|
" ixs = torch.tensor(words_tensor).view(-1).to(device)\n",
|
|
" out = model(ixs)\n",
|
|
" top = torch.topk(out[0], top)\n",
|
|
" top_indices = top.indices.tolist()\n",
|
|
" top_probs = top.values.tolist()\n",
|
|
" top_words = vocab.lookup_tokens(top_indices)\n",
|
|
" zipped = list(zip(top_words, top_probs))\n",
|
|
" for index, element in enumerate(zipped):\n",
|
|
" unk = None\n",
|
|
" if '<unk>' in element:\n",
|
|
" unk = zipped.pop(index)\n",
|
|
" zipped.append(('', unk[1]))\n",
|
|
" break\n",
|
|
" if unk is None:\n",
|
|
" zipped[-1] = ('', zipped[-1][1])\n",
|
|
" return ' '.join([f'{x[0]}:{x[1]:.5f}' for x in zipped])\n",
|
|
"\n",
|
|
"def create_outputs(folder_name, model, top):\n",
|
|
" print(f'Creating outputs in {folder_name}')\n",
|
|
" with lzma.open(f'{folder_name}/in.tsv.xz', mode='rt', encoding='utf-8') as fid:\n",
|
|
" with open(f'{folder_name}/out-top={top}.tsv', 'w', encoding='utf-8', newline='\\n') as f:\n",
|
|
" for line in fid:\n",
|
|
" separated = line.split('\\t')\n",
|
|
" prefix = separated[6].replace(r'\\n', ' ').split()[-2:]\n",
|
|
" output_line = prediction(prefix, model, top)\n",
|
|
" f.write(output_line + '\\n')\n",
|
|
"\n",
|
|
"def train_model(lr):\n",
|
|
" model = SimpleTrigramNeuralLanguageModel(vocab_size, embed_size, hidden_size).to(device)\n",
|
|
" data = DataLoader(train_dataset, batch_size=batch_size)\n",
|
|
" optimizer = torch.optim.Adam(model.parameters(), lr=lr)\n",
|
|
" criterion = torch.nn.NLLLoss()\n",
|
|
"\n",
|
|
" model.train()\n",
|
|
" step = 0\n",
|
|
" for batch in data:\n",
|
|
" x = batch[:, :2]\n",
|
|
" y = batch[:, 2]\n",
|
|
" x = x.to(device)\n",
|
|
" y = y.to(device)\n",
|
|
" optimizer.zero_grad()\n",
|
|
" ypredicted = model(x)\n",
|
|
" loss = criterion(torch.log(ypredicted), y)\n",
|
|
" if step % 100 == 0:\n",
|
|
" print(step, loss)\n",
|
|
" step += 1\n",
|
|
" loss.backward()\n",
|
|
"\n",
|
|
" torch.nn.utils.clip_grad_norm_(model.parameters(), 10)\n",
|
|
"\n",
|
|
"\n",
|
|
" optimizer.step()\n",
|
|
"\n",
|
|
" torch.save(model.state_dict(), path_to_model)\n",
|
|
"\n",
|
|
"def with_hyperparams():\n",
|
|
" train_model(lr=0.0001)\n",
|
|
" model = SimpleTrigramNeuralLanguageModel(vocab_size, embed_size, hidden_size).to(device)\n",
|
|
" model.load_state_dict(torch.load(path_to_model))\n",
|
|
" model.eval()\n",
|
|
" for top in [200, 400, 600]:\n",
|
|
" create_outputs('dev-0', model, top)\n",
|
|
" create_outputs('test-A', model, top)\n",
|
|
"\n",
|
|
"class Trigrams(IterableDataset):\n",
|
|
" def __init__(self, text_file, vocabulary_size):\n",
|
|
" self.vocab = build_vocab_from_iterator(\n",
|
|
" get_word_lines_from_file(text_file),\n",
|
|
" max_tokens=vocabulary_size,\n",
|
|
" specials=['<unk>'])\n",
|
|
" self.vocab.set_default_index(self.vocab['<unk>'])\n",
|
|
" self.vocabulary_size = vocabulary_size\n",
|
|
" self.text_file = text_file\n",
|
|
"\n",
|
|
" def __iter__(self):\n",
|
|
" return double_look_ahead_iterator(\n",
|
|
" (self.vocab[t] for t in itertools.chain.from_iterable(get_word_lines_from_file(self.text_file))))\n",
|
|
"\n",
|
|
"\n",
|
|
"class SimpleTrigramNeuralLanguageModel(nn.Module):\n",
|
|
" def __init__(self, vocabulary_size, embedding_size, hidden_size):\n",
|
|
" super(SimpleTrigramNeuralLanguageModel, self).__init__()\n",
|
|
" self.embedding_size = embedding_size\n",
|
|
" self.embedding = nn.Embedding(vocabulary_size, embedding_size)\n",
|
|
" self.lin1 = nn.Linear(2 * embedding_size, hidden_size)\n",
|
|
" self.rel = nn.ReLU()\n",
|
|
" self.lin2 = nn.Linear(hidden_size, vocabulary_size)\n",
|
|
" self.sm = nn.Softmax()\n",
|
|
"\n",
|
|
" def forward(self, x):\n",
|
|
" x = self.embedding(x).view((-1, 2 * self.embedding_size))\n",
|
|
" x = self.lin1(x)\n",
|
|
" x = self.rel(x)\n",
|
|
" x = self.lin2(x)\n",
|
|
" return self.sm(x)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "NkOOve5pdzWZ",
|
|
"outputId": "35cf7a57-a1e3-4194-c76f-708d8929511f"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"Mounted at /content/drive\n",
|
|
"/content/drive/MyDrive/modelowanie_jezyka\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from google.colab import drive\n",
|
|
"\n",
|
|
"drive.mount('/content/drive',force_remount=True)\n",
|
|
"%cd /content/drive/MyDrive/modelowanie_jezyka"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {
|
|
"id": "4RYvjsWvdzWZ"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"vocab = build_vocab_from_iterator(\n",
|
|
" get_word_lines_from_file(path_to_train),\n",
|
|
" max_tokens=vocab_size,\n",
|
|
" specials=['<unk>']\n",
|
|
")\n",
|
|
"\n",
|
|
"vocab.set_default_index(vocab['<unk>'])\n",
|
|
"train_dataset = Trigrams(path_to_train, vocab_size)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "9gg3bNu5dzWZ",
|
|
"outputId": "17b973c6-0247-4fe2-85c6-a97dc02242e1"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"metadata": {
|
|
"tags": null
|
|
},
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"<ipython-input-7-15bd6173eff4>:131: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
|
|
" return self.sm(x)\n"
|
|
]
|
|
},
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"0 tensor(9.2570, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"100 tensor(8.3136, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"200 tensor(7.4358, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"300 tensor(7.2475, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"400 tensor(6.7645, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"500 tensor(6.5050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"600 tensor(6.4014, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"700 tensor(6.6077, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"800 tensor(6.3927, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"900 tensor(6.0547, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"1000 tensor(6.1434, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"1100 tensor(5.8979, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"1200 tensor(6.1095, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"1300 tensor(6.1998, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"1400 tensor(5.9146, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"1500 tensor(5.7813, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"1600 tensor(5.9985, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"1700 tensor(5.3452, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"1800 tensor(5.7940, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"1900 tensor(5.5534, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"2000 tensor(5.6145, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"2100 tensor(5.8359, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"2200 tensor(5.7575, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"2300 tensor(5.8183, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"2400 tensor(5.7377, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"2500 tensor(5.8494, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"2600 tensor(5.8213, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"2700 tensor(5.6311, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"2800 tensor(5.5126, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"2900 tensor(5.4556, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"3000 tensor(5.2281, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"3100 tensor(5.4690, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"3200 tensor(5.2455, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"3300 tensor(5.8062, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"3400 tensor(5.8635, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"3500 tensor(5.5261, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"3600 tensor(5.4733, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"3700 tensor(5.3929, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"3800 tensor(5.5937, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"3900 tensor(5.4884, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"4000 tensor(5.1098, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"4100 tensor(5.4178, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"4200 tensor(5.3855, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"4300 tensor(5.4540, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"4400 tensor(5.2868, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"4500 tensor(5.6309, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"4600 tensor(5.6222, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"4700 tensor(5.0256, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"4800 tensor(5.5779, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"4900 tensor(5.4185, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"5000 tensor(5.0892, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"5100 tensor(5.2962, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"5200 tensor(5.6080, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"5300 tensor(5.5294, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"5400 tensor(5.4334, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"5500 tensor(5.3486, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"5600 tensor(5.5954, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"5700 tensor(5.6030, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"5800 tensor(5.3286, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"5900 tensor(5.2896, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"6000 tensor(5.7665, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"6100 tensor(5.3989, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"6200 tensor(5.1637, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"6300 tensor(5.7359, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"6400 tensor(5.4357, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"6500 tensor(5.2306, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"6600 tensor(5.4952, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"6700 tensor(5.2873, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"6800 tensor(5.6516, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"6900 tensor(5.3410, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"7000 tensor(5.5064, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"7100 tensor(5.3542, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"7200 tensor(5.4427, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"7300 tensor(5.4514, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"7400 tensor(5.3294, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"7500 tensor(5.7200, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"7600 tensor(5.2620, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"7700 tensor(5.3428, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"7800 tensor(5.5721, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"7900 tensor(5.4102, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"8000 tensor(5.4482, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"8100 tensor(5.4963, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"8200 tensor(5.5347, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"8300 tensor(5.5172, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"8400 tensor(5.0658, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"8500 tensor(5.4056, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"8600 tensor(5.1450, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"8700 tensor(5.2514, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"8800 tensor(5.0214, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"8900 tensor(5.3136, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"9000 tensor(5.1997, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"9100 tensor(4.8348, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"9200 tensor(5.0372, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"9300 tensor(5.2466, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"9400 tensor(5.3362, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"9500 tensor(5.2651, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"9600 tensor(5.5185, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"9700 tensor(5.5460, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"9800 tensor(5.1778, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"9900 tensor(5.0272, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"10000 tensor(5.4087, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"10100 tensor(5.1365, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"10200 tensor(5.1908, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"10300 tensor(5.1816, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"10400 tensor(5.5366, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"10500 tensor(5.2595, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"10600 tensor(5.3326, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"10700 tensor(5.3983, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"10800 tensor(5.4346, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"10900 tensor(5.3010, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"11000 tensor(5.2640, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"11100 tensor(4.9914, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"11200 tensor(5.2707, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"11300 tensor(5.1615, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"11400 tensor(5.3967, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"11500 tensor(5.0548, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"11600 tensor(5.2904, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"11700 tensor(5.0728, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"11800 tensor(5.3685, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"11900 tensor(5.2579, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"12000 tensor(5.4116, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"12100 tensor(4.9850, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"12200 tensor(5.1288, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"12300 tensor(5.5204, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"12400 tensor(5.2945, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"12500 tensor(5.2505, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"12600 tensor(4.8808, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"12700 tensor(4.6960, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"12800 tensor(5.2775, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"12900 tensor(5.4755, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"13000 tensor(4.8998, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"13100 tensor(5.0161, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"13200 tensor(5.3238, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"13300 tensor(5.3577, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"13400 tensor(5.3376, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"13500 tensor(5.4877, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"13600 tensor(4.8643, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"13700 tensor(4.9484, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"13800 tensor(5.1129, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"13900 tensor(5.3575, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"14000 tensor(5.4240, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"14100 tensor(5.1704, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"14200 tensor(5.1959, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"14300 tensor(5.3272, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"14400 tensor(5.4148, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"14500 tensor(5.3436, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"14600 tensor(5.2994, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"14700 tensor(5.3515, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"14800 tensor(5.0899, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"14900 tensor(5.0274, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"15000 tensor(5.2740, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"15100 tensor(5.4064, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"15200 tensor(5.1399, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"15300 tensor(5.0051, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"15400 tensor(5.0513, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"15500 tensor(5.2234, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"15600 tensor(5.1812, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"15700 tensor(5.2933, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"15800 tensor(5.3839, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"15900 tensor(5.3919, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"16000 tensor(5.0378, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"16100 tensor(5.2975, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"16200 tensor(5.2050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"16300 tensor(4.9146, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"16400 tensor(5.0289, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"16500 tensor(5.5424, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"16600 tensor(4.9872, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"16700 tensor(5.3475, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"16800 tensor(5.1461, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"16900 tensor(5.4058, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"17000 tensor(4.3672, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"17100 tensor(5.2553, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"17200 tensor(5.0288, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"17300 tensor(4.8915, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"17400 tensor(4.6779, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"17500 tensor(5.4898, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"17600 tensor(5.1083, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"17700 tensor(4.6789, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"17800 tensor(5.3441, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"17900 tensor(5.1731, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"18000 tensor(5.2611, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"18100 tensor(4.5288, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"18200 tensor(5.5324, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"18300 tensor(4.8522, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"18400 tensor(5.2299, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"18500 tensor(5.1332, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"18600 tensor(5.1331, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"18700 tensor(5.0706, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"18800 tensor(4.9425, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"18900 tensor(5.0168, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"19000 tensor(5.1194, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"19100 tensor(5.1465, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"19200 tensor(5.4118, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"19300 tensor(5.1087, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"19400 tensor(5.2879, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"19500 tensor(4.7952, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"19600 tensor(4.9889, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"19700 tensor(5.2982, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"19800 tensor(4.9682, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"19900 tensor(5.0727, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"20000 tensor(5.3921, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"20100 tensor(5.3464, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"20200 tensor(5.2613, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"20300 tensor(5.2227, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"20400 tensor(4.8486, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"20500 tensor(5.5216, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"20600 tensor(5.1865, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"20700 tensor(5.0252, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"20800 tensor(5.1742, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"20900 tensor(4.9608, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"21000 tensor(4.8754, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"21100 tensor(5.2599, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"21200 tensor(4.7691, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"21300 tensor(5.1363, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"21400 tensor(5.2940, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"21500 tensor(4.8573, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"21600 tensor(5.2410, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"21700 tensor(5.2941, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"21800 tensor(5.0294, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"21900 tensor(5.1803, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"22000 tensor(4.9386, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"22100 tensor(5.3389, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"22200 tensor(5.1924, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"22300 tensor(5.1488, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"22400 tensor(5.4211, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"22500 tensor(4.7259, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"22600 tensor(5.2168, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"22700 tensor(4.9509, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"22800 tensor(5.1694, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"22900 tensor(4.9012, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"23000 tensor(5.2488, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"23100 tensor(5.4762, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"23200 tensor(5.3331, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"23300 tensor(5.1007, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"23400 tensor(5.1397, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"23500 tensor(4.9862, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"23600 tensor(4.5681, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"23700 tensor(5.0539, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"23800 tensor(5.2384, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"23900 tensor(5.2213, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"24000 tensor(5.2034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"24100 tensor(4.8727, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"24200 tensor(5.1615, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"24300 tensor(5.2397, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"24400 tensor(4.9909, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"24500 tensor(5.3016, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"24600 tensor(4.8689, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"24700 tensor(5.0979, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"24800 tensor(4.8190, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"24900 tensor(5.1405, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"25000 tensor(5.2768, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"25100 tensor(5.1046, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"25200 tensor(5.0897, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"25300 tensor(5.0305, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"25400 tensor(4.8187, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"25500 tensor(4.7033, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"25600 tensor(5.4625, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"25700 tensor(5.0582, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"25800 tensor(4.7515, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"25900 tensor(4.9780, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"26000 tensor(5.0385, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"26100 tensor(5.3317, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"26200 tensor(5.2198, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"26300 tensor(5.2683, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"26400 tensor(4.9987, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"26500 tensor(5.0611, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"26600 tensor(5.2856, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"26700 tensor(5.0887, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"26800 tensor(5.1377, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"26900 tensor(4.5615, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"27000 tensor(5.2209, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"27100 tensor(5.0862, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"27200 tensor(5.0038, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"27300 tensor(5.0321, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"27400 tensor(5.0214, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"27500 tensor(5.2483, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"27600 tensor(4.8013, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"27700 tensor(5.0453, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"27800 tensor(4.8031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"27900 tensor(4.9852, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"28000 tensor(4.8680, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"28100 tensor(5.3965, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"28200 tensor(5.1008, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"28300 tensor(5.0263, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"28400 tensor(4.9622, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"28500 tensor(5.0803, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"28600 tensor(5.1089, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"28700 tensor(5.6485, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"28800 tensor(5.0195, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"28900 tensor(4.7143, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"29000 tensor(5.2968, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"29100 tensor(5.0881, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"29200 tensor(4.7767, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"29300 tensor(4.9015, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"29400 tensor(5.1177, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"29500 tensor(5.0024, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"29600 tensor(5.0116, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"29700 tensor(5.0913, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"29800 tensor(5.0730, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"29900 tensor(5.2098, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"30000 tensor(4.8949, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"30100 tensor(5.1074, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"30200 tensor(5.2501, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"30300 tensor(5.0952, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"30400 tensor(4.7803, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"30500 tensor(5.1438, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"30600 tensor(5.1865, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"30700 tensor(5.0876, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"30800 tensor(4.9957, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"30900 tensor(4.9111, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"31000 tensor(4.9795, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"31100 tensor(5.1215, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"31200 tensor(4.9652, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"31300 tensor(5.0836, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"31400 tensor(4.7480, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"31500 tensor(5.0733, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"31600 tensor(5.0257, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"31700 tensor(5.2194, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"31800 tensor(4.6968, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"31900 tensor(4.8459, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"32000 tensor(4.9492, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"32100 tensor(5.1559, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"32200 tensor(4.9369, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"32300 tensor(5.1198, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"32400 tensor(4.9705, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"32500 tensor(5.1361, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"32600 tensor(4.9825, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"32700 tensor(5.4973, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"32800 tensor(5.1503, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"32900 tensor(5.0624, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"33000 tensor(5.2216, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"33100 tensor(4.9986, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"33200 tensor(5.1666, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"33300 tensor(5.0774, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"33400 tensor(4.7154, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"33500 tensor(4.8050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"33600 tensor(5.0121, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"33700 tensor(4.9336, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"33800 tensor(4.9983, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"33900 tensor(4.9299, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"34000 tensor(4.8085, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"34100 tensor(4.6787, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"34200 tensor(5.0441, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"34300 tensor(5.0500, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"34400 tensor(4.8602, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"34500 tensor(5.1163, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"34600 tensor(5.0843, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"34700 tensor(5.1913, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"34800 tensor(5.0463, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"34900 tensor(5.0379, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"35000 tensor(5.1605, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"35100 tensor(4.9475, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"35200 tensor(4.7895, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"35300 tensor(4.9079, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"35400 tensor(4.8474, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"35500 tensor(5.1182, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"35600 tensor(4.9876, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"35700 tensor(5.0740, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"35800 tensor(5.0899, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"35900 tensor(5.4795, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"36000 tensor(4.8221, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"36100 tensor(5.2199, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"36200 tensor(5.1585, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"36300 tensor(4.9132, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"36400 tensor(4.6865, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"36500 tensor(5.0982, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"36600 tensor(5.1886, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"36700 tensor(5.2310, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"36800 tensor(4.8729, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"36900 tensor(5.1362, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"37000 tensor(5.2179, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"37100 tensor(5.0787, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"37200 tensor(5.1929, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"37300 tensor(5.1267, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"37400 tensor(4.8166, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"37500 tensor(4.7019, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"37600 tensor(5.0429, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"37700 tensor(4.9993, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"37800 tensor(4.9434, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"37900 tensor(4.9565, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"38000 tensor(5.0422, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"38100 tensor(4.9607, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"38200 tensor(5.2120, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"38300 tensor(5.1508, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"38400 tensor(5.2408, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"38500 tensor(5.1387, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"38600 tensor(4.9913, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"38700 tensor(5.0389, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"38800 tensor(5.2357, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"38900 tensor(5.4844, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"39000 tensor(4.8261, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"39100 tensor(5.0514, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"39200 tensor(5.1650, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"39300 tensor(4.9774, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"39400 tensor(5.0853, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"39500 tensor(5.1007, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"39600 tensor(5.2591, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"39700 tensor(5.0162, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"39800 tensor(5.1029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"39900 tensor(4.8540, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"40000 tensor(5.0950, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"40100 tensor(5.1870, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"40200 tensor(4.8280, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"40300 tensor(5.0794, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"40400 tensor(5.0300, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"40500 tensor(5.3105, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"40600 tensor(5.1316, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"40700 tensor(5.0367, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"40800 tensor(4.9724, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"40900 tensor(5.0658, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"41000 tensor(4.9169, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"41100 tensor(4.4952, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"41200 tensor(4.8536, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"41300 tensor(5.1255, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"41400 tensor(5.1006, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"41500 tensor(4.9304, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"41600 tensor(4.8720, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"41700 tensor(5.1611, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"41800 tensor(4.9487, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"41900 tensor(4.7736, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"42000 tensor(5.2250, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"42100 tensor(5.3416, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"42200 tensor(4.9317, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"42300 tensor(4.8594, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"42400 tensor(4.7213, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"42500 tensor(4.9424, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"42600 tensor(4.9085, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"42700 tensor(5.0864, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"42800 tensor(4.5522, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"42900 tensor(5.4337, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"43000 tensor(5.0700, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"43100 tensor(5.1494, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"43200 tensor(4.7431, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"43300 tensor(5.3639, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"43400 tensor(4.8963, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"43500 tensor(5.1136, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"43600 tensor(4.8048, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"43700 tensor(4.9236, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"43800 tensor(4.9797, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"43900 tensor(5.0236, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"44000 tensor(5.0134, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"44100 tensor(4.8278, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"44200 tensor(4.7282, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"44300 tensor(5.1968, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"44400 tensor(4.9318, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"44500 tensor(4.9458, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"44600 tensor(5.0479, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"44700 tensor(4.8669, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"44800 tensor(5.1006, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"44900 tensor(5.0761, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"45000 tensor(4.8570, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"45100 tensor(4.9536, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"45200 tensor(4.7665, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"45300 tensor(5.1462, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"45400 tensor(4.7666, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"45500 tensor(4.9350, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"45600 tensor(4.9407, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"45700 tensor(4.9847, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"45800 tensor(4.3695, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"45900 tensor(5.0090, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"46000 tensor(4.5963, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"46100 tensor(5.2931, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"46200 tensor(5.0931, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"46300 tensor(4.8944, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"46400 tensor(5.1315, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"46500 tensor(5.0825, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"46600 tensor(4.8329, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"46700 tensor(5.1103, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"46800 tensor(5.0603, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"46900 tensor(5.1512, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"47000 tensor(4.6868, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"47100 tensor(4.8712, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"47200 tensor(5.1912, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"47300 tensor(5.0159, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"47400 tensor(5.0270, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"47500 tensor(4.6956, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"47600 tensor(4.9921, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"47700 tensor(5.1164, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"47800 tensor(4.7971, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"47900 tensor(4.9729, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"48000 tensor(5.1074, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"48100 tensor(5.0341, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"48200 tensor(4.8993, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"48300 tensor(4.8756, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"48400 tensor(5.1059, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"48500 tensor(5.0636, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"48600 tensor(4.9931, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"48700 tensor(4.9768, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"48800 tensor(5.1949, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"48900 tensor(4.7792, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"49000 tensor(4.7743, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"49100 tensor(5.3373, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"49200 tensor(4.7696, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"49300 tensor(4.7552, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"49400 tensor(4.8440, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"49500 tensor(4.9257, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"49600 tensor(5.1038, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"49700 tensor(4.9334, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"49800 tensor(5.1488, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"49900 tensor(5.1520, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"50000 tensor(4.6342, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"50100 tensor(5.1441, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"50200 tensor(4.4512, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"50300 tensor(5.0929, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"50400 tensor(5.1754, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"50500 tensor(4.9708, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"50600 tensor(4.7339, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"50700 tensor(4.8156, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"50800 tensor(5.0074, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"50900 tensor(4.7649, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"51000 tensor(5.0744, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"51100 tensor(5.0727, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"51200 tensor(4.8884, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"51300 tensor(5.0004, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"51400 tensor(4.7368, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"51500 tensor(4.9471, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"51600 tensor(5.0216, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"51700 tensor(4.8408, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"51800 tensor(4.7706, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"51900 tensor(5.1493, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"52000 tensor(4.9272, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"52100 tensor(4.8667, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"52200 tensor(4.8331, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"52300 tensor(4.9672, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"52400 tensor(4.9821, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"52500 tensor(5.1103, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"52600 tensor(5.0463, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"52700 tensor(5.0696, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"52800 tensor(4.5670, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"52900 tensor(4.9175, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"53000 tensor(5.0619, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"53100 tensor(4.9976, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"53200 tensor(4.6688, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"53300 tensor(4.9201, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"53400 tensor(4.7648, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"53500 tensor(5.0633, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"53600 tensor(5.0900, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"53700 tensor(4.6773, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"53800 tensor(4.9783, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"53900 tensor(4.8892, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"54000 tensor(5.1447, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"54100 tensor(4.7897, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"54200 tensor(4.9532, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"54300 tensor(5.0852, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"54400 tensor(4.8126, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"54500 tensor(4.7961, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"54600 tensor(4.4173, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"54700 tensor(5.0556, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"54800 tensor(4.8861, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"54900 tensor(4.9520, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"55000 tensor(4.9933, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"55100 tensor(5.1774, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"55200 tensor(4.8481, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"55300 tensor(4.9836, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"55400 tensor(4.7960, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"55500 tensor(5.0643, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"55600 tensor(5.0122, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"55700 tensor(4.7390, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"55800 tensor(5.1234, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"55900 tensor(4.4134, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"56000 tensor(4.0267, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"56100 tensor(4.9464, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"56200 tensor(4.9766, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"56300 tensor(4.6557, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"56400 tensor(4.9807, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"56500 tensor(5.0741, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"56600 tensor(5.2151, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"56700 tensor(4.7876, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"56800 tensor(4.8416, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"56900 tensor(4.6740, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"57000 tensor(4.9346, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"57100 tensor(4.4737, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"57200 tensor(4.6889, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"57300 tensor(4.6792, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"57400 tensor(4.8290, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"57500 tensor(4.8428, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"57600 tensor(4.7810, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"57700 tensor(4.9318, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"57800 tensor(5.0835, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"57900 tensor(4.4041, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"58000 tensor(4.9021, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"58100 tensor(4.9826, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"58200 tensor(4.8855, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"58300 tensor(5.1050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"58400 tensor(4.8503, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"58500 tensor(4.9113, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"58600 tensor(4.8079, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"58700 tensor(4.8311, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"58800 tensor(4.8372, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"58900 tensor(5.0136, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"59000 tensor(4.9982, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"59100 tensor(4.9190, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"59200 tensor(4.3102, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"59300 tensor(4.7055, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"59400 tensor(5.0937, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"59500 tensor(4.7516, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"59600 tensor(4.8521, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"59700 tensor(4.8566, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"59800 tensor(4.9476, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"59900 tensor(4.9672, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"60000 tensor(5.1426, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"60100 tensor(4.6298, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"60200 tensor(4.9855, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"60300 tensor(5.1328, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"60400 tensor(4.8736, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"60500 tensor(4.8211, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"60600 tensor(4.6593, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"60700 tensor(4.9276, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"60800 tensor(4.9521, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"60900 tensor(5.1064, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"61000 tensor(4.8707, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"61100 tensor(5.0560, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"61200 tensor(4.9808, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"61300 tensor(4.7554, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"61400 tensor(4.9695, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"61500 tensor(4.9166, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"61600 tensor(5.0340, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"61700 tensor(4.9099, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"61800 tensor(4.7813, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"61900 tensor(5.0149, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"62000 tensor(4.9312, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"62100 tensor(4.7428, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"62200 tensor(4.6879, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"62300 tensor(4.5486, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"62400 tensor(4.8862, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"62500 tensor(4.9385, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"62600 tensor(5.1668, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"62700 tensor(4.7978, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"62800 tensor(5.0571, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"62900 tensor(4.7626, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"63000 tensor(4.6488, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"63100 tensor(4.6918, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"63200 tensor(4.3733, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"63300 tensor(5.0669, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"63400 tensor(4.7177, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"63500 tensor(4.8673, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"63600 tensor(4.9796, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"63700 tensor(5.0491, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"63800 tensor(4.7432, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"63900 tensor(5.0631, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"64000 tensor(4.9573, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"64100 tensor(4.5370, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"64200 tensor(4.9560, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"64300 tensor(4.9266, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"64400 tensor(5.0917, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"64500 tensor(4.9799, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"64600 tensor(5.0494, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"64700 tensor(4.9022, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"64800 tensor(4.8322, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"64900 tensor(4.5922, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"65000 tensor(4.9516, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"65100 tensor(4.8843, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"65200 tensor(5.0609, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"65300 tensor(4.8728, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"65400 tensor(4.9886, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"65500 tensor(4.9286, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"65600 tensor(4.8055, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"65700 tensor(4.9524, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"65800 tensor(4.9742, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"65900 tensor(4.8229, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"66000 tensor(4.8436, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"66100 tensor(4.8835, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"66200 tensor(4.8788, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"66300 tensor(4.3317, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"66400 tensor(4.8368, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"66500 tensor(5.0797, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"66600 tensor(4.8528, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"66700 tensor(5.0445, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"66800 tensor(5.0235, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"66900 tensor(4.8326, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"67000 tensor(4.9944, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"67100 tensor(4.5768, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"67200 tensor(4.7574, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"67300 tensor(4.4299, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"67400 tensor(5.0257, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"67500 tensor(4.9804, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"67600 tensor(4.7536, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"67700 tensor(4.8228, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"67800 tensor(4.9779, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"67900 tensor(5.0636, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"68000 tensor(4.6665, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"68100 tensor(4.7659, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"68200 tensor(4.8760, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"68300 tensor(4.7543, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"68400 tensor(4.9039, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"68500 tensor(4.9760, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"68600 tensor(5.2005, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"68700 tensor(4.9386, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"68800 tensor(5.0149, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"68900 tensor(4.9252, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"69000 tensor(4.9424, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"69100 tensor(4.7407, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"69200 tensor(4.6976, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"69300 tensor(5.0037, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"69400 tensor(4.9396, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"69500 tensor(4.8608, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"69600 tensor(4.8549, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"69700 tensor(4.3331, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"69800 tensor(4.7600, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"69900 tensor(5.1123, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"70000 tensor(4.9554, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"70100 tensor(4.8317, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"70200 tensor(4.6206, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"70300 tensor(5.0007, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"70400 tensor(4.7921, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"70500 tensor(4.9168, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"70600 tensor(4.9162, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"70700 tensor(4.9351, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"70800 tensor(4.7952, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"70900 tensor(4.8172, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"71000 tensor(4.7470, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"71100 tensor(4.8581, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"71200 tensor(4.7746, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"71300 tensor(4.8719, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"71400 tensor(4.9696, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"71500 tensor(4.8571, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"71600 tensor(5.0854, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"71700 tensor(5.0178, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"71800 tensor(5.0655, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"71900 tensor(4.7754, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"72000 tensor(4.8953, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"72100 tensor(4.6920, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"72200 tensor(4.8308, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"72300 tensor(4.7962, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"72400 tensor(4.9642, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"72500 tensor(4.3556, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"72600 tensor(5.0377, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"72700 tensor(5.0918, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"72800 tensor(4.6903, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"72900 tensor(4.8922, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"73000 tensor(4.6560, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"73100 tensor(4.5929, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"73200 tensor(4.9704, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"73300 tensor(4.6826, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"73400 tensor(4.8576, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"73500 tensor(4.5133, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"73600 tensor(5.0396, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"73700 tensor(4.7019, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"73800 tensor(4.6526, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"73900 tensor(4.7282, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"74000 tensor(4.8241, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"74100 tensor(4.8778, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"74200 tensor(5.0028, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"74300 tensor(4.9495, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"74400 tensor(5.0913, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"74500 tensor(5.1775, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"74600 tensor(4.4610, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"74700 tensor(5.3130, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"74800 tensor(4.7988, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"74900 tensor(4.7432, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"75000 tensor(5.1351, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"75100 tensor(4.7960, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"75200 tensor(4.6802, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"75300 tensor(4.9530, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"75400 tensor(5.1936, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"75500 tensor(4.9072, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"75600 tensor(4.9389, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"75700 tensor(4.7479, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"75800 tensor(5.0113, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"75900 tensor(4.8402, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"76000 tensor(4.7632, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"76100 tensor(5.0242, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"76200 tensor(5.1758, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"76300 tensor(5.2158, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"76400 tensor(4.7053, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"76500 tensor(4.6121, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"76600 tensor(4.8127, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"76700 tensor(4.7923, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"76800 tensor(4.9212, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"76900 tensor(4.7273, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"77000 tensor(5.0132, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"77100 tensor(5.0376, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"77200 tensor(5.1086, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"77300 tensor(5.0956, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"77400 tensor(4.2917, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"77500 tensor(4.7433, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"77600 tensor(5.1936, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"77700 tensor(4.6839, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"77800 tensor(4.9525, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"77900 tensor(4.9587, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"78000 tensor(4.6259, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"78100 tensor(5.1369, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"78200 tensor(4.8546, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"78300 tensor(4.7167, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"78400 tensor(4.8111, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"78500 tensor(5.0958, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"78600 tensor(4.6138, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"78700 tensor(4.8627, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"78800 tensor(4.7114, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"78900 tensor(4.9337, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"79000 tensor(4.8261, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"79100 tensor(4.8417, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"79200 tensor(4.9143, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"79300 tensor(5.1830, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"79400 tensor(4.7524, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"79500 tensor(4.8418, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"79600 tensor(4.8417, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"79700 tensor(4.8996, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"79800 tensor(5.0514, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"79900 tensor(4.7655, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"80000 tensor(5.0455, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"80100 tensor(4.7676, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"80200 tensor(4.9099, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"80300 tensor(4.6653, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"80400 tensor(5.4648, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"80500 tensor(5.2843, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"80600 tensor(4.9210, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"80700 tensor(5.0038, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"80800 tensor(4.9964, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"80900 tensor(4.6542, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"81000 tensor(4.4927, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"81100 tensor(4.9112, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"81200 tensor(4.7454, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"81300 tensor(4.7941, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"81400 tensor(4.7367, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"81500 tensor(4.4278, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"81600 tensor(4.6781, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"81700 tensor(4.7829, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"81800 tensor(5.2671, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"81900 tensor(5.0455, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"82000 tensor(4.5989, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"82100 tensor(4.8426, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"82200 tensor(5.1518, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"82300 tensor(4.9861, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"82400 tensor(4.7219, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"82500 tensor(4.4432, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"82600 tensor(5.1153, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"82700 tensor(5.0588, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"82800 tensor(4.9451, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"82900 tensor(4.8679, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"83000 tensor(4.9801, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"83100 tensor(4.5352, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"83200 tensor(5.1042, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"83300 tensor(4.8451, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"83400 tensor(5.1197, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"83500 tensor(5.0546, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"83600 tensor(4.8199, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"83700 tensor(5.0084, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"83800 tensor(4.8168, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"83900 tensor(4.8227, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"84000 tensor(4.7605, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"84100 tensor(4.5356, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"84200 tensor(4.4155, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"84300 tensor(5.0845, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"84400 tensor(5.0510, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"84500 tensor(4.9410, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"84600 tensor(4.8332, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"84700 tensor(4.7127, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"84800 tensor(4.8011, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"84900 tensor(4.7756, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"85000 tensor(4.6604, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"85100 tensor(4.5451, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"85200 tensor(4.8663, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"85300 tensor(5.0541, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"85400 tensor(5.0160, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"85500 tensor(5.1305, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"85600 tensor(4.7185, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"85700 tensor(4.1517, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"85800 tensor(5.3590, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"85900 tensor(4.8706, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"86000 tensor(4.9743, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"86100 tensor(4.9927, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"86200 tensor(4.9393, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"86300 tensor(4.6270, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"86400 tensor(5.1570, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"86500 tensor(4.9389, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"86600 tensor(4.8239, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"86700 tensor(4.9450, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"86800 tensor(4.6683, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"86900 tensor(4.9541, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"87000 tensor(4.9188, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"87100 tensor(4.9799, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"87200 tensor(5.0830, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"87300 tensor(5.0182, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"87400 tensor(5.0457, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"87500 tensor(5.0434, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"87600 tensor(5.0894, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"87700 tensor(4.8774, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"87800 tensor(4.6824, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"87900 tensor(5.0711, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"88000 tensor(4.6619, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"88100 tensor(4.7817, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"88200 tensor(4.6248, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"88300 tensor(4.6969, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"88400 tensor(5.1408, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"88500 tensor(4.9432, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"88600 tensor(5.0958, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"88700 tensor(4.9197, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"88800 tensor(4.8189, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"88900 tensor(5.0727, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"89000 tensor(4.7310, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"89100 tensor(4.8091, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"89200 tensor(4.9663, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"89300 tensor(4.8672, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"89400 tensor(4.7209, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"89500 tensor(5.0378, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"89600 tensor(4.8318, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"89700 tensor(4.8880, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"89800 tensor(5.1295, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"89900 tensor(4.7790, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"90000 tensor(5.1169, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"90100 tensor(4.8325, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"90200 tensor(4.9860, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"90300 tensor(4.8658, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"90400 tensor(4.9993, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"90500 tensor(4.6549, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"90600 tensor(4.6946, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"90700 tensor(4.9381, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"90800 tensor(4.7142, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"90900 tensor(4.7790, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"91000 tensor(5.2347, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"91100 tensor(4.8655, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"91200 tensor(4.5499, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"91300 tensor(4.8705, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"91400 tensor(4.7983, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"91500 tensor(4.5896, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"91600 tensor(4.8234, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"91700 tensor(4.9716, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"91800 tensor(4.7786, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"91900 tensor(4.7490, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"92000 tensor(5.2922, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"92100 tensor(5.0743, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"92200 tensor(4.9666, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"92300 tensor(4.9775, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"92400 tensor(4.9803, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"92500 tensor(4.7901, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"92600 tensor(4.7642, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"92700 tensor(4.8326, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"92800 tensor(4.9265, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"92900 tensor(4.5904, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"93000 tensor(5.0385, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"93100 tensor(4.4090, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"93200 tensor(4.8539, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"93300 tensor(4.8513, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"93400 tensor(5.1419, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"93500 tensor(4.7645, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"93600 tensor(4.9421, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"93700 tensor(5.0132, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"93800 tensor(4.7757, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"93900 tensor(4.8692, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"94000 tensor(4.5337, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"94100 tensor(4.9413, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"94200 tensor(4.6704, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"94300 tensor(4.7524, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"94400 tensor(4.7512, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"94500 tensor(4.7677, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"94600 tensor(4.7220, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"94700 tensor(5.1174, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"94800 tensor(4.7162, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"94900 tensor(4.8592, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"95000 tensor(5.1584, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"95100 tensor(4.6757, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"95200 tensor(4.9543, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"95300 tensor(4.8269, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"95400 tensor(4.3439, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"95500 tensor(4.7870, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"95600 tensor(4.9020, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"95700 tensor(4.5368, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"95800 tensor(5.0365, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"95900 tensor(5.1015, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"96000 tensor(5.1919, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"96100 tensor(5.0134, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"96200 tensor(4.9848, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"96300 tensor(4.8372, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"96400 tensor(5.0092, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"96500 tensor(4.9956, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"96600 tensor(4.8314, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"96700 tensor(4.8101, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"96800 tensor(4.9651, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"96900 tensor(4.8259, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"97000 tensor(4.7303, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"97100 tensor(5.0106, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"97200 tensor(4.6987, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"97300 tensor(4.7748, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"97400 tensor(4.8300, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"97500 tensor(4.9247, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"97600 tensor(4.7935, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"97700 tensor(4.6968, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"97800 tensor(4.9424, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"97900 tensor(4.5449, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"98000 tensor(4.8785, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"98100 tensor(5.2805, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"98200 tensor(5.0360, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"98300 tensor(4.8959, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"98400 tensor(4.8205, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"98500 tensor(4.8109, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"98600 tensor(4.7517, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"98700 tensor(4.2260, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"98800 tensor(4.8782, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"98900 tensor(4.7034, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"99000 tensor(4.5861, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"99100 tensor(4.9904, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"99200 tensor(4.9637, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"99300 tensor(4.7789, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"99400 tensor(4.9661, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"99500 tensor(4.4535, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"99600 tensor(4.7522, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"99700 tensor(4.1814, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"99800 tensor(4.4184, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"99900 tensor(4.9469, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"100000 tensor(4.9216, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"100100 tensor(5.0670, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"100200 tensor(4.4815, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"100300 tensor(4.8047, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"100400 tensor(4.7661, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"100500 tensor(5.0412, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"100600 tensor(5.0822, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"100700 tensor(4.9617, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"100800 tensor(5.0497, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"100900 tensor(5.0292, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"101000 tensor(4.3783, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"101100 tensor(5.1925, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"101200 tensor(4.8833, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"101300 tensor(4.8256, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"101400 tensor(5.1346, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"101500 tensor(4.9421, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"101600 tensor(4.7890, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"101700 tensor(4.5993, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"101800 tensor(4.8848, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"101900 tensor(5.0264, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"102000 tensor(4.5711, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"102100 tensor(5.0150, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"102200 tensor(4.8943, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"102300 tensor(4.8837, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"102400 tensor(5.0701, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"102500 tensor(4.6735, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"102600 tensor(4.7247, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"102700 tensor(4.7052, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"102800 tensor(4.4751, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"102900 tensor(4.3246, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"103000 tensor(4.8290, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"103100 tensor(4.8880, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"103200 tensor(4.7565, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"103300 tensor(4.8007, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"103400 tensor(4.9262, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"103500 tensor(4.9762, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"103600 tensor(4.9669, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"103700 tensor(5.1680, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"103800 tensor(5.0296, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"103900 tensor(4.9457, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"104000 tensor(4.9833, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"104100 tensor(5.1114, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"104200 tensor(4.8435, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"104300 tensor(4.5540, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"104400 tensor(4.5313, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"104500 tensor(5.0209, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"104600 tensor(4.1863, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"104700 tensor(4.9792, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"104800 tensor(4.7222, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"104900 tensor(4.8395, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"105000 tensor(4.8403, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"105100 tensor(4.9299, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"105200 tensor(4.9324, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"105300 tensor(5.0470, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"105400 tensor(4.7876, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"105500 tensor(5.0069, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"105600 tensor(5.0764, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"105700 tensor(5.2300, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"105800 tensor(4.8332, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"105900 tensor(5.0333, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"106000 tensor(4.9133, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"106100 tensor(4.8769, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"106200 tensor(5.1212, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"106300 tensor(4.8909, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"106400 tensor(4.7504, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"106500 tensor(4.9875, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"106600 tensor(4.6188, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"106700 tensor(4.4476, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"106800 tensor(4.9015, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"106900 tensor(4.9961, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"107000 tensor(4.5983, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"107100 tensor(4.9465, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"107200 tensor(4.9934, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"107300 tensor(4.2177, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"107400 tensor(4.8564, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"107500 tensor(4.7367, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"107600 tensor(4.6914, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"107700 tensor(4.9439, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"107800 tensor(4.8153, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"107900 tensor(4.3782, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"108000 tensor(5.0639, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"108100 tensor(5.0140, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"108200 tensor(4.8026, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"108300 tensor(5.0365, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"108400 tensor(5.0422, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"108500 tensor(4.7082, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"108600 tensor(4.5527, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"108700 tensor(4.7589, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"108800 tensor(5.0291, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"108900 tensor(4.5464, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"109000 tensor(4.9586, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"109100 tensor(4.9770, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"109200 tensor(4.9075, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"109300 tensor(4.5973, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"109400 tensor(5.0076, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"109500 tensor(4.7639, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"109600 tensor(4.7529, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"109700 tensor(5.0050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"109800 tensor(5.0360, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"109900 tensor(4.7775, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"110000 tensor(4.6627, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"110100 tensor(4.8799, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"110200 tensor(4.7465, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"110300 tensor(5.0446, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"110400 tensor(4.8966, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"110500 tensor(4.9575, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"110600 tensor(4.8801, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"110700 tensor(4.9705, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"110800 tensor(4.8122, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"110900 tensor(4.7103, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"111000 tensor(4.7193, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"111100 tensor(4.9327, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"111200 tensor(4.9441, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"111300 tensor(5.2106, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"111400 tensor(4.5818, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"111500 tensor(4.6068, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"111600 tensor(5.0798, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"111700 tensor(5.1382, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"111800 tensor(4.3443, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"111900 tensor(5.0532, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"112000 tensor(4.6804, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"112100 tensor(4.5050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"112200 tensor(4.9678, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"112300 tensor(5.1017, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"112400 tensor(4.9474, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"112500 tensor(5.0659, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"112600 tensor(4.1369, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"112700 tensor(4.8935, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"112800 tensor(4.6240, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"112900 tensor(4.8367, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"113000 tensor(4.6743, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"113100 tensor(5.0598, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"113200 tensor(4.7863, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"113300 tensor(3.9544, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"113400 tensor(5.0014, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"113500 tensor(4.8981, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"113600 tensor(4.6241, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"113700 tensor(4.7643, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"113800 tensor(4.4898, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"113900 tensor(4.7177, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"114000 tensor(5.1589, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"114100 tensor(4.8498, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"114200 tensor(4.6020, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"114300 tensor(4.8476, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"114400 tensor(4.9836, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"114500 tensor(5.2996, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"114600 tensor(5.2211, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"114700 tensor(4.6157, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"114800 tensor(4.9978, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"114900 tensor(4.7124, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"115000 tensor(4.8855, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"115100 tensor(4.8061, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"115200 tensor(4.9216, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"115300 tensor(4.7439, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"115400 tensor(5.1855, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"115500 tensor(5.0700, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"115600 tensor(4.9031, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"115700 tensor(4.9650, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"115800 tensor(4.7938, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"115900 tensor(4.9974, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"116000 tensor(4.7874, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"116100 tensor(4.8274, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"116200 tensor(4.8384, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"116300 tensor(5.1083, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"116400 tensor(4.6231, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"116500 tensor(4.5965, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"116600 tensor(4.6714, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"116700 tensor(4.9358, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"116800 tensor(5.0778, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"116900 tensor(4.7613, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"117000 tensor(4.6919, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"117100 tensor(4.7312, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"117200 tensor(4.8210, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"117300 tensor(4.3730, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"117400 tensor(4.8292, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"117500 tensor(4.7531, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"117600 tensor(4.8388, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"117700 tensor(4.8566, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"117800 tensor(4.9021, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"117900 tensor(4.8977, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"118000 tensor(4.7841, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"118100 tensor(4.6447, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"118200 tensor(4.5021, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"118300 tensor(4.9625, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"118400 tensor(5.0818, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"118500 tensor(4.9943, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"118600 tensor(4.8726, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"118700 tensor(4.9113, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"118800 tensor(4.5782, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"118900 tensor(4.4148, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"119000 tensor(5.0278, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"119100 tensor(5.2606, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"119200 tensor(4.9693, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"119300 tensor(4.6617, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"119400 tensor(4.8571, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"119500 tensor(4.2560, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"119600 tensor(4.9186, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"119700 tensor(4.9237, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"119800 tensor(4.9456, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"119900 tensor(4.9364, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"120000 tensor(4.5929, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"120100 tensor(4.9357, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"120200 tensor(4.8705, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"120300 tensor(4.7914, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"120400 tensor(4.7161, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"120500 tensor(4.7885, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"120600 tensor(4.8593, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"120700 tensor(4.7298, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"120800 tensor(4.8347, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"120900 tensor(4.9481, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"121000 tensor(4.6276, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"121100 tensor(4.6297, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"121200 tensor(4.4420, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"121300 tensor(4.9996, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"121400 tensor(4.6057, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"121500 tensor(5.2147, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"121600 tensor(5.0158, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"121700 tensor(4.7963, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"121800 tensor(5.0160, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"121900 tensor(4.9921, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"122000 tensor(4.9018, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"122100 tensor(4.9365, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"122200 tensor(4.6702, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"122300 tensor(4.9160, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"122400 tensor(4.9476, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"122500 tensor(4.8492, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"122600 tensor(4.6968, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"122700 tensor(4.7005, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"122800 tensor(4.4889, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"122900 tensor(4.4938, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"123000 tensor(4.8215, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"123100 tensor(4.6476, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"123200 tensor(4.6063, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"123300 tensor(4.7627, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"123400 tensor(4.9589, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"123500 tensor(5.0093, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"123600 tensor(5.1528, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"123700 tensor(4.5435, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"123800 tensor(5.0693, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"123900 tensor(4.4490, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"124000 tensor(4.9851, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"124100 tensor(4.9539, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"124200 tensor(5.0405, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"124300 tensor(4.6855, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"124400 tensor(4.9417, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"124500 tensor(4.4380, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"124600 tensor(4.0469, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"124700 tensor(5.0332, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"124800 tensor(4.9670, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"124900 tensor(5.0651, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"125000 tensor(4.5116, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"125100 tensor(5.0326, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"125200 tensor(4.7236, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"125300 tensor(4.7683, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"125400 tensor(4.5482, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"125500 tensor(4.5634, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"125600 tensor(4.4635, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"125700 tensor(4.4219, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"125800 tensor(4.9547, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"125900 tensor(4.8685, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"126000 tensor(4.3485, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"126100 tensor(4.5584, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"126200 tensor(4.6830, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"126300 tensor(5.0172, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"126400 tensor(4.9179, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"126500 tensor(4.8389, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"126600 tensor(4.7898, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"126700 tensor(4.9195, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"126800 tensor(4.8269, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"126900 tensor(4.8297, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"127000 tensor(4.8922, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"127100 tensor(4.8160, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"127200 tensor(4.7967, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"127300 tensor(4.7824, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"127400 tensor(4.8233, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"127500 tensor(4.9216, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"127600 tensor(5.0369, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"127700 tensor(4.8331, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"127800 tensor(4.8210, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"127900 tensor(4.8657, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"128000 tensor(4.7904, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"128100 tensor(4.6721, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"128200 tensor(5.0386, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"128300 tensor(4.5596, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"128400 tensor(4.5702, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"128500 tensor(4.7675, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"128600 tensor(4.8381, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"128700 tensor(4.5835, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"128800 tensor(4.7954, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"128900 tensor(4.7430, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"129000 tensor(4.6958, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"129100 tensor(4.4172, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"129200 tensor(4.8874, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"129300 tensor(4.3820, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"129400 tensor(4.8125, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"129500 tensor(4.9783, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"129600 tensor(4.5286, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"129700 tensor(4.8962, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"129800 tensor(4.6818, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"129900 tensor(5.0567, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"130000 tensor(5.1978, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"130100 tensor(5.1098, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"130200 tensor(4.6280, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"130300 tensor(4.9298, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"130400 tensor(4.6004, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"130500 tensor(4.8462, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"130600 tensor(4.7324, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"130700 tensor(4.7167, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"130800 tensor(4.4547, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"130900 tensor(4.5560, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"131000 tensor(4.0357, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"131100 tensor(4.8499, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"131200 tensor(4.9097, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"131300 tensor(4.8725, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"131400 tensor(4.8507, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"131500 tensor(4.8822, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"131600 tensor(4.8053, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"131700 tensor(4.7774, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"131800 tensor(4.6166, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"131900 tensor(5.0381, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"132000 tensor(5.1435, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"132100 tensor(4.9651, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"132200 tensor(4.3226, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"132300 tensor(4.9645, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"132400 tensor(5.1297, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"132500 tensor(4.9514, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"132600 tensor(4.4122, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"132700 tensor(4.8552, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"132800 tensor(4.4157, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"132900 tensor(4.9525, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"133000 tensor(4.7483, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"133100 tensor(4.6578, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"133200 tensor(5.0746, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"133300 tensor(4.8121, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"133400 tensor(4.8101, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"133500 tensor(4.6605, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"133600 tensor(4.7754, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"133700 tensor(4.4397, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"133800 tensor(4.8784, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"133900 tensor(4.6728, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"134000 tensor(4.7126, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"134100 tensor(4.5749, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"134200 tensor(4.4029, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"134300 tensor(4.7075, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"134400 tensor(5.0396, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"134500 tensor(4.6848, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"134600 tensor(5.0197, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"134700 tensor(4.7742, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"134800 tensor(4.8104, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"134900 tensor(4.8825, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"135000 tensor(4.7189, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"135100 tensor(4.6685, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"135200 tensor(4.7691, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"135300 tensor(4.7166, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"135400 tensor(4.6312, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"135500 tensor(4.9367, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"135600 tensor(4.8343, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"135700 tensor(4.8131, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"135800 tensor(4.7636, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"135900 tensor(4.6484, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"136000 tensor(4.7523, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"136100 tensor(4.8950, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"136200 tensor(4.9203, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"136300 tensor(4.8050, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"136400 tensor(4.8176, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"136500 tensor(4.4287, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"136600 tensor(4.5731, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"136700 tensor(4.7796, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"136800 tensor(5.1640, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"136900 tensor(4.5927, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"137000 tensor(4.5682, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"137100 tensor(5.0697, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"137200 tensor(4.8193, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"137300 tensor(4.8367, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"137400 tensor(4.9668, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"137500 tensor(4.6632, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"137600 tensor(5.2432, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"137700 tensor(4.6309, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"137800 tensor(4.7814, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"137900 tensor(4.4679, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"138000 tensor(4.9990, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"138100 tensor(4.6476, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"138200 tensor(4.9484, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"138300 tensor(4.8118, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"138400 tensor(4.8842, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"138500 tensor(4.7052, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"138600 tensor(3.9411, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"138700 tensor(5.0011, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"138800 tensor(4.8827, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"138900 tensor(4.8508, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"139000 tensor(4.6841, device='cuda:0', grad_fn=<NllLossBackward0>)\n",
|
|
"Creating outputs in dev-0\n",
|
|
"Creating outputs in test-A\n",
|
|
"Creating outputs in dev-0\n",
|
|
"Creating outputs in test-A\n",
|
|
"Creating outputs in dev-0\n",
|
|
"Creating outputs in test-A\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"with_hyperparams()"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"accelerator": "GPU",
|
|
"colab": {
|
|
"provenance": []
|
|
},
|
|
"gpuClass": "standard",
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.12"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0
|
|
} |