challenging-america-word-ga.../simple_neural_network.ipynb

4875 lines
92 KiB
Plaintext
Raw Normal View History

2023-04-27 21:39:28 +02:00
{
"cells": [
{
"cell_type": "markdown",
"source": [
"## IMPORTS"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 1,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n",
"True\n"
]
}
],
"source": [
"import regex as re\n",
"import sys\n",
"from torchtext.vocab import build_vocab_from_iterator\n",
"import lzma\n",
"from torch.utils.data import IterableDataset\n",
"import itertools\n",
"from torch import nn\n",
"import torch\n",
"import pickle\n",
"from torch.utils.data import DataLoader\n",
"\n",
"print(torch.backends.mps.is_available())\n",
"print(torch.backends.mps.is_built())"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## FUNCTIONS"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [],
"source": [
"def get_words_from_line(line):\n",
" line = line.rstrip()\n",
" yield '<s>'\n",
" for t in line.split(' '):\n",
" yield t\n",
" yield '</s>'\n",
"\n",
"def get_word_lines_from_file(file_name):\n",
" n = 0\n",
" with lzma.open(file_name, 'r') as fh:\n",
" for line in fh:\n",
" n += 1\n",
" if n % 1000 == 0:\n",
" print(n ,file=sys.stderr)\n",
" yield get_words_from_line(line.decode('utf-8'))\n",
"\n",
"def look_ahead_iterator(gen):\n",
" prev = None\n",
" for item in gen:\n",
" if prev is not None:\n",
" yield (prev, item)\n",
" prev = item\n",
"\n",
"def clean(text):\n",
" text = str(text).lower().replace('-\\\\n', '').replace('\\\\n', ' ').replace('-', '').replace('\\'s', ' is').replace('\\'re', ' are').replace('\\'m', ' am').replace('\\'ve', ' have').replace('\\'ll', ' will')\n",
" text = re.sub(r'\\p{P}', '', text)\n",
" return text\n",
"\n",
"def predict(word, model, vocab):\n",
" try:\n",
" ixs = torch.tensor(vocab.forward([word])).to(device)\n",
" except:\n",
" ixs = torch.tensor(vocab.forward(['<unk>'])).to(device)\n",
" word = '<unk>'\n",
" out = model(ixs)\n",
" top = torch.topk(out[0], 300)\n",
" top_indices = top.indices.tolist()\n",
" top_probs = top.values.tolist()\n",
" top_words = vocab.lookup_tokens(top_indices)\n",
" prob_list = list(zip(top_words, top_probs))\n",
" for index, element in enumerate(prob_list):\n",
" unk = None\n",
" if '<unk>' in element:\n",
" unk = prob_list.pop(index)\n",
" prob_list.append(('', unk[1]))\n",
" break\n",
" if unk is None:\n",
" prob_list[-1] = ('', prob_list[-1][1])\n",
" return ' '.join([f'{x[0]}:{x[1]}' for x in prob_list])\n",
"\n",
"def predicition_for_file(model, vocab, folder, file):\n",
" print('=' * 10, f' do prediction for {folder}/{file} ', '=' * 10)\n",
" with lzma.open(f'{folder}/in.tsv.xz', mode='rt', encoding='utf-8') as f:\n",
" with open(f'{folder}/out.tsv', 'w', encoding='utf-8') as fid:\n",
"\n",
" for line in f:\n",
" separated = line.split('\\t')\n",
" before = clean(separated[6]).split()[-1]\n",
" new_line = predict(before, model, vocab)\n",
" fid.write(new_line + '\\n')"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## CLASSES"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [],
"source": [
"class Bigrams(IterableDataset):\n",
" def __init__(self, text_file, vocabulary_size):\n",
" self.vocab = build_vocab_from_iterator(\n",
" get_word_lines_from_file(text_file),\n",
" max_tokens = vocabulary_size,\n",
" specials = ['<unk>'])\n",
" self.vocab.set_default_index(self.vocab['<unk>'])\n",
" self.vocabulary_size = vocabulary_size\n",
" self.text_file = text_file\n",
"\n",
" def __iter__(self):\n",
" return look_ahead_iterator(\n",
" (self.vocab[t] for t in itertools.chain.from_iterable(get_word_lines_from_file(self.text_file))))\n",
"\n",
"class SimpleBigramNeuralLanguageModel(nn.Module):\n",
" def __init__(self, vocabulary_size, embedding_size):\n",
" super(SimpleBigramNeuralLanguageModel, self).__init__()\n",
" self.model = nn.Sequential(\n",
" nn.Embedding(vocabulary_size, embedding_size),\n",
" nn.Linear(embedding_size, vocabulary_size),\n",
" nn.Softmax()\n",
" )\n",
"\n",
" def forward(self, x):\n",
" return self.model(x)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## PARAMETERS"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 4,
"outputs": [],
"source": [
"vocab_size = 30000\n",
"embed_size = 1000\n",
"batch_size = 5000\n",
"device = 'mps'\n",
"path_to_training_file = './train/in.tsv.xz'\n",
"path_to_model_file = 'model_neural_network.bin'\n",
"folder_dev_0, file_dev_0 = 'dev-0', 'in.tsv.xz'\n",
"folder_test_a, file_test_a = 'test-A', 'in.tsv.xz'\n",
"path_to_vocabulary_file = 'vocabulary_neural_network.pickle'"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## VOCAB"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"1000\n",
"2000\n",
"3000\n",
"4000\n",
"5000\n",
"6000\n",
"7000\n",
"8000\n",
"9000\n",
"10000\n",
"11000\n",
"12000\n",
"13000\n",
"14000\n",
"15000\n",
"16000\n",
"17000\n",
"18000\n",
"19000\n",
"20000\n",
"21000\n",
"22000\n",
"23000\n",
"24000\n",
"25000\n",
"26000\n",
"27000\n",
"28000\n",
"29000\n",
"30000\n",
"31000\n",
"32000\n",
"33000\n",
"34000\n",
"35000\n",
"36000\n",
"37000\n",
"38000\n",
"39000\n",
"40000\n",
"41000\n",
"42000\n",
"43000\n",
"44000\n",
"45000\n",
"46000\n",
"47000\n",
"48000\n",
"49000\n",
"50000\n",
"51000\n",
"52000\n",
"53000\n",
"54000\n",
"55000\n",
"56000\n",
"57000\n",
"58000\n",
"59000\n",
"60000\n",
"61000\n",
"62000\n",
"63000\n",
"64000\n",
"65000\n",
"66000\n",
"67000\n",
"68000\n",
"69000\n",
"70000\n",
"71000\n",
"72000\n",
"73000\n",
"74000\n",
"75000\n",
"76000\n",
"77000\n",
"78000\n",
"79000\n",
"80000\n",
"81000\n",
"82000\n",
"83000\n",
"84000\n",
"85000\n",
"86000\n",
"87000\n",
"88000\n",
"89000\n",
"90000\n",
"91000\n",
"92000\n",
"93000\n",
"94000\n",
"95000\n",
"96000\n",
"97000\n",
"98000\n",
"99000\n",
"100000\n",
"101000\n",
"102000\n",
"103000\n",
"104000\n",
"105000\n",
"106000\n",
"107000\n",
"108000\n",
"109000\n",
"110000\n",
"111000\n",
"112000\n",
"113000\n",
"114000\n",
"115000\n",
"116000\n",
"117000\n",
"118000\n",
"119000\n",
"120000\n",
"121000\n",
"122000\n",
"123000\n",
"124000\n",
"125000\n",
"126000\n",
"127000\n",
"128000\n",
"129000\n",
"130000\n",
"131000\n",
"132000\n",
"133000\n",
"134000\n",
"135000\n",
"136000\n",
"137000\n",
"138000\n",
"139000\n",
"140000\n",
"141000\n",
"142000\n",
"143000\n",
"144000\n",
"145000\n",
"146000\n",
"147000\n",
"148000\n",
"149000\n",
"150000\n",
"151000\n",
"152000\n",
"153000\n",
"154000\n",
"155000\n",
"156000\n",
"157000\n",
"158000\n",
"159000\n",
"160000\n",
"161000\n",
"162000\n",
"163000\n",
"164000\n",
"165000\n",
"166000\n",
"167000\n",
"168000\n",
"169000\n",
"170000\n",
"171000\n",
"172000\n",
"173000\n",
"174000\n",
"175000\n",
"176000\n",
"177000\n",
"178000\n",
"179000\n",
"180000\n",
"181000\n",
"182000\n",
"183000\n",
"184000\n",
"185000\n",
"186000\n",
"187000\n",
"188000\n",
"189000\n",
"190000\n",
"191000\n",
"192000\n",
"193000\n",
"194000\n",
"195000\n",
"196000\n",
"197000\n",
"198000\n",
"199000\n",
"200000\n",
"201000\n",
"202000\n",
"203000\n",
"204000\n",
"205000\n",
"206000\n",
"207000\n",
"208000\n",
"209000\n",
"210000\n",
"211000\n",
"212000\n",
"213000\n",
"214000\n",
"215000\n",
"216000\n",
"217000\n",
"218000\n",
"219000\n",
"220000\n",
"221000\n",
"222000\n",
"223000\n",
"224000\n",
"225000\n",
"226000\n",
"227000\n",
"228000\n",
"229000\n",
"230000\n",
"231000\n",
"232000\n",
"233000\n",
"234000\n",
"235000\n",
"236000\n",
"237000\n",
"238000\n",
"239000\n",
"240000\n",
"241000\n",
"242000\n",
"243000\n",
"244000\n",
"245000\n",
"246000\n",
"247000\n",
"248000\n",
"249000\n",
"250000\n",
"251000\n",
"252000\n",
"253000\n",
"254000\n",
"255000\n",
"256000\n",
"257000\n",
"258000\n",
"259000\n",
"260000\n",
"261000\n",
"262000\n",
"263000\n",
"264000\n",
"265000\n",
"266000\n",
"267000\n",
"268000\n",
"269000\n",
"270000\n",
"271000\n",
"272000\n",
"273000\n",
"274000\n",
"275000\n",
"276000\n",
"277000\n",
"278000\n",
"279000\n",
"280000\n",
"281000\n",
"282000\n",
"283000\n",
"284000\n",
"285000\n",
"286000\n",
"287000\n",
"288000\n",
"289000\n",
"290000\n",
"291000\n",
"292000\n",
"293000\n",
"294000\n",
"295000\n",
"296000\n",
"297000\n",
"298000\n",
"299000\n",
"300000\n",
"301000\n",
"302000\n",
"303000\n",
"304000\n",
"305000\n",
"306000\n",
"307000\n",
"308000\n",
"309000\n",
"310000\n",
"311000\n",
"312000\n",
"313000\n",
"314000\n",
"315000\n",
"316000\n",
"317000\n",
"318000\n",
"319000\n",
"320000\n",
"321000\n",
"322000\n",
"323000\n",
"324000\n",
"325000\n",
"326000\n",
"327000\n",
"328000\n",
"329000\n",
"330000\n",
"331000\n",
"332000\n",
"333000\n",
"334000\n",
"335000\n",
"336000\n",
"337000\n",
"338000\n",
"339000\n",
"340000\n",
"341000\n",
"342000\n",
"343000\n",
"344000\n",
"345000\n",
"346000\n",
"347000\n",
"348000\n",
"349000\n",
"350000\n",
"351000\n",
"352000\n",
"353000\n",
"354000\n",
"355000\n",
"356000\n",
"357000\n",
"358000\n",
"359000\n",
"360000\n",
"361000\n",
"362000\n",
"363000\n",
"364000\n",
"365000\n",
"366000\n",
"367000\n",
"368000\n",
"369000\n",
"370000\n",
"371000\n",
"372000\n",
"373000\n",
"374000\n",
"375000\n",
"376000\n",
"377000\n",
"378000\n",
"379000\n",
"380000\n",
"381000\n",
"382000\n",
"383000\n",
"384000\n",
"385000\n",
"386000\n",
"387000\n",
"388000\n",
"389000\n",
"390000\n",
"391000\n",
"392000\n",
"393000\n",
"394000\n",
"395000\n",
"396000\n",
"397000\n",
"398000\n",
"399000\n",
"400000\n",
"401000\n",
"402000\n",
"403000\n",
"404000\n",
"405000\n",
"406000\n",
"407000\n",
"408000\n",
"409000\n",
"410000\n",
"411000\n",
"412000\n",
"413000\n",
"414000\n",
"415000\n",
"416000\n",
"417000\n",
"418000\n",
"419000\n",
"420000\n",
"421000\n",
"422000\n",
"423000\n",
"424000\n",
"425000\n",
"426000\n",
"427000\n",
"428000\n",
"429000\n",
"430000\n",
"431000\n",
"432000\n"
]
}
],
"source": [
"vocab = build_vocab_from_iterator(\n",
" get_word_lines_from_file(path_to_training_file),\n",
" max_tokens = vocab_size,\n",
" specials = ['<unk>'])\n",
"\n",
"with open(path_to_vocabulary_file, 'wb') as handle:\n",
" pickle.dump(vocab, handle, protocol=pickle.HIGHEST_PROTOCOL)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## TRAIN MODEL"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"1000\n",
"2000\n",
"3000\n",
"4000\n",
"5000\n",
"6000\n",
"7000\n",
"8000\n",
"9000\n",
"10000\n",
"11000\n",
"12000\n",
"13000\n",
"14000\n",
"15000\n",
"16000\n",
"17000\n",
"18000\n",
"19000\n",
"20000\n",
"21000\n",
"22000\n",
"23000\n",
"24000\n",
"25000\n",
"26000\n",
"27000\n",
"28000\n",
"29000\n",
"30000\n",
"31000\n",
"32000\n",
"33000\n",
"34000\n",
"35000\n",
"36000\n",
"37000\n",
"38000\n",
"39000\n",
"40000\n",
"41000\n",
"42000\n",
"43000\n",
"44000\n",
"45000\n",
"46000\n",
"47000\n",
"48000\n",
"49000\n",
"50000\n",
"51000\n",
"52000\n",
"53000\n",
"54000\n",
"55000\n",
"56000\n",
"57000\n",
"58000\n",
"59000\n",
"60000\n",
"61000\n",
"62000\n",
"63000\n",
"64000\n",
"65000\n",
"66000\n",
"67000\n",
"68000\n",
"69000\n",
"70000\n",
"71000\n",
"72000\n",
"73000\n",
"74000\n",
"75000\n",
"76000\n",
"77000\n",
"78000\n",
"79000\n",
"80000\n",
"81000\n",
"82000\n",
"83000\n",
"84000\n",
"85000\n",
"86000\n",
"87000\n",
"88000\n",
"89000\n",
"90000\n",
"91000\n",
"92000\n",
"93000\n",
"94000\n",
"95000\n",
"96000\n",
"97000\n",
"98000\n",
"99000\n",
"100000\n",
"101000\n",
"102000\n",
"103000\n",
"104000\n",
"105000\n",
"106000\n",
"107000\n",
"108000\n",
"109000\n",
"110000\n",
"111000\n",
"112000\n",
"113000\n",
"114000\n",
"115000\n",
"116000\n",
"117000\n",
"118000\n",
"119000\n",
"120000\n",
"121000\n",
"122000\n",
"123000\n",
"124000\n",
"125000\n",
"126000\n",
"127000\n",
"128000\n",
"129000\n",
"130000\n",
"131000\n",
"132000\n",
"133000\n",
"134000\n",
"135000\n",
"136000\n",
"137000\n",
"138000\n",
"139000\n",
"140000\n",
"141000\n",
"142000\n",
"143000\n",
"144000\n",
"145000\n",
"146000\n",
"147000\n",
"148000\n",
"149000\n",
"150000\n",
"151000\n",
"152000\n",
"153000\n",
"154000\n",
"155000\n",
"156000\n",
"157000\n",
"158000\n",
"159000\n",
"160000\n",
"161000\n",
"162000\n",
"163000\n",
"164000\n",
"165000\n",
"166000\n",
"167000\n",
"168000\n",
"169000\n",
"170000\n",
"171000\n",
"172000\n",
"173000\n",
"174000\n",
"175000\n",
"176000\n",
"177000\n",
"178000\n",
"179000\n",
"180000\n",
"181000\n",
"182000\n",
"183000\n",
"184000\n",
"185000\n",
"186000\n",
"187000\n",
"188000\n",
"189000\n",
"190000\n",
"191000\n",
"192000\n",
"193000\n",
"194000\n",
"195000\n",
"196000\n",
"197000\n",
"198000\n",
"199000\n",
"200000\n",
"201000\n",
"202000\n",
"203000\n",
"204000\n",
"205000\n",
"206000\n",
"207000\n",
"208000\n",
"209000\n",
"210000\n",
"211000\n",
"212000\n",
"213000\n",
"214000\n",
"215000\n",
"216000\n",
"217000\n",
"218000\n",
"219000\n",
"220000\n",
"221000\n",
"222000\n",
"223000\n",
"224000\n",
"225000\n",
"226000\n",
"227000\n",
"228000\n",
"229000\n",
"230000\n",
"231000\n",
"232000\n",
"233000\n",
"234000\n",
"235000\n",
"236000\n",
"237000\n",
"238000\n",
"239000\n",
"240000\n",
"241000\n",
"242000\n",
"243000\n",
"244000\n",
"245000\n",
"246000\n",
"247000\n",
"248000\n",
"249000\n",
"250000\n",
"251000\n",
"252000\n",
"253000\n",
"254000\n",
"255000\n",
"256000\n",
"257000\n",
"258000\n",
"259000\n",
"260000\n",
"261000\n",
"262000\n",
"263000\n",
"264000\n",
"265000\n",
"266000\n",
"267000\n",
"268000\n",
"269000\n",
"270000\n",
"271000\n",
"272000\n",
"273000\n",
"274000\n",
"275000\n",
"276000\n",
"277000\n",
"278000\n",
"279000\n",
"280000\n",
"281000\n",
"282000\n",
"283000\n",
"284000\n",
"285000\n",
"286000\n",
"287000\n",
"288000\n",
"289000\n",
"290000\n",
"291000\n",
"292000\n",
"293000\n",
"294000\n",
"295000\n",
"296000\n",
"297000\n",
"298000\n",
"299000\n",
"300000\n",
"301000\n",
"302000\n",
"303000\n",
"304000\n",
"305000\n",
"306000\n",
"307000\n",
"308000\n",
"309000\n",
"310000\n",
"311000\n",
"312000\n",
"313000\n",
"314000\n",
"315000\n",
"316000\n",
"317000\n",
"318000\n",
"319000\n",
"320000\n",
"321000\n",
"322000\n",
"323000\n",
"324000\n",
"325000\n",
"326000\n",
"327000\n",
"328000\n",
"329000\n",
"330000\n",
"331000\n",
"332000\n",
"333000\n",
"334000\n",
"335000\n",
"336000\n",
"337000\n",
"338000\n",
"339000\n",
"340000\n",
"341000\n",
"342000\n",
"343000\n",
"344000\n",
"345000\n",
"346000\n",
"347000\n",
"348000\n",
"349000\n",
"350000\n",
"351000\n",
"352000\n",
"353000\n",
"354000\n",
"355000\n",
"356000\n",
"357000\n",
"358000\n",
"359000\n",
"360000\n",
"361000\n",
"362000\n",
"363000\n",
"364000\n",
"365000\n",
"366000\n",
"367000\n",
"368000\n",
"369000\n",
"370000\n",
"371000\n",
"372000\n",
"373000\n",
"374000\n",
"375000\n",
"376000\n",
"377000\n",
"378000\n",
"379000\n",
"380000\n",
"381000\n",
"382000\n",
"383000\n",
"384000\n",
"385000\n",
"386000\n",
"387000\n",
"388000\n",
"389000\n",
"390000\n",
"391000\n",
"392000\n",
"393000\n",
"394000\n",
"395000\n",
"396000\n",
"397000\n",
"398000\n",
"399000\n",
"400000\n",
"401000\n",
"402000\n",
"403000\n",
"404000\n",
"405000\n",
"406000\n",
"407000\n",
"408000\n",
"409000\n",
"410000\n",
"411000\n",
"412000\n",
"413000\n",
"414000\n",
"415000\n",
"416000\n",
"417000\n",
"418000\n",
"419000\n",
"420000\n",
"421000\n",
"422000\n",
"423000\n",
"424000\n",
"425000\n",
"426000\n",
"427000\n",
"428000\n",
"429000\n",
"430000\n",
"431000\n",
"432000\n",
"/Users/maciej/miniconda3/envs/mj/lib/python3.11/site-packages/torch/nn/modules/container.py:217: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
" input = module(input)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 tensor(10.5058, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"1000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"100 tensor(7.3365, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2000\n",
"3000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"200 tensor(6.6523, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"4000\n",
"5000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"300 tensor(6.1860, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"6000\n",
"7000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"400 tensor(6.0387, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"8000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"500 tensor(5.8481, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"9000\n",
"10000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"600 tensor(5.6081, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11000\n",
"12000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"700 tensor(5.5820, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"13000\n",
"14000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"800 tensor(5.5111, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"15000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"900 tensor(5.4927, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"16000\n",
"17000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1000 tensor(5.5190, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"18000\n",
"19000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1100 tensor(5.5600, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"20000\n",
"21000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1200 tensor(5.6395, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"22000\n",
"23000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1300 tensor(5.4455, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"24000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1400 tensor(5.5564, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"25000\n",
"26000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1500 tensor(5.4919, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"27000\n",
"28000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1600 tensor(5.2355, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"29000\n",
"30000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1700 tensor(5.4107, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"31000\n",
"32000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1800 tensor(5.5119, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"33000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1900 tensor(5.3500, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"34000\n",
"35000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2000 tensor(5.3722, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"36000\n",
"37000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2100 tensor(5.2736, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"38000\n",
"39000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2200 tensor(5.3808, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"40000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2300 tensor(5.5186, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"41000\n",
"42000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2400 tensor(5.2746, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"43000\n",
"44000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2500 tensor(5.3340, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"45000\n",
"46000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2600 tensor(5.4654, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"47000\n",
"48000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2700 tensor(5.4318, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"49000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2800 tensor(5.3528, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"50000\n",
"51000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2900 tensor(5.1630, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"52000\n",
"53000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"3000 tensor(5.4531, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"54000\n",
"55000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"3100 tensor(5.4153, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"56000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"3200 tensor(5.3299, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"57000\n",
"58000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"3300 tensor(5.3637, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"59000\n",
"60000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"3400 tensor(5.3405, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"61000\n",
"62000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"3500 tensor(5.3668, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"63000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"3600 tensor(5.4104, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"64000\n",
"65000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"3700 tensor(5.2142, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"66000\n",
"67000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"3800 tensor(5.5528, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"68000\n",
"69000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"3900 tensor(5.1879, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"70000\n",
"71000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"4000 tensor(5.2014, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"72000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"4100 tensor(5.4020, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"73000\n",
"74000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"4200 tensor(5.2686, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"75000\n",
"76000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"4300 tensor(5.3070, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"77000\n",
"78000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"4400 tensor(5.1891, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"79000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"4500 tensor(5.3085, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"80000\n",
"81000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"4600 tensor(5.3568, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"82000\n",
"83000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"4700 tensor(5.2280, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"84000\n",
"85000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"4800 tensor(5.2878, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"86000\n",
"87000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"4900 tensor(5.1588, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"88000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"5000 tensor(5.1523, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"89000\n",
"90000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"5100 tensor(5.2101, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"91000\n",
"92000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"5200 tensor(5.2949, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"93000\n",
"94000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"5300 tensor(5.3186, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"95000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"5400 tensor(5.2580, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"96000\n",
"97000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"5500 tensor(5.3632, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"98000\n",
"99000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"5600 tensor(5.3885, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100000\n",
"101000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"5700 tensor(5.2640, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"102000\n",
"103000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"5800 tensor(5.4444, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"104000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"5900 tensor(5.1981, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"105000\n",
"106000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"6000 tensor(5.2765, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"107000\n",
"108000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"6100 tensor(5.3015, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"109000\n",
"110000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"6200 tensor(5.1958, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"111000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"6300 tensor(5.1862, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"112000\n",
"113000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"6400 tensor(5.4609, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"114000\n",
"115000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"6500 tensor(5.2700, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"116000\n",
"117000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"6600 tensor(5.3814, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"118000\n",
"119000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"6700 tensor(5.2443, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"120000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"6800 tensor(5.2292, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"121000\n",
"122000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"6900 tensor(5.2252, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"123000\n",
"124000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"7000 tensor(5.3240, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"125000\n",
"126000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"7100 tensor(5.3584, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"127000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"7200 tensor(5.2038, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"128000\n",
"129000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"7300 tensor(5.3306, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"130000\n",
"131000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"7400 tensor(5.3824, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"132000\n",
"133000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"7500 tensor(5.1708, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"134000\n",
"135000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"7600 tensor(5.3388, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"136000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"7700 tensor(5.2014, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"137000\n",
"138000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"7800 tensor(5.3407, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"139000\n",
"140000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"7900 tensor(5.3078, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"141000\n",
"142000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"8000 tensor(5.0961, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"143000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"8100 tensor(5.1313, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"144000\n",
"145000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"8200 tensor(5.2008, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"146000\n",
"147000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"8300 tensor(5.1277, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"148000\n",
"149000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"8400 tensor(5.3875, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"150000\n",
"151000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"8500 tensor(5.3107, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"152000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"8600 tensor(5.3640, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"153000\n",
"154000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"8700 tensor(5.1869, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"155000\n",
"156000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"8800 tensor(5.0180, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"157000\n",
"158000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"8900 tensor(5.1767, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"159000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"9000 tensor(5.3253, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"160000\n",
"161000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"9100 tensor(5.1971, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"162000\n",
"163000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"9200 tensor(5.2071, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"164000\n",
"165000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"9300 tensor(5.1244, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"166000\n",
"167000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"9400 tensor(5.2198, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"168000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"9500 tensor(5.3042, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"169000\n",
"170000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"9600 tensor(5.3171, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"171000\n",
"172000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"9700 tensor(5.1956, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"173000\n",
"174000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"9800 tensor(5.1559, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"175000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"9900 tensor(5.1519, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"176000\n",
"177000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10000 tensor(5.3396, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"178000\n",
"179000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10100 tensor(5.2106, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"180000\n",
"181000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10200 tensor(5.3356, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"182000\n",
"183000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10300 tensor(5.2105, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"184000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10400 tensor(5.0844, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"185000\n",
"186000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10500 tensor(5.3788, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"187000\n",
"188000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10600 tensor(5.1145, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"189000\n",
"190000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10700 tensor(5.2610, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"191000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10800 tensor(5.2560, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"192000\n",
"193000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10900 tensor(5.2565, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"194000\n",
"195000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"11000 tensor(5.2770, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"196000\n",
"197000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"11100 tensor(5.1193, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"198000\n",
"199000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"11200 tensor(5.1823, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"200000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"11300 tensor(5.3099, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"201000\n",
"202000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"11400 tensor(5.2330, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"203000\n",
"204000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"11500 tensor(5.1722, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"205000\n",
"206000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"11600 tensor(5.2136, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"207000\n",
"208000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"11700 tensor(5.3126, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"209000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"11800 tensor(5.1057, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"210000\n",
"211000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"11900 tensor(5.2419, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"212000\n",
"213000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"12000 tensor(5.2434, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"214000\n",
"215000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"12100 tensor(5.1692, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"216000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"12200 tensor(5.2075, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"217000\n",
"218000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"12300 tensor(5.1290, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"219000\n",
"220000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"12400 tensor(5.2380, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"221000\n",
"222000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"12500 tensor(5.2779, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"223000\n",
"224000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"12600 tensor(5.3369, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"225000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"12700 tensor(5.2351, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"226000\n",
"227000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"12800 tensor(5.2434, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"228000\n",
"229000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"12900 tensor(5.1963, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"230000\n",
"231000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"13000 tensor(5.1363, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"232000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"13100 tensor(5.1915, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"233000\n",
"234000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"13200 tensor(5.1264, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"235000\n",
"236000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"13300 tensor(5.1468, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"237000\n",
"238000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"13400 tensor(5.3026, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"239000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"13500 tensor(5.2925, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"240000\n",
"241000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"13600 tensor(5.1511, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"242000\n",
"243000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"13700 tensor(5.4282, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"244000\n",
"245000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"13800 tensor(5.2730, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"246000\n",
"247000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"13900 tensor(5.2097, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"248000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"14000 tensor(5.2728, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"249000\n",
"250000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"14100 tensor(5.2134, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"251000\n",
"252000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"14200 tensor(5.1931, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"253000\n",
"254000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"14300 tensor(5.2459, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"255000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"14400 tensor(5.1297, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"256000\n",
"257000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"14500 tensor(5.0971, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"258000\n",
"259000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"14600 tensor(5.2238, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"260000\n",
"261000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"14700 tensor(5.2328, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"262000\n",
"263000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"14800 tensor(5.1782, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"264000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"14900 tensor(5.3230, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"265000\n",
"266000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"15000 tensor(5.1504, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"267000\n",
"268000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"15100 tensor(5.1998, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"269000\n",
"270000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"15200 tensor(5.2138, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"271000\n",
"272000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"15300 tensor(5.4110, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"273000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"15400 tensor(5.1748, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"274000\n",
"275000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"15500 tensor(5.2118, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"276000\n",
"277000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"15600 tensor(5.2297, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"278000\n",
"279000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"15700 tensor(5.2977, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"280000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"15800 tensor(5.2175, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"281000\n",
"282000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"15900 tensor(5.0613, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"283000\n",
"284000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"16000 tensor(5.0862, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"285000\n",
"286000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"16100 tensor(5.1910, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"287000\n",
"288000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"16200 tensor(5.0195, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"289000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"16300 tensor(5.1381, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"290000\n",
"291000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"16400 tensor(5.2135, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"292000\n",
"293000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"16500 tensor(5.2058, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"294000\n",
"295000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"16600 tensor(5.2372, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"296000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"16700 tensor(5.1753, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"297000\n",
"298000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"16800 tensor(5.0765, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"299000\n",
"300000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"16900 tensor(5.3361, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"301000\n",
"302000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"17000 tensor(5.2745, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"303000\n",
"304000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"17100 tensor(5.2249, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"305000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"17200 tensor(5.1877, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"306000\n",
"307000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"17300 tensor(5.0891, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"308000\n",
"309000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"17400 tensor(5.4181, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"310000\n",
"311000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"17500 tensor(5.1299, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"312000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"17600 tensor(5.1636, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"313000\n",
"314000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"17700 tensor(5.2179, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"315000\n",
"316000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"17800 tensor(5.2689, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"317000\n",
"318000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"17900 tensor(5.2410, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"319000\n",
"320000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"18000 tensor(5.2342, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"321000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"18100 tensor(5.2234, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"322000\n",
"323000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"18200 tensor(5.0779, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"324000\n",
"325000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"18300 tensor(5.2378, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"326000\n",
"327000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"18400 tensor(5.1710, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"328000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"18500 tensor(5.1134, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"329000\n",
"330000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"18600 tensor(5.2679, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"331000\n",
"332000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"18700 tensor(5.2590, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"333000\n",
"334000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"18800 tensor(5.1842, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"335000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"18900 tensor(5.1379, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"336000\n",
"337000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"19000 tensor(5.1416, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"338000\n",
"339000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"19100 tensor(5.1602, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"340000\n",
"341000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"19200 tensor(5.2670, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"342000\n",
"343000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"19300 tensor(5.1622, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"344000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"19400 tensor(5.1805, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"345000\n",
"346000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"19500 tensor(5.1820, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"347000\n",
"348000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"19600 tensor(5.2506, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"349000\n",
"350000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"19700 tensor(5.1566, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"351000\n",
"352000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"19800 tensor(5.1121, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"353000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"19900 tensor(5.1227, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"354000\n",
"355000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"20000 tensor(5.2132, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"356000\n",
"357000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"20100 tensor(5.2681, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"358000\n",
"359000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"20200 tensor(5.2689, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"360000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"20300 tensor(5.1758, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"361000\n",
"362000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"20400 tensor(5.1275, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"363000\n",
"364000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"20500 tensor(5.1803, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"365000\n",
"366000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"20600 tensor(5.1202, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"367000\n",
"368000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"20700 tensor(5.2343, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"369000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"20800 tensor(5.2035, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"370000\n",
"371000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"20900 tensor(5.2992, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"372000\n",
"373000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"21000 tensor(5.1540, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"374000\n",
"375000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"21100 tensor(5.2739, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"376000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"21200 tensor(5.2949, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"377000\n",
"378000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"21300 tensor(5.2138, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"379000\n",
"380000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"21400 tensor(5.2773, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"381000\n",
"382000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"21500 tensor(5.2345, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"383000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"21600 tensor(5.2528, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"384000\n",
"385000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"21700 tensor(5.1824, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"386000\n",
"387000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"21800 tensor(5.1943, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"388000\n",
"389000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"21900 tensor(5.0359, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"390000\n",
"391000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"22000 tensor(5.1506, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"392000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"22100 tensor(5.1253, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"393000\n",
"394000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"22200 tensor(5.0982, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"395000\n",
"396000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"22300 tensor(5.1554, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"397000\n",
"398000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"22400 tensor(5.1673, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"399000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"22500 tensor(5.1957, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"400000\n",
"401000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"22600 tensor(5.1328, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"402000\n",
"403000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"22700 tensor(5.2231, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"404000\n",
"405000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"22800 tensor(5.1370, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"406000\n",
"407000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"22900 tensor(5.2334, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"408000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"23000 tensor(5.1372, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"409000\n",
"410000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"23100 tensor(5.1193, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"411000\n",
"412000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"23200 tensor(5.2649, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"413000\n",
"414000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"23300 tensor(5.1514, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"415000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"23400 tensor(5.2532, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"416000\n",
"417000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"23500 tensor(5.3751, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"418000\n",
"419000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"23600 tensor(5.0766, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"420000\n",
"421000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"23700 tensor(5.0915, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"422000\n",
"423000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"23800 tensor(5.3195, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"424000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"23900 tensor(5.2758, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"425000\n",
"426000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"24000 tensor(5.0487, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"427000\n",
"428000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"24100 tensor(5.1555, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"429000\n",
"430000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"24200 tensor(5.2140, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"431000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"24300 tensor(5.2729, device='mps:0', grad_fn=<NllLossBackward0>)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"432000\n"
]
}
],
"source": [
"train_dataset = Bigrams(path_to_training_file, vocab_size)\n",
"model = SimpleBigramNeuralLanguageModel(vocab_size, embed_size).to(device)\n",
"data = DataLoader(train_dataset, batch_size=batch_size)\n",
"optimizer = torch.optim.Adam(model.parameters())\n",
"criterion = torch.nn.NLLLoss()\n",
"\n",
"model.train()\n",
"step = 0\n",
"for x, y in data:\n",
" x = x.to(device)\n",
" y = y.to(device)\n",
" optimizer.zero_grad()\n",
" ypredicted = model(x)\n",
" loss = criterion(torch.log(ypredicted), y)\n",
" if step % 100 == 0:\n",
" print(step, loss)\n",
" step += 1\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
"torch.save(model.state_dict(), path_to_model_file)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## LOAD MODEL AND VOCAB"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 7,
"outputs": [
{
"data": {
"text/plain": "SimpleBigramNeuralLanguageModel(\n (model): Sequential(\n (0): Embedding(30000, 1000)\n (1): Linear(in_features=1000, out_features=30000, bias=True)\n (2): Softmax(dim=None)\n )\n)"
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open(path_to_vocabulary_file, 'rb') as handle:\n",
" vocab = pickle.load(handle)\n",
"model = SimpleBigramNeuralLanguageModel(vocab_size, embed_size).to(device)\n",
"model.load_state_dict(torch.load(path_to_model_file))\n",
"model.eval()"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## CREATE OUTPUTS FILES"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"### DEV-0"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 8,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"========== do prediction for dev-0/in.tsv.xz ==========\n"
]
}
],
"source": [
"predicition_for_file(model, vocab, folder_dev_0, file_dev_0)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"### TEST-A"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 9,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"========== do prediction for test-A/in.tsv.xz ==========\n"
]
}
],
"source": [
"predicition_for_file(model, vocab, folder_test_a, file_test_a)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}