diff --git a/simple_neural_network.ipynb b/simple_neural_network.ipynb index ef10ea2..6453b8f 100644 --- a/simple_neural_network.ipynb +++ b/simple_neural_network.ipynb @@ -11,17 +11,8 @@ }, { "cell_type": "code", - "execution_count": 1, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "True\n", - "True\n" - ] - } - ], + "execution_count": null, + "outputs": [], "source": [ "import regex as re\n", "import sys\n", @@ -52,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "outputs": [], "source": [ "def get_words_from_line(line):\n", @@ -131,7 +122,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "outputs": [], "source": [ "class Bigrams(IterableDataset):\n", @@ -175,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "outputs": [], "source": [ "vocab_size = 30000\n", @@ -203,447 +194,8 @@ }, { "cell_type": "code", - "execution_count": 5, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "1000\n", - "2000\n", - "3000\n", - "4000\n", - "5000\n", - "6000\n", - "7000\n", - "8000\n", - "9000\n", - "10000\n", - "11000\n", - "12000\n", - "13000\n", - "14000\n", - "15000\n", - "16000\n", - "17000\n", - "18000\n", - "19000\n", - "20000\n", - "21000\n", - "22000\n", - "23000\n", - "24000\n", - "25000\n", - "26000\n", - "27000\n", - "28000\n", - "29000\n", - "30000\n", - "31000\n", - "32000\n", - "33000\n", - "34000\n", - "35000\n", - "36000\n", - "37000\n", - "38000\n", - "39000\n", - "40000\n", - "41000\n", - "42000\n", - "43000\n", - "44000\n", - "45000\n", - "46000\n", - "47000\n", - "48000\n", - "49000\n", - "50000\n", - "51000\n", - "52000\n", - "53000\n", - "54000\n", - "55000\n", - "56000\n", - "57000\n", - "58000\n", - "59000\n", - "60000\n", - "61000\n", - "62000\n", - "63000\n", - "64000\n", - "65000\n", - "66000\n", - "67000\n", - "68000\n", - "69000\n", - "70000\n", - "71000\n", - "72000\n", - "73000\n", - "74000\n", - "75000\n", - "76000\n", - "77000\n", - "78000\n", - "79000\n", - "80000\n", - "81000\n", - "82000\n", - "83000\n", - "84000\n", - "85000\n", - "86000\n", - "87000\n", - "88000\n", - "89000\n", - "90000\n", - "91000\n", - "92000\n", - "93000\n", - "94000\n", - "95000\n", - "96000\n", - "97000\n", - "98000\n", - "99000\n", - "100000\n", - "101000\n", - "102000\n", - "103000\n", - "104000\n", - "105000\n", - "106000\n", - "107000\n", - "108000\n", - "109000\n", - "110000\n", - "111000\n", - "112000\n", - "113000\n", - "114000\n", - "115000\n", - "116000\n", - "117000\n", - "118000\n", - "119000\n", - "120000\n", - "121000\n", - "122000\n", - "123000\n", - "124000\n", - "125000\n", - "126000\n", - "127000\n", - "128000\n", - "129000\n", - "130000\n", - "131000\n", - "132000\n", - "133000\n", - "134000\n", - "135000\n", - "136000\n", - "137000\n", - "138000\n", - "139000\n", - "140000\n", - "141000\n", - "142000\n", - "143000\n", - "144000\n", - "145000\n", - "146000\n", - "147000\n", - "148000\n", - "149000\n", - "150000\n", - "151000\n", - "152000\n", - "153000\n", - "154000\n", - "155000\n", - "156000\n", - "157000\n", - "158000\n", - "159000\n", - "160000\n", - "161000\n", - "162000\n", - "163000\n", - "164000\n", - "165000\n", - "166000\n", - "167000\n", - "168000\n", - "169000\n", - "170000\n", - "171000\n", - "172000\n", - "173000\n", - "174000\n", - "175000\n", - "176000\n", - "177000\n", - "178000\n", - "179000\n", - "180000\n", - "181000\n", - "182000\n", - "183000\n", - "184000\n", - "185000\n", - "186000\n", - "187000\n", - "188000\n", - "189000\n", - "190000\n", - "191000\n", - "192000\n", - "193000\n", - "194000\n", - "195000\n", - "196000\n", - "197000\n", - "198000\n", - "199000\n", - "200000\n", - "201000\n", - "202000\n", - "203000\n", - "204000\n", - "205000\n", - "206000\n", - "207000\n", - "208000\n", - "209000\n", - "210000\n", - "211000\n", - "212000\n", - "213000\n", - "214000\n", - "215000\n", - "216000\n", - "217000\n", - "218000\n", - "219000\n", - "220000\n", - "221000\n", - "222000\n", - "223000\n", - "224000\n", - "225000\n", - "226000\n", - "227000\n", - "228000\n", - "229000\n", - "230000\n", - "231000\n", - "232000\n", - "233000\n", - "234000\n", - "235000\n", - "236000\n", - "237000\n", - "238000\n", - "239000\n", - "240000\n", - "241000\n", - "242000\n", - "243000\n", - "244000\n", - "245000\n", - "246000\n", - "247000\n", - "248000\n", - "249000\n", - "250000\n", - "251000\n", - "252000\n", - "253000\n", - "254000\n", - "255000\n", - "256000\n", - "257000\n", - "258000\n", - "259000\n", - "260000\n", - "261000\n", - "262000\n", - "263000\n", - "264000\n", - "265000\n", - "266000\n", - "267000\n", - "268000\n", - "269000\n", - "270000\n", - "271000\n", - "272000\n", - "273000\n", - "274000\n", - "275000\n", - "276000\n", - "277000\n", - "278000\n", - "279000\n", - "280000\n", - "281000\n", - "282000\n", - "283000\n", - "284000\n", - "285000\n", - "286000\n", - "287000\n", - "288000\n", - "289000\n", - "290000\n", - "291000\n", - "292000\n", - "293000\n", - "294000\n", - "295000\n", - "296000\n", - "297000\n", - "298000\n", - "299000\n", - "300000\n", - "301000\n", - "302000\n", - "303000\n", - "304000\n", - "305000\n", - "306000\n", - "307000\n", - "308000\n", - "309000\n", - "310000\n", - "311000\n", - "312000\n", - "313000\n", - "314000\n", - "315000\n", - "316000\n", - "317000\n", - "318000\n", - "319000\n", - "320000\n", - "321000\n", - "322000\n", - "323000\n", - "324000\n", - "325000\n", - "326000\n", - "327000\n", - "328000\n", - "329000\n", - "330000\n", - "331000\n", - "332000\n", - "333000\n", - "334000\n", - "335000\n", - "336000\n", - "337000\n", - "338000\n", - "339000\n", - "340000\n", - "341000\n", - "342000\n", - "343000\n", - "344000\n", - "345000\n", - "346000\n", - "347000\n", - "348000\n", - "349000\n", - "350000\n", - "351000\n", - "352000\n", - "353000\n", - "354000\n", - "355000\n", - "356000\n", - "357000\n", - "358000\n", - "359000\n", - "360000\n", - "361000\n", - "362000\n", - "363000\n", - "364000\n", - "365000\n", - "366000\n", - "367000\n", - "368000\n", - "369000\n", - "370000\n", - "371000\n", - "372000\n", - "373000\n", - "374000\n", - "375000\n", - "376000\n", - "377000\n", - "378000\n", - "379000\n", - "380000\n", - "381000\n", - "382000\n", - "383000\n", - "384000\n", - "385000\n", - "386000\n", - "387000\n", - "388000\n", - "389000\n", - "390000\n", - "391000\n", - "392000\n", - "393000\n", - "394000\n", - "395000\n", - "396000\n", - "397000\n", - "398000\n", - "399000\n", - "400000\n", - "401000\n", - "402000\n", - "403000\n", - "404000\n", - "405000\n", - "406000\n", - "407000\n", - "408000\n", - "409000\n", - "410000\n", - "411000\n", - "412000\n", - "413000\n", - "414000\n", - "415000\n", - "416000\n", - "417000\n", - "418000\n", - "419000\n", - "420000\n", - "421000\n", - "422000\n", - "423000\n", - "424000\n", - "425000\n", - "426000\n", - "427000\n", - "428000\n", - "429000\n", - "430000\n", - "431000\n", - "432000\n" - ] - } - ], + "execution_count": null, + "outputs": [], "source": [ "vocab = build_vocab_from_iterator(\n", " get_word_lines_from_file(path_to_training_file),\n", @@ -668,4053 +220,8 @@ }, { "cell_type": "code", - "execution_count": 6, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "1000\n", - "2000\n", - "3000\n", - "4000\n", - "5000\n", - "6000\n", - "7000\n", - "8000\n", - "9000\n", - "10000\n", - "11000\n", - "12000\n", - "13000\n", - "14000\n", - "15000\n", - "16000\n", - "17000\n", - "18000\n", - "19000\n", - "20000\n", - "21000\n", - "22000\n", - "23000\n", - "24000\n", - "25000\n", - "26000\n", - "27000\n", - "28000\n", - "29000\n", - "30000\n", - "31000\n", - "32000\n", - "33000\n", - "34000\n", - "35000\n", - "36000\n", - "37000\n", - "38000\n", - "39000\n", - "40000\n", - "41000\n", - "42000\n", - "43000\n", - "44000\n", - "45000\n", - "46000\n", - "47000\n", - "48000\n", - "49000\n", - "50000\n", - "51000\n", - "52000\n", - "53000\n", - "54000\n", - "55000\n", - "56000\n", - "57000\n", - "58000\n", - "59000\n", - "60000\n", - "61000\n", - "62000\n", - "63000\n", - "64000\n", - "65000\n", - "66000\n", - "67000\n", - "68000\n", - "69000\n", - "70000\n", - "71000\n", - "72000\n", - "73000\n", - "74000\n", - "75000\n", - "76000\n", - "77000\n", - "78000\n", - "79000\n", - "80000\n", - "81000\n", - "82000\n", - "83000\n", - "84000\n", - "85000\n", - "86000\n", - "87000\n", - "88000\n", - "89000\n", - "90000\n", - "91000\n", - "92000\n", - "93000\n", - "94000\n", - "95000\n", - "96000\n", - "97000\n", - "98000\n", - "99000\n", - "100000\n", - "101000\n", - "102000\n", - "103000\n", - "104000\n", - "105000\n", - "106000\n", - "107000\n", - "108000\n", - "109000\n", - "110000\n", - "111000\n", - "112000\n", - "113000\n", - "114000\n", - "115000\n", - "116000\n", - "117000\n", - "118000\n", - "119000\n", - "120000\n", - "121000\n", - "122000\n", - "123000\n", - "124000\n", - "125000\n", - "126000\n", - "127000\n", - "128000\n", - "129000\n", - "130000\n", - "131000\n", - "132000\n", - "133000\n", - "134000\n", - "135000\n", - "136000\n", - "137000\n", - "138000\n", - "139000\n", - "140000\n", - "141000\n", - "142000\n", - "143000\n", - "144000\n", - "145000\n", - "146000\n", - "147000\n", - "148000\n", - "149000\n", - "150000\n", - "151000\n", - "152000\n", - "153000\n", - "154000\n", - "155000\n", - "156000\n", - "157000\n", - "158000\n", - "159000\n", - "160000\n", - "161000\n", - "162000\n", - "163000\n", - "164000\n", - "165000\n", - "166000\n", - "167000\n", - "168000\n", - "169000\n", - "170000\n", - "171000\n", - "172000\n", - "173000\n", - "174000\n", - "175000\n", - "176000\n", - "177000\n", - "178000\n", - "179000\n", - "180000\n", - "181000\n", - "182000\n", - "183000\n", - "184000\n", - "185000\n", - "186000\n", - "187000\n", - "188000\n", - "189000\n", - "190000\n", - "191000\n", - "192000\n", - "193000\n", - "194000\n", - "195000\n", - "196000\n", - "197000\n", - "198000\n", - "199000\n", - "200000\n", - "201000\n", - "202000\n", - "203000\n", - "204000\n", - "205000\n", - "206000\n", - "207000\n", - "208000\n", - "209000\n", - "210000\n", - "211000\n", - "212000\n", - "213000\n", - "214000\n", - "215000\n", - "216000\n", - "217000\n", - "218000\n", - "219000\n", - "220000\n", - "221000\n", - "222000\n", - "223000\n", - "224000\n", - "225000\n", - "226000\n", - "227000\n", - "228000\n", - "229000\n", - "230000\n", - "231000\n", - "232000\n", - "233000\n", - "234000\n", - "235000\n", - "236000\n", - "237000\n", - "238000\n", - "239000\n", - "240000\n", - "241000\n", - "242000\n", - "243000\n", - "244000\n", - "245000\n", - "246000\n", - "247000\n", - "248000\n", - "249000\n", - "250000\n", - "251000\n", - "252000\n", - "253000\n", - "254000\n", - "255000\n", - "256000\n", - "257000\n", - "258000\n", - "259000\n", - "260000\n", - "261000\n", - "262000\n", - "263000\n", - "264000\n", - "265000\n", - "266000\n", - "267000\n", - "268000\n", - "269000\n", - "270000\n", - "271000\n", - "272000\n", - "273000\n", - "274000\n", - "275000\n", - "276000\n", - "277000\n", - "278000\n", - "279000\n", - "280000\n", - "281000\n", - "282000\n", - "283000\n", - "284000\n", - "285000\n", - "286000\n", - "287000\n", - "288000\n", - "289000\n", - "290000\n", - "291000\n", - "292000\n", - "293000\n", - "294000\n", - "295000\n", - "296000\n", - "297000\n", - "298000\n", - "299000\n", - "300000\n", - "301000\n", - "302000\n", - "303000\n", - "304000\n", - "305000\n", - "306000\n", - "307000\n", - "308000\n", - "309000\n", - "310000\n", - "311000\n", - "312000\n", - "313000\n", - "314000\n", - "315000\n", - "316000\n", - "317000\n", - "318000\n", - "319000\n", - "320000\n", - "321000\n", - "322000\n", - "323000\n", - "324000\n", - "325000\n", - "326000\n", - "327000\n", - "328000\n", - "329000\n", - "330000\n", - "331000\n", - "332000\n", - "333000\n", - "334000\n", - "335000\n", - "336000\n", - "337000\n", - "338000\n", - "339000\n", - "340000\n", - "341000\n", - "342000\n", - "343000\n", - "344000\n", - "345000\n", - "346000\n", - "347000\n", - "348000\n", - "349000\n", - "350000\n", - "351000\n", - "352000\n", - "353000\n", - "354000\n", - "355000\n", - "356000\n", - "357000\n", - "358000\n", - "359000\n", - "360000\n", - "361000\n", - "362000\n", - "363000\n", - "364000\n", - "365000\n", - "366000\n", - "367000\n", - "368000\n", - "369000\n", - "370000\n", - "371000\n", - "372000\n", - "373000\n", - "374000\n", - "375000\n", - "376000\n", - "377000\n", - "378000\n", - "379000\n", - "380000\n", - "381000\n", - "382000\n", - "383000\n", - "384000\n", - "385000\n", - "386000\n", - "387000\n", - "388000\n", - "389000\n", - "390000\n", - "391000\n", - "392000\n", - "393000\n", - "394000\n", - "395000\n", - "396000\n", - "397000\n", - "398000\n", - "399000\n", - "400000\n", - "401000\n", - "402000\n", - "403000\n", - "404000\n", - "405000\n", - "406000\n", - "407000\n", - "408000\n", - "409000\n", - "410000\n", - "411000\n", - "412000\n", - "413000\n", - "414000\n", - "415000\n", - "416000\n", - "417000\n", - "418000\n", - "419000\n", - "420000\n", - "421000\n", - "422000\n", - "423000\n", - "424000\n", - "425000\n", - "426000\n", - "427000\n", - "428000\n", - "429000\n", - "430000\n", - "431000\n", - "432000\n", - "/Users/maciej/miniconda3/envs/mj/lib/python3.11/site-packages/torch/nn/modules/container.py:217: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n", - " input = module(input)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 tensor(10.5058, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "1000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "100 tensor(7.3365, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2000\n", - "3000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "200 tensor(6.6523, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "4000\n", - "5000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "300 tensor(6.1860, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "6000\n", - "7000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "400 tensor(6.0387, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "8000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "500 tensor(5.8481, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "9000\n", - "10000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "600 tensor(5.6081, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "11000\n", - "12000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "700 tensor(5.5820, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "13000\n", - "14000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "800 tensor(5.5111, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "15000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "900 tensor(5.4927, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "16000\n", - "17000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1000 tensor(5.5190, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "18000\n", - "19000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1100 tensor(5.5600, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "20000\n", - "21000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1200 tensor(5.6395, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "22000\n", - "23000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1300 tensor(5.4455, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "24000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1400 tensor(5.5564, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "25000\n", - "26000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1500 tensor(5.4919, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "27000\n", - "28000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1600 tensor(5.2355, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "29000\n", - "30000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1700 tensor(5.4107, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "31000\n", - "32000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1800 tensor(5.5119, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "33000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1900 tensor(5.3500, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "34000\n", - "35000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2000 tensor(5.3722, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "36000\n", - "37000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2100 tensor(5.2736, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "38000\n", - "39000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2200 tensor(5.3808, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "40000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2300 tensor(5.5186, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "41000\n", - "42000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2400 tensor(5.2746, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "43000\n", - "44000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2500 tensor(5.3340, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "45000\n", - "46000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2600 tensor(5.4654, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "47000\n", - "48000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2700 tensor(5.4318, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "49000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2800 tensor(5.3528, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "50000\n", - "51000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2900 tensor(5.1630, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "52000\n", - "53000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3000 tensor(5.4531, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "54000\n", - "55000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3100 tensor(5.4153, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "56000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3200 tensor(5.3299, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "57000\n", - "58000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3300 tensor(5.3637, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "59000\n", - "60000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3400 tensor(5.3405, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "61000\n", - "62000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3500 tensor(5.3668, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "63000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3600 tensor(5.4104, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "64000\n", - "65000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3700 tensor(5.2142, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "66000\n", - "67000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3800 tensor(5.5528, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "68000\n", - "69000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3900 tensor(5.1879, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "70000\n", - "71000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "4000 tensor(5.2014, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "72000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "4100 tensor(5.4020, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "73000\n", - "74000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "4200 tensor(5.2686, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "75000\n", - "76000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "4300 tensor(5.3070, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "77000\n", - "78000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "4400 tensor(5.1891, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "79000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "4500 tensor(5.3085, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "80000\n", - "81000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "4600 tensor(5.3568, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "82000\n", - "83000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "4700 tensor(5.2280, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "84000\n", - "85000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "4800 tensor(5.2878, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "86000\n", - "87000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "4900 tensor(5.1588, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "88000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "5000 tensor(5.1523, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "89000\n", - "90000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "5100 tensor(5.2101, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "91000\n", - "92000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "5200 tensor(5.2949, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "93000\n", - "94000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "5300 tensor(5.3186, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "95000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "5400 tensor(5.2580, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "96000\n", - "97000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "5500 tensor(5.3632, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "98000\n", - "99000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "5600 tensor(5.3885, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100000\n", - "101000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "5700 tensor(5.2640, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "102000\n", - "103000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "5800 tensor(5.4444, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "104000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "5900 tensor(5.1981, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "105000\n", - "106000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "6000 tensor(5.2765, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "107000\n", - "108000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "6100 tensor(5.3015, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "109000\n", - "110000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "6200 tensor(5.1958, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "111000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "6300 tensor(5.1862, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "112000\n", - "113000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "6400 tensor(5.4609, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "114000\n", - "115000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "6500 tensor(5.2700, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "116000\n", - "117000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "6600 tensor(5.3814, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "118000\n", - "119000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "6700 tensor(5.2443, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "120000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "6800 tensor(5.2292, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "121000\n", - "122000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "6900 tensor(5.2252, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "123000\n", - "124000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "7000 tensor(5.3240, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "125000\n", - "126000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "7100 tensor(5.3584, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "127000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "7200 tensor(5.2038, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "128000\n", - "129000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "7300 tensor(5.3306, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "130000\n", - "131000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "7400 tensor(5.3824, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "132000\n", - "133000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "7500 tensor(5.1708, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "134000\n", - "135000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "7600 tensor(5.3388, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "136000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "7700 tensor(5.2014, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "137000\n", - "138000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "7800 tensor(5.3407, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "139000\n", - "140000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "7900 tensor(5.3078, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "141000\n", - "142000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "8000 tensor(5.0961, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "143000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "8100 tensor(5.1313, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "144000\n", - "145000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "8200 tensor(5.2008, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "146000\n", - "147000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "8300 tensor(5.1277, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "148000\n", - "149000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "8400 tensor(5.3875, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "150000\n", - "151000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "8500 tensor(5.3107, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "152000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "8600 tensor(5.3640, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "153000\n", - "154000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "8700 tensor(5.1869, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "155000\n", - "156000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "8800 tensor(5.0180, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "157000\n", - "158000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "8900 tensor(5.1767, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "159000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "9000 tensor(5.3253, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "160000\n", - "161000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "9100 tensor(5.1971, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "162000\n", - "163000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "9200 tensor(5.2071, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "164000\n", - "165000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "9300 tensor(5.1244, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "166000\n", - "167000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "9400 tensor(5.2198, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "168000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "9500 tensor(5.3042, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "169000\n", - "170000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "9600 tensor(5.3171, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "171000\n", - "172000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "9700 tensor(5.1956, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "173000\n", - "174000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "9800 tensor(5.1559, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "175000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "9900 tensor(5.1519, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "176000\n", - "177000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10000 tensor(5.3396, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "178000\n", - "179000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10100 tensor(5.2106, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "180000\n", - "181000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10200 tensor(5.3356, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "182000\n", - "183000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10300 tensor(5.2105, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "184000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10400 tensor(5.0844, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "185000\n", - "186000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10500 tensor(5.3788, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "187000\n", - "188000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10600 tensor(5.1145, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "189000\n", - "190000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10700 tensor(5.2610, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "191000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10800 tensor(5.2560, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "192000\n", - "193000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10900 tensor(5.2565, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "194000\n", - "195000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "11000 tensor(5.2770, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "196000\n", - "197000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "11100 tensor(5.1193, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "198000\n", - "199000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "11200 tensor(5.1823, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "200000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "11300 tensor(5.3099, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "201000\n", - "202000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "11400 tensor(5.2330, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "203000\n", - "204000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "11500 tensor(5.1722, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "205000\n", - "206000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "11600 tensor(5.2136, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "207000\n", - "208000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "11700 tensor(5.3126, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "209000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "11800 tensor(5.1057, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "210000\n", - "211000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "11900 tensor(5.2419, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "212000\n", - "213000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "12000 tensor(5.2434, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "214000\n", - "215000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "12100 tensor(5.1692, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "216000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "12200 tensor(5.2075, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "217000\n", - "218000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "12300 tensor(5.1290, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "219000\n", - "220000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "12400 tensor(5.2380, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "221000\n", - "222000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "12500 tensor(5.2779, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "223000\n", - "224000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "12600 tensor(5.3369, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "225000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "12700 tensor(5.2351, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "226000\n", - "227000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "12800 tensor(5.2434, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "228000\n", - "229000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "12900 tensor(5.1963, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "230000\n", - "231000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "13000 tensor(5.1363, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "232000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "13100 tensor(5.1915, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "233000\n", - "234000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "13200 tensor(5.1264, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "235000\n", - "236000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "13300 tensor(5.1468, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "237000\n", - "238000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "13400 tensor(5.3026, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "239000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "13500 tensor(5.2925, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "240000\n", - "241000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "13600 tensor(5.1511, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "242000\n", - "243000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "13700 tensor(5.4282, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "244000\n", - "245000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "13800 tensor(5.2730, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "246000\n", - "247000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "13900 tensor(5.2097, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "248000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "14000 tensor(5.2728, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "249000\n", - "250000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "14100 tensor(5.2134, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "251000\n", - "252000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "14200 tensor(5.1931, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "253000\n", - "254000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "14300 tensor(5.2459, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "255000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "14400 tensor(5.1297, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "256000\n", - "257000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "14500 tensor(5.0971, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "258000\n", - "259000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "14600 tensor(5.2238, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "260000\n", - "261000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "14700 tensor(5.2328, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "262000\n", - "263000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "14800 tensor(5.1782, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "264000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "14900 tensor(5.3230, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "265000\n", - "266000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "15000 tensor(5.1504, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "267000\n", - "268000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "15100 tensor(5.1998, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "269000\n", - "270000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "15200 tensor(5.2138, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "271000\n", - "272000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "15300 tensor(5.4110, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "273000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "15400 tensor(5.1748, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "274000\n", - "275000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "15500 tensor(5.2118, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "276000\n", - "277000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "15600 tensor(5.2297, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "278000\n", - "279000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "15700 tensor(5.2977, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "280000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "15800 tensor(5.2175, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "281000\n", - "282000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "15900 tensor(5.0613, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "283000\n", - "284000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "16000 tensor(5.0862, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "285000\n", - "286000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "16100 tensor(5.1910, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "287000\n", - "288000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "16200 tensor(5.0195, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "289000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "16300 tensor(5.1381, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "290000\n", - "291000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "16400 tensor(5.2135, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "292000\n", - "293000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "16500 tensor(5.2058, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "294000\n", - "295000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "16600 tensor(5.2372, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "296000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "16700 tensor(5.1753, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "297000\n", - "298000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "16800 tensor(5.0765, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "299000\n", - "300000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "16900 tensor(5.3361, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "301000\n", - "302000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "17000 tensor(5.2745, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "303000\n", - "304000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "17100 tensor(5.2249, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "305000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "17200 tensor(5.1877, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "306000\n", - "307000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "17300 tensor(5.0891, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "308000\n", - "309000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "17400 tensor(5.4181, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "310000\n", - "311000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "17500 tensor(5.1299, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "312000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "17600 tensor(5.1636, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "313000\n", - "314000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "17700 tensor(5.2179, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "315000\n", - "316000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "17800 tensor(5.2689, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "317000\n", - "318000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "17900 tensor(5.2410, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "319000\n", - "320000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "18000 tensor(5.2342, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "321000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "18100 tensor(5.2234, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "322000\n", - "323000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "18200 tensor(5.0779, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "324000\n", - "325000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "18300 tensor(5.2378, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "326000\n", - "327000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "18400 tensor(5.1710, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "328000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "18500 tensor(5.1134, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "329000\n", - "330000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "18600 tensor(5.2679, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "331000\n", - "332000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "18700 tensor(5.2590, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "333000\n", - "334000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "18800 tensor(5.1842, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "335000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "18900 tensor(5.1379, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "336000\n", - "337000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19000 tensor(5.1416, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "338000\n", - "339000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19100 tensor(5.1602, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "340000\n", - "341000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19200 tensor(5.2670, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "342000\n", - "343000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19300 tensor(5.1622, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "344000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19400 tensor(5.1805, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "345000\n", - "346000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19500 tensor(5.1820, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "347000\n", - "348000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19600 tensor(5.2506, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "349000\n", - "350000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19700 tensor(5.1566, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "351000\n", - "352000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19800 tensor(5.1121, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "353000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19900 tensor(5.1227, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "354000\n", - "355000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "20000 tensor(5.2132, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "356000\n", - "357000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "20100 tensor(5.2681, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "358000\n", - "359000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "20200 tensor(5.2689, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "360000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "20300 tensor(5.1758, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "361000\n", - "362000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "20400 tensor(5.1275, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "363000\n", - "364000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "20500 tensor(5.1803, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "365000\n", - "366000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "20600 tensor(5.1202, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "367000\n", - "368000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "20700 tensor(5.2343, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "369000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "20800 tensor(5.2035, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "370000\n", - "371000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "20900 tensor(5.2992, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "372000\n", - "373000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "21000 tensor(5.1540, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "374000\n", - "375000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "21100 tensor(5.2739, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "376000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "21200 tensor(5.2949, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "377000\n", - "378000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "21300 tensor(5.2138, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "379000\n", - "380000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "21400 tensor(5.2773, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "381000\n", - "382000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "21500 tensor(5.2345, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "383000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "21600 tensor(5.2528, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "384000\n", - "385000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "21700 tensor(5.1824, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "386000\n", - "387000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "21800 tensor(5.1943, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "388000\n", - "389000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "21900 tensor(5.0359, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "390000\n", - "391000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "22000 tensor(5.1506, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "392000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "22100 tensor(5.1253, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "393000\n", - "394000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "22200 tensor(5.0982, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "395000\n", - "396000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "22300 tensor(5.1554, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "397000\n", - "398000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "22400 tensor(5.1673, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "399000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "22500 tensor(5.1957, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "400000\n", - "401000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "22600 tensor(5.1328, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "402000\n", - "403000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "22700 tensor(5.2231, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "404000\n", - "405000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "22800 tensor(5.1370, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "406000\n", - "407000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "22900 tensor(5.2334, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "408000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "23000 tensor(5.1372, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "409000\n", - "410000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "23100 tensor(5.1193, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "411000\n", - "412000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "23200 tensor(5.2649, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "413000\n", - "414000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "23300 tensor(5.1514, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "415000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "23400 tensor(5.2532, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "416000\n", - "417000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "23500 tensor(5.3751, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "418000\n", - "419000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "23600 tensor(5.0766, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "420000\n", - "421000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "23700 tensor(5.0915, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "422000\n", - "423000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "23800 tensor(5.3195, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "424000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "23900 tensor(5.2758, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "425000\n", - "426000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "24000 tensor(5.0487, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "427000\n", - "428000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "24100 tensor(5.1555, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "429000\n", - "430000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "24200 tensor(5.2140, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "431000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "24300 tensor(5.2729, device='mps:0', grad_fn=)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "432000\n" - ] - } - ], + "execution_count": null, + "outputs": [], "source": [ "train_dataset = Bigrams(path_to_training_file, vocab_size)\n", "model = SimpleBigramNeuralLanguageModel(vocab_size, embed_size).to(device)\n", @@ -4753,17 +260,8 @@ }, { "cell_type": "code", - "execution_count": 7, - "outputs": [ - { - "data": { - "text/plain": "SimpleBigramNeuralLanguageModel(\n (model): Sequential(\n (0): Embedding(30000, 1000)\n (1): Linear(in_features=1000, out_features=30000, bias=True)\n (2): Softmax(dim=None)\n )\n)" - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "outputs": [], "source": [ "with open(path_to_vocabulary_file, 'rb') as handle:\n", " vocab = pickle.load(handle)\n", @@ -4795,16 +293,8 @@ }, { "cell_type": "code", - "execution_count": 8, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "========== do prediction for dev-0/in.tsv.xz ==========\n" - ] - } - ], + "execution_count": null, + "outputs": [], "source": [ "predicition_for_file(model, vocab, folder_dev_0, file_dev_0)" ], @@ -4823,31 +313,14 @@ }, { "cell_type": "code", - "execution_count": 9, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "========== do prediction for test-A/in.tsv.xz ==========\n" - ] - } - ], + "execution_count": null, + "outputs": [], "source": [ "predicition_for_file(model, vocab, folder_test_a, file_test_a)" ], "metadata": { "collapsed": false } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [], - "metadata": { - "collapsed": false - } } ], "metadata": {