{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"gpuClass": "standard"
},
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ECxQCLFdh2dg",
"outputId": "a5bf4387-212b-4f01-e3cc-df308fa015b8"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n"
]
}
],
"source": [
"from google.colab import drive\n",
"drive.mount('/content/gdrive')"
]
},
{
"cell_type": "code",
"source": [
"root_path = '/content/gdrive/MyDrive/challenging-america-word-gap-prediction'"
],
"metadata": {
"id": "uWXe1O7FjKVZ"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import torch\n",
"torch.cuda.is_available()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Lo2e9lxajQGy",
"outputId": "751a36fc-203b-40eb-e59e-139d6ed92231"
},
"execution_count": 8,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 8
}
]
},
{
"cell_type": "code",
"source": [
"import torch\n",
"import csv\n",
"torch.cuda.empty_cache()\n",
"from torch.utils.data import DataLoader\n",
"import pandas as pd\n",
"from os.path import exists\n",
"from torchtext.vocab import build_vocab_from_iterator\n",
"import itertools\n",
"import regex as re\n",
"from csv import QUOTE_NONE\n",
"from torch import nn\n",
"\n",
"ENCODING = \"utf-8\"\n",
"\n",
"REP = re.compile(r\"[{}\\[\\]\\&%^$*#\\(\\)@\\t\\n0123456789]+\")\n",
"REM = re.compile(r\"'s|[\\-]\\\\n|\\-\\\\n|\\p{P}\")\n",
"\n",
"def read_csv(fname):\n",
" return pd.read_csv(fname, sep=\"\\t\", on_bad_lines='skip', header=None, quoting=QUOTE_NONE, encoding=ENCODING)\n",
"\n",
"def clean_text(text):\n",
" res = str(text).lower().strip()\n",
" res = res.replace(\"’\", \"'\")\n",
" res = REM.sub(\"\", res)\n",
" res = REP.sub(\" \", res)\n",
" res = res.replace(\"'t\", \" not\")\n",
" res = res.replace(\"'s\", \" is\")\n",
" res = res.replace(\"'ll\", \" will\")\n",
" res = res.replace(\"'ve'\", \"have\")\n",
" return res.replace(\"'m\", \" am\")\n",
"\n",
"def get_words_from_line(line, specials = True):\n",
" line = line.rstrip()\n",
" if specials:\n",
" yield ''\n",
" for m in re.finditer(r'[\\p{L}0-9\\*]+|\\p{P}+', line):\n",
" yield m.group(0).lower()\n",
" if specials:\n",
" yield ''\n",
"\n",
"\n",
"def get_word_lines_from_data(d):\n",
" for line in d:\n",
" yield get_words_from_line(line)\n",
"\n",
"\n",
"\n",
"\n",
"class Bigrams(torch.utils.data.IterableDataset):\n",
" def __init__(self, data, vocabulary_size):\n",
" self.vocab = build_vocab_from_iterator(\n",
" get_word_lines_from_data(data),\n",
" max_tokens = vocabulary_size,\n",
" specials = [''])\n",
" self.vocab.set_default_index(self.vocab[''])\n",
" self.vocabulary_size = vocabulary_size\n",
" self.data = data\n",
"\n",
" @staticmethod\n",
" def look_ahead_iterator(gen):\n",
" w1 = None\n",
" for item in gen:\n",
" if w1 is not None:\n",
" yield (w1, item)\n",
" w1 = item\n",
"\n",
" def __iter__(self):\n",
" return self.look_ahead_iterator(\n",
" (self.vocab[t] for t in itertools.chain.from_iterable(get_word_lines_from_data(self.data))))\n",
"\n",
"class SimpleBigramNeuralLanguageModel(torch.nn.Module):\n",
" def __init__(self, vocabulary_size, embedding_size):\n",
" super(SimpleBigramNeuralLanguageModel, self).__init__()\n",
" self.model = nn.Sequential(\n",
" nn.Embedding(vocabulary_size, embedding_size),\n",
" nn.Linear(embedding_size, vocabulary_size),\n",
" nn.Softmax(),\n",
" )\n",
"\n",
" def forward(self, x):\n",
" return self.model(x)\n",
"\n",
"\n"
],
"metadata": {
"id": "GDsznRxrjNSi"
},
"execution_count": 9,
"outputs": []
},
{
"cell_type": "code",
"source": [
"\n",
"\n",
"data = read_csv(\"/content/gdrive/MyDrive/challenging-america-word-gap-prediction/train/in.tsv.xz\")\n",
"train_words = read_csv(\"/content/gdrive/MyDrive/challenging-america-word-gap-prediction/train/expected.tsv\")\n",
"\n",
"train_data = data[[6, 7]]\n",
"train_data = pd.concat([train_data, train_words], axis=1)\n",
"train_data = train_data[6] + train_data[0] + train_data[7]\n",
"train_data = train_data.apply(clean_text)\n",
"\n",
"vocab_size = 30000\n",
"embed_size = 150\n",
"\n",
"train_dataset = Bigrams(train_data, vocab_size)\n",
"\n",
"\n",
"\n",
"device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
"model = SimpleBigramNeuralLanguageModel(vocab_size, embed_size).to(device)\n",
"print(device)\n",
"if(not exists('model1.bin')):\n",
" data = DataLoader(train_dataset, batch_size=8000)\n",
" optimizer = torch.optim.Adam(model.parameters())\n",
" criterion = torch.nn.NLLLoss()\n",
"\n",
" model.train()\n",
" step = 0\n",
" for i in range(2):\n",
" print(f\"EPOCH {i}=========================\")\n",
" for x, y in data:\n",
" x = x.to(device)\n",
" y = y.to(device)\n",
" optimizer.zero_grad()\n",
" ypredicted = model(x)\n",
" loss = criterion(torch.log(ypredicted), y)\n",
" if step % 100 == 0:\n",
" print(step, loss)\n",
" step += 1\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" torch.save(model.state_dict(), 'model1.bin')\n",
"else:\n",
" print(\"Loading model1\")\n",
" model.load_state_dict(torch.load('model1.bin'))\n",
"\n",
"\n",
"\n",
"vocab = train_dataset.vocab\n",
"\n",
"def predict(tokens):\n",
" ixs = torch.tensor(vocab.forward(tokens)).to(device)\n",
" out = model(ixs)\n",
" top = torch.topk(out[0], 8)\n",
" top_indices = top.indices.tolist()\n",
" top_probs = top.values.tolist()\n",
" top_words = vocab.lookup_tokens(top_indices)\n",
" result = \"\"\n",
" for word, prob in list(zip(top_words, top_probs)):\n",
" result += f\"{word}:{prob} \"\n",
" # result += f':0.01'\n",
" return result\n",
"\n",
"DEFAULT_PREDICTION = \"from:0.2 the:0.2 to:0.2 a:0.1 and:0.1 of:0.1 :0.1\"\n",
"\n",
"def predict_file(result_path, data):\n",
" with open(result_path, \"w+\", encoding=\"UTF-8\") as f:\n",
" for row in data:\n",
" result = {}\n",
" before = None\n",
" for before in get_words_from_line(clean_text(str(row)), False):\n",
" pass\n",
" before = [before]\n",
" print(before)\n",
" if(len(before) < 1):\n",
" result = DEFAULT_PREDICTION\n",
" else:\n",
" result = predict(before)\n",
" result = result.strip()\n",
" f.write(result + \"\\n\")\n",
" print(result)\n",
"\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "UAGoWjAvgdHR",
"outputId": "7488a193-d5d3-4e15-8c94-e6a92fcb6e1c"
},
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"cuda\n",
"Loading model1\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"dev_data = pd.read_csv(\"/content/gdrive/MyDrive/challenging-america-word-gap-prediction/dev-0/in.tsv.xz\", sep='\\t', header=None, quoting=csv.QUOTE_NONE)[6]\n",
"dev_data = dev_data.apply(clean_text)\n",
"predict_file(\"/content/gdrive/MyDrive/challenging-america-word-gap-prediction/dev-0/out.tsv\", dev_data)\n",
"\n",
"test_data = pd.read_csv(\"/content/gdrive/MyDrive/challenging-america-word-gap-prediction/test-A/in.tsv.xz\", sep='\\t', header=None, quoting=csv.QUOTE_NONE)[6]\n",
"test_data = test_data.apply(clean_text)\n",
"predict_file(\"/content/gdrive/MyDrive/challenging-america-word-gap-prediction/test-A/out.tsv\", test_data)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "NcAFyvb6gi6O",
"outputId": "dac6e397-e648-46d7-aa36-ad7675cec7d3"
},
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['fromn']\n",
"to:0.6022941470146179 :0.08413857221603394 the:0.02710115723311901 a:0.024180108681321144 cents:0.01099067460745573 and:0.010230590589344501 oclock:0.01012511644512415 at:0.007927066646516323\n",
"['its']\n",
":0.2702571153640747 own:0.019152523949742317 use:0.006631065625697374 way:0.005629365798085928 provisions:0.004911798983812332 power:0.004610280506312847 origin:0.0041810800321400166 present:0.004065223038196564\n",
"['ot']\n",
"the:0.22001349925994873 :0.19508947432041168 a:0.030033614486455917 this:0.01654713787138462 tho:0.016085060313344002 his:0.013413750566542149 tbe:0.011244924739003181 said:0.010236472822725773\n",
"['singlenspiing']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['com']\n",
":0.3745647668838501 nmittee:0.0985867902636528 npany:0.07008005678653717 nplete:0.0227628406137228 nmenced:0.022177640348672867 nmunity:0.019416389986872673 ning:0.018061168491840363 npelled:0.017108052968978882\n",
"['with']\n",
"the:0.18855905532836914 :0.16477812826633453 a:0.09615594893693924 his:0.021452313289046288 an:0.01595587097108364 all:0.014357201755046844 which:0.011805357411503792 their:0.010865600779652596\n",
"['fruitn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['lazarette']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['seasonnmechanics']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['infelt']\n",
"that:0.16725043952465057 a:0.1049814447760582 the:0.07968802005052567 :0.06530243158340454 as:0.040015850216150284 so:0.02690543234348297 to:0.023759080097079277 it:0.01877797581255436\n",
"['these']\n",
":0.19787919521331787 are:0.03109699860215187 men:0.02476704865694046 things:0.01615901105105877 two:0.015082272700965405 were:0.014673557132482529 days:0.007715313229709864 people:0.0068898797035217285\n",
"['to']\n",
":0.1641007363796234 the:0.12734068930149078 be:0.05047796294093132 a:0.02106613852083683 make:0.012407870031893253 do:0.012361159548163414 have:0.012224489822983742 his:0.008027305826544762\n",
"['too']\n",
":0.18147820234298706 much:0.09218698740005493 late:0.02721041440963745 long:0.02395564876496792 many:0.02172279916703701 far:0.01422102004289627 small:0.01405904721468687 great:0.012681767344474792\n",
"['then']\n",
":0.17314893007278442 the:0.054305776953697205 th:0.027903184294700623 he:0.025194548070430756 a:0.01811600849032402 i:0.01776750013232231 to:0.017214776948094368 in:0.016991956159472466\n",
"['me']\n",
":0.1571723371744156 to:0.09861639887094498 and:0.05510082468390465 that:0.04905012249946594 in:0.031311556696891785 a:0.026816315948963165 i:0.02574211359024048 the:0.022411905229091644\n",
"['country']\n",
":0.11966928094625473 and:0.07137807458639145 in:0.03494032099843025 is:0.034728653728961945 to:0.03073720633983612 the:0.026472346857190132 that:0.016115514561533928 has:0.01602117158472538\n",
"['pray']\n",
"for:0.1147090271115303 that:0.09854457527399063 :0.09667541831731796 and:0.05276889353990555 to:0.043221574276685715 the:0.026828594505786896 thee:0.020481640473008156 ning:0.01991654746234417\n",
"['well']\n",
"as:0.17864541709423065 :0.11902722716331482 known:0.06120625138282776 and:0.03417086973786354 to:0.030580556020140648 in:0.01440605241805315 that:0.013506671413779259 for:0.012597030960023403\n",
"['n']\n",
":0.22925962507724762 n:0.06464511156082153 y:0.03201986104249954 c:0.019956285133957863 the:0.018512077629566193 w:0.017649687826633453 and:0.017604317516088486 e:0.016488024964928627\n",
"['and']\n",
":0.1791187971830368 the:0.06385066360235214 a:0.01666204072535038 in:0.01507352851331234 that:0.012835350818932056 to:0.011688937433063984 it:0.010210222564637661 i:0.007507757283747196\n",
"['mag']\n",
":0.6771595478057861 var:0.04740045964717865 and:0.015082723461091518 a:0.01107875257730484 the:0.00967230275273323 to:0.009017998352646828 of:0.006967801600694656 e:0.006326612550765276\n",
"['seed']\n",
":0.14156512916088104 and:0.047204963862895966 is:0.03819289058446884 in:0.029025167226791382 the:0.025906240567564964 of:0.023483337834477425 corn:0.02170969545841217 was:0.019000500440597534\n",
"['and']\n",
":0.1791187971830368 the:0.06385066360235214 a:0.01666204072535038 in:0.01507352851331234 that:0.012835350818932056 to:0.011688937433063984 it:0.010210222564637661 i:0.007507757283747196\n",
"['york']\n",
":0.18007470667362213 and:0.07088713347911835 city:0.04835294559597969 the:0.01977786421775818 to:0.01939130388200283 for:0.01731991022825241 in:0.014967141672968864 tribune:0.013161341659724712\n",
"['beenn']\n",
":0.21989615261554718 a:0.027275050058960915 in:0.023197248578071594 and:0.015070127323269844 the:0.010114927776157856 to:0.007723797112703323 of:0.005964544601738453 at:0.0057502021081745625\n",
"['and']\n",
":0.1791187971830368 the:0.06385066360235214 a:0.01666204072535038 in:0.01507352851331234 that:0.012835350818932056 to:0.011688937433063984 it:0.010210222564637661 i:0.007507757283747196\n",
"['buy']\n",
":0.14975865185260773 a:0.10235429555177689 the:0.07125183194875717 and:0.043543219566345215 it:0.02510141395032406 in:0.021220633760094643 for:0.016868017613887787 them:0.015618979930877686\n",
"['inferen']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['uponn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['ar']\n",
":0.3615242540836334 nrested:0.04803482070565224 nranged:0.01859256438910961 the:0.017519423738121986 a:0.016516724601387978 nrived:0.016169406473636627 sch:0.015866553410887718 schs:0.01586332358419895\n",
"['minister']\n",
"of:0.20191951096057892 :0.1576572060585022 to:0.08270685374736786 and:0.055784814059734344 in:0.04526316374540329 who:0.022849131375551224 was:0.01826796866953373 is:0.01268035639077425\n",
"['ofsn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['memorable']\n",
":0.23944434523582458 day:0.026241321116685867 year:0.024737460538744926 and:0.024195699021220207 occasion:0.02371875010430813 in:0.01599171571433544 event:0.015270641073584557 night:0.009204641915857792\n",
"['not']\n",
":0.14968617260456085 be:0.04642459750175476 only:0.03997911512851715 to:0.033478107303380966 a:0.03148456662893295 been:0.020359180867671967 the:0.02024826779961586 in:0.01442572008818388\n",
"['or']\n",
":0.20414391160011292 the:0.04081256687641144 a:0.018578192219138145 in:0.017286691814661026 any:0.015642661601305008 two:0.014492310583591461 to:0.013875987380743027 other:0.01381285022944212\n",
"['and']\n",
":0.1791187971830368 the:0.06385066360235214 a:0.01666204072535038 in:0.01507352851331234 that:0.012835350818932056 to:0.011688937433063984 it:0.010210222564637661 i:0.007507757283747196\n",
"['thought']\n",
"that:0.10238199681043625 of:0.09746503084897995 :0.08445286750793457 it:0.0692593976855278 the:0.03876238316297531 to:0.036570657044649124 he:0.03208998963236809 and:0.02360614947974682\n",
"['n']\n",
":0.22925962507724762 n:0.06464511156082153 y:0.03201986104249954 c:0.019956285133957863 the:0.018512077629566193 w:0.017649687826633453 and:0.017604317516088486 e:0.016488024964928627\n",
"['no']\n",
":0.21101464331150055 one:0.030883168801665306 doubt:0.021309345960617065 longer:0.015751631930470467 more:0.015039087273180485 other:0.012437735684216022 at:0.00836264993995428 of:0.007628906983882189\n",
"['furnished']\n",
"by:0.13623559474945068 :0.13086862862110138 the:0.07768431305885315 with:0.06902734190225601 to:0.06692063063383102 and:0.031122008338570595 in:0.030942710116505623 a:0.024767804890871048\n",
"['kindhe']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['ofn']\n",
":0.11161091923713684 the:0.05351819470524788 and:0.045352645218372345 to:0.029555894434452057 in:0.02119685336947441 per:0.02113071084022522 a:0.02079508826136589 s:0.018661532551050186\n",
"['share']\n",
"of:0.2963021993637085 in:0.11481660604476929 :0.08151209354400635 the:0.04714621230959892 and:0.029531721025705338 to:0.02145523764193058 or:0.020925426855683327 ofnthe:0.01523417979478836\n",
"['idn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['the']\n",
":0.22546915709972382 same:0.00775930006057024 state:0.007242937106639147 first:0.006000572349876165 city:0.005373408552259207 most:0.005041860044002533 people:0.005007922183722258 united:0.0049338326789438725\n",
"['ho']\n",
":0.19489049911499023 was:0.07406753301620483 had:0.05150853469967842 is:0.036005403846502304 has:0.022943999618291855 would:0.018995828926563263 will:0.01608256995677948 could:0.015021993778645992\n",
"['to']\n",
":0.1641007363796234 the:0.12734068930149078 be:0.05047796294093132 a:0.02106613852083683 make:0.012407870031893253 do:0.012361159548163414 have:0.012224489822983742 his:0.008027305826544762\n",
"['particularly']\n",
":0.1600012630224228 described:0.08091900497674942 in:0.05816621705889702 the:0.04016972333192825 to:0.0396663099527359 of:0.018677931278944016 that:0.016451124101877213 as:0.016146983951330185\n",
"['toundn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['resist']\n",
"the:0.20629844069480896 :0.13768623769283295 any:0.025579893961548805 it:0.02541232667863369 a:0.025056391954421997 nance:0.021608872339129448 his:0.02115912362933159 all:0.019421102479100227\n",
"['great']\n",
":0.23301440477371216 deal:0.042057666927576065 many:0.023794014006853104 britain:0.01637883484363556 and:0.01608119159936905 northern:0.0075382268987596035 number:0.007448261603713036 work:0.006052902899682522\n",
"['sore']\n",
":0.2727644741535187 throat:0.09327245503664017 and:0.06414686888456345 to:0.03874184936285019 of:0.02285708300769329 in:0.020859427750110626 eyes:0.014868006110191345 with:0.013868249021470547\n",
"['copious']\n",
":0.2814045250415802 and:0.057153813540935516 spitting:0.024884618818759918 or:0.01493043638765812 rains:0.00676787318661809 expectoration:0.006205611862242222 use:0.005000473465770483 in:0.004916490521281958\n",
"['of']\n",
"the:0.24628451466560364 :0.16549083590507507 a:0.030842546373605728 this:0.018929913640022278 his:0.013870411552488804 tho:0.011849009431898594 said:0.010169874876737595 their:0.00833516288548708\n",
"['anniversary']\n",
"of:0.5570573806762695 :0.05830441787838936 ofnthe:0.025549080222845078 and:0.02235742285847664 the:0.013832224532961845 in:0.012621582485735416 ot:0.00933550950139761 that:0.007586529012769461\n",
"['drove']\n",
":0.15266035497188568 the:0.08227454870939255 to:0.07634899020195007 up:0.04997767135500908 out:0.03920309618115425 them:0.034609872847795486 him:0.03244243934750557 a:0.030813029035925865\n",
"['claimn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['rulenyour']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['then']\n",
":0.17314893007278442 the:0.054305776953697205 th:0.027903184294700623 he:0.025194548070430756 a:0.01811600849032402 i:0.01776750013232231 to:0.017214776948094368 in:0.016991956159472466\n",
"['the']\n",
":0.22546915709972382 same:0.00775930006057024 state:0.007242937106639147 first:0.006000572349876165 city:0.005373408552259207 most:0.005041860044002533 people:0.005007922183722258 united:0.0049338326789438725\n",
"['it']\n",
"is:0.1975691020488739 :0.11427108943462372 was:0.07985429465770721 has:0.02643054537475109 would:0.025057239457964897 will:0.0246218703687191 to:0.017691470682621002 and:0.014439831487834454\n",
"['presidential']\n",
":0.30631786584854126 election:0.09481414407491684 electors:0.05854112654924393 campaign:0.056805189698934555 candidate:0.04167124629020691 candidates:0.027102060616016388 nomination:0.02184944972395897 elections:0.015486281365156174\n",
"['of']\n",
"the:0.24628451466560364 :0.16549083590507507 a:0.030842546373605728 this:0.018929913640022278 his:0.013870411552488804 tho:0.011849009431898594 said:0.010169874876737595 their:0.00833516288548708\n",
"['to']\n",
":0.1641007363796234 the:0.12734068930149078 be:0.05047796294093132 a:0.02106613852083683 make:0.012407870031893253 do:0.012361159548163414 have:0.012224489822983742 his:0.008027305826544762\n",
"['and']\n",
":0.1791187971830368 the:0.06385066360235214 a:0.01666204072535038 in:0.01507352851331234 that:0.012835350818932056 to:0.011688937433063984 it:0.010210222564637661 i:0.007507757283747196\n",
"['n']\n",
":0.22925962507724762 n:0.06464511156082153 y:0.03201986104249954 c:0.019956285133957863 the:0.018512077629566193 w:0.017649687826633453 and:0.017604317516088486 e:0.016488024964928627\n",
"['financial']\n",
":0.2771313190460205 and:0.030127525329589844 policy:0.02824564278125763 condition:0.024351194500923157 stringency:0.018086541444063187 affairs:0.015763157978653908 panic:0.012458850629627705 question:0.009656081907451153\n",
"['turn']\n",
":0.1496334820985794 the:0.0648597702383995 out:0.06267675012350082 to:0.0600338950753212 of:0.049756769090890884 and:0.02935531921684742 in:0.029205771163105965 over:0.021794509142637253\n",
"['a']\n",
":0.24112556874752045 few:0.016269860789179802 large:0.011636430397629738 man:0.010118708945810795 good:0.010030915029346943 great:0.009634408168494701 very:0.008648835122585297 little:0.008290580473840237\n",
"['his']\n",
":0.22731943428516388 own:0.028036223724484444 wife:0.014714676886796951 head:0.009271553717553616 life:0.008575893007218838 father:0.007845153100788593 name:0.0067452057264745235 death:0.006232346408069134\n",
"['her']\n",
":0.22112299501895905 to:0.026270100846886635 husband:0.023973815143108368 own:0.023047467693686485 and:0.0185173861682415 in:0.012423854321241379 mother:0.011387202888727188 sister:0.007973994128406048\n",
"['the']\n",
":0.22546915709972382 same:0.00775930006057024 state:0.007242937106639147 first:0.006000572349876165 city:0.005373408552259207 most:0.005041860044002533 people:0.005007922183722258 united:0.0049338326789438725\n",
"['in']\n",
"the:0.22110939025878906 :0.15336278080940247 a:0.04508489370346069 this:0.025574127212166786 his:0.015838539227843285 tho:0.011865855194628239 which:0.011841910891234875 their:0.009982584975659847\n",
"['little']\n",
":0.24557887017726898 girl:0.025172490626573563 more:0.019780097529292107 of:0.01807580143213272 in:0.01266819890588522 to:0.012459754943847656 and:0.011518032290041447 or:0.010145704261958599\n",
"['inn']\n",
":0.11861531436443329 and:0.0650525689125061 the:0.05006604641675949 in:0.027428003028035164 a:0.027054699137806892 of:0.020789235830307007 he:0.01902659982442856 to:0.018165726214647293\n",
"['now']\n",
":0.14582782983779907 in:0.04832162708044052 the:0.024043085053563118 and:0.021647101268172264 that:0.019400129094719887 a:0.017484351992607117 to:0.014732500538229942 on:0.013634675182402134\n",
"['then']\n",
":0.17314893007278442 the:0.054305776953697205 th:0.027903184294700623 he:0.025194548070430756 a:0.01811600849032402 i:0.01776750013232231 to:0.017214776948094368 in:0.016991956159472466\n",
"['permit']\n",
"the:0.1632901132106781 :0.1252882182598114 of:0.04265236482024193 to:0.03897421807050705 me:0.03697557374835014 him:0.036127787083387375 a:0.03516046330332756 them:0.027134213596582413\n",
"['the']\n",
":0.22546915709972382 same:0.00775930006057024 state:0.007242937106639147 first:0.006000572349876165 city:0.005373408552259207 most:0.005041860044002533 people:0.005007922183722258 united:0.0049338326789438725\n",
"['a']\n",
":0.24112556874752045 few:0.016269860789179802 large:0.011636430397629738 man:0.010118708945810795 good:0.010030915029346943 great:0.009634408168494701 very:0.008648835122585297 little:0.008290580473840237\n",
"['andnthe']\n",
":0.10762743651866913 other:0.014708217233419418 same:0.008086836896836758 first:0.00714407442137599 whole:0.006792465224862099 most:0.006189970299601555 people:0.005712251644581556 amount:0.005635734181851149\n",
"['in']\n",
"the:0.22110939025878906 :0.15336278080940247 a:0.04508489370346069 this:0.025574127212166786 his:0.015838539227843285 tho:0.011865855194628239 which:0.011841910891234875 their:0.009982584975659847\n",
"['de']\n",
":0.49304428696632385 nscribed:0.03593514487147331 ncree:0.01910579949617386 nclare:0.016634183004498482 npartment:0.013101846911013126 ncision:0.012689454481005669 nceased:0.011244077235460281 nmands:0.010968760587275028\n",
"['dip']\n",
":0.18214932084083557 the:0.07342560589313507 of:0.062355153262615204 in:0.05248147249221802 a:0.047397300601005554 and:0.03739320486783981 it:0.024377480149269104 to:0.015617425553500652\n",
"['like']\n",
":0.18616856634616852 a:0.17956425249576569 the:0.09968697279691696 to:0.06316488981246948 that:0.0213160440325737 an:0.0169201772660017 it:0.013055045157670975 this:0.013001575134694576\n",
"['expand']\n",
"the:0.12572140991687775 and:0.11769000440835953 :0.09885091334581375 to:0.04987511411309242 it:0.031796764582395554 in:0.029610566794872284 a:0.028658149763941765 as:0.025107450783252716\n",
"['andn']\n",
":0.11835400760173798 dollars:0.033931098878383636 block:0.030407536774873734 in:0.023633964359760284 the:0.02200307324528694 to:0.019526800140738487 of:0.018512655049562454 a:0.018055016174912453\n",
"['disfiguration']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['her']\n",
":0.22112299501895905 to:0.026270100846886635 husband:0.023973815143108368 own:0.023047467693686485 and:0.0185173861682415 in:0.012423854321241379 mother:0.011387202888727188 sister:0.007973994128406048\n",
"['hasn']\n",
":0.17889410257339478 een:0.0487578809261322 been:0.04432809352874756 the:0.0182209312915802 a:0.015206229873001575 to:0.011778036132454872 in:0.009196098893880844 e:0.008825796656310558\n",
"['greatnmeasure']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['day']\n",
"of:0.3034079968929291 :0.08016764372587204 and:0.04774697870016098 the:0.025342067703604698 to:0.02038748562335968 in:0.020222947001457214 or:0.015532629564404488 at:0.015224823728203773\n",
"['are']\n",
":0.16807684302330017 not:0.04138978198170662 the:0.02690424770116806 in:0.023047056049108505 to:0.019894026219844818 now:0.015693990513682365 a:0.011959198862314224 hereby:0.00985399354249239\n",
"['been']\n",
":0.18799114227294922 made:0.0322195440530777 a:0.031793076545000076 in:0.02522442489862442 the:0.01643744669854641 so:0.0077444217167794704 given:0.0072204493917524815 taken:0.006533249747008085\n",
"['that']\n",
"the:0.1410231739282608 :0.11953254044055939 he:0.044784024357795715 it:0.03494413569569588 they:0.022156892344355583 is:0.019587429240345955 a:0.019473088905215263 there:0.015096952207386494\n",
"['midgenmakes']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['gratifiedn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['pay']\n",
"the:0.1480865776538849 :0.089902363717556 for:0.08847824484109879 a:0.04429009556770325 to:0.032318364828825 nment:0.01842622645199299 and:0.014837000519037247 of:0.014597073197364807\n",
"['habit']\n",
"of:0.41023752093315125 :0.14767734706401825 and:0.04803241044282913 is:0.019620215520262718 to:0.018852675333619118 the:0.017077317461371422 which:0.016443854197859764 in:0.013901734724640846\n",
"['incidents']\n",
"of:0.29866883158683777 :0.10820091515779495 in:0.04884188622236252 and:0.0451679527759552 that:0.025860637426376343 which:0.025412598624825478 connected:0.020349707454442978 the:0.018996335566043854\n",
"['her']\n",
":0.22112299501895905 to:0.026270100846886635 husband:0.023973815143108368 own:0.023047467693686485 and:0.0185173861682415 in:0.012423854321241379 mother:0.011387202888727188 sister:0.007973994128406048\n",
"['only']\n",
":0.16589561104774475 a:0.06038087606430054 to:0.046623602509498596 the:0.036858413368463516 in:0.03357406705617905 one:0.02650539204478264 by:0.017633434385061264 be:0.01402018778026104\n",
"['andneverybody']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['whiell']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['by']\n",
"the:0.2323983758687973 :0.1534150391817093 a:0.05595365911722183 tho:0.013856622390449047 said:0.01337381824851036 law:0.013222591951489449 his:0.012532558292150497 this:0.011029992252588272\n",
"['come']\n",
"to:0.17094673216342926 :0.1031876653432846 in:0.060832031071186066 from:0.034362196922302246 and:0.03334948420524597 out:0.03294382244348526 into:0.03275981917977333 back:0.0275673009455204\n",
"['into']\n",
"the:0.29200682044029236 :0.15183523297309875 a:0.08336054533720016 his:0.01840839348733425 tho:0.015280143357813358 an:0.012433086521923542 this:0.012129685841500759 their:0.011247316375374794\n",
"['strictlyn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['incould']\n",
"not:0.27519363164901733 :0.05142929032444954 see:0.03915046155452728 be:0.02989174984395504 get:0.029048211872577667 have:0.021763350814580917 hardly:0.01833612285554409 hear:0.016535388305783272\n",
"['being']\n",
":0.18178798258304596 the:0.05463423579931259 a:0.044204384088516235 in:0.03139527887105942 made:0.021438436582684517 done:0.010384885594248772 to:0.008472583256661892 of:0.007576572708785534\n",
"['but']\n",
":0.10057884454727173 the:0.09095072746276855 it:0.054127976298332214 in:0.0283758956938982 a:0.02659798040986061 he:0.023481814190745354 i:0.020594369620084763 they:0.01816706918179989\n",
"['par']\n",
":0.20796771347522736 value:0.08712632954120636 nty:0.07249657064676285 nticular:0.048743296414613724 nties:0.03478650748729706 and:0.03286156803369522 nticularly:0.029927806928753853 ntial:0.022359734401106834\n",
"['of']\n",
"the:0.24628451466560364 :0.16549083590507507 a:0.030842546373605728 this:0.018929913640022278 his:0.013870411552488804 tho:0.011849009431898594 said:0.010169874876737595 their:0.00833516288548708\n",
"['prevalent']\n",
"in:0.18078169226646423 :0.0947568416595459 and:0.06480997055768967 that:0.040276192128658295 as:0.026730772107839584 but:0.013920711353421211 for:0.013013817369937897 on:0.01264421921223402\n",
"['to']\n",
":0.1641007363796234 the:0.12734068930149078 be:0.05047796294093132 a:0.02106613852083683 make:0.012407870031893253 do:0.012361159548163414 have:0.012224489822983742 his:0.008027305826544762\n",
"['a']\n",
":0.24112556874752045 few:0.016269860789179802 large:0.011636430397629738 man:0.010118708945810795 good:0.010030915029346943 great:0.009634408168494701 very:0.008648835122585297 little:0.008290580473840237\n",
"['ad']\n",
":0.46572381258010864 valorem:0.0639818087220192 nvanced:0.03069620206952095 nvance:0.021423691883683205 ndress:0.020832737907767296 the:0.015519566833972931 and:0.013783372938632965 in:0.012624251656234264\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/container.py:217: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
" input = module(input)\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[1;30;43mStrumieniowane dane wyjściowe obcięte do 5000 ostatnich wierszy.\u001b[0m\n",
"['wellnspread']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['from']\n",
"the:0.25287488102912903 :0.15713191032409668 a:0.034414686262607574 his:0.014321111142635345 tho:0.014128337614238262 this:0.012060044333338737 which:0.011743685230612755 to:0.011461544781923294\n",
"['of']\n",
"the:0.24628451466560364 :0.16549083590507507 a:0.030842546373605728 this:0.018929913640022278 his:0.013870411552488804 tho:0.011849009431898594 said:0.010169874876737595 their:0.00833516288548708\n",
"['that']\n",
"the:0.1410231739282608 :0.11953254044055939 he:0.044784024357795715 it:0.03494413569569588 they:0.022156892344355583 is:0.019587429240345955 a:0.019473088905215263 there:0.015096952207386494\n",
"['say']\n",
"that:0.21495364606380463 :0.11226599663496017 the:0.04550514370203018 to:0.03919023275375366 it:0.028015941381454468 i:0.022692859172821045 a:0.01814952678978443 in:0.017609383910894394\n",
"['rest']\n",
"of:0.2671319246292114 :0.09157544374465942 and:0.051582906395196915 in:0.03871097043156624 on:0.02648262120783329 the:0.025508644059300423 upon:0.016937701031565666 assured:0.015263134613633156\n",
"['a']\n",
":0.24112556874752045 few:0.016269860789179802 large:0.011636430397629738 man:0.010118708945810795 good:0.010030915029346943 great:0.009634408168494701 very:0.008648835122585297 little:0.008290580473840237\n",
"['is']\n",
":0.14304344356060028 a:0.07812074571847916 the:0.05577261000871658 not:0.04125296324491501 to:0.028534000739455223 in:0.02009972184896469 no:0.01618652231991291 that:0.013951911590993404\n",
"['an']\n",
":0.2524346113204956 old:0.018409626558423042 hour:0.016093363985419273 act:0.015224146656692028 order:0.011131629347801208 average:0.009394328109920025 opportunity:0.00728580541908741 article:0.006832430604845285\n",
"['him']\n",
":0.10401047766208649 to:0.10037162899971008 and:0.06415976583957672 in:0.04259955883026123 the:0.02874256856739521 a:0.02758781798183918 that:0.0228275079280138 as:0.02206616848707199\n",
"['n']\n",
":0.22925962507724762 n:0.06464511156082153 y:0.03201986104249954 c:0.019956285133957863 the:0.018512077629566193 w:0.017649687826633453 and:0.017604317516088486 e:0.016488024964928627\n",
"['known']\n",
"as:0.17906907200813293 to:0.1604887694120407 :0.10328951478004456 that:0.06191052868962288 and:0.05049519240856171 in:0.04228660836815834 the:0.02014406956732273 by:0.016692543402314186\n",
"['and']\n",
":0.1791187971830368 the:0.06385066360235214 a:0.01666204072535038 in:0.01507352851331234 that:0.012835350818932056 to:0.011688937433063984 it:0.010210222564637661 i:0.007507757283747196\n",
"['wenn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['what']\n",
":0.11899658292531967 is:0.09058628976345062 the:0.05584552139043808 he:0.055342115461826324 it:0.04097414389252663 was:0.0380481593310833 a:0.030636867508292198 they:0.02920752577483654\n",
"['ground']\n",
":0.1374719738960266 and:0.06979455798864365 that:0.060389481484889984 of:0.04216333478689194 in:0.03245219215750694 with:0.030342867597937584 the:0.027031153440475464 for:0.026427827775478363\n",
"['some']\n",
":0.17425395548343658 of:0.17399758100509644 time:0.033102847635746 one:0.019578879699110985 other:0.01773841679096222 years:0.014443687163293362 ofnthe:0.009550128132104874 way:0.007940506562590599\n",
"['facts']\n",
":0.12162864208221436 and:0.08035537600517273 in:0.06168408319354057 of:0.05010320618748665 that:0.046635232865810394 are:0.03997327387332916 as:0.03181654214859009 to:0.023187898099422455\n",
"['mi']\n",
":0.28136664628982544 the:0.02248641476035118 a:0.016490785405039787 i:0.01594664715230465 in:0.012440882623195648 and:0.01030310895293951 :0.00990522000938654 of:0.008500398136675358\n",
"['elevator']\n",
":0.13514694571495056 and:0.09703655540943146 companies:0.041655730456113815 company:0.030175816267728806 in:0.024098578840494156 or:0.014940369874238968 for:0.01303930301219225 by:0.011949007399380207\n",
"['wise']\n",
":0.15170572698116302 and:0.1150946393609047 to:0.06811027228832245 appertaining:0.035481829196214676 in:0.024294614791870117 men:0.024229921400547028 as:0.02325117588043213 man:0.014908669516444206\n",
"['then']\n",
":0.17314893007278442 the:0.054305776953697205 th:0.027903184294700623 he:0.025194548070430756 a:0.01811600849032402 i:0.01776750013232231 to:0.017214776948094368 in:0.016991956159472466\n",
"['commend']\n",
"the:0.11945584416389465 itself:0.06042272597551346 to:0.05919293314218521 it:0.0570063479244709 :0.050666287541389465 them:0.05030451714992523 him:0.04998549818992615 and:0.03078053519129753\n",
"['they']\n",
":0.14672249555587769 are:0.10735142976045609 were:0.0713590756058693 have:0.06000291928648949 will:0.04248636215925217 had:0.03111598640680313 would:0.024974988773465157 can:0.017219092696905136\n",
"['and']\n",
":0.1791187971830368 the:0.06385066360235214 a:0.01666204072535038 in:0.01507352851331234 that:0.012835350818932056 to:0.011688937433063984 it:0.010210222564637661 i:0.007507757283747196\n",
"['ofn']\n",
":0.11161091923713684 the:0.05351819470524788 and:0.045352645218372345 to:0.029555894434452057 in:0.02119685336947441 per:0.02113071084022522 a:0.02079508826136589 s:0.018661532551050186\n",
"['of']\n",
"the:0.24628451466560364 :0.16549083590507507 a:0.030842546373605728 this:0.018929913640022278 his:0.013870411552488804 tho:0.011849009431898594 said:0.010169874876737595 their:0.00833516288548708\n",
"['n']\n",
":0.22925962507724762 n:0.06464511156082153 y:0.03201986104249954 c:0.019956285133957863 the:0.018512077629566193 w:0.017649687826633453 and:0.017604317516088486 e:0.016488024964928627\n",
"['ourn']\n",
":0.39382219314575195 and:0.011552629992365837 feet:0.00959747564047575 the:0.007690655067563057 to:0.007125630509108305 i:0.007030377630144358 a:0.005803974345326424 t:0.005078909918665886\n",
"['fact']\n",
"that:0.39098605513572693 :0.07769396156072617 is:0.038525763899087906 the:0.034249939024448395 of:0.03126796334981918 it:0.018812306225299835 and:0.016333023086190224 thatnthe:0.01586158759891987\n",
"['hn']\n",
":0.40869736671447754 w:0.01600746624171734 chatham:0.01587497442960739 a:0.012906793504953384 the:0.012000254355370998 of:0.01196422427892685 and:0.01193371880799532 e:0.008795693516731262\n",
"['the']\n",
":0.22546915709972382 same:0.00775930006057024 state:0.007242937106639147 first:0.006000572349876165 city:0.005373408552259207 most:0.005041860044002533 people:0.005007922183722258 united:0.0049338326789438725\n",
"['followsn']\n",
":0.12230867147445679 the:0.09292732924222946 a:0.03175881505012512 of:0.024685362353920937 this:0.02118527702987194 he:0.018826929852366447 no:0.018826616927981377 i:0.0188081506639719\n",
"['it']\n",
"is:0.1975691020488739 :0.11427108943462372 was:0.07985429465770721 has:0.02643054537475109 would:0.025057239457964897 will:0.0246218703687191 to:0.017691470682621002 and:0.014439831487834454\n",
"['n']\n",
":0.22925962507724762 n:0.06464511156082153 y:0.03201986104249954 c:0.019956285133957863 the:0.018512077629566193 w:0.017649687826633453 and:0.017604317516088486 e:0.016488024964928627\n",
"['turnedn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['central']\n",
":0.28010228276252747 railroad:0.04624203220009804 and:0.04277598112821579 committee:0.017165429890155792 park:0.016560712829232216 part:0.01345917396247387 pacific:0.012607277370989323 america:0.012253711931407452\n",
"['good']\n",
":0.1800873577594757 and:0.034809429198503494 deal:0.0223788283765316 to:0.021396957337856293 for:0.015180929563939571 as:0.014388279989361763 roads:0.011519820429384708 many:0.011263051070272923\n",
"['apartment']\n",
":0.11303121596574783 in:0.05551525577902794 and:0.04577212035655975 houses:0.043449025601148605 house:0.035473305732011795 is:0.032584186643362045 the:0.03078450821340084 of:0.02266555465757847\n",
"['linen']\n",
":0.17741422355175018 and:0.08512919396162033 of:0.038042619824409485 handkerchiefs:0.01947547122836113 or:0.015615035779774189 in:0.014042098075151443 with:0.013879945501685143 coats:0.010239120572805405\n",
"['than']\n",
":0.1189335286617279 the:0.09955327957868576 a:0.0481162890791893 any:0.034569866955280304 in:0.02920665591955185 that:0.02609132044017315 one:0.02373056858778 to:0.022401196882128716\n",
"['ever']\n",
":0.2154327929019928 been:0.050246719270944595 since:0.04413799196481705 before:0.039964303374290466 had:0.02021610364317894 be:0.01951783336699009 in:0.015496287494897842 saw:0.012246056459844112\n",
"['penanthe']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['encouragement']\n",
"to:0.15264785289764404 of:0.1455760896205902 and:0.0860772654414177 :0.07461944222450256 in:0.03974514827132225 the:0.03513322398066521 for:0.02807413600385189 from:0.023861227557063103\n",
"['to']\n",
":0.1641007363796234 the:0.12734068930149078 be:0.05047796294093132 a:0.02106613852083683 make:0.012407870031893253 do:0.012361159548163414 have:0.012224489822983742 his:0.008027305826544762\n",
"['to']\n",
":0.1641007363796234 the:0.12734068930149078 be:0.05047796294093132 a:0.02106613852083683 make:0.012407870031893253 do:0.012361159548163414 have:0.012224489822983742 his:0.008027305826544762\n",
"['ofn']\n",
":0.11161091923713684 the:0.05351819470524788 and:0.045352645218372345 to:0.029555894434452057 in:0.02119685336947441 per:0.02113071084022522 a:0.02079508826136589 s:0.018661532551050186\n",
"['ofn']\n",
":0.11161091923713684 the:0.05351819470524788 and:0.045352645218372345 to:0.029555894434452057 in:0.02119685336947441 per:0.02113071084022522 a:0.02079508826136589 s:0.018661532551050186\n",
"['it']\n",
"is:0.1975691020488739 :0.11427108943462372 was:0.07985429465770721 has:0.02643054537475109 would:0.025057239457964897 will:0.0246218703687191 to:0.017691470682621002 and:0.014439831487834454\n",
"['people']\n",
"of:0.12857156991958618 :0.10028903931379318 who:0.05000729486346245 and:0.047269824892282486 in:0.03648844361305237 are:0.032156217843294144 to:0.032018695026636124 have:0.020872600376605988\n",
"['or']\n",
":0.20414391160011292 the:0.04081256687641144 a:0.018578192219138145 in:0.017286691814661026 any:0.015642661601305008 two:0.014492310583591461 to:0.013875987380743027 other:0.01381285022944212\n",
"['dakota']\n",
":0.14652682840824127 on:0.11993848532438278 and:0.09048701077699661 in:0.045984070748090744 to:0.034056052565574646 at:0.033352822065353394 the:0.017756661400198936 county:0.013473534025251865\n",
"['orderedn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['ol']\n",
":0.23865024745464325 the:0.20971642434597015 a:0.025130726397037506 tho:0.011873121373355389 this:0.01160435564815998 said:0.009338573552668095 his:0.009322249330580235 tbe:0.00882399920374155\n",
"['the']\n",
":0.22546915709972382 same:0.00775930006057024 state:0.007242937106639147 first:0.006000572349876165 city:0.005373408552259207 most:0.005041860044002533 people:0.005007922183722258 united:0.0049338326789438725\n",
"['th']\n",
":0.20856480300426483 day:0.17502973973751068 of:0.04865522310137749 and:0.025335680693387985 street:0.013732117600739002 daynof:0.011129848659038544 the:0.009347060695290565 :0.007662114687263966\n",
"['f']\n",
":0.33081910014152527 the:0.06714984774589539 a:0.024773625656962395 r:0.022999776527285576 m:0.012727152556180954 h:0.010183022357523441 and:0.009944108314812183 w:0.009089533239603043\n",
"['whfch']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['lungsnchould']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['dayn']\n",
":0.12274598330259323 of:0.042603857815265656 the:0.029073845595121384 and:0.02548983320593834 a:0.022993680089712143 in:0.018368009477853775 be:0.01065831072628498 :0.009792870841920376\n",
"['pricesn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['he']\n",
":0.1504572629928589 was:0.09207449108362198 had:0.06375125050544739 is:0.04183579608798027 has:0.030918937176465988 would:0.024150297045707703 will:0.018284201622009277 could:0.01718415878713131\n",
"['and']\n",
":0.1791187971830368 the:0.06385066360235214 a:0.01666204072535038 in:0.01507352851331234 that:0.012835350818932056 to:0.011688937433063984 it:0.010210222564637661 i:0.007507757283747196\n",
"['aln']\n",
":0.25407740473747253 the:0.02635866217315197 and:0.01712917722761631 in:0.008856719359755516 for:0.008694037795066833 no:0.008676018565893173 on:0.007427920121699572 is:0.0071052019484341145\n",
"['for']\n",
"the:0.2195897400379181 :0.1482355147600174 a:0.06196406111121178 this:0.0132448123767972 his:0.013087003491818905 tho:0.011123294942080975 their:0.010956835001707077 it:0.01092309970408678\n",
"['remarkablyn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['to']\n",
":0.1641007363796234 the:0.12734068930149078 be:0.05047796294093132 a:0.02106613852083683 make:0.012407870031893253 do:0.012361159548163414 have:0.012224489822983742 his:0.008027305826544762\n",
"['associations']\n",
"and:0.14909349381923676 :0.12642063200473785 of:0.10908132791519165 in:0.04022640362381935 are:0.03734118863940239 the:0.021830635145306587 or:0.017194261774420738 to:0.01604018546640873\n",
"['thatn']\n",
":0.18509425222873688 the:0.04111270606517792 is:0.02727336250245571 a:0.020692864432930946 i:0.016117816790938377 and:0.014278948307037354 in:0.01377539150416851 have:0.012131815776228905\n",
"['an']\n",
":0.2524346113204956 old:0.018409626558423042 hour:0.016093363985419273 act:0.015224146656692028 order:0.011131629347801208 average:0.009394328109920025 opportunity:0.00728580541908741 article:0.006832430604845285\n",
"['no']\n",
":0.21101464331150055 one:0.030883168801665306 doubt:0.021309345960617065 longer:0.015751631930470467 more:0.015039087273180485 other:0.012437735684216022 at:0.00836264993995428 of:0.007628906983882189\n",
"['nightn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['mountebank']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['broufht']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['p']\n",
"m:0.3617202639579773 :0.257373571395874 r:0.013251986354589462 a:0.008899691514670849 in:0.008549975231289864 b:0.007218791171908379 c:0.007097408641129732 j:0.007063710130751133\n",
"['anrocky']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['ofn']\n",
":0.11161091923713684 the:0.05351819470524788 and:0.045352645218372345 to:0.029555894434452057 in:0.02119685336947441 per:0.02113071084022522 a:0.02079508826136589 s:0.018661532551050186\n",
"['and']\n",
":0.1791187971830368 the:0.06385066360235214 a:0.01666204072535038 in:0.01507352851331234 that:0.012835350818932056 to:0.011688937433063984 it:0.010210222564637661 i:0.007507757283747196\n",
"['wasn']\n",
":0.17828217148780823 a:0.06104143708944321 to:0.04011765867471695 the:0.035765957087278366 and:0.0352165661752224 in:0.03417397290468216 years:0.025945717468857765 per:0.017281075939536095\n",
"['proved']\n",
"to:0.1792956441640854 :0.13156574964523315 that:0.07699204236268997 a:0.05222391337156296 the:0.046136513352394104 by:0.04469001293182373 in:0.03765649348497391 an:0.017632100731134415\n",
"['the']\n",
":0.22546915709972382 same:0.00775930006057024 state:0.007242937106639147 first:0.006000572349876165 city:0.005373408552259207 most:0.005041860044002533 people:0.005007922183722258 united:0.0049338326789438725\n",
"['iowan']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['onn']\n",
":0.2573271095752716 the:0.054543156176805496 a:0.028942041099071503 of:0.028793184086680412 th:0.025362007319927216 i:0.014007133431732655 bbl:0.012129217386245728 and:0.011664386838674545\n",
"['pood']\n",
":0.16353563964366913 and:0.021971208974719048 as:0.02047673426568508 in:0.014478336088359356 to:0.013555939309298992 time:0.011389103718101978 for:0.007571924943476915 of:0.005867846310138702\n",
"['was']\n",
":0.16223494708538055 a:0.06841647624969482 the:0.037927355617284775 not:0.029518524184823036 in:0.026468118652701378 to:0.01670970395207405 made:0.012177110649645329 no:0.010337039828300476\n",
"['couldn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['annn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['nof']\n",
"the:0.2805168628692627 :0.07790356129407883 section:0.028715379536151886 this:0.024134283885359764 said:0.023297453299164772 mortgages:0.02176334150135517 a:0.020796947181224823 course:0.011715427041053772\n",
"['iioiin']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['a']\n",
":0.24112556874752045 few:0.016269860789179802 large:0.011636430397629738 man:0.010118708945810795 good:0.010030915029346943 great:0.009634408168494701 very:0.008648835122585297 little:0.008290580473840237\n",
"['to']\n",
":0.1641007363796234 the:0.12734068930149078 be:0.05047796294093132 a:0.02106613852083683 make:0.012407870031893253 do:0.012361159548163414 have:0.012224489822983742 his:0.008027305826544762\n",
"['domain']\n",
"of:0.19037796556949615 :0.10752991586923599 and:0.0721801221370697 to:0.03194751590490341 is:0.031179208308458328 the:0.025113694369792938 as:0.018790939822793007 in:0.0171558428555727\n",
"['door']\n",
"of:0.2567424178123474 :0.1079985648393631 and:0.08049219101667404 in:0.02637580595910549 to:0.026177335530519485 was:0.01896766573190689 at:0.017988065257668495 ofnthe:0.01498465146869421\n",
"['safen']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['the']\n",
":0.22546915709972382 same:0.00775930006057024 state:0.007242937106639147 first:0.006000572349876165 city:0.005373408552259207 most:0.005041860044002533 people:0.005007922183722258 united:0.0049338326789438725\n",
"['ordinary']\n",
":0.2629507780075073 and:0.01459957379847765 business:0.007737538777291775 circumstances:0.007248510140925646 way:0.0071929628029465675 man:0.006172054912894964 conditions:0.0053436183370649815 course:0.005015553440898657\n",
"['notified']\n",
"that:0.44700372219085693 to:0.07935777306556702 of:0.05688105896115303 the:0.05670984834432602 :0.05458160862326622 and:0.03963802382349968 by:0.038625650107860565 for:0.007223705295473337\n",
"['ofn']\n",
":0.11161091923713684 the:0.05351819470524788 and:0.045352645218372345 to:0.029555894434452057 in:0.02119685336947441 per:0.02113071084022522 a:0.02079508826136589 s:0.018661532551050186\n",
"['about']\n",
":0.16251502931118011 the:0.1293736845254898 a:0.04998978599905968 to:0.03513412922620773 it:0.02537279948592186 two:0.01648150384426117 one:0.013926786370575428 this:0.011903013102710247\n",
"['him']\n",
":0.10401047766208649 to:0.10037162899971008 and:0.06415976583957672 in:0.04259955883026123 the:0.02874256856739521 a:0.02758781798183918 that:0.0228275079280138 as:0.02206616848707199\n",
"['bed']\n",
":0.1493653804063797 and:0.09828052669763565 of:0.07797157764434814 with:0.026033949106931686 the:0.026013463735580444 in:0.023607488721609116 at:0.022763598710298538 for:0.02129414677619934\n",
"['humanitynhe']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['the']\n",
":0.22546915709972382 same:0.00775930006057024 state:0.007242937106639147 first:0.006000572349876165 city:0.005373408552259207 most:0.005041860044002533 people:0.005007922183722258 united:0.0049338326789438725\n",
"['s']\n",
":0.24743959307670593 and:0.022346865385770798 a:0.020876672118902206 c:0.02086692675948143 s:0.018633587285876274 w:0.015286792069673538 n:0.014412941411137581 to:0.014249380677938461\n",
"['is']\n",
":0.14304344356060028 a:0.07812074571847916 the:0.05577261000871658 not:0.04125296324491501 to:0.028534000739455223 in:0.02009972184896469 no:0.01618652231991291 that:0.013951911590993404\n",
"['just']\n",
"as:0.18960432708263397 :0.11830898374319077 the:0.02586853876709938 been:0.021793019026517868 a:0.021105173975229263 now:0.017591174691915512 what:0.017403941601514816 before:0.016763173043727875\n",
"['and']\n",
":0.1791187971830368 the:0.06385066360235214 a:0.01666204072535038 in:0.01507352851331234 that:0.012835350818932056 to:0.011688937433063984 it:0.010210222564637661 i:0.007507757283747196\n",
"['plansn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['nft']\n",
"to:0.23248440027236938 thence:0.13160960376262665 :0.0638410896062851 of:0.03282667696475983 dist:0.011081154458224773 chs:0.009778987616300583 n:0.009767229668796062 d:0.00975499302148819\n",
"['blossom']\n",
"and:0.10731347650289536 :0.09192359447479248 as:0.047144047915935516 of:0.029263164848089218 the:0.02507861703634262 in:0.020893795415759087 a:0.017187226563692093 with:0.01324430014938116\n",
"['nation']\n",
":0.11996717751026154 and:0.0790521502494812 of:0.0399576835334301 is:0.035113535821437836 in:0.03467998653650284 to:0.031281791627407074 the:0.025214601308107376 has:0.021718328818678856\n",
"['to']\n",
":0.1641007363796234 the:0.12734068930149078 be:0.05047796294093132 a:0.02106613852083683 make:0.012407870031893253 do:0.012361159548163414 have:0.012224489822983742 his:0.008027305826544762\n",
"['war']\n",
":0.11769980192184448 and:0.056337323039770126 of:0.03036773018538952 with:0.030054394155740738 department:0.029660487547516823 in:0.028225170448422432 the:0.02553398162126541 is:0.022466186434030533\n",
"['hope']\n",
"that:0.15963827073574066 of:0.126298725605011 to:0.09961661696434021 :0.08115705847740173 for:0.04386403411626816 and:0.036458127200603485 the:0.028813958168029785 he:0.016749925911426544\n",
"['thensoutherly']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['etstn']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['or']\n",
":0.20414391160011292 the:0.04081256687641144 a:0.018578192219138145 in:0.017286691814661026 any:0.015642661601305008 two:0.014492310583591461 to:0.013875987380743027 other:0.01381285022944212\n",
"['through']\n",
"the:0.3038175404071808 :0.18414166569709778 a:0.05022845044732094 which:0.021105455234646797 his:0.019041012972593307 this:0.018920745700597763 tho:0.017120618373155594 and:0.012150435708463192\n",
"['tncl']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['cn']\n",
":0.20513185858726501 the:0.07234837114810944 c:0.037284284830093384 a:0.0368281826376915 and:0.018847975879907608 s:0.012302741408348083 n:0.011280616745352745 per:0.01116586197167635\n",
"['eve']\n",
"of:0.2181807905435562 nning:0.170467808842659 :0.09835557639598846 and:0.04165113344788551 nry:0.024329079315066338 in:0.015969153493642807 to:0.015710139647126198 was:0.013154251500964165\n",
"['then']\n",
":0.17314893007278442 the:0.054305776953697205 th:0.027903184294700623 he:0.025194548070430756 a:0.01811600849032402 i:0.01776750013232231 to:0.017214776948094368 in:0.016991956159472466\n",
"['the']\n",
":0.22546915709972382 same:0.00775930006057024 state:0.007242937106639147 first:0.006000572349876165 city:0.005373408552259207 most:0.005041860044002533 people:0.005007922183722258 united:0.0049338326789438725\n",
"['heads']\n",
"of:0.2664720118045807 :0.18057966232299805 and:0.08985890448093414 the:0.030270153656601906 in:0.028018178418278694 to:0.02347172051668167 are:0.019005421549081802 or:0.01736212708055973\n",
"['uniformnresult']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['quaking']\n",
":0.13795071840286255 the:0.050966303795576096 of:0.046845439821481705 and:0.042942408472299576 to:0.03077627159655094 in:0.023394089192152023 a:0.01842011697590351 that:0.01086820662021637\n",
"['and']\n",
":0.1791187971830368 the:0.06385066360235214 a:0.01666204072535038 in:0.01507352851331234 that:0.012835350818932056 to:0.011688937433063984 it:0.010210222564637661 i:0.007507757283747196\n",
"['say']\n",
"that:0.21495364606380463 :0.11226599663496017 the:0.04550514370203018 to:0.03919023275375366 it:0.028015941381454468 i:0.022692859172821045 a:0.01814952678978443 in:0.017609383910894394\n",
"['a']\n",
":0.24112556874752045 few:0.016269860789179802 large:0.011636430397629738 man:0.010118708945810795 good:0.010030915029346943 great:0.009634408168494701 very:0.008648835122585297 little:0.008290580473840237\n",
"['wasn']\n",
":0.17828217148780823 a:0.06104143708944321 to:0.04011765867471695 the:0.035765957087278366 and:0.0352165661752224 in:0.03417397290468216 years:0.025945717468857765 per:0.017281075939536095\n",
"['an']\n",
":0.2524346113204956 old:0.018409626558423042 hour:0.016093363985419273 act:0.015224146656692028 order:0.011131629347801208 average:0.009394328109920025 opportunity:0.00728580541908741 article:0.006832430604845285\n",
"['regulation']\n",
"of:0.2519991993904114 :0.16746503114700317 and:0.07242421060800552 or:0.030094271525740623 in:0.019687307998538017 as:0.015688350424170494 the:0.013164816424250603 ofnthe:0.01093196403235197\n",
"['will']\n",
"be:0.2197660654783249 :0.12876218557357788 not:0.054395660758018494 have:0.023040270432829857 bo:0.012768621556460857 make:0.012733332812786102 do:0.010979394428431988 give:0.009366563521325588\n",
"['ofn']\n",
":0.11161091923713684 the:0.05351819470524788 and:0.045352645218372345 to:0.029555894434452057 in:0.02119685336947441 per:0.02113071084022522 a:0.02079508826136589 s:0.018661532551050186\n",
"['m']\n",
":0.2835228145122528 the:0.027956103906035423 and:0.02779671549797058 a:0.02013435587286949 in:0.015267008915543556 of:0.01501475740224123 e:0.013003258034586906 m:0.01203886978328228\n",
"['after']\n",
"the:0.20441681146621704