{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# xzcat -f1 train/in.tsv.xz | cut -f7,8 | sed 's/-\\\\n/ /g' | sed 's/\\\\n//g' | sed 's/\\\\//g' | ../kenlm/build/bin/lmplz -o 5 > kenlm_model.arpa\n", "# ../kenlm/build/bin/build_binary kenlm_model.arpa kenlm_model.binary " ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Reading train data...\n" ] } ], "source": [ "import regex as re\n", "\n", "# save train text to file\n", "\n", "def clean_string(text):\n", " text = text.lower()\n", " text = re.sub(r\" -\\\\*\\\\n\", \"\", text)\n", " text = re.sub(r\"\\\\n\", \" \", text)\n", " text = text.strip()\n", " return text\n", "\n", "train_text = \"\"\n", "print(\"Reading train data...\")\n", "with open(\"train/in.tsv\", encoding=\"utf8\", mode=\"rt\") as file, open(\"train/expected.tsv\", encoding=\"utf8\", mode=\"rt\") as expected:\n", " for t_line, e_line in zip(file, expected):\n", " t_line = t_line.split(\"\\t\")\n", " train_text += clean_string(t_line[-2]) + f\" {clean_string(e_line)} \" + clean_string(t_line[-1])\n", "\n", "# save train_text to file\n", "print(\"saving to file...\")\n", "with open(\"train_text.txt\", encoding=\"utf8\", mode=\"w\") as file:\n", " file.write(train_text)\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-7.822547912597656\n" ] } ], "source": [ "import kenlm\n", "\n", "path = 'test_model.binary'\n", "model = kenlm.Model(path)\n", "\n", "sentence = \"of the way\"\n", "print(model.score(sentence))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Run predictions on dev-0 data...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 8/10519 [08:16<40:44:33, 13.95s/it] " ] } ], "source": [ "from tqdm import tqdm\n", "import regex as re\n", "from nltk.tokenize import word_tokenize\n", "from english_words import get_english_words_set\n", "\n", "\n", "\n", "def clean_string(text):\n", " text = text.lower()\n", " text = re.sub(r\" -\\\\*\\\\n\", \"\", text)\n", " text = re.sub(r\"\\\\n\", \" \", text)\n", " text = text.strip()\n", " return text\n", "\n", "\n", "def get_word_predictions(w1, w2,):\n", " for word in get_english_words_set(['web2'], lower=True):\n", " sentence = w1 + ' ' + word + ' ' + w2\n", " text_score = model.score(sentence, bos=False, eos=False)\n", " yield((word, text_score))\n", "\n", "def argmax(w1,w2):\n", " # get top 10 predictions from predict_line\n", " top_10 = sorted(list(get_word_predictions(w1,w2)), key=lambda x: -x[1])[:10]\n", " output_line = \" \".join([\"{}:{:.8f}\".format(w, p) for w, p in top_10])\n", " return output_line\n", "\n", " # print(f\"{sentence}: {text_score}\")\n", "\n", " # probs = list(argmax(w1, w2, w4, w5, v, v2, v3))\n", " # sum_prob = sum(p for (w, p) in probs)\n", "\n", " # try:\n", " # probs = [(w, p / sum_prob) for w, p in probs]\n", " # except ZeroDivisionError:\n", " # return \"the:0.2 be:0.2 to:0.2 of:0.1 and:0.1 a:0.1 :0.1\"\n", "\n", " # top_probs = sorted(probs, key=lambda x: -x[1])[:4]\n", " # top_probs = [(w,p) for (w,p) in top_probs if p > 0]\n", " \n", " # del probs\n", " # del sum_prob\n", "\n", " # if len(top_probs) == 0:\n", " # return \"the:0.2 be:0.2 to:0.2 of:0.1 and:0.1 a:0.1 :0.1\"\n", " \n", " # left_prob = 1 - sum(p for (w, p) in top_probs)\n", " # if left_prob < 0.1:\n", " # left_prob = 0.1\n", "\n", " # output_line = \" \".join([\"{}:{:.8f}\".format(w, p) for w, p in top_probs])\n", " # output_line += \" :{:.8f}\".format(left_prob)\n", "\n", " # # print(f\"{w1} {w2} {w}\" for w in out_line.split(\" \"))\n", "\n", " # return output_line\n", "\n", "\n", "def run_predictions(source_folder):\n", " print(f\"Run predictions on {source_folder} data...\")\n", " \n", " with open(f\"{source_folder}/in.tsv\", encoding=\"utf8\", mode=\"rt\") as file:\n", " train_data = file.readlines()\n", "\n", " with open(f\"{source_folder}/out_kenlm.tsv\", \"w\", encoding=\"utf-8\") as output_file:\n", " for line in tqdm(train_data):\n", " line = line.split(\"\\t\")\n", " \n", " l1 = clean_string(line[-2])\n", " l2 = clean_string(line[-1])\n", "\n", " if not l1 or not l2:\n", " out_line = \"the:0.2 be:0.2 to:0.2 of:0.1 and:0.1 a:0.1 :0.1\"\n", " else:\n", " w1 = word_tokenize(l1)[-1:][0]\n", " w2 = word_tokenize(l2)[0][0] \n", " out_line = argmax(w1, w2)\n", " \n", " output_file.write(out_line + \"\\n\")\n", " \n", "\n", "run_predictions(\"dev-0\")\n", "# run_predictions(\"test-A\", V_counter, V2, V3, V4)\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "432022\n" ] } ], "source": [ "# with open(\"train/in.tsv\", encoding=\"utf8\", mode=\"rt\") as file:\n", "# train_data = file.readlines()\n", "# print(len(train_data))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['rin',\n", " '11K',\n", " 'ui',\n", " 'i',\n", " 'rsognfd',\n", " 'inlriliinnts',\n", " 'i',\n", " '>',\n", " 'r',\n", " 'the',\n", " 'town',\n", " 'ofy',\n", " '.-Jinn',\n", " ',',\n", " 'in',\n", " 'the',\n", " 'county',\n", " 'of',\n", " 'Lincoln',\n", " 'Rrspcrtfully',\n", " 'rop',\n", " 'HHont',\n", " ',',\n", " 'that',\n", " 'the',\n", " 'part',\n", " 'ol',\n", " 'said',\n", " 'town',\n", " 'whi',\n", " '<',\n", " 'h',\n", " 'they',\n", " 'inhabits',\n", " 'remote',\n", " 'from',\n", " 'tiie',\n", " 'viII',\n", " 'no',\n", " ',',\n", " 'and',\n", " 'tliat',\n", " 'they',\n", " 'are',\n", " 'so',\n", " 'sit',\n", " 'jutfd',\n", " '(',\n", " 'h',\n", " 'it',\n", " 'they',\n", " 'would',\n", " 'he',\n", " 'much',\n", " 'hotter',\n", " 'accomodated',\n", " ',',\n", " 'f',\n", " 'their',\n", " 'lands',\n", " 'were',\n", " 'to',\n", " '1',\n", " 'c',\n", " 'm',\n", " 'oil',\n", " '*',\n", " 'from',\n", " 'raid',\n", " 'town',\n", " 'ofMna',\n", " 'and',\n", " 'allix',\n", " '*',\n", " 'd',\n", " 'and',\n", " 'attached',\n", " 'to',\n", " 'flic',\n", " 'town',\n", " 'of',\n", " 'Wis',\n", " 'tassel',\n", " 'the',\n", " 'si',\n", " 'ire',\n", " 'town',\n", " 'of',\n", " 'tlio',\n", " 'County',\n", " ',',\n", " 'and',\n", " 'wherenost',\n", " 'of',\n", " 'their',\n", " 'hmdmss',\n", " 'is',\n", " 'transacted',\n", " '.',\n", " 'They',\n", " 'wouldIn',\n", " 'r',\n", " 'lore',\n", " 'petition',\n", " 'y',\n", " '<',\n", " 'tir',\n", " 'Hole',\n", " 'r.ible',\n", " 'body',\n", " ',',\n", " 'that',\n", " 'thelividing',\n", " 'line',\n", " 'of',\n", " 's.i',\n", " '.J',\n", " 'towns',\n", " '*',\n", " '»',\n", " 'f',\n", " 'Wiscns^ct',\n", " '«',\n", " 'mf',\n", " \"-'Jim\",\n", " '*',\n", " ',',\n", " 'nav',\n", " 'his',\n", " 'so',\n", " 'far',\n", " 'alt',\n", " 'rod',\n", " 'ns',\n", " 'to',\n", " 'include',\n", " 'their',\n", " 'farms',\n", " 'inmid',\n", " 'town',\n", " 'of',\n", " 'VViscasset',\n", " ',',\n", " 'and',\n", " 'the',\n", " '!',\n", " 'the',\n", " 'now',\n", " 'line',\n", " 'ofLi',\n", " 'vision',\n", " 'between',\n", " 'acid',\n", " 'towns',\n", " 'ninv',\n", " 'ho',\n", " 'as',\n", " 'fdlows',\n", " '*',\n", " 'vizlh',\n", " 'ginning',\n", " 'on',\n", " 'the',\n", " 'pi',\n", " 'scut',\n", " 'line',\n", " 'dividing',\n", " 'the',\n", " 'towns',\n", " 'oliVi.a',\n", " 'assct',\n", " 'and',\n", " \"A'in\",\n", " ',',\n", " 'at',\n", " 't',\n", " \"'\",\n", " '»',\n", " '«',\n", " 'southeast',\n", " 'corner',\n", " 'idSeorgc',\n", " 'Acorns',\n", " 'laud',\n", " 'in',\n", " 'said',\n", " 'Aina',\n", " 'and',\n", " 'riinninu',\n", " 'from',\n", " 'Northeasterly',\n", " 'hv',\n", " 'the',\n", " 'head',\n", " 'of',\n", " 'said',\n", " '.^corn',\n", " '’',\n", " 'sand',\n", " 'and',\n", " 'the',\n", " 'bonds',\n", " 'of',\n", " 'all',\n", " \"the'loisjadjoiiiiiig\",\n", " 'to',\n", " 'theVort',\n", " 'beast',\n", " 'Corner',\n", " 'of',\n", " 'the',\n", " 'l',\n", " '«',\n", " '»',\n", " 't',\n", " 'now',\n", " 'owned',\n", " 'by',\n", " 'Ja',\n", " 'nes',\n", " '*',\n", " '*',\n", " 'oyc',\n", " 'and',\n", " 'formerly',\n", " 'o',\n", " 'm',\n", " 'd',\n", " 'hv',\n", " 'tin',\n", " '*',\n", " 'late',\n", " 'Hon',\n", " '.',\n", " 'Abie',\n", " ')',\n", " 'Wood',\n", " ',',\n", " 'andbeingp-rt',\n", " 'oflotNo.12M',\n", " 'M.',\n", " 'on',\n", " 'Me',\n", " 'vccnics',\n", " 'piling',\n", " 'and',\n", " 'theme',\n", " '/list',\n", " 'Northwesterly',\n", " 'hvlie',\n", " 'North',\n", " 'line',\n", " 'id',\n", " 'said',\n", " 'lot',\n", " 'No',\n", " '.',\n", " '12',\n", " 'to',\n", " 'the',\n", " 'southeaster',\n", " 'y',\n", " 'he',\n", " 'id',\n", " 'of',\n", " 'land',\n", " 'owned',\n", " 'by',\n", " 'Whitcomb',\n", " '&',\n", " 'Groves',\n", " ',',\n", " 'hence',\n", " 'northeasterly',\n", " 'by',\n", " 'tiie',\n", " 'Inal',\n", " 'of',\n", " 'said',\n", " 'lot',\n", " 'to',\n", " 'tliolorlhonst',\n", " 'corner',\n", " 'thereof',\n", " ',',\n", " 'thence',\n", " 'northwesterly',\n", " 'to',\n", " 'Ihe',\n", " 'line',\n", " 'of',\n", " 'the',\n", " 'town',\n", " 'of',\n", " 'Dresden',\n", " ',',\n", " 'thence',\n", " '8',\n", " '<',\n", " '>',\n", " 'uthwrst',\n", " 'rly',\n", " 'by',\n", " 'said',\n", " 'Dresden',\n", " 'Inn',\n", " '*',\n", " ',',\n", " 'to',\n", " 'tbu',\n", " 'Sunth',\n", " 'westerlyorner',\n", " ',',\n", " 'of',\n", " 'the',\n", " 'present',\n", " 'dividing',\n", " 'line',\n", " ',',\n", " 'I',\n", " 'etwee',\n", " 'n',\n", " 'theown',\n", " '>',\n", " '‘',\n", " 'of',\n", " '’',\n", " '.J',\n", " 'Im',\n", " 'and',\n", " \"Wiscii'^et\",\n", " ',',\n", " 'and',\n", " 'thence',\n", " 'East-',\n", " 'joutb',\n", " 'easterly',\n", " ',',\n", " 'ly',\n", " 'said',\n", " 'town',\n", " 'lino',\n", " 'to',\n", " 'tiie',\n", " 'bounds',\n", " 'first',\n", " 'jMentioned',\n", " ',',\n", " 'v',\n", " 'jili',\n", " 'all',\n", " 'the',\n", " 'lands',\n", " 'lying',\n", " 'vvitbiu',\n", " 'tin',\n", " '*',\n", " 'loresaid',\n", " 'limits',\n", " 'and',\n", " 'that',\n", " 'ib',\n", " 'inhabitants',\n", " 'thereonvilli',\n", " 'their',\n", " 'goods',\n", " 'and',\n", " 'Estate',\n", " ',',\n", " 'may',\n", " 'be',\n", " 'set',\n", " 'oil',\n", " \"'\",\n", " 'fromaid',\n", " 'town',\n", " 'of',\n", " 'Aina',\n", " 'to',\n", " '»',\n", " '»',\n", " '»',\n", " 'id',\n", " 'town',\n", " 'of',\n", " 'Wiscassot.ton',\n", " 'County',\n", " 'feel',\n", " 'an',\n", " 'interest',\n", " 'in',\n", " '.',\n", " 'tn',\n", " 'great',\n", " 'is',\n", " 'sues',\n", " 'that',\n", " 'are',\n", " 'now',\n", " 'before',\n", " 'them',\n", " ',',\n", " 'and',\n", " 'whichare',\n", " 'the',\n", " 'bonds',\n", " 'of',\n", " 'cohesion',\n", " 'by',\n", " 'which',\n", " 'thegreat',\n", " 'Republican',\n", " 'parly',\n", " 'is',\n", " 'united',\n", " '.',\n", " 'I',\n", " 'per',\n", " '--',\n", " ':',\n", " 'ceive',\n", " 'that',\n", " 'the',\n", " 'principles',\n", " 'of',\n", " 'liberty',\n", " 'stillanimates',\n", " 'you',\n", " 'as',\n", " 'when',\n", " 'I',\n", " 'last',\n", " 'addressedyou',\n", " ',',\n", " 'and',\n", " 'I',\n", " 'rejoice',\n", " '.',\n", " 'It',\n", " 'is',\n", " 'not',\n", " 'in',\n", " 'the',\n", " 'na',\n", " 'ture',\n", " 'of',\n", " 'the',\n", " 'cause',\n", " 'of',\n", " 'human',\n", " 'freedom',\n", " 'to',\n", " 'diedie',\n", " 'out',\n", " 'of',\n", " 'the',\n", " 'human',\n", " 'heart',\n", " '.',\n", " 'We',\n", " 'repre',\n", " 'sent',\n", " 'the',\n", " 'righis',\n", " 'of',\n", " 'human',\n", " 'liberty',\n", " ',',\n", " 'the',\n", " 'sameprinciples',\n", " 'that',\n", " 'inspired',\n", " 'Jefferson',\n", " 'andJackson',\n", " ',',\n", " 'and',\n", " 'we',\n", " 'now',\n", " 'stand',\n", " 'where',\n", " 'we',\n", " 'al',\n", " 'ways',\n", " 'have',\n", " 'stood',\n", " ',',\n", " 'and',\n", " 'always',\n", " 'will',\n", " 'stand',\n", " ',',\n", " 'until',\n", " 'we',\n", " 'have',\n", " 'attained',\n", " 'our',\n", " 'ends',\n", " '.',\n", " 'Theelation',\n", " 'before',\n", " 'us',\n", " ',',\n", " 'it',\n", " 'is',\n", " 'true',\n", " \"'\",\n", " ',',\n", " 'is',\n", " 'not',\n", " 'a',\n", " ',',\n", " \"'\",\n", " 'na',\n", " 'tional',\n", " 'election',\n", " ',',\n", " 'and',\n", " 'it',\n", " 'is',\n", " 'true',\n", " 'that',\n", " 'we',\n", " 'neednot',\n", " 'necessarily',\n", " 'discuss',\n", " 'National',\n", " 'issues',\n", " ',',\n", " 'but',\n", " 'it',\n", " 'is',\n", " 'also',\n", " 'true',\n", " 'that',\n", " 'the',\n", " 'Republican',\n", " 'par',\n", " 'ty',\n", " 'is',\n", " 'National',\n", " 'in',\n", " 'its',\n", " 'and',\n", " 'design',\n", " ',',\n", " 'and',\n", " 'hence',\n", " ',',\n", " 'every',\n", " 'election',\n", " ',',\n", " 'be',\n", " 'it',\n", " 'of',\n", " 'State.or',\n", " ';',\n", " 'County',\n", " ',',\n", " 'or',\n", " 'of',\n", " 'town',\n", " ',',\n", " 'or',\n", " 'of',\n", " 'city',\n", " ',',\n", " 'partakesalike',\n", " 'of',\n", " 'a',\n", " 'National',\n", " 'nature',\n", " ',',\n", " 'and',\n", " 'their',\n", " 're',\n", " 'sults',\n", " 'enter',\n", " 'into',\n", " 'all',\n", " 'our',\n", " 'general',\n", " 'concerns.But',\n", " 'I',\n", " 'now',\n", " 'propose',\n", " 'to',\n", " 'speak',\n", " 'to',\n", " 'you',\n", " 'offacts',\n", " 'which',\n", " 'more',\n", " 'immediately',\n", " 'interestyou',\n", " '.',\n", " 'I',\n", " 'am',\n", " 'before',\n", " 'you',\n", " 'as',\n", " 'your',\n", " 'candidatefor',\n", " 'Governor',\n", " 'not',\n", " 'of',\n", " 'my',\n", " 'own',\n", " 'choice',\n", " ',',\n", " \"'\",\n", " 'Imay',\n", " 'justly',\n", " 'say',\n", " '.',\n", " 'Ody',\n", " 'ambition',\n", " 'was',\n", " 'satis',\n", " 'fied',\n", " 'with',\n", " 'one',\n", " 'term',\n", " ',',\n", " 'and',\n", " 'I',\n", " 'had',\n", " 'hoped',\n", " 'to',\n", " 're',\n", " 'tire',\n", " 'from',\n", " 'the',\n", " 'cares',\n", " 'of',\n", " 'office',\n", " 'to',\n", " 'devote',\n", " 'mytime',\n", " 'to',\n", " 'interests',\n", " 'of',\n", " 'a',\n", " 'private',\n", " 'nature',\n", " '.',\n", " 'Yetsummoned',\n", " 'as',\n", " 'I',\n", " 'was',\n", " ',',\n", " 'by',\n", " 'the',\n", " 'unanimouschoice',\n", " 'of',\n", " 'your',\n", " 'representatives',\n", " 'in',\n", " 'Conven',\n", " 'tion',\n", " ',',\n", " 'I',\n", " 'felt',\n", " 'constrained',\n", " 'to',\n", " 'accept',\n", " 'the',\n", " 'callof',\n", " '.',\n", " 'the',\n", " 'Republican',\n", " 'party',\n", " ',',\n", " 'and',\n", " 'I',\n", " 'am',\n", " 'hereto',\n", " 'open',\n", " 'to',\n", " 'you',\n", " 'my',\n", " 'heart',\n", " 'and',\n", " 'my',\n", " 'mind',\n", " 'up',\n", " 'on',\n", " 'public',\n", " 'questions',\n", " 'in',\n", " 'which',\n", " 'you',\n", " 'justlymanifest',\n", " 'a',\n", " 'deep',\n", " 'interest',\n", " '.']" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from nltk.tokenize import word_tokenize\n", "word_tokenize(text)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'the': 9065021,\n", " 'of': 5472207,\n", " 'and': 4299259,\n", " 'to': 3575612,\n", " 'a': 2710622,\n", " 'in': 2686894,\n", " 'that': 1467928,\n", " 'is': 1279167,\n", " 'it': 1167772,\n", " 'for': 1144284,\n", " 'be': 992701,\n", " 'was': 986130,\n", " 'as': 879790,\n", " 'at': 863453,\n", " 'by': 858066,\n", " 'on': 819505,\n", " 'i': 816076,\n", " 'with': 794078,\n", " 'he': 776888,\n", " 'or': 674438,\n", " 'this': 627203,\n", " 'his': 618101,\n", " 'not': 604947,\n", " 'from': 576711,\n", " 'which': 572596,\n", " 'are': 528619,\n", " 'will': 519112,\n", " 'have': 513257,\n", " 's': 489456,\n", " 'tho': 465585,\n", " 'all': 463084,\n", " 'but': 460675,\n", " 'they': 450993,\n", " 'an': 420170,\n", " 'one': 413809,\n", " 'had': 396904,\n", " 'has': 386379,\n", " 'their': 377294,\n", " 'been': 374978,\n", " 'no': 366339,\n", " 'said': 353115,\n", " 'were': 348313,\n", " 'who': 342015,\n", " 'we': 319853,\n", " 'there': 311264,\n", " 'would': 290263,\n", " '1': 286386,\n", " 't': 275743,\n", " 'so': 272336,\n", " 'if': 271926,\n", " 'any': 269024,\n", " 'when': 268129,\n", " 'her': 258976,\n", " 'them': 240990,\n", " 'him': 237535,\n", " 'mr': 229137,\n", " 'its': 224384,\n", " 'you': 223369,\n", " 'out': 222458,\n", " 'our': 213779,\n", " 'other': 213610,\n", " 'time': 211490,\n", " 'more': 207219,\n", " 'upon': 200290,\n", " 'than': 199152,\n", " 'made': 198649,\n", " 'up': 197991,\n", " 'day': 194396,\n", " 'such': 193026,\n", " 'two': 192820,\n", " 'may': 192332,\n", " 'tbe': 190738,\n", " 'some': 183696,\n", " 'state': 179728,\n", " 'j': 178635,\n", " 'do': 176230,\n", " 'man': 175854,\n", " 'now': 174816,\n", " 'can': 174633,\n", " 'she': 172474,\n", " 'm': 166226,\n", " 'into': 166143,\n", " 'e': 166003,\n", " 'w': 164759,\n", " 'about': 164037,\n", " 'n': 163632,\n", " 'new': 162739,\n", " 'l': 158739,\n", " 'my': 158632,\n", " 'only': 155874,\n", " 'men': 155281,\n", " 'city': 149928,\n", " 'ing': 149573,\n", " 'then': 149545,\n", " 'shall': 148173,\n", " 'these': 145383,\n", " 'after': 144729,\n", " 'should': 142414,\n", " 'o': 140683,\n", " 'over': 140671,\n", " 'great': 139053,\n", " 'county': 135720,\n", " 'good': 135681,\n", " 'very': 135509,\n", " 'what': 135139,\n", " 'every': 134754,\n", " 'r': 134054,\n", " 'years': 133524,\n", " 'd': 133321,\n", " 'c': 132482,\n", " 'being': 130985,\n", " 'people': 130583,\n", " 'first': 127281,\n", " '000': 127084,\n", " 're': 125442,\n", " 'many': 124439,\n", " 'most': 123285,\n", " 'could': 123230,\n", " 'under': 122289,\n", " 'h': 121514,\n", " 'before': 118539,\n", " 'well': 118108,\n", " 'per': 114940,\n", " 'last': 114552,\n", " 'work': 113010,\n", " 'same': 112079,\n", " 'where': 111579,\n", " 'me': 111346,\n", " 'f': 110556,\n", " 'mrs': 108039,\n", " 'those': 107671,\n", " 'ot': 107631,\n", " 'feet': 106860,\n", " 'much': 106570,\n", " 'year': 104062,\n", " 'make': 103103,\n", " 'states': 101683,\n", " 'three': 99943,\n", " 'while': 97401,\n", " 'house': 97187,\n", " 'also': 95849,\n", " 'old': 95558,\n", " 'through': 94245,\n", " 'each': 93521,\n", " 'way': 93193,\n", " 'country': 92494,\n", " 'tion': 92215,\n", " 'us': 92158,\n", " 'little': 92011,\n", " 'court': 90894,\n", " 'place': 90642,\n", " 'down': 90465,\n", " '2': 90005,\n", " 'b': 89797,\n", " 'must': 89316,\n", " 'did': 88750,\n", " 'land': 88682,\n", " 'north': 87040,\n", " 'con': 85792,\n", " 'part': 85665,\n", " 'south': 85226,\n", " 'your': 85192,\n", " 'street': 84360,\n", " 'aud': 83993,\n", " 'public': 81839,\n", " 'law': 81740,\n", " 'long': 81409,\n", " 'without': 81332,\n", " 'here': 80105,\n", " 'against': 79394,\n", " 'de': 78915,\n", " 'th': 77471,\n", " 'u': 76398,\n", " 'ed': 76228,\n", " 'until': 75857,\n", " 'p': 75604,\n", " 'take': 75389,\n", " 'large': 75219,\n", " 'united': 75181,\n", " 'line': 74996,\n", " 'right': 74664,\n", " 'few': 74474,\n", " 'general': 74442,\n", " 'ol': 74202,\n", " 'life': 73885,\n", " 'west': 73557,\n", " 'like': 73209,\n", " 'own': 72963,\n", " 'bo': 72946,\n", " 'found': 72887,\n", " 'never': 72376,\n", " '4': 72237,\n", " 'company': 71150,\n", " 'present': 70655,\n", " '3': 70322,\n", " 'go': 70233,\n", " 'water': 70171,\n", " 'money': 69656,\n", " 'just': 69335,\n", " 'party': 68859,\n", " 'government': 68460,\n", " 'home': 68371,\n", " 'ho': 67622,\n", " 'even': 66865,\n", " 'days': 66663,\n", " 'lie': 65871,\n", " 'business': 64810,\n", " 'ever': 64807,\n", " 'get': 64435,\n", " 'interest': 64157,\n", " '10': 63963,\n", " 'how': 63854,\n", " 'war': 63838,\n", " 'taken': 63488,\n", " 'during': 62969,\n", " 'given': 62934,\n", " 'see': 62869,\n", " 'four': 62746,\n", " 'come': 62435,\n", " 'case': 61818,\n", " 'having': 61386,\n", " 'came': 60657,\n", " 'know': 60620,\n", " 'side': 60173,\n", " 'com': 60088,\n", " 'between': 60033,\n", " 'order': 60029,\n", " 'back': 59161,\n", " 'give': 58993,\n", " 'st': 58879,\n", " 'iu': 58846,\n", " 'john': 58509,\n", " 'say': 58438,\n", " 'best': 58191,\n", " 'put': 58187,\n", " 'too': 58037,\n", " 'half': 57773,\n", " 'office': 57699,\n", " 'thence': 57646,\n", " 'lot': 57528,\n", " 'fact': 57223,\n", " 'known': 57118,\n", " 'both': 56984,\n", " 'power': 56978,\n", " 'number': 56772,\n", " 'night': 56261,\n", " 'la': 56044,\n", " 'world': 55992,\n", " 'president': 55991,\n", " 'another': 55779,\n", " 'district': 55515,\n", " 'v': 55512,\n", " 'next': 55126,\n", " 'less': 55053,\n", " 'ii': 54831,\n", " 'went': 54645,\n", " 'york': 54529,\n", " 'far': 54511,\n", " 'within': 53995,\n", " 'ex': 53978,\n", " 'left': 53894,\n", " 'young': 53382,\n", " 'town': 53122,\n", " 'off': 53096,\n", " '5': 52989,\n", " 'hundred': 52853,\n", " '8': 52792,\n", " 'east': 52776,\n", " 'five': 52647,\n", " 'point': 52614,\n", " 'use': 52450,\n", " '*': 51877,\n", " 'pay': 51822,\n", " 'among': 51741,\n", " 'yet': 51263,\n", " 'several': 51056,\n", " 'done': 50859,\n", " 'bill': 50841,\n", " 'white': 50826,\n", " 'nnd': 50740,\n", " 'held': 50550,\n", " 'property': 50547,\n", " 'road': 50330,\n", " 'might': 50244,\n", " 'board': 49911,\n", " 'again': 49873,\n", " 'high': 49557,\n", " 'whole': 49391,\n", " 'miss': 48883,\n", " 'g': 48808,\n", " 'act': 48591,\n", " 'still': 48504,\n", " 'hand': 48430,\n", " 'end': 48330,\n", " 'matter': 48328,\n", " 'away': 48199,\n", " 'sale': 48080,\n", " 'ment': 47671,\n", " 'ten': 47613,\n", " 'because': 47468,\n", " 'school': 47413,\n", " 'twenty': 47404,\n", " 'above': 47384,\n", " 'called': 46828,\n", " 'american': 46822,\n", " 'y': 46356,\n", " 'cent': 46222,\n", " 'amount': 46115,\n", " 'course': 45302,\n", " 'ago': 45238,\n", " 'small': 45187,\n", " 'week': 45112,\n", " 'six': 45092,\n", " 'used': 44799,\n", " 'section': 44395,\n", " 'since': 44346,\n", " 'dr': 44303,\n", " 'once': 44211,\n", " 'took': 44000,\n", " '11': 43914,\n", " 'ami': 43913,\n", " '7': 43733,\n", " 'himself': 43626,\n", " 'nothing': 43490,\n", " 'paid': 43343,\n", " 'better': 43336,\n", " 'am': 43321,\n", " 'let': 43230,\n", " 'bad': 43152,\n", " 'soon': 43000,\n", " 'clock': 42944,\n", " 'however': 42464,\n", " 'head': 42236,\n", " 'k': 42178,\n", " 'en': 42174,\n", " 'does': 42024,\n", " 'certain': 41908,\n", " 'along': 41676,\n", " 'pro': 41173,\n", " 'body': 40913,\n", " 'near': 40745,\n", " 'committee': 40642,\n", " 'thing': 40575,\n", " 'question': 40132,\n", " 'cause': 40071,\n", " 'full': 40009,\n", " 'others': 39921,\n", " 'set': 39912,\n", " 'brought': 39789,\n", " 'al': 39459,\n", " 'think': 39390,\n", " 'making': 39357,\n", " 'miles': 39337,\n", " 'thought': 39327,\n", " 'second': 39271,\n", " 'morning': 39184,\n", " 'though': 39178,\n", " 'times': 39105,\n", " 'girl': 38804,\n", " 'boy': 38784,\n", " '6': 38763,\n", " 'co': 38623,\n", " 'room': 38449,\n", " 'following': 38325,\n", " 'name': 38301,\n", " 'wife': 38295,\n", " 'church': 38274,\n", " 'dollars': 38002,\n", " 'always': 37648,\n", " 'enough': 37486,\n", " 'thus': 37477,\n", " 'un': 37410,\n", " 'almost': 37402,\n", " 'cannot': 37223,\n", " 'able': 37192,\n", " 'river': 36841,\n", " 'find': 36795,\n", " '00': 36793,\n", " 'ground': 36537,\n", " 'due': 36444,\n", " 'children': 36286,\n", " 'got': 36227,\n", " 'free': 36206,\n", " 'light': 36137,\n", " 'action': 36062,\n", " 'ia': 36049,\n", " 'washington': 35891,\n", " 'friends': 35600,\n", " 'says': 35599,\n", " 'stock': 35587,\n", " 'lo': 35573,\n", " 'whom': 35563,\n", " 'whose': 35346,\n", " 'service': 35273,\n", " 'received': 35272,\n", " 'means': 34777,\n", " 'person': 34759,\n", " 'necessary': 34700,\n", " 'nor': 34676,\n", " 'told': 34675,\n", " 'death': 34557,\n", " 'sent': 34369,\n", " 'further': 34226,\n", " 'purpose': 34128,\n", " 'er': 34115,\n", " 'things': 34079,\n", " 'tha': 33661,\n", " 'congress': 33650,\n", " 'bis': 33499,\n", " 'passed': 33493,\n", " 'seen': 33484,\n", " 'national': 33330,\n", " 'building': 33234,\n", " 'keep': 33214,\n", " 'front': 33196,\n", " 'block': 33088,\n", " 'real': 33028,\n", " 'aa': 32803,\n", " 'going': 32767,\n", " 'past': 32699,\n", " 'whether': 32622,\n", " 'months': 32443,\n", " 'dis': 32419,\n", " 'ly': 32398,\n", " 'true': 32297,\n", " 'sum': 32271,\n", " 'woman': 32180,\n", " 'subject': 32114,\n", " '50': 32109,\n", " 'either': 32013,\n", " 'railroad': 31994,\n", " 'son': 31985,\n", " 'members': 31976,\n", " 'union': 31922,\n", " 'system': 31839,\n", " '0': 31799,\n", " 'gold': 31698,\n", " 'around': 31668,\n", " 'persons': 31587,\n", " '20': 31585,\n", " 'sold': 31542,\n", " 'duty': 31529,\n", " 'market': 31376,\n", " 'least': 31270,\n", " 'show': 31147,\n", " 'form': 30989,\n", " 'hands': 30983,\n", " '12': 30964,\n", " 'saw': 30856,\n", " 'tlie': 30853,\n", " 'family': 30818,\n", " 'cost': 30746,\n", " 'report': 30665,\n", " 'why': 30549,\n", " 'nearly': 30520,\n", " 'election': 30453,\n", " 'short': 30337,\n", " 'price': 30306,\n", " 'become': 30266,\n", " 'notice': 30132,\n", " 'look': 30122,\n", " 'condition': 30013,\n", " '30': 29989,\n", " 'open': 29981,\n", " 'meeting': 29913,\n", " 'kind': 29855,\n", " 'lots': 29836,\n", " 'corner': 29771,\n", " 'women': 29510,\n", " 'together': 29506,\n", " 'possible': 29491,\n", " 'ihe': 29406,\n", " 'gave': 29384,\n", " '100': 29274,\n", " 'themselves': 29250,\n", " 'reason': 29105,\n", " 'labor': 29043,\n", " 'ter': 29006,\n", " 'judge': 28965,\n", " 'vote': 28927,\n", " 'result': 28914,\n", " 'third': 28722,\n", " 'run': 28717,\n", " 'fair': 28653,\n", " 'tin': 28635,\n", " 'value': 28498,\n", " 'mortgage': 28465,\n", " 'eight': 28464,\n", " 'ad': 28331,\n", " 'position': 28286,\n", " 'evening': 28178,\n", " 'wo': 28163,\n", " 'thereof': 28056,\n", " '9': 28023,\n", " 'tor': 27921,\n", " 'lor': 27839,\n", " 'provided': 27801,\n", " 'bank': 27781,\n", " 'cut': 27746,\n", " 'im': 27696,\n", " 'described': 27667,\n", " 'believe': 27648,\n", " 'hour': 27634,\n", " 'paper': 27584,\n", " 'hold': 27567,\n", " 'live': 27543,\n", " '15': 27446,\n", " 'acres': 27399,\n", " 'god': 27370,\n", " 'early': 27298,\n", " '25': 27277,\n", " 'quarter': 27190,\n", " 'thirty': 27132,\n", " 'want': 27115,\n", " 'therefore': 27093,\n", " 'late': 27091,\n", " 'call': 26989,\n", " 'charge': 26959,\n", " 'heard': 26900,\n", " 'army': 26885,\n", " 'effect': 26707,\n", " 'waa': 26689,\n", " 'laws': 26659,\n", " 'face': 26638,\n", " 'oi': 26628,\n", " 'cents': 26425,\n", " 'stand': 26384,\n", " 'age': 26226,\n", " 'kept': 26167,\n", " 'fire': 26109,\n", " 'tne': 26103,\n", " 'date': 25797,\n", " 'placed': 25621,\n", " 'common': 25606,\n", " 'mind': 25554,\n", " 'william': 25538,\n", " 'march': 25514,\n", " 'door': 25482,\n", " 'heart': 25459,\n", " 'republican': 25359,\n", " 'aid': 25218,\n", " 'special': 25161,\n", " 'force': 25130,\n", " 'ap': 25055,\n", " 'beginning': 25026,\n", " 'thousand': 25006,\n", " 'secretary': 25005,\n", " 'strong': 24999,\n", " 'ac': 24935,\n", " 'claim': 24931,\n", " 'farm': 24896,\n", " 'officers': 24822,\n", " 'father': 24813,\n", " 'estate': 24803,\n", " 'political': 24619,\n", " 'tax': 24583,\n", " 'except': 24565,\n", " 'manner': 24525,\n", " 'cases': 24524,\n", " 'lands': 24481,\n", " 'department': 24456,\n", " 'ar': 24372,\n", " 'hard': 24357,\n", " 'already': 24286,\n", " 'proper': 24281,\n", " 'hi': 24258,\n", " 'required': 24237,\n", " 'low': 24225,\n", " 'air': 24213,\n", " 'trust': 24206,\n", " 'asked': 24203,\n", " 'james': 24201,\n", " 'blood': 24189,\n", " 'book': 24186,\n", " 'meet': 24156,\n", " 'poor': 24116,\n", " 'fall': 24072,\n", " 'george': 24063,\n", " 'trade': 24019,\n", " 'big': 23973,\n", " 'quite': 23900,\n", " 'car': 23566,\n", " 'ready': 23514,\n", " 'often': 23510,\n", " 'close': 23374,\n", " 'field': 23359,\n", " 'bonds': 23337,\n", " 'read': 23320,\n", " 'attention': 23309,\n", " 'view': 23229,\n", " 'class': 23192,\n", " 'red': 23181,\n", " 'hut': 23150,\n", " 'care': 23107,\n", " 'mother': 23095,\n", " 'black': 23081,\n", " 'tell': 23073,\n", " 'deed': 23072,\n", " 'return': 23011,\n", " 'gen': 23006,\n", " 'tions': 22984,\n", " 'lost': 22978,\n", " 'something': 22884,\n", " 'favor': 22766,\n", " 'nt': 22733,\n", " 'rate': 22629,\n", " 'health': 22618,\n", " 'weeks': 22573,\n", " 'fine': 22567,\n", " 'oil': 22549,\n", " 'taking': 22481,\n", " 'hereby': 22425,\n", " 'follows': 22375,\n", " 'hours': 22368,\n", " 'hope': 22366,\n", " 'july': 22337,\n", " 'letter': 22320,\n", " 'seven': 22309,\n", " 'turned': 22295,\n", " 'pre': 22155,\n", " 'change': 22088,\n", " 'yesterday': 22085,\n", " 'demand': 22065,\n", " 'don': 22026,\n", " 'corn': 22006,\n", " 'governor': 21960,\n", " 'democratic': 21956,\n", " 'senate': 21946,\n", " 'need': 21937,\n", " 'coming': 21932,\n", " 'prices': 21903,\n", " 'try': 21890,\n", " 'knew': 21885,\n", " 'eyes': 21879,\n", " 'virginia': 21859,\n", " 'carried': 21841,\n", " 'minutes': 21785,\n", " 'train': 21780,\n", " 'opinion': 21763,\n", " 'itself': 21699,\n", " 'doubt': 21693,\n", " 'leave': 21663,\n", " 'grand': 21626,\n", " 'account': 21583,\n", " 'month': 21563,\n", " 'nature': 21520,\n", " 'citizens': 21483,\n", " 'sell': 21470,\n", " 'food': 21406,\n", " 'rather': 21324,\n", " 'western': 21296,\n", " 'nation': 21288,\n", " 'character': 21283,\n", " 'bring': 21268,\n", " 'although': 21250,\n", " 'ns': 21219,\n", " 'seems': 21196,\n", " 'probably': 21095,\n", " 'southern': 21015,\n", " 'dead': 20966,\n", " 'worth': 20918,\n", " 'anything': 20847,\n", " 'began': 20846,\n", " 'li': 20795,\n", " 'child': 20729,\n", " 'silver': 20723,\n", " 'according': 20646,\n", " 'fifty': 20565,\n", " 'hall': 20553,\n", " 'important': 20539,\n", " 'charles': 20488,\n", " 'smith': 20474,\n", " 'chief': 20472,\n", " 'doing': 20464,\n", " 'love': 20462,\n", " 'turn': 20440,\n", " 'june': 20423,\n", " 'ti': 20420,\n", " 'senator': 20412,\n", " 'feel': 20405,\n", " 'wheat': 20394,\n", " 'latter': 20375,\n", " 'entire': 20375,\n", " 'iron': 20371,\n", " 'heavy': 20328,\n", " 'story': 20296,\n", " 'different': 20231,\n", " 'record': 20197,\n", " 'il': 20195,\n", " 'met': 20185,\n", " 'ou': 20136,\n", " 'terms': 20037,\n", " 'ton': 19905,\n", " 'spring': 19903,\n", " 'became': 19897,\n", " '13': 19826,\n", " 'peace': 19819,\n", " 'seemed': 19796,\n", " 'ship': 19773,\n", " 'fully': 19727,\n", " 'ill': 19709,\n", " 'various': 19682,\n", " 'post': 19655,\n", " 'horse': 19616,\n", " 'named': 19559,\n", " 'running': 19554,\n", " 'gone': 19525,\n", " 'avenue': 19521,\n", " 'range': 19503,\n", " 'mo': 19496,\n", " 'reached': 19480,\n", " 'ha': 19462,\n", " 'plan': 19424,\n", " 'season': 19391,\n", " 'clerk': 19340,\n", " 'appear': 19315,\n", " 'inches': 19302,\n", " 'convention': 19295,\n", " 'living': 19292,\n", " 'portion': 19268,\n", " 'help': 19267,\n", " 'member': 19224,\n", " 'perhaps': 19219,\n", " 'chicago': 19191,\n", " 'aad': 19174,\n", " 'later': 19171,\n", " 'places': 19117,\n", " 'rest': 19087,\n", " 'main': 19080,\n", " 'rights': 19076,\n", " '40': 19053,\n", " 'conditions': 19053,\n", " 'april': 19024,\n", " 'future': 19023,\n", " 'greater': 19019,\n", " 'constitution': 18998,\n", " 'foot': 18993,\n", " 'words': 18974,\n", " 'success': 18973,\n", " 'justice': 18935,\n", " 'hill': 18855,\n", " 'und': 18825,\n", " 'streets': 18812,\n", " 'sec': 18737,\n", " 'crop': 18729,\n", " 'forty': 18729,\n", " 'today': 18701,\n", " 'loss': 18680,\n", " '14': 18646,\n", " 'friend': 18607,\n", " 'word': 18596,\n", " 'alone': 18554,\n", " 'local': 18551,\n", " 'sea': 18522,\n", " 'lu': 18511,\n", " 'payment': 18495,\n", " 'laid': 18465,\n", " 'generally': 18461,\n", " 'winter': 18458,\n", " 'col': 18410,\n", " 'majority': 18392,\n", " 'support': 18372,\n", " 'history': 18307,\n", " 'till': 18239,\n", " 'regard': 18214,\n", " 'earth': 18210,\n", " 'england': 18198,\n", " 'nine': 18193,\n", " 'aro': 18154,\n", " 'cash': 18104,\n", " 'cotton': 18065,\n", " 'ohio': 18020,\n", " 'foreign': 17966,\n", " 'interests': 17953,\n", " 'king': 17938,\n", " 'judgment': 17937,\n", " 'makes': 17916,\n", " 'stated': 17901,\n", " 'toward': 17897,\n", " 'lower': 17875,\n", " 'wit': 17873,\n", " 'equal': 17867,\n", " 'mary': 17853,\n", " 'wood': 17838,\n", " 'capital': 17817,\n", " 'parties': 17781,\n", " 'felt': 17778,\n", " 'looked': 17754,\n", " 'died': 17744,\n", " 'pass': 17737,\n", " '18': 17726,\n", " 'arc': 17712,\n", " 'moment': 17704,\n", " 'afternoon': 17700,\n", " 'ty': 17691,\n", " 'period': 17680,\n", " 'lines': 17648,\n", " 'returned': 17621,\n", " 'unless': 17614,\n", " 'increase': 17599,\n", " 'idea': 17586,\n", " 'private': 17576,\n", " '16': 17569,\n", " 'lake': 17550,\n", " 'ber': 17529,\n", " 'giving': 17519,\n", " 'cold': 17504,\n", " 'personal': 17444,\n", " 'lay': 17421,\n", " 'farmers': 17403,\n", " 'degrees': 17385,\n", " 'policy': 17359,\n", " 'ma': 17356,\n", " 'ft': 17331,\n", " 'henry': 17327,\n", " 'cor': 17305,\n", " 'territory': 17287,\n", " 'disease': 17279,\n", " 'comes': 17200,\n", " 'supply': 17199,\n", " 'es': 17189,\n", " 'spirit': 17174,\n", " 'boys': 17143,\n", " 'brown': 17138,\n", " 'followed': 17117,\n", " 'ought': 17101,\n", " 'secured': 17039,\n", " 'township': 17004,\n", " 'secure': 16912,\n", " 'carry': 16885,\n", " 'society': 16870,\n", " 'shown': 16865,\n", " 'fore': 16850,\n", " 'au': 16850,\n", " 'sure': 16847,\n", " 'human': 16805,\n", " 'monday': 16785,\n", " 'especially': 16784,\n", " 'entirely': 16724,\n", " 'tbo': 16675,\n", " 'rich': 16674,\n", " 'clear': 16614,\n", " 'farmer': 16596,\n", " 'soil': 16560,\n", " 'trouble': 16534,\n", " 'elected': 16524,\n", " 'coal': 16521,\n", " 'ward': 16506,\n", " 'stone': 16477,\n", " 'self': 16457,\n", " 'america': 16439,\n", " 'taxes': 16396,\n", " 'll': 16384,\n", " 'tried': 16360,\n", " 'ana': 16325,\n", " 'former': 16324,\n", " 'term': 16310,\n", " 'honor': 16306,\n", " 'ordered': 16303,\n", " 'sunday': 16291,\n", " 'premises': 16249,\n", " 'started': 16245,\n", " 'bed': 16221,\n", " 'goods': 16187,\n", " 'instead': 16184,\n", " 'thomas': 16159,\n", " 'trial': 16141,\n", " 'across': 16122,\n", " 'beautiful': 16119,\n", " 'pa': 16110,\n", " 'strength': 16083,\n", " 'allowed': 16073,\n", " 'deal': 16044,\n", " 'port': 15990,\n", " 'lady': 15937,\n", " 'highest': 15934,\n", " 'parts': 15933,\n", " 'pounds': 15929,\n", " 'island': 15921,\n", " 'top': 15883,\n", " 'deep': 15883,\n", " 'session': 15874,\n", " 'recorded': 15839,\n", " 'control': 15819,\n", " 'served': 15812,\n", " 'entered': 15787,\n", " 'military': 15785,\n", " 'tl': 15751,\n", " 'none': 15751,\n", " 'stood': 15751,\n", " 'french': 15748,\n", " 'answer': 15742,\n", " 'seem': 15725,\n", " 'saturday': 15654,\n", " 'legislature': 15644,\n", " 'sun': 15644,\n", " 'sufficient': 15627,\n", " '17': 15585,\n", " 'houses': 15573,\n", " 'rev': 15552,\n", " 'article': 15541,\n", " 'evidence': 15538,\n", " 'expected': 15532,\n", " 'statement': 15512,\n", " '500': 15496,\n", " 'object': 15493,\n", " 'thc': 15493,\n", " 'built': 15483,\n", " 'win': 15459,\n", " 'suit': 15456,\n", " 'reported': 15446,\n", " 'attorney': 15443,\n", " 'club': 15436,\n", " 'fur': 15432,\n", " 'note': 15422,\n", " 'officer': 15418,\n", " 'total': 15411,\n", " 'distance': 15389,\n", " 'ono': 15385,\n", " 'january': 15384,\n", " 'cure': 15376,\n", " 'council': 15371,\n", " 'issue': 15364,\n", " 'se': 15350,\n", " 'immediately': 15310,\n", " 'race': 15306,\n", " 'san': 15278,\n", " 'green': 15273,\n", " 'wa': 15230,\n", " 'looking': 15218,\n", " 'debt': 15201,\n", " 'firm': 15194,\n", " 'ers': 15175,\n", " 'louis': 15158,\n", " 'roads': 15145,\n", " 'ne': 15143,\n", " 'hat': 15138,\n", " 'twelve': 15108,\n", " 'forth': 15093,\n", " 'claims': 15090,\n", " 'higher': 15077,\n", " 'offered': 15065,\n", " 'id': 15058,\n", " 'august': 15049,\n", " 'finally': 15046,\n", " 'receive': 15035,\n", " 'captain': 15012,\n", " 'fell': 15011,\n", " 'commission': 14989,\n", " 'havo': 14976,\n", " 'bear': 14965,\n", " 'bv': 14962,\n", " 'dakota': 14960,\n", " 'ness': 14948,\n", " 'issued': 14938,\n", " 'husband': 14926,\n", " 'proposed': 14925,\n", " 'points': 14912,\n", " 'principal': 14901,\n", " 'killed': 14901,\n", " 'won': 14890,\n", " 'wide': 14874,\n", " 'le': 14849,\n", " 'tie': 14828,\n", " 'getting': 14805,\n", " 'store': 14797,\n", " 'etc': 14782,\n", " 'single': 14779,\n", " 'schools': 14751,\n", " 'news': 14736,\n", " 'natural': 14726,\n", " 'direction': 14706,\n", " 'opened': 14684,\n", " 'police': 14681,\n", " 'dry': 14666,\n", " 'whatever': 14661,\n", " 'game': 14652,\n", " 'below': 14648,\n", " 'trees': 14631,\n", " 'quiet': 14630,\n", " 'follow': 14622,\n", " 'hear': 14621,\n", " 'desire': 14621,\n", " 'mining': 14592,\n", " 'summer': 14561,\n", " 'ai': 14560,\n", " 'ir': 14555,\n", " 'addition': 14547,\n", " 'page': 14484,\n", " 'fourth': 14476,\n", " 'beyond': 14424,\n", " 'press': 14377,\n", " 'average': 14376,\n", " 'dated': 14368,\n", " 'led': 14362,\n", " 'regular': 14336,\n", " 'tba': 14332,\n", " 'length': 14328,\n", " 'continued': 14283,\n", " 'northern': 14280,\n", " ...}" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pickle\n", "with open('V.pickle', 'rb') as handle:\n", " V_counter = pickle.load(handle)\n", "V_counter" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "10000" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(V_counter)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "python11", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.3" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }