challenging-america-word-ga.../main.ipynb

32 KiB
Raw Blame History

import pandas as pd
# Column layout of dev-0/in.tsv. In the visible code it is only referenced by
# the commented-out pandas loader below.
columns = ['FileId','Paper', 'Idk1', 'Year','Idk2','Idk3', 'LeftContext', 'RightContext']

# dev_data = pd.read_csv('dev-0/in.tsv', sep='\t', names=columns, engine='python', quotechar='"', error_bad_lines=False)
# dev_expected = pd.read_csv('dev-0/expected.tsv', sep='\t', engine='python', quotechar='"', error_bad_lines=False)

# The files are parsed by hand instead. The encoding is given explicitly so
# that decoding does not depend on the platform's locale default (the corpus
# contains non-ASCII characters).
# Keep the second-to-last tab-separated field of every input row
# (presumably 'LeftContext' according to `columns` -- confirm against the data).
with open('dev-0/in.tsv', "r", encoding="utf-8") as f:
    dev_data = [line.split('\t')[-2] for line in f]

# One expected word per line, with the line terminator removed.
with open('dev-0/expected.tsv', "r", encoding="utf-8") as f:
    dev_expected = [line.replace('\n', '') for line in f]
from nltk.tokenize import word_tokenize 

def createBigram(data, expected, tokenize=None):
    """Collect (last context word, expected word) bigrams and their counts.

    Each string in ``data`` is tokenized and its last token is paired with
    the entry of ``expected`` at the same position.

    Args:
        data: Sequence of context strings.
        expected: Sequence of target words, aligned position-by-position
            with ``data``. Pairing stops at the shorter of the two.
        tokenize: Optional callable turning a string into a list of tokens.
            Defaults to ``word_tokenize`` as imported at file level.

    Returns:
        A tuple ``(listOfBigrams, unigramCounts, bigramCounts)`` where
        ``listOfBigrams`` holds one ``(word, target)`` tuple per usable row
        (duplicates kept), ``unigramCounts`` maps each last-token ``word`` to
        the number of rows it ended, and ``bigramCounts`` maps each bigram to
        its number of occurrences.
    """
    if tokenize is None:
        tokenize = word_tokenize

    listOfBigrams = []
    bigramCounts = {}
    unigramCounts = {}

    for context, target in zip(data, expected):
        tokens = tokenize(context)
        if not tokens:
            # No preceding word exists, so no bigram can be formed for this
            # row; indexing tokens[-1] would raise IndexError.
            continue

        word = tokens[-1]
        bigram = (word, target)
        listOfBigrams.append(bigram)
        bigramCounts[bigram] = bigramCounts.get(bigram, 0) + 1
        # The count must be keyed (and looked up) by the token itself, not by
        # the whole context string; otherwise it is reset to 1 on every row
        # and count(bigram) / count(word) can exceed 1.
        unigramCounts[word] = unigramCounts.get(word, 0) + 1

    return listOfBigrams, unigramCounts, bigramCounts

def calcBigramProb(listOfBigrams, unigramCounts, bigramCounts):
    """Return the ratio count(bigram) / count(first word) for every bigram.

    Args:
        listOfBigrams: Iterable of ``(word1, word2)`` tuples; repeated
            entries simply overwrite the same key with the same value.
        unigramCounts: Mapping from ``word1`` to its count.
        bigramCounts: Mapping from bigram tuple to its count.

    Returns:
        A dict mapping each bigram to its count divided by the count of its
        first word, in order of first appearance.

    Raises:
        TypeError: If a bigram or its first word has no entry in the count
            mappings (``.get`` yields ``None``, which cannot be divided).
    """
    return {
        pair: bigramCounts.get(pair) / unigramCounts.get(pair[0])
        for pair in listOfBigrams
    }
# Gather the bigram list and the unigram/bigram count tables from the dev set
# contexts and their expected words.
bigrams, uniCounts, biCounts = createBigram(dev_data, dev_expected)
# Turn the counts into a per-bigram ratio count(bigram) / count(first word).
probs = calcBigramProb(bigrams, uniCounts, biCounts)
# Bare expression: in a notebook cell this displays the resulting dict.
probs
'day'): 1.0,
 ('ol', 'powers'): 1.0,
 ('real', 'Asiatic'): 1.0,
 ('perfect', 'and'): 1.0,
 ('stcu-\\\\n', 'i«d'): 1.0,
 ('The', 'commis-'): 1.0,
 ('otr', 'just'): 1.0,
 ('for', 'men.'): 1.0,
 ('She', 'his'): 1.0,
 ('in\\\\n', 'thccityol'): 1.0,
 ('j\\\\n', 'ons'): 1.0,
 ('tlio', 'Convention'): 1.0,
 ('9rdick', 'is'): 1.0,
 ("'s", 'weight'): 1.0,
 ('Charleston', 'to'): 1.0,
 ('foaming', 'beasts'): 1.0,
 ('of\\\\n', 'these'): 1.0,
 ('pay\\\\nin', 'that'): 1.0,
 ('from', '$1'): 1.0,
 ('quack', 'medicines.'): 1.0,
 ('found', 'neces-'): 1.0,
 ('west', 'lino'): 1.0,
 ('to\\\\nsay', 'whether'): 1.0,
 ('away\\\\nuntil', 'the'): 1.0,
 ('givo', 'it'): 1.0,
 ('from', 'the'): 12.0,
 (',', 'too,'): 1.0,
 ('paws\\\\nhad', 'been'): 1.0,
 ('organs', ';'): 1.0,
 ('capitalists', 'great'): 1.0,
 (',', 'whllt'): 1.0,
 ('guarantees', 'the'): 1.0,
 ('than\\\\ndo', 'so,'): 1.0,
 ('\\\\n', 'And'): 1.0,
 ('support\\\\n', '1'): 1.0,
 ('legislative', 'body'): 1.0,
 ('\\\\n', 'as'): 4.0,
 ('fishes.\\\\n', 'We'): 1.0,
 ('Tho', 'night'): 1.0,
 ('wages', 'of'): 1.0,
 ('of', 'returning'): 1.0,
 ('are', 'for'): 1.0,
 ('w.s', 'recently'): 1.0,
 ('te', 'the'): 1.0,
 ('the', 'finances,'): 1.0,
 ('that', 'Imrns'): 1.0,
 ('a', 'trust'): 1.0,
 ('next', 'year,'): 1.0,
 ('ready\\\\n', 'to'): 1.0,
 ('nt', 'a'): 1.0,
 ('Noth-\\\\n', 'ng,'): 1.0,
 ('agree', 'on'): 1.0,
 ('were', 'present.'): 1.0,
 ('Ills', 'country,'): 1.0,
 ('crossing\\\\nBoar', 'River;'): 1.0,
 ('the', 'place'): 3.0,
 ('This', 'reception'): 1.0,
 ('de-\\\\n', 'pendent'): 1.0,
 ('tribunals', 'will'): 1.0,
 ('a', 'steamer'): 1.0,
 ('I', 'am'): 3.0,
 (',', 'at'): 3.0,
 ('on', 'or'): 1.0,
 ('preached', 'on'): 1.0,
 ('of\\\\n', 'things'): 1.0,
 ('for', 'the'): 13.0,
 (']', 'e'): 1.0,
 ('the', 'transit'): 1.0,
 ('hy\\\\n', 'which'): 1.0,
 ('opinion', 'that'): 1.0,
 ('.', 'At'): 3.0,
 ('', 'I'): 1.0,
 ('and\\\\n', 'and'): 1.0,
 ('atrango', 'positions'): 1.0,
 ('pn', 'Tues\xad'): 1.0,
 ('and', 'all'): 4.0,
 ('is', 'honestly'): 1.0,
 ('honorable', 'citizens.'): 1.0,
 ('per', 'cent'): 1.0,
 ('riveting', 'it,'): 1.0,
 ('the\\\\nCommissioner', 'of'): 1.0,
 ('separating', 'from'): 1.0,
 ('are', 'startled'): 1.0,
 ('voice', 'for'): 1.0,
 ('when', 'thete'): 1.0,
 ('\\\\nformed', 'the'): 1.0,
 ('remembered', 'that'): 1.0,
 ('#', 'of'): 1.0,
 ('very', 'process'): 1.0,
 ('would', 'gel'): 1.0,
 ('me', 'with'): 1.0,
 ('aflllcted', 'with'): 1.0,
 ('ulti\\\\n', 'mate'): 1.0,
 ('utmost\\\\nstretch', 'of'): 1.0,
 ('-rstand', 'much'): 1.0,
 ('was', 'after'): 1.0,
 ('Its\\\\npresent', 'strength,'): 1.0,
 ('the\\\\n', 'peiiod'): 1.0,
 ('son\\\\nhas', 'been'): 1.0,
 ('is', 'covered'): 1.0,
 ('Female\\\\n', 'Seminary;'): 1.0,
 ('they', 'used'): 1.0,
 ('out-\\\\n', 'numbered'): 1.0,
 ('came\\\\nfrom', 'heavy'): 1.0,
 ('a\\\\n', 'pall'): 1.0,
 ('adranoe', 'guard.'): 1.0,
 ('some\xad\\\\n', 'what'): 1.0,
 ('tho', 'sulo'): 1.0,
 ('upper', 'navigation'): 1.0,
 ('whs\\\\n', 'therefore'): 1.0,
 ('against', 'the'): 2.0,
 ('expendi-\\\\n', 'tures'): 1.0,
 ('treason\\\\nwith', 'indignant'): 1.0,
 ('very', 'strong'): 1.0,
 ('mil-\\\\n', 'lions'): 1.0,
 ('rights', 'supported'): 1.0,
 (',', 'shall'): 2.0,
 ('Moat\\\\nbeing', 'piloted'): 1.0,
 ('be-\\\\n', 'ing'): 2.0,
 ('little', 'barefooted'): 1.0,
 ('of\\\\n', 'developenient'): 1.0,
 ('be-\\\\n', 'Cause'): 1.0,
 ("'Abraham\\\\n", 'llu.li'): 1.0,
 ('the', 'estate'): 2.0,
 ('his', 'friends'): 1.0,
 ('P', "('nderwood.a"): 1.0,
 ('the\\\\n', 'roof'): 1.0,
 ('Cwk', 'r-;'): 1.0,
 ('.', "Johnson's"): 1.0,
 (';', 'while'): 1.0,
 ('*', 'of'): 1.0,
 ('have', 'power'): 1.0,
 ('in', 'search'): 1.0,
 ('and\\\\nthe', 'principal'): 1.0,
 ('HAY.the\\\\nlast', 'named'): 1.0,
 ('have', 'occasioned;'): 1.0,
 ('the\\\\nWestern', 'boundary'): 1.0,
 (',', 'whose'): 1.0,
 ('be', 'visited'): 1.0,
 ('\\\\n', 'and'): 13.0,
 ('party', 'zeal;—and'): 1.0,
 ('exercise', 'over'): 1.0,
 ('foremost', 'in'): 1.0,
 ('three', 'pounds'): 1.0,
 ('exceed-\\\\n', 'ed'): 1.0,
 ('could', 'find,'): 1.0,
 ('-', '.'): 1.0,
 ('could\\\\n', 'f.'): 1.0,
 ('our', 'power.'): 1.0,
 ('intimated', 'that'): 1.0,
 ('Thomas', 'J.'): 2.0,
 ('laid', 'me'): 1.0,
 ('that', 'there'): 3.0,
 ('that', 'on'): 1.0,
 ('it', 'to'): 2.0,
 ('Mc-\\\\n', 'Xamara.'): 1.0,
 ('and\\\\nthrough', 'the'): 1.0,
 ('trembled', 'with'): 1.0,
 ('chin-imps.\\\\nling', 'him,as'): 1.0,
 ('the', 'agricultural'): 1.0,
 ('have', 'proceeded'): 1.0,
 (',', 'P.'): 1.0,
 ('rlin', 'sale'): 1.0,
 ('sight', 'of'): 2.0,
 ('\\\\n', 'rails'): 1.0,
 ('know\\\\nof', 'no'): 1.0,
 ('.', 'MCI).Bids.'): 1.0,
 ('member', 'of'): 1.0,
 ('\\\\nand', 'they'): 1.0,
 ("'", 'of'): 1.0,
 ('the', 'ground;'): 1.0,
 (',', 'and'): 88.0,
 ('traverses', 'the'): 1.0,
 ('could', 'lie'): 1.0,
 ('sounded', 'by'): 1.0,
 ('kianr.h', 'of'): 1.0,
 ('be\\\\n', 'promptly'): 1.0,
 ('what', 'history'): 1.0,
 ('high', 'taxation.'): 1.0,
 ('they', 'will'): 4.0,
 ('with', 'the'): 13.0,
 ('ho', '"would'): 1.0,
 ("'s", 'hole'): 1.0,
 ('these', 'circumstances.'): 1.0,
 ('n', 'belief'): 1.0,
 ('because', 'they'): 1.0,
 ('.', 'When'): 2.0,
 (',', 'a'): 12.0,
 ('as', 'far'): 2.0,
 ('the', 'means'): 3.0,
 ('receive', 'as'): 1.0,
 ('business', 'of'): 1.0,
 ('them.\\\\nWhy', 'should'): 1.0,
 ('tangled', 'thickets'): 1.0,
 (',', "Blannerhasset's"): 1.0,
 ('happiness', 'of'): 1.0,
 ('district', 'of'): 1.0,
 ('\\\\n', 'chosen'): 1.0,
 ('lias', 'sustained'): 1.0,
 ('mid', 'large'): 1.0,
 ('work', 'now."'): 1.0,
 ('certified', 'by'): 1.0,
 ('were', 'entitled,'): 1.0,
 ('submission', 'fb'): 1.0,
 ('bind¬\\\\n', 'ing'): 1.0,
 ('or\\\\n', 'in'): 1.0,
 ('vir\xad\\\\n', 'tue'): 1.0,
 ('asking', 'that'): 1.0,
 ('as', 'there'): 2.0,
 ('immediate', 'approval'): 1.0,
 ('thcro\\\\nwas', 'not'): 1.0,
 ('kind', 'of'): 2.0,
 ('us\\\\n', 'ow'): 1.0,
 ('thence', 'North'): 1.0,
 ('bring-\\\\n', 'ing'): 1.0,
 ('.', '-Cut'): 1.0,
 ('a', 'supper'): 1.0,
 ('surroundings.\\\\n', '“Youre'): 1.0,
 ('the', 'men'): 3.0,
 ('Is\\\\n', 'formed'): 1.0,
 ('intrinsic', 'value'): 1.0,
 ('ordinance.\\\\n', 'Section'): 1.0,
 ('tojustily', 'Protection,'): 1.0,
 ('scramble\\\\n', 'for'): 1.0,
 ('s', 'part,'): 1.0,
 ('both', 'to'): 1.0,
 ('with\\\\nthe', 'idea'): 1.0,
 ('tell', 'of'): 1.0,
 ('gone', 'by'): 1.0,
 ('the', 'liberty'): 1.0,
 ('they', 'lived'): 1.0,
 ('skating\\\\n', 'rink,'): 1.0,
 (',', 'se'): 1.0,
 ('work', 'and'): 2.0,
 ('last\\\\n', 'sale,'): 1.0,
 ('modern', 'times'): 1.0,
 ('Icould', 'see'): 1.0,
 ('they', 'led'): 1.0,
 ('all\\\\nalong', 'the'): 1.0,
 ('of\\\\n', 'all'): 2.0,
 ('1919', 'certificate'): 1.0,
 ('from', 'my'): 1.0,
 ('that', 'the'): 9.0,
 ('aeenmed', 'that'): 1.0,
 ('on', 'Wednesday,'): 2.0,
 ('kept\\\\n', 'up'): 2.0,
 ('after', 'tea'): 1.0,
 ('equipping', 'each'): 1.0,
 ('mode\\\\n', 'of'): 1.0,
 ('the\\\\n', 'ocean,'): 1.0,
 ('.', 'Messersmith'): 1.0,
 ('not\\\\n', 'there,'): 1.0,
 ('n', 'fam-'): 1.0,
 ('beg', 'this'): 1.0,
 ('to', 'attempt'): 1.0,
 ('and\\\\n', 'the'): 5.0,
 ('they\\\\n', 'were'): 1.0,
 ('be', 'more'): 1.0,
 ('ui.cieisioi.ti\\\\n', 'b\\\\'): 1.0,
 ('our\\\\n', 'constitutional'): 1.0,
 ('to', 'die'): 1.0,
 ('and', 'honest'): 1.0,
 ('bo', 'too'): 1.0,
 ('to', 'take'): 2.0,
 (',', 'Thomas'): 1.0,
 ('many', 'months'): 1.0,
 ('150', 'of'): 1.0,
 ('Billy\\\\n', 'one'): 1.0,
 ('.', 'His'): 2.0,
 ('ancP\\\\n', '67-100'): 1.0,
 ('conduct', 'the'): 1.0,
 ('the', 'small'): 1.0,
 ('by', 'ample'): 1.0,
 ('be', 'well'): 3.0,
 ('speak', 'of'): 1.0,
 ('de-\\\\n', 'termined'): 1.0,
 ('window.\\\\n', 'I'): 1.0,
 ('not', 'bo'): 1.0,
 ('train', 'hearing'): 1.0,
 ('minds\\\\nof', 'men'): 1.0,
 ('supply', 'this'): 1.0,
 ('.', 'Also'): 1.0,
 ('we', 'sought'): 1.0,
 ('chief', 'houor'): 1.0,
 ('road', 'was'): 1.0,
 ('strikes', 'in'): 1.0,
 ('at', "Hawkins'"): 1.0,
 ('sure', 'that'): 1.0,
 ('lelease\\\\n', 'he'): 1.0,
 ('in\\\\neach', 'year,'): 1.0,
 ('proclaimed\\\\n', 'by'): 1.0,
 ('polirira\\\\n', 'Course'): 1.0,
 ('the', 'advance'): 1.0,
 ('elaborate', 'frescoing'): 1.0,
 ('work', 'nnd'): 1.0,
 ('pension', 'frauds'): 1.0,
 ('was', 'in'): 1.0,
 ('.', 'Gen.'): 1.0,
 ('west\\\\n', 'coast,'): 1.0,
 ('where', 'Charles'): 1.0,
 ('the', 'stare'): 1.0,
 ('for', 'all'): 1.0,
 ('.', 'But'): 4.0,
 ('the', 'Committee'): 2.0,
 ('so', 'as'): 4.0,
 ('upon\\\\n', 'their'): 1.0,
 ('from', 'just'): 1.0,
 ('the', 'valley'): 3.0,
 ('\\\\n', 'lie'): 1.0,
 ('You\\\\n', 'must'): 1.0,
 ('harvest.\\\\n', 'The'): 1.0,
 (',', 'will,'): 1.0,
 ('—', 'Burleigh'): 1.0,
 (',', 'of'): 5.0,
 ('of', 'fun-'): 1.0,
 ('are', 'gen-'): 1.0,
 ('he', 'was'): 4.0,
 (',', 'another'): 1.0,
 ('\\\\nand', 'possession'): 1.0,
 ('to\\\\n', 'gude'): 1.0,
 ('come.\\\\n', 'The'): 1.0,
 ('\\\\nthat', 'it'): 1.0,
 ('think\\\\nIt', 'a'): 1.0,
 ('the', 'receipt'): 1.0,
 ('those', 'districts,'): 1.0,
 ('firing\\\\n', 'pan'): 1.0,
 ('contempt', 'of'): 1.0,
 ('George\\\\n', 'J.'): 1.0,
 ('of', '*00'): 1.0,
 ('the\\\\nhomestead', 'might'): 1.0,
 ('se\\\\nwho', 'have'): 1.0,
 ('place', 'the'): 1.0,
 ('for\\\\n', '1916'): 1.0,
 (',', 'which'): 9.0,
 ('that', 'their'): 1.0,
 ('the', 'gen-'): 2.0,
 ('would\\\\n', 'do'): 1.0,
 ('be', 're-'): 1.0,
 ('unharmed.\\\\n', 'I'): 1.0,
 ('a\\\\n', 'substitute'): 1.0,
 ('n\\\\n', 'prodigy'): 1.0,
 ('switchmen', 'to-'): 1.0,
 ('rest', 'in'): 1.0,
 ('when\\\\n', 'heavily'): 1.0,
 ('that', 'question,'): 1.0,
 ('capable', 'of'): 1.0,
 ('to', 'lupn'): 1.0,
 ('very', 'in"-*\''): 1.0,
 ('.', 'In'): 4.0,
 ('phenomenal', 'growth.'): 1.0,
 ('hut', 'we'): 1.0,
 ('that', 'he'): 3.0,
 ('the', 'earliest'): 1.0,
 ('the', 'gun,'): 1.0,
 ('I', 'know'): 2.0,
 ('about', 'the'): 2.0,
 ('to', 'President'): 1.0,
 ('The', 'British'): 1.0,
 ('the', 'colored'): 1.0,
 ('recent', 'au-'): 1.0,
 ('new', 'proprietary'): 1.0,
 ('a', 'village'): 1.0,
 ('will\\\\n', 'equaliy'): 1.0,
 ('but', 'also'): 1.0,
 ('\\\\n', 'No'): 1.0,
 ('be\\\\nenforced', 'with'): 1.0,
 ('statement', 'is'): 1.0,
 ('questioning', 'the'): 1.0,
 ('ihe\\\\n', 'I'): 1.0,
 ('he', 'appreciate'): 1.0,
 ('MortunY', 'Traveller'): 1.0,
 ('was', 'to'): 1.0,
 ('once', 'formed'): 1.0,
 ('traduced', 'her'): 1.0,
 (',', 'according'): 2.0,
 (',', 'alwavs'): 1.0,
 ('6|d', ':'): 1.0,
 ('\\\\n', 'which'): 4.0,
 ('were', 'fit'): 1.0,
 ('references\\\\n', 'to'): 1.0,
 ('of\\\\n', 'feet'): 1.0,
 ('brick', 'fioor'): 1.0,
 ('was', 'not'): 4.0,
 ('is', 'more'): 1.0,
 ('consider', 'it'): 2.0,
 ('The', 'latter'): 1.0,
 ('in', 'a'): 8.0,
 ('good\\\\nname', 'and'): 1.0,
 (',', 'trom'): 1.0,
 ('several\\\\njoints', 'were'): 1.0,
 ('the', 'party'): 1.0,
 ('not', 'know'): 2.0,
 ('\\\\n', '1913,'): 2.0,
 ('is', 'possible'): 1.0,
 ('is', 'seven'): 1.0,
 ('powerful', 'foree'): 1.0,
 ('a\\\\ngood', 'jilace,'): 1.0,
 ('\\\\nit', 'the'): 1.0,
 ('for\\\\nalUr', 'taking'): 1.0,
 ('anil', 'did'): 1.0,
 ('defeat', 'the'): 1.0,
 ('attempts', 'lo'): 1.0,
 ('State', 'of'): 1.0,
 ('a\\\\n', 'quaint'): 1.0,
 (',', 'doing'): 1.0,
 ('Frias\\\\nhis', 'Minister'): 1.0,
 ('moon\\\\nlight', 'nights'): 1.0,
 ('pav', 't«xthr'): 1.0,
 ('proposition', 'was'): 1.0,
 ('Into\\\\n', 'the'): 1.0,
 ('ask', 'him'): 1.0,
 ('Houses', 'of'): 1.0,
 ('be', 'brought'): 1.0,
 ('engaged', 'in'): 1.0,
 ('Dos-\\\\n', 'well'): 1.0,
 ('of', 'depriving'): 1.0,
 ('from', 'a'): 2.0,
 ('return', 'to'): 1.0,
 ('pay-\\\\n', 'ment'): 1.0,
 ('fact', 'that'): 1.0,
 ('night\\\\nThe', 'strictest'): 1.0,
 ('\\\\n', '<'): 1.0,
 ('*', 'before'): 1.0,
 ('Australia', 'live'): 1.0,
 ('a', 'majority,'): 1.0,
 ('the\\\\n', 'arc'): 1.0,
 ('the\\\\n', 'result'): 1.0,
 ('heartily\\\\n', 'accede'): 1.0,
 ('years', 'ago'): 1.0,
 ('pa\\\\n', 'yers,'): 1.0,
 ('\\\\n', 'gave'): 1.0,
 ('that', 'lien'): 1.0,
 ('deny', 'that'): 1.0,
 (',', '1884.'): 1.0,
 ('in', 'this'): 2.0,
 ('inform\\\\n', 'Congress'): 1.0,
 ('stoop', 'to'): 1.0,
 ('transferred', 'the'): 1.0,
 ('\\\\n', 'has'): 1.0,
 ('by', 'the'): 13.0,
 ('event\\\\n', 'an'): 1.0,
 ('rn\\\\n', 'than'): 1.0,
 ('advan-\\\\n', 'tages.'): 1.0,
 ('at', 'tho'): 1.0,
 ('yard\\\\n', 'when'): 1.0,
 ('only\\\\nsuffeicr', 'nolmdy'): 1.0,
 ('the', 'Indians'): 1.0,
 ('And', 'unless'): 1.0,
 ('ordinary', 'American'): 1.0,
 ('death.\\\\n', '“Jesus'): 1.0,
 ('to', 'ascend'): 1.0,
 ('but', 'those'): 1.0,
 ('death.\\\\n', 'We'): 1.0,
 ('it', 'he'): 1.0,
 ('are\\\\n', 'not'): 1.0,
 ('the', 'coura'): 1.0,
 ('wastes\\\\nheat', 'most'): 1.0,
 ('as', 'Vice'): 1.0,
 ('claimants', 'of'): 1.0,
 ('returned', 'tt)'): 1.0,
 ('San', 'Francisco'): 1.0,
 ('to', 'civil'): 1.0,
 ('Hanks\\\\n', 'of'): 1.0,
 ('wife—as\\\\n', 'she'): 1.0,
 (',', '7'): 1.0,
 ('tri-\\\\nfle', 'taller'): 1.0,
 ('him', 'a'): 1.0,
 (',', 'Miss'): 1.0,
 ('it', 'has'): 3.0,
 ('will', 'erect'): 1.0,
 ('testified', 'that'): 1.0,
 ('seed', 'potatoes,'): 1.0,
 ('control', 'over'): 1.0,
 ('ap\\\\n', 'proved,'): 1.0,
 ('have', 'added'): 1.0,
 ('cents', 'for'): 1.0,
 ('committee\\\\n', 'and'): 1.0,
 ('of', 'such'): 2.0,
 ('and', 'that'): 2.0,
 ('District\\\\nNumber', '7,'): 1.0,
 ('every\\\\nand', 'he'): 1.0,
 ('union', 'of'): 1.0,
 ('the', 'oilier'): 1.0,
 ('and', 'hopelessly'): 1.0,
 ('the', 'notes'): 1.0,
 ('the', 'floor'): 1.0,
 ('with', 'tons'): 1.0,
 ('disregard\\\\n', '«»f'): 1.0,
 ('at\\\\n', 'Per'): 1.0,
 ('\\\\ncountry', 'will'): 1.0,
 ('yet\\\\nthe', 'ball'): 1.0,
 ('.', 'Glick'): 1.0,
 ('any', 'district'): 1.0,
 ('is', 'qualified'): 1.0,
 ('to', 'wait'): 1.0,
 ('and\\\\n', 'that'): 2.0,
 ('west', 'by'): 1.0,
 (',', ';ynl'): 1.0,
 ('different\\\\n', 'kinds'): 1.0,
 ('benefit', 'he'): 1.0,
 ('initiaiiec', 'wiil'): 1.0,
 ('a', 'noise'): 1.0,
 (',', 'l/ic~M'): 1.0,
 ('Bv\\\\n', 'this'): 1.0,
 ('the', 'respec\xad'): 1.0,
 ('cash', 'in'): 1.0,
 ('scarce', 'bear'): 1.0,
 ('coin\\\\nfinely', 'into'): 1.0,
 ('now', 'to'): 1.0,
 ('relief', 'that'): 1.0,
 ('at\\\\n', 'Aldershot'): 1.0,
 ('benefit', 'of'): 2.0,
 ('the', 'nations'): 1.0,
 ('of\\\\n', 'her'): 1.0,
 ('after', 'the'): 3.0,
 ('will', 'he'): 1.0,
 ('That', 'all'): 1.0,
 ('reach\\\\nLloyd', 'George'): 1.0,
 ('Judg\\\\n', 'ing'): 1.0,
 ('of', '"Vic'): 1.0,
 ('of', 'Pensaco-'): 1.0,
 (',', 'although'): 1.0,
 ('by', 'their'): 1.0,
 ('xan\\\\n', 'raise'): 1.0,
 (',', 'Jno'): 1.0,
 ('of\\\\n', 'Congrtss.'): 1.0,
 ('mind', 'of'): 1.0,
 (',', 'light'): 1.0,
 ('strange', 'that'): 1.0,
 ('sullied', 'its'): 1.0,
 ('easy', 'and'): 1.0,
 ('The', 'case,'): 1.0,
 (',', 'without'): 2.0,
 ('it', 'would'): 3.0,
 ('feet\\\\nless', 'altitude'): 1.0,
 ('fall', 'and'): 1.0,
 ('castles\\\\n', 'which'): 1.0,
 ('there', 'is'): 4.0,
 ('In', 'the'): 6.0,
 ('his', 'duly'): 2.0,
 ('I\\\\nhad', 'seen'): 1.0,
 ('all', 'his'): 1.0,
 ('who\\\\n', 'is'): 1.0,
 ('Grand', 'Prize'): 1.0,
 ('he', 'entcitaiued,'): 1.0,
 ('sentiments', 'to-'): 1.0,
 ('court\\\\n', 'and'): 1.0,
 ('turh', 'uias:niliccul'): 1.0,
 ('capacities', 'for'): 1.0,
 ('the', 'hospital.'): 1.0,
 ('armies\\\\n', 'and'): 1.0,
 ('the', 'bond,'): 1.0,
 (')', 'from'): 1.0,
 ('and', 'competent,'): 1.0,
 (',', 'or'): 9.0,
 ('to\\\\n', 'go'): 1.0,
 ('that\\\\n', 'ordinarily'): 1.0,
 ('duty.\\\\n', 'Resolved,'): 1.0,
 ('an', 'infetlor'): 1.0,
 ('enough', 'of'): 1.0,
 ('his', 'native'): 1.0,
 ('four', 'artillery'): 1.0,
 ('Paul\\\\n', 'half'): 1.0,
 ('the', 'appearance'): 2.0,
 ('inha-\\\\nbitants', 'on'): 1.0,
 ('and', 'resolutions'): 1.0,
 ('anil', 'exhibits,'): 1.0,
 ('keeping', 'myself'): 1.0,
 ('hand', 'of'): 1.0,
 ('as', 'I'): 1.0,
 (',', 'never'): 1.0,
 ('my\\\\n', 'soda'): 1.0,
 ('pro.\\\\nclaiming', 'that'): 1.0,
 ('sta-\\\\ntioned', 'in'): 1.0,
 (',', 'the'): 16.0,
 ('F.iurth\\\\n', 'avenue'): 1.0,
 ('remote', 'objects,'): 1.0,
 ('certain', 'Masons'): 1.0,
 ('Jefferson', ':'): 1.0,
 ('to', 'participate'): 2.0,
 ('the', 'farmer'): 1.0,
 ('the', 'city'): 5.0,
 ('Supreme', 'Judge,'): 1.0,
 ('City', 'authorities'): 1.0,
 ('.', 'The'): 22.0,
 ('mar\\\\n', 'vellous'): 1.0,
 ('distant', 'as'): 1.0,
 ('with-\\\\nin', 'the'): 1.0,
 ('Blundle', 'Maple'): 1.0,
 ('e', ',o'): 1.0,
 ('much', 'surprised'): 1.0,
 ('moro\\\\n', 'wanton'): 1.0,
 ('divided\\\\n', 'among'): 1.0,
 ('the', 'tame'): 1.0,
 ('manager', 'qf'): 1.0,
 ('by\\\\n', 'Mrs.'): 1.0,
 ('young\\\\n', 'theologians'): 1.0,
 ('the', 'lib*'): 1.0,
 ('recent', 'homo'): 1.0,
 ('said\\\\n', 'administrator'): 1.0,
 ('It', 'has'): 1.0,
 (',', 'Concentrated'): 1.0,
 ('the', 'sugar'): 1.0,
 ('little', 'bov'): 1.0,
 ('min', 'W'): 1.0,
 ('dupes', 'of'): 1.0,
 ('One', 'of'): 2.0,
 ('Mr.\\\\n', 'Bo:tj'): 1.0,
 ('Bunker', 'Hill'): 1.0,
 ('judgment', 'Bhall'): 1.0,
 ('In', 'part:'): 1.0,
 ('to', 'file'): 1.0,
 ('New', 'Jersey'): 1.0,
 (',', 'she'): 2.0,
 ('tasacl', 'out,'): 1.0,
 ('of', 'alarm'): 1.0,
 ('it', 'in'): 2.0,
 ('.', 'W.'): 1.0,
 ('and', 'computed'): 1.0,
 ('complaint', 'came'): 1.0,
 ('and', 'not'): 1.0,
 ('distrust\\\\n', 'each'): 1.0,
 ('knew', 'how'): 1.0,
 ('In', '1898'): 1.0,
 ('republicanism', 'must'): 1.0,
 ('posi\xad\\\\n', 'tion'): 1.0,
 ('break\\\\n', 'troth'): 1.0,
 ('.\\\\n', 'Diploma'): 1.0,
 ('did', 'Ia6t.'): 1.0,
 ('is', 'real,'): 1.0,
 ('that\\\\n', 'Cato,'): 1.0,
 ('.', 'in'): 1.0,
 ('throwing', 'off'): 1.0,
 ('have', 'been'): 9.0,
 ('Folding', 'Cae'): 1.0,
 ('will\\\\nhave', 'a'): 1.0,
 ('re\\\\n', 'garding'): 1.0,
 ('but\\\\n', 'trom'): 1.0,
 ('walked', 'be-'): 1.0,
 ('swept', 'away'): 1.0,
 ('until\\\\n', 'fairly'): 1.0,
 ('awakened', 'by'): 1.0,
 ('tha\\\\n', 'smallest'): 1.0,
 ('prop', 'r'): 1.0,
 ('is', 'kept'): 2.0,
 ('circumstance.\\\\n', 'Had'): 1.0,
 ('are\\\\nvariant—some', 'saying'): 1.0,
 ('when', 'president.'): 1.0,
 ('j\\\\nmeat', 'oj'): 1.0,
 ('help', 'ye'): 1.0,
 ('on\\\\neach', 'one'): 1.0,
 ('by\\\\n', 'the'): 3.0,
 ('four', 'thousand'): 2.0,
 ('quarrH', '-'): 1.0,
 ('\\\\n', 'muscular,'): 1.0,
 ('his', 'own'): 3.0,
 ('Maryland\\\\nRailroad', 'Company,'): 1.0,
 ('!', 'Innocent'): 1.0,
 ('eradicating', 'all'): 1.0,
 (',', 'that'): 14.0,
 ('damage', 'done,'): 1.0,
 ('lie', 'knew'): 1.0,
 ('get', 'seed,'): 1.0,
 ('the', 'bidder,'): 2.0,
 ('and', 'the'): 18.0,
 ('ef-\\\\n', 'fective.'): 1.0,
 ('the\\\\n', 'extent'): 1.0,
 ('quite', 'firm.'): 1.0,
 ('Away\\\\nfrom', 'home'): 1.0,
 (',', 'have'): 2.0,
 ('the\\\\n', 'means'): 1.0,
 ('indifference.\\\\n', 'Tuberculosis'): 1.0,
 ('home\\\\n', 'with'): 1.0,
 ('never', 'have'): 1.0,
 ('to', 'show'): 1.0,
 ('thence\\\\n', 'south'): 1.0,
 ('laid', 'in'): 1.0,
 ('and\\\\nTruekee', 'Kailroad'): 1.0,
 ('.', 'Brown'): 1.0,
 ('the', 'death'): 2.0,
 ('horses', 'Mrs.'): 1.0,
 ('statistics', 'in'): 1.0,
 ('on', 'timothy'): 1.0,
 ('shall', 'be'): 8.0,
 ('the', 'prejudices'): 1.0,
 ('stallion\\\\n', '|'): 1.0,
 ('half', 'of'): 1.0,
 ('it', 'is'): 9.0,
 ('do', 'it;'): 1.0,
 ('declares', 'that'): 1.0,
 ('themselves', 'f-atriots'): 1.0,
 ('which', 'you'): 2.0,
 ('this\\\\n', 'tide'): 1.0,
 ('en\xad\\\\ntitled', 'to'): 1.0,
 ('all', 'round.'): 1.0,
 ('the', 'free.'): 1.0,
 (',', 'be'): 1.0,
 ('oi', 'till'): 1.0,
 ('utterest\\\\n', 'scorn'): 1.0,
 ('was', 'over'): 1.0,
 ('the', 'besl'): 1.0,
 ('third\\\\nof', 'the*'): 1.0,
 ('admission', 'that'): 1.0,
 ('been', 'put'): 1.0,
 ('164', 'perchei'): 1.0,
 ('no', 'an-'): 1.0,
 ('twenty', 'lings'): 1.0,
 ('It', 'is'): 6.0,
 ('hard', 'to'): 1.0,
 (',', 'is'): 4.0,
 ('sides', 'and'): 1.0,
 ('deprived', 'the'): 1.0,
 ('elo\xad\\\\n', 'quently'): 1.0,
 ('s', 'who'): 1.0,
 ('advantages\\\\nwould', 'not'): 1.0,
 ('Foushec', 'ft.'): 1.0,
 ('.', 'Clairs'): 1.0,
 ('the\\\\n', 'Young'): 1.0,
 ('was', 'clear,'): 1.0,
 ('food\\\\nfrom', 'a'): 1.0,
 ('not', 'only'): 3.0,
 ('blood-stained', 'weap'): 1.0,
 ('property\\\\n', 'has'): 1.0,
 ('aro', 'perfectly'): 1.0,
 ('the', 'iumily'): 1.0,
 (',', 'reed*,'): 1.0,
 ('just', 'for'): 1.0,
 ('the', 'D'): 1.0,
 ('had', 'evidently'): 1.0,
 ('the', 'measures'): 1.0,
 ('I\\\\n', 'finally'): 1.0,
 ('people', 'of'): 2.0,
 ('the', 'expense'): 1.0,
 ('felt', 'if'): 1.0,
 ('jour-\\\\nnalism', 'which'): 1.0,
 ('as', 'to'): 7.0,
 ('47', 'deg.'): 1.0,
 ("'", 'after'): 1.0,
 ('.', 'He'): 11.0,
 ('About\\\\n', 'midnight'): 1.0,
 ('my', 'wife'): 2.0,
 ('effect', 'npon'): 1.0,
 ('throughout', 'the"'): 1.0,
 ('the', 'last'): 4.0,
 ('of', 'Kennehee'): 1.0,
 ('.V', 'York.'): 1.0,
 ('of\\\\n', 'Inue.'): 1.0,
 ('are', 'awarded'): 1.0,
 ('bound', 'to'): 1.0,
 ('the', 'upper'): 1.0,
 ('species', 'of'): 1.0,
 ('day', 'ol'): 1.0,
 ('a', 'fair'): 3.0,
 ('public', 'trustee'): 1.0,
 ('has', 'become'): 1.0,
 ('lading', 'and,'): 1.0,
 ('Another', 'potent'): 1.0,
 ('to', 'ihe'): 1.0,
 (';', 'the'): 3.0,
 ('a', 'large'): 4.0,
 ('II.\\\\n', 'one'): 1.0,
 ('ben-\\\\n', 'efit'): 1.0,
 ('which\\\\nwaa', 'fastened'): 1.0,
 ('is', 'laid'): 1.0,
 ('in', 'Smith'): 1.0,
 ('the\\\\nbest', 'Mexico'): 1.0,
 ('each', 'three'): 1.0,
 ('a', 'moun-'): 1.0,
 ...}