Dict bigram model
This commit is contained in:
parent
bad4620b17
commit
9693ce9b24
56
lm1.py
Normal file
56
lm1.py
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
import sys
|
||||||
|
import random
|
||||||
|
from tqdm import tqdm
|
||||||
|
from collections import defaultdict
|
||||||
|
import pickle
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Build the training token stream: a single flat list of tokens covering
# every row of train/in.tsv.
corpus = []

with open('train/in.tsv', 'r') as f:
    print('Reading corpus...')
    for line in tqdm(f):
        # Only the columns from index 6 onwards are used; the first two of
        # them are presumably the text before and after the gap -- TODO
        # confirm against the dataset description.
        ctx = line.split('\t')[6:]
        # The marker is glued on without surrounding spaces, so it fuses with
        # the adjacent words into one token instead of being a token itself.
        corpus.append(ctx[0] + 'BLANK' + ctx[1])

corpus = ' '.join(corpus)
# Re-join words hyphenated across a line break. Line breaks inside the text
# are stored as the two literal characters backslash + 'n' (see the
# replacement two statements below), so that form must be handled here; the
# real-newline form is kept for rows whose last column ends in a hyphen.
corpus = corpus.replace('-\\n', '')
corpus = corpus.replace('-\n', '')
corpus = corpus.replace('\\n', ' ')
corpus = corpus.replace('\n', ' ')
# Consecutive spaces (e.g. the row terminator followed by the join separator)
# would otherwise produce empty-string tokens that get counted as words.
corpus = [token for token in corpus.split(' ') if token]
|
||||||
|
|
||||||
|
# Bigram counts: distrib[w1][w2] is the number of times w2 directly follows
# w1 in `corpus`. The table is cached in distrib.pkl; when that file exists it
# is loaded as-is and `corpus` is not consulted.
if os.path.exists('distrib.pkl'):
    print('Loading distribution...')
    # NOTE(review): unpickling can execute arbitrary code -- only keep this
    # if distrib.pkl is always a file produced locally by this script.
    with open('distrib.pkl', 'rb') as f:
        distrib = pickle.load(f)
else:
    print('Generating distribution...')
    distrib = defaultdict(lambda: defaultdict(int))
    for i in tqdm(range(len(corpus) - 1)):
        distrib[corpus[i]][corpus[i+1]] += 1

    # Drop the lambda-backed outer defaultdict: it cannot be pickled, and a
    # plain dict makes this branch yield the same type as the cached branch,
    # so later lookups behave identically on the first and subsequent runs.
    distrib = dict(distrib)

    with open('distrib.pkl', 'wb') as f:
        print('Saving distribution...')
        pickle.dump(distrib, f)
|
||||||
|
|
||||||
|
# Predict one word per row of dev-0/in.tsv: the most frequent successor of
# the last space-separated word in the first used column. Rows whose last
# word has no recorded successors get the sentinel 'NONE'.
results = []
with open('dev-0/in.tsv', 'r') as f:
    print('Generating output...')
    for line in tqdm(f):
        ctx = line.split('\t')[6:]
        last_word = ctx[0].split(' ')[-1]
        # .get() avoids both the KeyError of a plain dict and the silent
        # insertion of an empty entry that indexing a defaultdict causes,
        # so no catch-all exception handler is needed.
        followers = distrib.get(last_word)
        if followers:
            blank_word = max(followers, key=followers.get)
        else:
            blank_word = 'NONE'
        results.append(blank_word)
|
||||||
|
|
||||||
|
# Write one prediction record per line to dev-0/out.tsv, in the same order as
# `results`. Each record is a space-separated list of `word:probability`
# pairs; the entry with an empty word carries the remaining probability mass.
with open('dev-0/out.tsv', 'w') as f:
    print('Writing output...')
    for result in tqdm(results):
        if result == 'NONE':
            # No prediction available: fall back to a fixed guess.
            f.write('a:0.6 the:0.2 :0.2\n')
        else:
            # Terminate every record with a newline; without it all
            # predictions end up concatenated on a single line.
            f.write(f'{result}:0.9 :0.1\n')
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user