470619
This commit is contained in:
parent
57fd77f584
commit
51220186a3
18240
dev-0/out.tsv
18240
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
@@ -71,7 +71,7 @@
|
||||
"def generate_N_grams(text, ngram=1, no_punctuation=True):\n",
|
||||
" text = re.sub(r'[\\-] ', '', text).lower()\n",
|
||||
" if no_punctuation:\n",
|
||||
" text = re.sub(r'[\\)\\(\\.\\,\\-]', ' ', text)\n",
|
||||
" text = re.sub(r'[^\\w\\s]', ' ', text)\n",
|
||||
" words=[word for word in text.split()]\n",
|
||||
" temp=zip(*[words[i:] for i in range(0,ngram)])\n",
|
||||
" ans=[' '.join(ngram) for ngram in temp]\n",
|
||||
@@ -141,8 +141,7 @@
|
||||
" if tmp_probs[i] == 1:\n",
|
||||
" tmp_probs[i] = 0.1\n",
|
||||
" else:\n",
|
||||
" c = probs[word_2][min(probs[word_2].keys(), key=(lambda k: probs[word_2][k]))] / 10\n",
|
||||
" tmp_probs[i] = probs[word_1][i] * c\n",
|
||||
" tmp_probs[i] = probs[word_1][i] / 5\n",
|
||||
" else:\n",
|
||||
" tmp_probs = probs[word_1]\n",
|
||||
" else:\n",
|
||||
@@ -172,7 +171,7 @@
|
||||
" t = i[0]\n",
|
||||
" t = re.sub(r'[\\-] ', '', t).lower()\n",
|
||||
" if True:\n",
|
||||
" t = re.sub(r'[\\)\\(\\.\\,\\-]', ' ', t)\n",
|
||||
" t = re.sub(r'[^\\w\\s]', ' ', t)\n",
|
||||
" words=[word for word in t.split()]\n",
|
||||
" found_words.append(find_word(words[-1], ' '.join(words[-2:])))\n",
|
||||
" return found_words\n",
|
||||
|
7
run.py
7
run.py
@@ -39,7 +39,7 @@ def print_example(data, words, idx):
|
||||
def generate_N_grams(text, ngram=1, no_punctuation=True):
|
||||
text = re.sub(r'[\-] ', '', text).lower()
|
||||
if no_punctuation:
|
||||
text = re.sub(r'[\)\(\.\,\-]', ' ', text)
|
||||
text = re.sub(r'[^\w\s]', ' ', text)
|
||||
words=[word for word in text.split()]
|
||||
temp=zip(*[words[i:] for i in range(0,ngram)])
|
||||
ans=[' '.join(ngram) for ngram in temp]
|
||||
@@ -86,8 +86,7 @@ def find_word(word_1, word_2):
|
||||
if tmp_probs[i] == 1:
|
||||
tmp_probs[i] = 0.1
|
||||
else:
|
||||
c = probs[word_2][min(probs[word_2].keys(), key=(lambda k: probs[word_2][k]))] / 10
|
||||
tmp_probs[i] = probs[word_1][i] * c
|
||||
tmp_probs[i] = probs[word_1][i] / 5
|
||||
else:
|
||||
tmp_probs = probs[word_1]
|
||||
else:
|
||||
@@ -110,7 +109,7 @@ def find_words(data):
|
||||
t = i[0]
|
||||
t = re.sub(r'[\-] ', '', t).lower()
|
||||
if True:
|
||||
t = re.sub(r'[\)\(\.\,\-]', ' ', t)
|
||||
t = re.sub(r'[^\w\s]', ' ', t)
|
||||
words=[word for word in t.split()]
|
||||
found_words.append(find_word(words[-1], ' '.join(words[-2:])))
|
||||
return found_words
|
||||
|
12648
test-A/out.tsv
12648
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user