470619
This commit is contained in:
parent
57fd77f584
commit
51220186a3
18240
dev-0/out.tsv
18240
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
@ -71,7 +71,7 @@
|
|||||||
"def generate_N_grams(text, ngram=1, no_punctuation=True):\n",
|
"def generate_N_grams(text, ngram=1, no_punctuation=True):\n",
|
||||||
" text = re.sub(r'[\\-] ', '', text).lower()\n",
|
" text = re.sub(r'[\\-] ', '', text).lower()\n",
|
||||||
" if no_punctuation:\n",
|
" if no_punctuation:\n",
|
||||||
" text = re.sub(r'[\\)\\(\\.\\,\\-]', ' ', text)\n",
|
" text = re.sub(r'[^\\w\\s]', ' ', text)\n",
|
||||||
" words=[word for word in text.split()]\n",
|
" words=[word for word in text.split()]\n",
|
||||||
" temp=zip(*[words[i:] for i in range(0,ngram)])\n",
|
" temp=zip(*[words[i:] for i in range(0,ngram)])\n",
|
||||||
" ans=[' '.join(ngram) for ngram in temp]\n",
|
" ans=[' '.join(ngram) for ngram in temp]\n",
|
||||||
@ -141,8 +141,7 @@
|
|||||||
" if tmp_probs[i] == 1:\n",
|
" if tmp_probs[i] == 1:\n",
|
||||||
" tmp_probs[i] = 0.1\n",
|
" tmp_probs[i] = 0.1\n",
|
||||||
" else:\n",
|
" else:\n",
|
||||||
" c = probs[word_2][min(probs[word_2].keys(), key=(lambda k: probs[word_2][k]))] / 10\n",
|
" tmp_probs[i] = probs[word_1][i] / 5\n",
|
||||||
" tmp_probs[i] = probs[word_1][i] * c\n",
|
|
||||||
" else:\n",
|
" else:\n",
|
||||||
" tmp_probs = probs[word_1]\n",
|
" tmp_probs = probs[word_1]\n",
|
||||||
" else:\n",
|
" else:\n",
|
||||||
@ -172,7 +171,7 @@
|
|||||||
" t = i[0]\n",
|
" t = i[0]\n",
|
||||||
" t = re.sub(r'[\\-] ', '', t).lower()\n",
|
" t = re.sub(r'[\\-] ', '', t).lower()\n",
|
||||||
" if True:\n",
|
" if True:\n",
|
||||||
" t = re.sub(r'[\\)\\(\\.\\,\\-]', ' ', t)\n",
|
" t = re.sub(r'[^\\w\\s]', ' ', t)\n",
|
||||||
" words=[word for word in t.split()]\n",
|
" words=[word for word in t.split()]\n",
|
||||||
" found_words.append(find_word(words[-1], ' '.join(words[-2:])))\n",
|
" found_words.append(find_word(words[-1], ' '.join(words[-2:])))\n",
|
||||||
" return found_words\n",
|
" return found_words\n",
|
||||||
|
7
run.py
7
run.py
@ -39,7 +39,7 @@ def print_example(data, words, idx):
|
|||||||
def generate_N_grams(text, ngram=1, no_punctuation=True):
|
def generate_N_grams(text, ngram=1, no_punctuation=True):
|
||||||
text = re.sub(r'[\-] ', '', text).lower()
|
text = re.sub(r'[\-] ', '', text).lower()
|
||||||
if no_punctuation:
|
if no_punctuation:
|
||||||
text = re.sub(r'[\)\(\.\,\-]', ' ', text)
|
text = re.sub(r'[^\w\s]', ' ', text)
|
||||||
words=[word for word in text.split()]
|
words=[word for word in text.split()]
|
||||||
temp=zip(*[words[i:] for i in range(0,ngram)])
|
temp=zip(*[words[i:] for i in range(0,ngram)])
|
||||||
ans=[' '.join(ngram) for ngram in temp]
|
ans=[' '.join(ngram) for ngram in temp]
|
||||||
@ -86,8 +86,7 @@ def find_word(word_1, word_2):
|
|||||||
if tmp_probs[i] == 1:
|
if tmp_probs[i] == 1:
|
||||||
tmp_probs[i] = 0.1
|
tmp_probs[i] = 0.1
|
||||||
else:
|
else:
|
||||||
c = probs[word_2][min(probs[word_2].keys(), key=(lambda k: probs[word_2][k]))] / 10
|
tmp_probs[i] = probs[word_1][i] / 5
|
||||||
tmp_probs[i] = probs[word_1][i] * c
|
|
||||||
else:
|
else:
|
||||||
tmp_probs = probs[word_1]
|
tmp_probs = probs[word_1]
|
||||||
else:
|
else:
|
||||||
@ -110,7 +109,7 @@ def find_words(data):
|
|||||||
t = i[0]
|
t = i[0]
|
||||||
t = re.sub(r'[\-] ', '', t).lower()
|
t = re.sub(r'[\-] ', '', t).lower()
|
||||||
if True:
|
if True:
|
||||||
t = re.sub(r'[\)\(\.\,\-]', ' ', t)
|
t = re.sub(r'[^\w\s]', ' ', t)
|
||||||
words=[word for word in t.split()]
|
words=[word for word in t.split()]
|
||||||
found_words.append(find_word(words[-1], ' '.join(words[-2:])))
|
found_words.append(find_word(words[-1], ' '.join(words[-2:])))
|
||||||
return found_words
|
return found_words
|
||||||
|
12648
test-A/out.tsv
12648
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user