s430705
This commit is contained in:
parent
68537ae8d2
commit
290a1f802c
19304
dev-0/out.tsv
19304
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
17
run.py
17
run.py
@ -13,16 +13,9 @@ DEFAULT_PREDICTION = 'the:0.2 be:0.2 to:0.2 of:0.1 and:0.1 a:0.1 :0.1'
|
|||||||
|
|
||||||
|
|
||||||
def preprocess_text(text):
|
def preprocess_text(text):
|
||||||
# remove punctuation
|
text = text.lower().replace("-\\n", "").replace("\\n", " ")
|
||||||
text = text.translate(str.maketrans(' ', ' ', string.punctuation))
|
text = re.sub(r"\p{P}", "", text)
|
||||||
# only alphabets and numerics
|
|
||||||
text = re.sub('[^a-zA-Z]', ' ', text)
|
|
||||||
# replace newline with space
|
|
||||||
text = re.sub("\n", " ", text)
|
|
||||||
# lower case
|
|
||||||
text = text.lower()
|
|
||||||
# split and join the words
|
|
||||||
text = ' '.join(text.split())
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
@ -102,7 +95,7 @@ with open("dev-0/out.tsv", "w") as file:
|
|||||||
text = preprocess_text(str(row[7]))
|
text = preprocess_text(str(row[7]))
|
||||||
words = word_tokenize(text)
|
words = word_tokenize(text)
|
||||||
if len(words) < 3:
|
if len(words) < 3:
|
||||||
prediction = "the:0.2 be:0.2 to:0.2 of:0.1 and:0.1 a:0.1 :0.1"
|
prediction = DEFAULT_PREDICTION
|
||||||
else:
|
else:
|
||||||
prediction = predict_probs(words[0], words[1])
|
prediction = predict_probs(words[0], words[1])
|
||||||
file.write(prediction + "\n")
|
file.write(prediction + "\n")
|
||||||
@ -112,7 +105,7 @@ with open("test-A/out.tsv", "w") as file:
|
|||||||
text = preprocess_text(str(row[7]))
|
text = preprocess_text(str(row[7]))
|
||||||
words = word_tokenize(text)
|
words = word_tokenize(text)
|
||||||
if len(words) < 3:
|
if len(words) < 3:
|
||||||
prediction = "the:0.2 be:0.2 to:0.2 of:0.1 and:0.1 a:0.1 :0.1"
|
prediction = DEFAULT_PREDICTION
|
||||||
else:
|
else:
|
||||||
prediction = predict_probs(words[0], words[1])
|
prediction = predict_probs(words[0], words[1])
|
||||||
file.write(prediction + "\n")
|
file.write(prediction + "\n")
|
||||||
|
13066
test-A/out.tsv
13066
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user