s430705
This commit is contained in:
parent
68537ae8d2
commit
290a1f802c
19304
dev-0/out.tsv
19304
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
17
run.py
17
run.py
@@ -13,16 +13,9 @@ DEFAULT_PREDICTION = 'the:0.2 be:0.2 to:0.2 of:0.1 and:0.1 a:0.1 :0.1'
|
||||
|
||||
|
||||
def preprocess_text(text):
|
||||
# remove punctuation
|
||||
text = text.translate(str.maketrans(' ', ' ', string.punctuation))
|
||||
# only alphabets and numerics
|
||||
text = re.sub('[^a-zA-Z]', ' ', text)
|
||||
# replace newline with space
|
||||
text = re.sub("\n", " ", text)
|
||||
# lower case
|
||||
text = text.lower()
|
||||
# split and join the words
|
||||
text = ' '.join(text.split())
|
||||
text = text.lower().replace("-\\n", "").replace("\\n", " ")
|
||||
text = re.sub(r"\p{P}", "", text)
|
||||
|
||||
return text
|
||||
|
||||
|
||||
@@ -102,7 +95,7 @@ with open("dev-0/out.tsv", "w") as file:
|
||||
text = preprocess_text(str(row[7]))
|
||||
words = word_tokenize(text)
|
||||
if len(words) < 3:
|
||||
prediction = "the:0.2 be:0.2 to:0.2 of:0.1 and:0.1 a:0.1 :0.1"
|
||||
prediction = DEFAULT_PREDICTION
|
||||
else:
|
||||
prediction = predict_probs(words[0], words[1])
|
||||
file.write(prediction + "\n")
|
||||
@@ -112,7 +105,7 @@ with open("test-A/out.tsv", "w") as file:
|
||||
text = preprocess_text(str(row[7]))
|
||||
words = word_tokenize(text)
|
||||
if len(words) < 3:
|
||||
prediction = "the:0.2 be:0.2 to:0.2 of:0.1 and:0.1 a:0.1 :0.1"
|
||||
prediction = DEFAULT_PREDICTION
|
||||
else:
|
||||
prediction = predict_probs(words[0], words[1])
|
||||
file.write(prediction + "\n")
|
||||
|
13066
test-A/out.tsv
13066
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user