Fix log10 import: use math.log10 instead of cmath.log10

This commit is contained in:
Dawid 2022-04-24 23:57:21 +02:00
parent cdefe27fd4
commit feda89c5a9
3 changed files with 17945 additions and 17945 deletions

File diff suppressed because it is too large Load Diff

24
run.py
View File

@@ -1,4 +1,4 @@
from cmath import log10 from math import log10
import csv import csv
import pandas as pd import pandas as pd
import regex as re import regex as re
@@ -90,7 +90,7 @@ def write_output():
for _, row in dev_data.iterrows(): for _, row in dev_data.iterrows():
text = prepare_text(str(row[7])) text = prepare_text(str(row[7]))
words = word_tokenize(text) words = word_tokenize(text)
if len(words) < 3: if len(words) < 4:
prediction = "the:0.2 be:0.2 to:0.2 of:0.1 and:0.1 a:0.1 :0.1" prediction = "the:0.2 be:0.2 to:0.2 of:0.1 and:0.1 a:0.1 :0.1"
else: else:
prediction = predict(words[0], words[1]) prediction = predict(words[0], words[1])
@@ -100,23 +100,23 @@ def write_output():
for _, row in test_data.iterrows(): for _, row in test_data.iterrows():
text = prepare_text(str(row[7])) text = prepare_text(str(row[7]))
words = word_tokenize(text) words = word_tokenize(text)
if len(words) < 3: if len(words) < 4:
prediction = "the:0.2 be:0.2 to:0.2 of:0.1 and:0.1 a:0.1 :0.1" prediction = "the:0.2 be:0.2 to:0.2 of:0.1 and:0.1 a:0.1 :0.1"
else: else:
prediction = predict(words[0], words[1]) prediction = predict(words[0], words[1])
file.write(prediction + "\n") file.write(prediction + "\n")
if __name__ == "__main__": if __name__ == "__main__":
print("Preparing data...") # print("Preparing data...")
train_data = train_data[[6, 7]] # train_data = train_data[[6, 7]]
train_data = pd.concat([train_data, train_labels], axis=1) # train_data = pd.concat([train_data, train_labels], axis=1)
train_data["final"] = train_data[6] + train_data[0] + train_data[7] # train_data["final"] = train_data[6] + train_data[0] + train_data[7]
train = train_data[['final']] # train = train_data[['final']]
with open("./train_data.txt", 'a') as f: # with open("./train_data.txt", 'a') as f:
for _, row in train_data.iterrows(): # for _, row in train_data.iterrows():
text = prepare_text(str(row["final"])) # text = prepare_text(str(row["final"]))
f.write(text + '\n') # f.write(text + '\n')
print("Preparing model...") print("Preparing model...")
os.system('sh ./kenlm.sh') os.system('sh ./kenlm.sh')

File diff suppressed because it is too large Load Diff