Change regexes

This commit is contained in:
s426135 2020-03-22 14:32:24 +01:00
parent 95e6501fe5
commit c267e9e7e0
8 changed files with 6800 additions and 1528 deletions

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

5272
dev-0/together Normal file

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@@ -30,14 +30,14 @@ def calc_post_prob(post, paranormal_class_logprob, sceptic_class_logprob, word_l
for token in tokens: for token in tokens:
token = clear_tokens(token, False) token = clear_tokens(token, False)
try: try:
product += word_logprobs[class_][token] product *= word_logprobs[class_][token]
except KeyError: except KeyError:
product += 0 product *= 1
# tu wzoru uzyj # tu wzoru uzyj
if class_ == 'sceptic': if class_ == 'sceptic':
product += sceptic_class_logprob product *= sceptic_class_logprob
elif class_ == 'paranormal': elif class_ == 'paranormal':
product += paranormal_class_logprob product *= paranormal_class_logprob
probs[abs(product)] = class_ probs[abs(product)] = class_
#print(probs) #print(probs)
# mozna jeszcze zrobic aby bralo kluczowe slowa i wtedy decydowalo ze paranormal # mozna jeszcze zrobic aby bralo kluczowe slowa i wtedy decydowalo ze paranormal
@@ -46,7 +46,7 @@ def calc_post_prob(post, paranormal_class_logprob, sceptic_class_logprob, word_l
return probs[max(probs.keys())] return probs[max(probs.keys())]
def search_for_keywords(text): def search_for_keywords(text):
keywords = ['paranormal', 'ufo', 'aliens', 'conspiracy', 'aliens'] keywords = ['paranormal', 'ufo', 'aliens', 'conspiracy', 'aliens', 'atlantis']
return any(keyword in text for keyword in keywords) return any(keyword in text for keyword in keywords)
def main(): def main():
@@ -55,10 +55,10 @@ def main():
paranormal_class_logprob = pickle_list[0] paranormal_class_logprob = pickle_list[0]
sceptic_class_logprob = pickle_list[1] sceptic_class_logprob = pickle_list[1]
word_logprobs = pickle_list[2] word_logprobs = pickle_list[2]
#in_file = "test-A/in.tsv" in_file = "test-A/in.tsv"
in_file = "dev-0/in.tsv" #in_file = "dev-0/in.tsv"
#out_file = "test-A/out.tsv" out_file = "test-A/out.tsv"
out_file = "dev-0/out.tsv" #out_file = "dev-0/out.tsv"
print (f"in {in_file}") print (f"in {in_file}")
print (f"out {out_file}") print (f"out {out_file}")
with open(in_file) as in_f, open(out_file, 'w') as out_f: with open(in_file) as in_f, open(out_file, 'w') as out_f:

File diff suppressed because it is too large Load Diff

View File

@@ -74,10 +74,10 @@ def calc_word_logprobs(word_counts):
return word_logprobs return word_logprobs
def main(): def main():
#expected = './train/expected.tsv' expected = './train/expected.tsv'
expected = './dev-0/expected.tsv' #expected = './dev-0/expected.tsv'
#in_f = './train/in.tsv' in_f = './train/in.tsv'
in_f = './dev-0/in.tsv' #in_f = './dev-0/in.tsv'
print (f"expected {expected}") print (f"expected {expected}")
print (f"in {in_f}") print (f"in {in_f}")
paranormal_class_lgprob, skeptic_class_logprob = calc_class_logprob(expected) paranormal_class_lgprob, skeptic_class_logprob = calc_class_logprob(expected)