Change regexes
This commit is contained in:
parent
95e6501fe5
commit
c267e9e7e0
BIN
.predict.py.swp
BIN
.predict.py.swp
Binary file not shown.
BIN
.train.py.swp
BIN
.train.py.swp
Binary file not shown.
680
dev-0/out.tsv
680
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
5272
dev-0/together
Normal file
5272
dev-0/together
Normal file
File diff suppressed because one or more lines are too long
Binary file not shown.
18
predict.py
18
predict.py
@ -30,14 +30,14 @@ def calc_post_prob(post, paranormal_class_logprob, sceptic_class_logprob, word_l
|
|||||||
for token in tokens:
|
for token in tokens:
|
||||||
token = clear_tokens(token, False)
|
token = clear_tokens(token, False)
|
||||||
try:
|
try:
|
||||||
product += word_logprobs[class_][token]
|
product *= word_logprobs[class_][token]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
product += 0
|
product *= 1
|
||||||
# tu wzoru uzyj
|
# tu wzoru uzyj
|
||||||
if class_ == 'sceptic':
|
if class_ == 'sceptic':
|
||||||
product += sceptic_class_logprob
|
product *= sceptic_class_logprob
|
||||||
elif class_ == 'paranormal':
|
elif class_ == 'paranormal':
|
||||||
product += paranormal_class_logprob
|
product *= paranormal_class_logprob
|
||||||
probs[abs(product)] = class_
|
probs[abs(product)] = class_
|
||||||
#print(probs)
|
#print(probs)
|
||||||
# mozna jeszcze zrobic aby bralo kluczowe slowa i wtedy decydowalo ze paranormal
|
# mozna jeszcze zrobic aby bralo kluczowe slowa i wtedy decydowalo ze paranormal
|
||||||
@ -46,7 +46,7 @@ def calc_post_prob(post, paranormal_class_logprob, sceptic_class_logprob, word_l
|
|||||||
return probs[max(probs.keys())]
|
return probs[max(probs.keys())]
|
||||||
|
|
||||||
def search_for_keywords(text):
|
def search_for_keywords(text):
|
||||||
keywords = ['paranormal', 'ufo', 'aliens', 'conspiracy', 'aliens']
|
keywords = ['paranormal', 'ufo', 'aliens', 'conspiracy', 'aliens', 'atlantis']
|
||||||
return any(keyword in text for keyword in keywords)
|
return any(keyword in text for keyword in keywords)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@ -55,10 +55,10 @@ def main():
|
|||||||
paranormal_class_logprob = pickle_list[0]
|
paranormal_class_logprob = pickle_list[0]
|
||||||
sceptic_class_logprob = pickle_list[1]
|
sceptic_class_logprob = pickle_list[1]
|
||||||
word_logprobs = pickle_list[2]
|
word_logprobs = pickle_list[2]
|
||||||
#in_file = "test-A/in.tsv"
|
in_file = "test-A/in.tsv"
|
||||||
in_file = "dev-0/in.tsv"
|
#in_file = "dev-0/in.tsv"
|
||||||
#out_file = "test-A/out.tsv"
|
out_file = "test-A/out.tsv"
|
||||||
out_file = "dev-0/out.tsv"
|
#out_file = "dev-0/out.tsv"
|
||||||
print (f"in {in_file}")
|
print (f"in {in_file}")
|
||||||
print (f"out {out_file}")
|
print (f"out {out_file}")
|
||||||
with open(in_file) as in_f, open(out_file, 'w') as out_f:
|
with open(in_file) as in_f, open(out_file, 'w') as out_f:
|
||||||
|
2350
test-A/out.tsv
2350
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
8
train.py
8
train.py
@ -74,10 +74,10 @@ def calc_word_logprobs(word_counts):
|
|||||||
return word_logprobs
|
return word_logprobs
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
#expected = './train/expected.tsv'
|
expected = './train/expected.tsv'
|
||||||
expected = './dev-0/expected.tsv'
|
#expected = './dev-0/expected.tsv'
|
||||||
#in_f = './train/in.tsv'
|
in_f = './train/in.tsv'
|
||||||
in_f = './dev-0/in.tsv'
|
#in_f = './dev-0/in.tsv'
|
||||||
print (f"expected {expected}")
|
print (f"expected {expected}")
|
||||||
print (f"in {in_f}")
|
print (f"in {in_f}")
|
||||||
paranormal_class_lgprob, skeptic_class_logprob = calc_class_logprob(expected)
|
paranormal_class_lgprob, skeptic_class_logprob = calc_class_logprob(expected)
|
||||||
|
Loading…
Reference in New Issue
Block a user