my brilliant solution2
This commit is contained in:
parent
73b72d2df3
commit
874b7f6266
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,24 @@
|
|||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import pickle
|
||||
from tokenize import tokenize
|
||||
import math
|
||||
def classify(terms, model):
    """Label a tokenized document using the trained naive Bayes model.

    Each class score is the log prior plus, for every term, the log of the
    add-one smoothed term probability ``(count + 1) / (class_words_total +
    vocabulary_size)``.

    Args:
        terms: Iterable of tokens that make up the document.
        model: The 6-tuple stored in ``model.pkl``: ``(pskeptic,
            vocabulary_size, skeptic_words_total, paranormal_words_total,
            skeptics_count, paranormal_count)``.

    Returns:
        ``"S"`` when the skeptic score is strictly greater than the
        paranormal score, otherwise ``"P"`` (ties go to ``"P"``).
    """
    (pskeptic, vocabulary_size, skeptic_words_total,
     paranormal_words_total, skeptics_count, paranormal_count) = model

    # math.log(0) raises ValueError, so a prior of exactly 0 or 1 (training
    # data containing a single class) is mapped to minus infinity instead.
    minus_infinity = float('-inf')
    log_prob_skeptic = math.log(pskeptic) if pskeptic > 0 else minus_infinity
    log_prob_paranormal = (
        math.log(1 - pskeptic) if pskeptic < 1 else minus_infinity
    )

    # The smoothing denominators do not depend on the term; compute them once.
    skeptic_denominator = skeptic_words_total + vocabulary_size
    paranormal_denominator = paranormal_words_total + vocabulary_size

    for term in terms:
        # .get() scores unseen terms as count 0 without inserting them into
        # the model dictionaries, so the model does not grow while predicting.
        log_prob_skeptic += math.log(
            (skeptics_count.get(term, 0) + 1) / skeptic_denominator
        )
        log_prob_paranormal += math.log(
            (paranormal_count.get(term, 0) + 1) / paranormal_denominator
        )

    return "S" if log_prob_skeptic > log_prob_paranormal else "P"


def main():
    """Print one label ("S" or "P") per document line read from stdin."""
    # NOTE(security): pickle.load can execute arbitrary code while
    # unpickling; only load a model.pkl that was produced by a trusted run.
    with open('model.pkl', 'rb') as model_file:
        model = pickle.load(model_file)

    for line in sys.stdin:
        document = line.rstrip()
        print(classify(tokenize(document), model))


if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,3 @@
|
|||
#!/usr/bin/env python3
|
||||
def tokenize(d):
    """Split document ``d`` into terms at every single space character.

    Only the space character separates terms, so consecutive spaces produce
    empty-string terms and an empty document yields ``['']``.
    """
    return d.split(sep=' ')
|
|
@ -0,0 +1,46 @@
|
|||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import pickle
|
||||
import math
|
||||
def tokenize(d):
    """Split document ``d`` into terms at every single space character."""
    return d.split(' ')


def _build_model(lines):
    """Collect the naive Bayes statistics from labelled training lines.

    Args:
        lines: Iterable of ``label<TAB>document`` strings. Label ``'S'``
            counts as skeptic; every other label counts as paranormal.
            Lines without a tab-separated document field are skipped with
            a warning on stderr.

    Returns:
        The tuple ``(pskeptic, vocabulary_size, skeptic_words_total,
        paranormal_words_total, skeptic_count, paranormal_count)`` where the
        two counts are plain dicts mapping term -> occurrences.

    Raises:
        ValueError: If no usable training document was found.
    """
    from collections import Counter

    documents_total = 0
    skeptic_documents_total = 0
    vocabulary = set()
    skeptic_count = Counter()
    paranormal_count = Counter()

    for line_number, line in enumerate(lines, start=1):
        fields = line.rstrip().split('\t')
        if len(fields) < 2:
            # Blank or tab-less lines have no document field; indexing
            # fields[1] would raise IndexError, so skip them instead.
            print("skipping malformed training line", line_number,
                  file=sys.stderr)
            continue

        label = fields[0].strip()
        terms = tokenize(fields[1])
        vocabulary.update(terms)
        documents_total += 1

        if label == 'S':
            skeptic_documents_total += 1
            skeptic_count.update(terms)
        else:
            paranormal_count.update(terms)

    if documents_total == 0:
        # Without this check the prior below fails with a bare
        # ZeroDivisionError that does not explain the cause.
        raise ValueError("no training documents were read from stdin")

    pskeptic = skeptic_documents_total / documents_total
    # The per-class word totals equal the sum of that class's term counts.
    skeptic_words_total = sum(skeptic_count.values())
    paranormal_words_total = sum(paranormal_count.values())

    # Store plain dicts so the pickled model keeps its original layout.
    return (pskeptic, len(vocabulary), skeptic_words_total,
            paranormal_words_total, dict(skeptic_count),
            dict(paranormal_count))


def train():
    """Train on ``label<TAB>document`` lines from stdin; write ``model.pkl``."""
    model = _build_model(sys.stdin)
    with open("model.pkl", "wb") as model_file:
        pickle.dump(model, model_file)
|
||||
# Run training only when executed as a script, so importing this module
# does not read stdin or overwrite model.pkl.
if __name__ == "__main__":
    train()
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue