paranormal-or-skeptic/dev-0/train.py

#!/usr/bin/env python3
import sys
import pickle
import math
def tokenize(d):
	"""Split document *d* into a list of terms on single space characters.

	Only the space character separates terms: consecutive spaces produce
	empty-string terms, and tabs or newlines stay inside their term.
	"""
	separator = ' '
	terms = d.split(separator)
	return terms
def train():
	"""Collect per-class word statistics from stdin and pickle them.

	Each input line is expected to be ``label<TAB>document``. Documents
	labelled ``'S'`` are counted as skeptic; every other label is counted as
	paranormal. Lines without a document column (blank lines, or a label with
	nothing after it) are skipped.

	The model written to ``model.pkl`` in the current directory is the tuple
	``(pskeptic, vocabulary_size, skeptic_words_total,
	paranormal_words_total, skeptic_count, paranormal_count)`` where
	``pskeptic`` is the fraction of documents labelled ``'S'`` and the two
	``*_count`` values are plain dicts mapping term -> occurrences.

	Raises:
		ValueError: if stdin contained no usable training lines.
	"""
	documents_total = 0
	skeptic_documents_total = 0
	vocabulary = set()
	skeptic_words_total = 0
	paranormal_words_total = 0
	skeptic_count = {}
	paranormal_count = {}
	for line in sys.stdin:
		fields = line.rstrip().split('\t')
		if len(fields) < 2:
			# rstrip() also removes a trailing tab, so an empty document
			# collapses to a single field; there is nothing to count.
			continue
		label = fields[0].strip()
		terms = tokenize(fields[1])
		vocabulary.update(terms)
		documents_total += 1
		# Pick the per-class counter once so the counting loop is shared.
		if label == 'S':
			skeptic_documents_total += 1
			skeptic_words_total += len(terms)
			class_count = skeptic_count
		else:
			paranormal_words_total += len(terms)
			class_count = paranormal_count
		for term in terms:
			class_count[term] = class_count.get(term, 0) + 1

	if documents_total == 0:
		# Fail with a clear message instead of a bare ZeroDivisionError.
		raise ValueError("no training documents were read from stdin")

	pskeptic = skeptic_documents_total / documents_total
	vocabulary_size = len(vocabulary)
	model = (pskeptic, vocabulary_size, skeptic_words_total,
		paranormal_words_total, skeptic_count, paranormal_count)
	# Context manager guarantees the pickle is flushed and the file closed.
	with open("model.pkl", "wb") as model_file:
		pickle.dump(model, model_file)
# Runs at module level (on execution or import): reads the training data from
# stdin and writes model.pkl.
train()
my brilliant solution2 2020-03-22 18:35:08 +01:00			`#!/usr/bin/env python3`
			`import sys`
			`import pickle`
			`import math`
			`def tokenize(d):`
			`return d.split(' ')`
			`def train():`
			`documents_total = 0`
			`skeptic_documents_total = 0`
			`paranormal_documents_total = 0`
			`vocabulary = set()`
			`skeptic_words_total = 0`
			`paranormal_words_total = 0`
			`skeptic_count = {}`
			`paranormal_count = {}`
			`for line in sys.stdin:`
			`line = line.rstrip()`
			`fields = line.split('\t')`
			`label = fields[0].strip()`
			`document = fields[1]`
			`terms = tokenize(document)`
			`for t in terms:`
			`vocabulary.add(t)`
			`documents_total +=1`
			`if label == 'S':`
			`skeptic_documents_total +=1`
			`skeptic_words_total += len(terms)`
			`for term in terms:`
			`if term in skeptic_count:`
			`skeptic_count[term] += 1`
			`else:`
			`skeptic_count[term] = 1`
			`else:`
			`paranormal_words_total += len(terms)`
			`for term in terms:`
			`if term in paranormal_count:`
			`paranormal_count[term] += 1`
			`else:`
			`paranormal_count[term] = 1`


			`pskeptic = skeptic_documents_total / documents_total`
			`vocabulary_size = len(vocabulary)`
			`model = (pskeptic, vocabulary_size, skeptic_words_total,paranormal_words_total, skeptic_count, paranormal_count)`
			`pickle.dump(model, open("model.pkl", "wb"))`
			`train()`