# paranormal-or-skeptic/solution.py

import csv
from collections import defaultdict
import math

# Load every row of the input file; each TSV row is treated as one document.
# NOTE(review): csv.reader applies its default quote handling here; if in.tsv
# holds raw unquoted text containing '"', rows may be merged - confirm whether
# quoting=csv.QUOTE_NONE is needed.
counter = 0
docs = []
with open('in.tsv') as tsvfile:
    reader = csv.reader(tsvfile, delimiter='\t')
    for row in reader:
        docs.append(row)
        counter += 1

print(counter)

# Tally the labels found in the first column of expected.tsv.
pcounter = 0
scounter = 0
with open('expected.tsv') as tsvfile:
    reader = csv.reader(tsvfile, delimiter='\t')
    for row in reader:
        if not row:
            # csv.reader yields [] for a blank line; row[0] would raise.
            continue
        # Strip padding so " P" and "P" are both recognised, matching how the
        # functions below normalise labels.
        label = row[0].strip()
        if label == "P":
            pcounter += 1
        elif label == "S":
            scounter += 1

print(pcounter)
print(scounter)

# Add-one smoothed class priors over two classes: (count + 1) / (N + 2).
# The parentheses matter: without them the expression evaluates as
# count + (1 / N) + 2, which is not a probability.
print("P(S) = " + str((scounter + 1) / (counter + 2)))
print("P(P) = " + str((pcounter + 1) / (counter + 2)))

def calc_class_logprob(expected_path):
    """Return the log prior of each class from a label file.

    Every line of ``expected_path`` containing the letter "P" counts towards
    the paranormal class; otherwise a line containing "S" counts towards the
    skeptic class. Lines with neither letter are ignored.

    Returns:
        A tuple ``(log P(paranormal), log P(skeptic))`` where each
        probability is the class count divided by the number of counted lines.

    Raises:
        ZeroDivisionError: if no line contains "P" or "S".
        ValueError: if one of the classes has no lines (log of zero).
    """
    tally = {"P": 0, "S": 0}
    with open(expected_path) as labels:
        for label_line in labels:
            # "P" is checked first, so a line holding both letters is
            # counted as paranormal only.
            for letter in ("P", "S"):
                if letter in label_line:
                    tally[letter] += 1
                    break

    n_paranormal = tally["P"]
    n_skeptic = tally["S"]
    n_labelled = n_paranormal + n_skeptic

    return math.log(n_paranormal / n_labelled), math.log(n_skeptic / n_labelled)

def calc_word_counts(in_path, expected_path):
    """Count token occurrences per class from parallel text and label files.

    Line *i* of ``in_path`` is paired with line *i* of ``expected_path``;
    pairing stops at the end of the shorter file. The label is the expected
    line with surrounding whitespace removed ("P" or "S"). The text is the
    first tab-separated field of the input line, lower-cased and split on
    whitespace; any further fields are ignored.

    Args:
        in_path: path to the TSV file whose first column holds the text.
        expected_path: path to the file with one class label per line.

    Returns:
        ``{'paranormal': defaultdict(int), 'skeptic': defaultdict(int)}``
        mapping each token to the number of times it occurred in documents
        of that class. Lines with any other label are skipped.
    """
    word_counts = {'paranormal': defaultdict(int), 'skeptic': defaultdict(int)}
    class_keys = {'P': 'paranormal', 'S': 'skeptic'}

    # Each file needs its own "as" target; "open(a), open(b) as x, y" binds
    # nothing to the first file and treats y as a third context manager.
    with open(in_path) as in_file, open(expected_path) as exp_file:
        for in_line, exp_line in zip(in_file, exp_file):
            class_key = class_keys.get(exp_line.strip())
            if class_key is None:
                continue

            # Take only the first column so rows with a missing or extra
            # tab do not raise on unpacking.
            text = in_line.rstrip('\n').split('\t', 1)[0]
            class_counts = word_counts[class_key]
            # split() without arguments drops the empty tokens that
            # consecutive spaces would otherwise produce.
            for token in text.lower().split():
                class_counts[token] += 1

    return word_counts


def calc_words_logprobs(words_counts):
    """Turn per-class token counts into add-one smoothed log probabilities.

    For each class the denominator is the total number of counted tokens
    plus the number of distinct tokens seen in that class, and each token's
    numerator is its count plus one.

    Args:
        words_counts: mapping of class name to a ``{token: count}`` mapping,
            e.g. the value returned by ``calc_word_counts``.

    Returns:
        A dict with the same class names, each mapping every token seen in
        that class to ``log((count + 1) / denominator)``. A class with no
        tokens maps to an empty dict.
    """
    # NOTE(review): the denominator uses the per-class vocabulary size, as
    # the original draft did; a shared vocabulary across classes is the more
    # common choice for Naive Bayes - confirm which one is intended.
    logprobs = {}
    for class_name, counts in words_counts.items():
        denominator = sum(counts.values()) + len(counts)
        logprobs[class_name] = {
            token: math.log((count + 1) / denominator)
            for token, count in counts.items()
        }
    return logprobs


# with open('prediction.tsv', 'wt') as tsvfile:
#     tsv_writer = csv.writer(tsvfile, delimiter='\t')
#     for i in range(counter):
#         tsv_writer.writerow('S')
first solution 2020-03-09 14:37:26 +01:00			`import csv`
test 2020-03-09 18:30:02 +01:00			`from collections import defaultdict`
			`import math`
first solution 2020-03-09 14:37:26 +01:00
			`counter = 0`
test 2020-03-09 18:30:02 +01:00			`docs = []`
first solution 2020-03-09 14:37:26 +01:00			`with open('in.tsv') as tsvfile:`
			`reader = csv.reader(tsvfile, delimiter='\t')`
			`for row in reader:`
test 2020-03-09 18:30:02 +01:00			`docs.append(row)`
first solution 2020-03-09 14:37:26 +01:00			`counter+=1`

			`print(counter)`
test 2020-03-09 18:30:02 +01:00			`pcounter = 0`
			`scounter = 0`
			`with open('expected.tsv') as tsvfile:`
			`reader = csv.reader(tsvfile, delimiter='\t')`
			`for row in reader:`
			`if row[0] == " P":`
			`pcounter += 1`
			`if row[0] == " S":`
			`scounter += 1`

			`print(pcounter)`
			`print(scounter)`

			`print("P(S) = " + str(scounter+1/counter+2))`
			`print("P(P) = " + str(pcounter+1/counter+2))`

			`def calc_class_logprob(expected_path):`
			`paranoarmal_class_count = 0`
			`skeptic_class_count = 0`
			`with open(expected_path) as f:`
			`for line in f:`
			`if "P" in line:`
			`paranoarmal_class_count +=1`
			`elif "S" in line:`
			`skeptic_class_count +=1`

			`paranormal_class_prob = paranoarmal_class_count / (paranoarmal_class_count + skeptic_class_count)`
			`skeptic_class_prob = skeptic_class_count / (paranoarmal_class_count + skeptic_class_count)`

			`return math.log(paranormal_class_prob), math.log(skeptic_class_prob)`

			`def calc_word_counts(in_path, expected_path):`
			`with open(in_path), open(expected_path) as in_file, exp_file:`
			`word_counts = {'paranormal': defaultdict(int), 'skeptic': defaultdict(int)}`
			`for in_line, exp_line in zip(in_file, exp_file):`
			`for line in f:`
			`class_ = exp_line.rstrip('\n').replace(" ", "")`
			`text, timestamp = line.rstrip('\n').split('\t')`
			`tokens = text.lower().split(' ')`
			`for token in tokens:`
			`if class_ == 'P':`
			`word_counts['paranormal'][token] += 1`
			`elif class_ == 'S':`
			`word_counts['skeptic'][token] += 1`

			`return word_counts`


			`def calc_words_logprobs(words_counts):`
			`total_skeptic = sum(word_counts['skeptic'].values()) + len(word_counts['skeptic'].keys()))`
			`total_paranormal = sum(word_counts['paranormal'].values() + len(word_counts['paranormal'].keys()))`

first solution 2020-03-09 14:37:26 +01:00
test 2020-03-09 18:30:02 +01:00			`# with open('prediction.tsv', 'wt') as tsvfile:`
			`# tsv_writer = csv.writer(tsvfile, delimiter='\t')`
			`# for i in range(counter):`
			`# tsv_writer.writerow('S')`
first solution 2020-03-09 14:37:26 +01:00