# paranormal-or-skeptic/code_regression.py
# 2020-04-04 19:55:07 +02:00

# 54 lines
# 1.8 KiB
# Python

from collections import defaultdict
import math
import pickle
import re
from pip._vendor.msgpack.fallback import xrange
import random
# Module-level placeholder. main() assigns its own local `vocabulary`
# (there is no `global` statement), so this list is not modified by the
# code shown in this file.
vocabulary=[]
#word_to_index_mapping=[]
#index_to_word_mapping=[]
#file_to_save=open("test.tsv","w",encoding='utf-8')
#def define_vocabulary(file_to_learn_new_words,expected_path):
# word_counts = {'paranormal': defaultdict(int), 'skeptic': defaultdict(int)}
# with open(file_to_learn_new_words, encoding='utf-8') as in_file, open(expected_path, encoding='utf-8') as expected_file:
# for line, exp in zip(in_file, expected_file):
# class_ = exp.rstrip('\n').replace(' ', '')
# text, timestamp = line.rstrip('\n').split('\t')
# tokens = text.lower().split(' ')
# for token in tokens:
# if class_ == 'P':
# word_counts['paranormal'][token] += 1
# elif class_ == 'S':
# word_counts['skeptic'][token] += 1
# return word_counts
# Opened in "w" mode at import time, which creates test.tsv or truncates an
# existing one.
# NOTE(review): nothing in the visible code writes to or closes this handle;
# presumably it is meant for the prediction output - confirm before relying on it.
file_to_save=open("test.tsv","w",encoding='utf-8')
def define_vocabulary(file_to_learn_new_words):
    """Count lower-cased token occurrences in a tab-separated text file.

    Only the first tab-separated column of each line is treated as text;
    any further columns (e.g. a timestamp) are ignored. Lines without a tab
    are counted as text-only lines and completely blank lines are skipped,
    so a malformed or trailing-empty line no longer aborts the whole run
    with a ``ValueError``.

    Args:
        file_to_learn_new_words: Path to a UTF-8 encoded file to read.

    Returns:
        A dict with the single key ``'count'`` whose value is a
        ``defaultdict(int)`` mapping each token to its number of occurrences.
    """
    word_counts = {'count': defaultdict(int)}
    counts = word_counts['count']
    with open(file_to_learn_new_words, encoding='utf-8') as in_file:
        for line in in_file:
            row = line.rstrip('\n')
            if not row:
                # Blank line (e.g. an extra newline at end of file): no text.
                continue
            # Keep only the text column; maxsplit=1 avoids splitting the rest.
            text = row.split('\t', 1)[0]
            # split(' ') is kept deliberately: consecutive spaces yield an
            # empty-string token that is counted like any other, exactly as
            # before, so vocabulary sizes stay unchanged for well-formed data.
            for token in text.lower().split(' '):
                counts[token] += 1
    return word_counts
def main():
    """Build the vocabulary from ``train/in.tsv`` and draw random initial weights.

    Prints the number of weights (number of distinct tokens plus one) and
    creates that many integer weights, each drawn with
    ``random.randrange(0, weight_count)``.

    Returns:
        The list of initial integer weights. Earlier versions returned
        nothing; existing callers that ignore the result are unaffected.
    """
    # --------------- initialization ---------------------------------
    vocabulary = define_vocabulary('train/in.tsv')
    # One weight per distinct token plus one extra slot
    # (presumably a bias term - TODO confirm).
    weight_count = len(vocabulary['count']) + 1
    print(weight_count)
    weights = [random.randrange(0, weight_count) for _ in range(weight_count)]
    # --------------- prediction -------------------------------------
    # NOTE(review): no prediction step is implemented in the visible code.
    return weights
# Script entry point. There is no `if __name__ == "__main__":` guard, so this
# call also runs whenever the module is imported.
main()