paranormal-or-skeptic/code_regression.py

from collections import defaultdict
import math
import pickle
import re
import random
vocabulary=[]
#word_to_index_mapping=[]
#index_to_word_mapping=[]
#file_to_save=open("test.tsv","w",encoding='utf-8')
#def define_vocabulary(file_to_learn_new_words,expected_path):
# word_counts = {'paranormal': defaultdict(int), 'skeptic': defaultdict(int)}
# with open(file_to_learn_new_words, encoding='utf-8') as in_file, open(expected_path, encoding='utf-8') as expected_file:
# for line, exp in zip(in_file, expected_file):
# class_ = exp.rstrip('\n').replace(' ', '')
# text, timestamp = line.rstrip('\n').split('\t')
# tokens = text.lower().split(' ')
# for token in tokens:
# if class_ == 'P':
# word_counts['paranormal'][token] += 1
# elif class_ == 'S':
# word_counts['skeptic'][token] += 1
# return word_counts
file_to_save = open("test.tsv", "w", encoding='utf-8')  # output file for predictions
def define_vocabulary(file_to_learn_new_words):
    # Build the vocabulary: count how many times each token occurs in the corpus.
    word_counts = {'count': defaultdict(int)}
    with open(file_to_learn_new_words, encoding='utf-8') as in_file:
        for line in in_file:
            text, timestamp = line.rstrip('\n').split('\t')
            tokens = text.lower().split(' ')
            for token in tokens:
                word_counts['count'][token] += 1
    return word_counts
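# Example usage (assuming each line of train/in.tsv is "text<TAB>timestamp",
# which is the format the split('\t') above expects; the token "ghost" is just
# an illustration):
#   counts = define_vocabulary('train/in.tsv')
#   counts['count']['ghost']   # -> number of occurrences of "ghost" in the corpus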
def read_input(file_path):
    # Re-read a corpus file and count tokens (same counting as define_vocabulary).
    word_counts = {'count': defaultdict(int)}
    with open(file_path, encoding='utf-8') as in_file:
        for line in in_file:
            text, timestamp = line.rstrip('\n').split('\t')
            tokens = text.lower().split(' ')
            for token in tokens:
                word_counts['count'][token] += 1
    return word_counts
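# A minimal sketch of the gradient-descent training loop that the commented-out
# condition in main() points toward. It assumes logistic regression over
# bag-of-words counts and a label file 'train/expected.tsv' containing 'P'/'S'
# lines; the function name, the label-file path, and the 1/0 label encoding are
# assumptions, not part of the original code.
def train_logistic_regression(weights, vocabulary, learning_rate,
                              max_iterations, precision,
                              in_path='train/in.tsv',
                              expected_path='train/expected.tsv'):
    # weights[0] acts as the bias; every known token gets index idx + 1
    word_to_index = {word: idx + 1 for idx, word in enumerate(vocabulary['count'])}
    prev_step_size = 1.0
    current_iteration = 0
    while prev_step_size > precision and current_iteration < max_iterations:
        prev_step_size = 0.0
        with open(in_path, encoding='utf-8') as in_file, \
                open(expected_path, encoding='utf-8') as expected_file:
            for line, exp in zip(in_file, expected_file):
                text, timestamp = line.rstrip('\n').split('\t')
                y = 1.0 if exp.rstrip('\n').replace(' ', '') == 'P' else 0.0
                tokens = text.lower().split(' ')
                # linear score: bias plus one weight per occurrence of a known token
                score = weights[0]
                for token in tokens:
                    if token in word_to_index:
                        score += weights[word_to_index[token]]
                # clamp the score so math.exp cannot overflow
                score = max(min(score, 35.0), -35.0)
                y_hat = 1.0 / (1.0 + math.exp(-score))
                error = y_hat - y
                # stochastic gradient step on the bias and the active token weights
                step = learning_rate * error
                weights[0] -= step
                for token in tokens:
                    if token in word_to_index:
                        weights[word_to_index[token]] -= step
                prev_step_size = max(prev_step_size, abs(step))
        current_iteration += 1
    return weights
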
def main():
    # --------------- initialization ---------------------------------
    vocabulary = define_vocabulary('train/in.tsv')
    weights = []
    # one weight per vocabulary entry plus a bias term
    num_weights = len(vocabulary['count']) + 1
    for i in range(num_weights):
        weights.append(random.randrange(0, len(vocabulary['count']) + 1))

    precision = 0.00001
    learning_rate = 0.001
    prev_step_size = 1
    max_iterations = len(vocabulary['count'])
    current_iteration = 0
    readed_words = read_input("train/in.tsv")
    # --------------- prediction -------------------------------------
    #while (prev_step_size>precision and current_iteration<max_iterations):
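    # A sketch (not in the original code) of how the prediction step could use
    # the trained weights and write 'P'/'S' guesses through file_to_save; the
    # dev-set path 'dev-0/in.tsv' and the 0.5 threshold are assumptions.
    #
    # weights = train_logistic_regression(weights, vocabulary, learning_rate,
    #                                     max_iterations, precision)
    # word_to_index = {word: idx + 1 for idx, word in enumerate(vocabulary['count'])}
    # with open('dev-0/in.tsv', encoding='utf-8') as dev_file:
    #     for line in dev_file:
    #         text, timestamp = line.rstrip('\n').split('\t')
    #         score = weights[0]
    #         for token in text.lower().split(' '):
    #             if token in word_to_index:
    #                 score += weights[word_to_index[token]]
    #         probability = 1.0 / (1.0 + math.exp(-max(min(score, 35.0), -35.0)))
    #         file_to_save.write(('P' if probability > 0.5 else 'S') + '\n')
    # file_to_save.close()
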
main()