Added create_dictionary and main

2020-04-19 19:30:57 +02:00 · 2020-04-19 19:30:57 +02:00 · 604732ed31
commit 604732ed31
parent 12f1942371
4 changed files with 300029 additions and 0 deletions
--- a/dev-0/in.tsv
+++ b/dev-0/in.tsv
--- a/linear_regression.py
+++ b/linear_regression.py
@ -0,0 +1,26 @@
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.feature_extraction.text import CountVectorizer
 import sklearn
 import pandas as pd
 import math
 import re
 def create_dictionary(in_path):
    """Collect every word token found in a UTF-8 text file.

    The file at ``in_path`` is read line by line; each maximal run of
    word characters is taken as one token. Tokens are returned in file
    order as a single flat list, and repeated words are kept.
    """
    find_words = re.compile(r"[\w]+").findall
    collected = []
    with open(in_path, encoding='utf-8') as source:
        for row in source:
            # findall returns the matches of one line in left-to-right order.
            collected.extend(find_words(row))
    return collected
 def main(in_path="train/in.tsv"):
    """Fit a TF-IDF vectorizer on the tokens of a file and vectorize its vocabulary.

    Args:
        in_path: Text file tokenized with ``create_dictionary``. Defaults to
            ``"train/in.tsv"``, the path that was previously hard-coded.

    Returns:
        A ``(tfidf, matrix)`` tuple: the fitted ``TfidfVectorizer`` and the
        result of transforming the terms of its learned vocabulary.
    """
    tokens = create_dictionary(in_path)
    tfidf = TfidfVectorizer(stop_words='english')
    # NOTE(review): each token is passed to the vectorizer as its own
    # one-word document; confirm that per-line documents are not intended.
    tfidf.fit(tokens)
    # fit() returns the estimator itself, so no separate alias is needed.
    # Iterating the vocabulary_ mapping yields its terms, so every learned
    # term is transformed as a single-word document.
    matrix = tfidf.transform(tfidf.vocabulary_)
    return tfidf, matrix
 # Unguarded top-level call: main() runs whenever this module is executed or imported.
 main()
--- a/test-A/in.tsv
+++ b/test-A/in.tsv
--- a/train/in.tsv
+++ b/train/in.tsv