Delete trash

2022-04-26 23:39:42 +02:00 · 2022-04-26 23:39:42 +02:00 · 048e0fb186
commit 048e0fb186
parent d9d70b1335
3 changed files with 0 additions and 200 deletions
--- a/._main.ipynb
+++ b/._main.ipynb
--- a/main.ipynb
+++ b/main.ipynb
--- a/run.py
+++ b/run.py
@@ -1,48 +0,0 @@
 import lzma
 # import re
 from sklearn.feature_extraction.text import CountVectorizer
 import csv
 from sklearn.naive_bayes import GaussianNB
 # def get_str_cleaned(str_dirty):
 #         punctuation = '!"#$%&\'()*+,-./:;<=>?@[\\\\]^_`{|}~'
 #         new_str = str_dirty.lower()
 #         new_str = re.sub(' +', ' ', new_str)
 #         for char in punctuation:
 #             new_str = new_str.replace(char, '')
 #             new_str = new_str.replace('\n', '')
 #         return new_str
 # with open('train/expected.tsv') as f:
 #     trainY = list(csv.reader(f))
 # Train a bag-of-words Gaussian Naive Bayes classifier on the training set
 # and print its predictions for the dev-0 inputs.
 trainX = []
 trainY = []
 testX = []
 testY = []  # never populated in this script
 # Training documents, one per line. lzma.open defaults to binary mode, so
 # each line is decoded explicitly.
 with lzma.open('train/in.tsv.xz') as f:
     for line in f:
         trainX.append(line.decode('utf-8'))
 # Training labels, one per line. The line terminator is dropped so that the
 # class labels (and therefore the predictions) do not end in '\n'.
 with open('train/expected.tsv', encoding='utf-8') as f:
     for line in f:
         trainY.append(line.rstrip('\n'))
 vectorizer = CountVectorizer()
 trainX = vectorizer.fit_transform(trainX)
 model = GaussianNB()
 # CountVectorizer returns a scipy sparse matrix, which GaussianNB does not
 # accept, so the counts are densified before fitting.
 # NOTE(review): densifying may need a lot of memory for a large vocabulary.
 model.fit(trainX.toarray(), trainY)
 # Text-mode open() already yields str, so the lines must not be decoded.
 with open('dev-0/in.tsv', encoding='utf-8') as f:
     for line in f:
         testX.append(line)
 # Map the dev documents onto the vocabulary learned from the training data;
 # the model cannot consume raw strings.
 testX = vectorizer.transform(testX)
 predictedY = model.predict(testX.toarray())
 print(predictedY)
 # with open('dev-0/expected.tsv') as f:
 #     expectedY = list(csv.reader(f))