Delete trash

2022-04-26 23:39:42 +02:00 · 2022-04-26 23:39:42 +02:00 · 048e0fb186
commit 048e0fb186
parent d9d70b1335
3 changed files with 0 additions and 200 deletions
--- a/._main.ipynb
+++ b/._main.ipynb
--- a/main.ipynb
+++ b/main.ipynb
--- a/run.py
+++ b/run.py
@@ -1,48 +0,0 @@
-import lzma
-# import re
-from sklearn.feature_extraction.text import CountVectorizer
-import csv
-from sklearn.naive_bayes import GaussianNB
-
-# def get_str_cleaned(str_dirty):
-#         punctuation = '!"#$%&\'()*+,-./:;<=>?@[\\\\]^_`{|}~'
-#         new_str = str_dirty.lower()
-#         new_str = re.sub(' +', ' ', new_str)
-#         for char in punctuation:
-#             new_str = new_str.replace(char, '')
-#             new_str = new_str.replace('\n', '')
-#         return new_str
-
-# with open('train/expected.tsv') as f:
-#     trainY = list(csv.reader(f))
-
-trainX = []
-trainY = []
-testX = []
-testY = []
-
-with lzma.open('train/in.tsv.xz') as f:
-    for line in f:
-        # X_train.append(get_str_cleaned(line.decode('utf-8')))
-        trainX.append(line.decode('utf-8'))
-
-with open('train/expected.tsv') as f:
-    for line in f:
-        trainY.append(line)
-
-vectorizer = CountVectorizer()
-trainX = vectorizer.fit_transform(trainX)
-
-model = GaussianNB()
-model.fit(trainX, trainY)
-
-with open('dev-0/in.tsv') as f:
-    for line in f:
-        testX.append(line.decode('utf-8'))
-    # testX = list(csv.reader(f))
-
-predictedY = model.predict(testX)
-print(predictedY)
-
-# with open('dev-0/expected.tsv') as f:
-#     expectedY = list(csv.reader(f))