s444476 add solution

This commit is contained in:
Maciej Ścigacz 2022-05-01 17:40:37 +02:00
parent d043e30286
commit 2135e5268f
3 changed files with 10935 additions and 0 deletions

5452
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

36
run.py Normal file
View File

@@ -0,0 +1,36 @@
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.feature_extraction.text import TfidfVectorizer
# Pipeline: read the training TSV, fit a TF-IDF vectorizer and a Gaussian
# naive Bayes classifier on it, then label every line of dev-0/in.tsv and
# write the predictions to dev-0/out.tsv.

# --- Training data -------------------------------------------------------
print("Wczytywanie zbioru treningowego")
# The file is read without a header row; column 0 is used as the label and
# column 1 as the text that gets vectorized.
# NOTE(review): read_csv applies its default quote handling here. If the
# text column can contain stray double quotes, rows may be merged silently;
# confirm against the data whether quoting needs to be disabled.
train = pd.read_csv("train/train.tsv", sep='\t', header=None)
train_texts = train[1]
Y_train = train[0]
print(len(train_texts))
print(len(Y_train))

# --- Documents to classify -----------------------------------------------
print("Wczytywanie pliku do predykcji")
# One document per input line; each line keeps its trailing newline.
with open("dev-0/in.tsv", encoding='utf-8') as f:
    dev_lines = list(f)

# --- Feature extraction --------------------------------------------------
print("Wektoryzacja")
# The vocabulary and IDF weights are fitted on the training texts only and
# then reused unchanged for the documents to classify.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(train_texts)
pred_x = vectorizer.transform(dev_lines)

# --- Model fitting -------------------------------------------------------
print("Uczenie modelu")
model = GaussianNB()
# The TF-IDF matrices are converted to dense arrays before being handed to
# GaussianNB.
# NOTE(review): the dense copy is (documents x vocabulary) in size and may
# be very large for a big corpus; verify it fits in memory.
model.fit(X_train.toarray(), Y_train)

# --- Prediction ----------------------------------------------------------
print("Predykcja wyników")
pred_y = model.predict(pred_x.toarray())

# --- Output --------------------------------------------------------------
print("Zapis do pliku")
# Written without header and without index, so the file holds only the
# predicted labels, one row per prediction.
predictions = pd.DataFrame(pred_y)
predictions.to_csv("dev-0/out.tsv", header=False, index=None)

5447
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff