s444476 add solution
This commit is contained in:
parent
d043e30286
commit
2135e5268f
5452
dev-0/out.tsv
Normal file
5452
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
36
run.py
Normal file
36
run.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
import csv

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import GaussianNB
|
||||||
|
|
||||||
|
# Train a TF-IDF + Gaussian Naive Bayes classifier on train/train.tsv and
# write one predicted label per line of dev-0/in.tsv to dev-0/out.tsv.

print("Wczytywanie zbioru treningowego")
# The corpus is raw text, not a quoted CSV dialect: with the default quoting
# a stray '"' inside a document starts a quoted field that can swallow tabs
# and newlines, merging or breaking rows. QUOTE_NONE treats quote characters
# as ordinary text so each physical line is exactly one row.
train = pd.read_csv(
    "train/train.tsv",
    sep="\t",
    header=None,
    quoting=csv.QUOTE_NONE,
)

# Column 1 is fed to the vectorizer as text, column 0 is used as the target.
X_train = train[1]
Y_train = train[0]
# Sanity check: both counts should equal the number of lines in the file.
print(len(X_train))
print(len(Y_train))

print("Wczytywanie pliku do predykcji")
# Every line of the input file is treated as one document to classify.
with open("dev-0/in.tsv", encoding='utf-8') as f:
    pred_x = list(f)

print("Wektoryzacja")
# Fit the vocabulary and IDF weights on the training text only, then reuse
# the fitted vectorizer so prediction features share the same columns.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train)
pred_x = vectorizer.transform(pred_x)

print("Uczenie modelu")
model = GaussianNB()
# NOTE(review): the sparse TF-IDF matrices are densified with toarray() for
# GaussianNB; memory grows as n_documents * vocabulary_size, so this may not
# scale to large corpora.
model.fit(X_train.toarray(), Y_train)

print("Predykcja wyników")
pred_y = model.predict(pred_x.toarray())

print("Zapis do pliku")
# One prediction per line, without header or index column.
pd.DataFrame(pred_y).to_csv("dev-0/out.tsv", header=False, index=None)
|
5447
test-A/out.tsv
Normal file
5447
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user