First solution
This commit is contained in:
parent
9cb2fb2612
commit
68e8ce1205
5452
dev-0/out.tsv
Normal file
5452
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
32
run.py
Normal file
32
run.py
Normal file
@ -0,0 +1,32 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.naive_bayes import GaussianNB
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
|
||||
from sklearn.metrics import accuracy_score
|
||||
|
||||
|
||||
# The input files were generated outside the repository, which is why the
# paths and file names used here differ from the repository layout.
df = pd.read_csv("train.tsv", sep="\t", header=None)
dev_X = pd.read_csv("in.tsv", sep="\t", header=None)
test_X = pd.read_csv("test_in.tsv", sep="\t", header=None)

# Only the first 1500 training rows are used. Column 1 is fed to the
# vectorizer and column 0 is used as the target.
df = df.head(1500)
y = df[0]
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df[1])

# The sparse count matrix is converted to a dense array before fitting.
model = GaussianNB()
model.fit(X.toarray(), y)

# Predict for the dev input (column 0) and format one prediction per line.
# The dense array is created inline so it is not kept alive after predict().
dev_counts = vectorizer.transform(dev_X[0])
data_for_dev = [
    f"{label!s}\n" for label in model.predict(dev_counts.toarray()).tolist()
]

# Same procedure for the test input.
test_counts = vectorizer.transform(test_X[0])
data_for_test = [
    f"{label!s}\n" for label in model.predict(test_counts.toarray()).tolist()
]

# Write the predictions; each entry already carries its trailing newline.
with open("out.tsv", "w", encoding="UTF-8") as output_file:
    output_file.write("".join(data_for_dev))

with open("test_out.tsv", "w", encoding="UTF-8") as output_file:
    output_file.write("".join(data_for_test))
|
5447
test-A/out.tsv
Normal file
5447
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user