This commit is contained in:
Jan Przybylski 2021-04-20 22:51:32 +02:00
parent e92ad61864
commit bf6bbf28ff
3 changed files with 10437 additions and 10443 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,30 +1,24 @@
import numpy as np
from sklearn import preprocessing
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
# Layout taken from the paths this script uses: <split>/in.tsv holds the
# input lines, train/expected.tsv the training labels, and <split>/out.tsv
# receives the predictions.
TRAIN_DIR = "train"
EVAL_DIRS = ("dev-0", "test-A")


def read_lines(path):
    """Return the lines of the text file at *path* without line endings.

    Line endings are stripped so that a final line lacking a trailing
    newline does not become a distinct label from the same text followed
    by a newline.
    """
    with open(path, encoding="utf-8") as handle:
        return [line.rstrip("\r\n") for line in handle]


def format_predictions(labels):
    """Render *labels* as file content: one label per line.

    Plain joining is used instead of ``np.array2string`` because the latter
    wraps the output in ``[`` ``]`` and may elide long arrays with ``...``.
    """
    return "".join("{}\n".format(label) for label in labels)


def write_predictions(path, labels):
    """Write *labels* to *path*, replacing any previous content.

    The file is truncated rather than appended to, so that re-running the
    script does not stack a second set of predictions after the first.
    """
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(format_predictions(labels))


def main():
    """Fit a TF-IDF + multinomial naive Bayes pipeline and write predictions.

    The pipeline is fitted once on the raw training lines; the same fitted
    pipeline (and therefore the same vocabulary) is then used for every
    split in ``EVAL_DIRS``.
    """
    le = preprocessing.LabelEncoder()
    X = read_lines(TRAIN_DIR + "/in.tsv")
    Y = le.fit_transform(read_lines(TRAIN_DIR + "/expected.tsv"))

    # The pipeline vectorizes the text itself, so it is fed raw lines.
    gnb = make_pipeline(TfidfVectorizer(), MultinomialNB())
    model = gnb.fit(X, Y)

    for split in EVAL_DIRS:
        r = read_lines(split + "/in.tsv")
        y_pred = model.predict(r)
        # Map the encoder's integer codes back to the original label text.
        write_predictions(split + "/out.tsv", le.inverse_transform(y_pred))


if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load Diff