This commit is contained in:
Klaudia 2021-05-02 15:25:30 +02:00
parent 9cb2fb2612
commit f8d3baa339
3 changed files with 10982 additions and 0 deletions

83
Skrypt.py Normal file
View File

@ -0,0 +1,83 @@
#!/usr/bin/env python
# coding: utf-8
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
import string
import csv
from stop_words import get_stop_words
# Train a TF-IDF + multinomial naive Bayes text classifier on train/train.tsv
# and write one predicted label per line for dev-0/in.tsv and test-A/in.tsv.

stop_words = get_stop_words('polish')
# Frozen copy for O(1) membership tests; ``stop_words`` itself is left as-is.
_STOP_WORD_SET = frozenset(stop_words)
gnb = MultinomialNB()
vectorizer = TfidfVectorizer()


def _strip_stop_words(text):
    """Return *text* with commas removed and stop words dropped.

    The text is split on whitespace, commas are deleted from every token,
    and tokens that exactly (case-sensitively) match a stop word are
    discarded. The remaining tokens are joined with single spaces.
    """
    cleaned = (token.replace(",", "") for token in text.split())
    return " ".join(
        token for token in cleaned if token and token not in _STOP_WORD_SET
    )


def _predict_file(in_path, out_path):
    """Classify every line of *in_path* and write the labels to *out_path*.

    Each input line is preprocessed with ``_strip_stop_words``, vectorized
    with the already fitted ``vectorizer`` and classified with ``gnb``.
    The predictions are printed and written one per line.

    Returns the array of predictions produced by ``gnb.predict``.
    """
    with open(in_path, 'r', encoding="utf-8") as source:
        documents = [_strip_stop_words(line) for line in source]
    predictions = gnb.predict(vectorizer.transform(documents))
    print(predictions)
    # The output is opened only after prediction succeeded, so a failure
    # above never leaves a truncated result file behind.
    with open(out_path, "w", encoding="utf-8") as target:
        target.writelines(str(label) + '\n' for label in predictions)
    return predictions


zdanie = []  # training sentences (second TSV column)
cyfra = []  # training labels (first TSV column)
# Read as UTF-8 like the inference inputs instead of the locale default.
# QUOTE_NONE keeps a stray double quote in the text from being parsed as the
# start of a quoted field, which could merge several rows into one.
# NOTE(review): assumes the TSV is not quoted -- confirm against the data.
with open("train/train.tsv", encoding="utf-8", newline="") as tsv:
    for line in csv.reader(tsv, delimiter="\t", quoting=csv.QUOTE_NONE):
        if len(line) < 2:
            # Blank or malformed row: it has no label/sentence pair.
            continue
        cyfra.append(line[0])
        zdanie.append(line[1])

prep0 = [_strip_stop_words(sentence) for sentence in zdanie]
zdanie2 = vectorizer.fit_transform(prep0)
gnb.fit(zdanie2, cyfra)

predict1 = _predict_file('dev-0/in.tsv', "dev-0/out.tsv")
predict2 = _predict_file('test-A/in.tsv', "test-A/out.tsv")

5452
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

5447
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff