Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
f8d3baa339 |
83
Skrypt.py
Normal file
83
Skrypt.py
Normal file
@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env python
# coding: utf-8
"""Train a TF-IDF + multinomial naive Bayes classifier and label two data sets.

The model is fitted on ``train/train.tsv`` (label in the first column, text in
the second).  Predictions for ``dev-0/in.tsv`` and ``test-A/in.tsv`` are
printed and written, one label per line, to the matching ``out.tsv`` files.
"""

import csv
import string  # unused here; kept because other code may rely on it

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from stop_words import get_stop_words

# A frozenset gives O(1) membership tests; the package returns a plain list.
STOP_WORDS = frozenset(get_stop_words('polish'))

TRAIN_PATH = "train/train.tsv"

# (input file, output file) pairs that are labelled after training.
DATASETS = (
    ("dev-0/in.tsv", "dev-0/out.tsv"),
    ("test-A/in.tsv", "test-A/out.tsv"),
)


def preprocess(text):
    """Return *text* with commas removed and Polish stop words dropped.

    Tokens are separated on whitespace and re-joined with single spaces.
    """
    kept = []
    for token in text.split():
        token = token.replace(",", "")
        # Compare lower-cased: TfidfVectorizer lower-cases its input by
        # default, so a capitalised stop word would otherwise survive the
        # filter and still end up in the vocabulary.
        if token and token.lower() not in STOP_WORDS:
            kept.append(token)
    return " ".join(kept)


def load_training(path):
    """Read the training TSV at *path*.

    Returns a ``(labels, texts)`` pair of equally long lists, where the texts
    have already been passed through :func:`preprocess`.
    """
    labels = []
    texts = []
    # utf-8 matches the encoding used for the input files; newline="" is what
    # the csv module expects from the file object it reads.
    with open(path, encoding="utf-8", newline="") as tsv:
        # QUOTE_NONE: the file is plain tab-separated text, so a stray '"'
        # must not start a quoted field that swallows the following rows.
        for row in csv.reader(tsv, delimiter="\t", quoting=csv.QUOTE_NONE):
            if len(row) < 2:
                # Blank or truncated line: nothing to train on.
                continue
            labels.append(row[0])
            texts.append(preprocess(row[1]))
    return labels, texts


def predict_file(model, vectorizer, in_path, out_path):
    """Label every line of *in_path* and write the labels to *out_path*.

    *vectorizer* must already be fitted and *model* already trained.  The
    predictions are also printed and returned.
    """
    with open(in_path, 'r', encoding="utf-8") as source:
        documents = [preprocess(line) for line in source]

    predictions = model.predict(vectorizer.transform(documents))
    print(predictions)

    # The output is opened only once the predictions exist, so a failure
    # above does not leave behind a truncated result file.
    with open(out_path, "w", encoding="utf-8") as target:
        target.writelines(str(label) + '\n' for label in predictions)
    return predictions


def main():
    """Fit the classifier on the training set and label every data set."""
    labels, texts = load_training(TRAIN_PATH)

    vectorizer = TfidfVectorizer()
    model = MultinomialNB()
    model.fit(vectorizer.fit_transform(texts), labels)

    for in_path, out_path in DATASETS:
        predict_file(model, vectorizer, in_path, out_path)


if __name__ == "__main__":
    main()
|
5452
dev-0/out.tsv
Normal file
5452
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
5447
test-A/out.tsv
Normal file
5447
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user