git push!
This commit is contained in:
parent
c8d7e1452c
commit
4237349093
5446
dev-0/out.tsv
Normal file
5446
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
52
mian.py
52
mian.py
@ -1,5 +1,3 @@
|
||||
import csv
|
||||
|
||||
import nltk
|
||||
import pandas as pd
|
||||
from sklearn.neural_network import MLPClassifier
|
||||
@ -88,3 +86,53 @@ print(przewidywania)
|
||||
# Dump every prediction to the output file, one value per line.
with open("test-A/out.tsv", "w", encoding="utf-8") as out_file:
    out_file.writelines(str(label) + "\n" for label in przewidywania)
|
||||
|
||||
### dev-0

# NOTE: in dev-0/in.tsv, at positions 1983 and 5199 (the original note says
# "columns"; presumably these are line numbers -- confirm), the tab character
# inside the text has to be replaced with 4 spaces before running this script.
#
# header=None keeps the first line as data. Without it pandas treats the first
# row as column names and silently drops one sample (assumes the .tsv files
# carry no header row -- TODO confirm).
dev_in = pd.read_csv('dev-0/in.tsv', sep='\t', header=None)
dev_in.columns = ["x"]

print(dev_in["x"][0])

dev_expected = pd.read_csv('dev-0/expected.tsv', sep='\t', header=None)
dev_expected.columns = ["y"]

print(dev_expected["y"][0])

# https://www.geeksforgeeks.org/python-word-embedding-using-word2vec/
# Tokenize and lowercase every dev text once; the token lists are reused below.
slowa = [
    [slowo.lower() for slowo in word_tokenize(tekst)]
    for tekst in dev_in["x"]
]
print(slowa[0])

# Build one feature vector per dev text: the sum of the word2vec vectors of
# all tokens known to the model.
# NOTE(review): this must match the featurization used when `clf` was trained
# (not visible in this section) -- confirm the training code also sums.
teksty = []
for tokeny in slowa:
    pom = None
    for slowo in tokeny:
        try:
            wektor = model.wv[slowo]
        except KeyError:
            # Token is not in the word2vec vocabulary; skip it.
            continue
        pom = wektor if pom is None else pom + wektor
    if pom is None:
        # No token had a vector: fall back to an all-zero vector so the row
        # count stays aligned with dev_expected.
        pom = [0.0] * model.wv.vector_size
    teksty.append(pom)
print(teksty[0])

przewidywania = clf.predict(teksty)
print(przewidywania)

# Write the dev-0 predictions, one per line.
with open("dev-0/out.tsv", "w", encoding="utf-8") as uwu:
    for p in przewidywania:
        uwu.write(str(p) + "\n")

# Print each prediction next to its expected label for a quick visual check.
for p, oczekiwane in zip(przewidywania, dev_expected["y"]):
    print(p, oczekiwane)
|
||||
|
||||
|
@ -3796,7 +3796,7 @@
|
||||
0
|
||||
1
|
||||
1
|
||||
0
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
@ -5174,7 +5174,7 @@
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
1
|
||||
1
|
||||
1
|
||||
|
|
BIN
word2vec.model
BIN
word2vec.model
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user