git push!
This commit is contained in:
parent
c8d7e1452c
commit
4237349093
5446
dev-0/out.tsv
Normal file
5446
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
52
mian.py
52
mian.py
@ -1,5 +1,3 @@
|
|||||||
import csv
|
|
||||||
|
|
||||||
import nltk
|
import nltk
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from sklearn.neural_network import MLPClassifier
|
from sklearn.neural_network import MLPClassifier
|
||||||
@ -88,3 +86,53 @@ print(przewidywania)
|
|||||||
# Persist the test-A predictions, one label per line.
with open("test-A/out.tsv", "w", encoding="utf-8") as uwu:
    uwu.writelines(str(p) + "\n" for p in przewidywania)
|
||||||
|
|
||||||
|
### dev-0

import numpy as np  # local to this section: needed for the zero-vector fallback


def _wektor_tekstu(tokeny, wv):
    """Return the sum of the embeddings of all in-vocabulary tokens.

    Args:
        tokeny: iterable of already lower-cased tokens of one text.
        wv: keyed vectors supporting ``wv[token]`` (raising ``KeyError`` for
            unknown tokens) and exposing ``vector_size``.

    Returns:
        A 1-D array of length ``wv.vector_size``; all zeros when none of the
        tokens is known, so the classifier always receives a valid row.
    """
    suma = np.zeros(wv.vector_size)
    for token in tokeny:
        try:
            suma += wv[token]
        except KeyError:
            # Out-of-vocabulary token: it simply does not contribute.
            continue
    return suma


# NOTE: in dev-0/in.tsv the texts of entries 1983 and 5199 (the original note
# calls them "columns") contain a literal tab, which has to be replaced by
# 4 spaces by hand; otherwise the file parses into more than one column and
# the column assignment below fails.
# header=None: the file is assumed to have no header row, so the first line is
# data and must not be consumed as column names.
# NOTE(review): confirm the train/test-A sections read their files the same way.
dev_in = pd.read_csv('dev-0/in.tsv', sep='\t', header=None)
dev_in.columns = ["x"]

print(dev_in["x"][0])

dev_expected = pd.read_csv('dev-0/expected.tsv', sep='\t', header=None)
dev_expected.columns = ["y"]

print(dev_expected["y"][0])

# https://www.geeksforgeeks.org/python-word-embedding-using-word2vec/
# Tokenise every dev-0 text once; the tokens are reused for the features below.
slowa = [
    [slowo.lower() for slowo in word_tokenize(tekst)]
    for tekst in dev_in["x"]
]
print(slowa[0])

# One feature row per dev-0 text: the sum of its word vectors. (Previously this
# loop iterated the test-A frame and appended only the last token's vector.)
# NOTE(review): the classifier must have been trained on features built the
# same way (summed vectors) - verify against the training section.
teksty = [_wektor_tekstu(tokeny, model.wv) for tokeny in slowa]
print(teksty[0])

przewidywania = clf.predict(teksty)
print(przewidywania)

with open("dev-0/out.tsv", "w", encoding="utf-8") as uwu:
    uwu.writelines(str(p) + "\n" for p in przewidywania)

# Side-by-side view of predicted vs. expected labels for manual inspection.
for p, oczekiwane in zip(przewidywania, dev_expected["y"]):
    print(p, oczekiwane)
|
||||||
|
|
||||||
|
@ -3796,7 +3796,7 @@
|
|||||||
0
|
0
|
||||||
1
|
1
|
||||||
1
|
1
|
||||||
0
|
1
|
||||||
1
|
1
|
||||||
1
|
1
|
||||||
1
|
1
|
||||||
@ -5174,7 +5174,7 @@
|
|||||||
1
|
1
|
||||||
1
|
1
|
||||||
1
|
1
|
||||||
1
|
0
|
||||||
1
|
1
|
||||||
1
|
1
|
||||||
1
|
1
|
||||||
|
|
BIN
word2vec.model
BIN
word2vec.model
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user