git push!

Dominik Jagosz 2024-05-17 22:04:12 +02:00
parent c8d7e1452c
commit 4237349093
6 changed files with 5498 additions and 4 deletions

dev-0/out.tsv (new file, 5446 lines)

File diff suppressed because it is too large.

mian.py (52 lines changed)

@@ -1,5 +1,3 @@
-import csv
import nltk
import pandas as pd
from sklearn.neural_network import MLPClassifier
@@ -88,3 +86,53 @@ print(przewidywania)
with open("test-A/out.tsv", "w", encoding="utf-8") as uwu:
    for p in przewidywania:
        uwu.write(str(p)+"\n")
### dev-0
# in dev-0/in.tsv, the tab inside the text in lines 1983 and 5199 has to be replaced with 4 spaces
dev_in = pd.read_csv('dev-0/in.tsv', sep='\t')
dev_in.columns = ["x"]
print(dev_in["x"][0])
dev_expected = pd.read_csv('dev-0/expected.tsv', sep='\t')
dev_expected.columns = ["y"]
print(dev_expected["y"][0])
# https://www.geeksforgeeks.org/python-word-embedding-using-word2vec/
# tokenize the dev-0 texts (lower-cased)
slowa = []
for tekst in dev_in["x"]:
    pom = []
    for slowo in word_tokenize(tekst):
        pom.append(slowo.lower())
    slowa.append(pom)
print(slowa[0])
# build one feature vector per dev-0 text by summing the word2vec vectors
# of its words; words missing from the model vocabulary are skipped
teksty = []
for tekst in dev_in["x"]:
    pom = None
    for slowo in word_tokenize(tekst):
        wektor = None
        try:
            wektor = model.wv[slowo.lower()]
        except KeyError:
            pass
        if wektor is not None:
            if pom is None:
                pom = wektor
            else:
                pom = pom + wektor
    teksty.append(pom)
print(teksty[0])
# predict dev-0 labels with the classifier trained above and write them to dev-0/out.tsv
przewidywania = clf.predict(teksty)
print(przewidywania)
with open("dev-0/out.tsv", "w", encoding="utf-8") as uwu:
    for p in przewidywania:
        uwu.write(str(p)+"\n")
# print each prediction next to the expected label
for i in range(len(przewidywania)):
    print(przewidywania[i], dev_expected["y"][i])
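
The comment at the top of the dev-0 block describes a manual fix: two lines of dev-0/in.tsv contain a literal tab inside the text, which breaks parsing with sep='\t'. A minimal sketch of doing that cleanup in code, under the assumption that in.tsv holds a single text column so every tab inside a line can safely become four spaces (this helper is hypothetical, not part of the commit):

# hypothetical one-off cleanup: replace stray tab characters inside the text
# with four spaces so that read_csv(sep='\t') does not split those lines
with open("dev-0/in.tsv", encoding="utf-8") as f:
    lines = f.read().splitlines()
lines = [line.replace("\t", "    ") for line in lines]
with open("dev-0/in.tsv", "w", encoding="utf-8") as f:
    f.write("\n".join(lines) + "\n")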
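
The loop above builds one vector per text by summing the word2vec vectors of its words and skipping out-of-vocabulary words. A sketch of an alternative formulation, averaging instead of summing so the feature scale does not depend on text length; it assumes `model` is the gensim Word2Vec trained earlier in the script and `word_tokenize` is NLTK's tokenizer, as used above:

import numpy as np

def text_vector(text):
    # average the word2vec vectors of the in-vocabulary words of one text
    vecs = [model.wv[w.lower()] for w in word_tokenize(text) if w.lower() in model.wv]
    if not vecs:
        # no known word in this text: fall back to a zero vector of matching size
        return np.zeros(model.vector_size, dtype=np.float32)
    return np.mean(vecs, axis=0)

teksty = [text_vector(t) for t in dev_in["x"]]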
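
The final loop prints each prediction next to the expected label but never aggregates them into a score. A quick way to get a single number, assuming dev-0/out.tsv and dev-0/expected.tsv both hold one 0/1 label per line, is scikit-learn's accuracy_score:

from sklearn.metrics import accuracy_score

# read both files as plain one-label-per-line text (an assumption about their layout)
with open("dev-0/out.tsv", encoding="utf-8") as f:
    pred = [int(line.strip()) for line in f if line.strip()]
with open("dev-0/expected.tsv", encoding="utf-8") as f:
    gold = [int(line.strip()) for line in f if line.strip()]

print("dev-0 accuracy:", accuracy_score(gold, pred))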

View File

@@ -3796,7 +3796,7 @@
line  old  new
3796  0    0
3797  1    1
3798  1    1
3799  0    1
3800  1    1
3801  1    1
3802  1    1
@@ -5174,7 +5174,7 @@
line  old  new
5174  1    1
5175  1    1
5176  1    1
5177  1    0
5178  1    1
5179  1    1
5180  1    1

Binary file not shown.

Binary file not shown.

Binary file not shown.