test 4

2024-05-19 11:31:01 +02:00 · 2024-05-19 11:31:01 +02:00 · 3817604049
commit 3817604049
parent 5ec1581437
5 changed files with 1441 additions and 1433 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,5 @@
 word2vec_100_3_polish.bin
-word2vec_100_3_polish.bin.syn0.npy
+word2vec_100_3_polish.bin.syn0.npy
+word2vec_300_3_polish.bin
+word2vec_300_3_polish.bin.trainables.syn1neg.npy
+word2vec_300_3_polish.bin.wv.vectors.npy
--- a/dev-0/out.tsv
+++ b/dev-0/out.tsv
--- a/repo_link.tsv
+++ b/repo_link.tsv
@@ -0,0 +1 @@
+https://git.wmi.amu.edu.pl/s464913/DL_Word2Vec
--- a/run.py
+++ b/run.py
@@ -17,20 +17,24 @@ def read_data():
 def text_to_vector(text, word2vec, vector_size):
    words = text.split()
    text_vector = np.zeros(vector_size)
+    word_count = 0
    for word in words:
-        if word in word2vec:
-            text_vector += word2vec[word]
-    return text_vector / len(words)
+        if word in word2vec.wv:
+            text_vector += word2vec.wv[word]
+            word_count += 1
+    if word_count > 0:
+        text_vector /= word_count
+    return text_vector


 def main():
    train_dataset, dev_0_dataset, test_A_dataset = read_data()

    # Word2Vec parameters
-    vector_size = 100
+    vector_size = 300

    # Training the Word2Vec model
-    word2vec = KeyedVectors.load("word2vec_100_3_polish.bin")
+    word2vec = KeyedVectors.load("word2vec_300_3_polish.bin")

    # Convert text to vectors
    train_vectors = np.array(
--- a/test-A/out.tsv
+++ b/test-A/out.tsv
				`@ -0,0 +1 @@`
				`https://git.wmi.amu.edu.pl/s464913/DL_Word2Vec`