test 4
This commit is contained in:
parent
5ec1581437
commit
3817604049
5
.gitignore
vendored
5
.gitignore
vendored
@ -1,2 +1,5 @@
|
|||||||
word2vec_100_3_polish.bin
|
word2vec_100_3_polish.bin
|
||||||
word2vec_100_3_polish.bin.syn0.npy
|
word2vec_100_3_polish.bin.syn0.npy
|
||||||
|
word2vec_300_3_polish.bin
|
||||||
|
word2vec_300_3_polish.bin.trainables.syn1neg.npy
|
||||||
|
word2vec_300_3_polish.bin.wv.vectors.npy
|
1410
dev-0/out.tsv
1410
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
1
repo_link.tsv
Normal file
1
repo_link.tsv
Normal file
@ -0,0 +1 @@
|
|||||||
|
https://git.wmi.amu.edu.pl/s464913/DL_Word2Vec
|
|
14
run.py
14
run.py
@ -17,20 +17,24 @@ def read_data():
|
|||||||
def text_to_vector(text, word2vec, vector_size):
|
def text_to_vector(text, word2vec, vector_size):
|
||||||
words = text.split()
|
words = text.split()
|
||||||
text_vector = np.zeros(vector_size)
|
text_vector = np.zeros(vector_size)
|
||||||
|
word_count = 0
|
||||||
for word in words:
|
for word in words:
|
||||||
if word in word2vec:
|
if word in word2vec.wv:
|
||||||
text_vector += word2vec[word]
|
text_vector += word2vec.wv[word]
|
||||||
return text_vector / len(words)
|
word_count += 1
|
||||||
|
if word_count > 0:
|
||||||
|
text_vector /= word_count
|
||||||
|
return text_vector
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
train_dataset, dev_0_dataset, test_A_dataset = read_data()
|
train_dataset, dev_0_dataset, test_A_dataset = read_data()
|
||||||
|
|
||||||
# Word2Vec parameters
|
# Word2Vec parameters
|
||||||
vector_size = 100
|
vector_size = 300
|
||||||
|
|
||||||
# Training the Word2Vec model
|
# Training the Word2Vec model
|
||||||
word2vec = KeyedVectors.load("word2vec_100_3_polish.bin")
|
word2vec = KeyedVectors.load("word2vec_300_3_polish.bin")
|
||||||
|
|
||||||
# Convert text to vectors
|
# Convert text to vectors
|
||||||
train_vectors = np.array(
|
train_vectors = np.array(
|
||||||
|
1444
test-A/out.tsv
1444
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user