test 4
This commit is contained in:
parent
5ec1581437
commit
3817604049
5
.gitignore
vendored
5
.gitignore
vendored
@ -1,2 +1,5 @@
|
||||
word2vec_100_3_polish.bin
|
||||
word2vec_100_3_polish.bin.syn0.npy
|
||||
word2vec_100_3_polish.bin.syn0.npy
|
||||
word2vec_300_3_polish.bin
|
||||
word2vec_300_3_polish.bin.trainables.syn1neg.npy
|
||||
word2vec_300_3_polish.bin.wv.vectors.npy
|
1410
dev-0/out.tsv
1410
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
1
repo_link.tsv
Normal file
1
repo_link.tsv
Normal file
@ -0,0 +1 @@
|
||||
https://git.wmi.amu.edu.pl/s464913/DL_Word2Vec
|
|
14
run.py
14
run.py
@ -17,20 +17,24 @@ def read_data():
|
||||
def text_to_vector(text, word2vec, vector_size):
|
||||
words = text.split()
|
||||
text_vector = np.zeros(vector_size)
|
||||
word_count = 0
|
||||
for word in words:
|
||||
if word in word2vec:
|
||||
text_vector += word2vec[word]
|
||||
return text_vector / len(words)
|
||||
if word in word2vec.wv:
|
||||
text_vector += word2vec.wv[word]
|
||||
word_count += 1
|
||||
if word_count > 0:
|
||||
text_vector /= word_count
|
||||
return text_vector
|
||||
|
||||
|
||||
def main():
|
||||
train_dataset, dev_0_dataset, test_A_dataset = read_data()
|
||||
|
||||
# Word2Vec parameters
|
||||
vector_size = 100
|
||||
vector_size = 300
|
||||
|
||||
# Training the Word2Vec model
|
||||
word2vec = KeyedVectors.load("word2vec_100_3_polish.bin")
|
||||
word2vec = KeyedVectors.load("word2vec_300_3_polish.bin")
|
||||
|
||||
# Convert text to vectors
|
||||
train_vectors = np.array(
|
||||
|
1444
test-A/out.tsv
1444
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user