This commit is contained in:
Mateusz 2024-05-19 22:42:01 +02:00
parent 10b830e7a0
commit 8e6b13ca19
3 changed files with 603 additions and 602 deletions

File diff suppressed because it is too large Load Diff

5
run.py
View File

@@ -4,6 +4,7 @@ from sklearn.model_selection import train_test_split
import tensorflow as tf import tensorflow as tf
from keras.optimizers import Adam from keras.optimizers import Adam
import numpy as np import numpy as np
import gensim
def read_data(): def read_data():
@@ -15,7 +16,7 @@ def read_data():
def text_to_vector(text, word2vec, vector_size): def text_to_vector(text, word2vec, vector_size):
words = text.split() words = gensim.utils.simple_preprocess(text)
text_vector = np.zeros(vector_size) text_vector = np.zeros(vector_size)
word_count = 0 word_count = 0
for word in words: for word in words:
@@ -33,7 +34,7 @@ def main():
# Word2Vec parameters # Word2Vec parameters
vector_size = 100 vector_size = 100
# Training the Word2Vec model # Loading the Word2Vec model
word2vec = KeyedVectors.load("fasttext_100_3_polish.bin") word2vec = KeyedVectors.load("fasttext_100_3_polish.bin")
# Convert text to vectors # Convert text to vectors

File diff suppressed because it is too large Load Diff