final version

This commit is contained in:
Łukasz Jędyk 2021-05-21 13:16:42 +02:00
parent 6fb853de01
commit 22853ee945

View File

@@ -42,7 +42,6 @@ X_dev = [word_tokenize(content) for content in X_dev]
X_test = [word_tokenize(content) for content in X_test] X_test = [word_tokenize(content) for content in X_test]
# word2vec # word2vec
#word2vec = Word2Vec(X_train, vector_size=50, window=5, min_count=1)
word2vec = gensim.downloader.load('word2vec-google-news-300') word2vec = gensim.downloader.load('word2vec-google-news-300')
X_train = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in X_train] X_train = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in X_train]
X_dev = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in X_dev] X_dev = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in X_dev]