diff --git a/main.py b/main.py
index e91b6d4..0af190a 100644
--- a/main.py
+++ b/main.py
@@ -42,7 +42,6 @@ X_dev = [word_tokenize(content) for content in X_dev]
 X_test = [word_tokenize(content) for content in X_test]
 
 # word2vec
-#word2vec = Word2Vec(X_train, vector_size=50, window=5, min_count=1)
 word2vec = gensim.downloader.load('word2vec-google-news-300')
 X_train = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in X_train]
 X_dev = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in X_dev]