final version
This commit is contained in:
parent
6fb853de01
commit
22853ee945
1
main.py
1
main.py
@@ -42,7 +42,6 @@ X_dev = [word_tokenize(content) for content in X_dev]
|
|||||||
X_test = [word_tokenize(content) for content in X_test]
|
X_test = [word_tokenize(content) for content in X_test]
|
||||||
|
|
||||||
# word2vec
|
# word2vec
|
||||||
#word2vec = Word2Vec(X_train, vector_size=50, window=5, min_count=1)
|
|
||||||
word2vec = gensim.downloader.load('word2vec-google-news-300')
|
word2vec = gensim.downloader.load('word2vec-google-news-300')
|
||||||
X_train = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in X_train]
|
X_train = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in X_train]
|
||||||
X_dev = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in X_dev]
|
X_dev = [np.mean([word2vec[word] for word in content if word in word2vec] or [np.zeros(300)], axis=0) for content in X_dev]
|
||||||
|
Loading…
Reference in New Issue
Block a user