import gzip
import gensim
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from gensim.models import Word2Vec, KeyedVectors

# Earlier attempt: read the labelled training data from the gzipped TSV
# (label in the first column, text in the remaining columns).
# train_X = []
# train_y = []
# with gzip.open('train/train.tsv.gz', 'r') as fin:
#     for line in fin:
#         sline = line.decode('UTF-8').replace("\n", "").split("\t")
#         train_y.append(sline[0])
#         train_X.append(''.join(sline[1:]))

# Earlier attempt: train a Word2Vec model from scratch on the training texts.
# w2v = gensim.models.Word2Vec(list(train_X), vector_size=100, window=10, min_count=2, epochs=5, workers=2)

# Earlier attempts: load the pretrained embeddings through a Word2Vec instance.
# w2v = gensim.models.Word2Vec(vector_size=100)
# w2v.wv.load_word2vec_format('../../../ncexclude/nkjp+wiki-forms-all-100-cbow-hs.txt.gz', binary=False)
# w2v.wv.load_word2vec_format('../../../ncexclude/wiki-forms-all-100-skipg-ns.txt.gz', binary=False)

# Load the pretrained 100-dimensional skip-gram vectors and save them
# in gensim's native KeyedVectors format for faster reloading later.
w2v = KeyedVectors.load_word2vec_format('../../../ncexclude/wiki-forms-all-100-skipg-ns.txt.gz', binary=False)
w2v.save("word2vec2.wordvectors")
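
# A minimal sketch of how the saved vectors could be reloaded and queried in a
# later cell. The example token "kot" is an assumption (the pretrained vectors
# are Polish wiki/NKJP word forms); substitute any token in the vocabulary.
reloaded = KeyedVectors.load("word2vec2.wordvectors", mmap='r')  # memory-map, cheap to reopen

if "kot" in reloaded.key_to_index:                # guard against out-of-vocabulary tokens
    print(reloaded["kot"].shape)                  # (100,) embedding vector
    print(reloaded.most_similar("kot", topn=5))   # nearest neighbours by cosine similarity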