26 lines
1.0 KiB
Python
26 lines
1.0 KiB
Python
|
import gzip
|
||
|
import gensim
|
||
|
import numpy as np
|
||
|
import pandas as pd
|
||
|
import matplotlib.pyplot as plt
|
||
|
import matplotlib.gridspec as gridspec
|
||
|
from sklearn.preprocessing import LabelEncoder
|
||
|
from sklearn.linear_model import LogisticRegression
|
||
|
from gensim.models import Word2Vec, KeyedVectors
|
||
|
|
||
|
# train_X = []
|
||
|
# train_y = []
|
||
|
# with gzip.open('train/train.tsv.gz','r') as fin:
|
||
|
# for line in fin:
|
||
|
# sline = line.decode('UTF-8').replace("\n", "").split("\t")
|
||
|
# train_y.append(sline[0])
|
||
|
# train_X.append(''.join(sline[1:]))
|
||
|
|
||
|
# w2v = gensim.models.Word2Vec(list(train_X), vector_size=100, window=10, min_count=2, epochs=5, workers=2)
|
||
|
|
||
|
#w2v = gensim.models.Word2Vec(vector_size=100)
|
||
|
#w2v.wv.load_word2vec_format('../../../ncexclude/nkjp+wiki-forms-all-100-cbow-hs.txt.gz', binary=False)
|
||
|
#w2v.wv.load_word2vec_format('../../../ncexclude/wiki-forms-all-100-skipg-ns.txt.gz', binary=False)
|
||
|
|
||
|
# One-off conversion step: read pre-trained word vectors stored in the
# word2vec *text* format (binary=False) and keep them in `w2v`.
# NOTE(review): judging by the file name these are presumably 100-dimensional
# skip-gram vectors trained on Wikipedia word forms -- confirm against the
# source of the embeddings.
w2v = KeyedVectors.load_word2vec_format(
    '../../../ncexclude/wiki-forms-all-100-skipg-ns.txt.gz',
    binary=False,
)

# Re-save the vectors via KeyedVectors.save so that later runs can reload
# them with KeyedVectors.load instead of parsing the text file again.
w2v.save("word2vec2.wordvectors")
|