sport-text-classification-b.../mrlrozwiazanie.py
2021-05-25 20:16:00 +02:00

26 lines
1.0 KiB
Python

import gzip
import gensim
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from gensim.models import Word2Vec, KeyedVectors
# Disabled alternative: train Word2Vec on the task's own training file
# instead of loading pre-trained vectors.
# train_X = []
# train_y = []
# with gzip.open('train/train.tsv.gz','r') as fin:
#     for line in fin:
#         sline = line.decode('UTF-8').replace("\n", "").split("\t")
#         train_y.append(sline[0])
#         train_X.append(''.join(sline[1:]))
# w2v = gensim.models.Word2Vec(list(train_X), vector_size=100, window=10, min_count=2, epochs=5, workers=2)
#w2v = gensim.models.Word2Vec(vector_size=100)
#w2v.wv.load_word2vec_format('../../../ncexclude/nkjp+wiki-forms-all-100-cbow-hs.txt.gz', binary=False)
#w2v.wv.load_word2vec_format('../../../ncexclude/wiki-forms-all-100-skipg-ns.txt.gz', binary=False)
# One-off conversion step: read the pre-trained embeddings stored in the
# word2vec text format (binary=False) and write them back out with
# KeyedVectors.save() under a local file name.
# Both paths are relative, so they are resolved against the current working
# directory at run time.
_PRETRAINED_TEXT_VECTORS = '../../../ncexclude/wiki-forms-all-100-skipg-ns.txt.gz'
_SAVED_VECTORS_FILE = "word2vec2.wordvectors"

w2v = KeyedVectors.load_word2vec_format(_PRETRAINED_TEXT_VECTORS, binary=False)
w2v.save(_SAVED_VECTORS_FILE)