Add project
This commit is contained in:
commit
fc74777649
BIN
glove-wiki-gigaword-300.model
Normal file
BIN
glove-wiki-gigaword-300.model
Normal file
Binary file not shown.
77
projektFC.py
Normal file
77
projektFC.py
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
from gensim.models import KeyedVectors
|
||||||
|
from sklearn.metrics.pairwise import cosine_similarity
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Paths to the pre-trained embedding files (Word2Vec and GloVe).
word2vec_model_path = "word2vec-google-news-300.model"
glove_model_path = "glove-wiki-gigaword-300.model"

# Load both embedding sets with KeyedVectors.load: Word2Vec first, then GloVe.
word2vec_model, glove_model = (
    KeyedVectors.load(model_path)
    for model_path in (word2vec_model_path, glove_model_path)
)
|
||||||
|
|
||||||
|
# Example word whose nearest neighbours ("synonyms") are looked up.
word_to_find = 'banana'


def _print_similar_words(model, model_name, word):
    """Print and return the words `model` ranks as most similar to `word`.

    Args:
        model: Embedding object supporting ``in`` and ``most_similar``.
        model_name: Label used in the printed header (e.g. "Word2Vec").
        word: The query word.

    Returns:
        The list of ``(word, similarity)`` pairs from
        ``model.most_similar(word)``, or an empty list when `word` is not in
        the model's vocabulary.
    """
    print(f"Synonyms for '{word}' in {model_name} model:")
    # most_similar raises KeyError for an unknown word, so check membership
    # first and report the miss instead of aborting the whole script.
    if word not in model:
        print(f"- '{word}' is not in the {model_name} vocabulary")
        return []
    neighbours = model.most_similar(word)
    for neighbour, score in neighbours:
        print(f"- {neighbour} (similarity: {score})")
    return neighbours


# Nearest neighbours of the word in the Word2Vec model.
similar_words_word2vec = _print_similar_words(
    word2vec_model, "Word2Vec", word_to_find
)

# Nearest neighbours of the word in the GloVe model; the bare print() emits
# the blank line that separates the two listings.
print()
similar_words_glove = _print_similar_words(glove_model, "GloVe", word_to_find)
|
||||||
|
|
||||||
|
def _mean_vector(model, tokens):
    """Return the average embedding of the tokens that `model` knows.

    Tokens missing from the model are skipped. Returns ``None`` when no token
    is known: averaging an empty list would yield the scalar 0, which
    ``cosine_similarity`` cannot accept as a vector.
    """
    vectors = [model[token] for token in tokens if token in model]
    if not vectors:
        return None
    return sum(vectors) / len(vectors)


def _cosine(vec_a, vec_b):
    """Return the cosine similarity of two 1-D vectors as a scalar."""
    # cosine_similarity works on 2-D inputs, so wrap each vector in a
    # single-row array and unwrap the 1x1 result.
    return cosine_similarity(np.array([vec_a]), np.array([vec_b]))[0][0]


def _print_pair_similarity(model_name, vec_a, vec_b):
    """Print the similarity of sentence 1 and sentence 2 and return it.

    Returns ``None`` (after printing a notice) when either sentence vector is
    ``None``, i.e. the sentence had no word in the model.
    """
    print(f"\nSemantic similarity between sentence 1 and sentence 2 in {model_name} model:")
    if vec_a is None or vec_b is None:
        print(f"No similarity computed as one of the sentences has no words in the {model_name} model.")
        return None
    score = _cosine(vec_a, vec_b)
    print(score)
    return score


def _print_similarity_to_first(model, model_name, tokenised_sentences):
    """Print the similarity of the first sentence to each following one.

    Sentences without any word in `model` are reported and skipped; if the
    first sentence itself has no known word, nothing can be compared and a
    single notice is printed instead.
    """
    print(f"\nSemantic similarity between the first sentence and the other sentences using {model_name} model:")
    first_vec = _mean_vector(model, tokenised_sentences[0])
    if first_vec is None:
        print(f"No similarity computed as there are no words from sentence 1 in the {model_name} model.")
        return
    # Sentence numbers are 1-based, and the first sentence is the reference,
    # so the remaining ones are numbered from 2.
    for number, tokens in enumerate(tokenised_sentences[1:], start=2):
        vec = _mean_vector(model, tokens)
        if vec is None:
            print(f"No similarity computed for sentence {number} as there are no words from the sentence in the {model_name} model.")
        else:
            print(f"Similarity between sentence 1 and sentence {number}: {_cosine(first_vec, vec)}")


# Example sentences (already tokenised) to compare with each other.
sentence1 = ['dog', 'walking', 'on', 'the', 'street']
sentence2 = ['cat', 'running', 'across', 'the', 'road']

# Semantic similarity between sentence 1 and sentence 2 in the Word2Vec model.
vec1_word2vec = _mean_vector(word2vec_model, sentence1)
vec2_word2vec = _mean_vector(word2vec_model, sentence2)
similarity_score_word2vec = _print_pair_similarity(
    "Word2Vec", vec1_word2vec, vec2_word2vec
)

# Semantic similarity between sentence 1 and sentence 2 in the GloVe model.
vec1_glove = _mean_vector(glove_model, sentence1)
vec2_glove = _mean_vector(glove_model, sentence2)
similarity_score_glove = _print_pair_similarity("GloVe", vec1_glove, vec2_glove)

# Local example sentences (Polish tokens).
# NOTE(review): the loaded model files are named after English corpora, so
# most of these tokens are presumably out of vocabulary - confirm this is the
# intended demonstration.
sentences = [
    ["to", "jest", "pierwsze", "zdanie", "dla", "word2vec"],
    ["to", "jest", "drugie", "zdanie"],
    ["kolejne", "zdanie"],
    ["jeszcze", "jedno", "zdanie"],
    ["i", "ostatnie", "zdanie"],
]

# Similarity between the first sentence and the others in the Word2Vec model.
_print_similarity_to_first(word2vec_model, "Word2Vec", sentences)

# Similarity between the first sentence and the others in the GloVe model.
_print_similarity_to_first(glove_model, "GloVe", sentences)
|
BIN
word2vec-google-news-300.model
Normal file
BIN
word2vec-google-news-300.model
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user