dl-projekt/projektFC.py
Franciszek Czajka fc74777649 Add project
2024-06-09 14:30:47 +02:00

78 lines
4.6 KiB
Python

from gensim.models import KeyedVectors
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
# File names of the previously saved Word2Vec and GloVe vector sets.
word2vec_model_path, glove_model_path = (
    "word2vec-google-news-300.model",
    "glove-wiki-gigaword-300.model",
)
# Load both vector sets with KeyedVectors.load, Word2Vec first and GloVe second.
word2vec_model, glove_model = [
    KeyedVectors.load(vectors_path)
    for vectors_path in (word2vec_model_path, glove_model_path)
]
# Example query word whose most similar vocabulary entries are listed.
word_to_find = 'banana'
# Run the same query against each model in turn (Word2Vec, then GloVe) and
# print one "- word (similarity: score)" line per returned pair.
neighbours_by_model = []
for vectors, heading in (
    (word2vec_model, f"Synonyms for '{word_to_find}' in Word2Vec model:"),
    (glove_model, f"\nSynonyms for '{word_to_find}' in GloVe model:"),
):
    neighbours = vectors.most_similar(word_to_find)
    neighbours_by_model.append(neighbours)
    print(heading)
    for word, similarity in neighbours:
        print(f"- {word} (similarity: {similarity})")
# Keep the per-model result lists available under their original names.
similar_words_word2vec, similar_words_glove = neighbours_by_model
def _mean_vector(model, tokens):
    """Return the average embedding of the tokens that ``model`` knows.

    Args:
        model: Embedding lookup supporting ``token in model`` and
            ``model[token]`` (here: the loaded vector sets).
        tokens: Iterable of word strings forming one sentence.

    Returns:
        The element-wise mean of the vectors of all in-vocabulary tokens, or
        ``None`` when not a single token is in the vocabulary.
    """
    vectors = [model[token] for token in tokens if token in model]
    if not vectors:
        # An empty sum would be the scalar 0, which is not a valid row for
        # cosine_similarity; signal "no vector" explicitly so callers can skip.
        return None
    return sum(vectors) / len(vectors)


def _cosine(vec_a, vec_b):
    """Return the cosine similarity of two 1-D vectors as a single number."""
    # cosine_similarity operates on 2-D inputs and returns a matrix, so wrap
    # each vector as a single row and unwrap the single resulting cell.
    return cosine_similarity(np.array([vec_a]), np.array([vec_b]))[0][0]


def _report_pair_similarity(model_name, model, first, second):
    """Print the similarity of two tokenized sentences under one model.

    Args:
        model_name: Label used in the printed text (e.g. ``"Word2Vec"``).
        model: Embedding lookup passed on to ``_mean_vector``.
        first: Tokens of sentence 1.
        second: Tokens of sentence 2.

    Returns:
        The similarity score, or ``None`` when either sentence has no
        in-vocabulary word and therefore no score was computed.
    """
    first_vec = _mean_vector(model, first)
    second_vec = _mean_vector(model, second)
    print(
        "\nSemantic similarity between sentence 1 and sentence 2 "
        f"in {model_name} model:"
    )
    if first_vec is None or second_vec is None:
        print(
            "No similarity computed as at least one sentence has no words "
            f"in the {model_name} model."
        )
        return None
    score = _cosine(first_vec, second_vec)
    print(score)
    return score


def _report_similarity_to_first(model_name, model, tokenized_sentences):
    """Print the similarity of the first sentence to each following one.

    Args:
        model_name: Label used in the printed text (e.g. ``"GloVe"``).
        model: Embedding lookup passed on to ``_mean_vector``.
        tokenized_sentences: Non-empty list of token lists; element 0 is the
            reference sentence.
    """
    print(
        "\nSemantic similarity between the first sentence and the other "
        f"sentences using {model_name} model:"
    )
    reference_vec = _mean_vector(model, tokenized_sentences[0])
    # Sentences are numbered from 1 in the output, so the first compared
    # sentence is number 2.
    for number, tokens in enumerate(tokenized_sentences[1:], start=2):
        candidate_vec = _mean_vector(model, tokens)
        if candidate_vec is None:
            print(
                f"No similarity computed for sentence {number} as there are "
                f"no words from the sentence in the {model_name} model."
            )
        elif reference_vec is None:
            print(
                f"No similarity computed for sentence {number} as there are "
                f"no words from sentence 1 in the {model_name} model."
            )
        else:
            score = _cosine(reference_vec, candidate_vec)
            print(f"Similarity between sentence 1 and sentence {number}: {score}")


# Example sentences (already tokenized) to compare with each other.
sentence1 = ['dog', 'walking', 'on', 'the', 'street']
sentence2 = ['cat', 'running', 'across', 'the', 'road']

# Semantic similarity between sentence 1 and sentence 2, per model.
similarity_score_word2vec = _report_pair_similarity(
    "Word2Vec", word2vec_model, sentence1, sentence2
)
similarity_score_glove = _report_pair_similarity(
    "GloVe", glove_model, sentence1, sentence2
)

# "Local model" section.
# NOTE(review): no model is trained here; these sentences are scored with the
# same two vector sets used above, so words missing from them are ignored.
sentences = [
    ["to", "jest", "pierwsze", "zdanie", "dla", "word2vec"],
    ["to", "jest", "drugie", "zdanie"],
    ["kolejne", "zdanie"],
    ["jeszcze", "jedno", "zdanie"],
    ["i", "ostatnie", "zdanie"],
]

# Semantic similarity between the first sentence and every other one, per model.
_report_similarity_to_first("Word2Vec", word2vec_model, sentences)
_report_similarity_to_first("GloVe", glove_model, sentences)