"""Compare pretrained Word2Vec and GloVe embeddings on a few toy inputs.

For each of the two models the script prints, in this order:

1. the words most similar to a sample word,
2. the cosine similarity between two tokenised English sentences,
3. the cosine similarity between the first sentence of a small list of
   tokenised Polish sentences and each of the remaining sentences.

A sentence is represented by the mean of the vectors of those of its tokens
that are present in the model; tokens unknown to the model are ignored.
"""

from typing import Optional, Sequence

from gensim.models import KeyedVectors
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


def _mean_vector(
    model: KeyedVectors, tokens: Sequence[str]
) -> Optional[np.ndarray]:
    """Return the mean vector of the tokens known to *model*, or ``None``.

    ``None`` means that not a single token of the sentence is in the model.
    Callers must skip the comparison in that case: an "empty" average would
    be a bare scalar, which cannot be passed to ``cosine_similarity`` as a
    sample row.
    """
    vectors = [model[token] for token in tokens if token in model]
    if not vectors:
        return None
    # Built-in sum()/len() (rather than np.mean) keeps the exact accumulation
    # order, so the printed similarities do not change in the last digits.
    return sum(vectors) / len(vectors)


def _cosine(first: np.ndarray, second: np.ndarray) -> np.floating:
    """Return the cosine similarity of two 1-D vectors as a NumPy scalar."""
    # cosine_similarity operates on 2-D sample matrices, so each vector is
    # wrapped as a single row and the only cell of the 1x1 result is read.
    # The value is deliberately not converted with float(): that would
    # change how it is rendered when printed.
    return cosine_similarity(np.array([first]), np.array([second]))[0][0]


def _print_synonyms(
    model: KeyedVectors,
    model_name: str,
    word: str,
    *,
    blank_line_before: bool = False,
) -> None:
    """Print the words that *model* considers most similar to *word*.

    Any exception raised by ``model.most_similar`` (presumably ``KeyError``
    when *word* is not in the model -- confirm against the installed gensim
    version) is propagated unchanged.
    """
    if blank_line_before:
        # Separates this section from the previous one in the output.
        print()
    print(f"Synonyms for '{word}' in {model_name} model:")
    for neighbour, score in model.most_similar(word):
        print(f"- {neighbour} (similarity: {score})")


def _print_pair_similarity(
    model: KeyedVectors,
    model_name: str,
    first: Sequence[str],
    second: Sequence[str],
) -> None:
    """Print the cosine similarity between two tokenised sentences."""
    print(
        "\nSemantic similarity between sentence 1 and sentence 2 "
        f"in {model_name} model:"
    )
    first_vec = _mean_vector(model, first)
    second_vec = _mean_vector(model, second)
    if first_vec is None or second_vec is None:
        # Nothing to compare: report it instead of failing inside
        # cosine_similarity.
        print(
            "No similarity computed as at least one of the sentences has "
            f"no words in the {model_name} model."
        )
        return
    print(_cosine(first_vec, second_vec))


def _print_first_vs_rest(
    model: KeyedVectors,
    model_name: str,
    tokenised_sentences: Sequence[Sequence[str]],
) -> None:
    """Print the similarity of the first sentence to every other sentence.

    Sentences are numbered from 1 in the output, so the compared sentences
    are reported as 2, 3, ...
    """
    print(
        "\nSemantic similarity between the first sentence and the other "
        f"sentences using {model_name} model:"
    )
    reference = _mean_vector(model, tokenised_sentences[0])
    for number, tokens in enumerate(tokenised_sentences[1:], start=2):
        candidate = _mean_vector(model, tokens)
        if candidate is None:
            print(
                f"No similarity computed for sentence {number} as there are "
                f"no words from the sentence in the {model_name} model."
            )
        elif reference is None:
            # The reference sentence itself is entirely out of vocabulary;
            # report it per sentence rather than crashing.
            print(
                f"No similarity computed for sentence {number} as there are "
                f"no words from sentence 1 in the {model_name} model."
            )
        else:
            print(
                f"Similarity between sentence 1 and sentence {number}: "
                f"{_cosine(reference, candidate)}"
            )


# Paths to the saved model files.
word2vec_model_path = "word2vec-google-news-300.model"
glove_model_path = "glove-wiki-gigaword-300.model"

# Load the Word2Vec and GloVe models.
# NOTE(review): KeyedVectors.load relies on pickle under the hood -- only
# load model files that come from a trusted source.
word2vec_model = KeyedVectors.load(word2vec_model_path)
glove_model = KeyedVectors.load(glove_model_path)

# Sample word whose synonyms are looked up in both models.
word_to_find = 'banana'

_print_synonyms(word2vec_model, "Word2Vec", word_to_find)
_print_synonyms(glove_model, "GloVe", word_to_find, blank_line_before=True)

# Sample sentences to compare with each other.
sentence1 = ['dog', 'walking', 'on', 'the', 'street']
sentence2 = ['cat', 'running', 'across', 'the', 'road']

_print_pair_similarity(word2vec_model, "Word2Vec", sentence1, sentence2)
_print_pair_similarity(glove_model, "GloVe", sentence1, sentence2)

# Local toy sentences; the first one is compared against all the others.
sentences = [
    ["to", "jest", "pierwsze", "zdanie", "dla", "word2vec"],
    ["to", "jest", "drugie", "zdanie"],
    ["kolejne", "zdanie"],
    ["jeszcze", "jedno", "zdanie"],
    ["i", "ostatnie", "zdanie"],
]

_print_first_vs_rest(word2vec_model, "Word2Vec", sentences)
_print_first_vs_rest(glove_model, "GloVe", sentences)