"""Compare word and sentence similarity under Word2Vec and GloVe embeddings.

Loads two pre-trained gensim ``KeyedVectors`` models (Google News Word2Vec
and Wikipedia/Gigaword GloVe), prints the nearest neighbours of an example
word in each model, then compares sentences by the cosine similarity of
their mean word vectors.  Out-of-vocabulary words are skipped; a sentence
with no in-vocabulary words yields no vector (and no similarity score).
"""

from gensim.models import KeyedVectors
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Paths to the serialized gensim KeyedVectors models on disk.
word2vec_model_path = "word2vec-google-news-300.model"
glove_model_path = "glove-wiki-gigaword-300.model"


def _mean_vector(model, words):
    """Return the mean embedding of the in-vocabulary words, or ``None``.

    Words missing from *model*'s vocabulary are skipped.  When none of the
    words are known, ``None`` is returned instead of a zero scalar — the
    original inline code produced ``sum([]) == 0`` in that case, which
    would crash ``cosine_similarity`` downstream.
    """
    known = [model[word] for word in words if word in model]
    if not known:
        return None
    return sum(known) / len(known)


def _cosine(vec_a, vec_b):
    """Cosine similarity between two 1-D vectors, as a plain float."""
    return cosine_similarity(np.array([vec_a]), np.array([vec_b]))[0][0]


def _print_synonyms(model, word):
    """Print *model*'s nearest neighbours of *word*, one per line."""
    for neighbour, similarity in model.most_similar(word):
        print(f"- {neighbour} (similarity: {similarity})")


def _print_first_vs_rest(model, sentences, model_name):
    """Print similarity of ``sentences[0]`` against each remaining sentence.

    Sentences with no in-vocabulary words get an explanatory message
    instead of a score (matches the original script's behaviour).
    """
    first_vec = _mean_vector(model, sentences[0])
    for i in range(1, len(sentences)):
        vec = _mean_vector(model, sentences[i])
        if first_vec is not None and vec is not None:
            score = _cosine(first_vec, vec)
            print(f"Similarity between sentence 1 and sentence {i + 1}: {score}")
        else:
            print(f"No similarity computed for sentence {i + 1} as there are no words from the sentence in the {model_name} model.")


def main():
    """Run the full demonstration: synonyms, then sentence similarities."""
    # Load both embedding models (large files; loaded once, up front).
    word2vec_model = KeyedVectors.load(word2vec_model_path)
    glove_model = KeyedVectors.load(glove_model_path)

    # Example word whose nearest neighbours we list in each model.
    word_to_find = 'banana'

    print(f"Synonyms for '{word_to_find}' in Word2Vec model:")
    _print_synonyms(word2vec_model, word_to_find)

    print(f"\nSynonyms for '{word_to_find}' in GloVe model:")
    _print_synonyms(glove_model, word_to_find)

    # Example sentence pair for the mean-vector similarity comparison.
    sentence1 = ['dog', 'walking', 'on', 'the', 'street']
    sentence2 = ['cat', 'running', 'across', 'the', 'road']

    # Word2Vec: mean-vector cosine similarity between the two sentences.
    vec1_w2v = _mean_vector(word2vec_model, sentence1)
    vec2_w2v = _mean_vector(word2vec_model, sentence2)
    print("\nSemantic similarity between sentence 1 and sentence 2 in Word2Vec model:")
    # Guard against empty sentences; the original crashed here on sum([]).
    print(_cosine(vec1_w2v, vec2_w2v) if vec1_w2v is not None and vec2_w2v is not None else None)

    # GloVe: same comparison under the second model.
    vec1_glove = _mean_vector(glove_model, sentence1)
    vec2_glove = _mean_vector(glove_model, sentence2)
    print("\nSemantic similarity between sentence 1 and sentence 2 in GloVe model:")
    print(_cosine(vec1_glove, vec2_glove) if vec1_glove is not None and vec2_glove is not None else None)

    # Local corpus: Polish sentences — mostly out-of-vocabulary for these
    # English models, so the fallback message is the expected outcome.
    sentences = [["to", "jest", "pierwsze", "zdanie", "dla", "word2vec"],
                 ["to", "jest", "drugie", "zdanie"],
                 ["kolejne", "zdanie"],
                 ["jeszcze", "jedno", "zdanie"],
                 ["i", "ostatnie", "zdanie"]]

    print("\nSemantic similarity between the first sentence and the other sentences using Word2Vec model:")
    _print_first_vs_rest(word2vec_model, sentences, "Word2Vec")

    print("\nSemantic similarity between the first sentence and the other sentences using GloVe model:")
    _print_first_vs_rest(glove_model, sentences, "GloVe")


if __name__ == "__main__":
    main()