Back to complete only

2022-04-03 20:32:20 +02:00 · 2022-04-03 20:32:20 +02:00 · 43e6edd45b
commit 43e6edd45b
parent 2d93bf88a5
1 changed file with 31 additions and 28 deletions
--- a/run.py
+++ b/run.py
@@ -52,8 +52,8 @@ def load_train():
 def predict(search_for_words):
    trigrams_complete = {} # Tablica trigramów szukanych słów które wystąpiły w tekście z dokładnie tymi samymi szukanymi słowami w tej samej kolejności
    bigrams_complete = {} # Tablica bigramów szukanych słów które wystąpiły w tekście z dokładnie tymi samymi szukanymi słowami w tej samej kolejności
-    bigrams_not_complete = {}
-    trigrams_not_complete = {}
+    # bigrams_not_complete = {}
+    # trigrams_not_complete = {}
    # search_for_words_complete = [] # Tablica szukanych słów które wystąpiły w tekście z dokładnie tymi samymi szukanymi słowami w tej samej kolejności
    # Szukanie bigramów i trigramów które zawierają szukaną lukę dla słowa z tablicy search_for_words.
    # Jeżeli kolejność słów się zgadza liczona jest ilość wystąpień takich bigramów i trigramów z tymi słowami.
@@ -73,8 +73,8 @@ def predict(search_for_words):
                    search_for_word_s = search_for_word.split("_")
                    if search_for_word_s[0] == word_bi_last and search_for_word_s[1] == word: # Jeżeli szukane słowa tworzą bigram występujący w tekście trenującym to zwiększamy liczbę jego wystąpień
                        set_bigram_count(word_bi_last, word, bigrams_complete) 
-                    elif search_for_word_s[0] == word_bi_last:
-                        set_bigram_count(word_bi_last, word, bigrams_not_complete)
+                    # elif search_for_word_s[0] == word_bi_last:
+                    #     set_bigram_count(word_bi_last, word, bigrams_not_complete)
            if i_ == 1:  # If potrzebny aby zbuforować min 3 wyrazy dla trigramu w początkowej fazie przebiegu pętli.          
                words[0]=word_bi_last
                words[1]=word   
@@ -84,8 +84,8 @@ def predict(search_for_words):
                    search_for_word = search_for_word.split("_")
                    if search_for_word[0] == words[1] and search_for_word[1] == words[2]:  # Jeżeli szukane słowa należą do przedostatniego i ostatniego słowa trigramu to jest zwiększana liczba wystąpień tego trigramu.           
                        set_trigram_count(words[0], words[1], words[2], trigrams_complete) 
-                    elif search_for_word[0] == words[1]:
-                        set_trigram_count(words[0], words[1], words[2], trigrams_not_complete) 
+                    # elif search_for_word[0] == words[1]:
+                    #     set_trigram_count(words[0], words[1], words[2], trigrams_not_complete) 
            elif i_ > 2: # Jest to już ponad 2 przebieg pętli więc możemy rotować wyrazy jak w kolecje. Dla trigramów.
                words[0]=words[1]
                words[1]=words[2]
@@ -94,16 +94,18 @@ def predict(search_for_words):
                    search_for_word = search_for_word.split("_")
                    if search_for_word[0] == words[1] and search_for_word[1] == words[2]:  
                        set_trigram_count(words[0], words[1], words[2], trigrams_complete)       
-                    elif search_for_word[0] == words[1]:
-                        set_trigram_count(words[0], words[1], words[2], trigrams_not_complete)      
+                    # elif search_for_word[0] == words[1]:
+                    #     set_trigram_count(words[0], words[1], words[2], trigrams_not_complete)      
            word_bi_last = word
            
            if i_ == 500000:
                break
            
    print(len(search_for_words))
-    print(len(bigrams_complete), len(bigrams_not_complete), len(bigrams_complete)+len(bigrams_not_complete))
-    print(len(trigrams_complete), len(trigrams_not_complete), len(trigrams_complete)+len(trigrams_not_complete))
+    print(len(bigrams_complete))
+    print(len(trigrams_complete))
+    # print(len(bigrams_complete), len(bigrams_not_complete), len(bigrams_complete)+len(bigrams_not_complete))
+    # print(len(trigrams_complete), len(trigrams_not_complete), len(trigrams_complete)+len(trigrams_not_complete))
    # Szukanie trigramu który najczęściej wystąpił dla każdych dokładnie tych samych co szukanych danych dwóch słów z tablicy search_for_words.
    # Dotyczy dokładnie pasujących bigramów z szukanymi słowami
    left_context_search_for_word = {}
@@ -115,25 +117,26 @@ def predict(search_for_words):
                left_context = trigram.split("_")[0]
                left_context_search_for_word[bigram_complete] = left_context

-    # Szukanie trigramu który najczęściej wystąpił dla pierwszego szukanego słowa z szukanych słów z tablicy serch_for_word.
-    # To w przypadku gdyby szukane słowa w ogóle nie znalazły swojego dopasowania w zbiorze train to wtedy dostaną jakieś tam prawdopodobieństwo dla tego pierwszego słow z szukanych słów.
-    left_context_search_for_word_not_complete = {}
-    for bigram_not_complete in bigrams_not_complete:
-        max_count = 0
-        for trigram in trigrams_not_complete:
-            if bigram_not_complete == '_'.join(trigram.split("_")[1:3]) and trigrams_not_complete[trigram] > max_count:
-                max_count = trigrams_not_complete[trigram]
-                left_context = trigram.split("_")[0]
-                left_context_search_for_word_not_complete[bigram_not_complete] = left_context
+    # # Szukanie trigramu który najczęściej wystąpił dla pierwszego szukanego słowa z szukanych słów z tablicy serch_for_word.
+    # # To w przypadku gdyby szukane słowa w ogóle nie znalazły swojego dopasowania w zbiorze train to wtedy dostaną jakieś tam prawdopodobieństwo dla tego pierwszego słow z szukanych słów.
+    # left_context_search_for_word_not_complete = {}
+    # for bigram_not_complete in bigrams_not_complete:
+    #     max_count = 0
+    #     for trigram in trigrams_not_complete:
+    #         if bigram_not_complete == '_'.join(trigram.split("_")[1:3]) and trigrams_not_complete[trigram] > max_count:
+    #             max_count = trigrams_not_complete[trigram]
+    #             left_context = trigram.split("_")[0]
+    #             left_context_search_for_word_not_complete[bigram_not_complete] = left_context

-    for search_for_word in search_for_words:
-        if search_for_word in left_context_search_for_word:
-            left_context = left_context_search_for_word[search_for_word]
-            print(f"{left_context} {' '.join(search_for_word.split('_'))} {trigrams_complete['_'.join([left_context, search_for_word])]/bigrams_complete[search_for_word]}")
-        elif search_for_word in left_context_search_for_word_not_complete:
-            print(f"{left_context} {' '.join(search_for_word.split('_'))} {trigrams_not_complete['_'.join([left_context, search_for_word])]/bigrams_not_complete[search_for_word]}")
-        else:
-            print(f"??? {' '.join(search_for_word.split('_'))}")
+    # for search_for_word in search_for_words:
+    #     if search_for_word in left_context_search_for_word:
+    #         left_context = left_context_search_for_word[search_for_word]
+    #         print(f"{left_context} {' '.join(search_for_word.split('_'))} {trigrams_complete['_'.join([left_context, search_for_word])]/bigrams_complete[search_for_word]}")
+    #     elif search_for_word in left_context_search_for_word_not_complete:
+    #         left_context = left_context_search_for_word[search_for_word]
+    #         print(f"{left_context} {' '.join(search_for_word.split('_'))} {trigrams_not_complete['_'.join([left_context, search_for_word])]/bigrams_not_complete[search_for_word]}")
+    #     else:
+    #         print(f"??? {' '.join(search_for_word.split('_'))}")

 def load_dev():
    # Ładowanie zbioru testującego