old way

2020-03-22 13:58:35 +01:00 · 2020-03-22 13:58:35 +01:00 · 95e6501fe5
commit 95e6501fe5
parent 9ea4e1abab
7 changed files with 1518 additions and 1515 deletions
--- a/.predict.py.swp
+++ b/.predict.py.swp
--- a/.train.py.swp
+++ b/.train.py.swp
--- a/dev-0/out.tsv
+++ b/dev-0/out.tsv
--- a/naive_base_model.pkl
+++ b/naive_base_model.pkl
--- a/predict.py
+++ b/predict.py
@@ -6,6 +6,7 @@ import re
 def clear_tokens(tokens, is_text=True):
    tokens = tokens.replace('\\n', ' ')
    return tokens
    tokens = re.sub(r'\(((http)|(https)).*((\.com)|(\.net)|(\.jpg)|(\.html))\)'," ", tokens)
    tokens = re.sub(r'[\n\&\"\?\\\'\*\[\]\,\;\.\=\+\(\)\!\/\:\`\~\%\^\$\#\@\’\＞\″\±]+', ' ', tokens)
    tokens = re.sub(r'[\.\-][\.\-]+', ' ', tokens)
@@ -22,7 +23,8 @@ def calc_post_prob(post, paranormal_class_logprob, sceptic_class_logprob, word_l
    text, timestap = post.rstrip('\n').split('\t')
    text =  clear_tokens(text, True)
    tokens = text.lower().split(' ')
-    probs = {0.0 : 'sceptic', 0.0 : 'paranormal'}
+    #probs = {0.0 : 'sceptic', 0.0 : 'paranormal'}
    probs = {}
    for class_ in word_logprobs.keys():
        product = 1
        for token in tokens:
@@ -30,7 +32,7 @@ def calc_post_prob(post, paranormal_class_logprob, sceptic_class_logprob, word_l
            try:
                product += word_logprobs[class_][token]
            except KeyError:
-                pass
+                product += 0
            # use the formula here
        if class_ == 'sceptic':
            product +=  sceptic_class_logprob
--- a/test-A/out.tsv
+++ b/test-A/out.tsv
--- a/train.py
+++ b/train.py
@@ -23,6 +23,7 @@ def calc_class_logprob(expected_path):
 def clear_tokens(tokens, is_text=True):
    tokens = tokens.replace('\\n', ' ')
    return tokens
    # delete links, special characters, dots, and \n
    tokens = re.sub(r'\(((http)|(https)).*((\.com)|(\.net)|(\.jpg)|(\.html))\)'," ", tokens)
    tokens = re.sub(r'(|\-|\_)([a-z]+(\-|\_))+[a-z]+(|\-|\_)', ' ', tokens)