old way
This commit is contained in:
parent
9ea4e1abab
commit
95e6501fe5
BIN
.predict.py.swp
BIN
.predict.py.swp
Binary file not shown.
BIN
.train.py.swp
BIN
.train.py.swp
Binary file not shown.
1182
dev-0/out.tsv
1182
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@ -6,6 +6,7 @@ import re
|
||||
|
||||
def clear_tokens(tokens, is_text=True):
|
||||
tokens = tokens.replace('\\n', ' ')
|
||||
return tokens
|
||||
tokens = re.sub(r'\(((http)|(https)).*((\.com)|(\.net)|(\.jpg)|(\.html))\)'," ", tokens)
|
||||
tokens = re.sub(r'[\n\&\"\?\\\'\*\[\]\,\;\.\=\+\(\)\!\/\:\`\~\%\^\$\#\@\’\>\″\±]+', ' ', tokens)
|
||||
tokens = re.sub(r'[\.\-][\.\-]+', ' ', tokens)
|
||||
@ -22,7 +23,8 @@ def calc_post_prob(post, paranormal_class_logprob, sceptic_class_logprob, word_l
|
||||
text, timestap = post.rstrip('\n').split('\t')
|
||||
text = clear_tokens(text, True)
|
||||
tokens = text.lower().split(' ')
|
||||
probs = {0.0 : 'sceptic', 0.0 : 'paranormal'}
|
||||
#probs = {0.0 : 'sceptic', 0.0 : 'paranormal'}
|
||||
probs = {}
|
||||
for class_ in word_logprobs.keys():
|
||||
product = 1
|
||||
for token in tokens:
|
||||
@ -30,7 +32,7 @@ def calc_post_prob(post, paranormal_class_logprob, sceptic_class_logprob, word_l
|
||||
try:
|
||||
product += word_logprobs[class_][token]
|
||||
except KeyError:
|
||||
pass
|
||||
product += 0
|
||||
# use the formula here
|
||||
if class_ == 'sceptic':
|
||||
product += sceptic_class_logprob
|
||||
|
1844
test-A/out.tsv
1844
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
1
train.py
1
train.py
@ -23,6 +23,7 @@ def calc_class_logprob(expected_path):
|
||||
|
||||
def clear_tokens(tokens, is_text=True):
|
||||
tokens = tokens.replace('\\n', ' ')
|
||||
return tokens
|
||||
# delete links, special characters, dots, and \n
|
||||
tokens = re.sub(r'\(((http)|(https)).*((\.com)|(\.net)|(\.jpg)|(\.html))\)'," ", tokens)
|
||||
tokens = re.sub(r'(|\-|\_)([a-z]+(\-|\_))+[a-z]+(|\-|\_)', ' ', tokens)
|
||||
|
Loading…
Reference in New Issue
Block a user