Laptop commit: fixed naive Bayes
This commit is contained in:
parent
a6694d768d
commit
6be8cd183c
9
code.py
9
code.py
@@ -33,8 +33,13 @@ def calc_word_count(in_path, expected_path):
|
|||||||
text = text.lower()
|
text = text.lower()
|
||||||
text = re.sub(r'\\n+', " ", text)
|
text = re.sub(r'\\n+', " ", text)
|
||||||
text = re.sub(r'http\S+', " ", text)
|
text = re.sub(r'http\S+', " ", text)
|
||||||
#text = re.sub(r'(\s+|\\n)', ' ', text)
|
text = re.sub(r'\/[a-z]\/', " ", text)
|
||||||
#text = re.sub(r'\W\w{1,3}\W|\A\w{1,3}\W', " ", text)
|
text = re.sub(r'[^a-z]', " ", text)
|
||||||
|
text = re.sub(r'\s{2,}', " ", text)
|
||||||
|
text = re.sub(r'\W\w{1,3}\W|\A\w{1,3}\W', " ", text)
|
||||||
|
text = re.sub(r'\W\w{1,3}\W|\A\w{1,3}\W', " ", text)
|
||||||
|
text = re.sub(r'\W\w{1,3}\W|\A\w{1,3}\W', " ", text)
|
||||||
|
text = re.sub(r'^\s', "", text)
|
||||||
tokens = text.split(' ')
|
tokens = text.split(' ')
|
||||||
for token in tokens:
|
for token in tokens:
|
||||||
if class_ == '1':
|
if class_ == '1':
|
||||||
|
@@ -6,13 +6,8 @@ import re
|
|||||||
open_file = open('naive_base_model.pkl', 'rb')
|
open_file = open('naive_base_model.pkl', 'rb')
|
||||||
pickle_loaded = pickle.load(open_file)
|
pickle_loaded = pickle.load(open_file)
|
||||||
paranomal_class_logprob, skeptic_class_logprob, word_logprobs = pickle_loaded
|
paranomal_class_logprob, skeptic_class_logprob, word_logprobs = pickle_loaded
|
||||||
#pickle_loaded=pickle.load(open_file)
|
|
||||||
#paranomal_class_logprob, skeptic_class_logprob, word_logprobs = pickle_loaded
|
|
||||||
#Niektórych słów nie bezie w zbiorze treningowym dev-0 i dev-A
|
|
||||||
def prediction(input,output):
|
def prediction(input,output):
|
||||||
output_file = open(output,'w')
|
output_file = open(output,'w')
|
||||||
#pickle_load = pickle.load(open('naive_base_model.pkl', 'rb'))
|
|
||||||
#paranormal_class_logprob, skeptic_class_logprob, word_logprob = pickle_load
|
|
||||||
with open(input,encoding='utf-8') as in_file:
|
with open(input,encoding='utf-8') as in_file:
|
||||||
for line in in_file:
|
for line in in_file:
|
||||||
temp_paranormal_logprob = paranomal_class_logprob
|
temp_paranormal_logprob = paranomal_class_logprob
|
||||||
@@ -21,8 +16,13 @@ def prediction(input,output):
|
|||||||
text = text.lower()
|
text = text.lower()
|
||||||
text = re.sub(r'\\n+', " ", text)
|
text = re.sub(r'\\n+', " ", text)
|
||||||
text = re.sub(r'http\S+', " ", text)
|
text = re.sub(r'http\S+', " ", text)
|
||||||
#text = re.sub(r'(\s+|\\n)', ' ', text)
|
text = re.sub(r'\/[a-z]\/', " ", text)
|
||||||
#text = re.sub(r'\W\w{1,3}\W|\A\w{1,3}\W', " ", text)
|
text = re.sub(r'[^a-z]', " ", text)
|
||||||
|
text = re.sub(r'\s{2,}', " ", text)
|
||||||
|
text = re.sub(r'\W\w{1,3}\W|\A\w{1,3}\W', " ", text)
|
||||||
|
text = re.sub(r'\W\w{1,3}\W|\A\w{1,3}\W', " ", text)
|
||||||
|
text = re.sub(r'\W\w{1,3}\W|\A\w{1,3}\W', " ", text)
|
||||||
|
text = re.sub(r'^\s', "", text)
|
||||||
tokens = text.split(' ')
|
tokens = text.split(' ')
|
||||||
for token in tokens:
|
for token in tokens:
|
||||||
if token not in word_logprobs['paranormal']:
|
if token not in word_logprobs['paranormal']:
|
||||||
|
432
dev-0/out.tsv
432
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
Binary file not shown.
454
test-A/out.tsv
454
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user