Linear regression: higher F1.0, lower accuracy

This commit is contained in:
Th3NiKo 2020-04-06 14:01:32 +02:00
parent d7040c9bc6
commit db398db388
4 changed files with 4461 additions and 4456 deletions

File diff suppressed because it is too large Load Diff

View File

@ -8,6 +8,8 @@ from tokenizer import tokenize
#Load model
model = pickle.load(open("model.pkl","rb"))
weights, word_to_index_mapping, word_count = model
sum = 0
counter = 0
for line in sys.stdin:
document = line.rstrip()
@ -17,9 +19,12 @@ for line in sys.stdin:
y_predicted = weights[0]
for word in terms:
y_predicted += weights[word_to_index_mapping.get(word,0)] * (word_count.get(word,0) / len(word_count))
if y_predicted <= 0.5:
y_predicted += weights[word_to_index_mapping.get(word,0)] * log(word_count.get(word,0) / len(word_count) + 1)
sum += y_predicted
counter += 1
if y_predicted <= 0:
print(0)
else:
print(1)
#print(sum / counter)

File diff suppressed because it is too large Load Diff

View File

@ -76,7 +76,7 @@ def train():
#Using .get() avoids a KeyError for missing words and substitutes a default value of our choice
#The default weight index doesn't matter for a missing word, because word_count.get(word,0) gives 0 and the whole term becomes 0
for word in actual_x:
y_predicted += weights[word_to_index_mapping.get(word,0)] * (word_count.get(word,0) / len(word_count))
y_predicted += weights[word_to_index_mapping.get(word,0)] * log(word_count.get(word,0) / len(word_count) + 1)
#Cost count. Check how good was our prediction
Loss = (y_predicted - actual_y) ** 2.0
@ -94,9 +94,9 @@ def train():
weights[0] = weights[0] - delta
for word in actual_x:
if word in word_to_index_mapping:
weights[word_to_index_mapping[word]] -= ((word_count[word] / len(word_count)) * delta)
weights[word_to_index_mapping[word]] -= (log(word_count[word] / len(word_count) + 1) * delta)
if Loss_sum_counter > 10000000:
if Loss_sum_counter > 7000000:
break