Linear regression: higher F1.0, lower accuracy
This commit is contained in:
parent
d7040c9bc6
commit
db398db388
4414
dev-0/out.tsv
4414
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
13
predict.py
13
predict.py
@ -8,6 +8,8 @@ from tokenizer import tokenize
|
||||
#Load model
|
||||
model = pickle.load(open("model.pkl","rb"))
|
||||
weights, word_to_index_mapping, word_count = model
|
||||
sum = 0
|
||||
counter = 0
|
||||
|
||||
for line in sys.stdin:
|
||||
document = line.rstrip()
|
||||
@ -17,9 +19,12 @@ for line in sys.stdin:
|
||||
|
||||
y_predicted = weights[0]
|
||||
for word in terms:
|
||||
y_predicted += weights[word_to_index_mapping.get(word,0)] * (word_count.get(word,0) / len(word_count))
|
||||
|
||||
if y_predicted <= 0.5:
|
||||
y_predicted += weights[word_to_index_mapping.get(word,0)] * log(word_count.get(word,0) / len(word_count) + 1)
|
||||
sum += y_predicted
|
||||
counter += 1
|
||||
if y_predicted <= 0:
|
||||
print(0)
|
||||
else:
|
||||
print(1)
|
||||
print(1)
|
||||
|
||||
#print(sum / counter)
|
4484
test-A/out.tsv
4484
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
6
train.py
6
train.py
@ -76,7 +76,7 @@ def train():
|
||||
#Using get() handles missing words by substituting a default value of your choice
|
||||
#The fallback weight index doesn't matter when a word is missing, because word_count.get(word,0) returns 0 and zeroes the term
|
||||
for word in actual_x:
|
||||
y_predicted += weights[word_to_index_mapping.get(word,0)] * (word_count.get(word,0) / len(word_count))
|
||||
y_predicted += weights[word_to_index_mapping.get(word,0)] * log(word_count.get(word,0) / len(word_count) + 1)
|
||||
|
||||
#Compute the cost: measure how good our prediction was
|
||||
Loss = (y_predicted - actual_y) ** 2.0
|
||||
@ -94,9 +94,9 @@ def train():
|
||||
weights[0] = weights[0] - delta
|
||||
for word in actual_x:
|
||||
if word in word_to_index_mapping:
|
||||
weights[word_to_index_mapping[word]] -= ((word_count[word] / len(word_count)) * delta)
|
||||
weights[word_to_index_mapping[word]] -= (log(word_count[word] / len(word_count) + 1) * delta)
|
||||
|
||||
if Loss_sum_counter > 10000000:
|
||||
if Loss_sum_counter > 7000000:
|
||||
break
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user