Linear regression higher F1.0 lower accuracy
This commit is contained in:
parent
d7040c9bc6
commit
db398db388
4414
dev-0/out.tsv
4414
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
13
predict.py
13
predict.py
@ -8,6 +8,8 @@ from tokenizer import tokenize
|
|||||||
#Load model
|
#Load model
|
||||||
model = pickle.load(open("model.pkl","rb"))
|
model = pickle.load(open("model.pkl","rb"))
|
||||||
weights, word_to_index_mapping, word_count = model
|
weights, word_to_index_mapping, word_count = model
|
||||||
|
sum = 0
|
||||||
|
counter = 0
|
||||||
|
|
||||||
for line in sys.stdin:
|
for line in sys.stdin:
|
||||||
document = line.rstrip()
|
document = line.rstrip()
|
||||||
@ -17,9 +19,12 @@ for line in sys.stdin:
|
|||||||
|
|
||||||
y_predicted = weights[0]
|
y_predicted = weights[0]
|
||||||
for word in terms:
|
for word in terms:
|
||||||
y_predicted += weights[word_to_index_mapping.get(word,0)] * (word_count.get(word,0) / len(word_count))
|
y_predicted += weights[word_to_index_mapping.get(word,0)] * log(word_count.get(word,0) / len(word_count) + 1)
|
||||||
|
sum += y_predicted
|
||||||
if y_predicted <= 0.5:
|
counter += 1
|
||||||
|
if y_predicted <= 0:
|
||||||
print(0)
|
print(0)
|
||||||
else:
|
else:
|
||||||
print(1)
|
print(1)
|
||||||
|
|
||||||
|
#print(sum / counter)
|
4484
test-A/out.tsv
4484
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
6
train.py
6
train.py
@ -76,7 +76,7 @@ def train():
|
|||||||
#With .get() you can handle missing words and replace them with a value you choose
|
#With .get() you can handle missing words and replace them with a value you choose
|
||||||
#The fallback index used for weights doesn't matter if the word is missing, because word_count will give 0
|
#The fallback index used for weights doesn't matter if the word is missing, because word_count will give 0
|
||||||
for word in actual_x:
|
for word in actual_x:
|
||||||
y_predicted += weights[word_to_index_mapping.get(word,0)] * (word_count.get(word,0) / len(word_count))
|
y_predicted += weights[word_to_index_mapping.get(word,0)] * log(word_count.get(word,0) / len(word_count) + 1)
|
||||||
|
|
||||||
#Cost computation. Check how good our prediction was
|
#Cost computation. Check how good our prediction was
|
||||||
Loss = (y_predicted - actual_y) ** 2.0
|
Loss = (y_predicted - actual_y) ** 2.0
|
||||||
@ -94,9 +94,9 @@ def train():
|
|||||||
weights[0] = weights[0] - delta
|
weights[0] = weights[0] - delta
|
||||||
for word in actual_x:
|
for word in actual_x:
|
||||||
if word in word_to_index_mapping:
|
if word in word_to_index_mapping:
|
||||||
weights[word_to_index_mapping[word]] -= ((word_count[word] / len(word_count)) * delta)
|
weights[word_to_index_mapping[word]] -= (log(word_count[word] / len(word_count) + 1) * delta)
|
||||||
|
|
||||||
if Loss_sum_counter > 10000000:
|
if Loss_sum_counter > 7000000:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user