my brilliant solution4
This commit is contained in:
parent
abba594b01
commit
24b02bc754
5272
dev-0/out.tsv
Normal file
5272
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
28
linearpred.py
Normal file
28
linearpred.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
#!/usr/bin/env python3
"""Predict binary labels for TSV documents read from stdin.

Input lines have the form "label<TAB>document". For each line the script
scores the document with the linear model stored in ``model.pkl``
(written by train.py) and prints ``1`` if the score exceeds 0.65,
otherwise ``0``.
"""
import math  # kept from the original file; not used below
import pickle
import sys
from tokenize import tokenize  # kept from the original file; not used below


def score_document(terms, weights, word_to_index, vocabulary, words_count):
    """Return the linear score for one document.

    terms:         list of whitespace-split tokens of the document.
    weights:       weights[0] is the bias; word weights live at
                   weights[word_to_index[term]].
    vocabulary:    set of known terms; unknown terms contribute nothing.
    words_count:   term -> frequency dict; MUTATED: every term of this
                   document is counted into it first (the original script
                   updated the model's training counts at inference time —
                   kept for compatibility, but note it makes scores depend
                   on the order documents are processed).
    """
    # Fold this document's term occurrences into the running counts.
    for term in terms:
        if term in words_count:
            words_count[term] += 1
        else:
            words_count[term] = 1

    # score = bias + sum over term occurrences of tf-normalized weight.
    score = weights[0]
    for term in terms:
        if term in vocabulary:
            score += words_count[term] / len(words_count) * weights[word_to_index[term]]
    return score


def main():
    """Read the pickled model, then label every stdin line."""
    # NOTE(review): pickle.load executes arbitrary code from the file;
    # only load model.pkl files you produced yourself.
    with open("model.pkl", "rb") as fh:
        word_to_index, vocabulary, weights, words_count = pickle.load(fh)

    for line in sys.stdin:
        fields = line.rstrip().split('\t')  # "label<TAB>document"
        terms = fields[1].split(' ')
        score = score_document(terms, weights, word_to_index, vocabulary, words_count)
        # 0.65 is the decision threshold chosen by the original author
        # (regression output mapped to a binary label).
        print(1 if score > 0.65 else 0)


if __name__ == "__main__":
    main()
|
5152
test-A/out.tsv
Normal file
5152
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
71
train.py
Normal file
71
train.py
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
#!/usr/bin/env python3
"""Train a linear model for binary-labelled TSV documents.

Reads "label<TAB>document" lines, fits a bias plus one weight per word
by plain SGD on squared error over 500 sweeps, and pickles the model as
``(word_to_index, vocabulary, weights, words_count)`` to ``model.pkl``
for use by linearpred.py.
"""
import math    # kept from the original file; not used below
import pickle
import random
import re      # kept from the original file; not used below
import sys


def train(input_lines=None):
    """Fit the model and write it to model.pkl.

    input_lines: iterable of "label<TAB>document" strings; defaults to
    reading all of stdin (the original script's behavior).
    """
    if input_lines is None:
        input_lines = sys.stdin.readlines()

    learning_rate = 0.0001

    # Pass 1: vocabulary and global term frequencies over the training set.
    vocabulary = set()
    words_count = {}
    for line in input_lines:
        fields = line.rstrip().split('\t')  # "label<TAB>document"
        for term in fields[1].split(' '):
            vocabulary.add(term)
            words_count[term] = words_count.get(term, 0) + 1

    # Index 0 is the bias; word weights occupy indices 1..len(vocabulary).
    word_to_index = {}
    for i, term in enumerate(vocabulary, start=1):
        word_to_index[term] = i
    weights = [random.uniform(-0.1, 0.1) for _ in range(len(vocabulary) + 1)]

    # Pass 2: SGD on squared error, 500 sweeps over the data.
    loss_sum = 0.0
    loss_counter = 0
    for _epoch in range(500):
        for line in input_lines:
            fields = line.rstrip().split('\t')
            label = fields[0].strip()
            target = float(label)
            terms = fields[1].split(' ')

            # Prediction: bias + sum of tf-normalized word weights.
            expected = weights[0]
            for term in terms:
                expected += words_count[term] / len(words_count) * weights[word_to_index[term]]

            # Gradient step on every word weight present in the document.
            for term in terms:
                weights[word_to_index[term]] -= (
                    words_count[term] / len(words_count)
                ) * (expected - target) * learning_rate

            loss_sum += (expected - target) ** 2
            # Progress trace every 10000 updates. NOTE(review): this also
            # fires on the very first update (counter still 0) — kept as-is.
            if loss_counter % 10000 == 0:
                loss_counter = 0
                loss_sum = 0.0
                print(expected)
                print(label)

            # Bias update is unnormalized, matching the original.
            weights[0] -= (expected - target) * learning_rate
            loss_counter += 1

    model = (word_to_index, vocabulary, weights, words_count)
    with open("model.pkl", "wb") as fh:
        pickle.dump(model, fh)


if __name__ == "__main__":
    train()
|
Loading…
Reference in New Issue
Block a user