test 2
This commit is contained in:
parent
a0a24dd6b5
commit
a459bfbb6f
18842
dev-0/out.tsv
18842
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
6
run.py
6
run.py
@ -11,10 +11,10 @@ def main():
|
|||||||
|
|
||||||
train_words = get_csv("train/expected.tsv")
|
train_words = get_csv("train/expected.tsv")
|
||||||
|
|
||||||
train_data = data[[7, 6]]
|
train_data = data[[6, 7]]
|
||||||
train_data = pd.concat([train_data, train_words], axis=1)
|
train_data = pd.concat([train_data, train_words], axis=1)
|
||||||
|
|
||||||
train_data[760] = train_data[7] + train_data[0] + train_data[6]
|
train_data[607] = train_data[6] + train_data[0] + train_data[7]
|
||||||
|
|
||||||
model = defaultdict(lambda: defaultdict(lambda: 0))
|
model = defaultdict(lambda: defaultdict(lambda: 0))
|
||||||
|
|
||||||
@ -26,7 +26,7 @@ def main():
|
|||||||
|
|
||||||
def train_model(data, model):
|
def train_model(data, model):
|
||||||
for _, row in data.iterrows():
|
for _, row in data.iterrows():
|
||||||
words = nltk.word_tokenize(clean_text(row[760]))
|
words = nltk.word_tokenize(clean_text(row[607]))
|
||||||
for w1, w2 in nltk.bigrams(words, pad_left=True, pad_right=True):
|
for w1, w2 in nltk.bigrams(words, pad_left=True, pad_right=True):
|
||||||
if w1 and w2:
|
if w1 and w2:
|
||||||
model[w2][w1] += 1
|
model[w2][w1] += 1
|
||||||
|
13210
test-A/out.tsv
13210
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user