Fix the number of lines in the out.tsv output files
This commit is contained in:
parent
a0217d00af
commit
2f6c8330e5
17735
dev-0/out.tsv
17735
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
9
run.py
9
run.py
@@ -1,9 +1,10 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import csv
|
||||||
from nltk import trigrams, word_tokenize
|
from nltk import trigrams, word_tokenize
|
||||||
from collections import Counter, defaultdict
|
from collections import Counter, defaultdict
|
||||||
|
|
||||||
train_data = pd.read_csv('train/in.tsv.xz', sep='\t', error_bad_lines=False, warn_bad_lines=False, header=None)
|
train_data = pd.read_csv('train/in.tsv.xz', sep='\t', error_bad_lines=False, warn_bad_lines=False, header=None, quoting=csv.QUOTE_NONE)
|
||||||
train_labels = pd.read_csv('train/expected.tsv', sep='\t', error_bad_lines=False, warn_bad_lines=False, header=None)
|
train_labels = pd.read_csv('train/expected.tsv', sep='\t', error_bad_lines=False, warn_bad_lines=False, header=None, quoting=csv.QUOTE_NONE)
|
||||||
|
|
||||||
train_data = train_data[[6, 7]]
|
train_data = train_data[[6, 7]]
|
||||||
train_data = pd.concat([train_data, train_labels], axis=1)
|
train_data = pd.concat([train_data, train_labels], axis=1)
|
||||||
@@ -45,8 +46,8 @@ def predict_probs(word1, word2):
|
|||||||
|
|
||||||
return str_prediction
|
return str_prediction
|
||||||
|
|
||||||
dev_data = pd.read_csv('dev-0/in.tsv.xz', sep='\t', error_bad_lines=False, warn_bad_lines=False, header=None)
|
dev_data = pd.read_csv('dev-0/in.tsv.xz', sep='\t', error_bad_lines=False, warn_bad_lines=False, header=None, quoting=csv.QUOTE_NONE)
|
||||||
test_data = pd.read_csv('test-A/in.tsv.xz', sep='\t', error_bad_lines=False, warn_bad_lines=False, header=None)
|
test_data = pd.read_csv('test-A/in.tsv.xz', sep='\t', error_bad_lines=False, warn_bad_lines=False, header=None, quoting=csv.QUOTE_NONE)
|
||||||
|
|
||||||
with open('dev-0/out.tsv', 'w') as file:
|
with open('dev-0/out.tsv', 'w') as file:
|
||||||
for index, row in dev_data.iterrows():
|
for index, row in dev_data.iterrows():
|
||||||
|
12378
test-A/out.tsv
12378
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user