fix out no. lines

This commit is contained in:
Łukasz Jędyk 2022-04-02 15:26:18 +02:00
parent a0217d00af
commit 2f6c8330e5
3 changed files with 15146 additions and 14976 deletions

File diff suppressed because it is too large Load Diff

9
run.py
View File

@ -1,9 +1,10 @@
import pandas as pd
import csv
from nltk import trigrams, word_tokenize
from collections import Counter, defaultdict
train_data = pd.read_csv('train/in.tsv.xz', sep='\t', error_bad_lines=False, warn_bad_lines=False, header=None)
train_labels = pd.read_csv('train/expected.tsv', sep='\t', error_bad_lines=False, warn_bad_lines=False, header=None)
train_data = pd.read_csv('train/in.tsv.xz', sep='\t', error_bad_lines=False, warn_bad_lines=False, header=None, quoting=csv.QUOTE_NONE)
train_labels = pd.read_csv('train/expected.tsv', sep='\t', error_bad_lines=False, warn_bad_lines=False, header=None, quoting=csv.QUOTE_NONE)
train_data = train_data[[6, 7]]
train_data = pd.concat([train_data, train_labels], axis=1)
@ -45,8 +46,8 @@ def predict_probs(word1, word2):
return str_prediction
dev_data = pd.read_csv('dev-0/in.tsv.xz', sep='\t', error_bad_lines=False, warn_bad_lines=False, header=None)
test_data = pd.read_csv('test-A/in.tsv.xz', sep='\t', error_bad_lines=False, warn_bad_lines=False, header=None)
dev_data = pd.read_csv('dev-0/in.tsv.xz', sep='\t', error_bad_lines=False, warn_bad_lines=False, header=None, quoting=csv.QUOTE_NONE)
test_data = pd.read_csv('test-A/in.tsv.xz', sep='\t', error_bad_lines=False, warn_bad_lines=False, header=None, quoting=csv.QUOTE_NONE)
with open('dev-0/out.tsv', 'w') as file:
for index, row in dev_data.iterrows():

File diff suppressed because it is too large Load Diff