most common value in train

This commit is contained in:
Jakub Pokrywka 2023-11-16 16:54:06 +01:00
parent c5d9b752dc
commit 2cfd344ac5
4 changed files with 158108 additions and 158098 deletions

122480
dev-0/out.tsv

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,10 @@
import pandas as pd
# Majority-class baseline: find the label that occurs most often in the
# training data and write it once for every input line of each split.

# The expected file is tab-separated, so parse it with sep='\t' instead of the
# default comma separator (a label containing a comma would otherwise be split
# into several fields). dtype=str and keep_default_na=False keep the labels
# verbatim: numeric-looking labels are not converted to numbers (which would
# break writing them as text below), and labels such as "NA" are not turned
# into NaN and then dropped by value_counts().
r_out = pd.read_csv(
    '../train/expected.tsv',
    sep='\t',
    names=('class',),
    dtype=str,
    keep_default_na=False,
)
most_common = r_out['class'].value_counts().idxmax()
prediction_line = f'{most_common}\n'

for dataset in 'dev-0', 'test-A', 'test-B':
    # The input is opened in binary mode because only its number of lines is
    # used; this avoids decoding it with a locale-dependent encoding. The
    # output encoding is pinned to UTF-8 for the same reason.
    with open(f'../{dataset}/out.tsv', 'w', encoding='utf-8') as f_out, \
            open(f'../{dataset}/in.tsv', 'rb') as f_in:
        f_out.writelines(prediction_line for _ in f_in)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff