53 lines
1.2 KiB
Python
53 lines
1.2 KiB
Python
|
import pandas as pd
|
||
|
import fasttext
|
||
|
|
||
|
# Prefix this script puts in front of every label in the training files and
# strips again from each predicted label.
LABEL_PREFIX = '__label__'


def read_column(path, column):
    """Return one column of a headerless, tab-separated file as a Series.

    NOTE(review): ``read_csv`` is used with its default quote handling, as in
    the original script. If the data can contain unbalanced double quotes,
    consider ``quoting=csv.QUOTE_NONE`` -- confirm against the data first.
    """
    return pd.read_csv(path, sep='\t', header=None)[column]


def to_single_line(text):
    """Return *text* as a single line with whitespace runs collapsed.

    The training files hold one example per line and ``model.predict``
    rejects input containing a newline, so embedded newlines are replaced by
    spaces. Non-string values (for example the NaN pandas yields for an empty
    cell) are rendered with ``str()``, which matches how the f-string used for
    the training lines renders them.
    """
    return ' '.join(str(text).split())


def format_training_line(label, text):
    """Return one labelled example as ``__label__<label> <text>``."""
    return f'{LABEL_PREFIX}{label} {to_single_line(text)}'


def strip_label_prefix(label):
    """Return *label* without the leading ``__label__`` marker, if present."""
    if label.startswith(LABEL_PREFIX):
        return label[len(LABEL_PREFIX):]
    return label


def write_labelled_file(path, labels, texts):
    """Write paired labels and texts to *path*, one example per line.

    Raises:
        ValueError: if *labels* and *texts* differ in length, since pairing
            them would then silently misalign or drop examples.
    """
    if len(labels) != len(texts):
        raise ValueError(
            f'{path}: {len(labels)} labels but {len(texts)} texts'
        )
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(
            f'{format_training_line(label, text)}\n'
            for label, text in zip(labels, texts)
        )


def write_predictions(model, texts, path):
    """Write the top predicted label for each text to *path*, one per line."""
    with open(path, 'w', encoding='utf-8') as f:
        for text in texts:
            # predict() returns (labels, probabilities); take the first label.
            top_label = model.predict(to_single_line(text))[0][0]
            f.write(f'{strip_label_prefix(top_label)}\n')


def main():
    """Train a supervised fastText model and write predictions per split."""
    # Column 2 of each in.tsv is the text; column 0 of expected.tsv the label.
    train_texts = read_column('train/in.tsv', 2)
    train_labels = read_column('train/expected.tsv', 0)
    dev_texts = read_column('dev-0/in.tsv', 2)
    dev_labels = read_column('dev-0/expected.tsv', 0)
    test_a_texts = read_column('test-A/in.tsv', 2)
    test_b_texts = read_column('test-B/in.tsv', 2)

    write_labelled_file('train.txt', train_labels, train_texts)
    # dev.txt is written in the same format but not read again by this script.
    write_labelled_file('dev.txt', dev_labels, dev_texts)

    model = fasttext.train_supervised('train.txt')

    outputs = (
        (dev_texts, 'dev-0/out.txt'),
        (test_a_texts, 'test-A/out.txt'),
        (test_b_texts, 'test-B/out.txt'),
    )
    for texts, out_path in outputs:
        write_predictions(model, texts, out_path)


if __name__ == '__main__':
    main()
|
||
|
|