forked from kubapok/en-ner-conll-2003
import pandas as pd
import numpy as np
import gensim
import torch
import seaborn as sns
from sklearn.model_selection import train_test_split


# Load data
# train/train.tsv: one row per document, with a label sequence and a token sequence
train = pd.read_csv('train/train.tsv', sep='\t', names=['labels', 'document'])
Y_train = train['labels'].values
X_train = train['document'].values

# test-A/in.tsv: documents only
test = pd.read_csv('test-A/in.tsv', sep='\t', names=['document'])
X_test = test['document'].values

# dev-0: documents in in.tsv, gold labels in expected.tsv
dev = pd.read_csv('dev-0/in.tsv', sep='\t', names=['document'])
exp = pd.read_csv('dev-0/expected.tsv', sep='\t', names=['labels'])
X_dev = dev['document'].values
Y_dev = exp['labels'].values
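
# --- Added sketch (not part of the original script): sanity check and tokenisation.
# This is a hedged, minimal illustration; the names below (X_train_tokens,
# Y_dev_tokens, etc.) are hypothetical and assume each row stores a
# whitespace-separated token sequence with a matching whitespace-separated
# label sequence. The downstream gensim/torch pipeline is not shown here.

# dev labels come from a separate file, so check that the row counts line up
assert len(X_dev) == len(Y_dev), "dev-0 in.tsv and expected.tsv differ in length"

# split each document and its label string into parallel token/tag lists
X_train_tokens = [doc.split() for doc in X_train]
Y_train_tokens = [tags.split() for tags in Y_train]
X_dev_tokens = [doc.split() for doc in X_dev]
Y_dev_tokens = [tags.split() for tags in Y_dev]
X_test_tokens = [doc.split() for doc in X_test]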