478841
This commit is contained in:
parent
c3ce71c113
commit
11867437bf
11
run.py
11
run.py
@ -5,18 +5,15 @@ from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
|
||||
# * Training data loading
|
||||
with open('train/in.tsv', 'r', encoding='utf-8') as f:
|
||||
x_train = pd.DataFrame([line.strip().split('\t')
|
||||
for line in f.readlines()], columns=['text', 'text_id'])
|
||||
x_train = pd.DataFrame(f.readlines(), columns=['text'])
|
||||
y_train = pd.read_csv('train/expected.tsv', sep='\t',
|
||||
names=['paranormal'], encoding='utf-8')
|
||||
# * Validation data loading
|
||||
with open('dev-0/in.tsv', 'r', encoding='utf-8') as f:
|
||||
x_dev = pd.DataFrame([line.strip().split('\t')
|
||||
for line in f.readlines()], columns=['text', 'text_id'])
|
||||
x_dev = pd.DataFrame(f.readlines(), columns=['text'])
|
||||
# * Test data loading
|
||||
with open('train/in.tsv', 'r', encoding='utf-8') as f:
|
||||
x_test = pd.DataFrame([line.strip().split('\t')
|
||||
for line in f.readlines()], columns=['text', 'text_id'])
|
||||
with open('test-A/in.tsv', 'r', encoding='utf-8') as f:
|
||||
x_test = pd.DataFrame(f.readlines(), columns=['text'])
|
||||
|
||||
# * Training data preparation
|
||||
tfidf_vectorizer = TfidfVectorizer(max_df=0.95, max_features=500)
|
||||
|
285395
test-A/out.tsv
285395
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user