478841
This commit is contained in:
parent
c3ce71c113
commit
11867437bf
11
run.py
11
run.py
@ -5,18 +5,15 @@ from sklearn.feature_extraction.text import TfidfVectorizer
|
|||||||
|
|
||||||
# * Training data loading
|
# * Training data loading
|
||||||
with open('train/in.tsv', 'r', encoding='utf-8') as f:
|
with open('train/in.tsv', 'r', encoding='utf-8') as f:
|
||||||
x_train = pd.DataFrame([line.strip().split('\t')
|
x_train = pd.DataFrame(f.readlines(), columns=['text'])
|
||||||
for line in f.readlines()], columns=['text', 'text_id'])
|
|
||||||
y_train = pd.read_csv('train/expected.tsv', sep='\t',
|
y_train = pd.read_csv('train/expected.tsv', sep='\t',
|
||||||
names=['paranormal'], encoding='utf-8')
|
names=['paranormal'], encoding='utf-8')
|
||||||
# *Validation data loading
|
# *Validation data loading
|
||||||
with open('dev-0/in.tsv', 'r', encoding='utf-8') as f:
|
with open('dev-0/in.tsv', 'r', encoding='utf-8') as f:
|
||||||
x_dev = pd.DataFrame([line.strip().split('\t')
|
x_dev = pd.DataFrame(f.readlines(), columns=['text'])
|
||||||
for line in f.readlines()], columns=['text', 'text_id'])
|
|
||||||
# * Test data loading
|
# * Test data loading
|
||||||
with open('train/in.tsv', 'r', encoding='utf-8') as f:
|
with open('test-A/in.tsv', 'r', encoding='utf-8') as f:
|
||||||
x_test = pd.DataFrame([line.strip().split('\t')
|
x_test = pd.DataFrame(f.readlines(), columns=['text'])
|
||||||
for line in f.readlines()], columns=['text', 'text_id'])
|
|
||||||
|
|
||||||
# * Training data preparation
|
# * Training data preparation
|
||||||
tfidf_vectorizer = TfidfVectorizer(max_df=0.95, max_features=500)
|
tfidf_vectorizer = TfidfVectorizer(max_df=0.95, max_features=500)
|
||||||
|
285395
test-A/out.tsv
285395
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user