s444501
This commit is contained in:
parent
33b70ce7b1
commit
736197e51b
20000
dev-0/out.tsv
Normal file
20000
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
11563
dev-1/out.tsv
Normal file
11563
dev-1/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
54
run.py
Normal file
54
run.py
Normal file
@ -0,0 +1,54 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
|
||||
def read_lines(path):
    """Return every line of the UTF-8 text file at *path* as a list of str.

    Line terminators are kept, exactly as ``file.readlines()`` yields them.
    Each whole line (tabs included) is later treated as one document.
    """
    with open(path, encoding='utf8') as file:
        return file.readlines()


def main():
    """Fit TF-IDF + linear regression on the train set and write predictions.

    Reads ``train/train.tsv`` plus the ``in.tsv`` files of ``dev-0``,
    ``dev-1`` and ``test-A``, and writes one predicted value per input line
    to the matching ``out.tsv`` in each of those directories.
    """
    print('Loading train.tsv...')
    train = pd.read_csv('train/train.tsv', sep='\t', header=None)
    # Regression target: the mean of columns 0 and 1.
    # NOTE(review): presumably the start/end of a date range - confirm
    # against the dataset description.
    train_date = (train[0] + train[1]) / 2
    train_text = train[4]

    # Debug: uncomment to train on a small subset only.
    # train_text = train_text[:1000]
    # train_date = train_date[:1000]

    print('Loading dev-0...')
    dev0_text = read_lines('dev-0/in.tsv')

    print('Loading dev-1...')
    dev1_text = read_lines('dev-1/in.tsv')

    print('Loading test...')
    test_text = read_lines('test-A/in.tsv')

    print('Vectorizing training text...')
    # max_df=0.90 drops terms that occur in more than 90% of the documents.
    vc = TfidfVectorizer(max_df=0.90)
    vectorized_text = vc.fit_transform(train_text)

    print('Training model...')
    model = LinearRegression()
    model.fit(vectorized_text, train_date)

    # The evaluation sets reuse the vocabulary fitted on the training text,
    # hence transform() rather than fit_transform().
    print('Predicting dev0...')
    vectorized_dev0 = vc.transform(dev0_text)
    out_dev0 = model.predict(vectorized_dev0)

    print('Predicting dev1...')
    vectorized_dev1 = vc.transform(dev1_text)
    out_dev1 = model.predict(vectorized_dev1)

    print('Predicting test-A...')
    vectorized_test = vc.transform(test_text)
    out_test = model.predict(vectorized_test)

    # Files are written only after every prediction has succeeded.
    print('Saving to file')
    np.savetxt('dev-0/out.tsv', out_dev0, fmt='%f')
    np.savetxt('dev-1/out.tsv', out_dev1, fmt='%f')
    np.savetxt('test-A/out.tsv', out_test, fmt='%f')


if __name__ == '__main__':
    main()
|
||||
|
14220
test-A/out.tsv
Normal file
14220
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user