# Forked from kubapok/retroc2.
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression


def _write_predictions(vectorizer, model, split_dir):
    """Write model predictions for one data split.

    Reads ``<split_dir>/in.tsv`` (tab-separated, no header row), transforms
    its first column with the already-fitted ``vectorizer``, predicts with
    ``model`` and writes the predictions, one per line and without header or
    index, to ``<split_dir>/out.tsv``.

    Args:
        vectorizer: Fitted object exposing ``transform``.
        model: Fitted object exposing ``predict``.
        split_dir: Directory of the split, e.g. ``'./dev-0'``.
    """
    # NOTE(review): read_csv uses its default quote handling here; if the
    # text can contain stray double quotes, rows could be merged and the
    # output would have fewer lines than the input. Consider
    # quoting=csv.QUOTE_NONE after confirming against the data.
    inputs = pd.read_csv(split_dir + '/in.tsv', header=None, sep='\t')
    predictions = model.predict(vectorizer.transform(inputs[0]))
    pd.DataFrame(predictions).to_csv(
        split_dir + '/out.tsv', header=False, sep='\t', index=False
    )


# Training data: tab-separated, no header row, so columns are labelled 0..n.
df = pd.read_csv('./train/train.tsv', header=None, sep='\t')

# Regression target: midpoint of the first two columns
# (presumably the start and end of a date range -- verify against the data).
df['mean'] = (df.iloc[:, 0] + df.iloc[:, 1]) / 2

# TF-IDF features are fitted on column 4 of the training file
# (presumably the document text -- verify against the data).
vect = TfidfVectorizer()
x_train_vect = vect.fit_transform(df[4])

# The original author notes that the trained model is stored as
# "finalized_model.sav"; the snippet that writes it is kept disabled below.
reg = LinearRegression().fit(x_train_vect, df['mean'])

# Saving the model (disabled):
# import pickle
# filename = 'finalized_model.sav'
# with open(filename, 'wb') as model_file:
#     pickle.dump(reg, model_file)

# Predictions for dev-1, then dev-0. Only in.tsv is needed; expected.tsv is
# not read because nothing in this script evaluates against it.
for split_dir in ('./dev-1', './dev-0'):
    _write_predictions(vect, reg, split_dir)