retroc2-linear-regression/main.py
2021-09-20 20:41:45 +02:00

38 lines
1.2 KiB
Python

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
# Load the training table: tab-separated with no header row, so pandas labels
# the columns with integers starting at 0.
# NOTE(review): read_csv's default CSV quoting is in effect here; confirm the
# TSV contains no stray quote characters that could merge rows.
df = pd.read_csv('./train/train.tsv', sep='\t', header=None)

# Regression target: the midpoint of the first two columns, stored on the
# frame under the 'mean' label.
df['mean'] = df.iloc[:, 0].add(df.iloc[:, 1]).div(2)

# Fit the TF-IDF vocabulary on column 4 and vectorize it in one step; the
# fitted `vect` is reused later to transform the evaluation inputs.
vect = TfidfVectorizer()
x_train_vect = vect.fit_transform(df[4])

# Fit an ordinary least-squares model on the TF-IDF features.
reg = LinearRegression()
reg.fit(x_train_vect, df['mean'])

# NOTE: this script does not persist the fitted model -- the save step is
# disabled.  The original comment states that a trained model is stored as
# "finalized_model.sav"; it would be produced with:
#   import pickle
#   filename = 'finalized_model.sav'
#   pickle.dump(reg, open(filename, 'wb'))
# Write predictions for every development split, dev-1 first and then dev-0
# (the same order as before).  Both splits go through identical steps, so the
# previously copy-pasted code is folded into a single loop.
#
# Only in.tsv is needed to predict; the gold-standard expected.tsv files were
# read into variables that were never used, so they are no longer loaded
# (which also lets the loop work for a split that has no expected.tsv).
#
# NOTE(review): read_csv uses default CSV quoting and skips blank lines; if
# in.tsv can contain such lines, out.tsv would end up with fewer rows than
# in.tsv -- confirm against the data.
for split_dir in ('./dev-1', './dev-0'):
    # Tab-separated input without a header row; column 0 is what gets
    # vectorized.
    split_in = pd.read_csv(f'{split_dir}/in.tsv', header=None, sep='\t')

    # Reuse the vectorizer fitted on the training data (transform only, no
    # refit) so the features line up with what `reg` was trained on.
    split_features = vect.transform(split_in[0])
    split_pred = reg.predict(split_features)

    # header=False / index=False: write just the predicted values, one per
    # row.  (header=None only worked because None is falsy; False is the
    # documented way to suppress the header.)
    pd.DataFrame(split_pred).to_csv(
        f'{split_dir}/out.tsv', header=False, sep='\t', index=False
    )