linear regression

This commit is contained in:
Piotr Biskup 2021-09-20 20:38:57 +02:00
parent 647c099815
commit a8d2e1126e
13 changed files with 32742 additions and 0 deletions

2
.idea/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
# Default ignored files
/workspace.xml

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

4
.idea/misc.xml Normal file
View File

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (paranormal-or-skeptic-ISI-public-bayes-2)" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/retroc2-linear-regression.iml" filepath="$PROJECT_DIR$/.idea/retroc2-linear-regression.iml" />
</modules>
</component>
</project>

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.7 (paranormal-or-skeptic-ISI-public-bayes-2)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

6
.idea/vcs.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

View File

@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 2
}

19998
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

11562
dev-1/out.tsv Normal file

File diff suppressed because it is too large Load Diff

BIN
finalized_model.sav Normal file

Binary file not shown.

BIN
geval Normal file

Binary file not shown.

1105
logistic regression.ipynb Normal file

File diff suppressed because it is too large Load Diff

37
main.py Normal file
View File

@ -0,0 +1,37 @@
import csv

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
# Location of the training data and the index of the column that is fed to
# the TF-IDF vectorizer.
TRAIN_PATH = './train/train.tsv'
TEXT_COLUMN = 4

# Directories for which predictions are produced; each one must contain an
# `in.tsv` file and receives an `out.tsv` file.
EVAL_DIRS = ('./dev-1', './dev-0')


def read_tsv(path):
    """Read a header-less, tab-separated file into a DataFrame.

    Quoting is disabled on purpose: the files hold free text, and with the
    default quoting rules a stray double quote would be treated as the start
    of a quoted field, which can merge several physical lines into one row
    and leave the predictions misaligned with the input lines.
    """
    return pd.read_csv(path, header=None, sep='\t', quoting=csv.QUOTE_NONE)


def write_predictions(predictions, path):
    """Write one prediction per line to `path`, without header or index."""
    pd.DataFrame(predictions).to_csv(path, header=False, sep='\t', index=False)


def main():
    """Fit TF-IDF + linear regression on the training set and predict.

    The regression target is the mean of the first two columns of the
    training file; the features are TF-IDF vectors of column `TEXT_COLUMN`.
    Predictions are written to `out.tsv` in every directory of `EVAL_DIRS`.
    """
    train = read_tsv(TRAIN_PATH)
    target = (train.iloc[:, 0] + train.iloc[:, 1]) / 2

    vectorizer = TfidfVectorizer()
    train_features = vectorizer.fit_transform(train[TEXT_COLUMN])

    # NOTE: per the original comments, a model trained this way was stored
    # with pickle as "finalized_model.sav"; this script does not save it.
    model = LinearRegression().fit(train_features, target)

    for directory in EVAL_DIRS:
        # The first column of in.tsv is the text to vectorize.
        texts = read_tsv(f'{directory}/in.tsv')[0]
        predictions = model.predict(vectorizer.transform(texts))
        write_predictions(predictions, f'{directory}/out.tsv')


if __name__ == '__main__':
    main()