Compare commits

...

2 Commits

Author SHA1 Message Date
Piotr Biskup
000725c1a9 fix jupyter notebook name 2021-09-20 20:43:40 +02:00
Piotr Biskup
a8d2e1126e linear regression 2021-09-20 20:41:45 +02:00
13 changed files with 32741 additions and 0 deletions

2
.idea/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
# Default ignored files
/workspace.xml

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

4
.idea/misc.xml Normal file
View File

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (paranormal-or-skeptic-ISI-public-bayes-2)" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/retroc2-linear-regression.iml" filepath="$PROJECT_DIR$/.idea/retroc2-linear-regression.iml" />
</modules>
</component>
</project>

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.7 (paranormal-or-skeptic-ISI-public-bayes-2)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

6
.idea/vcs.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

View File

@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 2
}

19998
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

11562
dev-1/out.tsv Normal file

File diff suppressed because it is too large Load Diff

BIN
finalized_model.sav Normal file

Binary file not shown.

BIN
geval Normal file

Binary file not shown.

1104
linear regression.ipynb Normal file

File diff suppressed because it is too large Load Diff

37
main.py Normal file
View File

@ -0,0 +1,37 @@
import csv

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
# Train a TF-IDF + linear regression model on ./train/train.tsv and write
# predictions for the dev-1 and dev-0 splits to their respective out.tsv files.
# All paths are relative to the directory the script is started from.


def read_tsv(path):
    """Load a headerless, tab-separated file into a DataFrame.

    Quote characters are treated as ordinary text (csv.QUOTE_NONE). With the
    pandas default, a stray double quote inside a document opens a "quoted
    field" that can swallow the following lines, so the number of parsed rows
    no longer matches the number of lines in the file.
    """
    return pd.read_csv(path, header=None, sep='\t', quoting=csv.QUOTE_NONE)


def write_predictions(model, vectorizer, split_dir):
    """Predict for split_dir/in.tsv and write one value per row to split_dir/out.tsv.

    Column 0 of in.tsv is vectorized with the already-fitted ``vectorizer``.
    The split's expected.tsv is not needed to produce predictions, so it is
    not read here.
    """
    features = vectorizer.transform(read_tsv(split_dir + '/in.tsv')[0])
    predictions = model.predict(features)
    # header=False / index=False: the output holds the predicted values only.
    pd.DataFrame(predictions).to_csv(
        split_dir + '/out.tsv', header=False, sep='\t', index=False
    )


df = read_tsv('./train/train.tsv')
# Regression target: the midpoint of columns 0 and 1.
# NOTE(review): presumably these are the start/end of a date range - confirm
# against the dataset description.
df['mean'] = (df.iloc[:, 0] + df.iloc[:, 1]) / 2

vect = TfidfVectorizer()
# Column 4 is used as the input text for the model.
x_train_vect = vect.fit_transform(df[4])

# The trained model is stored as "finalized_model.sav".
reg = LinearRegression().fit(x_train_vect, df['mean'])

# Saving the model:
# import pickle
# filename = 'finalized_model.sav'
# pickle.dump(reg, open(filename, 'wb'))

# Predictions for dev-1, then dev-0.
for split_dir in ('./dev-1', './dev-0'):
    write_predictions(reg, vect, split_dir)