Compare commits

...

1 Commit

Author SHA1 Message Date
Wojciech Jarmosz
99da7c018d Add linear regression model and results 2021-05-18 23:01:50 +02:00
5 changed files with 2042 additions and 0 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
*~
*.swp
*.pyc
env

1000
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

31
linear_regression.py Normal file
View File

@ -0,0 +1,31 @@
from sklearn.linear_model import LinearRegression
import pandas as pd
def prepare_data(file, type):
    """Load a headerless tab-separated file and encode it numerically.

    Every object-dtype column is replaced by its integer category codes.
    Afterwards one column is expanded into indicator (dummy) columns:
    column 4 when ``type`` is ``'train'``, column 3 for any other value.

    Args:
        file: Path (or anything ``pandas.read_csv`` accepts) of the input.
        type: ``'train'`` selects column 4 for the dummy expansion; every
            other value selects column 3.

    Returns:
        The encoded ``pandas.DataFrame``.
    """
    frame = pd.read_csv(file, sep="\t", header=None)

    # Snapshot the text columns up front; the loop below changes their dtype.
    text_columns = list(frame.select_dtypes(include=object).columns)
    for label in text_columns:
        as_category = frame[label].astype("category")
        frame[label] = as_category.cat.codes

    # NOTE(review): presumably the training file carries one extra leading
    # column, which is why its index is shifted by one -- confirm.
    dummy_column = 4 if type == 'train' else 3
    return pd.get_dummies(frame, columns=[dummy_column])
def _write_predictions(model, in_path, out_path, kind):
    """Predict for every row of *in_path* and write one integer per line.

    Args:
        model: Fitted estimator exposing ``predict``.
        in_path: Tab-separated input file handed to ``prepare_data``.
        out_path: File that receives the predictions; overwritten.
        kind: Value forwarded as the ``type`` argument of ``prepare_data``.
    """
    features = prepare_data(in_path, kind)
    # Finish reading and predicting before the output is opened for writing,
    # so a failure here cannot leave an emptied results file behind.
    lines = [str(int(value)) + '\n' for value in model.predict(features)]
    with open(out_path, 'w') as writer:
        writer.writelines(lines)


# Fit the model: column 0 is the regression target, the rest are features.
data = prepare_data("./train/train.tsv", "train")
# Only rows whose target exceeds 1000 are used for training.
data = data.loc[data[0] > 1000]
price = data.iloc[:, 0]
training_data = data.iloc[:, 1:]
clf = LinearRegression().fit(training_data, price)

# NOTE(review): training and inference features are matched by position only,
# and prepare_data encodes each file independently -- category codes and the
# number of dummy columns may differ between files; confirm against the data.
_write_predictions(clf, 'dev-0/in.tsv', 'dev-0/out.tsv', "dev")
_write_predictions(clf, 'test-A/in.tsv', 'test-A/out.tsv', "test")

10
requirements.txt Normal file
View File

@ -0,0 +1,10 @@
joblib==1.0.1
numpy==1.20.3
pandas==1.2.4
python-dateutil==2.8.1
pytz==2021.1
scikit-learn==0.24.2
scipy==1.6.3
six==1.16.0
sklearn==0.0
threadpoolctl==2.1.0

1000
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff