Add linear regression model and results
This commit is contained in:
parent
5c4bb10ddf
commit
99da7c018d
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,3 +1,4 @@
|
||||
*~
|
||||
*.swp
|
||||
*.pyc
|
||||
env
|
1000
dev-0/out.tsv
Normal file
1000
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
31
linear_regression.py
Normal file
31
linear_regression.py
Normal file
@ -0,0 +1,31 @@
|
||||
from sklearn.linear_model import LinearRegression
|
||||
import pandas as pd
|
||||
|
||||
def prepare_data(file, type):
    """Load a headerless tab-separated file and encode it numerically.

    Every object-dtype column is replaced by its integer category codes,
    then a single column is expanded into one-hot indicator columns.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.
        type: Dataset kind. ``'train'`` one-hot encodes column 4; any other
            value one-hot encodes column 3. The name shadows the builtin
            but is kept because it is part of the signature.

    Returns:
        The encoded ``pandas.DataFrame``.
    """
    frame = pd.read_csv(file, sep="\t", header=None)

    # Category codes are derived from this file alone.
    # NOTE(review): the same string may map to different codes in different
    # files - confirm this is acceptable for the model.
    text_columns = frame.select_dtypes(include=object).columns.values
    for name in text_columns:
        frame[name] = frame[name].astype("category").cat.codes

    # Presumably the training file has one extra leading column, which
    # shifts the encoded column by one - verify against the data layout.
    one_hot_column = 4 if type == 'train' else 3
    return pd.get_dummies(frame, columns=[one_hot_column])
|
||||
|
||||
# Load the training set and keep only rows whose column 0 exceeds 1000.
data = prepare_data("./train/train.tsv", "train")
above_threshold = data[0] > 1000
data = data.loc[above_threshold]

# Column 0 is the regression target; every remaining column is a feature.
price = data.iloc[:, 0]
training_data = data.iloc[:, 1:]

clf = LinearRegression()
clf.fit(training_data, price)

# NOTE(review): prepare_data one-hot encodes column 4 for the training file
# and column 3 otherwise, each file on its own - confirm the resulting
# feature columns line up in count and order with the training features.

# Write one integer-truncated prediction per line for the dev split.
with open('dev-0/out.tsv', 'w') as writer:
    dev_data = prepare_data('dev-0/in.tsv', "dev")
    dev_predictions = clf.predict(dev_data.iloc[:, 0:])
    writer.writelines(str(int(result)) + '\n' for result in dev_predictions)

# Same procedure for the test split.
with open('test-A/out.tsv', 'w') as writer:
    test_data = prepare_data('test-A/in.tsv', "test")
    test_predictions = clf.predict(test_data.iloc[:, 0:])
    writer.writelines(str(int(result)) + '\n' for result in test_predictions)
|
10
requirements.txt
Normal file
10
requirements.txt
Normal file
@ -0,0 +1,10 @@
|
||||
joblib==1.0.1
|
||||
numpy==1.20.3
|
||||
pandas==1.2.4
|
||||
python-dateutil==2.8.1
|
||||
pytz==2021.1
|
||||
scikit-learn==0.24.2
|
||||
scipy==1.6.3
|
||||
six==1.16.0
|
||||
sklearn==0.0
|
||||
threadpoolctl==2.1.0
|
1000
test-A/out.tsv
Normal file
1000
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user