forked from kubapok/auta-public
Close to 24k on dev
This commit is contained in:
parent
5c4bb10ddf
commit
0876f64f62
49
.ipynb_checkpoints/Auta-checkpoint.ipynb
Normal file
49
.ipynb_checkpoints/Auta-checkpoint.ipynb
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
|
"from sklearn.linear_model import LinearRegression\n",
|
||||||
|
"from sklearn.metrics import mean_squared_error\n",
|
||||||
|
"import seaborn as sns\n",
|
||||||
|
"import matplotlib.pyplot as plt"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df =pd.read_csv('train/train.csv', sep=\"\\t\")"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.5"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
218
Auta.ipynb
Normal file
218
Auta.ipynb
Normal file
@ -0,0 +1,218 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 67,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
|
"from sklearn.linear_model import LinearRegression\n",
|
||||||
|
"from sklearn.metrics import mean_squared_error\n",
|
||||||
|
"import seaborn as sns\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"from sklearn.preprocessing import PolynomialFeatures"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 115,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"col_names = [\"Price\",\"Mileage\",\"Year\",\"Brand\",\"EngineType\",\"EngineCapacity\"]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 116,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df =pd.read_csv('train/train.tsv', sep=\"\\t\", names=col_names)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 187,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def prepareData(df):\n",
|
||||||
|
" df[\"Age\"] = 2018 - df[\"Year\"]\n",
|
||||||
|
" df[\"SqrtAge\"] = df.Age**0.5\n",
|
||||||
|
" df[\"SqrtMileage\"] = df.Mileage ** 0.5\n",
|
||||||
|
" df[\"SqrtEngineCapacity\"] = df.EngineCapacity ** 0.5\n",
|
||||||
|
" df = pd.concat([df, df['EngineType'].str.get_dummies()], axis = 1 )\n",
|
||||||
|
" df = df.drop(['EngineType','Brand'], axis = 1)\n",
|
||||||
|
" poly = PolynomialFeatures(2, interaction_only=True)\n",
|
||||||
|
" df = poly.fit_transform(df)\n",
|
||||||
|
" return df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 188,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df_train = df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 190,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"y_train = df_train.Price\n",
|
||||||
|
"x_train = df_train.drop('Price', axis=1)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 191,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"x_train = prepareData(x_train)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 192,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"LinearRegression()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 192,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"linReg = LinearRegression()\n",
|
||||||
|
"linReg.fit(x_train, y_train)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 193,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"y_dev =pd.read_csv('dev-0/expected.tsv', sep=\"\\t\", names=[\"Price\"])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 194,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"x_dev =pd.read_csv('dev-0/in.tsv', sep=\"\\t\", names=[\"Mileage\",\"Year\",\"Brand\",\"EngineType\",\"EngineCapacity\"])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 195,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"x_dev = prepareData(x_dev)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 196,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"0.7535351650926749\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"score = linReg.score(x_dev, y_dev)\n",
|
||||||
|
"print(score)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 197,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"y_pred = linReg.predict(x_dev)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 198,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data = {'Price':y_pred}\n",
|
||||||
|
"y_pred = pd.DataFrame(data)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 199,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"24989.603665517054"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 199,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"mean_squared_error(y_dev, y_pred, squared=False)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"24943.930732282024\n",
|
||||||
|
"26863.621497665004 # without Age\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.5"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
59
Auta.py
Normal file
59
Auta.py
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from sklearn.linear_model import LinearRegression
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
from sklearn.preprocessing import PolynomialFeatures
|
||||||
|
|
||||||
|
# Column names for train/train.tsv; main() passes this list as `names=` to
# pd.read_csv when loading the training data.
col_names = ["Price","Mileage","Year","Brand","EngineType","EngineCapacity"]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def prepareData(df, reference_year=2018):
    """Build the interaction-feature matrix fed to the price regression.

    The input frame is copied first, so the caller's DataFrame is not
    modified (earlier versions added ``Age`` and ``SqrtAge`` to it in place).

    Args:
        df: DataFrame containing the columns ``Year``, ``Mileage``,
            ``EngineCapacity``, ``EngineType`` and ``Brand``.
        reference_year: Year that ``Age`` is measured from
            (``Age = reference_year - Year``). Defaults to 2018, the value
            that used to be hard-coded.

    Returns:
        The result of ``PolynomialFeatures(2, interaction_only=True)``
        ``.fit_transform`` applied to the engineered columns, in this order:
        the remaining input columns, ``Age``, ``SqrtAge``, one indicator
        column per ``EngineType`` value, ``SqrtMileage``,
        ``SqrtEngineCapacity``.
    """
    features = df.copy()

    features["Age"] = reference_year - features["Year"]
    features["SqrtAge"] = features["Age"] ** 0.5

    # One 0/1 indicator column per distinct EngineType value in *this* frame.
    # NOTE(review): the set of indicator columns depends on the values present
    # in the frame passed in, so train/dev/test only get matching feature
    # layouts if they contain the same engine types -- confirm for new data.
    engine_dummies = features["EngineType"].str.get_dummies()
    features = pd.concat([features, engine_dummies], axis=1)

    # The raw categorical columns are not numeric; Brand is discarded entirely.
    features = features.drop(["EngineType", "Brand"], axis=1)

    features["SqrtMileage"] = features["Mileage"] ** 0.5
    features["SqrtEngineCapacity"] = features["EngineCapacity"] ** 0.5

    # Degree-2 expansion restricted to pairwise products (no squared terms).
    poly = PolynomialFeatures(2, interaction_only=True)
    return poly.fit_transform(features)
|
||||||
|
|
||||||
|
def main():
    """Train a linear price model, report dev metrics, write test-A output.

    Reads ``train/train.tsv``, ``dev-0/in.tsv``, ``dev-0/expected.tsv`` and
    ``test-A/in.tsv`` (tab-separated, no header row), fits a
    ``LinearRegression`` on the features produced by ``prepareData``, prints
    the model's ``score`` and the RMSE on the dev set, and writes the test-A
    predictions to ``test-A/out.tsv``.

    The output file contains one predicted price per line with no header and
    no index column, i.e. the same layout as ``dev-0/expected.tsv``.
    """
    # Every input file shares the training columns minus the target.
    feature_names = [name for name in col_names if name != "Price"]

    df = pd.read_csv('train/train.tsv', sep="\t", names=col_names)
    y_dev = pd.read_csv('dev-0/expected.tsv', sep="\t", names=["Price"])
    x_dev = pd.read_csv('dev-0/in.tsv', sep="\t", names=feature_names)
    x_test = pd.read_csv('test-A/in.tsv', sep="\t", names=feature_names)

    y_train = df.Price
    x_train = df.drop('Price', axis=1)

    x_train = prepareData(x_train)

    linReg = LinearRegression()
    linReg.fit(x_train, y_train)

    x_dev = prepareData(x_dev)
    x_test = prepareData(x_test)

    # Model score on the dev set
    score = linReg.score(x_dev, y_dev)
    print(score)

    # RMSE on the dev set. Taking the square root of the MSE explicitly avoids
    # the `squared=False` keyword, which newer scikit-learn releases removed.
    y_pred = pd.DataFrame({'Price': linReg.predict(x_dev)})
    rmse = mean_squared_error(y_dev, y_pred) ** 0.5
    print(rmse)

    # Predictions for test-A
    y_pred_test = pd.DataFrame({'Price': linReg.predict(x_test)})

    # One value per input row: without index=False/header=False pandas adds a
    # header line and a row-number column, giving 1001 lines for 1000 inputs.
    y_pred_test.to_csv(r'test-A/out.tsv', sep='\t', index=False, header=False)


if __name__ == "__main__":
    main()
|
1000
dev-0/.ipynb_checkpoints/expected-checkpoint.tsv
Normal file
1000
dev-0/.ipynb_checkpoints/expected-checkpoint.tsv
Normal file
File diff suppressed because it is too large
Load Diff
1000
test-A/.ipynb_checkpoints/in-checkpoint.tsv
Normal file
1000
test-A/.ipynb_checkpoints/in-checkpoint.tsv
Normal file
File diff suppressed because it is too large
Load Diff
1001
test-A/out.tsv
Normal file
1001
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user