Compare commits

...

4 Commits

Author SHA1 Message Date
Dominik
b92f387c1f chmod u+x 2021-05-18 18:21:07 +02:00
Dominik Strzako
78bb7c0a0e Fixed the outputs 2021-05-18 18:15:54 +02:00
Dominik Strzako
7c0a80aa6e out for dev-0 2021-05-18 18:10:32 +02:00
Dominik Strzako
0876f64f62 Close to 24k on dev 2021-05-18 18:06:52 +02:00
9 changed files with 5330 additions and 0 deletions

View File

@ -0,0 +1,49 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.metrics import mean_squared_error\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df =pd.read_csv('train/train.csv', sep=\"\\t\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

218
Auta.ipynb Normal file
View File

@ -0,0 +1,218 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.metrics import mean_squared_error\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.preprocessing import PolynomialFeatures"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [],
"source": [
"col_names = [\"Price\",\"Mileage\",\"Year\",\"Brand\",\"EngineType\",\"EngineCapacity\"]"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [],
"source": [
"df =pd.read_csv('train/train.tsv', sep=\"\\t\", names=col_names)"
]
},
{
"cell_type": "code",
"execution_count": 187,
"metadata": {},
"outputs": [],
"source": [
"def prepareData(df):\n",
" df[\"Age\"] = 2018 - df[\"Year\"]\n",
" df[\"SqrtAge\"] = df.Age**0.5\n",
" df[\"SqrtMileage\"] = df.Mileage ** 0.5\n",
" df[\"SqrtEngineCapacity\"] = df.EngineCapacity ** 0.5\n",
" df = pd.concat([df, df['EngineType'].str.get_dummies()], axis = 1 )\n",
" df = df.drop(['EngineType','Brand'], axis = 1)\n",
" poly = PolynomialFeatures(2, interaction_only=True)\n",
" df = poly.fit_transform(df)\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 188,
"metadata": {},
"outputs": [],
"source": [
"df_train = df"
]
},
{
"cell_type": "code",
"execution_count": 190,
"metadata": {},
"outputs": [],
"source": [
"y_train = df_train.Price\n",
"x_train = df_train.drop('Price', axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 191,
"metadata": {},
"outputs": [],
"source": [
"x_train = prepareData(x_train)"
]
},
{
"cell_type": "code",
"execution_count": 192,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LinearRegression()"
]
},
"execution_count": 192,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"linReg = LinearRegression()\n",
"linReg.fit(x_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 193,
"metadata": {},
"outputs": [],
"source": [
"y_dev =pd.read_csv('dev-0/expected.tsv', sep=\"\\t\", names=[\"Price\"])"
]
},
{
"cell_type": "code",
"execution_count": 194,
"metadata": {},
"outputs": [],
"source": [
"x_dev =pd.read_csv('dev-0/in.tsv', sep=\"\\t\", names=[\"Mileage\",\"Year\",\"Brand\",\"EngineType\",\"EngineCapacity\"])"
]
},
{
"cell_type": "code",
"execution_count": 195,
"metadata": {},
"outputs": [],
"source": [
"x_dev = prepareData(x_dev)"
]
},
{
"cell_type": "code",
"execution_count": 196,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.7535351650926749\n"
]
}
],
"source": [
"score = linReg.score(x_dev, y_dev)\n",
"print(score)"
]
},
{
"cell_type": "code",
"execution_count": 197,
"metadata": {},
"outputs": [],
"source": [
"y_pred = linReg.predict(x_dev)"
]
},
{
"cell_type": "code",
"execution_count": 198,
"metadata": {},
"outputs": [],
"source": [
"data = {'Price':y_pred}\n",
"y_pred = pd.DataFrame(data)"
]
},
{
"cell_type": "code",
"execution_count": 199,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"24989.603665517054"
]
},
"execution_count": 199,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mean_squared_error(y_dev, y_pred, squared=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"24943.930732282024\n",
"26863.621497665004 # without the Age feature\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

62
Auta.py Normal file
View File

@ -0,0 +1,62 @@
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures
# Column names handed to pd.read_csv(names=...) when loading train/train.tsv;
# "Price" is the regression target, the rest are the raw input columns.
col_names = [
    "Price",
    "Mileage",
    "Year",
    "Brand",
    "EngineType",
    "EngineCapacity",
]
def prepareData(df):
    """Build the model's feature matrix from a raw car-listing frame.

    The frame must provide the columns ``Mileage``, ``Year``, ``Brand``,
    ``EngineType`` and ``EngineCapacity``. All work happens on a copy, so the
    caller's DataFrame is never modified.

    Columns fed to the polynomial expansion, in order: the remaining input
    columns, ``Age`` (2018 - Year), ``SqrtAge``, one indicator column per
    ``EngineType`` value present in ``df``, ``SqrtMileage`` and
    ``SqrtEngineCapacity``. ``EngineType`` and ``Brand`` themselves are dropped.

    Returns:
        The result of ``PolynomialFeatures(2, interaction_only=True)
        .fit_transform`` applied to those columns.

    NOTE(review): the indicator columns depend on which EngineType values occur
    in ``df``; if train/dev/test do not share the same set of values the
    resulting matrices will have different widths -- confirm against the data.
    """
    # Work on a copy: the original wrote Age/SqrtAge into the caller's frame.
    features = df.copy()

    features["Age"] = 2018 - features["Year"]
    features["SqrtAge"] = features["Age"] ** 0.5

    # One 0/1 column per engine type, appended after the existing columns.
    engine_dummies = features["EngineType"].str.get_dummies()
    features = pd.concat([features, engine_dummies], axis=1)
    features = features.drop(["EngineType", "Brand"], axis=1)

    features["SqrtMileage"] = features["Mileage"] ** 0.5
    features["SqrtEngineCapacity"] = features["EngineCapacity"] ** 0.5

    # Degree-2 expansion restricted to interaction terms (no squared columns).
    poly = PolynomialFeatures(2, interaction_only=True)
    return poly.fit_transform(features)
def _predict_to_tsv(model, features, out_path):
    """Predict prices for ``features``, write them to ``out_path`` and return them.

    The file gets one prediction per line, tab-separated, without header or
    index. The predictions are returned as a single-column ``Price`` DataFrame.
    """
    predictions = pd.DataFrame({'Price': model.predict(features)})
    predictions.to_csv(out_path, sep='\t', index=False, header=False)
    return predictions


def main():
    """Train a linear regression on train/train.tsv and write predictions.

    Prints the model score and the RMSE on the dev-0 set, and writes
    predictions to ``dev-0/out.tsv`` and ``test-A/out.tsv``.
    """
    # The input files carry the same columns as the training file minus the target.
    feature_names = col_names[1:]

    df = pd.read_csv('train/train.tsv', sep="\t", names=col_names)
    y_dev = pd.read_csv('dev-0/expected.tsv', sep="\t", names=["Price"])
    x_dev = pd.read_csv('dev-0/in.tsv', sep="\t", names=feature_names)
    x_test = pd.read_csv('test-A/in.tsv', sep="\t", names=feature_names)

    y_train = df.Price
    x_train = prepareData(df.drop('Price', axis=1))

    linReg = LinearRegression()
    linReg.fit(x_train, y_train)

    x_dev = prepareData(x_dev)
    x_test = prepareData(x_test)

    # Model score on the dev set
    score = linReg.score(x_dev, y_dev)
    print(score)

    # RMSE on the dev set. The square root is taken explicitly because the
    # `squared=False` keyword of mean_squared_error is not available in every
    # scikit-learn release.
    y_pred = _predict_to_tsv(linReg, x_dev, r'dev-0/out.tsv')
    rmse = mean_squared_error(y_dev, y_pred) ** 0.5
    print(rmse)

    # Predictions for test-A
    _predict_to_tsv(linReg, x_test, r'test-A/out.tsv')


# Run the pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load Diff

1000
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

BIN
geval Executable file

Binary file not shown.

1001
out.tsv Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

1000
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff