Compare commits

..

No commits in common. "master" and "master" have entirely different histories.

9 changed files with 0 additions and 5330 deletions

View File

@ -1,49 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.metrics import mean_squared_error\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df =pd.read_csv('train/train.csv', sep=\"\\t\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@ -1,218 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.metrics import mean_squared_error\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.preprocessing import PolynomialFeatures"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [],
"source": [
"col_names = [\"Price\",\"Mileage\",\"Year\",\"Brand\",\"EngineType\",\"EngineCapacity\"]"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [],
"source": [
"df =pd.read_csv('train/train.tsv', sep=\"\\t\", names=col_names)"
]
},
{
"cell_type": "code",
"execution_count": 187,
"metadata": {},
"outputs": [],
"source": [
"def prepareData(df):\n",
" df[\"Age\"] = 2018 - df[\"Year\"]\n",
" df[\"SqrtAge\"] = df.Age**0.5\n",
" df[\"SqrtMileage\"] = df.Mileage ** 0.5\n",
" df[\"SqrtEngineCapacity\"] = df.EngineCapacity ** 0.5\n",
" df = pd.concat([df, df['EngineType'].str.get_dummies()], axis = 1 )\n",
" df = df.drop(['EngineType','Brand'], axis = 1)\n",
" poly = PolynomialFeatures(2, interaction_only=True)\n",
" df = poly.fit_transform(df)\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 188,
"metadata": {},
"outputs": [],
"source": [
"df_train = df"
]
},
{
"cell_type": "code",
"execution_count": 190,
"metadata": {},
"outputs": [],
"source": [
"y_train = df_train.Price\n",
"x_train = df_train.drop('Price', axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 191,
"metadata": {},
"outputs": [],
"source": [
"x_train = prepareData(x_train)"
]
},
{
"cell_type": "code",
"execution_count": 192,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LinearRegression()"
]
},
"execution_count": 192,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"linReg = LinearRegression()\n",
"linReg.fit(x_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 193,
"metadata": {},
"outputs": [],
"source": [
"y_dev =pd.read_csv('dev-0/expected.tsv', sep=\"\\t\", names=[\"Price\"])"
]
},
{
"cell_type": "code",
"execution_count": 194,
"metadata": {},
"outputs": [],
"source": [
"x_dev =pd.read_csv('dev-0/in.tsv', sep=\"\\t\", names=[\"Mileage\",\"Year\",\"Brand\",\"EngineType\",\"EngineCapacity\"])"
]
},
{
"cell_type": "code",
"execution_count": 195,
"metadata": {},
"outputs": [],
"source": [
"x_dev = prepareData(x_dev)"
]
},
{
"cell_type": "code",
"execution_count": 196,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.7535351650926749\n"
]
}
],
"source": [
"score = linReg.score(x_dev, y_dev)\n",
"print(score)"
]
},
{
"cell_type": "code",
"execution_count": 197,
"metadata": {},
"outputs": [],
"source": [
"y_pred = linReg.predict(x_dev)"
]
},
{
"cell_type": "code",
"execution_count": 198,
"metadata": {},
"outputs": [],
"source": [
"data = {'Price':y_pred}\n",
"y_pred = pd.DataFrame(data)"
]
},
{
"cell_type": "code",
"execution_count": 199,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"24989.603665517054"
]
},
"execution_count": 199,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mean_squared_error(y_dev, y_pred, squared=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"24943.930732282024\n",
"26863.621497665004 # without Age\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

62
Auta.py
View File

@ -1,62 +0,0 @@
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures
# Column names applied to train/train.tsv, which is read without a header row.
col_names = [
    "Price",
    "Mileage",
    "Year",
    "Brand",
    "EngineType",
    "EngineCapacity",
]
def prepareData(df, reference_year=2018):
    """Build the model's feature matrix from a raw car-listing frame.

    The input frame is left untouched; all derived columns are added to a
    copy of it.

    Args:
        df: DataFrame with at least the columns ``Year``, ``Mileage``,
            ``EngineCapacity``, ``EngineType`` and ``Brand``. ``EngineType``
            must hold strings (it is accessed through ``.str``).
            NOTE(review): every other column is assumed to be numeric,
            because the frame is passed to ``PolynomialFeatures`` -- confirm.
        reference_year: Year from which ``Age`` is computed
            (``Age = reference_year - Year``). Defaults to 2018.

    Returns:
        The array produced by
        ``PolynomialFeatures(2, interaction_only=True).fit_transform`` on the
        engineered columns.
    """
    # Copy first so that adding columns does not mutate the caller's frame.
    features = df.copy()

    features["Age"] = reference_year - features["Year"]
    features["SqrtAge"] = features["Age"] ** 0.5

    # One indicator column per EngineType value present in *this* frame.
    # NOTE(review): the resulting column set depends on the input data, so
    # frames prepared separately only line up if they contain the same
    # EngineType values -- verify against the datasets.
    engine_dummies = features["EngineType"].str.get_dummies()
    features = pd.concat([features, engine_dummies], axis=1)
    features = features.drop(["EngineType", "Brand"], axis=1)

    # Added after the dummies to keep the original column order, which
    # determines the column order of the polynomial expansion.
    features["SqrtMileage"] = features["Mileage"] ** 0.5
    features["SqrtEngineCapacity"] = features["EngineCapacity"] ** 0.5

    poly = PolynomialFeatures(2, interaction_only=True)
    return poly.fit_transform(features)
def main():
    """Train the price model and write predictions for dev-0 and test-A.

    Reads ``train/train.tsv``, ``dev-0/in.tsv``, ``dev-0/expected.tsv`` and
    ``test-A/in.tsv`` (tab-separated, no header row), fits a
    ``LinearRegression`` on the features returned by ``prepareData``, prints
    the model score and the RMSE on dev-0, and writes the predictions to
    ``dev-0/out.tsv`` and ``test-A/out.tsv``.
    """
    # Input files carry every column of the training file except the
    # leading "Price".
    feature_names = col_names[1:]

    train = pd.read_csv('train/train.tsv', sep="\t", names=col_names)
    y_dev = pd.read_csv('dev-0/expected.tsv', sep="\t", names=["Price"])
    x_dev = pd.read_csv('dev-0/in.tsv', sep="\t", names=feature_names)
    x_test = pd.read_csv('test-A/in.tsv', sep="\t", names=feature_names)

    y_train = train.Price
    x_train = prepareData(train.drop('Price', axis=1))

    linReg = LinearRegression()
    linReg.fit(x_train, y_train)

    x_dev = prepareData(x_dev)
    x_test = prepareData(x_test)

    # Model score on the dev set
    score = linReg.score(x_dev, y_dev)
    print(score)

    # RMSE on the dev set
    dev_pred = pd.DataFrame({'Price': linReg.predict(x_dev)})
    dev_pred.to_csv(r'dev-0/out.tsv', sep='\t', index=False, header=False)
    # Take the square root explicitly instead of passing `squared=False`:
    # that keyword was deprecated in scikit-learn 1.4 and removed in 1.6,
    # while this form works on every version and yields the same value.
    rmse = mean_squared_error(y_dev, dev_pred) ** 0.5
    print(rmse)

    # Predictions for test-A
    test_pred = pd.DataFrame({'Price': linReg.predict(x_test)})
    test_pred.to_csv(r'test-A/out.tsv', sep='\t', index=False, header=False)
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

BIN
geval

Binary file not shown.

1001
out.tsv

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff