{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: scikit-learn==0.24.2 in c:\\users\\ania\\appdata\\roaming\\python\\python38\\site-packages (0.24.2)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\programdata\\anaconda3\\lib\\site-packages (from scikit-learn==0.24.2) (2.1.0)\n", "Requirement already satisfied: scipy>=0.19.1 in c:\\programdata\\anaconda3\\lib\\site-packages (from scikit-learn==0.24.2) (1.5.2)\n", "Requirement already satisfied: joblib>=0.11 in c:\\programdata\\anaconda3\\lib\\site-packages (from scikit-learn==0.24.2) (0.17.0)\n", "Requirement already satisfied: numpy>=1.13.3 in c:\\programdata\\anaconda3\\lib\\site-packages (from scikit-learn==0.24.2) (1.19.2)\n" ] } ], "source": [ "!pip install scikit-learn==0.24.2 --user" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from math import sqrt\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.metrics import mean_squared_error\n", "np.set_printoptions(formatter={'float_kind':'{:f}'.format})" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "header = None\n", "with open('names') as f:\n", " header = f.read().replace('\\n', '').split('\\t')\n", "cars_train = pd.read_csv('train/train.tsv', sep=\"\\t\", names=header)\n", "cars_train_X = cars_train[[\"mileage\", \"year\", \"brand\", \"engineType\", \"engineCapacity\"]]\n", "cars_train_X = pd.get_dummies(cars_train_X)\n", "cars_train_Y = cars_train[\"price\"]\n", "input_columns = cars_train_X.columns" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "30118.8791272898" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Trenowanie modelu i błąd na train\n", "model = LinearRegression(positive=True)\n", "model.fit(cars_train_X, cars_train_Y)\n", "predictions = model.predict(cars_train_X)\n", "sqrt(mean_squared_error(predictions, cars_train_Y))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "cars_dev_0_X = pd.read_csv('dev-0/in.tsv', sep=\"\\t\", names=header[1:])\n", "cars_dev_0_Y = pd.read_csv('dev-0/expected.tsv', sep=\"\\t\", header=None).to_numpy().flatten('F')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | mileage | \n", "year | \n", "engineCapacity | \n", "brand_Abarth | \n", "brand_Aixam | \n", "brand_Alfa | \n", "brand_Aston | \n", "brand_Audi | \n", "brand_Austin | \n", "brand_BMW | \n", "... | \n", "brand_Uaz | \n", "brand_Vauxhall | \n", "brand_Volkswagen | \n", "brand_Volvo | \n", "brand_Warszawa | \n", "brand_dla | \n", "brand_star | \n", "engineType_benzyna | \n", "engineType_diesel | \n", "engineType_gaz | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "77000 | \n", "2015 | \n", "2000 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
1 | \n", "186146 | \n", "2006 | \n", "1498 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
2 | \n", "192000 | \n", "2007 | \n", "2500 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
3 | \n", "220000 | \n", "2003 | \n", "1997 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
4 | \n", "248000 | \n", "2008 | \n", "1900 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
995 | \n", "146000 | \n", "2004 | \n", "1686 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
996 | \n", "19323 | \n", "2015 | \n", "1598 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
997 | \n", "27561 | \n", "2016 | \n", "1598 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
998 | \n", "155000 | \n", "2012 | \n", "1600 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
999 | \n", "31438 | \n", "2015 | \n", "3000 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
1000 rows × 96 columns
\n", "\n", " | mileage | \n", "year | \n", "engineCapacity | \n", "brand_Abarth | \n", "brand_Aixam | \n", "brand_Alfa | \n", "brand_Aston | \n", "brand_Audi | \n", "brand_Austin | \n", "brand_BMW | \n", "... | \n", "brand_Uaz | \n", "brand_Vauxhall | \n", "brand_Volkswagen | \n", "brand_Volvo | \n", "brand_Warszawa | \n", "brand_dla | \n", "brand_star | \n", "engineType_benzyna | \n", "engineType_diesel | \n", "engineType_gaz | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "203000 | \n", "2010 | \n", "1500 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
1 | \n", "39000 | \n", "2008 | \n", "1000 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
2 | \n", "190000 | \n", "2005 | \n", "1600 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
3 | \n", "230000 | \n", "2001 | \n", "1598 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
4 | \n", "189000 | \n", "2000 | \n", "1600 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
995 | \n", "465000 | \n", "2005 | \n", "2500 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
996 | \n", "89074 | \n", "2014 | \n", "2000 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
997 | \n", "21711 | \n", "2014 | \n", "1329 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
998 | \n", "144000 | \n", "2014 | \n", "1500 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
999 | \n", "113606 | \n", "2000 | \n", "4000 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
1000 rows × 96 columns
\n", "