{ "cells": [
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: scikit-learn==0.24.2 in c:\\users\\ania\\appdata\\roaming\\python\\python38\\site-packages (0.24.2)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\programdata\\anaconda3\\lib\\site-packages (from scikit-learn==0.24.2) (2.1.0)\n", "Requirement already satisfied: scipy>=0.19.1 in c:\\programdata\\anaconda3\\lib\\site-packages (from scikit-learn==0.24.2) (1.5.2)\n", "Requirement already satisfied: joblib>=0.11 in c:\\programdata\\anaconda3\\lib\\site-packages (from scikit-learn==0.24.2) (0.17.0)\n", "Requirement already satisfied: numpy>=1.13.3 in c:\\programdata\\anaconda3\\lib\\site-packages (from scikit-learn==0.24.2) (1.19.2)\n" ] } ], "source": [ "# %pip (unlike !pip) installs into the environment of the running kernel.\n", "%pip install scikit-learn==0.24.2 --user" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from math import sqrt\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.metrics import mean_squared_error\n", "np.set_printoptions(formatter={'float_kind':'{:f}'.format})" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Column names are read from the tab-separated 'names' file (newlines are dropped).\n", "with open('names', encoding='utf-8') as names_file:\n", "    header = names_file.read().replace('\\n', '').split('\\t')\n", "cars_train = pd.read_csv('train/train.tsv', sep=\"\\t\", names=header)\n", "# Keep the five model features; get_dummies one-hot encodes the non-numeric ones\n", "# (brand, engineType) and passes the numeric ones through.\n", "cars_train_X = pd.get_dummies(cars_train[[\"mileage\", \"year\", \"brand\", \"engineType\", \"engineCapacity\"]])\n", "cars_train_Y = cars_train[\"price\"]\n", "# Training column layout; the dev/test inputs are aligned to it before predicting.\n", "input_columns = cars_train_X.columns" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "30118.8791272898" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Train the model and report the RMSE on the training set\n", 
"model = LinearRegression(positive=True)\n", "# NOTE(review): positive=True constrains every coefficient to be >= 0, so a feature\n", "# such as mileage cannot lower the predicted price - confirm this is intended.\n", "model.fit(cars_train_X, cars_train_Y)\n", "predictions = model.predict(cars_train_X)\n", "# scikit-learn metrics take (y_true, y_pred), in that order.\n", "sqrt(mean_squared_error(cars_train_Y, predictions))" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# names=header[1:] skips the first column name (presumably the price target,\n", "# which the input file would not contain - confirm against the 'names' file).\n", "cars_dev_0_X = pd.read_csv('dev-0/in.tsv', sep=\"\\t\", names=header[1:])\n", "# Expected values come from a headerless file and are flattened to a 1-D array.\n", "cars_dev_0_Y = pd.read_csv('dev-0/expected.tsv', sep=\"\\t\", header=None).to_numpy().flatten('F')" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mileageyearengineCapacitybrand_Abarthbrand_Aixambrand_Alfabrand_Astonbrand_Audibrand_Austinbrand_BMW...brand_Uazbrand_Vauxhallbrand_Volkswagenbrand_Volvobrand_Warszawabrand_dlabrand_starengineType_benzynaengineType_dieselengineType_gaz
077000201520000000000...0000000010
1186146200614980000000...0000000100
2192000200725000000000...0000000010
3220000200319970000000...0000000010
4248000200819000000000...0010000010
..................................................................
995146000200416860000000...0000000010
99619323201515980000000...0000000100
99727561201615980000000...0000000010
998155000201216000000000...0000000100
99931438201530000000000...0000000010
\n", "

1000 rows × 96 columns

\n", "
" ], "text/plain": [ " mileage year engineCapacity brand_Abarth brand_Aixam brand_Alfa \\\n", "0 77000 2015 2000 0 0 0 \n", "1 186146 2006 1498 0 0 0 \n", "2 192000 2007 2500 0 0 0 \n", "3 220000 2003 1997 0 0 0 \n", "4 248000 2008 1900 0 0 0 \n", ".. ... ... ... ... ... ... \n", "995 146000 2004 1686 0 0 0 \n", "996 19323 2015 1598 0 0 0 \n", "997 27561 2016 1598 0 0 0 \n", "998 155000 2012 1600 0 0 0 \n", "999 31438 2015 3000 0 0 0 \n", "\n", " brand_Aston brand_Audi brand_Austin brand_BMW ... brand_Uaz \\\n", "0 0 0 0 0 ... 0 \n", "1 0 0 0 0 ... 0 \n", "2 0 0 0 0 ... 0 \n", "3 0 0 0 0 ... 0 \n", "4 0 0 0 0 ... 0 \n", ".. ... ... ... ... ... ... \n", "995 0 0 0 0 ... 0 \n", "996 0 0 0 0 ... 0 \n", "997 0 0 0 0 ... 0 \n", "998 0 0 0 0 ... 0 \n", "999 0 0 0 0 ... 0 \n", "\n", " brand_Vauxhall brand_Volkswagen brand_Volvo brand_Warszawa brand_dla \\\n", "0 0 0 0 0 0 \n", "1 0 0 0 0 0 \n", "2 0 0 0 0 0 \n", "3 0 0 0 0 0 \n", "4 0 1 0 0 0 \n", ".. ... ... ... ... ... \n", "995 0 0 0 0 0 \n", "996 0 0 0 0 0 \n", "997 0 0 0 0 0 \n", "998 0 0 0 0 0 \n", "999 0 0 0 0 0 \n", "\n", " brand_star engineType_benzyna engineType_diesel engineType_gaz \n", "0 0 0 1 0 \n", "1 0 1 0 0 \n", "2 0 0 1 0 \n", "3 0 0 1 0 \n", "4 0 0 1 0 \n", ".. ... ... ... ... 
\n", "995 0 0 1 0 \n", "996 0 1 0 0 \n", "997 0 0 1 0 \n", "998 0 1 0 0 \n", "999 0 0 1 0 \n", "\n", "[1000 rows x 96 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Prepare the DEV_0 input: one-hot encode, then align to the training columns.\n", "# reindex adds the training columns missing from this split (filled with 0),\n", "# drops columns the model was not trained on, and restores the training order.\n", "cars_dev_0_X = pd.get_dummies(cars_dev_0_X).reindex(columns=input_columns, fill_value=0)\n", "cars_dev_0_X" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "33193.54683638966" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Result for DEV_0: save the predictions and report the RMSE\n", "predictions_dev = model.predict(cars_dev_0_X)\n", "np.savetxt(\"dev-0/out.tsv\", predictions_dev, fmt='%f')\n", "# scikit-learn metrics take (y_true, y_pred), in that order.\n", "sqrt(mean_squared_error(cars_dev_0_Y, predictions_dev))" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Same column naming as the dev-0 input: header without its first entry.\n", "cars_test_A_X = pd.read_csv('test-A/in.tsv', sep=\"\\t\", names=header[1:])" ] },
{ "cell_type": "code", "execution_count": 9, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mileageyearengineCapacitybrand_Abarthbrand_Aixambrand_Alfabrand_Astonbrand_Audibrand_Austinbrand_BMW...brand_Uazbrand_Vauxhallbrand_Volkswagenbrand_Volvobrand_Warszawabrand_dlabrand_starengineType_benzynaengineType_dieselengineType_gaz
0203000201015000000000...0000000010
139000200810000000000...0000000100
2190000200516000000000...0000000010
3230000200115980000000...0010000100
4189000200016000000001...0000000100
..................................................................
995465000200525000000000...0000000010
99689074201420000000001...0000000010
99721711201413290000000...0000000100
998144000201415000000000...0000000010
999113606200040000000000...0000000100
\n", "

1000 rows × 96 columns

\n", "
" ], "text/plain": [ " mileage year engineCapacity brand_Abarth brand_Aixam brand_Alfa \\\n", "0 203000 2010 1500 0 0 0 \n", "1 39000 2008 1000 0 0 0 \n", "2 190000 2005 1600 0 0 0 \n", "3 230000 2001 1598 0 0 0 \n", "4 189000 2000 1600 0 0 0 \n", ".. ... ... ... ... ... ... \n", "995 465000 2005 2500 0 0 0 \n", "996 89074 2014 2000 0 0 0 \n", "997 21711 2014 1329 0 0 0 \n", "998 144000 2014 1500 0 0 0 \n", "999 113606 2000 4000 0 0 0 \n", "\n", " brand_Aston brand_Audi brand_Austin brand_BMW ... brand_Uaz \\\n", "0 0 0 0 0 ... 0 \n", "1 0 0 0 0 ... 0 \n", "2 0 0 0 0 ... 0 \n", "3 0 0 0 0 ... 0 \n", "4 0 0 0 1 ... 0 \n", ".. ... ... ... ... ... ... \n", "995 0 0 0 0 ... 0 \n", "996 0 0 0 1 ... 0 \n", "997 0 0 0 0 ... 0 \n", "998 0 0 0 0 ... 0 \n", "999 0 0 0 0 ... 0 \n", "\n", " brand_Vauxhall brand_Volkswagen brand_Volvo brand_Warszawa brand_dla \\\n", "0 0 0 0 0 0 \n", "1 0 0 0 0 0 \n", "2 0 0 0 0 0 \n", "3 0 1 0 0 0 \n", "4 0 0 0 0 0 \n", ".. ... ... ... ... ... \n", "995 0 0 0 0 0 \n", "996 0 0 0 0 0 \n", "997 0 0 0 0 0 \n", "998 0 0 0 0 0 \n", "999 0 0 0 0 0 \n", "\n", " brand_star engineType_benzyna engineType_diesel engineType_gaz \n", "0 0 0 1 0 \n", "1 0 1 0 0 \n", "2 0 0 1 0 \n", "3 0 1 0 0 \n", "4 0 1 0 0 \n", ".. ... ... ... ... 
\n", "995 0 0 1 0 \n", "996 0 0 1 0 \n", "997 0 1 0 0 \n", "998 0 0 1 0 \n", "999 0 1 0 0 \n", "\n", "[1000 rows x 96 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Adapt the test input: one-hot encode, then align to the training columns.\n", "# reindex adds the training columns missing from this split (filled with 0),\n", "# drops columns the model was not trained on, and restores the training order.\n", "cars_test_A_X = pd.get_dummies(cars_test_A_X).reindex(columns=input_columns, fill_value=0)\n", "cars_test_A_X" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Predict on the test set and save the result\n", "predictions_test = model.predict(cars_test_A_X)\n", "np.savetxt(\"test-A/out.tsv\", predictions_test, fmt='%f')" ] }
], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }