diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000..7d4d72b --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,1430 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 188, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.metrics import mean_squared_error" + ] + }, + { + "cell_type": "code", + "execution_count": 189, + "metadata": {}, + "outputs": [], + "source": [ + "#with open('train/train.tsv') as file:\n", + " # for line in file.readlines()[:10]:\n", + " # print(line)" + ] + }, + { + "cell_type": "code", + "execution_count": 190, + "metadata": {}, + "outputs": [], + "source": [ + "#with open('names') as file:\n", + " # for line in file.readlines():\n", + " # header.append(line.strip())" + ] + }, + { + "cell_type": "code", + "execution_count": 191, + "metadata": {}, + "outputs": [], + "source": [ + "#train" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [], + "source": [ + "with open('names') as file:\n", + " header = file.read().rstrip('\\n').split('\\t')\n", + "\n", + "train_path='train/train.tsv'\n", + "\n", + "\n", + "\n", + "train = pd.read_csv(train_path, sep='\\t', names=header)\n", + "#removing discrete value\n", + "train.drop('brand', inplace=True, axis=1)\n", + "train.drop('engineType', inplace=True, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#output\n", + "y_train = pd.DataFrame(train['price'])\n", + "\n", + "\n", + "#removing output\n", + "train.drop('price', inplace=True, axis=1)\n", + "x_train = pd.DataFrame(train)\n", + "\n", + "model = LinearRegression()\n", + "model.fit(x_train, y_train)\n", + "\n", + "header=['price','year','brand','engineType','engineCapacity']" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [], + "source": [ + "#dev" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [], + "source": [ + "dev = pd.read_csv('dev-0/in.tsv', sep='\\t', names=header)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " price year brand engineType engineCapacity\n", + "0 77000 2015 Ford diesel 2000\n", + "1 186146 2006 Mercedes-Benz benzyna 1498\n", + "2 192000 2007 Nissan diesel 2500\n", + "3 220000 2003 Ford diesel 1997\n", + "4 248000 2008 Volkswagen diesel 1900\n", + ".. ... ... ... ... ...\n", + "995 146000 2004 Opel diesel 1686\n", + "996 19323 2015 Renault benzyna 1598\n", + "997 27561 2016 Toyota diesel 1598\n", + "998 155000 2012 Hyundai benzyna 1600\n", + "999 31438 2015 Land diesel 3000\n", + "\n", + "[1000 rows x 5 columns]\n" + ] + } + ], + "source": [ + "print(dev)" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": {}, + "outputs": [], + "source": [ + "with open('dev-0/expected.tsv', 'r') as file:\n", + " y_dev = np.array([float(x.rstrip('\\n')) for x in file.readlines()])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": {}, + "outputs": [], + "source": [ + "dev.drop('brand', inplace=True, axis=1)\n", + "dev.drop('engineType', inplace=True, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " price year engineCapacity\n", + "0 77000 2015 2000\n", + "1 186146 2006 1498\n", + "2 192000 2007 2500\n", + "3 220000 2003 1997\n", + "4 248000 2008 1900\n", + ".. ... ... ...\n", + "995 146000 2004 1686\n", + "996 19323 2015 1598\n", + "997 27561 2016 1598\n", + "998 155000 2012 1600\n", + "999 31438 2015 3000\n", + "\n", + "[1000 rows x 3 columns]\n" + ] + } + ], + "source": [ + "print(dev)" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 7.72392063e+04]\n", + " [ 1.21746103e+04]\n", + " [ 4.92626456e+04]\n", + " [ 1.37190947e+04]\n", + " [ 2.40946032e+04]\n", + " [ 7.88318837e+04]\n", + " [ 4.47318598e+04]\n", + " [ 9.44511674e+03]\n", + " [-8.77567502e+03]\n", + " [ 1.34841352e+04]\n", + " [ 2.51235429e+04]\n", + " [ 8.25232472e+04]\n", + " [ 5.73949851e+03]\n", + " [ 1.47356316e+03]\n", + " [ 3.58932978e+04]\n", + " [ 2.41566138e+04]\n", + " [ 6.28327771e+04]\n", + " [ 9.44318776e+04]\n", + " [ 3.63165742e+04]\n", + " [ 6.93253866e+03]\n", + " [-4.87879907e+01]\n", + " [ 7.40420208e+03]\n", + " [-8.74108566e+03]\n", + " [ 5.85018256e+04]\n", + " [-5.31833451e+03]\n", + " [ 6.90090820e+04]\n", + " [ 7.90958938e+04]\n", + " [ 1.21888104e+04]\n", + " [ 1.01686280e+04]\n", + " [ 3.72544993e+04]\n", + " [ 7.59714805e+04]\n", + " [ 6.35250818e+04]\n", + " [ 6.65801358e+03]\n", + " [ 4.95984416e+04]\n", + " [ 3.91985020e+04]\n", + " [ 5.94405897e+04]\n", + " [ 3.25405996e+04]\n", + " [ 2.49435551e+04]\n", + " [ 4.25956589e+04]\n", + " [ 7.14460623e+04]\n", + " [ 7.49405450e+04]\n", + " [ 2.85287223e+04]\n", + " [ 3.90573961e+04]\n", + " [ 6.80943863e+04]\n", + " [ 1.23555014e+04]\n", + " [ 1.21925126e+05]\n", + " [ 9.18307794e+04]\n", + " [ 9.82420042e+03]\n", + " [ 4.28924356e+04]\n", + " [ 5.07739058e+04]\n", + " [ 7.50514879e+04]\n", + " [-7.25748788e+03]\n", + " [ 9.55299727e+04]\n", + " [ 3.95122188e+04]\n", + " [ 1.19340537e+04]\n", + " [-6.55254554e+03]\n", + " [-4.10177479e+03]\n", + " [ 2.49776566e+04]\n", + " [ 7.10790038e+04]\n", + " [ 2.06810033e+04]\n", + " [ 6.23198797e+04]\n", + " [-1.07284521e+04]\n", + " [-6.46381409e+02]\n", + " [ 7.30727534e+04]\n", + " [ 8.30811199e+04]\n", + " [ 2.86989359e+04]\n", + " [-7.03756627e+03]\n", + " [ 8.72640461e+04]\n", + " [ 3.31901123e+04]\n", + " [ 1.88453572e+03]\n", + " [ 2.06604184e+04]\n", + " [ 3.29929632e+04]\n", + " [ 1.89676836e+04]\n", + " [ 1.95236628e+04]\n", + " [ 1.67547668e+05]\n", + " [ 4.85499441e+04]\n", + " [ 6.27455250e+04]\n", + " [ 4.20967880e+04]\n", + " [ 6.68641130e+04]\n", + " [ 1.66520495e+04]\n", + " [ 4.76051559e+04]\n", + " [ 2.03516912e+04]\n", + " [ 4.52221063e+04]\n", + " [ 1.66323131e+04]\n", + " [ 1.79557880e+04]\n", + " [ 1.92879834e+04]\n", + " [ 5.65858379e+04]\n", + " [ 4.10082404e+04]\n", + " [ 4.05624418e+04]\n", + " [ 2.35217266e+04]\n", + " [ 4.10092641e+04]\n", + " [ 3.26970987e+04]\n", + " [ 5.52463184e+04]\n", + " [ 4.53240110e+04]\n", + " [ 3.34969210e+03]\n", + " [ 2.65136699e+03]\n", + " [ 6.43185703e+04]\n", + " [ 6.01867692e+04]\n", + " [ 3.64334293e+04]\n", + " [ 3.01009442e+04]\n", + " [ 6.47733211e+03]\n", + " [ 9.33718072e+03]\n", + " [ 1.01767327e+04]\n", + " [ 1.96917839e+04]\n", + " [ 5.89513195e+03]\n", + " [ 2.51338800e+04]\n", + " [ 2.62510931e+04]\n", + " [ 4.24081831e+04]\n", + " [ 6.31779344e+04]\n", + " [ 1.97391316e+04]\n", + " [ 7.68646191e+04]\n", + " [-1.53232910e+04]\n", + " [ 5.42528855e+04]\n", + " [ 5.99216155e+04]\n", + " [ 8.74438207e+03]\n", + " [ 4.07106246e+04]\n", + " [ 1.17569351e+04]\n", + " [ 3.42320268e+04]\n", + " [ 5.58880945e+04]\n", + " [ 2.67149647e+04]\n", + " [ 2.44165413e+04]\n", + " [ 3.06502434e+04]\n", + " [-2.47116768e+04]\n", + " [ 3.02109289e+04]\n", + " [ 6.69114096e+04]\n", + " [ 7.51099605e+04]\n", + " [-2.01692811e+04]\n", + " [ 4.42081612e+04]\n", + " [ 2.20836791e+04]\n", + " [ 4.64605808e+04]\n", + " [ 1.23637492e+04]\n", + " [-7.60995807e+03]\n", + " [ 4.57435502e+04]\n", + " [ 3.78355245e+04]\n", + " [-2.88493424e+04]\n", + " [ 6.29914716e+04]\n", + " [ 1.00418697e+05]\n", + " [ 5.67414418e+04]\n", + " [ 3.16945267e+04]\n", + " [ 6.27334737e+04]\n", + " [ 1.47491206e+04]\n", + " [ 5.12304959e+04]\n", + " [ 6.98188482e+04]\n", + " [ 1.79823749e+04]\n", + " [ 6.62751512e+04]\n", + " [ 5.18421259e+04]\n", + " [ 9.08771880e+04]\n", + " [ 1.48160390e+04]\n", + " [ 1.08181408e+04]\n", + " [ 5.76520947e+04]\n", + " [ 3.22203519e+04]\n", + " [ 1.80650050e+04]\n", + " [ 4.64112525e+04]\n", + " [-3.87907792e+04]\n", + " [ 7.49071294e+04]\n", + " [-1.39798993e+04]\n", + " [ 7.57818485e+04]\n", + " [ 1.77386762e+04]\n", + " [ 5.87321021e+04]\n", + " [ 6.03558654e+04]\n", + " [ 3.63825302e+04]\n", + " [ 6.71251159e+04]\n", + " [ 6.72245534e+03]\n", + " [-3.56414333e+03]\n", + " [ 3.00644261e+03]\n", + " [-6.20961682e+03]\n", + " [ 4.15049154e+04]\n", + " [ 3.80286210e+04]\n", + " [ 2.62211308e+04]\n", + " [ 8.18037844e+04]\n", + " [ 6.12139209e+04]\n", + " [ 7.10600141e+04]\n", + " [ 2.01274133e+04]\n", + " [ 2.25298349e+04]\n", + " [ 7.50498152e+04]\n", + " [ 3.69820172e+04]\n", + " [ 7.68439140e+04]\n", + " [ 9.55286656e+04]\n", + " [ 8.21351233e+04]\n", + " [ 7.49405450e+04]\n", + " [ 5.32157390e+04]\n", + " [ 3.35058778e+04]\n", + " [ 1.66136440e+04]\n", + " [-1.39818122e+04]\n", + " [ 2.14885095e+04]\n", + " [ 5.52127456e+03]\n", + " [ 8.16206384e+04]\n", + " [ 3.79281385e+04]\n", + " [ 1.09335304e+04]\n", + " [ 2.34982408e+04]\n", + " [-1.17828688e+04]\n", + " [ 3.98538772e+04]\n", + " [ 4.42215829e+04]\n", + " [ 6.27808750e+04]\n", + " [ 1.50281434e+03]\n", + " [ 5.47538677e+04]\n", + " [ 4.09626275e+04]\n", + " [ 5.79964638e+04]\n", + " [ 2.15737773e+04]\n", + " [ 3.73251402e+04]\n", + " [ 1.00436068e+05]\n", + " [ 9.55298275e+04]\n", + " [-6.01901332e+03]\n", + " [-1.81190118e+04]\n", + " [ 1.89912585e+04]\n", + " [ 2.18783873e+04]\n", + " [ 3.23953698e+04]\n", + " [ 6.48850564e+04]\n", + " [ 6.76483307e+04]\n", + " [ 3.28916102e+04]\n", + " [ 2.56118154e+04]\n", + " [ 6.94500220e+03]\n", + " [ 9.16650596e+04]\n", + " [ 4.22596892e+04]\n", + " [ 3.45678228e+04]\n", + " [ 7.74875097e+04]\n", + " [ 5.02744211e+04]\n", + " [ 6.18923966e+04]\n", + " [ 1.20091628e+05]\n", + " [ 5.33609688e+04]\n", + " [-1.42105218e+04]\n", + " [ 3.37136699e+04]\n", + " [ 2.68020500e+04]\n", + " [-9.57488945e+03]\n", + " [ 3.75269757e+04]\n", + " [ 2.69785796e+04]\n", + " [-1.02671788e+03]\n", + " [ 4.02983297e+04]\n", + " [ 3.92837698e+04]\n", + " [ 5.43089837e+04]\n", + " [ 6.25751477e+03]\n", + " [ 4.44839525e+04]\n", + " [ 2.56231695e+04]\n", + " [ 4.65805047e+04]\n", + " [-7.53446900e+02]\n", + " [ 4.59814301e+03]\n", + " [ 7.17227007e+04]\n", + " [ 4.98409271e+04]\n", + " [ 1.39219345e+04]\n", + " [ 6.12628755e+04]\n", + " [ 3.67973692e+04]\n", + " [ 2.48314325e+04]\n", + " [ 2.19641304e+04]\n", + " [ 4.85477660e+04]\n", + " [ 3.74685962e+04]\n", + " [ 1.46107897e+04]\n", + " [ 1.32977276e+04]\n", + " [ 4.30436593e+04]\n", + " [ 3.09683777e+04]\n", + " [ 1.23090152e+04]\n", + " [ 5.48703011e+04]\n", + " [ 2.40886093e+04]\n", + " [ 4.96906762e+04]\n", + " [ 7.45968093e+04]\n", + " [ 1.25781616e+04]\n", + " [ 8.62434536e+04]\n", + " [ 3.23735853e+04]\n", + " [ 8.25441151e+04]\n", + " [ 3.34542991e+04]\n", + " [ 2.17662054e+04]\n", + " [ 6.79525419e+04]\n", + " [ 7.67910195e+03]\n", + " [ 2.05659498e+04]\n", + " [ 1.45568094e+04]\n", + " [ 7.32280041e+04]\n", + " [ 2.74655993e+04]\n", + " [-2.78234822e+03]\n", + " [ 9.55293918e+04]\n", + " [ 2.70399512e+03]\n", + " [ 2.01194307e+04]\n", + " [-1.26097057e+04]\n", + " [ 2.41052835e+04]\n", + " [ 1.88758039e+04]\n", + " [ 1.70969595e+04]\n", + " [ 6.87331454e+04]\n", + " [ 1.54055323e+04]\n", + " [ 1.81482242e+04]\n", + " [ 1.49857523e+05]\n", + " [ 1.91716243e+04]\n", + " [ 2.44616990e+04]\n", + " [ 1.20994614e+05]\n", + " [ 6.22825860e+04]\n", + " [ 1.32896850e+04]\n", + " [ 1.48926177e+04]\n", + " [ 7.49401093e+04]\n", + " [ 2.98847085e+04]\n", + " [ 7.83718016e+04]\n", + " [ 7.63975954e+03]\n", + " [ 3.39814987e+04]\n", + " [ 5.64987000e+04]\n", + " [ 2.73889631e+04]\n", + " [ 7.29763962e+04]\n", + " [ 3.84496848e+04]\n", + " [-6.36290177e+02]\n", + " [ 2.73829692e+04]\n", + " [ 3.77668236e+04]\n", + " [ 4.35553958e+04]\n", + " [ 3.00975414e+04]\n", + " [ 2.84308776e+04]\n", + " [ 5.38059327e+04]\n", + " [ 2.34657860e+04]\n", + " [ 1.14752909e+04]\n", + " [ 3.29090825e+03]\n", + " [ 3.68497030e+04]\n", + " [ 1.40531278e+04]\n", + " [ 3.52155681e+04]\n", + " [ 5.89700199e+03]\n", + " [ 5.24862337e+04]\n", + " [ 3.92933948e+04]\n", + " [ 8.59376845e+03]\n", + " [ 4.92554503e+03]\n", + " [-2.32014656e+04]\n", + " [ 2.44390308e+04]\n", + " [-2.12286176e+04]\n", + " [ 6.80774508e+04]\n", + " [ 8.87696844e+04]\n", + " [ 2.47949144e+03]\n", + " [ 2.65942205e+04]\n", + " [ 4.74497286e+04]\n", + " [ 2.60220384e+04]\n", + " [ 4.58110680e+04]\n", + " [ 4.88588451e+04]\n", + " [ 2.37969368e+04]\n", + " [ 2.15490883e+04]\n", + " [ 5.77794692e+04]\n", + " [ 7.63565582e+04]\n", + " [ 1.73454390e+04]\n", + " [ 4.46006665e+04]\n", + " [ 6.03239564e+04]\n", + " [ 3.04161206e+04]\n", + " [ 2.88105503e+04]\n", + " [ 6.45221803e+04]\n", + " [ 3.93983198e+03]\n", + " [ 3.52486354e+04]\n", + " [-2.00595799e+03]\n", + " [ 4.25393763e+04]\n", + " [ 4.17506277e+04]\n", + " [ 3.00921486e+04]\n", + " [ 5.29598062e+04]\n", + " [ 6.35070240e+04]\n", + " [ 1.30497869e+05]\n", + " [ 8.72599621e+04]\n", + " [ 4.49456832e+04]\n", + " [ 6.62783780e+04]\n", + " [ 1.30458250e+04]\n", + " [ 6.51087000e+04]\n", + " [ 1.47700561e+04]\n", + " [ 3.97172087e+03]\n", + " [ 5.45698206e+04]\n", + " [ 3.92757272e+04]\n", + " [ 5.86862154e+04]\n", + " [ 1.46647577e+04]\n", + " [ 4.85958666e+04]\n", + " [ 3.27656923e+04]\n", + " [ 5.16166193e+04]\n", + " [ 6.37901595e+04]\n", + " [ 8.27669537e+03]\n", + " [ 4.44470403e+03]\n", + " [ 5.48899029e+04]\n", + " [ 2.12692812e+04]\n", + " [ 9.44542556e+04]\n", + " [ 4.17926078e+04]\n", + " [-9.12329402e+02]\n", + " [ 4.87988831e+04]\n", + " [ 5.82550345e+04]\n", + " [ 7.83716564e+04]\n", + " [ 1.79254278e+05]\n", + " [ 6.80775960e+04]\n", + " [ 5.46045563e+04]\n", + " [ 8.51910622e+03]\n", + " [ 3.08604417e+04]\n", + " [ 3.12439259e+04]\n", + " [-2.40882505e+04]\n", + " [ 7.05766223e+04]\n", + " [ 8.18032034e+04]\n", + " [ 4.21722806e+04]\n", + " [ 2.83109876e+04]\n", + " [ 4.74276265e+04]\n", + " [ 7.67639692e+04]\n", + " [ 3.71113168e+04]\n", + " [-1.32297547e+04]\n", + " [ 3.85111404e+04]\n", + " [ 3.67009333e+04]\n", + " [ 5.11705339e+04]\n", + " [-3.82518515e+03]\n", + " [ 2.77726540e+04]\n", + " [ 1.88429707e+04]\n", + " [-1.49374038e+04]\n", + " [ 2.64349542e+04]\n", + " [-1.66357904e+03]\n", + " [ 5.01625419e+04]\n", + " [ 7.14267877e+04]\n", + " [ 4.83545621e+04]\n", + " [ 2.37668499e+04]\n", + " [ 7.40267890e+03]\n", + " [ 6.59193248e+04]\n", + " [ 2.13228654e+04]\n", + " [ 7.04900421e+04]\n", + " [ 4.47360866e+04]\n", + " [ 3.47583889e+04]\n", + " [ 5.18087774e+04]\n", + " [ 3.68748252e+04]\n", + " [ 8.76196156e+04]\n", + " [ 6.09000252e+04]\n", + " [ 5.37751549e+04]\n", + " [ 2.64138959e+04]\n", + " [-6.04990349e+02]\n", + " [ 1.94719666e+04]\n", + " [ 8.00517596e+04]\n", + " [ 8.14486150e+04]\n", + " [ 4.52312476e+04]\n", + " [ 6.54858499e+03]\n", + " [ 1.93094735e+04]\n", + " [ 2.38485465e+05]\n", + " [ 9.89609389e+04]\n", + " [ 1.20329797e+05]\n", + " [ 1.12266687e+05]\n", + " [ 1.06043174e+04]\n", + " [ 4.44880574e+04]\n", + " [ 4.67570344e+04]\n", + " [ 4.31948831e+04]\n", + " [ 6.58194314e+04]\n", + " [ 6.76323430e+04]\n", + " [ 7.83710754e+04]\n", + " [ 5.87025823e+04]\n", + " [ 4.86412816e+04]\n", + " [ 5.13357942e+04]\n", + " [-2.55895708e+04]\n", + " [ 5.52489561e+04]\n", + " [ 4.30457080e+04]\n", + " [ 3.98529801e+04]\n", + " [ 7.44798402e+04]\n", + " [ 5.58178664e+04]\n", + " [ 2.83682779e+04]\n", + " [ 4.32556374e+03]\n", + " [ 7.49401093e+04]\n", + " [ 2.03092220e+04]\n", + " [ 4.24847819e+04]\n", + " [ 2.75741244e+04]\n", + " [ 6.61066642e+04]\n", + " [ 3.19111792e+04]\n", + " [ 5.76034631e+04]\n", + " [ 2.34045699e+04]\n", + " [ 6.80770151e+04]\n", + " [ 6.12145019e+04]\n", + " [ 1.12895812e+05]\n", + " [ 4.40665677e+04]\n", + " [ 9.59321744e+04]\n", + " [ 6.53237825e+04]\n", + " [ 6.60966158e+04]\n", + " [ 6.16700812e+04]\n", + " [ 2.92109674e+04]\n", + " [ 7.02154658e+03]\n", + " [ 1.79197752e+04]\n", + " [ 2.50673350e+04]\n", + " [ 4.72636015e+04]\n", + " [ 9.23610370e+03]\n", + " [ 6.51091237e+04]\n", + " [ 5.06992027e+04]\n", + " [ 3.08554420e+04]\n", + " [ 7.80239759e+04]\n", + " [ 1.36781688e+04]\n", + " [ 2.51278860e+04]\n", + " [ 2.45923032e+04]\n", + " [ 8.67040464e+03]\n", + " [ 3.98119192e+04]\n", + " [ 1.35470374e+04]\n", + " [ 8.18032034e+04]\n", + " [ 4.20587052e+04]\n", + " [ 3.95035872e+04]\n", + " [ 2.75801183e+04]\n", + " [ 1.76679845e+04]\n", + " [ 7.99267520e+04]\n", + " [ 6.98405293e+04]\n", + " [ 2.34331025e+04]\n", + " [ 5.22103653e+04]\n", + " [ 2.85215129e+04]\n", + " [ 1.93674407e+04]\n", + " [ 1.49962178e+04]\n", + " [ 5.82847453e+04]\n", + " [-2.38509784e+04]\n", + " [ 4.52841169e+04]\n", + " [ 2.31038551e+04]\n", + " [ 2.01960669e+04]\n", + " [ 1.78780640e+04]\n", + " [ 2.49140626e+04]\n", + " [ 5.22276943e+04]\n", + " [ 4.72633660e+04]\n", + " [ 3.27478400e+04]\n", + " [ 2.58243217e+04]\n", + " [ 4.42135403e+04]\n", + " [ 4.79002889e+04]\n", + " [ 7.83716564e+04]\n", + " [ 2.40941980e+04]\n", + " [ 1.90258207e+04]\n", + " [ 3.69520505e+04]\n", + " [ 6.36189659e+04]\n", + " [ 4.14228743e+04]\n", + " [ 7.66286754e+04]\n", + " [ 2.93001912e+02]\n", + " [ 6.41098944e+04]\n", + " [ 2.61873769e+04]\n", + " [ 3.42067209e+04]\n", + " [ 2.49200566e+04]\n", + " [ 6.06011246e+04]\n", + " [ 5.92036537e+04]\n", + " [ 6.64316504e+04]\n", + " [ 7.91520970e+03]\n", + " [ 8.93294867e+04]\n", + " [ 3.22776596e+04]\n", + " [-3.05700209e+04]\n", + " [ 1.06293618e+05]\n", + " [ 7.26140278e+04]\n", + " [ 2.04653179e+04]\n", + " [ 1.96907665e+05]\n", + " [ 4.98539697e+04]\n", + " [ 1.83720059e+04]\n", + " [ 4.73004447e+04]\n", + " [ 3.58295545e+04]\n", + " [-2.85150061e+04]\n", + " [ 2.35837372e+04]\n", + " [ 5.02479170e+04]\n", + " [ 1.05426261e+05]\n", + " [ 1.60085306e+04]\n", + " [ 2.57377687e+04]\n", + " [ 9.40069725e+04]\n", + " [-1.23465247e+04]\n", + " [ 9.33117395e+04]\n", + " [ 7.11769593e+04]\n", + " [ 6.86786909e+04]\n", + " [ 5.86844219e+04]\n", + " [ 2.78019843e+04]\n", + " [ 9.48765839e+04]\n", + " [ 1.28080702e+04]\n", + " [ 7.91108006e+04]\n", + " [ 1.19733962e+04]\n", + " [ 7.38588734e+04]\n", + " [ 6.40128498e+03]\n", + " [ 6.52553159e+04]\n", + " [ 4.17775391e+04]\n", + " [ 7.47860903e+04]\n", + " [ 6.84652518e+04]\n", + " [ 2.73123269e+04]\n", + " [-1.65561112e+03]\n", + " [ 1.29133685e+04]\n", + " [ 5.98767472e+03]\n", + " [ 4.61408243e+04]\n", + " [ 1.96114873e+04]\n", + " [ 6.70697999e+04]\n", + " [ 7.49401093e+04]\n", + " [ 5.22677240e+04]\n", + " [ 5.66326605e+04]\n", + " [ 1.29819621e+04]\n", + " [ 2.81924020e+04]\n", + " [ 1.64104017e+04]\n", + " [ 2.93185506e+03]\n", + " [ 1.85786207e+04]\n", + " [-1.34830572e+04]\n", + " [ 2.24563755e+03]\n", + " [ 5.43229835e+04]\n", + " [ 1.36788086e+05]\n", + " [ 8.40736383e+04]\n", + " [ 1.46914537e+03]\n", + " [ 4.05282383e+04]\n", + " [ 4.12403508e+04]\n", + " [-2.75501503e+04]\n", + " [ 5.54335283e+04]\n", + " [ 2.17228781e+04]\n", + " [-5.71333908e+03]\n", + " [ 1.69133186e+03]\n", + " [ 7.38187266e+04]\n", + " [ 3.61783340e+04]\n", + " [ 9.24524667e+04]\n", + " [ 6.58295354e+04]\n", + " [ 8.19438840e+04]\n", + " [ 5.58232923e+04]\n", + " [ 4.93964766e+04]\n", + " [ 9.08771880e+04]\n", + " [ 3.02788588e+04]\n", + " [ 6.33019617e+04]\n", + " [-5.03969853e+03]\n", + " [ 5.56652904e+04]\n", + " [-2.16618511e+04]\n", + " [ 5.82403041e+04]\n", + " [ 7.26419803e+04]\n", + " [ 1.42516992e+04]\n", + " [ 6.87006433e+03]\n", + " [ 3.63505118e+04]\n", + " [ 6.31680013e+04]\n", + " [ 2.21299799e+04]\n", + " [-6.59547416e+02]\n", + " [ 2.46069288e+04]\n", + " [ 7.47691128e+04]\n", + " [ 1.44882281e+04]\n", + " [ 4.59137287e+04]\n", + " [ 6.32079327e+04]\n", + " [ 6.78693880e+04]\n", + " [ 1.28197836e+05]\n", + " [ 5.14657587e+04]\n", + " [ 2.82655674e+03]\n", + " [ 2.73143756e+04]\n", + " [ 7.02539343e+04]\n", + " [ 4.41058855e+04]\n", + " [ 6.30195447e+04]\n", + " [ 1.41057313e+04]\n", + " [ 3.47756149e+04]\n", + " [ 1.26588798e+04]\n", + " [-4.90514987e+04]\n", + " [ 2.50685131e+04]\n", + " [ 5.58952421e+04]\n", + " [ 4.63293876e+04]\n", + " [ 1.59109320e+04]\n", + " [-2.06855765e+03]\n", + " [ 5.64793881e+04]\n", + " [ 4.23821320e+04]\n", + " [ 4.14498311e+04]\n", + " [ 2.65861779e+04]\n", + " [ 2.20780603e+04]\n", + " [ 5.41557113e+04]\n", + " [ 4.25185577e+04]\n", + " [ 2.01668948e+05]\n", + " [ 1.43182815e+04]\n", + " [-2.00103554e+04]\n", + " [ 5.64148158e+04]\n", + " [ 3.80370409e+04]\n", + " [ 1.79511196e+04]\n", + " [ 3.93804365e+04]\n", + " [ 4.83253109e+04]\n", + " [ 1.10400068e+04]\n", + " [ 6.93919470e+04]\n", + " [-2.85203089e+03]\n", + " [ 1.32499937e+05]\n", + " [ 9.00454527e+04]\n", + " [-1.00602366e+03]\n", + " [ 2.65320678e+04]\n", + " [-2.50424704e+03]\n", + " [ 3.02109289e+04]\n", + " [-1.06622818e+03]\n", + " [ 6.76334202e+04]\n", + " [ 2.59685741e+04]\n", + " [ 3.97799524e+04]\n", + " [-6.07239227e+03]\n", + " [ 2.06478415e+04]\n", + " [ 2.32711985e+04]\n", + " [ 3.40614911e+04]\n", + " [ 2.14548517e+04]\n", + " [ 4.92339835e+04]\n", + " [ 3.28563650e+04]\n", + " [ 5.80275285e+04]\n", + " [ 5.51835135e+04]\n", + " [ 5.69571871e+04]\n", + " [ 2.18148793e+04]\n", + " [ 2.68786862e+04]\n", + " [ 4.25999274e+04]\n", + " [ 6.18609331e+04]\n", + " [ 2.39114906e+04]\n", + " [ 5.54507916e+04]\n", + " [ 8.99193491e+04]\n", + " [ 8.74626162e+04]\n", + " [ 4.00472126e+04]\n", + " [ 5.09581277e+04]\n", + " [ 1.25970242e+03]\n", + " [ 4.50495006e+04]\n", + " [ 2.49973465e+04]\n", + " [ 3.35491655e+04]\n", + " [ 3.96208450e+04]\n", + " [ 1.67892367e+04]\n", + " [ 1.37534475e+04]\n", + " [ 8.73896091e+03]\n", + " [ 8.83452644e+04]\n", + " [ 4.97945570e+04]\n", + " [ 3.22646629e+04]\n", + " [ 8.06827686e+03]\n", + " [ 8.69380011e+04]\n", + " [ 2.53637886e+04]\n", + " [ 3.46235163e+04]\n", + " [ 3.07286594e+04]\n", + " [ 8.04655616e+04]\n", + " [ 3.06299440e+04]\n", + " [ 4.48446734e+04]\n", + " [-3.89468345e+04]\n", + " [ 3.91445340e+04]\n", + " [ 1.67633417e+04]\n", + " [ 1.16092773e+05]\n", + " [ 7.61620467e+04]\n", + " [ 5.03561131e+04]\n", + " [ 6.35579938e+04]\n", + " [ 3.06012819e+04]\n", + " [ 3.61394556e+04]\n", + " [-9.83433043e+03]\n", + " [ 1.13452332e+04]\n", + " [ 1.11683571e+04]\n", + " [ 7.50257233e+03]\n", + " [ 5.29760206e+04]\n", + " [ 4.80661512e+04]\n", + " [-9.30847054e+02]\n", + " [-1.47641009e+04]\n", + " [ 9.41345552e+04]\n", + " [ 1.69338050e+03]\n", + " [ 7.01320646e+04]\n", + " [-4.13359245e+03]\n", + " [ 3.31387820e+04]\n", + " [ 9.54335401e+04]\n", + " [ 4.02156996e+04]\n", + " [ 2.15797713e+04]\n", + " [ 5.04443850e+04]\n", + " [ 5.72135796e+04]\n", + " [ 9.55293918e+04]\n", + " [ 3.33463530e+04]\n", + " [-1.58640419e+04]\n", + " [ 1.38862815e+04]\n", + " [-2.89066667e+04]\n", + " [ 2.19971557e+04]\n", + " [ 5.19793131e+04]\n", + " [ 7.12684490e+03]\n", + " [ 3.58889140e+04]\n", + " [-1.23545673e+04]\n", + " [ 5.99529153e+04]\n", + " [ 1.29480082e+05]\n", + " [ 4.59496018e+04]\n", + " [ 5.85892414e+04]\n", + " [-2.87792583e+03]\n", + " [ 4.14995105e+04]\n", + " [ 5.77815025e+04]\n", + " [-3.27996125e+03]\n", + " [ 1.82457376e+03]\n", + " [ 5.56717526e+04]\n", + " [ 1.42649973e+04]\n", + " [ 2.25823433e+04]\n", + " [-3.08457716e+03]\n", + " [ 2.68503273e+04]\n", + " [ 8.00012708e+04]\n", + " [ 1.03130472e+05]\n", + " [ 1.91955435e+04]\n", + " [ 1.71504693e+03]\n", + " [ 8.18033487e+04]\n", + " [-1.21180992e+03]\n", + " [ 2.79698823e+04]\n", + " [ 2.41738771e+04]\n", + " [ 1.30961048e+03]\n", + " [ 2.65035105e+04]\n", + " [ 2.28322824e+04]\n", + " [ 9.09400887e+04]\n", + " [ 6.26556757e+04]\n", + " [-3.62015998e+03]\n", + " [ 4.63953435e+04]\n", + " [ 5.12807099e+04]\n", + " [ 4.29831083e+04]\n", + " [ 5.46680369e+04]\n", + " [ 1.64534407e+04]\n", + " [ 5.77744683e+04]\n", + " [-3.07626357e+04]\n", + " [ 4.46185189e+04]\n", + " [ 7.60557519e+04]\n", + " [ 7.77895805e+03]\n", + " [ 6.80770151e+04]\n", + " [ 4.19211634e+04]\n", + " [-1.27882453e+04]\n", + " [-4.27656585e+04]\n", + " [ 2.99285119e+04]\n", + " [ 1.25009364e+04]\n", + " [ 1.27659808e+04]\n", + " [ 2.35303583e+04]\n", + " [ 1.08538303e+05]\n", + " [ 2.61444946e+04]\n", + " [ 4.61070901e+04]\n", + " [ 2.90919492e+04]\n", + " [ 3.26301940e+04]\n", + " [-3.59837362e+04]\n", + " [ 4.08836664e+04]\n", + " [-2.50733677e+04]\n", + " [ 3.62939061e+04]\n", + " [ 3.36585334e+04]\n", + " [ 3.56449823e+04]\n", + " [ 5.42363688e+04]\n", + " [ 8.61907433e+03]\n", + " [-2.55060788e+04]\n", + " [ 1.77605089e+04]\n", + " [ 3.94269510e+04]\n", + " [ 1.73474877e+04]\n", + " [ 2.28182458e+04]\n", + " [ 3.73464248e+04]\n", + " [-6.07778184e+04]\n", + " [ 6.06230754e+04]\n", + " [ 4.95028941e+04]\n", + " [ 4.30465868e+03]\n", + " [ 7.13864363e+04]\n", + " [ 5.93920266e+04]\n", + " [ 2.62271247e+04]\n", + " [ 1.32790047e+04]\n", + " [ 5.79284219e+04]\n", + " [ 7.41990485e+03]\n", + " [-9.84098811e+03]\n", + " [ 3.46916920e+04]\n", + " [ 1.72638308e+05]\n", + " [-1.65317359e+04]\n", + " [ 1.97624262e+04]\n", + " [ 7.96505300e+04]\n", + " [ 6.12139209e+04]\n", + " [ 1.77458833e+04]\n", + " [-1.16180069e+04]\n", + " [ 6.05027437e+03]\n", + " [ 6.19094962e+04]\n", + " [ 1.63271997e+04]\n", + " [ 2.35217266e+04]\n", + " [ 3.33025067e+03]\n", + " [ 2.94707434e+04]\n", + " [ 3.51406993e+04]\n", + " [ 5.84048545e+04]\n", + " [ 7.34499762e+04]\n", + " [-6.43144547e+03]\n", + " [ 6.03239564e+04]\n", + " [ 7.48168215e+04]\n", + " [ 6.51708775e+03]\n", + " [ 3.86342570e+04]\n", + " [ 2.80017539e+04]\n", + " [ 1.27833534e+04]\n", + " [ 6.84501695e+03]\n", + " [ 7.97847490e+04]\n", + " [ 5.77815025e+04]\n", + " [ 3.57901748e+04]\n", + " [ 2.97666078e+04]\n", + " [ 2.67605154e+03]\n", + " [ 3.66836700e+04]\n", + " [ 2.33084923e+04]\n", + " [ 6.76334202e+04]\n", + " [ 4.61015276e+04]\n", + " [-3.99751815e+03]\n", + " [ 5.77794692e+04]\n", + " [ 9.18723367e+04]\n", + " [ 2.09273075e+04]\n", + " [ 4.77050493e+04]\n", + " [ 5.12018338e+04]\n", + " [ 7.28669250e+04]\n", + " [ 4.01106329e+03]\n", + " [ 4.25453703e+04]\n", + " [ 5.93779900e+04]\n", + " [ 1.58331791e+04]\n", + " [-9.63513262e+03]\n", + " [ 4.66142288e+04]\n", + " [ 1.98223881e+04]\n", + " [ 3.76360987e+04]\n", + " [ 6.56289229e+04]\n", + " [ 3.79609394e+04]\n", + " [ 4.51897108e+03]\n", + " [ 3.73240020e+04]\n", + " [ 3.30489038e+04]\n", + " [ 6.55462351e+04]\n", + " [ 2.09842265e+04]\n", + " [ 2.27813359e+04]\n", + " [ 3.02900389e+04]\n", + " [ 3.39256920e+04]\n", + " [ 2.81837057e+04]\n", + " [ 5.46933428e+04]\n", + " [ 6.28676111e+04]\n", + " [ 5.88397695e+04]\n", + " [ 1.11565745e+04]\n", + " [ 4.93879095e+04]\n", + " [ 2.13959748e+04]\n", + " [-2.54840529e+03]\n", + " [ 2.04745386e+04]\n", + " [ 1.34036150e+04]\n", + " [-1.09562003e+04]\n", + " [ 4.16027602e+04]\n", + " [ 2.22832521e+04]\n", + " [-1.50826336e+04]\n", + " [ 2.20645758e+04]\n", + " [ 2.07850287e+04]\n", + " [ 2.93395501e+04]\n", + " [ 1.49221568e+04]\n", + " [ 6.99675986e+04]\n", + " [ 4.87076214e+04]\n", + " [ 3.57962061e+04]\n", + " [ 3.55077951e+04]\n", + " [ 7.08091948e+03]\n", + " [ 9.55299727e+04]\n", + " [ 4.47551170e+04]\n", + " [ 1.28153172e+05]\n", + " [ 6.11953351e+04]\n", + " [ 7.55876171e+04]\n", + " [ 3.34427164e+04]\n", + " [ 2.62837305e+04]\n", + " [ 5.99649032e+04]\n", + " [ 1.80190796e+04]\n", + " [ 4.01530999e+04]\n", + " [ 8.00956463e+04]\n", + " [ 4.53701032e+04]\n", + " [ 5.05890257e+04]\n", + " [ 9.69598772e+04]\n", + " [ 1.06663280e+04]\n", + " [ 3.06675785e+03]\n", + " [ 2.55036135e+04]\n", + " [ 5.62841964e+02]\n", + " [ 2.59426591e+04]\n", + " [ 3.71113168e+04]\n", + " [-5.31103841e+04]\n", + " [ 3.83280753e+04]\n", + " [-1.67995706e+04]\n", + " [ 5.90616355e+04]\n", + " [ 8.18032034e+04]\n", + " [ 4.92945345e+04]\n", + " [ 2.68678377e+04]\n", + " [ 7.83716564e+04]\n", + " [-2.10308322e+03]\n", + " [ 2.59623695e+04]\n", + " [ 3.88953555e+04]\n", + " [-1.52824204e+04]\n", + " [ 1.56137759e+04]\n", + " [ 4.57207020e+04]\n", + " [ 9.49859035e+04]\n", + " [ 6.01354389e+04]\n", + " [ 1.15377416e+05]\n", + " [ 9.26166504e+04]\n", + " [ 6.01807752e+04]\n", + " [ 2.00325322e+04]\n", + " [-1.54956548e+04]\n", + " [ 2.66551677e+04]\n", + " [ 1.57952963e+04]\n", + " [ 2.55009758e+04]\n", + " [ 2.97045972e+04]\n", + " [ 8.85042495e+04]\n", + " [ 7.83716564e+04]\n", + " [ 3.92959969e+02]\n", + " [ 2.99391341e+04]\n", + " [ 7.11929338e+04]\n", + " [ 7.79254238e+04]\n", + " [ 9.55299727e+04]\n", + " [ 3.54251650e+04]\n", + " [ 1.46659560e+04]\n", + " [ 1.46707517e+04]\n", + " [ 5.91641666e+04]\n", + " [ 4.16628186e+04]\n", + " [ 2.96565004e+04]\n", + " [ 5.59482104e+04]\n", + " [ 1.29428394e+04]\n", + " [ 3.90105735e+04]\n", + " [ 6.54708788e+04]\n", + " [ 8.06836105e+04]\n", + " [ 6.10708704e+04]\n", + " [ 5.43846992e+04]\n", + " [-6.98106482e+03]\n", + " [-1.25945989e+04]\n", + " [ 2.75201564e+04]\n", + " [ 8.61682697e+04]\n", + " [ 3.96568595e+04]\n", + " [ 7.22317435e+04]\n", + " [ 6.42532034e+04]\n", + " [-1.33436918e+04]\n", + " [ 7.42884259e+03]\n", + " [ 8.86030759e+04]\n", + " [ 2.87273311e+04]\n", + " [ 3.63851679e+04]\n", + " [ 1.17521020e+04]\n", + " [-5.96818037e+03]\n", + " [ 3.24832753e+04]\n", + " [ 6.34601148e+04]\n", + " [ 6.89318567e+04]\n", + " [ 2.11220070e+04]\n", + " [ 2.04199816e+04]\n", + " [ 1.98807680e+04]\n", + " [ 3.52155616e+03]\n", + " [ 6.10402847e+04]\n", + " [ 4.02624678e+04]\n", + " [ 8.23222491e+04]\n", + " [ 6.70045270e+04]\n", + " [ 2.14444622e+04]\n", + " [ 2.12126755e+04]\n", + " [ 7.21347927e+04]\n", + " [ 7.49057938e+04]\n", + " [ 5.06850048e+03]\n", + " [ 5.46107127e+04]\n", + " [ 7.41207870e+04]\n", + " [ 4.69191904e+04]\n", + " [ 3.96488170e+04]\n", + " [ 4.80348938e+04]\n", + " [ 3.63791739e+04]\n", + " [ 8.98588017e+01]\n", + " [ 7.49405450e+04]\n", + " [ 2.50679241e+04]\n", + " [ 1.06129491e+04]\n", + " [ 4.48075447e+04]\n", + " [ 7.79221970e+04]\n", + " [ 7.57540804e+04]\n", + " [ 2.69957734e+03]\n", + " [ 1.12705044e+04]\n", + " [ 1.40757960e+04]\n", + " [ 6.72862389e+04]\n", + " [ 7.59470449e+04]\n", + " [ 6.85960608e+04]\n", + " [ 3.92444274e+04]\n", + " [ 3.36973605e+04]\n", + " [ 5.97828943e+03]\n", + " [ 4.53820003e+04]\n", + " [ 4.52929960e+04]\n", + " [-2.87656795e+04]\n", + " [ 1.73480968e+04]\n", + " [ 7.18208059e+04]\n", + " [ 7.41785116e+04]\n", + " [ 4.15227678e+04]\n", + " [ 1.18171637e+05]]\n" + ] + } + ], + "source": [ + "\n", + "\n", + "x_dev = pd.DataFrame(dev)\n", + "\n", + "predict = model.predict(x_dev)\n", + "print(predict)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "metadata": {}, + "outputs": [], + "source": [ + " predict.tofile('dev-0/out.tsv', sep='\\n') " + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "34136.77274287094\n" + ] + } + ], + "source": [ + "error = np.sqrt(mean_squared_error(y_dev, predict))\n", + "print(error)" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": {}, + "outputs": [], + "source": [ + "#test" + ] + }, + { + "cell_type": "code", + "execution_count": 179, + "metadata": {}, + "outputs": [], + "source": [ + "pd.DataFrame(predict).to_csv('dev-0/out.tsv', sep='\\t', index=False, header=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " price year brand engineType engineCapacity\n", + "0 203000 2010 Renault diesel 1500\n", + "1 39000 2008 Citroen benzyna 1000\n", + "2 190000 2005 Peugeot diesel 1600\n", + "3 230000 2001 Volkswagen benzyna 1598\n", + "4 189000 2000 BMW benzyna 1600\n", + ".. ... ... ... ... ...\n", + "995 465000 2005 Renault diesel 2500\n", + "996 89074 2014 BMW diesel 2000\n", + "997 21711 2014 Toyota benzyna 1329\n", + "998 144000 2014 Renault diesel 1500\n", + "999 113606 2000 Jaguar benzyna 4000\n", + "\n", + "[1000 rows x 5 columns]\n" + ] + } + ], + "source": [ + "test=pd.read_csv('test-A/in.tsv', sep='\\t', names=header)\n", + "print(test)" + ] + }, + { + "cell_type": "code", + "execution_count": 185, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "\"['brand'] not found in axis\"", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtest\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'brand'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mtest\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'engineType'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0my_expected\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtest\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'price'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0my_expected\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'test-A/expected.tsv'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msep\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'\\t'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'utf-8'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mc:\\users\\mkoci\\appdata\\local\\programs\\python\\python39\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mdrop\u001b[1;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[0;32m 4303\u001b[0m \u001b[0mweight\u001b[0m \u001b[1;36m1.0\u001b[0m \u001b[1;36m0.8\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4304\u001b[0m \"\"\"\n\u001b[1;32m-> 4305\u001b[1;33m return super().drop(\n\u001b[0m\u001b[0;32m 4306\u001b[0m \u001b[0mlabels\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlabels\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4307\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mc:\\users\\mkoci\\appdata\\local\\programs\\python\\python39\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36mdrop\u001b[1;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[0;32m 4148\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlabels\u001b[0m \u001b[1;32min\u001b[0m \u001b[0maxes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4149\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlabels\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4150\u001b[1;33m \u001b[0mobj\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_drop_axis\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4151\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4152\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0minplace\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mc:\\users\\mkoci\\appdata\\local\\programs\\python\\python39\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m_drop_axis\u001b[1;34m(self, labels, axis, level, errors)\u001b[0m\n\u001b[0;32m 4183\u001b[0m \u001b[0mnew_axis\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4184\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4185\u001b[1;33m \u001b[0mnew_axis\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4186\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreindex\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m**\u001b[0m\u001b[1;33m{\u001b[0m\u001b[0maxis_name\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mnew_axis\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4187\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mc:\\users\\mkoci\\appdata\\local\\programs\\python\\python39\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mdrop\u001b[1;34m(self, labels, errors)\u001b[0m\n\u001b[0;32m 5589\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mmask\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0many\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5590\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[1;34m\"ignore\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 5591\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34mf\"{labels[mask]} not found in axis\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5592\u001b[0m \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m~\u001b[0m\u001b[0mmask\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5593\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdelete\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mKeyError\u001b[0m: \"['brand'] not found in axis\"" + ] + } + ], + "source": [ + "test.drop('brand', inplace=True, axis=1)\n", + "test.drop('engineType', inplace=True, axis=1)\n", + "y_expected = pd.DataFrame(test['price'])\n", + "\n", + "y_expected.to_csv('test-A/expected.tsv', sep='\\t', encoding='utf-8')" + ] + }, + { + "cell_type": "code", + "execution_count": 186, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " year engineCapacity\n", + "0 2010 1500\n", + "1 2008 1000\n", + "2 2005 1600\n", + "3 2001 1598\n", + "4 2000 1600\n", + ".. ... ...\n", + "995 2005 2500\n", + "996 2014 2000\n", + "997 2014 1329\n", + "998 2014 1500\n", + "999 2000 4000\n", + "\n", + "[1000 rows x 2 columns]\n" + ] + } + ], + "source": [ + "print(test)" + ] + }, + { + "cell_type": "code", + "execution_count": 187, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 3 is different from 2)", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mx_test\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtest\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mpredict\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx_test\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpredict\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'test-A/out.tsv'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msep\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'\\t'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheader\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mc:\\users\\mkoci\\appdata\\local\\programs\\python\\python39\\lib\\site-packages\\sklearn\\linear_model\\_base.py\u001b[0m in \u001b[0;36mpredict\u001b[1;34m(self, X)\u001b[0m\n\u001b[0;32m 236\u001b[0m \u001b[0mReturns\u001b[0m \u001b[0mpredicted\u001b[0m \u001b[0mvalues\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 237\u001b[0m \"\"\"\n\u001b[1;32m--> 238\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_decision_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 239\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 240\u001b[0m \u001b[0m_preprocess_data\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mstaticmethod\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_preprocess_data\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mc:\\users\\mkoci\\appdata\\local\\programs\\python\\python39\\lib\\site-packages\\sklearn\\linear_model\\_base.py\u001b[0m in \u001b[0;36m_decision_function\u001b[1;34m(self, X)\u001b[0m\n\u001b[0;32m 219\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 220\u001b[0m \u001b[0mX\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'csr'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'csc'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'coo'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 221\u001b[1;33m return safe_sparse_dot(X, self.coef_.T,\n\u001b[0m\u001b[0;32m 222\u001b[0m dense_output=True) + self.intercept_\n\u001b[0;32m 223\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mc:\\users\\mkoci\\appdata\\local\\programs\\python\\python39\\lib\\site-packages\\sklearn\\utils\\validation.py\u001b[0m in \u001b[0;36minner_f\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 61\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mall_args\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 62\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[1;33m<=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 63\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 64\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 65\u001b[0m \u001b[1;31m# extra_args > 0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mc:\\users\\mkoci\\appdata\\local\\programs\\python\\python39\\lib\\site-packages\\sklearn\\utils\\extmath.py\u001b[0m in \u001b[0;36msafe_sparse_dot\u001b[1;34m(a, b, dense_output)\u001b[0m\n\u001b[0;32m 150\u001b[0m \u001b[0mret\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 151\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 152\u001b[1;33m \u001b[0mret\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m \u001b[1;33m@\u001b[0m \u001b[0mb\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 153\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 154\u001b[0m if (sparse.issparse(a) and sparse.issparse(b)\n", + "\u001b[1;31mValueError\u001b[0m: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 3 is different from 2)" + ] + } + ], + "source": [ + "x_test = pd.DataFrame(test)\n", + "\n", + "predict = model.predict(x_test)\n", + "pd.DataFrame(predict).to_csv('test-A/out.tsv', sep='\\t', index=False, header=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 184, + "metadata": {}, + "outputs": [], + "source": [ + " predict.tofile('test-A/out.tsv', sep='\\n') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/config.txt b/config.txt index 6ad06eb..4e14b7e 100644 --- a/config.txt +++ b/config.txt @@ -1 +1 @@ ---precision 1 + --metric RMSE --precision 1 diff --git a/dev-0/out.tsv b/dev-0/out.tsv index 595c1f0..44c78bb 100644 --- a/dev-0/out.tsv +++ b/dev-0/out.tsv @@ -997,4 +997,4 @@ 71820.80591477081 74178.51155762933 41522.76776669361 -118171.63708967716 \ No newline at end of file +118171.63708967716 diff --git a/test-A/expected.tsv b/test-A/expected.tsv index fc4c7af..e6299bc 100644 --- a/test-A/expected.tsv +++ b/test-A/expected.tsv @@ -1,4 +1,3 @@ - price 0 203000 1 39000 2 190000