Projekt_UMA/UMA_projekt.ipynb

861 lines
225 KiB
Plaintext
Raw Normal View History

2021-06-30 14:58:18 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import mean_squared_error\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Cel: rozpoznanie średniej oceny użytkowników dla danego filmu na bazie:\n",
"- roku wydania\n",
"- gatunku\n",
"- czasu trwania filmu\n",
"- ilości głosów\n",
"- oceny krytyków (metascore)\n",
"- przychodu"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 0. Preprocessing"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>year</th>\n",
" <th>duration</th>\n",
" <th>avg_vote</th>\n",
" <th>votes</th>\n",
" <th>worlwide_gross_income</th>\n",
" <th>metascore</th>\n",
" <th>Action</th>\n",
" <th>Adventure</th>\n",
" <th>Animation</th>\n",
" <th>Biography</th>\n",
" <th>...</th>\n",
" <th>Horror</th>\n",
" <th>Music</th>\n",
" <th>Musical</th>\n",
" <th>Mystery</th>\n",
" <th>Romance</th>\n",
" <th>Sci-Fi</th>\n",
" <th>Sport</th>\n",
" <th>Thriller</th>\n",
" <th>War</th>\n",
" <th>Western</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>506</th>\n",
" <td>1927</td>\n",
" <td>153</td>\n",
" <td>8.3</td>\n",
" <td>156076</td>\n",
" <td>1349711.0</td>\n",
" <td>98.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>628</th>\n",
" <td>1928</td>\n",
" <td>72</td>\n",
" <td>8.1</td>\n",
" <td>27414</td>\n",
" <td>26916.0</td>\n",
" <td>90.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>856</th>\n",
" <td>1930</td>\n",
" <td>104</td>\n",
" <td>7.7</td>\n",
" <td>13311</td>\n",
" <td>4410.0</td>\n",
" <td>88.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1048</th>\n",
" <td>1931</td>\n",
" <td>87</td>\n",
" <td>8.5</td>\n",
" <td>162668</td>\n",
" <td>46008.0</td>\n",
" <td>99.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1086</th>\n",
" <td>1931</td>\n",
" <td>70</td>\n",
" <td>7.8</td>\n",
" <td>63315</td>\n",
" <td>1626.0</td>\n",
" <td>91.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 27 columns</p>\n",
"</div>"
],
"text/plain": [
" year duration avg_vote votes worlwide_gross_income metascore \\\n",
"506 1927 153 8.3 156076 1349711.0 98.0 \n",
"628 1928 72 8.1 27414 26916.0 90.0 \n",
"856 1930 104 7.7 13311 4410.0 88.0 \n",
"1048 1931 87 8.5 162668 46008.0 99.0 \n",
"1086 1931 70 7.8 63315 1626.0 91.0 \n",
"\n",
" Action Adventure Animation Biography ... Horror Music Musical \\\n",
"506 0 0 0 0 ... 0 0 0 \n",
"628 0 0 0 0 ... 0 0 0 \n",
"856 0 0 0 0 ... 0 1 0 \n",
"1048 0 0 0 0 ... 0 0 0 \n",
"1086 0 0 0 0 ... 1 0 0 \n",
"\n",
" Mystery Romance Sci-Fi Sport Thriller War Western \n",
"506 0 0 1 0 0 0 0 \n",
"628 0 1 0 0 0 0 0 \n",
"856 0 0 0 0 0 0 0 \n",
"1048 0 1 0 0 0 0 0 \n",
"1086 0 1 0 0 0 0 0 \n",
"\n",
"[5 rows x 27 columns]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"data = pd.read_csv('IMDb movies.csv', low_memory=False)\n",
"data = data[[\"year\",\"genre\", \"duration\", \"avg_vote\", \"votes\", \"worlwide_gross_income\", \"metascore\"]]\n",
"data = data.dropna()\n",
"data = data[~data[\"worlwide_gross_income\"].str.contains(\"NPR\")]\n",
"data[\"worlwide_gross_income\"] = data[\"worlwide_gross_income\"].str.replace('$ ','', regex=False).astype(float)\n",
"data[\"genre\"] = data[\"genre\"].str.split(\", \")\n",
"genres = pd.get_dummies(data[\"genre\"].apply(pd.Series).stack()).sum(level=0)\n",
"data = pd.concat([data.drop(columns=[\"genre\"]), genres.reindex(data.index)], axis=1)\n",
"display(data.head(5))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"X = data.drop(columns=[\"avg_vote\"])\n",
"X[\"votes\"] = X[\"votes\"]/data[\"votes\"].max()\n",
"X[\"duration\"] = X[\"duration\"]/data[\"duration\"].max()\n",
"X[\"worlwide_gross_income\"] = X[\"worlwide_gross_income\"]/data[\"worlwide_gross_income\"].max()\n",
"X[\"metascore\"] = X[\"metascore\"]/data[\"metascore\"].max()\n",
"X[\"year\"] = X[\"year\"].astype(int)\n",
"X[\"year\"] = X[\"year\"]/X[\"year\"].max()\n",
"\n",
"Y = data[\"avg_vote\"]/10"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 1. Regresja liniowa"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test: 0.0033762327444214367\n",
"Train: 0.0036015583726998865\n"
]
}
],
"source": [
"from sklearn.linear_model import LinearRegression\n",
"\n",
"linear_model = LinearRegression()\n",
"linear_model.fit(X_train,Y_train)\n",
"\n",
"Y_linear_test_pred = linear_model.predict(X_test)\n",
"Y_linear_train_pred = linear_model.predict(X_train)\n",
"linear_mean_squared = mean_squared_error(Y_test, Y_linear_test_pred)\n",
"linear_mean_squared_train = mean_squared_error(Y_train, Y_linear_train_pred)\n",
"\n",
"print(f\"Test: {linear_mean_squared}\")\n",
"print(f\"Train: {linear_mean_squared_train}\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0.0, 1.0)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlMAAAEzCAYAAAAVXYYvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAABUlElEQVR4nO29fZRcZ33n+X3qTVKpbRm1GBIDXQ02mCgIhtiYOApB0E6wmhiR3UwCU0CPsNJRd8iRvQdCmF4sm5waksAOVnaQRI+REKvaySEZwDYIOLFYFmLMYnkDFjZjYuPujvHsYMlYRmpZ3VX17B+3S33vc5+n6rn13Kq6VfX9nFOnVVf35bkvde/v/l6+PyGlBCGEEEIIaY1UtwdACCGEENLL0JgihBBCCHGAxhQhhBBCiAM0pgghhBBCHKAxRQghhBDiAI0pQgghhBAHmhpTQohDQoifCSF+aPh/IYT4GyHEY0KIh4QQvxb/MAkhhBBCkomNZ+qzAG5o8P/bAbxi5TMJ4ID7sAghhBBCeoOmxpSU8lsAnmkwyw4An5Me3wVwmRDil+MaICGEEEJIkokjZ+rFAP7F9/3JlWmEEEIIIX1PJoZ1CM00bY8aIcQkvFAg1q9ff/WrXvWqGDZPCCGEENJeHnzwwVNSyhfq/i8OY+pJAC/1fX8JgKd0M0opZwHMAsA111wjT5w4EcPmCSGEEELaixBi3vR/cYT57gbw3pWqvl8HcEZK+d9jWC8hhBBCSOJp6pkSQvwXANsAbBJCPAlgL4AsAEgpDwI4BmAcwGMAFgHsbNdgCSGEEEKSRlNjSkr5rib/LwH8SWwjIoQQQgjpIaiATgghhBDiAI0pQgghhBAHaEwRQghJFOWTZYzeMYrU7SmM3jGK8slyt4dESEPikEYghBBCYqF8sozJeyaxuLwIAJg/M4/JeyYBAMUtxW4OjRAj9EwRQghJDDPHZy4aUnUWlxcxc3ymSyMipDk0pgghhCSGhTMLkaYTkgRoTBFCCEkMIxtGIk0nJAnQmCKEEJIYSmMl5LP5wLR8No/SWKlLIyKkOTSmCCGEJIbiliJmb5xFYUMBAgKFDQXM3jjL5HOSaIQnYN552OiYEEIIIb2CEOJBKeU1uv+jZ4oQQgghxAEaU4QQQkgDymVgdBRIpby/ZWqIEgWKdhJCCCEGymVgchJYXJG+mp/3vgNAkWlcZAV6pgghhBADMzOrhlSdxUVvOiF1aEwRQghxop976S0YtEJN00lnSUoIlsYUIYSQlqn30ps/Mw8JebGXXr8YVCMGrVDT9EFEZ0x3wsCuh2Dn5wEpV0Ow3TCoKI1ACCGkZUbvGMX8mfnQ9MKGAuZunuv8gGJGzZkCgHwemJ1lzhQQbkwNANlUFkIILFWXLk7LZ/Ox64WNjnoGlEqhAMzNxbaZi1AagRBCSFvo9156xaJnOBUKgBDe30aGVD+HPHXoGlMv15YDhhTQnmbVCwsAtpSBm0eBvSnv75ZyV0KwrOYjhBDSMiMbRrSeqX7qpVcs2nmhVC9NPeQJoG8V3KMYzXEb2BvfVMbp35gEcivG3GXzwI2T2DgMAJ093vRMEUIIaRn20ltF56Vph0cmSUQxmmM3sK+fWTWk6uQWvekdhsYUIYQkmKSHjdhLb5V+D3nq0BnT2VQWuXQuMK0dBvYzFf1xNU1vJzSmCCEkofRKpVxxSxFzN8+htreGuZvn+s6QsjVoTZ6Xfgp5quiM6cPvOIxDOw6FDGwAsb4YJOl405gihJCEMohho6QRxaB1DXl2ygsZ93buO1DEkx+Yg7ythic/MIf7DhRDBjaA2F8MkhRipjFFCCEJZRDDRkkjikHrEvLslBcy7u1MTwMHDgDVqve9WvW+T08H52vHi0GSQszUmSKEkITS7xpOvUDq9hQkws9JAYHa3lps23E91+WTZcwcn8HCmQWMbBhBaaykNSrivqYymVVDyk86DVQqq987dRzbCXWmCCGkB0lSGG
NQ6VRejosXMoq3KW5vp86QAoDq5mAoceO6jdr5+iWfjMYUIYQklCSFMQaVThm0LkZblBBa3MZhOq2ZuKUM3Bg07n6x9AtkU9nAbP30YkBjihBCEky/V8rpaEfz2laTrqMatK2OvTRWCskJ5NI5K2MjircpbuNwclIzcSys/7RUXcKlay7t2xcDKqATQghJDGovvHrzWqD1XniuyuTFLUW7JHLHsas5zLY5zVFU6Ov7YZNfZcP+/d7f2Vkv5JdOA9UNBv2n88/g1J+dCkyzzfVKOkxAJ4QQkhja0by2U4n8LmN3GaOu2XA7GgvXt9XM+LHdl06OOw6YgE4IIT1KO0JeScbUpNaleW2UMJjL8XYZu0tieORQZIshT9tEd1MocXxNKXBs99zdPzpqDPMRQkhCaUfIK+mMjOi9OyMORV+2YTDX4+0y9iihOpN3KIqeVSshz0aJ7v5ldaHE8TUlHPlAMXBssbwAiPB2elFHjZ4pQghJKDMzqw/2OouL3vR+pVQC8kGnBvJ5b3rL67RMunY93i5jtx1j+WQZO7+0M+Ad2vmlndbeJRfxzCjeM7Vw4thfFUPHFmeS0w7GFRpThBCSUNoR8ko6xaKXzFwoAEJ4f2dn3TxxxS1FTLxgFumzBUAKpM8WMPGCcBhsYQFeWf/No8DelPd3Sxnz83ahP5ex245xz1f3YLm2HJi2XFvGnq/usToWLuFEF1kF7TV7vAQsBQ3InIhWWZiURuAM8xFCSEJpR8irFygW4w1jlsvAkQ8UUV30VloFcCQPbL00uJ2Nbyrj9G9Mrpb1XzYP3OiFwOZPejM2C/21OnbbMZ4+f1q7vGm6ysZ1G7XzmkQ1/ZTGStqEcZ3xo4YiN76phNPfVA7MyjHF2AywYQE4MwL57RJwRRHY0nxfXKs044TVfIQQklDUHB7ACxvpvB39UmLeDmyr7DaVRnG6opnx2QJwx1xgkkt1YZQxDm8rY2jH6nnV5VXVkXubP883/fUmrTE1vG44JFugQ3ed4aEiZmY879PICDD+oTI+8/QkluTqhZup5SG+MovlB1evSSEAnQlie2w73W6pUTUfjSlCCEkw5TICD6pSSW9I9VKJeadJpfQPbSGAmq8tnLg9BWj6x0EK4PZw/7g4H59Ck4hdVxJXBTB12BpDcffI0xn8uHnU8+opDFULGP7c3MVrWWc8At6x2H3PNGYfnEVVVpEWaUxePYn9b9vf1n1pBqURCCGkRykWvbf0Ws37qwshuSQVDwKmsKg6PX3WMKMmUVrbRsUB7fo0SuI6cukc9m3fZ7WduNvJ6JL2YRDtPJtaCFzLhYJ+nfl/M40DJw6gKr3Gf1VZxYETBzD9lWmrMXcjgZ3GFCGENCHpWk9xN6+NStKPj6nKbnw8OO7q10tAJdjSBZWclyitYGrw2yra9RmMEgABTalDOw5ZeyDjbiejTSw3VOlhcWMgWXz8Q2XteTm3+aB28YMngtNLYyXkhFsCe1zQmCKEkAbUwxjz815Yp56AnCSDoZtv6L1wfHRVdhMTwJEjwXF7qGEjfSzP5FWxRa1CG96mOWAGo6SeE9RKv0aTwCeAlqritF4/TZUeKjlgzXMBSYcjP5/ExCfKoepH0zGXkIEx3vePgLx71stpkwJ4tuB9f6jzoW3mTBFCSAPa0d4kbrqZM9ULx0eHdtyGXB+cKQCfnLv41VQEYIvufOVEHvLuYIJ29uoyxNuDidztOK8u10+5DOz8ZBnLb1ytyLvoyfNV6SF7FlgfTnzXJYuL23UJZGFEJQ951+xqVWB9nW269pgzRQghLdILWk+2GkVxoIb0TEnEttpM2m1E0A5qNcSoPX+msNqlCxdzmtJpz6uF17Sub6TLcVuSi7j092YCXprDtxRx0wvtzuv113vL1T/XX2/evnrMnNq6vMYz+HDZPCBkQE4Cd8x5ift3zAF5vXSDrhpvKDfUfLsAZGbRM9gUuvHbpGeKEEIa0AuelygSCnFvxxbb8TTykgCNW5
TUtzPxiTKOXWjSjHfU3jMlzhQgfZ4pV4+RbRWa7Xm9/nrg+PHwdsbGgHvvDU7TnsO9Kc8QajIeHSZ5ApwbBpaHVj1
"text/plain": [
"<Figure size 720x360 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Średnia głosów na podstawie metascore\n",
"# niebieski kolor oznacza faktyczny stosunek a zielony stosunek oszacowany przez model\n",
"\n",
"fig = plt.figure(figsize=(10,5))\n",
"chart = fig.add_subplot()\n",
"chart.plot(X_test[\"metascore\"], Y_test,\"bo\")\n",
"chart.plot(X_test[\"metascore\"], Y_linear_test_pred, \"go\")\n",
"plt.ylim([0,1])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 2.1. Regresja wielomianowa"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"degree = 3"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Pipeline(steps=[('polynomialfeatures', PolynomialFeatures(degree=3)),\n",
" ('linearregression', LinearRegression())])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"from sklearn.linear_model import Ridge\n",
"\n",
"polynomial_model = make_pipeline(PolynomialFeatures(degree=degree, include_bias=True), \n",
" LinearRegression())\n",
"polynomial_model.fit(X_train,Y_train)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test: 0.08647485505065261\n",
"Train: 0.0024716850438651133\n"
]
}
],
"source": [
"Y_polynomial_test_pred = polynomial_model.predict(X_test)\n",
"Y_polynomial_train_pred = polynomial_model.predict(X_train)\n",
"\n",
"polynomial_mean_squared = mean_squared_error(Y_test, Y_polynomial_test_pred)\n",
"polynomial_mean_squared_train = mean_squared_error(Y_train, Y_polynomial_train_pred)\n",
"\n",
"print(f\"Test: {polynomial_mean_squared}\")\n",
"print(f\"Train: {polynomial_mean_squared_train}\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test: 0.007654936380156268\n",
"Train: 0.0024716850438651133\n"
]
}
],
"source": [
"#Funkcja skokowa Heavisidea\n",
"Y_normalized_polynomial_test_pred = []\n",
"Y_normalized_polynomial_train_pred = []\n",
"\n",
"for x in Y_polynomial_test_pred:\n",
" x = min(x,1)\n",
" x = max(0, x)\n",
" Y_normalized_polynomial_test_pred.append(x)\n",
" \n",
"for x in Y_polynomial_train_pred:\n",
" x = min(x,1)\n",
" x = max(0, x)\n",
" Y_normalized_polynomial_train_pred.append(x)\n",
" \n",
"polynomial_normalized_mean_squared = mean_squared_error(Y_test, Y_normalized_polynomial_test_pred)\n",
"polynomial_normalized_mean_squared_train = mean_squared_error(Y_train, Y_normalized_polynomial_train_pred)\n",
"\n",
"print(f\"Test: {polynomial_normalized_mean_squared}\")\n",
"print(f\"Train: {polynomial_normalized_mean_squared_train}\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0.0, 1.0)"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlMAAAEzCAYAAAAVXYYvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAABpLUlEQVR4nO29f3xc5X3n+3nmzIztkWITy9ncANUoAZrEiSAplJTSNE7kvYBygZDbdtOdBtWQnbXUdg25cNONbmo7fU23XXIL7m5lMkvsKGG2WbYbAjQmvGolJJSQEnMDOJDSAJFUSl4JlouNJGxpZp77x9FY8zzn+8w8Z86ZmTOj7/v18sue4/PjOb+/5/vj8xVSSjAMwzAMwzCNEWv3ABiGYRiGYToZNqYYhmEYhmECwMYUwzAMwzBMANiYYhiGYRiGCQAbUwzDMAzDMAFgY4phGIZhGCYAdY0pIcQBIcTPhRA/NPy/EEL8hRDieSHE00KIXwp/mAzDMAzDMNHExjP1RQBX1vj/qwBcsPInC2B/8GExDMMwDMN0BnWNKSnldwAcrzHLtQC+JF2+B+AsIcRbwhogwzAMwzBMlAkjZ+ocAP9U9fullWkMwzAMwzBdTzyEdQhiGtmjRgiRhRsKRE9Pz8XveMc7Qtg8w6wtnnj5CV/zX3z2xU0aSXBq7YvNuIMsH3TbYRPkvPpdtt766q3TdtuNzBeU468fx8yrMyjLstX8nXadtJNmXGdBaPW5eeKJJ45JKd9E/Z+w6c0nhBgA8DdSyncT//d5AA9LKf9q5fdzALZJKX9aa52XXHKJPHLkiMXwGYapZuCOAcycmLGaN70pjembpps7oACY9sV23EGWD7rtsAlyXv0sa7O+Wuu03Xaj8wUl7PtD7KX8BS5y99rqbduM66wZ42nWPSyEeEJKeQn1f2GE+e4HcP1KVd+vADhRz5BiGKZxckM5pBIpZVrSSSIRSyjTUokUckO5Vg7NN9S++Bl3kOWDbjtsgpxX22UTsQSSTpJcX+FoAQN3DCC2N4aBOwYwfMGw1fGxPY6tOt6zJ2at5rPd9tYtW31N72aoc0hdU7Wus2aPp133sI00wl8BeAzA24UQLwkhbhRC7BRC7FyZ5RCAFwE8D+C/ARhr2mgZhkFmMIP81XmkN6UhIJDelMaBaw/g4EcOKtPyV+eRGcy0e7g1ofbFz7iDLB9022ET5LzaLnvwIwdx4NoDnvUBQPaBLGZOzEBCYubEDCafmsTIRSMNbTvIfEHp39RPTu/b0NfQtp/5vWc8htPWLVvxzO89E8p4OwnqHFLXlOk6C/tcR+ketgrzNQMO8zEMw0SDqIU8g1A4WkD2gSwWlxfPTEslUh3xccFEm2aH+RiGYZgOxhQasw2ZRYkoeSuYtUMY1XwMwzBMB9O/qZ/0TJlCZlEnM5hh44lpKeyZYhiGWeNEKZGXqY9eLFA4Wmj3kNY8bEwxDMOscZoRGuMXfnOo5IRVFwtkH8jy8W0znIDOMAzDhAongTePbioW6DQ4AZ1hGIZpGeNT44ohBQCLy4sYnxpv04i6hygWC7AXko0phmEYJmSi+MLvFkxFAe0qFuCwowsbUwzDMEyoRO2F301ErViAvZAubEwxDMMwoRK1F343ETUdLfZCurDOFMMwDBMqlRf7+NQ4Zk/Mon9TP3JDOU4+D4ko6Wh1m0ZZo7BnimGYlsGJqmuHzGAG0zdNo7y7jOmbpiPz8mfCpVleyE57VrAxxTBMS+BEVaZTKRSAgQEgFnP/LvAle4ZmaZR12rOCdaYYhmkJrI/DdCKFApDNAotVOdapFJDPAxl2tjWFqD4rWGeKYZi2w4mqTCcyPq4aUoD7e9xQrBYkPNVpoa1m0YnPCjamGIZpCVwuz3Qis4b3NzU9SHiqE0NbzcLPsyIqIVg2phiGaQlcLs90Iv0GW5+aHkRzifWaVrF9VlRCsDMzgJTu39lsewwqNqYYhmkJUdPHYRgbcjk3R6
qaVMqdrhMkPNWJoa1mYfus8BuCbSacgM4wDMMwNSgU3Bf07Kzrkcrl6OTzIInTUU26jjKxmOuR0hECKJfD3x4noDMMwzBMo1xYAG4aAHbH3L8vpONIQULZHAb3j58QbLNhY4phGIZhDPhJDA8SyuYwuH/8hGCbDYf5GIZhGMYAh9+ijW0INgw4zMcwDMMwDdDJieFrQbcqkwGmp90cqenp9gmpsjHFMAzTZNbCS61b6VR9tFbqVo2NAfG4m/gdj7u/1xpsTDEMwzQRFmPsbNqdGN6oId4q3aqxMWD/fqBUcn+XSu7vtWZQsTHFMAzTRFiMsbPQjRcAbUsMD2KItyo8mc/7m96txNs9AIZhmG6mk3Nu1hoV46Vi/FaMl/zV+bYkm9cyxOsZc/2b+snE+bDDkxWPlO30boU9UwzDME2kU3Nu1iJR8yIGMcRbFZ50HH/TuxU2phiGYZpIu3NuOpF2Na8Nw4sY5tiDGOJ+dasazc3KZv1N71bYmGIYhmkiLMboD7/Na8OslAzqRQy78W5QQzwzmMH0TdMo7y5j+qbpmoZUo7lZExPA6OiqJ8px3N8TE1ZD7BpYtJNhGIaJDAMDrhGik067OkLV6DlOgGtsNGqsBl2fn7H7GdP41DhmT8yif1P/GUNKnxbEOGdhUjtYtJNhGKZDaVfIq13MGiJq1PSwc5wygxmMvDEPZz4NSAFnPo2RN9obZn7G7mdM1d4lAKFLbbSqSKKbr2U2phiGYSJK2GGjTsBP89qwjYBCAZi8JYPS56aBvWWUPjeNyVsy1se7FY13m5Ek34oiiWZdy1ERxGVjimEYJqKMjwOL6nsTi4vu9G7FT/PazRs2k+swTa9H0OPdisa7zfAitaJIohnXcpQEcdmYYhiGiSjNCBtFnUzGFXxMp932JOm0+ztozzWbEJPpuM7M2IWngo7dZozN8CK1okgi6LGliJKUBSegMwzDRJRmJDR3E7G9MUh432ECAuXd5TO/KyGmas9IKuU1dEzHWwg3NFVr2VpQSeS6oWI7xrCT7ltFM46t7fkPC05AZxiG6UBaETbqZGy9NLYhplwOSCa969N9Dn7CU7ahqPFxYPG8AnDTALA7Btw0gMXzCp7tRFFqg/Ko6dOGh73Xsm5IAf6ObZQEcdmYYhiGiSjNCnl1C7a5Pn7CpbbBGsrLQmEbiprZWACuzgJnzQBCun9fnXWna/jRj2p2cjaVWL5jB3DDDeq0yUlgZES9lk3H2jaMHSVBXDamGIZhIkwm44b0ymX3b5MhFZWqplZi66WxrbIbHweWl+22bdsuxTphfGgcSGrus+QicOWuhs5rq5KzKa/f8jKwtKROW1wEDh1Sr+V0ml6nbfVjlLx0bEwxDMPUIer6OK2saqKMtnYeHxsvjSlcOjysjtvW2wTYN/K1DkVtMrhjUnMNnddaHrEwDW8/xRD6vKGEsZ/OAHdMA3vL7t9Pt8dtywnoDMMwNbBNDG4nrVKwppKfkyIFeX8ey0+sHoyoHR8AGNtfQP7FcZR6ZuEs9GNbKYfHPp9RziuVw2PCtgjANmE8fusASr121pzNeTUlZ1e2H1YCux8jtK8POHZMnVYouN6t2VnXI5XL+at+bOW9yQnoDMMwDdIJWk+tUrCmvB1LchHL71cPhun4tCsUWThawF3HdrjGipAo9c5gqneHm/BdhZSuQVVNIuFNSvfjPbENRWXflgOWNTeNKafI4ryaPGKOcEKVE6C8S36wDWNTROneZGOKYRimBp2g9dSqqibK+wUA2OSdrusHje1vTijSJsS468FdWC5ryVDOMnDlLs+8UqpJ0gcPAjfeqDbyHRnx99K3CUVOjGYweo7ayqbX6SPXR53X7dvdMVf+9Pw9nZxdknR8slHDmyqSAAAMqpWJGCzg+HH79er7s32791zPzNDbace9ycYUwzBMDVrRIiQow+sIr8Zyyp2uEcQ7FIMh61rGPC80QK3muvPH4Qss2rYomXt9jl5Byju9Er6reE
oAtxKtkiNVKrm/m5EXNjGaQfG2acg9ZRRvm8bHt+yzOq/btwNTU+psz34lg4GjXo9YX5zO+t4cDyD6qXmX+rYVgGt
"text/plain": [
"<Figure size 720x360 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Średnia głosów na podstawie metascore\n",
"# niebieski kolor oznacza faktyczny stosunek a zielony stosunek oszacowany przez model\n",
"\n",
"fig = plt.figure(figsize=(10,5))\n",
"chart = fig.add_subplot()\n",
"chart.plot(X_test[\"metascore\"], Y_test,\"bo\")\n",
"chart.plot(X_test[\"metascore\"], Y_normalized_polynomial_test_pred, \"go\")\n",
"plt.ylim([0,1])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2.2 Regresja wielomianowa z regularyzacją"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"polynomial_regular_model = make_pipeline(PolynomialFeatures(degree=degree, include_bias=True),\n",
" Ridge(alpha=10, fit_intercept=True))\n",
"polynomial_regular_model.fit(X_train,Y_train)\n",
"\n",
"Y_polynomial_regular_test_pred = polynomial_regular_model.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test: 0.003350267646086885\n",
"Train: 0.0033181075895871736\n"
]
}
],
"source": [
"Y_polynomial_regular_train_pred = polynomial_regular_model.predict(X_train)\n",
"\n",
"polynomial_regular_mean_squared = mean_squared_error(Y_test, Y_polynomial_regular_test_pred)\n",
"polynomial_regular_mean_squared_train = mean_squared_error(Y_train, Y_polynomial_regular_train_pred)\n",
"\n",
"print(f\"Test: {polynomial_regular_mean_squared}\")\n",
"print(f\"Train: {polynomial_regular_mean_squared_train}\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0.0, 1.0)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlMAAAEzCAYAAAAVXYYvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAABXP0lEQVR4nO29f3hcd33n+/7OLzsjJQoep5QkaBQgTXFrftRuIM1mCZX3EqtNQu7TS2EH0HXiVSNd9kmy2xS6uuBkeeZpoWyJd1s56AYbUWa3l22BxEUJz0Y0ba+BTZxC4gQaSIlGBLi3sUzkWootzcz3/nE01pzv+X5nvmfOmZkzo/frefTIOj4/vuecmTOf+bw/3/dHSClBCCGEEEKaI9bpARBCCCGEdDMMpgghhBBCAsBgihBCCCEkAAymCCGEEEICwGCKEEIIISQADKYIIYQQQgLQMJgSQhwWQvyTEOIZw/8LIcR/FkI8L4R4WgjxK+EPkxBCCCEkmthkpj4H4IY6/78XwJXrP2MADgUfFiGEEEJId9AwmJJS/i2AU3VWuRnA56XDtwBcLIR4TVgDJIQQQgiJMmHUTF0G4Ec1f7+4vowQQgghpOdJhLAPoVmm7VEjhBiDIwWir69v1y/+4i+GcHhCCCGEkNby5JNPnpRSXqL7vzCCqRcBvLbm78sB/ES3opRyGsA0AOzevVseP348hMMTQgghhLQWIUTR9H9hyHwPAfjg+qy+twNYklL+NIT9EkIIIYQYKZwoYOi+IcTujWHoviEUThQ6Mo6GmSkhxH8DcD2A7UKIFwEcAJAEACnl/QBmAYwAeB7ACoB9rRosIYQQQgjgBFJjR8ewsrYCACguFTF2dAwAkNuZa+tYhJTa8qaWQ5mPEEIIIc0ydN8Qikte5S07kMX8nfOhH08I8aSUcrfu/+iATgghhNQhKlJSVOnU9VlYWvC1vJWEUYBOCCGE9CRRkpKiSCevz+DAoDYzNTgw2NLj6mBmihBCCDEwOTd5PlCosrK2gsm5yQ6NKFp08vrkh/NIJ9OuZelkGvnhfMuPrcJgihBCCDEQJSkpinTy+uR25jB94zSyA1kICGQHspi+cbojGUPKfIQQQoiBKElJUWTbBduw+Mqidnk7yO3MRUJuZWaKEEIIMRAlKYlEFwZThBBCiIEoSUlR5NQrp3wt71Uo8xFCCCF1iIqUFEUogzowM0UIIYSQpqAM6sBgihBCCCFNQRnUge1kCCGEEEIawHYyhBBCSJMUCsDQEBCLOb8L7CZDFFiATgghhBgoFICxMWBl3eS7WHT+BoDc5lKySB2YmSKEEEIMTE5uBFJVVlac5YRUYTBFCCGEGFgwdEUxLSftJSoSLIMpQgghxMCgwS7JtJy0j6oEWywCUm5IsJ0IqBhMEUIIIQbyeSDttlFCOu0sJ50lShIsgylCCCHEQC4HTE8D2SwghPN7eprF51EgShIsZ/MRQgghdcjlGDxFkcFBR9rTLW83zEwRQgghpOuIkgTLYIoQQgghTVM4UcDQfUOI3RvD0H1DKJwoaJeFTZQkWLaTIYQQQkhTFE4UMHZ0DCtrG5XgyVgSQgisllfPL0sn013fs4/tZAghhLSMdmQhSDSZnJt0BVIAsFZZcwVSALCytoLJud51OmUBOiGEkKZRMxPFpSLGjjr9Vro5C0HsKL68AAi7dReWetfplJkpQgghTaPLTPR6FoLUsGQ/dW5woHedThlMEUIIaRpTtqGXsxDdTqiy7FweWFWm1JWSQDnuWpSMJZEf7l2nUwZThBBCmsaUbeilLEQv1YRVZdniUhES8rws2+w5xb+bA45OAy9nASmc30/uB6Q7mBLCUgvsUhhMEUIIaZr8cB7ppDszkU6mA2UhotK8FvAffERp7DrClmXHxgCcyAH3zQP3VpzfV80CCXcB+mp5taelXwZThBBCmia3M4fpG6eRHchCQCA7kA00BT5KzWsBf8FH0LG3IwMWtiw7NQ
WMjwPx9URUPA5gYPNJvwymCCGEBCK3M4f5O+dROVDB/J3zgWbxtap5bbOBip/gI8jYw5bfTLRClp2aAkolJ4AslYDsxfbH6BUJlcEUIYREmKjLRmHTiua1fgIV9XpvS9gHBgsLAHYWgDuHgAMx5/fOgtXY2zUrsiWyrBIQjVw5oj3GyJa869pOHGpPANkOGEwRQkhEiZrk1Q5MTWqDNK+1DVR01/v0l/NICbvgY9s7CsCNY8DFRUBI5/eNY87yBrRrVmTosqwmUJ15agajbx51HWP0VdOY+d2c69re/4PesdVgOxlCCIkoQ0POh45KNgvMz7d7NO2hGtDUymXpdLCea7F7Y5DwftYJCFQOVM7/bbremesL6L95EgtLCxgcGER+OK8NPrbnh7BY8u4gk8ji5OR83TEO3TeE4pJ32+xAFvN31t8WcIKaybnGYwwb23EPDQHFiwrA8KRTU7U0CAwUtYaf6n2JCvXaydABnRBCIkorJK+ok8sBx04XMP3DSZT7FhBfHsTo6/LIBeheOzgwqP3AV6U603VdfCyH/hdywAKAQQCXANjpXe9USb8D0/Ja8sN53PrlMazKjSgyJezkt0660Ntm1IoXrWftUuvnd3HRsVLQBLndaKtBmY8QQiJKKySvqFM4UcDMz8ZQ7neksnJ/ETM/C1ZHM7IlD6wpxpJrTg1PLabrKoSd1BqouPvpHORDbr8m+dA08HTjYKiTLvS25xx/1+RGIFVFyPWAqgbNfekGKPMRQkhEaYXkFXWCyl3afQ5pJKa5PLKncy65tFAAbr0VWF017WkDnfQHwJUhApz6KpuapCCSrq2MWSVMSbBwoqDNqF3XP4rHfjp7PrtY7tNLepAAlrJ170tUoMxHCCFdSDVgmpx0JKjBQSCf791ACmhNIfbCAgCZc8wla5drPtyt8gs7C1j8tTEsLrlltekbpzF943RTgYoukKq3vBZbGRNwgp99X9mHtcra+bHv+8o+AE1Kgk/nIB8CcN1GoLr6gxHMvXkG6HeuT7m/qFPzHCpxx+izBt19iTrMTBFCSA/QqQLksKmXmcoP55s6R9usjymDpQZhuHPIqfnRjLHZ7FkiAZTL3uXxuOPdVA+1ZgowZ8S2f3I7Fl9Z9Owjc0EGJ3/vpO9xa6+t4fpokQDudcchfiZYtPN1Xy8zxZopQghpQNS9ntpl+Gg8fojXx+SDNHLlSNPnmM878qhrn2lgZMQ97vNF0oq1AXYqx2iBw7cukKq3vBY/dge6QKre8kZoi/YN10eHOJ11/Z1OO/fLhqrEWPuauPXLnfGpYjBFCCF16LTXk41DdCcLkMO+PqbAYPYHs02fYy7n1Jlls04xeTYLjI4CMzPucWNYUySdWnGW17IUvot4Ngut4Wc2611X95oI04XeD9qi/ZVt+pUVISydTOP2K/Ou++KnHvCOhyZdtVoAsCpXcMdD7feposxHCCF16KTXk618YypABhzpqZUSSLuuj98i60Zox30g5mSkVKRwmviuk9xVgLjJXXRtW2huYuJQAYd+PAYka4KDtTTGL5vG1PjGPv1IejrClvl0kyRw93agT5PpWu1D9pLtTb8eVUmv+LK5qF3eE35sQ5mPEEKapJNeT7YZJ1NGRECELv2pkl69wukwpVHTOW5LDDZ1HO39M2Sc+iuDrka++6/O4fAt4bmIA8DsuUl3IAUAyRV88dSk6/zueEj/mtj/XychBM7/7NmjP87BvQeRQMq1LIEUDu492NS4dVk/pE/pV06uWGfP9uyB63x+6X1eSc9IJd7UuQSBwRQhhNShk15PtjPbtD5KUngyOUGlP52kV48wpVFdLVVKpPGz/zmC4i1DkB+LoXjLEPZ9umB1HO39m8sDZW+gcW42f752qVx25EE8Ha6sZrrXi2tF1/ktrukv+tkt7u3n5gwB1dM5iKOHXX5W4uhhKz8rE7mck4WsVJzf8WX9m8O0XGXPHmf8tXz3572SnjYrBQAxi0KzkGEwRQghdT
AVL9sWyQbB1hBx9hM5QDF8NM1FD1IkPTmpyDk+WFlxtm8WXS1V8tlRVHbOuArG1941ht95aKJhnZnuviZTQDzuvm6
"text/plain": [
"<Figure size 720x360 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Średnia głosów na podstawie metascore\n",
"# niebieski kolor oznacza faktyczny stosunek a zielony stosunek oszacowany przez model\n",
"\n",
"fig = plt.figure(figsize=(10,5))\n",
"chart = fig.add_subplot()\n",
"chart.plot(X_test[\"metascore\"], Y_test,\"bo\")\n",
"chart.plot(X_test[\"metascore\"], Y_polynomial_regular_test_pred, \"go\")\n",
"plt.ylim([0,1])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 3. Sieć neuronowa"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<tensorflow.python.keras.callbacks.History at 0x147a3896bb0>"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"from tensorflow import keras\n",
"from tensorflow.keras import layers\n",
"\n",
"batch_size = 16\n",
"epochs = 10\n",
"\n",
"model_nn = keras.Sequential(name=\"movies\")\n",
"model_nn.add(keras.Input(shape=(26,), name=\"input\"))\n",
"model_nn.add(layers.Dense(12, activation=\"relu\", name=\"layer1\"))\n",
"model_nn.add(layers.Dense(8, activation=\"sigmoid\", name=\"layer2\"))\n",
"model_nn.add(layers.Dense(1, activation=\"softplus\", name=\"output\"))\n",
"\n",
"model_nn.compile(\n",
" loss='mean_squared_error'\n",
")\n",
"\n",
"model_nn.fit(\n",
" X_train.to_numpy().astype(float),\n",
" Y_train.to_numpy(),\n",
" batch_size=batch_size,\n",
" epochs=epochs,\n",
" verbose=0\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test: 0.0034861018261550737\n",
"Train: 0.003624740580524968\n"
]
}
],
"source": [
"Y_nn_test_pred = model_nn.predict(X_test.to_numpy().astype(float))\n",
"Y_nn_train_pred = model_nn.predict(X_train.to_numpy().astype(float))\n",
"\n",
"nn_mean_squared = mean_squared_error(Y_test, Y_nn_test_pred)\n",
"nn_mean_squared_train = mean_squared_error(Y_train, Y_nn_train_pred)\n",
"print(f\"Test: {nn_mean_squared}\")\n",
"print(f\"Train: {nn_mean_squared_train}\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0.0, 1.0)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlMAAAEzCAYAAAAVXYYvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAABVpUlEQVR4nO29f5QcV3nn/b39S1KPsByNCC+2mW6MHRMRQRIJJ8ZLEIzfxRpiG96TNxvSgYmMt9FMkiP7PfgFMmct25xOyIaN5T1hxgy2hBz1hpPdYGzHMpxYiRNizBp5AQ+YYzBmRthmX9AIj5HG0sx03/ePmtZU3bq3+1ZXdXd19/dzzpxWX9WPW1XdVU8/P76PkFKCEEIIIYQ0R6LTEyCEEEII6WZoTBFCCCGEhIDGFCGEEEJICGhMEUIIIYSEgMYUIYQQQkgIaEwRQgghhISgoTElhDgghPiJEOLbhv8XQoj/KoR4VgjxlBDi16OfJiGEEEJIPLHxTH0OwNV1/n8XgEtX/4oApsJPixBCCCGkO2hoTEkp/xXAyTqLXAfgXunwNQDnCyFeG9UECSGEEELiTBQ5UxcC+JHr/fOrY4QQQgghPU8qgm0IzZi2R40QoggnFIiBgYHtb3zjGyPYPSGEEEJIa3nyySdPSClfrfu/KIyp5wG8zvX+IgAv6haUUk4DmAaAHTt2yGPHjkWwe0IIIYSQ1iKEmDP9XxRhvgcAfHC1qu83ASxIKX8cwXYJIYQQQmJPQ8+UEOJvAewEsEUI8TyAfQDSACClvAvAEQAjAJ4FsAhgd6smSwghhBASNxoaU1LK9zf4fwngjyKbESGEEEJIF0EFdEIIIYSQENCYIoQQQggJAY0pQgghhJAQ0JgihBBCCAkBjSlCCCGEkBDQmCKEEEIICQGNKUIIIYSQENCYIoQQQggJAY0pQgghhJAQ0JgihBBCCAkBjSlCCCGEkBDQmCKEEELqUC4D+TyQSDiv5XKnZ0TiRsNGx4QQQki/Ui4DxSKwuOi8n5tz3gNAodC5eZF4Qc8UIYQQYmBiYs2QqrG46IwTUoPGFCGEEGLg+HEA28rAjXlgX8J53VZ2xknHiUsIlmE+QgghxMDmd5Qx/7YikFl1T50/B1xTxOZBAGCcr5PEKQRLzxQhhBBi4qqJNUOqRmbRGScdJU4hWBpThBBCiIGTK/p4nmmctA9TqLUTIVgaU4QQQoiBoU1Dgcb7kfJMGfn9eSRuSyC/P4/yTHsSl4aGoM1nG+rApaExRQghhBgoDZeQTWc9Y9l0FqXhUodmFC/KM2UUHyxibmEOEhJzC3MoPlhsi0E18tEycG3RyWMT0nm9tuiMtxkaU4QQQoiBwrYCpq+ZRm5TDgICuU05TF8zjcI2Jp8DwMTRCSwuexOXFpcXMXG09YlLR85OAGklaSq96Iy3GVbzEUIIIXUobCv0lPFUnilj4ugEji8cx9CmIZSGS00f3/EFfYKSaTxKOrlvFXqmCCGEkD4h6rBcJ3PK4pTPRmOKEEII6ROiDstdcrwEVJLewUrSGW8xccpnozFFCCGE9AlRh8aOPvMYkKh4BxMVZ7zFxCmfjTlThBBCQhFlDg5pLUObhjC3MKcdb4rt04BQxsTqOCab22YA4pLPRs8UIYSQpulkaXy76JSOUiuIPDSmeqUajfcoNKYIIYQ0TStK4+PSvBYIbizGae46og6NCSSN/9MrBqgNNKYIIYQ0TdQ5OLXmtXNzgJRrzWs7ZZQEMRbjNvd2sOetRUAqgxJIJZM97a1UoTFFCCGkaaIuT49T81ogmLEYt7nriDosO/meSYy9dQxJ4XiokiKJjes2YqW64lmuVUKecQnB0pgihJAYE/ewUdQ5OMePQ9tvrV3Na9XzvTmlNwo3b9jse4iHbbzbDsOgFWHZyfdMYuWWFch9Eiu3rOD00mntcnMLc0jdnIe4NYHUzXmMT4U7vjjl69GYIoSQmBIkbNSpX+hR5+BsfkcZuE
bpt3ZN0RlvMbrz/fJ9JWSE11hMJ9L4+dLPfQ9x0xxtGu+2yzBoh2q40SspBSobneta2TiHqReKuOqvxpv+3HaylY2KkFINdraHHTt2yGPHjnVk34QQ0g3k884DXSWXA2Zn197XHsTuB0s2ne3KHnJbSnnMr/gPejCVw4mJ2Zbu23S+B3eWsfG6NemHU0unMP/KvHaOr/zZrCfUl80C09NAocFlyO/PayULcptymL1xNtiBdHg/us8jpHCMYxVlPMjnVtymajK4NrsvettGCPGklHKH7v/omSKEkJhiG/KK0y/0sOgMqXrjtoxPlRuGmEzne/7RArB/FritCuyfxfwrJ7X7OLlyHNPTjrErhPNqY0gB4T1Gtp7JdqiGF7YVMPqWUU8elT9LfRXFwAryua1t33a8ldCYIoSQmGIb8jI9cOcW5mKRnBuEVjwgx6fKmHqh6AsxqQaV6XxjW9kT+hML5qT7QsHxGlarzquNIQXUyc0yjLsJEiJsh2p4eaaMQ986hIp0tKacV7MXSWXO0oCsbd92vJXQmCKEkLhy1QSQUcrDMovOuAtTjoqAiEVybhBa8YCcfm4CSCvnMb3ojLsxne9h73LykRLESsTenUdKwJJ3m1jKOuMNCOqZLGwrYPbGWVT3VTF742xoQ0pN2t/7gH8+EFIroaAjecquEjS3KRdovJXQmCKEkJhyckX/C10d14VuBAQkmg+hdIpWPCArA/rzqI7PG843NinjMwXI+/XenWYLAeYfLQAPTgMv5Zw8opdywIPTzngD2pFUbqJcBnbfUcbc+/KQtyQw97485pcNIVld2xmpDC5lUfmynVHKRseEEEIaYqvhpAvdqIZUjXY8YMPQigdk8rT+PKrjRo+IJqwnNFGrMBV5ySSAGW9uFmYKzngDotb6CsLeu8tYfrcSGg0Q0gOkz4DMvWznKYtTo2MaU4QQ0oBOaT0FMSzU0I3Jk9OKB2yU56cVD8jixSVgWQmhLWexs1LyzLvyZUOo7ahyvreVIX/bbzTtfXivvVq64sGqbNWftIpFdLOTHpr5X9WERoX0e5xMxXUy6TEgsz8ooBRk2k8pBuhTnalepTFFCCF16GSLkDCGRbsesK04P1Hn9EyOFTB24TSSpxwPSPJUDsOL03j8MwVvYvm39aE2zCj7H/YbEIvLi1q5BMDvDdR5sMR1TqK7Sk5jE6uGGIDOeWjUEOg5FI+TiUSlqepHIF7te6gzRQghdbDVeooj5ZkyJo6u6SOVhkuRP2DjeH5sjts0byGcB3ONdNoZW1pyLbQvoddMMqBqOJm0nsRCDvKOteV0GlVx0xQz6YLhpZzjKapxY341BOgljL5Vuz971JkihJAmCdsipF3oQm1Re3h0+9E9zABnvB2hUXU+41P6vKXxh7xK23Pn6SckpVcn6uBB4EMfwrncpWQS2FjVh0oH0gPa8ZFLRzzvTXlrctMccEsK2CeAW1K44hPjABpXyi0uL+KG/zYBIXDu76qrtLsAEG1Y9s5r/QrxWMoCz4x49bqeGfGFUOt5St/0/jLETY4umLgpjze9v6z/7HWw9ZAbeqYIIaQOcfS8qNTCHc0ob4fdjy1RzEf1OI2sK+HQRwqe+Yib8o5RoqBWN4qVLOT9/hCeT11ec8zp7WWIa4tYkl7v0IbUBm2oz9YzpSPx5BiqD06uDZi8YlI4eUMuhoeBRx7xLtaKz4p6XU48PoLTlxzyhkKXssj85ApUXvcoKrKCpEiiuL2IyfdM+rb3pveX8fTFRd/6+MYocNkRJ7S4MOQYaL/m38/gV6dx4p+j99LV80zRmCKEkDq0y1AJQ7sMvnqeKBvCzEcX3tIaRAFCcLqw2uinyjhy1tU65v6SVp5AbTFTGi7hA1/4gLGKMrcpt2YEXjqCQ9865Ndi0lFJAp9YWXtvCJf5wmqrqI/4dnxWjKE/Rwvh3DtTeFLclNcfo9qSxtCiplWthxjmI4SQJikU0HSLkHZhCmvoQm1hGiKHDZ/Yhv
50YSidMKVM+QU1saQPtemQm457ruvop8o49DNviHD+bUVg17gvlHTyX/wh1HoNft3b/OwThzD6llFPwriRhFLOd7Q
"text/plain": [
"<Figure size 720x360 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Średnia głosów na podstawie metascore\n",
"# niebieski kolor oznacza faktyczny stosunek a zielony stosunek oszacowany przez model\n",
"\n",
"fig = plt.figure(figsize=(10,5))\n",
"chart = fig.add_subplot()\n",
"chart.plot(X_test[\"metascore\"], Y_test,\"bo\")\n",
"chart.plot(X_test[\"metascore\"], Y_nn_test_pred, \"go\")\n",
"plt.ylim([0,1])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Podsumowanie"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.legend.Legend at 0x147a6ebc310>"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlsAAAE/CAYAAABxSAagAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAADre0lEQVR4nOy9eXgc1ZWw/97qXVurtcuS5X1DckvebcALqwl7BJ4QAsQxkASGBCafk8kHCegjkMkv4ySEsJOwhCGYAAowMzghGIxt8L7J+ypZlrXLrdbWa9X9/VHd7ZYsW94tm3qfp5/uqrp1t67uOnXOuecIKSUGBgYGBgYGBgZnBuVcd8DAwMDAwMDA4ELGELYMDAwMDAwMDM4ghrBlYGBgYGBgYHAGMYQtAwMDAwMDA4MziCFsGRgYGBgYGBicQQxhy8DAwMDAwMDgDGIIWwYG/QwhxFwhxPJz3Q8DHSFElRDiynPdj+NFCPEtIcTHxzi+RAhxT+Szca0ZGJwFDGHL4IJDCHGpEOJLIYRXCHFICPGFEGJS5Jhxc/kKIYQojFwHI3vsXyyE+I9z1a8ziZTyTSnl1ee6H2cSIcSTQojFPfaNFEK0CSHGnqt+GRgcDUPYMrigEEKkAP8D/AFIA/KA/wcEzmW/zhZCCPO57kN/Qkq5FVgA/EkIIQCEEHdz+LroFwgd4//4+HkcyBFC3Av6/AEvA7+VUm4+pz0zMOgF48dtcKExEkBK+ZaUUpVS+qSUH0spK4QQY4AXgGlCiA4hRCuAEMIphPizEKJJCLFfCPGz6I1PCDFcCPF5RDvSLIR4O7J/sBBCxgs3vZlnhBALhBAeIUSlEOJrcWWdQog/CSHqhBAHhRBPCCFMvQ1ICPGfkbqcvRwrE0K8K4T4LyFEGzBXCDFACPFhRKu3J3pDipRvjYy9QwjRGRnDYCHELCFEjRDi4cg4q4QQ3+rR36PN0aa4Ojsidc6Km6NvCyGqI/U+ElenIoT4qRBirxCiRQjxVyFEWtzxuyJttQghfi7izHlCCJsQ4ikhRG3k9ZQQwnaUa+I/gCTgfiFENvD/AfOklP6jzPedce0+0uPYcbcbuQa+EEL8IXL97BBCXBF3fInQNTRfAF3AUCHEd4QQ24UQ7UKIfUKI78WV/1wIcUvk86WRub02sn2lEGJjXLvL4867KtK2VwjxDCB6dDVZCHFfpOzFQog1kbJrhBAXR/ZfJoTYHFfnJ0KI1XHby4UQN0c+DxBCvBe5ViqFED+MK+cQQrwu9N/EdiHET4QQNXHHx0TmpVUIsVUIcWNvcyulDADzgF8JIfKA7wIu4MneyhsYnGsMYcvgQmMXoEb+0L8mhHBFD0gptwPfB1ZIKZOklKmRQ38AnMBQYCZwF/CdyLFfAB+j/5HnR8oeL1OAnUAG8GvitCvA60AYGA6MA64G7ok/OSKMvAy4gaullN6jtHMT8C6QCrwJvAXUAAOAW4FfRm/yUsrUyNiTgN8Dy4CDkXpyIn3NA74NvCSEGBU5dtQ5klIWx9X5o8iY18f171JgFHAF8KjQhV6AHwI3R+obAHiAZyNjvwh4DvgWkBtpOy+uzkeAqUAJUAxMBn7W2+RIKcPoN+ZfAP8F/JeU8sveykbafR64M9KndPTv/YTbjTAF2Ic+r48B5fECZaSd7wLJwH6gEbgeSEGf398JIcZHyn4OzIp8nhGpd2bc9ue9jCcDeC/SxwxgL3BJXJEU9O+0NtKv/wWejoz7t8D/CiHSgRXAcCFEhtAfMIqAfCFEshDCAUwAlgldAP9vYBP693UF8JAQYnakvceAwZE2rwLuiOurJXLux0AW8APgzbhrsBtSylXAa8Cf0YWseVLKUG9lDQzOOVJK42W8LqgXMAb9T7gGXaD5EMiOHJsLLI8ra0I3MV4Ut+97wJLI5z8DLwH5PdoYDEjAHLdvCXBPXDt74o4lRMrnANmRNh1xx78JfBZ37irgbfQbpfUYYy0DlsZtDw
RUIDlu338Ar/U47xtAFZAZ2Z4VmavEuDJ/BX7e1xzF7bsUXVgY2WOO8uPKrAZui3zeDlwRdywXCAFm4FHgrR7zFwSujGzvBa6NOz4bqOrjuvjPyDWRcIwyjwIL47YTT7bdyPdYC4ge478z7np5vI8+vw88GPl8BVAR+fx3dOF8ZWT7c6C05zWOLhSvjKtPRObgHuBKoDmu7J3A6h7trwDmRj4vA0rRhc2PI9fHNcBlcf2aAlT3qOP/Aq9GPu8DZscduweoiXyeDtQDStzxt4CyY8yPA11I/d3J/l8YL+N1Nl6Gf4fBBYfUNVhzAYQQo9G1GU+hCzQ9yQCs6H/YUfZzWIvyE3SNyGohhAf4jZTylePsSn1cn7oiSq0kdF8yC1B3WNGFAhyIO3c4Ec2JlDLYRzvx5w0ADkkp23uMZ2J0QwgxDngGXVvWFFfOI6Xs7HHeAPqeI4QQA9Fvvt+WUu7q0b/6uM9d6HMAMAj4mxBCizuuogujA+LHFZm/lh7j7NmfARybreiCUdcxyvRst/MU2z0opZTHKB//3SF0U/Nj6OZwBV3IjJrvVgAjI6bQEuBG4P9FtFeTgaXHMR4phIhur0TX1N15lLFF+xv9nqOatZrIZw+6Zi3AYa3aIGCAiJjoI5jQBbUj+sOR1+4BKWX89dDtOuuJlNInhKhE/24NDPothhnR4IJGSrkDXctVFN3Vo0gzujZlUNy+AiKmNSllvZTyXinlAHRtznNCiOFAVChJiDsv5zi7dQD9BpUhdbNeqpQyRUpZGFdmO7oZadHRzChxxI+pFkgTQiT3Nh4hRCbwN+ABKeWGHvW4hBCJPc6rpY85ipiR3geeklIu6qOv8RwAvhY3B6lSSruU8iBQR5z5LtJGeo9x9uxP7Qm0fTTq0LWD0XYTTrHdvDjTcW/lY99dxPfrPXSH/mypm7k/IuJjFRES1wEPAlsiQviX6KbbvVLK5uMYj4huSyk76L5wpOfYov2NmpmjwlbUZPk5urA1k8PC1gGgssd3miylvDauP/Fm2YFxn2uBgaL7QoH49g0MzlsMYcvggkIIMVoI8X+EEPmR7YHoGq2VkSIN6L4mVgAppYqukXky4n8yCP3m9V+R8+dE60J/kpeAGtEIHQTuEEKYhBDzgGHH00cpZR26GeY3QoiUiG/WMCHEzB7l3gIeBj4RQhxv3QfQb8D/IYSwCyHcwN3ovi9m9Jv5m1LKt49Sxf8TQliFENPRfYfe6WuOgFeAHVLKXx9PH+N4IVLnINAFQSHETZFj7wI3RBy2regrB+OFlreAn0XOyUA3//0Xp867wPVCd0C3oq96i/+fPNF2s4AfCiEsQog56Cbuj45S1grYgCYgHNFy9Qzh8DnwAIeFmyU9tnvyv0ChEKI08v3/kKM/FHyErjm7XQhhFkJ8A7gIfXUv6NfVKHQt2mqpr/QchG46jGrVVgNtQoh/jzjDm4QQRSISegX9Ovq/QghXxLH9gbj2V6E/xPwkMl+zgBuAhUfpr4HBeYMhbBlcaLSj//mvEkJ0ogtZW4D/Ezn+KbrJoV4IEdUE/AD9T34fsBz4C7oAATApUlcHuu/Xg1LKysixe4EfAy1AIfrN6Hi5C/3mug1diHsX3WepG1LK19Fv+J8KIQYfZ93fRPeXqkXXYj0mpfwnukZhOrrDcvzqwYLIefWRvtSiO9p/P6IZhGPP0W3A13vUOf04+vl79Dn9WAjRjv5dTYmMe2ukzYXo2pB2dH+wqCbmCWAtUIFuZlsf2XdKRNr918j46tDnoyauyIm2uwoYga4dfBK4VUrZ0lvBiOn3h+gCiQe4HX1+4vkc3Zl+6VG2e9bZDMwBfoV+nY4AvjhK2RZ0Afv/RMr+BLg+qjGLmJjXA1vjTNsrgP1SysZIGRVdQCoBKiPj/iP6AgfQr+WayLFP0K/7QOTcILpp9GuR854D7oq7Bg0MzltEd3cCAwODryIRLcJ/SSnz+yh6ThBCJAGtwIg4YbdfI4SYi75g4t
Jz3Zf+itBDTtwmpZzZZ2EDg/MYQ7NlYGDQLxFC3CCESIj4kS1A1yRVndteGZwKQohcIcQlEdP5KHQt2t/Odb8MDM4
"text/plain": [
"<Figure size 720x360 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Scatter of every model's predictions (x axis) against the true targets `Y_test` (y axis).\n",
"# Points lying on the diagonal would be perfect predictions.\n",
"fig, chart = plt.subplots(figsize=(10, 5))\n",
"chart.plot(Y_nn_test_pred, Y_test, \"bo\", alpha=0.5, label='Sieć neuronowa')\n",
"chart.plot(Y_polynomial_test_pred, Y_test, \"ro\", alpha=0.5, label=f'Regresja wielomianowa (stopnia {degree})')\n",
"chart.plot(Y_polynomial_regular_test_pred, Y_test, \"yo\", alpha=0.5, label=f'Regresja wielomianowa z wygładzaniem (stopnia {degree})')\n",
"chart.plot(Y_linear_test_pred, Y_test, \"go\", alpha=0.5, label='Regresja liniowa')\n",
"\n",
"# Title and axis labels are set on the Axes object so the figure is self-explanatory when skimmed.\n",
"chart.set_title('Stosunek rozpoznanego Y do prawidłowego Y')\n",
"chart.set_xlabel('Rozpoznane Y')\n",
"chart.set_ylabel('Prawidłowe Y')\n",
"\n",
"# Both axes are clamped to [0, 1]; any prediction outside that window is simply not drawn.\n",
"# NOTE(review): presumably the target was scaled to [0, 1] during preprocessing - confirm.\n",
"chart.set_xlim([0, 1])\n",
"chart.set_ylim([0, 1])\n",
"\n",
"# The trailing semicolon keeps the Legend repr out of the cell output.\n",
"chart.legend();"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Nazwa</th>\n",
" <th>Mean squared error (train)</th>\n",
" <th>Mean squared error (test)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Regresja liniowa</td>\n",
" <td>0.003602</td>\n",
" <td>0.003376</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Regresja wielomianowa (stopień 3)</td>\n",
" <td>0.002472</td>\n",
" <td>0.086475</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Regresja wielomianowa z funkcją skokową Heavis...</td>\n",
" <td>0.002472</td>\n",
" <td>0.007655</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Regresja wielomianowa z regularyzjacją (stopie...</td>\n",
" <td>0.003318</td>\n",
" <td>0.003350</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Sieć neuronowa</td>\n",
" <td>0.003625</td>\n",
" <td>0.003486</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Nazwa \\\n",
"0 Regresja liniowa \n",
"1 Regresja wielomianowa (stopień 3) \n",
"2 Regresja wielomianowa z funkcją skokową Heavis... \n",
"3 Regresja wielomianowa z regularyzjacją (stopie... \n",
"4 Sieć neuronowa \n",
"\n",
" Mean squared error (train) Mean squared error (test) \n",
"0 0.003602 0.003376 \n",
"1 0.002472 0.086475 \n",
"2 0.002472 0.007655 \n",
"3 0.003318 0.003350 \n",
"4 0.003625 0.003486 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Train/test mean squared error of every model, gathered into one comparison table.\n",
"# Each row keeps a model's label next to its own two metrics so the columns cannot drift out of alignment.\n",
"# The intermediate container is intentionally not called `dict`: that name would shadow the builtin\n",
"# for the rest of the kernel session and break any re-run cell that calls dict(...).\n",
"mse_rows = [\n",
"    ('Regresja liniowa', linear_mean_squared_train, linear_mean_squared),\n",
"    (f'Regresja wielomianowa (stopień {degree})', polynomial_mean_squared_train, polynomial_mean_squared),\n",
"    (f'Regresja wielomianowa z funkcją skokową Heaviside\\'a (stopień {degree})', polynomial_normalized_mean_squared_train, polynomial_normalized_mean_squared),\n",
"    (f'Regresja wielomianowa z regularyzacją (stopień {degree})', polynomial_regular_mean_squared_train, polynomial_regular_mean_squared),\n",
"    ('Sieć neuronowa', nn_mean_squared_train, nn_mean_squared),\n",
"]\n",
"mse_summary = pd.DataFrame(mse_rows, columns=['Nazwa', 'Mean squared error (train)', 'Mean squared error (test)'])\n",
"display(mse_summary)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}