704 lines
22 KiB
Plaintext
704 lines
22 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 39,
|
|||
|
"id": "ffd08cc9",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>state</th>\n",
|
|||
|
" <th>county</th>\n",
|
|||
|
" <th>community</th>\n",
|
|||
|
" <th>communityname</th>\n",
|
|||
|
" <th>fold</th>\n",
|
|||
|
" <th>population</th>\n",
|
|||
|
" <th>householdsize</th>\n",
|
|||
|
" <th>racepctblack</th>\n",
|
|||
|
" <th>racePctWhite</th>\n",
|
|||
|
" <th>racePctAsian</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>LandArea</th>\n",
|
|||
|
" <th>PopDens</th>\n",
|
|||
|
" <th>PctUsePubTrans</th>\n",
|
|||
|
" <th>PolicCars</th>\n",
|
|||
|
" <th>PolicOperBudg</th>\n",
|
|||
|
" <th>LemasPctPolicOnPatr</th>\n",
|
|||
|
" <th>LemasGangUnitDeploy</th>\n",
|
|||
|
" <th>LemasPctOfficDrugUn</th>\n",
|
|||
|
" <th>PolicBudgPerPop</th>\n",
|
|||
|
" <th>ViolentCrimesPerPop</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>16</th>\n",
|
|||
|
" <td>36</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1000</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.15</td>\n",
|
|||
|
" <td>0.31</td>\n",
|
|||
|
" <td>0.40</td>\n",
|
|||
|
" <td>0.63</td>\n",
|
|||
|
" <td>0.14</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.06</td>\n",
|
|||
|
" <td>0.39</td>\n",
|
|||
|
" <td>0.84</td>\n",
|
|||
|
" <td>0.06</td>\n",
|
|||
|
" <td>0.06</td>\n",
|
|||
|
" <td>0.91</td>\n",
|
|||
|
" <td>0.5</td>\n",
|
|||
|
" <td>0.88</td>\n",
|
|||
|
" <td>0.26</td>\n",
|
|||
|
" <td>0.49</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>23</th>\n",
|
|||
|
" <td>19</td>\n",
|
|||
|
" <td>193</td>\n",
|
|||
|
" <td>93926</td>\n",
|
|||
|
" <td>94</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.11</td>\n",
|
|||
|
" <td>0.43</td>\n",
|
|||
|
" <td>0.04</td>\n",
|
|||
|
" <td>0.89</td>\n",
|
|||
|
" <td>0.09</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.16</td>\n",
|
|||
|
" <td>0.12</td>\n",
|
|||
|
" <td>0.07</td>\n",
|
|||
|
" <td>0.04</td>\n",
|
|||
|
" <td>0.01</td>\n",
|
|||
|
" <td>0.81</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.56</td>\n",
|
|||
|
" <td>0.09</td>\n",
|
|||
|
" <td>0.63</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>33</th>\n",
|
|||
|
" <td>51</td>\n",
|
|||
|
" <td>680</td>\n",
|
|||
|
" <td>47672</td>\n",
|
|||
|
" <td>52</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.09</td>\n",
|
|||
|
" <td>0.43</td>\n",
|
|||
|
" <td>0.51</td>\n",
|
|||
|
" <td>0.58</td>\n",
|
|||
|
" <td>0.04</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.14</td>\n",
|
|||
|
" <td>0.11</td>\n",
|
|||
|
" <td>0.19</td>\n",
|
|||
|
" <td>0.05</td>\n",
|
|||
|
" <td>0.01</td>\n",
|
|||
|
" <td>0.75</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0.60</td>\n",
|
|||
|
" <td>0.1</td>\n",
|
|||
|
" <td>0.31</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>68</th>\n",
|
|||
|
" <td>34</td>\n",
|
|||
|
" <td>23</td>\n",
|
|||
|
" <td>58200</td>\n",
|
|||
|
" <td>79</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.05</td>\n",
|
|||
|
" <td>0.59</td>\n",
|
|||
|
" <td>0.23</td>\n",
|
|||
|
" <td>0.39</td>\n",
|
|||
|
" <td>0.09</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.01</td>\n",
|
|||
|
" <td>0.73</td>\n",
|
|||
|
" <td>0.28</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0.02</td>\n",
|
|||
|
" <td>0.64</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1.00</td>\n",
|
|||
|
" <td>0.23</td>\n",
|
|||
|
" <td>0.50</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>74</th>\n",
|
|||
|
" <td>9</td>\n",
|
|||
|
" <td>9</td>\n",
|
|||
|
" <td>46520</td>\n",
|
|||
|
" <td>58</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.08</td>\n",
|
|||
|
" <td>0.39</td>\n",
|
|||
|
" <td>0.08</td>\n",
|
|||
|
" <td>0.85</td>\n",
|
|||
|
" <td>0.04</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.07</td>\n",
|
|||
|
" <td>0.21</td>\n",
|
|||
|
" <td>0.04</td>\n",
|
|||
|
" <td>0.02</td>\n",
|
|||
|
" <td>0.01</td>\n",
|
|||
|
" <td>0.7</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.44</td>\n",
|
|||
|
" <td>0.11</td>\n",
|
|||
|
" <td>0.14</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1880</th>\n",
|
|||
|
" <td>34</td>\n",
|
|||
|
" <td>39</td>\n",
|
|||
|
" <td>40350</td>\n",
|
|||
|
" <td>50</td>\n",
|
|||
|
" <td>10</td>\n",
|
|||
|
" <td>0.04</td>\n",
|
|||
|
" <td>0.39</td>\n",
|
|||
|
" <td>0.39</td>\n",
|
|||
|
" <td>0.65</td>\n",
|
|||
|
" <td>0.09</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.03</td>\n",
|
|||
|
" <td>0.28</td>\n",
|
|||
|
" <td>0.32</td>\n",
|
|||
|
" <td>0.02</td>\n",
|
|||
|
" <td>0.01</td>\n",
|
|||
|
" <td>0.85</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0.99</td>\n",
|
|||
|
" <td>0.19</td>\n",
|
|||
|
" <td>0.22</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1963</th>\n",
|
|||
|
" <td>36</td>\n",
|
|||
|
" <td>27</td>\n",
|
|||
|
" <td>59641</td>\n",
|
|||
|
" <td>85</td>\n",
|
|||
|
" <td>10</td>\n",
|
|||
|
" <td>0.03</td>\n",
|
|||
|
" <td>0.32</td>\n",
|
|||
|
" <td>0.61</td>\n",
|
|||
|
" <td>0.47</td>\n",
|
|||
|
" <td>0.09</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.01</td>\n",
|
|||
|
" <td>0.47</td>\n",
|
|||
|
" <td>0.42</td>\n",
|
|||
|
" <td>0.07</td>\n",
|
|||
|
" <td>0.08</td>\n",
|
|||
|
" <td>0.49</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0.37</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.45</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1981</th>\n",
|
|||
|
" <td>9</td>\n",
|
|||
|
" <td>9</td>\n",
|
|||
|
" <td>35650</td>\n",
|
|||
|
" <td>36</td>\n",
|
|||
|
" <td>10</td>\n",
|
|||
|
" <td>0.07</td>\n",
|
|||
|
" <td>0.38</td>\n",
|
|||
|
" <td>0.17</td>\n",
|
|||
|
" <td>0.84</td>\n",
|
|||
|
" <td>0.11</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.09</td>\n",
|
|||
|
" <td>0.13</td>\n",
|
|||
|
" <td>0.17</td>\n",
|
|||
|
" <td>0.02</td>\n",
|
|||
|
" <td>0.01</td>\n",
|
|||
|
" <td>0.72</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0.62</td>\n",
|
|||
|
" <td>0.15</td>\n",
|
|||
|
" <td>0.07</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1991</th>\n",
|
|||
|
" <td>9</td>\n",
|
|||
|
" <td>9</td>\n",
|
|||
|
" <td>80070</td>\n",
|
|||
|
" <td>110</td>\n",
|
|||
|
" <td>10</td>\n",
|
|||
|
" <td>0.16</td>\n",
|
|||
|
" <td>0.37</td>\n",
|
|||
|
" <td>0.25</td>\n",
|
|||
|
" <td>0.69</td>\n",
|
|||
|
" <td>0.04</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.08</td>\n",
|
|||
|
" <td>0.32</td>\n",
|
|||
|
" <td>0.18</td>\n",
|
|||
|
" <td>0.08</td>\n",
|
|||
|
" <td>0.06</td>\n",
|
|||
|
" <td>0.78</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0.91</td>\n",
|
|||
|
" <td>0.28</td>\n",
|
|||
|
" <td>0.23</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1992</th>\n",
|
|||
|
" <td>25</td>\n",
|
|||
|
" <td>17</td>\n",
|
|||
|
" <td>72600</td>\n",
|
|||
|
" <td>107</td>\n",
|
|||
|
" <td>10</td>\n",
|
|||
|
" <td>0.08</td>\n",
|
|||
|
" <td>0.51</td>\n",
|
|||
|
" <td>0.06</td>\n",
|
|||
|
" <td>0.87</td>\n",
|
|||
|
" <td>0.22</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.03</td>\n",
|
|||
|
" <td>0.38</td>\n",
|
|||
|
" <td>0.33</td>\n",
|
|||
|
" <td>0.02</td>\n",
|
|||
|
" <td>0.02</td>\n",
|
|||
|
" <td>0.79</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0.22</td>\n",
|
|||
|
" <td>0.18</td>\n",
|
|||
|
" <td>0.19</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>123 rows × 128 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" state county community communityname fold population householdsize \\\n",
|
|||
|
"16 36 1 1000 0 1 0.15 0.31 \n",
|
|||
|
"23 19 193 93926 94 1 0.11 0.43 \n",
|
|||
|
"33 51 680 47672 52 1 0.09 0.43 \n",
|
|||
|
"68 34 23 58200 79 1 0.05 0.59 \n",
|
|||
|
"74 9 9 46520 58 1 0.08 0.39 \n",
|
|||
|
"... ... ... ... ... ... ... ... \n",
|
|||
|
"1880 34 39 40350 50 10 0.04 0.39 \n",
|
|||
|
"1963 36 27 59641 85 10 0.03 0.32 \n",
|
|||
|
"1981 9 9 35650 36 10 0.07 0.38 \n",
|
|||
|
"1991 9 9 80070 110 10 0.16 0.37 \n",
|
|||
|
"1992 25 17 72600 107 10 0.08 0.51 \n",
|
|||
|
"\n",
|
|||
|
" racepctblack racePctWhite racePctAsian ... LandArea PopDens \\\n",
|
|||
|
"16 0.40 0.63 0.14 ... 0.06 0.39 \n",
|
|||
|
"23 0.04 0.89 0.09 ... 0.16 0.12 \n",
|
|||
|
"33 0.51 0.58 0.04 ... 0.14 0.11 \n",
|
|||
|
"68 0.23 0.39 0.09 ... 0.01 0.73 \n",
|
|||
|
"74 0.08 0.85 0.04 ... 0.07 0.21 \n",
|
|||
|
"... ... ... ... ... ... ... \n",
|
|||
|
"1880 0.39 0.65 0.09 ... 0.03 0.28 \n",
|
|||
|
"1963 0.61 0.47 0.09 ... 0.01 0.47 \n",
|
|||
|
"1981 0.17 0.84 0.11 ... 0.09 0.13 \n",
|
|||
|
"1991 0.25 0.69 0.04 ... 0.08 0.32 \n",
|
|||
|
"1992 0.06 0.87 0.22 ... 0.03 0.38 \n",
|
|||
|
"\n",
|
|||
|
" PctUsePubTrans PolicCars PolicOperBudg LemasPctPolicOnPatr \\\n",
|
|||
|
"16 0.84 0.06 0.06 0.91 \n",
|
|||
|
"23 0.07 0.04 0.01 0.81 \n",
|
|||
|
"33 0.19 0.05 0.01 0.75 \n",
|
|||
|
"68 0.28 0 0.02 0.64 \n",
|
|||
|
"74 0.04 0.02 0.01 0.7 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"1880 0.32 0.02 0.01 0.85 \n",
|
|||
|
"1963 0.42 0.07 0.08 0.49 \n",
|
|||
|
"1981 0.17 0.02 0.01 0.72 \n",
|
|||
|
"1991 0.18 0.08 0.06 0.78 \n",
|
|||
|
"1992 0.33 0.02 0.02 0.79 \n",
|
|||
|
"\n",
|
|||
|
" LemasGangUnitDeploy LemasPctOfficDrugUn PolicBudgPerPop \\\n",
|
|||
|
"16 0.5 0.88 0.26 \n",
|
|||
|
"23 1 0.56 0.09 \n",
|
|||
|
"33 0 0.60 0.1 \n",
|
|||
|
"68 0 1.00 0.23 \n",
|
|||
|
"74 1 0.44 0.11 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"1880 0 0.99 0.19 \n",
|
|||
|
"1963 0 0.37 1 \n",
|
|||
|
"1981 0 0.62 0.15 \n",
|
|||
|
"1991 0 0.91 0.28 \n",
|
|||
|
"1992 0 0.22 0.18 \n",
|
|||
|
"\n",
|
|||
|
" ViolentCrimesPerPop \n",
|
|||
|
"16 0.49 \n",
|
|||
|
"23 0.63 \n",
|
|||
|
"33 0.31 \n",
|
|||
|
"68 0.50 \n",
|
|||
|
"74 0.14 \n",
|
|||
|
"... ... \n",
|
|||
|
"1880 0.22 \n",
|
|||
|
"1963 0.45 \n",
|
|||
|
"1981 0.07 \n",
|
|||
|
"1991 0.23 \n",
|
|||
|
"1992 0.19 \n",
|
|||
|
"\n",
|
|||
|
"[123 rows x 128 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 39,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import numpy as np\n",
|
|||
|
"import pandas as pd\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import sklearn\n",
|
|||
|
"from sklearn.preprocessing import PolynomialFeatures, LabelEncoder\n",
|
|||
|
"from sklearn.linear_model import LinearRegression, Ridge, RidgeCV\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.metrics import mean_squared_error\n",
|
|||
|
"\n",
|
|||
|
"col_names = [\n",
|
|||
|
"\"state\",\n",
|
|||
|
"\"county\",\n",
|
|||
|
"\"community\",\n",
|
|||
|
"\"communityname\",\n",
|
|||
|
"\"fold\",\n",
|
|||
|
"\"population\",\n",
|
|||
|
"\"householdsize\",\n",
|
|||
|
"\"racepctblack\",\n",
|
|||
|
"\"racePctWhite\",\n",
|
|||
|
"\"racePctAsian\",\n",
|
|||
|
"\"racePctHisp\",\n",
|
|||
|
"\"agePct12t21\",\n",
|
|||
|
"\"agePct12t29\",\n",
|
|||
|
"\"agePct16t24\",\n",
|
|||
|
"\"agePct65up\",\n",
|
|||
|
"\"numbUrban\",\n",
|
|||
|
"\"pctUrban\",\n",
|
|||
|
"\"medIncome\",\n",
|
|||
|
"\"pctWWage\",\n",
|
|||
|
"\"pctWFarmSelf\",\n",
|
|||
|
"\"pctWInvInc\",\n",
|
|||
|
"\"pctWSocSec\",\n",
|
|||
|
"\"pctWPubAsst\",\n",
|
|||
|
"\"pctWRetire\",\n",
|
|||
|
"\"medFamInc\",\n",
|
|||
|
"\"perCapInc\",\n",
|
|||
|
"\"whitePerCap\",\n",
|
|||
|
"\"blackPerCap\",\n",
|
|||
|
"\"indianPerCap\",\n",
|
|||
|
"\"AsianPerCap\",\n",
|
|||
|
"\"OtherPerCap\",\n",
|
|||
|
"\"HispPerCap\",\n",
|
|||
|
"\"NumUnderPov\",\n",
|
|||
|
"\"PctPopUnderPov\",\n",
|
|||
|
"\"PctLess9thGrade\",\n",
|
|||
|
"\"PctNotHSGrad\",\n",
|
|||
|
"\"PctBSorMore\",\n",
|
|||
|
"\"PctUnemployed\",\n",
|
|||
|
"\"PctEmploy\",\n",
|
|||
|
"\"PctEmplManu\",\n",
|
|||
|
"\"PctEmplProfServ\",\n",
|
|||
|
"\"PctOccupManu\",\n",
|
|||
|
"\"PctOccupMgmtProf\",\n",
|
|||
|
"\"MalePctDivorce\",\n",
|
|||
|
"\"MalePctNevMarr\",\n",
|
|||
|
"\"FemalePctDiv\",\n",
|
|||
|
"\"TotalPctDiv\",\n",
|
|||
|
"\"PersPerFam\",\n",
|
|||
|
"\"PctFam2Par\",\n",
|
|||
|
"\"PctKids2Par\",\n",
|
|||
|
"\"PctYoungKids2Par\",\n",
|
|||
|
"\"PctTeen2Par\",\n",
|
|||
|
"\"PctWorkMomYoungKids\",\n",
|
|||
|
"\"PctWorkMom\",\n",
|
|||
|
"\"NumIlleg\",\n",
|
|||
|
"\"PctIlleg\",\n",
|
|||
|
"\"NumImmig\",\n",
|
|||
|
"\"PctImmigRecent\",\n",
|
|||
|
"\"PctImmigRec5\",\n",
|
|||
|
"\"PctImmigRec8\",\n",
|
|||
|
"\"PctImmigRec10\",\n",
|
|||
|
"\"PctRecentImmig\",\n",
|
|||
|
"\"PctRecImmig5\",\n",
|
|||
|
"\"PctRecImmig8\",\n",
|
|||
|
"\"PctRecImmig10\",\n",
|
|||
|
"\"PctSpeakEnglOnly\",\n",
|
|||
|
"\"PctNotSpeakEnglWell\",\n",
|
|||
|
"\"PctLargHouseFam\",\n",
|
|||
|
"\"PctLargHouseOccup\",\n",
|
|||
|
"\"PersPerOccupHous\",\n",
|
|||
|
"\"PersPerOwnOccHous\",\n",
|
|||
|
"\"PersPerRentOccHous\",\n",
|
|||
|
"\"PctPersOwnOccup\",\n",
|
|||
|
"\"PctPersDenseHous\",\n",
|
|||
|
"\"PctHousLess3BR\",\n",
|
|||
|
"\"MedNumBR\",\n",
|
|||
|
"\"HousVacant\",\n",
|
|||
|
"\"PctHousOccup\",\n",
|
|||
|
"\"PctHousOwnOcc\",\n",
|
|||
|
"\"PctVacantBoarded\",\n",
|
|||
|
"\"PctVacMore6Mos\",\n",
|
|||
|
"\"MedYrHousBuilt\",\n",
|
|||
|
"\"PctHousNoPhone\",\n",
|
|||
|
"\"PctWOFullPlumb\",\n",
|
|||
|
"\"OwnOccLowQuart\",\n",
|
|||
|
"\"OwnOccMedVal\",\n",
|
|||
|
"\"OwnOccHiQuart\",\n",
|
|||
|
"\"RentLowQ\",\n",
|
|||
|
"\"RentMedian\",\n",
|
|||
|
"\"RentHighQ\",\n",
|
|||
|
"\"MedRent\",\n",
|
|||
|
"\"MedRentPctHousInc\",\n",
|
|||
|
"\"MedOwnCostPctInc\",\n",
|
|||
|
"\"MedOwnCostPctIncNoMtg\",\n",
|
|||
|
"\"NumInShelters\",\n",
|
|||
|
"\"NumStreet\",\n",
|
|||
|
"\"PctForeignBorn\",\n",
|
|||
|
"\"PctBornSameState\",\n",
|
|||
|
"\"PctSameHouse85\",\n",
|
|||
|
"\"PctSameCity85\",\n",
|
|||
|
"\"PctSameState85\",\n",
|
|||
|
"\"LemasSwornFT\",\n",
|
|||
|
"\"LemasSwFTPerPop\",\n",
|
|||
|
"\"LemasSwFTFieldOps\",\n",
|
|||
|
"\"LemasSwFTFieldPerPop\",\n",
|
|||
|
"\"LemasTotalReq\",\n",
|
|||
|
"\"LemasTotReqPerPop\",\n",
|
|||
|
"\"PolicReqPerOffic\",\n",
|
|||
|
"\"PolicPerPop\",\n",
|
|||
|
"\"RacialMatchCommPol\",\n",
|
|||
|
"\"PctPolicWhite\",\n",
|
|||
|
"\"PctPolicBlack\",\n",
|
|||
|
"\"PctPolicHisp\",\n",
|
|||
|
"\"PctPolicAsian\",\n",
|
|||
|
"\"PctPolicMinor\",\n",
|
|||
|
"\"OfficAssgnDrugUnits\",\n",
|
|||
|
"\"NumKindsDrugsSeiz\",\n",
|
|||
|
"\"PolicAveOTWorked\",\n",
|
|||
|
"\"LandArea\",\n",
|
|||
|
"\"PopDens\",\n",
|
|||
|
"\"PctUsePubTrans\",\n",
|
|||
|
"\"PolicCars\",\n",
|
|||
|
"\"PolicOperBudg\",\n",
|
|||
|
"\"LemasPctPolicOnPatr\",\n",
|
|||
|
"\"LemasGangUnitDeploy\",\n",
|
|||
|
"\"LemasPctOfficDrugUn\",\n",
|
|||
|
"\"PolicBudgPerPop\",\n",
|
|||
|
"\"ViolentCrimesPerPop\"]\n",
|
|||
|
"\n",
|
|||
|
"# Load communities.data (presumably the UCI Communities and Crime table - confirm).\n",
|
|||
|
"# The file is read without a header row (names=col_names) and marks missing\n",
|
|||
|
"# values with '?'. Parsing '?' as NaN at read time keeps numeric columns numeric\n",
|
|||
|
"# and avoids DataFrame.replace('?', None), whose meaning depends on the pandas\n",
|
|||
|
"# version (older releases pad-fill instead of inserting a missing value).\n",
|
|||
|
"df = pd.read_csv('communities.data', names=col_names, na_values='?')\n",
|
|||
|
"\n",
|
|||
|
"# Keep only the rows in which every column is observed.\n",
|
|||
|
"df = df.dropna(axis='rows')\n",
|
|||
|
"\n",
|
|||
|
"# Replace the community name strings with integer codes.\n",
|
|||
|
"le = LabelEncoder()\n",
|
|||
|
"df['communityname'] = le.fit_transform(df['communityname'])\n",
|
|||
|
"df"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 40,
|
|||
|
"id": "9c0f6f6d",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Separate the regression target from the remaining predictor columns.\n",
|
|||
|
"target_col = 'ViolentCrimesPerPop'\n",
|
|||
|
"y = df[target_col]\n",
|
|||
|
"X = df.drop(columns=target_col)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 41,
|
|||
|
"id": "2bdc2e77",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Build the polynomial/interaction features in this cell. Previously this cell\n",
|
|||
|
"# read poly_features although no earlier cell defines it, so a fresh\n",
|
|||
|
"# Restart & Run All stopped here with a NameError.\n",
|
|||
|
"# NOTE(review): degree=2 is an assumption - the degree behind the stored outputs is unknown.\n",
|
|||
|
"poly = PolynomialFeatures(degree=2, include_bias=False)\n",
|
|||
|
"poly_features = poly.fit_transform(X)\n",
|
|||
|
"\n",
|
|||
|
"# Fixed seed so the 70/30 split, and the scores computed from it, are reproducible.\n",
|
|||
|
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
|||
|
"    poly_features, y, test_size=0.3, random_state=42\n",
|
|||
|
")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 42,
|
|||
|
"id": "2bbd2ddb",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"/home/tonywesoly/.local/lib/python3.8/site-packages/sklearn/linear_model/_ridge.py:251: UserWarning: Singular matrix in solving dual problem. Using least-squares solution instead.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"1.1533542718655332"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 42,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Choose the ridge penalty by 10-fold cross-validation on the training split only.\n",
|
|||
|
"# Searching on the full (X, y) let the held-out rows influence alpha and tuned it\n",
|
|||
|
"# on X rather than on the X_train matrix the final model is fitted to.\n",
|
|||
|
"ridgecv = RidgeCV(alphas=np.arange(1, 100, 5), scoring='r2', cv=10)\n",
|
|||
|
"ridgecv.fit(X_train, y_train)\n",
|
|||
|
"\n",
|
|||
|
"# Refit a plain Ridge with the selected alpha and report RMSE on the test split.\n",
|
|||
|
"ridge = Ridge(alpha=ridgecv.alpha_)\n",
|
|||
|
"ridge.fit(X_train, y_train)\n",
|
|||
|
"ridge_y_predicted = ridge.predict(X_test)\n",
|
|||
|
"ridge_rmse = np.sqrt(mean_squared_error(y_test, ridge_y_predicted))\n",
|
|||
|
"ridge_rmse"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 43,
|
|||
|
"id": "dbfe728b",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
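"# NOTE(review): poly_features is consumed by the train_test_split cell earlier in\n",
|
|||
|
"# the notebook, so it has to be computed before that cell rather than here. Also,\n",
|
|||
|
"# degree=11 over the 127 columns of X would produce an infeasibly large feature matrix.\n",
|
|||
|
"#poly = PolynomialFeatures(degree=11, include_bias=False)\n",
|
|||
|
"#poly_features = poly.fit_transform(X)"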
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 44,
|
|||
|
"id": "3be15622",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"1.6511181528162753"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 44,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Ordinary least squares on the current training split, scored by RMSE on the test split.\n",
|
|||
|
"poly_reg_model = LinearRegression().fit(X_train, y_train)\n",
|
|||
|
"poly_reg_y_predicted = poly_reg_model.predict(X_test)\n",
|
|||
|
"poly_reg_mse = mean_squared_error(y_test, poly_reg_y_predicted)\n",
|
|||
|
"poly_reg_rmse = np.sqrt(poly_reg_mse)\n",
|
|||
|
"poly_reg_rmse"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 45,
|
|||
|
"id": "4ab0949a",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"0.3085495600528652"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 45,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Baseline: linear regression on the untransformed columns of X, using a fresh\n",
|
|||
|
"# 70/30 split with a fixed seed; the score is RMSE on the held-out rows.\n",
|
|||
|
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
|||
|
"    X, y, test_size=0.3, random_state=42\n",
|
|||
|
")\n",
|
|||
|
"lin_reg_model = LinearRegression().fit(X_train, y_train)\n",
|
|||
|
"lin_reg_y_predicted = lin_reg_model.predict(X_test)\n",
|
|||
|
"lin_reg_mse = mean_squared_error(y_test, lin_reg_y_predicted)\n",
|
|||
|
"lin_reg_rmse = np.sqrt(lin_reg_mse)\n",
|
|||
|
"lin_reg_rmse"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "99365180",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": []
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "0e0b2f8e",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": []
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3 (ipykernel)",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.8.10"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 5
|
|||
|
}
|