1875 lines
211 KiB
Plaintext
1875 lines
211 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 5,
|
|||
|
"id": "d80a4450",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Requirement already satisfied: pandas in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (1.5.3)\n",
|
|||
|
"Requirement already satisfied: python-dateutil>=2.8.1 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from pandas) (2.8.2)\n",
|
|||
|
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from pandas) (2023.2)\n",
|
|||
|
"Requirement already satisfied: numpy>=1.21.0 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from pandas) (1.24.2)\n",
|
|||
|
"Requirement already satisfied: six>=1.5 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n",
|
|||
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"%pip install --user pandas"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 6,
|
|||
|
"id": "350abc87",
|
|||
|
"metadata": {
|
|||
|
"scrolled": true
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Requirement already satisfied: kaggle in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (1.5.13)\n",
|
|||
|
"Requirement already satisfied: six>=1.10 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from kaggle) (1.16.0)\n",
|
|||
|
"Requirement already satisfied: certifi in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from kaggle) (2022.12.7)\n",
|
|||
|
"Requirement already satisfied: python-dateutil in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from kaggle) (2.8.2)\n",
|
|||
|
"Requirement already satisfied: requests in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from kaggle) (2.28.2)\n",
|
|||
|
"Requirement already satisfied: tqdm in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from kaggle) (4.65.0)\n",
|
|||
|
"Requirement already satisfied: python-slugify in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from kaggle) (8.0.1)\n",
|
|||
|
"Requirement already satisfied: urllib3 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from kaggle) (1.26.15)\n",
|
|||
|
"Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from python-slugify->kaggle) (1.3)\n",
|
|||
|
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from requests->kaggle) (3.1.0)\n",
|
|||
|
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from requests->kaggle) (3.4)\n",
|
|||
|
"Requirement already satisfied: colorama in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from tqdm->kaggle) (0.4.6)\n",
|
|||
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"%pip install --user kaggle"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 7,
|
|||
|
"id": "0063a986",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"UsageError: Line magic function `%python` not found (But cell magic `%%python` exists, did you mean that instead?).\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"%python -m kaggle datasets download -d ulrikthygepedersen/diamonds"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 8,
|
|||
|
"id": "5bc46bfd",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Downloading diamonds.zip to c:\\Users\\admin\\ium_z487175\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"\n",
|
|||
|
" 0%| | 0.00/733k [00:00<?, ?B/s]\n",
|
|||
|
"100%|██████████| 733k/733k [00:00<00:00, 1.35MB/s]\n",
|
|||
|
"100%|██████████| 733k/733k [00:00<00:00, 1.33MB/s]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"!kaggle datasets download -d shivam2503/diamonds"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 10,
|
|||
|
"id": "75024e0f",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"!tar -xf diamonds.zip\n",
|
|||
|
"## rozpakowanie archiwum .zip w windowsie"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 88,
|
|||
|
"id": "99c20a95",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Unnamed: 0</th>\n",
|
|||
|
" <th>carat</th>\n",
|
|||
|
" <th>cut</th>\n",
|
|||
|
" <th>color</th>\n",
|
|||
|
" <th>clarity</th>\n",
|
|||
|
" <th>depth</th>\n",
|
|||
|
" <th>table</th>\n",
|
|||
|
" <th>price</th>\n",
|
|||
|
" <th>x</th>\n",
|
|||
|
" <th>y</th>\n",
|
|||
|
" <th>z</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.23</td>\n",
|
|||
|
" <td>Ideal</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>61.5</td>\n",
|
|||
|
" <td>55.0</td>\n",
|
|||
|
" <td>326</td>\n",
|
|||
|
" <td>3.95</td>\n",
|
|||
|
" <td>3.98</td>\n",
|
|||
|
" <td>2.43</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>0.21</td>\n",
|
|||
|
" <td>Premium</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>59.8</td>\n",
|
|||
|
" <td>61.0</td>\n",
|
|||
|
" <td>326</td>\n",
|
|||
|
" <td>3.89</td>\n",
|
|||
|
" <td>3.84</td>\n",
|
|||
|
" <td>2.31</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>0.23</td>\n",
|
|||
|
" <td>Good</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>VS1</td>\n",
|
|||
|
" <td>56.9</td>\n",
|
|||
|
" <td>65.0</td>\n",
|
|||
|
" <td>327</td>\n",
|
|||
|
" <td>4.05</td>\n",
|
|||
|
" <td>4.07</td>\n",
|
|||
|
" <td>2.31</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>0.29</td>\n",
|
|||
|
" <td>Premium</td>\n",
|
|||
|
" <td>I</td>\n",
|
|||
|
" <td>VS2</td>\n",
|
|||
|
" <td>62.4</td>\n",
|
|||
|
" <td>58.0</td>\n",
|
|||
|
" <td>334</td>\n",
|
|||
|
" <td>4.20</td>\n",
|
|||
|
" <td>4.23</td>\n",
|
|||
|
" <td>2.63</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>0.31</td>\n",
|
|||
|
" <td>Good</td>\n",
|
|||
|
" <td>J</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>63.3</td>\n",
|
|||
|
" <td>58.0</td>\n",
|
|||
|
" <td>335</td>\n",
|
|||
|
" <td>4.34</td>\n",
|
|||
|
" <td>4.35</td>\n",
|
|||
|
" <td>2.75</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53935</th>\n",
|
|||
|
" <td>53936</td>\n",
|
|||
|
" <td>0.72</td>\n",
|
|||
|
" <td>Ideal</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>60.8</td>\n",
|
|||
|
" <td>57.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.75</td>\n",
|
|||
|
" <td>5.76</td>\n",
|
|||
|
" <td>3.50</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53936</th>\n",
|
|||
|
" <td>53937</td>\n",
|
|||
|
" <td>0.72</td>\n",
|
|||
|
" <td>Good</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>63.1</td>\n",
|
|||
|
" <td>55.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.69</td>\n",
|
|||
|
" <td>5.75</td>\n",
|
|||
|
" <td>3.61</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53937</th>\n",
|
|||
|
" <td>53938</td>\n",
|
|||
|
" <td>0.70</td>\n",
|
|||
|
" <td>Very Good</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>62.8</td>\n",
|
|||
|
" <td>60.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.66</td>\n",
|
|||
|
" <td>5.68</td>\n",
|
|||
|
" <td>3.56</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53938</th>\n",
|
|||
|
" <td>53939</td>\n",
|
|||
|
" <td>0.86</td>\n",
|
|||
|
" <td>Premium</td>\n",
|
|||
|
" <td>H</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>61.0</td>\n",
|
|||
|
" <td>58.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>6.15</td>\n",
|
|||
|
" <td>6.12</td>\n",
|
|||
|
" <td>3.74</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53939</th>\n",
|
|||
|
" <td>53940</td>\n",
|
|||
|
" <td>0.75</td>\n",
|
|||
|
" <td>Ideal</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>62.2</td>\n",
|
|||
|
" <td>55.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.83</td>\n",
|
|||
|
" <td>5.87</td>\n",
|
|||
|
" <td>3.64</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>53940 rows × 11 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Unnamed: 0 carat cut color clarity depth table price x \\\n",
|
|||
|
"0 1 0.23 Ideal E SI2 61.5 55.0 326 3.95 \n",
|
|||
|
"1 2 0.21 Premium E SI1 59.8 61.0 326 3.89 \n",
|
|||
|
"2 3 0.23 Good E VS1 56.9 65.0 327 4.05 \n",
|
|||
|
"3 4 0.29 Premium I VS2 62.4 58.0 334 4.20 \n",
|
|||
|
"4 5 0.31 Good J SI2 63.3 58.0 335 4.34 \n",
|
|||
|
"... ... ... ... ... ... ... ... ... ... \n",
|
|||
|
"53935 53936 0.72 Ideal D SI1 60.8 57.0 2757 5.75 \n",
|
|||
|
"53936 53937 0.72 Good D SI1 63.1 55.0 2757 5.69 \n",
|
|||
|
"53937 53938 0.70 Very Good D SI1 62.8 60.0 2757 5.66 \n",
|
|||
|
"53938 53939 0.86 Premium H SI2 61.0 58.0 2757 6.15 \n",
|
|||
|
"53939 53940 0.75 Ideal D SI2 62.2 55.0 2757 5.83 \n",
|
|||
|
"\n",
|
|||
|
" y z \n",
|
|||
|
"0 3.98 2.43 \n",
|
|||
|
"1 3.84 2.31 \n",
|
|||
|
"2 4.07 2.31 \n",
|
|||
|
"3 4.23 2.63 \n",
|
|||
|
"4 4.35 2.75 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"53935 5.76 3.50 \n",
|
|||
|
"53936 5.75 3.61 \n",
|
|||
|
"53937 5.68 3.56 \n",
|
|||
|
"53938 6.12 3.74 \n",
|
|||
|
"53939 5.87 3.64 \n",
|
|||
|
"\n",
|
|||
|
"[53940 rows x 11 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 88,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"diamonds = pd.read_csv('diamonds.csv')\n",
|
|||
|
"#Wyświetlenie zbioru danych\n",
|
|||
|
"diamonds"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 32,
|
|||
|
"id": "122b0b57",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>id</th>\n",
|
|||
|
" <th>carat</th>\n",
|
|||
|
" <th>cut</th>\n",
|
|||
|
" <th>color</th>\n",
|
|||
|
" <th>clarity</th>\n",
|
|||
|
" <th>depth</th>\n",
|
|||
|
" <th>table</th>\n",
|
|||
|
" <th>price</th>\n",
|
|||
|
" <th>x</th>\n",
|
|||
|
" <th>y</th>\n",
|
|||
|
" <th>z</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.23</td>\n",
|
|||
|
" <td>Ideal</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>61.5</td>\n",
|
|||
|
" <td>55.0</td>\n",
|
|||
|
" <td>326</td>\n",
|
|||
|
" <td>3.95</td>\n",
|
|||
|
" <td>3.98</td>\n",
|
|||
|
" <td>2.43</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>0.21</td>\n",
|
|||
|
" <td>Premium</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>59.8</td>\n",
|
|||
|
" <td>61.0</td>\n",
|
|||
|
" <td>326</td>\n",
|
|||
|
" <td>3.89</td>\n",
|
|||
|
" <td>3.84</td>\n",
|
|||
|
" <td>2.31</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>0.23</td>\n",
|
|||
|
" <td>Good</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>VS1</td>\n",
|
|||
|
" <td>56.9</td>\n",
|
|||
|
" <td>65.0</td>\n",
|
|||
|
" <td>327</td>\n",
|
|||
|
" <td>4.05</td>\n",
|
|||
|
" <td>4.07</td>\n",
|
|||
|
" <td>2.31</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>0.29</td>\n",
|
|||
|
" <td>Premium</td>\n",
|
|||
|
" <td>I</td>\n",
|
|||
|
" <td>VS2</td>\n",
|
|||
|
" <td>62.4</td>\n",
|
|||
|
" <td>58.0</td>\n",
|
|||
|
" <td>334</td>\n",
|
|||
|
" <td>4.20</td>\n",
|
|||
|
" <td>4.23</td>\n",
|
|||
|
" <td>2.63</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>0.31</td>\n",
|
|||
|
" <td>Good</td>\n",
|
|||
|
" <td>J</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>63.3</td>\n",
|
|||
|
" <td>58.0</td>\n",
|
|||
|
" <td>335</td>\n",
|
|||
|
" <td>4.34</td>\n",
|
|||
|
" <td>4.35</td>\n",
|
|||
|
" <td>2.75</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53935</th>\n",
|
|||
|
" <td>53936</td>\n",
|
|||
|
" <td>0.72</td>\n",
|
|||
|
" <td>Ideal</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>60.8</td>\n",
|
|||
|
" <td>57.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.75</td>\n",
|
|||
|
" <td>5.76</td>\n",
|
|||
|
" <td>3.50</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53936</th>\n",
|
|||
|
" <td>53937</td>\n",
|
|||
|
" <td>0.72</td>\n",
|
|||
|
" <td>Good</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>63.1</td>\n",
|
|||
|
" <td>55.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.69</td>\n",
|
|||
|
" <td>5.75</td>\n",
|
|||
|
" <td>3.61</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53937</th>\n",
|
|||
|
" <td>53938</td>\n",
|
|||
|
" <td>0.70</td>\n",
|
|||
|
" <td>Very Good</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>62.8</td>\n",
|
|||
|
" <td>60.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.66</td>\n",
|
|||
|
" <td>5.68</td>\n",
|
|||
|
" <td>3.56</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53938</th>\n",
|
|||
|
" <td>53939</td>\n",
|
|||
|
" <td>0.86</td>\n",
|
|||
|
" <td>Premium</td>\n",
|
|||
|
" <td>H</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>61.0</td>\n",
|
|||
|
" <td>58.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>6.15</td>\n",
|
|||
|
" <td>6.12</td>\n",
|
|||
|
" <td>3.74</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53939</th>\n",
|
|||
|
" <td>53940</td>\n",
|
|||
|
" <td>0.75</td>\n",
|
|||
|
" <td>Ideal</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>62.2</td>\n",
|
|||
|
" <td>55.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.83</td>\n",
|
|||
|
" <td>5.87</td>\n",
|
|||
|
" <td>3.64</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>53940 rows × 11 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" id carat cut color clarity depth table price x y \\\n",
|
|||
|
"0 1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 \n",
|
|||
|
"1 2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 \n",
|
|||
|
"2 3 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 \n",
|
|||
|
"3 4 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 \n",
|
|||
|
"4 5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 \n",
|
|||
|
"... ... ... ... ... ... ... ... ... ... ... \n",
|
|||
|
"53935 53936 0.72 Ideal D SI1 60.8 57.0 2757 5.75 5.76 \n",
|
|||
|
"53936 53937 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 \n",
|
|||
|
"53937 53938 0.70 Very Good D SI1 62.8 60.0 2757 5.66 5.68 \n",
|
|||
|
"53938 53939 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 \n",
|
|||
|
"53939 53940 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 \n",
|
|||
|
"\n",
|
|||
|
" z \n",
|
|||
|
"0 2.43 \n",
|
|||
|
"1 2.31 \n",
|
|||
|
"2 2.31 \n",
|
|||
|
"3 2.63 \n",
|
|||
|
"4 2.75 \n",
|
|||
|
"... ... \n",
|
|||
|
"53935 3.50 \n",
|
|||
|
"53936 3.61 \n",
|
|||
|
"53937 3.56 \n",
|
|||
|
"53938 3.74 \n",
|
|||
|
"53939 3.64 \n",
|
|||
|
"\n",
|
|||
|
"[53940 rows x 11 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 32,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"#przydzielanie nazwy kolumny z id\n",
|
|||
|
"diamonds = diamonds.rename(columns={diamonds.columns[0]: 'id'})\n",
|
|||
|
"diamonds"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 98,
|
|||
|
"id": "a489dab8",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Unnamed: 0</th>\n",
|
|||
|
" <th>carat</th>\n",
|
|||
|
" <th>cut</th>\n",
|
|||
|
" <th>color</th>\n",
|
|||
|
" <th>clarity</th>\n",
|
|||
|
" <th>depth</th>\n",
|
|||
|
" <th>table</th>\n",
|
|||
|
" <th>price</th>\n",
|
|||
|
" <th>x</th>\n",
|
|||
|
" <th>y</th>\n",
|
|||
|
" <th>z</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.23</td>\n",
|
|||
|
" <td>ideal</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>61.5</td>\n",
|
|||
|
" <td>55.0</td>\n",
|
|||
|
" <td>326</td>\n",
|
|||
|
" <td>3.95</td>\n",
|
|||
|
" <td>3.98</td>\n",
|
|||
|
" <td>2.43</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>0.21</td>\n",
|
|||
|
" <td>premium</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>59.8</td>\n",
|
|||
|
" <td>61.0</td>\n",
|
|||
|
" <td>326</td>\n",
|
|||
|
" <td>3.89</td>\n",
|
|||
|
" <td>3.84</td>\n",
|
|||
|
" <td>2.31</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>0.23</td>\n",
|
|||
|
" <td>good</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>VS1</td>\n",
|
|||
|
" <td>56.9</td>\n",
|
|||
|
" <td>65.0</td>\n",
|
|||
|
" <td>327</td>\n",
|
|||
|
" <td>4.05</td>\n",
|
|||
|
" <td>4.07</td>\n",
|
|||
|
" <td>2.31</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>0.29</td>\n",
|
|||
|
" <td>premium</td>\n",
|
|||
|
" <td>I</td>\n",
|
|||
|
" <td>VS2</td>\n",
|
|||
|
" <td>62.4</td>\n",
|
|||
|
" <td>58.0</td>\n",
|
|||
|
" <td>334</td>\n",
|
|||
|
" <td>4.20</td>\n",
|
|||
|
" <td>4.23</td>\n",
|
|||
|
" <td>2.63</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>0.31</td>\n",
|
|||
|
" <td>good</td>\n",
|
|||
|
" <td>J</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>63.3</td>\n",
|
|||
|
" <td>58.0</td>\n",
|
|||
|
" <td>335</td>\n",
|
|||
|
" <td>4.34</td>\n",
|
|||
|
" <td>4.35</td>\n",
|
|||
|
" <td>2.75</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53935</th>\n",
|
|||
|
" <td>53936</td>\n",
|
|||
|
" <td>0.72</td>\n",
|
|||
|
" <td>ideal</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>60.8</td>\n",
|
|||
|
" <td>57.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.75</td>\n",
|
|||
|
" <td>5.76</td>\n",
|
|||
|
" <td>3.50</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53936</th>\n",
|
|||
|
" <td>53937</td>\n",
|
|||
|
" <td>0.72</td>\n",
|
|||
|
" <td>good</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>63.1</td>\n",
|
|||
|
" <td>55.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.69</td>\n",
|
|||
|
" <td>5.75</td>\n",
|
|||
|
" <td>3.61</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53937</th>\n",
|
|||
|
" <td>53938</td>\n",
|
|||
|
" <td>0.70</td>\n",
|
|||
|
" <td>very good</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>62.8</td>\n",
|
|||
|
" <td>60.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.66</td>\n",
|
|||
|
" <td>5.68</td>\n",
|
|||
|
" <td>3.56</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53938</th>\n",
|
|||
|
" <td>53939</td>\n",
|
|||
|
" <td>0.86</td>\n",
|
|||
|
" <td>premium</td>\n",
|
|||
|
" <td>H</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>61.0</td>\n",
|
|||
|
" <td>58.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>6.15</td>\n",
|
|||
|
" <td>6.12</td>\n",
|
|||
|
" <td>3.74</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53939</th>\n",
|
|||
|
" <td>53940</td>\n",
|
|||
|
" <td>0.75</td>\n",
|
|||
|
" <td>ideal</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>62.2</td>\n",
|
|||
|
" <td>55.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.83</td>\n",
|
|||
|
" <td>5.87</td>\n",
|
|||
|
" <td>3.64</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>53940 rows × 11 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Unnamed: 0 carat cut color clarity depth table price x \\\n",
|
|||
|
"0 1 0.23 ideal E SI2 61.5 55.0 326 3.95 \n",
|
|||
|
"1 2 0.21 premium E SI1 59.8 61.0 326 3.89 \n",
|
|||
|
"2 3 0.23 good E VS1 56.9 65.0 327 4.05 \n",
|
|||
|
"3 4 0.29 premium I VS2 62.4 58.0 334 4.20 \n",
|
|||
|
"4 5 0.31 good J SI2 63.3 58.0 335 4.34 \n",
|
|||
|
"... ... ... ... ... ... ... ... ... ... \n",
|
|||
|
"53935 53936 0.72 ideal D SI1 60.8 57.0 2757 5.75 \n",
|
|||
|
"53936 53937 0.72 good D SI1 63.1 55.0 2757 5.69 \n",
|
|||
|
"53937 53938 0.70 very good D SI1 62.8 60.0 2757 5.66 \n",
|
|||
|
"53938 53939 0.86 premium H SI2 61.0 58.0 2757 6.15 \n",
|
|||
|
"53939 53940 0.75 ideal D SI2 62.2 55.0 2757 5.83 \n",
|
|||
|
"\n",
|
|||
|
" y z \n",
|
|||
|
"0 3.98 2.43 \n",
|
|||
|
"1 3.84 2.31 \n",
|
|||
|
"2 4.07 2.31 \n",
|
|||
|
"3 4.23 2.63 \n",
|
|||
|
"4 4.35 2.75 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"53935 5.76 3.50 \n",
|
|||
|
"53936 5.75 3.61 \n",
|
|||
|
"53937 5.68 3.56 \n",
|
|||
|
"53938 6.12 3.74 \n",
|
|||
|
"53939 5.87 3.64 \n",
|
|||
|
"\n",
|
|||
|
"[53940 rows x 11 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 98,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"#Convert to lowerCase\n",
|
|||
|
"\n",
|
|||
|
"diamonds['cut'] = diamonds['cut'].str.lower()\n",
|
|||
|
"diamonds\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 58,
|
|||
|
"id": "1cc3a8af",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Requirement already satisfied: scikit-learn in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (1.2.2)\n",
|
|||
|
"Requirement already satisfied: numpy>=1.17.3 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from scikit-learn) (1.24.2)\n",
|
|||
|
"Requirement already satisfied: scipy>=1.3.2 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from scikit-learn) (1.10.1)\n",
|
|||
|
"Requirement already satisfied: joblib>=1.1.1 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from scikit-learn) (1.2.0)\n",
|
|||
|
"Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from scikit-learn) (3.1.0)\n",
|
|||
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"%pip install scikit-learn"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 60,
|
|||
|
"id": "1836b2a3",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import sklearn\n",
|
|||
|
"from sklearn.model_selection import train_test_split"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 91,
|
|||
|
"id": "fcf6448a",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"#podział danych na train/test/dev w proporcji 4:1:1\n",
|
|||
|
"#losować ustawiona na 10\n",
|
|||
|
"\n",
|
|||
|
"#1. Dzielimy na zbiór treningowy 80 % i resztę danych\n",
|
|||
|
"diamonds_train, diamonds_test_dev = sklearn.model_selection.train_test_split(diamonds, test_size=0.2, random_state=10)\n",
|
|||
|
"\n",
|
|||
|
"#2. Podział reszty danych na zbiór testowy 10% i walidacyjny 10%\n",
|
|||
|
"diamonds_test, diamonds_dev = train_test_split(diamonds_test_dev, test_size=0.5, random_state=10)\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 92,
|
|||
|
"id": "9476846a",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Rozmiar diamonds: (53940, 11)\n",
|
|||
|
"Rozmiar diamonds_train: (43152, 11)\n",
|
|||
|
"Rozmiar diamonds_test: (5394, 11)\n",
|
|||
|
"Rozmiar diamonds_dev: (5394, 11)\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"#Wyświetlenie rozmiarów zbiorów danych train/test/dev\n",
|
|||
|
"print(\"Rozmiar diamonds: \", diamonds.shape)\n",
|
|||
|
"print(\"Rozmiar diamonds_train: \", diamonds_train.shape)\n",
|
|||
|
"print(\"Rozmiar diamonds_test: \", diamonds_test.shape)\n",
|
|||
|
"print(\"Rozmiar diamonds_dev: \", diamonds_dev.shape)\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 99,
|
|||
|
"id": "7e1f11cb",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" Unnamed: 0 carat depth table price \\\n",
|
|||
|
"count 53940.000000 53940.000000 53940.000000 53940.000000 53940.000000 \n",
|
|||
|
"mean 26970.500000 0.797940 61.749405 57.457184 3932.799722 \n",
|
|||
|
"std 15571.281097 0.474011 1.432621 2.234491 3989.439738 \n",
|
|||
|
"min 1.000000 0.200000 43.000000 43.000000 326.000000 \n",
|
|||
|
"25% 13485.750000 0.400000 61.000000 56.000000 950.000000 \n",
|
|||
|
"50% 26970.500000 0.700000 61.800000 57.000000 2401.000000 \n",
|
|||
|
"75% 40455.250000 1.040000 62.500000 59.000000 5324.250000 \n",
|
|||
|
"max 53940.000000 5.010000 79.000000 95.000000 18823.000000 \n",
|
|||
|
"\n",
|
|||
|
" x y z \n",
|
|||
|
"count 53940.000000 53940.000000 53940.000000 \n",
|
|||
|
"mean 5.731157 5.734526 3.538734 \n",
|
|||
|
"std 1.121761 1.142135 0.705699 \n",
|
|||
|
"min 0.000000 0.000000 0.000000 \n",
|
|||
|
"25% 4.710000 4.720000 2.910000 \n",
|
|||
|
"50% 5.700000 5.710000 3.530000 \n",
|
|||
|
"75% 6.540000 6.540000 4.040000 \n",
|
|||
|
"max 10.740000 58.900000 31.800000 \n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# średnią, minimum, maksimum, odchylenia standardowe, medianę wartości poszczególnych parametrów)\n",
|
|||
|
"print(diamonds.describe())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 100,
|
|||
|
"id": "88a89b38",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" Unnamed: 0 carat depth table price \\\n",
|
|||
|
"count 43152.000000 43152.000000 43152.000000 43152.000000 43152.000000 \n",
|
|||
|
"mean 26971.712111 0.795979 61.748241 57.448355 3920.786939 \n",
|
|||
|
"std 15565.585777 0.472184 1.426394 2.224297 3975.894633 \n",
|
|||
|
"min 3.000000 0.200000 43.000000 44.000000 327.000000 \n",
|
|||
|
"25% 13469.750000 0.400000 61.000000 56.000000 946.000000 \n",
|
|||
|
"50% 27019.500000 0.700000 61.800000 57.000000 2400.000000 \n",
|
|||
|
"75% 40439.250000 1.040000 62.500000 59.000000 5313.250000 \n",
|
|||
|
"max 53938.000000 5.010000 79.000000 76.000000 18823.000000 \n",
|
|||
|
"\n",
|
|||
|
" x y z \n",
|
|||
|
"count 43152.000000 43152.000000 43152.000000 \n",
|
|||
|
"mean 5.726933 5.731011 3.535791 \n",
|
|||
|
"std 1.119635 1.147069 0.693846 \n",
|
|||
|
"min 0.000000 0.000000 0.000000 \n",
|
|||
|
"25% 4.710000 4.720000 2.910000 \n",
|
|||
|
"50% 5.690000 5.710000 3.520000 \n",
|
|||
|
"75% 6.540000 6.530000 4.030000 \n",
|
|||
|
"max 10.740000 58.900000 8.060000 \n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(diamonds_train.describe())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 101,
|
|||
|
"id": "80b5060f",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" Unnamed: 0 carat depth table price \\\n",
|
|||
|
"count 5394.000000 5394.000000 5394.000000 5394.000000 5394.000000 \n",
|
|||
|
"mean 26951.351316 0.802666 61.760808 57.470189 3970.308676 \n",
|
|||
|
"std 15565.740253 0.482062 1.464893 2.309900 4083.195823 \n",
|
|||
|
"min 1.000000 0.210000 52.300000 43.000000 326.000000 \n",
|
|||
|
"25% 13519.750000 0.400000 61.000000 56.000000 958.000000 \n",
|
|||
|
"50% 27013.500000 0.700000 61.900000 57.000000 2375.500000 \n",
|
|||
|
"75% 40342.250000 1.050000 62.500000 59.000000 5273.750000 \n",
|
|||
|
"max 53930.000000 3.510000 78.200000 95.000000 18806.000000 \n",
|
|||
|
"\n",
|
|||
|
" x y z \n",
|
|||
|
"count 5394.000000 5394.000000 5394.000000 \n",
|
|||
|
"mean 5.738817 5.739106 3.542097 \n",
|
|||
|
"std 1.132069 1.123925 0.701446 \n",
|
|||
|
"min 3.840000 3.780000 0.000000 \n",
|
|||
|
"25% 4.710000 4.710000 2.900000 \n",
|
|||
|
"50% 5.690000 5.700000 3.530000 \n",
|
|||
|
"75% 6.550000 6.540000 4.040000 \n",
|
|||
|
"max 9.660000 9.630000 6.030000 \n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(diamonds_test.describe())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 102,
|
|||
|
"id": "31f4af56",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" Unnamed: 0 carat depth table price \\\n",
|
|||
|
"count 5394.000000 5394.000000 5394.000000 5394.000000 5394.000000 \n",
|
|||
|
"mean 26979.951798 0.808901 61.747312 57.514813 3991.393029 \n",
|
|||
|
"std 15625.161644 0.480344 1.449816 2.238671 4002.742530 \n",
|
|||
|
"min 2.000000 0.200000 53.200000 51.000000 326.000000 \n",
|
|||
|
"25% 13525.500000 0.400000 61.000000 56.000000 961.000000 \n",
|
|||
|
"50% 26529.500000 0.710000 61.850000 57.000000 2484.500000 \n",
|
|||
|
"75% 40665.500000 1.050000 62.500000 59.000000 5465.250000 \n",
|
|||
|
"max 53940.000000 3.040000 73.600000 68.000000 18779.000000 \n",
|
|||
|
"\n",
|
|||
|
" x y z \n",
|
|||
|
"count 5394.000000 5394.000000 5394.000000 \n",
|
|||
|
"mean 5.757290 5.758066 3.558910 \n",
|
|||
|
"std 1.128191 1.120344 0.797759 \n",
|
|||
|
"min 3.790000 3.750000 0.000000 \n",
|
|||
|
"25% 4.730000 4.740000 2.930000 \n",
|
|||
|
"50% 5.710000 5.730000 3.540000 \n",
|
|||
|
"75% 6.560000 6.540000 4.040000 \n",
|
|||
|
"max 9.510000 9.460000 31.800000 \n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(diamonds_dev.describe())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 120,
|
|||
|
"id": "eab3e1f9",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"Ideal 17292\n",
|
|||
|
"Premium 10954\n",
|
|||
|
"Very Good 9708\n",
|
|||
|
"Good 3929\n",
|
|||
|
"Fair 1269\n",
|
|||
|
"Name: cut, dtype: int64"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 120,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"#Wyświetlenie częstości przykładów dla poszczególnych klas diamentów\n",
|
|||
|
"diamonds_train[\"cut\"].value_counts()\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 105,
|
|||
|
"id": "2e7c37d9",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"Ideal 2184\n",
|
|||
|
"Premium 1385\n",
|
|||
|
"Very Good 1183\n",
|
|||
|
"Good 473\n",
|
|||
|
"Fair 169\n",
|
|||
|
"Name: cut, dtype: int64"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 105,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"diamonds_test[\"cut\"].value_counts()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 106,
|
|||
|
"id": "a7ccece5",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"Ideal 2075\n",
|
|||
|
"Premium 1452\n",
|
|||
|
"Very Good 1191\n",
|
|||
|
"Good 504\n",
|
|||
|
"Fair 172\n",
|
|||
|
"Name: cut, dtype: int64"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 106,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"diamonds_dev[\"cut\"].value_counts()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 109,
|
|||
|
"id": "17223f54",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAskAAAJbCAYAAAAWmwmlAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABaxElEQVR4nO3dd3yN9///8edJSEJIYiQhqBF7ryKI0apZWz9GqkHQqtGKaumw+mm1tOgwqtRoS6mqWh8V1N4jVm1RlISIJEaNJNfvD7+cr3MlSDTJCXncb7dzuznv632u87rO5fDMO+/rfVkMwzAEAAAAwMrB3gUAAAAAmQ0hGQAAADAhJAMAAAAmhGQAAADAhJAMAAAAmBCSAQAAABNCMgAAAGBCSAYAAABMCMkAkAnMmTNHX3/9tb3LyBLWrVun//73v7px44a9S8nSvvvuO33zzTf2LgN4IEIykMU1atRIFStWfGS/YsWKqUePHmn2vmfOnJHFYtHs2bPTbJ9PquXLl6tfv36qVq1aql/7b85Lcudg1KhRslgsj7W/9GSxWDRq1Cjr89mzZ8tisejMmTM2/caPH68SJUrI0dFRVatWTbKfU6dOqX379vLy8pKrq2v6Fp2MlB7H0+7nn3/WG2+8oWeffdbepQAPREgGMonE/ywTH9myZVOhQoXUo0cP/f333/YuD/fZunWrRo0apejo6H+9r7///ltBQUH6/vvvVa9evX9fXBa2evVqvf3226pXr55mzZqljz/+2Gb77du39dJLL2nAgAHq27evnap8us2bN0+TJk16aJ8TJ07otdde08KFC1W9evWMKQx4DNnsXQAAW2PGjFHx4sV169Ytbd++XbNnz9bmzZt16NAhubi42Ls86F5IHj16tHr06CEPD49/ta/9+/dr8uTJ6tix42O9/tixY3JwyHrjHd27d1eXLl3k7OxsbVu3bp0cHBw0c+ZMOTk5JXnNwYMH1bNnTw0cODAjS32o5I7jSTZv3jwdOnRIb7755gP77N+/X7NmzVKLFi0yrjDgMRCSgUymRYsWqlmzpiSpd+/eyp8/vz799FMtXbpU//nPf+xcHdJay5Yt/9Xrn5ZwlVqOjo5ydHS0abt06ZJy5MiRbECWpJo1a1q/W5lFcsfxtOvUqZO9SwBSJOsNPwBPGH9/f0n35lLeb926dfL395erq6s8PDzUtm1bHTlyxLo9cb7pgx4Ps3r1auXMmVNdu3ZVXFyczbaTJ09Kku7cuaMRI0aoRo0acnd3l6urq/z9/fXHH38k2V90dLR69Oghd3d3eXh4KDAwMFVTFaKjozV48GAVK1ZMzs7OKly4sF555RVFRkZKujcv90HHuX79eut+/v77b/Xq1Uve3t5ycXFRjRo1tGrVqiTv99VXX6lChQrKmTOn8uTJo5o1a2revHmS7s3ZHTp0qCSpePHi1vdJnFMaFxenDz/8UL6+vnJ2dlaxYsX07rvv6vbt2zbvsXv3bjVr1kz58+dXjhw5VLx4cfXq1cumT0JCgr744gtVqlRJLi4u8vT0VPPmzbV7925rn5TOSf4352DWrFl67rnn5OXlJWdnZ5UvX15Tp05N0WvDw8PVs2dPFS5cWM7OzipYsKDatm1r/bwS50An93jYcZnn8losFs2aNUs3btywvj5xrnVKzklwcLDy5csnwzCsbQMHDpTFYtGXX35pbYuIiJDFYnnk8d++fVuDBw+Wp6encufOrTZt2uj8+fOPPA5J+u2339SqVSv5+PjI2dlZvr6++vDDDxUfH2/z2sTrCQ4cOKCGDRsqZ86cKlmypBYtWiRJ2rBhg2rXrq0cOXKoTJkyWrNmTZL3v/874ezsrAoVKui7776z6bN+/XpZLBYtXLhQH330kQoXLiwXFxc9//zz1n8PEutZsWKF/vrrL+s5KFasmHX7pUuXFBQUZP3+ValSRXPmzLF5r+rVq6tDhw42bZUqVZLFYtGBAwesbQsWLJDFYr
H5Nw9Ia4wkA5lc4n+eefLksbatWbNGLVq0UIkSJTRq1Cj9888/+uqrr1SvXj3t3btXxYoVk6enp77//nubfd29e1eDBw9+4EibdO8isk6dOqlz58767rvvbEa5QkJC9OuvvyosLEwJCQmaMWOGunbtqj59+ujatWuaOXOmmjVrpp07d1ovmjIMQ23bttXmzZv12muvqVy5cvr1118VGBiYouO/fv26/P39deTIEfXq1UvVq1dXZGSkli5dqvPnzyt//vyaNGmSrl+/bvO6iRMnKjQ0VPny5ZN0L9zUqVNHUVFR6t+/v4oUKaIff/xRL774olavXq3nnntOkvTtt99q0KBB6tSpk9544w3dunVLBw4c0I4dO9StWzd16NBBx48f1/z58zVx4kTlz59fkuTp6Snp3uj/nDlz1KlTJw0ZMkQ7duzQ2LFjdeTIEf3666+S7oWFpk2bytPTU8OGDZOHh4fOnDmjxYsX2xxDUFCQZs+erRYtWqh3796Ki4vTpk2btH379lSNiP7bczB16lRVqFBBbdq0UbZs2bRs2TK9/vrrSkhIUP/+/R/62o4dO+rw4cMaOHCgihUrpkuXLikkJERnz55VsWLF1KFDB5UsWdLmNXv27NGkSZPk5eWV4mP8/vvvNX36dO3cuVMzZsyQJNWtW1dSys6Jv7+/Jk6cqMOHD1svZN20aZMcHBy0adMmDRo0yNomSQ0aNHhoPb1799YPP/ygbt26qW7dulq3bp1atWqVomOZPXu2cuXKpeDgYOXKlUvr1q3TiBEjFBsbq/Hjx9v0vXr1ql588UV16dJFL730kqZOnaouXbroxx9/1JtvvqnXXntN3bp10/jx49WpUyedO3dOuXPnlvR/3wmLxaIBAwbI09NT//vf/xQUFKTY2NgkUyY++eQTOTg46K233lJMTIzGjRungIAA7dixQ5L03nvvKSYmRufPn9fEiRMlSbly5ZIk/fPPP2rUqJFOnjypAQMGqHjx4vr555/Vo0cPRUdH64033rCeh/nz51vfMyoqSocPH7aeh8qVK1vPg6enp8qVK5eizxR4LAaATGHWrFmGJGPNmjXG5cuXjXPnzhmLFi0yPD09DWdnZ+PcuXPWvlWrVjW8vLyMK1euWNv2799vODg4GK+88soD3+P11183HB0djXXr1lnbGjZsaFSoUMEwDMP45ZdfjOzZsxt9+vQx4uPjrX2uXr1q5M+f33jmmWeM0NBQwzAMIy4uzrh9+7bN/q9evWp4e3sbvXr1srYtWbLEkGSMGzfO2hYXF2f4+/sbkoxZs2Y99HMZMWKEIclYvHhxkm0JCQnJvmbhwoWGJGPMmDHWtqCgIEOSsXHjRmvbzZs3jbJlyxpVq1a1trVt29b6eTzI+PHjDUlGWFiYTXtoaKghyejdu7dN+1tvvWVIsn7uv/76qyHJ2LVr1wPfY926dYYkY9CgQUm23X/cRYsWNQIDAx9ab2rOwciRIw3zfw03b95Mss9mzZoZJUqUeOj7Xr161ZBkjB8//qH97nf58mXjmWeeMSpVqmRcv37d2i7JGDlypPV54vfl/nMQGBhouLq62uwvpefk0qVLhiRjypQphmEYRnR0tOHg4GC89NJLhre3t/V1gwYNMvLmzfvAv3v3v+frr79u096tW7cUHUdyn/err75q5MyZ07h165a1rWHDhoYkY968eda2o0ePGpIMBwcHY/v27db233//Pcm5DgoKMgoWLGhERkbavFeXLl0Md3d3ax1//PGHIckoV66czXf+iy++MCQZBw8etLa1atXKKFq0aJL6J02aZEgyfvjhB2vbnTt3DD8/PyNXrlxGbGysYRiG8fPPPxuSjD///NMwDMNYunSp4ezsbLRp08bo3Lmz9bWVK1c22rdvn+R9gLTEdAsgk2nSpIk8PT1VpEgRderUSa6urlq6dKkKFy4sSbp48aJCQ0PVo0cP5c2b1/q6ypUr64UXXtDKlSuT3e/cuXM1ZcoUjRs3To
0bN06yff78+ercubNeffVVffPNN9aLwY4dO6aaNWsqMjJSjRs3VpUqVSTdm0uZOCKdkJCgqKgoxcXFqWbNmtq7d69
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 800x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"diamonds['cut'].value_counts().plot(kind='bar')\n",
|
|||
|
"plt.title('Rozkład częstości dla szlifów diamentów dla zbioru diamonds')\n",
|
|||
|
"plt.xlabel('Szlif')\n",
|
|||
|
"plt.ylabel('Liczba wystąpień')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 110,
|
|||
|
"id": "8633ea7c",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAsAAAAJdCAYAAAA818FyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABhhElEQVR4nO3dd3gU1f/28TuFFAJJaEkIJUDoELr00KQjXaQpLYhgAAVFRflKUQRB6U0UAZEqICIoXYqA9CKI1NCE0JPQ0+b5gyf7Y0mARDdZyLxf17XXlT0zu/OZzJY7J2fOOBiGYQgAAAAwCUd7FwAAAACkJQIwAAAATIUADAAAAFMhAAMAAMBUCMAAAAAwFQIwAAAATIUADAAAAFMhAAMAAMBUCMDAc2z27NmaNGmSvcswhQ0bNujTTz/V7du37V2KqX377bf66quv7F0GgOccARj/Sq1atVSyZMmnrpcvXz516dLFZts9ffq0HBwcNGvWLJs95/NqxYoV6tWrl8qWLZvix/6X45LUMRgyZIgcHBz+1fOlJgcHBw0ZMsRyf9asWXJwcNDp06et1hs9erQKFCggJycnlSlTJtHznDx5Ui1btpSPj488PDxSt+gkJHc/0rsffvhBb731ll544YU03/bGjRvl4OCgjRs3Wtq6dOmifPnypWkdDg4O6t2791PXe55eI8/TZ4qtPfreRtohAD/nEj7kEm7Ozs7KlSuXunTpon/++cfe5eEh27Zt05AhQxQREfGfn+uff/5RSEiI5syZo2rVqv334kxszZo1eu+991StWjXNnDlTn332mdXy+/fvq02bNurdu7d69OhhpyrTt3nz5mncuHFPXOf48ePq2bOnFi1apHLlyqVNYcBzwpbfL0/y2WefadmyZam6jbRCAE4nhg0bpjlz5mjatGlq1KiRvv/+e9WsWVP37t2zd2n4/7Zt26ahQ4fa5APqwIEDmjx5slq3bv2vHn/06FF9/fXX/7mO581rr72mu3fvKiAgwNK2YcMGOTo6asaMGerUqZMaN25s9Zg///xTXbt21fDhw9O63MdKaj+eZ8kJwAcOHNDMmTPVqFGjtCnqOfe8v0YGDRqku3fv2ruM54Ytv1+eJD0FYGd7FwDbaNSokSpUqCBJ6t69u7Jnz67PP/9cy5cv1yuvvGLn6mBrj4a0lHJ1dbVRJc8XJycnOTk5WbVdvnxZ7u7ucnFxSfIxFSpUsLy3nhVJ7Ud69/LLL9u7hOeKrV8jd+7cUcaMGW32fE/j7OwsZ2ciClIPPcDpVHBwsKQHYxcftmHDBgUHB8vDw0Pe3t5q3ry5jhw5YlmeMBbrcbcnWbNmjTJmzKj27dsrNjbWatmJEyckSdHR0fr4449Vvnx5eXl5ycPDQ8HBwfrtt98SPV9ERIS6dOkiLy8veXt7q3Pnzin66zYiIkL9+vVTvnz55Orqqty5c6tTp066evWqpAfjYB+3nw+P8/vnn3/UrVs3+fr6ys3NTeXLl9eqVasSbW/ixIkqUaKEMmbMqCxZsqhChQqaN2+epAfj2QYMGCBJyp8/v2U7CePzYmNj9cknnygwMFCurq7Kly+fPvzwQ92/f99qG7t371aDBg2UPXt2ubu7K3/+/OrWrZvVOvHx8Ro/fryCgoLk5uamHDlyqGHDhtq9e7dlneSOAf4vx2DmzJmqU6eOfHx85OrqquLFi2vq1KnJemx4eLi6du2q3Llzy9XVVTlz5lTz5s0tv6+E8YFJ3Z60X4+Oi3RwcNDMmTN1+/Zty+MTxiEm55j0799f2bJlk2EYlrY+ffrIwcFBEyZMsLRdunRJDg4OT93/+/fvq1+/fsqRI4cyZ86sZs2a6fz580/dD0n66aef1KRJE/n7+8vV1VWBgYH65JNPFBcXZ/XYhPH7Bw8eVM2aNZUxY0YVLFhQixcvliRt2rRJlSpVkru7u4oUKaJ169Yl2v7D7wlXV1eVKFFC3377rdU6CWNmFy1apO
HDhyt37txyc3PTiy++aPk8SKhn5cqVOnPmjOUYPDyu9vLlywoJCbG8/0qXLq3Zs2dbbatcuXJq1aqVVVtQUJAcHBx08OBBS9vChQvl4OBg9ZmXlPPnz6tFixby8PCQj4+P+vXrl+i9+DhffPGFqlatqmzZssnd3V3ly5e3/G6f5NHhbA/fatWqlWj9uXPnqkiRIpbPpM2bNyf5fI+OAZ4yZYpKlCghV1dX+fv7KzQ0NNF7OuE1smfPHtWoUUMZM2bUhx9+KOnxY1Zt/ZmS1Bjg5H6m5MuXTy+99JI2btyoChUqyN3dXUFBQZbP9aVLl1o+H8uXL699+/Yleo6nfVc+XOOJEyfUpUsXeXt7y8vLS127dtWdO3es1k3ue/vmzZt6++23Ld9bPj4+qlevnvbu3fvY3+nTvl8k6fvvv1f58uXl7u6urFmzql27djp37pzV8xw/flytW7eWn5+f3NzclDt3brVr106RkZGSHhz727dva/bs2cn6vH3W8edVOpXwws+SJYulbd26dWrUqJEKFCigIUOG6O7du5o4caKqVaumvXv3Kl++fMqRI4fmzJlj9VwxMTHq16/fY3vIpAcnZL388stq27atvv32W6ueh7Vr1+rHH39UWFiY4uPj9c0336h9+/Z6/fXXdfPmTc2YMUMNGjTQzp07LScgGYah5s2b6/fff1fPnj1VrFgx/fjjj+rcuXOy9v/WrVsKDg7WkSNH1K1bN5UrV05Xr17V8uXLdf78eWXPnl3jxo3TrVu3rB43duxY7d+/X9myZZP0ILhUrlxZ169fV2hoqPLkyaO5c+fqpZde0po1a1SnTh1J0tdff62+ffvq5Zdf1ltvvaV79+7p4MGD2rFjhzp06KBWrVrp2LFjmj9/vsaOHavs2bNLknLkyCHpQa/97Nmz9fLLL+udd97Rjh07NGLECB05ckQ//vijpAdBoH79+sqRI4c++OADeXt76/Tp01q6dKnVPoSEhGjWrFlq1KiRunfvrtjYWG3ZskV//PFHinoy/+sxmDp1qkqUKKFmzZrJ2dlZP//8s958803Fx8crNDT0iY9t3bq1Dh8+rD59+ihfvny6fPmy1q5dq7Nnzypfvnxq1aqVChYsaPWYPXv2aNy4cfLx8Un2Ps6ZM0fTp0/Xzp079c0330iSqlatKil5xyQ4OFhjx47V4cOHLSeFbtmyRY6OjtqyZYv69u1raZOkGjVqPLGe7t276/vvv1eHDh1UtWpVbdiwQU2aNEnWvsyaNUuZMmVS//79lSlTJm3YsEEff/yxoqKiNHr0aKt1b9y4oZdeeknt2rVTmzZtNHXqVLVr105z587V22+/rZ49e6pDhw4aPXq0Xn75ZZ07d06ZM2eW9H/viYSTsXLkyKFff/1VISEhioqK0ttvv221rZEjR8rR0VHvvvuuIiMjNWrUKHXs2FE7duyQJH300UeKjIzU+fPnNXbsWElSpkyZJEl3795VrVq1dOLECfXu3Vv58+fXDz/8oC5duigiIkJvvfWW5TjMnz/fss3r16/r8OHDluNQqlQpy3HIkSOHihUr9tjf4927d/Xiiy/q7Nmz6tu3r/z9/TVnzhxt2LAhWcdh/PjxatasmTp27Kjo6GgtWLBAbdq00YoVK554LGvUqJHos/fMmTMaNGhQotf0pk2btHDhQvXt21eurq6aMmWKGjZsqJ07dz7x5OQhQ4Zo6NChqlu3rnr16qWjR49q6tSp2rVrl7Zu3aoMGTJY1r127ZoaNWqkdu3a6dVXX5Wvr2+y9v9J0vIz5cSJE+rQoYPeeOMNvfrqq/riiy/UtGlTTZs2TR9++KHefPNNSdKIESP0yiuv6OjRo3J0fNAnmJzvyoe98soryp8/v0aMGKG9e/fqm2++kY+Pjz7//HPLOsl9b/fs2VOLFy9W7969Vbx4cV27dk2///67jhw58tix70/7fhk+fLj+97//6ZVXXlH37t115coVTZw4UTVq1NC+ffvk7e2t6O
hoNWjQQPfv31efPn3k5+enf/75RytWrFBERIS8vLw0Z84cde/eXRUrVrScDxEYGJisY/dMMvBcmzlzpiHJWLdunXH
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 800x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"diamonds_train['cut'].value_counts().plot(kind='bar')\n",
|
|||
|
"plt.title('Rozkład częstości dla szlifów diamentów dla zbioru diamonds trenującego')\n",
|
|||
|
"plt.xlabel('Szlif')\n",
|
|||
|
"plt.ylabel('Liczba wystąpień')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 111,
|
|||
|
"id": "ab567b6f",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAsAAAAJdCAYAAAA818FyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABlyklEQVR4nO3dd3QU5f/28SuFVEhCSSGUAKFD6NJDkRIQ6SpNaUEFgygoKsqXZgFBKVJFFBAFQUBEVLoU6R1BBIKhCQk1CT1tnj94sj+WBEh0kwXm/Tpnz8nOzO5+Zmdn9sq999zjYBiGIQAAAMAkHO1dAAAAAJCdCMAAAAAwFQIwAAAATIUADAAAAFMhAAMAAMBUCMAAAAAwFQIwAAAATIUADAAAAFMhAAM2Nnv2bE2aNMneZZjC2rVr9cEHH+jatWv2LsXUvvrqK33++ef2LgMAMowA/Bhp0KCBypcv/8DlihQpou7du9vsdY8fPy4HBwfNmjXLZs/5qFq2bJn69OmjypUrZ/qx/2W7pLcNhg0bJgcHh3/1fFnJwcFBw4YNs9yfNWuWHBwcdPz4cavlxowZo2LFisnJyUmVKlVK8zzHjh1T27Zt5efnJ09Pz6wtOh0ZXY/H3ffff6/XXntNTzzxRLa/9rp16+Tg4KB169ZZpnXv3l1FihTJ1jocHBzUt2/fBy73KH1GHqVjiq3dvW/j8UQAzgKpB7nUm7OzswoUKKDu3bvrn3/+sXd5uMPmzZs1bNgwxcbG/ufn+ueffxQeHq45c+aoTp06/704E1u5cqXeeust1alTRzNnztRHH31kNf/WrVt69tln1bdvX7300kt2qvLxNnfuXI0fP/6+yxw9elS9e/fWggULVKVKlewpDHhE2PL75X4++ugjLVmyJEtf43FEAM5CI0aM0Jw5czRt2jQ1b95c33zzjerXr6+bN2/auzT8f5s3b9bw4cNtcoDat2+fJk+erPbt2/+rxx8+fFhffPHFf67jUfPCCy/oxo0bCgoKskxbu3atHB0d9eWXX6pr16566qmnrB7zxx9/qEePHvrwww+zu9x7Sm89HmUZCcD79u3TzJkz1bx58+wp6hH3qH9GBg8erBs3bti7jEeGLb9f7ocA/O8427uAx1nz5s1VrVo1SVKvXr2UL18+ffzxx1q6dKmee+45O1cHW7s7pGWWq6urjSp5tDg5OcnJyclq2rlz5+Tu7i4XF5d0H1OtWjXLvvWwSG89HnfPPPOMvUt4pNj6M3L9+nV5eHjY7PkexNnZWc7OxAY8HmgBzkahoaGSbvddvNPatWsVGhoqT09P+fj4qHXr1jp06JBlfmpfrHvd7mflypXy8PBQp06dlJSUZDUvMjJSkpSQkKAhQ4aoatWq8vb2lqenp0JDQ/Xbb7+leb7Y2Fh1795d3t7e8vHxUbdu3TL1321sbKz69++vIkWKyNXVVQULFlTXrl114cIFSbf7wd5rPe/s5/fPP/+oZ8+e8vf3l5ubm6pWrarly5eneb2JEyeqXLly8vDwUO7cuVWtWjXNnTtX0u3+bAMHDpQkFS1a1PI6qf3zkpKS9P777ys4OFiurq4qUqSI3n33Xd26dcvqNXbu3KmwsDDly5dP7u7uKlq0qHr27Gm1TEpKiiZMmKCQkBC5ubnJ19dXzZo1086dOy3LZLQP8H/ZBjNnztSTTz4pPz8/ubq6qmzZspo6dWqGHhsdHa0ePXqoYMGCcnV1Vf78+dW6dWvL+5XaPzC92/3W6+5+kQ4ODpo5c6auXbtmeXxqP8SMbJMBAwYob968MgzDMu3VV1+Vg4ODPvvsM8u0mJgYOTg4PHD9b926pf79+8vX11e5cuVSq1atdPr06QeuhyT9+OOPatGihQIDA+Xq6qrg4GC9//77Sk5Otnpsav/9/fv3q379+vLw8FDx4sW1cOFCSdL69etVo0YNubu7q1SpUlq9enWa179zn3B1dVW5cuX01VdfWS2T2md2wY
IF+vDDD1WwYEG5ubmpUaNGluNBaj0///yzTpw4YdkGd/arPXfunMLDwy37X8WKFTV79myr16pSpYratWtnNS0kJEQODg7av3+/Zdr8+fPl4OBgdcxLz+nTp9WmTRt5enrKz89P/fv3T7Mv3ssnn3yi2rVrK2/evHJ3d1fVqlUt7+393N2d7c5bgwYN0iz/7bffqlSpUpZj0oYNG9J9vrv7AE+ZMkXlypWTq6urAgMDFRERkWafTv2M7Nq1S/Xq1ZOHh4feffddSffus2rrY0p6fYAzekwpUqSInn76aa1bt07VqlWTu7u7QkJCLMf1xYsXW46PVatW1Z49e9I8x4O+K++sMTIyUt27d5ePj4+8vb3Vo0cPXb9+3WrZjO7bV65c0euvv2753vLz81OTJk20e/fue76nD/p+kaRvvvlGVatWlbu7u/LkyaOOHTvq1KlTVs9z9OhRtW/fXgEBAXJzc1PBggXVsWNHxcXFSbq97a9du6bZs2ene7zds2ePmjdvLi8vL+XMmVONGjXS1q1bLfNjY2Pl5ORkdWy8cOGCHB0d0xxH+/Tpo4CAAKv6tm3bpmbNmsnb21seHh6qX7++Nm3alOb9SN3ubm5uCg4O1ueff57u5ymj37u2wL9y2Sj1g587d27LtNWrV6t58+YqVqyYhg0bphs3bmjixImqU6eOdu/erSJFisjX11dz5syxeq7ExET179//ni1k0u0Tsp555hl16NBBX331lVXLw6pVq/TDDz8oKipKKSkpmjFjhjp16qQXX3xRV65c0ZdffqmwsDBt377dcgKSYRhq3bq1fv/9d/Xu3VtlypTRDz/8oG7dumVo/a9evarQ0FAdOnRIPXv2VJUqVXThwgUtXbpUp0+fVr58+TR+/HhdvXrV6nHjxo3T3r17lTdvXkm3g0vNmjV16dIlRUREqFChQvr222/19NNPa+XKlXryySclSV988YX69eunZ555Rq+99ppu3ryp/fv3a9u2bercubPatWunI0eOaN68eRo3bpzy5csnSfL19ZV0u9V+9uzZeuaZZ/TGG29o27ZtGjlypA4dOqQffvhB0u0g0LRpU/n6+uqdd96Rj4+Pjh8/rsWLF1utQ3h4uGbNmqXmzZurV69eSkpK0saNG7V169ZMtWT+120wdepUlStXTq1atZKzs7N++uknvfLKK0pJSVFERMR9H9u+fXsdPHhQr776qooUKaJz585p1apVOnnypIoUKaJ27dqpePHiVo/ZtWuXxo8fLz8/vwyv45w5czR9+nRt375dM2bMkCTVrl1bUsa2SWhoqMaNG6eDBw9aTgrduHGjHB0dtXHjRvXr188yTZLq1at333p69eqlb775Rp07d1bt2rW1du1atWjRIkPrMmvWLOXMmVMDBgxQzpw5tXbtWg0ZMkTx8fEaM2aM1bKXL1/W008/rY4dO+rZZ5/V1KlT1bFjR3377bd6/fXX1bt3b3Xu3FljxozRM888o1OnTilXrlyS/m+fSD0Zy9fXV7/++qvCw8MVHx+v119/3eq1Ro0aJUdHR7355puKi4vT6NGj1aVLF23btk2S9N577ykuLk6nT5/WuHHjJEk5c+aUJN24cUMNGjRQZGSk+vbtq6JFi+r7779X9+7dFRsbq9dee82yHebNm2d5zUuXLungwYOW7VChQgXLdvD19VWZMmXu+T7euHFDjRo10smTJ9WvXz8FBgZqzpw5Wrt2bYa2w4QJE9SqVSt16dJFCQkJ+u677/Tss89q2bJl992W9erVS3PsPXHihAYPHpzmM71+/XrNnz9f/fr1k6urq6ZMmaJmzZpp+/bt9z05ediwYRo+fLgaN26sPn366PDhw5o6dap27NihTZs2KUeOHJZlL168qObNm6tjx456/vnn5e/vn6H1v5/sPKZERkaqc+fOevnll/X888/rk08+UcuWLTVt2jS9++67euWVVyRJI0eO1HPPPafDhw/L0fF2O11Gvivv9Nxzz6lo0a
IaOXKkdu/erRkzZsjPz08ff/yxZZmM7tu9e/fWwoUL1bdvX5UtW1YXL17U77//rkOHDt2z7/uDvl8+/PBD/e9//9N
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 800x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"diamonds_test['cut'].value_counts().plot(kind='bar')\n",
|
|||
|
"plt.title('Rozkład częstości dla szlifów diamentów dla zbioru diamonds testowego')\n",
|
|||
|
"plt.xlabel('Szlif')\n",
|
|||
|
"plt.ylabel('Liczba wystąpień')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 112,
|
|||
|
"id": "18e61963",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAscAAAJdCAYAAADeC9oLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABwwklEQVR4nO3deVhU5f/G8XtAAVEWN0AUUdHcd1PJcEkTl9wrt3LPNMzSMrP85tKiaanlmuVWuWZmpqXibmmu4Z6pmUsKroArCJzfHw7n5wgqGDAg79d1zXUx5zwz53M4M2duHp7zjMUwDEMAAAAA5GDvAgAAAIDMgnAMAAAAWBGOAQAAACvCMQAAAGBFOAYAAACsCMcAAACAFeEYAAAAsCIcAwAAAFaEY+Auc+bM0aRJk+xdRrawbt06ffDBB7p27Zq9S8nWZs6cqS+++MLeZQBApkA4zkTq16+vChUqPLBdsWLF1K1btzTb7j///COLxaLZs2en2XNmVcuXL1ffvn1VtWrVVD/2vxyX5I7B8OHDZbFYHur50pPFYtHw4cPN+7Nnz5bFYtE///xj027s2LEqUaKEHB0dVaVKlSTPc+zYMbVp00ZeXl7KnTt3+hadjJTux6Puu+++02uvvabHH388w7e9YcMGWSwWbdiwwVzWrVs3FStWLEPrsFgs6tev3wPbZaXXSFY6p6S1u9/bmVVyr/WU1p5ZjmX9+vVVv359e5eR5gjHyUg8ASbecuTIocKFC6tbt276999/7V0e7rBlyxYNHz5ckZGR//m5/v33X/Xs2VPffPON6tSp89+Ly8ZWr16tt956S3Xq1NGsWbP00Ucf2ayPiYnRc889p379+ql37952qvLRNm/ePE2YMOG+bY4cOaI+ffpo0aJFqlatWsYUBgCZXA57F5CZjRw5UsWLF9fNmzf1+++/a/bs2fr111+1f/9+ubi42Ls86HY4HjFihLp16yZPT8//9Fx79uzR5MmT1a5du4d6/OHDh+XgkP3+3nzxxRfVoUMHOTs7m8vWrVsnBwcHzZgxQ05OTkkes2/fPnXv3l2vvvpqRpZ6X8ntR1Y2b9487d+/X6+//vo92+zZs0ezZs1S06ZNM66wLCyrv0aGDh2qt99+295l4D5u3LihHDmyTjRbvXq1vUtIF1nnCNhB06ZNVaNGDUlSr169VKBAAX388cdatmyZnn/+eTtXh7TWrFmz//T4rPqB+V85OjrK0dHRZtm5c+eUK1euZIOxJNWoUcN8b2UWye3Ho+7ZZ5+1dwlZSlq/Rq5fvy5XV9c0e74HyZEjR5YKXtlRVut4u9c5PqvLft1c/0FQUJCk22Ml77Ru3ToFBQUpd+7c8vT0VKtWrXTo0CFzfeLYr3vd7mf16tVydXVVx44dFRcXZ7Pu6NGjkqTY2Fi99957ql69ujw8PJQ7d24FBQVp/fr1SZ4vMjJS3bp1k4eHhzw9PdW1a9dUDUmIjIzUgAEDVKxYMTk7O6tIkSLq0qWLLly4IOn2uNt77eed4wr//fdf9ejRQ97e3nJxcVH16tW1cuXKJNubOHGiypcvL1dXV+XNm1c1atTQvHnzJN0eczVo0CBJUvHixc3tJI4HjIuL0/vvv6+AgAA5OzurWLFieueddxQTE2OzjZ07dyo4OFgFChRQrly5VLx4cfXo0cOmTUJCgj777DNVrFhRLi4uKliwoJo0aaKdO3eabVI65vi/HINZs2bpqaeekpeXl5ydnVWuXDlNnTo1RY8NDw9X9+7dVaRIETk7O6tQoUJq1aqV+ftKHMOW3O1++3X3OEyLxaJZs2bp2rVr5uMTxz2m5JgMHDhQ+fPnl2EY5rJXX31VFotFn3/+ubksIiJCFovlgfsfExOjAQMGqGDBgnJzc1PLli11+vTpB+6HJP34449q3ry5fH195ezsrICAAL3//vuKj4+3eWzi9QJ79+5VvXr15OrqqpIlS2rx4sWSpI0bN6pWrVrKlSuXSp
curTVr1iTZ/p3vCWdnZ5UvX14zZ860aZM4RnfRokX68MMPVaRIEbm4uKhhw4bm+SCxnhUrVujEiRPmMbhzbOO5c+fUs2dP8/1XuXJlzZkzx2Zb1apVU9u2bW2WVaxYURaLRXv37jWXLVy4UBaLxeacl5zTp0+rdevWyp07t7y8vDRgwIAk78V7+eSTT/TEE08of/78ypUrl6pXr27+bu/n7iFyd96SGyc5d+5clS5d2jwnbdq0Kdnnu3vM8ZQpU1S+fHk5OzvL19dXISEhSd7Tia+RXbt2qW7dunJ1ddU777wj6d7jTNP6nJLcONWUnlOKFSumZ555Rhs2bFCNGjWUK1cuVaxY0TyvL1myxDw/Vq9eXX/88UeS53jQZ+WdNR49etT8j6CHh4e6d++u69ev27RN6Xv7ypUrev31183PLS8vLz399NPavXv3PX+ne/fulcVi0bJly8xlu3btksViSTIEqWnTpqpVq5Z5P6XnjeQk91r49ddf9fjjj8vFxUUBAQH3vHg2NZ8Pv/zyi+rVqyc3Nze5u7vr8ccfNz9bhw0bppw5c+r8+fNJHte7d295enrq5s2bkpKOOU7pOSrRtm3b1KRJE3l4eMjV1VX16tXTb7/9lqRd4uvuzt9Bcq/nlH7uPwh/QqZC4gkxb9685rI1a9aoadOmKlGihIYPH64bN25o4sSJqlOnjnbv3q1ixYqpYMGC+uabb2ye69atWxowYMB9/+pavny5nn32WbVv314zZ8606bEIDQ3VDz/8oOPHjyshIUFfffWVOnbsqJdeeklXrlzRjBkzFBwcrO3bt5sXQxmGoVatWunXX39Vnz59VLZsWf3www/q2rVrivb/6tWrCgoK0qFDh9SjRw9Vq1ZNFy5c0LJly3T69GkVKFBAEyZM0NWrV20eN378eIWFhSl//vySboea2rVr69KlSwoJCZGfn5/mzp2rZ555RqtXr9ZTTz0lSfryyy/Vv39/Pfvss3rttdd08+ZN7d27V9u2bVOnTp3Utm1b/fXXX5o/f77Gjx+vAgUKSJIKFiwo6XZv/5w5c/Tss8/qjTfe0LZt2zRq1CgdOnRIP/zwg6TbIaFx48YqWLCg3n77bXl6euqff/7RkiVLbPahZ8+emj17tpo2bapevXopLi5Omzdv1u+//56qHtD/egymTp2q8uXLq2XLlsqRI4d++uknvfLKK0pISFBISMh9H9uuXTsdOHBAr776qooVK6Zz584pNDRUJ0+eVLFixdS2bVuVLFnS5jG7du3ShAkT5OXlleJ9/OabbzR9+nRt375dX331lSTpiSeekJSyYxIUFKTx48frwIED5gWqmzdvloODgzZv3qz+/fubyySpbt26962nV69e+vbbb9WpUyc98cQTWrdunZo3b56ifZk9e7by5MmjgQMHKk+ePFq3bp3ee+89RUdHa+zYsTZtL1++rGeeeUYdOnTQc889p6lTp6pDhw6aO3euXn/9dfXp00edOnXS2LFj9eyzz+rUqVNyc3OT9P/vicQLwwoWLKhffvlFPXv2VHR0dJKhEaNHj5aDg4PefPNNRUVFacyYMercubO2bdsmSXr33XcVFRWl06dPa/z48ZKkPHnySLr9b9v69evr6NGj6tevn4oXL67vvvtO3bp1U2RkpF577TXzOMyfP9/c5qVLl3TgwAHzOFSqVMk8DgULFlTZsmXv+Xu8ceOGGjZsqJMnT6p///7y9fXVN998o3Xr1qXoOHz22Wdq2bKlOnfurNjYWC1YsEDPPfecli9fft9jWbdu3STn3hMnTmjo0KFJXtMbN27UwoUL1b9/fzk7O2vKlClq0qSJtm/fft8LpYcPH64RI0aoUaNG6tu3rw4fPqypU6dqx44d+u2335QzZ06z7cWLF9W0aVN16NBBL7zwgry9vVO0//eTkeeUo0ePqlOnTnr55Zf1wgsv6JNPPlGLFi00bdo0vfPOO3rllVckSaNGjdLzzz9vM9QsJZ+Vd3r++e
dVvHhxjRo1Srt379ZXX30lLy8vffzxx2ablL63+/Tpo8WLF6tfv34qV66cLl68qF9//VWHDh2651j7ChUqyNPTU5s
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 800x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"diamonds_dev['cut'].value_counts().plot(kind='bar')\n",
|
|||
|
"plt.title('Rozkład częstości dla szlifów diamentów dla zbioru diamonds walidacyjnego')\n",
|
|||
|
"plt.xlabel('Szlif')\n",
|
|||
|
"plt.ylabel('Liczba wystąpień')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 119,
|
|||
|
"id": "1bf608c2",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>carat</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>cut</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>fair</th>\n",
|
|||
|
" <td>0.516404</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>good</th>\n",
|
|||
|
" <td>0.454054</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>ideal</th>\n",
|
|||
|
" <td>0.432876</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>premium</th>\n",
|
|||
|
" <td>0.515262</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>very good</th>\n",
|
|||
|
" <td>0.459435</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" carat\n",
|
|||
|
"cut \n",
|
|||
|
"fair 0.516404\n",
|
|||
|
"good 0.454054\n",
|
|||
|
"ideal 0.432876\n",
|
|||
|
"premium 0.515262\n",
|
|||
|
"very good 0.459435"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 119,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"diamonds[[\"cut\",\"carat\"]].groupby(\"cut\").std()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 118,
|
|||
|
"id": "0d6e54d9",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Axes: xlabel='cut'>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 118,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiMAAAHoCAYAAACb7e9bAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAw7UlEQVR4nO3deVTU9f7H8dcMCIgibixKJO6auOLVcPlVv7hael3ydg+pCblQVmY/SVNScSnTvInWyfKKmvr7ZVLerqfSQxlFZZKmhlqJ5kK4gZhXUUlBht8fHqfmgibmzAeY5+MczpEv3+/MG0fl6Xw3S2lpaakAAAAMsZoeAAAAuDdiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADDK0/QAN8Jms+n48ePy8/OTxWIxPQ4AALgBpaWlOnfunBo3biyr9drvf1SJGDl+/LhCQ0NNjwEAAG7CkSNHdNttt13z61UiRvz8/CRd+Wbq1KljeBoAAHAjCgoKFBoaav85fi1VIkau7pqpU6cOMQIAQBXze4dYcAArAAAwihgBAABGESMAAMCoKnHMCAAAt4rNZlNRUZHpMaqFGjVqyMPD4w8/DjECAHAbRUVFOnz4sGw2m+lRqo26desqODj4D10HjBgBALiF0tJSnThxQh4eHgoNDb3uRbjw+0pLS1VYWKiTJ09Kkho1anTTj0WMAADcwuXLl1VYWKjGjRvL19fX9DjVQs2aNSVJJ0+eVGBg4E3vsiELAQBuoaSkRJLk5eVleJLq5WrYFRcX3/RjECMAALfCPc5urVvx+0mMAAAAo4gRAABgFAewAgDcWtiUDS59vux5/V36fFUB74wAAIAysrOzZbFYlJmZ6fTnIkYAAHAjlfHqs8QIAACVnM1m0/z589WiRQt5e3vr9ttv15w5cyRJkydPVqtWreTr66tmzZpp+vTpDqfZzpw5U506ddKyZcvUtGlT+fj4SJJSU1PVq1cv1a1bVw0aNNBf/vIXHTx40L5d06ZNJUmdO3eWxWLR3Xff7bTvj2NGfsPV+w2dgX2RAFD9JCQkKDk5WQsXLlSvXr104sQJZWVlSZL8/Py0cuVKNW7cWHv27FFcXJz8/Pz07LPP2rc/cOCA/vnPf+q9996zX5jswoULio+PV4cOHXT+/HklJibqgQceUGZmpqxWq7Zt26Zu3brpk08+Ubt27Zx6fRZiBACASuzcuXN65ZVX9Nprryk2NlaS1Lx5c/Xq1UuSNG3aNPu6YWFhmjhxotauXesQI0VFRVq9erUCAgLsy/761786PM+KFSsUEBCgH374QeHh4fZ1GzRooODgYKd9fxK7aQAAqNT27t2rS5cu6d577y336ykpKerZs6eCg4NVu3ZtTZs2TTk5OQ7rNGnSxCFEJOnHH3/U0KFD1axZM9WpU0dhYWGSVGZbVyBGAACoxK7e/6U8GRkZGj58uPr166cPP/xQ3377raZOnVrmINVatWqV2XbAgAE6ffq0kpOTtXXrVm3dulWSmQNc2U0DAEAl1rJlS9WsWVNpaWkaM2aMw9e2bNmiJk2aaOrUqfZlP/300+8+5s8//6x9+/YpOTlZvXv3liRt3rzZYZ2rx4hcvaePMxEjAABUYj4+Ppo8ebKeffZZeXl5qWfPnsrPz9f333+vli1bKicnR2vXrtWf/vQnbdiwQf/6179+9zHr1aunBg0aaOnSpWrUqJFycnI0ZcoUh3UCAwNVs2ZNpaam6rbbbpOPj4/8/f2d8j0SIwAAt1YVzkKcPn26PD09lZiYqOPHj6tRo0YaO3asRo8erQkTJmjcuHG6dOmS+vfvr+nTp2vmzJnXfTyr1aq1a9dq/PjxCg8PV+vWrfXqq686nL7r6empV199VbNnz1ZiYqJ69+6t9PR0p3x/ltLS0lKnPPItVFBQIH9/f509e1Z16tRx2vNwai8AVF8XL17U4cOHHa61gT/uer+vN/rzmwNYAQCAUcQIAA
AwihgBAABGESMAAMCoCsfIF198oQEDBqhx48ayWCxav379726Tnp6uLl26yNvbWy1atNDKlStvYlQAAP64KnDeRpVis9n+8GNU+NTeCxcuqGPHjho1apSGDBnyu+sfPnxY/fv319ixY/XWW2/ZL9rSqFEj9e3b96aGBgCgomrUqCGLxaL8/HwFBATIYrGYHqlKKy0tVVFRkfLz82W1Wv/QjfQqHCP333+/7r///htef8mSJWratKkWLFggSWrbtq02b96shQsXXjNGLl26pEuXLtk/LygoqOiYAAA48PDw0G233aajR48qOzvb9DjVhq+vr26//XZZrTd/5IfTL3qWkZGhqKgoh2V9+/bV//zP/1xzm7lz52rWrFlOngwA4G5q166tli1bqri42PQo1YKHh4c8PT3/8LtMTo+R3NxcBQUFOSwLCgpSQUGBfvnll3JvAJSQkKD4+Hj75wUFBQoNDXX2qAAAN+Dh4SEPDw/TY+A3KuXl4L29veXt7W16DAAA4AJOP7U3ODhYeXl5Dsvy8vJUp06d694WGQAAuAenx0hkZKTS0tIclm3atEmRkZHOfmoAAFAFVDhGzp8/r8zMTGVmZkq6cupuZmamcnJyJF053iMmJsa+/tixY3Xo0CE9++yzysrK0uuvv6533nlHEyZMuDXfAQAAqNIqHCPbt29X586d1blzZ0lSfHy8OnfurMTEREnSiRMn7GEiSU2bNtWGDRu0adMmdezYUQsWLNCyZcu4xggAAJAkWUqrwKXobvQWxH9U2JQNTntsV8me19/0CAAASLrxn9/cmwYAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUZ6mBwAA3JiwKRtMj3BLZM/rb3oEVDK8MwIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKO7ai0qpOtydlDuTAsCN4Z0RAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADDqpmJk8eLFCgsLk4+Pj7p3765t27Zdd/1FixapdevWqlmzpkJDQzVhwgRdvHjxpgYGAADVS4VjJCUlRfHx8ZoxY4Z27typjh07qm/fvjp58mS5669Zs0ZTpkzRjBkztHfvXi1fvlwpKSl67rnn/vDwAACg6vOs6AZJSUmKi4vTyJEjJUlLlizRhg0btGLFCk2ZMqXM+lu2bFHPnj01bNgwSVJYWJiGDh2qrVu3/sHRAQAwI2zKBtMj3BLZ8/qbHkFSBd8ZKSoq0o4dOxQVFfXrA1itioqKUkZGRrnb9OjRQzt27LDvyjl06JA2btyofv36XfN5Ll26pIKCAocPAABQPVXonZFTp06ppKREQUFBDsuDgoKUlZVV7jbDhg3TqVOn1KtXL5WWlury5csaO3bsdXfTzJ07V7NmzarIaAAAoIpy+tk06enpevHFF/X6669r586deu+997RhwwY9//zz19wmISFBZ8+etX8cOXLE2WMCAABDKvTOSMOGDeXh4aG8vDyH5Xl5eQoODi53m+nTp2vEiBEaM2aMJKl9+/a6cOGCHn30UU2dOlVWa9ke8vb2lre3d0VGAwAAVVSF3hnx8vJSRESE0tLS7MtsNpvS0tIUGRlZ7jaFhYVlgsPDw0OSVFpaWtF5AQBANVPhs2ni4+MVGxurrl27qlu3blq0aJEuXLhgP7smJiZGISEhmjt3riRpwIABSkpKUufOndW9e3cdOHBA06dP14
ABA+xRAgAA3FeFYyQ6Olr5+flKTExUbm6uOnXqpNTUVPtBrTk5OQ7vhEybNk0Wi0XTpk3TsWPHFBAQoAEDBmjOnDm
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"diamonds[[\"cut\",\"carat\"]].groupby(\"cut\").mean().plot(kind=\"bar\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 124,
|
|||
|
"id": "4598d9cf",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Unnamed: 0</th>\n",
|
|||
|
" <th>carat</th>\n",
|
|||
|
" <th>cut</th>\n",
|
|||
|
" <th>color</th>\n",
|
|||
|
" <th>clarity</th>\n",
|
|||
|
" <th>depth</th>\n",
|
|||
|
" <th>table</th>\n",
|
|||
|
" <th>price</th>\n",
|
|||
|
" <th>x</th>\n",
|
|||
|
" <th>y</th>\n",
|
|||
|
" <th>z</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.006237</td>\n",
|
|||
|
" <td>ideal</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>0.513889</td>\n",
|
|||
|
" <td>0.230769</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>0.367784</td>\n",
|
|||
|
" <td>0.067572</td>\n",
|
|||
|
" <td>0.076415</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>0.002079</td>\n",
|
|||
|
" <td>premium</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>0.466667</td>\n",
|
|||
|
" <td>0.346154</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>0.362197</td>\n",
|
|||
|
" <td>0.065195</td>\n",
|
|||
|
" <td>0.072642</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>0.006237</td>\n",
|
|||
|
" <td>good</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>VS1</td>\n",
|
|||
|
" <td>0.386111</td>\n",
|
|||
|
" <td>0.423077</td>\n",
|
|||
|
" <td>0.000054</td>\n",
|
|||
|
" <td>0.377095</td>\n",
|
|||
|
" <td>0.069100</td>\n",
|
|||
|
" <td>0.072642</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>0.018711</td>\n",
|
|||
|
" <td>premium</td>\n",
|
|||
|
" <td>I</td>\n",
|
|||
|
" <td>VS2</td>\n",
|
|||
|
" <td>0.538889</td>\n",
|
|||
|
" <td>0.288462</td>\n",
|
|||
|
" <td>0.000433</td>\n",
|
|||
|
" <td>0.391061</td>\n",
|
|||
|
" <td>0.071817</td>\n",
|
|||
|
" <td>0.082704</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>0.022869</td>\n",
|
|||
|
" <td>good</td>\n",
|
|||
|
" <td>J</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>0.563889</td>\n",
|
|||
|
" <td>0.288462</td>\n",
|
|||
|
" <td>0.000487</td>\n",
|
|||
|
" <td>0.404097</td>\n",
|
|||
|
" <td>0.073854</td>\n",
|
|||
|
" <td>0.086478</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53935</th>\n",
|
|||
|
" <td>53936</td>\n",
|
|||
|
" <td>0.108108</td>\n",
|
|||
|
" <td>ideal</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>0.494444</td>\n",
|
|||
|
" <td>0.269231</td>\n",
|
|||
|
" <td>0.131427</td>\n",
|
|||
|
" <td>0.535382</td>\n",
|
|||
|
" <td>0.097793</td>\n",
|
|||
|
" <td>0.110063</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53936</th>\n",
|
|||
|
" <td>53937</td>\n",
|
|||
|
" <td>0.108108</td>\n",
|
|||
|
" <td>good</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>0.558333</td>\n",
|
|||
|
" <td>0.230769</td>\n",
|
|||
|
" <td>0.131427</td>\n",
|
|||
|
" <td>0.529795</td>\n",
|
|||
|
" <td>0.097623</td>\n",
|
|||
|
" <td>0.113522</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53937</th>\n",
|
|||
|
" <td>53938</td>\n",
|
|||
|
" <td>0.103950</td>\n",
|
|||
|
" <td>very good</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>0.550000</td>\n",
|
|||
|
" <td>0.326923</td>\n",
|
|||
|
" <td>0.131427</td>\n",
|
|||
|
" <td>0.527002</td>\n",
|
|||
|
" <td>0.096435</td>\n",
|
|||
|
" <td>0.111950</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53938</th>\n",
|
|||
|
" <td>53939</td>\n",
|
|||
|
" <td>0.137214</td>\n",
|
|||
|
" <td>premium</td>\n",
|
|||
|
" <td>H</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>0.500000</td>\n",
|
|||
|
" <td>0.288462</td>\n",
|
|||
|
" <td>0.131427</td>\n",
|
|||
|
" <td>0.572626</td>\n",
|
|||
|
" <td>0.103905</td>\n",
|
|||
|
" <td>0.117610</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53939</th>\n",
|
|||
|
" <td>53940</td>\n",
|
|||
|
" <td>0.114345</td>\n",
|
|||
|
" <td>ideal</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>0.533333</td>\n",
|
|||
|
" <td>0.230769</td>\n",
|
|||
|
" <td>0.131427</td>\n",
|
|||
|
" <td>0.542831</td>\n",
|
|||
|
" <td>0.099660</td>\n",
|
|||
|
" <td>0.114465</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>53940 rows × 11 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Unnamed: 0 carat cut color clarity depth table \\\n",
|
|||
|
"0 1 0.006237 ideal E SI2 0.513889 0.230769 \n",
|
|||
|
"1 2 0.002079 premium E SI1 0.466667 0.346154 \n",
|
|||
|
"2 3 0.006237 good E VS1 0.386111 0.423077 \n",
|
|||
|
"3 4 0.018711 premium I VS2 0.538889 0.288462 \n",
|
|||
|
"4 5 0.022869 good J SI2 0.563889 0.288462 \n",
|
|||
|
"... ... ... ... ... ... ... ... \n",
|
|||
|
"53935 53936 0.108108 ideal D SI1 0.494444 0.269231 \n",
|
|||
|
"53936 53937 0.108108 good D SI1 0.558333 0.230769 \n",
|
|||
|
"53937 53938 0.103950 very good D SI1 0.550000 0.326923 \n",
|
|||
|
"53938 53939 0.137214 premium H SI2 0.500000 0.288462 \n",
|
|||
|
"53939 53940 0.114345 ideal D SI2 0.533333 0.230769 \n",
|
|||
|
"\n",
|
|||
|
" price x y z \n",
|
|||
|
"0 0.000000 0.367784 0.067572 0.076415 \n",
|
|||
|
"1 0.000000 0.362197 0.065195 0.072642 \n",
|
|||
|
"2 0.000054 0.377095 0.069100 0.072642 \n",
|
|||
|
"3 0.000433 0.391061 0.071817 0.082704 \n",
|
|||
|
"4 0.000487 0.404097 0.073854 0.086478 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"53935 0.131427 0.535382 0.097793 0.110063 \n",
|
|||
|
"53936 0.131427 0.529795 0.097623 0.113522 \n",
|
|||
|
"53937 0.131427 0.527002 0.096435 0.111950 \n",
|
|||
|
"53938 0.131427 0.572626 0.103905 0.117610 \n",
|
|||
|
"53939 0.131427 0.542831 0.099660 0.114465 \n",
|
|||
|
"\n",
|
|||
|
"[53940 rows x 11 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 124,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"#normalizacja wartości typu float do zakrsu 0.0 - 1.0\n",
|
|||
|
"#Powyżej wykonano jeszcze konwersję danych typu string na lowerCase\n",
|
|||
|
"\n",
|
|||
|
"from sklearn.preprocessing import MinMaxScaler\n",
|
|||
|
"scaler = MinMaxScaler()\n",
|
|||
|
"diamonds[['carat', 'depth', 'table', 'price', 'x', 'y', 'z']] = scaler.fit_transform(diamonds[['carat', 'depth', 'table', 'price', 'x', 'y', 'z']])\n",
|
|||
|
"\n",
|
|||
|
"#wyświetlenie zbioru\n",
|
|||
|
"diamonds"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 96,
|
|||
|
"id": "97350bed",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Unnamed: 0</th>\n",
|
|||
|
" <th>carat</th>\n",
|
|||
|
" <th>cut</th>\n",
|
|||
|
" <th>color</th>\n",
|
|||
|
" <th>clarity</th>\n",
|
|||
|
" <th>depth</th>\n",
|
|||
|
" <th>table</th>\n",
|
|||
|
" <th>price</th>\n",
|
|||
|
" <th>x</th>\n",
|
|||
|
" <th>y</th>\n",
|
|||
|
" <th>z</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.23</td>\n",
|
|||
|
" <td>Ideal</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>61.5</td>\n",
|
|||
|
" <td>55.0</td>\n",
|
|||
|
" <td>326</td>\n",
|
|||
|
" <td>3.95</td>\n",
|
|||
|
" <td>3.98</td>\n",
|
|||
|
" <td>2.43</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>0.21</td>\n",
|
|||
|
" <td>Premium</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>59.8</td>\n",
|
|||
|
" <td>61.0</td>\n",
|
|||
|
" <td>326</td>\n",
|
|||
|
" <td>3.89</td>\n",
|
|||
|
" <td>3.84</td>\n",
|
|||
|
" <td>2.31</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>0.23</td>\n",
|
|||
|
" <td>Good</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>VS1</td>\n",
|
|||
|
" <td>56.9</td>\n",
|
|||
|
" <td>65.0</td>\n",
|
|||
|
" <td>327</td>\n",
|
|||
|
" <td>4.05</td>\n",
|
|||
|
" <td>4.07</td>\n",
|
|||
|
" <td>2.31</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>0.29</td>\n",
|
|||
|
" <td>Premium</td>\n",
|
|||
|
" <td>I</td>\n",
|
|||
|
" <td>VS2</td>\n",
|
|||
|
" <td>62.4</td>\n",
|
|||
|
" <td>58.0</td>\n",
|
|||
|
" <td>334</td>\n",
|
|||
|
" <td>4.20</td>\n",
|
|||
|
" <td>4.23</td>\n",
|
|||
|
" <td>2.63</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>0.31</td>\n",
|
|||
|
" <td>Good</td>\n",
|
|||
|
" <td>J</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>63.3</td>\n",
|
|||
|
" <td>58.0</td>\n",
|
|||
|
" <td>335</td>\n",
|
|||
|
" <td>4.34</td>\n",
|
|||
|
" <td>4.35</td>\n",
|
|||
|
" <td>2.75</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53935</th>\n",
|
|||
|
" <td>53936</td>\n",
|
|||
|
" <td>0.72</td>\n",
|
|||
|
" <td>Ideal</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>60.8</td>\n",
|
|||
|
" <td>57.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.75</td>\n",
|
|||
|
" <td>5.76</td>\n",
|
|||
|
" <td>3.50</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53936</th>\n",
|
|||
|
" <td>53937</td>\n",
|
|||
|
" <td>0.72</td>\n",
|
|||
|
" <td>Good</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>63.1</td>\n",
|
|||
|
" <td>55.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.69</td>\n",
|
|||
|
" <td>5.75</td>\n",
|
|||
|
" <td>3.61</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53937</th>\n",
|
|||
|
" <td>53938</td>\n",
|
|||
|
" <td>0.70</td>\n",
|
|||
|
" <td>Very Good</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>62.8</td>\n",
|
|||
|
" <td>60.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.66</td>\n",
|
|||
|
" <td>5.68</td>\n",
|
|||
|
" <td>3.56</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53938</th>\n",
|
|||
|
" <td>53939</td>\n",
|
|||
|
" <td>0.86</td>\n",
|
|||
|
" <td>Premium</td>\n",
|
|||
|
" <td>H</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>61.0</td>\n",
|
|||
|
" <td>58.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>6.15</td>\n",
|
|||
|
" <td>6.12</td>\n",
|
|||
|
" <td>3.74</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53939</th>\n",
|
|||
|
" <td>53940</td>\n",
|
|||
|
" <td>0.75</td>\n",
|
|||
|
" <td>Ideal</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>62.2</td>\n",
|
|||
|
" <td>55.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.83</td>\n",
|
|||
|
" <td>5.87</td>\n",
|
|||
|
" <td>3.64</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>53940 rows × 11 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Unnamed: 0 carat cut color clarity depth table price x \\\n",
|
|||
|
"0 1 0.23 Ideal E SI2 61.5 55.0 326 3.95 \n",
|
|||
|
"1 2 0.21 Premium E SI1 59.8 61.0 326 3.89 \n",
|
|||
|
"2 3 0.23 Good E VS1 56.9 65.0 327 4.05 \n",
|
|||
|
"3 4 0.29 Premium I VS2 62.4 58.0 334 4.20 \n",
|
|||
|
"4 5 0.31 Good J SI2 63.3 58.0 335 4.34 \n",
|
|||
|
"... ... ... ... ... ... ... ... ... ... \n",
|
|||
|
"53935 53936 0.72 Ideal D SI1 60.8 57.0 2757 5.75 \n",
|
|||
|
"53936 53937 0.72 Good D SI1 63.1 55.0 2757 5.69 \n",
|
|||
|
"53937 53938 0.70 Very Good D SI1 62.8 60.0 2757 5.66 \n",
|
|||
|
"53938 53939 0.86 Premium H SI2 61.0 58.0 2757 6.15 \n",
|
|||
|
"53939 53940 0.75 Ideal D SI2 62.2 55.0 2757 5.83 \n",
|
|||
|
"\n",
|
|||
|
" y z \n",
|
|||
|
"0 3.98 2.43 \n",
|
|||
|
"1 3.84 2.31 \n",
|
|||
|
"2 4.07 2.31 \n",
|
|||
|
"3 4.23 2.63 \n",
|
|||
|
"4 4.35 2.75 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"53935 5.76 3.50 \n",
|
|||
|
"53936 5.75 3.61 \n",
|
|||
|
"53937 5.68 3.56 \n",
|
|||
|
"53938 6.12 3.74 \n",
|
|||
|
"53939 5.87 3.64 \n",
|
|||
|
"\n",
|
|||
|
"[53940 rows x 11 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 96,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Usuwanie artefaktów\n",
|
|||
|
"diamonds = diamonds.dropna() ## usuwanie pustych wierszy, które posiadają przynajmniej jedno wystąpienie NULL or NaN\n",
|
|||
|
"diamonds"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3 (ipykernel)",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.11.2"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 5
|
|||
|
}
|