ium_z487175/02_Dane-Zadanie01.ipynb

1813 lines
214 KiB
Plaintext
Raw Normal View History

2023-04-03 21:27:41 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
2023-04-03 21:27:41 +02:00
"id": "0063a986",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"UsageError: Line magic function `%python` not found (But cell magic `%%python` exists, did you mean that instead?).\n"
]
}
],
"source": [
"# Download the diamonds dataset with the Kaggle CLI. `!` hands the line to the shell;\n",
"# `%python` is not an IPython line magic and only produced a UsageError.\n",
"# The slug matches the download cell that follows, so both cells fetch the same diamonds.zip.\n",
"!kaggle datasets download -d shivam2503/diamonds"
]
},
{
"cell_type": "code",
"execution_count": 4,
2023-04-03 21:27:41 +02:00
"id": "5bc46bfd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"diamonds.zip: Skipping, found more recently modified local copy (use --force to force download)\n"
2023-04-03 21:27:41 +02:00
]
}
],
"source": [
"!kaggle datasets download -d shivam2503/diamonds"
]
},
{
"cell_type": "code",
"execution_count": 28,
2023-04-03 21:27:41 +02:00
"id": "75024e0f",
"metadata": {},
"outputs": [],
"source": [
"import zipfile\n",
"\n",
"# Unpack the downloaded archive with the standard library instead of `tar -xf`,\n",
"# which only reads .zip files where tar is bsdtar (e.g. Windows), not GNU tar.\n",
"with zipfile.ZipFile('diamonds.zip') as archive:\n",
"    archive.extractall()"
]
},
{
"cell_type": "code",
"execution_count": 3,
2023-04-03 21:27:41 +02:00
"id": "99c20a95",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>depth</th>\n",
" <th>table</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.23</td>\n",
" <td>Ideal</td>\n",
" <td>E</td>\n",
" <td>SI2</td>\n",
" <td>61.5</td>\n",
" <td>55.0</td>\n",
" <td>326</td>\n",
" <td>3.95</td>\n",
" <td>3.98</td>\n",
" <td>2.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.21</td>\n",
" <td>Premium</td>\n",
" <td>E</td>\n",
" <td>SI1</td>\n",
" <td>59.8</td>\n",
" <td>61.0</td>\n",
" <td>326</td>\n",
" <td>3.89</td>\n",
" <td>3.84</td>\n",
" <td>2.31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.23</td>\n",
" <td>Good</td>\n",
" <td>E</td>\n",
" <td>VS1</td>\n",
" <td>56.9</td>\n",
" <td>65.0</td>\n",
" <td>327</td>\n",
" <td>4.05</td>\n",
" <td>4.07</td>\n",
" <td>2.31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.29</td>\n",
" <td>Premium</td>\n",
" <td>I</td>\n",
" <td>VS2</td>\n",
" <td>62.4</td>\n",
" <td>58.0</td>\n",
" <td>334</td>\n",
" <td>4.20</td>\n",
" <td>4.23</td>\n",
" <td>2.63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0.31</td>\n",
" <td>Good</td>\n",
" <td>J</td>\n",
" <td>SI2</td>\n",
" <td>63.3</td>\n",
" <td>58.0</td>\n",
" <td>335</td>\n",
" <td>4.34</td>\n",
" <td>4.35</td>\n",
" <td>2.75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53935</th>\n",
" <td>53936</td>\n",
" <td>0.72</td>\n",
" <td>Ideal</td>\n",
" <td>D</td>\n",
" <td>SI1</td>\n",
" <td>60.8</td>\n",
" <td>57.0</td>\n",
" <td>2757</td>\n",
" <td>5.75</td>\n",
" <td>5.76</td>\n",
" <td>3.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53936</th>\n",
" <td>53937</td>\n",
" <td>0.72</td>\n",
" <td>Good</td>\n",
" <td>D</td>\n",
" <td>SI1</td>\n",
" <td>63.1</td>\n",
" <td>55.0</td>\n",
" <td>2757</td>\n",
" <td>5.69</td>\n",
" <td>5.75</td>\n",
" <td>3.61</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53937</th>\n",
" <td>53938</td>\n",
" <td>0.70</td>\n",
" <td>Very Good</td>\n",
" <td>D</td>\n",
" <td>SI1</td>\n",
" <td>62.8</td>\n",
" <td>60.0</td>\n",
" <td>2757</td>\n",
" <td>5.66</td>\n",
" <td>5.68</td>\n",
" <td>3.56</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53938</th>\n",
" <td>53939</td>\n",
" <td>0.86</td>\n",
" <td>Premium</td>\n",
" <td>H</td>\n",
" <td>SI2</td>\n",
" <td>61.0</td>\n",
" <td>58.0</td>\n",
" <td>2757</td>\n",
" <td>6.15</td>\n",
" <td>6.12</td>\n",
" <td>3.74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53939</th>\n",
" <td>53940</td>\n",
" <td>0.75</td>\n",
" <td>Ideal</td>\n",
" <td>D</td>\n",
" <td>SI2</td>\n",
" <td>62.2</td>\n",
" <td>55.0</td>\n",
" <td>2757</td>\n",
" <td>5.83</td>\n",
" <td>5.87</td>\n",
" <td>3.64</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>53940 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 carat cut color clarity depth table price x \\\n",
"0 1 0.23 Ideal E SI2 61.5 55.0 326 3.95 \n",
"1 2 0.21 Premium E SI1 59.8 61.0 326 3.89 \n",
"2 3 0.23 Good E VS1 56.9 65.0 327 4.05 \n",
"3 4 0.29 Premium I VS2 62.4 58.0 334 4.20 \n",
"4 5 0.31 Good J SI2 63.3 58.0 335 4.34 \n",
"... ... ... ... ... ... ... ... ... ... \n",
"53935 53936 0.72 Ideal D SI1 60.8 57.0 2757 5.75 \n",
"53936 53937 0.72 Good D SI1 63.1 55.0 2757 5.69 \n",
"53937 53938 0.70 Very Good D SI1 62.8 60.0 2757 5.66 \n",
"53938 53939 0.86 Premium H SI2 61.0 58.0 2757 6.15 \n",
"53939 53940 0.75 Ideal D SI2 62.2 55.0 2757 5.83 \n",
"\n",
" y z \n",
"0 3.98 2.43 \n",
"1 3.84 2.31 \n",
"2 4.07 2.31 \n",
"3 4.23 2.63 \n",
"4 4.35 2.75 \n",
"... ... ... \n",
"53935 5.76 3.50 \n",
"53936 5.75 3.61 \n",
"53937 5.68 3.56 \n",
"53938 6.12 3.74 \n",
"53939 5.87 3.64 \n",
"\n",
"[53940 rows x 11 columns]"
]
},
"execution_count": 3,
2023-04-03 21:27:41 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"diamonds = pd.read_csv('diamonds.csv')\n",
"#Wyświetlenie zbioru danych\n",
"diamonds"
]
},
{
"cell_type": "code",
"execution_count": 4,
2023-04-03 21:27:41 +02:00
"id": "122b0b57",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>depth</th>\n",
" <th>table</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.23</td>\n",
" <td>Ideal</td>\n",
" <td>E</td>\n",
" <td>SI2</td>\n",
" <td>61.5</td>\n",
" <td>55.0</td>\n",
" <td>326</td>\n",
" <td>3.95</td>\n",
" <td>3.98</td>\n",
" <td>2.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.21</td>\n",
" <td>Premium</td>\n",
" <td>E</td>\n",
" <td>SI1</td>\n",
" <td>59.8</td>\n",
" <td>61.0</td>\n",
" <td>326</td>\n",
" <td>3.89</td>\n",
" <td>3.84</td>\n",
" <td>2.31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.23</td>\n",
" <td>Good</td>\n",
" <td>E</td>\n",
" <td>VS1</td>\n",
" <td>56.9</td>\n",
" <td>65.0</td>\n",
" <td>327</td>\n",
" <td>4.05</td>\n",
" <td>4.07</td>\n",
" <td>2.31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.29</td>\n",
" <td>Premium</td>\n",
" <td>I</td>\n",
" <td>VS2</td>\n",
" <td>62.4</td>\n",
" <td>58.0</td>\n",
" <td>334</td>\n",
" <td>4.20</td>\n",
" <td>4.23</td>\n",
" <td>2.63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0.31</td>\n",
" <td>Good</td>\n",
" <td>J</td>\n",
" <td>SI2</td>\n",
" <td>63.3</td>\n",
" <td>58.0</td>\n",
" <td>335</td>\n",
" <td>4.34</td>\n",
" <td>4.35</td>\n",
" <td>2.75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53935</th>\n",
" <td>53936</td>\n",
" <td>0.72</td>\n",
" <td>Ideal</td>\n",
" <td>D</td>\n",
" <td>SI1</td>\n",
" <td>60.8</td>\n",
" <td>57.0</td>\n",
" <td>2757</td>\n",
" <td>5.75</td>\n",
" <td>5.76</td>\n",
" <td>3.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53936</th>\n",
" <td>53937</td>\n",
" <td>0.72</td>\n",
" <td>Good</td>\n",
" <td>D</td>\n",
" <td>SI1</td>\n",
" <td>63.1</td>\n",
" <td>55.0</td>\n",
" <td>2757</td>\n",
" <td>5.69</td>\n",
" <td>5.75</td>\n",
" <td>3.61</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53937</th>\n",
" <td>53938</td>\n",
" <td>0.70</td>\n",
" <td>Very Good</td>\n",
" <td>D</td>\n",
" <td>SI1</td>\n",
" <td>62.8</td>\n",
" <td>60.0</td>\n",
" <td>2757</td>\n",
" <td>5.66</td>\n",
" <td>5.68</td>\n",
" <td>3.56</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53938</th>\n",
" <td>53939</td>\n",
" <td>0.86</td>\n",
" <td>Premium</td>\n",
" <td>H</td>\n",
" <td>SI2</td>\n",
" <td>61.0</td>\n",
" <td>58.0</td>\n",
" <td>2757</td>\n",
" <td>6.15</td>\n",
" <td>6.12</td>\n",
" <td>3.74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53939</th>\n",
" <td>53940</td>\n",
" <td>0.75</td>\n",
" <td>Ideal</td>\n",
" <td>D</td>\n",
" <td>SI2</td>\n",
" <td>62.2</td>\n",
" <td>55.0</td>\n",
" <td>2757</td>\n",
" <td>5.83</td>\n",
" <td>5.87</td>\n",
" <td>3.64</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>53940 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" id carat cut color clarity depth table price x y \\\n",
"0 1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 \n",
"1 2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 \n",
"2 3 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 \n",
"3 4 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 \n",
"4 5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 \n",
"... ... ... ... ... ... ... ... ... ... ... \n",
"53935 53936 0.72 Ideal D SI1 60.8 57.0 2757 5.75 5.76 \n",
"53936 53937 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 \n",
"53937 53938 0.70 Very Good D SI1 62.8 60.0 2757 5.66 5.68 \n",
"53938 53939 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 \n",
"53939 53940 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 \n",
"\n",
" z \n",
"0 2.43 \n",
"1 2.31 \n",
"2 2.31 \n",
"3 2.63 \n",
"4 2.75 \n",
"... ... \n",
"53935 3.50 \n",
"53936 3.61 \n",
"53937 3.56 \n",
"53938 3.74 \n",
"53939 3.64 \n",
"\n",
"[53940 rows x 11 columns]"
]
},
"execution_count": 4,
2023-04-03 21:27:41 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#przydzielanie nazwy kolumny z id\n",
"diamonds = diamonds.rename(columns={diamonds.columns[0]: 'id'})\n",
"diamonds"
]
},
{
"cell_type": "code",
"execution_count": 5,
2023-04-03 21:27:41 +02:00
"id": "a489dab8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
2023-04-03 21:27:41 +02:00
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>depth</th>\n",
" <th>table</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.23</td>\n",
" <td>ideal</td>\n",
" <td>E</td>\n",
" <td>SI2</td>\n",
" <td>61.5</td>\n",
" <td>55.0</td>\n",
" <td>326</td>\n",
" <td>3.95</td>\n",
" <td>3.98</td>\n",
" <td>2.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.21</td>\n",
" <td>premium</td>\n",
" <td>E</td>\n",
" <td>SI1</td>\n",
" <td>59.8</td>\n",
" <td>61.0</td>\n",
" <td>326</td>\n",
" <td>3.89</td>\n",
" <td>3.84</td>\n",
" <td>2.31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.23</td>\n",
" <td>good</td>\n",
" <td>E</td>\n",
" <td>VS1</td>\n",
" <td>56.9</td>\n",
" <td>65.0</td>\n",
" <td>327</td>\n",
" <td>4.05</td>\n",
" <td>4.07</td>\n",
" <td>2.31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.29</td>\n",
" <td>premium</td>\n",
" <td>I</td>\n",
" <td>VS2</td>\n",
" <td>62.4</td>\n",
" <td>58.0</td>\n",
" <td>334</td>\n",
" <td>4.20</td>\n",
" <td>4.23</td>\n",
" <td>2.63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0.31</td>\n",
" <td>good</td>\n",
" <td>J</td>\n",
" <td>SI2</td>\n",
" <td>63.3</td>\n",
" <td>58.0</td>\n",
" <td>335</td>\n",
" <td>4.34</td>\n",
" <td>4.35</td>\n",
" <td>2.75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53935</th>\n",
" <td>53936</td>\n",
" <td>0.72</td>\n",
" <td>ideal</td>\n",
" <td>D</td>\n",
" <td>SI1</td>\n",
" <td>60.8</td>\n",
" <td>57.0</td>\n",
" <td>2757</td>\n",
" <td>5.75</td>\n",
" <td>5.76</td>\n",
" <td>3.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53936</th>\n",
" <td>53937</td>\n",
" <td>0.72</td>\n",
" <td>good</td>\n",
" <td>D</td>\n",
" <td>SI1</td>\n",
" <td>63.1</td>\n",
" <td>55.0</td>\n",
" <td>2757</td>\n",
" <td>5.69</td>\n",
" <td>5.75</td>\n",
" <td>3.61</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53937</th>\n",
" <td>53938</td>\n",
" <td>0.70</td>\n",
" <td>very good</td>\n",
" <td>D</td>\n",
" <td>SI1</td>\n",
" <td>62.8</td>\n",
" <td>60.0</td>\n",
" <td>2757</td>\n",
" <td>5.66</td>\n",
" <td>5.68</td>\n",
" <td>3.56</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53938</th>\n",
" <td>53939</td>\n",
" <td>0.86</td>\n",
" <td>premium</td>\n",
" <td>H</td>\n",
" <td>SI2</td>\n",
" <td>61.0</td>\n",
" <td>58.0</td>\n",
" <td>2757</td>\n",
" <td>6.15</td>\n",
" <td>6.12</td>\n",
" <td>3.74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53939</th>\n",
" <td>53940</td>\n",
" <td>0.75</td>\n",
" <td>ideal</td>\n",
" <td>D</td>\n",
" <td>SI2</td>\n",
" <td>62.2</td>\n",
" <td>55.0</td>\n",
" <td>2757</td>\n",
" <td>5.83</td>\n",
" <td>5.87</td>\n",
" <td>3.64</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>53940 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" id carat cut color clarity depth table price x y \\\n",
"0 1 0.23 ideal E SI2 61.5 55.0 326 3.95 3.98 \n",
"1 2 0.21 premium E SI1 59.8 61.0 326 3.89 3.84 \n",
"2 3 0.23 good E VS1 56.9 65.0 327 4.05 4.07 \n",
"3 4 0.29 premium I VS2 62.4 58.0 334 4.20 4.23 \n",
"4 5 0.31 good J SI2 63.3 58.0 335 4.34 4.35 \n",
"... ... ... ... ... ... ... ... ... ... ... \n",
"53935 53936 0.72 ideal D SI1 60.8 57.0 2757 5.75 5.76 \n",
"53936 53937 0.72 good D SI1 63.1 55.0 2757 5.69 5.75 \n",
"53937 53938 0.70 very good D SI1 62.8 60.0 2757 5.66 5.68 \n",
"53938 53939 0.86 premium H SI2 61.0 58.0 2757 6.15 6.12 \n",
"53939 53940 0.75 ideal D SI2 62.2 55.0 2757 5.83 5.87 \n",
2023-04-03 21:27:41 +02:00
"\n",
" z \n",
"0 2.43 \n",
"1 2.31 \n",
"2 2.31 \n",
"3 2.63 \n",
"4 2.75 \n",
"... ... \n",
"53935 3.50 \n",
"53936 3.61 \n",
"53937 3.56 \n",
"53938 3.74 \n",
"53939 3.64 \n",
2023-04-03 21:27:41 +02:00
"\n",
"[53940 rows x 11 columns]"
]
},
"execution_count": 5,
2023-04-03 21:27:41 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Convert to lowerCase\n",
"\n",
"diamonds['cut'] = diamonds['cut'].str.lower()\n",
"diamonds\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
2023-04-03 21:27:41 +02:00
"id": "1836b2a3",
"metadata": {},
"outputs": [],
"source": [
"import sklearn\n",
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 7,
2023-04-03 21:27:41 +02:00
"id": "fcf6448a",
"metadata": {},
"outputs": [],
"source": [
"# Split the data into train/test/dev subsets in an 8:1:1 ratio (80% / 10% / 10%).\n",
"# The random seed is fixed at 10 so the split is reproducible.\n",
"\n",
"# 1. Hold out 20% of the rows; the remaining 80% form the training set.\n",
"diamonds_train, diamonds_test_dev = train_test_split(diamonds, test_size=0.2, random_state=10)\n",
"\n",
"# 2. Halve the held-out rows into the test set (10%) and the dev/validation set (10%).\n",
"diamonds_test, diamonds_dev = train_test_split(diamonds_test_dev, test_size=0.5, random_state=10)\n",
"\n",
"# Sanity check: the three subsets together account for every row of the original frame.\n",
"assert len(diamonds_train) + len(diamonds_test) + len(diamonds_dev) == len(diamonds)\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
2023-04-03 21:27:41 +02:00
"id": "9476846a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Rozmiar diamonds: (53940, 11)\n",
"Rozmiar diamonds_train: (43152, 11)\n",
"Rozmiar diamonds_test: (5394, 11)\n",
"Rozmiar diamonds_dev: (5394, 11)\n"
]
}
],
"source": [
"#Wyświetlenie rozmiarów zbiorów danych train/test/dev\n",
"print(\"Rozmiar diamonds: \", diamonds.shape)\n",
"print(\"Rozmiar diamonds_train: \", diamonds_train.shape)\n",
"print(\"Rozmiar diamonds_test: \", diamonds_test.shape)\n",
"print(\"Rozmiar diamonds_dev: \", diamonds_dev.shape)\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
2023-04-03 21:27:41 +02:00
"id": "7e1f11cb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" id carat depth table price \\\n",
2023-04-03 21:27:41 +02:00
"count 53940.000000 53940.000000 53940.000000 53940.000000 53940.000000 \n",
"mean 26970.500000 0.797940 61.749405 57.457184 3932.799722 \n",
"std 15571.281097 0.474011 1.432621 2.234491 3989.439738 \n",
"min 1.000000 0.200000 43.000000 43.000000 326.000000 \n",
"25% 13485.750000 0.400000 61.000000 56.000000 950.000000 \n",
"50% 26970.500000 0.700000 61.800000 57.000000 2401.000000 \n",
"75% 40455.250000 1.040000 62.500000 59.000000 5324.250000 \n",
"max 53940.000000 5.010000 79.000000 95.000000 18823.000000 \n",
"\n",
" x y z \n",
"count 53940.000000 53940.000000 53940.000000 \n",
"mean 5.731157 5.734526 3.538734 \n",
"std 1.121761 1.142135 0.705699 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 4.710000 4.720000 2.910000 \n",
"50% 5.700000 5.710000 3.530000 \n",
"75% 6.540000 6.540000 4.040000 \n",
"max 10.740000 58.900000 31.800000 \n"
]
}
],
"source": [
"# Mean, minimum, maximum, standard deviation and median of each numeric parameter\n",
"print(diamonds.describe())"
]
},
{
"cell_type": "code",
"execution_count": 10,
2023-04-03 21:27:41 +02:00
"id": "88a89b38",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" id carat depth table price \\\n",
2023-04-03 21:27:41 +02:00
"count 43152.000000 43152.000000 43152.000000 43152.000000 43152.000000 \n",
"mean 26971.712111 0.795979 61.748241 57.448355 3920.786939 \n",
"std 15565.585777 0.472184 1.426394 2.224297 3975.894633 \n",
"min 3.000000 0.200000 43.000000 44.000000 327.000000 \n",
"25% 13469.750000 0.400000 61.000000 56.000000 946.000000 \n",
"50% 27019.500000 0.700000 61.800000 57.000000 2400.000000 \n",
"75% 40439.250000 1.040000 62.500000 59.000000 5313.250000 \n",
"max 53938.000000 5.010000 79.000000 76.000000 18823.000000 \n",
"\n",
" x y z \n",
"count 43152.000000 43152.000000 43152.000000 \n",
"mean 5.726933 5.731011 3.535791 \n",
"std 1.119635 1.147069 0.693846 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 4.710000 4.720000 2.910000 \n",
"50% 5.690000 5.710000 3.520000 \n",
"75% 6.540000 6.530000 4.030000 \n",
"max 10.740000 58.900000 8.060000 \n"
]
}
],
"source": [
"print(diamonds_train.describe())"
]
},
{
"cell_type": "code",
"execution_count": 11,
2023-04-03 21:27:41 +02:00
"id": "80b5060f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" id carat depth table price \\\n",
2023-04-03 21:27:41 +02:00
"count 5394.000000 5394.000000 5394.000000 5394.000000 5394.000000 \n",
"mean 26951.351316 0.802666 61.760808 57.470189 3970.308676 \n",
"std 15565.740253 0.482062 1.464893 2.309900 4083.195823 \n",
"min 1.000000 0.210000 52.300000 43.000000 326.000000 \n",
"25% 13519.750000 0.400000 61.000000 56.000000 958.000000 \n",
"50% 27013.500000 0.700000 61.900000 57.000000 2375.500000 \n",
"75% 40342.250000 1.050000 62.500000 59.000000 5273.750000 \n",
"max 53930.000000 3.510000 78.200000 95.000000 18806.000000 \n",
"\n",
" x y z \n",
"count 5394.000000 5394.000000 5394.000000 \n",
"mean 5.738817 5.739106 3.542097 \n",
"std 1.132069 1.123925 0.701446 \n",
"min 3.840000 3.780000 0.000000 \n",
"25% 4.710000 4.710000 2.900000 \n",
"50% 5.690000 5.700000 3.530000 \n",
"75% 6.550000 6.540000 4.040000 \n",
"max 9.660000 9.630000 6.030000 \n"
]
}
],
"source": [
"print(diamonds_test.describe())"
]
},
{
"cell_type": "code",
"execution_count": 12,
2023-04-03 21:27:41 +02:00
"id": "31f4af56",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" id carat depth table price \\\n",
2023-04-03 21:27:41 +02:00
"count 5394.000000 5394.000000 5394.000000 5394.000000 5394.000000 \n",
"mean 26979.951798 0.808901 61.747312 57.514813 3991.393029 \n",
"std 15625.161644 0.480344 1.449816 2.238671 4002.742530 \n",
"min 2.000000 0.200000 53.200000 51.000000 326.000000 \n",
"25% 13525.500000 0.400000 61.000000 56.000000 961.000000 \n",
"50% 26529.500000 0.710000 61.850000 57.000000 2484.500000 \n",
"75% 40665.500000 1.050000 62.500000 59.000000 5465.250000 \n",
"max 53940.000000 3.040000 73.600000 68.000000 18779.000000 \n",
"\n",
" x y z \n",
"count 5394.000000 5394.000000 5394.000000 \n",
"mean 5.757290 5.758066 3.558910 \n",
"std 1.128191 1.120344 0.797759 \n",
"min 3.790000 3.750000 0.000000 \n",
"25% 4.730000 4.740000 2.930000 \n",
"50% 5.710000 5.730000 3.540000 \n",
"75% 6.560000 6.540000 4.040000 \n",
"max 9.510000 9.460000 31.800000 \n"
]
}
],
"source": [
"print(diamonds_dev.describe())"
]
},
{
"cell_type": "code",
"execution_count": 13,
2023-04-03 21:27:41 +02:00
"id": "eab3e1f9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ideal 17292\n",
"premium 10954\n",
"very good 9708\n",
"good 3929\n",
"fair 1269\n",
2023-04-03 21:27:41 +02:00
"Name: cut, dtype: int64"
]
},
"execution_count": 13,
2023-04-03 21:27:41 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Wyświetlenie częstości przykładów dla poszczególnych klas diamentów\n",
"diamonds_train[\"cut\"].value_counts()"
2023-04-03 21:27:41 +02:00
]
},
{
"cell_type": "code",
"execution_count": 14,
2023-04-03 21:27:41 +02:00
"id": "2e7c37d9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ideal 2184\n",
"premium 1385\n",
"very good 1183\n",
"good 473\n",
"fair 169\n",
2023-04-03 21:27:41 +02:00
"Name: cut, dtype: int64"
]
},
"execution_count": 14,
2023-04-03 21:27:41 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"diamonds_test[\"cut\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 15,
2023-04-03 21:27:41 +02:00
"id": "a7ccece5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ideal 2075\n",
"premium 1452\n",
"very good 1191\n",
"good 504\n",
"fair 172\n",
2023-04-03 21:27:41 +02:00
"Name: cut, dtype: int64"
]
},
"execution_count": 15,
2023-04-03 21:27:41 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"diamonds_dev[\"cut\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 16,
2023-04-03 21:27:41 +02:00
"id": "17223f54",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAsoAAAJaCAYAAAA28GEDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAA9hAAAPYQGoP6dpAABiK0lEQVR4nO3dd3RU1f7+8WdIJyQhCSQhSA0QQm+XUERCB+mglECoBrwoXJoo+FWKCogUKxakS1URURQBKReEUEVEAelFEmoSekg5vz/8ZS5DDiSBJBPg/Vpr1sqcs2fmc+bMnDyzZ589FsMwDAEAAACwkcfeBQAAAAC5EUEZAAAAMEFQBgAAAEwQlAEAAAATBGUAAADABEEZAAAAMEFQBgAAAEwQlAEAAAATBGUAAADABEEZQIZ88cUXcnNz06ZNm+xdymPh/fffl6enp/bs2WPvUh5rK1askLOzs3766Sd7lwLADgjKyLA5c+bIYrFYL46OjipUqJC6dOmiQ4cOZdvj9urVS/ny5Uu3XVhYmMLCwrL0sYsXL65evXpl6X3mhOvXr2vMmDHasGFDltzf/v37NWDAAM2fP1/16tXL1G1TXzfHjx+/r8ceM2aMLBaLzbLcul8sFovGjBljvb5hwwZZLJY0++GDDz5QqVKl5OzsLIvFori4OJv127dv16hRo/T111+rSpUq2V73ne58Lx0/flwWi0Vz5szJ8Vqyw5kzZzRmzJh0P4QcO3ZMvXr10meffaZmzZrlTHG36dWrl4oXL26z7M7XWE6wWCx68cUX0213t9d7bnXnceRhq/9+mR1TcXeO9i4AD5/Zs2erbNmyunnzpn755Re99dZbWr9+vQ4cOCBvb297lwf9E5THjh0rSQ/84eH69et69tln9dZbb+mZZ57J9O1btmyprVu3qlChQg9Ux8OoWrVq2rp1q8qVK2ddtmfPHg0aNEjPPfecevbsKUdHR3l4eFjXX7p0SZ07d9b06dPVpEkTe5SdRqFChbR161YFBQXZu5QscebMGY0dO1bFixe/6weRW7duqVOnThoyZEiu/FCWG5m93h8mD3v9yB4EZWRahQoVVKNGDUn/hLDk5GSNHj1ay5cvV+/eve1cHbJa3rx5tW/fvvu+fcGCBVWwYMEsrOjh4enpqVq1atks++OPPyRJkZGRqlmzZprb+Pj46NixYzlSX0a5uLik2Y5HnbOzs3bs2GHvMh4qZq/3B3H9+nXlzZs3y+4vPVldPx4NDL3AA0sNzWfPnrVZvmLFCtWuXVt58+aVh4eHmjRpoq1bt9q0uX0ox52Xe31V/8svv6hAgQJq1aqVrl27dtd2Y8eOVWhoqHx8fOTp6alq1app5syZMgzDpl1iYqJGjBihgIAA5c2bV08++aS2b9+e4ecgISFB48aNU0hIiFxdXeXr66sGDRpoy5Ytkv73VZfZ5fbeqlu3bunNN99U2bJl5eLiooIFC6p37946f/68zeOtW7dOYWFh8vX1lZubm4oWLaqOHTvq+vXrOn78uDWYjh071vRxNm/erEaNGsnDw0N58+ZVnTp1tHLlSpvHuH79uoYPH64SJUrI1dVVPj4+qlGjhhYtWmTTbtu2bWrdurV8fX3l6uqqoKAgDR482Lo+M0MvVq5cqSpVqsjFxUUlSpTQ5MmTM/DsSzdv3tSwYcNUpUoVeXl5ycfHR7Vr19a3336bodv/+uuvatWqlfz8/OTi4qLAwEC1bNlSp0+flpTx/XenO7/KDQsLU/fu3SVJoaGhaW4/a9YsVa5c2fp8t2/fXvv377d5fiwWi02A+/rrr2WxWNSyZUubx65UqZI6dux4z+02DEOTJk1SsWLF5OrqqmrVqunHH39M085s6MXhw4fVu3dvlS5dWnnz5lXhwoXVunVr/f7776bPwcKFC/Xyyy+rUKFCypcvn1q3bq2zZ8/qypUr6tevnwoUKKACBQqod+/eunr1apo6p0+fripVqsjNzU3e3t
565plndPToUZt2YWFhqlChgnbs2KF69eopb968KlmypCZOnKiUlBRrPf/6178kSb1797bux9uHM6R37Prjjz9ksVj05ZdfWpft2rVLFotF5cuXt6mpTZs2ql69+j33g/TP+yQ4OFguLi4KCQnRvHnz0r2NJJ0/f14DBgxQuXLllC9fPvn5+alhw4YZOpfgzuF0t1/Mvon69NNPVaZMGbm4uKhcuXJavHixzfq7DV3IyP+C1PfY7t279cwzz8jb29v6DcbdhtWZDU0xk9Hju1n9O3fuVJcuXVS8eHG5ubmpePHi6tq1q06cOGFz29Tnct26dYqMjJSvr688PT3Vo0cPXbt2TTExMerUqZPy58+vQoUKafjw4UpMTLS5j0uXLmnAgAEqXLiwnJ2dVbJkSb366qtKSEiwaZc6FGb+/PkKCQlR3rx5VblyZX3//fdptimjx9Qvv/xSoaGh8vLysr5v+vTpk+5z+zigRxkPLLX3q0yZMtZlCxcuVLdu3dS0aVMtWrRICQkJmjRpksLCwvTzzz/rySeflKQ0B8sbN24oIiJCycnJ8vHxMX28pUuXqkePHurTp48++OADOTg4WNedPn1aVatW1dKlS1W6dGkdP35c/fv3V9GiRSVJUVFRGjhwoP7++2+9/vrr1ttFRkZq3rx5Gj58uJo0aaJ9+/apQ4cOunLlSrrbn5SUpBYtWmjTpk0aPHiwGjZsqKSkJEVFRenkyZOqU6eOnnvuOTVv3tzmdsuWLdM777xj/ceakpKitm3batOmTRoxYoTq1KmjEydOaPTo0QoLC9POnTvl5uam48ePq2XLlqpXr55mzZql/Pnz6++//9aqVat069YtFSpUSKtWrVLz5s3Vt29fPffcc5JkDc8bN25UkyZNVKlSJc2cOVMuLi6aPn26WrdurUWLFqlz586SpKFDh2r+/Pl68803VbVqVV27dk379u3TxYsXrdvw008/qXXr1goJCdHUqVNVtGhRHT9+XKtXr073ebvTzz//rLZt26p27dpavHixkpOTNWnSpDQfwMwkJCTo0qVLGj58uAoXLqxbt25p7dq16tChg2bPnq0ePXrc9bbXrl1TkyZNVKJECX300Ufy9/dXTEyM1q9fb93/Gdl/GTF9+nQtWrRIb775pnUIU+p+mTBhgkaNGqWuXbtqwoQJunjxosaMGaPatWtrx44dKl26tOrXry8nJyetXbvWGvbWrl0rNzc3bdy4UYmJiXJyctK5c+e0b98+/fvf/75nPWPHjtXYsWPVt29fPfPMMzp16pQiIyOVnJys4ODge972zJkz8vX11cSJE1WwYEFdunRJc+fOVWhoqH799dc0tx81apQaNGigOXPm6Pjx4xo+fLi6du0qR0dHVa5cWYsWLdKvv/6qUaNGycPDQ++//771tv3799ecOXM0aNAgvf3227p06ZLGjRunOnXq6LfffpO/v7+1bUxMjLp166Zhw4Zp9OjR+uabbzRy5EgFBgaqR48eqlatmmbPnq3evXvr//7v/6wfMJ544glJGTt2lS9fXoUKFdLatWv17LPP2uyHP//8U2fOnFFgYKCSkpK0ceNGPf/88/d8LufMmaPevXurbdu2mjJliuLj4zVmzBglJCQoT55792ddunRJkjR69GgFBATo6tWr+uabb6z13mvoVeqwqNtt3bpVQ4cOTfO6XrFihdavX69x48bJ3d1d06dPt+6/ew3Jyuj/glQdOnRQly5d9Pzzz9+zEyQzHuT4fvz4cQUHB6tLly7y8fFRdHS0Pv74Y/3rX//Sn3/+qQIFCti0f+6559ShQwctXrzY+npOSkrSwYMH1aFDB/Xr109r167V22+/rcDAQA0dOlTSPx/2GzRooCNHjmjs2LGqVKmSNm3apAkTJmjPnj1pOjJWrlypHTt2aNy4ccqXL58mTZqk9u3b6+DBgypZsqSkjB9Tt27dqs6dO6tz584aM2aMXF1ddeLECa1bt+5BnvZHhwFk0OzZsw
1JRlRUlJGYmGhcuXLFWLVqlREQEGA89dRTRmJiomEYhpGcnGwEBgYaFStWNJKTk623v3LliuHn52fUqVPH9P6TkpK
2023-04-03 21:27:41 +02:00
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Bar chart of how often each cut grade occurs in the full diamonds dataset\n",
"fig, ax = plt.subplots(figsize=(8, 6))\n",
"diamonds['cut'].value_counts().plot(kind='bar', ax=ax)\n",
"ax.set_title('Rozkład częstości dla szlifów diamentów dla zbioru diamonds')\n",
"ax.set_xlabel('Szlif')\n",
"ax.set_ylabel('Liczba wystąpień')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 17,
2023-04-03 21:27:41 +02:00
"id": "8633ea7c",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAsoAAAJaCAYAAAA28GEDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAA9hAAAPYQGoP6dpAABw/klEQVR4nO3dd3QUZf/+8WtJJ5AlIaQhvYQSOg9V6QSQDko1VIM+KkizgI8UCyAKWLEgvaMigmgEpAjSwYBUqQKSUFOoISTz+4Nf9suSgSSYZAO8X+fsOdmZe2Y/s7M7e2X2nnsthmEYAgAAAGAnl6MLAAAAAHIigjIAAABggqAMAAAAmCAoAwAAACYIygAAAIAJgjIAAABggqAMAAAAmCAoAwAAACYIygAAAIAJgjKQRebMmSMPDw+tX7/e0aU8Ej7++GN5eXkpMjLS0aU80pYuXSpXV1f98ssvji4FAP41gnIWmjFjhiwWi+3m7OyswMBAdenSRYcOHcqyx+3Vq5fy5MmTZrsGDRqoQYMGmfrYRYsWVa9evTJ1ndnh6tWrGjVqlNauXZsp69u/f79eeOEFzZ49W0888USGlk153Rw/fvy+HnvUqFGyWCx203LqfrFYLBo1apTt/tq1a2WxWFLth08++UQlS5aUq6urLBaLYmNj7eZv3bpVw4cP13fffafKlStned13uvO9dPz4cVksFs2YMSPba8kKp0+f1qhRo9L8J+TYsWPq1auXvvrqKzVr1ix7irtNr169VLRoUbtpd77GsoPFYtFLL72UZru7vd5zqjuPIw9a/ffL7Jh6v8aMGaMlS5Zkyrqyw8N2LLsfzo4u4FEwffp0lSlTRtevX9fvv/+ud999V2vWrNGBAwfk7e3t6PKgW0F59OjRkvSv/3m4evWqnn76ab377rt66qmnMrx8y5YttWnTJgUGBv6rOh5EVatW1aZNm1SuXDnbtMjISA0YMEDPPvusevbsKWdnZ+XNm9c2/+LFi+rcubMmT56spk2bOqLsVAIDA7Vp0yaVKFHC0aVkitOnT2v06NEqWrToXf8RuXHjhjp16qRBgwblyH/KciKz1/uD5EGv3xHGjBmjp556Su3atXN0KenysB3L7gdBORuEhISoevXqkm6FsKSkJI0cOVJLlixR7969HVwdMlvu3Lm1Z8+e+16+QIECKlCgQCZW9ODw8vJSrVq17Kbt3btXkhQeHq4aNWqkWsbHx0fHjh3LlvrSy83NLdV2POxcXV21bds2R5fxQDF7vf8bV69eVe7cuTNtfWnJ7Pph79q1a3J3d8+0s9n341E8lt2JrhcOkBKaz5w5Yzd96dKlql27tnLnzq28efOqadOm2rRpk12b27ty3Hm711f1v//+u3x9fdWqVStduXLlru1Gjx6tmjVrysfHR15eXqpataqmTp0qwzDs2iUmJurVV19VQECAcufOrccff1xbt25N93OQkJCgt956S2XLlpW7u7vy58+vhg0bauPGjZL+76sus9vtZ6tu3Lihd955R2XKlJGbm5sKFCig3r1769y5c3aPt3r1ajVo0ED58+eXh4eHChcurI4dO+rq1as6fvy4LZiOHj3a9HE2bNigxo0bK2/evMqdO7fq1Kmj5cuX2z3G1atXNXToUBUrVkzu7u7y8fFR9erVNX/+fLt2W7ZsUevWrZU/f365u7urRIkSGjhwoG1+RrpeLF++XJUrV5abm5uKFSumDz74IB3PvnT9+nUNGTJElStXltVqlY+Pj2rXrq0ffvghXcv/8ccfatWqlfz8/OTm5qagoCC1bNlSp06dkpT+/XenO7/KbdCggZ555hlJUs2aNVMtP23aNFWqVMn2fLdv31779++3e34sFotdgPvuu+9ksVjUsmVLu8euWLGiOnbseM/tNgxD48ePV5EiReTu7q6qVavq559/TtXO7OvKw4cPq3fv3ipVqpRy586tggULqnXr1vrzzz9Nn4N58+bptddeU2
BgoPLkyaPWrVvrzJkzunTpkvr16ydfX1/5+vqqd+/eunz5cqo6J0+erMqVK8vDw0Pe3t566qmndPToUbt2DRo0UEhIiLZt26YnnnhCuXPnVvHixTVu3DglJyfb6vnPf/4jSerdu7dtP97enSGtY9fevXtlsVj0zTff2Kbt2LFDFotF5cuXt6upTZs2qlat2j33g3TrfRIcHCw3NzeVLVtWs2bNSnMZSTp37pxeeOEFlStXTnny5JGfn58aNWqUrmsJ7uxOd/vN7JuoL7/8UqVLl5abm5vKlSunBQsW2M2/W9eF9HwWpLzHdu7cqaeeekre3t62s35361Zn1jXFTHqP72b1b9++XV26dFHRokXl4eGhokWLqmvXrvr777/tlk15LlevXq3w8HDlz59fXl5e6tGjh65cuaLo6Gh16tRJ+fLlU2BgoIYOHarExES7dVy8eFEvvPCCChYsKFdXVxUvXlxvvPGGEhIS7NqldIWZPXu2ypYtq9y5c6tSpUr68ccfU21Teo+p33zzjWrWrCmr1Wp73/Tp0+eez6vFYtGVK1c0c+bMVK+blOdjxYoV6tOnjwoUKKDcuXMrISEhw8eO+fPn64033lBQUJC8vLzUpEkTHTx40K7t3brjpacbWXrrkaTY2FgNGTJExYsXl5ubm/z8/PTkk0/qwIEDtjbp/RxPSEjQkCFDbK/LevXqaceOHabbsmfPHrVt21be3t5yd3dX5cqVNXPmzHvsnbvjjLIDpJz9Kl26tG3avHnz1L17d4WGhmr+/PlKSEjQ+PHj1aBBA/366696/PHHJSnVwfLatWsKCwtTUlKSfHx8TB9v0aJF6tGjh/r06aNPPvlETk5OtnmnTp1SlSpVtGjRIpUqVUrHjx/Xc889p8KFC0uSNm/erP79++uff/7RiBEjbMuFh4dr1qxZGjp0qJo2bao9e/aoQ4cOunTpUprbf/PmTbVo0ULr16/XwIED1ahRI928eVObN2/WiRMnVKdOHT377LNq3ry53XKLFy/W+++/b/tgTU5OVtu2bbV+/Xq9+uqrqlOnjv7++2+NHDlSDRo00Pbt2+Xh4aHjx4+rZcuWeuKJJzRt2jTly5dP//zzjyIiInTjxg0FBgYqIiJCzZs3V9++ffXss89Kki08r1u3Tk2bNlXFihU1depUubm5afLkyWrdurXmz5+vzp07S5IGDx6s2bNn65133lGVKlV05coV7dmzRxcuXLBtwy+//KLWrVurbNmymjhxogoXLqzjx49rxYoVaT5vd/r111/Vtm1b1a5dWwsWLFBSUpLGjx+f6h8wMwkJCbp48aKGDh2qggUL6saNG1q1apU6dOig6dOnq0ePHndd9sqVK2ratKmKFSumzz77TP7+/oqOjtaaNWts+z89+y89Jk+erPnz5+udd96xdWFK2S9jx47V8OHD1bVrV40dO1YXLlzQqFGjVLt2bW3btk2lSpVS/fr15eLiolWrVtnC3qpVq+Th4aF169YpMTFRLi4uOnv2rPbs2aP//ve/96xn9OjRGj16tPr27aunnnpKJ0+eVHh4uJKSkhQcHHzPZU+fPq38+fNr3LhxKlCggC5evKiZM2eqZs2a+uOPP1ItP3z4cDVs2FAzZszQ8ePHNXToUHXt2lXOzs6qVKmS5s+frz/++EPDhw9X3rx59fHHH9uWfe655zRjxgwNGDBA7733ni5evKi33npLderU0a5du+Tv729rGx0dre7du2vIkCEaOXKkvv/+ew0bNkxBQUHq0aOHqlatqunTp6t379763//+Z/sH47HHHpOUvmNX+fLlFRgYqFWrVunpp5+22w/79u3T6dOnFRQUpJs3b2rdunV6/vnn7/lczpgxQ71791bbtm01YcIExcXFadSoUUpISFCuXPc+/3Px4kVJ0siRIxUQEKDLly/r+++/t9V7r65XKd2ibrdp0yYNHjw41et66dKlWrNmjd566y15enpq8uTJtv13ry5Z6f0sSNGhQw
d16dJFzz///D1PgmTEvzm+Hz9+XMHBwerSpYt8fHwUFRWlzz//XP/5z3+0b98++fr62rV/9tln1aFDBy1YsMD2er5
2023-04-03 21:27:41 +02:00
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"plt.figure(figsize=(8, 6))\n",
"diamonds_train['cut'].value_counts().plot(kind='bar')\n",
"plt.title('Rozkład częstości dla szlifów diamentów dla zbioru diamonds tranującego')\n",
"plt.xlabel('Szlif')\n",
"plt.ylabel('Liczba wystąpień')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 18,
2023-04-03 21:27:41 +02:00
"id": "ab567b6f",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAsEAAAJaCAYAAADOEJr0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAA9hAAAPYQGoP6dpAABjRElEQVR4nO3deXxMZ///8ffILpJIQjZLEGopsd5iae1r7dXaKrY0tFpuSrXab4suqLbo3d6qVbUVpa0qpW7RlJvatapatLaixJ7YI5Lr94df5jYSJO3E4Lyej8c8HplzrpnzmXPOnHnPyXWusRljjAAAAAALyefqAgAAAIDbjRAMAAAAyyEEAwAAwHIIwQAAALAcQjAAAAAshxAMAAAAyyEEAwAAwHIIwQAAALAcQjAAAAAshxAMONEnn3wiHx8frV692tWlWMK//vUv+fv7a+vWra4uxdIWLVokT09P/ec//3F1KQCQY4RgJ5s+fbpsNpv95u7urvDwcHXp0kW///57ni23V69eKlCgwC3bNWjQQA0aNHDqskuUKKFevXo59TlvhwsXLmjkyJFauXKlU55vx44d6t+/v2bNmqUHH3wwV4/N3G/279//l5Y9cuRI2Ww2h2l36nax2WwaOXKk/f7KlStls9mybId3331XpUuXlqenp2w2m5KTkx3mb9y4US+88IK++OILValSJc/rvt7176X9+/fLZrNp+vTpt72WvHD48GGNHDnyll8w9u3bp169eunDDz9U8+bNb09x1+jVq5dKlCjhMO36fex2sNlsevrpp2/Z7kb7+53q+uPI3Vb/X5XdMfWvGj16tBYuXOiU57qRX3/9VSNHjvzLnyFWRQjOI9OmTdO6deu0YsUKPf3001q0aJEeeOABnT592tWl4f+7cOGCRo0a5ZSD+YULF/Too4/q9ddf1yOPPJLrx7dq1Urr1q1TeHj4367lblOtWjWtW7dO1apVs0/bunWrBg4cqIYNGyoxMVHr1q2Tn5+fff6pU6fUuXNnTZo0SU2bNnVF2VmEh4dr3bp1atWqlatLcYrDhw9r1KhRNw3Bly9fVqdOnTR48OA78gvXnSi7/f1ucrfX7wq3KwSPGjWKEJxL7q4u4F5VsWJF1ahRQ9LVM0bp6ekaMWKEFi5cqN69e7u4Ojhb/vz5tX379r/8+MKFC6tw4cJOrOju4e/vr1q1ajlM++WXXyRJ8fHxqlmzZpbHBAUFad++fbelvpzy8vLK8jrudZ6entq0aZOry7irZLe//x0XLlxQ/vz5nfZ8t+Ls+gFX4kzwbZIZiI8ePeowfdGiRapdu7by588vPz8/NW3aVOvWrXNoc233iutvN/vW9/3336tQoUJq3bq1zp8/f8N2o0aNUkxMjIKCguTv769q1app6tSpMsY4tEtLS9OwYcMUFham/Pnz64EHHtDGjRtzvA5SU1P1yiuvqHz58vL29lZwcLAaNmyotWvXSvrfv5+yu117luny5ct67bXXVK5cOXl5ealw4cLq3bu3jh8/7rC8xMRENWjQQMHBwfLx8VHx4sXVsWNHXbhwQfv377eHzlGjRmW7nDVr1qhx48by8/NT/vz5VadOHS1ZssRhGRcuXNDQoUNVsmRJeXt7KygoSDVq1NDcuXMd2m3YsEFt2rRRcHCwvL29FRUVpUGDBtnn56Y7xJIlS1SlShV5eXmpZMmSeuutt3Kw9qVLly5pyJAhqlKligICAhQUFKTatWvrq6++ytHjf/zxR7Vu3VohISHy8vJSRESEWrVqpUOHDknK+fa73vX/Xm3QoIG6d+8uSYqJicny+I8//liVK1e2r+8OHTpox44dDuvHZrM5hLMvvvhCNpsty1na6OhodezY8aav2xijcePGKTIyUt7e3qpWrZq++eabLO2y6w6xe/du9e7dW2XKlFH+/PlVpEgRtWnTRj///HO262DOnDl67rnnFB4ergIFCqhNmzY6evSozp49q759+6pQoU
IqVKiQevfurXPnzmWpc9KkSapSpYp8fHwUGBioRx55RHv37nVo16BBA1WsWFGbNm3Sgw8+qPz586tUqVIaO3asMjIy7PX84x//kCT17t3bvh2v7WJwq2PXL7/8IpvNps8++8w+bcuWLbLZbLr//vsdamrbtq2qV69+0+0gXX2flC1bVl5eXipfvrxmzpx5y8dI0vHjx9W/f39VqFBBBQoUUEhIiBo1apSjvvvXd3G79pZd17IPPvhA9913n7y8vFShQgV9+umnDvNv1J0gJ58Fme+xH374QY888ogCAwMVFRUl6cZd3bLrLpKdnB7fs6t/8+bN6tKli0qUKCEfHx+VKFFCXbt21R9//OHw2Mx1mZiYqPj4eAUHB8vf3189evTQ+fPnlZSUpE6dOqlgwYIKDw/X0KFDlZaW5vAcp06dUv/+/VWkSBF5enqqVKlSevHFF5WamurQLrN7yqxZs1S+fHnlz59flStX1tdff53lNeX0mPrZZ58pJiZGAQEB9vdNnz59brpebTabzp8/rxkzZmS73yQlJalfv34qWrSoPD09VbJkSY0aNUpXrlxxeJ73339flStXVoECBeTn56dy5crphRdesK/XRx99VJLUsGFD+3KuPRbl5XEzp8ceY4xGjx5tP5bWqFFDCQkJ2e67Bw4cUPfu3e2fN+XLl9fbb79tP0Y5jYFTTZs2zUgymzZtcpj+3nvvGUnmiy++sE+bPXu2kWSaNWtmFi5caObNm2eqV69uPD09zerVq+3t1q1b53BLTEw0RYoUMWFhYSYlJcUYY0zPnj2Nr6+v/THz5s0zXl5e5sknnzRXrlyxT69fv76JiooyVapUMb/99psxxphevXqZqVOnmoSEBJOQkGBeffVV4+PjY0aNGuXwGnr27GlsNpt59tlnzfLly8348eNNkSJFjL+/v+nZs+dN10taWppp2LChcXd3N0OHDjVLly41ixYtMi+88IKZO3euMcaYgwcPZnmtzz77rJFkxo0bZ4wxJj093bRo0cL4+vqaUaNGmYSEBPPRRx+ZIkWKmAoVKpgLFy4YY4zZt2+f8fb2Nk2bNjULFy40K1euNLNnzzaxsbHm9OnT5tKlS2bZsmVGkomLi7Mvb/fu3cYYY1auXGk8PDxM9erVzbx588zChQtNs2bNjM1mM59++qn9dfXr18/kz5/fjB8/3nz33Xfm66+/NmPHjjXvvvuuvc2yZcuMh4eHiY6ONtOnTzeJiYnm448/Nl26dMmy3+zbt++m63HFihXGzc3NPPDAA2bBggXms88+M//4xz9M8eLFzfVv58jISIftkpycbHr16mVmzZplEhMTzbJly8zQoUNNvnz5zIwZM2663HPnzpng4GBTo0YNM3/+fLNq1Sozb94888QTT5hff/01x9vPGGMkmREjRtjvf/fdd0aS+e6774wxxvzyyy/m//7v/4wkM23aNIftMnr0aCPJdO3a1SxZssTMnDnTlCpVygQEBNj357NnzxoPDw8zevRo+zKeeOIJ4+PjY3x9fc3ly5eNMcYcPXrU2Gw2M2nSpJu+9hEjRtj3k2+++cZ8+OGH9vdf/fr17e327dtnrznTqlWrzJAhQ8znn39uVq1aZb788kvTvn174+PjY3bu3JllHURGRppevXqZZcuWmcmTJ5sCBQqYhg0bmqZNm5qhQ4ea5cuXmzfeeMO4ubmZAQMGONQZHx9vPDw8zJAhQ8yyZcvMnDlzTLly5UxoaKhJSkqyt6tfv74JDg42ZcqUMZMnTzYJCQmmf//+RpJ9P0hJSbHvk//3f/9n354HDx40xuT82BUeHm769u1rvz927Fjj4+NjJJk///zTGHP12ODv72+GDRt20+2QWU+7du3M4sWLzSeffGJKly5tihUrZiIjIx3aXr+P7dy50zz55JPm008/NStXrjRff/21iYuLM/ny5bPvdzdy7NixLPv1+PHjjSTTv39/h2UWK1bMVKhQwcydO9csWrTItGjRwkgyn3
32mb3d9ft7btZn5r4YGRlpnnvuOZOQkGAWLlxojLm6Xa/dHzP17Nkzy/rJTk6P79nV/9lnn5mXX37ZfPnll2bVqlX
2023-04-03 21:27:41 +02:00
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"plt.figure(figsize=(8, 6))\n",
"diamonds_test['cut'].value_counts().plot(kind='bar')\n",
"plt.title('Rozkład częstości dla szlifów diamentów dla zbioru diamonds testowego')\n",
"plt.xlabel('Szlif')\n",
"plt.ylabel('Liczba wystąpień')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 19,
2023-04-03 21:27:41 +02:00
"id": "18e61963",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAskAAAJaCAYAAADdx9oAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAA9hAAAPYQGoP6dpAABwiElEQVR4nO3deXxMZ///8ffILiQE2TSCWErt3La2xL7vra2139GWctu60FvRBdWW9tZbV7UrXVS1NBW13NROUS1qLVqxJ/aI5Pr94ZfzNSdBUolJeD0fj3k8Mudcc+Zz5pw5856T61zjMMYYAQAAALDkcnUBAAAAQHZDSAYAAABsCMkAAACADSEZAAAAsCEkAwAAADaEZAAAAMCGkAwAAADYEJIBAAAAG0IyAAAAYENIBm4we/Zs+fj4aPXq1a4u5b7wn//8R35+ftq2bZurS7mvLVq0SJ6envrhhx9cXQoAZBuEZJvp06fL4XBYN3d3d4WEhKhz587au3dvlj1vz549lSdPntu2i4yMVGRkZKY+d9GiRdWzZ89MXebdcOnSJY0ePVorV67MlOXt2rVL/fr106xZs/Too49m6LEp+82hQ4f+1nOPHj1aDofDaVp23S4Oh0OjR4+27q9cuVIOhyPVdpg8ebJKlCghT09PORwOxcXFOc3fuHGjRowYoa+++kqVKlXK8rrt7O+lQ4cOyeFwaPr06Xe9lqzw119/afTo0bf9AnLw4EH17NlTH330kZo0aXJ3irtBz549VbRoUadp9n3sbnA4HHr22Wdv2+5m+3t2ZT+O5LT6/660jqnZVXqPqWlJ6/3jClmRTbIDd1cXkF1NmzZNDz74oK5cuaKffvpJr7/+ulasWKHdu3crf/78ri4Puh6Sx4wZI0l3/Oa8dOmSHn/8cb3++ut67LHHMvz4Fi1aaN26dQoJCbmjOnKiKlWqaN26dSpbtqw1bdu2bRo4cKD++c9/qkePHnJ3d1fevHmt+WfOnFGnTp00ZcoUNWrUyBVlpxISEqJ169YpIiLC1aVkir/++ktjxoxR0aJFb/ol5OrVq+rYsaMGDx6cLb+QZUdp7e85SU6v/36QE7fRlClTXF1CliAk30S5cuVUrVo1SdcDWFJSkkaNGqWFCxeqV69eLq4OmS137tzauXPn3358oUKFVKhQoUysKOfw8/NTzZo1nab9+uuvkqSoqChVr1491WMCAgJ08ODBu1Jfenl5eaVaj3udp6enNm3a5OoycpS09vc7cenSJeXOnTvTlnc7mV0/Ml9O3EY5KdBnBN0t0iklMB8/ftxp+qJFi1SrVi3lzp1befPmVaNGjbRu3TqnNjd237DfbvXv+Z9++kkFCxZUy5YtdfHixZu2GzNmjGrUqKGAgAD5+fmpSpUqmjp1qowxTu0SExP1/PPPKzg4WLlz59YjjzyijRs3pvs1SEhI0CuvvKIyZcrI29tbBQoUUL169bR27VpJ//fvrbRuN56lunr1ql577TU9+OCD8vLyUqFChdSrVy+dPHnS6fmWL1+uyMhIFShQQD4+PipSpIg6dOigS5cu6dChQ1YoHTNmTJrPs2bNGjVo0EB58+ZV7ty5Vbt2bS1evNjpOS5duqRhw4apWLFi8vb2VkBAgKpVq6bPPvvMqd2GDRvUqlUrFShQQN7e3oqIiNCgQYOs+RnpbrF48WJVqlRJXl5eKlasmN566610vPrSlStXNHToUFWqVEn+/v4KCAhQrVq19M0336Tr8T///LNatmypwMBAeXl5KTQ0VC1atNDRo0clpX/72dn/NRgZGaknn3xSklSjRo1Uj//0009VsWJF6/Vu166ddu3a5fT6OBwOp/D21VdfyeFwqEWLFk7PXaFCBXXo0OGW622M0YQJExQeHi5vb29VqVJF33//fap2aXW32Ldvn3r16qWSJUsqd+7cKly4sFq1aqVffvklzddg7ty5euGFFxQSEqI8efKoVatWOn78uM6fP6++ff
uqYMGCKliwoHr16qULFy6kqnPKlCmqVKmSfHx8lD9/fj322GM6cOCAU7vIyEiVK1dOmzZt0qOPPqrcuXOrePHiGj9+vJKTk616/vGPf0iSevXqZW3HG/+le7tj16+//iqHw6EvvvjCmrZlyxY5HA499NBDTjW1bt1aVatWveV2kK6/T0qXLi0vLy+VKVNGM2fOvO1jJOnkyZPq16+fypYtqzx58igwMFD169dP17UD9i50N97S+g/Uhx9+qFKlSsnLy0tly5bVvHnznObf7F/h6fksSHmPbd26VY899pjy589v/efiZv+uTu+/09N7fE+r/s2bN6tz584qWrSofHx8VLRoUXXp0kV//PGH02NTXsvly5crKipKBQoUkJ+fn7p3766LFy8qNjZWHTt2VL58+RQSEqJhw4YpMTHRaRlnzpxRv379VLhwYXl6eqp48eJ66aWXlJCQ4NQupfvLrFmzVKZMGeXOnVsVK1bUd999l2qd0ntM/eKLL1SjRg35+/tb75vevXvf8nV9/PHHU+3vrVq1SvXe2Lp1qxwOh7799ltJd7bP3mwfS+/7J72ZQJLmzp2rWrVqKU+ePMqTJ48qVaqkqVOnSpJeffVVubu768iRI6ke17t3bxUoUEBXrlyRdPPua2+99ZYmTpyoYsWKKU+ePKpVq5bWr1+fanmbN29W69atFRAQIG9vb1WuXFmff/55qnZr1qxRrVq15O3trcKFC2vkyJH65JNPUn3+Jicna8KECVbOCAwMVPfu3a3Pu3QzcDJt2jQjyWzatMlp+nvvvWckma+++sqaNmfOHCPJNG7c2CxcuNDMnz/fVK1a1Xh6eprVq1db7datW+d0W758uSlcuLAJDg428fHxxhhjevToYXx9fa3HzJ8/33h5eZlnnnnGXLt2zZpet25dExERYSpVqmR+//13Y4wxPXv2NFOnTjUxMTEmJibGvPrqq8bHx8eMGTPGaR169OhhHA6Hee6558zSpUvNxIkTTeHChY2fn5/p0aPHLV+XxMREU69ePePu7m6GDRtmlixZYhYtWmRGjBhhPvvsM2OMMUeOHEm1rs8995yRZCZMmGCMMSYpKck0bdrU+Pr6mjFjxpiYmBjzySefmMKFC5uyZcuaS5cuGWOMOXjwoPH29jaNGjUyCxcuNCtXrjRz5swx3bp1M2fPnjVXrlwx0dHRRpLp06eP9Xz79u0zxhizcuVK4+HhYapWrWrmz59vFi5caBo3bmwcDoeZN2+etV5PPfWUyZ07t5k4caJZsWKF+e6778z48ePN5MmTrTbR0dHGw8PDVKhQwUyfPt0sX77cfPrpp6Zz586p9puDBw/e8nVctmyZcXNzM4888ohZsGCB+eKLL8w//vEPU6RIEWN/O4aHhzttl7i4ONOzZ08za9Yss3z5chMdHW2GDRtmcuXKZWbMmHHL571w4YIpUKCAqVatmvn888/NqlWrzPz5883TTz9tfvvtt3RvP2OMkWRGjRpl3V+xYoWRZFasWGGMMebXX381//73v40kM23aNKftMnbsWCPJdOnSxSxevNjMnDnTFC9e3Pj7+1v78/nz542Hh4cZO3as9RxPP/208fHxMb6+vubq1avGGGOOHz9uHA6HmTJlyi3XfdSoUdZ+8v3335uPPvrIev/VrVvXanfw4EGr5hSrVq0yQ4cONV9++aVZtWqV+frrr03btm2Nj4+P2b17d6rXIDw83PTs2dNER0ebDz74wOTJk8fUq1fPNGrUyAwbNswsXbrUvPHGG8bNzc0MGDDAqc6oqCjj4eFhhg4daqKjo83cuXPNgw8+aIKCgkxsbKzVrm7duqZAgQKmZMmS5oMPPjAxMTGmX79+RpK1H8THx1v75L///W9rex45csQYk/5jV0hIiOnbt691f/z48cbHx8dIMn/++acx5vqxwc/Pzzz//PO33A4p9bRp08Z8++23Zvbs2aZEiRImLCzMhIeHO7W172O7d+82zzzzjJk3b55ZuX
Kl+e6770yfPn1Mrly5rP3uZk6cOJFqv544caKRZPr16+f0nGFhYaZs2bLms88+M4sWLTJNmzY1kswXX3xhtbPv7xl
2023-04-03 21:27:41 +02:00
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"plt.figure(figsize=(8, 6))\n",
"diamonds_dev['cut'].value_counts().plot(kind='bar')\n",
"plt.title('Rozkład częstości dla szlifów diamentów dla zbioru diamonds walidacyjnego')\n",
"plt.xlabel('Szlif')\n",
"plt.ylabel('Liczba wystąpień')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 20,
2023-04-03 21:27:41 +02:00
"id": "1bf608c2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>carat</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cut</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>fair</th>\n",
" <td>0.516404</td>\n",
" </tr>\n",
" <tr>\n",
" <th>good</th>\n",
" <td>0.454054</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ideal</th>\n",
" <td>0.432876</td>\n",
" </tr>\n",
" <tr>\n",
" <th>premium</th>\n",
" <td>0.515262</td>\n",
" </tr>\n",
" <tr>\n",
" <th>very good</th>\n",
" <td>0.459435</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" carat\n",
"cut \n",
"fair 0.516404\n",
"good 0.454054\n",
"ideal 0.432876\n",
"premium 0.515262\n",
"very good 0.459435"
]
},
"execution_count": 20,
2023-04-03 21:27:41 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"diamonds[[\"cut\",\"carat\"]].groupby(\"cut\").std()"
]
},
{
"cell_type": "code",
"execution_count": 21,
2023-04-03 21:27:41 +02:00
"id": "0d6e54d9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: xlabel='cut'>"
]
},
"execution_count": 21,
2023-04-03 21:27:41 +02:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiMAAAHoCAYAAACb7e9bAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAxc0lEQVR4nO3de1hVdaLG8XcDAmKoiYA3VBQ1E7XCLFHmjFaYOupYTTReIEWKmOQkakqOpqZZTqF1Sk3F0BlrsKY6NXFMRtO8ViKoKd4veAERO4qXCRT2+cPHfdqBJurmB+zv53n287h/e629380WeFnrt9ayWK1WqwAAAAxxMR0AAAA4N8oIAAAwijICAACMoowAAACjKCMAAMAoyggAADCKMgIAAIxyMx3gRpSWlurEiRPy9vaWxWIxHQcAANwAq9Wqc+fOqUmTJnJxufb2j2pRRk6cOKGAgADTMQAAwE04evSomjVrds3Hq0UZ8fb2lnTlzdStW9dwGgAAcCMKCwsVEBBg+z1+LdWijFzdNVO3bl3KCAAA1cyvTbFgAisAADCKMgIAAIyijAAAAKOqxZwRAABup5KSEl26dMl0jGqvVq1acnV1veXnoYwAAJyG1WpVXl6ezpw5YzpKjVG/fn01atTols4DRhkBADiNq0XEz89PXl5enEjzFlitVl28eFH5+fmSpMaNG9/0c1FGAABOoaSkxFZEfHx8TMepEWrXri1Jys/Pl5+f303vsmECKwDAKVydI+Ll5WU4Sc1y9et5K3NwKCMAAKfCrpnb63Z8PSkjAADAKMoIAAAwigmsAACn1nLCl5X6eodf61epr1cdsGUEAACUcfjwYVksFmVlZTn8tSgjAAA4kap45lnKCAAAVVxpaalef/11BQUFycPDQ82bN9eMGTMkSePHj1fbtm3l5eWlVq1aadKkSXaFY8qUKbrnnnu0ePFitWrVSh4eHrJarVqxYoV69Oih+vXry8fHR7/73e904MAB23qBgYGSpHvvvVcWi0W//e1vHfb+mDPyM5W939AR2BcJADVPYmKiFi5cqNmzZ6tHjx7Kzc3V7t27JUne3t5KSUlRkyZNtGPHDsXExMjb21svvviibf39+/dr+fLl+sc//mE7MdmFCxeUkJCgjh076sKFC5o8ebIGDRqkrKwsubi46LvvvlPXrl31r3/9Sx06dJC7u7vD3h9lBACAKuzcuXN666239M477ygqKkqS1Lp1a/Xo0UOS9Oc//9m2bMuWLTVmzBilpqbalZHi4mL99a9/la+vr23s8ccft3ud5ORk+fn5adeuXQoODrYt6+Pjo0aNGjns/UnspgEAoErLzs5WUVGRHnrooXIf//jjj9WjRw81atRId9xxhyZNmqScnBy7ZVq0aGFXRCTpwIEDGjx4sFq1aqW6devadsv8ct3KQBkBAKAKu3r9l/Js3rxZTz31lPr06aN//vOfyszM1MSJE1VcXGy3XJ06dcqs279/f50+fVoLFy7Ut99+q2+//VaSyqxbGdhNAwBAFdamTRvVrl1bq1at0siRI+0e27Bhg1q0aKGJEyfaxo4cOfKrz3n69GllZ2frvffeU1hYmCRp/fr1dstcnSNSUlJyq2/hV1FGAACowjw9PTV+/Hi9+OKLcnd3V/fu3XXq1Cnt3LlTQUFBysnJ0d///nfdf//9+vLLL/Xpp5/+6nPeeeed8vHx0YIFC9S4cWPl5ORowoQJdsv4+fmpdu3aWrFihZo1ayZPT0/Vq1fPIe+RMgIAcGrV4SjESZMmyc3NTZMnT9aJEyfUuHFjxcbGKjo6WqNHj9bzzz+voqIi9evXT5MmTdKUKVOu+3wuLi76+9//rvj4eAUHB6tdu3Z6++237Q7fdXNz09tvv61p06Zp8uTJCgsL05o1axzy/ixWq9XqkGe+jQoLC1WvXj2dPXtWdevWddjrcGgvANRcP/30kw4dOqTAwEB5enqajlNjXO/reqO/v5nACg
AAjKKMAAAAoygjAADAKMoIAAAwqsJl5JtvvlH//v3VpEkTWSwWffbZZ7+6ztq1axUSEiJPT0+1atVK8+fPv5msAADcstLSUtMRapTb8fWs8KG9Fy5cUOfOnTV8+PAy57Uvz6FDh9S3b1/FxMTob3/7mzZs2KC4uDj5+vre0PoAANwO7u7ucnFx0YkTJ+Tr6yt3d3dZLBbTsaotq9Wq4uJinTp1Si4uLrd0Ib0Kl5E+ffqoT58+N7z8/Pnz1bx5c82ZM0eS1L59e23ZskVvvPHGNctIUVGRioqKbPcLCwsrGhMAADsuLi4KDAxUbm6uTpw4YTpOjeHl5aXmzZvLxeXmZ344/KRnmzZtUnh4uN1Y7969lZycrEuXLqlWrVpl1pk5c6amTp3q6GgAACfj7u6u5s2b6/Lly5VymvOaztXVVW5ubre8hcnhZSQvL0/+/v52Y/7+/rp8+bIKCgrUuHHjMuskJiYqISHBdr+wsFABAQGOjgoAcAIWi0W1atUq949hmFEpp4P/ZWO6etLXazUpDw8PeXh4ODwXAAAwz+GH9jZq1Eh5eXl2Y/n5+XJzc5OPj4+jXx4AAFRxDi8j3bp1U3p6ut3YypUr1aVLFzaRAQCAipeR8+fPKysrS1lZWZKuHLqblZWlnJwcSVfme0RGRtqWj42N1ZEjR5SQkKDs7GwtXrxYycnJGjt27O15BwAAoFqr8JyRLVu2qGfPnrb7VyeaRkVFKSUlRbm5ubZiIkmBgYFKS0vT6NGj9e6776pJkyZ6++23OccIAACQJFmsV2eTVmE3egniW9VywpcOe+7Kcvi1fqYjAAAg6cZ/f3NtGgAAYBRlBAAAGEUZAQAARlFGAACAUZQRAABgFGUEAAAYRRkBAABGUUYAAIBRlBEAAGAUZQQAABhFGQEAAEZRRgAAgFGUEQAAYBRlBAAAGEUZAQAARlFGAACAUZQRAABgFGUEAAAYRRkBAABGUUYAAIBRlBEAAGAUZQQAABhFGQEAAEZRRgAAgFGUEQAAYBRlBAAAGEUZAQAARlFGAACAUZQRAABglJvpAACAG9NywpemI9wWh1/rZzoCqhi2jAAAAKMoIwAAwCjKCAAAMIoyAgAAjKKMAAAAoygjAADAKMoIAAAwijICAACMoowAAACjKCMAAMAoyggAADCKMgIAAIyijAAAAKO4ai+qpJpwdVKuTAoAN4YtIwAAwCjKCAAAMIoyAgAAjKKMAAAAoygjAADAKMoIAAAwijICAACMoowAAACjKCMAAMAoyggAADCKMgIAAIyijAAAAKNuqozMnTtXgYGB8vT0VEhIiNatW3fd5ZctW6bOnTvLy8tLjRs31vDhw3X69OmbCgwAAGqWCpeR1NRUvfDCC5o4caIyMzMVFhamPn36KCcnp9zl169fr8jISEVHR2vnzp366KOP9P3332vkyJG3HB4AAFR/bhVdISkpSdHR0bYyMWfOHH311VeaN2+eZs6cWWb5zZs3q2XLloqPj5ckBQYG6tlnn9WsWbNuMToAAGa0nPCl6Qi3xeHX+pmOIKmCW0aKi4uVkZGh8PBwu/Hw8HBt3Lix3HVCQ0N17NgxpaWlyWq16uTJk/r444/Vr9+1vwBFRUUqLCy0uwEAgJqpQmWkoKBAJSUl8vf3txv39/dXXl5eueuEhoZq2bJlioiIkLu7uxo1aqT69evrv/7rv675OjNnzlS9evVst4CAgIrEBAAA1chNTWC1WCx2961Wa5mxq3bt2qX4+HhNnjxZGRkZWrFihQ4dOqTY2NhrPn9iYqLOnj1rux09evRmYgIAgGqgQnNGGjZsKFdX1zJbQfLz88tsLblq5syZ6t69u8aNGydJ6tSpk+rUqaOwsDBNnz5djRs3LrOOh4eHPDw8KhINAABUUxXaMuLu7q6QkBClp6fbjaenpys0NLTcdS5evCgXF/uXcXV1lXRliwoAAHBuFd5Nk5CQoEWLFmnx4sXKzs7W6NGjlZOTY9vtkpiYqM
jISNvy/fv31yeffKJ58+bp4MGD2rBhg+Lj49W1a1c1adLk9r0TAABQLVX40N6IiAidPn1a06ZNU25uroKDg5WWlqY
2023-04-03 21:27:41 +02:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"diamonds[[\"cut\",\"carat\"]].groupby(\"cut\").mean().plot(kind=\"bar\")"
]
},
{
"cell_type": "code",
"execution_count": 22,
2023-04-03 21:27:41 +02:00
"id": "4598d9cf",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
2023-04-03 21:27:41 +02:00
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>depth</th>\n",
" <th>table</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.006237</td>\n",
" <td>ideal</td>\n",
" <td>E</td>\n",
" <td>SI2</td>\n",
" <td>0.513889</td>\n",
" <td>0.230769</td>\n",
" <td>0.000000</td>\n",
" <td>0.367784</td>\n",
" <td>0.067572</td>\n",
" <td>0.076415</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.002079</td>\n",
" <td>premium</td>\n",
" <td>E</td>\n",
" <td>SI1</td>\n",
" <td>0.466667</td>\n",
" <td>0.346154</td>\n",
" <td>0.000000</td>\n",
" <td>0.362197</td>\n",
" <td>0.065195</td>\n",
" <td>0.072642</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.006237</td>\n",
" <td>good</td>\n",
" <td>E</td>\n",
" <td>VS1</td>\n",
" <td>0.386111</td>\n",
" <td>0.423077</td>\n",
" <td>0.000054</td>\n",
" <td>0.377095</td>\n",
" <td>0.069100</td>\n",
" <td>0.072642</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.018711</td>\n",
" <td>premium</td>\n",
" <td>I</td>\n",
" <td>VS2</td>\n",
" <td>0.538889</td>\n",
" <td>0.288462</td>\n",
" <td>0.000433</td>\n",
" <td>0.391061</td>\n",
" <td>0.071817</td>\n",
" <td>0.082704</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0.022869</td>\n",
" <td>good</td>\n",
" <td>J</td>\n",
" <td>SI2</td>\n",
" <td>0.563889</td>\n",
" <td>0.288462</td>\n",
" <td>0.000487</td>\n",
" <td>0.404097</td>\n",
" <td>0.073854</td>\n",
" <td>0.086478</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53935</th>\n",
" <td>53936</td>\n",
" <td>0.108108</td>\n",
" <td>ideal</td>\n",
" <td>D</td>\n",
" <td>SI1</td>\n",
" <td>0.494444</td>\n",
" <td>0.269231</td>\n",
" <td>0.131427</td>\n",
" <td>0.535382</td>\n",
" <td>0.097793</td>\n",
" <td>0.110063</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53936</th>\n",
" <td>53937</td>\n",
" <td>0.108108</td>\n",
" <td>good</td>\n",
" <td>D</td>\n",
" <td>SI1</td>\n",
" <td>0.558333</td>\n",
" <td>0.230769</td>\n",
" <td>0.131427</td>\n",
" <td>0.529795</td>\n",
" <td>0.097623</td>\n",
" <td>0.113522</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53937</th>\n",
" <td>53938</td>\n",
" <td>0.103950</td>\n",
" <td>very good</td>\n",
" <td>D</td>\n",
" <td>SI1</td>\n",
" <td>0.550000</td>\n",
" <td>0.326923</td>\n",
" <td>0.131427</td>\n",
" <td>0.527002</td>\n",
" <td>0.096435</td>\n",
" <td>0.111950</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53938</th>\n",
" <td>53939</td>\n",
" <td>0.137214</td>\n",
" <td>premium</td>\n",
" <td>H</td>\n",
" <td>SI2</td>\n",
" <td>0.500000</td>\n",
" <td>0.288462</td>\n",
" <td>0.131427</td>\n",
" <td>0.572626</td>\n",
" <td>0.103905</td>\n",
" <td>0.117610</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53939</th>\n",
" <td>53940</td>\n",
" <td>0.114345</td>\n",
" <td>ideal</td>\n",
" <td>D</td>\n",
" <td>SI2</td>\n",
" <td>0.533333</td>\n",
" <td>0.230769</td>\n",
" <td>0.131427</td>\n",
" <td>0.542831</td>\n",
" <td>0.099660</td>\n",
" <td>0.114465</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>53940 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" id carat cut color clarity depth table price \\\n",
"0 1 0.006237 ideal E SI2 0.513889 0.230769 0.000000 \n",
"1 2 0.002079 premium E SI1 0.466667 0.346154 0.000000 \n",
"2 3 0.006237 good E VS1 0.386111 0.423077 0.000054 \n",
"3 4 0.018711 premium I VS2 0.538889 0.288462 0.000433 \n",
"4 5 0.022869 good J SI2 0.563889 0.288462 0.000487 \n",
"... ... ... ... ... ... ... ... ... \n",
"53935 53936 0.108108 ideal D SI1 0.494444 0.269231 0.131427 \n",
"53936 53937 0.108108 good D SI1 0.558333 0.230769 0.131427 \n",
"53937 53938 0.103950 very good D SI1 0.550000 0.326923 0.131427 \n",
"53938 53939 0.137214 premium H SI2 0.500000 0.288462 0.131427 \n",
"53939 53940 0.114345 ideal D SI2 0.533333 0.230769 0.131427 \n",
2023-04-03 21:27:41 +02:00
"\n",
" x y z \n",
"0 0.367784 0.067572 0.076415 \n",
"1 0.362197 0.065195 0.072642 \n",
"2 0.377095 0.069100 0.072642 \n",
"3 0.391061 0.071817 0.082704 \n",
"4 0.404097 0.073854 0.086478 \n",
"... ... ... ... \n",
"53935 0.535382 0.097793 0.110063 \n",
"53936 0.529795 0.097623 0.113522 \n",
"53937 0.527002 0.096435 0.111950 \n",
"53938 0.572626 0.103905 0.117610 \n",
"53939 0.542831 0.099660 0.114465 \n",
2023-04-03 21:27:41 +02:00
"\n",
"[53940 rows x 11 columns]"
]
},
"execution_count": 22,
2023-04-03 21:27:41 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#normalizacja wartości typu float do zakrsu 0.0 - 1.0\n",
"#Powyżej wykonano jeszcze konwersję danych typu string na lowerCase\n",
"\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"scaler = MinMaxScaler()\n",
"diamonds[['carat', 'depth', 'table', 'price', 'x', 'y', 'z']] = scaler.fit_transform(diamonds[['carat', 'depth', 'table', 'price', 'x', 'y', 'z']])\n",
"\n",
"#wyświetlenie zbioru\n",
"diamonds"
]
},
{
"cell_type": "code",
"execution_count": 23,
2023-04-03 21:27:41 +02:00
"id": "97350bed",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
2023-04-03 21:27:41 +02:00
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>depth</th>\n",
" <th>table</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.006237</td>\n",
" <td>ideal</td>\n",
2023-04-03 21:27:41 +02:00
" <td>E</td>\n",
" <td>SI2</td>\n",
" <td>0.513889</td>\n",
" <td>0.230769</td>\n",
" <td>0.000000</td>\n",
" <td>0.367784</td>\n",
" <td>0.067572</td>\n",
" <td>0.076415</td>\n",
2023-04-03 21:27:41 +02:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.002079</td>\n",
" <td>premium</td>\n",
2023-04-03 21:27:41 +02:00
" <td>E</td>\n",
" <td>SI1</td>\n",
" <td>0.466667</td>\n",
" <td>0.346154</td>\n",
" <td>0.000000</td>\n",
" <td>0.362197</td>\n",
" <td>0.065195</td>\n",
" <td>0.072642</td>\n",
2023-04-03 21:27:41 +02:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.006237</td>\n",
" <td>good</td>\n",
2023-04-03 21:27:41 +02:00
" <td>E</td>\n",
" <td>VS1</td>\n",
" <td>0.386111</td>\n",
" <td>0.423077</td>\n",
" <td>0.000054</td>\n",
" <td>0.377095</td>\n",
" <td>0.069100</td>\n",
" <td>0.072642</td>\n",
2023-04-03 21:27:41 +02:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.018711</td>\n",
" <td>premium</td>\n",
2023-04-03 21:27:41 +02:00
" <td>I</td>\n",
" <td>VS2</td>\n",
" <td>0.538889</td>\n",
" <td>0.288462</td>\n",
" <td>0.000433</td>\n",
" <td>0.391061</td>\n",
" <td>0.071817</td>\n",
" <td>0.082704</td>\n",
2023-04-03 21:27:41 +02:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0.022869</td>\n",
" <td>good</td>\n",
2023-04-03 21:27:41 +02:00
" <td>J</td>\n",
" <td>SI2</td>\n",
" <td>0.563889</td>\n",
" <td>0.288462</td>\n",
" <td>0.000487</td>\n",
" <td>0.404097</td>\n",
" <td>0.073854</td>\n",
" <td>0.086478</td>\n",
2023-04-03 21:27:41 +02:00
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53935</th>\n",
" <td>53936</td>\n",
" <td>0.108108</td>\n",
" <td>ideal</td>\n",
2023-04-03 21:27:41 +02:00
" <td>D</td>\n",
" <td>SI1</td>\n",
" <td>0.494444</td>\n",
" <td>0.269231</td>\n",
" <td>0.131427</td>\n",
" <td>0.535382</td>\n",
" <td>0.097793</td>\n",
" <td>0.110063</td>\n",
2023-04-03 21:27:41 +02:00
" </tr>\n",
" <tr>\n",
" <th>53936</th>\n",
" <td>53937</td>\n",
" <td>0.108108</td>\n",
" <td>good</td>\n",
2023-04-03 21:27:41 +02:00
" <td>D</td>\n",
" <td>SI1</td>\n",
" <td>0.558333</td>\n",
" <td>0.230769</td>\n",
" <td>0.131427</td>\n",
" <td>0.529795</td>\n",
" <td>0.097623</td>\n",
" <td>0.113522</td>\n",
2023-04-03 21:27:41 +02:00
" </tr>\n",
" <tr>\n",
" <th>53937</th>\n",
" <td>53938</td>\n",
" <td>0.103950</td>\n",
" <td>very good</td>\n",
2023-04-03 21:27:41 +02:00
" <td>D</td>\n",
" <td>SI1</td>\n",
" <td>0.550000</td>\n",
" <td>0.326923</td>\n",
" <td>0.131427</td>\n",
" <td>0.527002</td>\n",
" <td>0.096435</td>\n",
" <td>0.111950</td>\n",
2023-04-03 21:27:41 +02:00
" </tr>\n",
" <tr>\n",
" <th>53938</th>\n",
" <td>53939</td>\n",
" <td>0.137214</td>\n",
" <td>premium</td>\n",
2023-04-03 21:27:41 +02:00
" <td>H</td>\n",
" <td>SI2</td>\n",
" <td>0.500000</td>\n",
" <td>0.288462</td>\n",
" <td>0.131427</td>\n",
" <td>0.572626</td>\n",
" <td>0.103905</td>\n",
" <td>0.117610</td>\n",
2023-04-03 21:27:41 +02:00
" </tr>\n",
" <tr>\n",
" <th>53939</th>\n",
" <td>53940</td>\n",
" <td>0.114345</td>\n",
" <td>ideal</td>\n",
2023-04-03 21:27:41 +02:00
" <td>D</td>\n",
" <td>SI2</td>\n",
" <td>0.533333</td>\n",
" <td>0.230769</td>\n",
" <td>0.131427</td>\n",
" <td>0.542831</td>\n",
" <td>0.099660</td>\n",
" <td>0.114465</td>\n",
2023-04-03 21:27:41 +02:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>53940 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" id carat cut color clarity depth table price \\\n",
"0 1 0.006237 ideal E SI2 0.513889 0.230769 0.000000 \n",
"1 2 0.002079 premium E SI1 0.466667 0.346154 0.000000 \n",
"2 3 0.006237 good E VS1 0.386111 0.423077 0.000054 \n",
"3 4 0.018711 premium I VS2 0.538889 0.288462 0.000433 \n",
"4 5 0.022869 good J SI2 0.563889 0.288462 0.000487 \n",
"... ... ... ... ... ... ... ... ... \n",
"53935 53936 0.108108 ideal D SI1 0.494444 0.269231 0.131427 \n",
"53936 53937 0.108108 good D SI1 0.558333 0.230769 0.131427 \n",
"53937 53938 0.103950 very good D SI1 0.550000 0.326923 0.131427 \n",
"53938 53939 0.137214 premium H SI2 0.500000 0.288462 0.131427 \n",
"53939 53940 0.114345 ideal D SI2 0.533333 0.230769 0.131427 \n",
2023-04-03 21:27:41 +02:00
"\n",
" x y z \n",
"0 0.367784 0.067572 0.076415 \n",
"1 0.362197 0.065195 0.072642 \n",
"2 0.377095 0.069100 0.072642 \n",
"3 0.391061 0.071817 0.082704 \n",
"4 0.404097 0.073854 0.086478 \n",
"... ... ... ... \n",
"53935 0.535382 0.097793 0.110063 \n",
"53936 0.529795 0.097623 0.113522 \n",
"53937 0.527002 0.096435 0.111950 \n",
"53938 0.572626 0.103905 0.117610 \n",
"53939 0.542831 0.099660 0.114465 \n",
2023-04-03 21:27:41 +02:00
"\n",
"[53940 rows x 11 columns]"
]
},
"execution_count": 23,
2023-04-03 21:27:41 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Usuwanie artefaktów\n",
"diamonds = diamonds.dropna() ## usuwanie pustych wierszy, które posiadają przynajmniej jedno wystąpienie NULL or NaN\n",
"diamonds"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "4e91548a",
"metadata": {},
"source": [
"#### Zapisanie zbiorów danych do pliku csv (na potrzeby zadania z LAB 5 - BIBLIOTEKI DL)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "37f739a6",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"directory ='dane'\n",
"if not os.path.exists(directory):\n",
" os.makedirs('dane')\n",
"\n",
2023-06-04 15:39:17 +02:00
"diamonds.to_csv('dane/diamonds.csv', index=False)\n",
"diamonds_train.to_csv('dane/diamonds_train.csv', index=False)\n",
"diamonds_test.to_csv('dane/diamonds_test.csv', index=False)\n",
"diamonds_dev.to_csv('dane/diamonds_dev.csv', index=False)"
]
2023-04-03 21:27:41 +02:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
2023-04-03 21:27:41 +02:00
}
},
"nbformat": 4,
"nbformat_minor": 5
}