{ "cells": [ { "cell_type": "code", "execution_count": 5, "id": "d80a4450", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: pandas in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (1.5.3)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from pandas) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from pandas) (2023.2)\n", "Requirement already satisfied: numpy>=1.21.0 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from pandas) (1.24.2)\n", "Requirement already satisfied: six>=1.5 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "# Pinned to the version reported in this cell's recorded output so a fresh environment matches the original run.\n", "%pip install --user pandas==1.5.3" ] }, { "cell_type": "code", "execution_count": 6, "id": "350abc87", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: kaggle in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (1.5.13)\n", "Requirement already satisfied: six>=1.10 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from kaggle) (1.16.0)\n", "Requirement already satisfied: certifi in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from kaggle) (2022.12.7)\n", "Requirement already satisfied: python-dateutil in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from kaggle) (2.8.2)\n", "Requirement already satisfied: requests in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from kaggle) (2.28.2)\n", "Requirement already satisfied: tqdm in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from kaggle) 
(4.65.0)\n", "Requirement already satisfied: python-slugify in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from kaggle) (8.0.1)\n", "Requirement already satisfied: urllib3 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from kaggle) (1.26.15)\n", "Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from python-slugify->kaggle) (1.3)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from requests->kaggle) (3.1.0)\n", "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from requests->kaggle) (3.4)\n", "Requirement already satisfied: colorama in c:\\users\\admin\\appdata\\roaming\\python\\python311\\site-packages (from tqdm->kaggle) (0.4.6)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "# Pinned to the version reported in this cell's recorded output so a fresh environment matches the original run.\n", "%pip install --user kaggle==1.5.13" ] }, { "cell_type": "code", "execution_count": 8, "id": "5bc46bfd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading diamonds.zip to c:\\Users\\admin\\ium_z487175\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", " 0%| | 0.00/733k [00:00, ?B/s]\n", "100%|██████████| 733k/733k [00:00<00:00, 1.35MB/s]\n", "100%|██████████| 733k/733k [00:00<00:00, 1.33MB/s]\n" ] } ], "source": [ "# Download the dataset archive (diamonds.zip) into the current working directory via the Kaggle CLI.\n", "!kaggle datasets download -d shivam2503/diamonds" ] }, { "cell_type": "code", "execution_count": 10, "id": "75024e0f", "metadata": {}, "outputs": 
[], "source": [ "import zipfile\n", "\n", "# Unpack the downloaded .zip archive into the working directory.\n", "# The standard library is used instead of `tar -xf`, which only reads .zip files\n", "# when tar is bsdtar (as on Windows), so the notebook runs the same on any OS.\n", "with zipfile.ZipFile('diamonds.zip') as archive:\n", "    archive.extractall()" ] }, { "cell_type": "code", "execution_count": 88, "id": "99c20a95", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Unnamed: 0 | \n", "carat | \n", "cut | \n", "color | \n", "clarity | \n", "depth | \n", "table | \n", "price | \n", "x | \n", "y | \n", "z | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "0.23 | \n", "Ideal | \n", "E | \n", "SI2 | \n", "61.5 | \n", "55.0 | \n", "326 | \n", "3.95 | \n", "3.98 | \n", "2.43 | \n", "
1 | \n", "2 | \n", "0.21 | \n", "Premium | \n", "E | \n", "SI1 | \n", "59.8 | \n", "61.0 | \n", "326 | \n", "3.89 | \n", "3.84 | \n", "2.31 | \n", "
2 | \n", "3 | \n", "0.23 | \n", "Good | \n", "E | \n", "VS1 | \n", "56.9 | \n", "65.0 | \n", "327 | \n", "4.05 | \n", "4.07 | \n", "2.31 | \n", "
3 | \n", "4 | \n", "0.29 | \n", "Premium | \n", "I | \n", "VS2 | \n", "62.4 | \n", "58.0 | \n", "334 | \n", "4.20 | \n", "4.23 | \n", "2.63 | \n", "
4 | \n", "5 | \n", "0.31 | \n", "Good | \n", "J | \n", "SI2 | \n", "63.3 | \n", "58.0 | \n", "335 | \n", "4.34 | \n", "4.35 | \n", "2.75 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
53935 | \n", "53936 | \n", "0.72 | \n", "Ideal | \n", "D | \n", "SI1 | \n", "60.8 | \n", "57.0 | \n", "2757 | \n", "5.75 | \n", "5.76 | \n", "3.50 | \n", "
53936 | \n", "53937 | \n", "0.72 | \n", "Good | \n", "D | \n", "SI1 | \n", "63.1 | \n", "55.0 | \n", "2757 | \n", "5.69 | \n", "5.75 | \n", "3.61 | \n", "
53937 | \n", "53938 | \n", "0.70 | \n", "Very Good | \n", "D | \n", "SI1 | \n", "62.8 | \n", "60.0 | \n", "2757 | \n", "5.66 | \n", "5.68 | \n", "3.56 | \n", "
53938 | \n", "53939 | \n", "0.86 | \n", "Premium | \n", "H | \n", "SI2 | \n", "61.0 | \n", "58.0 | \n", "2757 | \n", "6.15 | \n", "6.12 | \n", "3.74 | \n", "
53939 | \n", "53940 | \n", "0.75 | \n", "Ideal | \n", "D | \n", "SI2 | \n", "62.2 | \n", "55.0 | \n", "2757 | \n", "5.83 | \n", "5.87 | \n", "3.64 | \n", "
53940 rows × 11 columns
\n", "\n", " | id | \n", "carat | \n", "cut | \n", "color | \n", "clarity | \n", "depth | \n", "table | \n", "price | \n", "x | \n", "y | \n", "z | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "0.23 | \n", "Ideal | \n", "E | \n", "SI2 | \n", "61.5 | \n", "55.0 | \n", "326 | \n", "3.95 | \n", "3.98 | \n", "2.43 | \n", "
1 | \n", "2 | \n", "0.21 | \n", "Premium | \n", "E | \n", "SI1 | \n", "59.8 | \n", "61.0 | \n", "326 | \n", "3.89 | \n", "3.84 | \n", "2.31 | \n", "
2 | \n", "3 | \n", "0.23 | \n", "Good | \n", "E | \n", "VS1 | \n", "56.9 | \n", "65.0 | \n", "327 | \n", "4.05 | \n", "4.07 | \n", "2.31 | \n", "
3 | \n", "4 | \n", "0.29 | \n", "Premium | \n", "I | \n", "VS2 | \n", "62.4 | \n", "58.0 | \n", "334 | \n", "4.20 | \n", "4.23 | \n", "2.63 | \n", "
4 | \n", "5 | \n", "0.31 | \n", "Good | \n", "J | \n", "SI2 | \n", "63.3 | \n", "58.0 | \n", "335 | \n", "4.34 | \n", "4.35 | \n", "2.75 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
53935 | \n", "53936 | \n", "0.72 | \n", "Ideal | \n", "D | \n", "SI1 | \n", "60.8 | \n", "57.0 | \n", "2757 | \n", "5.75 | \n", "5.76 | \n", "3.50 | \n", "
53936 | \n", "53937 | \n", "0.72 | \n", "Good | \n", "D | \n", "SI1 | \n", "63.1 | \n", "55.0 | \n", "2757 | \n", "5.69 | \n", "5.75 | \n", "3.61 | \n", "
53937 | \n", "53938 | \n", "0.70 | \n", "Very Good | \n", "D | \n", "SI1 | \n", "62.8 | \n", "60.0 | \n", "2757 | \n", "5.66 | \n", "5.68 | \n", "3.56 | \n", "
53938 | \n", "53939 | \n", "0.86 | \n", "Premium | \n", "H | \n", "SI2 | \n", "61.0 | \n", "58.0 | \n", "2757 | \n", "6.15 | \n", "6.12 | \n", "3.74 | \n", "
53939 | \n", "53940 | \n", "0.75 | \n", "Ideal | \n", "D | \n", "SI2 | \n", "62.2 | \n", "55.0 | \n", "2757 | \n", "5.83 | \n", "5.87 | \n", "3.64 | \n", "
53940 rows × 11 columns
\n", "\n", " | Unnamed: 0 | \n", "carat | \n", "cut | \n", "color | \n", "clarity | \n", "depth | \n", "table | \n", "price | \n", "x | \n", "y | \n", "z | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "0.23 | \n", "ideal | \n", "E | \n", "SI2 | \n", "61.5 | \n", "55.0 | \n", "326 | \n", "3.95 | \n", "3.98 | \n", "2.43 | \n", "
1 | \n", "2 | \n", "0.21 | \n", "premium | \n", "E | \n", "SI1 | \n", "59.8 | \n", "61.0 | \n", "326 | \n", "3.89 | \n", "3.84 | \n", "2.31 | \n", "
2 | \n", "3 | \n", "0.23 | \n", "good | \n", "E | \n", "VS1 | \n", "56.9 | \n", "65.0 | \n", "327 | \n", "4.05 | \n", "4.07 | \n", "2.31 | \n", "
3 | \n", "4 | \n", "0.29 | \n", "premium | \n", "I | \n", "VS2 | \n", "62.4 | \n", "58.0 | \n", "334 | \n", "4.20 | \n", "4.23 | \n", "2.63 | \n", "
4 | \n", "5 | \n", "0.31 | \n", "good | \n", "J | \n", "SI2 | \n", "63.3 | \n", "58.0 | \n", "335 | \n", "4.34 | \n", "4.35 | \n", "2.75 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
53935 | \n", "53936 | \n", "0.72 | \n", "ideal | \n", "D | \n", "SI1 | \n", "60.8 | \n", "57.0 | \n", "2757 | \n", "5.75 | \n", "5.76 | \n", "3.50 | \n", "
53936 | \n", "53937 | \n", "0.72 | \n", "good | \n", "D | \n", "SI1 | \n", "63.1 | \n", "55.0 | \n", "2757 | \n", "5.69 | \n", "5.75 | \n", "3.61 | \n", "
53937 | \n", "53938 | \n", "0.70 | \n", "very good | \n", "D | \n", "SI1 | \n", "62.8 | \n", "60.0 | \n", "2757 | \n", "5.66 | \n", "5.68 | \n", "3.56 | \n", "
53938 | \n", "53939 | \n", "0.86 | \n", "premium | \n", "H | \n", "SI2 | \n", "61.0 | \n", "58.0 | \n", "2757 | \n", "6.15 | \n", "6.12 | \n", "3.74 | \n", "
53939 | \n", "53940 | \n", "0.75 | \n", "ideal | \n", "D | \n", "SI2 | \n", "62.2 | \n", "55.0 | \n", "2757 | \n", "5.83 | \n", "5.87 | \n", "3.64 | \n", "
53940 rows × 11 columns
\n", "\n", " | carat | \n", "
---|---|
cut | \n", "\n", " |
fair | \n", "0.516404 | \n", "
good | \n", "0.454054 | \n", "
ideal | \n", "0.432876 | \n", "
premium | \n", "0.515262 | \n", "
very good | \n", "0.459435 | \n", "
\n", " | Unnamed: 0 | \n", "carat | \n", "cut | \n", "color | \n", "clarity | \n", "depth | \n", "table | \n", "price | \n", "x | \n", "y | \n", "z | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "0.006237 | \n", "ideal | \n", "E | \n", "SI2 | \n", "0.513889 | \n", "0.230769 | \n", "0.000000 | \n", "0.367784 | \n", "0.067572 | \n", "0.076415 | \n", "
1 | \n", "2 | \n", "0.002079 | \n", "premium | \n", "E | \n", "SI1 | \n", "0.466667 | \n", "0.346154 | \n", "0.000000 | \n", "0.362197 | \n", "0.065195 | \n", "0.072642 | \n", "
2 | \n", "3 | \n", "0.006237 | \n", "good | \n", "E | \n", "VS1 | \n", "0.386111 | \n", "0.423077 | \n", "0.000054 | \n", "0.377095 | \n", "0.069100 | \n", "0.072642 | \n", "
3 | \n", "4 | \n", "0.018711 | \n", "premium | \n", "I | \n", "VS2 | \n", "0.538889 | \n", "0.288462 | \n", "0.000433 | \n", "0.391061 | \n", "0.071817 | \n", "0.082704 | \n", "
4 | \n", "5 | \n", "0.022869 | \n", "good | \n", "J | \n", "SI2 | \n", "0.563889 | \n", "0.288462 | \n", "0.000487 | \n", "0.404097 | \n", "0.073854 | \n", "0.086478 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
53935 | \n", "53936 | \n", "0.108108 | \n", "ideal | \n", "D | \n", "SI1 | \n", "0.494444 | \n", "0.269231 | \n", "0.131427 | \n", "0.535382 | \n", "0.097793 | \n", "0.110063 | \n", "
53936 | \n", "53937 | \n", "0.108108 | \n", "good | \n", "D | \n", "SI1 | \n", "0.558333 | \n", "0.230769 | \n", "0.131427 | \n", "0.529795 | \n", "0.097623 | \n", "0.113522 | \n", "
53937 | \n", "53938 | \n", "0.103950 | \n", "very good | \n", "D | \n", "SI1 | \n", "0.550000 | \n", "0.326923 | \n", "0.131427 | \n", "0.527002 | \n", "0.096435 | \n", "0.111950 | \n", "
53938 | \n", "53939 | \n", "0.137214 | \n", "premium | \n", "H | \n", "SI2 | \n", "0.500000 | \n", "0.288462 | \n", "0.131427 | \n", "0.572626 | \n", "0.103905 | \n", "0.117610 | \n", "
53939 | \n", "53940 | \n", "0.114345 | \n", "ideal | \n", "D | \n", "SI2 | \n", "0.533333 | \n", "0.230769 | \n", "0.131427 | \n", "0.542831 | \n", "0.099660 | \n", "0.114465 | \n", "
53940 rows × 11 columns
\n", "\n", " | Unnamed: 0 | \n", "carat | \n", "cut | \n", "color | \n", "clarity | \n", "depth | \n", "table | \n", "price | \n", "x | \n", "y | \n", "z | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "0.23 | \n", "Ideal | \n", "E | \n", "SI2 | \n", "61.5 | \n", "55.0 | \n", "326 | \n", "3.95 | \n", "3.98 | \n", "2.43 | \n", "
1 | \n", "2 | \n", "0.21 | \n", "Premium | \n", "E | \n", "SI1 | \n", "59.8 | \n", "61.0 | \n", "326 | \n", "3.89 | \n", "3.84 | \n", "2.31 | \n", "
2 | \n", "3 | \n", "0.23 | \n", "Good | \n", "E | \n", "VS1 | \n", "56.9 | \n", "65.0 | \n", "327 | \n", "4.05 | \n", "4.07 | \n", "2.31 | \n", "
3 | \n", "4 | \n", "0.29 | \n", "Premium | \n", "I | \n", "VS2 | \n", "62.4 | \n", "58.0 | \n", "334 | \n", "4.20 | \n", "4.23 | \n", "2.63 | \n", "
4 | \n", "5 | \n", "0.31 | \n", "Good | \n", "J | \n", "SI2 | \n", "63.3 | \n", "58.0 | \n", "335 | \n", "4.34 | \n", "4.35 | \n", "2.75 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
53935 | \n", "53936 | \n", "0.72 | \n", "Ideal | \n", "D | \n", "SI1 | \n", "60.8 | \n", "57.0 | \n", "2757 | \n", "5.75 | \n", "5.76 | \n", "3.50 | \n", "
53936 | \n", "53937 | \n", "0.72 | \n", "Good | \n", "D | \n", "SI1 | \n", "63.1 | \n", "55.0 | \n", "2757 | \n", "5.69 | \n", "5.75 | \n", "3.61 | \n", "
53937 | \n", "53938 | \n", "0.70 | \n", "Very Good | \n", "D | \n", "SI1 | \n", "62.8 | \n", "60.0 | \n", "2757 | \n", "5.66 | \n", "5.68 | \n", "3.56 | \n", "
53938 | \n", "53939 | \n", "0.86 | \n", "Premium | \n", "H | \n", "SI2 | \n", "61.0 | \n", "58.0 | \n", "2757 | \n", "6.15 | \n", "6.12 | \n", "3.74 | \n", "
53939 | \n", "53940 | \n", "0.75 | \n", "Ideal | \n", "D | \n", "SI2 | \n", "62.2 | \n", "55.0 | \n", "2757 | \n", "5.83 | \n", "5.87 | \n", "3.64 | \n", "
53940 rows × 11 columns
\n", "