ium_426206/zadanie1.ipynb

1505 lines
68 KiB
Plaintext
Raw Normal View History

2021-03-21 22:16:26 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
2021-05-07 20:16:31 +02:00
"id": "strange-teens",
2021-03-21 22:16:26 +01:00
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#!pip install kaggle\n",
"#!pip install pandas\n",
"#!pip install matplotlib\n",
"#!pip install scikit-learn"
]
},
{
"cell_type": "code",
"execution_count": null,
2021-05-07 20:16:31 +02:00
"id": "another-accessory",
2021-03-21 22:16:26 +01:00
"metadata": {},
"outputs": [],
"source": [
"!kaggle datasets download -d apoorvaappz/global-super-store-dataset"
]
},
{
"cell_type": "code",
"execution_count": 158,
2021-05-07 20:16:31 +02:00
"id": "valid-malta",
2021-03-21 22:16:26 +01:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: global-super-store-dataset.zip\n",
" inflating: Global_Superstore2.csv \n",
" inflating: Global_Superstore2.xlsx \n"
]
}
],
"source": [
"# -o overwrites already-extracted files without prompting, so this cell can be re-run non-interactively\n",
"!unzip -o global-super-store-dataset.zip"
]
},
{
"cell_type": "code",
"execution_count": 159,
2021-05-07 20:16:31 +02:00
"id": "noble-compilation",
2021-03-21 22:16:26 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Row ID</th>\n",
" <th>Order ID</th>\n",
" <th>Order Date</th>\n",
" <th>Ship Date</th>\n",
" <th>Ship Mode</th>\n",
" <th>Customer ID</th>\n",
" <th>Customer Name</th>\n",
" <th>Segment</th>\n",
" <th>City</th>\n",
" <th>State</th>\n",
" <th>...</th>\n",
" <th>Product ID</th>\n",
" <th>Category</th>\n",
" <th>Sub-Category</th>\n",
" <th>Product Name</th>\n",
" <th>Sales</th>\n",
" <th>Quantity</th>\n",
" <th>Discount</th>\n",
" <th>Profit</th>\n",
" <th>Shipping Cost</th>\n",
" <th>Order Priority</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>32298</td>\n",
" <td>CA-2012-124891</td>\n",
" <td>31-07-2012</td>\n",
" <td>31-07-2012</td>\n",
" <td>Same Day</td>\n",
" <td>RH-19495</td>\n",
" <td>Rick Hansen</td>\n",
" <td>Consumer</td>\n",
" <td>New York City</td>\n",
" <td>New York</td>\n",
" <td>...</td>\n",
" <td>TEC-AC-10003033</td>\n",
" <td>Technology</td>\n",
" <td>Accessories</td>\n",
" <td>Plantronics CS510 - Over-the-Head monaural Wir...</td>\n",
" <td>2309.650</td>\n",
" <td>7</td>\n",
" <td>0.0</td>\n",
" <td>762.1845</td>\n",
" <td>933.57</td>\n",
" <td>Critical</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>26341</td>\n",
" <td>IN-2013-77878</td>\n",
" <td>05-02-2013</td>\n",
" <td>07-02-2013</td>\n",
" <td>Second Class</td>\n",
" <td>JR-16210</td>\n",
" <td>Justin Ritter</td>\n",
" <td>Corporate</td>\n",
" <td>Wollongong</td>\n",
" <td>New South Wales</td>\n",
" <td>...</td>\n",
" <td>FUR-CH-10003950</td>\n",
" <td>Furniture</td>\n",
" <td>Chairs</td>\n",
" <td>Novimex Executive Leather Armchair, Black</td>\n",
" <td>3709.395</td>\n",
" <td>9</td>\n",
" <td>0.1</td>\n",
" <td>-288.7650</td>\n",
" <td>923.63</td>\n",
" <td>Critical</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>25330</td>\n",
" <td>IN-2013-71249</td>\n",
" <td>17-10-2013</td>\n",
" <td>18-10-2013</td>\n",
" <td>First Class</td>\n",
" <td>CR-12730</td>\n",
" <td>Craig Reiter</td>\n",
" <td>Consumer</td>\n",
" <td>Brisbane</td>\n",
" <td>Queensland</td>\n",
" <td>...</td>\n",
" <td>TEC-PH-10004664</td>\n",
" <td>Technology</td>\n",
" <td>Phones</td>\n",
" <td>Nokia Smart Phone, with Caller ID</td>\n",
" <td>5175.171</td>\n",
" <td>9</td>\n",
" <td>0.1</td>\n",
" <td>919.9710</td>\n",
" <td>915.49</td>\n",
" <td>Medium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13524</td>\n",
" <td>ES-2013-1579342</td>\n",
" <td>28-01-2013</td>\n",
" <td>30-01-2013</td>\n",
" <td>First Class</td>\n",
" <td>KM-16375</td>\n",
" <td>Katherine Murray</td>\n",
" <td>Home Office</td>\n",
" <td>Berlin</td>\n",
" <td>Berlin</td>\n",
" <td>...</td>\n",
" <td>TEC-PH-10004583</td>\n",
" <td>Technology</td>\n",
" <td>Phones</td>\n",
" <td>Motorola Smart Phone, Cordless</td>\n",
" <td>2892.510</td>\n",
" <td>5</td>\n",
" <td>0.1</td>\n",
" <td>-96.5400</td>\n",
" <td>910.16</td>\n",
" <td>Medium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>47221</td>\n",
" <td>SG-2013-4320</td>\n",
" <td>05-11-2013</td>\n",
" <td>06-11-2013</td>\n",
" <td>Same Day</td>\n",
" <td>RH-9495</td>\n",
" <td>Rick Hansen</td>\n",
" <td>Consumer</td>\n",
" <td>Dakar</td>\n",
" <td>Dakar</td>\n",
" <td>...</td>\n",
" <td>TEC-SHA-10000501</td>\n",
" <td>Technology</td>\n",
" <td>Copiers</td>\n",
" <td>Sharp Wireless Fax, High-Speed</td>\n",
" <td>2832.960</td>\n",
" <td>8</td>\n",
" <td>0.0</td>\n",
" <td>311.5200</td>\n",
" <td>903.04</td>\n",
" <td>Critical</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51285</th>\n",
" <td>29002</td>\n",
" <td>IN-2014-62366</td>\n",
" <td>19-06-2014</td>\n",
" <td>19-06-2014</td>\n",
" <td>Same Day</td>\n",
" <td>KE-16420</td>\n",
" <td>Katrina Edelman</td>\n",
" <td>Corporate</td>\n",
" <td>Kure</td>\n",
" <td>Hiroshima</td>\n",
" <td>...</td>\n",
" <td>OFF-FA-10000746</td>\n",
" <td>Office Supplies</td>\n",
" <td>Fasteners</td>\n",
" <td>Advantus Thumb Tacks, 12 Pack</td>\n",
" <td>65.100</td>\n",
" <td>5</td>\n",
" <td>0.0</td>\n",
" <td>4.5000</td>\n",
" <td>0.01</td>\n",
" <td>Medium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51286</th>\n",
" <td>35398</td>\n",
" <td>US-2014-102288</td>\n",
" <td>20-06-2014</td>\n",
" <td>24-06-2014</td>\n",
" <td>Standard Class</td>\n",
" <td>ZC-21910</td>\n",
" <td>Zuschuss Carroll</td>\n",
" <td>Consumer</td>\n",
" <td>Houston</td>\n",
" <td>Texas</td>\n",
" <td>...</td>\n",
" <td>OFF-AP-10002906</td>\n",
" <td>Office Supplies</td>\n",
" <td>Appliances</td>\n",
" <td>Hoover Replacement Belt for Commercial Guardsm...</td>\n",
" <td>0.444</td>\n",
" <td>1</td>\n",
" <td>0.8</td>\n",
" <td>-1.1100</td>\n",
" <td>0.01</td>\n",
" <td>Medium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51287</th>\n",
" <td>40470</td>\n",
" <td>US-2013-155768</td>\n",
" <td>02-12-2013</td>\n",
" <td>02-12-2013</td>\n",
" <td>Same Day</td>\n",
" <td>LB-16795</td>\n",
" <td>Laurel Beltran</td>\n",
" <td>Home Office</td>\n",
" <td>Oxnard</td>\n",
" <td>California</td>\n",
" <td>...</td>\n",
" <td>OFF-EN-10001219</td>\n",
" <td>Office Supplies</td>\n",
" <td>Envelopes</td>\n",
" <td>#10- 4 1/8\" x 9 1/2\" Security-Tint Envelopes</td>\n",
" <td>22.920</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>11.2308</td>\n",
" <td>0.01</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51288</th>\n",
" <td>9596</td>\n",
" <td>MX-2012-140767</td>\n",
" <td>18-02-2012</td>\n",
" <td>22-02-2012</td>\n",
" <td>Standard Class</td>\n",
" <td>RB-19795</td>\n",
" <td>Ross Baird</td>\n",
" <td>Home Office</td>\n",
" <td>Valinhos</td>\n",
" <td>Săo Paulo</td>\n",
" <td>...</td>\n",
" <td>OFF-BI-10000806</td>\n",
" <td>Office Supplies</td>\n",
" <td>Binders</td>\n",
" <td>Acco Index Tab, Economy</td>\n",
" <td>13.440</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" <td>2.4000</td>\n",
" <td>0.00</td>\n",
" <td>Medium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51289</th>\n",
" <td>6147</td>\n",
" <td>MX-2012-134460</td>\n",
" <td>22-05-2012</td>\n",
" <td>26-05-2012</td>\n",
" <td>Second Class</td>\n",
" <td>MC-18100</td>\n",
" <td>Mick Crebagga</td>\n",
" <td>Consumer</td>\n",
" <td>Tipitapa</td>\n",
" <td>Managua</td>\n",
" <td>...</td>\n",
" <td>OFF-PA-10004155</td>\n",
" <td>Office Supplies</td>\n",
" <td>Paper</td>\n",
" <td>Eaton Computer Printout Paper, 8.5 x 11</td>\n",
" <td>61.380</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>1.8000</td>\n",
" <td>0.00</td>\n",
" <td>High</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>51290 rows × 24 columns</p>\n",
"</div>"
],
"text/plain": [
" Row ID Order ID Order Date Ship Date Ship Mode \\\n",
"0 32298 CA-2012-124891 31-07-2012 31-07-2012 Same Day \n",
"1 26341 IN-2013-77878 05-02-2013 07-02-2013 Second Class \n",
"2 25330 IN-2013-71249 17-10-2013 18-10-2013 First Class \n",
"3 13524 ES-2013-1579342 28-01-2013 30-01-2013 First Class \n",
"4 47221 SG-2013-4320 05-11-2013 06-11-2013 Same Day \n",
"... ... ... ... ... ... \n",
"51285 29002 IN-2014-62366 19-06-2014 19-06-2014 Same Day \n",
"51286 35398 US-2014-102288 20-06-2014 24-06-2014 Standard Class \n",
"51287 40470 US-2013-155768 02-12-2013 02-12-2013 Same Day \n",
"51288 9596 MX-2012-140767 18-02-2012 22-02-2012 Standard Class \n",
"51289 6147 MX-2012-134460 22-05-2012 26-05-2012 Second Class \n",
"\n",
" Customer ID Customer Name Segment City \\\n",
"0 RH-19495 Rick Hansen Consumer New York City \n",
"1 JR-16210 Justin Ritter Corporate Wollongong \n",
"2 CR-12730 Craig Reiter Consumer Brisbane \n",
"3 KM-16375 Katherine Murray Home Office Berlin \n",
"4 RH-9495 Rick Hansen Consumer Dakar \n",
"... ... ... ... ... \n",
"51285 KE-16420 Katrina Edelman Corporate Kure \n",
"51286 ZC-21910 Zuschuss Carroll Consumer Houston \n",
"51287 LB-16795 Laurel Beltran Home Office Oxnard \n",
"51288 RB-19795 Ross Baird Home Office Valinhos \n",
"51289 MC-18100 Mick Crebagga Consumer Tipitapa \n",
"\n",
" State ... Product ID Category Sub-Category \\\n",
"0 New York ... TEC-AC-10003033 Technology Accessories \n",
"1 New South Wales ... FUR-CH-10003950 Furniture Chairs \n",
"2 Queensland ... TEC-PH-10004664 Technology Phones \n",
"3 Berlin ... TEC-PH-10004583 Technology Phones \n",
"4 Dakar ... TEC-SHA-10000501 Technology Copiers \n",
"... ... ... ... ... ... \n",
"51285 Hiroshima ... OFF-FA-10000746 Office Supplies Fasteners \n",
"51286 Texas ... OFF-AP-10002906 Office Supplies Appliances \n",
"51287 California ... OFF-EN-10001219 Office Supplies Envelopes \n",
"51288 Săo Paulo ... OFF-BI-10000806 Office Supplies Binders \n",
"51289 Managua ... OFF-PA-10004155 Office Supplies Paper \n",
"\n",
" Product Name Sales Quantity \\\n",
"0 Plantronics CS510 - Over-the-Head monaural Wir... 2309.650 7 \n",
"1 Novimex Executive Leather Armchair, Black 3709.395 9 \n",
"2 Nokia Smart Phone, with Caller ID 5175.171 9 \n",
"3 Motorola Smart Phone, Cordless 2892.510 5 \n",
"4 Sharp Wireless Fax, High-Speed 2832.960 8 \n",
"... ... ... ... \n",
"51285 Advantus Thumb Tacks, 12 Pack 65.100 5 \n",
"51286 Hoover Replacement Belt for Commercial Guardsm... 0.444 1 \n",
"51287 #10- 4 1/8\" x 9 1/2\" Security-Tint Envelopes 22.920 3 \n",
"51288 Acco Index Tab, Economy 13.440 2 \n",
"51289 Eaton Computer Printout Paper, 8.5 x 11 61.380 3 \n",
"\n",
" Discount Profit Shipping Cost Order Priority \n",
"0 0.0 762.1845 933.57 Critical \n",
"1 0.1 -288.7650 923.63 Critical \n",
"2 0.1 919.9710 915.49 Medium \n",
"3 0.1 -96.5400 910.16 Medium \n",
"4 0.0 311.5200 903.04 Critical \n",
"... ... ... ... ... \n",
"51285 0.0 4.5000 0.01 Medium \n",
"51286 0.8 -1.1100 0.01 Medium \n",
"51287 0.0 11.2308 0.01 High \n",
"51288 0.0 2.4000 0.00 Medium \n",
"51289 0.0 1.8000 0.00 High \n",
"\n",
"[51290 rows x 24 columns]"
]
},
"execution_count": 159,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"# Load the raw Global Superstore export.\n",
"# The file appears to be Latin-1 encoded: decoding it as latin2 turns \"São Paulo\" into \"Săo Paulo\",\n",
"# so read it as latin1 to keep accented letters intact.\n",
"gssd = pd.read_csv('Global_Superstore2.csv', encoding=\"latin1\")\n",
"gssd"
]
},
{
"cell_type": "code",
"execution_count": 160,
2021-05-07 20:16:31 +02:00
"id": "multiple-council",
2021-03-21 22:16:26 +01:00
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 161,
2021-05-07 20:16:31 +02:00
"id": "green-trunk",
2021-03-21 22:16:26 +01:00
"metadata": {},
"outputs": [],
"source": [
"# Shuffle once with a fixed seed, then cut by position into 60% train / 20% dev / 20% test.\n",
"# Plain iloc slices are used instead of np.split, which relies on the deprecated DataFrame.swapaxes.\n",
"gssd_shuffled = gssd.sample(frac=1, random_state=42)\n",
"train_end, dev_end = int(.6*len(gssd)), int(.8*len(gssd))\n",
"gssd_train = gssd_shuffled.iloc[:train_end]\n",
"gssd_dev = gssd_shuffled.iloc[train_end:dev_end]\n",
"gssd_test = gssd_shuffled.iloc[dev_end:]"
]
},
{
"cell_type": "code",
"execution_count": 162,
2021-05-07 20:16:31 +02:00
"id": "operating-catalyst",
2021-03-21 22:16:26 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"51290"
]
},
"execution_count": 162,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(gssd)  # Number of rows in the whole dataset"
]
},
{
"cell_type": "code",
"execution_count": 163,
2021-05-07 20:16:31 +02:00
"id": "female-landscape",
2021-03-21 22:16:26 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"30774"
]
},
"execution_count": 163,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(gssd_train)  # Number of rows in the training set"
]
},
{
"cell_type": "code",
"execution_count": 164,
2021-05-07 20:16:31 +02:00
"id": "thirty-auckland",
2021-03-21 22:16:26 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10258"
]
},
"execution_count": 164,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(gssd_dev)  # Number of rows in the validation set"
]
},
{
"cell_type": "code",
"execution_count": 165,
2021-05-07 20:16:31 +02:00
"id": "mysterious-alignment",
2021-03-21 22:16:26 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10258"
]
},
"execution_count": 165,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(gssd_test)  # Number of rows in the test set"
]
},
{
"cell_type": "code",
"execution_count": 166,
2021-05-07 20:16:31 +02:00
"id": "stone-combining",
2021-03-21 22:16:26 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Row ID</th>\n",
" <th>Order ID</th>\n",
" <th>Order Date</th>\n",
" <th>Ship Date</th>\n",
" <th>Ship Mode</th>\n",
" <th>Customer ID</th>\n",
" <th>Customer Name</th>\n",
" <th>Segment</th>\n",
" <th>City</th>\n",
" <th>State</th>\n",
" <th>...</th>\n",
" <th>Product ID</th>\n",
" <th>Category</th>\n",
" <th>Sub-Category</th>\n",
" <th>Product Name</th>\n",
" <th>Sales</th>\n",
" <th>Quantity</th>\n",
" <th>Discount</th>\n",
" <th>Profit</th>\n",
" <th>Shipping Cost</th>\n",
" <th>Order Priority</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>51290.00000</td>\n",
" <td>51290</td>\n",
" <td>51290</td>\n",
" <td>51290</td>\n",
" <td>51290</td>\n",
" <td>51290</td>\n",
" <td>51290</td>\n",
" <td>51290</td>\n",
" <td>51290</td>\n",
" <td>51290</td>\n",
" <td>...</td>\n",
" <td>51290</td>\n",
" <td>51290</td>\n",
" <td>51290</td>\n",
" <td>51290</td>\n",
" <td>51290.000000</td>\n",
" <td>51290.000000</td>\n",
" <td>51290.000000</td>\n",
" <td>51290.000000</td>\n",
" <td>51290.000000</td>\n",
" <td>51290</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>NaN</td>\n",
" <td>25035</td>\n",
" <td>1430</td>\n",
" <td>1464</td>\n",
" <td>4</td>\n",
" <td>1590</td>\n",
" <td>795</td>\n",
" <td>3</td>\n",
" <td>3636</td>\n",
" <td>1094</td>\n",
" <td>...</td>\n",
" <td>10292</td>\n",
" <td>3</td>\n",
" <td>17</td>\n",
" <td>3788</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>NaN</td>\n",
" <td>CA-2014-100111</td>\n",
" <td>18-06-2014</td>\n",
" <td>22-11-2014</td>\n",
" <td>Standard Class</td>\n",
" <td>PO-18850</td>\n",
" <td>Muhammed Yedwab</td>\n",
" <td>Consumer</td>\n",
" <td>New York City</td>\n",
" <td>California</td>\n",
" <td>...</td>\n",
" <td>OFF-AR-10003651</td>\n",
" <td>Office Supplies</td>\n",
" <td>Binders</td>\n",
" <td>Staples</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Medium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>NaN</td>\n",
" <td>14</td>\n",
" <td>135</td>\n",
" <td>130</td>\n",
" <td>30775</td>\n",
" <td>97</td>\n",
" <td>108</td>\n",
" <td>26518</td>\n",
" <td>915</td>\n",
" <td>2001</td>\n",
" <td>...</td>\n",
" <td>35</td>\n",
" <td>31273</td>\n",
" <td>6152</td>\n",
" <td>227</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>29433</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>25645.50000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>246.490581</td>\n",
" <td>3.476545</td>\n",
" <td>0.142908</td>\n",
" <td>28.610982</td>\n",
" <td>26.375915</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>14806.29199</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>487.565361</td>\n",
" <td>2.278766</td>\n",
" <td>0.212280</td>\n",
" <td>174.340972</td>\n",
" <td>57.296804</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.00000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.444000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-6599.978000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>12823.25000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>30.758625</td>\n",
" <td>2.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>2.610000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>25645.50000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>85.053000</td>\n",
" <td>3.000000</td>\n",
" <td>0.000000</td>\n",
" <td>9.240000</td>\n",
" <td>7.790000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>38467.75000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>251.053200</td>\n",
" <td>5.000000</td>\n",
" <td>0.200000</td>\n",
" <td>36.810000</td>\n",
" <td>24.450000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>51290.00000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>22638.480000</td>\n",
" <td>14.000000</td>\n",
" <td>0.850000</td>\n",
" <td>8399.976000</td>\n",
" <td>933.570000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>11 rows × 24 columns</p>\n",
"</div>"
],
"text/plain": [
" Row ID Order ID Order Date Ship Date Ship Mode \\\n",
"count 51290.00000 51290 51290 51290 51290 \n",
"unique NaN 25035 1430 1464 4 \n",
"top NaN CA-2014-100111 18-06-2014 22-11-2014 Standard Class \n",
"freq NaN 14 135 130 30775 \n",
"mean 25645.50000 NaN NaN NaN NaN \n",
"std 14806.29199 NaN NaN NaN NaN \n",
"min 1.00000 NaN NaN NaN NaN \n",
"25% 12823.25000 NaN NaN NaN NaN \n",
"50% 25645.50000 NaN NaN NaN NaN \n",
"75% 38467.75000 NaN NaN NaN NaN \n",
"max 51290.00000 NaN NaN NaN NaN \n",
"\n",
" Customer ID Customer Name Segment City State ... \\\n",
"count 51290 51290 51290 51290 51290 ... \n",
"unique 1590 795 3 3636 1094 ... \n",
"top PO-18850 Muhammed Yedwab Consumer New York City California ... \n",
"freq 97 108 26518 915 2001 ... \n",
"mean NaN NaN NaN NaN NaN ... \n",
"std NaN NaN NaN NaN NaN ... \n",
"min NaN NaN NaN NaN NaN ... \n",
"25% NaN NaN NaN NaN NaN ... \n",
"50% NaN NaN NaN NaN NaN ... \n",
"75% NaN NaN NaN NaN NaN ... \n",
"max NaN NaN NaN NaN NaN ... \n",
"\n",
" Product ID Category Sub-Category Product Name \\\n",
"count 51290 51290 51290 51290 \n",
"unique 10292 3 17 3788 \n",
"top OFF-AR-10003651 Office Supplies Binders Staples \n",
"freq 35 31273 6152 227 \n",
"mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN \n",
"\n",
" Sales Quantity Discount Profit Shipping Cost \\\n",
"count 51290.000000 51290.000000 51290.000000 51290.000000 51290.000000 \n",
"unique NaN NaN NaN NaN NaN \n",
"top NaN NaN NaN NaN NaN \n",
"freq NaN NaN NaN NaN NaN \n",
"mean 246.490581 3.476545 0.142908 28.610982 26.375915 \n",
"std 487.565361 2.278766 0.212280 174.340972 57.296804 \n",
"min 0.444000 1.000000 0.000000 -6599.978000 0.000000 \n",
"25% 30.758625 2.000000 0.000000 0.000000 2.610000 \n",
"50% 85.053000 3.000000 0.000000 9.240000 7.790000 \n",
"75% 251.053200 5.000000 0.200000 36.810000 24.450000 \n",
"max 22638.480000 14.000000 0.850000 8399.976000 933.570000 \n",
"\n",
" Order Priority \n",
"count 51290 \n",
"unique 4 \n",
"top Medium \n",
"freq 29433 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
"[11 rows x 24 columns]"
]
},
"execution_count": 166,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gssd.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 167,
2021-05-07 20:16:31 +02:00
"id": "demanding-milwaukee",
2021-03-21 22:16:26 +01:00
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"Binders 6152\n",
"Storage 5059\n",
"Art 4883\n",
"Paper 3538\n",
"Chairs 3434\n",
"Phones 3357\n",
"Furnishings 3170\n",
"Accessories 3075\n",
"Labels 2606\n",
"Envelopes 2435\n",
"Supplies 2425\n",
"Fasteners 2420\n",
"Bookcases 2411\n",
"Copiers 2223\n",
"Appliances 1755\n",
"Machines 1486\n",
"Tables 861\n",
"Name: Sub-Category, dtype: int64"
]
},
"execution_count": 167,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gssd[\"Sub-Category\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 168,
2021-05-07 20:16:31 +02:00
"id": "above-script",
2021-03-21 22:16:26 +01:00
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 168,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEpCAYAAAB/ZvKwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAsUUlEQVR4nO3deZhkZX328e/NjKiACMiIhMUhOkrIa1gyLAoaBUEEDSYRxLhMCAF9MzEYExWMBgVN3BIjRjEo+IJBATcgisA4Im5hGXZZDCNKYAQZQQGXgMD9/vE8Rdc03dNdp05vnPtzXX111amqXz/VXf07z3lW2SYiIrphnZkuQERETJ8k/YiIDknSj4jokCT9iIgOSdKPiOiQJP2IiA6ZP9MFWJtNN93UCxcunOliRETMKZdddtlPbS8Y67FZnfQXLlzIihUrZroYERFziqSbx3sszTsRER2SpB8R0SFJ+hERHZKkHxHRIUn6EREdkqQfEdEhSfoRER2SpB8R0SGzenLWWBYe+ZVJPe9H791/iksSETH3pKYfEdEhk0r6kjaS9HlJN0i6XtKzJW0iaZmkG+v3jetzJek4SSslXS1pp744S+rzb5S0ZKreVEREjG2yNf0PA+fa3hbYHrgeOBJYbnsRsLzeB3gxsKh+HQ4cDyBpE+BoYFdgF+Do3okiIiKmx4RJX9ITgecBJwLYvt/2z4EDgJPr004GXlZvHwCc4uIiYCNJmwMvApbZvsv2z4BlwL4tvpeIiJjAZGr62wCrgU9JukLSJyWtD2xm+7b6nNuBzertLYBb+l5/az023vE1SDpc0gpJK1avXj3Yu4mIiLWaTNKfD+wEHG97R+CXjDTlAGDbgNsokO0TbC+2vXjBgjGXg46IiIYmk/RvBW61fXG9/3nKSeAntdmG+v2O+vgqYKu+129Zj413PCIipsmESd/27cAtkp5ZD+0FXAecDfRG4CwBzqq3zwZeW0fx7AbcXZuBzgP2kbRx7cDdpx6LiIhpMtnJWW8ATpW0LnATcAjlhHGGpEOBm4GD6nPPAfYDVgK/qs/F9l2SjgUurc87xvZdrbyLiIiYlEklfdtXAovHeGivMZ5rYOk4cU4CThqgfBER0aLMyI2I6JAk/YiIDknSj4jokCT9iIgOSdKPiOiQJP2IiA5J0o+I6JAk/YiIDknSj4jokCT9iIgOSdKPiOiQJP2IiA5J0o+I6JAk/YiIDknSj4jokCT9iIgOSdKPiOiQJP2IiA5J0o+I6JAk/YiIDknSj4jokCT9iIgOSdKPiOiQJP2IiA6ZVNKX9CNJ10i6UtKKemwTScsk3Vi/b1yPS9JxklZKulrSTn1xltTn3yhpydS8pYiIGM8gNf0X2N7B9uJ6/0hgue1FwPJ6H+DFwKL6dThwPJSTBHA0sCuwC3B070QRERHTY/4Qrz0AeH69fTLwDeCt9fgptg1cJGkjSZvX5y6zfReApGXAvsBnhyjD0BYe+ZVJPe9H791/iksSETH1JlvTN3C+pMskHV6PbWb7tnr7dmCzensL4Ja+195aj413PCIipslka/p72F4l6cnAMkk39D9o25LcRoHqSeVwgK233rqNkBERUU2qpm97Vf1+B/AlSpv8T2qzDfX7HfXpq4Ct+l6+ZT023vHRP+sE24ttL16wYMFg7yYiItZqwqQvaX1JT+jdBvYBvgecDfRG4CwBzqq3zwZeW0fx7AbcXZuBzgP2kbRx7cDdpx6LiIhpMpnmnc2AL0nqPf8zts+VdClwhqRDgZuBg+rzzwH2A1YCvwIOAbB9l6RjgUvr847pdepGRMT0mDDp274J2H6M43cCe41x3MDScWKdBJw0eDHnjsmMBspIoIiYKZmRGxHRIUn6EREdkqQfEdEhSfoRER2SpB8R0SFJ+hERHZKkHxHRIUn6EREdkqQfEdEhSfoRER2SpB8R0SFJ+hERHZKkHxHRIUn6EREdkqQfEdEhSfoRER2SpB8R0S
FJ+hERHZKkHxHRIUn6EREdkqQfEdEhSfoRER2SpB8R0SFJ+hERHZKkHxHRIZNO+pLmSbpC0pfr/W0kXSxppaTTJa1bjz+23l9ZH1/YF+Ooevz7kl7U+ruJiIi1GqSmfwRwfd/99wEfsv104GfAofX4ocDP6vEP1echaTvgYOB3gX2Bj0maN1zxIyJiEJNK+pK2BPYHPlnvC9gT+Hx9ysnAy+rtA+p96uN71ecfAJxm+z7bPwRWAru08B4iImKSJlvT/1fgLcBD9f6TgJ/bfqDevxXYot7eArgFoD5+d33+w8fHeM3DJB0uaYWkFatXr578O4mIiAlNmPQlvQS4w/Zl01AebJ9ge7HtxQsWLJiOHxkR0RnzJ/Gc3YE/lLQf8DhgQ+DDwEaS5tfa/JbAqvr8VcBWwK2S5gNPBO7sO97T/5qIiJgGE9b0bR9le0vbCykdsV+3/SrgAuDl9WlLgLPq7bPrferjX7ftevzgOrpnG2ARcElr7yQiIiY0mZr+eN4KnCbp3cAVwIn1+InApyWtBO6inCiwfa2kM4DrgAeApbYfHOLnR0TEgAZK+ra/AXyj3r6JMUbf2P5f4MBxXv8e4D2DFjIiItqRGbkRER2SpB8R0SFJ+hERHZKkHxHRIUn6EREdkqQfEdEhSfoRER2SpB8R0SFJ+hERHZKkHxHRIcOsvRNTbOGRX5nU83703v2nuCQR8WiRmn5ERIck6UdEdEiadzokzUURkZp+RESHJOlHRHRImneisTQXRcw9qelHRHRIkn5ERIck6UdEdEiSfkREhyTpR0R0SEbvxKyQkUAR0yM1/YiIDknSj4jokAmTvqTHSbpE0lWSrpX0rnp8G0kXS1op6XRJ69bjj633V9bHF/bFOqoe/76kF03Zu4qIiDFNpk3/PmBP27+Q9Bjg25K+CrwJ+JDt0yR9HDgUOL5+/5ntp0s6GHgf8ApJ2wEHA78L/BbwNUnPsP3gFLyv6Lj0EUSMbcKavotf1LuPqV8G9gQ+X4+fDLys3j6g3qc+vpck1eOn2b7P9g+BlcAubbyJiIiYnEm16UuaJ+lK4A5gGfAD4Oe2H6hPuRXYot7eArgFoD5+N/Ck/uNjvKb/Zx0uaYWkFatXrx74DUVExPgmlfRtP2h7B2BLSu1826kqkO0TbC+2vXjBggVT9WMiIjppoNE7tn8OXAA8G9hIUq9PYEtgVb29CtgKoD7+RODO/uNjvCYiIqbBZEbvLJC0Ub39eGBv4HpK8n95fdoS4Kx6++x6n/r41227Hj+4ju7ZBlgEXNLS+4iIiEmYzOidzYGTJc2jnCTOsP1lSdcBp0l6N3AFcGJ9/onApyWtBO6ijNjB9rWSzgCuAx4AlmbkTkTE9Jow6du+GthxjOM3McboG9v/Cxw4Tqz3AO8ZvJgREdGGzMiNiOiQJP2IiA5J0o+I6JAk/YiIDknSj4jokCT9iIgOSdKPiOiQbJcYMQlZqjkeLVLTj4jokCT9iIgOSdKPiOiQJP2IiA5J0o+I6JCM3omYZm2PBMrIohhEkn5ErGEmTko5IU2fNO9ERHRIkn5ERIck6UdEdEiSfkREhyTpR0R0SEbvRMSckeGpw0tNPyKiQ5L0IyI6JEk/IqJDJkz6kraSdIGk6yRdK+mIenwTScsk3Vi/b1yPS9JxklZKulrSTn2xltTn3yhpydS9rYiIGMtkavoPAH9reztgN2CppO2AI4HlthcBy+t9gBcDi+rX4cDxUE4SwNHArsAuwNG9E0VEREyPCZO+7dtsX15v3wtcD2wBHACcXJ92MvCyevsA4BQXFwEbSdoceBGwzPZdtn8GLAP2bfPNRETE2g3Upi9pIbAjcDGwme3b6kO3A5vV21sAt/S97NZ6bLzjERExTSad9CVtAHwBeKPte/ofs23AbRRI0uGSVkhasXr16jZCRkRENamkL+kxlIR/qu0v1sM/qc021O931OOrgK36Xr5lPTbe8TXYPsH2YtuLFyxYMMh7iYiICU
xm9I6AE4Hrbf9L30NnA70ROEuAs/qOv7aO4tkNuLs2A50H7CNp49qBu089FhER02QyyzDsDrwGuEbSlfXY24D3Amd
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"import matplotlib\n",
"# Bar chart of the number of rows per product sub-category\n",
"ax = gssd[\"Sub-Category\"].value_counts().plot(kind=\"bar\")\n",
"# Label the figure so it can be read on its own; the trailing ';' hides the text repr in the cell output\n",
"ax.set(title=\"Rows per sub-category\", xlabel=\"Sub-Category\", ylabel=\"Number of rows\");"
]
},
{
"cell_type": "code",
"execution_count": 169,
2021-05-07 20:16:31 +02:00
"id": "abroad-durham",
2021-03-21 22:16:26 +01:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Postal Code\n"
]
}
],
"source": [
"# Print the name of every column that contains at least one NaN value\n",
"has_nan = gssd.isna().any(axis=0)\n",
"for name in has_nan.index[has_nan.to_numpy()]:\n",
"    print(name)"
]
},
{
"cell_type": "code",
"execution_count": 170,
2021-05-07 20:16:31 +02:00
"id": "centered-realtor",
2021-03-21 22:16:26 +01:00
"metadata": {},
"outputs": [],
"source": [
"# Drop every column that contains a NaN. Per the check in the previous cell that is only Postal Code,\n",
"# which matters little for this data and has many missing values.\n",
"gssd = gssd.dropna(axis=1, how='any')"
]
},
{
"cell_type": "code",
"execution_count": null,
2021-05-07 20:16:31 +02:00
"id": "relevant-receptor",
2021-03-21 22:16:26 +01:00
"metadata": {},
"outputs": [],
"source": [
"# Min-max scale every float64 column of gssd into the [0, 1] range.\n",
"# NOTE(review): gssd_train / gssd_dev / gssd_test are created in an earlier cell, so they do not\n",
"# receive this scaling, and the scaler is fitted on the full dataset - confirm this is intended.\n",
"from sklearn import preprocessing\n",
"flcols = gssd.select_dtypes(include=['float64']).columns\n",
"x = gssd[flcols].values\n",
"min_max_scaler = preprocessing.MinMaxScaler()\n",
"x_scaled = min_max_scaler.fit_transform(x)\n",
"# Keep gssd's own index so the scaled frame lines up with gssd row-for-row\n",
"normcols = pd.DataFrame(x_scaled, columns=flcols, index=gssd.index)\n",
"# x_scaled has the same row and column order as gssd[flcols], so assign it positionally in one step\n",
"gssd[flcols] = x_scaled"
]
},
{
"cell_type": "code",
"execution_count": 172,
2021-05-07 20:16:31 +02:00
"id": "informal-unemployment",
2021-03-21 22:16:26 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Row ID</th>\n",
" <th>Order ID</th>\n",
" <th>Order Date</th>\n",
" <th>Ship Date</th>\n",
" <th>Ship Mode</th>\n",
" <th>Customer ID</th>\n",
" <th>Customer Name</th>\n",
" <th>Segment</th>\n",
" <th>City</th>\n",
" <th>State</th>\n",
" <th>...</th>\n",
" <th>Product ID</th>\n",
" <th>Category</th>\n",
" <th>Sub-Category</th>\n",
" <th>Product Name</th>\n",
" <th>Sales</th>\n",
" <th>Quantity</th>\n",
" <th>Discount</th>\n",
" <th>Profit</th>\n",
" <th>Shipping Cost</th>\n",
" <th>Order Priority</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>32298</td>\n",
" <td>CA-2012-124891</td>\n",
" <td>31-07-2012</td>\n",
" <td>31-07-2012</td>\n",
" <td>Same Day</td>\n",
" <td>RH-19495</td>\n",
" <td>Rick Hansen</td>\n",
" <td>Consumer</td>\n",
" <td>New York City</td>\n",
" <td>New York</td>\n",
" <td>...</td>\n",
" <td>TEC-AC-10003033</td>\n",
" <td>Technology</td>\n",
" <td>Accessories</td>\n",
" <td>Plantronics CS510 - Over-the-Head monaural Wir...</td>\n",
" <td>0.102006</td>\n",
" <td>7</td>\n",
" <td>0.000000</td>\n",
" <td>0.490812</td>\n",
" <td>1.000000</td>\n",
" <td>Critical</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>26341</td>\n",
" <td>IN-2013-77878</td>\n",
" <td>05-02-2013</td>\n",
" <td>07-02-2013</td>\n",
" <td>Second Class</td>\n",
" <td>JR-16210</td>\n",
" <td>Justin Ritter</td>\n",
" <td>Corporate</td>\n",
" <td>Wollongong</td>\n",
" <td>New South Wales</td>\n",
" <td>...</td>\n",
" <td>FUR-CH-10003950</td>\n",
" <td>Furniture</td>\n",
" <td>Chairs</td>\n",
" <td>Novimex Executive Leather Armchair, Black</td>\n",
" <td>0.163837</td>\n",
" <td>9</td>\n",
" <td>0.117647</td>\n",
" <td>0.420749</td>\n",
" <td>0.989353</td>\n",
" <td>Critical</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>25330</td>\n",
" <td>IN-2013-71249</td>\n",
" <td>17-10-2013</td>\n",
" <td>18-10-2013</td>\n",
" <td>First Class</td>\n",
" <td>CR-12730</td>\n",
" <td>Craig Reiter</td>\n",
" <td>Consumer</td>\n",
" <td>Brisbane</td>\n",
" <td>Queensland</td>\n",
" <td>...</td>\n",
" <td>TEC-PH-10004664</td>\n",
" <td>Technology</td>\n",
" <td>Phones</td>\n",
" <td>Nokia Smart Phone, with Caller ID</td>\n",
" <td>0.228586</td>\n",
" <td>9</td>\n",
" <td>0.117647</td>\n",
" <td>0.501331</td>\n",
" <td>0.980633</td>\n",
" <td>Medium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13524</td>\n",
" <td>ES-2013-1579342</td>\n",
" <td>28-01-2013</td>\n",
" <td>30-01-2013</td>\n",
" <td>First Class</td>\n",
" <td>KM-16375</td>\n",
" <td>Katherine Murray</td>\n",
" <td>Home Office</td>\n",
" <td>Berlin</td>\n",
" <td>Berlin</td>\n",
" <td>...</td>\n",
" <td>TEC-PH-10004583</td>\n",
" <td>Technology</td>\n",
" <td>Phones</td>\n",
" <td>Motorola Smart Phone, Cordless</td>\n",
" <td>0.127753</td>\n",
" <td>5</td>\n",
" <td>0.117647</td>\n",
" <td>0.433564</td>\n",
" <td>0.974924</td>\n",
" <td>Medium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>47221</td>\n",
" <td>SG-2013-4320</td>\n",
" <td>05-11-2013</td>\n",
" <td>06-11-2013</td>\n",
" <td>Same Day</td>\n",
" <td>RH-9495</td>\n",
" <td>Rick Hansen</td>\n",
" <td>Consumer</td>\n",
" <td>Dakar</td>\n",
" <td>Dakar</td>\n",
" <td>...</td>\n",
" <td>TEC-SHA-10000501</td>\n",
" <td>Technology</td>\n",
" <td>Copiers</td>\n",
" <td>Sharp Wireless Fax, High-Speed</td>\n",
" <td>0.125122</td>\n",
" <td>8</td>\n",
" <td>0.000000</td>\n",
" <td>0.460768</td>\n",
" <td>0.967298</td>\n",
" <td>Critical</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51285</th>\n",
" <td>29002</td>\n",
" <td>IN-2014-62366</td>\n",
" <td>19-06-2014</td>\n",
" <td>19-06-2014</td>\n",
" <td>Same Day</td>\n",
" <td>KE-16420</td>\n",
" <td>Katrina Edelman</td>\n",
" <td>Corporate</td>\n",
" <td>Kure</td>\n",
" <td>Hiroshima</td>\n",
" <td>...</td>\n",
" <td>OFF-FA-10000746</td>\n",
" <td>Office Supplies</td>\n",
" <td>Fasteners</td>\n",
" <td>Advantus Thumb Tacks, 12 Pack</td>\n",
" <td>0.002856</td>\n",
" <td>5</td>\n",
" <td>0.000000</td>\n",
" <td>0.440300</td>\n",
" <td>0.000011</td>\n",
" <td>Medium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51286</th>\n",
" <td>35398</td>\n",
" <td>US-2014-102288</td>\n",
" <td>20-06-2014</td>\n",
" <td>24-06-2014</td>\n",
" <td>Standard Class</td>\n",
" <td>ZC-21910</td>\n",
" <td>Zuschuss Carroll</td>\n",
" <td>Consumer</td>\n",
" <td>Houston</td>\n",
" <td>Texas</td>\n",
" <td>...</td>\n",
" <td>OFF-AP-10002906</td>\n",
" <td>Office Supplies</td>\n",
" <td>Appliances</td>\n",
" <td>Hoover Replacement Belt for Commercial Guardsm...</td>\n",
" <td>0.000000</td>\n",
" <td>1</td>\n",
" <td>0.941176</td>\n",
" <td>0.439926</td>\n",
" <td>0.000011</td>\n",
" <td>Medium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51287</th>\n",
" <td>40470</td>\n",
" <td>US-2013-155768</td>\n",
" <td>02-12-2013</td>\n",
" <td>02-12-2013</td>\n",
" <td>Same Day</td>\n",
" <td>LB-16795</td>\n",
" <td>Laurel Beltran</td>\n",
" <td>Home Office</td>\n",
" <td>Oxnard</td>\n",
" <td>California</td>\n",
" <td>...</td>\n",
" <td>OFF-EN-10001219</td>\n",
" <td>Office Supplies</td>\n",
" <td>Envelopes</td>\n",
" <td>#10- 4 1/8\" x 9 1/2\" Security-Tint Envelopes</td>\n",
" <td>0.000993</td>\n",
" <td>3</td>\n",
" <td>0.000000</td>\n",
" <td>0.440749</td>\n",
" <td>0.000011</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51288</th>\n",
" <td>9596</td>\n",
" <td>MX-2012-140767</td>\n",
" <td>18-02-2012</td>\n",
" <td>22-02-2012</td>\n",
" <td>Standard Class</td>\n",
" <td>RB-19795</td>\n",
" <td>Ross Baird</td>\n",
" <td>Home Office</td>\n",
" <td>Valinhos</td>\n",
" <td>Săo Paulo</td>\n",
" <td>...</td>\n",
" <td>OFF-BI-10000806</td>\n",
" <td>Office Supplies</td>\n",
" <td>Binders</td>\n",
" <td>Acco Index Tab, Economy</td>\n",
" <td>0.000574</td>\n",
" <td>2</td>\n",
" <td>0.000000</td>\n",
" <td>0.440160</td>\n",
" <td>0.000000</td>\n",
" <td>Medium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51289</th>\n",
" <td>6147</td>\n",
" <td>MX-2012-134460</td>\n",
" <td>22-05-2012</td>\n",
" <td>26-05-2012</td>\n",
" <td>Second Class</td>\n",
" <td>MC-18100</td>\n",
" <td>Mick Crebagga</td>\n",
" <td>Consumer</td>\n",
" <td>Tipitapa</td>\n",
" <td>Managua</td>\n",
" <td>...</td>\n",
" <td>OFF-PA-10004155</td>\n",
" <td>Office Supplies</td>\n",
" <td>Paper</td>\n",
" <td>Eaton Computer Printout Paper, 8.5 x 11</td>\n",
" <td>0.002692</td>\n",
" <td>3</td>\n",
" <td>0.000000</td>\n",
" <td>0.440120</td>\n",
" <td>0.000000</td>\n",
" <td>High</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>51290 rows × 23 columns</p>\n",
"</div>"
],
"text/plain": [
" Row ID Order ID Order Date Ship Date Ship Mode \\\n",
"0 32298 CA-2012-124891 31-07-2012 31-07-2012 Same Day \n",
"1 26341 IN-2013-77878 05-02-2013 07-02-2013 Second Class \n",
"2 25330 IN-2013-71249 17-10-2013 18-10-2013 First Class \n",
"3 13524 ES-2013-1579342 28-01-2013 30-01-2013 First Class \n",
"4 47221 SG-2013-4320 05-11-2013 06-11-2013 Same Day \n",
"... ... ... ... ... ... \n",
"51285 29002 IN-2014-62366 19-06-2014 19-06-2014 Same Day \n",
"51286 35398 US-2014-102288 20-06-2014 24-06-2014 Standard Class \n",
"51287 40470 US-2013-155768 02-12-2013 02-12-2013 Same Day \n",
"51288 9596 MX-2012-140767 18-02-2012 22-02-2012 Standard Class \n",
"51289 6147 MX-2012-134460 22-05-2012 26-05-2012 Second Class \n",
"\n",
" Customer ID Customer Name Segment City \\\n",
"0 RH-19495 Rick Hansen Consumer New York City \n",
"1 JR-16210 Justin Ritter Corporate Wollongong \n",
"2 CR-12730 Craig Reiter Consumer Brisbane \n",
"3 KM-16375 Katherine Murray Home Office Berlin \n",
"4 RH-9495 Rick Hansen Consumer Dakar \n",
"... ... ... ... ... \n",
"51285 KE-16420 Katrina Edelman Corporate Kure \n",
"51286 ZC-21910 Zuschuss Carroll Consumer Houston \n",
"51287 LB-16795 Laurel Beltran Home Office Oxnard \n",
"51288 RB-19795 Ross Baird Home Office Valinhos \n",
"51289 MC-18100 Mick Crebagga Consumer Tipitapa \n",
"\n",
" State ... Product ID Category Sub-Category \\\n",
"0 New York ... TEC-AC-10003033 Technology Accessories \n",
"1 New South Wales ... FUR-CH-10003950 Furniture Chairs \n",
"2 Queensland ... TEC-PH-10004664 Technology Phones \n",
"3 Berlin ... TEC-PH-10004583 Technology Phones \n",
"4 Dakar ... TEC-SHA-10000501 Technology Copiers \n",
"... ... ... ... ... ... \n",
"51285 Hiroshima ... OFF-FA-10000746 Office Supplies Fasteners \n",
"51286 Texas ... OFF-AP-10002906 Office Supplies Appliances \n",
"51287 California ... OFF-EN-10001219 Office Supplies Envelopes \n",
"51288 Săo Paulo ... OFF-BI-10000806 Office Supplies Binders \n",
"51289 Managua ... OFF-PA-10004155 Office Supplies Paper \n",
"\n",
" Product Name Sales Quantity \\\n",
"0 Plantronics CS510 - Over-the-Head monaural Wir... 0.102006 7 \n",
"1 Novimex Executive Leather Armchair, Black 0.163837 9 \n",
"2 Nokia Smart Phone, with Caller ID 0.228586 9 \n",
"3 Motorola Smart Phone, Cordless 0.127753 5 \n",
"4 Sharp Wireless Fax, High-Speed 0.125122 8 \n",
"... ... ... ... \n",
"51285 Advantus Thumb Tacks, 12 Pack 0.002856 5 \n",
"51286 Hoover Replacement Belt for Commercial Guardsm... 0.000000 1 \n",
"51287 #10- 4 1/8\" x 9 1/2\" Security-Tint Envelopes 0.000993 3 \n",
"51288 Acco Index Tab, Economy 0.000574 2 \n",
"51289 Eaton Computer Printout Paper, 8.5 x 11 0.002692 3 \n",
"\n",
" Discount Profit Shipping Cost Order Priority \n",
"0 0.000000 0.490812 1.000000 Critical \n",
"1 0.117647 0.420749 0.989353 Critical \n",
"2 0.117647 0.501331 0.980633 Medium \n",
"3 0.117647 0.433564 0.974924 Medium \n",
"4 0.000000 0.460768 0.967298 Critical \n",
"... ... ... ... ... \n",
"51285 0.000000 0.440300 0.000011 Medium \n",
"51286 0.941176 0.439926 0.000011 Medium \n",
"51287 0.000000 0.440749 0.000011 High \n",
"51288 0.000000 0.440160 0.000000 Medium \n",
"51289 0.000000 0.440120 0.000000 High \n",
"\n",
"[51290 rows x 23 columns]"
]
},
"execution_count": 172,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gssd"
]
},
{
"cell_type": "code",
"execution_count": null,
2021-05-07 20:16:31 +02:00
"id": "reserved-cookie",
2021-03-21 22:16:26 +01:00
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2021-05-07 20:16:31 +02:00
"version": "3.8.5"
2021-03-21 22:16:26 +01:00
}
},
"nbformat": 4,
"nbformat_minor": 5
}