2021-03-21 22:16:26 +01:00
|
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "strange-teens",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"#!pip install kaggle\n",
|
|
|
|
|
"#!pip install pandas\n",
|
|
|
|
|
"#!pip install matplotlib\n",
|
|
|
|
|
"#!pip install scikit-learn"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "another-accessory",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"!kaggle datasets download -d apoorvaappz/global-super-store-dataset"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 158,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "valid-malta",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"Archive: global-super-store-dataset.zip\n",
|
|
|
|
|
" inflating: Global_Superstore2.csv \n",
|
|
|
|
|
" inflating: Global_Superstore2.xlsx \n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"!unzip global-super-store-dataset.zip"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 159,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "noble-compilation",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>Row ID</th>\n",
|
|
|
|
|
" <th>Order ID</th>\n",
|
|
|
|
|
" <th>Order Date</th>\n",
|
|
|
|
|
" <th>Ship Date</th>\n",
|
|
|
|
|
" <th>Ship Mode</th>\n",
|
|
|
|
|
" <th>Customer ID</th>\n",
|
|
|
|
|
" <th>Customer Name</th>\n",
|
|
|
|
|
" <th>Segment</th>\n",
|
|
|
|
|
" <th>City</th>\n",
|
|
|
|
|
" <th>State</th>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <th>Product ID</th>\n",
|
|
|
|
|
" <th>Category</th>\n",
|
|
|
|
|
" <th>Sub-Category</th>\n",
|
|
|
|
|
" <th>Product Name</th>\n",
|
|
|
|
|
" <th>Sales</th>\n",
|
|
|
|
|
" <th>Quantity</th>\n",
|
|
|
|
|
" <th>Discount</th>\n",
|
|
|
|
|
" <th>Profit</th>\n",
|
|
|
|
|
" <th>Shipping Cost</th>\n",
|
|
|
|
|
" <th>Order Priority</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>32298</td>\n",
|
|
|
|
|
" <td>CA-2012-124891</td>\n",
|
|
|
|
|
" <td>31-07-2012</td>\n",
|
|
|
|
|
" <td>31-07-2012</td>\n",
|
|
|
|
|
" <td>Same Day</td>\n",
|
|
|
|
|
" <td>RH-19495</td>\n",
|
|
|
|
|
" <td>Rick Hansen</td>\n",
|
|
|
|
|
" <td>Consumer</td>\n",
|
|
|
|
|
" <td>New York City</td>\n",
|
|
|
|
|
" <td>New York</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>TEC-AC-10003033</td>\n",
|
|
|
|
|
" <td>Technology</td>\n",
|
|
|
|
|
" <td>Accessories</td>\n",
|
|
|
|
|
" <td>Plantronics CS510 - Over-the-Head monaural Wir...</td>\n",
|
|
|
|
|
" <td>2309.650</td>\n",
|
|
|
|
|
" <td>7</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>762.1845</td>\n",
|
|
|
|
|
" <td>933.57</td>\n",
|
|
|
|
|
" <td>Critical</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>26341</td>\n",
|
|
|
|
|
" <td>IN-2013-77878</td>\n",
|
|
|
|
|
" <td>05-02-2013</td>\n",
|
|
|
|
|
" <td>07-02-2013</td>\n",
|
|
|
|
|
" <td>Second Class</td>\n",
|
|
|
|
|
" <td>JR-16210</td>\n",
|
|
|
|
|
" <td>Justin Ritter</td>\n",
|
|
|
|
|
" <td>Corporate</td>\n",
|
|
|
|
|
" <td>Wollongong</td>\n",
|
|
|
|
|
" <td>New South Wales</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>FUR-CH-10003950</td>\n",
|
|
|
|
|
" <td>Furniture</td>\n",
|
|
|
|
|
" <td>Chairs</td>\n",
|
|
|
|
|
" <td>Novimex Executive Leather Armchair, Black</td>\n",
|
|
|
|
|
" <td>3709.395</td>\n",
|
|
|
|
|
" <td>9</td>\n",
|
|
|
|
|
" <td>0.1</td>\n",
|
|
|
|
|
" <td>-288.7650</td>\n",
|
|
|
|
|
" <td>923.63</td>\n",
|
|
|
|
|
" <td>Critical</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>25330</td>\n",
|
|
|
|
|
" <td>IN-2013-71249</td>\n",
|
|
|
|
|
" <td>17-10-2013</td>\n",
|
|
|
|
|
" <td>18-10-2013</td>\n",
|
|
|
|
|
" <td>First Class</td>\n",
|
|
|
|
|
" <td>CR-12730</td>\n",
|
|
|
|
|
" <td>Craig Reiter</td>\n",
|
|
|
|
|
" <td>Consumer</td>\n",
|
|
|
|
|
" <td>Brisbane</td>\n",
|
|
|
|
|
" <td>Queensland</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>TEC-PH-10004664</td>\n",
|
|
|
|
|
" <td>Technology</td>\n",
|
|
|
|
|
" <td>Phones</td>\n",
|
|
|
|
|
" <td>Nokia Smart Phone, with Caller ID</td>\n",
|
|
|
|
|
" <td>5175.171</td>\n",
|
|
|
|
|
" <td>9</td>\n",
|
|
|
|
|
" <td>0.1</td>\n",
|
|
|
|
|
" <td>919.9710</td>\n",
|
|
|
|
|
" <td>915.49</td>\n",
|
|
|
|
|
" <td>Medium</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>13524</td>\n",
|
|
|
|
|
" <td>ES-2013-1579342</td>\n",
|
|
|
|
|
" <td>28-01-2013</td>\n",
|
|
|
|
|
" <td>30-01-2013</td>\n",
|
|
|
|
|
" <td>First Class</td>\n",
|
|
|
|
|
" <td>KM-16375</td>\n",
|
|
|
|
|
" <td>Katherine Murray</td>\n",
|
|
|
|
|
" <td>Home Office</td>\n",
|
|
|
|
|
" <td>Berlin</td>\n",
|
|
|
|
|
" <td>Berlin</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>TEC-PH-10004583</td>\n",
|
|
|
|
|
" <td>Technology</td>\n",
|
|
|
|
|
" <td>Phones</td>\n",
|
|
|
|
|
" <td>Motorola Smart Phone, Cordless</td>\n",
|
|
|
|
|
" <td>2892.510</td>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" <td>0.1</td>\n",
|
|
|
|
|
" <td>-96.5400</td>\n",
|
|
|
|
|
" <td>910.16</td>\n",
|
|
|
|
|
" <td>Medium</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>47221</td>\n",
|
|
|
|
|
" <td>SG-2013-4320</td>\n",
|
|
|
|
|
" <td>05-11-2013</td>\n",
|
|
|
|
|
" <td>06-11-2013</td>\n",
|
|
|
|
|
" <td>Same Day</td>\n",
|
|
|
|
|
" <td>RH-9495</td>\n",
|
|
|
|
|
" <td>Rick Hansen</td>\n",
|
|
|
|
|
" <td>Consumer</td>\n",
|
|
|
|
|
" <td>Dakar</td>\n",
|
|
|
|
|
" <td>Dakar</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>TEC-SHA-10000501</td>\n",
|
|
|
|
|
" <td>Technology</td>\n",
|
|
|
|
|
" <td>Copiers</td>\n",
|
|
|
|
|
" <td>Sharp Wireless Fax, High-Speed</td>\n",
|
|
|
|
|
" <td>2832.960</td>\n",
|
|
|
|
|
" <td>8</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>311.5200</td>\n",
|
|
|
|
|
" <td>903.04</td>\n",
|
|
|
|
|
" <td>Critical</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>51285</th>\n",
|
|
|
|
|
" <td>29002</td>\n",
|
|
|
|
|
" <td>IN-2014-62366</td>\n",
|
|
|
|
|
" <td>19-06-2014</td>\n",
|
|
|
|
|
" <td>19-06-2014</td>\n",
|
|
|
|
|
" <td>Same Day</td>\n",
|
|
|
|
|
" <td>KE-16420</td>\n",
|
|
|
|
|
" <td>Katrina Edelman</td>\n",
|
|
|
|
|
" <td>Corporate</td>\n",
|
|
|
|
|
" <td>Kure</td>\n",
|
|
|
|
|
" <td>Hiroshima</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>OFF-FA-10000746</td>\n",
|
|
|
|
|
" <td>Office Supplies</td>\n",
|
|
|
|
|
" <td>Fasteners</td>\n",
|
|
|
|
|
" <td>Advantus Thumb Tacks, 12 Pack</td>\n",
|
|
|
|
|
" <td>65.100</td>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>4.5000</td>\n",
|
|
|
|
|
" <td>0.01</td>\n",
|
|
|
|
|
" <td>Medium</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>51286</th>\n",
|
|
|
|
|
" <td>35398</td>\n",
|
|
|
|
|
" <td>US-2014-102288</td>\n",
|
|
|
|
|
" <td>20-06-2014</td>\n",
|
|
|
|
|
" <td>24-06-2014</td>\n",
|
|
|
|
|
" <td>Standard Class</td>\n",
|
|
|
|
|
" <td>ZC-21910</td>\n",
|
|
|
|
|
" <td>Zuschuss Carroll</td>\n",
|
|
|
|
|
" <td>Consumer</td>\n",
|
|
|
|
|
" <td>Houston</td>\n",
|
|
|
|
|
" <td>Texas</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>OFF-AP-10002906</td>\n",
|
|
|
|
|
" <td>Office Supplies</td>\n",
|
|
|
|
|
" <td>Appliances</td>\n",
|
|
|
|
|
" <td>Hoover Replacement Belt for Commercial Guardsm...</td>\n",
|
|
|
|
|
" <td>0.444</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0.8</td>\n",
|
|
|
|
|
" <td>-1.1100</td>\n",
|
|
|
|
|
" <td>0.01</td>\n",
|
|
|
|
|
" <td>Medium</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>51287</th>\n",
|
|
|
|
|
" <td>40470</td>\n",
|
|
|
|
|
" <td>US-2013-155768</td>\n",
|
|
|
|
|
" <td>02-12-2013</td>\n",
|
|
|
|
|
" <td>02-12-2013</td>\n",
|
|
|
|
|
" <td>Same Day</td>\n",
|
|
|
|
|
" <td>LB-16795</td>\n",
|
|
|
|
|
" <td>Laurel Beltran</td>\n",
|
|
|
|
|
" <td>Home Office</td>\n",
|
|
|
|
|
" <td>Oxnard</td>\n",
|
|
|
|
|
" <td>California</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>OFF-EN-10001219</td>\n",
|
|
|
|
|
" <td>Office Supplies</td>\n",
|
|
|
|
|
" <td>Envelopes</td>\n",
|
|
|
|
|
" <td>#10- 4 1/8\" x 9 1/2\" Security-Tint Envelopes</td>\n",
|
|
|
|
|
" <td>22.920</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>11.2308</td>\n",
|
|
|
|
|
" <td>0.01</td>\n",
|
|
|
|
|
" <td>High</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>51288</th>\n",
|
|
|
|
|
" <td>9596</td>\n",
|
|
|
|
|
" <td>MX-2012-140767</td>\n",
|
|
|
|
|
" <td>18-02-2012</td>\n",
|
|
|
|
|
" <td>22-02-2012</td>\n",
|
|
|
|
|
" <td>Standard Class</td>\n",
|
|
|
|
|
" <td>RB-19795</td>\n",
|
|
|
|
|
" <td>Ross Baird</td>\n",
|
|
|
|
|
" <td>Home Office</td>\n",
|
|
|
|
|
" <td>Valinhos</td>\n",
|
|
|
|
|
" <td>Săo Paulo</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>OFF-BI-10000806</td>\n",
|
|
|
|
|
" <td>Office Supplies</td>\n",
|
|
|
|
|
" <td>Binders</td>\n",
|
|
|
|
|
" <td>Acco Index Tab, Economy</td>\n",
|
|
|
|
|
" <td>13.440</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>2.4000</td>\n",
|
|
|
|
|
" <td>0.00</td>\n",
|
|
|
|
|
" <td>Medium</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>51289</th>\n",
|
|
|
|
|
" <td>6147</td>\n",
|
|
|
|
|
" <td>MX-2012-134460</td>\n",
|
|
|
|
|
" <td>22-05-2012</td>\n",
|
|
|
|
|
" <td>26-05-2012</td>\n",
|
|
|
|
|
" <td>Second Class</td>\n",
|
|
|
|
|
" <td>MC-18100</td>\n",
|
|
|
|
|
" <td>Mick Crebagga</td>\n",
|
|
|
|
|
" <td>Consumer</td>\n",
|
|
|
|
|
" <td>Tipitapa</td>\n",
|
|
|
|
|
" <td>Managua</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>OFF-PA-10004155</td>\n",
|
|
|
|
|
" <td>Office Supplies</td>\n",
|
|
|
|
|
" <td>Paper</td>\n",
|
|
|
|
|
" <td>Eaton Computer Printout Paper, 8.5 x 11</td>\n",
|
|
|
|
|
" <td>61.380</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>1.8000</td>\n",
|
|
|
|
|
" <td>0.00</td>\n",
|
|
|
|
|
" <td>High</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"<p>51290 rows × 24 columns</p>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" Row ID Order ID Order Date Ship Date Ship Mode \\\n",
|
|
|
|
|
"0 32298 CA-2012-124891 31-07-2012 31-07-2012 Same Day \n",
|
|
|
|
|
"1 26341 IN-2013-77878 05-02-2013 07-02-2013 Second Class \n",
|
|
|
|
|
"2 25330 IN-2013-71249 17-10-2013 18-10-2013 First Class \n",
|
|
|
|
|
"3 13524 ES-2013-1579342 28-01-2013 30-01-2013 First Class \n",
|
|
|
|
|
"4 47221 SG-2013-4320 05-11-2013 06-11-2013 Same Day \n",
|
|
|
|
|
"... ... ... ... ... ... \n",
|
|
|
|
|
"51285 29002 IN-2014-62366 19-06-2014 19-06-2014 Same Day \n",
|
|
|
|
|
"51286 35398 US-2014-102288 20-06-2014 24-06-2014 Standard Class \n",
|
|
|
|
|
"51287 40470 US-2013-155768 02-12-2013 02-12-2013 Same Day \n",
|
|
|
|
|
"51288 9596 MX-2012-140767 18-02-2012 22-02-2012 Standard Class \n",
|
|
|
|
|
"51289 6147 MX-2012-134460 22-05-2012 26-05-2012 Second Class \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" Customer ID Customer Name Segment City \\\n",
|
|
|
|
|
"0 RH-19495 Rick Hansen Consumer New York City \n",
|
|
|
|
|
"1 JR-16210 Justin Ritter Corporate Wollongong \n",
|
|
|
|
|
"2 CR-12730 Craig Reiter Consumer Brisbane \n",
|
|
|
|
|
"3 KM-16375 Katherine Murray Home Office Berlin \n",
|
|
|
|
|
"4 RH-9495 Rick Hansen Consumer Dakar \n",
|
|
|
|
|
"... ... ... ... ... \n",
|
|
|
|
|
"51285 KE-16420 Katrina Edelman Corporate Kure \n",
|
|
|
|
|
"51286 ZC-21910 Zuschuss Carroll Consumer Houston \n",
|
|
|
|
|
"51287 LB-16795 Laurel Beltran Home Office Oxnard \n",
|
|
|
|
|
"51288 RB-19795 Ross Baird Home Office Valinhos \n",
|
|
|
|
|
"51289 MC-18100 Mick Crebagga Consumer Tipitapa \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" State ... Product ID Category Sub-Category \\\n",
|
|
|
|
|
"0 New York ... TEC-AC-10003033 Technology Accessories \n",
|
|
|
|
|
"1 New South Wales ... FUR-CH-10003950 Furniture Chairs \n",
|
|
|
|
|
"2 Queensland ... TEC-PH-10004664 Technology Phones \n",
|
|
|
|
|
"3 Berlin ... TEC-PH-10004583 Technology Phones \n",
|
|
|
|
|
"4 Dakar ... TEC-SHA-10000501 Technology Copiers \n",
|
|
|
|
|
"... ... ... ... ... ... \n",
|
|
|
|
|
"51285 Hiroshima ... OFF-FA-10000746 Office Supplies Fasteners \n",
|
|
|
|
|
"51286 Texas ... OFF-AP-10002906 Office Supplies Appliances \n",
|
|
|
|
|
"51287 California ... OFF-EN-10001219 Office Supplies Envelopes \n",
|
|
|
|
|
"51288 Săo Paulo ... OFF-BI-10000806 Office Supplies Binders \n",
|
|
|
|
|
"51289 Managua ... OFF-PA-10004155 Office Supplies Paper \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" Product Name Sales Quantity \\\n",
|
|
|
|
|
"0 Plantronics CS510 - Over-the-Head monaural Wir... 2309.650 7 \n",
|
|
|
|
|
"1 Novimex Executive Leather Armchair, Black 3709.395 9 \n",
|
|
|
|
|
"2 Nokia Smart Phone, with Caller ID 5175.171 9 \n",
|
|
|
|
|
"3 Motorola Smart Phone, Cordless 2892.510 5 \n",
|
|
|
|
|
"4 Sharp Wireless Fax, High-Speed 2832.960 8 \n",
|
|
|
|
|
"... ... ... ... \n",
|
|
|
|
|
"51285 Advantus Thumb Tacks, 12 Pack 65.100 5 \n",
|
|
|
|
|
"51286 Hoover Replacement Belt for Commercial Guardsm... 0.444 1 \n",
|
|
|
|
|
"51287 #10- 4 1/8\" x 9 1/2\" Security-Tint Envelopes 22.920 3 \n",
|
|
|
|
|
"51288 Acco Index Tab, Economy 13.440 2 \n",
|
|
|
|
|
"51289 Eaton Computer Printout Paper, 8.5 x 11 61.380 3 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" Discount Profit Shipping Cost Order Priority \n",
|
|
|
|
|
"0 0.0 762.1845 933.57 Critical \n",
|
|
|
|
|
"1 0.1 -288.7650 923.63 Critical \n",
|
|
|
|
|
"2 0.1 919.9710 915.49 Medium \n",
|
|
|
|
|
"3 0.1 -96.5400 910.16 Medium \n",
|
|
|
|
|
"4 0.0 311.5200 903.04 Critical \n",
|
|
|
|
|
"... ... ... ... ... \n",
|
|
|
|
|
"51285 0.0 4.5000 0.01 Medium \n",
|
|
|
|
|
"51286 0.8 -1.1100 0.01 Medium \n",
|
|
|
|
|
"51287 0.0 11.2308 0.01 High \n",
|
|
|
|
|
"51288 0.0 2.4000 0.00 Medium \n",
|
|
|
|
|
"51289 0.0 1.8000 0.00 High \n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[51290 rows x 24 columns]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 159,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
"gssd=pd.read_csv('Global_Superstore2.csv', encoding=\"latin1\")  # latin2 mis-decodes accented text (e.g. 'São Paulo' came out as 'Săo Paulo')\n",
|
|
|
|
|
"gssd"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 160,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "multiple-council",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import numpy as np"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 161,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "green-trunk",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Shuffle once (fixed seed), then cut at 60% / 80% into train / dev / test.\n",
"# Plain iloc slicing avoids np.split on a DataFrame, which relies on the deprecated DataFrame.swapaxes.\n",
"_shuffled = gssd.sample(frac=1, random_state=42)\n",
"_train_end, _dev_end = int(.6*len(gssd)), int(.8*len(gssd))\n",
"gssd_train, gssd_dev, gssd_test = _shuffled.iloc[:_train_end], _shuffled.iloc[_train_end:_dev_end], _shuffled.iloc[_dev_end:]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 162,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "operating-catalyst",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"51290"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 162,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"gssd.shape[0] # Number of records in the whole dataset"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 163,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "female-landscape",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"30774"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 163,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"gssd_train.shape[0] # Number of records in the training set"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 164,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "thirty-auckland",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"10258"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 164,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"gssd_dev.shape[0] # Number of records in the validation set"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 165,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "mysterious-alignment",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"10258"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 165,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"gssd_test.shape[0] # Number of records in the test set"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 166,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "stone-combining",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>Row ID</th>\n",
|
|
|
|
|
" <th>Order ID</th>\n",
|
|
|
|
|
" <th>Order Date</th>\n",
|
|
|
|
|
" <th>Ship Date</th>\n",
|
|
|
|
|
" <th>Ship Mode</th>\n",
|
|
|
|
|
" <th>Customer ID</th>\n",
|
|
|
|
|
" <th>Customer Name</th>\n",
|
|
|
|
|
" <th>Segment</th>\n",
|
|
|
|
|
" <th>City</th>\n",
|
|
|
|
|
" <th>State</th>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <th>Product ID</th>\n",
|
|
|
|
|
" <th>Category</th>\n",
|
|
|
|
|
" <th>Sub-Category</th>\n",
|
|
|
|
|
" <th>Product Name</th>\n",
|
|
|
|
|
" <th>Sales</th>\n",
|
|
|
|
|
" <th>Quantity</th>\n",
|
|
|
|
|
" <th>Discount</th>\n",
|
|
|
|
|
" <th>Profit</th>\n",
|
|
|
|
|
" <th>Shipping Cost</th>\n",
|
|
|
|
|
" <th>Order Priority</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>count</th>\n",
|
|
|
|
|
" <td>51290.00000</td>\n",
|
|
|
|
|
" <td>51290</td>\n",
|
|
|
|
|
" <td>51290</td>\n",
|
|
|
|
|
" <td>51290</td>\n",
|
|
|
|
|
" <td>51290</td>\n",
|
|
|
|
|
" <td>51290</td>\n",
|
|
|
|
|
" <td>51290</td>\n",
|
|
|
|
|
" <td>51290</td>\n",
|
|
|
|
|
" <td>51290</td>\n",
|
|
|
|
|
" <td>51290</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>51290</td>\n",
|
|
|
|
|
" <td>51290</td>\n",
|
|
|
|
|
" <td>51290</td>\n",
|
|
|
|
|
" <td>51290</td>\n",
|
|
|
|
|
" <td>51290.000000</td>\n",
|
|
|
|
|
" <td>51290.000000</td>\n",
|
|
|
|
|
" <td>51290.000000</td>\n",
|
|
|
|
|
" <td>51290.000000</td>\n",
|
|
|
|
|
" <td>51290.000000</td>\n",
|
|
|
|
|
" <td>51290</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>unique</th>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>25035</td>\n",
|
|
|
|
|
" <td>1430</td>\n",
|
|
|
|
|
" <td>1464</td>\n",
|
|
|
|
|
" <td>4</td>\n",
|
|
|
|
|
" <td>1590</td>\n",
|
|
|
|
|
" <td>795</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>3636</td>\n",
|
|
|
|
|
" <td>1094</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>10292</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>17</td>\n",
|
|
|
|
|
" <td>3788</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>4</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>top</th>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>CA-2014-100111</td>\n",
|
|
|
|
|
" <td>18-06-2014</td>\n",
|
|
|
|
|
" <td>22-11-2014</td>\n",
|
|
|
|
|
" <td>Standard Class</td>\n",
|
|
|
|
|
" <td>PO-18850</td>\n",
|
|
|
|
|
" <td>Muhammed Yedwab</td>\n",
|
|
|
|
|
" <td>Consumer</td>\n",
|
|
|
|
|
" <td>New York City</td>\n",
|
|
|
|
|
" <td>California</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>OFF-AR-10003651</td>\n",
|
|
|
|
|
" <td>Office Supplies</td>\n",
|
|
|
|
|
" <td>Binders</td>\n",
|
|
|
|
|
" <td>Staples</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>Medium</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>freq</th>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>14</td>\n",
|
|
|
|
|
" <td>135</td>\n",
|
|
|
|
|
" <td>130</td>\n",
|
|
|
|
|
" <td>30775</td>\n",
|
|
|
|
|
" <td>97</td>\n",
|
|
|
|
|
" <td>108</td>\n",
|
|
|
|
|
" <td>26518</td>\n",
|
|
|
|
|
" <td>915</td>\n",
|
|
|
|
|
" <td>2001</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>35</td>\n",
|
|
|
|
|
" <td>31273</td>\n",
|
|
|
|
|
" <td>6152</td>\n",
|
|
|
|
|
" <td>227</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>29433</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>mean</th>\n",
|
|
|
|
|
" <td>25645.50000</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>246.490581</td>\n",
|
|
|
|
|
" <td>3.476545</td>\n",
|
|
|
|
|
" <td>0.142908</td>\n",
|
|
|
|
|
" <td>28.610982</td>\n",
|
|
|
|
|
" <td>26.375915</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>std</th>\n",
|
|
|
|
|
" <td>14806.29199</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>487.565361</td>\n",
|
|
|
|
|
" <td>2.278766</td>\n",
|
|
|
|
|
" <td>0.212280</td>\n",
|
|
|
|
|
" <td>174.340972</td>\n",
|
|
|
|
|
" <td>57.296804</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>min</th>\n",
|
|
|
|
|
" <td>1.00000</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.444000</td>\n",
|
|
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>-6599.978000</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>25%</th>\n",
|
|
|
|
|
" <td>12823.25000</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>30.758625</td>\n",
|
|
|
|
|
" <td>2.000000</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>2.610000</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>50%</th>\n",
|
|
|
|
|
" <td>25645.50000</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>85.053000</td>\n",
|
|
|
|
|
" <td>3.000000</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>9.240000</td>\n",
|
|
|
|
|
" <td>7.790000</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>75%</th>\n",
|
|
|
|
|
" <td>38467.75000</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>251.053200</td>\n",
|
|
|
|
|
" <td>5.000000</td>\n",
|
|
|
|
|
" <td>0.200000</td>\n",
|
|
|
|
|
" <td>36.810000</td>\n",
|
|
|
|
|
" <td>24.450000</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>max</th>\n",
|
|
|
|
|
" <td>51290.00000</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>22638.480000</td>\n",
|
|
|
|
|
" <td>14.000000</td>\n",
|
|
|
|
|
" <td>0.850000</td>\n",
|
|
|
|
|
" <td>8399.976000</td>\n",
|
|
|
|
|
" <td>933.570000</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"<p>11 rows × 24 columns</p>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" Row ID Order ID Order Date Ship Date Ship Mode \\\n",
|
|
|
|
|
"count 51290.00000 51290 51290 51290 51290 \n",
|
|
|
|
|
"unique NaN 25035 1430 1464 4 \n",
|
|
|
|
|
"top NaN CA-2014-100111 18-06-2014 22-11-2014 Standard Class \n",
|
|
|
|
|
"freq NaN 14 135 130 30775 \n",
|
|
|
|
|
"mean 25645.50000 NaN NaN NaN NaN \n",
|
|
|
|
|
"std 14806.29199 NaN NaN NaN NaN \n",
|
|
|
|
|
"min 1.00000 NaN NaN NaN NaN \n",
|
|
|
|
|
"25% 12823.25000 NaN NaN NaN NaN \n",
|
|
|
|
|
"50% 25645.50000 NaN NaN NaN NaN \n",
|
|
|
|
|
"75% 38467.75000 NaN NaN NaN NaN \n",
|
|
|
|
|
"max 51290.00000 NaN NaN NaN NaN \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" Customer ID Customer Name Segment City State ... \\\n",
|
|
|
|
|
"count 51290 51290 51290 51290 51290 ... \n",
|
|
|
|
|
"unique 1590 795 3 3636 1094 ... \n",
|
|
|
|
|
"top PO-18850 Muhammed Yedwab Consumer New York City California ... \n",
|
|
|
|
|
"freq 97 108 26518 915 2001 ... \n",
|
|
|
|
|
"mean NaN NaN NaN NaN NaN ... \n",
|
|
|
|
|
"std NaN NaN NaN NaN NaN ... \n",
|
|
|
|
|
"min NaN NaN NaN NaN NaN ... \n",
|
|
|
|
|
"25% NaN NaN NaN NaN NaN ... \n",
|
|
|
|
|
"50% NaN NaN NaN NaN NaN ... \n",
|
|
|
|
|
"75% NaN NaN NaN NaN NaN ... \n",
|
|
|
|
|
"max NaN NaN NaN NaN NaN ... \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" Product ID Category Sub-Category Product Name \\\n",
|
|
|
|
|
"count 51290 51290 51290 51290 \n",
|
|
|
|
|
"unique 10292 3 17 3788 \n",
|
|
|
|
|
"top OFF-AR-10003651 Office Supplies Binders Staples \n",
|
|
|
|
|
"freq 35 31273 6152 227 \n",
|
|
|
|
|
"mean NaN NaN NaN NaN \n",
|
|
|
|
|
"std NaN NaN NaN NaN \n",
|
|
|
|
|
"min NaN NaN NaN NaN \n",
|
|
|
|
|
"25% NaN NaN NaN NaN \n",
|
|
|
|
|
"50% NaN NaN NaN NaN \n",
|
|
|
|
|
"75% NaN NaN NaN NaN \n",
|
|
|
|
|
"max NaN NaN NaN NaN \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" Sales Quantity Discount Profit Shipping Cost \\\n",
|
|
|
|
|
"count 51290.000000 51290.000000 51290.000000 51290.000000 51290.000000 \n",
|
|
|
|
|
"unique NaN NaN NaN NaN NaN \n",
|
|
|
|
|
"top NaN NaN NaN NaN NaN \n",
|
|
|
|
|
"freq NaN NaN NaN NaN NaN \n",
|
|
|
|
|
"mean 246.490581 3.476545 0.142908 28.610982 26.375915 \n",
|
|
|
|
|
"std 487.565361 2.278766 0.212280 174.340972 57.296804 \n",
|
|
|
|
|
"min 0.444000 1.000000 0.000000 -6599.978000 0.000000 \n",
|
|
|
|
|
"25% 30.758625 2.000000 0.000000 0.000000 2.610000 \n",
|
|
|
|
|
"50% 85.053000 3.000000 0.000000 9.240000 7.790000 \n",
|
|
|
|
|
"75% 251.053200 5.000000 0.200000 36.810000 24.450000 \n",
|
|
|
|
|
"max 22638.480000 14.000000 0.850000 8399.976000 933.570000 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" Order Priority \n",
|
|
|
|
|
"count 51290 \n",
|
|
|
|
|
"unique 4 \n",
|
|
|
|
|
"top Medium \n",
|
|
|
|
|
"freq 29433 \n",
|
|
|
|
|
"mean NaN \n",
|
|
|
|
|
"std NaN \n",
|
|
|
|
|
"min NaN \n",
|
|
|
|
|
"25% NaN \n",
|
|
|
|
|
"50% NaN \n",
|
|
|
|
|
"75% NaN \n",
|
|
|
|
|
"max NaN \n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[11 rows x 24 columns]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 166,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"gssd.describe(include='all')"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 167,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "demanding-milwaukee",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"Binders 6152\n",
|
|
|
|
|
"Storage 5059\n",
|
|
|
|
|
"Art 4883\n",
|
|
|
|
|
"Paper 3538\n",
|
|
|
|
|
"Chairs 3434\n",
|
|
|
|
|
"Phones 3357\n",
|
|
|
|
|
"Furnishings 3170\n",
|
|
|
|
|
"Accessories 3075\n",
|
|
|
|
|
"Labels 2606\n",
|
|
|
|
|
"Envelopes 2435\n",
|
|
|
|
|
"Supplies 2425\n",
|
|
|
|
|
"Fasteners 2420\n",
|
|
|
|
|
"Bookcases 2411\n",
|
|
|
|
|
"Copiers 2223\n",
|
|
|
|
|
"Appliances 1755\n",
|
|
|
|
|
"Machines 1486\n",
|
|
|
|
|
"Tables 861\n",
|
|
|
|
|
"Name: Sub-Category, dtype: int64"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 167,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"gssd[\"Sub-Category\"].value_counts()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 168,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "above-script",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"<AxesSubplot:>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 168,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEpCAYAAAB/ZvKwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAsUUlEQVR4nO3deZhkZX328e/NjKiACMiIhMUhOkrIa1gyLAoaBUEEDSYRxLhMCAF9MzEYExWMBgVN3BIjRjEo+IJBATcgisA4Im5hGXZZDCNKYAQZQQGXgMD9/vE8Rdc03dNdp05vnPtzXX111amqXz/VXf07z3lW2SYiIrphnZkuQERETJ8k/YiIDknSj4jokCT9iIgOSdKPiOiQJP2IiA6ZP9MFWJtNN93UCxcunOliRETMKZdddtlPbS8Y67FZnfQXLlzIihUrZroYERFziqSbx3sszTsRER2SpB8R0SFJ+hERHZKkHxHRIUn6EREdkqQfEdEhSfoRER2SpB8R0SGzenLWWBYe+ZVJPe9H791/iksSETH3pKYfEdEhk0r6kjaS9HlJN0i6XtKzJW0iaZmkG+v3jetzJek4SSslXS1pp744S+rzb5S0ZKreVEREjG2yNf0PA+fa3hbYHrgeOBJYbnsRsLzeB3gxsKh+HQ4cDyBpE+BoYFdgF+Do3okiIiKmx4RJX9ITgecBJwLYvt/2z4EDgJPr004GXlZvHwCc4uIiYCNJmwMvApbZvsv2z4BlwL4tvpeIiJjAZGr62wCrgU9JukLSJyWtD2xm+7b6nNuBzertLYBb+l5/az023vE1SDpc0gpJK1avXj3Yu4mIiLWaTNKfD+wEHG97R+CXjDTlAGDbgNsokO0TbC+2vXjBgjGXg46IiIYmk/RvBW61fXG9/3nKSeAntdmG+v2O+vgqYKu+129Zj413PCIipsmESd/27cAtkp5ZD+0FXAecDfRG4CwBzqq3zwZeW0fx7AbcXZuBzgP2kbRx7cDdpx6LiIhpMtnJWW8ATpW0LnATcAjlhHGGpEOBm4GD6nPPAfYDVgK/qs/F9l2SjgUurc87xvZdrbyLiIiYlEklfdtXAovHeGivMZ5rYOk4cU4CThqgfBER0aLMyI2I6JAk/YiIDknSj4jokCT9iIgOSdKPiOiQJP2IiA5J0o+I6JAk/YiIDknSj4jokCT9iIgOSdKPiOiQJP2IiA5J0o+I6JAk/YiIDknSj4jokCT9iIgOSdKPiOiQJP2IiA5J0o+I6JAk/YiIDknSj4jokCT9iIgOSdKPiOiQJP2IiA6ZVNKX9CNJ10i6UtKKemwTScsk3Vi/b1yPS9JxklZKulrSTn1xltTn3yhpydS8pYiIGM8gNf0X2N7B9uJ6/0hgue1FwPJ6H+DFwKL6dThwPJSTBHA0sCuwC3B070QRERHTY/4Qrz0AeH69fTLwDeCt9fgptg1cJGkjSZvX5y6zfReApGXAvsBnhyjD0BYe+ZVJPe9H791/iksSETH1JlvTN3C+pMskHV6PbWb7tnr7dmCzensL4Ja+195aj413PCIipslka/p72F4l6cnAMkk39D9o25LcRoHqSeVwgK233rqNkBERUU2qpm97Vf1+B/AlSpv8T2qzDfX7HfXpq4Ct+l6+ZT023vHRP+sE24ttL16wYMFg7yYiItZqwqQvaX1JT+jdBvYBvgecDfRG4CwBzqq3zwZeW0fx7AbcXZuBzgP2kbRx7cDdpx6LiIhpMpnmnc2AL0nqPf8zts+VdClwhqRDgZuBg+rzzwH2A1YCvwIOAbB9l6RjgUvr847pdepGRMT0mDDp274J2H6M43cCe41x3MDScWKdBJw0eDHnjsmMBspIoIiYKZmRGxHRIUn6EREdkqQfEdEhSfoRER2SpB8R0SFJ+hERHZKkHxHRIUn6EREdkqQfEdEhSfoRER2SpB8R0SFJ+hERHZKkHxHRIUn6EREdkqQfEdEhSfoRER2SpB8R0S
FJ+hERHZKkHxHRIUn6EREdkqQfEdEhSfoRER2SpB8R0SFJ+hERHZKkHxHRIZNO+pLmSbpC0pfr/W0kXSxppaTTJa1bjz+23l9ZH1/YF+Ooevz7kl7U+ruJiIi1GqSmfwRwfd/99wEfsv104GfAofX4ocDP6vEP1echaTvgYOB3gX2Bj0maN1zxIyJiEJNK+pK2BPYHPlnvC9gT+Hx9ysnAy+rtA+p96uN71ecfAJxm+z7bPwRWAru08B4iImKSJlvT/1fgLcBD9f6TgJ/bfqDevxXYot7eArgFoD5+d33+w8fHeM3DJB0uaYWkFatXr578O4mIiAlNmPQlvQS4w/Zl01AebJ9ge7HtxQsWLJiOHxkR0RnzJ/Gc3YE/lLQf8DhgQ+DDwEaS5tfa/JbAqvr8VcBWwK2S5gNPBO7sO97T/5qIiJgGE9b0bR9le0vbCykdsV+3/SrgAuDl9WlLgLPq7bPrferjX7ftevzgOrpnG2ARcElr7yQiIiY0mZr+eN4KnCbp3cAVwIn1+InApyWtBO6inCiwfa2kM4DrgAeApbYfHOLnR0TEgAZK+ra/AXyj3r6JMUbf2P5f4MBxXv8e4D2DFjIiItqRGbkRER2SpB8R0SFJ+hERHZKkHxHRIUn6EREdkqQfEdEhSfoRER2SpB8R0SFJ+hERHZKkHxHRIcOsvRNTbOGRX5nU83703v2nuCQR8WiRmn5ERIck6UdEdEiadzokzUURkZp+RESHJOlHRHRImneisTQXRcw9qelHRHRIkn5ERIck6UdEdEiSfkREhyTpR0R0SEbvxKyQkUAR0yM1/YiIDknSj4jokAmTvqTHSbpE0lWSrpX0rnp8G0kXS1op6XRJ69bjj633V9bHF/bFOqoe/76kF03Zu4qIiDFNpk3/PmBP27+Q9Bjg25K+CrwJ+JDt0yR9HDgUOL5+/5ntp0s6GHgf8ApJ2wEHA78L/BbwNUnPsP3gFLyv6Lj0EUSMbcKavotf1LuPqV8G9gQ+X4+fDLys3j6g3qc+vpck1eOn2b7P9g+BlcAubbyJiIiYnEm16UuaJ+lK4A5gGfAD4Oe2H6hPuRXYot7eArgFoD5+N/Ck/uNjvKb/Zx0uaYWkFatXrx74DUVExPgmlfRtP2h7B2BLSu1826kqkO0TbC+2vXjBggVT9WMiIjppoNE7tn8OXAA8G9hIUq9PYEtgVb29CtgKoD7+RODO/uNjvCYiIqbBZEbvLJC0Ub39eGBv4HpK8n95fdoS4Kx6++x6n/r41227Hj+4ju7ZBlgEXNLS+4iIiEmYzOidzYGTJc2jnCTOsP1lSdcBp0l6N3AFcGJ9/onApyWtBO6ijNjB9rWSzgCuAx4AlmbkTkTE9Jow6du+GthxjOM3McboG9v/Cxw4Tqz3AO8ZvJgREdGGzMiNiOiQJP2IiA5J0o+I6JAk/YiIDknSj4jokCT9iIgOSdKPiOiQbJcYMQlZqjkeLVLTj4jokCT9iIgOSdKPiOiQJP2IiA5J0o+I6JCM3omYZm2PBMrIohhEkn5ErGEmTko5IU2fNO9ERHRIkn5ERIck6UdEdEiSfkREhyTpR0R0SEbvRMSckeGpw0tNPyKiQ5L0IyI6JEk/IqJDJkz6kraSdIGk6yRdK+mIenwTScsk3Vi/b1yPS9JxklZKulrSTn2xltTn3yhpydS9rYiIGMtkavoPAH9reztgN2CppO2AI4HlthcBy+t9gBcDi+rX4cDxUE4SwNHArsAuwNG9E0VEREyPCZO+7dtsX15v3wtcD2wBHACcXJ92MvCyevsA4BQXFwEbSdoceBGwzPZdtn8GLAP2bfPNRETE2g3Upi9pIbAjcDGwme3b6kO3A5vV21sAt/S97NZ6bLzjERExTSad9CVtAHwBeKPte/ofs23AbRRI0uGSVkhasXr16jZCRkRENamkL+kxlIR/qu0v1sM/qc021O931OOrgK36Xr5lPTbe8TXYPsH2YtuLFyxYMMh7iYiICU
xm9I6AE4Hrbf9L30NnA70ROEuAs/qOv7aO4tkNuLs2A50H7CNp49qBu089FhER02QyyzDsDrwGuEbSlfXY24D3Amd
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 432x288 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {
|
|
|
|
|
"needs_background": "light"
|
|
|
|
|
},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"import matplotlib\n",
"# Bar chart of how many rows fall into each Sub-Category value\n",
"subcategory_counts = gssd[\"Sub-Category\"].value_counts()\n",
"subcategory_counts.plot.bar()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 169,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "abroad-durham",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"Postal Code\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Print the name of every column that contains at least one NaN value\n",
"has_nan = gssd.isnull().any().values\n",
"for column_name in gssd.columns[has_nan]:\n",
"    print(column_name)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 170,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "centered-realtor",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"#Usunięcie kolumny Postal Code, ponieważ nie ma większego znaczenia dla danych, a jest w niej sporo wartości NaN\n",
|
|
|
|
|
"gssd = gssd.dropna(axis='columns')"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "relevant-receptor",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Min-max normalise every float64 column of gssd (MinMaxScaler default range)\n",
"from sklearn import preprocessing\n",
"flcols = gssd.select_dtypes(include=['float64']).columns\n",
"x = gssd[flcols].values\n",
"min_max_scaler = preprocessing.MinMaxScaler()\n",
"x_scaled = min_max_scaler.fit_transform(x)\n",
"# Write the scaled array back by position in one step. Going through a new\n",
"# DataFrame and assigning column by column aligns on index labels, which\n",
"# silently yields NaN whenever gssd does not carry the default 0..n-1 index.\n",
"gssd[flcols] = x_scaled"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 172,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "informal-unemployment",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>Row ID</th>\n",
|
|
|
|
|
" <th>Order ID</th>\n",
|
|
|
|
|
" <th>Order Date</th>\n",
|
|
|
|
|
" <th>Ship Date</th>\n",
|
|
|
|
|
" <th>Ship Mode</th>\n",
|
|
|
|
|
" <th>Customer ID</th>\n",
|
|
|
|
|
" <th>Customer Name</th>\n",
|
|
|
|
|
" <th>Segment</th>\n",
|
|
|
|
|
" <th>City</th>\n",
|
|
|
|
|
" <th>State</th>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <th>Product ID</th>\n",
|
|
|
|
|
" <th>Category</th>\n",
|
|
|
|
|
" <th>Sub-Category</th>\n",
|
|
|
|
|
" <th>Product Name</th>\n",
|
|
|
|
|
" <th>Sales</th>\n",
|
|
|
|
|
" <th>Quantity</th>\n",
|
|
|
|
|
" <th>Discount</th>\n",
|
|
|
|
|
" <th>Profit</th>\n",
|
|
|
|
|
" <th>Shipping Cost</th>\n",
|
|
|
|
|
" <th>Order Priority</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>32298</td>\n",
|
|
|
|
|
" <td>CA-2012-124891</td>\n",
|
|
|
|
|
" <td>31-07-2012</td>\n",
|
|
|
|
|
" <td>31-07-2012</td>\n",
|
|
|
|
|
" <td>Same Day</td>\n",
|
|
|
|
|
" <td>RH-19495</td>\n",
|
|
|
|
|
" <td>Rick Hansen</td>\n",
|
|
|
|
|
" <td>Consumer</td>\n",
|
|
|
|
|
" <td>New York City</td>\n",
|
|
|
|
|
" <td>New York</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>TEC-AC-10003033</td>\n",
|
|
|
|
|
" <td>Technology</td>\n",
|
|
|
|
|
" <td>Accessories</td>\n",
|
|
|
|
|
" <td>Plantronics CS510 - Over-the-Head monaural Wir...</td>\n",
|
|
|
|
|
" <td>0.102006</td>\n",
|
|
|
|
|
" <td>7</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>0.490812</td>\n",
|
|
|
|
|
" <td>1.000000</td>\n",
|
|
|
|
|
" <td>Critical</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>26341</td>\n",
|
|
|
|
|
" <td>IN-2013-77878</td>\n",
|
|
|
|
|
" <td>05-02-2013</td>\n",
|
|
|
|
|
" <td>07-02-2013</td>\n",
|
|
|
|
|
" <td>Second Class</td>\n",
|
|
|
|
|
" <td>JR-16210</td>\n",
|
|
|
|
|
" <td>Justin Ritter</td>\n",
|
|
|
|
|
" <td>Corporate</td>\n",
|
|
|
|
|
" <td>Wollongong</td>\n",
|
|
|
|
|
" <td>New South Wales</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>FUR-CH-10003950</td>\n",
|
|
|
|
|
" <td>Furniture</td>\n",
|
|
|
|
|
" <td>Chairs</td>\n",
|
|
|
|
|
" <td>Novimex Executive Leather Armchair, Black</td>\n",
|
|
|
|
|
" <td>0.163837</td>\n",
|
|
|
|
|
" <td>9</td>\n",
|
|
|
|
|
" <td>0.117647</td>\n",
|
|
|
|
|
" <td>0.420749</td>\n",
|
|
|
|
|
" <td>0.989353</td>\n",
|
|
|
|
|
" <td>Critical</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>25330</td>\n",
|
|
|
|
|
" <td>IN-2013-71249</td>\n",
|
|
|
|
|
" <td>17-10-2013</td>\n",
|
|
|
|
|
" <td>18-10-2013</td>\n",
|
|
|
|
|
" <td>First Class</td>\n",
|
|
|
|
|
" <td>CR-12730</td>\n",
|
|
|
|
|
" <td>Craig Reiter</td>\n",
|
|
|
|
|
" <td>Consumer</td>\n",
|
|
|
|
|
" <td>Brisbane</td>\n",
|
|
|
|
|
" <td>Queensland</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>TEC-PH-10004664</td>\n",
|
|
|
|
|
" <td>Technology</td>\n",
|
|
|
|
|
" <td>Phones</td>\n",
|
|
|
|
|
" <td>Nokia Smart Phone, with Caller ID</td>\n",
|
|
|
|
|
" <td>0.228586</td>\n",
|
|
|
|
|
" <td>9</td>\n",
|
|
|
|
|
" <td>0.117647</td>\n",
|
|
|
|
|
" <td>0.501331</td>\n",
|
|
|
|
|
" <td>0.980633</td>\n",
|
|
|
|
|
" <td>Medium</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>13524</td>\n",
|
|
|
|
|
" <td>ES-2013-1579342</td>\n",
|
|
|
|
|
" <td>28-01-2013</td>\n",
|
|
|
|
|
" <td>30-01-2013</td>\n",
|
|
|
|
|
" <td>First Class</td>\n",
|
|
|
|
|
" <td>KM-16375</td>\n",
|
|
|
|
|
" <td>Katherine Murray</td>\n",
|
|
|
|
|
" <td>Home Office</td>\n",
|
|
|
|
|
" <td>Berlin</td>\n",
|
|
|
|
|
" <td>Berlin</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>TEC-PH-10004583</td>\n",
|
|
|
|
|
" <td>Technology</td>\n",
|
|
|
|
|
" <td>Phones</td>\n",
|
|
|
|
|
" <td>Motorola Smart Phone, Cordless</td>\n",
|
|
|
|
|
" <td>0.127753</td>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" <td>0.117647</td>\n",
|
|
|
|
|
" <td>0.433564</td>\n",
|
|
|
|
|
" <td>0.974924</td>\n",
|
|
|
|
|
" <td>Medium</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>47221</td>\n",
|
|
|
|
|
" <td>SG-2013-4320</td>\n",
|
|
|
|
|
" <td>05-11-2013</td>\n",
|
|
|
|
|
" <td>06-11-2013</td>\n",
|
|
|
|
|
" <td>Same Day</td>\n",
|
|
|
|
|
" <td>RH-9495</td>\n",
|
|
|
|
|
" <td>Rick Hansen</td>\n",
|
|
|
|
|
" <td>Consumer</td>\n",
|
|
|
|
|
" <td>Dakar</td>\n",
|
|
|
|
|
" <td>Dakar</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>TEC-SHA-10000501</td>\n",
|
|
|
|
|
" <td>Technology</td>\n",
|
|
|
|
|
" <td>Copiers</td>\n",
|
|
|
|
|
" <td>Sharp Wireless Fax, High-Speed</td>\n",
|
|
|
|
|
" <td>0.125122</td>\n",
|
|
|
|
|
" <td>8</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>0.460768</td>\n",
|
|
|
|
|
" <td>0.967298</td>\n",
|
|
|
|
|
" <td>Critical</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>51285</th>\n",
|
|
|
|
|
" <td>29002</td>\n",
|
|
|
|
|
" <td>IN-2014-62366</td>\n",
|
|
|
|
|
" <td>19-06-2014</td>\n",
|
|
|
|
|
" <td>19-06-2014</td>\n",
|
|
|
|
|
" <td>Same Day</td>\n",
|
|
|
|
|
" <td>KE-16420</td>\n",
|
|
|
|
|
" <td>Katrina Edelman</td>\n",
|
|
|
|
|
" <td>Corporate</td>\n",
|
|
|
|
|
" <td>Kure</td>\n",
|
|
|
|
|
" <td>Hiroshima</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>OFF-FA-10000746</td>\n",
|
|
|
|
|
" <td>Office Supplies</td>\n",
|
|
|
|
|
" <td>Fasteners</td>\n",
|
|
|
|
|
" <td>Advantus Thumb Tacks, 12 Pack</td>\n",
|
|
|
|
|
" <td>0.002856</td>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>0.440300</td>\n",
|
|
|
|
|
" <td>0.000011</td>\n",
|
|
|
|
|
" <td>Medium</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>51286</th>\n",
|
|
|
|
|
" <td>35398</td>\n",
|
|
|
|
|
" <td>US-2014-102288</td>\n",
|
|
|
|
|
" <td>20-06-2014</td>\n",
|
|
|
|
|
" <td>24-06-2014</td>\n",
|
|
|
|
|
" <td>Standard Class</td>\n",
|
|
|
|
|
" <td>ZC-21910</td>\n",
|
|
|
|
|
" <td>Zuschuss Carroll</td>\n",
|
|
|
|
|
" <td>Consumer</td>\n",
|
|
|
|
|
" <td>Houston</td>\n",
|
|
|
|
|
" <td>Texas</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>OFF-AP-10002906</td>\n",
|
|
|
|
|
" <td>Office Supplies</td>\n",
|
|
|
|
|
" <td>Appliances</td>\n",
|
|
|
|
|
" <td>Hoover Replacement Belt for Commercial Guardsm...</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0.941176</td>\n",
|
|
|
|
|
" <td>0.439926</td>\n",
|
|
|
|
|
" <td>0.000011</td>\n",
|
|
|
|
|
" <td>Medium</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>51287</th>\n",
|
|
|
|
|
" <td>40470</td>\n",
|
|
|
|
|
" <td>US-2013-155768</td>\n",
|
|
|
|
|
" <td>02-12-2013</td>\n",
|
|
|
|
|
" <td>02-12-2013</td>\n",
|
|
|
|
|
" <td>Same Day</td>\n",
|
|
|
|
|
" <td>LB-16795</td>\n",
|
|
|
|
|
" <td>Laurel Beltran</td>\n",
|
|
|
|
|
" <td>Home Office</td>\n",
|
|
|
|
|
" <td>Oxnard</td>\n",
|
|
|
|
|
" <td>California</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>OFF-EN-10001219</td>\n",
|
|
|
|
|
" <td>Office Supplies</td>\n",
|
|
|
|
|
" <td>Envelopes</td>\n",
|
|
|
|
|
" <td>#10- 4 1/8\" x 9 1/2\" Security-Tint Envelopes</td>\n",
|
|
|
|
|
" <td>0.000993</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>0.440749</td>\n",
|
|
|
|
|
" <td>0.000011</td>\n",
|
|
|
|
|
" <td>High</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>51288</th>\n",
|
|
|
|
|
" <td>9596</td>\n",
|
|
|
|
|
" <td>MX-2012-140767</td>\n",
|
|
|
|
|
" <td>18-02-2012</td>\n",
|
|
|
|
|
" <td>22-02-2012</td>\n",
|
|
|
|
|
" <td>Standard Class</td>\n",
|
|
|
|
|
" <td>RB-19795</td>\n",
|
|
|
|
|
" <td>Ross Baird</td>\n",
|
|
|
|
|
" <td>Home Office</td>\n",
|
|
|
|
|
" <td>Valinhos</td>\n",
|
|
|
|
|
" <td>Săo Paulo</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>OFF-BI-10000806</td>\n",
|
|
|
|
|
" <td>Office Supplies</td>\n",
|
|
|
|
|
" <td>Binders</td>\n",
|
|
|
|
|
" <td>Acco Index Tab, Economy</td>\n",
|
|
|
|
|
" <td>0.000574</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>0.440160</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>Medium</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>51289</th>\n",
|
|
|
|
|
" <td>6147</td>\n",
|
|
|
|
|
" <td>MX-2012-134460</td>\n",
|
|
|
|
|
" <td>22-05-2012</td>\n",
|
|
|
|
|
" <td>26-05-2012</td>\n",
|
|
|
|
|
" <td>Second Class</td>\n",
|
|
|
|
|
" <td>MC-18100</td>\n",
|
|
|
|
|
" <td>Mick Crebagga</td>\n",
|
|
|
|
|
" <td>Consumer</td>\n",
|
|
|
|
|
" <td>Tipitapa</td>\n",
|
|
|
|
|
" <td>Managua</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>OFF-PA-10004155</td>\n",
|
|
|
|
|
" <td>Office Supplies</td>\n",
|
|
|
|
|
" <td>Paper</td>\n",
|
|
|
|
|
" <td>Eaton Computer Printout Paper, 8.5 x 11</td>\n",
|
|
|
|
|
" <td>0.002692</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>0.440120</td>\n",
|
|
|
|
|
" <td>0.000000</td>\n",
|
|
|
|
|
" <td>High</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"<p>51290 rows × 23 columns</p>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" Row ID Order ID Order Date Ship Date Ship Mode \\\n",
|
|
|
|
|
"0 32298 CA-2012-124891 31-07-2012 31-07-2012 Same Day \n",
|
|
|
|
|
"1 26341 IN-2013-77878 05-02-2013 07-02-2013 Second Class \n",
|
|
|
|
|
"2 25330 IN-2013-71249 17-10-2013 18-10-2013 First Class \n",
|
|
|
|
|
"3 13524 ES-2013-1579342 28-01-2013 30-01-2013 First Class \n",
|
|
|
|
|
"4 47221 SG-2013-4320 05-11-2013 06-11-2013 Same Day \n",
|
|
|
|
|
"... ... ... ... ... ... \n",
|
|
|
|
|
"51285 29002 IN-2014-62366 19-06-2014 19-06-2014 Same Day \n",
|
|
|
|
|
"51286 35398 US-2014-102288 20-06-2014 24-06-2014 Standard Class \n",
|
|
|
|
|
"51287 40470 US-2013-155768 02-12-2013 02-12-2013 Same Day \n",
|
|
|
|
|
"51288 9596 MX-2012-140767 18-02-2012 22-02-2012 Standard Class \n",
|
|
|
|
|
"51289 6147 MX-2012-134460 22-05-2012 26-05-2012 Second Class \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" Customer ID Customer Name Segment City \\\n",
|
|
|
|
|
"0 RH-19495 Rick Hansen Consumer New York City \n",
|
|
|
|
|
"1 JR-16210 Justin Ritter Corporate Wollongong \n",
|
|
|
|
|
"2 CR-12730 Craig Reiter Consumer Brisbane \n",
|
|
|
|
|
"3 KM-16375 Katherine Murray Home Office Berlin \n",
|
|
|
|
|
"4 RH-9495 Rick Hansen Consumer Dakar \n",
|
|
|
|
|
"... ... ... ... ... \n",
|
|
|
|
|
"51285 KE-16420 Katrina Edelman Corporate Kure \n",
|
|
|
|
|
"51286 ZC-21910 Zuschuss Carroll Consumer Houston \n",
|
|
|
|
|
"51287 LB-16795 Laurel Beltran Home Office Oxnard \n",
|
|
|
|
|
"51288 RB-19795 Ross Baird Home Office Valinhos \n",
|
|
|
|
|
"51289 MC-18100 Mick Crebagga Consumer Tipitapa \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" State ... Product ID Category Sub-Category \\\n",
|
|
|
|
|
"0 New York ... TEC-AC-10003033 Technology Accessories \n",
|
|
|
|
|
"1 New South Wales ... FUR-CH-10003950 Furniture Chairs \n",
|
|
|
|
|
"2 Queensland ... TEC-PH-10004664 Technology Phones \n",
|
|
|
|
|
"3 Berlin ... TEC-PH-10004583 Technology Phones \n",
|
|
|
|
|
"4 Dakar ... TEC-SHA-10000501 Technology Copiers \n",
|
|
|
|
|
"... ... ... ... ... ... \n",
|
|
|
|
|
"51285 Hiroshima ... OFF-FA-10000746 Office Supplies Fasteners \n",
|
|
|
|
|
"51286 Texas ... OFF-AP-10002906 Office Supplies Appliances \n",
|
|
|
|
|
"51287 California ... OFF-EN-10001219 Office Supplies Envelopes \n",
|
|
|
|
|
"51288 Săo Paulo ... OFF-BI-10000806 Office Supplies Binders \n",
|
|
|
|
|
"51289 Managua ... OFF-PA-10004155 Office Supplies Paper \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" Product Name Sales Quantity \\\n",
|
|
|
|
|
"0 Plantronics CS510 - Over-the-Head monaural Wir... 0.102006 7 \n",
|
|
|
|
|
"1 Novimex Executive Leather Armchair, Black 0.163837 9 \n",
|
|
|
|
|
"2 Nokia Smart Phone, with Caller ID 0.228586 9 \n",
|
|
|
|
|
"3 Motorola Smart Phone, Cordless 0.127753 5 \n",
|
|
|
|
|
"4 Sharp Wireless Fax, High-Speed 0.125122 8 \n",
|
|
|
|
|
"... ... ... ... \n",
|
|
|
|
|
"51285 Advantus Thumb Tacks, 12 Pack 0.002856 5 \n",
|
|
|
|
|
"51286 Hoover Replacement Belt for Commercial Guardsm... 0.000000 1 \n",
|
|
|
|
|
"51287 #10- 4 1/8\" x 9 1/2\" Security-Tint Envelopes 0.000993 3 \n",
|
|
|
|
|
"51288 Acco Index Tab, Economy 0.000574 2 \n",
|
|
|
|
|
"51289 Eaton Computer Printout Paper, 8.5 x 11 0.002692 3 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" Discount Profit Shipping Cost Order Priority \n",
|
|
|
|
|
"0 0.000000 0.490812 1.000000 Critical \n",
|
|
|
|
|
"1 0.117647 0.420749 0.989353 Critical \n",
|
|
|
|
|
"2 0.117647 0.501331 0.980633 Medium \n",
|
|
|
|
|
"3 0.117647 0.433564 0.974924 Medium \n",
|
|
|
|
|
"4 0.000000 0.460768 0.967298 Critical \n",
|
|
|
|
|
"... ... ... ... ... \n",
|
|
|
|
|
"51285 0.000000 0.440300 0.000011 Medium \n",
|
|
|
|
|
"51286 0.941176 0.439926 0.000011 Medium \n",
|
|
|
|
|
"51287 0.000000 0.440749 0.000011 High \n",
|
|
|
|
|
"51288 0.000000 0.440160 0.000000 Medium \n",
|
|
|
|
|
"51289 0.000000 0.440120 0.000000 High \n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[51290 rows x 23 columns]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 172,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"gssd"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"id": "reserved-cookie",
|
2021-03-21 22:16:26 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
2021-05-07 20:16:31 +02:00
|
|
|
|
"version": "3.8.5"
|
2021-03-21 22:16:26 +01:00
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 5
|
|
|
|
|
}
|