{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "strange-teens", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Uncomment to install the packages below into the running kernel's environment.\n", "#%pip install kaggle\n", "#%pip install pandas\n", "#%pip install matplotlib\n", "#%pip install scikit-learn" ] }, { "cell_type": "code", "execution_count": null, "id": "another-accessory", "metadata": {}, "outputs": [], "source": [ "!kaggle datasets download -d apoorvaappz/global-super-store-dataset" ] }, { "cell_type": "code", "execution_count": 158, "id": "valid-malta", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Archive: global-super-store-dataset.zip\n", " inflating: Global_Superstore2.csv \n", " inflating: Global_Superstore2.xlsx \n" ] } ], "source": [ "# -o overwrites already-extracted files without prompting, so this cell can be re-run.\n", "!unzip -o global-super-store-dataset.zip" ] }, { "cell_type": "code", "execution_count": 159, "id": "noble-compilation", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Row ID | \n", "Order ID | \n", "Order Date | \n", "Ship Date | \n", "Ship Mode | \n", "Customer ID | \n", "Customer Name | \n", "Segment | \n", "City | \n", "State | \n", "... | \n", "Product ID | \n", "Category | \n", "Sub-Category | \n", "Product Name | \n", "Sales | \n", "Quantity | \n", "Discount | \n", "Profit | \n", "Shipping Cost | \n", "Order Priority | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "32298 | \n", "CA-2012-124891 | \n", "31-07-2012 | \n", "31-07-2012 | \n", "Same Day | \n", "RH-19495 | \n", "Rick Hansen | \n", "Consumer | \n", "New York City | \n", "New York | \n", "... | \n", "TEC-AC-10003033 | \n", "Technology | \n", "Accessories | \n", "Plantronics CS510 - Over-the-Head monaural Wir... | \n", "2309.650 | \n", "7 | \n", "0.0 | \n", "762.1845 | \n", "933.57 | \n", "Critical | \n", "
1 | \n", "26341 | \n", "IN-2013-77878 | \n", "05-02-2013 | \n", "07-02-2013 | \n", "Second Class | \n", "JR-16210 | \n", "Justin Ritter | \n", "Corporate | \n", "Wollongong | \n", "New South Wales | \n", "... | \n", "FUR-CH-10003950 | \n", "Furniture | \n", "Chairs | \n", "Novimex Executive Leather Armchair, Black | \n", "3709.395 | \n", "9 | \n", "0.1 | \n", "-288.7650 | \n", "923.63 | \n", "Critical | \n", "
2 | \n", "25330 | \n", "IN-2013-71249 | \n", "17-10-2013 | \n", "18-10-2013 | \n", "First Class | \n", "CR-12730 | \n", "Craig Reiter | \n", "Consumer | \n", "Brisbane | \n", "Queensland | \n", "... | \n", "TEC-PH-10004664 | \n", "Technology | \n", "Phones | \n", "Nokia Smart Phone, with Caller ID | \n", "5175.171 | \n", "9 | \n", "0.1 | \n", "919.9710 | \n", "915.49 | \n", "Medium | \n", "
3 | \n", "13524 | \n", "ES-2013-1579342 | \n", "28-01-2013 | \n", "30-01-2013 | \n", "First Class | \n", "KM-16375 | \n", "Katherine Murray | \n", "Home Office | \n", "Berlin | \n", "Berlin | \n", "... | \n", "TEC-PH-10004583 | \n", "Technology | \n", "Phones | \n", "Motorola Smart Phone, Cordless | \n", "2892.510 | \n", "5 | \n", "0.1 | \n", "-96.5400 | \n", "910.16 | \n", "Medium | \n", "
4 | \n", "47221 | \n", "SG-2013-4320 | \n", "05-11-2013 | \n", "06-11-2013 | \n", "Same Day | \n", "RH-9495 | \n", "Rick Hansen | \n", "Consumer | \n", "Dakar | \n", "Dakar | \n", "... | \n", "TEC-SHA-10000501 | \n", "Technology | \n", "Copiers | \n", "Sharp Wireless Fax, High-Speed | \n", "2832.960 | \n", "8 | \n", "0.0 | \n", "311.5200 | \n", "903.04 | \n", "Critical | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
51285 | \n", "29002 | \n", "IN-2014-62366 | \n", "19-06-2014 | \n", "19-06-2014 | \n", "Same Day | \n", "KE-16420 | \n", "Katrina Edelman | \n", "Corporate | \n", "Kure | \n", "Hiroshima | \n", "... | \n", "OFF-FA-10000746 | \n", "Office Supplies | \n", "Fasteners | \n", "Advantus Thumb Tacks, 12 Pack | \n", "65.100 | \n", "5 | \n", "0.0 | \n", "4.5000 | \n", "0.01 | \n", "Medium | \n", "
51286 | \n", "35398 | \n", "US-2014-102288 | \n", "20-06-2014 | \n", "24-06-2014 | \n", "Standard Class | \n", "ZC-21910 | \n", "Zuschuss Carroll | \n", "Consumer | \n", "Houston | \n", "Texas | \n", "... | \n", "OFF-AP-10002906 | \n", "Office Supplies | \n", "Appliances | \n", "Hoover Replacement Belt for Commercial Guardsm... | \n", "0.444 | \n", "1 | \n", "0.8 | \n", "-1.1100 | \n", "0.01 | \n", "Medium | \n", "
51287 | \n", "40470 | \n", "US-2013-155768 | \n", "02-12-2013 | \n", "02-12-2013 | \n", "Same Day | \n", "LB-16795 | \n", "Laurel Beltran | \n", "Home Office | \n", "Oxnard | \n", "California | \n", "... | \n", "OFF-EN-10001219 | \n", "Office Supplies | \n", "Envelopes | \n", "#10- 4 1/8\" x 9 1/2\" Security-Tint Envelopes | \n", "22.920 | \n", "3 | \n", "0.0 | \n", "11.2308 | \n", "0.01 | \n", "High | \n", "
51288 | \n", "9596 | \n", "MX-2012-140767 | \n", "18-02-2012 | \n", "22-02-2012 | \n", "Standard Class | \n", "RB-19795 | \n", "Ross Baird | \n", "Home Office | \n", "Valinhos | \n", "Săo Paulo | \n", "... | \n", "OFF-BI-10000806 | \n", "Office Supplies | \n", "Binders | \n", "Acco Index Tab, Economy | \n", "13.440 | \n", "2 | \n", "0.0 | \n", "2.4000 | \n", "0.00 | \n", "Medium | \n", "
51289 | \n", "6147 | \n", "MX-2012-134460 | \n", "22-05-2012 | \n", "26-05-2012 | \n", "Second Class | \n", "MC-18100 | \n", "Mick Crebagga | \n", "Consumer | \n", "Tipitapa | \n", "Managua | \n", "... | \n", "OFF-PA-10004155 | \n", "Office Supplies | \n", "Paper | \n", "Eaton Computer Printout Paper, 8.5 x 11 | \n", "61.380 | \n", "3 | \n", "0.0 | \n", "1.8000 | \n", "0.00 | \n", "High | \n", "
51290 rows × 24 columns
\n", "\n", " | Row ID | \n", "Order ID | \n", "Order Date | \n", "Ship Date | \n", "Ship Mode | \n", "Customer ID | \n", "Customer Name | \n", "Segment | \n", "City | \n", "State | \n", "... | \n", "Product ID | \n", "Category | \n", "Sub-Category | \n", "Product Name | \n", "Sales | \n", "Quantity | \n", "Discount | \n", "Profit | \n", "Shipping Cost | \n", "Order Priority | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "51290.00000 | \n", "51290 | \n", "51290 | \n", "51290 | \n", "51290 | \n", "51290 | \n", "51290 | \n", "51290 | \n", "51290 | \n", "51290 | \n", "... | \n", "51290 | \n", "51290 | \n", "51290 | \n", "51290 | \n", "51290.000000 | \n", "51290.000000 | \n", "51290.000000 | \n", "51290.000000 | \n", "51290.000000 | \n", "51290 | \n", "
unique | \n", "NaN | \n", "25035 | \n", "1430 | \n", "1464 | \n", "4 | \n", "1590 | \n", "795 | \n", "3 | \n", "3636 | \n", "1094 | \n", "... | \n", "10292 | \n", "3 | \n", "17 | \n", "3788 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "4 | \n", "
top | \n", "NaN | \n", "CA-2014-100111 | \n", "18-06-2014 | \n", "22-11-2014 | \n", "Standard Class | \n", "PO-18850 | \n", "Muhammed Yedwab | \n", "Consumer | \n", "New York City | \n", "California | \n", "... | \n", "OFF-AR-10003651 | \n", "Office Supplies | \n", "Binders | \n", "Staples | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Medium | \n", "
freq | \n", "NaN | \n", "14 | \n", "135 | \n", "130 | \n", "30775 | \n", "97 | \n", "108 | \n", "26518 | \n", "915 | \n", "2001 | \n", "... | \n", "35 | \n", "31273 | \n", "6152 | \n", "227 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "29433 | \n", "
mean | \n", "25645.50000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "246.490581 | \n", "3.476545 | \n", "0.142908 | \n", "28.610982 | \n", "26.375915 | \n", "NaN | \n", "
std | \n", "14806.29199 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "487.565361 | \n", "2.278766 | \n", "0.212280 | \n", "174.340972 | \n", "57.296804 | \n", "NaN | \n", "
min | \n", "1.00000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.444000 | \n", "1.000000 | \n", "0.000000 | \n", "-6599.978000 | \n", "0.000000 | \n", "NaN | \n", "
25% | \n", "12823.25000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "30.758625 | \n", "2.000000 | \n", "0.000000 | \n", "0.000000 | \n", "2.610000 | \n", "NaN | \n", "
50% | \n", "25645.50000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "85.053000 | \n", "3.000000 | \n", "0.000000 | \n", "9.240000 | \n", "7.790000 | \n", "NaN | \n", "
75% | \n", "38467.75000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "251.053200 | \n", "5.000000 | \n", "0.200000 | \n", "36.810000 | \n", "24.450000 | \n", "NaN | \n", "
max | \n", "51290.00000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "22638.480000 | \n", "14.000000 | \n", "0.850000 | \n", "8399.976000 | \n", "933.570000 | \n", "NaN | \n", "
11 rows × 24 columns
\n", "\n", " | Row ID | \n", "Order ID | \n", "Order Date | \n", "Ship Date | \n", "Ship Mode | \n", "Customer ID | \n", "Customer Name | \n", "Segment | \n", "City | \n", "State | \n", "... | \n", "Product ID | \n", "Category | \n", "Sub-Category | \n", "Product Name | \n", "Sales | \n", "Quantity | \n", "Discount | \n", "Profit | \n", "Shipping Cost | \n", "Order Priority | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "32298 | \n", "CA-2012-124891 | \n", "31-07-2012 | \n", "31-07-2012 | \n", "Same Day | \n", "RH-19495 | \n", "Rick Hansen | \n", "Consumer | \n", "New York City | \n", "New York | \n", "... | \n", "TEC-AC-10003033 | \n", "Technology | \n", "Accessories | \n", "Plantronics CS510 - Over-the-Head monaural Wir... | \n", "0.102006 | \n", "7 | \n", "0.000000 | \n", "0.490812 | \n", "1.000000 | \n", "Critical | \n", "
1 | \n", "26341 | \n", "IN-2013-77878 | \n", "05-02-2013 | \n", "07-02-2013 | \n", "Second Class | \n", "JR-16210 | \n", "Justin Ritter | \n", "Corporate | \n", "Wollongong | \n", "New South Wales | \n", "... | \n", "FUR-CH-10003950 | \n", "Furniture | \n", "Chairs | \n", "Novimex Executive Leather Armchair, Black | \n", "0.163837 | \n", "9 | \n", "0.117647 | \n", "0.420749 | \n", "0.989353 | \n", "Critical | \n", "
2 | \n", "25330 | \n", "IN-2013-71249 | \n", "17-10-2013 | \n", "18-10-2013 | \n", "First Class | \n", "CR-12730 | \n", "Craig Reiter | \n", "Consumer | \n", "Brisbane | \n", "Queensland | \n", "... | \n", "TEC-PH-10004664 | \n", "Technology | \n", "Phones | \n", "Nokia Smart Phone, with Caller ID | \n", "0.228586 | \n", "9 | \n", "0.117647 | \n", "0.501331 | \n", "0.980633 | \n", "Medium | \n", "
3 | \n", "13524 | \n", "ES-2013-1579342 | \n", "28-01-2013 | \n", "30-01-2013 | \n", "First Class | \n", "KM-16375 | \n", "Katherine Murray | \n", "Home Office | \n", "Berlin | \n", "Berlin | \n", "... | \n", "TEC-PH-10004583 | \n", "Technology | \n", "Phones | \n", "Motorola Smart Phone, Cordless | \n", "0.127753 | \n", "5 | \n", "0.117647 | \n", "0.433564 | \n", "0.974924 | \n", "Medium | \n", "
4 | \n", "47221 | \n", "SG-2013-4320 | \n", "05-11-2013 | \n", "06-11-2013 | \n", "Same Day | \n", "RH-9495 | \n", "Rick Hansen | \n", "Consumer | \n", "Dakar | \n", "Dakar | \n", "... | \n", "TEC-SHA-10000501 | \n", "Technology | \n", "Copiers | \n", "Sharp Wireless Fax, High-Speed | \n", "0.125122 | \n", "8 | \n", "0.000000 | \n", "0.460768 | \n", "0.967298 | \n", "Critical | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
51285 | \n", "29002 | \n", "IN-2014-62366 | \n", "19-06-2014 | \n", "19-06-2014 | \n", "Same Day | \n", "KE-16420 | \n", "Katrina Edelman | \n", "Corporate | \n", "Kure | \n", "Hiroshima | \n", "... | \n", "OFF-FA-10000746 | \n", "Office Supplies | \n", "Fasteners | \n", "Advantus Thumb Tacks, 12 Pack | \n", "0.002856 | \n", "5 | \n", "0.000000 | \n", "0.440300 | \n", "0.000011 | \n", "Medium | \n", "
51286 | \n", "35398 | \n", "US-2014-102288 | \n", "20-06-2014 | \n", "24-06-2014 | \n", "Standard Class | \n", "ZC-21910 | \n", "Zuschuss Carroll | \n", "Consumer | \n", "Houston | \n", "Texas | \n", "... | \n", "OFF-AP-10002906 | \n", "Office Supplies | \n", "Appliances | \n", "Hoover Replacement Belt for Commercial Guardsm... | \n", "0.000000 | \n", "1 | \n", "0.941176 | \n", "0.439926 | \n", "0.000011 | \n", "Medium | \n", "
51287 | \n", "40470 | \n", "US-2013-155768 | \n", "02-12-2013 | \n", "02-12-2013 | \n", "Same Day | \n", "LB-16795 | \n", "Laurel Beltran | \n", "Home Office | \n", "Oxnard | \n", "California | \n", "... | \n", "OFF-EN-10001219 | \n", "Office Supplies | \n", "Envelopes | \n", "#10- 4 1/8\" x 9 1/2\" Security-Tint Envelopes | \n", "0.000993 | \n", "3 | \n", "0.000000 | \n", "0.440749 | \n", "0.000011 | \n", "High | \n", "
51288 | \n", "9596 | \n", "MX-2012-140767 | \n", "18-02-2012 | \n", "22-02-2012 | \n", "Standard Class | \n", "RB-19795 | \n", "Ross Baird | \n", "Home Office | \n", "Valinhos | \n", "Săo Paulo | \n", "... | \n", "OFF-BI-10000806 | \n", "Office Supplies | \n", "Binders | \n", "Acco Index Tab, Economy | \n", "0.000574 | \n", "2 | \n", "0.000000 | \n", "0.440160 | \n", "0.000000 | \n", "Medium | \n", "
51289 | \n", "6147 | \n", "MX-2012-134460 | \n", "22-05-2012 | \n", "26-05-2012 | \n", "Second Class | \n", "MC-18100 | \n", "Mick Crebagga | \n", "Consumer | \n", "Tipitapa | \n", "Managua | \n", "... | \n", "OFF-PA-10004155 | \n", "Office Supplies | \n", "Paper | \n", "Eaton Computer Printout Paper, 8.5 x 11 | \n", "0.002692 | \n", "3 | \n", "0.000000 | \n", "0.440120 | \n", "0.000000 | \n", "High | \n", "
51290 rows × 23 columns
\n", "