diff --git a/download_data.ipynb b/download_data.ipynb
index a16dd92..621fb86 100644
--- a/download_data.ipynb
+++ b/download_data.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"id": "5e2107a5",
"metadata": {},
"outputs": [],
@@ -12,7 +12,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 2,
"id": "bcc889e5",
"metadata": {},
"outputs": [
@@ -21,21 +21,21 @@
"output_type": "stream",
"text": [
"Requirement already satisfied: kaggle in /home/mikolaj/.local/lib/python3.8/site-packages (1.5.12)\n",
- "Requirement already satisfied: tqdm in /home/mikolaj/.local/lib/python3.8/site-packages (from kaggle) (4.59.0)\n",
"Requirement already satisfied: urllib3 in /usr/lib/python3/dist-packages (from kaggle) (1.22)\n",
"Requirement already satisfied: six>=1.10 in /usr/lib/python3/dist-packages (from kaggle) (1.11.0)\n",
"Requirement already satisfied: requests in /home/mikolaj/.local/lib/python3.8/site-packages (from kaggle) (2.25.1)\n",
- "Requirement already satisfied: python-dateutil in /home/mikolaj/.local/lib/python3.8/site-packages (from kaggle) (2.8.1)\n",
"Requirement already satisfied: certifi in /usr/lib/python3/dist-packages (from kaggle) (2018.1.18)\n",
"Requirement already satisfied: python-slugify in /home/mikolaj/.local/lib/python3.8/site-packages (from kaggle) (6.1.1)\n",
+ "Requirement already satisfied: python-dateutil in /home/mikolaj/.local/lib/python3.8/site-packages (from kaggle) (2.8.1)\n",
+ "Requirement already satisfied: tqdm in /home/mikolaj/.local/lib/python3.8/site-packages (from kaggle) (4.59.0)\n",
"Requirement already satisfied: text-unidecode>=1.3 in /home/mikolaj/.local/lib/python3.8/site-packages (from python-slugify->kaggle) (1.3)\n",
- "Requirement already satisfied: idna<3,>=2.5 in /home/mikolaj/.local/lib/python3.8/site-packages (from requests->kaggle) (2.10)\n",
"Requirement already satisfied: chardet<5,>=3.0.2 in /usr/lib/python3/dist-packages (from requests->kaggle) (3.0.4)\n",
+ "Requirement already satisfied: idna<3,>=2.5 in /home/mikolaj/.local/lib/python3.8/site-packages (from requests->kaggle) (2.10)\n",
"\u001b[33mWARNING: You are using pip version 21.3.1; however, version 22.0.4 is available.\n",
"You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n",
"Requirement already satisfied: pandas in /home/mikolaj/.local/lib/python3.8/site-packages (1.1.5)\n",
- "Requirement already satisfied: pytz>=2017.2 in /usr/lib/python3/dist-packages (from pandas) (2018.3)\n",
"Requirement already satisfied: numpy>=1.15.4 in /home/mikolaj/.local/lib/python3.8/site-packages (from pandas) (1.19.5)\n",
+ "Requirement already satisfied: pytz>=2017.2 in /usr/lib/python3/dist-packages (from pandas) (2018.3)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /home/mikolaj/.local/lib/python3.8/site-packages (from pandas) (2.8.1)\n",
"Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas) (1.11.0)\n",
"\u001b[33mWARNING: You are using pip version 21.3.1; however, version 22.0.4 is available.\n",
@@ -45,12 +45,13 @@
],
"source": [
"!pip install --user kaggle #API Kaggle, do pobrania zbioru\n",
- "!pip install --user pandas"
+ "!pip install --user pandas\n",
+ "!pip install --user numpy"
]
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 3,
"id": "02a4034f",
"metadata": {},
"outputs": [
@@ -59,9 +60,7 @@
"output_type": "stream",
"text": [
"Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /home/mikolaj/.kaggle/kaggle.json'\n",
- "Downloading real-or-fake-fake-jobposting-prediction.zip to /home/mikolaj/ai_tech/inzynieria\n",
- " 99%|█████████████████████████████████████▊| 16.0M/16.1M [00:01<00:00, 10.2MB/s]\n",
- "100%|██████████████████████████████████████| 16.1M/16.1M [00:01<00:00, 9.54MB/s]\n"
+ "real-or-fake-fake-jobposting-prediction.zip: Skipping, found more recently modified local copy (use --force to force download)\n"
]
}
],
@@ -73,7 +72,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 4,
"id": "5035aef0",
"metadata": {},
"outputs": [
@@ -92,7 +91,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 5,
"id": "14344d2f",
"metadata": {},
"outputs": [
@@ -100,22 +99,18 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Collecting seaborn\n",
- " Downloading seaborn-0.11.2-py3-none-any.whl (292 kB)\n",
- " |████████████████████████████████| 292 kB 1.8 MB/s \n",
- "\u001b[?25hRequirement already satisfied: numpy>=1.15 in /home/mikolaj/.local/lib/python3.8/site-packages (from seaborn) (1.19.5)\n",
+ "Requirement already satisfied: seaborn in /home/mikolaj/.local/lib/python3.8/site-packages (0.11.2)\n",
"Requirement already satisfied: scipy>=1.0 in /home/mikolaj/.local/lib/python3.8/site-packages (from seaborn) (1.7.2)\n",
- "Requirement already satisfied: pandas>=0.23 in /home/mikolaj/.local/lib/python3.8/site-packages (from seaborn) (1.1.5)\n",
"Requirement already satisfied: matplotlib>=2.2 in /home/mikolaj/.local/lib/python3.8/site-packages (from seaborn) (3.4.2)\n",
- "Requirement already satisfied: pyparsing>=2.2.1 in /home/mikolaj/.local/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n",
- "Requirement already satisfied: pillow>=6.2.0 in /home/mikolaj/.local/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (8.2.0)\n",
+ "Requirement already satisfied: numpy>=1.15 in /home/mikolaj/.local/lib/python3.8/site-packages (from seaborn) (1.19.5)\n",
+ "Requirement already satisfied: pandas>=0.23 in /home/mikolaj/.local/lib/python3.8/site-packages (from seaborn) (1.1.5)\n",
"Requirement already satisfied: python-dateutil>=2.7 in /home/mikolaj/.local/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (2.8.1)\n",
- "Requirement already satisfied: kiwisolver>=1.0.1 in /home/mikolaj/.local/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (1.3.1)\n",
+ "Requirement already satisfied: pillow>=6.2.0 in /home/mikolaj/.local/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (8.2.0)\n",
"Requirement already satisfied: cycler>=0.10 in /home/mikolaj/.local/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n",
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /home/mikolaj/.local/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (1.3.1)\n",
+ "Requirement already satisfied: pyparsing>=2.2.1 in /home/mikolaj/.local/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n",
"Requirement already satisfied: pytz>=2017.2 in /usr/lib/python3/dist-packages (from pandas>=0.23->seaborn) (2018.3)\n",
"Requirement already satisfied: six in /usr/lib/python3/dist-packages (from cycler>=0.10->matplotlib>=2.2->seaborn) (1.11.0)\n",
- "Installing collected packages: seaborn\n",
- "Successfully installed seaborn-0.11.2\n",
"\u001b[33mWARNING: You are using pip version 21.3.1; however, version 22.0.4 is available.\n",
"You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n"
]
@@ -127,7 +122,7 @@
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 22,
"id": "0f5ebfab",
"metadata": {},
"outputs": [
@@ -530,7 +525,7 @@
"[17880 rows x 18 columns]"
]
},
- "execution_count": 43,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -543,7 +538,7 @@
},
{
"cell_type": "code",
- "execution_count": 44,
+ "execution_count": 23,
"id": "edbf49da",
"metadata": {},
"outputs": [
@@ -562,60 +557,443 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 29,
"id": "bc594582",
"metadata": {},
"outputs": [
{
"data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " job_id | \n",
+ " title | \n",
+ " location | \n",
+ " department | \n",
+ " salary_range | \n",
+ " company_profile | \n",
+ " description | \n",
+ " requirements | \n",
+ " benefits | \n",
+ " telecommuting | \n",
+ " has_company_logo | \n",
+ " has_questions | \n",
+ " employment_type | \n",
+ " required_experience | \n",
+ " required_education | \n",
+ " industry | \n",
+ " function | \n",
+ " fraudulent | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " Marketing Intern | \n",
+ " US, NY, New York | \n",
+ " Marketing | \n",
+ " | \n",
+ " We're Food52, and we've created a groundbreaki... | \n",
+ " Food52, a fast-growing, James Beard Award-winn... | \n",
+ " Experience with content management systems a m... | \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Other | \n",
+ " Internship | \n",
+ " | \n",
+ " | \n",
+ " Marketing | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " Customer Service - Cloud Video Production | \n",
+ " NZ, , Auckland | \n",
+ " Success | \n",
+ " | \n",
+ " 90 Seconds, the worlds Cloud Video Production ... | \n",
+ " Organised - Focused - Vibrant - Awesome!Do you... | \n",
+ " What we expect from you:Your key responsibilit... | \n",
+ " What you will get from usThrough being part of... | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Full-time | \n",
+ " Not Applicable | \n",
+ " | \n",
+ " Marketing and Advertising | \n",
+ " Customer Service | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " Commissioning Machinery Assistant (CMA) | \n",
+ " US, IA, Wever | \n",
+ " | \n",
+ " | \n",
+ " Valor Services provides Workforce Solutions th... | \n",
+ " Our client, located in Houston, is actively se... | \n",
+ " Implement pre-commissioning and commissioning ... | \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " Account Executive - Washington DC | \n",
+ " US, DC, Washington | \n",
+ " Sales | \n",
+ " | \n",
+ " Our passion for improving quality of life thro... | \n",
+ " THE COMPANY: ESRI – Environmental Systems Rese... | \n",
+ " EDUCATION: Bachelor’s or Master’s in GIS, busi... | \n",
+ " Our culture is anything but corporate—we have ... | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Full-time | \n",
+ " Mid-Senior level | \n",
+ " Bachelor's Degree | \n",
+ " Computer Software | \n",
+ " Sales | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " Bill Review Manager | \n",
+ " US, FL, Fort Worth | \n",
+ " | \n",
+ " | \n",
+ " SpotSource Solutions LLC is a Global Human Cap... | \n",
+ " JOB TITLE: Itemization Review ManagerLOCATION:... | \n",
+ " QUALIFICATIONS:RN license in the State of Texa... | \n",
+ " Full Benefits Offered | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Full-time | \n",
+ " Mid-Senior level | \n",
+ " Bachelor's Degree | \n",
+ " Hospital & Health Care | \n",
+ " Health Care Provider | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 17875 | \n",
+ " 17876 | \n",
+ " Account Director - Distribution | \n",
+ " CA, ON, Toronto | \n",
+ " Sales | \n",
+ " | \n",
+ " Vend is looking for some awesome new talent to... | \n",
+ " Just in case this is the first time you’ve vis... | \n",
+ " To ace this role you:Will eat comprehensive St... | \n",
+ " What can you expect from us?We have an open cu... | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Full-time | \n",
+ " Mid-Senior level | \n",
+ " | \n",
+ " Computer Software | \n",
+ " Sales | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 17876 | \n",
+ " 17877 | \n",
+ " Payroll Accountant | \n",
+ " US, PA, Philadelphia | \n",
+ " Accounting | \n",
+ " | \n",
+ " WebLinc is the e-commerce platform and service... | \n",
+ " The Payroll Accountant will focus primarily on... | \n",
+ " - B.A. or B.S. in Accounting- Desire to have f... | \n",
+ " Health & WellnessMedical planPrescription ... | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Full-time | \n",
+ " Mid-Senior level | \n",
+ " Bachelor's Degree | \n",
+ " Internet | \n",
+ " Accounting/Auditing | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 17877 | \n",
+ " 17878 | \n",
+ " Project Cost Control Staff Engineer - Cost Con... | \n",
+ " US, TX, Houston | \n",
+ " | \n",
+ " | \n",
+ " We Provide Full Time Permanent Positions for m... | \n",
+ " Experienced Project Cost Control Staff Enginee... | \n",
+ " At least 12 years professional experience.Abil... | \n",
+ " | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Full-time | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 17878 | \n",
+ " 17879 | \n",
+ " Graphic Designer | \n",
+ " NG, LA, Lagos | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " Nemsia Studios is looking for an experienced v... | \n",
+ " 1. Must be fluent in the latest versions of Co... | \n",
+ " Competitive salary (compensation will be based... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " Contract | \n",
+ " Not Applicable | \n",
+ " Professional | \n",
+ " Graphic Design | \n",
+ " Design | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 17879 | \n",
+ " 17880 | \n",
+ " Web Application Developers | \n",
+ " NZ, N, Wellington | \n",
+ " Engineering | \n",
+ " | \n",
+ " Vend is looking for some awesome new talent to... | \n",
+ " Who are we?Vend is an award winning web based ... | \n",
+ " We want to hear from you if:You have an in-dep... | \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Full-time | \n",
+ " Mid-Senior level | \n",
+ " | \n",
+ " Computer Software | \n",
+ " Engineering | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
17880 rows × 18 columns
\n",
+ "
"
+ ],
"text/plain": [
- "English Teacher Abroad 311\n",
- "Customer Service Associate 146\n",
- "Graduates: English Teacher Abroad (Conversational) 144\n",
- "English Teacher Abroad 95\n",
- "Software Engineer 86\n",
- " ... \n",
- "Welcome desk Administrator 1\n",
- "Game Studio Manager 1\n",
- "Head of Content (m/f) 1\n",
- "International Broadcaster, Bambara and Songhai Language 1\n",
- "Community / Marketing Coordinator 1\n",
- "Name: title, Length: 11231, dtype: int64"
+ " job_id title \\\n",
+ "0 1 Marketing Intern \n",
+ "1 2 Customer Service - Cloud Video Production \n",
+ "2 3 Commissioning Machinery Assistant (CMA) \n",
+ "3 4 Account Executive - Washington DC \n",
+ "4 5 Bill Review Manager \n",
+ "... ... ... \n",
+ "17875 17876 Account Director - Distribution \n",
+ "17876 17877 Payroll Accountant \n",
+ "17877 17878 Project Cost Control Staff Engineer - Cost Con... \n",
+ "17878 17879 Graphic Designer \n",
+ "17879 17880 Web Application Developers \n",
+ "\n",
+ " location department salary_range \\\n",
+ "0 US, NY, New York Marketing \n",
+ "1 NZ, , Auckland Success \n",
+ "2 US, IA, Wever \n",
+ "3 US, DC, Washington Sales \n",
+ "4 US, FL, Fort Worth \n",
+ "... ... ... ... \n",
+ "17875 CA, ON, Toronto Sales \n",
+ "17876 US, PA, Philadelphia Accounting \n",
+ "17877 US, TX, Houston \n",
+ "17878 NG, LA, Lagos \n",
+ "17879 NZ, N, Wellington Engineering \n",
+ "\n",
+ " company_profile \\\n",
+ "0 We're Food52, and we've created a groundbreaki... \n",
+ "1 90 Seconds, the worlds Cloud Video Production ... \n",
+ "2 Valor Services provides Workforce Solutions th... \n",
+ "3 Our passion for improving quality of life thro... \n",
+ "4 SpotSource Solutions LLC is a Global Human Cap... \n",
+ "... ... \n",
+ "17875 Vend is looking for some awesome new talent to... \n",
+ "17876 WebLinc is the e-commerce platform and service... \n",
+ "17877 We Provide Full Time Permanent Positions for m... \n",
+ "17878 \n",
+ "17879 Vend is looking for some awesome new talent to... \n",
+ "\n",
+ " description \\\n",
+ "0 Food52, a fast-growing, James Beard Award-winn... \n",
+ "1 Organised - Focused - Vibrant - Awesome!Do you... \n",
+ "2 Our client, located in Houston, is actively se... \n",
+ "3 THE COMPANY: ESRI – Environmental Systems Rese... \n",
+ "4 JOB TITLE: Itemization Review ManagerLOCATION:... \n",
+ "... ... \n",
+ "17875 Just in case this is the first time you’ve vis... \n",
+ "17876 The Payroll Accountant will focus primarily on... \n",
+ "17877 Experienced Project Cost Control Staff Enginee... \n",
+ "17878 Nemsia Studios is looking for an experienced v... \n",
+ "17879 Who are we?Vend is an award winning web based ... \n",
+ "\n",
+ " requirements \\\n",
+ "0 Experience with content management systems a m... \n",
+ "1 What we expect from you:Your key responsibilit... \n",
+ "2 Implement pre-commissioning and commissioning ... \n",
+ "3 EDUCATION: Bachelor’s or Master’s in GIS, busi... \n",
+ "4 QUALIFICATIONS:RN license in the State of Texa... \n",
+ "... ... \n",
+ "17875 To ace this role you:Will eat comprehensive St... \n",
+ "17876 - B.A. or B.S. in Accounting- Desire to have f... \n",
+ "17877 At least 12 years professional experience.Abil... \n",
+ "17878 1. Must be fluent in the latest versions of Co... \n",
+ "17879 We want to hear from you if:You have an in-dep... \n",
+ "\n",
+ " benefits telecommuting \\\n",
+ "0 0 \n",
+ "1 What you will get from usThrough being part of... 0 \n",
+ "2 0 \n",
+ "3 Our culture is anything but corporate—we have ... 0 \n",
+ "4 Full Benefits Offered 0 \n",
+ "... ... ... \n",
+ "17875 What can you expect from us?We have an open cu... 0 \n",
+ "17876 Health & WellnessMedical planPrescription ... 0 \n",
+ "17877 0 \n",
+ "17878 Competitive salary (compensation will be based... 0 \n",
+ "17879 0 \n",
+ "\n",
+ " has_company_logo has_questions employment_type required_experience \\\n",
+ "0 1 0 Other Internship \n",
+ "1 1 0 Full-time Not Applicable \n",
+ "2 1 0 \n",
+ "3 1 0 Full-time Mid-Senior level \n",
+ "4 1 1 Full-time Mid-Senior level \n",
+ "... ... ... ... ... \n",
+ "17875 1 1 Full-time Mid-Senior level \n",
+ "17876 1 1 Full-time Mid-Senior level \n",
+ "17877 0 0 Full-time \n",
+ "17878 0 1 Contract Not Applicable \n",
+ "17879 1 1 Full-time Mid-Senior level \n",
+ "\n",
+ " required_education industry function \\\n",
+ "0 Marketing \n",
+ "1 Marketing and Advertising Customer Service \n",
+ "2 \n",
+ "3 Bachelor's Degree Computer Software Sales \n",
+ "4 Bachelor's Degree Hospital & Health Care Health Care Provider \n",
+ "... ... ... ... \n",
+ "17875 Computer Software Sales \n",
+ "17876 Bachelor's Degree Internet Accounting/Auditing \n",
+ "17877 \n",
+ "17878 Professional Graphic Design Design \n",
+ "17879 Computer Software Engineering \n",
+ "\n",
+ " fraudulent \n",
+ "0 0 \n",
+ "1 0 \n",
+ "2 0 \n",
+ "3 0 \n",
+ "4 0 \n",
+ "... ... \n",
+ "17875 0 \n",
+ "17876 0 \n",
+ "17877 0 \n",
+ "17878 0 \n",
+ "17879 0 \n",
+ "\n",
+ "[17880 rows x 18 columns]"
]
},
- "execution_count": 35,
+ "execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "data[\"title\"].value_counts()"
+ "data = data.fillna('')\n",
+ "data"
]
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 30,
"id": "e60b3f32",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "Sales 551\n",
- "Engineering 487\n",
- "Marketing 401\n",
- "Operations 270\n",
- "IT 225\n",
- " ... \n",
- "Audiology 1\n",
- "Professional Association 1\n",
- "Multiwork 1\n",
- "Dev2 to 5 years experience in a client-facing technical role. Ability to diplomatically address customer concerns and provide feedback. A firm understanding of the technology stacks common to the Web ecosystem. A demonstrated history of creating non-trivi 1\n",
- "Business Strategy 1\n",
- "Name: department, Length: 1337, dtype: int64"
+ " 11547\n",
+ "Sales 551\n",
+ "Engineering 487\n",
+ "Marketing 401\n",
+ "Operations 270\n",
+ " ... \n",
+ "Pricing 1\n",
+ "Mobility 1\n",
+ "Housekeeping 1\n",
+ "An Impact Engine Company 1\n",
+ "Trainee 1\n",
+ "Name: department, Length: 1338, dtype: int64"
]
},
- "execution_count": 36,
+ "execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
@@ -626,7 +1004,7 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 32,
"id": "c5ac75f5",
"metadata": {},
"outputs": [
@@ -676,42 +1054,42 @@
" count | \n",
" 17880.000000 | \n",
" 17880 | \n",
- " 17534 | \n",
- " 6333 | \n",
- " 2868 | \n",
- " 14572 | \n",
- " 17879 | \n",
- " 15185 | \n",
- " 10670 | \n",
+ " 17880 | \n",
+ " 17880 | \n",
+ " 17880 | \n",
+ " 17880 | \n",
+ " 17880 | \n",
+ " 17880 | \n",
+ " 17880 | \n",
" 17880.000000 | \n",
" 17880.000000 | \n",
" 17880.000000 | \n",
- " 14409 | \n",
- " 10830 | \n",
- " 9775 | \n",
- " 12977 | \n",
- " 11425 | \n",
+ " 17880 | \n",
+ " 17880 | \n",
+ " 17880 | \n",
+ " 17880 | \n",
+ " 17880 | \n",
" 17880.000000 | \n",
" \n",
" \n",
" unique | \n",
" NaN | \n",
" 11231 | \n",
- " 3105 | \n",
- " 1337 | \n",
- " 874 | \n",
- " 1709 | \n",
- " 14801 | \n",
- " 11968 | \n",
- " 6205 | \n",
+ " 3106 | \n",
+ " 1338 | \n",
+ " 875 | \n",
+ " 1710 | \n",
+ " 14802 | \n",
+ " 11969 | \n",
+ " 6206 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " 5 | \n",
- " 7 | \n",
- " 13 | \n",
- " 131 | \n",
- " 37 | \n",
+ " 6 | \n",
+ " 8 | \n",
+ " 14 | \n",
+ " 132 | \n",
+ " 38 | \n",
" NaN | \n",
"
\n",
" \n",
@@ -719,20 +1097,20 @@
" NaN | \n",
" English Teacher Abroad | \n",
" GB, LND, London | \n",
- " Sales | \n",
- " 0-0 | \n",
- " We help teachers get safe & secure jobs ab... | \n",
+ " | \n",
+ " | \n",
+ " | \n",
" Play with kids, get paid for it Love travel? J... | \n",
- " University degree required. TEFL / TESOL / CEL... | \n",
- " See job description | \n",
+ " | \n",
+ " | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" Full-time | \n",
- " Mid-Senior level | \n",
- " Bachelor's Degree | \n",
- " Information Technology and Services | \n",
- " Information Technology | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
" NaN | \n",
"
\n",
" \n",
@@ -740,20 +1118,20 @@
" NaN | \n",
" 311 | \n",
" 718 | \n",
- " 551 | \n",
- " 142 | \n",
- " 726 | \n",
+ " 11547 | \n",
+ " 15012 | \n",
+ " 3308 | \n",
" 379 | \n",
- " 410 | \n",
- " 726 | \n",
+ " 2695 | \n",
+ " 7210 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 11620 | \n",
- " 3809 | \n",
- " 5145 | \n",
- " 1734 | \n",
- " 1749 | \n",
+ " 7050 | \n",
+ " 8105 | \n",
+ " 4903 | \n",
+ " 6455 | \n",
" NaN | \n",
"
\n",
" \n",
@@ -909,10 +1287,10 @@
],
"text/plain": [
" job_id title location department \\\n",
- "count 17880.000000 17880 17534 6333 \n",
- "unique NaN 11231 3105 1337 \n",
- "top NaN English Teacher Abroad GB, LND, London Sales \n",
- "freq NaN 311 718 551 \n",
+ "count 17880.000000 17880 17880 17880 \n",
+ "unique NaN 11231 3106 1338 \n",
+ "top NaN English Teacher Abroad GB, LND, London \n",
+ "freq NaN 311 718 11547 \n",
"mean 8940.500000 NaN NaN NaN \n",
"std 5161.655742 NaN NaN NaN \n",
"min 1.000000 NaN NaN NaN \n",
@@ -921,104 +1299,79 @@
"75% 13410.250000 NaN NaN NaN \n",
"max 17880.000000 NaN NaN NaN \n",
"\n",
- " salary_range company_profile \\\n",
- "count 2868 14572 \n",
- "unique 874 1709 \n",
- "top 0-0 We help teachers get safe & secure jobs ab... \n",
- "freq 142 726 \n",
- "mean NaN NaN \n",
- "std NaN NaN \n",
- "min NaN NaN \n",
- "25% NaN NaN \n",
- "50% NaN NaN \n",
- "75% NaN NaN \n",
- "max NaN NaN \n",
+ " salary_range company_profile \\\n",
+ "count 17880 17880 \n",
+ "unique 875 1710 \n",
+ "top \n",
+ "freq 15012 3308 \n",
+ "mean NaN NaN \n",
+ "std NaN NaN \n",
+ "min NaN NaN \n",
+ "25% NaN NaN \n",
+ "50% NaN NaN \n",
+ "75% NaN NaN \n",
+ "max NaN NaN \n",
"\n",
- " description \\\n",
- "count 17879 \n",
- "unique 14801 \n",
- "top Play with kids, get paid for it Love travel? J... \n",
- "freq 379 \n",
- "mean NaN \n",
- "std NaN \n",
- "min NaN \n",
- "25% NaN \n",
- "50% NaN \n",
- "75% NaN \n",
- "max NaN \n",
+ " description requirements \\\n",
+ "count 17880 17880 \n",
+ "unique 14802 11969 \n",
+ "top Play with kids, get paid for it Love travel? J... \n",
+ "freq 379 2695 \n",
+ "mean NaN NaN \n",
+ "std NaN NaN \n",
+ "min NaN NaN \n",
+ "25% NaN NaN \n",
+ "50% NaN NaN \n",
+ "75% NaN NaN \n",
+ "max NaN NaN \n",
"\n",
- " requirements \\\n",
- "count 15185 \n",
- "unique 11968 \n",
- "top University degree required. TEFL / TESOL / CEL... \n",
- "freq 410 \n",
- "mean NaN \n",
- "std NaN \n",
- "min NaN \n",
- "25% NaN \n",
- "50% NaN \n",
- "75% NaN \n",
- "max NaN \n",
+ " benefits telecommuting has_company_logo has_questions \\\n",
+ "count 17880 17880.000000 17880.000000 17880.000000 \n",
+ "unique 6206 NaN NaN NaN \n",
+ "top NaN NaN NaN \n",
+ "freq 7210 NaN NaN NaN \n",
+ "mean NaN 0.042897 0.795302 0.491723 \n",
+ "std NaN 0.202631 0.403492 0.499945 \n",
+ "min NaN 0.000000 0.000000 0.000000 \n",
+ "25% NaN 0.000000 1.000000 0.000000 \n",
+ "50% NaN 0.000000 1.000000 0.000000 \n",
+ "75% NaN 0.000000 1.000000 1.000000 \n",
+ "max NaN 1.000000 1.000000 1.000000 \n",
"\n",
- " benefits telecommuting has_company_logo has_questions \\\n",
- "count 10670 17880.000000 17880.000000 17880.000000 \n",
- "unique 6205 NaN NaN NaN \n",
- "top See job description NaN NaN NaN \n",
- "freq 726 NaN NaN NaN \n",
- "mean NaN 0.042897 0.795302 0.491723 \n",
- "std NaN 0.202631 0.403492 0.499945 \n",
- "min NaN 0.000000 0.000000 0.000000 \n",
- "25% NaN 0.000000 1.000000 0.000000 \n",
- "50% NaN 0.000000 1.000000 0.000000 \n",
- "75% NaN 0.000000 1.000000 1.000000 \n",
- "max NaN 1.000000 1.000000 1.000000 \n",
+ " employment_type required_experience required_education industry \\\n",
+ "count 17880 17880 17880 17880 \n",
+ "unique 6 8 14 132 \n",
+ "top Full-time \n",
+ "freq 11620 7050 8105 4903 \n",
+ "mean NaN NaN NaN NaN \n",
+ "std NaN NaN NaN NaN \n",
+ "min NaN NaN NaN NaN \n",
+ "25% NaN NaN NaN NaN \n",
+ "50% NaN NaN NaN NaN \n",
+ "75% NaN NaN NaN NaN \n",
+ "max NaN NaN NaN NaN \n",
"\n",
- " employment_type required_experience required_education \\\n",
- "count 14409 10830 9775 \n",
- "unique 5 7 13 \n",
- "top Full-time Mid-Senior level Bachelor's Degree \n",
- "freq 11620 3809 5145 \n",
- "mean NaN NaN NaN \n",
- "std NaN NaN NaN \n",
- "min NaN NaN NaN \n",
- "25% NaN NaN NaN \n",
- "50% NaN NaN NaN \n",
- "75% NaN NaN NaN \n",
- "max NaN NaN NaN \n",
- "\n",
- " industry function \\\n",
- "count 12977 11425 \n",
- "unique 131 37 \n",
- "top Information Technology and Services Information Technology \n",
- "freq 1734 1749 \n",
- "mean NaN NaN \n",
- "std NaN NaN \n",
- "min NaN NaN \n",
- "25% NaN NaN \n",
- "50% NaN NaN \n",
- "75% NaN NaN \n",
- "max NaN NaN \n",
- "\n",
- " fraudulent \n",
- "count 17880.000000 \n",
- "unique NaN \n",
- "top NaN \n",
- "freq NaN \n",
- "mean 0.048434 \n",
- "std 0.214688 \n",
- "min 0.000000 \n",
- "25% 0.000000 \n",
- "50% 0.000000 \n",
- "75% 0.000000 \n",
- "max 1.000000 "
+ " function fraudulent \n",
+ "count 17880 17880.000000 \n",
+ "unique 38 NaN \n",
+ "top NaN \n",
+ "freq 6455 NaN \n",
+ "mean NaN 0.048434 \n",
+ "std NaN 0.214688 \n",
+ "min NaN 0.000000 \n",
+ "25% NaN 0.000000 \n",
+ "50% NaN 0.000000 \n",
+ "75% NaN 0.000000 \n",
+ "max NaN 1.000000 "
]
},
- "execution_count": 37,
+ "execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
+ "data = data.fillna('')\n",
"data.describe(include='all')"
]
},