{ "cells": [ { "cell_type": "code", "execution_count": 28, "id": "5e2107a5", "metadata": {}, "outputs": [], "source": [ "# Skrypt do ściągnięcia zbioru danych\n" ] }, { "cell_type": "code", "execution_count": 29, "id": "bcc889e5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: kaggle in /home/students/s444463/.local/lib/python3.8/site-packages (1.5.12)\n", "Requirement already satisfied: tqdm in /home/students/s444463/.local/lib/python3.8/site-packages (from kaggle) (4.63.0)\n", "Requirement already satisfied: urllib3 in /usr/lib/python3/dist-packages (from kaggle) (1.25.8)\n", "Requirement already satisfied: six>=1.10 in /usr/lib/python3/dist-packages (from kaggle) (1.14.0)\n", "Requirement already satisfied: certifi in /usr/lib/python3/dist-packages (from kaggle) (2019.11.28)\n", "Requirement already satisfied: requests in /usr/lib/python3/dist-packages (from kaggle) (2.22.0)\n", "Requirement already satisfied: python-slugify in /home/students/s444463/.local/lib/python3.8/site-packages (from kaggle) (6.1.1)\n", "Requirement already satisfied: python-dateutil in /usr/lib/python3/dist-packages (from kaggle) (2.7.3)\n", "Requirement already satisfied: text-unidecode>=1.3 in /home/students/s444463/.local/lib/python3.8/site-packages (from python-slugify->kaggle) (1.3)\n", "\u001b[33mWARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n", "You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n", "Requirement already satisfied: pandas in /usr/lib/python3/dist-packages (0.25.3)\n", "\u001b[33mWARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n", "You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n", "Requirement already satisfied: numpy in /usr/lib/python3/dist-packages (1.17.4)\n", "\u001b[33mWARNING: You are using pip version 21.2.4; 
however, version 22.0.4 is available.\n", "You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n" ] } ], "source": [ "# Kaggle API client (used to download the dataset), plus pandas and numpy.\n", "# %pip (rather than !pip) installs into the environment of the running kernel.\n", "%pip install --user kaggle\n", "%pip install --user pandas\n", "%pip install --user numpy" ] }, { "cell_type": "code", "execution_count": 30, "id": "02a4034f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/bin/bash: kaggle: command not found\r\n" ] } ], "source": [ "# For the command below to work you need a ~/.kaggle/kaggle.json file containing your Kaggle API token.\n", "# Instructions: https://www.kaggle.com/docs/api\n", "# A --user install puts the kaggle launcher in ~/.local/bin, which may not be on PATH\n", "# (the likely cause of 'kaggle: command not found'), so prepend that directory for this command only.\n", "!PATH=$HOME/.local/bin:$PATH kaggle datasets download -d shivamb/real-or-fake-fake-jobposting-prediction" ] }, { "cell_type": "code", "execution_count": 31, "id": "5035aef0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "unzip: cannot find or open real-or-fake-fake-jobposting-prediction.zip, real-or-fake-fake-jobposting-prediction.zip.zip or real-or-fake-fake-jobposting-prediction.zip.ZIP.\r\n" ] } ], "source": [ "!unzip -o real-or-fake-fake-jobposting-prediction.zip" ] }, { "cell_type": "code", "execution_count": 32, "id": "14344d2f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: seaborn in /home/students/s444463/.local/lib/python3.8/site-packages (0.11.2)\n", "Requirement already satisfied: numpy>=1.15 in /usr/lib/python3/dist-packages (from seaborn) (1.17.4)\n", "Requirement already satisfied: scipy>=1.0 in /usr/lib/python3/dist-packages (from seaborn) (1.3.3)\n", "Requirement already satisfied: matplotlib>=2.2 in /home/students/s444463/.local/lib/python3.8/site-packages (from seaborn) (3.4.3)\n", "Requirement already satisfied: pandas>=0.23 in /usr/lib/python3/dist-packages (from seaborn) (0.25.3)\n", "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.8/dist-packages (from matplotlib>=2.2->seaborn) 
(8.3.2)\n", "Requirement already satisfied: pyparsing>=2.2.1 in /usr/local/lib/python3.8/dist-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n", "Requirement already satisfied: cycler>=0.10 in /home/students/s444463/.local/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /home/students/s444463/.local/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (1.3.2)\n", "Requirement already satisfied: python-dateutil>=2.7 in /usr/lib/python3/dist-packages (from matplotlib>=2.2->seaborn) (2.7.3)\n", "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from cycler>=0.10->matplotlib>=2.2->seaborn) (1.14.0)\n", "\u001b[33mWARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n", "You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n" ] } ], "source": [ "# %pip (rather than !pip) installs into the environment of the running kernel.\n", "%pip install --user seaborn" ] }, { "cell_type": "code", "execution_count": 33, "id": "0f5ebfab", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | job_id | \n", "title | \n", "location | \n", "department | \n", "salary_range | \n", "company_profile | \n", "description | \n", "requirements | \n", "benefits | \n", "telecommuting | \n", "has_company_logo | \n", "has_questions | \n", "employment_type | \n", "required_experience | \n", "required_education | \n", "industry | \n", "function | \n", "fraudulent | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "Marketing Intern | \n", "US, NY, New York | \n", "Marketing | \n", "NaN | \n", "We're Food52, and we've created a groundbreaki... | \n", "Food52, a fast-growing, James Beard Award-winn... | \n", "Experience with content management systems a m... | \n", "NaN | \n", "0 | \n", "1 | \n", "0 | \n", "Other | \n", "Internship | \n", "NaN | \n", "NaN | \n", "Marketing | \n", "0 | \n", "
1 | \n", "2 | \n", "Customer Service - Cloud Video Production | \n", "NZ, , Auckland | \n", "Success | \n", "NaN | \n", "90 Seconds, the worlds Cloud Video Production ... | \n", "Organised - Focused - Vibrant - Awesome!Do you... | \n", "What we expect from you:Your key responsibilit... | \n", "What you will get from usThrough being part of... | \n", "0 | \n", "1 | \n", "0 | \n", "Full-time | \n", "Not Applicable | \n", "NaN | \n", "Marketing and Advertising | \n", "Customer Service | \n", "0 | \n", "
2 | \n", "3 | \n", "Commissioning Machinery Assistant (CMA) | \n", "US, IA, Wever | \n", "NaN | \n", "NaN | \n", "Valor Services provides Workforce Solutions th... | \n", "Our client, located in Houston, is actively se... | \n", "Implement pre-commissioning and commissioning ... | \n", "NaN | \n", "0 | \n", "1 | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0 | \n", "
3 | \n", "4 | \n", "Account Executive - Washington DC | \n", "US, DC, Washington | \n", "Sales | \n", "NaN | \n", "Our passion for improving quality of life thro... | \n", "THE COMPANY: ESRI – Environmental Systems Rese... | \n", "EDUCATION: Bachelor’s or Master’s in GIS, busi... | \n", "Our culture is anything but corporate—we have ... | \n", "0 | \n", "1 | \n", "0 | \n", "Full-time | \n", "Mid-Senior level | \n", "Bachelor's Degree | \n", "Computer Software | \n", "Sales | \n", "0 | \n", "
4 | \n", "5 | \n", "Bill Review Manager | \n", "US, FL, Fort Worth | \n", "NaN | \n", "NaN | \n", "SpotSource Solutions LLC is a Global Human Cap... | \n", "JOB TITLE: Itemization Review ManagerLOCATION:... | \n", "QUALIFICATIONS:RN license in the State of Texa... | \n", "Full Benefits Offered | \n", "0 | \n", "1 | \n", "1 | \n", "Full-time | \n", "Mid-Senior level | \n", "Bachelor's Degree | \n", "Hospital & Health Care | \n", "Health Care Provider | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
17875 | \n", "17876 | \n", "Account Director - Distribution | \n", "CA, ON, Toronto | \n", "Sales | \n", "NaN | \n", "Vend is looking for some awesome new talent to... | \n", "Just in case this is the first time you’ve vis... | \n", "To ace this role you:Will eat comprehensive St... | \n", "What can you expect from us?We have an open cu... | \n", "0 | \n", "1 | \n", "1 | \n", "Full-time | \n", "Mid-Senior level | \n", "NaN | \n", "Computer Software | \n", "Sales | \n", "0 | \n", "
17876 | \n", "17877 | \n", "Payroll Accountant | \n", "US, PA, Philadelphia | \n", "Accounting | \n", "NaN | \n", "WebLinc is the e-commerce platform and service... | \n", "The Payroll Accountant will focus primarily on... | \n", "- B.A. or B.S. in Accounting- Desire to have f... | \n", "Health & WellnessMedical planPrescription ... | \n", "0 | \n", "1 | \n", "1 | \n", "Full-time | \n", "Mid-Senior level | \n", "Bachelor's Degree | \n", "Internet | \n", "Accounting/Auditing | \n", "0 | \n", "
17877 | \n", "17878 | \n", "Project Cost Control Staff Engineer - Cost Con... | \n", "US, TX, Houston | \n", "NaN | \n", "NaN | \n", "We Provide Full Time Permanent Positions for m... | \n", "Experienced Project Cost Control Staff Enginee... | \n", "At least 12 years professional experience.Abil... | \n", "NaN | \n", "0 | \n", "0 | \n", "0 | \n", "Full-time | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0 | \n", "
17878 | \n", "17879 | \n", "Graphic Designer | \n", "NG, LA, Lagos | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Nemsia Studios is looking for an experienced v... | \n", "1. Must be fluent in the latest versions of Co... | \n", "Competitive salary (compensation will be based... | \n", "0 | \n", "0 | \n", "1 | \n", "Contract | \n", "Not Applicable | \n", "Professional | \n", "Graphic Design | \n", "Design | \n", "0 | \n", "
17879 | \n", "17880 | \n", "Web Application Developers | \n", "NZ, N, Wellington | \n", "Engineering | \n", "NaN | \n", "Vend is looking for some awesome new talent to... | \n", "Who are we?Vend is an award winning web based ... | \n", "We want to hear from you if:You have an in-dep... | \n", "NaN | \n", "0 | \n", "1 | \n", "1 | \n", "Full-time | \n", "Mid-Senior level | \n", "NaN | \n", "Computer Software | \n", "Engineering | \n", "0 | \n", "
17880 rows × 18 columns
\n", "\n", " | job_id | \n", "title | \n", "location | \n", "department | \n", "salary_range | \n", "company_profile | \n", "description | \n", "requirements | \n", "benefits | \n", "telecommuting | \n", "has_company_logo | \n", "has_questions | \n", "employment_type | \n", "required_experience | \n", "required_education | \n", "industry | \n", "function | \n", "fraudulent | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "Marketing Intern | \n", "US, NY, New York | \n", "Marketing | \n", "\n", " | We're Food52, and we've created a groundbreaki... | \n", "Food52, a fast-growing, James Beard Award-winn... | \n", "Experience with content management systems a m... | \n", "\n", " | 0 | \n", "1 | \n", "0 | \n", "Other | \n", "Internship | \n", "\n", " | \n", " | Marketing | \n", "0 | \n", "
1 | \n", "2 | \n", "Customer Service - Cloud Video Production | \n", "NZ, , Auckland | \n", "Success | \n", "\n", " | 90 Seconds, the worlds Cloud Video Production ... | \n", "Organised - Focused - Vibrant - Awesome!Do you... | \n", "What we expect from you:Your key responsibilit... | \n", "What you will get from usThrough being part of... | \n", "0 | \n", "1 | \n", "0 | \n", "Full-time | \n", "Not Applicable | \n", "\n", " | Marketing and Advertising | \n", "Customer Service | \n", "0 | \n", "
2 | \n", "3 | \n", "Commissioning Machinery Assistant (CMA) | \n", "US, IA, Wever | \n", "\n", " | \n", " | Valor Services provides Workforce Solutions th... | \n", "Our client, located in Houston, is actively se... | \n", "Implement pre-commissioning and commissioning ... | \n", "\n", " | 0 | \n", "1 | \n", "0 | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | 0 | \n", "
3 | \n", "4 | \n", "Account Executive - Washington DC | \n", "US, DC, Washington | \n", "Sales | \n", "\n", " | Our passion for improving quality of life thro... | \n", "THE COMPANY: ESRI – Environmental Systems Rese... | \n", "EDUCATION: Bachelor’s or Master’s in GIS, busi... | \n", "Our culture is anything but corporate—we have ... | \n", "0 | \n", "1 | \n", "0 | \n", "Full-time | \n", "Mid-Senior level | \n", "Bachelor's Degree | \n", "Computer Software | \n", "Sales | \n", "0 | \n", "
4 | \n", "5 | \n", "Bill Review Manager | \n", "US, FL, Fort Worth | \n", "\n", " | \n", " | SpotSource Solutions LLC is a Global Human Cap... | \n", "JOB TITLE: Itemization Review ManagerLOCATION:... | \n", "QUALIFICATIONS:RN license in the State of Texa... | \n", "Full Benefits Offered | \n", "0 | \n", "1 | \n", "1 | \n", "Full-time | \n", "Mid-Senior level | \n", "Bachelor's Degree | \n", "Hospital & Health Care | \n", "Health Care Provider | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
17875 | \n", "17876 | \n", "Account Director - Distribution | \n", "CA, ON, Toronto | \n", "Sales | \n", "\n", " | Vend is looking for some awesome new talent to... | \n", "Just in case this is the first time you’ve vis... | \n", "To ace this role you:Will eat comprehensive St... | \n", "What can you expect from us?We have an open cu... | \n", "0 | \n", "1 | \n", "1 | \n", "Full-time | \n", "Mid-Senior level | \n", "\n", " | Computer Software | \n", "Sales | \n", "0 | \n", "
17876 | \n", "17877 | \n", "Payroll Accountant | \n", "US, PA, Philadelphia | \n", "Accounting | \n", "\n", " | WebLinc is the e-commerce platform and service... | \n", "The Payroll Accountant will focus primarily on... | \n", "- B.A. or B.S. in Accounting- Desire to have f... | \n", "Health & WellnessMedical planPrescription ... | \n", "0 | \n", "1 | \n", "1 | \n", "Full-time | \n", "Mid-Senior level | \n", "Bachelor's Degree | \n", "Internet | \n", "Accounting/Auditing | \n", "0 | \n", "
17877 | \n", "17878 | \n", "Project Cost Control Staff Engineer - Cost Con... | \n", "US, TX, Houston | \n", "\n", " | \n", " | We Provide Full Time Permanent Positions for m... | \n", "Experienced Project Cost Control Staff Enginee... | \n", "At least 12 years professional experience.Abil... | \n", "\n", " | 0 | \n", "0 | \n", "0 | \n", "Full-time | \n", "\n", " | \n", " | \n", " | \n", " | 0 | \n", "
17878 | \n", "17879 | \n", "Graphic Designer | \n", "NG, LA, Lagos | \n", "\n", " | \n", " | \n", " | Nemsia Studios is looking for an experienced v... | \n", "1. Must be fluent in the latest versions of Co... | \n", "Competitive salary (compensation will be based... | \n", "0 | \n", "0 | \n", "1 | \n", "Contract | \n", "Not Applicable | \n", "Professional | \n", "Graphic Design | \n", "Design | \n", "0 | \n", "
17879 | \n", "17880 | \n", "Web Application Developers | \n", "NZ, N, Wellington | \n", "Engineering | \n", "\n", " | Vend is looking for some awesome new talent to... | \n", "Who are we?Vend is an award winning web based ... | \n", "We want to hear from you if:You have an in-dep... | \n", "\n", " | 0 | \n", "1 | \n", "1 | \n", "Full-time | \n", "Mid-Senior level | \n", "\n", " | Computer Software | \n", "Engineering | \n", "0 | \n", "
17880 rows × 18 columns
\n", "\n", " | job_id | \n", "title | \n", "location | \n", "department | \n", "salary_range | \n", "company_profile | \n", "description | \n", "requirements | \n", "benefits | \n", "telecommuting | \n", "has_company_logo | \n", "has_questions | \n", "employment_type | \n", "required_experience | \n", "required_education | \n", "industry | \n", "function | \n", "fraudulent | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "17880.000000 | \n", "17880 | \n", "17880 | \n", "17880 | \n", "17880 | \n", "17880 | \n", "17880 | \n", "17880 | \n", "17880 | \n", "17880.000000 | \n", "17880.000000 | \n", "17880.000000 | \n", "17880 | \n", "17880 | \n", "17880 | \n", "17880 | \n", "17880 | \n", "17880.000000 | \n", "
unique | \n", "NaN | \n", "11231 | \n", "3106 | \n", "1338 | \n", "875 | \n", "1710 | \n", "14802 | \n", "11969 | \n", "6206 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "6 | \n", "8 | \n", "14 | \n", "132 | \n", "38 | \n", "NaN | \n", "
top | \n", "NaN | \n", "English Teacher Abroad | \n", "GB, LND, London | \n", "\n", " | \n", " | \n", " | Play with kids, get paid for it Love travel? J... | \n", "\n", " | \n", " | NaN | \n", "NaN | \n", "NaN | \n", "Full-time | \n", "\n", " | \n", " | \n", " | \n", " | NaN | \n", "
freq | \n", "NaN | \n", "311 | \n", "718 | \n", "11547 | \n", "15012 | \n", "3308 | \n", "379 | \n", "2695 | \n", "7210 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "11620 | \n", "7050 | \n", "8105 | \n", "4903 | \n", "6455 | \n", "NaN | \n", "
mean | \n", "8940.500000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.042897 | \n", "0.795302 | \n", "0.491723 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.048434 | \n", "
std | \n", "5161.655742 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.202631 | \n", "0.403492 | \n", "0.499945 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.214688 | \n", "
min | \n", "1.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000000 | \n", "
25% | \n", "4470.750000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000000 | \n", "1.000000 | \n", "0.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000000 | \n", "
50% | \n", "8940.500000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000000 | \n", "1.000000 | \n", "0.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000000 | \n", "
75% | \n", "13410.250000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000000 | \n", "1.000000 | \n", "1.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000000 | \n", "
max | \n", "17880.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1.000000 | \n", "