ium_440058/dataset.ipynb

788 lines
31 KiB
Plaintext
Raw Normal View History

2021-03-21 23:49:49 +01:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Finance & Accounting Courses on udemy.com\n",
"## Includes:\n",
"* id\n",
"* title\n",
"* url\n",
"* is_paid\n",
"* num_subscribers\n",
"* rating\n",
"* num_reviews\n",
"* created"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn import preprocessing\n",
"import kaggle\n",
"\n",
"# Authenticate with the Kaggle API client, download and unzip the dataset into the\n",
"# current directory, then load courses.csv into a DataFrame.\n",
"kaggle.api.authenticate()\n",
"kaggle.api.dataset_download_files(\n",
"    'jilkothari/finance-accounting-courses-udemy-13k-course',\n",
"    path='.',\n",
"    unzip=True,\n",
")\n",
"\n",
"courses = pd.read_csv('courses.csv')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dataset"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>title</th>\n",
" <th>url</th>\n",
" <th>is_paid</th>\n",
" <th>num_subscribers</th>\n",
" <th>rating</th>\n",
" <th>num_reviews</th>\n",
" <th>created</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>762616</td>\n",
" <td>the_complete_sql_bootcamp_2020:_go_from_zero_t...</td>\n",
" <td>/course/the-complete-sql-bootcamp/</td>\n",
" <td>True</td>\n",
" <td>295509</td>\n",
" <td>4.7</td>\n",
" <td>78006</td>\n",
" <td>2016-02-14T22:57:48Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>937678</td>\n",
" <td>tableau_2020_a-z:_hands-on_tableau_training_fo...</td>\n",
" <td>/course/tableau10/</td>\n",
" <td>True</td>\n",
" <td>209070</td>\n",
" <td>4.6</td>\n",
" <td>54581</td>\n",
" <td>2016-08-22T12:10:18Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1361790</td>\n",
" <td>pmp_exam_prep_seminar_-__pmbok_guide_6</td>\n",
" <td>/course/pmp-pmbok6-35-pdus/</td>\n",
" <td>True</td>\n",
" <td>155282</td>\n",
" <td>4.6</td>\n",
" <td>52653</td>\n",
" <td>2017-09-26T16:32:48Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>648826</td>\n",
" <td>the_complete_financial_analyst_course_2020</td>\n",
" <td>/course/the-complete-financial-analyst-course/</td>\n",
" <td>True</td>\n",
" <td>245860</td>\n",
" <td>4.5</td>\n",
" <td>46447</td>\n",
" <td>2015-10-23T13:34:35Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>637930</td>\n",
" <td>an_entire_mba_in_1_course:award_winning_busine...</td>\n",
" <td>/course/an-entire-mba-in-1-courseaward-winning...</td>\n",
" <td>True</td>\n",
" <td>374836</td>\n",
" <td>4.5</td>\n",
" <td>41630</td>\n",
" <td>2015-10-12T06:39:46Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13531</th>\n",
" <td>3171702</td>\n",
" <td>máster_en_inversión_bursátil,_completo_análisi...</td>\n",
" <td>/course/master-en-inversion-bursatil-completo-...</td>\n",
" <td>False</td>\n",
" <td>485</td>\n",
" <td>4.4</td>\n",
" <td>11</td>\n",
" <td>2020-05-26T17:34:49Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13532</th>\n",
" <td>2925096</td>\n",
" <td>curso_do_zero_a_investidor_em_ações_na_bolsa</td>\n",
" <td>/course/curso-do-zero-a-investidor-em-acoes-na...</td>\n",
" <td>False</td>\n",
" <td>260</td>\n",
" <td>4.2</td>\n",
" <td>11</td>\n",
" <td>2020-03-28T18:39:36Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13533</th>\n",
" <td>3146788</td>\n",
" <td>day_trading_kumo-méthode_de_trading_range-_for...</td>\n",
" <td>/course/day-trading-kumo-methode-de-trading-ra...</td>\n",
" <td>False</td>\n",
" <td>121</td>\n",
" <td>4.1</td>\n",
" <td>10</td>\n",
" <td>2020-05-19T17:08:48Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13534</th>\n",
" <td>2400574</td>\n",
" <td>investindo_do_zero_com_tesouro_direto</td>\n",
" <td>/course/investindo-do-zero-com-tesouro-direto-...</td>\n",
" <td>False</td>\n",
" <td>233</td>\n",
" <td>3.6</td>\n",
" <td>10</td>\n",
" <td>2019-06-05T23:08:57Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13535</th>\n",
" <td>2888390</td>\n",
" <td>acabou_a_previdência_e_agora?_-_volume_01</td>\n",
" <td>/course/acabou-a-previdencia-e-agora-volume-01/</td>\n",
" <td>False</td>\n",
" <td>175</td>\n",
" <td>4.5</td>\n",
" <td>10</td>\n",
" <td>2020-03-20T01:41:25Z</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>9501 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" id title \\\n",
"0 762616 the_complete_sql_bootcamp_2020:_go_from_zero_t... \n",
"1 937678 tableau_2020_a-z:_hands-on_tableau_training_fo... \n",
"2 1361790 pmp_exam_prep_seminar_-__pmbok_guide_6 \n",
"3 648826 the_complete_financial_analyst_course_2020 \n",
"4 637930 an_entire_mba_in_1_course:award_winning_busine... \n",
"... ... ... \n",
"13531 3171702 máster_en_inversión_bursátil,_completo_análisi... \n",
"13532 2925096 curso_do_zero_a_investidor_em_ações_na_bolsa \n",
"13533 3146788 day_trading_kumo-méthode_de_trading_range-_for... \n",
"13534 2400574 investindo_do_zero_com_tesouro_direto \n",
"13535 2888390 acabou_a_previdência_e_agora?_-_volume_01 \n",
"\n",
" url is_paid \\\n",
"0 /course/the-complete-sql-bootcamp/ True \n",
"1 /course/tableau10/ True \n",
"2 /course/pmp-pmbok6-35-pdus/ True \n",
"3 /course/the-complete-financial-analyst-course/ True \n",
"4 /course/an-entire-mba-in-1-courseaward-winning... True \n",
"... ... ... \n",
"13531 /course/master-en-inversion-bursatil-completo-... False \n",
"13532 /course/curso-do-zero-a-investidor-em-acoes-na... False \n",
"13533 /course/day-trading-kumo-methode-de-trading-ra... False \n",
"13534 /course/investindo-do-zero-com-tesouro-direto-... False \n",
"13535 /course/acabou-a-previdencia-e-agora-volume-01/ False \n",
"\n",
" num_subscribers rating num_reviews created \n",
"0 295509 4.7 78006 2016-02-14T22:57:48Z \n",
"1 209070 4.6 54581 2016-08-22T12:10:18Z \n",
"2 155282 4.6 52653 2017-09-26T16:32:48Z \n",
"3 245860 4.5 46447 2015-10-23T13:34:35Z \n",
"4 374836 4.5 41630 2015-10-12T06:39:46Z \n",
"... ... ... ... ... \n",
"13531 485 4.4 11 2020-05-26T17:34:49Z \n",
"13532 260 4.2 11 2020-03-28T18:39:36Z \n",
"13533 121 4.1 10 2020-05-19T17:08:48Z \n",
"13534 233 3.6 10 2019-06-05T23:08:57Z \n",
"13535 175 4.5 10 2020-03-20T01:41:25Z \n",
"\n",
"[9501 rows x 8 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"courses"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Delete redundant columns"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the columns listed in imp_col, write them back over courses.csv\n",
"# (without the index) and reload the trimmed file.\n",
"imp_col = [\n",
"    'id', 'title', 'url', 'is_paid',\n",
"    'num_subscribers', 'rating', 'num_reviews', 'created',\n",
"]\n",
"courses.loc[:, imp_col].to_csv(\"courses.csv\", index=False)\n",
"courses = pd.read_csv('courses.csv')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Delete rows with an empty (zero) rating or fewer than 10 reviews"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Drop courses whose rating is 0 or that have fewer than min_reviews reviews.\n",
"rating_col = 'rating'\n",
"num_reviews_col = 'num_reviews'\n",
"min_reviews = 10\n",
"\n",
"# Build both conditions from the column-name variables above (previously unused).\n",
"# Rows with missing values compare False in both masks, so they are kept here.\n",
"has_no_rating = courses[rating_col] == 0\n",
"has_few_reviews = courses[num_reviews_col] < min_reviews\n",
"\n",
"# .copy() gives later cells an independent frame to assign columns on.\n",
"courses = courses[~(has_no_rating | has_few_reviews)].copy()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Simplify numbers to one decimal place and format 'title' column to a specific schema"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Round numeric columns to one decimal place, then rewrite each title in\n",
"# lower case with spaces replaced by underscores.\n",
"courses = courses.round(1).assign(\n",
"    title=lambda frame: frame['title'].str.lower().str.replace(\" \", \"_\")\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Delete artifacts (rows with missing values)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Remove every row that has a missing value in any column.\n",
"courses = courses.dropna(axis=0, how='any')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Split dataset into 60% 20% 20% - train, valid, test"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Shuffle once with a fixed seed so the 60/20/20 split is reproducible across runs.\n",
"split_seed = 42\n",
"courses_shuffled = courses.sample(frac=1, random_state=split_seed)\n",
"\n",
"# Cut points: the first 60% of rows go to train, the next 20% to validate, the rest to test.\n",
"train_end = int(.6 * len(courses_shuffled))\n",
"validate_end = int(.8 * len(courses_shuffled))\n",
"\n",
"# Positional slices give the same partition sizes as np.split did, without\n",
"# passing a DataFrame through NumPy (newer pandas versions warn about that).\n",
"courses_train = courses_shuffled.iloc[:train_end]\n",
"courses_validate = courses_shuffled.iloc[train_end:validate_end]\n",
"courses_test = courses_shuffled.iloc[validate_end:]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Summary of train, valid, test"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Courses: 76008\n",
"Courses (train) : 45600\n",
"Courses (validate): 15200\n",
"Courses (test) 15208\n"
]
}
],
"source": [
"# Report the number of rows in each frame. len() counts rows, whereas np.size()\n",
"# on a DataFrame counts rows * columns and therefore overstated every figure.\n",
"for label, frame in [\n",
"    (\"Courses: \", courses),\n",
"    (\"Courses (train) : \", courses_train),\n",
"    (\"Courses (validate): \", courses_validate),\n",
"    (\"Courses (test) \", courses_test),\n",
"]:\n",
"    print(label.ljust(20), len(frame))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Describe numeric columns"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>num_subscribers</th>\n",
" <th>rating</th>\n",
" <th>num_reviews</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>9501.0</td>\n",
" <td>9501.0</td>\n",
" <td>9501.0</td>\n",
" <td>9501.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1484700.3</td>\n",
" <td>3953.9</td>\n",
" <td>4.1</td>\n",
" <td>346.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>887299.7</td>\n",
" <td>11103.9</td>\n",
" <td>0.4</td>\n",
" <td>1882.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>2762.0</td>\n",
" <td>13.0</td>\n",
" <td>1.5</td>\n",
" <td>10.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>718252.0</td>\n",
" <td>261.0</td>\n",
" <td>3.9</td>\n",
" <td>21.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>1413712.0</td>\n",
" <td>1170.0</td>\n",
" <td>4.2</td>\n",
" <td>49.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>2193058.0</td>\n",
" <td>3644.0</td>\n",
" <td>4.4</td>\n",
" <td>157.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>3477486.0</td>\n",
" <td>374836.0</td>\n",
" <td>5.0</td>\n",
" <td>78006.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id num_subscribers rating num_reviews\n",
"count 9501.0 9501.0 9501.0 9501.0\n",
"mean 1484700.3 3953.9 4.1 346.6\n",
"std 887299.7 11103.9 0.4 1882.7\n",
"min 2762.0 13.0 1.5 10.0\n",
"25% 718252.0 261.0 3.9 21.0\n",
"50% 1413712.0 1170.0 4.2 49.0\n",
"75% 2193058.0 3644.0 4.4 157.0\n",
"max 3477486.0 374836.0 5.0 78006.0"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics from describe(), rounded to one decimal place.\n",
"numeric_summary = courses.describe()\n",
"numeric_summary.round(1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Distribution of 'is_paid' column"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEICAYAAACzliQjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAOvklEQVR4nO3df6zd9V3H8eeLdmPdGApyS1jL1m5pnIAug4qdJPtjaOjCspIYYheRakiaENT5IzPFqEvUGuavOBIhqZujRCKp2wyNDB2pI2YGhxdGwkqtNGMrlY7eodO6TUbZ2z/OB3fWnvaeZu055Xyej+TkfM/nfL/nfk7SPO+3n/PjpqqQJPXhrGlPQJI0OUZfkjpi9CWpI0Zfkjpi9CWpI0ZfkjqydNoTWMwFF1xQq1atmvY0JOkV5dFHH/1qVc0dPX7GR3/VqlXMz89PexqS9IqS5Mujxl3ekaSOGH1J6ojRl6SOGH1J6ojRl6SOGH1J6ojRl6SOGH1J6sgZ/+GsV4pVW+6f9hRmxpduu3baU5Bmlmf6ktQRoy9JHTH6ktQRoy9JHTH6ktQRoy9JHTH6ktQRoy9JHTH6ktQRoy9JHTH6ktQRoy9JHTH6ktQRoy9JHTH6ktQRoy9JHTH6ktQRoy9JHTH6ktQRoy9JHTH6ktSRsaKf5FeS7E7yhSR/leQ1Sc5P8mCSp9r1eUP735pkX5K9Sa4ZGr8iyRPtvtuT5HQ8KUnSaItGP8kK4JeAtVV1GbAE2AhsAXZV1RpgV7tNkkva/ZcC64E7kixpD3cnsBlY0y7rT+mzkSSd0LjLO0uBZUmWAq8FngU2ANvb/duB69r2BuDeqnqhqp4G9gFXJrkIOLeqHq6qAu4eOkaSNAGLRr+q/h34I2A/cBD4r6r6NHBhVR1s+xwElrdDVgDPDD3EgTa2om0fPS5JmpBxlnfOY3D2vhp4A/C6JDec6JARY3WC8VE/c3OS+STzCwsLi01RkjSmcZZ3fgJ4uqoWqupF4JPAjwPPtSUb2vWhtv8B4OKh41cyWA460LaPHj9GVW2rqrVVtXZubu5kno8k6QTGif5+YF2S17Z321wN7AF2ApvaPpuA+9r2TmBjkrOTrGbwgu0jbQnocJJ17XFuHDpGkjQBSxfboao+l+TjwGPAEeDzwDbgHGBHkpsY/GK4vu2/O8kO4Mm2/y1V9VJ7uJuBu4BlwAPtIkmakEWjD1BVHwQ+eNTwCwzO+kftvxXYOmJ8HrjsJOcoSTpF/ESuJHXE6EtSR4y+JHXE6EtSR4y+JHXE6EtSR4y+JHXE6EtSR4y+JHXE6EtSR4y+JHXE6EtSR4y+JHXE6EtSR4y+JHXE6EtSR4y+JHXE6EtSR4y+JHXE6EtSR4y+JHXE6EtSR4y+JHXE6EtSR4y+JHXE6EtSR4y+JHXE6EtSR4y+JHXE6EtSR4y+JHXE6EtSR4y+JHXE6EtSR4y+JHXE6EtSR8aKfpLvT/LxJP+aZE+SdyQ5P8mDSZ5q1+cN7X9rkn1J9ia5Zmj8iiRPtPtuT5LT8aQkSaONe6b/YeDvquqtwNuAPcAWYFdVrQF2tdskuQTYCFwKrAfuSLKkPc6dwGZgTbusP0XPQ5I0hkWjn+Rc4J3ARwGq6ltV9TVgA7C97bYduK5tbwDuraoXquppYB9wZZKLgHOr6uGqKuDuoWMkSRMwzpn+m4EF4GNJPp/kI0leB1xYVQcB2vXytv8K4Jmh4w+0sRVt++hxSdKEjBP9pcDlwJ1V9Xbg67SlnOMYtU5fJxg/9gGSzUnmk8wvLCyMMUVJ0jjGif4B4EBVfa7d/jiDXwLPtSUb2vWhof0vHjp+JfBsG185YvwYVbWtqtZW1dq5ublxn4skaRGLRr+qvgI8k+QH29DVwJPATmBTG9sE3Ne2dwIbk5ydZDWDF2wfaUtAh5Osa+/auXHoGEnSBCwdc79fBO5J8mrgi8DPM/iFsSPJTcB+4HqAqtqdZAeDXwxHgF
uq6qX2ODcDdwHLgAfaRZI0IWNFv6oeB9aOuOvq4+y/Fdg6YnweuOwk5idJOoX8RK4kdcToS1JHjL4kdcToS1JHjL4kdcToS1JHjL4kdcToS1JHjL4kdcToS1JHjL4kdcToS1JHjL4kdcToS1JHjL4kdcToS1JHjL4kdcToS1JHjL4kdcToS1JHjL4kdcToS1JHjL4kdcToS1JHjL4kdcToS1JHjL4kdcToS1JHjL4kdcToS1JHjL4kdcToS1JHjL4kdcToS1JHjL4kdcToS1JHjL4kdWTs6CdZkuTzSf623T4/yYNJnmrX5w3te2uSfUn2JrlmaPyKJE+0+25PklP7dCRJJ3IyZ/rvB/YM3d4C7KqqNcCudpsklwAbgUuB9cAdSZa0Y+4ENgNr2mX99zR7SdJJGSv6SVYC1wIfGRreAGxv29uB64bG762qF6rqaWAfcGWSi4Bzq+rhqirg7qFjJEkTMO6Z/p8Cvw58e2jswqo6CNCul7fxFcAzQ/sdaGMr2vbR45KkCVk0+kneAxyqqkfHfMxR6/R1gvFRP3Nzkvkk8wsLC2P+WEnSYsY5078KeG+SLwH3Au9K8pfAc23JhnZ9qO1/ALh46PiVwLNtfOWI8WNU1baqWltVa+fm5k7i6UiSTmTR6FfVrVW1sqpWMXiB9h+q6gZgJ7Cp7bYJuK9t7wQ2Jjk7yWoGL9g+0paADidZ1961c+PQMZKkCVj6PRx7G7AjyU3AfuB6gKranWQH8CRwBLilql5qx9wM3AUsAx5oF0nShJxU9KvqIeChtv08cPVx9tsKbB0xPg9cdrKTlCSdGn4iV5I6YvQlqSNGX5I6YvQlqSNGX5I6YvQlqSNGX5I6YvQlqSNGX5I6YvQlqSNGX5I6YvQlqSNGX5I6YvQlqSNGX5I6YvQlqSNGX5I6YvQlqSNGX5I6YvQlqSNGX5I6YvQlqSNGX5I6YvQlqSNGX5I6YvQlqSNGX5I6YvQlqSNGX5I6YvQlqSNGX5I6YvQlqSNGX5I6YvQlqSNGX5I6YvQlqSOLRj/JxUk+k2RPkt1J3t/Gz0/yYJKn2vV5Q8fcmmRfkr1JrhkavyLJE+2+25Pk9DwtSdIo45zpHwF+rap+CFgH3JLkEmALsKuq1gC72m3afRuBS4H1wB1JlrTHuhPYDKxpl/Wn8LlIkhaxaPSr6mBVPda2DwN7gBXABmB72207cF3b3gDcW1UvVNXTwD7gyiQXAedW1cNVVcDdQ8dIkibgpNb0k6wC3g58Driwqg7C4BcDsLzttgJ4ZuiwA21sRds+elySNCFjRz/JOcAngF+uqv8+0a4jxuoE46N+1uYk80nmFxYWxp2iJGkRY0U/yasYBP+eqvpkG36uLdnQrg+18QPAxUOHrwSebeMrR4wfo6q2VdXaqlo7Nzc37nORJC1inHfvBPgosKeq/mTorp3Apra9CbhvaHxjkrOTrGbwgu0jbQnocJJ17TFvHDpGkjQBS8fY5yrgZ4Enkjzexn4DuA3YkeQmYD9wPUBV7U6yA3iSwTt/bqmql9pxNwN3AcuAB9pFkjQhi0a/qj7L6PV4gKuPc8xWYOuI8XngspOZoCTp1PETuZLUEaMvSR0x+pLUEaMvSR0x+pLUEaMvSR0x+pLUEaMvSR0x+pLUEaMvSR0x+pLUEaMvSR0x+pLUEaMvSR0x+pLUEaMvSR0x+pLUEaMvSR0x+pLUEaMvSR0x+pLUEaMvSR0x+pLUEaMvSR0x+pLUEaMvSR0x+pLUEaMvSR0x+pLUEaMvSR0x+pLUEaMvSR1ZOu0JSDq9Vm25f9pTmClfuu3aaU/he+KZviR1xOhLUkeMviR1xOhLUkcmHv0k65PsTbIvyZZJ/3xJ6tlEo59kCfBnwLuBS4D3JblkknOQpJ5N+kz/SmBfVX2xqr4F3AtsmPAcJKlbk47+CuCZodsH2pgkaQIm/eGsjBirY3ZKNgOb283/SbL3tM6qHxcAX532JBaTD017BpoS/32eWm
8aNTjp6B8ALh66vRJ49uidqmobsG1Sk+pFkvmqWjvteUij+O9zMia9vPMvwJokq5O8GtgI7JzwHCSpWxM906+qI0l
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of how many courses fall into each 'is_paid' value.\n",
"# Series.value_counts() replaces the top-level pd.value_counts(), which newer pandas deprecates.\n",
"courses['is_paid'].value_counts().plot(kind=\"bar\", title=\"Courses by 'is_paid'\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Current dataset"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>title</th>\n",
" <th>url</th>\n",
" <th>is_paid</th>\n",
" <th>num_subscribers</th>\n",
" <th>rating</th>\n",
" <th>num_reviews</th>\n",
" <th>created</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>762616</td>\n",
" <td>the_complete_sql_bootcamp_2020:_go_from_zero_t...</td>\n",
" <td>/course/the-complete-sql-bootcamp/</td>\n",
" <td>True</td>\n",
" <td>295509</td>\n",
" <td>4.7</td>\n",
" <td>78006</td>\n",
" <td>2016-02-14T22:57:48Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>937678</td>\n",
" <td>tableau_2020_a-z:_hands-on_tableau_training_fo...</td>\n",
" <td>/course/tableau10/</td>\n",
" <td>True</td>\n",
" <td>209070</td>\n",
" <td>4.6</td>\n",
" <td>54581</td>\n",
" <td>2016-08-22T12:10:18Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1361790</td>\n",
" <td>pmp_exam_prep_seminar_-__pmbok_guide_6</td>\n",
" <td>/course/pmp-pmbok6-35-pdus/</td>\n",
" <td>True</td>\n",
" <td>155282</td>\n",
" <td>4.6</td>\n",
" <td>52653</td>\n",
" <td>2017-09-26T16:32:48Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>648826</td>\n",
" <td>the_complete_financial_analyst_course_2020</td>\n",
" <td>/course/the-complete-financial-analyst-course/</td>\n",
" <td>True</td>\n",
" <td>245860</td>\n",
" <td>4.5</td>\n",
" <td>46447</td>\n",
" <td>2015-10-23T13:34:35Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>637930</td>\n",
" <td>an_entire_mba_in_1_course:award_winning_busine...</td>\n",
" <td>/course/an-entire-mba-in-1-courseaward-winning...</td>\n",
" <td>True</td>\n",
" <td>374836</td>\n",
" <td>4.5</td>\n",
" <td>41630</td>\n",
" <td>2015-10-12T06:39:46Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13531</th>\n",
" <td>3171702</td>\n",
" <td>máster_en_inversión_bursátil,_completo_análisi...</td>\n",
" <td>/course/master-en-inversion-bursatil-completo-...</td>\n",
" <td>False</td>\n",
" <td>485</td>\n",
" <td>4.4</td>\n",
" <td>11</td>\n",
" <td>2020-05-26T17:34:49Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13532</th>\n",
" <td>2925096</td>\n",
" <td>curso_do_zero_a_investidor_em_ações_na_bolsa</td>\n",
" <td>/course/curso-do-zero-a-investidor-em-acoes-na...</td>\n",
" <td>False</td>\n",
" <td>260</td>\n",
" <td>4.2</td>\n",
" <td>11</td>\n",
" <td>2020-03-28T18:39:36Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13533</th>\n",
" <td>3146788</td>\n",
" <td>day_trading_kumo-méthode_de_trading_range-_for...</td>\n",
" <td>/course/day-trading-kumo-methode-de-trading-ra...</td>\n",
" <td>False</td>\n",
" <td>121</td>\n",
" <td>4.1</td>\n",
" <td>10</td>\n",
" <td>2020-05-19T17:08:48Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13534</th>\n",
" <td>2400574</td>\n",
" <td>investindo_do_zero_com_tesouro_direto</td>\n",
" <td>/course/investindo-do-zero-com-tesouro-direto-...</td>\n",
" <td>False</td>\n",
" <td>233</td>\n",
" <td>3.6</td>\n",
" <td>10</td>\n",
" <td>2019-06-05T23:08:57Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13535</th>\n",
" <td>2888390</td>\n",
" <td>acabou_a_previdência_e_agora?_-_volume_01</td>\n",
" <td>/course/acabou-a-previdencia-e-agora-volume-01/</td>\n",
" <td>False</td>\n",
" <td>175</td>\n",
" <td>4.5</td>\n",
" <td>10</td>\n",
" <td>2020-03-20T01:41:25Z</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>9501 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" id title \\\n",
"0 762616 the_complete_sql_bootcamp_2020:_go_from_zero_t... \n",
"1 937678 tableau_2020_a-z:_hands-on_tableau_training_fo... \n",
"2 1361790 pmp_exam_prep_seminar_-__pmbok_guide_6 \n",
"3 648826 the_complete_financial_analyst_course_2020 \n",
"4 637930 an_entire_mba_in_1_course:award_winning_busine... \n",
"... ... ... \n",
"13531 3171702 máster_en_inversión_bursátil,_completo_análisi... \n",
"13532 2925096 curso_do_zero_a_investidor_em_ações_na_bolsa \n",
"13533 3146788 day_trading_kumo-méthode_de_trading_range-_for... \n",
"13534 2400574 investindo_do_zero_com_tesouro_direto \n",
"13535 2888390 acabou_a_previdência_e_agora?_-_volume_01 \n",
"\n",
" url is_paid \\\n",
"0 /course/the-complete-sql-bootcamp/ True \n",
"1 /course/tableau10/ True \n",
"2 /course/pmp-pmbok6-35-pdus/ True \n",
"3 /course/the-complete-financial-analyst-course/ True \n",
"4 /course/an-entire-mba-in-1-courseaward-winning... True \n",
"... ... ... \n",
"13531 /course/master-en-inversion-bursatil-completo-... False \n",
"13532 /course/curso-do-zero-a-investidor-em-acoes-na... False \n",
"13533 /course/day-trading-kumo-methode-de-trading-ra... False \n",
"13534 /course/investindo-do-zero-com-tesouro-direto-... False \n",
"13535 /course/acabou-a-previdencia-e-agora-volume-01/ False \n",
"\n",
" num_subscribers rating num_reviews created \n",
"0 295509 4.7 78006 2016-02-14T22:57:48Z \n",
"1 209070 4.6 54581 2016-08-22T12:10:18Z \n",
"2 155282 4.6 52653 2017-09-26T16:32:48Z \n",
"3 245860 4.5 46447 2015-10-23T13:34:35Z \n",
"4 374836 4.5 41630 2015-10-12T06:39:46Z \n",
"... ... ... ... ... \n",
"13531 485 4.4 11 2020-05-26T17:34:49Z \n",
"13532 260 4.2 11 2020-03-28T18:39:36Z \n",
"13533 121 4.1 10 2020-05-19T17:08:48Z \n",
"13534 233 3.6 10 2019-06-05T23:08:57Z \n",
"13535 175 4.5 10 2020-03-20T01:41:25Z \n",
"\n",
"[9501 rows x 8 columns]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"courses"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}