2023-03-21 17:37:51 +01:00
|
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-03-21 23:45:10 +01:00
|
|
|
|
"execution_count": 3,
|
2023-03-21 17:37:51 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
"import pandas as pd\n",
|
2023-03-21 23:45:10 +01:00
|
|
|
|
"from IPython.display import display,Markdown\n",
|
|
|
|
|
"from sklearn.model_selection import train_test_split"
|
2023-03-21 17:37:51 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-03-21 23:45:10 +01:00
|
|
|
|
"execution_count": 4,
|
2023-03-21 17:37:51 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"TRUE_NEWS_PATH = \"data/True.csv\"\n",
|
|
|
|
|
"FAKE_NEWS_PATH = \"data/Fake.csv\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"#loading datasets\n",
|
|
|
|
|
"true_news = pd.read_csv(TRUE_NEWS_PATH)\n",
|
|
|
|
|
"fake_news = pd.read_csv(FAKE_NEWS_PATH)\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-03-21 23:45:10 +01:00
|
|
|
|
"execution_count": 5,
|
2023-03-21 17:37:51 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"# clearing dataset\n",
|
|
|
|
|
"true_news = true_news.drop(columns=['title','subject','date'])\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"fake_news = fake_news.drop(columns=['title','subject','date'])"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"attachments": {},
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### Seting binary classifiaction values\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-03-21 23:45:10 +01:00
|
|
|
|
"execution_count": 6,
|
2023-03-21 17:37:51 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/markdown": [
|
|
|
|
|
"### True news"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"<IPython.core.display.Markdown object>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
|
|
|
"RangeIndex: 21417 entries, 0 to 21416\n",
|
|
|
|
|
"Data columns (total 2 columns):\n",
|
|
|
|
|
" # Column Non-Null Count Dtype \n",
|
|
|
|
|
"--- ------ -------------- ----- \n",
|
|
|
|
|
" 0 text 21417 non-null object\n",
|
|
|
|
|
" 1 Value 21417 non-null int64 \n",
|
|
|
|
|
"dtypes: int64(1), object(1)\n",
|
|
|
|
|
"memory usage: 334.8+ KB\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"None"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>text</th>\n",
|
|
|
|
|
" <th>Value</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>WASHINGTON (Reuters) - The head of a conservat...</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>WASHINGTON (Reuters) - Transgender people will...</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>WASHINGTON (Reuters) - The special counsel inv...</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>WASHINGTON (Reuters) - Trump campaign adviser ...</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>SEATTLE/WASHINGTON (Reuters) - President Donal...</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>5</th>\n",
|
|
|
|
|
" <td>WEST PALM BEACH, Fla./WASHINGTON (Reuters) - T...</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>6</th>\n",
|
|
|
|
|
" <td>WEST PALM BEACH, Fla (Reuters) - President Don...</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>7</th>\n",
|
|
|
|
|
" <td>The following statements were posted to the ve...</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>8</th>\n",
|
|
|
|
|
" <td>The following statements were posted to the ve...</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>9</th>\n",
|
|
|
|
|
" <td>WASHINGTON (Reuters) - Alabama Secretary of St...</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" text Value\n",
|
|
|
|
|
"0 WASHINGTON (Reuters) - The head of a conservat... 1\n",
|
|
|
|
|
"1 WASHINGTON (Reuters) - Transgender people will... 1\n",
|
|
|
|
|
"2 WASHINGTON (Reuters) - The special counsel inv... 1\n",
|
|
|
|
|
"3 WASHINGTON (Reuters) - Trump campaign adviser ... 1\n",
|
|
|
|
|
"4 SEATTLE/WASHINGTON (Reuters) - President Donal... 1\n",
|
|
|
|
|
"5 WEST PALM BEACH, Fla./WASHINGTON (Reuters) - T... 1\n",
|
|
|
|
|
"6 WEST PALM BEACH, Fla (Reuters) - President Don... 1\n",
|
|
|
|
|
"7 The following statements were posted to the ve... 1\n",
|
|
|
|
|
"8 The following statements were posted to the ve... 1\n",
|
|
|
|
|
"9 WASHINGTON (Reuters) - Alabama Secretary of St... 1"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/markdown": [
|
|
|
|
|
"### Fake news"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"<IPython.core.display.Markdown object>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
|
|
|
"RangeIndex: 23481 entries, 0 to 23480\n",
|
|
|
|
|
"Data columns (total 2 columns):\n",
|
|
|
|
|
" # Column Non-Null Count Dtype \n",
|
|
|
|
|
"--- ------ -------------- ----- \n",
|
|
|
|
|
" 0 text 23481 non-null object\n",
|
|
|
|
|
" 1 Value 23481 non-null int64 \n",
|
|
|
|
|
"dtypes: int64(1), object(1)\n",
|
|
|
|
|
"memory usage: 367.0+ KB\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"None"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>text</th>\n",
|
|
|
|
|
" <th>Value</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>Donald Trump just couldn t wish all Americans ...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>House Intelligence Committee Chairman Devin Nu...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>On Friday, it was revealed that former Milwauk...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>On Christmas day, Donald Trump announced that ...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>Pope Francis used his annual Christmas Day mes...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>5</th>\n",
|
|
|
|
|
" <td>The number of cases of cops brutalizing and ki...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>6</th>\n",
|
|
|
|
|
" <td>Donald Trump spent a good portion of his day a...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>7</th>\n",
|
|
|
|
|
" <td>In the wake of yet another court decision that...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>8</th>\n",
|
|
|
|
|
" <td>Many people have raised the alarm regarding th...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>9</th>\n",
|
|
|
|
|
" <td>Just when you might have thought we d get a br...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" text Value\n",
|
|
|
|
|
"0 Donald Trump just couldn t wish all Americans ... 0\n",
|
|
|
|
|
"1 House Intelligence Committee Chairman Devin Nu... 0\n",
|
|
|
|
|
"2 On Friday, it was revealed that former Milwauk... 0\n",
|
|
|
|
|
"3 On Christmas day, Donald Trump announced that ... 0\n",
|
|
|
|
|
"4 Pope Francis used his annual Christmas Day mes... 0\n",
|
|
|
|
|
"5 The number of cases of cops brutalizing and ki... 0\n",
|
|
|
|
|
"6 Donald Trump spent a good portion of his day a... 0\n",
|
|
|
|
|
"7 In the wake of yet another court decision that... 0\n",
|
|
|
|
|
"8 Many people have raised the alarm regarding th... 0\n",
|
|
|
|
|
"9 Just when you might have thought we d get a br... 0"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"true_news['Value'] = 1\n",
|
|
|
|
|
"fake_news['Value'] = 0\n",
|
|
|
|
|
"display(Markdown(r\"### True news\"))\n",
|
|
|
|
|
"display(true_news.info())\n",
|
|
|
|
|
"display(true_news.head(10))\n",
|
|
|
|
|
"display(Markdown(r\"### Fake news\"))\n",
|
|
|
|
|
"display(fake_news.info())\n",
|
|
|
|
|
"display(fake_news.head(10))"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-03-21 23:45:10 +01:00
|
|
|
|
"execution_count": 7,
|
2023-03-21 17:37:51 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>text</th>\n",
|
|
|
|
|
" <th>Value</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>WASHINGTON (Reuters) - The head of a conservat...</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>WASHINGTON (Reuters) - Transgender people will...</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>WASHINGTON (Reuters) - The special counsel inv...</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>WASHINGTON (Reuters) - Trump campaign adviser ...</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>SEATTLE/WASHINGTON (Reuters) - President Donal...</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>23476</th>\n",
|
|
|
|
|
" <td>21st Century Wire says As 21WIRE reported earl...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>23477</th>\n",
|
|
|
|
|
" <td>21st Century Wire says It s a familiar theme. ...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>23478</th>\n",
|
|
|
|
|
" <td>Patrick Henningsen 21st Century WireRemember ...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>23479</th>\n",
|
|
|
|
|
" <td>21st Century Wire says Al Jazeera America will...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>23480</th>\n",
|
|
|
|
|
" <td>21st Century Wire says As 21WIRE predicted in ...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"<p>44898 rows × 2 columns</p>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" text Value\n",
|
|
|
|
|
"0 WASHINGTON (Reuters) - The head of a conservat... 1\n",
|
|
|
|
|
"1 WASHINGTON (Reuters) - Transgender people will... 1\n",
|
|
|
|
|
"2 WASHINGTON (Reuters) - The special counsel inv... 1\n",
|
|
|
|
|
"3 WASHINGTON (Reuters) - Trump campaign adviser ... 1\n",
|
|
|
|
|
"4 SEATTLE/WASHINGTON (Reuters) - President Donal... 1\n",
|
|
|
|
|
"... ... ...\n",
|
|
|
|
|
"23476 21st Century Wire says As 21WIRE reported earl... 0\n",
|
|
|
|
|
"23477 21st Century Wire says It s a familiar theme. ... 0\n",
|
|
|
|
|
"23478 Patrick Henningsen 21st Century WireRemember ... 0\n",
|
|
|
|
|
"23479 21st Century Wire says Al Jazeera America will... 0\n",
|
|
|
|
|
"23480 21st Century Wire says As 21WIRE predicted in ... 0\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[44898 rows x 2 columns]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# merging dataset\n",
|
|
|
|
|
"dataset = pd.concat([true_news,fake_news],axis=0)\n",
|
|
|
|
|
"display(dataset)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-03-21 23:45:10 +01:00
|
|
|
|
"execution_count": 8,
|
2023-03-21 17:37:51 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
|
|
|
"Int64Index: 44898 entries, 0 to 23480\n",
|
|
|
|
|
"Data columns (total 2 columns):\n",
|
|
|
|
|
" # Column Non-Null Count Dtype \n",
|
|
|
|
|
"--- ------ -------------- ----- \n",
|
|
|
|
|
" 0 text 44898 non-null object\n",
|
|
|
|
|
" 1 Value 44898 non-null int64 \n",
|
|
|
|
|
"dtypes: int64(1), object(1)\n",
|
|
|
|
|
"memory usage: 1.0+ MB\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"None"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"display(dataset.info())"
|
|
|
|
|
]
|
2023-03-21 23:45:10 +01:00
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 28,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/markdown": [
|
|
|
|
|
"### STD"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"<IPython.core.display.Markdown object>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"y_train std: 0.49939397301167954\n",
|
|
|
|
|
"y_val std: 0.4997839588710888\n",
|
|
|
|
|
"y_test std: 0.4998194469400359\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/markdown": [
|
|
|
|
|
"### MEAN"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"<IPython.core.display.Markdown object>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"y_train mean: 0.475249178684782\n",
|
|
|
|
|
"y_val mean: 0.4835189309576837\n",
|
|
|
|
|
"y_test mean: 0.4846325167037862\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/markdown": [
|
|
|
|
|
"### Count"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"<IPython.core.display.Markdown object>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"y_train count: 35918\n",
|
|
|
|
|
"y_val count: 4490\n",
|
|
|
|
|
"y_test count: 4490\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# creating train, val , test datasets dataset 8:1:1\n",
|
|
|
|
|
"X_train, X_val_test, y_train, y_valtest = train_test_split(dataset[\"text\"],dataset[\"Value\"],test_size=0.2, shuffle=True)\n",
|
|
|
|
|
"X_test, X_val, y_test, y_val = train_test_split(X_val_test,y_valtest,test_size=0.5, shuffle=True)\n",
|
|
|
|
|
"display(Markdown(\"### STD\"))\n",
|
|
|
|
|
"print(f\"y_train std: {y_train.std()}\")\n",
|
|
|
|
|
"print(f\"y_val std: {y_val.std()}\")\n",
|
|
|
|
|
"print(f\"y_test std: {y_test.std()}\")\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"display(Markdown(\"### MEAN\"))\n",
|
|
|
|
|
"print(f\"y_train mean: {y_train.mean()}\")\n",
|
|
|
|
|
"print(f\"y_val mean: {y_val.mean()}\")\n",
|
|
|
|
|
"print(f\"y_test mean: {y_test.mean()}\")\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"display(Markdown(\"### Count\"))\n",
|
|
|
|
|
"print(f\"y_train count: {y_train.count()}\")\n",
|
|
|
|
|
"print(f\"y_val count: {y_val.count()}\")\n",
|
|
|
|
|
"print(f\"y_test count: {y_test.count()}\")\n"
|
|
|
|
|
]
|
2023-03-21 17:37:51 +01:00
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
2023-03-21 23:45:10 +01:00
|
|
|
|
"display_name": "dl",
|
2023-03-21 17:37:51 +01:00
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
"version": "3.11.0"
|
|
|
|
|
},
|
|
|
|
|
"orig_nbformat": 4,
|
|
|
|
|
"vscode": {
|
|
|
|
|
"interpreter": {
|
2023-03-21 23:45:10 +01:00
|
|
|
|
"hash": "6e9239598a6712340c2b580c5c929949b8a813e86738fb7cf0a67c11d0863b74"
|
2023-03-21 17:37:51 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 2
|
|
|
|
|
}
|