15 KiB
15 KiB
import numpy as np
import pandas as pd
from IPython.display import display,Markdown
# Locations of the two source CSV files.
TRUE_NEWS_PATH = "data/True.csv"
FAKE_NEWS_PATH = "data/Fake.csv"

# Columns removed right after loading; every other column is kept as-is.
_UNUSED_COLUMNS = ['title', 'subject', 'date']

# Load each dataset and strip the unused columns in one chained step.
true_news = pd.read_csv(TRUE_NEWS_PATH).drop(columns=_UNUSED_COLUMNS)
fake_news = pd.read_csv(FAKE_NEWS_PATH).drop(columns=_UNUSED_COLUMNS)
Setting binary classification values
# Label the two classes for binary classification: 1 = true news, 0 = fake news.
true_news['Value'] = 1
fake_news['Value'] = 0

# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() would render a stray "None" after it.
display(Markdown(r"### True news"))
true_news.info()
display(true_news.head(10))
display(Markdown(r"### Fake news"))
fake_news.info()
display(fake_news.head(10))
True news
<class 'pandas.core.frame.DataFrame'> RangeIndex: 21417 entries, 0 to 21416 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 text 21417 non-null object 1 Value 21417 non-null int64 dtypes: int64(1), object(1) memory usage: 334.8+ KB
None
text | Value | |
---|---|---|
0 | WASHINGTON (Reuters) - The head of a conservat... | 1 |
1 | WASHINGTON (Reuters) - Transgender people will... | 1 |
2 | WASHINGTON (Reuters) - The special counsel inv... | 1 |
3 | WASHINGTON (Reuters) - Trump campaign adviser ... | 1 |
4 | SEATTLE/WASHINGTON (Reuters) - President Donal... | 1 |
5 | WEST PALM BEACH, Fla./WASHINGTON (Reuters) - T... | 1 |
6 | WEST PALM BEACH, Fla (Reuters) - President Don... | 1 |
7 | The following statements were posted to the ve... | 1 |
8 | The following statements were posted to the ve... | 1 |
9 | WASHINGTON (Reuters) - Alabama Secretary of St... | 1 |
Fake news
<class 'pandas.core.frame.DataFrame'> RangeIndex: 23481 entries, 0 to 23480 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 text 23481 non-null object 1 Value 23481 non-null int64 dtypes: int64(1), object(1) memory usage: 367.0+ KB
None
text | Value | |
---|---|---|
0 | Donald Trump just couldn t wish all Americans ... | 0 |
1 | House Intelligence Committee Chairman Devin Nu... | 0 |
2 | On Friday, it was revealed that former Milwauk... | 0 |
3 | On Christmas day, Donald Trump announced that ... | 0 |
4 | Pope Francis used his annual Christmas Day mes... | 0 |
5 | The number of cases of cops brutalizing and ki... | 0 |
6 | Donald Trump spent a good portion of his day a... | 0 |
7 | In the wake of yet another court decision that... | 0 |
8 | Many people have raised the alarm regarding th... | 0 |
9 | Just when you might have thought we d get a br... | 0 |
# Merge both labelled frames into a single dataset (true news rows first).
# ignore_index=True renumbers the rows 0..n-1; without it each frame keeps its
# own index, so the merged frame carries duplicate labels and label-based
# lookups such as dataset.loc[0] return more than one row.
dataset = pd.concat([true_news, fake_news], axis=0, ignore_index=True)
display(dataset)
text | Value | |
---|---|---|
0 | WASHINGTON (Reuters) - The head of a conservat... | 1 |
1 | WASHINGTON (Reuters) - Transgender people will... | 1 |
2 | WASHINGTON (Reuters) - The special counsel inv... | 1 |
3 | WASHINGTON (Reuters) - Trump campaign adviser ... | 1 |
4 | SEATTLE/WASHINGTON (Reuters) - President Donal... | 1 |
... | ... | ... |
23476 | 21st Century Wire says As 21WIRE reported earl... | 0 |
23477 | 21st Century Wire says It s a familiar theme. ... | 0 |
23478 | Patrick Henningsen 21st Century WireRemember ... | 0 |
23479 | 21st Century Wire says Al Jazeera America will... | 0 |
23480 | 21st Century Wire says As 21WIRE predicted in ... | 0 |
44898 rows × 2 columns
# info() prints the summary and returns None; display() would only add "None".
dataset.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 44898 entries, 0 to 23480 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 text 44898 non-null object 1 Value 44898 non-null int64 dtypes: int64(1), object(1) memory usage: 1.0+ MB
None