ium_424714/dane.ipynb
2023-03-21 17:37:51 +01:00

15 KiB
Raw Blame History

import numpy as np
import pandas as pd
from IPython.display import display,Markdown
TRUE_NEWS_PATH = "data/True.csv"
FAKE_NEWS_PATH = "data/Fake.csv"

# Columns removed from both datasets right after loading.
_DROPPED_COLUMNS = ['title', 'subject', 'date']

# Read each CSV file and discard the unused columns in one chained step.
true_news = pd.read_csv(TRUE_NEWS_PATH).drop(columns=_DROPPED_COLUMNS)
fake_news = pd.read_csv(FAKE_NEWS_PATH).drop(columns=_DROPPED_COLUMNS)

Setting binary classification values

# Binary classification target: 1 marks true news, 0 marks fake news.
true_news['Value'] = 1
fake_news['Value'] = 0

# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; passing its result to display() would render a stray "None".
display(Markdown(r"### True news"))
true_news.info()
display(true_news.head(10))
display(Markdown(r"### Fake news"))
fake_news.info()
display(fake_news.head(10))

True news

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21417 entries, 0 to 21416
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    21417 non-null  object
 1   Value   21417 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 334.8+ KB
None
text Value
0 WASHINGTON (Reuters) - The head of a conservat... 1
1 WASHINGTON (Reuters) - Transgender people will... 1
2 WASHINGTON (Reuters) - The special counsel inv... 1
3 WASHINGTON (Reuters) - Trump campaign adviser ... 1
4 SEATTLE/WASHINGTON (Reuters) - President Donal... 1
5 WEST PALM BEACH, Fla./WASHINGTON (Reuters) - T... 1
6 WEST PALM BEACH, Fla (Reuters) - President Don... 1
7 The following statements were posted to the ve... 1
8 The following statements were posted to the ve... 1
9 WASHINGTON (Reuters) - Alabama Secretary of St... 1

Fake news

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23481 entries, 0 to 23480
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    23481 non-null  object
 1   Value   23481 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 367.0+ KB
None
text Value
0 Donald Trump just couldn t wish all Americans ... 0
1 House Intelligence Committee Chairman Devin Nu... 0
2 On Friday, it was revealed that former Milwauk... 0
3 On Christmas day, Donald Trump announced that ... 0
4 Pope Francis used his annual Christmas Day mes... 0
5 The number of cases of cops brutalizing and ki... 0
6 Donald Trump spent a good portion of his day a... 0
7 In the wake of yet another court decision that... 0
8 Many people have raised the alarm regarding th... 0
9 Just when you might have thought we d get a br... 0
# merging dataset
# ignore_index=True renumbers the rows 0..n-1. Without it each frame keeps its
# own index, so labels repeat across the two frames and label-based lookups
# become ambiguous.
dataset = pd.concat([true_news, fake_news], axis=0, ignore_index=True)
display(dataset)
text Value
0 WASHINGTON (Reuters) - The head of a conservat... 1
1 WASHINGTON (Reuters) - Transgender people will... 1
2 WASHINGTON (Reuters) - The special counsel inv... 1
3 WASHINGTON (Reuters) - Trump campaign adviser ... 1
4 SEATTLE/WASHINGTON (Reuters) - President Donal... 1
... ... ...
23476 21st Century Wire says As 21WIRE reported earl... 0
23477 21st Century Wire says It s a familiar theme. ... 0
23478 Patrick Henningsen 21st Century WireRemember ... 0
23479 21st Century Wire says Al Jazeera America will... 0
23480 21st Century Wire says As 21WIRE predicted in ... 0

44898 rows × 2 columns

# info() prints the summary and returns None; call it directly so that no
# extra "None" is rendered by display().
dataset.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 44898 entries, 0 to 23480
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    44898 non-null  object
 1   Value   44898 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 1.0+ MB
None