billionaries-wizualizacja/EDA_Billionaires .ipynb
2024-04-09 19:53:24 +02:00

279 KiB
Raw Blame History

Statystyki miliarderów

Zbiór danych zawiera statystyki dotyczące miliarderów na świecie, a także ich dane osobowe, branże, którymi się zajmują, oraz firmy.

Źródło

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter, StrMethodFormatter
# Apply the ggplot theme to every figure drawn after this point.
plt.style.use('ggplot')

# Load the dataset from the current working directory and display it.
df = pd.read_csv(filepath_or_buffer='data.csv')
df
rank finalWorth category personName age country city source industries countryOfCitizenship ... cpi_change_country gdp_country gross_tertiary_education_enrollment gross_primary_education_enrollment_country life_expectancy_country tax_revenue_country_country total_tax_rate_country population_country latitude_country longitude_country
0 1 211000 Fashion & Retail Bernard Arnault & family 74.0 France Paris LVMH Fashion & Retail France ... 1.1 $2,715,518,274,227 65.6 102.5 82.5 24.2 60.7 6.705989e+07 46.227638 2.213749
1 2 180000 Automotive Elon Musk 51.0 United States Austin Tesla, SpaceX Automotive United States ... 7.5 $21,427,700,000,000 88.2 101.8 78.5 9.6 36.6 3.282395e+08 37.090240 -95.712891
2 3 114000 Technology Jeff Bezos 59.0 United States Medina Amazon Technology United States ... 7.5 $21,427,700,000,000 88.2 101.8 78.5 9.6 36.6 3.282395e+08 37.090240 -95.712891
3 4 107000 Technology Larry Ellison 78.0 United States Lanai Oracle Technology United States ... 7.5 $21,427,700,000,000 88.2 101.8 78.5 9.6 36.6 3.282395e+08 37.090240 -95.712891
4 5 106000 Finance & Investments Warren Buffett 92.0 United States Omaha Berkshire Hathaway Finance & Investments United States ... 7.5 $21,427,700,000,000 88.2 101.8 78.5 9.6 36.6 3.282395e+08 37.090240 -95.712891
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2635 2540 1000 Healthcare Yu Rong 51.0 China Shanghai Health clinics Healthcare China ... 2.9 $19,910,000,000,000 50.6 100.2 77.0 9.4 59.2 1.397715e+09 35.861660 104.195397
2636 2540 1000 Food & Beverage Richard Yuengling, Jr. 80.0 United States Pottsville Beer Food & Beverage United States ... 7.5 $21,427,700,000,000 88.2 101.8 78.5 9.6 36.6 3.282395e+08 37.090240 -95.712891
2637 2540 1000 Manufacturing Zhang Gongyun 60.0 China Gaomi Tyre manufacturing machinery Manufacturing China ... 2.9 $19,910,000,000,000 50.6 100.2 77.0 9.4 59.2 1.397715e+09 35.861660 104.195397
2638 2540 1000 Real Estate Zhang Guiping & family 71.0 China Nanjing Real estate Real Estate China ... 2.9 $19,910,000,000,000 50.6 100.2 77.0 9.4 59.2 1.397715e+09 35.861660 104.195397
2639 2540 1000 Diversified Inigo Zobel 66.0 Philippines Makati Diversified Diversified Philippines ... 2.5 $376,795,508,680 35.5 107.5 71.1 14.0 43.1 1.081166e+08 12.879721 121.774017

2640 rows × 35 columns

# Number of missing values in each column of df.
df.isna().sum()
rank                                             0
finalWorth                                       0
category                                         0
personName                                       0
age                                             65
country                                         38
city                                            72
source                                           0
industries                                       0
countryOfCitizenship                             0
organization                                  2315
selfMade                                         0
status                                           0
gender                                           0
birthDate                                       76
lastName                                         0
firstName                                        3
title                                         2301
date                                             0
state                                         1887
residenceStateRegion                          1893
birthYear                                       76
birthMonth                                      76
birthDay                                        76
cpi_country                                    184
cpi_change_country                             184
gdp_country                                    164
gross_tertiary_education_enrollment            182
gross_primary_education_enrollment_country     181
life_expectancy_country                        182
tax_revenue_country_country                    183
total_tax_rate_country                         182
population_country                             164
latitude_country                               164
longitude_country                              164
dtype: int64
# Histogram of finalWorth split into 20 bins.
ax = sns.histplot(data=df, x='finalWorth', bins=20)
ax.set_ylabel('Liczba osób')
ax.set_xlabel('Majątek')
plt.show()
# Horizontal bar chart of the first ten rows of df.
# NOTE(review): this is the "top ten" only if data.csv is ordered by rank —
# confirm, or select the rows by finalWorth explicitly.
ax = sns.barplot(
    df.head(10),
    x='finalWorth',
    y='personName',
    hue='personName',
    legend=False,
    orient='h',
    palette='rainbow',
)
ax.set_title('Najbardziej majętne osoby')
# The bars are horizontal: wealth runs along the x-axis and the names are on
# the y-axis, so the labels must follow that mapping.
ax.set_xlabel('Majątek')
ax.set_ylabel('Osoba')
# Print each bar's value next to it.
for container in ax.containers:
    ax.bar_label(container, fontsize=8)
plt.show()
# Count rows per country of citizenship and keep the 20 largest groups.
# The count ends up in the 'rank' column because that is the column being
# counted; it holds the number of people, not a ranking.
ppl_in_countries = (
    df.groupby('countryOfCitizenship')['rank']
    .count()
    .reset_index()
    .sort_values(by='rank', ascending=False)
    .head(20)
)
ax = sns.barplot(
    ppl_in_countries,
    x='rank',
    y='countryOfCitizenship',
    orient='h',
    hue='countryOfCitizenship',
    legend=False,
    palette='rainbow',
)
ax.set_title('Kraj z największą liczbą miliarderów')
ax.set_ylabel('Kraje')
# Fixed the typo in the axis label ('milarderów' -> 'miliarderów').
ax.set_xlabel('Liczba miliarderów')
# Print each bar's value next to it.
for container in ax.containers:
    ax.bar_label(container, fontsize=8)
plt.show()
# Mean finalWorth for every distinct age value, displayed as a Series.
age_and_wealthy = (
    df.groupby(by='age')['finalWorth']
    .mean()
)
age_and_wealthy
age
18.0     3500.000000
19.0     1700.000000
20.0     2300.000000
21.0     2600.000000
26.0     1450.000000
            ...     
96.0     4366.666667
97.0     1425.000000
98.0     1750.000000
99.0     4375.000000
101.0    1300.000000
Name: finalWorth, Length: 79, dtype: float64
# Line plot of finalWorth against age, drawn without error bars.
# NOTE(review): seaborn's default estimator should average rows sharing an
# age, which would match the per-age means computed above — confirm.
sns.lineplot(
    data=df,
    x='age',
    y='finalWorth',
    errorbar=None,
)
plt.show()
# Count rows per business category and keep the 20 most frequent ones.
# The count is stored in the 'rank' column because that is the column counted.
categories = (
    df.groupby('category')['rank']
    .count()
    .sort_values(ascending=False)
    .reset_index()
    .head(20)
)
ax = sns.barplot(
    data=categories,
    x='rank',
    y='category',
    hue='category',
    orient='h',
    legend=False,
    palette='rainbow',
)
ax.set_title('Najpopularniejsze kategorie biznesu')
ax.set_xlabel('Liczba osób')
ax.set_ylabel('Kategoria')
# Print each bar's value next to it.
for container in ax.containers:
    ax.bar_label(container, fontsize=8)
plt.show()