279 KiB
279 KiB
Statystyki miliarderów
Zbiór danych zawiera statystyki dotyczące miliarderów na świecie. Zawarte są w nim również ich dane osobowe, branże, którymi się zajmują, oraz firmy.
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter, StrMethodFormatter
# Apply the ggplot theme to every figure drawn further down.
plt.style.use('ggplot')

# Read the dataset from the working directory. The trailing bare name makes
# the notebook render a preview of the frame.
df = pd.read_csv(filepath_or_buffer='data.csv')
df
rank | finalWorth | category | personName | age | country | city | source | industries | countryOfCitizenship | ... | cpi_change_country | gdp_country | gross_tertiary_education_enrollment | gross_primary_education_enrollment_country | life_expectancy_country | tax_revenue_country_country | total_tax_rate_country | population_country | latitude_country | longitude_country | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 211000 | Fashion & Retail | Bernard Arnault & family | 74.0 | France | Paris | LVMH | Fashion & Retail | France | ... | 1.1 | $2,715,518,274,227 | 65.6 | 102.5 | 82.5 | 24.2 | 60.7 | 6.705989e+07 | 46.227638 | 2.213749 |
1 | 2 | 180000 | Automotive | Elon Musk | 51.0 | United States | Austin | Tesla, SpaceX | Automotive | United States | ... | 7.5 | $21,427,700,000,000 | 88.2 | 101.8 | 78.5 | 9.6 | 36.6 | 3.282395e+08 | 37.090240 | -95.712891 |
2 | 3 | 114000 | Technology | Jeff Bezos | 59.0 | United States | Medina | Amazon | Technology | United States | ... | 7.5 | $21,427,700,000,000 | 88.2 | 101.8 | 78.5 | 9.6 | 36.6 | 3.282395e+08 | 37.090240 | -95.712891 |
3 | 4 | 107000 | Technology | Larry Ellison | 78.0 | United States | Lanai | Oracle | Technology | United States | ... | 7.5 | $21,427,700,000,000 | 88.2 | 101.8 | 78.5 | 9.6 | 36.6 | 3.282395e+08 | 37.090240 | -95.712891 |
4 | 5 | 106000 | Finance & Investments | Warren Buffett | 92.0 | United States | Omaha | Berkshire Hathaway | Finance & Investments | United States | ... | 7.5 | $21,427,700,000,000 | 88.2 | 101.8 | 78.5 | 9.6 | 36.6 | 3.282395e+08 | 37.090240 | -95.712891 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2635 | 2540 | 1000 | Healthcare | Yu Rong | 51.0 | China | Shanghai | Health clinics | Healthcare | China | ... | 2.9 | $19,910,000,000,000 | 50.6 | 100.2 | 77.0 | 9.4 | 59.2 | 1.397715e+09 | 35.861660 | 104.195397 |
2636 | 2540 | 1000 | Food & Beverage | Richard Yuengling, Jr. | 80.0 | United States | Pottsville | Beer | Food & Beverage | United States | ... | 7.5 | $21,427,700,000,000 | 88.2 | 101.8 | 78.5 | 9.6 | 36.6 | 3.282395e+08 | 37.090240 | -95.712891 |
2637 | 2540 | 1000 | Manufacturing | Zhang Gongyun | 60.0 | China | Gaomi | Tyre manufacturing machinery | Manufacturing | China | ... | 2.9 | $19,910,000,000,000 | 50.6 | 100.2 | 77.0 | 9.4 | 59.2 | 1.397715e+09 | 35.861660 | 104.195397 |
2638 | 2540 | 1000 | Real Estate | Zhang Guiping & family | 71.0 | China | Nanjing | Real estate | Real Estate | China | ... | 2.9 | $19,910,000,000,000 | 50.6 | 100.2 | 77.0 | 9.4 | 59.2 | 1.397715e+09 | 35.861660 | 104.195397 |
2639 | 2540 | 1000 | Diversified | Inigo Zobel | 66.0 | Philippines | Makati | Diversified | Diversified | Philippines | ... | 2.5 | $376,795,508,680 | 35.5 | 107.5 | 71.1 | 14.0 | 43.1 | 1.081166e+08 | 12.879721 | 121.774017 |
2640 rows × 35 columns
# Number of missing values in each column (isna is the same check as isnull).
df.isna().sum()
rank 0 finalWorth 0 category 0 personName 0 age 65 country 38 city 72 source 0 industries 0 countryOfCitizenship 0 organization 2315 selfMade 0 status 0 gender 0 birthDate 76 lastName 0 firstName 3 title 2301 date 0 state 1887 residenceStateRegion 1893 birthYear 76 birthMonth 76 birthDay 76 cpi_country 184 cpi_change_country 184 gdp_country 164 gross_tertiary_education_enrollment 182 gross_primary_education_enrollment_country 181 life_expectancy_country 182 tax_revenue_country_country 183 total_tax_rate_country 182 population_country 164 latitude_country 164 longitude_country 164 dtype: int64
# Distribution of finalWorth across all rows, split into 20 bins.
ax = sns.histplot(data=df, x='finalWorth', bins=20)
ax.set(xlabel='Majątek', ylabel='Liczba osób')
plt.show()
# Horizontal bar chart of the first ten rows of the frame, one bar per person.
# NOTE(review): head(10) gives the ten richest people only if the CSV is
# ordered by rank, which the rendered preview of df suggests — confirm.
ax = sns.barplot(
    df.head(10),
    x='finalWorth',
    y='personName',
    hue='personName',
    legend=False,
    orient='h',
    palette='rainbow',
)
ax.set_title('Najbardziej majętne osoby')
# Wealth runs along x and names along y, so each label must follow that
# mapping (they were previously swapped).
ax.set_xlabel('Majątek')
ax.set_ylabel('Osoba')
# Print the numeric value next to each bar.
for container in ax.containers:
    ax.bar_label(container, fontsize=8)
plt.show()
# Count rows per country of citizenship and keep the 20 largest groups.
# After the aggregation the 'rank' column holds the number of people, not a
# ranking position.
ppl_in_countries = (
    df.groupby('countryOfCitizenship')['rank']
    .count()
    .reset_index()
    .sort_values(by='rank', ascending=False)
    .head(20)
)
ax = sns.barplot(
    ppl_in_countries,
    x='rank',
    y='countryOfCitizenship',
    orient='h',
    hue='countryOfCitizenship',
    legend=False,
    palette='rainbow',
)
ax.set_title('Kraj z największą liczbą miliarderów')
ax.set_ylabel('Kraje')
# Spelling fixed in the user-facing label ('milarderów' -> 'miliarderów').
ax.set_xlabel('Liczba miliarderów')
# Print the count next to each bar.
for container in ax.containers:
    ax.bar_label(container, fontsize=8)
plt.show()
# Mean finalWorth for every distinct age. The bare name displays the
# resulting Series in the notebook.
age_and_wealthy = df['finalWorth'].groupby(df['age']).mean()
age_and_wealthy
age 18.0 3500.000000 19.0 1700.000000 20.0 2300.000000 21.0 2600.000000 26.0 1450.000000 ... 96.0 4366.666667 97.0 1425.000000 98.0 1750.000000 99.0 4375.000000 101.0 1300.000000 Name: finalWorth, Length: 79, dtype: float64
# Line plot of finalWorth against age with the error band disabled.
# seaborn aggregates repeated x values with its default estimator (the mean),
# so the line shows the average worth per age.
ax = sns.lineplot(df, x='age', y='finalWorth', errorbar=None)
# Label the axes in Polish like the other charts instead of leaving the raw
# column names.
ax.set_xlabel('Wiek')
ax.set_ylabel('Średni majątek')
plt.show()
# Count rows per business category, order the counts from largest to
# smallest and keep the first 20. The 'rank' column then holds the number of
# people in each category.
per_category = df.groupby('category')['rank'].count()
categories = per_category.sort_values(ascending=False).reset_index().head(20)

ax = sns.barplot(
    data=categories,
    x='rank',
    y='category',
    orient='h',
    hue='category',
    legend=False,
    palette='rainbow',
)
ax.set(
    title='Najpopularniejsze kategorie biznesu',
    ylabel='Kategoria',
    xlabel='Liczba osób',
)
# Print the count next to each bar.
for bars in ax.containers:
    ax.bar_label(bars, fontsize=8)
plt.show()