358 KiB
358 KiB
import folium
import warnings
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from difflib import get_close_matches
from plotly.subplots import make_subplots
# Apply matplotlib's 'ggplot' style sheet to any matplotlib figures drawn afterwards.
plt.style.use('ggplot')
# Install a global "ignore" filter: every warning raised after this point is suppressed.
# NOTE(review): this also hides deprecation notices; narrow the filter if those matter.
warnings.filterwarnings("ignore")
Read and clean up data
Replace NaN values in the following columns: latitude_country, longitude_country, country, city, gdp_country.
# Load the dataset and fill the gaps in the columns used further down.
df = pd.read_csv('data.csv')

# Placeholder written into each column wherever the value is missing:
# 0.0 for the coordinate and GDP columns, 'Unknown' for country and city.
for column, placeholder in {
    'latitude_country': 0.0,
    'longitude_country': 0.0,
    'country': 'Unknown',
    'city': 'Unknown',
    'gdp_country': 0.0,
}.items():
    df[column] = df[column].fillna(placeholder)

# Scale finalWorth down by 1000.
# NOTE(review): presumably millions -> billions, since the plots label this
# value as 'Net Worth (Billions)' - confirm against the data source.
df['finalWorth'] = df['finalWorth'] / 1000
Interactive plots using plotly
Plot made using Plotly that displays age, net worth and the name of the billionaire on hover
# Bubble chart of the first 20 rows of df: age on x, net worth on y.
# Marker colour follows age, marker size follows net worth, and personName
# is added to the hover box.
first_twenty = df.head(20)
fig = px.scatter(
    first_twenty,
    x='age',
    y='finalWorth',
    color='age',
    size='finalWorth',
    hover_data=['personName'],
    title='Age vs Net Worth',
)

# Axis titles, tick decoration ($...b on the y axis), log-scaled x axis,
# legend title and the global font are all applied in one layout update.
chart_font = dict(family='Courier New, monospace', size=18, color='RebeccaPurple')
fig.update_layout(
    xaxis=dict(title=dict(text='Age'), type='log'),
    yaxis=dict(
        title=dict(text='Net Worth (Billions)'),
        tickprefix='$',
        ticksuffix='b',
    ),
    legend=dict(title=dict(text='Age')),
    font=chart_font,
)
fig.show()
Plot made using Plotly that displays on hover: the industry, the number of billionaires and the % of billionaires that are from that industry
# Count rows per industry. The 'industries' value is split on commas first, so a
# row listing several industries contributes one count to each of them.
industry_lists = df['industries'].str.split(',')
industry_counts = industry_lists.explode().value_counts()

# Donut chart (pie with a 0.3 hole) with the word 'Industry' placed in the centre.
industry_donut = go.Pie(labels=industry_counts.index, values=industry_counts, hole=0.3)
centre_label = dict(text='Industry', x=0.5, y=0.5, font_size=20, showarrow=False)

fig = go.Figure(data=[industry_donut])
fig.update_layout(title='Billionaires by Industry', annotations=[centre_label])
fig.show()
Pie plots made with Plotly that show the gender distribution and the self-made share by gender
# Three donut charts side by side: overall gender split, self-made share among
# men, and self-made share among women.
gender_counts = df['gender'].str.split(',').explode().value_counts()
df_women = df[df['gender'] == 'F']
selfMade_counts_women = df_women['selfMade'].value_counts()
df_men = df[df['gender'] == 'M']
selfMade_counts_men = df_men['selfMade'].value_counts()


def _donut_figure(counts):
    """Return a one-trace donut figure for *counts* (a value_counts Series).

    Each slice is labelled with its percentage and its category name.
    """
    trace = go.Pie(
        labels=counts.index,
        values=counts,
        hole=0.3,
        textinfo='percent+label',
    )
    return go.Figure(data=[trace])


fig1 = _donut_figure(gender_counts)
fig2 = _donut_figure(selfMade_counts_men)
fig3 = _donut_figure(selfMade_counts_women)

# Every cell holds a pie trace, so every cell is declared as a 'domain' subplot.
fig = make_subplots(
    rows=1,
    cols=3,
    specs=[[{'type': 'domain'} for _ in range(3)]],
)
for col, donut in enumerate((fig1, fig2, fig3), start=1):
    fig.add_trace(donut.data[0], row=1, col=col)

fig.update_layout(height=400, showlegend=False, title_text="Distribution of Gender and SelfMade by Gender")

# Captions placed under each donut; x positions are hand-tuned per column.
for x_pos, caption in (
    (0.14, "% Gender Distribution"),
    (0.50, "% SelfMade (M)"),
    (0.85, "% SelfMade (F)"),
):
    fig.add_annotation(dict(x=x_pos, y=-0.1, ax=0, ay=0, text=caption))
fig.show()
Maps using plotly and folium
Map made using folium; clicking a marker shows the name of the country and the number of billionaires from that country
# Folium map with one marker per country. Each marker sits at the mean of that
# country's latitude_country / longitude_country values, and its popup shows the
# country name and the number of rows (billionaires) for that country.
m = folium.Map(location=[40, -100], zoom_start=4)

# Aggregate once per country instead of re-filtering the whole frame several
# times for every country. sort=False keeps countries in order of first
# appearance, which is the order df['country'].unique() would give.
country_stats = df.groupby('country', sort=False).agg(
    lat=('latitude_country', 'mean'),
    lon=('longitude_country', 'mean'),
    num_billionaires=('latitude_country', 'size'),  # size counts rows, NaN included
)

# itertuples keeps the integer dtype of the count, so the popup reads "5", not "5.0".
for country, lat, lon, num_billionaires in country_stats.itertuples(name=None):
    folium.Marker([lat, lon], popup=f"{country} Num of billionaires {num_billionaires}").add_to(m)

# Bare expression: renders the map as the notebook cell's output.
m
Make this Notebook Trusted to load map: File -> Trust Notebook
Adjust state names for display
# Keep only the rows that have a state value. The explicit copy makes dd an
# independent frame, so adding a column to it later neither touches df nor
# triggers pandas' SettingWithCopyWarning.
dd = df.dropna(subset=["state"]).copy()
# Two-letter abbreviation -> full name lookup used to normalise state values.
states = {
    'AK': 'Alaska', 'AL': 'Alabama', 'AR': 'Arkansas', 'AS': 'American Samoa',
    'AZ': 'Arizona', 'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut',
    'DC': 'District of Columbia', 'DE': 'Delaware', 'FL': 'Florida', 'GA': 'Georgia',
    'GU': 'Guam', 'HI': 'Hawaii', 'IA': 'Iowa', 'ID': 'Idaho', 'IL': 'Illinois',
    'IN': 'Indiana', 'KS': 'Kansas', 'KY': 'Kentucky', 'LA': 'Louisiana', 'MA': 'Massachusetts',
    'MD': 'Maryland', 'ME': 'Maine', 'MI': 'Michigan', 'MN': 'Minnesota', 'MO': 'Missouri',
    'MP': 'Northern Mariana Islands', 'MS': 'Mississippi', 'MT': 'Montana', 'NA': 'National',
    'NC': 'North Carolina', 'ND': 'North Dakota', 'NE': 'Nebraska', 'NH': 'New Hampshire',
    'NJ': 'New Jersey', 'NM': 'New Mexico', 'NV': 'Nevada', 'NY': 'New York', 'OH': 'Ohio',
    'OK': 'Oklahoma', 'OR': 'Oregon', 'PA': 'Pennsylvania', 'PR': 'Puerto Rico', 'RI': 'Rhode Island',
    'SC': 'South Carolina', 'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', 'UT': 'Utah',
    'VA': 'Virginia', 'VI': 'Virgin Islands', 'VT': 'Vermont', 'WA': 'Washington', 'WI': 'Wisconsin',
    'WV': 'West Virginia', 'WY': 'Wyoming',
}

# Reverse lookups (name -> abbreviation), built once instead of on every call.
_ABBR_BY_NAME = {name: abbr for abbr, name in states.items()}
_ABBR_BY_FOLDED_NAME = {name.casefold(): abbr for abbr, name in states.items()}


def best_match(x):
    """Return the key of ``states`` that best matches *x*, or None.

    Args:
        x: A raw state value. Either a two-letter abbreviation in any case,
            or a (possibly misspelled) full name. Surrounding whitespace is
            ignored. Non-string values such as None or NaN yield None.

    Returns:
        The two-letter abbreviation, or None when *x* is not a string, is a
        two-letter code missing from ``states``, or is not close enough
        (difflib ratio below 0.8) to any known name.
    """
    if not isinstance(x, str):
        return None

    text = x.strip()
    if len(text) == 2:
        abbr = text.upper()
        return abbr if abbr in states else None

    # Exact-case comparison first, so spellings that already matched keep
    # resolving to the same state.
    matches = get_close_matches(text, _ABBR_BY_NAME, n=1, cutoff=0.8)
    if matches:
        return _ABBR_BY_NAME[matches[0]]

    # Fallback: case-insensitive comparison, so e.g. 'new york' is recognised
    # even though its case differences push the exact-case ratio below 0.8.
    matches = get_close_matches(text.casefold(), _ABBR_BY_FOLDED_NAME, n=1, cutoff=0.8)
    if matches:
        return _ABBR_BY_FOLDED_NAME[matches[0]]
    return None
# Translate every raw state value into its abbreviation via best_match, which
# returns None when nothing matches.
dd['state_corrected'] = dd['state'].apply(best_match)
# Bare expression: shows the distinct abbreviations as the notebook cell's output.
dd['state_corrected'].unique()
array(['TX', 'WA', 'HI', 'NE', 'NY', 'CA', 'KS', 'AR', 'OR', 'VA', 'WY', 'NV', 'FL', 'PA', 'MA', 'IL', 'TN', 'CT', 'OK', 'MI', 'WI', 'CO', 'IN', 'NH', 'GA', 'MO', 'NJ', 'NC', 'IA', 'KY', 'MD', 'MT', 'OH', 'SC', 'AZ', 'LA', 'UT', 'RI', 'ID', 'VI', 'MN', 'ME', 'MS', 'SD', 'AL'], dtype=object)
Plot made with Plotly displaying U.S. states; on hover: name of the state and the billionaire population in that state
# Count billionaires per U.S. state and draw the counts on a state-level choropleth.
# The per-state count is taken from the U.S. rows only, so rows from other
# countries that happen to carry a state value are excluded.
us_rows = dd[dd['country'] == 'United States']
d = us_rows.groupby('state_corrected')['personName'].count().reset_index(name='rich_pop_usa')

fig = px.choropleth(
    d,
    locations="state_corrected",
    locationmode='USA-states',
    scope="usa",
    color="rich_pop_usa",
    color_continuous_scale="Viridis",
)

# Custom hover box: "<Full name> (<abbr>)" in bold, then the count for that state.
fig.update_traces(
    customdata=d['rich_pop_usa'].values,
    hovertemplate='<b>%{text}</b><br>Billionaires Population: %{customdata}',
    text=[f"{states[abbr]} ({abbr})" for abbr in d['state_corrected']],
    marker_line_color='white',
    marker_line_width=0.5,
)
fig.update_layout(
    title_text='Rich Population by U.S. State',
    geo_scope='usa',
)
fig.show()
Plot made using Plotly to display life expectancy in each country; on hover: name of the country and life expectancy
# World choropleth coloured by life expectancy, with the country name as hover title.
# df has one row per billionaire, so it is first reduced to the distinct
# (country, life expectancy) pairs; the figure then carries one entry per
# country instead of one per person.
country_life_expectancy = df[['country', 'life_expectancy_country']].drop_duplicates()
fig = px.choropleth(
    country_life_expectancy,
    locations="country",
    locationmode='country names',
    color="life_expectancy_country",
    hover_name="country",
    color_continuous_scale="Viridis",
)
fig.show()