358 KiB

Raw Permalink Blame History

import folium
import warnings
import pandas as pd 
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from difflib import get_close_matches
from plotly.subplots import make_subplots

plt.style.use('ggplot')
warnings.filterwarnings("ignore")

Read and clean up data

Replace NaN values in the following columns: latitude_country, longitude_country, country, city, gdp_country.

# Load the dataset from the CSV file in the working directory.
df = pd.read_csv('data.csv')

# Placeholder used for each column's missing entries: the coordinate and GDP
# columns get 0.0, the country and city columns get the label 'Unknown'.
fill_values = {
    'latitude_country': 0.0,
    'longitude_country': 0.0,
    'country': 'Unknown',
    'city': 'Unknown',
    'gdp_country': 0.0,
}
for column, placeholder in fill_values.items():
    df[column] = df[column].fillna(placeholder)

# Scale net worth down by a factor of 1000.
# NOTE(review): presumably converts millions to billions -- confirm the
# unit of `finalWorth` in the source data.
df['finalWorth'] = df['finalWorth'].div(1000)

Interactive plots using plotly

Plot made using plotly that displays, on hover, the age, net worth and name of each billionaire

# Bubble chart of the first 20 rows of the dataset: age on the x axis, net
# worth on the y axis. Marker colour follows age, marker size follows net
# worth, and the person's name is added to the hover box.
first_twenty = df.head(20)
fig = px.scatter(
    first_twenty,
    x='age',
    y='finalWorth',
    color='age',
    size='finalWorth',
    hover_data=['personName'],
    title='Age vs Net Worth',
)

# Text styling applied to the whole figure.
label_font = dict(family='Courier New, monospace', size=18, color='RebeccaPurple')

# The y ticks are rendered as "$<value>b"; the x axis uses a log scale.
fig.update_layout(
    xaxis_title='Age',
    yaxis_title='Net Worth (Billions)',
    yaxis=dict(tickprefix='$', ticksuffix='b'),
    xaxis=dict(type='log'),
    legend_title='Age',
    font=label_font,
)
fig.show()

Plot made using plotly that displays, on hover, the industry, the number of billionaires in it, and the percentage of all billionaires that come from that industry

# Count how often each industry occurs. An entry listing several industries
# separated by commas contributes one count to each of them.
industry_lists = df['industries'].str.split(',')
industry_counts = industry_lists.explode().value_counts()

# Donut chart (pie with a 30% hole) with one slice per industry.
industry_pie = go.Pie(
    labels=industry_counts.index,
    values=industry_counts,
    hole=0.3,
)
fig = go.Figure(data=[industry_pie])

# Title the figure and print the word "Industry" in the middle of the hole.
centre_label = dict(text='Industry', x=0.5, y=0.5, font_size=20, showarrow=False)
fig.update_layout(title='Billionaires by Industry', annotations=[centre_label])
fig.show()

Pie plots made with plotly that show the gender distribution and the self-made share by gender

# Number of rows per gender value.
gender_counts = df['gender'].str.split(',').explode().value_counts()

# Split the data by gender ('F' / 'M') and count the `selfMade` values
# within each group.
df_women = df.loc[df['gender'] == 'F']
df_men = df.loc[df['gender'] == 'M']
selfMade_counts_women = df_women['selfMade'].value_counts()
selfMade_counts_men = df_men['selfMade'].value_counts()

# One donut chart per series; every slice shows its percentage and label.
fig1 = go.Figure(data=[go.Pie(
    labels=gender_counts.index, values=gender_counts,
    hole=0.3, textinfo='percent+label',
)])
fig2 = go.Figure(data=[go.Pie(
    labels=selfMade_counts_men.index, values=selfMade_counts_men,
    hole=0.3, textinfo='percent+label',
)])
fig3 = go.Figure(data=[go.Pie(
    labels=selfMade_counts_women.index, values=selfMade_counts_women,
    hole=0.3, textinfo='percent+label',
)])

# Combine the three donuts side by side in a single row.
# NOTE(review): the third cell is declared as 'sunburst' although it receives
# a pie trace like the other two -- confirm whether 'domain' was intended.
fig = make_subplots(
    rows=1,
    cols=3,
    specs=[[{'type': 'domain'}, {'type': 'domain'}, {'type': 'sunburst'}]],
)
for column, source_fig in enumerate((fig1, fig2, fig3), start=1):
    fig.add_trace(source_fig.data[0], row=1, col=column)

fig.update_layout(height=400, showlegend=False, title_text="Distribution of Gender and SelfMade by Gender")

# Caption under each donut, given as (x position, text).
captions = (
    (0.14, "% Gender Distribution"),
    (0.50, "% SelfMade (M)"),
    (0.85, "% SelfMade (F)"),
)
for x_position, caption in captions:
    fig.add_annotation(dict(x=x_position, y=-0.1, ax=0, ay=0, text=caption))

fig.show()

Maps using plotly and folium

Map made using folium; clicking a marker shows the name of the country and the number of billionaires from that country

# Base map centred on latitude 40, longitude -100 at zoom level 4.
m = folium.Map(location=[40, -100], zoom_start=4)

# Aggregate once per country instead of re-filtering the whole frame several
# times per country: mean latitude, mean longitude and number of rows.
# `sort=False` keeps countries in order of first appearance, and rows whose
# country is NaN are left out by groupby (a NaN location is not a usable
# marker position).
per_country = df.groupby('country', sort=False).agg(
    lat=('latitude_country', 'mean'),
    lon=('longitude_country', 'mean'),
    num_billionaires=('latitude_country', 'size'),
)

# One marker per country; its popup shows the country name and the count.
# itertuples() keeps the count an integer so the popup text has no ".0".
for row in per_country.itertuples():
    folium.Marker(
        [row.lat, row.lon],
        popup=f"{row.Index} Num of billionaires {row.num_billionaires}",
    ).add_to(m)

# Last expression of the cell: renders the map in the notebook.
m

Make this Notebook Trusted to load map: File -> Trust Notebook

Adjust states names to display

# Keep only the rows that have a state value. An explicit copy is taken so
# that columns added to `dd` later are written to an independent frame and
# never to a view of `df` (avoids pandas' chained-assignment ambiguity).
dd = df.dropna(subset=["state"]).copy()

# Two-letter code -> display name for US states, DC and territories.
# NOTE(review): 'NA': 'National' is not a state or territory -- confirm it
# is intentional.
states = {
    'AK': 'Alaska', 'AL': 'Alabama', 'AR': 'Arkansas', 'AS': 'American Samoa',
    'AZ': 'Arizona', 'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut',
    'DC': 'District of Columbia', 'DE': 'Delaware', 'FL': 'Florida', 'GA': 'Georgia',
    'GU': 'Guam', 'HI': 'Hawaii', 'IA': 'Iowa', 'ID': 'Idaho', 'IL': 'Illinois',
    'IN': 'Indiana', 'KS': 'Kansas', 'KY': 'Kentucky', 'LA': 'Louisiana', 'MA': 'Massachusetts',
    'MD': 'Maryland', 'ME': 'Maine', 'MI': 'Michigan', 'MN': 'Minnesota', 'MO': 'Missouri',
    'MP': 'Northern Mariana Islands', 'MS': 'Mississippi', 'MT': 'Montana', 'NA': 'National',
    'NC': 'North Carolina', 'ND': 'North Dakota', 'NE': 'Nebraska', 'NH': 'New Hampshire',
    'NJ': 'New Jersey', 'NM': 'New Mexico', 'NV': 'Nevada', 'NY': 'New York', 'OH': 'Ohio',
    'OK': 'Oklahoma', 'OR': 'Oregon', 'PA': 'Pennsylvania', 'PR': 'Puerto Rico', 'RI': 'Rhode Island',
    'SC': 'South Carolina', 'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', 'UT': 'Utah',
    'VA': 'Virginia', 'VI': 'Virgin Islands', 'VT': 'Vermont', 'WA': 'Washington', 'WI': 'Wisconsin',
    'WV': 'West Virginia', 'WY': 'Wyoming'
}

# Reverse lookup (display name -> code), built once instead of scanning the
# keys and values of `states` on every call.
_ABBR_BY_STATE_NAME = {name: abbr for abbr, name in states.items()}


def best_match(x):
    """Return the two-letter code in `states` that best matches `x`.

    A two-character value is treated as an abbreviation (case-insensitive)
    and returned upper-cased when it is a key of `states`. Any other string
    is fuzzy-matched against the full names with a similarity cutoff of 0.8.
    Surrounding whitespace is ignored.

    Args:
        x: The raw state value, normally a string.

    Returns:
        The matching two-letter code, or None when nothing matches or `x`
        is not a string (for example NaN or None).
    """
    # Missing values arrive as float NaN / None; len() would fail on them.
    if not isinstance(x, str):
        return None

    candidate = x.strip()
    if len(candidate) == 2:
        abbr = candidate.upper()
        return abbr if abbr in states else None

    matches = get_close_matches(candidate, states.values(), n=1, cutoff=0.8)
    return _ABBR_BY_STATE_NAME[matches[0]] if matches else None

# Normalise every raw state value to a two-letter code; values that
# `best_match` cannot resolve become None.
dd['state_corrected'] = dd['state'].map(best_match)

# Show the distinct codes that were produced (cell output).
dd['state_corrected'].unique()

array(['TX', 'WA', 'HI', 'NE', 'NY', 'CA', 'KS', 'AR', 'OR', 'VA', 'WY',
       'NV', 'FL', 'PA', 'MA', 'IL', 'TN', 'CT', 'OK', 'MI', 'WI', 'CO',
       'IN', 'NH', 'GA', 'MO', 'NJ', 'NC', 'IA', 'KY', 'MD', 'MT', 'OH',
       'SC', 'AZ', 'LA', 'UT', 'RI', 'ID', 'VI', 'MN', 'ME', 'MS', 'SD',
       'AL'], dtype=object)

Plot made with plotly displaying the states of the USA; on hover: the name of the state and the billionaire population of that state

# Restrict to rows whose country is the United States, then count people per
# corrected state code. The grouping runs on the filtered rows (not on `dd`)
# so that rows from other countries cannot leak into the state totals.
us_rows = dd[dd['country'] == 'United States']
d = (
    us_rows.groupby('state_corrected')['personName']
    .count()
    .reset_index(name='rich_pop_usa')
)

# Choropleth of the USA coloured by the per-state count.
fig = px.choropleth(d, locations="state_corrected", locationmode='USA-states',
                    scope="usa", color="rich_pop_usa",
                    color_continuous_scale="Viridis")

# Custom hover box: "<State name> (<code>)" in bold, then the count.
# Thin white borders are drawn between states.
fig.update_traces(customdata=d['rich_pop_usa'].values,
                  hovertemplate='<b>%{text}</b><br>Billionaires Population: %{customdata}',
                  text=[f"{states[abbr]} ({abbr})" for abbr in d['state_corrected']],
                  marker_line_color='white', marker_line_width=0.5)

fig.update_layout(
    title_text='Rich Population by U.S. State',
    geo_scope='usa',
)

fig.show()

Plot made using plotly to display life expectancy in each country; on hover: the name of the country and its life expectancy

# `df` has one row per person, so each country appears many times. Collapse
# it to one row per country before plotting so every location is sent to
# plotly once; `first()` takes the first non-null value in each group.
# NOTE(review): assumes life expectancy is constant within a country -- confirm.
life_expectancy = df.groupby('country', as_index=False)['life_expectancy_country'].first()

# World choropleth coloured by life expectancy; the country name is the
# hover title.
fig = px.choropleth(life_expectancy, locations="country", locationmode='country names',
                    color="life_expectancy_country", hover_name="country",
                    color_continuous_scale="Viridis")
fig.show()

358 KiB Raw Permalink Blame History

Read and clean up data

Interactive plots using plotly

Plot made using plotly that displays, on hover, the age, net worth and name of each billionaire

Plot made using plotly that displays, on hover, the industry, the number of billionaires in it, and the percentage of all billionaires that come from that industry

Pie plots made with plotly that show the gender distribution and the self-made share by gender

Maps using plotly and folium

Map made using folium; clicking a marker shows the name of the country and the number of billionaires from that country

Adjust states names to display

Plot made with plotly displaying the states of the USA; on hover: the name of the state and the billionaire population of that state

Plot made using plotly to display life expectancy in each country; on hover: the name of the country and its life expectancy

358 KiB

Raw Permalink Blame History