wizualizacja_projekt_3/main.py

import dash
import dash_bootstrap_components as dbc
import plotly.express as px
from dash.dependencies import Input, Output
from dash import dcc, html, dash_table
from wordcloud import WordCloud
from utils.graphs import create_engine_vol_histogram, create_price_boxplot
from utils.data import cleanup
import pandas as pd

# Number of rows the Data Explorer table shows per page.
PAGE_SIZE = 15

# Dash application using the Bootstrap stylesheet. Callback exceptions are
# suppressed; the callbacks below target ids ('select', 'data-explorer', ...)
# that only exist once the matching page has been rendered.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.config.suppress_callback_exceptions = True

# Load the listings and run them through the project's cleanup step.
df = cleanup(pd.read_csv("car_prices.csv"))

# Figures built once at start-up.
engine_vol_fig = create_engine_vol_histogram(df)
price_boxplot_fig = px.box(df, x="year", y="price")

# Headline numbers shown in the summary badges on the main dashboard.
total_cars = len(df)
average_price = int(df["price"].mean())
average_mileage = int(df["mileage"].mean())

# Navigation sidebar: logo, divider and one pill-style link per page.
sidebar = html.Div(
    className="sidebar",
    children=[
        html.Img(src=app.get_asset_url("otomoto.svg"), width=222),
        html.Hr(),
        dbc.Nav(
            children=[
                dbc.NavLink(label, href=target, active="exact")
                for label, target in (
                    ("Main Dashboard", "/"),
                    ("Data Explorer", "/explorer"),
                    ("Documentation", "/docs"),
                )
            ],
            vertical=True,
            pills=True,
        ),
    ],
)

# Empty container; the routing callback writes the current page into it.
content = html.Div(id="page-content", children=[], className="content")

# Top-level layout: URL tracker, sidebar, then the page container.
app.layout = html.Div(children=[dcc.Location(id="url"), sidebar, content])


def _summary_button(label, value):
    """Return a primary button showing *label* with *value* in a light badge."""
    return dbc.Button(
        [
            label,
            dbc.Badge(value, color="light", text_color="primary", className="ms-1"),
        ],
        color="primary",
    )


def _dashboard_page():
    """Build the main dashboard: summary badges, word cloud and graphs."""
    summary = dbc.Card(
        dbc.CardBody(
            [
                _summary_button("Total cars", total_cars),
                _summary_button("Average price", average_price),
                _summary_button("Average mileage", average_mileage),
            ],
            className="summary",
        ),
        className="summary-wrapper",
    )

    graphs_left = html.Div(
        [
            html.Img(
                id="word_cloud",
                src=app.get_asset_url("wordcloud.png"),
                className="wordcloud",
            ),
            dcc.Graph(
                id='bargraph',
                figure=engine_vol_fig,
                className="engine_vol_histogram",
            ),
        ],
        className="graphs-left",
    )

    # The box plot has no initial figure; it is filled in by the callback
    # listening on the 'select' dropdown.
    graphs_right = html.Div(
        [
            dbc.Select(
                id="select",
                options=[{'label': 'All', 'value': 'All'}]
                + [{'label': x, 'value': x} for x in df['mark'].unique()],
            ),
            dcc.Graph(id='price_boxplot', className='price-boxplot'),
        ],
        className="graphs-right",
    )

    return [
        summary,
        html.Div([graphs_left, graphs_right], className="graphs"),
    ]


def _explorer_page():
    """Build the Data Explorer page with a server-side paged/sorted/filtered table."""
    return [
        html.H1('Data explorer'),
        dash_table.DataTable(
            id='data-explorer',
            columns=[
                {'name': i, 'id': i, 'deletable': True} for i in sorted(df.columns)
            ],
            # 'custom' actions: paging, filtering and sorting are delegated to
            # a callback instead of being done in the browser.
            page_current=0,
            page_size=PAGE_SIZE,
            page_action='custom',

            filter_action='custom',
            filter_query='',

            sort_action='custom',
            sort_mode='multi',
            sort_by=[],

            style_header={
                'backgroundColor': 'rgb(30, 30, 30)',
                'color': 'white'
            },
            style_filter={
                'backgroundColor': 'rgb(30, 30, 30)',
                'color': 'white'
            },
            style_data={
                'backgroundColor': 'rgb(50, 50, 50)',
                'color': 'white'
            }
        )
    ]


def _docs_page():
    """Build the static documentation page describing the dataset."""
    return [
        html.H1('Documentation',
                style={'textAlign': 'center'}),
        html.P('This dataset contains +150000 observations about cars for sale from otomoto.pl site.'),
        html.P('Each row describes a single car. There are 7 features:'),
        html.Ul([
            html.Li('- mark'),
            html.Li('- model'),
            html.Li('- price (in PLN)'),
            html.Li('- year'),
            # Spelled like the actual column name read elsewhere in this file.
            html.Li('- mileage (in KM)'),
            html.Li('- vol_engine (in DM^3)'),
            html.Li('- fuel (type of fuel)')
        ])
    ]


def _not_found_page(pathname):
    """Build the 404 message shown for any unrecognised *pathname*."""
    # `dbc.Jumbotron` no longer exists in dash-bootstrap-components 1.x (the
    # version whose Badge accepts `text_color`, as used above), so the same
    # look is produced with a plain Div and Bootstrap utility classes.
    return html.Div(
        dbc.Container(
            [
                html.H1("404: Not found", className="text-danger"),
                html.Hr(),
                html.P(f"The pathname {pathname} was not recognised..."),
            ],
            fluid=True,
            className="py-3",
        ),
        className="p-3 bg-light rounded-3",
    )


@app.callback(
    Output("page-content", "children"),
    [Input("url", "pathname")]
)
def render_page_content(pathname):
    """Return the components for the page addressed by *pathname*.

    Args:
        pathname: Path part of the current URL, supplied by the ``url``
            ``dcc.Location`` component.

    Returns:
        A list of components for ``/``, ``/explorer`` and ``/docs``; a single
        404 component for every other path.
    """
    if pathname == "/":
        return _dashboard_page()
    if pathname == "/explorer":
        return _explorer_page()
    if pathname == "/docs":
        return _docs_page()

    # If the user tries to reach a different page, return a 404 message
    return _not_found_page(pathname)

@app.callback(
    Output('price_boxplot', 'figure'),
    Input('select', 'value')
)
def update_output(value):
    """Return the price box plot for the make chosen in the dropdown.

    With no selection, or with the 'All' entry selected, the plot is built
    from the whole dataset; otherwise only rows whose 'mark' equals *value*
    are used.
    """
    show_everything = value is None or value == 'All'
    selection = df if show_everything else df.loc[df['mark'] == value]
    return create_price_boxplot(selection)

# Spellings accepted for each filter operator. The first entry of every row
# is the canonical name (word operators carry a trailing space so they are
# only recognised as whole words); further entries are symbolic aliases.
# Order matters: on a tie the earlier row wins, so '>=' beats '>' and
# '<=' beats '<'.
operators = [['ge ', '>='],
             ['le ', '<='],
             ['lt ', '<'],
             ['gt ', '>'],
             ['ne ', '!='],
             ['eq ', '='],
             ['contains '],
             ['datestartswith ']]


def split_filter_part(filter_part):
    """Parse one ``{column} operator value`` clause of a filter query.

    Args:
        filter_part: A single clause, e.g. ``'{price} ge 5000'`` or
            ``'{model} contains "range rover"'``.

    Returns:
        A ``(column_name, operator_name, value)`` tuple, where
        ``operator_name`` is the canonical name without the trailing space
        and ``value`` is a float when the text parses as one, otherwise a
        string with surrounding quotes removed. ``[None, None, None]`` is
        returned when the clause has no operator or no value.
    """
    # Only look for the operator after the `{column}` reference. Scanning the
    # whole clause in table order would mistake operator-like text inside the
    # value (the "ge " in "range rover") for the operator itself.
    search_from = filter_part.find('}') + 1  # 0 when there is no brace

    found = None  # (position, matched spelling, canonical name)
    for operator_type in operators:
        for operator in operator_type:
            position = filter_part.find(operator, search_from)
            # Strict '<' keeps the earlier-listed spelling on equal positions.
            if position != -1 and (found is None or position < found[0]):
                # word operators need spaces after them in the filter string,
                # but we don't want these later
                found = (position, operator, operator_type[0].strip())

    if found is None:
        return [None] * 3

    position, operator, operator_name = found
    name_part = filter_part[:position]
    value_part = filter_part[position + len(operator):].strip()
    name = name_part[name_part.find('{') + 1: name_part.rfind('}')]

    # A clause whose value has not been typed yet filters nothing.
    if not value_part:
        return [None] * 3

    v0 = value_part[0]
    if v0 == value_part[-1] and v0 in ("'", '"', '`'):
        # Quoted value: drop the quotes and unescape embedded ones.
        value = value_part[1: -1].replace('\\' + v0, v0)
    else:
        try:
            value = float(value_part)
        except ValueError:
            value = value_part

    return name, operator_name, value


def _filter_text(value):
    """Return a parsed filter value as the text to search for.

    Whole-number floats are rendered without the trailing ``.0`` so that a
    numeric-looking filter such as ``2015`` can match the text form of an
    integer column.
    """
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


@app.callback(
    Output('data-explorer', 'data'),
    Input('data-explorer', "page_current"),
    Input('data-explorer', "page_size"),
    Input('data-explorer', 'sort_by'),
    Input('data-explorer', 'filter_query'))
def update_table(page_current, page_size, sort_by, filter):
    """Return the rows of the current Data Explorer page.

    Applies every clause of the filter query, then the requested multi-column
    sort, and finally slices out the requested page.

    Args:
        page_current: Zero-based index of the page to show.
        page_size: Number of rows per page.
        sort_by: List of ``{'column_id': ..., 'direction': 'asc'|'desc'}``
            dicts in sort priority order.
        filter: Filter query string; clauses are joined by ``' && '``.

    Returns:
        The page's rows as a list of ``{column: value}`` dicts.
    """
    dff = df
    for filter_part in (filter or '').split(' && '):
        col_name, operator, filter_value = split_filter_part(filter_part)

        if operator in ('eq', 'ne', 'lt', 'le', 'gt', 'ge'):
            # these operators match pandas series operator method names
            dff = dff.loc[getattr(dff[col_name], operator)(filter_value)]
        elif operator in ('contains', 'datestartswith'):
            # Compare against the text form of the column so numeric columns
            # can be searched too, and treat the user's input as a literal
            # substring rather than a regular expression.
            column = dff[col_name]
            text = column.astype(str).str
            needle = _filter_text(filter_value)
            if operator == 'contains':
                matches = text.contains(needle, regex=False)
            else:
                # this is a simplification of the front-end filtering logic,
                # only works with complete fields in standard format
                matches = text.startswith(needle)
            # Missing values never match, whatever their text form is.
            dff = dff.loc[matches & column.notna()]

    if sort_by:
        dff = dff.sort_values(
            [col['column_id'] for col in sort_by],
            ascending=[col['direction'] == 'asc' for col in sort_by],
        )

    start = page_current * page_size
    return dff.iloc[start: start + page_size].to_dict('records')

if __name__=='__main__':
    app.run_server(debug=True, port=3000)