kerreg-MPSKICB/KernelRegression.ipynb
2021-05-31 21:29:28 +02:00

4.2 MiB

import numpy as np
import plotly.express as px
from statsmodels.nonparametric.kernel_regression import KernelReg
import plotly.graph_objs as go
import pandas as pd 
import KernelRegression
# NOTE(review): this setup cell was pasted twice verbatim in the notebook
# export; because the duplicate re-seeded with the same seed, it produced
# byte-identical values and left the RNG in the same state, so it is dropped.
np.random.seed(1)

# xwidth controls the range of x values.
xwidth = 20
x = np.arange(0, xwidth, 1)
# Add some noise to the x values so that they don't sit at regular intervals.
x_residuals = np.random.normal(scale=0.2, size=[x.shape[0]])
# new_x is the range of x values we will be using all the way through.
new_x = x + x_residuals
# Residuals for the y values, to show some variation in the data.
num_points = x.shape[0]
residuals = np.random.normal(scale=2.0, size=[num_points])
# fun_y generates y values all the way through.  It closes over the fixed
# `residuals` vector, so it only broadcasts cleanly for inputs of length
# num_points (or scalars).
fun_y = lambda x: -(x*x) + residuals
# Plot the x and y values 
px.scatter(x=new_x,y=fun_y(new_x), title='Figure 1:  Visualizing the generated data')
# NOTE(review): the statsmodels comparison below is disabled; `pred_y` is never
# defined anywhere in this file, so re-enabling these lines as-is would raise
# a NameError.
#ker_log = KernelReg(new_x, fun_y(new_x), 'c')
#fig = px.scatter(x=new_x,y=fun_y(new_x),  title='Figure 2: Statsmodels fit to generated data')
#fig.add_trace(go.Scatter(x=new_x, y=pred_y, name='Statsmodels fit',  mode='lines'))
# Grid on which a single kernel is visualised below, and the manual bandwidth
# used throughout the manual kernel-regression fit.
kernel_x = np.arange(-2, 2, 0.01)
bw_manual = 3

def epanechnikov_one(h, ker_x, xi): 
    """
    Evaluate the Epanechnikov kernel K(u) = 0.75 * (1 - u^2), floored at 0,
    with u = (xi - ker_x) / h.

    Parameters
    ----------
    h : float
        Bandwidth.
    ker_x : float or array-like
        Evaluation point(s).
    xi : float
        Kernel centre.

    Returns
    -------
    Kernel value (scalar, or element-wise array matching ker_x); exactly 0
    outside the kernel's support.
    """
    value = 0.75*(1-np.square((xi-ker_x)/h))
    # np.maximum clips at zero like the original scalar `if value < 0`, but
    # also works element-wise when ker_x is an array (the scalar `if` raised
    # "truth value of an array is ambiguous" there).
    return np.maximum(value, 0)

def epanechnikov_list(h, ker_x, xi): 
    """
    Evaluate the Epanechnikov kernel at every point of `ker_x`.

    With u = (xi - ker_x) / h, each entry is 0.75 * (1 - u^2), floored at
    zero, returned as a plain Python list.
    """
    scaled = (xi - ker_x) / h
    raw = 0.75 * (1 - scaled * scaled)
    # Floor each entry at zero (points outside the kernel support).
    return [max(entry, 0) for entry in raw]


def weights_epanechnikov(bw_manual, input_x, all_input_values ): 
    """
    Compute normalised Epanechnikov kernel weights for a single query point
    against every observed x value.

    Parameters
    ----------
    bw_manual : float
        Kernel bandwidth.
    input_x : float
        Query point.
    all_input_values : array-like
        All observed x values.

    Returns
    -------
    list of weights (one per observed x) summing to 1 whenever at least one
    kernel value is non-zero.
    """
    # The normalising constant does not depend on the loop variable; compute
    # it once.  (The original recomputed it on every iteration, turning the
    # function into an accidental O(n^2) pass.)
    ki_sum = np.sum(epanechnikov_list(bw_manual, all_input_values, input_x))
    return [epanechnikov_one(bw_manual, x_i, input_x) / ki_sum
            for x_i in all_input_values]

def single_y_pred_epanechnikov(bw_manual, input_x, x_values, y_values): 
    """
    Kernel-regression prediction at a single query point: the Epanechnikov
    kernel-weighted sum of the observed y values.

    Parameters
    ----------
    bw_manual : float
        Kernel bandwidth.
    input_x : float
        Query point.
    x_values, y_values : array-like
        Observed data, in matching order.

    Returns
    -------
    The predicted y value (scalar).
    """
    w = weights_epanechnikov(bw_manual, input_x, x_values)
    # np.dot of two 1-D vectors is already a scalar; the original wrapped it
    # in a redundant np.sum.
    return np.dot(y_values, w)

# We are selecting a single point and calculating the Kernel value
input_x = 0
# col1/col2 come from the project-local KernelRegression module (presumably
# the constant and exponential parts of a Gaussian kernel — TODO confirm);
# they are computed here but only col3 (Epanechnikov) is plotted.
col1 = KernelRegression.gauss_const(bw_manual)
col2= KernelRegression.gauss_exp(kernel_x, input_x, bw_manual)
# Epanechnikov kernel with bandwidth 1, centred at input_x, over kernel_x.
col3 = epanechnikov_list(1, kernel_x, input_x)
# Plotting a scatter plot of Kernel 
px.line(x=kernel_x, y=col3, title='Figure 3: Kernel function for a single input value')
# Widen the evaluation grid so every input point's kernel fits on it.
kernel_x = np.arange(-20,20,0.1)
## Plotting gaussian for all input x points 
# One DataFrame column per input point, holding that point's kernel curve.
kernel_fns = {'kernel_x': kernel_x}
for input_x in new_x: 
    column_name = 'x_value_{}'.format(np.round(input_x, 2))
    kernel_fns[column_name] = epanechnikov_list(bw_manual, kernel_x, input_x)

kernels_df = pd.DataFrame(data=kernel_fns)
# Everything except the grid column is a kernel curve to draw.
y_all = kernels_df.drop(columns='kernel_x')
px.line(kernels_df, x='kernel_x', y=y_all.columns, title='Gaussian for all input points', range_x=[-5,20])
# Predict y at every input point with the manual Epanechnikov regression.
# (The original reset an unused list `w = []` on every iteration and
# re-evaluated fun_y(new_x) per point; fun_y is deterministic here, so the
# observations are computed once.)
y_observed = fun_y(new_x)
Y_pred = [single_y_pred_epanechnikov(bw_manual, input_x, new_x, y_observed)
          for input_x in new_x]
0.75
0.7245362220487647
0.544723374613922
0.24547972278828456
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.7245362220487647
0.75
0.6638572233017156
0.4467050915628227
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.544723374613922
0.6638572233017156
0.75
0.6838372168982741
0.3172878295848327
0.16685392672748403
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.24547972278828456
0.4467050915628227
0.6838372168982741
0.75
0.589529783309622
0.49354005688658215
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.3172878295848327
0.589529783309622
0.75
0.7387997140966217
0.3554618855656637
0.15384187025557097
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.16685392672748403
0.49354005688658215
0.7387997140966217
0.75
0.47721181111671457
0.3060691401285782
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.3554618855656637
0.47721181111671457
0.75
0.729266857004094
0.5049420855397245
0.18616233928180906
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.15384187025557097
0.3060691401285782
0.729266857004094
0.75
0.6267686953884891
0.3816709762096485
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.5049420855397245
0.6267686953884891
0.75
0.6845366846267166
0.33610672268335007
0.2190496587316103
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.18616233928180906
0.3816709762096485
0.6845366846267166
0.75
0.5998535295351387
0.5264550582594569
0.007286896723078945
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.33610672268335007
0.5998535295351387
0.75
0.7427208364578497
0.5250199094736663
0.1732551784801908
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.2190496587316103
0.5264550582594569
0.7427208364578497
0.75
0.5986769398788213
0.2955633581152344
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.007286896723078945
0.5250199094736663
0.5986769398788213
0.75
0.668708574560694
0.31251929174672033
0.07573251993303509
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.1732551784801908
0.2955633581152344
0.668708574560694
0.75
0.6083932431707011
0.46268105255802106
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.31251929174672033
0.6083932431707011
0.75
0.7244912231022096
0.4980593356571671
0.18767396386514232
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.07573251993303509
0.46268105255802106
0.7244912231022096
0.75
0.6328839389171269
0.4017001480864578
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.4980593356571671
0.6328839389171269
0.75
0.6885222344066844
0.40220363248716107
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.18767396386514232
0.4017001480864578
0.6885222344066844
0.75
0.6331758185516356
0.31217653303032067
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.40220363248716107
0.6331758185516356
0.75
0.6476724512795832
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.31217653303032067
0.6476724512795832
0.75
# Collect the generated data and the manual predictions.
# NOTE(review): 'y_manual' originally stored np.array(y_all) — the per-point
# kernel-curve DataFrame — which is not a prediction; Y_pred is what the
# 'Manual KR' trace below actually plots.
data= {'x': new_x, 'y': fun_y(new_x), 'y_manual': np.array(Y_pred)}
# Bug fix: the scatter's x axis is new_x, so y must be evaluated at new_x as
# well (the original plotted fun_y(x) against x=new_x, mismatching Figure 1).
fig = px.scatter(x=new_x,y=fun_y(new_x))
#fig.add_trace(go.Scatter(x=new_x, y=pred_y, name='Statsmodel KR',  mode='lines'))
fig.add_trace(go.Scatter(x=new_x, y=np.array(Y_pred), name='Manual KR',  mode='lines'))
# Real-data example: fit the fires/thefts dataset with the project-local
# KernelRegression.ker_reg helper (Gaussian kernel, bandwidth 2).
fires_thefts = pd.read_csv('fires_thefts.csv', names=['x','y'])
# Bug fix: the original sorted x alone (np.sort) while y stayed in file
# order, destroying the pairing between XXX[i] and YYY[i].  Sort both arrays
# by the same permutation instead.
order = np.argsort(np.array(fires_thefts.x))
XXX = np.array(fires_thefts.x)[order]
YYY = np.array(fires_thefts.y)[order]

Y_pred = KernelRegression.ker_reg(XXX, YYY, 2, 'gauss')

fig = px.scatter(x=XXX,y=YYY)
fig.add_trace(go.Scatter(x=XXX, y=np.array(Y_pred), name='Manual KR',  mode='lines'))