# (stray notebook-export residue removed here: "4.2 MiB" file-size text,
# which is not valid Python source)
import numpy as np
import plotly.express as px
from statsmodels.nonparametric.kernel_regression import KernelReg
import plotly.graph_objs as go
import pandas as pd
import KernelRegression
# --- Synthetic data generation -------------------------------------------
# NOTE(review): this cell was pasted twice in the original export; because
# the RNG is re-seeded before the second copy, running it once yields the
# exact same arrays and the same final RNG state, so the duplicate is gone.
np.random.seed(1)

# xwidth controls the range of x values.
xwidth = 20
x = np.arange(0, xwidth, 1)

# Add some jitter to the x values so they don't sit at regular intervals.
x_residuals = np.random.normal(scale=0.2, size=[x.shape[0]])
# new_x is the range of x values used throughout the rest of the script.
new_x = x + x_residuals

# Pre-drawn y-noise, shared by every call to fun_y below (so repeated calls
# with the same x return the same values).
num_points = x.shape[0]
residuals = np.random.normal(scale=2.0, size=[num_points])
# fun_y generates noisy y values for any x array of length num_points.
fun_y = lambda x: -(x*x) + residuals
# Plot the x and y values
# NOTE(review): the figure is not captured in a variable; in a plain script
# nothing is displayed — presumably this was a notebook cell where the last
# expression renders automatically.
px.scatter(x=new_x,y=fun_y(new_x), title='Figure 1: Visualizing the generated data')
# Disabled statsmodels reference fit: `pred_y` is never computed from
# `ker_log`, so the overlay trace below would fail if re-enabled as-is.
#ker_log = KernelReg(new_x, fun_y(new_x), 'c')
#fig = px.scatter(x=new_x,y=fun_y(new_x), title='Figure 2: Statsmodels fit to generated data')
#fig.add_trace(go.Scatter(x=new_x, y=pred_y, name='Statsmodels fit', mode='lines'))
# Grid on which a single kernel is evaluated for Figure 3.
kernel_x = np.arange(-2, 2, 0.01)
# Manually chosen bandwidth for the Epanechnikov kernel regression below.
bw_manual = 3
def epanechnikov_one(h, ker_x, xi):
    """Evaluate the (unnormalised) Epanechnikov kernel at a point.

    Parameters
    ----------
    h : float
        Bandwidth; scales the distance between ``xi`` and ``ker_x``.
    ker_x : float or array-like
        Evaluation point(s).
    xi : float
        Kernel centre.

    Returns
    -------
    float or np.ndarray
        ``0.75 * (1 - ((xi - ker_x) / h)**2)``, floored at 0 outside the
        kernel support.  The 1/h normalisation factor is omitted; weights
        built from this are normalised explicitly by the caller.
    """
    value = 0.75 * (1 - np.square((xi - ker_x) / h))
    # np.maximum also handles array inputs, where the previous
    # `if value < 0` would raise "truth value of an array is ambiguous".
    return np.maximum(value, 0)
def epanechnikov_list(h, ker_x, xi):
    """Evaluate the Epanechnikov kernel over an array of points.

    Same formula as ``epanechnikov_one`` but for an array ``ker_x``; the
    result is a plain list with values outside the kernel support (where
    the quadratic goes negative) floored at zero.
    """
    raw = 0.75 * (1 - np.square((xi - ker_x) / h))
    return [v if v > 0 else 0 for v in raw]
def weights_epanechnikov(bw_manual, input_x, all_input_values):
    """Return normalised Epanechnikov weights of every sample w.r.t. ``input_x``.

    Parameters
    ----------
    bw_manual : float
        Kernel bandwidth.
    input_x : float
        Query point for which the weights are computed.
    all_input_values : array-like
        All sample x positions.

    Returns
    -------
    list of float
        One weight per sample; the weights sum to 1 (provided at least one
        sample lies inside the kernel support around ``input_x``).
    """
    # The normalising constant is identical for every sample, so compute it
    # once instead of re-summing inside the loop (was accidentally O(n^2)).
    ki_sum = np.sum(epanechnikov_list(bw_manual, all_input_values, input_x))
    w_row = []
    for x_i in all_input_values:
        ki = epanechnikov_one(bw_manual, x_i, input_x)
        w_row.append(ki / ki_sum)
    return w_row
def single_y_pred_epanechnikov(bw_manual, input_x, x_values, y_values):
    """Predict y at ``input_x`` as the Epanechnikov-weighted mean of ``y_values``.

    ``x_values`` and ``y_values`` are the observed samples; the weights come
    from ``weights_epanechnikov`` and already sum to 1.
    """
    w = weights_epanechnikov(bw_manual, input_x, x_values)
    # np.dot of two 1-D vectors is already a scalar; the original wrapped it
    # in a redundant np.sum, which was a no-op and has been dropped.
    return np.dot(y_values, w)
# We are selecting a single point and calculating the Kernel value
input_x = 0
# Gaussian kernel pieces from the local KernelRegression module (constant
# factor and exponential part) — presumably kept for comparison with the
# Epanechnikov kernel; note col1/col2 are not plotted below.
col1 = KernelRegression.gauss_const(bw_manual)
col2= KernelRegression.gauss_exp(kernel_x, input_x, bw_manual)
# Epanechnikov kernel (bandwidth 1) evaluated on the grid around input_x.
col3 = epanechnikov_list(1, kernel_x, input_x)
# Plotting a scatter plot of Kernel
px.line(x=kernel_x, y=col3, title='Figure 3: Kernel function for a single input value')
# Wider grid so a kernel centred on every sample point fits in view.
kernel_x = np.arange(-20,20,0.1)
## Plotting gaussian for all input x points
# NOTE(review): despite the comment and title saying "Gaussian", these
# curves are Epanechnikov kernels — confirm which was intended.
kernel_fns = {'kernel_x': kernel_x}
for input_x in new_x:
    input_string= 'x_value_{}'.format(np.round(input_x,2))
    kernel_fns[input_string] = epanechnikov_list(bw_manual, kernel_x, input_x)
# One column per sample point, plus the shared x grid.
kernels_df = pd.DataFrame(data=kernel_fns)
y_all = kernels_df.drop(columns='kernel_x')
px.line(kernels_df, x='kernel_x', y=y_all.columns, title='Gaussian for all input points', range_x=[-5,20])
# Kernel-regression prediction at every sample point.
# Fixed: removed the dead `w = []` created (and never used) each iteration,
# and hoisted fun_y(new_x), which is identical on every pass of the loop.
y_observed = fun_y(new_x)
Y_pred = []
for input_x in new_x:
    Y_pred.append(single_y_pred_epanechnikov(bw_manual, input_x, new_x, y_observed))
# (stray notebook output removed here: a printed matrix of Epanechnikov
# kernel weight values, which is not valid Python source)
# Collect the generated data and the manual predictions side by side.
# Fixed: 'y_manual' previously pointed at `y_all` (the 400-row frame of
# per-sample kernel curves), which cannot be paired with the 20 x values;
# the manual predictions live in Y_pred.
data = {'x': new_x, 'y': fun_y(new_x), 'y_manual': np.array(Y_pred)}
# Fixed: was fun_y(x) — every other figure plots y at the jittered new_x
# positions actually used for the fit.
fig = px.scatter(x=new_x, y=fun_y(new_x))
#fig.add_trace(go.Scatter(x=new_x, y=pred_y, name='Statsmodel KR', mode='lines'))
fig.add_trace(go.Scatter(x=new_x, y=np.array(Y_pred), name='Manual KR', mode='lines'))
# Real-data demo: kernel regression of thefts (y) on fires (x).
fires_thefts = pd.read_csv('fires_thefts.csv', names=['x','y'])
# Fixed: np.sort() on x alone scrambled the (x, y) pairing — each y stayed
# at its original index while x moved.  Sort both arrays by the same
# permutation so every y keeps its own x.
order = np.argsort(np.array(fires_thefts.x))
XXX = np.array(fires_thefts.x)[order]
YYY = np.array(fires_thefts.y)[order]
# Gaussian kernel regression (bandwidth 2) from the local helper module.
Y_pred = KernelRegression.ker_reg(XXX, YYY, 2, 'gauss')
fig = px.scatter(x=XXX,y=YYY)
fig.add_trace(go.Scatter(x=XXX, y=np.array(Y_pred), name='Manual KR', mode='lines'))