7.9 MiB
7.9 MiB
import numpy as np
import plotly.express as px
from statsmodels.nonparametric.kernel_regression import KernelReg
import plotly.graph_objs as go
import pandas as pd
import KernelRegression
np.random.seed(1)# xwidth controls the range of x values.
xwidth = 20
x = np.arange(0,xwidth,1)# we want to add some noise to the x values so that dont sit at regular intervals
x_residuals = np.random.normal(scale=0.2, size=[x.shape[0]])# new_x is the range of x values we will be using all the way through
new_x = x + x_residuals# We generate residuals for y values since we want to show some variation in the data
num_points = x.shape[0]
residuals = np.random.normal(scale=2.0, size=[num_points])# We will be using fun_y to generate y values all the way through
fun_y = lambda x: -(x*x) + residuals
new_x
array([ 0.32486907, 0.87764872, 1.89436565, 2.78540628, 4.17308153, 4.53969226, 6.34896235, 6.84775862, 8.06380782, 8.95012592, 10.29242159, 10.58797186, 11.93551656, 12.92318913, 14.22675389, 14.78002175, 15.96551436, 16.82442832, 18.00844275, 19.11656304])
np.random.seed(1)# xwidth controls the range of x values.
xwidth = 20
x = np.arange(0,xwidth,1)# we want to add some noise to the x values so that dont sit at regular intervals
x_residuals = np.random.normal(scale=0.2, size=[x.shape[0]])# new_x is the range of x values we will be using all the way through
new_x = x + x_residuals# We generate residuals for y values since we want to show some variation in the data
num_points = x.shape[0]
residuals = np.random.normal(scale=2.0, size=[num_points])# We will be using fun_y to generate y values all the way through
fun_y = lambda x: -(x*x) + residuals
# Plot the x and y values
px.scatter(x=new_x,y=fun_y(new_x), title='Figure 1: Visualizing the generated data')
#ker_log = KernelReg(new_x, fun_y(new_x), 'c')
#fig = px.scatter(x=new_x,y=fun_y(new_x), title='Figure 2: Statsmodels fit to generated data')
#fig.add_trace(go.Scatter(x=new_x, y=pred_y, name='Statsmodels fit', mode='lines'))
kernel_x = np.arange(-xwidth,xwidth, 0.1)
bw_manual = 2
#def gauss_const(h):
# """
# #Returns the normalization constant for a gaussian
# """
# return 1/(h*np.sqrt(np.pi*2))
#
#def gauss_exp(ker_x, xi, h):
# """
# #Returns the gaussian function exponent term
# """
# num = - 0.5*np.square((xi- ker_x))
# den = h*h
# return num/den
#
#def kernel_function(h, ker_x, xi):
# """
# #Returns the gaussian function value. Combines the gauss_const and
# #gauss_exp to get this result
# """
# const = gauss_const(h)
# gauss_val = const*np.exp(gauss_exp(ker_x,xi,h))
# return gauss_val
# We are selecting a single point and calculating the Kernel value
input_x = new_x[0]
col1 = KernelRegression.gauss_const(bw_manual)
col2= KernelRegression.gauss_exp(kernel_x, input_x, bw_manual)
col3 = KernelRegression.kernel_function(bw_manual, kernel_x, input_x)
# Dataframe for a single observation point x_i. In the code x_i comes from new_x
data = {'Input_x': [input_x for x in range(col2.shape[0])],
'kernel_x': kernel_x,
'gaussian_const': [col1 for x in range(col2.shape[0])],
'gaussian_exp': col2,
'full_gaussian_value': col3,
'bw':[bw_manual for x in range(col2.shape[0])],
}
single_pt_KE = pd.DataFrame(data=data)
single_pt_KE
Input_x | kernel_x | gaussian_const | gaussian_exp | full_gaussian_value | bw | |
---|---|---|---|---|---|---|
0 | 0.324869 | -20.0 | 0.199471 | -51.637538 | 7.481390e-24 | 2 |
1 | 0.324869 | -19.9 | 0.199471 | -51.130666 | 1.241978e-23 | 2 |
2 | 0.324869 | -19.8 | 0.199471 | -50.626294 | 2.056647e-23 | 2 |
3 | 0.324869 | -19.7 | 0.199471 | -50.124423 | 3.397190e-23 | 2 |
4 | 0.324869 | -19.6 | 0.199471 | -49.625051 | 5.597502e-23 | 2 |
... | ... | ... | ... | ... | ... | ... |
395 | 0.324869 | 19.5 | 0.199471 | -45.960706 | 2.184737e-21 | 2 |
396 | 0.324869 | 19.6 | 0.199471 | -46.441334 | 1.351030e-21 | 2 |
397 | 0.324869 | 19.7 | 0.199471 | -46.924462 | 8.333837e-22 | 2 |
398 | 0.324869 | 19.8 | 0.199471 | -47.410091 | 5.127898e-22 | 2 |
399 | 0.324869 | 19.9 | 0.199471 | -47.898219 | 3.147371e-22 | 2 |
400 rows × 6 columns
# Plotting a scatter plot of Kernel
px.line(x=kernel_x, y=col3, title='Figure 3: Kernel function for a single input value')
## Plotting gaussian for all input x points
kernel_fns = {'kernel_x': kernel_x}
for input_x in new_x:
input_string= 'x_value_{}'.format(np.round(input_x,2))
kernel_fns[input_string] = KernelRegression.kernel_function(bw_manual, kernel_x, input_x)
kernels_df = pd.DataFrame(data=kernel_fns)
y_all = kernels_df.drop(columns='kernel_x')
px.line(kernels_df, x='kernel_x', y=y_all.columns, title='Gaussian for all input points', range_x=[-5,20])
def weights(bw_manual, input_x, all_input_values ):
w_row = []
for x_i in all_input_values:
ki = kernel_function(bw_manual, x_i, input_x)
ki_sum = np.sum(KernelRegression.kernel_function(bw_manual, all_input_values, input_x))
w_row.append(ki/ki_sum)
return w_row
def single_y_pred(bw_manual, input_x, iks, igrek):
w = weights(bw_manual, input_x, iks)
y_single = np.sum(np.dot(igrek,w))
return y_single
#ypred_single = single_y_pred(bw_manual, new_x[0], new_x)
Y_pred = []
for input_x in new_x:
w = []
Y_single = single_y_pred(bw_manual, input_x, new_x)
Y_pred.append(Y_single)
[0;31m---------------------------------------------------------------------------[0m [0;31mTypeError[0m Traceback (most recent call last) [0;32m<ipython-input-16-4be17de67e18>[0m in [0;36m<module>[0;34m[0m [1;32m 2[0m [0;32mfor[0m [0minput_x[0m [0;32min[0m [0mnew_x[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m [1;32m 3[0m [0mw[0m [0;34m=[0m [0;34m[[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m [0;32m----> 4[0;31m [0mY_single[0m [0;34m=[0m [0msingle_y_pred[0m[0;34m([0m[0mbw_manual[0m[0;34m,[0m [0minput_x[0m[0;34m,[0m [0mnew_x[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m [0m[1;32m 5[0m [0mY_pred[0m[0;34m.[0m[0mappend[0m[0;34m([0m[0mY_single[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m [0;31mTypeError[0m: single_y_pred() missing 1 required positional argument: 'igrek'
data= {'x': new_x, 'y': fun_y(new_x), 'y_manual': np.array(y_all)}
fig = px.scatter(x=new_x,y=fun_y(x))
#fig.add_trace(go.Scatter(x=new_x, y=pred_y, name='Statsmodel KR', mode='lines'))
fig.add_trace(go.Scatter(x=new_x, y=np.array(Y_pred), name='Manual KR', mode='lines'))
fires_thefts = pd.read_csv('fires_thefts.csv', names=['x','y'])
single_pt_KE
Input_x | kernel_x | gaussian_const | gaussian_exp | full_gaussian_value | bw | |
---|---|---|---|---|---|---|
0 | 39.7 | -21 | 0.398942 | -264.5 | 5.370560e-116 | 1 |
1 | 39.7 | -20 | 0.398942 | -242.0 | 3.174282e-106 | 1 |
2 | 39.7 | -19 | 0.398942 | -220.5 | 6.902029e-97 | 1 |
3 | 39.7 | -18 | 0.398942 | -200.0 | 5.520948e-88 | 1 |
4 | 39.7 | -17 | 0.398942 | -180.5 | 1.624636e-79 | 1 |
5 | 39.7 | -16 | 0.398942 | -162.0 | 1.758750e-71 | 1 |
6 | 39.7 | -15 | 0.398942 | -144.5 | 7.004182e-64 | 1 |
7 | 39.7 | -14 | 0.398942 | -128.0 | 1.026163e-56 | 1 |
8 | 39.7 | -13 | 0.398942 | -112.5 | 5.530710e-50 | 1 |
9 | 39.7 | -12 | 0.398942 | -98.0 | 1.096607e-43 | 1 |
10 | 39.7 | -11 | 0.398942 | -84.5 | 7.998828e-38 | 1 |
11 | 39.7 | -10 | 0.398942 | -72.0 | 2.146384e-32 | 1 |
12 | 39.7 | -9 | 0.398942 | -60.5 | 2.118819e-27 | 1 |
13 | 39.7 | -8 | 0.398942 | -50.0 | 7.694599e-23 | 1 |
14 | 39.7 | -7 | 0.398942 | -40.5 | 1.027977e-18 | 1 |
15 | 39.7 | -6 | 0.398942 | -32.0 | 5.052271e-15 | 1 |
16 | 39.7 | -5 | 0.398942 | -24.5 | 9.134720e-12 | 1 |
17 | 39.7 | -4 | 0.398942 | -18.0 | 6.075883e-09 | 1 |
18 | 39.7 | -3 | 0.398942 | -12.5 | 1.486720e-06 | 1 |
19 | 39.7 | -2 | 0.398942 | -8.0 | 1.338302e-04 | 1 |
20 | 39.7 | -1 | 0.398942 | -4.5 | 4.431848e-03 | 1 |
21 | 39.7 | 0 | 0.398942 | -2.0 | 5.399097e-02 | 1 |
22 | 39.7 | 1 | 0.398942 | -0.5 | 2.419707e-01 | 1 |
23 | 39.7 | 2 | 0.398942 | -0.0 | 3.989423e-01 | 1 |
24 | 39.7 | 3 | 0.398942 | -0.5 | 2.419707e-01 | 1 |
25 | 39.7 | 4 | 0.398942 | -2.0 | 5.399097e-02 | 1 |
26 | 39.7 | 5 | 0.398942 | -4.5 | 4.431848e-03 | 1 |
27 | 39.7 | 6 | 0.398942 | -8.0 | 1.338302e-04 | 1 |
28 | 39.7 | 7 | 0.398942 | -12.5 | 1.486720e-06 | 1 |
29 | 39.7 | 8 | 0.398942 | -18.0 | 6.075883e-09 | 1 |
30 | 39.7 | 9 | 0.398942 | -24.5 | 9.134720e-12 | 1 |
31 | 39.7 | 10 | 0.398942 | -32.0 | 5.052271e-15 | 1 |
32 | 39.7 | 11 | 0.398942 | -40.5 | 1.027977e-18 | 1 |
33 | 39.7 | 12 | 0.398942 | -50.0 | 7.694599e-23 | 1 |
34 | 39.7 | 13 | 0.398942 | -60.5 | 2.118819e-27 | 1 |
35 | 39.7 | 14 | 0.398942 | -72.0 | 2.146384e-32 | 1 |
36 | 39.7 | 15 | 0.398942 | -84.5 | 7.998828e-38 | 1 |
37 | 39.7 | 16 | 0.398942 | -98.0 | 1.096607e-43 | 1 |
38 | 39.7 | 17 | 0.398942 | -112.5 | 5.530710e-50 | 1 |
39 | 39.7 | 18 | 0.398942 | -128.0 | 1.026163e-56 | 1 |
40 | 39.7 | 19 | 0.398942 | -144.5 | 7.004182e-64 | 1 |
41 | 39.7 | 20 | 0.398942 | -162.0 | 1.758750e-71 | 1 |
XXX = np.sort(np.array(fires_thefts.x))
YYY = np.array(fires_thefts.y)
bw_manual =3
kernel_x = np.arange(-20,20, 0.1)
## Plotting gaussian for all input x points
kernel_fns = {'kernel_x': kernel_x}
for input_x in XXX:
input_string= 'x_value_{}'.format(np.round(input_x,2))
kernel_fns[input_string] = KernelRegression.kernel_function(bw_manual, kernel_x, input_x)
kernels_df = pd.DataFrame(data=kernel_fns)
y_all = kernels_df.drop(columns='kernel_x')
px.line(kernels_df, x='kernel_x', y=y_all.columns, title='Gaussian for all input points', range_x=[-5,100])
Y_pred = []
for input_x in XXX:
w = []
Y_single = KernelRegression.single_y_pred(bw_manual, input_x, XXX, YYY)
Y_pred.append(Y_single)
data= {'x': XXX, 'y': YYY, 'y_manual': np.array(y_all)}
fig = px.scatter(x=XXX,y=YYY)
#fig.add_trace(go.Scatter(x=new_x, y=pred_y, name='Statsmodel KR', mode='lines'))
fig.add_trace(go.Scatter(x=XXX, y=np.array(Y_pred), name='Manual KR', mode='lines'))