52 lines
1.7 KiB
Python
52 lines
1.7 KiB
Python
from sklearn.gaussian_process.kernels import Kernel, Hyperparameter
|
|
from sklearn.gaussian_process.kernels import GenericKernelMixin
|
|
from sklearn.gaussian_process.kernels import StationaryKernelMixin
|
|
import numpy as np
|
|
from sklearn.base import clone
|
|
|
|
|
|
class MiniSeqKernel(GenericKernelMixin,
                    StationaryKernelMixin,
                    Kernel):
    '''
    A minimal (but valid) convolutional kernel for sequences of variable
    length.

    The kernel value of two sequences is the sum, over every pair of
    elements (one taken from each sequence), of 1.0 when the two elements
    compare equal and ``baseline_similarity`` otherwise.

    Parameters
    ----------
    baseline_similarity : float, default=0.5
        Contribution of a pair of unequal elements to the kernel value.
    baseline_similarity_bounds : pair of floats or "fixed", default=(1e-5, 1)
        Bounds passed to the ``Hyperparameter`` describing
        ``baseline_similarity``.
    '''

    def __init__(self,
                 baseline_similarity=0.5,
                 baseline_similarity_bounds=(1e-5, 1)):
        self.baseline_similarity = baseline_similarity
        self.baseline_similarity_bounds = baseline_similarity_bounds

    @property
    def hyperparameter_baseline_similarity(self):
        '''Hyperparameter specification for ``baseline_similarity``.'''
        return Hyperparameter("baseline_similarity",
                              "numeric",
                              self.baseline_similarity_bounds)

    def _f(self, s1, s2):
        '''Kernel value between the sequences ``s1`` and ``s2``.'''
        return sum(1.0 if c1 == c2 else self.baseline_similarity
                   for c1 in s1
                   for c2 in s2)

    def _g(self, s1, s2):
        '''
        Derivative of ``_f(s1, s2)`` with respect to
        ``log(baseline_similarity)``.
        '''
        mismatches = sum(0.0 if c1 == c2 else 1.0 for c1 in s1 for c2 in s2)
        # scikit-learn optimises the log-transformed hyperparameter, so the
        # raw derivative (the mismatch count) is scaled by the parameter
        # itself: d f / d log(b) = b * d f / d b.
        return self.baseline_similarity * mismatches

    def __call__(self, X, Y=None, eval_gradient=False):
        '''
        Return the kernel matrix k(X, Y) and, optionally, its gradient.

        Parameters
        ----------
        X : iterable of sequences
            Left argument of the kernel.
        Y : iterable of sequences, default=None
            Right argument of the kernel; ``X`` is used when ``None``.
        eval_gradient : bool, default=False
            Whether to also return the gradient with respect to the log of
            the kernel hyperparameter.

        Returns
        -------
        K : ndarray
            Kernel values for every pair ``(x, y)`` with ``x`` in ``X`` and
            ``y`` in ``Y``.
        K_gradient : ndarray
            Only returned when ``eval_gradient`` is true. The trailing axis
            has length 1, or length 0 when the hyperparameter is fixed.
        '''
        if Y is None:
            Y = X

        K = np.array([[self._f(x, y) for y in Y] for x in X])
        if not eval_gradient:
            return K

        if self.hyperparameter_baseline_similarity.fixed:
            # A fixed hyperparameter is not part of theta, so the gradient
            # has no components along its last axis.
            return K, np.empty((len(X), len(Y), 0))

        K_gradient = np.array([[[self._g(x, y)] for y in Y] for x in X])
        return K, K_gradient

    def diag(self, X):
        '''Return the kernel value of every sequence in ``X`` with itself.'''
        return np.array([self._f(x, x) for x in X])

    def clone_with_theta(self, theta):
        '''Return a clone of this kernel whose ``theta`` is set to ``theta``.'''
        cloned = clone(self)
        cloned.theta = theta
        return cloned
|