# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Keras layers that implement explicit (approximate) kernel feature maps."""

import numpy as np
import tensorflow.compat.v2 as tf

from keras import initializers
from keras.engine import base_layer
from keras.engine import input_spec

# isort: off
from tensorflow.python.util.tf_export import keras_export

_SUPPORTED_RBF_KERNEL_TYPES = ["gaussian", "laplacian"]


@keras_export("keras.layers.experimental.RandomFourierFeatures")
class RandomFourierFeatures(base_layer.Layer):
    r"""Layer that projects its inputs into a random feature space.

    This layer implements a mapping from input space to a space with
    `output_dim` dimensions, which approximates shift-invariant kernels. A
    kernel function `K(x, y)` is shift-invariant if `K(x, y) == k(x - y)`
    for some function `k`. Many popular Radial Basis Functions (RBF),
    including Gaussian and Laplacian kernels, are shift-invariant.

    The implementation of this layer is based on the following paper:
    ["Random Features for Large-Scale Kernel Machines"](
    https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf)
    by Ali Rahimi and Ben Recht.

    The distribution from which the parameters of the random features map
    (layer) are sampled determines which shift-invariant kernel the layer
    approximates (see paper for more details). You can use the distribution
    of your choice. The layer supports out-of-the-box approximations of the
    following two RBF kernels:

    - Gaussian: `K(x, y) == exp(- square(x - y) / (2 * square(scale)))`
    - Laplacian: `K(x, y) == exp(-abs(x - y) / scale)`

    **Note:** Unlike what is described in the paper and unlike what is used
    in the Scikit-Learn implementation, the output of this layer does not
    apply the `sqrt(2 / D)` normalization factor.
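
    Because that factor is omitted, the dot product of the transformed
    features overestimates the kernel by a factor of `output_dim / 2`. A
    minimal sketch of how a kernel value can still be recovered (`x` and
    `y` are illustrative input batches, not part of the API):

    ```python
    layer = RandomFourierFeatures(output_dim=4096,
                                  kernel_initializer='gaussian')
    zx, zy = layer(x), layer(y)  # each of shape [batch_size, 4096]
    # Rescaling by 2 / output_dim restores the omitted normalization,
    # giving a Monte Carlo estimate of K(x, y) for each aligned row pair.
    kernel_estimate = tf.reduce_sum(zx * zy, axis=-1) * (2.0 / 4096)
    ```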

    **Usage:** Typically, this layer is used to "kernelize" linear models
    by applying a non-linear transformation (this layer) to the input
    features and then training a linear model on top of the transformed
    features. Depending on the loss function of the linear model, the
    composition of this layer and the linear model results in models that
    are equivalent (up to approximation) to kernel SVMs (for hinge loss),
    kernel logistic regression (for logistic loss), kernel linear
    regression (for squared loss), etc.

    Examples:

    A kernel multinomial logistic regression model with Gaussian kernel
    for MNIST:

    ```python
    model = keras.Sequential([
        keras.Input(shape=(784,)),
        RandomFourierFeatures(
            output_dim=4096,
            scale=10.0,
            kernel_initializer='gaussian'),
        keras.layers.Dense(units=10, activation='softmax'),
    ])
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['categorical_accuracy'],
    )
    ```

    A quasi-SVM classifier for MNIST:

    ```python
    model = keras.Sequential([
        keras.Input(shape=(784,)),
        RandomFourierFeatures(
            output_dim=4096,
            scale=10.0,
            kernel_initializer='gaussian'),
        keras.layers.Dense(units=10),
    ])
    model.compile(
        optimizer='adam',
        loss='hinge',
        metrics=['categorical_accuracy'],
    )
    ```

    To use another kernel, just replace the layer creation line with:

    ```python
    random_features_layer = RandomFourierFeatures(
        output_dim=500,
        kernel_initializer=<my_initializer>,
        scale=...,
        ...)
    ```
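
    For instance, a Keras `Initializer` instance can be passed instead of a
    string identifier. A sketch (`RandomNormal(stddev=1.0)` matches what the
    'gaussian' identifier uses internally, but with a non-string initializer
    the default `scale` falls back to 1.0, so set it explicitly):

    ```python
    random_features_layer = RandomFourierFeatures(
        output_dim=500,
        kernel_initializer=keras.initializers.RandomNormal(stddev=1.0),
        scale=10.0)
    ```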

    Args:
      output_dim: Positive integer, the dimension of the layer's output,
        i.e., the number of random features used to approximate the kernel.
      kernel_initializer: Determines the distribution of the parameters of
        the random features map (and therefore the kernel approximated by
        the layer). It can be either a string identifier or a Keras
        `Initializer` instance. Currently only 'gaussian' and 'laplacian'
        are supported as string identifiers (case insensitive). Note that
        the kernel matrix is not trainable.
      scale: For Gaussian and Laplacian kernels, this corresponds to a
        scaling factor of the corresponding kernel approximated by the
        layer (see concrete definitions above). When provided, it should be
        a positive float. If None, a default value is used: if the kernel
        initializer is set to "gaussian", `scale` defaults to
        `sqrt(input_dim / 2)`; otherwise, it defaults to 1.0. Both the
        approximation error of the kernel and the classification quality
        are sensitive to this parameter. If `trainable` is set to `True`,
        this parameter is learned end-to-end during training and the
        provided value serves as the initial value.
        **Note:** When features from this layer are fed to a linear model,
        making `scale` trainable renders the resulting optimization problem
        non-convex (even if the loss function used by the linear model is
        convex).
      trainable: Whether the scaling parameter of the layer should be
        trainable. Defaults to `False`.
      name: String, name to use for this layer.
    """

    def __init__(
        self,
        output_dim,
        kernel_initializer="gaussian",
        scale=None,
        trainable=False,
        name=None,
        **kwargs,
    ):
        if output_dim <= 0:
            raise ValueError(
                "`output_dim` should be a positive integer. "
                f"Received: {output_dim}"
            )
        if isinstance(kernel_initializer, str):
            if kernel_initializer.lower() not in _SUPPORTED_RBF_KERNEL_TYPES:
                raise ValueError(
                    f"Unsupported `kernel_initializer`: {kernel_initializer} "
                    f"Expected one of: {_SUPPORTED_RBF_KERNEL_TYPES}"
                )
        if scale is not None and scale <= 0.0:
            raise ValueError(
                "When provided, `scale` should be a positive float. "
                f"Received: {scale}"
            )
        super().__init__(trainable=trainable, name=name, **kwargs)
        self.output_dim = output_dim
        self.kernel_initializer = kernel_initializer
        self.scale = scale

    def build(self, input_shape):
        input_shape = tf.TensorShape(input_shape)
        # TODO(pmol): Allow higher dimension inputs. Currently the input is
        # expected to have shape [batch_size, dimension].
        if input_shape.rank != 2:
            raise ValueError(
                "The rank of the input tensor should be 2. "
                f"Received input with rank {input_shape.rank} instead. "
                f"Full input shape received: {input_shape}"
            )
        if input_shape.dims[1].value is None:
            raise ValueError(
                "The last dimension of the input tensor should be defined. "
                f"Found `None`. Full input shape received: {input_shape}"
            )
        self.input_spec = input_spec.InputSpec(
            ndim=2, axes={1: input_shape.dims[1].value}
        )
        input_dim = input_shape.dims[1].value

        kernel_initializer = _get_random_features_initializer(
            self.kernel_initializer, shape=(input_dim, self.output_dim)
        )
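
        # The fixed random projection matrix (`W` in Rahimi & Recht),
        # sampled once at build time and never trained directly.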
        self.unscaled_kernel = self.add_weight(
            name="unscaled_kernel",
            shape=(input_dim, self.output_dim),
            dtype=tf.float32,
            initializer=kernel_initializer,
            trainable=False,
        )
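
        # Random phase offsets (`b` in the paper), drawn uniformly from
        # [0, 2 * pi) so that cos(w^T x + b) gives an unbiased feature map.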
        self.bias = self.add_weight(
            name="bias",
            shape=(self.output_dim,),
            dtype=tf.float32,
            initializer=initializers.RandomUniform(
                minval=0.0, maxval=2 * np.pi
            ),
            trainable=False,
        )

        if self.scale is None:
            self.scale = _get_default_scale(self.kernel_initializer, input_dim)
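        # `kernel_scale` (the kernel bandwidth) is the only weight marked
        # trainable; whether it actually trains is governed by the layer's
        # `trainable` argument, and it is constrained to stay non-negative.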
        self.kernel_scale = self.add_weight(
            name="kernel_scale",
            shape=(1,),
            dtype=tf.float32,
            initializer=tf.compat.v1.constant_initializer(self.scale),
            trainable=True,
            constraint="NonNeg",
        )
        super().build(input_shape)

    def call(self, inputs):
        inputs = tf.convert_to_tensor(inputs, dtype=self.dtype)
        inputs = tf.cast(inputs, tf.float32)
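        # Feature map: cos(x @ (W / kernel_scale) + b). Dividing the fixed
        # projection by the (possibly trained) scale applies the bandwidth.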
        kernel = (1.0 / self.kernel_scale) * self.unscaled_kernel
        outputs = tf.matmul(a=inputs, b=kernel)
        outputs = tf.nn.bias_add(outputs, self.bias)
        return tf.cos(outputs)

    def compute_output_shape(self, input_shape):
        input_shape = tf.TensorShape(input_shape)
        input_shape = input_shape.with_rank(2)
        if input_shape.dims[-1].value is None:
            raise ValueError(
                "The last dimension of the input tensor should be defined. "
                f"Found `None`. Full input shape received: {input_shape}"
            )
        return input_shape[:-1].concatenate(self.output_dim)

    def get_config(self):
        kernel_initializer = self.kernel_initializer
        if not isinstance(kernel_initializer, str):
            kernel_initializer = initializers.serialize(kernel_initializer)
        config = {
            "output_dim": self.output_dim,
            "kernel_initializer": kernel_initializer,
            "scale": self.scale,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))


def _get_random_features_initializer(initializer, shape):
    """Returns Initializer object for random features."""

    def _get_cauchy_samples(loc, scale, shape):
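        # Inverse-CDF sampling: if U ~ Uniform(0, 1), then
        # loc + scale * tan(pi * (U - 0.5)) is Cauchy-distributed. The
        # Fourier transform of the Laplacian kernel is a Cauchy density,
        # so Cauchy-sampled projections approximate that kernel.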
        probs = np.random.uniform(low=0.0, high=1.0, size=shape)
        return loc + scale * np.tan(np.pi * (probs - 0.5))

    random_features_initializer = initializer
    if isinstance(initializer, str):
        if initializer.lower() == "gaussian":
            random_features_initializer = initializers.RandomNormal(
                stddev=1.0
            )
        elif initializer.lower() == "laplacian":
            random_features_initializer = initializers.Constant(
                _get_cauchy_samples(loc=0.0, scale=1.0, shape=shape)
            )
        else:
            raise ValueError(
                f'Unsupported `kernel_initializer`: "{initializer}" '
                f"Expected one of: {_SUPPORTED_RBF_KERNEL_TYPES}"
            )
    return random_features_initializer


def _get_default_scale(initializer, input_dim):
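    # `scale` defaults to sqrt(input_dim / 2) only for the Gaussian kernel
    # (see the class docstring); any other initializer falls back to 1.0.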
    if isinstance(initializer, str) and initializer.lower() == "gaussian":
        return np.sqrt(input_dim / 2.0)
    return 1.0