239 lines
9.7 KiB
Python
239 lines
9.7 KiB
Python
|
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||
|
#
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
# ==============================================================================
|
||
|
"""Built-in WideNDeep model classes."""
|
||
|
|
||
|
import tensorflow.compat.v2 as tf
|
||
|
|
||
|
from keras import activations
|
||
|
from keras import backend
|
||
|
from keras import layers as layer_module
|
||
|
from keras.engine import base_layer
|
||
|
from keras.engine import data_adapter
|
||
|
from keras.engine import training as keras_training
|
||
|
from keras.saving.legacy import serialization
|
||
|
|
||
|
# isort: off
|
||
|
from tensorflow.python.util import deprecation
|
||
|
from tensorflow.python.util.tf_export import keras_export
|
||
|
|
||
|
|
||
|
@keras_export(
    "keras.experimental.WideDeepModel",
    v1=["keras.experimental.WideDeepModel", "keras.models.WideDeepModel"],
)
@deprecation.deprecated_endpoints("keras.experimental.WideDeepModel")
class WideDeepModel(keras_training.Model):
    r"""Wide & Deep Model for regression and classification problems.

    This model jointly trains a linear model and a dnn model.

    Example:

    ```python
    linear_model = LinearModel()
    dnn_model = keras.Sequential([keras.layers.Dense(units=64),
                                 keras.layers.Dense(units=1)])
    combined_model = WideDeepModel(linear_model, dnn_model)
    combined_model.compile(optimizer=['sgd', 'adam'],
                           loss='mse', metrics=['mse'])
    # define dnn_inputs and linear_inputs as separate numpy arrays or
    # a single numpy array if dnn_inputs is same as linear_inputs.
    combined_model.fit([linear_inputs, dnn_inputs], y, epochs)
    # or define a single `tf.data.Dataset` that contains a single tensor or
    # separate tensors for dnn_inputs and linear_inputs.
    dataset = tf.data.Dataset.from_tensors(([linear_inputs, dnn_inputs], y))
    combined_model.fit(dataset, epochs)
    ```

    Both the linear and the dnn model can be pre-compiled and trained
    separately before being trained jointly:

    Example:
    ```python
    linear_model = LinearModel()
    linear_model.compile('adagrad', 'mse')
    linear_model.fit(linear_inputs, y, epochs)
    dnn_model = keras.Sequential([keras.layers.Dense(units=1)])
    dnn_model.compile('rmsprop', 'mse')
    dnn_model.fit(dnn_inputs, y, epochs)
    combined_model = WideDeepModel(linear_model, dnn_model)
    combined_model.compile(optimizer=['sgd', 'adam'],
                           loss='mse', metrics=['mse'])
    combined_model.fit([linear_inputs, dnn_inputs], y, epochs)
    ```

    """

    def __init__(self, linear_model, dnn_model, activation=None, **kwargs):
        """Create a Wide & Deep Model.

        Args:
          linear_model: a premade LinearModel, its output must match the output
            of the dnn model.
          dnn_model: a `tf.keras.Model`, its output must match the output of the
            linear model.
          activation: Activation function. Set it to None to maintain a linear
            activation.
          **kwargs: The keyword arguments that are passed on to
            BaseLayer.__init__. Allowed keyword arguments include `name`.
        """
        super().__init__(**kwargs)
        base_layer.keras_premade_model_gauge.get_cell("WideDeep").set(True)
        self.linear_model = linear_model
        self.dnn_model = dnn_model
        self.activation = activations.get(activation)

    def call(self, inputs, training=None):
        """Run both sub-models and combine their outputs element-wise.

        Args:
          inputs: Either a tuple/list of exactly two elements, interpreted as
            `(linear_inputs, dnn_inputs)`, or any other value, which is then
            fed unchanged to both the linear and the dnn model.
          training: Forwarded to the dnn model when that model expects a
            `training` argument. When None, the backend learning phase is
            used instead.

        Returns:
          The structure-wise sum of the linear and dnn outputs, passed
          through `self.activation` when one is set.
        """
        if not isinstance(inputs, (tuple, list)) or len(inputs) != 2:
            # Anything that is not an explicit pair is shared by both models.
            linear_inputs = dnn_inputs = inputs
        else:
            linear_inputs, dnn_inputs = inputs
        linear_output = self.linear_model(linear_inputs)

        if self.dnn_model._expects_training_arg:
            if training is None:
                training = backend.learning_phase()
            dnn_output = self.dnn_model(dnn_inputs, training=training)
        else:
            dnn_output = self.dnn_model(dnn_inputs)
        output = tf.nest.map_structure(
            lambda x, y: (x + y), linear_output, dnn_output
        )
        if self.activation:
            return tf.nest.map_structure(self.activation, output)
        return output

    # This does not support gradient scaling and LossScaleOptimizer.
    def train_step(self, data):
        """Run one training step, optionally with per-sub-model optimizers.

        When `self.optimizer` is a list or tuple, its first element updates
        the linear model's trainable variables and its second element updates
        the dnn model's trainable variables. Otherwise the single optimizer
        updates all trainable variables of this model.

        Args:
          data: A batch that `data_adapter.unpack_x_y_sample_weight` can split
            into `(x, y, sample_weight)`.

        Returns:
          A dict mapping each metric name to its current result.
        """
        x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data)
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(
                y, y_pred, sample_weight, regularization_losses=self.losses
            )
        self.compiled_metrics.update_state(y, y_pred, sample_weight)

        if isinstance(self.optimizer, (list, tuple)):
            linear_vars = self.linear_model.trainable_variables
            dnn_vars = self.dnn_model.trainable_variables
            # A single tape.gradient call yields both gradient lists, so a
            # non-persistent tape is sufficient.
            linear_grads, dnn_grads = tape.gradient(
                loss, (linear_vars, dnn_vars)
            )

            linear_optimizer = self.optimizer[0]
            dnn_optimizer = self.optimizer[1]
            linear_optimizer.apply_gradients(zip(linear_grads, linear_vars))
            dnn_optimizer.apply_gradients(zip(dnn_grads, dnn_vars))
        else:
            trainable_variables = self.trainable_variables
            grads = tape.gradient(loss, trainable_variables)
            self.optimizer.apply_gradients(zip(grads, trainable_variables))

        return {m.name: m.result() for m in self.metrics}

    def _make_train_function(self):
        """Build and cache `self.train_function` for the graph-mode path.

        The function is (re)built only when no train function exists yet or
        when the loss / weighted-metric sub-graphs have been recompiled.
        """
        # Only needed for graph mode and model_to_estimator.
        has_recompiled = self._recompile_weights_loss_and_weighted_metrics()
        self._check_trainable_weights_consistency()
        # If we have re-compiled the loss/weighted metric sub-graphs then create
        # train function even if one exists already. This is because
        # `_feed_sample_weights` list has been updated on re-compile.
        if getattr(self, "train_function", None) is None or has_recompiled:
            # Restore the compiled trainable state.
            current_trainable_state = self._get_trainable_state()
            self._set_trainable_state(self._compiled_trainable_state)

            inputs = (
                self._feed_inputs
                + self._feed_targets
                + self._feed_sample_weights
            )
            if not isinstance(backend.symbolic_learning_phase(), int):
                inputs += [backend.symbolic_learning_phase()]

            # With a single optimizer, both sub-models share it.
            if isinstance(self.optimizer, (list, tuple)):
                linear_optimizer = self.optimizer[0]
                dnn_optimizer = self.optimizer[1]
            else:
                linear_optimizer = self.optimizer
                dnn_optimizer = self.optimizer

            with backend.get_graph().as_default():
                with backend.name_scope("training"):
                    # Training updates
                    updates = []
                    linear_updates = linear_optimizer.get_updates(
                        params=self.linear_model.trainable_weights,
                        loss=self.total_loss,
                    )
                    updates += linear_updates
                    dnn_updates = dnn_optimizer.get_updates(
                        params=self.dnn_model.trainable_weights,
                        loss=self.total_loss,
                    )
                    updates += dnn_updates
                    # Unconditional updates
                    updates += self.get_updates_for(None)
                    # Conditional updates relevant to this model
                    updates += self.get_updates_for(self.inputs)

                metrics = self._get_training_eval_metrics()
                metrics_tensors = [
                    m._call_result
                    for m in metrics
                    if hasattr(m, "_call_result")
                ]

            with backend.name_scope("training"):
                # Gets loss and metrics. Updates weights at each call.
                fn = backend.function(
                    inputs,
                    [self.total_loss] + metrics_tensors,
                    updates=updates,
                    name="train_function",
                    **self._function_kwargs
                )
                self.train_function = fn

            # Restore the current trainable state
            self._set_trainable_state(current_trainable_state)

    def get_config(self):
        """Return a config dict for this model.

        Returns:
          The base layer config extended with the serialized `linear_model`,
          `dnn_model` and `activation` entries.
        """
        linear_config = serialization.serialize_keras_object(self.linear_model)
        dnn_config = serialization.serialize_keras_object(self.dnn_model)
        config = {
            "linear_model": linear_config,
            "dnn_model": dnn_config,
            "activation": activations.serialize(self.activation),
        }
        base_config = base_layer.Layer.get_config(self)
        # Model-specific entries take precedence over base entries.
        return {**base_config, **config}

    @classmethod
    def from_config(cls, config, custom_objects=None):
        """Build a model from a config dict such as `get_config` returns.

        Args:
          config: Dict with `linear_model` and `dnn_model` entries and an
            optional `activation` entry. Remaining entries are forwarded to
            the constructor as keyword arguments. The dict passed in is not
            modified.
          custom_objects: Optional value forwarded to the layer and
            activation deserializers.

        Returns:
          A new instance of `cls`.
        """
        # Work on a shallow copy so the caller's dict keeps all of its keys
        # and can be reused for another `from_config` call.
        config = dict(config)
        linear_config = config.pop("linear_model")
        linear_model = layer_module.deserialize(linear_config, custom_objects)
        dnn_config = config.pop("dnn_model")
        dnn_model = layer_module.deserialize(dnn_config, custom_objects)
        activation = activations.deserialize(
            config.pop("activation", None), custom_objects=custom_objects
        )
        return cls(
            linear_model=linear_model,
            dnn_model=dnn_model,
            activation=activation,
            **config
        )
|