# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of Loss operations for use in neural networks."""

from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import cond
from tensorflow.python.ops import confusion_matrix
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import weights_broadcast_ops
from tensorflow.python.ops.losses import util
from tensorflow.python.util import dispatch
from tensorflow.python.util.deprecation import deprecated_args
from tensorflow.python.util.deprecation import deprecated_argument_lookup
from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=["losses.Reduction"])
class Reduction:
  """Types of loss reduction.

  Contains the following values:

  * `NONE`: Un-reduced weighted losses with the same shape as input.
  * `SUM`: Scalar sum of weighted losses.
  * `MEAN`: Scalar `SUM` divided by sum of weights. DEPRECATED.
  * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in losses.
  * `SUM_OVER_NONZERO_WEIGHTS`: Scalar `SUM` divided by number of non-zero
    weights. DEPRECATED.
  * `SUM_BY_NONZERO_WEIGHTS`: Same as `SUM_OVER_NONZERO_WEIGHTS`. DEPRECATED.
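
  For illustration, a minimal sketch of how two of these reductions differ,
  assuming TF2-style eager execution and using this module's
  `compute_weighted_loss` helper:

  >>> losses = [1.0, 2.0, 3.0, 4.0]
  >>> weights = [1.0, 0.0, 1.0, 0.0]
  >>> tf.compat.v1.losses.compute_weighted_loss(
  ...     losses, weights,
  ...     reduction=tf.compat.v1.losses.Reduction.SUM).numpy()
  4.0
  >>> tf.compat.v1.losses.compute_weighted_loss(
  ...     losses, weights,
  ...     reduction=tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE).numpy()
  1.0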
"""

  NONE = "none"
  SUM = "weighted_sum"
  SUM_OVER_BATCH_SIZE = "weighted_sum_over_batch_size"
  MEAN = "weighted_mean"
  SUM_BY_NONZERO_WEIGHTS = "weighted_sum_by_nonzero_weights"
  SUM_OVER_NONZERO_WEIGHTS = SUM_BY_NONZERO_WEIGHTS

  @classmethod
  def all(cls):
    return (
        cls.NONE,
        cls.SUM,
        cls.MEAN,
        cls.SUM_OVER_BATCH_SIZE,
        cls.SUM_OVER_NONZERO_WEIGHTS,
        cls.SUM_BY_NONZERO_WEIGHTS)

  @classmethod
  def validate(cls, key):
    if key not in cls.all():
      raise ValueError(f"Invalid Reduction Key {key}. Key should be one of "
                       f"{cls.all()}.")


def _safe_mean(losses, num_present):
  """Computes a safe mean of the losses.

  Args:
    losses: `Tensor` whose elements contain individual loss measurements.
    num_present: The number of measurable elements in `losses`.

  Returns:
    A scalar representing the mean of `losses`. If `num_present` is zero,
    then zero is returned.
  """
  total_loss = math_ops.reduce_sum(losses)
  return math_ops.div_no_nan(total_loss, num_present, name="value")


def _num_present(losses, weights, per_batch=False):
  """Computes the number of elements in the loss function induced by `weights`.

  A given weights tensor induces different numbers of usable elements in the
  `losses` tensor. The `weights` tensor is broadcast across `losses` for all
  possible dimensions. For example, if `losses` is a tensor of dimension
  `[4, 5, 6, 3]` and `weights` is a tensor of shape `[4, 5]`, then `weights` is,
  in effect, tiled to match the shape of `losses`. Following this effective
  tile, the total number of present elements is the number of non-zero weights.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: `Tensor` of shape `[]`, `[batch_size]` or
      `[batch_size, d1, ... dK]`, where K < N.
    per_batch: Whether to return the number of elements per batch or as a sum
      total.

  Returns:
    The number of present (non-zero) elements in the losses tensor. If
    `per_batch` is `True`, the value is returned as a tensor of size
    `[batch_size]`. Otherwise, a single scalar tensor is returned.
  """
  if ((isinstance(weights, float) and weights != 0.0) or
      (context.executing_eagerly() and weights._rank() == 0  # pylint: disable=protected-access
       and not math_ops.equal(weights, 0.0))):
    return _num_elements(losses)
  with ops.name_scope(None, "num_present", (losses, weights)) as scope:
    weights = math_ops.cast(weights, dtype=dtypes.float32)
    present = array_ops.where(
        math_ops.equal(weights, 0.0),
        array_ops.zeros_like(weights),
        array_ops.ones_like(weights))
    present = weights_broadcast_ops.broadcast_weights(present, losses)
    if per_batch:
      return math_ops.reduce_sum(
          present,
          axis=math_ops.range(1, array_ops.rank(present)),
          keepdims=True,
          name=scope)
    return math_ops.reduce_sum(present, name=scope)


def _num_elements(losses):
  """Computes the number of elements in `losses` tensor."""
  with ops.name_scope(None, "num_elements", values=[losses]) as scope:
    return math_ops.cast(array_ops.size(losses, name=scope), dtype=losses.dtype)


@tf_export(v1=["losses.compute_weighted_loss"])
@dispatch.add_dispatch_support
def compute_weighted_loss(
    losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Computes the weighted loss.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `losses`, and must be broadcastable to `losses` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: the loss will be added to these collections.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
    `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

  Raises:
    ValueError: If `weights` is `None` or the shape is not compatible with
      `losses`, or if the number of dimensions (rank) of either `losses` or
      `weights` is missing.

  Note:
    When calculating the gradient of a weighted loss, contributions from
    both `losses` and `weights` are considered. If your `weights` depend
    on some model parameters but you do not want this to affect the loss
    gradient, you need to apply `tf.stop_gradient` to `weights` before
    passing them to `compute_weighted_loss`.
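
  A minimal illustrative example (assuming TF2-style eager execution; with
  the default `SUM_BY_NONZERO_WEIGHTS` reduction and a non-zero scalar
  weight, the weighted sum is divided by the number of elements):

  >>> losses = tf.constant([[1.0, 2.0], [3.0, 4.0]])
  >>> tf.compat.v1.losses.compute_weighted_loss(losses, weights=0.5).numpy()
  1.25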

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  Reduction.validate(reduction)
  with ops.name_scope(scope, "weighted_loss", (losses, weights)):
    # Save the `reduction` argument for loss normalization when distributing
    # to multiple replicas. Used only for estimator + v1 optimizer flow.
    ops.get_default_graph()._last_loss_reduction = reduction  # pylint: disable=protected-access

    def compute_loss(losses, weights, loss_collection, reduction):
      losses = ops.convert_to_tensor(losses)
      input_dtype = losses.dtype
      losses = math_ops.cast(losses, dtype=dtypes.float32)
      weights = math_ops.cast(weights, dtype=dtypes.float32)
      weighted_losses = math_ops.multiply(losses, weights)
      if reduction == Reduction.NONE:
        loss = weighted_losses
      else:
        loss = math_ops.reduce_sum(weighted_losses)
        if reduction == Reduction.MEAN:
          loss = _safe_mean(
              loss, math_ops.reduce_sum(array_ops.ones_like(losses) * weights))
        elif (reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or
              reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS):
          loss = _safe_mean(loss, _num_present(losses, weights))
        elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
          loss = _safe_mean(loss, _num_elements(losses))

      # Convert the result back to the input type.
      loss = math_ops.cast(loss, input_dtype)
      util.add_loss(loss, loss_collection)
      return loss

    # Skip the assert_broadcastable in XLA context because asserts are not
    # supported so it only causes unnecessary ops. Also skip it because it uses
    # a DenseToDenseSetOperation op that is incompatible with XLA when
    # the shape(s) are dynamic.
    if control_flow_ops.get_enclosing_xla_context() is not None:
      return compute_loss(losses, weights, loss_collection, reduction)
    else:
      with ops.control_dependencies(
          (weights_broadcast_ops.assert_broadcastable(weights, losses),)):
        return compute_loss(losses, weights, loss_collection, reduction)


@tf_export(v1=["losses.absolute_difference"])
@dispatch.add_dispatch_support
def absolute_difference(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds an Absolute Difference loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a `Tensor` of
  shape `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of
      `labels` or if the shape of `weights` is invalid or if `labels`
      or `predictions` is None.
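
  A minimal illustrative example (assuming TF2-style eager execution; with
  the default reduction, the mean absolute error over the three elements is
  returned):

  >>> tf.compat.v1.losses.absolute_difference(
  ...     labels=[1.0, 2.0, 3.0],
  ...     predictions=[2.0, 4.0, 6.0]).numpy()
  2.0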

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "absolute_difference",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = math_ops.abs(math_ops.subtract(predictions, labels))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.cosine_distance"])
@dispatch.add_dispatch_support
@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
def cosine_distance(
    labels, predictions, axis=None, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS,
    dim=None):
  """Adds a cosine-distance loss to the training procedure.

  Note that the function assumes that `predictions` and `labels` are already
  unit-normalized.

  Args:
    labels: `Tensor` whose shape matches 'predictions'.
    predictions: An arbitrary matrix.
    axis: The dimension along which the cosine distance is computed.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: Type of reduction to apply to loss.
    dim: The old (deprecated) name for `axis`.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If `predictions` shape doesn't match `labels` shape, or
      `axis`, `labels`, `predictions` or `weights` is `None`.
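
  A minimal illustrative example (assuming TF2-style eager execution; the
  two unit vectors are orthogonal, so their cosine distance is 1):

  >>> tf.compat.v1.losses.cosine_distance(
  ...     labels=[[1.0, 0.0]],
  ...     predictions=[[0.0, 1.0]],
  ...     axis=1).numpy()
  1.0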

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
  if axis is None:
    raise ValueError("You must specify argument `axis`.")
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "cosine_distance_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())

    radial_diffs = math_ops.multiply(predictions, labels)
    losses = 1 - math_ops.reduce_sum(radial_diffs, axis=(axis,), keepdims=True)
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.hinge_loss"])
@dispatch.add_dispatch_support
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a hinge loss to the training procedure.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape
      of logits. The values of the tensor are expected to be 0.0 or 1.0.
      Internally the {0,1} labels are converted to {-1,1} when calculating the
      hinge loss.
    logits: The logits, a float tensor. Note that logits are assumed to be
      unbounded and 0-centered. A value > 0 (resp. < 0) is considered a
      positive (resp. negative) binary prediction.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match or
      if `labels` or `logits` is None.
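
  A minimal illustrative example (assuming TF2-style eager execution; both
  predictions sit on the wrong side of the margin, giving a hinge loss of
  `1 - (-1) = 2` per element):

  >>> tf.compat.v1.losses.hinge_loss(
  ...     labels=[0.0, 1.0],
  ...     logits=[1.0, -1.0]).numpy()
  2.0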

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if logits is None:
    raise ValueError("Argument `logits` must not be None.")
  with ops.name_scope(scope, "hinge_loss", (logits, labels, weights)) as scope:
    logits = math_ops.cast(logits, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.subtract(2 * labels, all_ones)
    losses = nn_ops.relu(
        math_ops.subtract(all_ones, math_ops.multiply(labels, logits)))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.huber_loss"])
@dispatch.add_dispatch_support
def huber_loss(labels, predictions, weights=1.0, delta=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a [Huber Loss](https://en.wikipedia.org/wiki/Huber_loss) term to the training procedure.

  For each value x in `error=labels-predictions`, the following is calculated:

  ```
  0.5 * x^2                  if |x| <= d
  0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```

  where d is `delta`.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    delta: `float`, the point where the Huber loss function changes from
      quadratic to linear.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels`
      or if the shape of `weights` is invalid. Also if `labels` or
      `predictions` is None.
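
  A minimal illustrative example (assuming TF2-style eager execution and the
  default `delta=1.0`; the first error, 0.5, falls in the quadratic region
  and the second, 2.0, in the linear region):

  >>> tf.compat.v1.losses.huber_loss(
  ...     labels=[0.0, 0.0],
  ...     predictions=[0.5, 2.0]).numpy()
  0.8125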

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "huber_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    error = math_ops.subtract(predictions, labels)
    abs_error = math_ops.abs(error)
    quadratic = math_ops.minimum(abs_error, delta)
    # The following expression is the same in value as
    # tf.maximum(abs_error - delta, 0), but importantly the gradient for the
    # expression when abs_error == delta is 0 (for tf.maximum it would be 1).
    # This is necessary to avoid doubling the gradient, since there is already
    # a nonzero contribution to the gradient from the quadratic term.
    linear = math_ops.subtract(abs_error, quadratic)
    losses = math_ops.add(
        math_ops.multiply(
            ops.convert_to_tensor(0.5, dtype=quadratic.dtype),
            math_ops.multiply(quadratic, quadratic)),
        math_ops.multiply(delta, linear))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.log_loss"])
@dispatch.add_dispatch_support
def log_loss(labels, predictions, weights=1.0, epsilon=1e-7, scope=None,
             loss_collection=ops.GraphKeys.LOSSES,
             reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a Log Loss term to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    epsilon: A small increment to add to avoid taking a log of zero.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels`
      or if the shape of `weights` is invalid. Also if `labels` or
      `predictions` is None.
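
  A minimal illustrative example (assuming TF2-style eager execution; each
  element contributes `-log(0.9)`, so the mean is about 0.105):

  >>> tf.compat.v1.losses.log_loss(
  ...     labels=[1.0, 0.0],
  ...     predictions=[0.9, 0.1]).numpy()
  0.10536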

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "log_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = -math_ops.multiply(
        labels,
        math_ops.log(predictions + epsilon)) - math_ops.multiply(
            (1 - labels), math_ops.log(1 - predictions + epsilon))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


# TODO(b/37208492): Add reduction arg.
@tf_export(v1=["losses.mean_pairwise_squared_error"])
@dispatch.add_dispatch_support
def mean_pairwise_squared_error(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES):
  """Adds a pairwise-errors-squared loss to the training procedure.

  Unlike `mean_squared_error`, which is a measure of the differences between
  corresponding elements of `predictions` and `labels`,
  `mean_pairwise_squared_error` is a measure of the differences between pairs
  of corresponding elements of `predictions` and `labels`.

  For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], three pairs
  of differences are summed to compute the loss:
    loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

  Note that since the inputs are of shape `[batch_size, d0, ... dN]`, the
  corresponding pairs are computed within each batch sample but not across
  samples within a batch. For example, if `predictions` represents a batch of
  16 grayscale images of dimension [batch_size, 100, 200], then the set of
  pairs is drawn from each image, but not across images.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector.

  Args:
    labels: The ground truth output tensor, whose shape must match the shape
      of `predictions`.
    predictions: The predicted outputs, a tensor of size
      `[batch_size, d0, .. dN]` where N+1 is the total number of dimensions
      in `predictions`.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]`, or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels`
      or if the shape of `weights` is invalid. Also if `labels` or
      `predictions` is None.
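
  A minimal illustrative example (assuming TF2-style eager execution; the
  per-element differences are [0, 1, 2], so the three pairwise terms are
  1, 4, and 1, and their sum divided by 3 is 2):

  >>> tf.compat.v1.losses.mean_pairwise_squared_error(
  ...     labels=[[1.0, 2.0, 3.0]],
  ...     predictions=[[1.0, 3.0, 5.0]]).numpy()
  2.0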

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "mean_pairwise_squared_error",
                      (predictions, labels, weights)) as scope:
    weights = math_ops.cast(weights, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)

    def compute_loss(labels, predictions, weights, loss_collection):
      predictions = math_ops.cast(predictions, dtype=dtypes.float32)
      predictions.get_shape().assert_is_compatible_with(labels.get_shape())

      diffs = math_ops.subtract(predictions, labels)

      axis = math_ops.range(1, array_ops.rank(diffs))

      sum_squares_diff_per_batch = math_ops.reduce_sum(
          math_ops.square(diffs), axis=axis, keepdims=True)
      num_present_per_batch = _num_present(diffs, weights, per_batch=True)

      term1 = 2.0 * math_ops.div_no_nan(
          sum_squares_diff_per_batch,
          math_ops.maximum(num_present_per_batch - 1, 0),
          name="value")

      sum_diff = math_ops.reduce_sum(diffs, axis=axis, keepdims=True)
      term2 = 2.0 * math_ops.div_no_nan(
          math_ops.square(sum_diff),
          math_ops.maximum(
              math_ops.multiply(num_present_per_batch,
                                num_present_per_batch - 1), 0),
          name="value")

      weighted_losses = math_ops.multiply(term1 - term2, weights)
      loss = math_ops.reduce_sum(weighted_losses)

      mean_loss = array_ops.where(
          math_ops.reduce_sum(num_present_per_batch) > 0,
          loss,
          array_ops.zeros_like(loss),
          name="value")
      util.add_loss(mean_loss, loss_collection)
      return mean_loss

    # Skip the assert_broadcastable in XLA context because asserts are not
    # supported so it only causes unnecessary ops. Also skip it because it uses
    # a DenseToDenseSetOperation op that is incompatible with XLA when
    # the shape(s) are dynamic.
    if control_flow_ops.get_enclosing_xla_context() is not None:
      return compute_loss(labels, predictions, weights, loss_collection)
    else:
      with ops.control_dependencies(
          (weights_broadcast_ops.assert_broadcastable(weights, labels),)):
        return compute_loss(labels, predictions, weights, loss_collection)


@tf_export(v1=["losses.mean_squared_error"])
@dispatch.add_dispatch_support
def mean_squared_error(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a Sum-of-Squares loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels`
      or if the shape of `weights` is invalid. Also if `labels` or
      `predictions` is None.

  @compatibility(TF2)

  `tf.compat.v1.losses.mean_squared_error` is mostly compatible with eager
  execution and `tf.function`. But, the `loss_collection` argument is
  ignored when executing eagerly and no loss will be written to the loss
  collections. You will need to either hold on to the return value manually
  or rely on `tf.keras.Model` loss tracking.

  To switch to native TF2 style, instantiate the
  `tf.keras.losses.MeanSquaredError` class and call the object instead.

  #### Structural Mapping to Native TF2

  Before:

  ```python
  loss = tf.compat.v1.losses.mean_squared_error(
      labels=labels,
      predictions=predictions,
      weights=weights,
      reduction=reduction)
  ```

  After:

  ```python
  loss_fn = tf.keras.losses.MeanSquaredError(
      reduction=reduction)
  loss = loss_fn(
      y_true=labels,
      y_pred=predictions,
      sample_weight=weights)
  ```

  #### How to Map Arguments

  | TF1 Arg Name          | TF2 Arg Name     | Note                        |
  | :-------------------- | :--------------- | :------------------------- |
  | `labels`              | `y_true`         | In `__call__()` method     |
  | `predictions`         | `y_pred`         | In `__call__()` method     |
  | `weights`             | `sample_weight`  | In `__call__()` method.    |
  : : : The shape requirements for `sample_weight` are different from :
  : : : `weights`. Please check the [argument definition][api_docs] for :
  : : : details. :
  | `scope`               | Not supported    | -                          |
  | `loss_collection`     | Not supported    | Losses should be tracked   |
  : : : explicitly or with Keras APIs, for example, [add_loss][add_loss], :
  : : : instead of via collections :
  | `reduction`           | `reduction`      | In constructor. Value of   |
  : : : `tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE`, :
  : : : `tf.compat.v1.losses.Reduction.SUM`, :
  : : : `tf.compat.v1.losses.Reduction.NONE` in :
  : : : `tf.compat.v1.losses.mean_squared_error` correspond to :
  : : : `tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE`, :
  : : : `tf.keras.losses.Reduction.SUM`, :
  : : : `tf.keras.losses.Reduction.NONE`, respectively. If you :
  : : : used another value for `reduction`, including the default value :
  : : : `tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS`, there is :
  : : : no directly corresponding value. Please modify the loss :
  : : : implementation manually. :

  [add_loss]:https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer#add_loss
  [api_docs]:https://www.tensorflow.org/api_docs/python/tf/keras/losses/MeanSquaredError#__call__

  #### Before & After Usage Example

  Before:

  >>> y_true = [1, 2, 3]
  >>> y_pred = [1, 3, 5]
  >>> weights = [0, 1, 0.25]
  >>> # samples with zero-weight are excluded from calculation when `reduction`
  >>> # argument is set to default value `Reduction.SUM_BY_NONZERO_WEIGHTS`
  >>> tf.compat.v1.losses.mean_squared_error(
  ...     labels=y_true,
  ...     predictions=y_pred,
  ...     weights=weights).numpy()
  1.0

  >>> tf.compat.v1.losses.mean_squared_error(
  ...     labels=y_true,
  ...     predictions=y_pred,
  ...     weights=weights,
  ...     reduction=tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE).numpy()
  0.66667

  After:

  >>> y_true = [[1.0], [2.0], [3.0]]
  >>> y_pred = [[1.0], [3.0], [5.0]]
  >>> weights = [1, 1, 0.25]
  >>> mse = tf.keras.losses.MeanSquaredError(
  ...     reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE)
  >>> mse(y_true=y_true, y_pred=y_pred, sample_weight=weights).numpy()
  0.66667

  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if predictions is None:
    raise ValueError("Argument `predictions` must not be None.")
  with ops.name_scope(scope, "mean_squared_error",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = math_ops.squared_difference(predictions, labels)
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.sigmoid_cross_entropy"])
@dispatch.add_dispatch_support
def sigmoid_cross_entropy(
    multi_class_labels, logits, weights=1.0, label_smoothing=0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Creates a cross-entropy loss using tf.nn.sigmoid_cross_entropy_with_logits.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/2:

      new_multi_class_labels = multi_class_labels * (1 - label_smoothing)
                               + 0.5 * label_smoothing

  Args:
    multi_class_labels: `[batch_size, num_classes]` target integer labels in
      `{0, 1}`.
    logits: Float `[batch_size, num_classes]` logits outputs of the network.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `multi_class_labels`, and must be broadcastable to `multi_class_labels`
      (i.e., all dimensions must be either `1`, or the same as the
      corresponding `losses` dimension).
    label_smoothing: If greater than `0` then smooth the labels.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has the same shape as `logits`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of
      `multi_class_labels` or if the shape of `weights` is invalid, or if
      `weights` is None. Also if `multi_class_labels` or `logits` is None.
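
  A minimal illustrative example (assuming TF2-style eager execution; with
  zero logits, each element's sigmoid cross-entropy is `log(2)`):

  >>> tf.compat.v1.losses.sigmoid_cross_entropy(
  ...     multi_class_labels=[[1.0, 0.0]],
  ...     logits=[[0.0, 0.0]]).numpy()
  0.69315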

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if multi_class_labels is None:
    raise ValueError("Argument `multi_class_labels` must not be None.")
  if logits is None:
    raise ValueError("Argument `logits` must not be None.")
  with ops.name_scope(scope, "sigmoid_cross_entropy_loss",
                      (logits, multi_class_labels, weights)) as scope:
    logits = ops.convert_to_tensor(logits)
    multi_class_labels = math_ops.cast(multi_class_labels, logits.dtype)
    logits.get_shape().assert_is_compatible_with(multi_class_labels.get_shape())

    if label_smoothing > 0:
      multi_class_labels = (multi_class_labels * (1 - label_smoothing) +
                            0.5 * label_smoothing)

    losses = nn.sigmoid_cross_entropy_with_logits(labels=multi_class_labels,
                                                  logits=logits,
                                                  name="xentropy")
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.softmax_cross_entropy"])
@dispatch.add_dispatch_support
def softmax_cross_entropy(
    onehot_labels, logits, weights=1.0, label_smoothing=0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  r"""Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits_v2.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:

      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

  Note that `onehot_labels` and `logits` must have the same shape,
  e.g. `[batch_size, num_classes]`. The shape of `weights` must be
  broadcastable to loss, whose shape is decided by the shape of `logits`.
  In case the shape of `logits` is `[batch_size, num_classes]`, loss is
  a `Tensor` of shape `[batch_size]`.

  Args:
    onehot_labels: One-hot-encoded labels.
    logits: Logits outputs of the network.
    weights: Optional `Tensor` that is broadcastable to loss.
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has shape `[batch_size]`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weights` is invalid or if `weights` is None. Also if
      `onehot_labels` or `logits` is None.

  @compatibility(TF2)

  `tf.compat.v1.losses.softmax_cross_entropy` is mostly compatible with eager
  execution and `tf.function`. But, the `loss_collection` argument is
  ignored when executing eagerly and no loss will be written to the loss
  collections. You will need to either hold on to the return value manually
  or rely on `tf.keras.Model` loss tracking.

  To switch to native TF2 style, instantiate the
  `tf.keras.losses.CategoricalCrossentropy` class with `from_logits` set
  as `True` and call the object instead.

  #### Structural Mapping to Native TF2

  Before:

  ```python
  loss = tf.compat.v1.losses.softmax_cross_entropy(
      onehot_labels=onehot_labels,
      logits=logits,
      weights=weights,
      label_smoothing=smoothing)
  ```

  After:

  ```python
  loss_fn = tf.keras.losses.CategoricalCrossentropy(
      from_logits=True,
      label_smoothing=smoothing)
  loss = loss_fn(
      y_true=onehot_labels,
      y_pred=logits,
      sample_weight=weights)
  ```

  #### How to Map Arguments

  | TF1 Arg Name          | TF2 Arg Name     | Note                        |
  | :-------------------- | :--------------- | :------------------------- |
  | -                     | `from_logits`    | Set `from_logits` as True  |
  : : : to have identical behavior :
  | `onehot_labels`       | `y_true`         | In `__call__()` method     |
  | `logits`              | `y_pred`         | In `__call__()` method     |
  | `weights`             | `sample_weight`  | In `__call__()` method     |
  | `label_smoothing`     | `label_smoothing`| In constructor             |
  | `scope`               | Not supported    | -                          |
  | `loss_collection`     | Not supported    | Losses should be tracked   |
  : : : explicitly or with Keras :
  : : : APIs, for example, :
  : : : [add_loss][add_loss], :
  : : : instead of via collections :
  | `reduction`           | `reduction`      | In constructor. Value of   |
  : : : `tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE`, :
  : : : `tf.compat.v1.losses.Reduction.SUM`, :
  : : : `tf.compat.v1.losses.Reduction.NONE` in :
  : : : `tf.compat.v1.losses.softmax_cross_entropy` correspond to :
  : : : `tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE`, :
  : : : `tf.keras.losses.Reduction.SUM`, :
  : : : `tf.keras.losses.Reduction.NONE`, respectively. If you :
  : : : used another value for `reduction`, including the default value :
  : : : `tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS`, there is :
  : : : no directly corresponding value. Please modify the loss :
  : : : implementation manually. :

  [add_loss]:https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer#add_loss

  #### Before & After Usage Example

  Before:

  >>> y_true = [[0, 1, 0], [0, 0, 1]]
  >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
  >>> weights = [0.3, 0.7]
  >>> smoothing = 0.2
  >>> tf.compat.v1.losses.softmax_cross_entropy(y_true, y_pred, weights=weights,
  ...     label_smoothing=smoothing).numpy()
  0.57618

  After:

  >>> cce = tf.keras.losses.CategoricalCrossentropy(from_logits=True,
  ...     label_smoothing=smoothing)
  >>> cce(y_true, y_pred, sample_weight=weights).numpy()
  0.57618

  @end_compatibility
  """
  if onehot_labels is None:
    raise ValueError("Argument `onehot_labels` must not be None.")
  if logits is None:
    raise ValueError("Argument `logits` must not be None.")
  with ops.name_scope(scope, "softmax_cross_entropy_loss",
                      (logits, onehot_labels, weights)) as scope:
    logits = ops.convert_to_tensor(logits)
    onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
    logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

    if label_smoothing > 0:
      num_classes = math_ops.cast(
          array_ops.shape(onehot_labels)[-1], logits.dtype)
      smooth_positives = 1.0 - label_smoothing
      smooth_negatives = label_smoothing / num_classes
      onehot_labels = onehot_labels * smooth_positives + smooth_negatives

    onehot_labels = array_ops.stop_gradient(
        onehot_labels, name="labels_stop_gradient")
    losses = nn.softmax_cross_entropy_with_logits_v2(
        labels=onehot_labels, logits=logits, name="xentropy")

    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


# TODO(ptucker): Merge this with similar method in metrics_impl.
def _remove_squeezable_dimensions(
    labels, predictions, weights=None, expected_rank_diff=0):
  """Internal version of _remove_squeezable_dimensions which handles weights.

  Squeezes `predictions` and `labels` if their ranks differ from expected by
  exactly 1.
  Squeezes `weights` if its rank is 1 more than the new rank of `predictions`.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    labels: Label values, a `Tensor` whose dimensions match `predictions`.
    predictions: Predicted values, a `Tensor` of arbitrary dimensions.
    weights: Optional weight `Tensor`. It will be squeezed if it's not scalar,
      and its rank is 1 more than the new rank of `labels`.
    expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.

  Returns:
    Tuple of `predictions`, `labels` and `weights`, possibly with the last
    dimension squeezed.
  """
  labels, predictions = confusion_matrix.remove_squeezable_dimensions(
      labels, predictions, expected_rank_diff=expected_rank_diff)

  if weights is not None:
    weights = ops.convert_to_tensor(weights)
    labels_rank = labels.get_shape().ndims
    weights_shape = weights.get_shape()
    weights_rank = weights_shape.ndims

    if (labels_rank is not None) and (weights_rank is not None):
      # Use static rank.
      rank_diff = weights_rank - labels_rank
      if rank_diff == 1:
        weights = array_ops.squeeze(weights, [-1])
      return labels, predictions, weights

    # Use dynamic rank.
    rank_diff = array_ops.rank(weights) - array_ops.rank(labels)
    if (weights_rank is None) or (
        weights_rank > 0 and weights_shape.dims[-1].is_compatible_with(1)):
      weights = cond.cond(
          math_ops.equal(1, rank_diff),
          lambda: array_ops.squeeze(weights, [-1]),
          lambda: weights)

  return labels, predictions, weights


@tf_export(v1=["losses.sparse_softmax_cross_entropy"])
@dispatch.add_dispatch_support
def sparse_softmax_cross_entropy(
    labels, logits, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  Args:
    labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of
      `labels` and result) and dtype `int32` or `int64`. Each entry in `labels`
      must be an index in `[0, num_classes)`. Other values will raise an
      exception when this op is run on CPU, and return `NaN` for corresponding
      loss and gradient rows on GPU.
    logits: Unscaled log probabilities of shape
      `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float16`, `float32`
      or `float64`.
    weights: Coefficients for the loss. This must be scalar or broadcastable
      to `labels` (i.e. same rank and each dimension is either 1 or the same).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has the same shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits`, `labels`, and `weights` are
      incompatible, or if any of them are None.
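
  A minimal illustrative example (assuming TF2-style eager execution; with
  uniform logits over two classes, the cross-entropy is `log(2)`):

  >>> tf.compat.v1.losses.sparse_softmax_cross_entropy(
  ...     labels=[1],
  ...     logits=[[0.0, 0.0]]).numpy()
  0.69315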

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("Argument `labels` must not be None.")
  if logits is None:
    raise ValueError("Argument `logits` must not be None.")
  with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
                      (logits, labels, weights)) as scope:
    # As documented above in Args, `labels` contains class IDs and `logits`
    # contains one score per class ID, so we expect
    # rank(logits) - rank(labels) == 1; therefore, expected_rank_diff=1.
    labels, logits, weights = _remove_squeezable_dimensions(
        labels, logits, weights, expected_rank_diff=1)
    losses = nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                         logits=logits,
                                                         name="xentropy")
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)