582 lines
20 KiB
Python
582 lines
20 KiB
Python
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ==============================================================================
|
|
"""Utilities used by convolution layers."""
|
|
|
|
import itertools
|
|
|
|
import numpy as np
|
|
import tensorflow.compat.v2 as tf
|
|
|
|
from keras import backend
|
|
|
|
|
|
def convert_data_format(data_format, ndim):
    """Translate a Keras data format plus input rank into a layout string.

    Args:
      data_format: Either `"channels_last"` or `"channels_first"`.
      ndim: Rank of the input; must compare equal to 3, 4 or 5.

    Returns:
      The layout string for the given combination, e.g. `"NHWC"` for
      `("channels_last", 4)` or `"NCDHW"` for `("channels_first", 5)`.

    Raises:
      ValueError: If `data_format` is not one of the two recognized values,
        or if `ndim` is not 3, 4 or 5.
    """
    if data_format == "channels_last":
        layouts = ("NWC", "NHWC", "NDHWC")
    elif data_format == "channels_first":
        layouts = ("NCW", "NCHW", "NCDHW")
    else:
        raise ValueError(
            f"Invalid data_format: {data_format}. "
            'Expected values are ["channels_first", "channels_last"]'
        )

    # `layouts` is ordered by rank: index 0 is rank 3, index 2 is rank 5.
    for rank, layout in zip((3, 4, 5), layouts):
        if ndim == rank:
            return layout

    raise ValueError(
        f"Input rank not supported: {ndim}. Expected values are [3, 4, 5]"
    )
|
|
|
|
|
|
def normalize_tuple(value, n, name, allow_zero=False):
    """Transforms non-negative/positive integer/integers into an integer tuple.

    Args:
      value: The value to validate and convert. Could be an int, or any
        iterable of ints.
      n: The size of the tuple to be returned.
      name: The name of the argument being validated, e.g. "strides" or
        "kernel_size". This is only used to format error messages.
      allow_zero: Defaults to False. A ValueError will be raised if zero is
        received and this param is False.

    Returns:
      A tuple of `n` elements. A single int is repeated `n` times; elements
      of an iterable are returned as given (they are validated, not cast).

    Raises:
      ValueError: If `value` is neither an int nor an iterable, if the
        iterable does not have exactly `n` elements, if an element cannot
        be converted with `int()` or compared against 0, or if an element
        is negative (or zero while `allow_zero` is False).
    """
    error_msg = (
        f"The `{name}` argument must be a tuple of {n} "
        f"integers. Received: {value}"
    )

    if isinstance(value, int):
        value_tuple = (value,) * n
    else:
        try:
            value_tuple = tuple(value)
        except TypeError as err:
            raise ValueError(error_msg) from err
        if len(value_tuple) != n:
            raise ValueError(error_msg)
        for single_value in value_tuple:
            try:
                int(single_value)
            except (ValueError, TypeError) as err:
                # Keep a space between the base message and the detail so
                # the received value and the suffix do not run together.
                raise ValueError(
                    f"{error_msg} including element {single_value} of "
                    f"type {type(single_value)}"
                ) from err

    try:
        if allow_zero:
            unqualified_values = {v for v in value_tuple if v < 0}
            req_msg = ">= 0"
        else:
            unqualified_values = {v for v in value_tuple if v <= 0}
            req_msg = "> 0"
    except TypeError as err:
        # Some values pass `int()` but cannot be ordered against 0 (for
        # example the string "3"); report them as invalid arguments rather
        # than leaking a bare TypeError.
        raise ValueError(
            f"{error_msg} including an element that cannot be compared to 0"
        ) from err

    if unqualified_values:
        error_msg += (
            f" including {unqualified_values}"
            f" that does not satisfy the requirement `{req_msg}`."
        )
        raise ValueError(error_msg)

    return value_tuple
|
|
|
|
|
|
def conv_output_length(input_length, filter_size, padding, stride, dilation=1):
    """Computes the output length of a convolution along a single axis.

    Args:
      input_length: Integer input length, or `None` if unknown.
      filter_size: Integer kernel size.
      padding: One of `"same"`, `"valid"`, `"full"`, `"causal"`.
      stride: Integer stride.
      dilation: Integer dilation rate.

    Returns:
      The output length (integer), or `None` when `input_length` is `None`.
    """
    if input_length is None:
        return None
    assert padding in {"same", "valid", "full", "causal"}

    # Size of the kernel once gaps of `dilation - 1` are inserted between
    # consecutive taps.
    effective_size = filter_size + (filter_size - 1) * (dilation - 1)

    if padding == "valid":
        unstrided = input_length - effective_size + 1
    elif padding == "full":
        unstrided = input_length + effective_size - 1
    elif padding == "same" or padding == "causal":
        unstrided = input_length

    # Divide by the stride, rounding up for positive integer strides.
    return (unstrided + stride - 1) // stride
|
|
|
|
|
|
def conv_input_length(output_length, filter_size, padding, stride):
    """Computes the input length of a convolution from its output length.

    Args:
      output_length: Integer output length, or `None` if unknown.
      filter_size: Integer kernel size.
      padding: One of `"same"`, `"valid"`, `"full"`.
      stride: Integer stride.

    Returns:
      The input length (integer), or `None` when `output_length` is `None`.
    """
    if output_length is None:
        return None
    assert padding in {"same", "valid", "full"}

    # Amount of padding assumed on each side of the input.
    if padding == "valid":
        pad = 0
    elif padding == "full":
        pad = filter_size - 1
    elif padding == "same":
        pad = filter_size // 2

    strided_span = (output_length - 1) * stride
    return strided_span - 2 * pad + filter_size
|
|
|
|
|
|
def deconv_output_length(
    input_length,
    filter_size,
    padding,
    output_padding=None,
    stride=0,
    dilation=1,
):
    """Computes the output length of a transposed convolution.

    Args:
      input_length: Integer, or `None` if unknown.
      filter_size: Integer kernel size (before dilation).
      padding: One of `"same"`, `"valid"`, `"full"`.
      output_padding: Integer, amount of padding along the output dimension.
        When `None`, the output length is inferred instead.
      stride: Integer.
      dilation: Integer.

    Returns:
      The output length (integer), or `None` when `input_length` is `None`.
    """
    # The padding mode is checked even when the length is unknown.
    assert padding in {"same", "valid", "full"}
    if input_length is None:
        return None

    # Kernel size after dilation.
    kernel_extent = filter_size + (filter_size - 1) * (dilation - 1)

    if output_padding is not None:
        # Exact length: the explicit output padding is added on top of the
        # padding implied by the mode.
        if padding == "valid":
            pad = 0
        elif padding == "full":
            pad = kernel_extent - 1
        elif padding == "same":
            pad = kernel_extent // 2

        length = (
            (input_length - 1) * stride
            + kernel_extent
            - 2 * pad
            + output_padding
        )
    else:
        # No explicit output padding: infer the length from the mode alone.
        if padding == "same":
            length = input_length * stride
        elif padding == "valid":
            length = input_length * stride + max(kernel_extent - stride, 0)
        elif padding == "full":
            length = input_length * stride - (stride + kernel_extent - 2)

    return length
|
|
|
|
|
|
def normalize_data_format(value):
    """Returns the lower-cased data format, validating it on the way.

    Args:
      value: A data format string, or `None` to use
        `backend.image_data_format()`.

    Returns:
      `"channels_first"` or `"channels_last"`.

    Raises:
      ValueError: If the lower-cased value is not one of the two formats.
    """
    received = backend.image_data_format() if value is None else value
    normalized = received.lower()
    if normalized in {"channels_first", "channels_last"}:
        return normalized
    raise ValueError(
        "The `data_format` argument must be one of "
        f'"channels_first", "channels_last". Received: {received}'
    )
|
|
|
|
|
|
def normalize_padding(value):
    """Returns a validated padding specification.

    Args:
      value: A list/tuple, which is passed through untouched, or a string
        naming the padding mode (case-insensitive).

    Returns:
      The list/tuple as given, or the lower-cased padding string.

    Raises:
      ValueError: If the lower-cased string is not `"valid"`, `"same"` or
        `"causal"`.
    """
    if isinstance(value, (list, tuple)):
        return value

    lowered = value.lower()
    if lowered in {"valid", "same", "causal"}:
        return lowered

    # Note: the message reports the lower-cased value.
    raise ValueError(
        "The `padding` argument must be a list/tuple or one of "
        '"valid", "same" (or "causal", only for `Conv1D). '
        f"Received: {lowered}"
    )
|
|
|
|
|
|
def conv_kernel_mask(input_shape, kernel_shape, strides, padding):
    """Builds a boolean mask describing which inputs feed which outputs.

    For a convolution applied to an input with N spatial dimensions,
    `input_shape = (d_in1, ..., d_inN)`, producing an output of spatial shape
    `(d_out1, ..., d_outN)`, the result is a boolean array of shape
    `(d_in1, ..., d_inN, d_out1, ..., d_outN)` whose `True` entries mark the
    input/output location pairs that are connected by a weight.

    Example:

    >>> input_shape = (4,)
    >>> kernel_shape = (2,)
    >>> strides = (1,)
    >>> padding = "valid"
    >>> conv_kernel_mask(input_shape, kernel_shape, strides, padding)
    array([[ True, False, False],
           [ True,  True, False],
           [False,  True,  True],
           [False, False,  True]])

    where rows and columns correspond to inputs and outputs respectively.

    Args:
      input_shape: tuple of size N: `(d_in1, ..., d_inN)`, spatial shape of
        the input.
      kernel_shape: tuple of size N, spatial shape of the convolutional
        kernel / receptive field. A single int is repeated for every
        dimension.
      strides: tuple of size N, strides along each spatial dimension. A
        single int is repeated for every dimension.
      padding: type of padding, string `"same"` or `"valid"`.

    Returns:
      A boolean 2N-D `np.ndarray` of shape
      `(d_in1, ..., d_inN, d_out1, ..., d_outN)`.

    Raises:
      ValueError: if `input_shape`, `kernel_shape` and `strides` don't have
        the same number of dimensions.
      NotImplementedError: if `padding` is not in {`"same"`, `"valid"`}.
    """
    if padding not in {"same", "valid"}:
        raise NotImplementedError(
            f"Padding type {padding} not supported. "
            'Only "valid" and "same" are implemented.'
        )

    rank = len(input_shape)
    # Scalars are broadcast to one entry per spatial dimension.
    if isinstance(kernel_shape, int):
        kernel_shape = (kernel_shape,) * rank
    if isinstance(strides, int):
        strides = (strides,) * rank

    kernel_rank = len(kernel_shape)
    stride_rank = len(strides)
    if not (kernel_rank == rank and stride_rank == rank):
        raise ValueError(
            "Number of strides, input and kernel dimensions must all "
            f"match. Received: stride_dims={stride_rank}, "
            f"in_dims={rank}, kernel_dims={kernel_rank}"
        )

    output_shape = conv_output_shape(
        input_shape, kernel_shape, strides, padding
    )
    mask = np.zeros(input_shape + output_shape, dtype=bool)

    # Visit every output location and flag the inputs it is connected to.
    for out_pos in itertools.product(*map(range, output_shape)):
        connected_ranges = conv_connected_inputs(
            input_shape, kernel_shape, out_pos, strides, padding
        )
        for in_pos in itertools.product(*connected_ranges):
            mask[in_pos + out_pos] = True

    return mask
|
|
|
|
|
|
def conv_kernel_idxs(
    input_shape,
    kernel_shape,
    strides,
    padding,
    filters_in,
    filters_out,
    data_format,
):
    """Yields output-input tuples of indices in a CNN layer.

    The generator iterates over all `(output_idx, input_idx)` tuples, where
    `output_idx` is an integer index in a flattened tensor representing a
    single output image of a convolutional layer that is connected (via the
    layer weights) to the respective single input image at `input_idx`.

    Because this is a generator, argument validation only runs once
    iteration starts.

    Example:

    >>> input_shape = (2, 2)
    >>> kernel_shape = (2, 1)
    >>> strides = (1, 1)
    >>> padding = "valid"
    >>> filters_in = 1
    >>> filters_out = 1
    >>> data_format = "channels_last"
    >>> list(conv_kernel_idxs(input_shape, kernel_shape, strides, padding,
    ...                       filters_in, filters_out, data_format))
    [(0, 0), (0, 2), (1, 1), (1, 3)]

    Args:
      input_shape: tuple of size N: `(d_in1, ..., d_inN)`, spatial shape of
        the input.
      kernel_shape: tuple of size N, spatial shape of the convolutional
        kernel / receptive field. A single int is repeated for every
        dimension.
      strides: tuple of size N, strides along each spatial dimension. A
        single int is repeated for every dimension.
      padding: type of padding, string `"same"` or `"valid"`.
      filters_in: `int`, number of filters in the input to the layer.
      filters_out: `int`, number of filters in the output of the layer.
      data_format: string, "channels_first" or "channels_last".

    Yields:
      The next tuple `(output_idx, input_idx)`, where `output_idx` is an
      integer index in a flattened tensor representing a single output image
      of a convolutional layer that is connected (via the layer weights) to
      the respective single input image at `input_idx`.

    Raises:
      ValueError: if `data_format` is neither `"channels_last"` nor
        `"channels_first"`, or if number of strides, input, and kernel
        number of dimensions do not match.
      NotImplementedError: if `padding` is neither `"same"` nor `"valid"`.
    """
    if padding not in ("same", "valid"):
        raise NotImplementedError(
            f"Padding type {padding} not supported. "
            'Only "valid" and "same" are implemented.'
        )

    in_dims = len(input_shape)
    # Scalars are broadcast to one entry per spatial dimension.
    if isinstance(kernel_shape, int):
        kernel_shape = (kernel_shape,) * in_dims
    if isinstance(strides, int):
        strides = (strides,) * in_dims

    kernel_dims = len(kernel_shape)
    stride_dims = len(strides)
    if kernel_dims != in_dims or stride_dims != in_dims:
        raise ValueError(
            "Number of strides, input and kernel dimensions must all "
            f"match. Received: stride_dims={stride_dims}, "
            f"in_dims={in_dims}, kernel_dims={kernel_dims}"
        )

    output_shape = conv_output_shape(
        input_shape, kernel_shape, strides, padding
    )
    output_axes_ticks = [range(dim) for dim in output_shape]

    # `concat_idxs` places the filter entry before or after the spatial
    # entries, depending on where the channel axis lives.
    if data_format == "channels_first":

        def concat_idxs(spatial_idx, filter_idx):
            return (filter_idx,) + spatial_idx

    elif data_format == "channels_last":

        def concat_idxs(spatial_idx, filter_idx):
            return spatial_idx + (filter_idx,)

    else:
        # The two sentences are separated by a space so they do not run
        # together in the rendered message.
        raise ValueError(
            f"Data format `{data_format}` not recognized. "
            '`data_format` must be "channels_first" or "channels_last".'
        )

    for output_position in itertools.product(*output_axes_ticks):
        input_axes_ticks = conv_connected_inputs(
            input_shape, kernel_shape, output_position, strides, padding
        )
        for input_position in itertools.product(*input_axes_ticks):
            for f_in in range(filters_in):
                for f_out in range(filters_out):
                    # Flatten the (spatial..., filter) multi-indices into
                    # scalar offsets.
                    out_idx = np.ravel_multi_index(
                        multi_index=concat_idxs(output_position, f_out),
                        dims=concat_idxs(output_shape, filters_out),
                    )
                    in_idx = np.ravel_multi_index(
                        multi_index=concat_idxs(input_position, f_in),
                        dims=concat_idxs(input_shape, filters_in),
                    )
                    yield (out_idx, in_idx)
|
|
|
|
|
|
def conv_connected_inputs(
    input_shape, kernel_shape, output_position, strides, padding
):
    """Returns the input region that contributes to one output position.

    For a convolution over an input with N spatial dimensions,
    `input_shape = (d_in1, ..., d_inN)`, this gives N ranges delimiting the
    part of the input that was convolved with the kernel to produce the
    output at `output_position = (p_out1, ..., p_outN)`.

    Example:

    >>> input_shape = (4, 4)
    >>> kernel_shape = (2, 1)
    >>> output_position = (1, 1)
    >>> strides = (1, 1)
    >>> padding = "valid"
    >>> conv_connected_inputs(input_shape, kernel_shape, output_position,
    ...                       strides, padding)
    [range(1, 3), range(1, 2)]

    Args:
      input_shape: tuple of size N: `(d_in1, ..., d_inN)`, spatial shape of
        the input.
      kernel_shape: tuple of size N, spatial shape of the convolutional
        kernel / receptive field.
      output_position: tuple of size N: `(p_out1, ..., p_outN)`, a single
        position in the output of the convolution.
      strides: tuple of size N, strides along each spatial dimension.
      padding: type of padding. With `"valid"` the window starts at
        `output_position * stride`; with any other value the window is
        centered on that location instead. In both cases the window is
        clipped to the input bounds.

    Returns:
      A list of N `range` objects, one per spatial dimension, specifying the
      region in the input connected to `output_position`.
    """

    def _axis_window(axis):
        # Split the kernel into the part left of its center and the rest
        # (center included).
        before = int(kernel_shape[axis] / 2)
        after = kernel_shape[axis] - before

        anchor = output_position[axis] * strides[axis]
        if padding == "valid":
            # Without padding the window begins at the strided position, so
            # its center sits `before` steps further in.
            anchor += before

        lower = max(0, anchor - before)
        upper = min(input_shape[axis], anchor + after)
        return range(lower, upper)

    return [_axis_window(axis) for axis in range(len(input_shape))]
|
|
|
|
|
|
def conv_output_shape(input_shape, kernel_shape, strides, padding):
    """Computes the spatial output shape of an N-D convolution.

    Dimensions where the input is empty (size 0) are forced to stay empty.

    Args:
      input_shape: tuple of size N: `(d_in1, ..., d_inN)`, spatial shape of
        the input.
      kernel_shape: tuple of size N, spatial shape of the convolutional
        kernel / receptive field.
      strides: tuple of size N, strides along each spatial dimension.
      padding: type of padding, passed through to `conv_output_length`.

    Returns:
      tuple of size N: `(d_out1, ..., d_outN)`, spatial shape of the output.
    """
    axes = range(len(kernel_shape))

    # Compute every axis length first, then zero out the empty axes.
    lengths = [
        conv_output_length(
            input_shape[axis], kernel_shape[axis], padding, strides[axis]
        )
        for axis in axes
    ]
    return tuple(
        0 if input_shape[axis] == 0 else lengths[axis] for axis in axes
    )
|
|
|
|
|
|
def squeeze_batch_dims(inp, op, inner_rank):
    """Returns `unsqueeze_batch(op(squeeze_batch(inp)))`.

    Where `squeeze_batch` reshapes `inp` to shape
    `[prod(inp.shape[:-inner_rank])] + inp.shape[-inner_rank:]`
    and `unsqueeze_batch` does the reverse reshape but on the output.

    Args:
      inp: A tensor with dims `batch_shape + inner_shape` where `inner_shape`
        is length `inner_rank`.
      op: A callable that takes a single input tensor and returns a single
        output tensor.
      inner_rank: A python integer.

    Returns:
      `unsqueeze_batch(op(squeeze_batch(inp)))`.
    """
    with tf.name_scope("squeeze_batch_dims"):
        shape = inp.shape

        # Use the static inner shape when it is fully defined; otherwise fall
        # back to slicing the dynamic shape tensor.
        inner_shape = shape[-inner_rank:]
        if not inner_shape.is_fully_defined():
            inner_shape = tf.shape(inp)[-inner_rank:]

        # Same static-or-dynamic choice for the leading (batch) dimensions.
        batch_shape = shape[:-inner_rank]
        if not batch_shape.is_fully_defined():
            batch_shape = tf.shape(inp)[:-inner_rank]

        # Collapse all leading dimensions into a single one (-1). The target
        # shape is built as a Python list when the inner shape is static and
        # as a tensor otherwise.
        if isinstance(inner_shape, tf.TensorShape):
            inp_reshaped = tf.reshape(inp, [-1] + inner_shape.as_list())
        else:
            inp_reshaped = tf.reshape(
                inp, tf.concat(([-1], inner_shape), axis=-1)
            )

        out_reshaped = op(inp_reshaped)

        # `op` may change the trailing dimensions, so read them back from its
        # output rather than reusing `inner_shape`.
        out_inner_shape = out_reshaped.shape[-inner_rank:]
        if not out_inner_shape.is_fully_defined():
            out_inner_shape = tf.shape(out_reshaped)[-inner_rank:]

        # Restore the original leading dimensions in front of the output's
        # trailing dimensions.
        out = tf.reshape(
            out_reshaped, tf.concat((batch_shape, out_inner_shape), axis=-1)
        )

        # Re-attach the static shape: the input's static leading dims
        # followed by the static trailing dims of `out`.
        out.set_shape(inp.shape[:-inner_rank] + out.shape[-inner_rank:])
        return out
|