# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Private utilities for locally-connected layers."""

import numpy as np
import tensorflow.compat.v2 as tf

from keras import backend
from keras.utils import conv_utils


def get_locallyconnected_mask(
    input_shape, kernel_shape, strides, padding, data_format
):
    """Return a mask representing connectivity of a locally-connected operation.

    This method returns a masking numpy array of 0s and 1s (of type
    `np.float32`) that, when element-wise multiplied with a fully-connected
    weight tensor, masks out the weights between disconnected input-output pairs
    and thus implements local connectivity through a sparse fully-connected
    weight tensor.

    Assume an unshared convolution with given parameters is applied to an input
    having N spatial dimensions with `input_shape = (d_in1, ..., d_inN)`
    to produce an output with spatial shape `(d_out1, ..., d_outN)` (determined
    by layer parameters such as `strides`).

    This method returns a mask which can be broadcast-multiplied (element-wise)
    with a 2*(N+1)-D weight matrix (equivalent to a fully-connected layer
    between (N+1)-D activations (N spatial + 1 channel dimensions for input and
    output) to make it perform an unshared convolution with given
    `kernel_shape`, `strides`, `padding` and `data_format`.

    Args:
      input_shape: tuple of size N: `(d_in1, ..., d_inN)` spatial shape of the
        input.
      kernel_shape: tuple of size N, spatial shape of the convolutional kernel /
        receptive field.
      strides: tuple of size N, strides along each spatial dimension.
      padding: type of padding, string `"same"` or `"valid"`.
      data_format: a string, `"channels_first"` or `"channels_last"`.

    Returns:
      a `np.float32`-type `np.ndarray` of shape
      `(1, d_in1, ..., d_inN, 1, d_out1, ..., d_outN)`
      if `data_format == `"channels_first"`, or
      `(d_in1, ..., d_inN, 1, d_out1, ..., d_outN, 1)`
      if `data_format == "channels_last"`.

    Raises:
      ValueError: if `data_format` is neither `"channels_first"` nor
                  `"channels_last"`.
    """
    mask = conv_utils.conv_kernel_mask(
        input_shape=input_shape,
        kernel_shape=kernel_shape,
        strides=strides,
        padding=padding,
    )

    ndims = int(mask.ndim / 2)

    if data_format == "channels_first":
        mask = np.expand_dims(mask, 0)
        mask = np.expand_dims(mask, -ndims - 1)

    elif data_format == "channels_last":
        mask = np.expand_dims(mask, ndims)
        mask = np.expand_dims(mask, -1)

    else:
        raise ValueError("Unrecognized data_format: " + str(data_format))

    return mask


def local_conv_matmul(inputs, kernel, kernel_mask, output_shape):
    """Apply N-D convolution with un-shared weights using a single matmul call.

    This method outputs `inputs . (kernel * kernel_mask)`
    (with `.` standing for matrix-multiply and `*` for element-wise multiply)
    and requires a precomputed `kernel_mask` to zero-out weights in `kernel` and
    hence perform the same operation as a convolution with un-shared
    (the remaining entries in `kernel`) weights. It also does the necessary
    reshapes to make `inputs` and `kernel` 2-D and `output` (N+2)-D.

    Args:
        inputs: (N+2)-D tensor with shape `(batch_size, channels_in, d_in1, ...,
          d_inN)` or `(batch_size, d_in1, ..., d_inN, channels_in)`.
        kernel: the unshared weights for N-D convolution,
            an (N+2)-D tensor of shape: `(d_in1, ..., d_inN, channels_in,
            d_out2, ..., d_outN, channels_out)` or `(channels_in, d_in1, ...,
            d_inN, channels_out, d_out2, ..., d_outN)`, with the ordering of
            channels and spatial dimensions matching that of the input. Each
            entry is the weight between a particular input and output location,
            similarly to a fully-connected weight matrix.
        kernel_mask: a float 0/1 mask tensor of shape: `(d_in1, ..., d_inN, 1,
          d_out2, ..., d_outN, 1)` or `(1, d_in1, ..., d_inN, 1, d_out2, ...,
          d_outN)`, with the ordering of singleton and spatial dimensions
          matching that of the input. Mask represents the connectivity pattern
          of the layer and is precomputed elsewhere based on layer parameters:
          stride, padding, and the receptive field shape.
        output_shape: a tuple of (N+2) elements representing the output shape:
          `(batch_size, channels_out, d_out1, ..., d_outN)` or `(batch_size,
          d_out1, ..., d_outN, channels_out)`, with the ordering of channels and
          spatial dimensions matching that of the input.

    Returns:
        Output (N+2)-D tensor with shape `output_shape`.
    """
    inputs_flat = backend.reshape(inputs, (backend.shape(inputs)[0], -1))

    kernel = kernel_mask * kernel
    kernel = make_2d(kernel, split_dim=backend.ndim(kernel) // 2)

    output_flat = tf.matmul(inputs_flat, kernel, b_is_sparse=True)
    output = backend.reshape(
        output_flat,
        [
            backend.shape(output_flat)[0],
        ]
        + output_shape.as_list()[1:],
    )
    return output


def local_conv_sparse_matmul(
    inputs, kernel, kernel_idxs, kernel_shape, output_shape
):
    """Apply N-D convolution with unshared weights using a single sparse matmul.

    This method outputs `inputs . tf.sparse.SparseTensor(indices=kernel_idxs,
    values=kernel, dense_shape=kernel_shape)`, with `.` standing for
    matrix-multiply. It also reshapes `inputs` to 2-D and `output` to (N+2)-D.

    Args:
        inputs: (N+2)-D tensor with shape `(batch_size, channels_in, d_in1, ...,
          d_inN)` or `(batch_size, d_in1, ..., d_inN, channels_in)`.
        kernel: a 1-D tensor with shape `(len(kernel_idxs),)` containing all the
          weights of the layer.
        kernel_idxs:  a list of integer tuples representing indices in a sparse
          matrix performing the un-shared convolution as a matrix-multiply.
        kernel_shape: a tuple `(input_size, output_size)`, where `input_size =
          channels_in * d_in1 * ... * d_inN` and `output_size = channels_out *
          d_out1 * ... * d_outN`.
        output_shape: a tuple of (N+2) elements representing the output shape:
          `(batch_size, channels_out, d_out1, ..., d_outN)` or `(batch_size,
          d_out1, ..., d_outN, channels_out)`, with the ordering of channels and
          spatial dimensions matching that of the input.

    Returns:
        Output (N+2)-D dense tensor with shape `output_shape`.
    """
    inputs_flat = backend.reshape(inputs, (backend.shape(inputs)[0], -1))
    output_flat = tf.sparse.sparse_dense_matmul(
        sp_a=tf.SparseTensor(kernel_idxs, kernel, kernel_shape),
        b=inputs_flat,
        adjoint_b=True,
    )
    output_flat_transpose = backend.transpose(output_flat)

    output_reshaped = backend.reshape(
        output_flat_transpose,
        [
            backend.shape(output_flat_transpose)[0],
        ]
        + output_shape.as_list()[1:],
    )
    return output_reshaped


def make_2d(tensor, split_dim):
    """Reshapes an N-dimensional tensor into a 2D tensor.

    Dimensions before (excluding) and after (including) `split_dim` are grouped
    together.

    Args:
      tensor: a tensor of shape `(d0, ..., d(N-1))`.
      split_dim: an integer from 1 to N-1, index of the dimension to group
        dimensions before (excluding) and after (including).

    Returns:
      Tensor of shape
      `(d0 * ... * d(split_dim-1), d(split_dim) * ... * d(N-1))`.
    """
    shape = tf.shape(tensor)
    in_dims = shape[:split_dim]
    out_dims = shape[split_dim:]

    in_size = tf.reduce_prod(in_dims)
    out_size = tf.reduce_prod(out_dims)

    return tf.reshape(tensor, (in_size, out_size))