# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mixin holding dropout fields for RNN cells."""

import tensorflow.compat.v2 as tf
from tensorflow.tools.docs import doc_controls

from keras import backend


@doc_controls.do_not_generate_docs
class DropoutRNNCellMixin:
    """Object that holds dropout-related fields for an RNN cell.

    This class is not a standalone RNN cell. It is meant to be used with an
    RNN cell via multiple inheritance. Any cell that mixes in this class
    should have the following fields:
      dropout: a float within the range [0, 1). The fraction of the input
        units to drop.
      recurrent_dropout: a float within the range [0, 1). The fraction of
        the recurrent state units to drop.
      _random_generator: A backend.RandomGenerator instance, which will be
        used to produce outputs based on the inputs and dropout rate. This
        object will create and cache dropout masks, and reuse them for the
        incoming data, so that the same mask is used for every batch input.
    """

    def __init__(self, *args, **kwargs):
        self._create_non_trackable_mask_cache()
        super().__init__(*args, **kwargs)

    @tf.__internal__.tracking.no_automatic_dependency_tracking
    def _create_non_trackable_mask_cache(self):
        """Create the caches for the dropout and recurrent dropout masks.

        Note that the following two masks will be used in "graph function"
        mode, i.e. these masks are symbolic tensors. In eager mode, the
        `eager_*_mask` tensors will be generated differently than in the
        "graph function" case, and they will be cached.

        Also note that in graph mode, we still cache those masks only
        because the RNN could be created with `unroll=True`. In that case,
        the `cell.call()` function will be invoked multiple times, and we
        want to ensure the same mask is used every time.

        Also, the caches are created without tracking. Since they are not
        picklable by Python when deepcopied, we don't want
        `layer._obj_reference_counts_dict` to track them by default.
        """
        self._dropout_mask_cache = backend.ContextValueCache(
            self._create_dropout_mask
        )
        self._recurrent_dropout_mask_cache = backend.ContextValueCache(
            self._create_recurrent_dropout_mask
        )

    def reset_dropout_mask(self):
        """Reset the cached dropout mask, if any.

        It is important for the RNN layer to invoke this in its `call()`
        method so that the cached mask is cleared before calling
        `cell.call()`. The mask should be cached across all timesteps
        within the same batch, but shouldn't be cached between batches.
        Otherwise it would introduce unreasonable bias against certain
        indices of data within the batch.
        """
        self._dropout_mask_cache.clear()

    def reset_recurrent_dropout_mask(self):
        """Reset the cached recurrent dropout mask, if any.

        It is important for the RNN layer to invoke this in its `call()`
        method so that the cached mask is cleared before calling
        `cell.call()`. The mask should be cached across all timesteps
        within the same batch, but shouldn't be cached between batches.
        Otherwise it would introduce unreasonable bias against certain
        indices of data within the batch.
        """
        self._recurrent_dropout_mask_cache.clear()

    def _create_dropout_mask(self, inputs, training, count=1):
        return _generate_dropout_mask(
            self._random_generator,
            tf.ones_like(inputs),
            self.dropout,
            training=training,
            count=count,
        )

    def _create_recurrent_dropout_mask(self, inputs, training, count=1):
        return _generate_dropout_mask(
            self._random_generator,
            tf.ones_like(inputs),
            self.recurrent_dropout,
            training=training,
            count=count,
        )

    def get_dropout_mask_for_cell(self, inputs, training, count=1):
        """Get the dropout mask for the RNN cell's input.

        It will create a mask based on context if there isn't any existing
        cached mask. If a new mask is generated, it will update the cache
        in the cell.

        Args:
          inputs: The input tensor whose shape will be used to generate the
            dropout mask.
          training: Boolean tensor, whether it is in training mode; dropout
            will be ignored in non-training mode.
          count: Int, how many dropout masks will be generated. It is useful
            for cells that have internal weights fused together.

        Returns:
          List of mask tensors, generated or cached based on context.
        """
        if self.dropout == 0:
            return None
        init_kwargs = dict(inputs=inputs, training=training, count=count)
        return self._dropout_mask_cache.setdefault(kwargs=init_kwargs)

    def get_recurrent_dropout_mask_for_cell(self, inputs, training, count=1):
        """Get the recurrent dropout mask for the RNN cell.

        It will create a mask based on context if there isn't any existing
        cached mask. If a new mask is generated, it will update the cache
        in the cell.

        Args:
          inputs: The input tensor whose shape will be used to generate the
            dropout mask.
          training: Boolean tensor, whether it is in training mode; dropout
            will be ignored in non-training mode.
          count: Int, how many dropout masks will be generated. It is useful
            for cells that have internal weights fused together.

        Returns:
          List of mask tensors, generated or cached based on context.
        """
        if self.recurrent_dropout == 0:
            return None
        init_kwargs = dict(inputs=inputs, training=training, count=count)
        return self._recurrent_dropout_mask_cache.setdefault(
            kwargs=init_kwargs
        )

    def __getstate__(self):
        # Used for deepcopy. The caches can't be pickled by Python, since
        # they will contain tensors and graphs.
        state = super().__getstate__()
        state.pop("_dropout_mask_cache", None)
        state.pop("_recurrent_dropout_mask_cache", None)
        return state

    def __setstate__(self, state):
        state["_dropout_mask_cache"] = backend.ContextValueCache(
            self._create_dropout_mask
        )
        state["_recurrent_dropout_mask_cache"] = backend.ContextValueCache(
            self._create_recurrent_dropout_mask
        )
        super().__setstate__(state)


def _generate_dropout_mask(generator, ones, rate, training=None, count=1):
    # Generate `count` dropout masks by applying dropout to an all-ones
    # tensor; in non-training phase the all-ones tensor is returned instead.
    def dropped_inputs():
        return generator.dropout(ones, rate)

    if count > 1:
        return [
            backend.in_train_phase(dropped_inputs, ones, training=training)
            for _ in range(count)
        ]
    return backend.in_train_phase(dropped_inputs, ones, training=training)
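

# The block below is a minimal, self-contained usage sketch (not part of the
# Keras API) showing the call pattern the mixin expects: a hypothetical
# `_DemoCell` supplies the `dropout`, `recurrent_dropout` and
# `_random_generator` fields documented above, then asks the mixin for a
# cached mask. A real RNN cell would additionally inherit from a Keras
# `Layer` subclass and apply the mask to its inputs inside `call()`.
if __name__ == "__main__":

    class _DemoCell(DropoutRNNCellMixin):
        """Hypothetical cell-like object used only for this sketch."""

        def __init__(self):
            self.dropout = 0.5
            self.recurrent_dropout = 0.0
            self._random_generator = backend.RandomGenerator(seed=1)
            super().__init__()

    cell = _DemoCell()
    x = tf.ones([2, 4])

    # Within the same (eager) context, repeated calls return the cached
    # mask, so every timestep of a batch sees the same dropped units.
    mask_a = cell.get_dropout_mask_for_cell(x, training=True)
    mask_b = cell.get_dropout_mask_for_cell(x, training=True)
    print(bool(tf.reduce_all(mask_a == mask_b)))  # True: same cached mask.

    # Clearing the cache (as the RNN layer does at the start of each batch)
    # forces a fresh mask on the next call.
    cell.reset_dropout_mask()

    # Returns None because `recurrent_dropout` is 0 for this demo cell.
    print(cell.get_recurrent_dropout_mask_for_cell(x, training=True))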