# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Functionality for loading events from a record file."""

import contextlib

from tensorboard import data_compat
from tensorboard import dataclass_compat
from tensorboard.compat import tf
from tensorboard.compat.proto import event_pb2
from tensorboard.util import platform_util
from tensorboard.util import tb_logging


logger = tb_logging.get_logger()


@contextlib.contextmanager
def _nullcontext():
    """Pre-Python-3.7-compatible standin for contextlib.nullcontext."""
    yield


# Might as well make this a singleton.
_NULLCONTEXT = _nullcontext()


def _silence_deprecation_warnings():
    """Context manager that best-effort silences TF deprecation warnings."""
    try:
        # Learn this one weird trick to make TF deprecation warnings go away.
        from tensorflow.python.util import deprecation

        return deprecation.silence()
    except (ImportError, AttributeError):
        return _NULLCONTEXT


def _make_tf_record_iterator(file_path):
    """Returns an iterator over TF records for the given tfrecord file."""
    # If we don't have TF at all, use the stub implementation.
    if tf.__version__ == "stub":
        # TODO(#1711): Reshape stub implementation to fit tf_record_iterator API
        # rather than needlessly emulating the old PyRecordReader_New API.
        logger.debug("Opening a stub record reader pointing at %s", file_path)
        return _PyRecordReaderIterator(
            tf.pywrap_tensorflow.PyRecordReader_New, file_path
        )
    # If PyRecordReader exists, use it, otherwise use tf_record_iterator().
    # Check old first, then new, since tf_record_iterator existed previously but
    # only gained the semantics we need at the time PyRecordReader was removed.
    #
    # TODO(#1711): Eventually remove PyRecordReader fallback once we can drop
    # support for TF 2.1 and prior, and find a non-deprecated replacement for
    # tf.compat.v1.io.tf_record_iterator.
    try:
        from tensorflow.python import pywrap_tensorflow

        py_record_reader_new = pywrap_tensorflow.PyRecordReader_New
    except (ImportError, AttributeError):
        py_record_reader_new = None
    if py_record_reader_new:
        logger.debug("Opening a PyRecordReader pointing at %s", file_path)
        return _PyRecordReaderIterator(py_record_reader_new, file_path)
    else:
        logger.debug("Opening a tf_record_iterator pointing at %s", file_path)
        # TODO(#1711): Find non-deprecated replacement for tf_record_iterator.
        with _silence_deprecation_warnings():
            return tf.compat.v1.io.tf_record_iterator(file_path)


class _PyRecordReaderIterator:
    """Python iterator for TF Records based on PyRecordReader."""

    def __init__(self, py_record_reader_new, file_path):
        """Constructs a _PyRecordReaderIterator for the given file path.

        Args:
          py_record_reader_new: pywrap_tensorflow.PyRecordReader_New
          file_path: file path of the tfrecord file to read
        """
        with tf.compat.v1.errors.raise_exception_on_not_ok_status() as status:
            self._reader = py_record_reader_new(
                tf.compat.as_bytes(file_path), 0, tf.compat.as_bytes(""), status
            )
        if not self._reader:
            raise IOError(
                "Failed to open a record reader pointing to %s" % file_path
            )

    def __iter__(self):
        return self

    def __next__(self):
        try:
            self._reader.GetNext()
        except tf.errors.OutOfRangeError as e:
            raise StopIteration
        return self._reader.record()

    next = __next__  # for python2 compatibility


class RawEventFileLoader:
    """An iterator that yields Event protos as serialized bytestrings."""

    def __init__(self, file_path, detect_file_replacement=False):
        """Constructs a RawEventFileLoader for the given file path.

        Args:
          file_path: the event file path to read from
          detect_file_replacement: if True, when Load() is called, the loader
              will make a stat() call to check the size of the file. If it sees
              that the file has grown, it will reopen the file entirely (while
              preserving the current offset) before attempting to read from it.
              Otherwise, Load() will simply poll at EOF for new data.
        """
        if file_path is None:
            raise ValueError("A file path is required")
        self._file_path = platform_util.readahead_file_path(file_path)
        self._detect_file_replacement = detect_file_replacement
        self._file_size = None
        self._iterator = _make_tf_record_iterator(self._file_path)
        if self._detect_file_replacement and not hasattr(
            self._iterator, "reopen"
        ):
            logger.warning(
                "File replacement detection requested, but not enabled because "
                "TF record iterator impl does not support reopening. This "
                "functionality requires TensorFlow 2.9+"
            )
            self._detect_file_replacement = False

    def Load(self):
        """Loads all new events from disk as raw serialized proto bytestrings.

        Calling Load multiple times in a row will not 'drop' events as long as the
        return value is not iterated over.

        Yields:
          All event proto bytestrings in the file that have not been yielded yet.
        """
        logger.debug("Loading events from %s", self._file_path)
        if self._detect_file_replacement:
            has_increased = self.CheckForIncreasedFileSize()
            # Only act on the file size information if we got a concrete result.
            if has_increased is not None:
                if has_increased:
                    logger.debug(
                        "Reopening %s since file size has changed",
                        self._file_path,
                    )
                    self._iterator.close()
                    self._iterator.reopen()
                else:
                    logger.debug(
                        "Skipping attempt to poll %s since file size has not "
                        "changed (still %d)",
                        self._file_path,
                        self._file_size,
                    )
                    return
        while True:
            try:
                yield next(self._iterator)
            except StopIteration:
                logger.debug("End of file in %s", self._file_path)
                break
            except tf.errors.DataLossError as e:
                # We swallow partial read exceptions; if the record was truncated
                # and a later update completes it, retrying can then resume from
                # the same point in the file since the iterator holds the offset.
                logger.debug("Truncated record in %s (%s)", self._file_path, e)
                break
        logger.debug("No more events in %s", self._file_path)

    def CheckForIncreasedFileSize(self):
        """Stats the file to get its updated size, returning True if it grew.

        If the stat call fails or reports a smaller size than was previously
        seen, then any previously cached size is left unchanged.

        Returns:
            boolean or None: True if the file size increased; False if it was
                the same or decreased; or None if neither case could be detected
                (either because the previous size had not been recorded yet, or
                because the stat call for the current size failed).
        """
        previous_size = self._file_size
        try:
            self._file_size = tf.io.gfile.stat(self._file_path).length
        except tf.errors.OpError as e:
            logger.error("Failed to stat %s: %s", self._file_path, e)
            return None
        logger.debug(
            "Stat on %s got size %d, previous size %s",
            self._file_path,
            self._file_size,
            previous_size,
        )
        if previous_size is None:
            return None
        if self._file_size > previous_size:
            return True
        if self._file_size < previous_size:
            logger.warning(
                "File %s shrank from previous size %d to size %d",
                self._file_path,
                previous_size,
                self._file_size,
            )
            # In case this was transient, preserve the previously cached size,
            # to avoid reporting a spurious increase next time. If the file was
            # actually truncated, we can't recover anyway, so just ignore it.
            self._file_size = previous_size
        return False


class LegacyEventFileLoader(RawEventFileLoader):
    """An iterator that yields parsed Event protos."""

    def Load(self):
        """Loads all new events from disk.

        Calling Load multiple times in a row will not 'drop' events as long as the
        return value is not iterated over.

        Yields:
          All events in the file that have not been yielded yet.
        """
        for record in super().Load():
            yield event_pb2.Event.FromString(record)


class EventFileLoader(LegacyEventFileLoader):
    """An iterator that passes events through read-time compat layers.

    Specifically, this includes `data_compat` and `dataclass_compat`.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Track initial metadata for each tag, for `dataclass_compat`.
        # This is meant to be tracked per run, not per event file, so
        # there is a potential failure case when the second event file
        # in a single run has no summary metadata. This only occurs when
        # all of the following hold: (a) the events were written with
        # the TensorFlow 1.x (not 2.x) writer, (b) the summaries were
        # created by `tensorboard.summary.v1` ops and so do not undergo
        # `data_compat` transformation, and (c) the file writer was
        # reopened by calling `.reopen()` on it, which creates a new
        # file but does not clear the tag cache. This is considered
        # sufficiently improbable that we don't take extra mitigations.
        self._initial_metadata = {}  # from tag name to `SummaryMetadata`

    def Load(self):
        for event in super().Load():
            event = data_compat.migrate_event(event)
            events = dataclass_compat.migrate_event(
                event, self._initial_metadata
            )
            for event in events:
                yield event


class TimestampedEventFileLoader(EventFileLoader):
    """An iterator that yields (UNIX timestamp float, Event proto) pairs."""

    def Load(self):
        """Loads all new events and their wall time values from disk.

        Calling Load multiple times in a row will not 'drop' events as long as the
        return value is not iterated over.

        Yields:
          Pairs of (UNIX timestamp float, Event proto) for all events in the file
          that have not been yielded yet.
        """
        for event in super().Load():
            yield (event.wall_time, event)