# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data ingestion logic backed by local event processing."""

import os
import re
import threading
import time


from tensorboard.backend.event_processing import data_provider
from tensorboard.backend.event_processing import plugin_event_multiplexer
from tensorboard.backend.event_processing import tag_types
from tensorboard.compat import tf
from tensorboard.data import ingester
from tensorboard.plugins.audio import metadata as audio_metadata
from tensorboard.plugins.histogram import metadata as histogram_metadata
from tensorboard.plugins.image import metadata as image_metadata
from tensorboard.plugins.pr_curve import metadata as pr_curve_metadata
from tensorboard.plugins.scalar import metadata as scalar_metadata
from tensorboard.util import tb_logging


DEFAULT_SIZE_GUIDANCE = {
    tag_types.TENSORS: 10,
}

# TODO(@wchargin): Replace with something that works for third-party plugins.
DEFAULT_TENSOR_SIZE_GUIDANCE = {
    scalar_metadata.PLUGIN_NAME: 1000,
    image_metadata.PLUGIN_NAME: 10,
    audio_metadata.PLUGIN_NAME: 10,
    histogram_metadata.PLUGIN_NAME: 500,
    pr_curve_metadata.PLUGIN_NAME: 100,
}

logger = tb_logging.get_logger()


class LocalDataIngester(ingester.DataIngester):
    """Data ingestion implementation to use when running locally."""

    def __init__(self, flags):
        """Initializes a `LocalDataIngester` from `flags`.

        Args:
          flags: An argparse.Namespace containing TensorBoard CLI flags.

        Returns:
          The new `LocalDataIngester`.
        """
        tensor_size_guidance = dict(DEFAULT_TENSOR_SIZE_GUIDANCE)
        tensor_size_guidance.update(flags.samples_per_plugin)
        self._multiplexer = plugin_event_multiplexer.EventMultiplexer(
            size_guidance=DEFAULT_SIZE_GUIDANCE,
            tensor_size_guidance=tensor_size_guidance,
            purge_orphaned_data=flags.purge_orphaned_data,
            max_reload_threads=flags.max_reload_threads,
            event_file_active_filter=_get_event_file_active_filter(flags),
            detect_file_replacement=flags.detect_file_replacement,
        )
        self._data_provider = data_provider.MultiplexerDataProvider(
            self._multiplexer, flags.logdir or flags.logdir_spec
        )
        self._reload_interval = flags.reload_interval
        self._reload_task = flags.reload_task
        if flags.logdir:
            self._path_to_run = {os.path.expanduser(flags.logdir): None}
        else:
            self._path_to_run = _parse_event_files_spec(flags.logdir_spec)

        # Conditionally import tensorflow_io.
        if getattr(tf, "__version__", "stub") != "stub":
            _check_filesystem_support(self._path_to_run.keys())

    @property
    def data_provider(self):
        return self._data_provider

    @property
    def deprecated_multiplexer(self):
        return self._multiplexer

    def start(self):
        """Starts ingesting data based on the ingester flag configuration."""

        def _reload():
            while True:
                start = time.time()
                logger.info("TensorBoard reload process beginning")
                for path, name in self._path_to_run.items():
                    self._multiplexer.AddRunsFromDirectory(path, name)
                logger.info(
                    "TensorBoard reload process: Reload the whole Multiplexer"
                )
                self._multiplexer.Reload()
                duration = time.time() - start
                logger.info(
                    "TensorBoard done reloading. Load took %0.3f secs", duration
                )
                if self._reload_interval == 0:
                    # Only load the multiplexer once. Do not continuously reload.
                    break
                time.sleep(self._reload_interval)

        if self._reload_task == "process":
            logger.info("Launching reload in a child process")
            import multiprocessing

            process = multiprocessing.Process(target=_reload, name="Reloader")
            # Best-effort cleanup; on exit, the main TB parent process will attempt to
            # kill all its daemonic children.
            process.daemon = True
            process.start()
        elif self._reload_task in ("thread", "auto"):
            logger.info("Launching reload in a daemon thread")
            thread = threading.Thread(target=_reload, name="Reloader")
            # Make this a daemon thread, which won't block TB from exiting.
            thread.daemon = True
            thread.start()
        elif self._reload_task == "blocking":
            if self._reload_interval != 0:
                raise ValueError(
                    "blocking reload only allowed with load_interval=0"
                )
            _reload()
        else:
            raise ValueError("unrecognized reload_task: %s" % self._reload_task)


def _get_event_file_active_filter(flags):
    """Returns a predicate for whether an event file load timestamp is active.

    Returns:
      A predicate function accepting a single UNIX timestamp float argument, or
      None if multi-file loading is not enabled.
    """
    if not flags.reload_multifile:
        return None
    inactive_secs = flags.reload_multifile_inactive_secs
    if inactive_secs == 0:
        return None
    if inactive_secs < 0:
        return lambda timestamp: True
    return lambda timestamp: timestamp + inactive_secs >= time.time()


def _parse_event_files_spec(logdir_spec):
    """Parses `logdir_spec` into a map from paths to run group names.

    The `--logdir_spec` flag format is a comma-separated list of path
    specifications. A path spec looks like 'group_name:/path/to/directory' or
    '/path/to/directory'; in the latter case, the group is unnamed. Group names
    cannot start with a forward slash: /foo:bar/baz will be interpreted as a spec
    with no name and path '/foo:bar/baz'.

    Globs are not supported.

    Args:
      logdir: A comma-separated list of run specifications.
    Returns:
      A dict mapping directory paths to names like {'/path/to/directory': 'name'}.
      Groups without an explicit name are named after their path. If logdir is
      None, returns an empty dict, which is helpful for testing things that don't
      require any valid runs.
    """
    files = {}
    if logdir_spec is None:
        return files
    # Make sure keeping consistent with ParseURI in core/lib/io/path.cc
    uri_pattern = re.compile("[a-zA-Z][0-9a-zA-Z.]*://.*")
    for specification in logdir_spec.split(","):
        # Check if the spec contains group. A spec start with xyz:// is regarded as
        # URI path spec instead of group spec. If the spec looks like /foo:bar/baz,
        # then we assume it's a path with a colon. If the spec looks like
        # [a-zA-z]:\foo then we assume its a Windows path and not a single letter
        # group
        if (
            uri_pattern.match(specification) is None
            and ":" in specification
            and specification[0] != "/"
            and not os.path.splitdrive(specification)[0]
        ):
            # We split at most once so run_name:/path:with/a/colon will work.
            run_name, _, path = specification.partition(":")
        else:
            run_name = None
            path = specification
        if uri_pattern.match(path) is None:
            path = os.path.realpath(os.path.expanduser(path))
        files[path] = run_name
    return files


def _get_filesystem_scheme(path):
    """Extracts filesystem scheme from a given path.

    The filesystem scheme is usually separated by `://` from the local filesystem
    path if given. For example, the scheme of `file://tmp/tf` is `file`.

    Args:
        path: A strings representing an input log directory.
    Returns:
        Filesystem scheme, None if the path doesn't contain one.
    """
    if "://" not in path:
        return None
    return path.split("://")[0]


def _check_filesystem_support(paths):
    """Examines the list of filesystems user requested.

    If TF I/O schemes are requested, try to import tensorflow_io module.

    Args:
        paths: A list of strings representing input log directories.
    """
    get_registered_schemes = getattr(
        tf.io.gfile, "get_registered_schemes", None
    )
    registered_schemes = (
        None if get_registered_schemes is None else get_registered_schemes()
    )

    # Only need to check one path for each scheme.
    scheme_to_path = {_get_filesystem_scheme(path): path for path in paths}
    missing_scheme = None
    for scheme, path in scheme_to_path.items():
        if scheme is None:
            continue
        # Use `tf.io.gfile.exists.get_registered_schemes` if possible.
        if registered_schemes is not None:
            if scheme not in registered_schemes:
                missing_scheme = scheme
                break
        else:
            # Fall back to `tf.io.gfile.exists`.
            try:
                tf.io.gfile.exists(path)
            except tf.errors.UnimplementedError:
                missing_scheme = scheme
                break
            except tf.errors.OpError:
                # Swallow other errors; we aren't concerned about them at this point.
                pass

    if missing_scheme:
        try:
            import tensorflow_io  # noqa: F401
        except ImportError as e:
            supported_schemes_msg = (
                " (supported schemes: {})".format(registered_schemes)
                if registered_schemes
                else ""
            )
            raise tf.errors.UnimplementedError(
                None,
                None,
                (
                    "Error: Unsupported filename scheme '{}'{}. For additional"
                    + " filesystem support, consider installing TensorFlow I/O"
                    + " (https://www.tensorflow.org/io) via `pip install tensorflow-io`."
                ).format(missing_scheme, supported_schemes_msg),
            ) from e