265 lines
9.8 KiB
Python
265 lines
9.8 KiB
Python
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ==============================================================================
|
|
"""TensorBoard HTTP utilities."""
|
|
|
|
|
|
import gzip
|
|
import io
|
|
import json
|
|
import re
|
|
import struct
|
|
import time
|
|
import wsgiref.handlers
|
|
|
|
import werkzeug
|
|
|
|
from tensorboard.backend import json_util
|
|
|
|
_DISALLOWED_CHAR_IN_DOMAIN = re.compile(r"\s")
|
|
|
|
# TODO(stephanwlee): Refactor this to not use the module variable but
|
|
# instead use a configurable via some kind of assets provider which would
|
|
# hold configurations for the CSP.
|
|
|
|
# @vaadin/vaadin-lumo-styles/font-icons(via vaadin-grid) uses data URI for
|
|
# loading font icons.
|
|
_CSP_FONT_DOMAINS_WHITELIST = ["data:"]
|
|
_CSP_FRAME_DOMAINS_WHITELIST = []
|
|
_CSP_IMG_DOMAINS_WHITELIST = []
|
|
_CSP_SCRIPT_DOMAINS_WHITELIST = []
|
|
_CSP_CONNECT_DOMAINS_WHITELIST = []
|
|
_CSP_SCRIPT_SELF = True
|
|
# numericjs (via projector) uses unsafe-eval :(.
|
|
_CSP_SCRIPT_UNSAFE_EVAL = True
|
|
_CSP_STYLE_DOMAINS_WHITELIST = []
|
|
|
|
_EXTRACT_MIMETYPE_PATTERN = re.compile(r"^[^;\s]*")
|
|
_EXTRACT_CHARSET_PATTERN = re.compile(r"charset=([-_0-9A-Za-z]+)")
|
|
|
|
# Allows *, gzip or x-gzip, but forbid gzip;q=0
|
|
# https://tools.ietf.org/html/rfc7231#section-5.3.4
|
|
_ALLOWS_GZIP_PATTERN = re.compile(
|
|
r"(?:^|,|\s)(?:(?:x-)?gzip|\*)(?!;q=0)(?:\s|,|$)"
|
|
)
|
|
|
|
_TEXTUAL_MIMETYPES = set(
|
|
[
|
|
"application/javascript",
|
|
"application/json",
|
|
"application/json+protobuf",
|
|
"image/svg+xml",
|
|
"text/css",
|
|
"text/csv",
|
|
"text/html",
|
|
"text/javascript",
|
|
"text/plain",
|
|
"text/tab-separated-values",
|
|
"text/x-protobuf",
|
|
]
|
|
)
|
|
|
|
_JSON_MIMETYPES = set(
|
|
[
|
|
"application/json",
|
|
"application/json+protobuf",
|
|
]
|
|
)
|
|
|
|
# Do not support xhtml for now.
|
|
_HTML_MIMETYPE = "text/html"
|
|
|
|
|
|
def Respond(
|
|
request,
|
|
content,
|
|
content_type,
|
|
code=200,
|
|
expires=0,
|
|
content_encoding=None,
|
|
encoding="utf-8",
|
|
csp_scripts_sha256s=None,
|
|
headers=None,
|
|
):
|
|
"""Construct a werkzeug Response.
|
|
|
|
Responses are transmitted to the browser with compression if: a) the browser
|
|
supports it; b) it's sane to compress the content_type in question; and c)
|
|
the content isn't already compressed, as indicated by the content_encoding
|
|
parameter.
|
|
|
|
Browser and proxy caching is completely disabled by default. If the expires
|
|
parameter is greater than zero then the response will be able to be cached by
|
|
the browser for that many seconds; however, proxies are still forbidden from
|
|
caching so that developers can bypass the cache with Ctrl+Shift+R.
|
|
|
|
For textual content that isn't JSON, the encoding parameter is used as the
|
|
transmission charset which is automatically appended to the Content-Type
|
|
header. That is unless of course the content_type parameter contains a
|
|
charset parameter. If the two disagree, the characters in content will be
|
|
transcoded to the latter.
|
|
|
|
If content_type declares a JSON media type, then content MAY be a dict, list,
|
|
tuple, or set, in which case this function has an implicit composition with
|
|
json_util.Cleanse and json.dumps. The encoding parameter is used to decode
|
|
byte strings within the JSON object; therefore transmitting binary data
|
|
within JSON is not permitted. JSON is transmitted as ASCII unless the
|
|
content_type parameter explicitly defines a charset parameter, in which case
|
|
the serialized JSON bytes will use that instead of escape sequences.
|
|
|
|
Args:
|
|
request: A werkzeug Request object. Used mostly to check the
|
|
Accept-Encoding header.
|
|
content: Payload data as byte string, unicode string, or maybe JSON.
|
|
content_type: Media type and optionally an output charset.
|
|
code: Numeric HTTP status code to use.
|
|
expires: Second duration for browser caching.
|
|
content_encoding: Encoding if content is already encoded, e.g. 'gzip'.
|
|
encoding: Input charset if content parameter has byte strings.
|
|
csp_scripts_sha256s: List of base64 serialized sha256 of whitelisted script
|
|
elements for script-src of the Content-Security-Policy. If it is None, the
|
|
HTML will disallow any script to execute. It is only be used when the
|
|
content_type is text/html.
|
|
headers: Any additional headers to include on the response, as a
|
|
list of key-value tuples: e.g., `[("Allow", "GET")]`. In case of
|
|
conflict, these may be overridden with headers added by this function.
|
|
|
|
Returns:
|
|
A werkzeug Response object (a WSGI application).
|
|
"""
|
|
|
|
mimetype = _EXTRACT_MIMETYPE_PATTERN.search(content_type).group(0)
|
|
charset_match = _EXTRACT_CHARSET_PATTERN.search(content_type)
|
|
charset = charset_match.group(1) if charset_match else encoding
|
|
textual = charset_match or mimetype in _TEXTUAL_MIMETYPES
|
|
if mimetype in _JSON_MIMETYPES and isinstance(
|
|
content, (dict, list, set, tuple)
|
|
):
|
|
content = json.dumps(
|
|
json_util.Cleanse(content, encoding), ensure_ascii=not charset_match
|
|
)
|
|
|
|
# Ensure correct output encoding, transcoding if necessary.
|
|
if charset != encoding and isinstance(content, bytes):
|
|
content = content.decode(encoding)
|
|
if isinstance(content, str):
|
|
content = content.encode(charset)
|
|
|
|
if textual and not charset_match and mimetype not in _JSON_MIMETYPES:
|
|
content_type += "; charset=" + charset
|
|
gzip_accepted = _ALLOWS_GZIP_PATTERN.search(
|
|
request.headers.get("Accept-Encoding", "")
|
|
)
|
|
# Automatically gzip uncompressed text data if accepted.
|
|
if textual and not content_encoding and gzip_accepted:
|
|
out = io.BytesIO()
|
|
# Set mtime to zero to make payload for a given input deterministic.
|
|
with gzip.GzipFile(
|
|
fileobj=out, mode="wb", compresslevel=3, mtime=0
|
|
) as f:
|
|
f.write(content)
|
|
content = out.getvalue()
|
|
content_encoding = "gzip"
|
|
|
|
content_length = len(content)
|
|
direct_passthrough = False
|
|
# Automatically streamwise-gunzip precompressed data if not accepted.
|
|
if content_encoding == "gzip" and not gzip_accepted:
|
|
gzip_file = gzip.GzipFile(fileobj=io.BytesIO(content), mode="rb")
|
|
# Last 4 bytes of gzip formatted data (little-endian) store the original
|
|
# content length mod 2^32; we just assume it's the content length. That
|
|
# means we can't streamwise-gunzip >4 GB precompressed file; this is ok.
|
|
content_length = struct.unpack("<I", content[-4:])[0]
|
|
content = werkzeug.wsgi.wrap_file(request.environ, gzip_file)
|
|
content_encoding = None
|
|
direct_passthrough = True
|
|
|
|
headers = list(headers or [])
|
|
headers.append(("Content-Length", str(content_length)))
|
|
headers.append(("X-Content-Type-Options", "nosniff"))
|
|
if content_encoding:
|
|
headers.append(("Content-Encoding", content_encoding))
|
|
if expires > 0:
|
|
e = wsgiref.handlers.format_date_time(time.time() + float(expires))
|
|
headers.append(("Expires", e))
|
|
headers.append(("Cache-Control", "private, max-age=%d" % expires))
|
|
else:
|
|
headers.append(("Expires", "0"))
|
|
headers.append(("Cache-Control", "no-cache, must-revalidate"))
|
|
if mimetype == _HTML_MIMETYPE:
|
|
frags = (
|
|
_CSP_SCRIPT_DOMAINS_WHITELIST
|
|
+ [
|
|
"'self'" if _CSP_SCRIPT_SELF else "",
|
|
"'unsafe-eval'" if _CSP_SCRIPT_UNSAFE_EVAL else "",
|
|
]
|
|
+ [
|
|
"'sha256-{}'".format(sha256)
|
|
for sha256 in (csp_scripts_sha256s or [])
|
|
]
|
|
)
|
|
script_srcs = _create_csp_string(*frags)
|
|
|
|
csp_string = ";".join(
|
|
[
|
|
"default-src 'self'",
|
|
"font-src %s"
|
|
% _create_csp_string("'self'", *_CSP_FONT_DOMAINS_WHITELIST),
|
|
# Dynamic plugins are rendered inside an iframe.
|
|
"frame-src %s"
|
|
% _create_csp_string("'self'", *_CSP_FRAME_DOMAINS_WHITELIST),
|
|
"img-src %s"
|
|
% _create_csp_string(
|
|
"'self'",
|
|
# used by favicon
|
|
"data:",
|
|
# used by What-If tool for image sprites.
|
|
"blob:",
|
|
*_CSP_IMG_DOMAINS_WHITELIST,
|
|
),
|
|
"object-src 'none'",
|
|
"style-src %s"
|
|
% _create_csp_string(
|
|
"'self'",
|
|
# used by google-chart
|
|
"https://www.gstatic.com",
|
|
"data:",
|
|
# inline styles: Polymer templates + d3 uses inline styles.
|
|
"'unsafe-inline'",
|
|
*_CSP_STYLE_DOMAINS_WHITELIST,
|
|
),
|
|
"connect-src %s"
|
|
% _create_csp_string("'self'", *_CSP_CONNECT_DOMAINS_WHITELIST),
|
|
"script-src %s" % script_srcs,
|
|
]
|
|
)
|
|
|
|
headers.append(("Content-Security-Policy", csp_string))
|
|
|
|
if request.method == "HEAD":
|
|
content = None
|
|
|
|
return werkzeug.wrappers.Response(
|
|
response=content,
|
|
status=code,
|
|
headers=headers,
|
|
content_type=content_type,
|
|
direct_passthrough=direct_passthrough,
|
|
)
|
|
|
|
|
|
def _create_csp_string(*csp_fragments):
|
|
csp_string = " ".join([frag for frag in csp_fragments if frag])
|
|
return csp_string if csp_string else "'none'"
|