"""Python wrappers around TensorFlow ops.

This file is MACHINE GENERATED! Do not edit.
"""
|
|
|
|
import collections
|
|
|
|
from tensorflow.python import pywrap_tfe as pywrap_tfe
|
|
from tensorflow.python.eager import context as _context
|
|
from tensorflow.python.eager import core as _core
|
|
from tensorflow.python.eager import execute as _execute
|
|
from tensorflow.python.framework import dtypes as _dtypes
|
|
|
|
from tensorflow.python.framework import op_def_registry as _op_def_registry
|
|
from tensorflow.python.framework import ops as _ops
|
|
from tensorflow.python.framework import op_def_library as _op_def_library
|
|
from tensorflow.python.util.deprecation import deprecated_endpoints
|
|
from tensorflow.python.util import dispatch as _dispatch
|
|
from tensorflow.python.util.tf_export import tf_export
|
|
|
|
from typing import TypeVar
|
|
|
|
@_dispatch.add_fallback_dispatch_list
@_dispatch.add_type_based_api_dispatcher
@tf_export('strings.as_string', 'as_string', v1=['dtypes.as_string', 'strings.as_string', 'as_string'])
@deprecated_endpoints('dtypes.as_string')
def as_string(input, precision=-1, scientific=False, shortest=False, width=-1, fill="", name=None):
  r"""Converts each entry in the given tensor to strings.

  Supports many numeric types and boolean.

  For Unicode, see the
  [Working with Unicode text](https://www.tensorflow.org/tutorials/representation/unicode)
  tutorial.

  Examples:

  >>> tf.strings.as_string([3, 2])
  <tf.Tensor: shape=(2,), dtype=string, numpy=array([b'3', b'2'], dtype=object)>
  >>> tf.strings.as_string([3.1415926, 2.71828], precision=2).numpy()
  array([b'3.14', b'2.72'], dtype=object)

  Args:
    input: A `Tensor`. Must be one of the following types: `float32`, `float64`, `int32`, `uint8`, `int16`, `int8`, `int64`, `bfloat16`, `uint16`, `half`, `uint32`, `uint64`, `complex64`, `complex128`, `bool`, `variant`.
    precision: An optional `int`. Defaults to `-1`.
      The post-decimal precision to use for floating point numbers.
      Only used if precision > -1.
    scientific: An optional `bool`. Defaults to `False`.
      Use scientific notation for floating point numbers.
    shortest: An optional `bool`. Defaults to `False`.
      Use shortest representation (either scientific or standard) for
      floating point numbers.
    width: An optional `int`. Defaults to `-1`.
      Pad pre-decimal numbers to this width.
      Applies to both floating point and integer numbers.
      Only used if width > -1.
    fill: An optional `string`. Defaults to `""`.
      The value to pad if width > -1.  If empty, pads with spaces.
      Another typical value is '0'.  String cannot be longer than 1 character.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    # Eager mode: try the fast-path executor first.
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
          _ctx, "AsString", name, input, "precision", precision, "scientific",
          scientific, "shortest", shortest, "width", width, "fill", fill)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    # Fast path declined: give the type-based dispatcher a chance, then use
    # the Python eager fallback.
    try:
      _result = _dispatcher_for_as_string(
          (input, precision, scientific, shortest, width, fill, name,), None)
      if _result is not NotImplemented:
        return _result
      return as_string_eager_fallback(
          input, precision=precision, scientific=scientific,
          shortest=shortest, width=width, fill=fill, name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
    except (TypeError, ValueError):
      # Let fallback dispatchers handle the call; re-raise the original
      # error when none of them supports it.
      _result = _dispatch.dispatch(
          as_string, (), dict(input=input, precision=precision,
                              scientific=scientific, shortest=shortest,
                              width=width, fill=fill, name=name)
      )
      if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
        return _result
      raise
  else:
    _result = _dispatcher_for_as_string(
        (input, precision, scientific, shortest, width, fill, name,), None)
    if _result is not NotImplemented:
      return _result
  # Add nodes to the TensorFlow graph.
  if precision is None:
    precision = -1
  precision = _execute.make_int(precision, "precision")
  if scientific is None:
    scientific = False
  scientific = _execute.make_bool(scientific, "scientific")
  if shortest is None:
    shortest = False
  shortest = _execute.make_bool(shortest, "shortest")
  if width is None:
    width = -1
  width = _execute.make_int(width, "width")
  if fill is None:
    fill = ""
  fill = _execute.make_str(fill, "fill")
  try:
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "AsString", input=input, precision=precision, scientific=scientific,
        shortest=shortest, width=width, fill=fill, name=name)
  except (TypeError, ValueError):
    _result = _dispatch.dispatch(
        as_string, (), dict(input=input, precision=precision,
                            scientific=scientific, shortest=shortest,
                            width=width, fill=fill, name=name)
    )
    if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
      return _result
    raise
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ("T", _op._get_attr_type("T"), "precision",
              _op._get_attr_int("precision"), "scientific",
              _op._get_attr_bool("scientific"), "shortest",
              _op._get_attr_bool("shortest"), "width",
              _op._get_attr_int("width"), "fill", _op.get_attr("fill"))
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "AsString", _inputs_flat, _attrs, _result)
  # The op has a single output; unwrap it from the one-element sequence.
  _result, = _result
  return _result
|
|
|
|
# Raw-op form of `as_string`, exported under the name `raw_ops.AsString`.
AsString = tf_export("raw_ops.AsString")(_ops.to_raw_op(as_string))
# Type-based dispatch entry point that `as_string` consults before executing.
_dispatcher_for_as_string = as_string._tf_type_based_dispatcher.Dispatch
|
|
|
|
|
|
def as_string_eager_fallback(input, precision, scientific, shortest, width, fill, name, ctx):
  """Eager fallback for `as_string`.

  Replaces `None` attrs with their defaults, validates them with the
  `_execute.make_*` helpers, converts `input` to a matching eager tensor and
  runs the `AsString` op through `_execute.execute`.

  Args:
    input: Value to convert; see `as_string` for the accepted dtypes.
    precision: `int` attr, or `None` for `-1`.
    scientific: `bool` attr, or `None` for `False`.
    shortest: `bool` attr, or `None` for `False`.
    width: `int` attr, or `None` for `-1`.
    fill: `string` attr, or `None` for `""`.
    name: A name for the operation (may be `None`).
    ctx: The eager context passed to the execute helpers.

  Returns:
    The single output of the `AsString` op.
  """
  if precision is None:
    precision = -1
  precision = _execute.make_int(precision, "precision")
  if scientific is None:
    scientific = False
  scientific = _execute.make_bool(scientific, "scientific")
  if shortest is None:
    shortest = False
  shortest = _execute.make_bool(shortest, "shortest")
  if width is None:
    width = -1
  width = _execute.make_int(width, "width")
  if fill is None:
    fill = ""
  fill = _execute.make_str(fill, "fill")
  _attr_T, (input,) = _execute.args_to_matching_eager([input], ctx, [_dtypes.float32, _dtypes.float64, _dtypes.int32, _dtypes.uint8, _dtypes.int16, _dtypes.int8, _dtypes.int64, _dtypes.bfloat16, _dtypes.uint16, _dtypes.half, _dtypes.uint32, _dtypes.uint64, _dtypes.complex64, _dtypes.complex128, _dtypes.bool, _dtypes.variant, ])
  _inputs_flat = [input]
  _attrs = ("T", _attr_T, "precision", precision, "scientific", scientific,
            "shortest", shortest, "width", width, "fill", fill)
  _result = _execute.execute(b"AsString", 1, inputs=_inputs_flat,
                             attrs=_attrs, ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "AsString", _inputs_flat, _attrs, _result)
  # One output was requested; unwrap it.
  _result, = _result
  return _result
|
|
|
|
|
|
@_dispatch.add_fallback_dispatch_list
@_dispatch.add_type_based_api_dispatcher
@tf_export('io.decode_base64', v1=['io.decode_base64', 'decode_base64'])
@deprecated_endpoints('decode_base64')
def decode_base64(input, name=None):
  r"""Decode web-safe base64-encoded strings.

  Input may or may not have padding at the end. See
  [EncodeBase64](https://www.tensorflow.org/api_docs/python/tf/io/encode_base64)
  for padding. Web-safe means that input must use - and _ instead of + and /.

  Args:
    input: A `Tensor` of type `string`. Base64 strings to decode.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    # Eager mode: try the fast-path executor first.
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
          _ctx, "DecodeBase64", name, input)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    # Fast path declined: try type-based dispatch, then the eager fallback.
    try:
      _result = _dispatcher_for_decode_base64(
          (input, name,), None)
      if _result is not NotImplemented:
        return _result
      return decode_base64_eager_fallback(
          input, name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
    except (TypeError, ValueError):
      # Let fallback dispatchers handle the call; re-raise the original
      # error when none of them supports it.
      _result = _dispatch.dispatch(
          decode_base64, (), dict(input=input, name=name)
      )
      if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
        return _result
      raise
  else:
    _result = _dispatcher_for_decode_base64(
        (input, name,), None)
    if _result is not NotImplemented:
      return _result
  # Add nodes to the TensorFlow graph.
  try:
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "DecodeBase64", input=input, name=name)
  except (TypeError, ValueError):
    _result = _dispatch.dispatch(
        decode_base64, (), dict(input=input, name=name)
    )
    if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
      return _result
    raise
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ()
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "DecodeBase64", _inputs_flat, _attrs, _result)
  # The op has a single output; unwrap it from the one-element sequence.
  _result, = _result
  return _result
|
|
|
|
# Raw-op form of `decode_base64`, exported under the name
# `raw_ops.DecodeBase64`.
DecodeBase64 = tf_export("raw_ops.DecodeBase64")(_ops.to_raw_op(decode_base64))
# Type-based dispatch entry point that `decode_base64` consults before
# executing.
_dispatcher_for_decode_base64 = decode_base64._tf_type_based_dispatcher.Dispatch
|
|
|
|
|
|
def decode_base64_eager_fallback(input, name, ctx):
  """Eager fallback for `decode_base64`.

  Converts `input` to a string tensor and runs the `DecodeBase64` op through
  `_execute.execute`.

  Args:
    input: Value convertible to a `string` tensor.
    name: A name for the operation (may be `None`).
    ctx: The eager context passed to `_execute.execute`.

  Returns:
    The single output of the `DecodeBase64` op.
  """
  input = _ops.convert_to_tensor(input, _dtypes.string)
  _inputs_flat = [input]
  _attrs = None  # The op takes no attrs.
  _result = _execute.execute(b"DecodeBase64", 1, inputs=_inputs_flat,
                             attrs=_attrs, ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "DecodeBase64", _inputs_flat, _attrs, _result)
  # One output was requested; unwrap it.
  _result, = _result
  return _result
|
|
|
|
|
|
@_dispatch.add_fallback_dispatch_list
@_dispatch.add_type_based_api_dispatcher
@tf_export('io.encode_base64', v1=['io.encode_base64', 'encode_base64'])
@deprecated_endpoints('encode_base64')
def encode_base64(input, pad=False, name=None):
  r"""Encode strings into web-safe base64 format.

  Refer to [this article](https://en.wikipedia.org/wiki/Base64) for more information on
  base64 format. Base64 strings may have padding with '=' at the
  end so that the encoded has length multiple of 4. See Padding section of the
  link above.

  Web-safe means that the encoder uses - and _ instead of + and /.

  Args:
    input: A `Tensor` of type `string`. Strings to be encoded.
    pad: An optional `bool`. Defaults to `False`.
      Bool whether padding is applied at the ends.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    # Eager mode: try the fast-path executor first.
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
          _ctx, "EncodeBase64", name, input, "pad", pad)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    # Fast path declined: try type-based dispatch, then the eager fallback.
    try:
      _result = _dispatcher_for_encode_base64(
          (input, pad, name,), None)
      if _result is not NotImplemented:
        return _result
      return encode_base64_eager_fallback(
          input, pad=pad, name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
    except (TypeError, ValueError):
      # Let fallback dispatchers handle the call; re-raise the original
      # error when none of them supports it.
      _result = _dispatch.dispatch(
          encode_base64, (), dict(input=input, pad=pad, name=name)
      )
      if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
        return _result
      raise
  else:
    _result = _dispatcher_for_encode_base64(
        (input, pad, name,), None)
    if _result is not NotImplemented:
      return _result
  # Add nodes to the TensorFlow graph.
  if pad is None:
    pad = False
  pad = _execute.make_bool(pad, "pad")
  try:
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "EncodeBase64", input=input, pad=pad, name=name)
  except (TypeError, ValueError):
    _result = _dispatch.dispatch(
        encode_base64, (), dict(input=input, pad=pad, name=name)
    )
    if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
      return _result
    raise
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ("pad", _op._get_attr_bool("pad"))
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "EncodeBase64", _inputs_flat, _attrs, _result)
  # The op has a single output; unwrap it from the one-element sequence.
  _result, = _result
  return _result
|
|
|
|
# Raw-op form of `encode_base64`, exported under the name
# `raw_ops.EncodeBase64`.
EncodeBase64 = tf_export("raw_ops.EncodeBase64")(_ops.to_raw_op(encode_base64))
# Type-based dispatch entry point that `encode_base64` consults before
# executing.
_dispatcher_for_encode_base64 = encode_base64._tf_type_based_dispatcher.Dispatch
|
|
|
|
|
|
def encode_base64_eager_fallback(input, pad, name, ctx):
  """Eager fallback for `encode_base64`.

  Normalizes the `pad` attr, converts `input` to a string tensor and runs the
  `EncodeBase64` op through `_execute.execute`.

  Args:
    input: Value convertible to a `string` tensor.
    pad: `bool` attr, or `None` for `False`.
    name: A name for the operation (may be `None`).
    ctx: The eager context passed to `_execute.execute`.

  Returns:
    The single output of the `EncodeBase64` op.
  """
  if pad is None:
    pad = False
  pad = _execute.make_bool(pad, "pad")
  input = _ops.convert_to_tensor(input, _dtypes.string)
  _inputs_flat = [input]
  _attrs = ("pad", pad)
  _result = _execute.execute(b"EncodeBase64", 1, inputs=_inputs_flat,
                             attrs=_attrs, ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "EncodeBase64", _inputs_flat, _attrs, _result)
  # One output was requested; unwrap it.
  _result, = _result
  return _result
|
|
|
|
|
|
def reduce_join(inputs, reduction_indices, keep_dims=False, separator="", name=None):
  r"""Joins a string Tensor across the given dimensions.

  Computes the string join across dimensions in the given string Tensor of shape
  `[\\(d_0, d_1, ..., d_{n-1}\\)]`.  Returns a new Tensor created by joining the input
  strings with the given separator (default: empty string).  Negative indices are
  counted backwards from the end, with `-1` being equivalent to `n - 1`.  If
  indices are not specified, joins across all dimensions beginning from `n - 1`
  through `0`.

  For example:

  ```python
  # tensor `a` is [["a", "b"], ["c", "d"]]
  tf.reduce_join(a, 0) ==> ["ac", "bd"]
  tf.reduce_join(a, 1) ==> ["ab", "cd"]
  tf.reduce_join(a, -2) = tf.reduce_join(a, 0) ==> ["ac", "bd"]
  tf.reduce_join(a, -1) = tf.reduce_join(a, 1) ==> ["ab", "cd"]
  tf.reduce_join(a, 0, keep_dims=True) ==> [["ac", "bd"]]
  tf.reduce_join(a, 1, keep_dims=True) ==> [["ab"], ["cd"]]
  tf.reduce_join(a, 0, separator=".") ==> ["a.c", "b.d"]
  tf.reduce_join(a, [0, 1]) ==> "acbd"
  tf.reduce_join(a, [1, 0]) ==> "abcd"
  tf.reduce_join(a, []) ==> [["a", "b"], ["c", "d"]]
  tf.reduce_join(a) = tf.reduce_join(a, [1, 0]) ==> "abcd"
  ```

  Args:
    inputs: A `Tensor` of type `string`.
      The input to be joined.  All reduced indices must have non-zero size.
    reduction_indices: A `Tensor` of type `int32`.
      The dimensions to reduce over.  Dimensions are reduced in the
      order specified.  Omitting `reduction_indices` is equivalent to passing
      `[n-1, n-2, ..., 0]`.  Negative indices from `-n` to `-1` are supported.
    keep_dims: An optional `bool`. Defaults to `False`.
      If `True`, retain reduced dimensions with length `1`.
    separator: An optional `string`. Defaults to `""`.
      The separator to use when joining.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    # Eager mode: try the fast-path executor first.
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
          _ctx, "ReduceJoin", name, inputs, reduction_indices, "keep_dims",
          keep_dims, "separator", separator)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    # Fast path declined: use the Python eager fallback.
    try:
      return reduce_join_eager_fallback(
          inputs, reduction_indices, keep_dims=keep_dims, separator=separator,
          name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
  # Add nodes to the TensorFlow graph.
  if keep_dims is None:
    keep_dims = False
  keep_dims = _execute.make_bool(keep_dims, "keep_dims")
  if separator is None:
    separator = ""
  separator = _execute.make_str(separator, "separator")
  _, _, _op, _outputs = _op_def_library._apply_op_helper(
      "ReduceJoin", inputs=inputs, reduction_indices=reduction_indices,
      keep_dims=keep_dims, separator=separator, name=name)
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ("keep_dims", _op._get_attr_bool("keep_dims"), "separator",
              _op.get_attr("separator"))
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "ReduceJoin", _inputs_flat, _attrs, _result)
  # The op has a single output; unwrap it from the one-element sequence.
  _result, = _result
  return _result
|
|
|
|
# Raw-op form of `reduce_join`, exported under the name `raw_ops.ReduceJoin`.
ReduceJoin = tf_export("raw_ops.ReduceJoin")(_ops.to_raw_op(reduce_join))
|
|
|
|
|
|
def reduce_join_eager_fallback(inputs, reduction_indices, keep_dims, separator, name, ctx):
  """Eager fallback for `reduce_join`.

  Normalizes the attrs, converts both inputs to tensors and runs the
  `ReduceJoin` op through `_execute.execute`.

  Args:
    inputs: Value convertible to a `string` tensor.
    reduction_indices: Value convertible to an `int32` tensor.
    keep_dims: `bool` attr, or `None` for `False`.
    separator: `string` attr, or `None` for `""`.
    name: A name for the operation (may be `None`).
    ctx: The eager context passed to `_execute.execute`.

  Returns:
    The single output of the `ReduceJoin` op.
  """
  if keep_dims is None:
    keep_dims = False
  keep_dims = _execute.make_bool(keep_dims, "keep_dims")
  if separator is None:
    separator = ""
  separator = _execute.make_str(separator, "separator")
  inputs = _ops.convert_to_tensor(inputs, _dtypes.string)
  reduction_indices = _ops.convert_to_tensor(reduction_indices, _dtypes.int32)
  _inputs_flat = [inputs, reduction_indices]
  _attrs = ("keep_dims", keep_dims, "separator", separator)
  _result = _execute.execute(b"ReduceJoin", 1, inputs=_inputs_flat,
                             attrs=_attrs, ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "ReduceJoin", _inputs_flat, _attrs, _result)
  # One output was requested; unwrap it.
  _result, = _result
  return _result
|
|
|
|
|
|
def regex_full_match(input, pattern, name=None):
  r"""Check if the input matches the regex pattern.

  The input is a string tensor of any shape. The pattern is a scalar
  string tensor which is applied to every element of the input tensor.
  The boolean values (True or False) of the output tensor indicate
  if the input matches the regex pattern provided.

  The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)

  Examples:

  >>> tf.strings.regex_full_match(["TF lib", "lib TF"], ".*lib$")
  <tf.Tensor: shape=(2,), dtype=bool, numpy=array([ True, False])>
  >>> tf.strings.regex_full_match(["TF lib", "lib TF"], ".*TF$")
  <tf.Tensor: shape=(2,), dtype=bool, numpy=array([False,  True])>

  Args:
    input: A `Tensor` of type `string`.
      A string tensor of the text to be processed.
    pattern: A `Tensor` of type `string`.
      A scalar string tensor containing the regular expression to match the input.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `bool`.
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    # Eager mode: try the fast-path executor first.
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
          _ctx, "RegexFullMatch", name, input, pattern)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    # Fast path declined: use the Python eager fallback.
    try:
      return regex_full_match_eager_fallback(
          input, pattern, name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
  # Add nodes to the TensorFlow graph.
  _, _, _op, _outputs = _op_def_library._apply_op_helper(
      "RegexFullMatch", input=input, pattern=pattern, name=name)
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ()
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "RegexFullMatch", _inputs_flat, _attrs, _result)
  # The op has a single output; unwrap it from the one-element sequence.
  _result, = _result
  return _result
|
|
|
|
# Raw-op form of `regex_full_match`, exported under the name
# `raw_ops.RegexFullMatch`.
RegexFullMatch = tf_export("raw_ops.RegexFullMatch")(_ops.to_raw_op(regex_full_match))
|
|
|
|
|
|
def regex_full_match_eager_fallback(input, pattern, name, ctx):
  """Eager fallback for `regex_full_match`.

  Converts `input` and `pattern` to string tensors and runs the
  `RegexFullMatch` op through `_execute.execute`.

  Args:
    input: Value convertible to a `string` tensor.
    pattern: Value convertible to a `string` tensor.
    name: A name for the operation (may be `None`).
    ctx: The eager context passed to `_execute.execute`.

  Returns:
    The single output of the `RegexFullMatch` op.
  """
  input = _ops.convert_to_tensor(input, _dtypes.string)
  pattern = _ops.convert_to_tensor(pattern, _dtypes.string)
  _inputs_flat = [input, pattern]
  _attrs = None  # The op takes no attrs.
  _result = _execute.execute(b"RegexFullMatch", 1, inputs=_inputs_flat,
                             attrs=_attrs, ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "RegexFullMatch", _inputs_flat, _attrs, _result)
  # One output was requested; unwrap it.
  _result, = _result
  return _result
|
|
|
|
|
|
def regex_replace(input, pattern, rewrite, replace_global=True, name=None):
  r"""Replaces matches of the `pattern` regular expression in `input` with the
  replacement string provided in `rewrite`.

  It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)

  Args:
    input: A `Tensor` of type `string`. The text to be processed.
    pattern: A `Tensor` of type `string`.
      The regular expression to be matched in the `input` strings.
    rewrite: A `Tensor` of type `string`.
      The rewrite string to be substituted for the `pattern` expression where it is
      matched in the `input` strings.
    replace_global: An optional `bool`. Defaults to `True`.
      If True, the replacement is global (that is, all matches of the `pattern` regular
      expression in each input string are rewritten), otherwise the `rewrite`
      substitution is only made for the first `pattern` match.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    # Eager mode: try the fast-path executor first.
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
          _ctx, "RegexReplace", name, input, pattern, rewrite, "replace_global",
          replace_global)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    # Fast path declined: use the Python eager fallback.
    try:
      return regex_replace_eager_fallback(
          input, pattern, rewrite, replace_global=replace_global, name=name,
          ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
  # Add nodes to the TensorFlow graph.
  if replace_global is None:
    replace_global = True
  replace_global = _execute.make_bool(replace_global, "replace_global")
  _, _, _op, _outputs = _op_def_library._apply_op_helper(
      "RegexReplace", input=input, pattern=pattern, rewrite=rewrite,
      replace_global=replace_global, name=name)
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ("replace_global", _op._get_attr_bool("replace_global"))
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "RegexReplace", _inputs_flat, _attrs, _result)
  # The op has a single output; unwrap it from the one-element sequence.
  _result, = _result
  return _result
|
|
|
|
# Raw-op form of `regex_replace`, exported under the name
# `raw_ops.RegexReplace`.
RegexReplace = tf_export("raw_ops.RegexReplace")(_ops.to_raw_op(regex_replace))
|
|
|
|
|
|
def regex_replace_eager_fallback(input, pattern, rewrite, replace_global, name, ctx):
  """Eager fallback for `regex_replace`.

  Normalizes the `replace_global` attr, converts the three inputs to string
  tensors and runs the `RegexReplace` op through `_execute.execute`.

  Args:
    input: Value convertible to a `string` tensor.
    pattern: Value convertible to a `string` tensor.
    rewrite: Value convertible to a `string` tensor.
    replace_global: `bool` attr, or `None` for `True`.
    name: A name for the operation (may be `None`).
    ctx: The eager context passed to `_execute.execute`.

  Returns:
    The single output of the `RegexReplace` op.
  """
  if replace_global is None:
    replace_global = True
  replace_global = _execute.make_bool(replace_global, "replace_global")
  input = _ops.convert_to_tensor(input, _dtypes.string)
  pattern = _ops.convert_to_tensor(pattern, _dtypes.string)
  rewrite = _ops.convert_to_tensor(rewrite, _dtypes.string)
  _inputs_flat = [input, pattern, rewrite]
  _attrs = ("replace_global", replace_global)
  _result = _execute.execute(b"RegexReplace", 1, inputs=_inputs_flat,
                             attrs=_attrs, ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "RegexReplace", _inputs_flat, _attrs, _result)
  # One output was requested; unwrap it.
  _result, = _result
  return _result
|
|
|
|
|
|
def static_regex_full_match(input, pattern, name=None):
  r"""Check if the input matches the regex pattern.

  The input is a string tensor of any shape. The pattern is the
  regular expression to be matched with every element of the input tensor.
  The boolean values (True or False) of the output tensor indicate
  if the input matches the regex pattern provided.

  The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)

  Args:
    input: A `Tensor` of type `string`.
      A string tensor of the text to be processed.
    pattern: A `string`. The regular expression to match the input.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `bool`.
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    # Eager mode: try the fast-path executor first.
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
          _ctx, "StaticRegexFullMatch", name, input, "pattern", pattern)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    # Fast path declined: use the Python eager fallback.
    try:
      return static_regex_full_match_eager_fallback(
          input, pattern=pattern, name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
  # Add nodes to the TensorFlow graph.
  pattern = _execute.make_str(pattern, "pattern")
  _, _, _op, _outputs = _op_def_library._apply_op_helper(
      "StaticRegexFullMatch", input=input, pattern=pattern, name=name)
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ("pattern", _op.get_attr("pattern"))
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "StaticRegexFullMatch", _inputs_flat, _attrs, _result)
  # The op has a single output; unwrap it from the one-element sequence.
  _result, = _result
  return _result
|
|
|
|
# Raw-op form of `static_regex_full_match`, exported under the name
# `raw_ops.StaticRegexFullMatch`.
StaticRegexFullMatch = tf_export("raw_ops.StaticRegexFullMatch")(_ops.to_raw_op(static_regex_full_match))
|
|
|
|
|
|
def static_regex_full_match_eager_fallback(input, pattern, name, ctx):
  """Eager fallback for `static_regex_full_match`.

  Validates the `pattern` attr, converts `input` to a string tensor and runs
  the `StaticRegexFullMatch` op through `_execute.execute`.

  Args:
    input: Value convertible to a `string` tensor.
    pattern: `string` attr holding the regular expression.
    name: A name for the operation (may be `None`).
    ctx: The eager context passed to `_execute.execute`.

  Returns:
    The single output of the `StaticRegexFullMatch` op.
  """
  pattern = _execute.make_str(pattern, "pattern")
  input = _ops.convert_to_tensor(input, _dtypes.string)
  _inputs_flat = [input]
  _attrs = ("pattern", pattern)
  _result = _execute.execute(b"StaticRegexFullMatch", 1, inputs=_inputs_flat,
                             attrs=_attrs, ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "StaticRegexFullMatch", _inputs_flat, _attrs, _result)
  # One output was requested; unwrap it.
  _result, = _result
  return _result
|
|
|
|
|
|
def static_regex_replace(input, pattern, rewrite, replace_global=True, name=None):
  r"""Replaces the match of pattern in input with rewrite.

  It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)

  Args:
    input: A `Tensor` of type `string`. The text to be processed.
    pattern: A `string`. The regular expression to match the input.
    rewrite: A `string`. The rewrite to be applied to the matched expression.
    replace_global: An optional `bool`. Defaults to `True`.
      If True, the replacement is global, otherwise the replacement
      is done only on the first match.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    # Eager mode: try the fast-path executor first.
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
          _ctx, "StaticRegexReplace", name, input, "pattern", pattern,
          "rewrite", rewrite, "replace_global", replace_global)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    # Fast path declined: use the Python eager fallback.
    try:
      return static_regex_replace_eager_fallback(
          input, pattern=pattern, rewrite=rewrite,
          replace_global=replace_global, name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
  # Add nodes to the TensorFlow graph.
  pattern = _execute.make_str(pattern, "pattern")
  rewrite = _execute.make_str(rewrite, "rewrite")
  if replace_global is None:
    replace_global = True
  replace_global = _execute.make_bool(replace_global, "replace_global")
  _, _, _op, _outputs = _op_def_library._apply_op_helper(
      "StaticRegexReplace", input=input, pattern=pattern, rewrite=rewrite,
      replace_global=replace_global, name=name)
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ("pattern", _op.get_attr("pattern"), "rewrite",
              _op.get_attr("rewrite"), "replace_global",
              _op._get_attr_bool("replace_global"))
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "StaticRegexReplace", _inputs_flat, _attrs, _result)
  # The op has a single output; unwrap it from the one-element sequence.
  _result, = _result
  return _result
|
|
|
|
# Raw-op form of `static_regex_replace`, exported under the name
# `raw_ops.StaticRegexReplace`.
StaticRegexReplace = tf_export("raw_ops.StaticRegexReplace")(_ops.to_raw_op(static_regex_replace))
|
|
|
|
|
|
def static_regex_replace_eager_fallback(input, pattern, rewrite, replace_global, name, ctx):
  """Eager fallback for `static_regex_replace`.

  Validates the attrs, converts `input` to a string tensor and runs the
  `StaticRegexReplace` op through `_execute.execute`.

  Args:
    input: Value convertible to a `string` tensor.
    pattern: `string` attr holding the regular expression.
    rewrite: `string` attr holding the rewrite.
    replace_global: `bool` attr, or `None` for `True`.
    name: A name for the operation (may be `None`).
    ctx: The eager context passed to `_execute.execute`.

  Returns:
    The single output of the `StaticRegexReplace` op.
  """
  pattern = _execute.make_str(pattern, "pattern")
  rewrite = _execute.make_str(rewrite, "rewrite")
  if replace_global is None:
    replace_global = True
  replace_global = _execute.make_bool(replace_global, "replace_global")
  input = _ops.convert_to_tensor(input, _dtypes.string)
  _inputs_flat = [input]
  _attrs = ("pattern", pattern, "rewrite", rewrite, "replace_global",
            replace_global)
  _result = _execute.execute(b"StaticRegexReplace", 1, inputs=_inputs_flat,
                             attrs=_attrs, ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "StaticRegexReplace", _inputs_flat, _attrs, _result)
  # One output was requested; unwrap it.
  _result, = _result
  return _result
|
|
|
|
|
|
def string_format(inputs, template="%s", placeholder="%s", summarize=3, name=None):
  r"""Formats a string template using a list of tensors.

  Formats a string template using a list of tensors, pretty-printing tensor summaries.

  Args:
    inputs: A list of `Tensor` objects.
      The list of tensors to format into the placeholder string.
    template: An optional `string`. Defaults to `"%s"`.
      A string, the template to format tensor summaries into.
    placeholder: An optional `string`. Defaults to `"%s"`.
      A string, at each placeholder in the template a subsequent tensor summary will be inserted.
    summarize: An optional `int`. Defaults to `3`.
      When formatting the tensor summaries print the first and last summarize entries of each tensor dimension.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    # Eager mode: try the fast-path executor first.
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
          _ctx, "StringFormat", name, inputs, "template", template,
          "placeholder", placeholder, "summarize", summarize)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    # Fast path declined: use the Python eager fallback.
    try:
      return string_format_eager_fallback(
          inputs, template=template, placeholder=placeholder,
          summarize=summarize, name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
  # Add nodes to the TensorFlow graph.
  if template is None:
    template = "%s"
  template = _execute.make_str(template, "template")
  if placeholder is None:
    placeholder = "%s"
  placeholder = _execute.make_str(placeholder, "placeholder")
  if summarize is None:
    summarize = 3
  summarize = _execute.make_int(summarize, "summarize")
  _, _, _op, _outputs = _op_def_library._apply_op_helper(
      "StringFormat", inputs=inputs, template=template,
      placeholder=placeholder, summarize=summarize,
      name=name)
  _result = _outputs[:]
  if _execute.must_record_gradient():
    _attrs = ("T", _op.get_attr("T"), "template", _op.get_attr("template"),
              "placeholder", _op.get_attr("placeholder"), "summarize",
              _op._get_attr_int("summarize"))
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "StringFormat", _inputs_flat, _attrs, _result)
  # The op has a single output; unwrap it from the one-element sequence.
  _result, = _result
  return _result
|
|
|
|
# Raw-op form of `string_format`, exported under the name
# `raw_ops.StringFormat`.
StringFormat = tf_export("raw_ops.StringFormat")(_ops.to_raw_op(string_format))
|
|
|
|
|
|
def string_format_eager_fallback(inputs, template, placeholder, summarize, name, ctx):
    """Runs the `StringFormat` op through `_execute.execute`.

    `string_format` calls this when its fast-path call raises
    `_core._FallbackException`.

    Args:
      inputs: Values to format; converted with
        `_execute.convert_to_mixed_eager_tensors`.
      template: Template string, or `None` to use the default `"%s"`.
      placeholder: Placeholder string, or `None` to use the default `"%s"`.
      summarize: Integer, or `None` to use the default `3`.
      name: Name forwarded to `_execute.execute`.
      ctx: Context forwarded to the conversion and execution helpers.

    Returns:
      The single output of the op.
    """
    # Substitute attribute defaults, then validate/coerce each attribute.
    if template is None:
        template = "%s"
    template = _execute.make_str(template, "template")
    if placeholder is None:
        placeholder = "%s"
    placeholder = _execute.make_str(placeholder, "placeholder")
    if summarize is None:
        summarize = 3
    summarize = _execute.make_int(summarize, "summarize")
    _attr_T, inputs = _execute.convert_to_mixed_eager_tensors(inputs, ctx)
    _inputs_flat = list(inputs)
    _attrs = ("T", _attr_T, "template", template, "placeholder", placeholder,
              "summarize", summarize)
    _result = _execute.execute(b"StringFormat", 1, inputs=_inputs_flat,
                               attrs=_attrs, ctx=ctx, name=name)
    if _execute.must_record_gradient():
        _execute.record_gradient(
            "StringFormat", _inputs_flat, _attrs, _result)
    # The op is executed with one output; unpack it from the result sequence.
    _result, = _result
    return _result
|
|
|
|
|
|
def string_join(inputs, separator="", name=None):
    r"""Joins the strings in the given list of string tensors into one tensor.

    The strings are joined with the given separator (default is an empty
    separator).

    Examples:

    >>> s = ["hello", "world", "tensorflow"]
    >>> tf.strings.join(s, " ")
    <tf.Tensor: shape=(), dtype=string, numpy=b'hello world tensorflow'>

    Args:
      inputs: A list of at least 1 `Tensor` objects with type `string`.
        A list of string tensors.  The tensors must all have the same shape,
        or be scalars.  Scalars may be mixed in; these will be broadcast to the
        shape of non-scalar inputs.
      separator: An optional `string`. Defaults to `""`.
        string, an optional join separator.
      name: A name for the operation (optional).

    Returns:
      A `Tensor` of type `string`.

    Raises:
      TypeError: If the graph-construction path is reached and `inputs` is not
        a list or tuple.
    """
    _ctx = _context._context or _context.context()
    tld = _ctx._thread_local_data
    if tld.is_eager:
        # Eager mode: try the `TFE_Py_FastPathExecute` fast path first.
        try:
            _result = pywrap_tfe.TFE_Py_FastPathExecute(
                _ctx, "StringJoin", name, inputs, "separator", separator)
            return _result
        except _core._NotOkStatusException as e:
            _ops.raise_from_not_ok_status(e, name)
        except _core._FallbackException:
            pass
        # The fast path asked for a fallback; use the Python eager path.
        try:
            return string_join_eager_fallback(
                inputs, separator=separator, name=name, ctx=_ctx)
        except _core._SymbolicException:
            pass  # Add nodes to the TensorFlow graph.
    # Add nodes to the TensorFlow graph.
    if not isinstance(inputs, (list, tuple)):
        raise TypeError(
            "Expected list for 'inputs' argument to "
            "'string_join' Op, not %r." % inputs)
    if separator is None:
        separator = ""
    separator = _execute.make_str(separator, "separator")
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "StringJoin", inputs=inputs, separator=separator, name=name)
    _result = _outputs[:]
    if _execute.must_record_gradient():
        _attrs = ("N", _op._get_attr_int("N"), "separator",
                  _op.get_attr("separator"))
        _inputs_flat = _op.inputs
        _execute.record_gradient(
            "StringJoin", _inputs_flat, _attrs, _result)
    _result, = _result
    return _result
|
|
|
|
# Raw-op form of `string_join` (built by `_ops.to_raw_op`), passed through
# `tf_export` under the name "raw_ops.StringJoin".
StringJoin = tf_export("raw_ops.StringJoin")(_ops.to_raw_op(string_join))
|
|
|
|
|
|
def string_join_eager_fallback(inputs, separator, name, ctx):
    """Runs the `StringJoin` op through `_execute.execute`.

    `string_join` calls this when its fast-path call raises
    `_core._FallbackException`.

    Args:
      inputs: A list or tuple of values converted to string tensors.
      separator: Separator string, or `None` to use the default `""`.
      name: Name forwarded to `_execute.execute`.
      ctx: Context forwarded to `_execute.execute`.

    Returns:
      The single output of the op.

    Raises:
      TypeError: If `inputs` is not a list or tuple.
    """
    if not isinstance(inputs, (list, tuple)):
        raise TypeError(
            "Expected list for 'inputs' argument to "
            "'string_join' Op, not %r." % inputs)
    # The "N" attribute is the number of input tensors.
    _attr_N = len(inputs)
    if separator is None:
        separator = ""
    separator = _execute.make_str(separator, "separator")
    inputs = _ops.convert_n_to_tensor(inputs, _dtypes.string)
    _inputs_flat = list(inputs)
    _attrs = ("N", _attr_N, "separator", separator)
    _result = _execute.execute(b"StringJoin", 1, inputs=_inputs_flat,
                               attrs=_attrs, ctx=ctx, name=name)
    if _execute.must_record_gradient():
        _execute.record_gradient(
            "StringJoin", _inputs_flat, _attrs, _result)
    # The op is executed with one output; unpack it from the result sequence.
    _result, = _result
    return _result
|
|
|
|
|
|
def string_length(input, unit="BYTE", name=None):
    r"""String lengths of `input`.

    Computes the length of each string given in the input tensor.

    >>> strings = tf.constant(['Hello','TensorFlow', '\U0001F642'])
    >>> tf.strings.length(strings).numpy() # default counts bytes
    array([ 5, 10, 4], dtype=int32)
    >>> tf.strings.length(strings, unit="UTF8_CHAR").numpy()
    array([ 5, 10, 1], dtype=int32)

    Args:
      input: A `Tensor` of type `string`.
        The strings for which to compute the length for each element.
      unit: An optional `string` from: `"BYTE", "UTF8_CHAR"`. Defaults to `"BYTE"`.
        The unit that is counted to compute string length.  One of: `"BYTE"` (for
        the number of bytes in each string) or `"UTF8_CHAR"` (for the number of UTF-8
        encoded Unicode code points in each string).  Results are undefined
        if `unit=UTF8_CHAR` and the `input` strings do not contain structurally
        valid UTF-8.
      name: A name for the operation (optional).

    Returns:
      A `Tensor` of type `int32`.
    """
    _ctx = _context._context or _context.context()
    tld = _ctx._thread_local_data
    if tld.is_eager:
        # Eager mode: try the `TFE_Py_FastPathExecute` fast path first.
        try:
            _result = pywrap_tfe.TFE_Py_FastPathExecute(
                _ctx, "StringLength", name, input, "unit", unit)
            return _result
        except _core._NotOkStatusException as e:
            _ops.raise_from_not_ok_status(e, name)
        except _core._FallbackException:
            pass
        # The fast path asked for a fallback; use the Python eager path.
        try:
            return string_length_eager_fallback(
                input, unit=unit, name=name, ctx=_ctx)
        except _core._SymbolicException:
            pass  # Add nodes to the TensorFlow graph.
    # Add nodes to the TensorFlow graph.
    if unit is None:
        unit = "BYTE"
    unit = _execute.make_str(unit, "unit")
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "StringLength", input=input, unit=unit, name=name)
    _result = _outputs[:]
    if _execute.must_record_gradient():
        _attrs = ("unit", _op.get_attr("unit"))
        _inputs_flat = _op.inputs
        _execute.record_gradient(
            "StringLength", _inputs_flat, _attrs, _result)
    _result, = _result
    return _result
|
|
|
|
# Raw-op form of `string_length` (built by `_ops.to_raw_op`), passed through
# `tf_export` under the name "raw_ops.StringLength".
StringLength = tf_export("raw_ops.StringLength")(_ops.to_raw_op(string_length))
|
|
|
|
|
|
def string_length_eager_fallback(input, unit, name, ctx):
    """Runs the `StringLength` op through `_execute.execute`.

    `string_length` calls this when its fast-path call raises
    `_core._FallbackException`.

    Args:
      input: Value converted to a string tensor.
      unit: Unit string, or `None` to use the default `"BYTE"`.
      name: Name forwarded to `_execute.execute`.
      ctx: Context forwarded to `_execute.execute`.

    Returns:
      The single output of the op.
    """
    if unit is None:
        unit = "BYTE"
    unit = _execute.make_str(unit, "unit")
    input = _ops.convert_to_tensor(input, _dtypes.string)
    _inputs_flat = [input]
    _attrs = ("unit", unit)
    _result = _execute.execute(b"StringLength", 1, inputs=_inputs_flat,
                               attrs=_attrs, ctx=ctx, name=name)
    if _execute.must_record_gradient():
        _execute.record_gradient(
            "StringLength", _inputs_flat, _attrs, _result)
    # The op is executed with one output; unpack it from the result sequence.
    _result, = _result
    return _result
|
|
|
|
|
|
@_dispatch.add_fallback_dispatch_list
@_dispatch.add_type_based_api_dispatcher
@tf_export('strings.lower')
def string_lower(input, encoding="", name=None):
    r"""Converts all uppercase characters into their respective lowercase replacements.

    Example:

    >>> tf.strings.lower("CamelCase string and ALL CAPS")
    <tf.Tensor: shape=(), dtype=string, numpy=b'camelcase string and all caps'>

    Args:
      input: A `Tensor` of type `string`. The input to be lower-cased.
      encoding: An optional `string`. Defaults to `""`.
        Character encoding of `input`. Allowed values are '' and 'utf-8'.
        Value '' is interpreted as ASCII.
      name: A name for the operation (optional).

    Returns:
      A `Tensor` of type `string`.
    """
    _ctx = _context._context or _context.context()
    tld = _ctx._thread_local_data
    if tld.is_eager:
        # Eager mode: try the `TFE_Py_FastPathExecute` fast path first.
        try:
            _result = pywrap_tfe.TFE_Py_FastPathExecute(
                _ctx, "StringLower", name, input, "encoding", encoding)
            return _result
        except _core._NotOkStatusException as e:
            _ops.raise_from_not_ok_status(e, name)
        except _core._FallbackException:
            pass
        try:
            # Give the type-based dispatcher the first chance to handle the
            # call; `NotImplemented` means it declined.
            _result = _dispatcher_for_string_lower(
                (input, encoding, name,), None)
            if _result is not NotImplemented:
                return _result
            return string_lower_eager_fallback(
                input, encoding=encoding, name=name, ctx=_ctx)
        except _core._SymbolicException:
            pass  # Add nodes to the TensorFlow graph.
        except (TypeError, ValueError):
            # Try the fallback dispatch list before surfacing the error.
            _result = _dispatch.dispatch(
                string_lower, (), dict(input=input, encoding=encoding, name=name)
            )
            if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
                return _result
            raise
    else:
        _result = _dispatcher_for_string_lower(
            (input, encoding, name,), None)
        if _result is not NotImplemented:
            return _result
    # Add nodes to the TensorFlow graph.
    if encoding is None:
        encoding = ""
    encoding = _execute.make_str(encoding, "encoding")
    try:
        _, _, _op, _outputs = _op_def_library._apply_op_helper(
            "StringLower", input=input, encoding=encoding, name=name)
    except (TypeError, ValueError):
        # Try the fallback dispatch list before surfacing the error.
        _result = _dispatch.dispatch(
            string_lower, (), dict(input=input, encoding=encoding, name=name)
        )
        if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
            return _result
        raise
    _result = _outputs[:]
    if _execute.must_record_gradient():
        _attrs = ("encoding", _op.get_attr("encoding"))
        _inputs_flat = _op.inputs
        _execute.record_gradient(
            "StringLower", _inputs_flat, _attrs, _result)
    _result, = _result
    return _result
|
|
|
|
# Raw-op form of `string_lower` (built by `_ops.to_raw_op`), passed through
# `tf_export` under the name "raw_ops.StringLower".
StringLower = tf_export("raw_ops.StringLower")(_ops.to_raw_op(string_lower))
# Module-level alias for the `Dispatch` callable stored on
# `string_lower._tf_type_based_dispatcher`; `string_lower` calls it by this name.
_dispatcher_for_string_lower = string_lower._tf_type_based_dispatcher.Dispatch
|
|
|
|
|
|
def string_lower_eager_fallback(input, encoding, name, ctx):
    """Runs the `StringLower` op through `_execute.execute`.

    `string_lower` calls this in eager mode after the fast path raises
    `_core._FallbackException` and the type-based dispatcher declines.

    Args:
      input: Value converted to a string tensor.
      encoding: Encoding string, or `None` to use the default `""`.
      name: Name forwarded to `_execute.execute`.
      ctx: Context forwarded to `_execute.execute`.

    Returns:
      The single output of the op.
    """
    if encoding is None:
        encoding = ""
    encoding = _execute.make_str(encoding, "encoding")
    input = _ops.convert_to_tensor(input, _dtypes.string)
    _inputs_flat = [input]
    _attrs = ("encoding", encoding)
    _result = _execute.execute(b"StringLower", 1, inputs=_inputs_flat,
                               attrs=_attrs, ctx=ctx, name=name)
    if _execute.must_record_gradient():
        _execute.record_gradient(
            "StringLower", _inputs_flat, _attrs, _result)
    # The op is executed with one output; unpack it from the result sequence.
    _result, = _result
    return _result
|
|
|
|
# Named result type that the `string_n_grams` wrappers build with `_make`;
# the fields are the op's two outputs, in order.
_StringNGramsOutput = collections.namedtuple(
    "StringNGrams",
    ["ngrams", "ngrams_splits"])
|
|
|
|
|
|
def string_n_grams(data, data_splits, separator, ngram_widths, left_pad, right_pad, pad_width, preserve_short_sequences, name=None):
    r"""Creates ngrams from ragged string data.

    This op accepts a ragged tensor with 1 ragged dimension containing only
    strings and outputs a ragged tensor with 1 ragged dimension containing ngrams
    of that string, joined along the innermost axis.

    Args:
      data: A `Tensor` of type `string`.
        The values tensor of the ragged string tensor to make ngrams out of. Must be a
        1D string tensor.
      data_splits: A `Tensor`. Must be one of the following types: `int32`, `int64`.
        The splits tensor of the ragged string tensor to make ngrams out of.
      separator: A `string`.
        The string to append between elements of the token. Use "" for no separator.
      ngram_widths: A list of `ints`. The sizes of the ngrams to create.
      left_pad: A `string`.
        The string to use to pad the left side of the ngram sequence. Only used if
        pad_width != 0.
      right_pad: A `string`.
        The string to use to pad the right side of the ngram sequence. Only used if
        pad_width != 0.
      pad_width: An `int`.
        The number of padding elements to add to each side of each
        sequence. Note that padding will never be greater than 'ngram_widths'-1
        regardless of this value. If `pad_width=-1`, then add `max(ngram_widths)-1`
        elements.
      preserve_short_sequences: A `bool`.
      name: A name for the operation (optional).

    Returns:
      A tuple of `Tensor` objects (ngrams, ngrams_splits).

      ngrams: A `Tensor` of type `string`.
      ngrams_splits: A `Tensor`. Has the same type as `data_splits`.

    Raises:
      TypeError: If the graph-construction path is reached and `ngram_widths`
        is not a list or tuple.
    """
    _ctx = _context._context or _context.context()
    tld = _ctx._thread_local_data
    if tld.is_eager:
        # Eager mode: try the `TFE_Py_FastPathExecute` fast path first.
        try:
            _result = pywrap_tfe.TFE_Py_FastPathExecute(
                _ctx, "StringNGrams", name, data, data_splits, "separator", separator,
                "ngram_widths", ngram_widths, "left_pad", left_pad, "right_pad",
                right_pad, "pad_width", pad_width, "preserve_short_sequences",
                preserve_short_sequences)
            _result = _StringNGramsOutput._make(_result)
            return _result
        except _core._NotOkStatusException as e:
            _ops.raise_from_not_ok_status(e, name)
        except _core._FallbackException:
            pass
        # The fast path asked for a fallback; use the Python eager path.
        try:
            return string_n_grams_eager_fallback(
                data, data_splits, separator=separator, ngram_widths=ngram_widths,
                left_pad=left_pad, right_pad=right_pad, pad_width=pad_width,
                preserve_short_sequences=preserve_short_sequences, name=name,
                ctx=_ctx)
        except _core._SymbolicException:
            pass  # Add nodes to the TensorFlow graph.
    # Add nodes to the TensorFlow graph.
    separator = _execute.make_str(separator, "separator")
    if not isinstance(ngram_widths, (list, tuple)):
        raise TypeError(
            "Expected list for 'ngram_widths' argument to "
            "'string_n_grams' Op, not %r." % ngram_widths)
    ngram_widths = [_execute.make_int(_i, "ngram_widths") for _i in ngram_widths]
    left_pad = _execute.make_str(left_pad, "left_pad")
    right_pad = _execute.make_str(right_pad, "right_pad")
    pad_width = _execute.make_int(pad_width, "pad_width")
    preserve_short_sequences = _execute.make_bool(preserve_short_sequences, "preserve_short_sequences")
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "StringNGrams", data=data, data_splits=data_splits,
        separator=separator, ngram_widths=ngram_widths,
        left_pad=left_pad, right_pad=right_pad,
        pad_width=pad_width,
        preserve_short_sequences=preserve_short_sequences,
        name=name)
    _result = _outputs[:]
    if _execute.must_record_gradient():
        _attrs = ("separator", _op.get_attr("separator"), "ngram_widths",
                  _op.get_attr("ngram_widths"), "left_pad",
                  _op.get_attr("left_pad"), "right_pad",
                  _op.get_attr("right_pad"), "pad_width",
                  _op._get_attr_int("pad_width"), "preserve_short_sequences",
                  _op._get_attr_bool("preserve_short_sequences"), "Tsplits",
                  _op._get_attr_type("Tsplits"))
        _inputs_flat = _op.inputs
        _execute.record_gradient(
            "StringNGrams", _inputs_flat, _attrs, _result)
    # Wrap the two outputs in the named result tuple.
    _result = _StringNGramsOutput._make(_result)
    return _result
|
|
|
|
# Raw-op form of `string_n_grams` (built by `_ops.to_raw_op`), passed through
# `tf_export` under the name "raw_ops.StringNGrams".
StringNGrams = tf_export("raw_ops.StringNGrams")(_ops.to_raw_op(string_n_grams))
|
|
|
|
|
|
def string_n_grams_eager_fallback(data, data_splits, separator, ngram_widths, left_pad, right_pad, pad_width, preserve_short_sequences, name, ctx):
    """Runs the `StringNGrams` op through `_execute.execute`.

    `string_n_grams` calls this when its fast-path call raises
    `_core._FallbackException`.

    Args:
      data: Value converted to a string tensor.
      data_splits: Value matched against `int32`/`int64` (default `int64`)
        by `_execute.args_to_matching_eager`.
      separator: Separator string attribute.
      ngram_widths: List or tuple of ints.
      left_pad: Left padding string attribute.
      right_pad: Right padding string attribute.
      pad_width: Integer attribute.
      preserve_short_sequences: Boolean attribute.
      name: Name forwarded to `_execute.execute`.
      ctx: Context forwarded to the conversion and execution helpers.

    Returns:
      A `_StringNGramsOutput` tuple `(ngrams, ngrams_splits)`.

    Raises:
      TypeError: If `ngram_widths` is not a list or tuple.
    """
    separator = _execute.make_str(separator, "separator")
    if not isinstance(ngram_widths, (list, tuple)):
        raise TypeError(
            "Expected list for 'ngram_widths' argument to "
            "'string_n_grams' Op, not %r." % ngram_widths)
    ngram_widths = [_execute.make_int(_i, "ngram_widths") for _i in ngram_widths]
    left_pad = _execute.make_str(left_pad, "left_pad")
    right_pad = _execute.make_str(right_pad, "right_pad")
    pad_width = _execute.make_int(pad_width, "pad_width")
    preserve_short_sequences = _execute.make_bool(preserve_short_sequences, "preserve_short_sequences")
    # The "Tsplits" attribute is taken from the dtype matched for `data_splits`.
    _attr_Tsplits, (data_splits,) = _execute.args_to_matching_eager([data_splits], ctx, [_dtypes.int32, _dtypes.int64, ], _dtypes.int64)
    data = _ops.convert_to_tensor(data, _dtypes.string)
    _inputs_flat = [data, data_splits]
    _attrs = ("separator", separator, "ngram_widths", ngram_widths, "left_pad",
              left_pad, "right_pad", right_pad, "pad_width", pad_width,
              "preserve_short_sequences", preserve_short_sequences, "Tsplits",
              _attr_Tsplits)
    _result = _execute.execute(b"StringNGrams", 2, inputs=_inputs_flat,
                               attrs=_attrs, ctx=ctx, name=name)
    if _execute.must_record_gradient():
        _execute.record_gradient(
            "StringNGrams", _inputs_flat, _attrs, _result)
    _result = _StringNGramsOutput._make(_result)
    return _result
|
|
|
|
# Named result type that the `string_split` wrappers build with `_make`;
# the fields are the op's three outputs, in order.
_StringSplitOutput = collections.namedtuple(
    "StringSplit",
    ["indices", "values", "shape"])
|
|
|
|
|
|
def string_split(input, delimiter, skip_empty=True, name=None):
    r"""Split elements of `input` based on `delimiter` into a `SparseTensor`.

    Let N be the size of source (typically N will be the batch size). Split each
    element of `input` based on `delimiter` and return a `SparseTensor`
    containing the split tokens. Empty tokens are ignored.

    `delimiter` can be empty, or a string of split characters. If `delimiter` is an
    empty string, each element of `input` is split into individual single-byte
    character strings, including splitting of UTF-8 multibyte sequences. Otherwise
    every character of `delimiter` is a potential split point.

    For example:
      N = 2, input[0] is 'hello world' and input[1] is 'a b c', then the output
      will be

      indices = [0, 0;
                 0, 1;
                 1, 0;
                 1, 1;
                 1, 2]
      shape = [2, 3]
      values = ['hello', 'world', 'a', 'b', 'c']

    Args:
      input: A `Tensor` of type `string`. 1-D. Strings to split.
      delimiter: A `Tensor` of type `string`.
        0-D. Delimiter characters (bytes), or empty string.
      skip_empty: An optional `bool`. Defaults to `True`.
        A `bool`. If `True`, skip the empty strings from the result.
      name: A name for the operation (optional).

    Returns:
      A tuple of `Tensor` objects (indices, values, shape).

      indices: A `Tensor` of type `int64`.
      values: A `Tensor` of type `string`.
      shape: A `Tensor` of type `int64`.
    """
    _ctx = _context._context or _context.context()
    tld = _ctx._thread_local_data
    if tld.is_eager:
        # Eager mode: try the `TFE_Py_FastPathExecute` fast path first.
        try:
            _result = pywrap_tfe.TFE_Py_FastPathExecute(
                _ctx, "StringSplit", name, input, delimiter, "skip_empty", skip_empty)
            _result = _StringSplitOutput._make(_result)
            return _result
        except _core._NotOkStatusException as e:
            _ops.raise_from_not_ok_status(e, name)
        except _core._FallbackException:
            pass
        # The fast path asked for a fallback; use the Python eager path.
        try:
            return string_split_eager_fallback(
                input, delimiter, skip_empty=skip_empty, name=name, ctx=_ctx)
        except _core._SymbolicException:
            pass  # Add nodes to the TensorFlow graph.
    # Add nodes to the TensorFlow graph.
    if skip_empty is None:
        skip_empty = True
    skip_empty = _execute.make_bool(skip_empty, "skip_empty")
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "StringSplit", input=input, delimiter=delimiter,
        skip_empty=skip_empty, name=name)
    _result = _outputs[:]
    if _execute.must_record_gradient():
        _attrs = ("skip_empty", _op._get_attr_bool("skip_empty"))
        _inputs_flat = _op.inputs
        _execute.record_gradient(
            "StringSplit", _inputs_flat, _attrs, _result)
    # Wrap the three outputs in the named result tuple.
    _result = _StringSplitOutput._make(_result)
    return _result
|
|
|
|
# Raw-op form of `string_split` (built by `_ops.to_raw_op`), passed through
# `tf_export` under the name "raw_ops.StringSplit".
StringSplit = tf_export("raw_ops.StringSplit")(_ops.to_raw_op(string_split))
|
|
|
|
|
|
def string_split_eager_fallback(input, delimiter, skip_empty, name, ctx):
    """Runs the `StringSplit` op through `_execute.execute`.

    `string_split` calls this when its fast-path call raises
    `_core._FallbackException`.

    Args:
      input: Value converted to a string tensor.
      delimiter: Value converted to a string tensor.
      skip_empty: Boolean, or `None` to use the default `True`.
      name: Name forwarded to `_execute.execute`.
      ctx: Context forwarded to `_execute.execute`.

    Returns:
      A `_StringSplitOutput` tuple `(indices, values, shape)`.
    """
    if skip_empty is None:
        skip_empty = True
    skip_empty = _execute.make_bool(skip_empty, "skip_empty")
    input = _ops.convert_to_tensor(input, _dtypes.string)
    delimiter = _ops.convert_to_tensor(delimiter, _dtypes.string)
    _inputs_flat = [input, delimiter]
    _attrs = ("skip_empty", skip_empty)
    _result = _execute.execute(b"StringSplit", 3, inputs=_inputs_flat,
                               attrs=_attrs, ctx=ctx, name=name)
    if _execute.must_record_gradient():
        _execute.record_gradient(
            "StringSplit", _inputs_flat, _attrs, _result)
    _result = _StringSplitOutput._make(_result)
    return _result
|
|
|
|
# Named result type that the `string_split_v2` wrappers build with `_make`;
# the fields are the op's three outputs, in order.
_StringSplitV2Output = collections.namedtuple(
    "StringSplitV2",
    ["indices", "values", "shape"])
|
|
|
|
|
|
def string_split_v2(input, sep, maxsplit=-1, name=None):
    r"""Split elements of `input` based on `sep` into a `SparseTensor`.

    Let N be the size of `input` (typically N will be the batch size). Split each
    element of `input` based on `sep` and return a `SparseTensor`
    containing the split tokens. Empty tokens are ignored.

    For example, N = 2, input[0] is 'hello world' and input[1] is 'a b c',
    then the output will be
    ```
    st.indices = [0, 0;
                  0, 1;
                  1, 0;
                  1, 1;
                  1, 2]
    st.shape = [2, 3]
    st.values = ['hello', 'world', 'a', 'b', 'c']
    ```

    If `sep` is given, consecutive delimiters are not grouped together and are
    deemed to delimit empty strings. For example, an input of `"1<>2<><>3"` and
    sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty
    string, consecutive whitespace are regarded as a single separator, and the
    result will contain no empty strings at the start or end if the string has
    leading or trailing whitespace.

    Note that the above mentioned behavior matches python's str.split.

    Args:
      input: A `Tensor` of type `string`.
        `1-D` string `Tensor`, the strings to split.
      sep: A `Tensor` of type `string`.
        `0-D` string `Tensor`, the delimiter character.
      maxsplit: An optional `int`. Defaults to `-1`.
        An `int`. If `maxsplit > 0`, limit of the split of the result.
      name: A name for the operation (optional).

    Returns:
      A tuple of `Tensor` objects (indices, values, shape).

      indices: A `Tensor` of type `int64`.
      values: A `Tensor` of type `string`.
      shape: A `Tensor` of type `int64`.
    """
    _ctx = _context._context or _context.context()
    tld = _ctx._thread_local_data
    if tld.is_eager:
        # Eager mode: try the `TFE_Py_FastPathExecute` fast path first.
        try:
            _result = pywrap_tfe.TFE_Py_FastPathExecute(
                _ctx, "StringSplitV2", name, input, sep, "maxsplit", maxsplit)
            _result = _StringSplitV2Output._make(_result)
            return _result
        except _core._NotOkStatusException as e:
            _ops.raise_from_not_ok_status(e, name)
        except _core._FallbackException:
            pass
        # The fast path asked for a fallback; use the Python eager path.
        try:
            return string_split_v2_eager_fallback(
                input, sep, maxsplit=maxsplit, name=name, ctx=_ctx)
        except _core._SymbolicException:
            pass  # Add nodes to the TensorFlow graph.
    # Add nodes to the TensorFlow graph.
    if maxsplit is None:
        maxsplit = -1
    maxsplit = _execute.make_int(maxsplit, "maxsplit")
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "StringSplitV2", input=input, sep=sep, maxsplit=maxsplit, name=name)
    _result = _outputs[:]
    if _execute.must_record_gradient():
        _attrs = ("maxsplit", _op._get_attr_int("maxsplit"))
        _inputs_flat = _op.inputs
        _execute.record_gradient(
            "StringSplitV2", _inputs_flat, _attrs, _result)
    # Wrap the three outputs in the named result tuple.
    _result = _StringSplitV2Output._make(_result)
    return _result
|
|
|
|
# Raw-op form of `string_split_v2` (built by `_ops.to_raw_op`), passed through
# `tf_export` under the name "raw_ops.StringSplitV2".
StringSplitV2 = tf_export("raw_ops.StringSplitV2")(_ops.to_raw_op(string_split_v2))
|
|
|
|
|
|
def string_split_v2_eager_fallback(input, sep, maxsplit, name, ctx):
    """Runs the `StringSplitV2` op through `_execute.execute`.

    `string_split_v2` calls this when its fast-path call raises
    `_core._FallbackException`.

    Args:
      input: Value converted to a string tensor.
      sep: Value converted to a string tensor.
      maxsplit: Integer, or `None` to use the default `-1`.
      name: Name forwarded to `_execute.execute`.
      ctx: Context forwarded to `_execute.execute`.

    Returns:
      A `_StringSplitV2Output` tuple `(indices, values, shape)`.
    """
    if maxsplit is None:
        maxsplit = -1
    maxsplit = _execute.make_int(maxsplit, "maxsplit")
    input = _ops.convert_to_tensor(input, _dtypes.string)
    sep = _ops.convert_to_tensor(sep, _dtypes.string)
    _inputs_flat = [input, sep]
    _attrs = ("maxsplit", maxsplit)
    _result = _execute.execute(b"StringSplitV2", 3, inputs=_inputs_flat,
                               attrs=_attrs, ctx=ctx, name=name)
    if _execute.must_record_gradient():
        _execute.record_gradient(
            "StringSplitV2", _inputs_flat, _attrs, _result)
    _result = _StringSplitV2Output._make(_result)
    return _result
|
|
|
|
|
|
@_dispatch.add_fallback_dispatch_list
@_dispatch.add_type_based_api_dispatcher
@tf_export('strings.strip', v1=['strings.strip', 'string_strip'])
@deprecated_endpoints('string_strip')
def string_strip(input, name=None):
    r"""Strip leading and trailing whitespaces from the Tensor.

    Examples:

    >>> tf.strings.strip(["\nTensorFlow", "     The python library    "]).numpy()
    array([b'TensorFlow', b'The python library'], dtype=object)

    Args:
      input: A `Tensor` of type `string`. A string `Tensor` of any shape.
      name: A name for the operation (optional).

    Returns:
      A `Tensor` of type `string`.
    """
    _ctx = _context._context or _context.context()
    tld = _ctx._thread_local_data
    if tld.is_eager:
        # Eager mode: try the `TFE_Py_FastPathExecute` fast path first.
        try:
            _result = pywrap_tfe.TFE_Py_FastPathExecute(
                _ctx, "StringStrip", name, input)
            return _result
        except _core._NotOkStatusException as e:
            _ops.raise_from_not_ok_status(e, name)
        except _core._FallbackException:
            pass
        try:
            # Give the type-based dispatcher the first chance to handle the
            # call; `NotImplemented` means it declined.
            _result = _dispatcher_for_string_strip(
                (input, name,), None)
            if _result is not NotImplemented:
                return _result
            return string_strip_eager_fallback(
                input, name=name, ctx=_ctx)
        except _core._SymbolicException:
            pass  # Add nodes to the TensorFlow graph.
        except (TypeError, ValueError):
            # Try the fallback dispatch list before surfacing the error.
            _result = _dispatch.dispatch(
                string_strip, (), dict(input=input, name=name)
            )
            if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
                return _result
            raise
    else:
        _result = _dispatcher_for_string_strip(
            (input, name,), None)
        if _result is not NotImplemented:
            return _result
    # Add nodes to the TensorFlow graph.
    try:
        _, _, _op, _outputs = _op_def_library._apply_op_helper(
            "StringStrip", input=input, name=name)
    except (TypeError, ValueError):
        # Try the fallback dispatch list before surfacing the error.
        _result = _dispatch.dispatch(
            string_strip, (), dict(input=input, name=name)
        )
        if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
            return _result
        raise
    _result = _outputs[:]
    if _execute.must_record_gradient():
        _attrs = ()
        _inputs_flat = _op.inputs
        _execute.record_gradient(
            "StringStrip", _inputs_flat, _attrs, _result)
    _result, = _result
    return _result
|
|
|
|
# Raw-op form of `string_strip` (built by `_ops.to_raw_op`), passed through
# `tf_export` under the name "raw_ops.StringStrip".
StringStrip = tf_export("raw_ops.StringStrip")(_ops.to_raw_op(string_strip))
# Module-level alias for the `Dispatch` callable stored on
# `string_strip._tf_type_based_dispatcher`; `string_strip` calls it by this name.
_dispatcher_for_string_strip = string_strip._tf_type_based_dispatcher.Dispatch
|
|
|
|
|
|
def string_strip_eager_fallback(input, name, ctx):
    """Runs the `StringStrip` op through `_execute.execute`.

    `string_strip` calls this in eager mode after the fast path raises
    `_core._FallbackException` and the type-based dispatcher declines.

    Args:
      input: Value converted to a string tensor.
      name: Name forwarded to `_execute.execute`.
      ctx: Context forwarded to `_execute.execute`.

    Returns:
      The single output of the op.
    """
    input = _ops.convert_to_tensor(input, _dtypes.string)
    _inputs_flat = [input]
    # No attributes are passed for this op.
    _attrs = None
    _result = _execute.execute(b"StringStrip", 1, inputs=_inputs_flat,
                               attrs=_attrs, ctx=ctx, name=name)
    if _execute.must_record_gradient():
        _execute.record_gradient(
            "StringStrip", _inputs_flat, _attrs, _result)
    # The op is executed with one output; unpack it from the result sequence.
    _result, = _result
    return _result
|
|
|
|
|
|
def string_to_hash_bucket(string_tensor, num_buckets, name=None):
    r"""Converts each string in the input Tensor to its hash mod by a number of buckets.

    The hash function is deterministic on the content of the string within the
    process.

    Note that the hash function may change from time to time.
    This functionality will be deprecated and it's recommended to use
    `tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`.

    Args:
      string_tensor: A `Tensor` of type `string`.
      num_buckets: An `int` that is `>= 1`. The number of buckets.
      name: A name for the operation (optional).

    Returns:
      A `Tensor` of type `int64`.
    """
    _ctx = _context._context or _context.context()
    tld = _ctx._thread_local_data
    if tld.is_eager:
        # Eager mode: try the `TFE_Py_FastPathExecute` fast path first.
        try:
            _result = pywrap_tfe.TFE_Py_FastPathExecute(
                _ctx, "StringToHashBucket", name, string_tensor, "num_buckets",
                num_buckets)
            return _result
        except _core._NotOkStatusException as e:
            _ops.raise_from_not_ok_status(e, name)
        except _core._FallbackException:
            pass
        # The fast path asked for a fallback; use the Python eager path.
        try:
            return string_to_hash_bucket_eager_fallback(
                string_tensor, num_buckets=num_buckets, name=name, ctx=_ctx)
        except _core._SymbolicException:
            pass  # Add nodes to the TensorFlow graph.
    # Add nodes to the TensorFlow graph.
    num_buckets = _execute.make_int(num_buckets, "num_buckets")
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "StringToHashBucket", string_tensor=string_tensor,
        num_buckets=num_buckets, name=name)
    _result = _outputs[:]
    if _execute.must_record_gradient():
        _attrs = ("num_buckets", _op._get_attr_int("num_buckets"))
        _inputs_flat = _op.inputs
        _execute.record_gradient(
            "StringToHashBucket", _inputs_flat, _attrs, _result)
    _result, = _result
    return _result
|
|
|
|
# Raw-op form of `string_to_hash_bucket` (built by `_ops.to_raw_op`), passed
# through `tf_export` under the name "raw_ops.StringToHashBucket".
StringToHashBucket = tf_export("raw_ops.StringToHashBucket")(_ops.to_raw_op(string_to_hash_bucket))
|
|
|
|
|
|
def string_to_hash_bucket_eager_fallback(string_tensor, num_buckets, name, ctx):
    """Runs the `StringToHashBucket` op through `_execute.execute`.

    `string_to_hash_bucket` calls this when its fast-path call raises
    `_core._FallbackException`.

    Args:
      string_tensor: Value converted to a string tensor.
      num_buckets: Integer attribute, validated with `_execute.make_int`.
      name: Name forwarded to `_execute.execute`.
      ctx: Context forwarded to `_execute.execute`.

    Returns:
      The single output of the op.
    """
    num_buckets = _execute.make_int(num_buckets, "num_buckets")
    string_tensor = _ops.convert_to_tensor(string_tensor, _dtypes.string)
    _inputs_flat = [string_tensor]
    _attrs = ("num_buckets", num_buckets)
    _result = _execute.execute(b"StringToHashBucket", 1, inputs=_inputs_flat,
                               attrs=_attrs, ctx=ctx, name=name)
    if _execute.must_record_gradient():
        _execute.record_gradient(
            "StringToHashBucket", _inputs_flat, _attrs, _result)
    # The op is executed with one output; unpack it from the result sequence.
    _result, = _result
    return _result
|
|
|
|
|
|
@_dispatch.add_fallback_dispatch_list
@_dispatch.add_type_based_api_dispatcher
@tf_export('strings.to_hash_bucket_fast', v1=['strings.to_hash_bucket_fast', 'string_to_hash_bucket_fast'])
@deprecated_endpoints('string_to_hash_bucket_fast')
def string_to_hash_bucket_fast(input, num_buckets, name=None):
    r"""Converts each string in the input Tensor to its hash mod by a number of buckets.

    The hash function is deterministic on the content of the string within the
    process and will never change. However, it is not suitable for cryptography.
    This function may be used when CPU time is scarce and inputs are trusted or
    unimportant. There is a risk of adversaries constructing inputs that all hash
    to the same bucket. To prevent this problem, use a strong hash function with
    `tf.string_to_hash_bucket_strong`.

    Examples:

    >>> tf.strings.to_hash_bucket_fast(["Hello", "TensorFlow", "2.x"], 3).numpy()
    array([0, 2, 2])

    Args:
      input: A `Tensor` of type `string`. The strings to assign a hash bucket.
      num_buckets: An `int` that is `>= 1`. The number of buckets.
      name: A name for the operation (optional).

    Returns:
      A `Tensor` of type `int64`.
    """
    _ctx = _context._context or _context.context()
    tld = _ctx._thread_local_data
    if tld.is_eager:
        # Eager mode: try the `TFE_Py_FastPathExecute` fast path first.
        try:
            _result = pywrap_tfe.TFE_Py_FastPathExecute(
                _ctx, "StringToHashBucketFast", name, input, "num_buckets",
                num_buckets)
            return _result
        except _core._NotOkStatusException as e:
            _ops.raise_from_not_ok_status(e, name)
        except _core._FallbackException:
            pass
        try:
            # Give the type-based dispatcher the first chance to handle the
            # call; `NotImplemented` means it declined.
            _result = _dispatcher_for_string_to_hash_bucket_fast(
                (input, num_buckets, name,), None)
            if _result is not NotImplemented:
                return _result
            return string_to_hash_bucket_fast_eager_fallback(
                input, num_buckets=num_buckets, name=name, ctx=_ctx)
        except _core._SymbolicException:
            pass  # Add nodes to the TensorFlow graph.
        except (TypeError, ValueError):
            # Try the fallback dispatch list before surfacing the error.
            _result = _dispatch.dispatch(
                string_to_hash_bucket_fast, (), dict(input=input,
                                                     num_buckets=num_buckets,
                                                     name=name)
            )
            if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
                return _result
            raise
    else:
        _result = _dispatcher_for_string_to_hash_bucket_fast(
            (input, num_buckets, name,), None)
        if _result is not NotImplemented:
            return _result
    # Add nodes to the TensorFlow graph.
    num_buckets = _execute.make_int(num_buckets, "num_buckets")
    try:
        _, _, _op, _outputs = _op_def_library._apply_op_helper(
            "StringToHashBucketFast", input=input, num_buckets=num_buckets,
            name=name)
    except (TypeError, ValueError):
        # Try the fallback dispatch list before surfacing the error.
        _result = _dispatch.dispatch(
            string_to_hash_bucket_fast, (), dict(input=input,
                                                 num_buckets=num_buckets,
                                                 name=name)
        )
        if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED:
            return _result
        raise
    _result = _outputs[:]
    if _execute.must_record_gradient():
        _attrs = ("num_buckets", _op._get_attr_int("num_buckets"))
        _inputs_flat = _op.inputs
        _execute.record_gradient(
            "StringToHashBucketFast", _inputs_flat, _attrs, _result)
    _result, = _result
    return _result
|
|
|
|
# Raw-op form of `string_to_hash_bucket_fast` (built by `_ops.to_raw_op`),
# passed through `tf_export` under the name "raw_ops.StringToHashBucketFast".
StringToHashBucketFast = tf_export("raw_ops.StringToHashBucketFast")(_ops.to_raw_op(string_to_hash_bucket_fast))
# Module-level alias for the `Dispatch` callable stored on
# `string_to_hash_bucket_fast._tf_type_based_dispatcher`; the wrapper calls it
# by this name.
_dispatcher_for_string_to_hash_bucket_fast = string_to_hash_bucket_fast._tf_type_based_dispatcher.Dispatch
|
|
|
|
|
|
def string_to_hash_bucket_fast_eager_fallback(input, num_buckets, name, ctx):
  """Runs the `StringToHashBucketFast` op eagerly through `_execute.execute`.

  Args:
    input: Value converted to a `string` tensor with `_ops.convert_to_tensor`.
    num_buckets: Bucket-count attribute; passed through `_execute.make_int`.
    name: Name forwarded to `_execute.execute`.
    ctx: Eager context forwarded to `_execute.execute`.

  Returns:
    The single output of the executed op.
  """
  bucket_count = _execute.make_int(num_buckets, "num_buckets")
  string_tensor = _ops.convert_to_tensor(input, _dtypes.string)
  flat_inputs = [string_tensor]
  op_attrs = ("num_buckets", bucket_count)
  outputs = _execute.execute(
      b"StringToHashBucketFast", 1, inputs=flat_inputs, attrs=op_attrs,
      ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "StringToHashBucketFast", flat_inputs, op_attrs, outputs)
  # One output was requested above, so unpack the 1-element result.
  (output,) = outputs
  return output
|
|
|
|
|
|
@_dispatch.add_fallback_dispatch_list
@_dispatch.add_type_based_api_dispatcher
@tf_export('strings.to_hash_bucket_strong', v1=['strings.to_hash_bucket_strong', 'string_to_hash_bucket_strong'])
@deprecated_endpoints('string_to_hash_bucket_strong')
def string_to_hash_bucket_strong(input, num_buckets, key, name=None):
  r"""Converts each string in the input Tensor to its hash mod by a number of buckets.

  The hash function is deterministic on the content of the string within the
  process. It is a keyed hash function: attribute `key` defines the key of the
  hash function and is an array of 2 elements.

  A strong hash is important when inputs may be malicious, e.g. URLs with
  additional components. Adversaries could try to make their inputs hash to the
  same bucket for a denial-of-service attack or to skew the results. A strong
  hash can be used to make it difficult to find inputs with a skewed hash value
  distribution over buckets. This requires that the hash function is
  seeded by a high-entropy (random) "key" unknown to the adversary.

  The additional robustness comes at a cost of roughly 4x higher compute
  time than `tf.string_to_hash_bucket_fast`.

  Examples:

  >>> tf.strings.to_hash_bucket_strong(["Hello", "TF"], 3, [1, 2]).numpy()
  array([2, 0])

  Args:
    input: A `Tensor` of type `string`. The strings to assign a hash bucket.
    num_buckets: An `int` that is `>= 1`. The number of buckets.
    key: A list of `ints`.
      The key used to seed the hash function, passed as a list of two uint64
      elements.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `int64`.
  """
  eager_ctx = _context._context or _context.context()
  thread_state = eager_ctx._thread_local_data
  if thread_state.is_eager:
    # Eager mode: attempt the fast-path executor first.
    try:
      return pywrap_tfe.TFE_Py_FastPathExecute(
          eager_ctx, "StringToHashBucketStrong", name, input,
          "num_buckets", num_buckets, "key", key)
    except _core._NotOkStatusException as status_error:
      _ops.raise_from_not_ok_status(status_error, name)
    except _core._FallbackException:
      pass
    # Fast path was declined: consult the type-based dispatcher, then run the
    # eager fallback.
    try:
      dispatched = _dispatcher_for_string_to_hash_bucket_strong(
          (input, num_buckets, key, name,), None)
      if dispatched is not NotImplemented:
        return dispatched
      return string_to_hash_bucket_strong_eager_fallback(
          input, num_buckets=num_buckets, key=key, name=name, ctx=eager_ctx)
    except _core._SymbolicException:
      pass  # Continue with graph construction below.
    except (TypeError, ValueError):
      # Give the fallback dispatch list a chance before re-raising.
      dispatched = _dispatch.dispatch(
          string_to_hash_bucket_strong, (),
          {"input": input, "num_buckets": num_buckets, "key": key,
           "name": name})
      if dispatched is not _dispatch.OpDispatcher.NOT_SUPPORTED:
        return dispatched
      raise
  else:
    dispatched = _dispatcher_for_string_to_hash_bucket_strong(
        (input, num_buckets, key, name,), None)
    if dispatched is not NotImplemented:
      return dispatched

  # Graph construction: normalise the attributes, then add the op node.
  num_buckets = _execute.make_int(num_buckets, "num_buckets")
  if not isinstance(key, (list, tuple)):
    raise TypeError(
        "Expected list for 'key' argument to "
        "'string_to_hash_bucket_strong' Op, not %r." % key)
  key = [_execute.make_int(element, "key") for element in key]
  try:
    _, _, graph_op, op_outputs = _op_def_library._apply_op_helper(
        "StringToHashBucketStrong", input=input, num_buckets=num_buckets,
        key=key, name=name)
  except (TypeError, ValueError):
    dispatched = _dispatch.dispatch(
        string_to_hash_bucket_strong, (),
        {"input": input, "num_buckets": num_buckets, "key": key,
         "name": name})
    if dispatched is not _dispatch.OpDispatcher.NOT_SUPPORTED:
      return dispatched
    raise
  results = op_outputs[:]
  if _execute.must_record_gradient():
    recorded_attrs = ("num_buckets", graph_op._get_attr_int("num_buckets"),
                      "key", graph_op.get_attr("key"))
    _execute.record_gradient(
        "StringToHashBucketStrong", graph_op.inputs, recorded_attrs, results)
  (output,) = results
  return output
|
|
|
|
# Raw-op endpoint: the wrapper is converted with `_ops.to_raw_op` and exported
# under the name "raw_ops.StringToHashBucketStrong".
StringToHashBucketStrong = tf_export("raw_ops.StringToHashBucketStrong")(
    _ops.to_raw_op(string_to_hash_bucket_strong))
# `Dispatch` callable of the wrapper's `_tf_type_based_dispatcher` attribute;
# `string_to_hash_bucket_strong` calls it before building or executing the op.
_dispatcher_for_string_to_hash_bucket_strong = (
    string_to_hash_bucket_strong._tf_type_based_dispatcher.Dispatch)
|
|
|
|
|
|
def string_to_hash_bucket_strong_eager_fallback(input, num_buckets, key, name, ctx):
  """Runs the `StringToHashBucketStrong` op eagerly through `_execute.execute`.

  Args:
    input: Value converted to a `string` tensor with `_ops.convert_to_tensor`.
    num_buckets: Bucket-count attribute; passed through `_execute.make_int`.
    key: List or tuple whose elements are each passed through
      `_execute.make_int`.
    name: Name forwarded to `_execute.execute`.
    ctx: Eager context forwarded to `_execute.execute`.

  Returns:
    The single output of the executed op.

  Raises:
    TypeError: If `key` is neither a list nor a tuple.
  """
  bucket_count = _execute.make_int(num_buckets, "num_buckets")
  if not isinstance(key, (list, tuple)):
    raise TypeError(
        "Expected list for 'key' argument to "
        "'string_to_hash_bucket_strong' Op, not %r." % key)
  key_ints = [_execute.make_int(element, "key") for element in key]
  string_tensor = _ops.convert_to_tensor(input, _dtypes.string)
  flat_inputs = [string_tensor]
  op_attrs = ("num_buckets", bucket_count, "key", key_ints)
  outputs = _execute.execute(
      b"StringToHashBucketStrong", 1, inputs=flat_inputs, attrs=op_attrs,
      ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "StringToHashBucketStrong", flat_inputs, op_attrs, outputs)
  # One output was requested above, so unpack the 1-element result.
  (output,) = outputs
  return output
|
|
|
|
|
|
@_dispatch.add_fallback_dispatch_list
@_dispatch.add_type_based_api_dispatcher
@tf_export('strings.upper')
def string_upper(input, encoding="", name=None):
  r"""Converts all lowercase characters into their respective uppercase replacements.

  Example:

  >>> tf.strings.upper("CamelCase string and ALL CAPS")
  <tf.Tensor: shape=(), dtype=string, numpy=b'CAMELCASE STRING AND ALL CAPS'>

  Args:
    input: A `Tensor` of type `string`. The input to be upper-cased.
    encoding: An optional `string`. Defaults to `""`.
      Character encoding of `input`. Allowed values are '' and 'utf-8'.
      Value '' is interpreted as ASCII.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  eager_ctx = _context._context or _context.context()
  thread_state = eager_ctx._thread_local_data
  if thread_state.is_eager:
    # Eager mode: attempt the fast-path executor first.
    try:
      return pywrap_tfe.TFE_Py_FastPathExecute(
          eager_ctx, "StringUpper", name, input, "encoding", encoding)
    except _core._NotOkStatusException as status_error:
      _ops.raise_from_not_ok_status(status_error, name)
    except _core._FallbackException:
      pass
    # Fast path was declined: consult the type-based dispatcher, then run the
    # eager fallback.
    try:
      dispatched = _dispatcher_for_string_upper(
          (input, encoding, name,), None)
      if dispatched is not NotImplemented:
        return dispatched
      return string_upper_eager_fallback(
          input, encoding=encoding, name=name, ctx=eager_ctx)
    except _core._SymbolicException:
      pass  # Continue with graph construction below.
    except (TypeError, ValueError):
      # Give the fallback dispatch list a chance before re-raising.
      dispatched = _dispatch.dispatch(
          string_upper, (),
          {"input": input, "encoding": encoding, "name": name})
      if dispatched is not _dispatch.OpDispatcher.NOT_SUPPORTED:
        return dispatched
      raise
  else:
    dispatched = _dispatcher_for_string_upper(
        (input, encoding, name,), None)
    if dispatched is not NotImplemented:
      return dispatched

  # Graph construction: normalise the attribute, then add the op node.
  encoding = _execute.make_str(
      "" if encoding is None else encoding, "encoding")
  try:
    _, _, graph_op, op_outputs = _op_def_library._apply_op_helper(
        "StringUpper", input=input, encoding=encoding, name=name)
  except (TypeError, ValueError):
    dispatched = _dispatch.dispatch(
        string_upper, (),
        {"input": input, "encoding": encoding, "name": name})
    if dispatched is not _dispatch.OpDispatcher.NOT_SUPPORTED:
      return dispatched
    raise
  results = op_outputs[:]
  if _execute.must_record_gradient():
    recorded_attrs = ("encoding", graph_op.get_attr("encoding"))
    _execute.record_gradient(
        "StringUpper", graph_op.inputs, recorded_attrs, results)
  (output,) = results
  return output
|
|
|
|
# Raw-op endpoint: the wrapper is converted with `_ops.to_raw_op` and exported
# under the name "raw_ops.StringUpper".
StringUpper = tf_export("raw_ops.StringUpper")(
    _ops.to_raw_op(string_upper))
# `Dispatch` callable of the wrapper's `_tf_type_based_dispatcher` attribute;
# `string_upper` calls it before building or executing the op.
_dispatcher_for_string_upper = (
    string_upper._tf_type_based_dispatcher.Dispatch)
|
|
|
|
|
|
def string_upper_eager_fallback(input, encoding, name, ctx):
  """Runs the `StringUpper` op eagerly through `_execute.execute`.

  Args:
    input: Value converted to a `string` tensor with `_ops.convert_to_tensor`.
    encoding: Encoding attribute; `None` is replaced by `""` before it is
      passed through `_execute.make_str`.
    name: Name forwarded to `_execute.execute`.
    ctx: Eager context forwarded to `_execute.execute`.

  Returns:
    The single output of the executed op.
  """
  encoding_attr = _execute.make_str(
      "" if encoding is None else encoding, "encoding")
  string_tensor = _ops.convert_to_tensor(input, _dtypes.string)
  flat_inputs = [string_tensor]
  op_attrs = ("encoding", encoding_attr)
  outputs = _execute.execute(
      b"StringUpper", 1, inputs=flat_inputs, attrs=op_attrs, ctx=ctx,
      name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "StringUpper", flat_inputs, op_attrs, outputs)
  # One output was requested above, so unpack the 1-element result.
  (output,) = outputs
  return output
|
|
|
|
|
|
def substr(input, pos, len, unit="BYTE", name=None):
  r"""Return substrings from `Tensor` of strings.

  For each string in the input `Tensor`, creates a substring starting at index
  `pos` with a total length of `len`.

  If `len` defines a substring that would extend beyond the length of the input
  string, or if `len` is negative, then as many characters as possible are used.

  A negative `pos` indicates distance within the string backwards from the end.

  If `pos` specifies an index which is out of range for any of the input strings,
  then an `InvalidArgumentError` is thrown.

  `pos` and `len` must have the same shape, otherwise a `ValueError` is thrown on
  Op creation.

  *NOTE*: `Substr` supports broadcasting up to two dimensions. More about
  broadcasting
  [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)

  ---

  Examples

  Using scalar `pos` and `len`:

  ```python
  input = [b'Hello', b'World']
  position = 1
  length = 3

  output = [b'ell', b'orl']
  ```

  Using `pos` and `len` with same shape as `input`:

  ```python
  input = [[b'ten', b'eleven', b'twelve'],
           [b'thirteen', b'fourteen', b'fifteen'],
           [b'sixteen', b'seventeen', b'eighteen']]
  position = [[1, 2, 3],
              [1, 2, 3],
              [1, 2, 3]]
  length =   [[2, 3, 4],
              [4, 3, 2],
              [5, 5, 5]]

  output = [[b'en', b'eve', b'lve'],
            [b'hirt', b'urt', b'te'],
            [b'ixtee', b'vente', b'hteen']]
  ```

  Broadcasting `pos` and `len` onto `input`:

  ```
  input = [[b'ten', b'eleven', b'twelve'],
           [b'thirteen', b'fourteen', b'fifteen'],
           [b'sixteen', b'seventeen', b'eighteen'],
           [b'nineteen', b'twenty', b'twentyone']]
  position = [1, 2, 3]
  length =   [1, 2, 3]

  output = [[b'e', b'ev', b'lve'],
            [b'h', b'ur', b'tee'],
            [b'i', b've', b'hte'],
            [b'i', b'en', b'nty']]
  ```

  Broadcasting `input` onto `pos` and `len`:

  ```
  input = b'thirteen'
  position = [1, 5, 7]
  length =   [3, 2, 1]

  output = [b'hir', b'ee', b'n']
  ```

  Raises:

    * `ValueError`: If the first argument cannot be converted to a
       Tensor of `dtype string`.
    * `InvalidArgumentError`: If indices are out of range.
    * `ValueError`: If `pos` and `len` are not the same shape.

  Args:
    input: A `Tensor` of type `string`. Tensor of strings
    pos: A `Tensor`. Must be one of the following types: `int32`, `int64`.
      Scalar defining the position of first character in each substring
    len: A `Tensor`. Must have the same type as `pos`.
      Scalar defining the number of characters to include in each substring
    unit: An optional `string` from: `"BYTE", "UTF8_CHAR"`. Defaults to `"BYTE"`.
      The unit that is used to create the substring.  One of: `"BYTE"` (for
      defining position and length by bytes) or `"UTF8_CHAR"` (for the UTF-8
      encoded Unicode code points).  The default is `"BYTE"`. Results are undefined if
      `unit=UTF8_CHAR` and the `input` strings do not contain structurally valid
      UTF-8.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  eager_ctx = _context._context or _context.context()
  thread_state = eager_ctx._thread_local_data
  if thread_state.is_eager:
    # Eager mode: attempt the fast-path executor first.
    try:
      return pywrap_tfe.TFE_Py_FastPathExecute(
          eager_ctx, "Substr", name, input, pos, len, "unit", unit)
    except _core._NotOkStatusException as status_error:
      _ops.raise_from_not_ok_status(status_error, name)
    except _core._FallbackException:
      pass
    # Fast path was declined: run the eager fallback.
    try:
      return substr_eager_fallback(
          input, pos, len, unit=unit, name=name, ctx=eager_ctx)
    except _core._SymbolicException:
      pass  # Continue with graph construction below.

  # Graph construction: normalise the attribute, then add the op node.
  unit = _execute.make_str("BYTE" if unit is None else unit, "unit")
  _, _, graph_op, op_outputs = _op_def_library._apply_op_helper(
      "Substr", input=input, pos=pos, len=len, unit=unit, name=name)
  results = op_outputs[:]
  if _execute.must_record_gradient():
    recorded_attrs = ("T", graph_op._get_attr_type("T"),
                      "unit", graph_op.get_attr("unit"))
    _execute.record_gradient(
        "Substr", graph_op.inputs, recorded_attrs, results)
  (output,) = results
  return output
|
|
|
|
# Raw-op endpoint: the wrapper is converted with `_ops.to_raw_op` and exported
# under the name "raw_ops.Substr".
Substr = tf_export("raw_ops.Substr")(
    _ops.to_raw_op(substr))
|
|
|
|
|
|
def substr_eager_fallback(input, pos, len, unit, name, ctx):
  """Runs the `Substr` op eagerly through `_execute.execute`.

  Args:
    input: Value converted to a `string` tensor with `_ops.convert_to_tensor`.
    pos: Converted together with `len` by `_execute.args_to_matching_eager`,
      with `int32` and `int64` as the allowed dtypes.
    len: See `pos`.
    unit: Unit attribute; `None` is replaced by `"BYTE"` before it is passed
      through `_execute.make_str`.
    name: Name forwarded to `_execute.execute`.
    ctx: Eager context forwarded to `_execute.execute`.

  Returns:
    The single output of the executed op.
  """
  unit_attr = _execute.make_str("BYTE" if unit is None else unit, "unit")
  index_dtype, (pos_tensor, len_tensor) = _execute.args_to_matching_eager(
      [pos, len], ctx, [_dtypes.int32, _dtypes.int64, ])
  string_tensor = _ops.convert_to_tensor(input, _dtypes.string)
  flat_inputs = [string_tensor, pos_tensor, len_tensor]
  op_attrs = ("T", index_dtype, "unit", unit_attr)
  outputs = _execute.execute(
      b"Substr", 1, inputs=flat_inputs, attrs=op_attrs, ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "Substr", flat_inputs, op_attrs, outputs)
  # One output was requested above, so unpack the 1-element result.
  (output,) = outputs
  return output
|
|
|
|
# Named result type for the two outputs of the `UnicodeDecode` op, in output
# order: (row_splits, char_values).
_UnicodeDecodeOutput = collections.namedtuple(
    typename="UnicodeDecode",
    field_names=("row_splits", "char_values"),
)
|
|
|
|
|
|
def unicode_decode(input, input_encoding, errors="replace", replacement_char=65533, replace_control_characters=False, Tsplits=_dtypes.int64, name=None):
  r"""Decodes each string in `input` into a sequence of Unicode code points.

  The character codepoints for all strings are returned using a single vector
  `char_values`, with strings expanded to characters in row-major order.

  The `row_splits` tensor indicates where the codepoints for
  each input string begin and end within the `char_values` tensor.
  In particular, the values for the `i`th
  string (in row-major order) are stored in the slice
  `[row_splits[i]:row_splits[i+1]]`. Thus:

  * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th
    character in the `i`th string (in row-major order).
  * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th
    string (in row-major order).

  Args:
    input: A `Tensor` of type `string`.
      The text to be decoded. Can have any shape. Note that the output is flattened
      to a vector of char values.
    input_encoding: A `string`.
      Text encoding of the input strings. This is any of the encodings supported
      by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
    errors: An optional `string` from: `"strict", "replace", "ignore"`. Defaults to `"replace"`.
      Error handling policy when there is invalid formatting found in the input.
      The value of 'strict' will cause the operation to produce an InvalidArgument
      error on any invalid input formatting. A value of 'replace' (the default) will
      cause the operation to replace any invalid formatting in the input with the
      `replacement_char` codepoint. A value of 'ignore' will cause the operation to
      skip any invalid formatting in the input and produce no corresponding output
      character.
    replacement_char: An optional `int`. Defaults to `65533`.
      The replacement character codepoint to be used in place of any invalid
      formatting in the input when `errors='replace'`. Any valid unicode codepoint may
      be used. The default value is the default unicode replacement character,
      0xFFFD (decimal 65533, i.e. U+FFFD).
    replace_control_characters: An optional `bool`. Defaults to `False`.
      Whether to replace the C0 control characters (00-1F) with the
      `replacement_char`. Default is false.
    Tsplits: An optional `tf.DType` from: `tf.int32, tf.int64`. Defaults to `tf.int64`.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (row_splits, char_values).

    row_splits: A `Tensor` of type `Tsplits`.
    char_values: A `Tensor` of type `int32`.
  """
  eager_ctx = _context._context or _context.context()
  thread_state = eager_ctx._thread_local_data
  if thread_state.is_eager:
    # Eager mode: attempt the fast-path executor first.
    try:
      fast_outputs = pywrap_tfe.TFE_Py_FastPathExecute(
          eager_ctx, "UnicodeDecode", name, input,
          "input_encoding", input_encoding,
          "errors", errors,
          "replacement_char", replacement_char,
          "replace_control_characters", replace_control_characters,
          "Tsplits", Tsplits)
      return _UnicodeDecodeOutput._make(fast_outputs)
    except _core._NotOkStatusException as status_error:
      _ops.raise_from_not_ok_status(status_error, name)
    except _core._FallbackException:
      pass
    # Fast path was declined: run the eager fallback.
    try:
      return unicode_decode_eager_fallback(
          input, input_encoding=input_encoding, errors=errors,
          replacement_char=replacement_char,
          replace_control_characters=replace_control_characters,
          Tsplits=Tsplits, name=name, ctx=eager_ctx)
    except _core._SymbolicException:
      pass  # Continue with graph construction below.

  # Graph construction: substitute defaults for `None`, normalise each
  # attribute, then add the op node.
  input_encoding = _execute.make_str(input_encoding, "input_encoding")
  errors = _execute.make_str(
      "replace" if errors is None else errors, "errors")
  replacement_char = _execute.make_int(
      65533 if replacement_char is None else replacement_char,
      "replacement_char")
  replace_control_characters = _execute.make_bool(
      False if replace_control_characters is None
      else replace_control_characters,
      "replace_control_characters")
  Tsplits = _execute.make_type(
      _dtypes.int64 if Tsplits is None else Tsplits, "Tsplits")
  _, _, graph_op, op_outputs = _op_def_library._apply_op_helper(
      "UnicodeDecode", input=input, input_encoding=input_encoding,
      errors=errors, replacement_char=replacement_char,
      replace_control_characters=replace_control_characters,
      Tsplits=Tsplits, name=name)
  results = op_outputs[:]
  if _execute.must_record_gradient():
    recorded_attrs = (
        "input_encoding", graph_op.get_attr("input_encoding"),
        "errors", graph_op.get_attr("errors"),
        "replacement_char", graph_op._get_attr_int("replacement_char"),
        "replace_control_characters",
        graph_op._get_attr_bool("replace_control_characters"),
        "Tsplits", graph_op._get_attr_type("Tsplits"))
    _execute.record_gradient(
        "UnicodeDecode", graph_op.inputs, recorded_attrs, results)
  return _UnicodeDecodeOutput._make(results)
|
|
|
|
# Raw-op endpoint: the wrapper is converted with `_ops.to_raw_op` and exported
# under the name "raw_ops.UnicodeDecode".
UnicodeDecode = tf_export("raw_ops.UnicodeDecode")(
    _ops.to_raw_op(unicode_decode))
|
|
|
|
|
|
def unicode_decode_eager_fallback(input, input_encoding, errors, replacement_char, replace_control_characters, Tsplits, name, ctx):
  """Runs the `UnicodeDecode` op eagerly through `_execute.execute`.

  Attribute arguments that are `None` (all except `input_encoding`) are
  replaced by their defaults ("replace", 65533, False, `_dtypes.int64`)
  before being normalised with the matching `_execute.make_*` helper.

  Args:
    input: Value converted to a `string` tensor with `_ops.convert_to_tensor`.
    input_encoding: Passed through `_execute.make_str`.
    errors: Passed through `_execute.make_str`.
    replacement_char: Passed through `_execute.make_int`.
    replace_control_characters: Passed through `_execute.make_bool`.
    Tsplits: Passed through `_execute.make_type`.
    name: Name forwarded to `_execute.execute`.
    ctx: Eager context forwarded to `_execute.execute`.

  Returns:
    A `_UnicodeDecodeOutput` built from the two outputs of the executed op.
  """
  encoding_attr = _execute.make_str(input_encoding, "input_encoding")
  errors_attr = _execute.make_str(
      "replace" if errors is None else errors, "errors")
  replacement_attr = _execute.make_int(
      65533 if replacement_char is None else replacement_char,
      "replacement_char")
  replace_control_attr = _execute.make_bool(
      False if replace_control_characters is None
      else replace_control_characters,
      "replace_control_characters")
  splits_dtype = _execute.make_type(
      _dtypes.int64 if Tsplits is None else Tsplits, "Tsplits")
  string_tensor = _ops.convert_to_tensor(input, _dtypes.string)
  flat_inputs = [string_tensor]
  op_attrs = (
      "input_encoding", encoding_attr,
      "errors", errors_attr,
      "replacement_char", replacement_attr,
      "replace_control_characters", replace_control_attr,
      "Tsplits", splits_dtype)
  outputs = _execute.execute(
      b"UnicodeDecode", 2, inputs=flat_inputs, attrs=op_attrs, ctx=ctx,
      name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "UnicodeDecode", flat_inputs, op_attrs, outputs)
  return _UnicodeDecodeOutput._make(outputs)
|
|
|
|
# Named result type for the three outputs of the `UnicodeDecodeWithOffsets`
# op, in output order: (row_splits, char_values, char_to_byte_starts).
_UnicodeDecodeWithOffsetsOutput = collections.namedtuple(
    typename="UnicodeDecodeWithOffsets",
    field_names=("row_splits", "char_values", "char_to_byte_starts"),
)
|
|
|
|
|
|
def unicode_decode_with_offsets(input, input_encoding, errors="replace", replacement_char=65533, replace_control_characters=False, Tsplits=_dtypes.int64, name=None):
  r"""Decodes each string in `input` into a sequence of Unicode code points.

  The character codepoints for all strings are returned using a single vector
  `char_values`, with strings expanded to characters in row-major order.
  Similarly, the character start byte offsets are returned using a single vector
  `char_to_byte_starts`, with strings expanded in row-major order.

  The `row_splits` tensor indicates where the codepoints and start offsets for
  each input string begin and end within the `char_values` and
  `char_to_byte_starts` tensors.  In particular, the values for the `i`th
  string (in row-major order) are stored in the slice
  `[row_splits[i]:row_splits[i+1]]`. Thus:

  * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th
    character in the `i`th string (in row-major order).
  * `char_to_byte_starts[row_splits[i]+j]` is the start byte offset for the `j`th
    character in the `i`th string (in row-major order).
  * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th
    string (in row-major order).

  Args:
    input: A `Tensor` of type `string`.
      The text to be decoded. Can have any shape. Note that the output is flattened
      to a vector of char values.
    input_encoding: A `string`.
      Text encoding of the input strings. This is any of the encodings supported
      by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
    errors: An optional `string` from: `"strict", "replace", "ignore"`. Defaults to `"replace"`.
      Error handling policy when there is invalid formatting found in the input.
      The value of 'strict' will cause the operation to produce an InvalidArgument
      error on any invalid input formatting. A value of 'replace' (the default) will
      cause the operation to replace any invalid formatting in the input with the
      `replacement_char` codepoint. A value of 'ignore' will cause the operation to
      skip any invalid formatting in the input and produce no corresponding output
      character.
    replacement_char: An optional `int`. Defaults to `65533`.
      The replacement character codepoint to be used in place of any invalid
      formatting in the input when `errors='replace'`. Any valid unicode codepoint may
      be used. The default value is the default unicode replacement character,
      0xFFFD (decimal 65533, i.e. U+FFFD).
    replace_control_characters: An optional `bool`. Defaults to `False`.
      Whether to replace the C0 control characters (00-1F) with the
      `replacement_char`. Default is false.
    Tsplits: An optional `tf.DType` from: `tf.int32, tf.int64`. Defaults to `tf.int64`.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (row_splits, char_values, char_to_byte_starts).

    row_splits: A `Tensor` of type `Tsplits`.
    char_values: A `Tensor` of type `int32`.
    char_to_byte_starts: A `Tensor` of type `int64`.
  """
  eager_ctx = _context._context or _context.context()
  thread_state = eager_ctx._thread_local_data
  if thread_state.is_eager:
    # Eager mode: attempt the fast-path executor first.
    try:
      fast_outputs = pywrap_tfe.TFE_Py_FastPathExecute(
          eager_ctx, "UnicodeDecodeWithOffsets", name, input,
          "input_encoding", input_encoding,
          "errors", errors,
          "replacement_char", replacement_char,
          "replace_control_characters", replace_control_characters,
          "Tsplits", Tsplits)
      return _UnicodeDecodeWithOffsetsOutput._make(fast_outputs)
    except _core._NotOkStatusException as status_error:
      _ops.raise_from_not_ok_status(status_error, name)
    except _core._FallbackException:
      pass
    # Fast path was declined: run the eager fallback.
    try:
      return unicode_decode_with_offsets_eager_fallback(
          input, input_encoding=input_encoding, errors=errors,
          replacement_char=replacement_char,
          replace_control_characters=replace_control_characters,
          Tsplits=Tsplits, name=name, ctx=eager_ctx)
    except _core._SymbolicException:
      pass  # Continue with graph construction below.

  # Graph construction: substitute defaults for `None`, normalise each
  # attribute, then add the op node.
  input_encoding = _execute.make_str(input_encoding, "input_encoding")
  errors = _execute.make_str(
      "replace" if errors is None else errors, "errors")
  replacement_char = _execute.make_int(
      65533 if replacement_char is None else replacement_char,
      "replacement_char")
  replace_control_characters = _execute.make_bool(
      False if replace_control_characters is None
      else replace_control_characters,
      "replace_control_characters")
  Tsplits = _execute.make_type(
      _dtypes.int64 if Tsplits is None else Tsplits, "Tsplits")
  _, _, graph_op, op_outputs = _op_def_library._apply_op_helper(
      "UnicodeDecodeWithOffsets", input=input,
      input_encoding=input_encoding, errors=errors,
      replacement_char=replacement_char,
      replace_control_characters=replace_control_characters,
      Tsplits=Tsplits, name=name)
  results = op_outputs[:]
  if _execute.must_record_gradient():
    recorded_attrs = (
        "input_encoding", graph_op.get_attr("input_encoding"),
        "errors", graph_op.get_attr("errors"),
        "replacement_char", graph_op._get_attr_int("replacement_char"),
        "replace_control_characters",
        graph_op._get_attr_bool("replace_control_characters"),
        "Tsplits", graph_op._get_attr_type("Tsplits"))
    _execute.record_gradient(
        "UnicodeDecodeWithOffsets", graph_op.inputs, recorded_attrs, results)
  return _UnicodeDecodeWithOffsetsOutput._make(results)
|
|
|
|
# Raw-op endpoint: the wrapper is converted with `_ops.to_raw_op` and exported
# under the name "raw_ops.UnicodeDecodeWithOffsets".
UnicodeDecodeWithOffsets = tf_export("raw_ops.UnicodeDecodeWithOffsets")(
    _ops.to_raw_op(unicode_decode_with_offsets))
|
|
|
|
|
|
def unicode_decode_with_offsets_eager_fallback(input, input_encoding, errors, replacement_char, replace_control_characters, Tsplits, name, ctx):
  """Runs the `UnicodeDecodeWithOffsets` op eagerly through `_execute.execute`.

  Attribute arguments that are `None` (all except `input_encoding`) are
  replaced by their defaults ("replace", 65533, False, `_dtypes.int64`)
  before being normalised with the matching `_execute.make_*` helper.

  Args:
    input: Value converted to a `string` tensor with `_ops.convert_to_tensor`.
    input_encoding: Passed through `_execute.make_str`.
    errors: Passed through `_execute.make_str`.
    replacement_char: Passed through `_execute.make_int`.
    replace_control_characters: Passed through `_execute.make_bool`.
    Tsplits: Passed through `_execute.make_type`.
    name: Name forwarded to `_execute.execute`.
    ctx: Eager context forwarded to `_execute.execute`.

  Returns:
    A `_UnicodeDecodeWithOffsetsOutput` built from the three outputs of the
    executed op.
  """
  encoding_attr = _execute.make_str(input_encoding, "input_encoding")
  errors_attr = _execute.make_str(
      "replace" if errors is None else errors, "errors")
  replacement_attr = _execute.make_int(
      65533 if replacement_char is None else replacement_char,
      "replacement_char")
  replace_control_attr = _execute.make_bool(
      False if replace_control_characters is None
      else replace_control_characters,
      "replace_control_characters")
  splits_dtype = _execute.make_type(
      _dtypes.int64 if Tsplits is None else Tsplits, "Tsplits")
  string_tensor = _ops.convert_to_tensor(input, _dtypes.string)
  flat_inputs = [string_tensor]
  op_attrs = (
      "input_encoding", encoding_attr,
      "errors", errors_attr,
      "replacement_char", replacement_attr,
      "replace_control_characters", replace_control_attr,
      "Tsplits", splits_dtype)
  outputs = _execute.execute(
      b"UnicodeDecodeWithOffsets", 3, inputs=flat_inputs, attrs=op_attrs,
      ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "UnicodeDecodeWithOffsets", flat_inputs, op_attrs, outputs)
  return _UnicodeDecodeWithOffsetsOutput._make(outputs)
|
|
|
|
|
|
def unicode_encode(input_values, input_splits, output_encoding, errors="replace", replacement_char=65533, name=None):
  r"""Encode a tensor of ints into unicode strings.

  Returns a vector of strings, where `output[i]` is constructed by encoding the
  Unicode codepoints in `input_values[input_splits[i]:input_splits[i+1]]`
  using `output_encoding`.

  ---

  Example:

  ```
  input_values = [72, 101, 108, 108, 111, 87, 111, 114, 108, 100]
  input_splits = [0, 5, 10]
  output_encoding = 'UTF-8'

  output = ['Hello', 'World']
  ```

  Args:
    input_values: A `Tensor` of type `int32`.
      A 1D tensor containing the unicode codepoints that should be encoded.
    input_splits: A `Tensor`. Must be one of the following types: `int32`, `int64`.
      A 1D tensor specifying how the unicode codepoints should be split into strings.
      In particular, `output[i]` is constructed by encoding the codepoints in the
      slice `input_values[input_splits[i]:input_splits[i+1]]`.
    output_encoding: A `string` from: `"UTF-8", "UTF-16-BE", "UTF-32-BE"`.
      Unicode encoding of the output strings. Valid encodings are: `"UTF-8",
      "UTF-16-BE", and "UTF-32-BE"`.
    errors: An optional `string` from: `"ignore", "replace", "strict"`. Defaults to `"replace"`.
      Error handling policy when there is invalid formatting found in the input.
      The value of 'strict' will cause the operation to produce an InvalidArgument
      error on any invalid input formatting. A value of 'replace' (the default) will
      cause the operation to replace any invalid formatting in the input with the
      `replacement_char` codepoint. A value of 'ignore' will cause the operation to
      skip any invalid formatting in the input and produce no corresponding output
      character.
    replacement_char: An optional `int`. Defaults to `65533`.
      The replacement character codepoint to be used in place of any invalid
      formatting in the input when `errors='replace'`. Any valid unicode codepoint may
      be used. The default value is the default unicode replacement character,
      0xFFFD (decimal 65533, i.e. U+FFFD).
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  eager_ctx = _context._context or _context.context()
  thread_state = eager_ctx._thread_local_data
  if thread_state.is_eager:
    # Eager mode: attempt the fast-path executor first.
    try:
      return pywrap_tfe.TFE_Py_FastPathExecute(
          eager_ctx, "UnicodeEncode", name, input_values, input_splits,
          "errors", errors,
          "output_encoding", output_encoding,
          "replacement_char", replacement_char)
    except _core._NotOkStatusException as status_error:
      _ops.raise_from_not_ok_status(status_error, name)
    except _core._FallbackException:
      pass
    # Fast path was declined: run the eager fallback.
    try:
      return unicode_encode_eager_fallback(
          input_values, input_splits, errors=errors,
          output_encoding=output_encoding, replacement_char=replacement_char,
          name=name, ctx=eager_ctx)
    except _core._SymbolicException:
      pass  # Continue with graph construction below.

  # Graph construction: substitute defaults for `None`, normalise each
  # attribute, then add the op node.
  output_encoding = _execute.make_str(output_encoding, "output_encoding")
  errors = _execute.make_str(
      "replace" if errors is None else errors, "errors")
  replacement_char = _execute.make_int(
      65533 if replacement_char is None else replacement_char,
      "replacement_char")
  _, _, graph_op, op_outputs = _op_def_library._apply_op_helper(
      "UnicodeEncode", input_values=input_values, input_splits=input_splits,
      output_encoding=output_encoding, errors=errors,
      replacement_char=replacement_char, name=name)
  results = op_outputs[:]
  if _execute.must_record_gradient():
    recorded_attrs = (
        "errors", graph_op.get_attr("errors"),
        "output_encoding", graph_op.get_attr("output_encoding"),
        "replacement_char", graph_op._get_attr_int("replacement_char"),
        "Tsplits", graph_op._get_attr_type("Tsplits"))
    _execute.record_gradient(
        "UnicodeEncode", graph_op.inputs, recorded_attrs, results)
  (output,) = results
  return output
|
|
|
|
# Raw-op endpoint: the wrapper is converted with `_ops.to_raw_op` and exported
# under the name "raw_ops.UnicodeEncode".
UnicodeEncode = tf_export("raw_ops.UnicodeEncode")(
    _ops.to_raw_op(unicode_encode))
|
|
|
|
|
|
def unicode_encode_eager_fallback(input_values, input_splits, output_encoding, errors, replacement_char, name, ctx):
  """Executes the `UnicodeEncode` op directly via `_execute.execute`.

  The attribute arguments are normalized first (`errors` becomes `"replace"`
  and `replacement_char` becomes `65533` when given as `None`), the inputs
  are converted to tensors, and the op is run with a single output.

  Args:
    input_values: Value converted to an `int32` tensor.
    input_splits: Value converted to a tensor whose dtype is matched against
      `int32`/`int64` (with `int64` passed as the trailing dtype argument).
    output_encoding: Value for the `output_encoding` string attribute.
    errors: Value for the `errors` string attribute, or `None`.
    replacement_char: Value for the `replacement_char` int attribute, or
      `None`.
    name: Name forwarded to `_execute.execute`.
    ctx: Context forwarded to the conversion helper and to
      `_execute.execute`.

  Returns:
    The single output produced by the op.
  """
  # Attribute normalization happens in the same order as the attrs are
  # declared in the signature, so the first invalid one is reported first.
  output_encoding = _execute.make_str(output_encoding, "output_encoding")
  errors = _execute.make_str(
      "replace" if errors is None else errors, "errors")
  replacement_char = _execute.make_int(
      65533 if replacement_char is None else replacement_char,
      "replacement_char")

  splits_dtype, (splits_tensor,) = _execute.args_to_matching_eager(
      [input_splits], ctx, [_dtypes.int32, _dtypes.int64], _dtypes.int64)
  values_tensor = _ops.convert_to_tensor(input_values, _dtypes.int32)

  op_inputs = [values_tensor, splits_tensor]
  op_attrs = (
      "errors", errors,
      "output_encoding", output_encoding,
      "replacement_char", replacement_char,
      "Tsplits", splits_dtype,
  )
  op_outputs = _execute.execute(
      b"UnicodeEncode", 1, inputs=op_inputs, attrs=op_attrs, ctx=ctx,
      name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient("UnicodeEncode", op_inputs, op_attrs, op_outputs)
  # The op has exactly one output; unpack it from the result list.
  (encoded,) = op_outputs
  return encoded
|
|
|
|
|
|
@_dispatch.add_fallback_dispatch_list
@_dispatch.add_type_based_api_dispatcher
@tf_export('strings.unicode_script')
def unicode_script(input, name=None):
  r"""Determine the script codes of a given tensor of Unicode integer code points.

  This operation converts Unicode code points to script codes corresponding to
  each code point. Script codes correspond to International Components for
  Unicode (ICU) UScriptCode values.

  See
  [ICU project docs](http://icu-project.org/apiref/icu4c/uscript_8h.html)
  for more details on script codes.

  For an example, see the unicode strings guide on
  [unicode scripts](https://www.tensorflow.org/tutorials/load_data/unicode#representing_unicode).

  Returns -1 (USCRIPT_INVALID_CODE) for invalid codepoints. Output shape will
  match input shape.

  Examples:

  >>> tf.strings.unicode_script([1, 31, 38])
  <tf.Tensor: shape=(3,), dtype=int32, numpy=array([0, 0, 0], dtype=int32)>

  Args:
    input: A `Tensor` of type `int32`. A Tensor of int32 Unicode code points.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `int32`.
  """
  _ctx = _context._context or _context.context()
  if _ctx._thread_local_data.is_eager:
    # First attempt: the C fast path.
    try:
      return pywrap_tfe.TFE_Py_FastPathExecute(
          _ctx, "UnicodeScript", name, input)
    except _core._NotOkStatusException as status_error:
      _ops.raise_from_not_ok_status(status_error, name)
    except _core._FallbackException:
      pass  # Fast path declined; try the slower routes below.
    # Second attempt: type-based dispatch, then the Python eager fallback.
    try:
      typed_result = _dispatcher_for_unicode_script((input, name), None)
      if typed_result is NotImplemented:
        return unicode_script_eager_fallback(input, name=name, ctx=_ctx)
      return typed_result
    except _core._SymbolicException:
      pass  # Fall through and add nodes to the TensorFlow graph.
    except (TypeError, ValueError):
      # Give the fallback dispatch list a chance before re-raising.
      dispatched = _dispatch.dispatch(
          unicode_script, (), {"input": input, "name": name})
      if dispatched is _dispatch.OpDispatcher.NOT_SUPPORTED:
        raise
      return dispatched
  else:
    typed_result = _dispatcher_for_unicode_script((input, name), None)
    if typed_result is not NotImplemented:
      return typed_result

  # Graph construction path.
  try:
    _, _, graph_op, graph_outputs = _op_def_library._apply_op_helper(
        "UnicodeScript", input=input, name=name)
  except (TypeError, ValueError):
    dispatched = _dispatch.dispatch(
        unicode_script, (), {"input": input, "name": name})
    if dispatched is _dispatch.OpDispatcher.NOT_SUPPORTED:
      raise
    return dispatched

  outputs = graph_outputs[:]
  if _execute.must_record_gradient():
    # This op carries no attributes, hence the empty attrs tuple.
    no_attrs = ()
    flat_inputs = graph_op.inputs
    _execute.record_gradient("UnicodeScript", flat_inputs, no_attrs, outputs)
  (script_codes,) = outputs
  return script_codes
|
|
|
|
# Export `unicode_script`, passed through `_ops.to_raw_op`, under the API name
# `raw_ops.UnicodeScript`.
UnicodeScript = tf_export("raw_ops.UnicodeScript")(_ops.to_raw_op(unicode_script))
|
|
# Type-based dispatch entry point that `unicode_script` consults before its
# eager fallback / graph construction; a `NotImplemented` return is treated
# there as "not handled".
# NOTE(review): `_tf_type_based_dispatcher` is presumably attached by the
# `@_dispatch.add_type_based_api_dispatcher` decorator -- confirm.
_dispatcher_for_unicode_script = unicode_script._tf_type_based_dispatcher.Dispatch
|
|
|
|
|
|
def unicode_script_eager_fallback(input, name, ctx):
  """Executes the `UnicodeScript` op directly via `_execute.execute`.

  Args:
    input: Value converted to an `int32` tensor.
    name: Name forwarded to `_execute.execute`.
    ctx: Context forwarded to `_execute.execute`.

  Returns:
    The single output produced by the op.
  """
  op_inputs = [_ops.convert_to_tensor(input, _dtypes.int32)]
  # The op has no attributes; `None` is what gets passed on for them.
  op_attrs = None
  op_outputs = _execute.execute(
      b"UnicodeScript", 1, inputs=op_inputs, attrs=op_attrs, ctx=ctx,
      name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient("UnicodeScript", op_inputs, op_attrs, op_outputs)
  # Exactly one output is expected; unpack it from the result list.
  (script_codes,) = op_outputs
  return script_codes
|
|
|
|
|
|
@_dispatch.add_fallback_dispatch_list
@_dispatch.add_type_based_api_dispatcher
@tf_export('strings.unicode_transcode')
def unicode_transcode(input, input_encoding, output_encoding, errors="replace", replacement_char=65533, replace_control_characters=False, name=None):
  r"""Transcode the input text from a source encoding to a destination encoding.

  The input is a string tensor of any shape. The output is a string tensor of
  the same shape containing the transcoded strings. Output strings are always
  valid unicode. If the input contains invalid encoding positions, the
  `errors` attribute sets the policy for how to deal with them. If the default
  error-handling policy is used, invalid formatting will be substituted in the
  output by the `replacement_char`. If the errors policy is to `ignore`, any
  invalid encoding positions in the input are skipped and not included in the
  output. If it is set to `strict` then any invalid formatting will result in
  an InvalidArgument error.

  This operation can be used with `output_encoding = input_encoding` to enforce
  correct formatting for inputs even if they are already in the desired
  encoding.

  If the input is prefixed by a Byte Order Mark needed to determine encoding
  (e.g. if the encoding is UTF-16 and the BOM indicates big-endian), then that
  BOM will be consumed and not emitted into the output. If the input encoding
  is marked with an explicit endianness (e.g. UTF-16-BE), then the BOM is
  interpreted as a non-breaking-space and is preserved in the output (including
  always for UTF-8).

  The end result is that if the input is marked as an explicit endianness the
  transcoding is faithful to all codepoints in the source. If it is not marked
  with an explicit endianness, the BOM is not considered part of the string
  itself but as metadata, and so is not preserved in the output.

  Examples:

  >>> tf.strings.unicode_transcode(["Hello", "TensorFlow", "2.x"], "UTF-8", "UTF-16-BE")
  <tf.Tensor: shape=(3,), dtype=string, numpy=
  array([b'\x00H\x00e\x00l\x00l\x00o',
         b'\x00T\x00e\x00n\x00s\x00o\x00r\x00F\x00l\x00o\x00w',
         b'\x002\x00.\x00x'], dtype=object)>
  >>> tf.strings.unicode_transcode(["A", "B", "C"], "US ASCII", "UTF-8").numpy()
  array([b'A', b'B', b'C'], dtype=object)

  Args:
    input: A `Tensor` of type `string`.
      The text to be processed. Can have any shape.
    input_encoding: A `string`.
      Text encoding of the input strings. This is any of the encodings
      supported by ICU ucnv algorithmic converters. Examples:
      `"UTF-16", "US ASCII", "UTF-8"`.
    output_encoding: A `string` from: `"UTF-8", "UTF-16-BE", "UTF-32-BE"`.
      The unicode encoding to use in the output. Must be one of
      `"UTF-8", "UTF-16-BE", "UTF-32-BE"`. Multi-byte encodings will be
      big-endian.
    errors: An optional `string` from: `"strict", "replace", "ignore"`.
      Defaults to `"replace"`.
      Error handling policy when there is invalid formatting found in the
      input. The value of 'strict' will cause the operation to produce an
      InvalidArgument error on any invalid input formatting. A value of
      'replace' (the default) will cause the operation to replace any invalid
      formatting in the input with the `replacement_char` codepoint. A value
      of 'ignore' will cause the operation to skip any invalid formatting in
      the input and produce no corresponding output character.
    replacement_char: An optional `int`. Defaults to `65533`.
      The replacement character codepoint to be used in place of any invalid
      formatting in the input when `errors='replace'`. Any valid unicode
      codepoint may be used. The default value is the Unicode replacement
      character, U+FFFD (decimal 65533).

      Note that for UTF-8, passing a replacement character expressible in 1
      byte, such as ' ', will preserve string alignment to the source since
      invalid bytes will be replaced with a 1-byte replacement. For UTF-16-BE
      and UTF-16-LE, any 1 or 2 byte replacement character will preserve byte
      alignment to the source.
    replace_control_characters: An optional `bool`. Defaults to `False`.
      Whether to replace the C0 control characters (00-1F) with the
      `replacement_char`. Default is false.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  _ctx = _context._context or _context.context()
  if _ctx._thread_local_data.is_eager:
    # First attempt: the C fast path.
    try:
      return pywrap_tfe.TFE_Py_FastPathExecute(
          _ctx, "UnicodeTranscode", name, input,
          "input_encoding", input_encoding,
          "output_encoding", output_encoding,
          "errors", errors,
          "replacement_char", replacement_char,
          "replace_control_characters", replace_control_characters)
    except _core._NotOkStatusException as status_error:
      _ops.raise_from_not_ok_status(status_error, name)
    except _core._FallbackException:
      pass  # Fast path declined; try the slower routes below.
    # Second attempt: type-based dispatch, then the Python eager fallback.
    try:
      typed_result = _dispatcher_for_unicode_transcode(
          (input, input_encoding, output_encoding, errors, replacement_char,
           replace_control_characters, name), None)
      if typed_result is NotImplemented:
        return unicode_transcode_eager_fallback(
            input,
            input_encoding=input_encoding,
            output_encoding=output_encoding,
            errors=errors,
            replacement_char=replacement_char,
            replace_control_characters=replace_control_characters,
            name=name,
            ctx=_ctx)
      return typed_result
    except _core._SymbolicException:
      pass  # Fall through and add nodes to the TensorFlow graph.
    except (TypeError, ValueError):
      # Give the fallback dispatch list a chance before re-raising.
      dispatched = _dispatch.dispatch(
          unicode_transcode, (), {
              "input": input,
              "input_encoding": input_encoding,
              "output_encoding": output_encoding,
              "errors": errors,
              "replacement_char": replacement_char,
              "replace_control_characters": replace_control_characters,
              "name": name,
          })
      if dispatched is _dispatch.OpDispatcher.NOT_SUPPORTED:
        raise
      return dispatched
  else:
    typed_result = _dispatcher_for_unicode_transcode(
        (input, input_encoding, output_encoding, errors, replacement_char,
         replace_control_characters, name), None)
    if typed_result is not NotImplemented:
      return typed_result

  # Graph construction path: normalize the attributes, then build the op.
  input_encoding = _execute.make_str(input_encoding, "input_encoding")
  output_encoding = _execute.make_str(output_encoding, "output_encoding")
  errors = _execute.make_str(
      "replace" if errors is None else errors, "errors")
  replacement_char = _execute.make_int(
      65533 if replacement_char is None else replacement_char,
      "replacement_char")
  replace_control_characters = _execute.make_bool(
      False if replace_control_characters is None
      else replace_control_characters,
      "replace_control_characters")
  try:
    _, _, graph_op, graph_outputs = _op_def_library._apply_op_helper(
        "UnicodeTranscode",
        input=input,
        input_encoding=input_encoding,
        output_encoding=output_encoding,
        errors=errors,
        replacement_char=replacement_char,
        replace_control_characters=replace_control_characters,
        name=name)
  except (TypeError, ValueError):
    # The dispatch call sees the attribute values as normalized above.
    dispatched = _dispatch.dispatch(
        unicode_transcode, (), {
            "input": input,
            "input_encoding": input_encoding,
            "output_encoding": output_encoding,
            "errors": errors,
            "replacement_char": replacement_char,
            "replace_control_characters": replace_control_characters,
            "name": name,
        })
    if dispatched is _dispatch.OpDispatcher.NOT_SUPPORTED:
      raise
    return dispatched

  outputs = graph_outputs[:]
  if _execute.must_record_gradient():
    # Read the attrs back from the constructed op for the gradient record.
    recorded_attrs = (
        "input_encoding", graph_op.get_attr("input_encoding"),
        "output_encoding", graph_op.get_attr("output_encoding"),
        "errors", graph_op.get_attr("errors"),
        "replacement_char", graph_op._get_attr_int("replacement_char"),
        "replace_control_characters",
        graph_op._get_attr_bool("replace_control_characters"),
    )
    flat_inputs = graph_op.inputs
    _execute.record_gradient(
        "UnicodeTranscode", flat_inputs, recorded_attrs, outputs)
  (transcoded,) = outputs
  return transcoded
|
|
|
|
# Export `unicode_transcode`, passed through `_ops.to_raw_op`, under the API
# name `raw_ops.UnicodeTranscode`.
UnicodeTranscode = tf_export("raw_ops.UnicodeTranscode")(_ops.to_raw_op(unicode_transcode))
|
|
# Type-based dispatch entry point that `unicode_transcode` consults before its
# eager fallback / graph construction; a `NotImplemented` return is treated
# there as "not handled".
# NOTE(review): `_tf_type_based_dispatcher` is presumably attached by the
# `@_dispatch.add_type_based_api_dispatcher` decorator -- confirm.
_dispatcher_for_unicode_transcode = unicode_transcode._tf_type_based_dispatcher.Dispatch
|
|
|
|
|
|
def unicode_transcode_eager_fallback(input, input_encoding, output_encoding, errors, replacement_char, replace_control_characters, name, ctx):
  """Executes the `UnicodeTranscode` op directly via `_execute.execute`.

  The attribute arguments are normalized first; `errors`, `replacement_char`
  and `replace_control_characters` become `"replace"`, `65533` and `False`
  respectively when given as `None`.

  Args:
    input: Value converted to a `string` tensor.
    input_encoding: Value for the `input_encoding` string attribute.
    output_encoding: Value for the `output_encoding` string attribute.
    errors: Value for the `errors` string attribute, or `None`.
    replacement_char: Value for the `replacement_char` int attribute, or
      `None`.
    replace_control_characters: Value for the `replace_control_characters`
      bool attribute, or `None`.
    name: Name forwarded to `_execute.execute`.
    ctx: Context forwarded to `_execute.execute`.

  Returns:
    The single output produced by the op.
  """
  # Attributes are validated in declaration order, before the input is
  # converted, so an invalid attribute is reported first.
  input_encoding = _execute.make_str(input_encoding, "input_encoding")
  output_encoding = _execute.make_str(output_encoding, "output_encoding")
  errors = _execute.make_str(
      "replace" if errors is None else errors, "errors")
  replacement_char = _execute.make_int(
      65533 if replacement_char is None else replacement_char,
      "replacement_char")
  replace_control_characters = _execute.make_bool(
      False if replace_control_characters is None
      else replace_control_characters,
      "replace_control_characters")

  op_inputs = [_ops.convert_to_tensor(input, _dtypes.string)]
  op_attrs = (
      "input_encoding", input_encoding,
      "output_encoding", output_encoding,
      "errors", errors,
      "replacement_char", replacement_char,
      "replace_control_characters", replace_control_characters,
  )
  op_outputs = _execute.execute(
      b"UnicodeTranscode", 1, inputs=op_inputs, attrs=op_attrs, ctx=ctx,
      name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "UnicodeTranscode", op_inputs, op_attrs, op_outputs)
  # Exactly one output is expected; unpack it from the result list.
  (transcoded,) = op_outputs
  return transcoded
|
|
|
|
|
|
def unsorted_segment_join(inputs, segment_ids, num_segments, separator="", name=None):
  r"""Python wrapper for the `UnsortedSegmentJoin` op.

  In eager mode the op is run through the fast path, falling back to
  `unsorted_segment_join_eager_fallback`; otherwise a node is added to the
  TensorFlow graph.

  NOTE(review): the op definition ships without a description. Judging by its
  name it presumably joins the strings in `inputs` per segment id, using
  `separator` between elements -- confirm against the op registry.

  Args:
    inputs: A `Tensor` of type `string`.
    segment_ids: A `Tensor`. Must be one of the following types: `int32`, `int64`.
    num_segments: A `Tensor`. Must be one of the following types: `int32`, `int64`.
    separator: An optional `string`. Defaults to `""`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  _ctx = _context._context or _context.context()
  if _ctx._thread_local_data.is_eager:
    # First attempt: the C fast path.
    try:
      return pywrap_tfe.TFE_Py_FastPathExecute(
          _ctx, "UnsortedSegmentJoin", name, inputs, segment_ids,
          num_segments, "separator", separator)
    except _core._NotOkStatusException as status_error:
      _ops.raise_from_not_ok_status(status_error, name)
    except _core._FallbackException:
      pass  # Fast path declined; use the Python eager fallback.
    try:
      return unsorted_segment_join_eager_fallback(
          inputs, segment_ids, num_segments, separator=separator, name=name,
          ctx=_ctx)
    except _core._SymbolicException:
      pass  # Fall through and add nodes to the TensorFlow graph.

  # Graph construction path.
  separator = _execute.make_str(
      "" if separator is None else separator, "separator")
  _, _, graph_op, graph_outputs = _op_def_library._apply_op_helper(
      "UnsortedSegmentJoin",
      inputs=inputs,
      segment_ids=segment_ids,
      num_segments=num_segments,
      separator=separator,
      name=name)

  outputs = graph_outputs[:]
  if _execute.must_record_gradient():
    # Read the attrs back from the constructed op for the gradient record.
    recorded_attrs = (
        "separator", graph_op.get_attr("separator"),
        "Tindices", graph_op._get_attr_type("Tindices"),
        "Tnumsegments", graph_op._get_attr_type("Tnumsegments"),
    )
    flat_inputs = graph_op.inputs
    _execute.record_gradient(
        "UnsortedSegmentJoin", flat_inputs, recorded_attrs, outputs)
  (joined,) = outputs
  return joined
|
|
|
|
# Export `unsorted_segment_join`, passed through `_ops.to_raw_op`, under the
# API name `raw_ops.UnsortedSegmentJoin`.
UnsortedSegmentJoin = tf_export("raw_ops.UnsortedSegmentJoin")(_ops.to_raw_op(unsorted_segment_join))
|
|
|
|
|
|
def unsorted_segment_join_eager_fallback(inputs, segment_ids, num_segments, separator, name, ctx):
  """Executes the `UnsortedSegmentJoin` op directly via `_execute.execute`.

  Args:
    inputs: Value converted to a `string` tensor.
    segment_ids: Value converted to a tensor whose dtype is matched against
      `int32`/`int64`.
    num_segments: Value converted to a tensor whose dtype is matched against
      `int32`/`int64` (with `int32` passed as the trailing dtype argument).
    separator: Value for the `separator` string attribute; `None` is
      replaced by `""`.
    name: Name forwarded to `_execute.execute`.
    ctx: Context forwarded to the conversion helpers and to
      `_execute.execute`.

  Returns:
    The single output produced by the op.
  """
  separator = _execute.make_str(
      "" if separator is None else separator, "separator")

  # Conversions run in the same order as before: segment ids, segment count,
  # then the string inputs.
  indices_dtype, (segment_ids_tensor,) = _execute.args_to_matching_eager(
      [segment_ids], ctx, [_dtypes.int32, _dtypes.int64])
  numsegments_dtype, (num_segments_tensor,) = _execute.args_to_matching_eager(
      [num_segments], ctx, [_dtypes.int32, _dtypes.int64], _dtypes.int32)
  inputs_tensor = _ops.convert_to_tensor(inputs, _dtypes.string)

  op_inputs = [inputs_tensor, segment_ids_tensor, num_segments_tensor]
  op_attrs = (
      "separator", separator,
      "Tindices", indices_dtype,
      "Tnumsegments", numsegments_dtype,
  )
  op_outputs = _execute.execute(
      b"UnsortedSegmentJoin", 1, inputs=op_inputs, attrs=op_attrs, ctx=ctx,
      name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "UnsortedSegmentJoin", op_inputs, op_attrs, op_outputs)
  # Exactly one output is expected; unpack it from the result list.
  (joined,) = op_outputs
  return joined
|
|
|