243 lines
8.7 KiB
Python
243 lines
8.7 KiB
Python
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ==============================================================================
|
|
"""This API defines FeatureColumn abstraction."""
|
|
|
|
# This file was originally under tf/python/feature_column, and was moved to
|
|
# Keras package in order to remove the reverse dependency from TF to Keras.
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import collections
|
|
import re
|
|
|
|
import tensorflow.compat.v2 as tf
|
|
|
|
from keras.engine.base_layer import Layer
|
|
from keras.saving.legacy import serialization
|
|
|
|
|
|
class _BaseFeaturesLayer(Layer):
    """Base class for DenseFeatures and SequenceFeatures.

    Defines common methods and helpers.

    Args:
      feature_columns: An iterable containing the FeatureColumns to use as
        inputs to your model.
      expected_column_type: Expected class for provided feature columns.
      trainable: Boolean, whether the layer's variables will be updated via
        gradient descent during training.
      name: Name to give to the DenseFeatures.
      partitioner: Optional partitioner used for the layer's variable scope.
      **kwargs: Keyword arguments to construct a layer.

    Raises:
      ValueError: if an item in `feature_columns` doesn't match
        `expected_column_type`.
    """

    def __init__(
        self,
        feature_columns,
        expected_column_type,
        trainable,
        name,
        partitioner=None,
        **kwargs
    ):
        super().__init__(name=name, trainable=trainable, **kwargs)
        # Normalize to a name-sorted list and validate element types up
        # front so errors surface at construction time, not at first call.
        self._feature_columns = _normalize_feature_columns(feature_columns)
        self._state_manager = tf.__internal__.feature_column.StateManager(
            self, self.trainable
        )
        self._partitioner = partitioner
        for column in self._feature_columns:
            if not isinstance(column, expected_column_type):
                raise ValueError(
                    "Items of feature_columns must be a {}. "
                    "You can wrap a categorical column with an "
                    "embedding_column or indicator_column. Given: {}".format(
                        expected_column_type, column
                    )
                )

    def build(self, _):
        # Create each column's state (e.g. embedding variables) under a
        # variable scope named after the layer and the sanitized column
        # name, so variable names stay stable and checkpoint-friendly.
        for column in self._feature_columns:
            with tf.compat.v1.variable_scope(
                self.name, partitioner=self._partitioner
            ):
                with tf.compat.v1.variable_scope(
                    _sanitize_column_name_for_variable_scope(column.name)
                ):
                    column.create_state(self._state_manager)
        super().build(None)

    def _target_shape(self, input_shape, num_elements):
        """Computes expected output shape of the dense tensor of the layer.

        NOTE: this was previously named `_output_shape`, but the call sites
        in `compute_output_shape` and `_process_dense_tensor` invoke
        `self._target_shape`; the definition is renamed to match so the
        abstract-method error is actually raised when a subclass fails to
        override it.

        Args:
          input_shape: Tensor or array with batch shape.
          num_elements: Size of the last dimension of the output.

        Returns:
          Tuple with output shape.
        """
        raise NotImplementedError("Calling an abstract method.")

    def compute_output_shape(self, input_shape):
        # The output width is the sum of every column's flattened variable
        # shape, since column outputs are concatenated on the last axis.
        total_elements = 0
        for column in self._feature_columns:
            total_elements += column.variable_shape.num_elements()
        return self._target_shape(input_shape, total_elements)

    def _process_dense_tensor(self, column, tensor):
        """Reshapes the dense tensor output of a column based on expected shape.

        Args:
          column: A DenseColumn or SequenceDenseColumn object.
          tensor: A dense tensor obtained from the same column.

        Returns:
          Reshaped dense tensor.
        """
        num_elements = column.variable_shape.num_elements()
        target_shape = self._target_shape(tf.shape(tensor), num_elements)
        return tf.reshape(tensor, shape=target_shape)

    def _verify_and_concat_tensors(self, output_tensors):
        """Verifies and concatenates the dense output of several columns."""
        _verify_static_batch_size_equality(
            output_tensors, self._feature_columns
        )
        return tf.concat(output_tensors, -1)

    def get_config(self):
        """Returns a serializable config of feature columns and partitioner."""
        column_configs = [
            tf.__internal__.feature_column.serialize_feature_column(fc)
            for fc in self._feature_columns
        ]
        config = {"feature_columns": column_configs}
        config["partitioner"] = serialization.serialize_keras_object(
            self._partitioner
        )

        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))

    @classmethod
    def from_config(cls, config, custom_objects=None):
        """Recreates the layer from a `get_config`-produced dictionary."""
        config_cp = config.copy()
        # Shared mapping lets columns that reference each other (e.g.
        # shared-embedding columns) deserialize to the same object.
        columns_by_name = {}
        config_cp["feature_columns"] = [
            tf.__internal__.feature_column.deserialize_feature_column(
                c, custom_objects, columns_by_name
            )
            for c in config["feature_columns"]
        ]
        config_cp["partitioner"] = serialization.deserialize_keras_object(
            config["partitioner"], custom_objects
        )

        return cls(**config_cp)
|
|
|
|
|
|
def _sanitize_column_name_for_variable_scope(name):
|
|
"""Sanitizes user-provided feature names for use as variable scopes."""
|
|
invalid_char = re.compile("[^A-Za-z0-9_.\\-]")
|
|
return invalid_char.sub("_", name)
|
|
|
|
|
|
def _verify_static_batch_size_equality(tensors, columns):
    """Verify equality between static batch sizes.

    Args:
      tensors: iterable of input tensors.
      columns: Corresponding feature columns.

    Raises:
      ValueError: in case of mismatched batch sizes.
    """
    expected_batch_size = None
    for i, tensor in enumerate(tensors):
        # batch_size is a Dimension object; fixes the original "bath_size"
        # typo in both the comment and the index variable below.
        batch_size = tf.compat.v1.Dimension(
            tf.compat.dimension_value(tensor.shape[0])
        )
        if batch_size.value is not None:
            if expected_batch_size is None:
                # Remember which column established the expected batch size
                # so the error message can name both mismatching columns.
                batch_size_column_index = i
                expected_batch_size = batch_size
            elif not expected_batch_size.is_compatible_with(batch_size):
                raise ValueError(
                    "Batch size (first dimension) of each feature must be "
                    "same. Batch size of columns ({}, {}): ({}, {})".format(
                        columns[batch_size_column_index].name,
                        columns[i].name,
                        expected_batch_size,
                        batch_size,
                    )
                )
|
|
|
|
|
|
def _normalize_feature_columns(feature_columns):
    """Normalizes the `feature_columns` input.

    This method converts the `feature_columns` to list type as best as it can.
    In addition, verifies the type and other parts of feature_columns, required
    by downstream library.

    Args:
      feature_columns: The raw feature columns, usually passed by users.

    Returns:
      The normalized feature column list.

    Raises:
      ValueError: for any invalid inputs, such as empty, duplicated names, etc.
    """
    fc_base = tf.__internal__.feature_column.FeatureColumn

    # A single column is accepted as shorthand for a one-element list.
    if isinstance(feature_columns, fc_base):
        feature_columns = [feature_columns]

    # Materialize one-shot iterators so we can traverse more than once.
    if isinstance(feature_columns, collections.abc.Iterator):
        feature_columns = list(feature_columns)

    # Dicts are iterable but ambiguous (keys vs. values): reject explicitly.
    if isinstance(feature_columns, dict):
        raise ValueError("Expected feature_columns to be iterable, found dict.")

    for column in feature_columns:
        if not isinstance(column, fc_base):
            raise ValueError(
                "Items of feature_columns must be a FeatureColumn. "
                "Given (type {}): {}.".format(type(column), column)
            )

    if not feature_columns:
        raise ValueError("feature_columns must not be empty.")

    # Reject duplicate names: two columns with the same name would collide
    # when looking up features and variables.
    name_to_column = {}
    for column in feature_columns:
        previous = name_to_column.get(column.name)
        if previous is not None:
            raise ValueError(
                "Duplicate feature column name found for columns: {} "
                "and {}. This usually means that these columns refer to "
                "same base feature. Either one must be discarded or a "
                "duplicated but renamed item must be inserted in "
                "features dict.".format(column, previous)
            )
        name_to_column[column.name] = column

    # Deterministic, name-sorted order keeps variable creation stable.
    return sorted(feature_columns, key=lambda col: col.name)
|