# Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """This API defines FeatureColumn abstraction.""" # This file was originally under tf/python/feature_column, and was moved to # Keras package in order to remove the reverse dependency from TF to Keras. from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections import re import tensorflow.compat.v2 as tf from keras.engine.base_layer import Layer from keras.saving.legacy import serialization class _BaseFeaturesLayer(Layer): """Base class for DenseFeatures and SequenceFeatures. Defines common methods and helpers. Args: feature_columns: An iterable containing the FeatureColumns to use as inputs to your model. expected_column_type: Expected class for provided feature columns. trainable: Boolean, whether the layer's variables will be updated via gradient descent during training. name: Name to give to the DenseFeatures. **kwargs: Keyword arguments to construct a layer. Raises: ValueError: if an item in `feature_columns` doesn't match `expected_column_type`. """ def __init__( self, feature_columns, expected_column_type, trainable, name, partitioner=None, **kwargs ): super().__init__(name=name, trainable=trainable, **kwargs) self._feature_columns = _normalize_feature_columns(feature_columns) self._state_manager = tf.__internal__.feature_column.StateManager( self, self.trainable ) self._partitioner = partitioner for column in self._feature_columns: if not isinstance(column, expected_column_type): raise ValueError( "Items of feature_columns must be a {}. " "You can wrap a categorical column with an " "embedding_column or indicator_column. Given: {}".format( expected_column_type, column ) ) def build(self, _): for column in self._feature_columns: with tf.compat.v1.variable_scope( self.name, partitioner=self._partitioner ): with tf.compat.v1.variable_scope( _sanitize_column_name_for_variable_scope(column.name) ): column.create_state(self._state_manager) super().build(None) def _output_shape(self, input_shape, num_elements): """Computes expected output shape of the dense tensor of the layer. Args: input_shape: Tensor or array with batch shape. num_elements: Size of the last dimension of the output. Returns: Tuple with output shape. """ raise NotImplementedError("Calling an abstract method.") def compute_output_shape(self, input_shape): total_elements = 0 for column in self._feature_columns: total_elements += column.variable_shape.num_elements() return self._target_shape(input_shape, total_elements) def _process_dense_tensor(self, column, tensor): """Reshapes the dense tensor output of a column based on expected shape. Args: column: A DenseColumn or SequenceDenseColumn object. tensor: A dense tensor obtained from the same column. Returns: Reshaped dense tensor. """ num_elements = column.variable_shape.num_elements() target_shape = self._target_shape(tf.shape(tensor), num_elements) return tf.reshape(tensor, shape=target_shape) def _verify_and_concat_tensors(self, output_tensors): """Verifies and concatenates the dense output of several columns.""" _verify_static_batch_size_equality( output_tensors, self._feature_columns ) return tf.concat(output_tensors, -1) def get_config(self): column_configs = [ tf.__internal__.feature_column.serialize_feature_column(fc) for fc in self._feature_columns ] config = {"feature_columns": column_configs} config["partitioner"] = serialization.serialize_keras_object( self._partitioner ) base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) @classmethod def from_config(cls, config, custom_objects=None): config_cp = config.copy() columns_by_name = {} config_cp["feature_columns"] = [ tf.__internal__.feature_column.deserialize_feature_column( c, custom_objects, columns_by_name ) for c in config["feature_columns"] ] config_cp["partitioner"] = serialization.deserialize_keras_object( config["partitioner"], custom_objects ) return cls(**config_cp) def _sanitize_column_name_for_variable_scope(name): """Sanitizes user-provided feature names for use as variable scopes.""" invalid_char = re.compile("[^A-Za-z0-9_.\\-]") return invalid_char.sub("_", name) def _verify_static_batch_size_equality(tensors, columns): """Verify equality between static batch sizes. Args: tensors: iterable of input tensors. columns: Corresponding feature columns. Raises: ValueError: in case of mismatched batch sizes. """ expected_batch_size = None for i in range(0, len(tensors)): # bath_size is a Dimension object. batch_size = tf.compat.v1.Dimension( tf.compat.dimension_value(tensors[i].shape[0]) ) if batch_size.value is not None: if expected_batch_size is None: bath_size_column_index = i expected_batch_size = batch_size elif not expected_batch_size.is_compatible_with(batch_size): raise ValueError( "Batch size (first dimension) of each feature must be " "same. Batch size of columns ({}, {}): ({}, {})".format( columns[bath_size_column_index].name, columns[i].name, expected_batch_size, batch_size, ) ) def _normalize_feature_columns(feature_columns): """Normalizes the `feature_columns` input. This method converts the `feature_columns` to list type as best as it can. In addition, verifies the type and other parts of feature_columns, required by downstream library. Args: feature_columns: The raw feature columns, usually passed by users. Returns: The normalized feature column list. Raises: ValueError: for any invalid inputs, such as empty, duplicated names, etc. """ if isinstance( feature_columns, tf.__internal__.feature_column.FeatureColumn ): feature_columns = [feature_columns] if isinstance(feature_columns, collections.abc.Iterator): feature_columns = list(feature_columns) if isinstance(feature_columns, dict): raise ValueError("Expected feature_columns to be iterable, found dict.") for column in feature_columns: if not isinstance(column, tf.__internal__.feature_column.FeatureColumn): raise ValueError( "Items of feature_columns must be a FeatureColumn. " "Given (type {}): {}.".format(type(column), column) ) if not feature_columns: raise ValueError("feature_columns must not be empty.") name_to_column = {} for column in feature_columns: if column.name in name_to_column: raise ValueError( "Duplicate feature column name found for columns: {} " "and {}. This usually means that these columns refer to " "same base feature. Either one must be discarded or a " "duplicated but renamed item must be inserted in " "features dict.".format(column, name_to_column[column.name]) ) name_to_column[column.name] = column return sorted(feature_columns, key=lambda x: x.name)