# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""This API defines FeatureColumn abstraction."""
# This file was originally under tf/python/feature_column, and was moved to
# Keras package in order to remove the reverse dependency from TF to Keras.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import re
import tensorflow.compat.v2 as tf
from keras.engine.base_layer import Layer
from keras.saving.legacy import serialization


class _BaseFeaturesLayer(Layer):
    """Base class for DenseFeatures and SequenceFeatures.

    Defines common methods and helpers.

    Args:
      feature_columns: An iterable containing the FeatureColumns to use as
        inputs to your model.
      expected_column_type: Expected class for provided feature columns.
      trainable: Boolean, whether the layer's variables will be updated via
        gradient descent during training.
      name: Name to give to the DenseFeatures.
      **kwargs: Keyword arguments to construct a layer.

    Raises:
      ValueError: if an item in `feature_columns` doesn't match
        `expected_column_type`.
    """

    def __init__(
        self,
        feature_columns,
        expected_column_type,
        trainable,
        name,
        partitioner=None,
        **kwargs
    ):
        super().__init__(name=name, trainable=trainable, **kwargs)
        self._feature_columns = _normalize_feature_columns(feature_columns)
        self._state_manager = tf.__internal__.feature_column.StateManager(
            self, self.trainable
        )
        self._partitioner = partitioner
        for column in self._feature_columns:
            if not isinstance(column, expected_column_type):
                raise ValueError(
                    "Items of feature_columns must be a {}. "
                    "You can wrap a categorical column with an "
                    "embedding_column or indicator_column. Given: {}".format(
                        expected_column_type, column
                    )
                )

    def build(self, _):
        for column in self._feature_columns:
            with tf.compat.v1.variable_scope(
                self.name, partitioner=self._partitioner
            ):
                with tf.compat.v1.variable_scope(
                    _sanitize_column_name_for_variable_scope(column.name)
                ):
                    column.create_state(self._state_manager)
        super().build(None)

    def _target_shape(self, input_shape, num_elements):
        """Computes the expected output shape of the layer's dense tensor.

        Args:
          input_shape: Tensor or array with batch shape.
          num_elements: Size of the last dimension of the output.

        Returns:
          Tuple with output shape.
        """
        raise NotImplementedError("Calling an abstract method.")

    def compute_output_shape(self, input_shape):
        total_elements = 0
        for column in self._feature_columns:
            total_elements += column.variable_shape.num_elements()
        return self._target_shape(input_shape, total_elements)

    def _process_dense_tensor(self, column, tensor):
        """Reshapes the dense tensor output of a column to its expected shape.

        Args:
          column: A DenseColumn or SequenceDenseColumn object.
          tensor: A dense tensor obtained from the same column.

        Returns:
          Reshaped dense tensor.
        """
        num_elements = column.variable_shape.num_elements()
        target_shape = self._target_shape(tf.shape(tensor), num_elements)
        return tf.reshape(tensor, shape=target_shape)

    def _verify_and_concat_tensors(self, output_tensors):
        """Verifies and concatenates the dense output of several columns."""
        _verify_static_batch_size_equality(
            output_tensors, self._feature_columns
        )
        return tf.concat(output_tensors, -1)

    def get_config(self):
        column_configs = [
            tf.__internal__.feature_column.serialize_feature_column(fc)
            for fc in self._feature_columns
        ]
        config = {"feature_columns": column_configs}
        config["partitioner"] = serialization.serialize_keras_object(
            self._partitioner
        )
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))

    @classmethod
    def from_config(cls, config, custom_objects=None):
        config_cp = config.copy()
        columns_by_name = {}
        config_cp["feature_columns"] = [
            tf.__internal__.feature_column.deserialize_feature_column(
                c, custom_objects, columns_by_name
            )
            for c in config["feature_columns"]
        ]
        config_cp["partitioner"] = serialization.deserialize_keras_object(
            config["partitioner"], custom_objects
        )
        return cls(**config_cp)
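

# A minimal usage sketch (not part of the original module): a concrete
# subclass only has to supply `_target_shape`. The subclass, function, and
# column names below are illustrative assumptions, not Keras API;
# `tf.__internal__.feature_column.DenseColumn` is assumed to be exported the
# same way as the other internal symbols this module already uses. The
# function is defined for illustration only and is never called.
def _example_dense_features_subclass():
    """Builds a demo layer showing how `_target_shape` completes the class."""

    class _DemoDenseFeatures(_BaseFeaturesLayer):
        # Flatten every column's output into (batch_size, total_elements),
        # mirroring what keras.layers.DenseFeatures does.
        def _target_shape(self, input_shape, total_elements):
            return (input_shape[0], total_elements)

    columns = [tf.feature_column.numeric_column("price")]
    return _DemoDenseFeatures(
        feature_columns=columns,
        expected_column_type=tf.__internal__.feature_column.DenseColumn,
        trainable=True,
        name="demo_features",
    )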


def _sanitize_column_name_for_variable_scope(name):
    """Sanitizes user-provided feature names for use as variable scopes."""
    invalid_char = re.compile("[^A-Za-z0-9_.\\-]")
    return invalid_char.sub("_", name)
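

# Illustrative example (not part of the original module): every character
# outside [A-Za-z0-9_.-] is replaced with an underscore, e.g.
#
#     _sanitize_column_name_for_variable_scope("price/usd total")
#     # -> "price_usd_total"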


def _verify_static_batch_size_equality(tensors, columns):
    """Verifies equality between static batch sizes.

    Args:
      tensors: iterable of input tensors.
      columns: Corresponding feature columns.

    Raises:
      ValueError: in case of mismatched batch sizes.
    """
    expected_batch_size = None
    for i in range(len(tensors)):
        # batch_size is a Dimension object.
        batch_size = tf.compat.v1.Dimension(
            tf.compat.dimension_value(tensors[i].shape[0])
        )
        if batch_size.value is not None:
            if expected_batch_size is None:
                batch_size_column_index = i
                expected_batch_size = batch_size
            elif not expected_batch_size.is_compatible_with(batch_size):
                raise ValueError(
                    "Batch size (first dimension) of each feature must be "
                    "the same. Batch size of columns ({}, {}): ({}, {})".format(
                        columns[batch_size_column_index].name,
                        columns[i].name,
                        expected_batch_size,
                        batch_size,
                    )
                )
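

# Illustrative sketch (not part of the original module): two tensors whose
# static batch sizes disagree trigger the ValueError above. The column names
# are assumptions for demonstration only.
#
#     t_a = tf.zeros([4, 3])  # static batch size 4
#     t_b = tf.zeros([8, 3])  # static batch size 8
#     col_a = tf.feature_column.numeric_column("a", shape=(3,))
#     col_b = tf.feature_column.numeric_column("b", shape=(3,))
#     _verify_static_batch_size_equality([t_a, t_b], [col_a, col_b])
#     # -> ValueError: Batch size (first dimension) of each feature must ...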


def _normalize_feature_columns(feature_columns):
    """Normalizes the `feature_columns` input.

    This method converts `feature_columns` to a list as best it can. In
    addition, it verifies the type and other properties of `feature_columns`
    required by downstream libraries.

    Args:
      feature_columns: The raw feature columns, usually passed by users.

    Returns:
      The normalized feature column list.

    Raises:
      ValueError: for any invalid inputs, such as empty or duplicated names.
    """
    if isinstance(
        feature_columns, tf.__internal__.feature_column.FeatureColumn
    ):
        feature_columns = [feature_columns]

    if isinstance(feature_columns, collections.abc.Iterator):
        feature_columns = list(feature_columns)

    if isinstance(feature_columns, dict):
        raise ValueError(
            "Expected feature_columns to be iterable, found dict."
        )

    for column in feature_columns:
        if not isinstance(
            column, tf.__internal__.feature_column.FeatureColumn
        ):
            raise ValueError(
                "Items of feature_columns must be a FeatureColumn. "
                "Given (type {}): {}.".format(type(column), column)
            )

    if not feature_columns:
        raise ValueError("feature_columns must not be empty.")

    name_to_column = {}
    for column in feature_columns:
        if column.name in name_to_column:
            raise ValueError(
                "Duplicate feature column name found for columns: {} "
                "and {}. This usually means that these columns refer to "
                "the same base feature. Either one must be discarded or a "
                "duplicated but renamed item must be inserted in the "
                "features dict.".format(column, name_to_column[column.name])
            )
        name_to_column[column.name] = column

    return sorted(feature_columns, key=lambda x: x.name)
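

# Illustrative sketch (not part of the original module): a single column is
# wrapped in a list, and the result is always sorted by column name. The
# column names are assumptions for demonstration only.
#
#     col_b = tf.feature_column.numeric_column("b")
#     col_a = tf.feature_column.numeric_column("a")
#     _normalize_feature_columns(col_a)           # -> [col_a]
#     _normalize_feature_columns([col_b, col_a])  # -> [col_a, col_b]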