# keras/integration_test/preprocessing_test_utils.py
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common utilities for our Keras preprocessing integration tests."""
import os
import tensorflow.compat.v2 as tf
# Keras preprocessing layers live directly under tf.keras.layers in TF2.
preprocessing = tf.keras.layers

BATCH_SIZE = 64
DS_SIZE = BATCH_SIZE * 16
# Integer division: DS_SIZE is an exact multiple of BATCH_SIZE, and step
# counts (e.g. steps_per_epoch) are conceptually ints, not floats.
STEPS = DS_SIZE // BATCH_SIZE
VOCAB_SIZE = 100
def make_dataset():
    """Build a small structured dataset with three feature columns.

    Columns:
      - float_col: unnormalized floats in [0, 10).
      - int_col: integer IDs (VOCAB_SIZE distinct values, scaled by 1000).
      - string_col: string renderings of integers in [0, VOCAB_SIZE).

    Returns:
        A tf.data.Dataset of (features_dict, binary_label) pairs.
    """
    # Fixed seed so every caller sees the same data; note the order of the
    # tf.random.uniform calls below is part of that reproducibility.
    tf.random.set_seed(197011)
    shape = (DS_SIZE, 1)
    float_col = tf.random.uniform(shape, maxval=10, dtype="float32")
    # VOCAB_SIZE distinct IDs spread over a wide range — a typical
    # IntegerLookup use case.
    int_col = 1000 * tf.random.uniform(shape, maxval=VOCAB_SIZE, dtype="int64")
    # Fixed vocabulary of "0".."99", to showcase loading a vocab from a file.
    string_col = tf.strings.as_string(
        tf.random.uniform(shape, maxval=VOCAB_SIZE, dtype="int64")
    )
    # Random binary label.
    labels = tf.random.uniform(shape, maxval=2, dtype="int64")
    features = {
        "float_col": float_col,
        "int_col": int_col,
        "string_col": string_col,
    }
    return tf.data.Dataset.from_tensor_slices((features, labels))
def make_preprocessing_model(file_dir):
    """Build a standalone preprocessing model.

    Args:
        file_dir: Directory in which to write the string vocabulary file.

    Returns:
        A tf.keras.Model mapping raw (float, int, string) inputs to their
        preprocessed (normalized / looked-up) counterparts.
    """
    # Input names must match the feature-dict keys produced by make_dataset.
    float_in = tf.keras.Input(shape=(1,), dtype="float32", name="float_col")
    int_in = tf.keras.Input(shape=(1,), dtype="int64", name="int_col")
    string_in = tf.keras.Input(shape=(1,), dtype="string", name="string_col")

    # adapt() requires a batched dataset.
    ds = make_dataset().batch(BATCH_SIZE)

    # Floats: learn mean/variance from the data, then normalize.
    norm_layer = preprocessing.Normalization()
    norm_layer.adapt(ds.map(lambda features, labels: features["float_col"]))
    normalized = norm_layer(float_in)

    # Ints: adapt a vocabulary of integer IDs.
    int_lookup_layer = preprocessing.IntegerLookup()
    int_lookup_layer.adapt(ds.map(lambda features, labels: features["int_col"]))
    int_ids = int_lookup_layer(int_in)

    # Strings: look up against a fixed, file-based vocabulary.
    vocab_path = os.path.join(file_dir, "vocab_file.txt")
    with open(vocab_path, "w") as f:
        f.write("\n".join(str(i) for i in range(VOCAB_SIZE)))
    string_ids = preprocessing.StringLookup(vocabulary=vocab_path)(string_in)

    return tf.keras.Model(
        inputs=(float_in, int_in, string_in),
        outputs=(normalized, int_ids, string_ids),
    )
def make_training_model():
    """Build the trainable model that consumes preprocessed inputs.

    Returns:
        A tf.keras.Model taking (float_col, int_col, string_col) — where the
        int and string columns have already been converted to integer IDs by
        the preprocessing model — and producing a binary probability.
    """
    float_in = tf.keras.Input(shape=(1,), dtype="float32", name="float_col")
    # After preprocessing, both the string and int columns are integer IDs,
    # ready for embedding.
    int_in = tf.keras.Input(shape=(1,), dtype="int64", name="int_col")
    string_in = tf.keras.Input(shape=(1,), dtype="int64", name="string_col")
    # Embed each lookup output. VOCAB_SIZE + 1 leaves room for the lookup
    # layers' OOV index.
    int_embedding = tf.keras.layers.Embedding(VOCAB_SIZE + 1, 8, input_length=1)
    int_out = int_embedding(int_in)
    int_out = tf.keras.layers.Flatten()(int_out)
    string_embedding = tf.keras.layers.Embedding(
        VOCAB_SIZE + 1, 8, input_length=1
    )
    string_out = string_embedding(string_in)
    string_out = tf.keras.layers.Flatten()(string_out)
    # Concatenate all features and feed them into a simple MLP.
    concatenate = tf.keras.layers.Concatenate()
    x = concatenate((float_in, int_out, string_out))
    x = tf.keras.layers.Dense(32, activation="relu")(x)
    x = tf.keras.layers.Dense(32, activation="relu")(x)
    # BUG FIX: softmax over a single unit is constant 1.0, which makes a
    # binary classifier untrainable — a 1-unit binary head needs sigmoid.
    outputs = tf.keras.layers.Dense(1, activation="sigmoid")(x)
    return tf.keras.Model(inputs=(float_in, int_in, string_in), outputs=outputs)