Intelegentny_Pszczelarz/.venv/Lib/site-packages/keras/integration_test/preprocessing_test_utils.py

# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common utilities for our Keras preprocessing integration tests."""

import os

import tensorflow.compat.v2 as tf

# Alias for the Keras layers namespace; the preprocessing layers used in this
# module (Normalization, IntegerLookup, StringLookup) are accessed through it.
preprocessing = tf.keras.layers

# Number of examples per batch.
BATCH_SIZE = 64
# Total number of examples in the synthetic dataset (16 full batches).
DS_SIZE = BATCH_SIZE * 16
# Number of batches in one pass over the dataset. Floor division keeps this an
# int; true division produced the float 16.0, which is not a valid step count
# type for APIs that expect an integer.
STEPS = DS_SIZE // BATCH_SIZE
# Number of distinct values drawn for the integer and string columns.
VOCAB_SIZE = 100


def make_dataset():
    """Build a small synthetic dataset of structured features and labels.

    Each example carries three features, keyed by column name:
      - float_col: a raw (unnormalized) float value.
      - int_col: an integer ID.
      - string_col: a term from a fixed string vocabulary.

    Returns:
      An unbatched `tf.data.Dataset` of `(features, label)` slices.
    """
    column_shape = (DS_SIZE, 1)

    # Seed once up front. NOTE(review): keep the draws below in this order;
    # reordering them presumably changes the generated values.
    tf.random.set_seed(197011)

    float_values = tf.random.uniform(column_shape, maxval=10, dtype="float32")

    # Draw IDs from VOCAB_SIZE possible values, then spread them over a wide
    # range to showcase a common use case for IntegerLookup.
    raw_ids = tf.random.uniform(
        column_shape, maxval=VOCAB_SIZE, dtype="int64"
    )
    int_values = raw_ids * 1000

    # String terms are the numbers 0..VOCAB_SIZE-1 rendered as text, matching
    # a fixed vocabulary that can be loaded from a file.
    term_ids = tf.random.uniform(
        column_shape, maxval=VOCAB_SIZE, dtype="int64"
    )
    string_values = tf.strings.as_string(term_ids)

    # Random binary label.
    targets = tf.random.uniform(column_shape, maxval=2, dtype="int64")

    columns = {
        "float_col": float_values,
        "int_col": int_values,
        "string_col": string_values,
    }
    return tf.data.Dataset.from_tensor_slices((columns, targets))


def make_preprocessing_model(file_dir):
    """Build a standalone model that preprocesses the raw dataset columns.

    Args:
      file_dir: Directory in which the string vocabulary file
        ("vocab_file.txt") is written.

    Returns:
      A `tf.keras.Model` whose inputs are the raw float, int and string
      columns and whose outputs are the corresponding preprocessed tensors,
      in the same order.
    """
    # Adapting requires a batched dataset.
    batched = make_dataset().batch(BATCH_SIZE)

    def _column(name):
        # Drop the labels and keep only the requested feature column.
        return batched.map(lambda feats, _: feats[name])

    # Each keras.Input is named after the dataset column it consumes.

    # Floats: normalize using the mean and variance adapted from the data.
    float_in = tf.keras.Input(shape=(1,), dtype="float32", name="float_col")
    normalizer = preprocessing.Normalization()
    normalizer.adapt(_column("float_col"))
    float_out = normalizer(float_in)

    # Ints: look up indices in a vocabulary adapted from the observed IDs.
    int_in = tf.keras.Input(shape=(1,), dtype="int64", name="int_col")
    id_lookup = preprocessing.IntegerLookup()
    id_lookup.adapt(_column("int_col"))
    int_out = id_lookup(int_in)

    # Strings: look up indices in a fixed vocabulary loaded from a file,
    # written here with one term per line.
    string_in = tf.keras.Input(shape=(1,), dtype="string", name="string_col")
    vocab_path = os.path.join(file_dir, "vocab_file.txt")
    vocab_text = "\n".join(map(str, range(VOCAB_SIZE)))
    with open(vocab_path, "w") as vocab_out:
        vocab_out.write(vocab_text)
    term_lookup = preprocessing.StringLookup(vocabulary=vocab_path)
    string_out = term_lookup(string_in)

    model_inputs = (float_in, int_in, string_in)
    model_outputs = (float_out, int_out, string_out)
    return tf.keras.Model(inputs=model_inputs, outputs=model_outputs)


def make_training_model():
    """Make a trainable model for the preprocessed inputs.

    Returns:
      A `tf.keras.Model` that takes the preprocessed float, int and string
      columns (the latter two as int64 indices) and emits one value in
      [0, 1] per example, suitable as a binary-label probability.
    """
    float_in = tf.keras.Input(shape=(1,), dtype="float32", name="float_col")
    # After preprocessing, both the string and int column are integer ready for
    # embedding.
    int_in = tf.keras.Input(shape=(1,), dtype="int64", name="int_col")
    string_in = tf.keras.Input(shape=(1,), dtype="int64", name="string_col")

    # Feed the lookup layers into an embedding. The table has VOCAB_SIZE + 1
    # rows -- presumably the extra row covers the lookup layers'
    # out-of-vocabulary index (TODO confirm).
    int_embedding = tf.keras.layers.Embedding(VOCAB_SIZE + 1, 8, input_length=1)
    int_out = int_embedding(int_in)
    int_out = tf.keras.layers.Flatten()(int_out)
    string_embedding = tf.keras.layers.Embedding(
        VOCAB_SIZE + 1, 8, input_length=1
    )
    string_out = string_embedding(string_in)
    string_out = tf.keras.layers.Flatten()(string_out)

    # Concatenate outputs.
    concatenate = tf.keras.layers.Concatenate()
    # Feed our preprocessed inputs into a simple MLP.
    x = concatenate((float_in, int_out, string_out))
    x = tf.keras.layers.Dense(32, activation="relu")(x)
    x = tf.keras.layers.Dense(32, activation="relu")(x)
    # A single-unit head must use sigmoid: softmax normalizes across the
    # output units, so with one unit it always yields 1.0 and the model could
    # never fit the binary labels.
    outputs = tf.keras.layers.Dense(1, activation="sigmoid")(x)
    return tf.keras.Model(inputs=(float_in, int_in, string_in), outputs=outputs)
feature: "ANN commit 2" 2023-06-19 00:49:18 +02:00			`# Copyright 2021 The TensorFlow Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`# ==============================================================================`
			`"""Common utilities for our Keras preprocessing integration tests."""`

			`import os`

			`import tensorflow.compat.v2 as tf`

			`preprocessing = tf.keras.layers`

			`BATCH_SIZE = 64`
			`DS_SIZE = BATCH_SIZE * 16`
			`STEPS = DS_SIZE / BATCH_SIZE`
			`VOCAB_SIZE = 100`


			`def make_dataset():`
			`"""Make a simple structured dataset.`

			`The dataset contains three feature columns.`
			`- float_col: an unnormalized numeric column.`
			`- int_col: an column of integer IDs.`
			`- string_col: a column of fixed vocabulary terms.`

			`Returns:`
			`The dataset.`
			`"""`
			`tf.random.set_seed(197011)`
			`floats = tf.random.uniform((DS_SIZE, 1), maxval=10, dtype="float32")`
			`# Generate a 100 unique integer values, but over a wide range to showcase a`
			`# common use case for IntegerLookup.`
			`ints = tf.random.uniform((DS_SIZE, 1), maxval=VOCAB_SIZE, dtype="int64")`
			`ints = ints * 1000`
			`# Use a fixed vocabulary of strings from 0 to 99, to showcase loading a`
			`# vocabulary from a file.`
			`strings = tf.random.uniform((DS_SIZE, 1), maxval=VOCAB_SIZE, dtype="int64")`
			`strings = tf.strings.as_string(strings)`
			`features = {"float_col": floats, "int_col": ints, "string_col": strings}`
			`# Random binary label.`
			`labels = tf.random.uniform((DS_SIZE, 1), maxval=2, dtype="int64")`
			`ds = tf.data.Dataset.from_tensor_slices((features, labels))`
			`return ds`


			`def make_preprocessing_model(file_dir):`
			`"""Make a standalone preprocessing model."""`
			`# The name of our keras.Input should match the column name in the dataset.`
			`float_in = tf.keras.Input(shape=(1,), dtype="float32", name="float_col")`
			`int_in = tf.keras.Input(shape=(1,), dtype="int64", name="int_col")`
			`string_in = tf.keras.Input(shape=(1,), dtype="string", name="string_col")`

			`# We need to batch a dataset before adapting.`
			`ds = make_dataset().batch(BATCH_SIZE)`
			`# Normalize floats by adapting the mean and variance of the input.`
			`normalization = preprocessing.Normalization()`
			`normalization.adapt(ds.map(lambda features, labels: features["float_col"]))`
			`float_out = normalization(float_in)`
			`# Lookup ints by adapting a vocab of integer IDs.`
			`int_lookup = preprocessing.IntegerLookup()`
			`int_lookup.adapt(ds.map(lambda features, labels: features["int_col"]))`
			`int_out = int_lookup(int_in)`
			`# Lookup strings from a fixed file based vocabulary.`
			`string_vocab = list(str(i) for i in range(VOCAB_SIZE))`
			`vocab_file = os.path.join(file_dir, "vocab_file.txt")`
			`with open(vocab_file, "w") as f:`
			`f.write("\n".join(string_vocab))`
			`string_lookup = preprocessing.StringLookup(vocabulary=vocab_file)`
			`string_out = string_lookup(string_in)`

			`return tf.keras.Model(`
			`inputs=(float_in, int_in, string_in),`
			`outputs=(float_out, int_out, string_out),`
			`)`


			`def make_training_model():`
			`"""Make a trainable model for the preprocessed inputs."""`
			`float_in = tf.keras.Input(shape=(1,), dtype="float32", name="float_col")`
			`# After preprocessing, both the string and int column are integer ready for`
			`# embedding.`
			`int_in = tf.keras.Input(shape=(1,), dtype="int64", name="int_col")`
			`string_in = tf.keras.Input(shape=(1,), dtype="int64", name="string_col")`

			`# Feed the lookup layers into an embedding.`
			`int_embedding = tf.keras.layers.Embedding(VOCAB_SIZE + 1, 8, input_length=1)`
			`int_out = int_embedding(int_in)`
			`int_out = tf.keras.layers.Flatten()(int_out)`
			`string_embedding = tf.keras.layers.Embedding(`
			`VOCAB_SIZE + 1, 8, input_length=1`
			`)`
			`string_out = string_embedding(string_in)`
			`string_out = tf.keras.layers.Flatten()(string_out)`

			`# Concatenate outputs.`
			`concatate = tf.keras.layers.Concatenate()`
			`# Feed our preprocessed inputs into a simple MLP.`
			`x = concatate((float_in, int_out, string_out))`
			`x = tf.keras.layers.Dense(32, activation="relu")(x)`
			`x = tf.keras.layers.Dense(32, activation="relu")(x)`
			`outputs = tf.keras.layers.Dense(1, activation="softmax")(x)`
			`return tf.keras.Model(inputs=(float_in, int_in, string_in), outputs=outputs)`