# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests utils for preprocessing layers."""

import collections

import numpy as np
import tensorflow.compat.v2 as tf


class ArrayLike:
    """Minimal wrapper that exposes `values` through NumPy's `__array__` hook.

    Attributes:
      values: The wrapped data, stored as passed to the constructor.
    """

    def __init__(self, values):
        self.values = values

    def __array__(self, dtype=None, copy=None):
        """Returns `self.values` converted to a new NumPy array.

        Args:
          dtype: Optional dtype requested by NumPy (for example via
            `np.asarray(obj, dtype=...)`). `None` lets NumPy infer it.
          copy: Accepted so the signature matches the `__array__` protocol
            used by NumPy 2. It is not consulted: a new array is built on
            every call.

        Returns:
          The result of `np.array(self.values, dtype=dtype)`.
        """
        return np.array(self.values, dtype=dtype)


class PreprocessingLayerTest(tf.test.TestCase):
    """Base test class for preprocessing layer API validation."""

    # TODO(b/137303934): Consider incorporating something like this Close vs
    # All behavior into core tf.test.TestCase.
    def assertAllCloseOrEqual(self, a, b, msg=None):
        """Asserts that elements are close (if numeric) or equal (if string).

        Lists, tuples and mappings are compared recursively, element by
        element. Floats and objects with a numeric `dtype` are compared with
        `assertAllClose`; everything else (including `None`) is compared with
        `assertAllEqual`.

        Args:
          a: First value to compare. Its type selects the comparison used.
          b: Second value to compare.
          msg: Optional message attached to assertion failures.
        """
        if a is None or b is None:
            self.assertAllEqual(a, b, msg=msg)
        elif isinstance(a, (list, tuple)):
            self.assertEqual(len(a), len(b), msg=msg)
            for a_value, b_value in zip(a, b):
                self.assertAllCloseOrEqual(a_value, b_value, msg=msg)
        elif isinstance(a, collections.abc.Mapping):
            self.assertEqual(len(a), len(b), msg=msg)
            for key, a_value in a.items():
                error_message = f"{msg} ({key})" if msg else None
                # Report a missing key as an assertion failure carrying the
                # caller's message instead of a bare KeyError.
                self.assertIn(key, b, msg=error_message)
                b_value = b[key]
                self.assertAllCloseOrEqual(a_value, b_value, error_message)
        elif isinstance(a, float) or (
            hasattr(a, "dtype") and np.issubdtype(a.dtype, np.number)
        ):
            self.assertAllClose(a, b, msg=msg)
        else:
            self.assertAllEqual(a, b, msg=msg)

    def assert_extracted_output_equal(self, combiner, acc1, acc2, msg=None):
        """Asserts that two accumulators extract to matching data.

        Args:
          combiner: Object providing `extract(accumulator)`.
          acc1: First accumulator.
          acc2: Second accumulator.
          msg: Optional message attached to assertion failures.
        """
        data_1 = combiner.extract(acc1)
        data_2 = combiner.extract(acc2)
        self.assertAllCloseOrEqual(data_1, data_2, msg=msg)

    # This is an injection seam so that tests like TextVectorizationTest can
    # define their own methods for asserting that accumulators are equal.
    compare_accumulators = assertAllCloseOrEqual

    def validate_accumulator_computation(self, combiner, data, expected):
        """Validate that various combinations of compute and merge are identical.

        `data` is split into three shards (element 0, element 1, and the
        remainder). The shards are combined through several arrangements of
        `combiner.compute` and `combiner.merge`, and each result is checked
        with `compare_accumulators`.

        Args:
          combiner: Object providing `compute(data[, accumulator])` and
            `merge(accumulators)`.
          data: Input data; must have at least 4 elements.
          expected: Accumulator that the merged result must match.

        Raises:
          AssertionError: If `data` has fewer than 4 elements, or if any
            comparison fails.
        """
        if len(data) < 4:
            raise AssertionError(
                "Data must have at least 4 elements. Received "
                f"len(data)={len(data)}."
            )
        data_0 = np.array([data[0]])
        data_1 = np.array([data[1]])
        data_2 = np.array(data[2:])

        single_compute = combiner.compute(data)

        all_merge = combiner.merge(
            [
                combiner.compute(data_0),
                combiner.compute(data_1),
                combiner.compute(data_2),
            ]
        )

        self.compare_accumulators(
            single_compute,
            all_merge,
            msg="Sharding data should not change the data output.",
        )

        unordered_all_merge = combiner.merge(
            [
                combiner.compute(data_1),
                combiner.compute(data_2),
                combiner.compute(data_0),
            ]
        )
        self.compare_accumulators(
            all_merge,
            unordered_all_merge,
            msg=(
                "The order of merge arguments should not change the data "
                "output."
            ),
        )

        hierarchical_merge = combiner.merge(
            [
                combiner.compute(data_1),
                combiner.merge(
                    [combiner.compute(data_2), combiner.compute(data_0)]
                ),
            ]
        )
        self.compare_accumulators(
            all_merge,
            hierarchical_merge,
            msg="Nesting merge arguments should not change the data output.",
        )

        nested_compute = combiner.compute(
            data_0, combiner.compute(data_1, combiner.compute(data_2))
        )
        self.compare_accumulators(
            all_merge,
            nested_compute,
            msg="Nesting compute arguments should not change the data output.",
        )

        mixed_compute = combiner.merge(
            [
                combiner.compute(data_0),
                combiner.compute(data_1, combiner.compute(data_2)),
            ]
        )
        self.compare_accumulators(
            all_merge,
            mixed_compute,
            msg=(
                "Mixing merge and compute calls should not change the data "
                "output."
            ),
        )

        single_merge = combiner.merge(
            [
                combiner.merge([combiner.compute(data_0)]),
                combiner.compute(data_1, combiner.compute(data_2)),
            ]
        )
        self.compare_accumulators(
            all_merge,
            single_merge,
            msg=(
                "Calling merge with a data length of 1 should not change "
                "the data output."
            ),
        )

        self.compare_accumulators(
            expected,
            all_merge,
            msg="Calculated accumulators did not match expected accumulator.",
        )

    def validate_accumulator_extract(self, combiner, data, expected):
        """Validate the expected results of computing and extracting.

        Args:
          combiner: Object providing `compute(data)` and
            `extract(accumulator)`.
          data: Input passed to `combiner.compute`.
          expected: Value the extracted data must match.
        """
        acc = combiner.compute(data)
        extracted_data = combiner.extract(acc)
        self.assertAllCloseOrEqual(expected, extracted_data)

    def validate_accumulator_extract_and_restore(
        self, combiner, data, expected
    ):
        """Validate that the extract<->restore loop loses no data.

        Args:
          combiner: Object providing `compute`, `extract` and `restore`.
          data: Input passed to `combiner.compute`.
          expected: Value that extracting the restored accumulator must
            match.
        """
        acc = combiner.compute(data)
        extracted_data = combiner.extract(acc)
        restored_acc = combiner.restore(extracted_data)
        self.assert_extracted_output_equal(combiner, acc, restored_acc)
        self.assertAllCloseOrEqual(expected, combiner.extract(restored_acc))

    def validate_accumulator_serialize_and_deserialize(
        self, combiner, data, expected
    ):
        """Validate that the serialize<->deserialize loop loses no data.

        Args:
          combiner: Object providing `compute`, `serialize` and
            `deserialize`.
          data: Input passed to `combiner.compute`.
          expected: Accumulator the deserialized result must match.
        """
        acc = combiner.compute(data)
        serialized_data = combiner.serialize(acc)
        deserialized_data = combiner.deserialize(serialized_data)
        self.compare_accumulators(acc, deserialized_data)
        self.compare_accumulators(expected, deserialized_data)

    def validate_accumulator_uniqueness(self, combiner, data):
        """Validate that every call to compute creates a unique accumulator.

        Args:
          combiner: Object providing `compute(data)`.
          data: Input passed to `combiner.compute` twice.
        """
        acc = combiner.compute(data)
        acc2 = combiner.compute(data)
        self.assertIsNot(acc, acc2)
        self.compare_accumulators(acc, acc2)