3RNN/Lib/site-packages/h5py/tests/test_file_alignment.py
2024-05-26 19:49:15 +02:00

104 lines
4.3 KiB
Python

import h5py
from .common import TestCase
def is_aligned(dataset, offset=4096):
    """Tell whether ``dataset`` starts on a multiple of ``offset``.

    The position is whatever ``dataset.id.get_offset()`` reports; the
    dataset counts as aligned when that position leaves no remainder
    after division by ``offset``.
    """
    position = dataset.id.get_offset()
    remainder = position % offset
    return remainder == 0
def dataset_name(i):
    """Build the name of the ``i``-th dataset: ``data`` plus ``i`` zero-padded to 3 digits."""
    suffix = format(i, "03")
    return "data" + suffix
class TestFileAlignment(TestCase):
    """
    Ensure that setting the file alignment has the desired effect
    in the internal structure.

    Each test creates a series of small ``uint8`` datasets in a fresh file
    and inspects where their data ended up, using ``is_aligned``.
    """

    # Upper bound on the number of datasets created in a single file.
    _MAX_DATASETS = 1000

    @staticmethod
    def _create_filled_dataset(h5file, i, shape):
        """Create the ``i``-th ``uint8`` dataset of ``shape`` and write to it.

        Returns the newly created dataset.
        """
        dataset = h5file.create_dataset(
            dataset_name(i), shape, dtype='uint8')
        # Assign data so that the dataset is instantiated in the file.
        # The value is wrapped into 0..255 so that it always fits in uint8,
        # whatever the dataset index is.
        dataset[...] = i % 256
        return dataset

    @classmethod
    def _has_misaligned_dataset(cls, h5file, shape, offset):
        """Create datasets in ``h5file`` until one is not aligned to ``offset``.

        Stops at the first misaligned dataset and returns True. Returns
        False only if all ``_MAX_DATASETS`` datasets turned out aligned.
        During correct operation a misaligned dataset is expected early,
        so this normally finishes quickly.
        """
        return any(
            not is_aligned(
                cls._create_filled_dataset(h5file, i, shape), offset=offset)
            for i in range(cls._MAX_DATASETS)
        )

    def test_no_alignment_set(self):
        fname = self.mktemp()
        # 881 is a prime number, so hopefully this helps randomize the
        # alignment enough. A nice even number might give a pathological
        # case where, although we don't want the data to be aligned, it
        # ends up aligned anyway.
        shape = (881,)

        with h5py.File(fname, 'w') as h5file:
            # Create up to 1000 datasets; at least one of them should be
            # misaligned. This isn't a perfect check, but all of them being
            # aligned by chance is very unlikely.
            if not self._has_misaligned_dataset(h5file, shape, offset=4096):
                raise RuntimeError("Data was all found to be aligned to 4096")

    def test_alignment_set_above_threshold(self):
        # 2022/01/19 hmaarrfk
        # UnitTest (TestCase) doesn't play well with pytest parametrization.
        alignment_threshold = 1000
        alignment_interval = 4096

        for shape in [
            (1033,),  # A prime number above the threshold
            (1000,),  # Exactly equal to the threshold
            (1001,),  # One above the threshold
        ]:
            fname = self.mktemp()
            with h5py.File(fname, 'w',
                           alignment_threshold=alignment_threshold,
                           alignment_interval=alignment_interval) as h5file:
                # Create up to 1000 datasets.
                # They are all expected to be aligned.
                for i in range(self._MAX_DATASETS):
                    dataset = self._create_filled_dataset(h5file, i, shape)
                    assert is_aligned(dataset, offset=alignment_interval), (
                        f"{dataset_name(i)} with shape {shape} is not "
                        f"aligned to {alignment_interval}")

    def test_alignment_set_below_threshold(self):
        # 2022/01/19 hmaarrfk
        # UnitTest (TestCase) doesn't play well with pytest parametrization.
        alignment_threshold = 1000
        alignment_interval = 1024

        for shape in [
            (881,),  # A prime number below the threshold
            (999,),  # Exactly one below the threshold
        ]:
            fname = self.mktemp()
            with h5py.File(fname, 'w',
                           alignment_threshold=alignment_threshold,
                           alignment_interval=alignment_interval) as h5file:
                # Create up to 1000 datasets; at least one of them should
                # be misaligned. This isn't a perfect check, but all of
                # them being aligned by chance is very unlikely.
                if not self._has_misaligned_dataset(
                        h5file, shape, offset=alignment_interval):
                    raise RuntimeError(
                        "Data was all found to be aligned to "
                        f"{alignment_interval}. This is highly unlikely.")