104 lines
4.3 KiB
Python
104 lines
4.3 KiB
Python
|
import h5py
|
||
|
from .common import TestCase
|
||
|
|
||
|
|
||
|
def is_aligned(dataset, offset=4096):
    """Report whether a dataset's file offset is a multiple of ``offset``.

    The position is taken from ``dataset.id.get_offset()``; the result is
    ``True`` when it divides evenly by ``offset`` (4096 unless overridden).
    """
    position = dataset.id.get_offset()
    remainder = position % offset
    return remainder == 0
|
||
|
|
||
|
|
||
|
def dataset_name(i):
    """Build a dataset name: ``data`` followed by ``i`` zero-padded to width 3."""
    padded_index = format(i, "03")
    return "data" + padded_index
|
||
|
|
||
|
|
||
|
class TestFileAlignment(TestCase):
    """
    Ensure that setting the file alignment has the desired effect
    in the internal structure.
    """

    def test_no_alignment_set(self):
        """Without alignment options, some dataset must miss a 4096 boundary."""
        fname = self.mktemp()
        # 881 is a prime number, so hopefully this helps randomize the
        # alignment enough.
        # A nice even number might give a pathological case where, while we
        # don't want the data to be aligned, it ends up aligned anyway.
        shape = (881,)

        with h5py.File(fname, 'w') as h5file:
            # Create up to 1000 datasets.
            # At least one of them should be misaligned.
            # This check is heuristic rather than exact: the case where all
            # 1000 datasets get created is one where the data is aligned.
            # Therefore, during correct operation, this test is expected to
            # finish quickly.
            for i in range(1000):
                dataset = h5file.create_dataset(
                    dataset_name(i), shape, dtype='uint8')
                # Assign data so that the dataset is instantiated in
                # the file. The value is truncated to the uint8 range so that
                # indices >= 256 cannot overflow the dataset's dtype.
                dataset[...] = (i % 256)
                if not is_aligned(dataset):
                    # Break early: finding one misaligned dataset is enough
                    # to show that the file is not aligned.
                    break
            else:
                # Only reached when the loop never hit ``break``.
                raise RuntimeError("Data was all found to be aligned to 4096")

    def test_alignment_set_above_threshold(self):
        """Datasets at or above the threshold must all be aligned."""
        # 2022/01/19 hmaarrfk
        # UnitTest (TestCase) doesn't play well with pytest parametrization,
        # so the shapes are looped over by hand.
        alignment_threshold = 1000
        alignment_interval = 4096

        for shape in [
            (1033,),  # A prime number above the threshold
            (1000,),  # Exactly equal to the threshold
            (1001,),  # One above the threshold
        ]:
            fname = self.mktemp()
            with h5py.File(fname, 'w',
                           alignment_threshold=alignment_threshold,
                           alignment_interval=alignment_interval) as h5file:
                # Create up to 1000 datasets.
                # They are all expected to be aligned.
                for i in range(1000):
                    dataset = h5file.create_dataset(
                        dataset_name(i), shape, dtype='uint8')
                    # Assign data so that the dataset is instantiated in
                    # the file.
                    dataset[...] = (i % 256)  # Truncate to uint8
                    assert is_aligned(dataset, offset=alignment_interval)

    def test_alignment_set_below_threshold(self):
        """Datasets below the threshold should not all end up aligned."""
        # 2022/01/19 hmaarrfk
        # UnitTest (TestCase) doesn't play well with pytest parametrization,
        # so the shapes are looped over by hand.
        alignment_threshold = 1000
        alignment_interval = 1024

        for shape in [
            (881,),  # A prime number below the threshold
            (999,),  # Exactly one below the threshold
        ]:
            fname = self.mktemp()
            with h5py.File(fname, 'w',
                           alignment_threshold=alignment_threshold,
                           alignment_interval=alignment_interval) as h5file:
                # Create up to 1000 datasets.
                # At least one of them should be misaligned.
                # This check is heuristic rather than exact: the case where
                # all 1000 datasets get created is one where the data is
                # aligned. Therefore, during correct operation, this test is
                # expected to finish quickly.
                for i in range(1000):
                    dataset = h5file.create_dataset(
                        dataset_name(i), shape, dtype='uint8')
                    # Assign data so that the dataset is instantiated in
                    # the file. The value is truncated to the uint8 range so
                    # that indices >= 256 cannot overflow the dataset's dtype.
                    dataset[...] = (i % 256)
                    if not is_aligned(dataset, offset=alignment_interval):
                        # Break early: finding one misaligned dataset is
                        # enough to show that the file is not aligned.
                        break
                else:
                    # Only reached when the loop never hit ``break``.
                    raise RuntimeError(
                        "Data was all found to be aligned to "
                        f"{alignment_interval}. This is highly unlikely.")
|