""" Tests for converting between numpy dtypes and h5py data types """ from itertools import count import platform import numpy as np import h5py try: import tables except ImportError: tables = None from .common import ut, TestCase UNSUPPORTED_LONG_DOUBLE = ('i386', 'i486', 'i586', 'i686', 'ppc64le') UNSUPPORTED_LONG_DOUBLE_TYPES = ('float96', 'float128', 'complex192', 'complex256') class TestVlen(TestCase): """ Check that storage of vlen strings is carried out correctly. """ def assertVlenArrayEqual(self, dset, arr, message=None, precision=None): assert dset.shape == arr.shape, \ "Shape mismatch (%s vs %s)%s" % (dset.shape, arr.shape, message) for (i, d, a) in zip(count(), dset, arr): self.assertArrayEqual(d, a, message, precision) def test_compound(self): fields = [] fields.append(('field_1', h5py.string_dtype())) fields.append(('field_2', np.int32)) dt = np.dtype(fields) self.f['mytype'] = np.dtype(dt) dt_out = self.f['mytype'].dtype.fields['field_1'][0] string_inf = h5py.check_string_dtype(dt_out) self.assertEqual(string_inf.encoding, 'utf-8') def test_compound_vlen_bool(self): vidt = h5py.vlen_dtype(np.uint8) def a(items): return np.array(items, dtype=np.uint8) f = self.f dt_vb = np.dtype([ ('foo', vidt), ('logical', bool)]) vb = f.create_dataset('dt_vb', shape=(4,), dtype=dt_vb) data = np.array([(a([1, 2, 3]), True), (a([1 ]), False), (a([1, 5 ]), True), (a([],), False), ], dtype=dt_vb) vb[:] = data actual = f['dt_vb'][:] self.assertVlenArrayEqual(data['foo'], actual['foo']) self.assertArrayEqual(data['logical'], actual['logical']) dt_vv = np.dtype([ ('foo', vidt), ('bar', vidt)]) f.create_dataset('dt_vv', shape=(4,), dtype=dt_vv) dt_vvb = np.dtype([ ('foo', vidt), ('bar', vidt), ('logical', bool)]) vvb = f.create_dataset('dt_vvb', shape=(2,), dtype=dt_vvb) dt_bvv = np.dtype([ ('logical', bool), ('foo', vidt), ('bar', vidt)]) bvv = f.create_dataset('dt_bvv', shape=(2,), dtype=dt_bvv) data = np.array([(True, a([1, 2, 3]), a([1, 2])), (False, a([]), a([2, 4, 6])), ], dtype=bvv) bvv[:] = data actual = bvv[:] self.assertVlenArrayEqual(data['foo'], actual['foo']) self.assertVlenArrayEqual(data['bar'], actual['bar']) self.assertArrayEqual(data['logical'], actual['logical']) def test_compound_vlen_enum(self): eidt = h5py.enum_dtype({'OFF': 0, 'ON': 1}, basetype=np.uint8) vidt = h5py.vlen_dtype(np.uint8) def a(items): return np.array(items, dtype=np.uint8) f = self.f dt_vve = np.dtype([ ('foo', vidt), ('bar', vidt), ('switch', eidt)]) vve = f.create_dataset('dt_vve', shape=(2,), dtype=dt_vve) data = np.array([(a([1, 2, 3]), a([1, 2]), 1), (a([]), a([2, 4, 6]), 0), ], dtype=dt_vve) vve[:] = data actual = vve[:] self.assertVlenArrayEqual(data['foo'], actual['foo']) self.assertVlenArrayEqual(data['bar'], actual['bar']) self.assertArrayEqual(data['switch'], actual['switch']) def test_vlen_enum(self): fname = self.mktemp() arr1 = [[1], [1, 2]] dt1 = h5py.vlen_dtype(h5py.enum_dtype(dict(foo=1, bar=2), 'i')) with h5py.File(fname, 'w') as f: df1 = f.create_dataset('test', (len(arr1),), dtype=dt1) df1[:] = np.array(arr1, dtype=object) with h5py.File(fname, 'r') as f: df2 = f['test'] dt2 = df2.dtype arr2 = [e.tolist() for e in df2[:]] self.assertEqual(arr1, arr2) self.assertEqual(h5py.check_enum_dtype(h5py.check_vlen_dtype(dt1)), h5py.check_enum_dtype(h5py.check_vlen_dtype(dt2))) class TestEmptyVlen(TestCase): def test_write_empty_vlen(self): fname = self.mktemp() with h5py.File(fname, 'w') as f: d = np.core.records.fromarrays([[], []], names='a,b', formats='|V16,O') dset = f.create_dataset('test', data=d, dtype=[('a', '|V16'), ('b', h5py.special_dtype(vlen=np.float_))]) self.assertEqual(dset.size, 0) class TestExplicitCast(TestCase): def test_f2_casting(self): fname = self.mktemp() np.random.seed(1) A = np.random.rand(1500, 20) # Save to HDF5 file with h5py.File(fname, "w") as Fid: Fid.create_dataset("Data", data=A, dtype='f2') with h5py.File(fname, "r") as Fid: B = Fid["Data"][:] # Compare self.assertTrue(np.all(A.astype('f2') == B)) class TestOffsets(TestCase): """ Check that compound members with aligned or manual offsets are handled correctly. """ def test_compound_vlen(self): vidt = h5py.vlen_dtype(np.uint8) eidt = h5py.enum_dtype({'OFF': 0, 'ON': 1}, basetype=np.uint8) for np_align in (False, True): dt = np.dtype([ ('a', eidt), ('foo', vidt), ('bar', vidt), ('switch', eidt)], align=np_align) np_offsets = [dt.fields[i][1] for i in dt.names] for logical in (False, True): if logical and np_align: # Vlen types have different size in the numpy struct self.assertRaises(TypeError, h5py.h5t.py_create, dt, logical=logical) else: ht = h5py.h5t.py_create(dt, logical=logical) offsets = [ht.get_member_offset(i) for i in range(ht.get_nmembers())] if np_align: self.assertEqual(np_offsets, offsets) def test_aligned_offsets(self): dt = np.dtype('i4,i8,i2', align=True) ht = h5py.h5t.py_create(dt) self.assertEqual(dt.itemsize, ht.get_size()) self.assertEqual( [dt.fields[i][1] for i in dt.names], [ht.get_member_offset(i) for i in range(ht.get_nmembers())] ) def test_aligned_data(self): dt = np.dtype('i4,f8,i2', align=True) data = np.zeros(10, dtype=dt) data['f0'] = np.array(np.random.randint(-100, 100, size=data.size), dtype='i4') data['f1'] = np.random.rand(data.size) data['f2'] = np.array(np.random.randint(-100, 100, size=data.size), dtype='i2') fname = self.mktemp() with h5py.File(fname, 'w') as f: f['data'] = data with h5py.File(fname, 'r') as f: self.assertArrayEqual(f['data'], data) def test_compound_robustness(self): # make an out of order compound type with gaps in it, and larger itemsize than minimum # Idea is to be robust to type descriptions we *could* get out of HDF5 files, from custom descriptions # of types in addition to numpy's flakey history on unaligned fields with non-standard or padded layouts. fields = [ ('f0', np.float64, 25), ('f1', np.uint64, 9), ('f2', np.uint32, 0), ('f3', np.uint16, 5) ] lastfield = fields[np.argmax([ x[2] for x in fields ])] itemsize = lastfield[2] + np.dtype(lastfield[1]).itemsize + 6 extract_index = lambda index, sequence: [ x[index] for x in sequence ] dt = np.dtype({ 'names' : extract_index(0, fields), 'formats' : extract_index(1, fields), 'offsets' : extract_index(2, fields), # 'aligned': False, - already defaults to False 'itemsize': itemsize }) self.assertTrue(dt.itemsize == itemsize) data = np.zeros(10, dtype=dt) # don't trust numpy struct handling, keep fields out of band in case content insertion is erroneous # yes... this has also been known to happen. f1 = np.array([1 + i * 4 for i in range(data.shape[0])], dtype=dt.fields['f1'][0]) f2 = np.array([2 + i * 4 for i in range(data.shape[0])], dtype=dt.fields['f2'][0]) f3 = np.array([3 + i * 4 for i in range(data.shape[0])], dtype=dt.fields['f3'][0]) f0c = 3.14 data['f0'] = f0c data['f3'] = f3 data['f1'] = f1 data['f2'] = f2 # numpy consistency checks self.assertTrue(np.all(data['f0'] == f0c)) self.assertArrayEqual(data['f3'], f3) self.assertArrayEqual(data['f1'], f1) self.assertArrayEqual(data['f2'], f2) fname = self.mktemp() with h5py.File(fname, 'w') as fd: fd.create_dataset('data', data=data) with h5py.File(fname, 'r') as fd: readback = fd['data'] self.assertTrue(readback.dtype == dt) self.assertArrayEqual(readback, data) self.assertTrue(np.all(readback['f0'] == f0c)) self.assertArrayEqual(readback['f1'], f1) self.assertArrayEqual(readback['f2'], f2) self.assertArrayEqual(readback['f3'], f3) def test_out_of_order_offsets(self): dt = np.dtype({ 'names' : ['f1', 'f2', 'f3'], 'formats' : ['']: dt_descr = f'{dt_order}M8[{dt_unit}]' dt = h5py.opaque_dtype(np.dtype(dt_descr)) arr = np.array([0], dtype=np.int64).view(dtype=dt) with h5py.File(fname, 'w') as f: dset = f.create_dataset("default", data=arr, dtype=dt) self.assertArrayEqual(arr, dset) self.assertEqual(arr.dtype, dset.dtype) def test_timedelta(self): fname = self.mktemp() for dt_unit in self.datetime_units: for dt_order in ['<', '>']: dt_descr = f'{dt_order}m8[{dt_unit}]' dt = h5py.opaque_dtype(np.dtype(dt_descr)) arr = np.array([np.timedelta64(500, dt_unit)], dtype=dt) with h5py.File(fname, 'w') as f: dset = f.create_dataset("default", data=arr, dtype=dt) self.assertArrayEqual(arr, dset) self.assertEqual(arr.dtype, dset.dtype) @ut.skipUnless(tables is not None, 'tables is required') class TestBitfield(TestCase): """ Test H5T_NATIVE_B8 reading """ def test_b8_bool(self): arr1 = np.array([False, True], dtype=bool) self._test_b8( arr1, expected_default_cast_dtype=np.uint8 ) self._test_b8( arr1, expected_default_cast_dtype=np.uint8, cast_dtype=np.uint8 ) def test_b8_bool_compound(self): arr1 = np.array([(False,), (True,)], dtype=np.dtype([('x', '?')])) self._test_b8( arr1, expected_default_cast_dtype=np.dtype([('x', 'u1')]) ) self._test_b8( arr1, expected_default_cast_dtype=np.dtype([('x', 'u1')]), cast_dtype=np.dtype([('x', 'u1')]) ) def test_b8_bool_compound_nested(self): arr1 = np.array( [(True, (True, False)), (True, (False, True))], dtype=np.dtype([('x', '?'), ('y', [('a', '?'), ('b', '?')])]), ) self._test_b8( arr1, expected_default_cast_dtype=np.dtype( [('x', 'u1'), ('y', [('a', 'u1'), ('b', 'u1')])] ) ) self._test_b8( arr1, expected_default_cast_dtype=np.dtype( [('x', 'u1'), ('y', [('a', 'u1'), ('b', 'u1')])] ), cast_dtype=np.dtype([('x', 'u1'), ('y', [('a', 'u1'), ('b', 'u1')])]), ) def test_b8_bool_compound_mixed_types(self): arr1 = np.array( [(True, 0.5), (False, 0.2)], dtype=np.dtype([('x','?'), ('y', '