Intelegentny_Pszczelarz/.venv/Lib/site-packages/h5py/tests/test_dtype.py

"""
    Tests for converting between numpy dtypes and h5py data types
"""

from itertools import count
import platform
import numpy as np
import h5py
try:
    import tables
except ImportError:
    tables = None

from .common import ut, TestCase

UNSUPPORTED_LONG_DOUBLE = ('i386', 'i486', 'i586', 'i686', 'ppc64le')
UNSUPPORTED_LONG_DOUBLE_TYPES = ('float96', 'float128', 'complex192',
                                 'complex256')


class TestVlen(TestCase):

    """
        Check that storage of vlen strings is carried out correctly.
    """
    def assertVlenArrayEqual(self, dset, arr, message=None, precision=None):
        assert dset.shape == arr.shape, \
            "Shape mismatch (%s vs %s)%s" % (dset.shape, arr.shape, message)
        for (i, d, a) in zip(count(), dset, arr):
            self.assertArrayEqual(d, a, message, precision)

    def test_compound(self):

        fields = []
        fields.append(('field_1', h5py.string_dtype()))
        fields.append(('field_2', np.int32))
        dt = np.dtype(fields)
        self.f['mytype'] = np.dtype(dt)
        dt_out = self.f['mytype'].dtype.fields['field_1'][0]
        string_inf = h5py.check_string_dtype(dt_out)
        self.assertEqual(string_inf.encoding, 'utf-8')

    def test_compound_vlen_bool(self):
        vidt = h5py.vlen_dtype(np.uint8)
        def a(items):
            return np.array(items, dtype=np.uint8)

        f = self.f

        dt_vb = np.dtype([
            ('foo', vidt),
            ('logical', bool)])
        vb = f.create_dataset('dt_vb', shape=(4,), dtype=dt_vb)
        data = np.array([(a([1, 2, 3]), True),
                         (a([1    ]), False),
                         (a([1, 5  ]), True),
                         (a([],), False), ],
                     dtype=dt_vb)
        vb[:] = data
        actual = f['dt_vb'][:]
        self.assertVlenArrayEqual(data['foo'], actual['foo'])
        self.assertArrayEqual(data['logical'], actual['logical'])

        dt_vv = np.dtype([
            ('foo', vidt),
            ('bar', vidt)])
        f.create_dataset('dt_vv', shape=(4,), dtype=dt_vv)

        dt_vvb = np.dtype([
            ('foo', vidt),
            ('bar', vidt),
            ('logical', bool)])
        vvb = f.create_dataset('dt_vvb', shape=(2,), dtype=dt_vvb)

        dt_bvv = np.dtype([
            ('logical', bool),
            ('foo', vidt),
            ('bar', vidt)])
        bvv = f.create_dataset('dt_bvv', shape=(2,), dtype=dt_bvv)
        data = np.array([(True, a([1, 2, 3]), a([1, 2])),
                         (False, a([]), a([2, 4, 6])), ],
                         dtype=bvv)
        bvv[:] = data
        actual = bvv[:]
        self.assertVlenArrayEqual(data['foo'], actual['foo'])
        self.assertVlenArrayEqual(data['bar'], actual['bar'])
        self.assertArrayEqual(data['logical'], actual['logical'])

    def test_compound_vlen_enum(self):
        eidt = h5py.enum_dtype({'OFF': 0, 'ON': 1}, basetype=np.uint8)
        vidt = h5py.vlen_dtype(np.uint8)
        def a(items):
            return np.array(items, dtype=np.uint8)

        f = self.f

        dt_vve = np.dtype([
            ('foo', vidt),
            ('bar', vidt),
            ('switch', eidt)])
        vve = f.create_dataset('dt_vve', shape=(2,), dtype=dt_vve)
        data = np.array([(a([1, 2, 3]), a([1, 2]), 1),
                         (a([]), a([2, 4, 6]), 0), ],
                         dtype=dt_vve)
        vve[:] = data
        actual = vve[:]
        self.assertVlenArrayEqual(data['foo'], actual['foo'])
        self.assertVlenArrayEqual(data['bar'], actual['bar'])
        self.assertArrayEqual(data['switch'], actual['switch'])

    def test_vlen_enum(self):
        fname = self.mktemp()
        arr1 = [[1], [1, 2]]
        dt1 = h5py.vlen_dtype(h5py.enum_dtype(dict(foo=1, bar=2), 'i'))

        with h5py.File(fname, 'w') as f:
            df1 = f.create_dataset('test', (len(arr1),), dtype=dt1)
            df1[:] = np.array(arr1, dtype=object)

        with h5py.File(fname, 'r') as f:
            df2 = f['test']
            dt2 = df2.dtype
            arr2 = [e.tolist() for e in df2[:]]

        self.assertEqual(arr1, arr2)
        self.assertEqual(h5py.check_enum_dtype(h5py.check_vlen_dtype(dt1)),
                         h5py.check_enum_dtype(h5py.check_vlen_dtype(dt2)))


class TestEmptyVlen(TestCase):
    def test_write_empty_vlen(self):
        fname = self.mktemp()
        with h5py.File(fname, 'w') as f:
            d = np.core.records.fromarrays([[], []], names='a,b', formats='|V16,O')
            dset = f.create_dataset('test', data=d, dtype=[('a', '|V16'), ('b', h5py.special_dtype(vlen=np.float_))])
            self.assertEqual(dset.size, 0)


class TestExplicitCast(TestCase):
    def test_f2_casting(self):
        fname = self.mktemp()

        np.random.seed(1)
        A = np.random.rand(1500, 20)

        # Save to HDF5 file
        with h5py.File(fname, "w") as Fid:
            Fid.create_dataset("Data", data=A, dtype='f2')

        with h5py.File(fname, "r") as Fid:
            B = Fid["Data"][:]

        # Compare
        self.assertTrue(np.all(A.astype('f2') == B))


class TestOffsets(TestCase):
    """
        Check that compound members with aligned or manual offsets are handled
        correctly.
    """

    def test_compound_vlen(self):
        vidt = h5py.vlen_dtype(np.uint8)
        eidt = h5py.enum_dtype({'OFF': 0, 'ON': 1}, basetype=np.uint8)

        for np_align in (False, True):
            dt = np.dtype([
                ('a', eidt),
                ('foo', vidt),
                ('bar', vidt),
                ('switch', eidt)], align=np_align)
            np_offsets = [dt.fields[i][1] for i in dt.names]

            for logical in (False, True):
                if logical and np_align:
                    # Vlen types have different size in the numpy struct
                    self.assertRaises(TypeError, h5py.h5t.py_create, dt,
                            logical=logical)
                else:
                    ht = h5py.h5t.py_create(dt, logical=logical)
                    offsets = [ht.get_member_offset(i)
                               for i in range(ht.get_nmembers())]
                    if np_align:
                        self.assertEqual(np_offsets, offsets)

    def test_aligned_offsets(self):
        dt = np.dtype('i4,i8,i2', align=True)
        ht = h5py.h5t.py_create(dt)
        self.assertEqual(dt.itemsize, ht.get_size())
        self.assertEqual(
            [dt.fields[i][1] for i in dt.names],
            [ht.get_member_offset(i) for i in range(ht.get_nmembers())]
        )

    def test_aligned_data(self):
        dt = np.dtype('i4,f8,i2', align=True)
        data = np.zeros(10, dtype=dt)

        data['f0'] = np.array(np.random.randint(-100, 100, size=data.size),
                              dtype='i4')
        data['f1'] = np.random.rand(data.size)
        data['f2'] = np.array(np.random.randint(-100, 100, size=data.size),
                              dtype='i2')

        fname = self.mktemp()

        with h5py.File(fname, 'w') as f:
            f['data'] = data

        with h5py.File(fname, 'r') as f:
            self.assertArrayEqual(f['data'], data)

    def test_compound_robustness(self):
        # make an out of order compound type with gaps in it, and larger itemsize than minimum
        # Idea is to be robust to type descriptions we *could* get out of HDF5 files, from custom descriptions
        # of types in addition to numpy's flakey history on unaligned fields with non-standard or padded layouts.
        fields = [
            ('f0', np.float64, 25),
            ('f1', np.uint64, 9),
            ('f2', np.uint32, 0),
            ('f3', np.uint16, 5)
        ]
        lastfield = fields[np.argmax([ x[2] for x in fields ])]
        itemsize = lastfield[2] + np.dtype(lastfield[1]).itemsize + 6
        extract_index = lambda index, sequence: [ x[index] for x in sequence ]

        dt = np.dtype({
            'names' : extract_index(0, fields),
            'formats' : extract_index(1, fields),
            'offsets' : extract_index(2, fields),
            # 'aligned': False, - already defaults to False
            'itemsize': itemsize
        })

        self.assertTrue(dt.itemsize == itemsize)
        data = np.zeros(10, dtype=dt)

        # don't trust numpy struct handling, keep fields out of band in case content insertion is erroneous
        # yes... this has also been known to happen.
        f1 = np.array([1 + i * 4 for i in range(data.shape[0])], dtype=dt.fields['f1'][0])
        f2 = np.array([2 + i * 4 for i in range(data.shape[0])], dtype=dt.fields['f2'][0])
        f3 = np.array([3 + i * 4 for i in range(data.shape[0])], dtype=dt.fields['f3'][0])
        f0c = 3.14
        data['f0'] = f0c
        data['f3'] = f3
        data['f1'] = f1
        data['f2'] = f2

        # numpy consistency checks
        self.assertTrue(np.all(data['f0'] == f0c))
        self.assertArrayEqual(data['f3'], f3)
        self.assertArrayEqual(data['f1'], f1)
        self.assertArrayEqual(data['f2'], f2)

        fname = self.mktemp()

        with h5py.File(fname, 'w') as fd:
            fd.create_dataset('data', data=data)

        with h5py.File(fname, 'r') as fd:
            readback = fd['data']
            self.assertTrue(readback.dtype == dt)
            self.assertArrayEqual(readback, data)
            self.assertTrue(np.all(readback['f0'] == f0c))
            self.assertArrayEqual(readback['f1'], f1)
            self.assertArrayEqual(readback['f2'], f2)
            self.assertArrayEqual(readback['f3'], f3)

    def test_out_of_order_offsets(self):
        dt = np.dtype({
            'names' : ['f1', 'f2', 'f3'],
            'formats' : ['<f4', '<i4', '<f8'],
            'offsets' : [0, 16, 8]
        })
        data = np.zeros(10, dtype=dt)
        data['f1'] = np.random.rand(data.size)
        data['f2'] = np.random.randint(-10, 11, data.size)
        data['f3'] = np.random.rand(data.size) * -1

        fname = self.mktemp()

        with h5py.File(fname, 'w') as fd:
            fd.create_dataset('data', data=data)

        with h5py.File(fname, 'r') as fd:
            self.assertArrayEqual(fd['data'], data)

    def test_float_round_tripping(self):
        dtypes = set(f for f in np.sctypeDict.values()
                     if (np.issubdtype(f, np.floating) or
                         np.issubdtype(f, np.complexfloating)))

        unsupported_types = []
        if platform.machine() in UNSUPPORTED_LONG_DOUBLE:
            for x in UNSUPPORTED_LONG_DOUBLE_TYPES:
                if hasattr(np, x):
                    unsupported_types.append(getattr(np, x))
        dtype_dset_map = {str(j): d
                          for j, d in enumerate(dtypes)
                          if d not in unsupported_types}

        fname = self.mktemp()

        with h5py.File(fname, 'w') as f:
            for n, d in dtype_dset_map.items():
                data = np.zeros(10, dtype=d)
                data[...] = np.arange(10)

                f.create_dataset(n, data=data)

        with h5py.File(fname, 'r') as f:
            for n, d in dtype_dset_map.items():
                ldata = f[n][:]
                self.assertEqual(ldata.dtype, d)


class TestStrings(TestCase):
    def test_vlen_utf8(self):
        dt = h5py.string_dtype()

        string_info = h5py.check_string_dtype(dt)
        assert string_info.encoding == 'utf-8'
        assert string_info.length is None
        assert h5py.check_vlen_dtype(dt) is str

    def test_vlen_ascii(self):
        dt = h5py.string_dtype(encoding='ascii')

        string_info = h5py.check_string_dtype(dt)
        assert string_info.encoding == 'ascii'
        assert string_info.length is None
        assert h5py.check_vlen_dtype(dt) is bytes

    def test_fixed_utf8(self):
        dt = h5py.string_dtype(length=10)

        string_info = h5py.check_string_dtype(dt)
        assert string_info.encoding == 'utf-8'
        assert string_info.length == 10
        assert h5py.check_vlen_dtype(dt) is None

    def test_fixed_ascii(self):
        dt = h5py.string_dtype(encoding='ascii', length=10)

        string_info = h5py.check_string_dtype(dt)
        assert string_info.encoding == 'ascii'
        assert string_info.length == 10
        assert h5py.check_vlen_dtype(dt) is None

class TestDateTime(TestCase):
    datetime_units = [
        # Dates
        'Y', 'M', 'D',
        # Times
        'h', 'm', 's', 'ms', 'us',
        'ns', 'ps', 'fs', 'as',
    ]

    def test_datetime(self):
        fname = self.mktemp()

        for dt_unit in self.datetime_units:
            for dt_order in ['<', '>']:
                dt_descr = f'{dt_order}M8[{dt_unit}]'
                dt = h5py.opaque_dtype(np.dtype(dt_descr))
                arr = np.array([0], dtype=np.int64).view(dtype=dt)

                with h5py.File(fname, 'w') as f:
                    dset = f.create_dataset("default", data=arr, dtype=dt)
                    self.assertArrayEqual(arr, dset)
                    self.assertEqual(arr.dtype, dset.dtype)

    def test_timedelta(self):
        fname = self.mktemp()

        for dt_unit in self.datetime_units:
            for dt_order in ['<', '>']:
                dt_descr = f'{dt_order}m8[{dt_unit}]'
                dt = h5py.opaque_dtype(np.dtype(dt_descr))
                arr = np.array([np.timedelta64(500, dt_unit)], dtype=dt)

                with h5py.File(fname, 'w') as f:
                    dset = f.create_dataset("default", data=arr, dtype=dt)
                    self.assertArrayEqual(arr, dset)
                    self.assertEqual(arr.dtype, dset.dtype)

@ut.skipUnless(tables is not None, 'tables is required')
class TestBitfield(TestCase):

    """
    Test H5T_NATIVE_B8 reading
    """

    def test_b8_bool(self):
        arr1 = np.array([False, True], dtype=bool)
        self._test_b8(
            arr1,
            expected_default_cast_dtype=np.uint8
        )
        self._test_b8(
            arr1,
            expected_default_cast_dtype=np.uint8,
            cast_dtype=np.uint8
        )

    def test_b8_bool_compound(self):
        arr1 = np.array([(False,), (True,)], dtype=np.dtype([('x', '?')]))
        self._test_b8(
            arr1,
            expected_default_cast_dtype=np.dtype([('x', 'u1')])
        )
        self._test_b8(
            arr1,
            expected_default_cast_dtype=np.dtype([('x', 'u1')]),
            cast_dtype=np.dtype([('x', 'u1')])
        )

    def test_b8_bool_compound_nested(self):
        arr1 = np.array(
            [(True, (True, False)), (True, (False, True))],
            dtype=np.dtype([('x', '?'), ('y', [('a', '?'), ('b', '?')])]),
        )
        self._test_b8(
            arr1,
            expected_default_cast_dtype=np.dtype(
                [('x', 'u1'), ('y', [('a', 'u1'), ('b', 'u1')])]
            )
        )
        self._test_b8(
            arr1,
            expected_default_cast_dtype=np.dtype(
                [('x', 'u1'), ('y', [('a', 'u1'), ('b', 'u1')])]
            ),
            cast_dtype=np.dtype([('x', 'u1'), ('y', [('a', 'u1'), ('b', 'u1')])]),
        )

    def test_b8_bool_compound_mixed_types(self):
        arr1 = np.array(
            [(True, 0.5), (False, 0.2)], dtype=np.dtype([('x','?'), ('y', '<f8')])
        )
        self._test_b8(
            arr1,
            expected_default_cast_dtype=np.dtype([('x', 'u1'), ('y', '<f8')])
        )
        self._test_b8(
            arr1,
            expected_default_cast_dtype=np.dtype([('x', 'u1'), ('y', '<f8')]),
            cast_dtype=np.dtype([('x', 'u1'), ('y', '<f8')])
        )

    def test_b8_bool_array(self):
        arr1 = np.array(
            [((True, True, False),), ((True, False, True),)],
            dtype=np.dtype([('x', ('?', (3,)))]),
        )
        self._test_b8(
            arr1,
            expected_default_cast_dtype=np.dtype([('x', ('u1', (3,)))])
        )
        self._test_b8(
            arr1,
            expected_default_cast_dtype=np.dtype([('x', ('u1', (3,)))]),
            cast_dtype=np.dtype([('x', ('?', (3,)))]),
        )

    def _test_b8(self, arr1, expected_default_cast_dtype, cast_dtype=None):
        path = self.mktemp()

        with tables.open_file(path, 'w') as f:
            if arr1.dtype.names:
                f.create_table('/', 'test', obj=arr1)
            else:
                f.create_array('/', 'test', obj=arr1)

        with h5py.File(path, 'r') as f:
            dset = f['test']

            # This should do an implicit uint8 cast
            # Expect that the "No NumPy equivalent for TypeBitfieldID exists"
            # error is not thrown.
            arr2 = dset[:]

            self.assertArrayEqual(
                arr2,
                arr1.astype(expected_default_cast_dtype, copy=False)
            )

            # read cast dset and make sure it's equal
            if cast_dtype is None:
                cast_dtype = arr1.dtype
            arr3 = dset.astype(cast_dtype)[:]
            self.assertArrayEqual(arr3, arr1.astype(cast_dtype, copy=False))

    def test_b16_uint16(self):
        arr1 = np.arange(10, dtype=np.uint16)
        path = self.mktemp()
        with h5py.File(path, 'w') as f:
            space = h5py.h5s.create_simple(arr1.shape)
            dset_id = h5py.h5d.create(f.id, b'test', h5py.h5t.STD_B16LE, space)
            dset = h5py.Dataset(dset_id)
            dset[:] = arr1

        with h5py.File(path, 'r') as f:
            dset = f['test']
            self.assertArrayEqual(dset[:], arr1)

def test_opaque(writable_file):
    # opaque without an h5py tag corresponds to numpy void dtypes
    arr = np.zeros(3, dtype='V2')
    ds = writable_file.create_dataset('v', data=arr)
    assert isinstance(ds.id.get_type(), h5py.h5t.TypeOpaqueID)
    assert ds.id.get_type().get_size() == 2
    np.testing.assert_array_equal(ds[:], arr)