import datetime
from datetime import timedelta
from distutils.version import LooseVersion
import hashlib
from io import BytesIO
import os
from pathlib import Path
import re
import time
from warnings import catch_warnings, simplefilter

import numpy as np
import pytest

from pandas.compat import is_platform_little_endian, is_platform_windows
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    Categorical,
    CategoricalIndex,
    DataFrame,
    DatetimeIndex,
    Index,
    Int64Index,
    MultiIndex,
    RangeIndex,
    Series,
    Timestamp,
    bdate_range,
    concat,
    date_range,
    isna,
    timedelta_range,
)
import pandas._testing as tm
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_path,
    ensure_clean_store,
    safe_close,
    tables,
)

from pandas.io.pytables import (
    ClosedFileError,
    HDFStore,
    PossibleDataLossError,
    Term,
    _maybe_adjust_name,
    read_hdf,
)

from pandas.io import pytables as pytables  # isort:skip
from pandas.io.pytables import TableIterator  # isort:skip


_default_compressor = "blosc"
ignore_natural_naming_warning = pytest.mark.filterwarnings(
    "ignore:object name:tables.exceptions.NaturalNameWarning"
)

@pytest.mark.single
class TestHDFStore:
    def test_format_type(self, setup_path):
        df = DataFrame({"A": [1, 2]})
        with ensure_clean_path(setup_path) as path:
            with HDFStore(path) as store:
                store.put("a", df, format="fixed")
                store.put("b", df, format="table")

                assert store.get_storer("a").format_type == "fixed"
                assert store.get_storer("b").format_type == "table"

    def test_format_kwarg_in_constructor(self, setup_path):
        # GH 13291

        msg = "format is not a defined argument for HDFStore"

        with tm.ensure_clean(setup_path) as path:
            with pytest.raises(ValueError, match=msg):
                HDFStore(path, format="table")

    def test_context(self, setup_path):
        with tm.ensure_clean(setup_path) as path:
            try:
                with HDFStore(path) as tbl:
                    raise ValueError("blah")
            except ValueError:
                pass
        with tm.ensure_clean(setup_path) as path:
            with HDFStore(path) as tbl:
                tbl["a"] = tm.makeDataFrame()
                assert len(tbl) == 1
                assert type(tbl["a"]) == DataFrame

    def test_conv_read_write(self, setup_path):
        with tm.ensure_clean() as path:

            def roundtrip(key, obj, **kwargs):
                obj.to_hdf(path, key, **kwargs)
                return read_hdf(path, key)

            o = tm.makeTimeSeries()
            tm.assert_series_equal(o, roundtrip("series", o))

            o = tm.makeStringSeries()
            tm.assert_series_equal(o, roundtrip("string_series", o))

            o = tm.makeDataFrame()
            tm.assert_frame_equal(o, roundtrip("frame", o))

            # table
            df = DataFrame({"A": range(5), "B": range(5)})
            df.to_hdf(path, "table", append=True)
            result = read_hdf(path, "table", where=["index>2"])
            tm.assert_frame_equal(df[df.index > 2], result)

    def test_long_strings(self, setup_path):

        # GH6166
        df = DataFrame(
            {"a": tm.rands_array(100, size=10)}, index=tm.rands_array(100, size=10)
        )

        with ensure_clean_store(setup_path) as store:
            store.append("df", df, data_columns=["a"])

            result = store.select("df")
            tm.assert_frame_equal(df, result)

    def test_api(self, setup_path):

        # GH4584
        # API issue when to_hdf doesn't accept append AND format args
        with ensure_clean_path(setup_path) as path:

            df = tm.makeDataFrame()
            df.iloc[:10].to_hdf(path, "df", append=True, format="table")
            df.iloc[10:].to_hdf(path, "df", append=True, format="table")
            tm.assert_frame_equal(read_hdf(path, "df"), df)

            # append to False
            df.iloc[:10].to_hdf(path, "df", append=False, format="table")
            df.iloc[10:].to_hdf(path, "df", append=True, format="table")
            tm.assert_frame_equal(read_hdf(path, "df"), df)

        with ensure_clean_path(setup_path) as path:

            df = tm.makeDataFrame()
            df.iloc[:10].to_hdf(path, "df", append=True)
            df.iloc[10:].to_hdf(path, "df", append=True, format="table")
            tm.assert_frame_equal(read_hdf(path, "df"), df)

            # append to False
            df.iloc[:10].to_hdf(path, "df", append=False, format="table")
            df.iloc[10:].to_hdf(path, "df", append=True)
            tm.assert_frame_equal(read_hdf(path, "df"), df)

        with ensure_clean_path(setup_path) as path:

            df = tm.makeDataFrame()
            df.to_hdf(path, "df", append=False, format="fixed")
            tm.assert_frame_equal(read_hdf(path, "df"), df)

            df.to_hdf(path, "df", append=False, format="f")
            tm.assert_frame_equal(read_hdf(path, "df"), df)

            df.to_hdf(path, "df", append=False)
            tm.assert_frame_equal(read_hdf(path, "df"), df)

            df.to_hdf(path, "df")
            tm.assert_frame_equal(read_hdf(path, "df"), df)

        with ensure_clean_store(setup_path) as store:

            path = store._path
            df = tm.makeDataFrame()

            _maybe_remove(store, "df")
            store.append("df", df.iloc[:10], append=True, format="table")
            store.append("df", df.iloc[10:], append=True, format="table")
            tm.assert_frame_equal(store.select("df"), df)

            # append to False
            _maybe_remove(store, "df")
            store.append("df", df.iloc[:10], append=False, format="table")
            store.append("df", df.iloc[10:], append=True, format="table")
            tm.assert_frame_equal(store.select("df"), df)

            # formats
            _maybe_remove(store, "df")
            store.append("df", df.iloc[:10], append=False, format="table")
            store.append("df", df.iloc[10:], append=True, format="table")
            tm.assert_frame_equal(store.select("df"), df)

            _maybe_remove(store, "df")
            store.append("df", df.iloc[:10], append=False, format="table")
            store.append("df", df.iloc[10:], append=True, format=None)
            tm.assert_frame_equal(store.select("df"), df)

        with ensure_clean_path(setup_path) as path:
            # Invalid.
            df = tm.makeDataFrame()

            msg = "Can only append to Tables"

            with pytest.raises(ValueError, match=msg):
                df.to_hdf(path, "df", append=True, format="f")

            with pytest.raises(ValueError, match=msg):
                df.to_hdf(path, "df", append=True, format="fixed")

            msg = r"invalid HDFStore format specified \[foo\]"

            with pytest.raises(TypeError, match=msg):
                df.to_hdf(path, "df", append=True, format="foo")

            with pytest.raises(TypeError, match=msg):
                df.to_hdf(path, "df", append=False, format="foo")

        # File path doesn't exist
        path = ""
        msg = f"File {path} does not exist"

        with pytest.raises(FileNotFoundError, match=msg):
            read_hdf(path, "df")

    def test_api_default_format(self, setup_path):

        # default_format option
        with ensure_clean_store(setup_path) as store:
            df = tm.makeDataFrame()

            pd.set_option("io.hdf.default_format", "fixed")
            _maybe_remove(store, "df")
            store.put("df", df)
            assert not store.get_storer("df").is_table

            msg = "Can only append to Tables"

            with pytest.raises(ValueError, match=msg):
                store.append("df2", df)

            pd.set_option("io.hdf.default_format", "table")
            _maybe_remove(store, "df")
            store.put("df", df)
            assert store.get_storer("df").is_table
            _maybe_remove(store, "df2")
            store.append("df2", df)
            assert store.get_storer("df").is_table

            pd.set_option("io.hdf.default_format", None)

        with ensure_clean_path(setup_path) as path:

            df = tm.makeDataFrame()

            pd.set_option("io.hdf.default_format", "fixed")
            df.to_hdf(path, "df")
            with HDFStore(path) as store:
                assert not store.get_storer("df").is_table
            with pytest.raises(ValueError, match=msg):
                df.to_hdf(path, "df2", append=True)

            pd.set_option("io.hdf.default_format", "table")
            df.to_hdf(path, "df3")
            with HDFStore(path) as store:
                assert store.get_storer("df3").is_table
            df.to_hdf(path, "df4", append=True)
            with HDFStore(path) as store:
                assert store.get_storer("df4").is_table

            pd.set_option("io.hdf.default_format", None)

    def test_keys(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            store["a"] = tm.makeTimeSeries()
            store["b"] = tm.makeStringSeries()
            store["c"] = tm.makeDataFrame()

            assert len(store) == 3
            expected = {"/a", "/b", "/c"}
            assert set(store.keys()) == expected
            assert set(store) == expected

    def test_no_track_times(self, setup_path):

        # GH 32682
        # allows setting track_times (see `pytables` `create_table` documentation)

        def checksum(filename, hash_factory=hashlib.md5, chunk_num_blocks=128):
            h = hash_factory()
            with open(filename, "rb") as f:
                for chunk in iter(lambda: f.read(chunk_num_blocks * h.block_size), b""):
                    h.update(chunk)
            return h.digest()

        def create_h5_and_return_checksum(track_times):
            with ensure_clean_path(setup_path) as path:
                df = DataFrame({"a": [1]})

                with HDFStore(path, mode="w") as hdf:
                    hdf.put(
                        "table",
                        df,
                        format="table",
                        data_columns=True,
                        index=None,
                        track_times=track_times,
                    )

                return checksum(path)

        checksum_0_tt_false = create_h5_and_return_checksum(track_times=False)
        checksum_0_tt_true = create_h5_and_return_checksum(track_times=True)

        # sleep is necessary to create h5 files with different creation times
        time.sleep(1)

        checksum_1_tt_false = create_h5_and_return_checksum(track_times=False)
        checksum_1_tt_true = create_h5_and_return_checksum(track_times=True)

        # checksums are the same if track_times = False
        assert checksum_0_tt_false == checksum_1_tt_false

        # checksums are NOT the same if track_times = True
        assert checksum_0_tt_true != checksum_1_tt_true

    def test_non_pandas_keys(self, setup_path):
        class Table1(tables.IsDescription):
            value1 = tables.Float32Col()

        class Table2(tables.IsDescription):
            value2 = tables.Float32Col()

        class Table3(tables.IsDescription):
            value3 = tables.Float32Col()

        with ensure_clean_path(setup_path) as path:
            with tables.open_file(path, mode="w") as h5file:
                group = h5file.create_group("/", "group")
                h5file.create_table(group, "table1", Table1, "Table 1")
                h5file.create_table(group, "table2", Table2, "Table 2")
                h5file.create_table(group, "table3", Table3, "Table 3")
            with HDFStore(path) as store:
                assert len(store.keys(include="native")) == 3
                expected = {"/group/table1", "/group/table2", "/group/table3"}
                assert set(store.keys(include="native")) == expected
                assert set(store.keys(include="pandas")) == set()
                for name in expected:
                    df = store.get(name)
                    assert len(df.columns) == 1

    def test_keys_illegal_include_keyword_value(self, setup_path):
        with ensure_clean_store(setup_path) as store:
            with pytest.raises(
                ValueError,
                match="`include` should be either 'pandas' or 'native' "
                "but is 'illegal'",
            ):
                store.keys(include="illegal")

    def test_keys_ignore_hdf_softlink(self, setup_path):

        # GH 20523
        # Puts a softlink into HDF file and rereads

        with ensure_clean_store(setup_path) as store:

            df = DataFrame({"A": range(5), "B": range(5)})
            store.put("df", df)

            assert store.keys() == ["/df"]

            store._handle.create_soft_link(store._handle.root, "symlink", "df")

            # Should ignore the softlink
            assert store.keys() == ["/df"]

    def test_iter_empty(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            # GH 12221
            assert list(store) == []

    def test_repr(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            repr(store)
            store.info()
            store["a"] = tm.makeTimeSeries()
            store["b"] = tm.makeStringSeries()
            store["c"] = tm.makeDataFrame()

            df = tm.makeDataFrame()
            df["obj1"] = "foo"
            df["obj2"] = "bar"
            df["bool1"] = df["A"] > 0
            df["bool2"] = df["B"] > 0
            df["bool3"] = True
            df["int1"] = 1
            df["int2"] = 2
            df["timestamp1"] = Timestamp("20010102")
            df["timestamp2"] = Timestamp("20010103")
            df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)
            df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)
            df.loc[df.index[3:6], ["obj1"]] = np.nan
            df = df._consolidate()._convert(datetime=True)

            with catch_warnings(record=True):
                simplefilter("ignore", pd.errors.PerformanceWarning)
                store["df"] = df

            # make a random group in hdf space
            store._handle.create_group(store._handle.root, "bah")

            assert store.filename in repr(store)
            assert store.filename in str(store)
            store.info()

        # storers
        with ensure_clean_store(setup_path) as store:

            df = tm.makeDataFrame()
            store.append("df", df)

            s = store.get_storer("df")
            repr(s)
            str(s)

    @ignore_natural_naming_warning
    def test_contains(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            store["a"] = tm.makeTimeSeries()
            store["b"] = tm.makeDataFrame()
            store["foo/bar"] = tm.makeDataFrame()
            assert "a" in store
            assert "b" in store
            assert "c" not in store
            assert "foo/bar" in store
            assert "/foo/bar" in store
            assert "/foo/b" not in store
            assert "bar" not in store

            # gh-2694: tables.NaturalNameWarning
            with catch_warnings(record=True):
                store["node())"] = tm.makeDataFrame()
            assert "node())" in store

    def test_versioning(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            store["a"] = tm.makeTimeSeries()
            store["b"] = tm.makeDataFrame()
            df = tm.makeTimeDataFrame()
            _maybe_remove(store, "df1")
            store.append("df1", df[:10])
            store.append("df1", df[10:])
            assert store.root.a._v_attrs.pandas_version == "0.15.2"
            assert store.root.b._v_attrs.pandas_version == "0.15.2"
            assert store.root.df1._v_attrs.pandas_version == "0.15.2"

            # write a file and wipe its versioning
            _maybe_remove(store, "df2")
            store.append("df2", df)

            # this is an error because its table_type is appendable, but no
            # version info
            store.get_node("df2")._v_attrs.pandas_version = None

            msg = "'NoneType' object has no attribute 'startswith'"

            with pytest.raises(Exception, match=msg):
                store.select("df2")

    def test_mode(self, setup_path):

        df = tm.makeTimeDataFrame()

        def check(mode):

            with ensure_clean_path(setup_path) as path:

                # constructor
                if mode in ["r", "r+"]:
                    with pytest.raises(IOError):
                        HDFStore(path, mode=mode)

                else:
                    store = HDFStore(path, mode=mode)
                    assert store._handle.mode == mode
                    store.close()

            with ensure_clean_path(setup_path) as path:

                # context
                if mode in ["r", "r+"]:
                    with pytest.raises(IOError):
                        with HDFStore(path, mode=mode) as store:
                            pass
                else:
                    with HDFStore(path, mode=mode) as store:
                        assert store._handle.mode == mode

            with ensure_clean_path(setup_path) as path:

                # conv write
                if mode in ["r", "r+"]:
                    with pytest.raises(IOError):
                        df.to_hdf(path, "df", mode=mode)
                    df.to_hdf(path, "df", mode="w")
                else:
                    df.to_hdf(path, "df", mode=mode)

                # conv read
                if mode in ["w"]:
                    msg = (
                        "mode w is not allowed while performing a read. "
                        r"Allowed modes are r, r\+ and a."
                    )
                    with pytest.raises(ValueError, match=msg):
                        read_hdf(path, "df", mode=mode)
                else:
                    result = read_hdf(path, "df", mode=mode)
                    tm.assert_frame_equal(result, df)

        def check_default_mode():

            # read_hdf uses default mode
            with ensure_clean_path(setup_path) as path:
                df.to_hdf(path, "df", mode="w")
                result = read_hdf(path, "df")
                tm.assert_frame_equal(result, df)

        check("r")
        check("r+")
        check("a")
        check("w")
        check_default_mode()

    def test_reopen_handle(self, setup_path):

        with ensure_clean_path(setup_path) as path:

            store = HDFStore(path, mode="a")
            store["a"] = tm.makeTimeSeries()

            # invalid mode change
            with pytest.raises(PossibleDataLossError):
                store.open("w")

            store.close()
            assert not store.is_open

            # truncation ok here
            store.open("w")
            assert store.is_open
            assert len(store) == 0
            store.close()
            assert not store.is_open

            store = HDFStore(path, mode="a")
            store["a"] = tm.makeTimeSeries()

            # reopen as read
            store.open("r")
            assert store.is_open
            assert len(store) == 1
            assert store._mode == "r"
            store.close()
            assert not store.is_open

            # reopen as append
            store.open("a")
            assert store.is_open
            assert len(store) == 1
            assert store._mode == "a"
            store.close()
            assert not store.is_open

            # reopen as append (again)
            store.open("a")
            assert store.is_open
            assert len(store) == 1
            assert store._mode == "a"
            store.close()
            assert not store.is_open

    def test_open_args(self, setup_path):

        with tm.ensure_clean(setup_path) as path:

            df = tm.makeDataFrame()

            # create an in memory store
            store = HDFStore(
                path, mode="a", driver="H5FD_CORE", driver_core_backing_store=0
            )
            store["df"] = df
            store.append("df2", df)

            tm.assert_frame_equal(store["df"], df)
            tm.assert_frame_equal(store["df2"], df)

            store.close()

        # the file should not have actually been written
        assert not os.path.exists(path)

    def test_flush(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            store["a"] = tm.makeTimeSeries()
            store.flush()
            store.flush(fsync=True)

    def test_get(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            store["a"] = tm.makeTimeSeries()
            left = store.get("a")
            right = store["a"]
            tm.assert_series_equal(left, right)

            left = store.get("/a")
            right = store["/a"]
            tm.assert_series_equal(left, right)

            with pytest.raises(KeyError, match="'No object named b in the file'"):
                store.get("b")

    @pytest.mark.parametrize(
        "where, expected",
        [
            (
                "/",
                {
                    "": ({"first_group", "second_group"}, set()),
                    "/first_group": (set(), {"df1", "df2"}),
                    "/second_group": ({"third_group"}, {"df3", "s1"}),
                    "/second_group/third_group": (set(), {"df4"}),
                },
            ),
            (
                "/second_group",
                {
                    "/second_group": ({"third_group"}, {"df3", "s1"}),
                    "/second_group/third_group": (set(), {"df4"}),
                },
            ),
        ],
    )
    def test_walk(self, where, expected, setup_path):
        # GH10143
        objs = {
            "df1": DataFrame([1, 2, 3]),
            "df2": DataFrame([4, 5, 6]),
            "df3": DataFrame([6, 7, 8]),
            "df4": DataFrame([9, 10, 11]),
            "s1": Series([10, 9, 8]),
            # Next 3 items aren't pandas objects and should be ignored
            "a1": np.array([[1, 2, 3], [4, 5, 6]]),
            "tb1": np.array([(1, 2, 3), (4, 5, 6)], dtype="i,i,i"),
            "tb2": np.array([(7, 8, 9), (10, 11, 12)], dtype="i,i,i"),
        }

        with ensure_clean_store("walk_groups.hdf", mode="w") as store:
            store.put("/first_group/df1", objs["df1"])
            store.put("/first_group/df2", objs["df2"])
            store.put("/second_group/df3", objs["df3"])
            store.put("/second_group/s1", objs["s1"])
            store.put("/second_group/third_group/df4", objs["df4"])
            # Create non-pandas objects
            store._handle.create_array("/first_group", "a1", objs["a1"])
            store._handle.create_table("/first_group", "tb1", obj=objs["tb1"])
            store._handle.create_table("/second_group", "tb2", obj=objs["tb2"])

            assert len(list(store.walk(where=where))) == len(expected)
            for path, groups, leaves in store.walk(where=where):
                assert path in expected
                expected_groups, expected_frames = expected[path]
                assert expected_groups == set(groups)
                assert expected_frames == set(leaves)
                for leaf in leaves:
                    frame_path = "/".join([path, leaf])
                    obj = store.get(frame_path)
                    if "df" in leaf:
                        tm.assert_frame_equal(obj, objs[leaf])
                    else:
                        tm.assert_series_equal(obj, objs[leaf])

    def test_getattr(self, setup_path):

        with ensure_clean_store(setup_path) as store:

            s = tm.makeTimeSeries()
            store["a"] = s

            # test attribute access
            result = store.a
            tm.assert_series_equal(result, s)
            result = getattr(store, "a")
            tm.assert_series_equal(result, s)

            df = tm.makeTimeDataFrame()
            store["df"] = df
            result = store.df
            tm.assert_frame_equal(result, df)

            # errors
            for x in ["d", "mode", "path", "handle", "complib"]:
                with pytest.raises(AttributeError):
                    getattr(store, x)

            # not stores
            for x in ["mode", "path", "handle", "complib"]:
                getattr(store, f"_{x}")

    def test_put(self, setup_path):

        with ensure_clean_store(setup_path) as store:

            ts = tm.makeTimeSeries()
            df = tm.makeTimeDataFrame()
            store["a"] = ts
            store["b"] = df[:10]
            store["foo/bar/bah"] = df[:10]
            store["foo"] = df[:10]
            store["/foo"] = df[:10]
            store.put("c", df[:10], format="table")

            # not OK, not a table
            with pytest.raises(ValueError):
                store.put("b", df[10:], append=True)

            # node does not currently exist, test _is_table_type returns False
            # in this case
            _maybe_remove(store, "f")
            with pytest.raises(ValueError):
                store.put("f", df[10:], append=True)

            # can't put to a table (use append instead)
            with pytest.raises(ValueError):
                store.put("c", df[10:], append=True)

            # overwrite table
            store.put("c", df[:10], format="table", append=False)
            tm.assert_frame_equal(df[:10], store["c"])

    def test_put_string_index(self, setup_path):

        with ensure_clean_store(setup_path) as store:

            index = Index([f"I am a very long string index: {i}" for i in range(20)])
            s = Series(np.arange(20), index=index)
            df = DataFrame({"A": s, "B": s})

            store["a"] = s
            tm.assert_series_equal(store["a"], s)

            store["b"] = df
            tm.assert_frame_equal(store["b"], df)

            # mixed length
            index = Index(
                ["abcdefghijklmnopqrstuvwxyz1234567890"]
                + [f"I am a very long string index: {i}" for i in range(20)]
            )
            s = Series(np.arange(21), index=index)
            df = DataFrame({"A": s, "B": s})
            store["a"] = s
            tm.assert_series_equal(store["a"], s)

            store["b"] = df
            tm.assert_frame_equal(store["b"], df)

    def test_put_compression(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            df = tm.makeTimeDataFrame()

            store.put("c", df, format="table", complib="zlib")
            tm.assert_frame_equal(store["c"], df)

            # can't compress if format='fixed'
            with pytest.raises(ValueError):
                store.put("b", df, format="fixed", complib="zlib")

    @td.skip_if_windows_python_3
    def test_put_compression_blosc(self, setup_path):
        df = tm.makeTimeDataFrame()

        with ensure_clean_store(setup_path) as store:

            # can't compress if format='fixed'
            with pytest.raises(ValueError):
                store.put("b", df, format="fixed", complib="blosc")

            store.put("c", df, format="table", complib="blosc")
            tm.assert_frame_equal(store["c"], df)

    def test_complibs_default_settings(self, setup_path):
        # GH15943
        df = tm.makeDataFrame()

        # Set complevel and check if complib is automatically set to
        # default value
        with ensure_clean_path(setup_path) as tmpfile:
            df.to_hdf(tmpfile, "df", complevel=9)
            result = pd.read_hdf(tmpfile, "df")
            tm.assert_frame_equal(result, df)

            with tables.open_file(tmpfile, mode="r") as h5file:
                for node in h5file.walk_nodes(where="/df", classname="Leaf"):
                    assert node.filters.complevel == 9
                    assert node.filters.complib == "zlib"

        # Set complib and check to see if compression is disabled
        with ensure_clean_path(setup_path) as tmpfile:
            df.to_hdf(tmpfile, "df", complib="zlib")
            result = pd.read_hdf(tmpfile, "df")
            tm.assert_frame_equal(result, df)

            with tables.open_file(tmpfile, mode="r") as h5file:
                for node in h5file.walk_nodes(where="/df", classname="Leaf"):
                    assert node.filters.complevel == 0
                    assert node.filters.complib is None

        # Check if not setting complib or complevel results in no compression
        with ensure_clean_path(setup_path) as tmpfile:
            df.to_hdf(tmpfile, "df")
            result = pd.read_hdf(tmpfile, "df")
            tm.assert_frame_equal(result, df)

            with tables.open_file(tmpfile, mode="r") as h5file:
                for node in h5file.walk_nodes(where="/df", classname="Leaf"):
                    assert node.filters.complevel == 0
                    assert node.filters.complib is None

        # Check if file-defaults can be overridden on a per table basis
        with ensure_clean_path(setup_path) as tmpfile:
            store = HDFStore(tmpfile)
            store.append("dfc", df, complevel=9, complib="blosc")
            store.append("df", df)
            store.close()

            with tables.open_file(tmpfile, mode="r") as h5file:
                for node in h5file.walk_nodes(where="/df", classname="Leaf"):
                    assert node.filters.complevel == 0
                    assert node.filters.complib is None
                for node in h5file.walk_nodes(where="/dfc", classname="Leaf"):
                    assert node.filters.complevel == 9
                    assert node.filters.complib == "blosc"

    def test_complibs(self, setup_path):
        # GH14478
        df = tm.makeDataFrame()

        # Build a list of all complib and complevel tuples
        all_complibs = tables.filters.all_complibs
        # Remove lzo if it's not available on this platform
        if not tables.which_lib_version("lzo"):
            all_complibs.remove("lzo")
        # Remove bzip2 if it's not available on this platform
        if not tables.which_lib_version("bzip2"):
            all_complibs.remove("bzip2")

        all_levels = range(0, 10)
        all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels]

        for (lib, lvl) in all_tests:
            with ensure_clean_path(setup_path) as tmpfile:
                gname = "foo"

                # Write and read file to see if data is consistent
                df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl)
                result = pd.read_hdf(tmpfile, gname)
                tm.assert_frame_equal(result, df)

                # Open file and check metadata
                # for correct amount of compression
                h5table = tables.open_file(tmpfile, mode="r")
                for node in h5table.walk_nodes(where="/" + gname, classname="Leaf"):
                    assert node.filters.complevel == lvl
                    if lvl == 0:
                        assert node.filters.complib is None
                    else:
                        assert node.filters.complib == lib
                h5table.close()

    def test_put_integer(self, setup_path):
        # non-date, non-string index
        df = DataFrame(np.random.randn(50, 100))
        self._check_roundtrip(df, tm.assert_frame_equal, setup_path)

    def test_put_mixed_type(self, setup_path):
        df = tm.makeTimeDataFrame()
        df["obj1"] = "foo"
        df["obj2"] = "bar"
        df["bool1"] = df["A"] > 0
        df["bool2"] = df["B"] > 0
        df["bool3"] = True
        df["int1"] = 1
        df["int2"] = 2
        df["timestamp1"] = Timestamp("20010102")
        df["timestamp2"] = Timestamp("20010103")
        df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)
        df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)
        df.loc[df.index[3:6], ["obj1"]] = np.nan
        df = df._consolidate()._convert(datetime=True)

        with ensure_clean_store(setup_path) as store:
            _maybe_remove(store, "df")

            # PerformanceWarning
            with catch_warnings(record=True):
                simplefilter("ignore", pd.errors.PerformanceWarning)
                store.put("df", df)

            expected = store.get("df")
            tm.assert_frame_equal(expected, df)

    @pytest.mark.filterwarnings(
        "ignore:object name:tables.exceptions.NaturalNameWarning"
    )
    def test_append(self, setup_path):

        with ensure_clean_store(setup_path) as store:

            # this is allowed, but you almost always don't want to do it
            # tables.NaturalNameWarning):
            with catch_warnings(record=True):

                df = tm.makeTimeDataFrame()
                _maybe_remove(store, "df1")
                store.append("df1", df[:10])
                store.append("df1", df[10:])
                tm.assert_frame_equal(store["df1"], df)

                _maybe_remove(store, "df2")
                store.put("df2", df[:10], format="table")
                store.append("df2", df[10:])
                tm.assert_frame_equal(store["df2"], df)

                _maybe_remove(store, "df3")
                store.append("/df3", df[:10])
                store.append("/df3", df[10:])
                tm.assert_frame_equal(store["df3"], df)

                # this is allowed, but you almost always don't want to do it
                # tables.NaturalNameWarning
                _maybe_remove(store, "/df3 foo")
                store.append("/df3 foo", df[:10])
                store.append("/df3 foo", df[10:])
                tm.assert_frame_equal(store["df3 foo"], df)

                # dtype issues - mixed type in a single object column
                df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]])
                df["mixed_column"] = "testing"
                df.loc[2, "mixed_column"] = np.nan
                _maybe_remove(store, "df")
                store.append("df", df)
                tm.assert_frame_equal(store["df"], df)

                # uints - test storage of uints
                uint_data = DataFrame(
                    {
                        "u08": Series(
                            np.random.randint(0, high=255, size=5), dtype=np.uint8
                        ),
                        "u16": Series(
                            np.random.randint(0, high=65535, size=5), dtype=np.uint16
                        ),
                        "u32": Series(
                            np.random.randint(0, high=2 ** 30, size=5), dtype=np.uint32
                        ),
                        "u64": Series(
                            [2 ** 58, 2 ** 59, 2 ** 60, 2 ** 61, 2 ** 62],
                            dtype=np.uint64,
                        ),
                    },
                    index=np.arange(5),
                )
                _maybe_remove(store, "uints")
                store.append("uints", uint_data)
                tm.assert_frame_equal(store["uints"], uint_data)

                # uints - test storage of uints in indexable columns
                _maybe_remove(store, "uints")
                # 64-bit indices not yet supported
                store.append("uints", uint_data, data_columns=["u08", "u16", "u32"])
                tm.assert_frame_equal(store["uints"], uint_data)

    def test_append_series(self, setup_path):

        with ensure_clean_store(setup_path) as store:

            # basic
            ss = tm.makeStringSeries()
            ts = tm.makeTimeSeries()
            ns = Series(np.arange(100))

            store.append("ss", ss)
            result = store["ss"]
            tm.assert_series_equal(result, ss)
            assert result.name is None

            store.append("ts", ts)
            result = store["ts"]
            tm.assert_series_equal(result, ts)
            assert result.name is None

            ns.name = "foo"
            store.append("ns", ns)
            result = store["ns"]
            tm.assert_series_equal(result, ns)
            assert result.name == ns.name

            # select on the values
            expected = ns[ns > 60]
            result = store.select("ns", "foo>60")
            tm.assert_series_equal(result, expected)

            # select on the index and values
            expected = ns[(ns > 70) & (ns.index < 90)]
            result = store.select("ns", "foo>70 and index<90")
            tm.assert_series_equal(result, expected)

            # multi-index
            mi = DataFrame(np.random.randn(5, 1), columns=["A"])
            mi["B"] = np.arange(len(mi))
            mi["C"] = "foo"
            mi.loc[3:5, "C"] = "bar"
            mi.set_index(["C", "B"], inplace=True)
            s = mi.stack()
            s.index = s.index.droplevel(2)
            store.append("mi", s)
            tm.assert_series_equal(store["mi"], s)

    def test_store_index_types(self, setup_path):
        # GH5386
        # test storing various index types

        with ensure_clean_store(setup_path) as store:

            def check(format, index):
                df = DataFrame(np.random.randn(10, 2), columns=list("AB"))
                df.index = index(len(df))

                _maybe_remove(store, "df")
                store.put("df", df, format=format)
                tm.assert_frame_equal(df, store["df"])

            for index in [
                tm.makeFloatIndex,
                tm.makeStringIndex,
                tm.makeIntIndex,
                tm.makeDateIndex,
            ]:

                check("table", index)
                check("fixed", index)

            # period index currently broken for table
            # see GH7796 FIXME
            check("fixed", tm.makePeriodIndex)
            # check('table', tm.makePeriodIndex)

            # unicode
            index = tm.makeUnicodeIndex
            check("table", index)
            check("fixed", index)

    @pytest.mark.skipif(
        not is_platform_little_endian(), reason="platform is not little endian"
    )
    def test_encoding(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            df = DataFrame({"A": "foo", "B": "bar"}, index=range(5))
            df.loc[2, "A"] = np.nan
            df.loc[3, "B"] = np.nan
            _maybe_remove(store, "df")
            store.append("df", df, encoding="ascii")
            tm.assert_frame_equal(store["df"], df)

            expected = df.reindex(columns=["A"])
            result = store.select("df", Term("columns=A", encoding="ascii"))
            tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "val",
        [
            [b"E\xc9, 17", b"", b"a", b"b", b"c"],
            [b"E\xc9, 17", b"a", b"b", b"c"],
            [b"EE, 17", b"", b"a", b"b", b"c"],
            [b"E\xc9, 17", b"\xf8\xfc", b"a", b"b", b"c"],
            [b"", b"a", b"b", b"c"],
            [b"\xf8\xfc", b"a", b"b", b"c"],
            [b"A\xf8\xfc", b"", b"a", b"b", b"c"],
            [np.nan, b"", b"b", b"c"],
            [b"A\xf8\xfc", np.nan, b"", b"b", b"c"],
        ],
    )
    @pytest.mark.parametrize("dtype", ["category", object])
    def test_latin_encoding(self, setup_path, dtype, val):
        enc = "latin-1"
        nan_rep = ""
        key = "data"

        val = [x.decode(enc) if isinstance(x, bytes) else x for x in val]
        ser = Series(val, dtype=dtype)

        with ensure_clean_path(setup_path) as store:
            ser.to_hdf(store, key, format="table", encoding=enc, nan_rep=nan_rep)
            retr = read_hdf(store, key)

        s_nan = ser.replace(nan_rep, np.nan)

        tm.assert_series_equal(s_nan, retr)

    def test_append_some_nans(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            df = DataFrame(
                {
                    "A": Series(np.random.randn(20)).astype("int32"),
                    "A1": np.random.randn(20),
                    "A2": np.random.randn(20),
                    "B": "foo",
                    "C": "bar",
                    "D": Timestamp("20010101"),
                    "E": datetime.datetime(2001, 1, 2, 0, 0),
                },
                index=np.arange(20),
            )
            # some nans
            _maybe_remove(store, "df1")
            df.loc[0:15, ["A1", "B", "D", "E"]] = np.nan
            store.append("df1", df[:10])
            store.append("df1", df[10:])
            tm.assert_frame_equal(store["df1"], df)

            # first column
            df1 = df.copy()
            df1.loc[:, "A1"] = np.nan
            _maybe_remove(store, "df1")
            store.append("df1", df1[:10])
            store.append("df1", df1[10:])
            tm.assert_frame_equal(store["df1"], df1)

            # 2nd column
            df2 = df.copy()
            df2.loc[:, "A2"] = np.nan
            _maybe_remove(store, "df2")
            store.append("df2", df2[:10])
            store.append("df2", df2[10:])
            tm.assert_frame_equal(store["df2"], df2)

            # datetimes
            df3 = df.copy()
            df3.loc[:, "E"] = np.nan
            _maybe_remove(store, "df3")
            store.append("df3", df3[:10])
            store.append("df3", df3[10:])
            tm.assert_frame_equal(store["df3"], df3)

    def test_append_all_nans(self, setup_path):

        with ensure_clean_store(setup_path) as store:

            df = DataFrame(
                {"A1": np.random.randn(20), "A2": np.random.randn(20)},
                index=np.arange(20),
            )
            df.loc[0:15, :] = np.nan

            # nan some entire rows (dropna=True)
            _maybe_remove(store, "df")
            store.append("df", df[:10], dropna=True)
            store.append("df", df[10:], dropna=True)
            tm.assert_frame_equal(store["df"], df[-4:])

            # nan some entire rows (dropna=False)
            _maybe_remove(store, "df2")
            store.append("df2", df[:10], dropna=False)
            store.append("df2", df[10:], dropna=False)
            tm.assert_frame_equal(store["df2"], df)

            # tests the option io.hdf.dropna_table
            pd.set_option("io.hdf.dropna_table", False)
            _maybe_remove(store, "df3")
            store.append("df3", df[:10])
            store.append("df3", df[10:])
            tm.assert_frame_equal(store["df3"], df)

            pd.set_option("io.hdf.dropna_table", True)
            _maybe_remove(store, "df4")
            store.append("df4", df[:10])
            store.append("df4", df[10:])
            tm.assert_frame_equal(store["df4"], df[-4:])

            # nan some entire rows (strings are still written!)
            df = DataFrame(
                {
                    "A1": np.random.randn(20),
                    "A2": np.random.randn(20),
                    "B": "foo",
                    "C": "bar",
                },
                index=np.arange(20),
            )

            df.loc[0:15, :] = np.nan

            _maybe_remove(store, "df")
            store.append("df", df[:10], dropna=True)
            store.append("df", df[10:], dropna=True)
            tm.assert_frame_equal(store["df"], df)

            _maybe_remove(store, "df2")
            store.append("df2", df[:10], dropna=False)
            store.append("df2", df[10:], dropna=False)
            tm.assert_frame_equal(store["df2"], df)

            # nan some entire rows (but since we have dates they are still
            # written!)
            df = DataFrame(
                {
                    "A1": np.random.randn(20),
                    "A2": np.random.randn(20),
                    "B": "foo",
                    "C": "bar",
                    "D": Timestamp("20010101"),
                    "E": datetime.datetime(2001, 1, 2, 0, 0),
                },
                index=np.arange(20),
            )

            df.loc[0:15, :] = np.nan

            _maybe_remove(store, "df")
            store.append("df", df[:10], dropna=True)
            store.append("df", df[10:], dropna=True)
            tm.assert_frame_equal(store["df"], df)

            _maybe_remove(store, "df2")
            store.append("df2", df[:10], dropna=False)
            store.append("df2", df[10:], dropna=False)
            tm.assert_frame_equal(store["df2"], df)

    def test_store_dropna(self, setup_path):
        df_with_missing = DataFrame(
            {"col1": [0.0, np.nan, 2.0], "col2": [1.0, np.nan, np.nan]},
            index=list("abc"),
        )
        df_without_missing = DataFrame(
            {"col1": [0.0, 2.0], "col2": [1.0, np.nan]}, index=list("ac")
        )

        # Test to make sure defaults are to not drop.
        # Corresponding to Issue 9382
        with ensure_clean_path(setup_path) as path:
            df_with_missing.to_hdf(path, "df", format="table")
            reloaded = read_hdf(path, "df")
            tm.assert_frame_equal(df_with_missing, reloaded)

        with ensure_clean_path(setup_path) as path:
            df_with_missing.to_hdf(path, "df", format="table", dropna=False)
            reloaded = read_hdf(path, "df")
            tm.assert_frame_equal(df_with_missing, reloaded)

        with ensure_clean_path(setup_path) as path:
            df_with_missing.to_hdf(path, "df", format="table", dropna=True)
            reloaded = read_hdf(path, "df")
            tm.assert_frame_equal(df_without_missing, reloaded)

    def test_read_missing_key_close_store(self, setup_path):
        # GH 25766
        with ensure_clean_path(setup_path) as path:
            df = DataFrame({"a": range(2), "b": range(2)})
            df.to_hdf(path, "k1")

            with pytest.raises(KeyError, match="'No object named k2 in the file'"):
                pd.read_hdf(path, "k2")

            # smoke test to test that file is properly closed after
            # read with KeyError before another write
            df.to_hdf(path, "k2")

    def test_read_missing_key_opened_store(self, setup_path):
        # GH 28699
        with ensure_clean_path(setup_path) as path:
            df = DataFrame({"a": range(2), "b": range(2)})
            df.to_hdf(path, "k1")

            with HDFStore(path, "r") as store:

                with pytest.raises(KeyError, match="'No object named k2 in the file'"):
                    pd.read_hdf(store, "k2")

                # Test that the file is still open after a KeyError and that we can
                # still read from it.
                pd.read_hdf(store, "k1")

    def test_append_frame_column_oriented(self, setup_path):
        with ensure_clean_store(setup_path) as store:

            # column oriented
            df = tm.makeTimeDataFrame()
            df.index = df.index._with_freq(None)  # freq doesn't round-trip

            _maybe_remove(store, "df1")
            store.append("df1", df.iloc[:, :2], axes=["columns"])
            store.append("df1", df.iloc[:, 2:])
            tm.assert_frame_equal(store["df1"], df)

            result = store.select("df1", "columns=A")
            expected = df.reindex(columns=["A"])
            tm.assert_frame_equal(expected, result)

            # selection on the non-indexable
            result = store.select("df1", ("columns=A", "index=df.index[0:4]"))
            expected = df.reindex(columns=["A"], index=df.index[0:4])
            tm.assert_frame_equal(expected, result)

            # this isn't supported
            with pytest.raises(TypeError):
                store.select("df1", "columns=A and index>df.index[4]")

    def test_append_with_different_block_ordering(self, setup_path):

        # GH 4096; using same frames, but different block orderings
        with ensure_clean_store(setup_path) as store:

            for i in range(10):

                df = DataFrame(np.random.randn(10, 2), columns=list("AB"))
                df["index"] = range(10)
                df["index"] += i * 10
                df["int64"] = Series([1] * len(df), dtype="int64")
                df["int16"] = Series([1] * len(df), dtype="int16")

                if i % 2 == 0:
                    del df["int64"]
                    df["int64"] = Series([1] * len(df), dtype="int64")
                if i % 3 == 0:
                    a = df.pop("A")
                    df["A"] = a

                df.set_index("index", inplace=True)

                store.append("df", df)

        # test a different ordering but with more fields (like an invalid
        # combination)
        with ensure_clean_store(setup_path) as store:

            df = DataFrame(np.random.randn(10, 2), columns=list("AB"), dtype="float64")
            df["int64"] = Series([1] * len(df), dtype="int64")
            df["int16"] = Series([1] * len(df), dtype="int16")
            store.append("df", df)

            # store additional fields in different blocks
            df["int16_2"] = Series([1] * len(df), dtype="int16")
            with pytest.raises(ValueError):
                store.append("df", df)

            # store multiple additional fields in different blocks
            df["float_3"] = Series([1.0] * len(df), dtype="float64")
            with pytest.raises(ValueError):
                store.append("df", df)

    def test_append_with_strings(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            with catch_warnings(record=True):

                def check_col(key, name, size):
                    assert (
                        getattr(store.get_storer(key).table.description, name).itemsize
                        == size
                    )

                # avoid truncation on elements
                df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]])
                store.append("df_big", df)
                tm.assert_frame_equal(store.select("df_big"), df)
                check_col("df_big", "values_block_1", 15)

                # appending smaller string ok
                df2 = DataFrame([[124, "asdqy"], [346, "dggnhefbdfb"]])
                store.append("df_big", df2)
                expected = concat([df, df2])
                tm.assert_frame_equal(store.select("df_big"), expected)
                check_col("df_big", "values_block_1", 15)

                # avoid truncation on elements
                df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]])
                store.append("df_big2", df, min_itemsize={"values": 50})
                tm.assert_frame_equal(store.select("df_big2"), df)
                check_col("df_big2", "values_block_1", 50)

                # bigger string on next append
                store.append("df_new", df)
                df_new = DataFrame(
                    [[124, "abcdefqhij"], [346, "abcdefghijklmnopqrtsuvwxyz"]]
                )
                with pytest.raises(ValueError):
                    store.append("df_new", df_new)

                # min_itemsize on Series index (GH 11412)
                df = tm.makeMixedDataFrame().set_index("C")
                store.append("ss", df["B"], min_itemsize={"index": 4})
                tm.assert_series_equal(store.select("ss"), df["B"])

                # same as above, with data_columns=True
                store.append(
                    "ss2", df["B"], data_columns=True, min_itemsize={"index": 4}
                )
                tm.assert_series_equal(store.select("ss2"), df["B"])

                # min_itemsize in index without appending (GH 10381)
                store.put("ss3", df, format="table", min_itemsize={"index": 6})
                # just make sure there is a longer string:
                df2 = df.copy().reset_index().assign(C="longer").set_index("C")
                store.append("ss3", df2)
                tm.assert_frame_equal(store.select("ss3"), pd.concat([df, df2]))

                # same as above, with a Series
                store.put("ss4", df["B"], format="table", min_itemsize={"index": 6})
                store.append("ss4", df2["B"])
                tm.assert_series_equal(
                    store.select("ss4"), pd.concat([df["B"], df2["B"]])
                )

                # with nans
                _maybe_remove(store, "df")
                df = tm.makeTimeDataFrame()
                df["string"] = "foo"
                df.loc[df.index[1:4], "string"] = np.nan
                df["string2"] = "bar"
                df.loc[df.index[4:8], "string2"] = np.nan
                df["string3"] = "bah"
                df.loc[df.index[1:], "string3"] = np.nan
                store.append("df", df)
                result = store.select("df")
                tm.assert_frame_equal(result, df)

        with ensure_clean_store(setup_path) as store:

            def check_col(key, name, size):
                assert (
                    getattr(store.get_storer(key).table.description, name).itemsize
                    == size
                )

            df = DataFrame({"A": "foo", "B": "bar"}, index=range(10))

            # a min_itemsize that creates a data_column
            _maybe_remove(store, "df")
            store.append("df", df, min_itemsize={"A": 200})
            check_col("df", "A", 200)
            assert store.get_storer("df").data_columns == ["A"]

            # a min_itemsize that creates a data_column2
            _maybe_remove(store, "df")
            store.append("df", df, data_columns=["B"], min_itemsize={"A": 200})
            check_col("df", "A", 200)
            assert store.get_storer("df").data_columns == ["B", "A"]

            # a min_itemsize that creates a data_column2
            _maybe_remove(store, "df")
            store.append("df", df, data_columns=["B"], min_itemsize={"values": 200})
            check_col("df", "B", 200)
            check_col("df", "values_block_0", 200)
            assert store.get_storer("df").data_columns == ["B"]

            # infer the .typ on subsequent appends
            _maybe_remove(store, "df")
            store.append("df", df[:5], min_itemsize=200)
            store.append("df", df[5:], min_itemsize=200)
            tm.assert_frame_equal(store["df"], df)

            # invalid min_itemsize keys
            df = DataFrame(["foo", "foo", "foo", "barh", "barh", "barh"], columns=["A"])
            _maybe_remove(store, "df")
            with pytest.raises(ValueError):
                store.append("df", df, min_itemsize={"foo": 20, "foobar": 20})

    def test_append_with_empty_string(self, setup_path):

        with ensure_clean_store(setup_path) as store:

            # with all empty strings (GH 12242)
            df = DataFrame({"x": ["a", "b", "c", "d", "e", "f", ""]})
            store.append("df", df[:-1], min_itemsize={"x": 1})
            store.append("df", df[-1:], min_itemsize={"x": 1})
            tm.assert_frame_equal(store.select("df"), df)

    def test_to_hdf_with_min_itemsize(self, setup_path):

        with ensure_clean_path(setup_path) as path:

            # min_itemsize in index with to_hdf (GH 10381)
            df = tm.makeMixedDataFrame().set_index("C")
            df.to_hdf(path, "ss3", format="table", min_itemsize={"index": 6})
            # just make sure there is a longer string:
            df2 = df.copy().reset_index().assign(C="longer").set_index("C")
            df2.to_hdf(path, "ss3", append=True, format="table")
            tm.assert_frame_equal(pd.read_hdf(path, "ss3"), pd.concat([df, df2]))

            # same as above, with a Series
            df["B"].to_hdf(path, "ss4", format="table", min_itemsize={"index": 6})
            df2["B"].to_hdf(path, "ss4", append=True, format="table")
            tm.assert_series_equal(
                pd.read_hdf(path, "ss4"), pd.concat([df["B"], df2["B"]])
            )

    @pytest.mark.parametrize("format", ["fixed", "table"])
    def test_to_hdf_errors(self, format, setup_path):

        data = ["\ud800foo"]
        ser = Series(data, index=Index(data))
        with ensure_clean_path(setup_path) as path:
            # GH 20835
            ser.to_hdf(path, "table", format=format, errors="surrogatepass")

            result = pd.read_hdf(path, "table", errors="surrogatepass")
            tm.assert_series_equal(result, ser)

    def test_append_with_data_columns(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            df = tm.makeTimeDataFrame()
            df.iloc[0, df.columns.get_loc("B")] = 1.0
            _maybe_remove(store, "df")
            store.append("df", df[:2], data_columns=["B"])
            store.append("df", df[2:])
            tm.assert_frame_equal(store["df"], df)

            # check that we have indices created
            assert store._handle.root.df.table.cols.index.is_indexed is True
            assert store._handle.root.df.table.cols.B.is_indexed is True

            # data column searching
            result = store.select("df", "B>0")
            expected = df[df.B > 0]
            tm.assert_frame_equal(result, expected)

            # data column searching (with an indexable and a data_columns)
            result = store.select("df", "B>0 and index>df.index[3]")
            df_new = df.reindex(index=df.index[4:])
            expected = df_new[df_new.B > 0]
            tm.assert_frame_equal(result, expected)

            # data column selection with a string data_column
            df_new = df.copy()
            df_new["string"] = "foo"
            df_new.loc[df_new.index[1:4], "string"] = np.nan
            df_new.loc[df_new.index[5:6], "string"] = "bar"
            _maybe_remove(store, "df")
            store.append("df", df_new, data_columns=["string"])
            result = store.select("df", "string='foo'")
            expected = df_new[df_new.string == "foo"]
            tm.assert_frame_equal(result, expected)

        # using min_itemsize and a data column
        def check_col(key, name, size):
            assert (
                getattr(store.get_storer(key).table.description, name).itemsize
                == size
            )

        with ensure_clean_store(setup_path) as store:
            _maybe_remove(store, "df")
            store.append(
                "df", df_new, data_columns=["string"], min_itemsize={"string": 30}
            )
            check_col("df", "string", 30)
            _maybe_remove(store, "df")
            store.append("df", df_new, data_columns=["string"], min_itemsize=30)
            check_col("df", "string", 30)
            _maybe_remove(store, "df")
            store.append(
                "df", df_new, data_columns=["string"], min_itemsize={"values": 30}
            )
            check_col("df", "string", 30)

        with ensure_clean_store(setup_path) as store:
            df_new["string2"] = "foobarbah"
            df_new["string_block1"] = "foobarbah1"
            df_new["string_block2"] = "foobarbah2"
            _maybe_remove(store, "df")
            store.append(
                "df",
                df_new,
                data_columns=["string", "string2"],
                min_itemsize={"string": 30, "string2": 40, "values": 50},
            )
            check_col("df", "string", 30)
            check_col("df", "string2", 40)
            check_col("df", "values_block_1", 50)

        with ensure_clean_store(setup_path) as store:
            # multiple data columns
            df_new = df.copy()
            df_new.iloc[0, df_new.columns.get_loc("A")] = 1.0
            df_new.iloc[0, df_new.columns.get_loc("B")] = -1.0
            df_new["string"] = "foo"

            sl = df_new.columns.get_loc("string")
            df_new.iloc[1:4, sl] = np.nan
            df_new.iloc[5:6, sl] = "bar"

            df_new["string2"] = "foo"
            sl = df_new.columns.get_loc("string2")
            df_new.iloc[2:5, sl] = np.nan
            df_new.iloc[7:8, sl] = "bar"
            _maybe_remove(store, "df")
            store.append("df", df_new, data_columns=["A", "B", "string", "string2"])
            result = store.select(
                "df", "string='foo' and string2='foo' and A>0 and B<0"
            )
            expected = df_new[
                (df_new.string == "foo")
                & (df_new.string2 == "foo")
                & (df_new.A > 0)
                & (df_new.B < 0)
            ]
            tm.assert_frame_equal(result, expected, check_freq=False)
            # FIXME: 2020-05-07 freq check randomly fails in the CI

            # yield an empty frame
            result = store.select("df", "string='foo' and string2='cool'")
            expected = df_new[(df_new.string == "foo") & (df_new.string2 == "cool")]
            tm.assert_frame_equal(result, expected)

        with ensure_clean_store(setup_path) as store:
            # doc example
            df_dc = df.copy()
            df_dc["string"] = "foo"
            df_dc.loc[df_dc.index[4:6], "string"] = np.nan
            df_dc.loc[df_dc.index[7:9], "string"] = "bar"
            df_dc["string2"] = "cool"
            df_dc["datetime"] = Timestamp("20010102")
            df_dc = df_dc._convert(datetime=True)
            df_dc.loc[df_dc.index[3:5], ["A", "B", "datetime"]] = np.nan

            _maybe_remove(store, "df_dc")
            store.append(
                "df_dc", df_dc, data_columns=["B", "C", "string", "string2", "datetime"]
            )
            result = store.select("df_dc", "B>0")

            expected = df_dc[df_dc.B > 0]
            tm.assert_frame_equal(result, expected)

            result = store.select("df_dc", ["B > 0", "C > 0", "string == foo"])
            expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")]
            tm.assert_frame_equal(result, expected, check_freq=False)
            # FIXME: 2020-12-07 intermittent build failures here with freq of
            # None instead of BDay(4)

        with ensure_clean_store(setup_path) as store:
            # doc example part 2
            np.random.seed(1234)
            index = date_range("1/1/2000", periods=8)
            df_dc = DataFrame(
                np.random.randn(8, 3), index=index, columns=["A", "B", "C"]
            )
            df_dc["string"] = "foo"
            df_dc.loc[df_dc.index[4:6], "string"] = np.nan
            df_dc.loc[df_dc.index[7:9], "string"] = "bar"
            df_dc.loc[:, ["B", "C"]] = df_dc.loc[:, ["B", "C"]].abs()
            df_dc["string2"] = "cool"

            # on-disk operations
            store.append("df_dc", df_dc, data_columns=["B", "C", "string", "string2"])

            result = store.select("df_dc", "B>0")
            expected = df_dc[df_dc.B > 0]
            tm.assert_frame_equal(result, expected)

            result = store.select("df_dc", ["B > 0", "C > 0", 'string == "foo"'])
            expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")]
            tm.assert_frame_equal(result, expected)

    def test_create_table_index(self, setup_path):

        with ensure_clean_store(setup_path) as store:

            with catch_warnings(record=True):

                def col(t, column):
                    return getattr(store.get_storer(t).table.cols, column)

                # data columns
                df = tm.makeTimeDataFrame()
                df["string"] = "foo"
                df["string2"] = "bar"
                store.append("f", df, data_columns=["string", "string2"])
                assert col("f", "index").is_indexed is True
                assert col("f", "string").is_indexed is True
                assert col("f", "string2").is_indexed is True

                # specify index=columns
                store.append(
                    "f2", df, index=["string"], data_columns=["string", "string2"]
                )
                assert col("f2", "index").is_indexed is False
                assert col("f2", "string").is_indexed is True
                assert col("f2", "string2").is_indexed is False

                # try to index a non-table
                _maybe_remove(store, "f2")
                store.put("f2", df)
                with pytest.raises(TypeError):
                    store.create_table_index("f2")

    def test_create_table_index_data_columns_argument(self, setup_path):
        # GH 28156

        with ensure_clean_store(setup_path) as store:

            with catch_warnings(record=True):

                def col(t, column):
                    return getattr(store.get_storer(t).table.cols, column)

                # data columns
                df = tm.makeTimeDataFrame()
                df["string"] = "foo"
                df["string2"] = "bar"
                store.append("f", df, data_columns=["string"])
                assert col("f", "index").is_indexed is True
                assert col("f", "string").is_indexed is True

                msg = "'Cols' object has no attribute 'string2'"
                with pytest.raises(AttributeError, match=msg):
                    col("f", "string2").is_indexed

                # try to index a col which isn't a data_column
                msg = (
                    "column string2 is not a data_column.\n"
                    "In order to read column string2 you must reload the dataframe \n"
                    "into HDFStore and include string2 with the data_columns argument."
                )
                with pytest.raises(AttributeError, match=msg):
                    store.create_table_index("f", columns=["string2"])

    def test_append_hierarchical(self, setup_path):
        index = MultiIndex(
            levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
            codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
            names=["foo", "bar"],
        )
        df = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"])

        with ensure_clean_store(setup_path) as store:
            store.append("mi", df)
            result = store.select("mi")
            tm.assert_frame_equal(result, df)

            # GH 3748
            result = store.select("mi", columns=["A", "B"])
            expected = df.reindex(columns=["A", "B"])
            tm.assert_frame_equal(result, expected)

        with ensure_clean_path("test.hdf") as path:
            df.to_hdf(path, "df", format="table")
            result = read_hdf(path, "df", columns=["A", "B"])
            expected = df.reindex(columns=["A", "B"])
            tm.assert_frame_equal(result, expected)

    def test_column_multiindex(self, setup_path):
        # GH 4710
        # recreate multi-indexes properly

        index = MultiIndex.from_tuples(
            [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")], names=["first", "second"]
        )
        df = DataFrame(np.arange(12).reshape(3, 4), columns=index)
        expected = df.copy()
        if isinstance(expected.index, RangeIndex):
            expected.index = Int64Index(expected.index)

        with ensure_clean_store(setup_path) as store:

            store.put("df", df)
            tm.assert_frame_equal(
                store["df"], expected, check_index_type=True, check_column_type=True
            )

            store.put("df1", df, format="table")
            tm.assert_frame_equal(
                store["df1"], expected, check_index_type=True, check_column_type=True
            )

            with pytest.raises(ValueError):
                store.put("df2", df, format="table", data_columns=["A"])
            with pytest.raises(ValueError):
                store.put("df3", df, format="table", data_columns=True)

        # appending multi-column on existing table (see GH 6167)
        with ensure_clean_store(setup_path) as store:
            store.append("df2", df)
            store.append("df2", df)

            tm.assert_frame_equal(store["df2"], concat((df, df)))

        # non_index_axes name
        df = DataFrame(
            np.arange(12).reshape(3, 4), columns=Index(list("ABCD"), name="foo")
        )
        expected = df.copy()
        if isinstance(expected.index, RangeIndex):
            expected.index = Int64Index(expected.index)

        with ensure_clean_store(setup_path) as store:

            store.put("df1", df, format="table")
            tm.assert_frame_equal(
                store["df1"], expected, check_index_type=True, check_column_type=True
            )

    def test_store_multiindex(self, setup_path):

        # validate multi-index names
        # GH 5527
        with ensure_clean_store(setup_path) as store:

            def make_index(names=None):
                return MultiIndex.from_tuples(
                    [
                        (datetime.datetime(2013, 12, d), s, t)
                        for d in range(1, 3)
                        for s in range(2)
                        for t in range(3)
                    ],
                    names=names,
                )

            # no names
            _maybe_remove(store, "df")
            df = DataFrame(np.zeros((12, 2)), columns=["a", "b"], index=make_index())
            store.append("df", df)
            tm.assert_frame_equal(store.select("df"), df)

            # partial names
            _maybe_remove(store, "df")
            df = DataFrame(
                np.zeros((12, 2)),
                columns=["a", "b"],
                index=make_index(["date", None, None]),
            )
            store.append("df", df)
            tm.assert_frame_equal(store.select("df"), df)

            # series
            _maybe_remove(store, "s")
            s = Series(np.zeros(12), index=make_index(["date", None, None]))
            store.append("s", s)
            xp = Series(np.zeros(12), index=make_index(["date", "level_1", "level_2"]))
            tm.assert_series_equal(store.select("s"), xp)

            # dup with column
            _maybe_remove(store, "df")
            df = DataFrame(
                np.zeros((12, 2)),
                columns=["a", "b"],
                index=make_index(["date", "a", "t"]),
            )
            with pytest.raises(ValueError):
                store.append("df", df)

            # dup within level
            _maybe_remove(store, "df")
            df = DataFrame(
                np.zeros((12, 2)),
                columns=["a", "b"],
                index=make_index(["date", "date", "date"]),
            )
            with pytest.raises(ValueError):
                store.append("df", df)

            # fully named
            _maybe_remove(store, "df")
            df = DataFrame(
                np.zeros((12, 2)),
                columns=["a", "b"],
                index=make_index(["date", "s", "t"]),
            )
            store.append("df", df)
            tm.assert_frame_equal(store.select("df"), df)

    def test_select_columns_in_where(self, setup_path):

        # GH 6169
        # recreate multi-indexes when columns is passed
        # in the `where` argument
        index = MultiIndex(
            levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
            codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
            names=["foo_name", "bar_name"],
        )

        # With a DataFrame
        df = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"])

        with ensure_clean_store(setup_path) as store:
            store.put("df", df, format="table")
            expected = df[["A"]]

            tm.assert_frame_equal(store.select("df", columns=["A"]), expected)

            tm.assert_frame_equal(store.select("df", where="columns=['A']"), expected)

        # With a Series
        s = Series(np.random.randn(10), index=index, name="A")
        with ensure_clean_store(setup_path) as store:
            store.put("s", s, format="table")
            tm.assert_series_equal(store.select("s", where="columns=['A']"), s)

    def test_mi_data_columns(self, setup_path):
        # GH 14435
        idx = MultiIndex.from_arrays(
            [date_range("2000-01-01", periods=5), range(5)], names=["date", "id"]
        )
        df = DataFrame({"a": [1.1, 1.2, 1.3, 1.4, 1.5]}, index=idx)

        with ensure_clean_store(setup_path) as store:
            store.append("df", df, data_columns=True)

            actual = store.select("df", where="id == 1")
            expected = df.iloc[[1], :]
            tm.assert_frame_equal(actual, expected)

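    # Note: with data_columns=True each MultiIndex level is written as its own
    # queryable column, which is why the where clause "id == 1" above can
    # filter directly on the second index level.
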
    def test_pass_spec_to_storer(self, setup_path):

        df = tm.makeDataFrame()

        with ensure_clean_store(setup_path) as store:
            store.put("df", df)
            with pytest.raises(TypeError):
                store.select("df", columns=["A"])
            with pytest.raises(TypeError):
                store.select("df", where=[("columns=A")])

    def test_append_misc(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            df = tm.makeDataFrame()
            store.append("df", df, chunksize=1)
            result = store.select("df")
            tm.assert_frame_equal(result, df)

            store.append("df1", df, expectedrows=10)
            result = store.select("df1")
            tm.assert_frame_equal(result, df)

        # more chunksize in append tests
        def check(obj, comparator):
            for c in [10, 200, 1000]:
                with ensure_clean_store(setup_path, mode="w") as store:
                    store.append("obj", obj, chunksize=c)
                    result = store.select("obj")
                    comparator(result, obj)

        df = tm.makeDataFrame()
        df["string"] = "foo"
        df["float322"] = 1.0
        df["float322"] = df["float322"].astype("float32")
        df["bool"] = df["float322"] > 0
        df["time1"] = Timestamp("20130101")
        df["time2"] = Timestamp("20130102")
        check(df, tm.assert_frame_equal)

        # empty frame, GH4273
        with ensure_clean_store(setup_path) as store:

            # 0 len
            df_empty = DataFrame(columns=list("ABC"))
            store.append("df", df_empty)
            with pytest.raises(KeyError, match="'No object named df in the file'"):
                store.select("df")

            # repeated append of 0/non-zero frames
            df = DataFrame(np.random.rand(10, 3), columns=list("ABC"))
            store.append("df", df)
            tm.assert_frame_equal(store.select("df"), df)
            store.append("df", df_empty)
            tm.assert_frame_equal(store.select("df"), df)

            # store
            df = DataFrame(columns=list("ABC"))
            store.put("df2", df)
            tm.assert_frame_equal(store.select("df2"), df)

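    # Note: appending a zero-length frame is a no-op in table format, so no
    # node is created; that is why the first select("df") above raises
    # KeyError until a non-empty frame has been appended under that key.
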
    def test_append_raise(self, setup_path):

        with ensure_clean_store(setup_path) as store:

            # test append with invalid input to get good error messages

            # list in column
            df = tm.makeDataFrame()
            df["invalid"] = [["a"]] * len(df)
            assert df.dtypes["invalid"] == np.object_
            with pytest.raises(TypeError):
                store.append("df", df)

            # multiple invalid columns
            df["invalid2"] = [["a"]] * len(df)
            df["invalid3"] = [["a"]] * len(df)
            with pytest.raises(TypeError):
                store.append("df", df)

            # datetime with embedded nans as object
            df = tm.makeDataFrame()
            s = Series(datetime.datetime(2001, 1, 2), index=df.index)
            s = s.astype(object)
            s[0:5] = np.nan
            df["invalid"] = s
            assert df.dtypes["invalid"] == np.object_
            with pytest.raises(TypeError):
                store.append("df", df)

            # directly ndarray
            with pytest.raises(TypeError):
                store.append("df", np.arange(10))

            # series directly
            with pytest.raises(TypeError):
                store.append("df", Series(np.arange(10)))

            # appending an incompatible table
            df = tm.makeDataFrame()
            store.append("df", df)

            df["foo"] = "foo"
            with pytest.raises(ValueError):
                store.append("df", df)

    def test_table_index_incompatible_dtypes(self, setup_path):
        df1 = DataFrame({"a": [1, 2, 3]})
        df2 = DataFrame({"a": [4, 5, 6]}, index=date_range("1/1/2000", periods=3))

        with ensure_clean_store(setup_path) as store:
            store.put("frame", df1, format="table")
            with pytest.raises(TypeError):
                store.put("frame", df2, format="table", append=True)

    def test_table_values_dtypes_roundtrip(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            df1 = DataFrame({"a": [1, 2, 3]}, dtype="f8")
            store.append("df_f8", df1)
            tm.assert_series_equal(df1.dtypes, store["df_f8"].dtypes)

            df2 = DataFrame({"a": [1, 2, 3]}, dtype="i8")
            store.append("df_i8", df2)
            tm.assert_series_equal(df2.dtypes, store["df_i8"].dtypes)

            # incompatible dtype
            with pytest.raises(ValueError):
                store.append("df_i8", df1)

            # check creation/storage/retrieval of float32 (a bit hacky to
            # actually create them though)
            df1 = DataFrame(np.array([[1], [2], [3]], dtype="f4"), columns=["A"])
            store.append("df_f4", df1)
            tm.assert_series_equal(df1.dtypes, store["df_f4"].dtypes)
            assert df1.dtypes[0] == "float32"

            # check with mixed dtypes
            df1 = DataFrame(
                {
                    c: Series(np.random.randint(5), dtype=c)
                    for c in ["float32", "float64", "int32", "int64", "int16", "int8"]
                }
            )
            df1["string"] = "foo"
            df1["float322"] = 1.0
            df1["float322"] = df1["float322"].astype("float32")
            df1["bool"] = df1["float32"] > 0
            df1["time1"] = Timestamp("20130101")
            df1["time2"] = Timestamp("20130102")

            store.append("df_mixed_dtypes1", df1)
            result = store.select("df_mixed_dtypes1").dtypes.value_counts()
            result.index = [str(i) for i in result.index]
            expected = Series(
                {
                    "float32": 2,
                    "float64": 1,
                    "int32": 1,
                    "bool": 1,
                    "int16": 1,
                    "int8": 1,
                    "int64": 1,
                    "object": 1,
                    "datetime64[ns]": 2,
                }
            )
            result = result.sort_index()
            expected = expected.sort_index()
            tm.assert_series_equal(result, expected)

    def test_table_mixed_dtypes(self, setup_path):

        # frame
        df = tm.makeDataFrame()
        df["obj1"] = "foo"
        df["obj2"] = "bar"
        df["bool1"] = df["A"] > 0
        df["bool2"] = df["B"] > 0
        df["bool3"] = True
        df["int1"] = 1
        df["int2"] = 2
        df["timestamp1"] = Timestamp("20010102")
        df["timestamp2"] = Timestamp("20010103")
        df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)
        df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)
        df.loc[df.index[3:6], ["obj1"]] = np.nan
        df = df._consolidate()._convert(datetime=True)

        with ensure_clean_store(setup_path) as store:
            store.append("df1_mixed", df)
            tm.assert_frame_equal(store.select("df1_mixed"), df)

    def test_unimplemented_dtypes_table_columns(self, setup_path):

        with ensure_clean_store(setup_path) as store:

            dtypes = [("date", datetime.date(2001, 1, 2))]

            # currently not supported dtypes
            for n, f in dtypes:
                df = tm.makeDataFrame()
                df[n] = f
                with pytest.raises(TypeError):
                    store.append(f"df1_{n}", df)

        # frame
        df = tm.makeDataFrame()
        df["obj1"] = "foo"
        df["obj2"] = "bar"
        df["datetime1"] = datetime.date(2001, 1, 2)
        df = df._consolidate()._convert(datetime=True)

        with ensure_clean_store(setup_path) as store:
            # this fails because we have a date in the object block
            with pytest.raises(TypeError):
                store.append("df_unimplemented", df)

    def test_calendar_roundtrip_issue(self, setup_path):

        # 8591
        # doc example from tseries holiday section
        weekmask_egypt = "Sun Mon Tue Wed Thu"
        holidays = [
            "2012-05-01",
            datetime.datetime(2013, 5, 1),
            np.datetime64("2014-05-01"),
        ]
        bday_egypt = pd.offsets.CustomBusinessDay(
            holidays=holidays, weekmask=weekmask_egypt
        )
        dt = datetime.datetime(2013, 4, 30)
        dts = date_range(dt, periods=5, freq=bday_egypt)

        s = Series(dts.weekday, dts).map(Series("Mon Tue Wed Thu Fri Sat Sun".split()))

        with ensure_clean_store(setup_path) as store:

            store.put("fixed", s)
            result = store.select("fixed")
            tm.assert_series_equal(result, s)

            store.append("table", s)
            result = store.select("table")
            tm.assert_series_equal(result, s)

    def test_append_with_timedelta(self, setup_path):
        # GH 3577
        # append timedelta

        df = DataFrame(
            {
                "A": Timestamp("20130101"),
                "B": [
                    Timestamp("20130101") + timedelta(days=i, seconds=10)
                    for i in range(10)
                ],
            }
        )
        df["C"] = df["A"] - df["B"]
        df.loc[3:5, "C"] = np.nan

        with ensure_clean_store(setup_path) as store:

            # table
            _maybe_remove(store, "df")
            store.append("df", df, data_columns=True)
            result = store.select("df")
            tm.assert_frame_equal(result, df)

            result = store.select("df", where="C<100000")
            tm.assert_frame_equal(result, df)

            result = store.select("df", where="C<pd.Timedelta('-3D')")
            tm.assert_frame_equal(result, df.iloc[3:])

            result = store.select("df", "C<'-3D'")
            tm.assert_frame_equal(result, df.iloc[3:])

            # a bit hacky here as we don't really deal with the NaT properly

            result = store.select("df", "C<'-500000s'")
            result = result.dropna(subset=["C"])
            tm.assert_frame_equal(result, df.iloc[6:])

            result = store.select("df", "C<'-3.5D'")
            result = result.iloc[1:]
            tm.assert_frame_equal(result, df.iloc[4:])

            # fixed
            _maybe_remove(store, "df2")
            store.put("df2", df)
            result = store.select("df2")
            tm.assert_frame_equal(result, df)

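    # Note: as the selects above demonstrate, a timedelta comparison in a
    # where string can be spelled either as an explicit Timedelta expression
    # ("C<pd.Timedelta('-3D')") or as a plain offset string ("C<'-3D'",
    # "C<'-500000s'"); both forms parse to the same query.
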
    def test_remove(self, setup_path):

        with ensure_clean_store(setup_path) as store:

            ts = tm.makeTimeSeries()
            df = tm.makeDataFrame()
            store["a"] = ts
            store["b"] = df
            _maybe_remove(store, "a")
            assert len(store) == 1
            tm.assert_frame_equal(df, store["b"])

            _maybe_remove(store, "b")
            assert len(store) == 0

            # nonexistence
            with pytest.raises(
                KeyError, match="'No object named a_nonexistent_store in the file'"
            ):
                store.remove("a_nonexistent_store")

            # pathing
            store["a"] = ts
            store["b/foo"] = df
            _maybe_remove(store, "foo")
            _maybe_remove(store, "b/foo")
            assert len(store) == 1

            store["a"] = ts
            store["b/foo"] = df
            _maybe_remove(store, "b")
            assert len(store) == 1

            # __delitem__
            store["a"] = ts
            store["b"] = df
            del store["a"]
            del store["b"]
            assert len(store) == 0

    def test_invalid_terms(self, setup_path):

        with ensure_clean_store(setup_path) as store:

            with catch_warnings(record=True):

                df = tm.makeTimeDataFrame()
                df["string"] = "foo"
                df.loc[df.index[0:4], "string"] = "bar"

                store.put("df", df, format="table")

                # some invalid terms
                with pytest.raises(TypeError):
                    Term()

                # more invalid
                with pytest.raises(ValueError):
                    store.select("df", "df.index[3]")

                with pytest.raises(SyntaxError):
                    store.select("df", "index>")

        # from the docs
        with ensure_clean_path(setup_path) as path:
            dfq = DataFrame(
                np.random.randn(10, 4),
                columns=list("ABCD"),
                index=date_range("20130101", periods=10),
            )
            dfq.to_hdf(path, "dfq", format="table", data_columns=True)

            # check ok
            read_hdf(
                path, "dfq", where="index>Timestamp('20130104') & columns=['A', 'B']"
            )
            read_hdf(path, "dfq", where="A>0 or C>0")

            # catch the invalid reference
        with ensure_clean_path(setup_path) as path:
            dfq = DataFrame(
                np.random.randn(10, 4),
                columns=list("ABCD"),
                index=date_range("20130101", periods=10),
            )
            dfq.to_hdf(path, "dfq", format="table")

            with pytest.raises(ValueError):
                read_hdf(path, "dfq", where="A>0 or C>0")

    def test_same_name_scoping(self, setup_path):

        with ensure_clean_store(setup_path) as store:

            import pandas as pd

            df = DataFrame(
                np.random.randn(20, 2), index=pd.date_range("20130101", periods=20)
            )
            store.put("df", df, format="table")
            expected = df[df.index > Timestamp("20130105")]

            import datetime

            result = store.select("df", "index>datetime.datetime(2013,1,5)")
            tm.assert_frame_equal(result, expected)

            from datetime import datetime  # noqa

            # technically an error, but allow it
            result = store.select("df", "index>datetime.datetime(2013,1,5)")
            tm.assert_frame_equal(result, expected)

            result = store.select("df", "index>datetime(2013,1,5)")
            tm.assert_frame_equal(result, expected)

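    # Note: names referenced inside a where string are resolved from the
    # enclosing Python scope at the call site, so `datetime` above picks up
    # whichever binding (the module or the class) is currently in scope.
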
    def test_series(self, setup_path):

        s = tm.makeStringSeries()
        self._check_roundtrip(s, tm.assert_series_equal, path=setup_path)

        ts = tm.makeTimeSeries()
        self._check_roundtrip(ts, tm.assert_series_equal, path=setup_path)

        ts2 = Series(ts.index, Index(ts.index, dtype=object))
        self._check_roundtrip(ts2, tm.assert_series_equal, path=setup_path)

        ts3 = Series(
            ts.values, Index(np.asarray(ts.index, dtype=object), dtype=object)
        )
        self._check_roundtrip(
            ts3, tm.assert_series_equal, path=setup_path, check_index_type=False
        )

    def test_float_index(self, setup_path):

        # GH #454
        index = np.random.randn(10)
        s = Series(np.random.randn(10), index=index)
        self._check_roundtrip(s, tm.assert_series_equal, path=setup_path)

    def test_tuple_index(self, setup_path):

        # GH #492
        col = np.arange(10)
        idx = [(0.0, 1.0), (2.0, 3.0), (4.0, 5.0)]
        data = np.random.randn(30).reshape((3, 10))
        DF = DataFrame(data, index=idx, columns=col)

        with catch_warnings(record=True):
            simplefilter("ignore", pd.errors.PerformanceWarning)
            self._check_roundtrip(DF, tm.assert_frame_equal, path=setup_path)

    @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
    def test_index_types(self, setup_path):

        with catch_warnings(record=True):
            values = np.random.randn(2)

            func = lambda l, r: tm.assert_series_equal(l, r, check_index_type=True)

        with catch_warnings(record=True):
            ser = Series(values, [0, "y"])
            self._check_roundtrip(ser, func, path=setup_path)

        with catch_warnings(record=True):
            ser = Series(values, [datetime.datetime.today(), 0])
            self._check_roundtrip(ser, func, path=setup_path)

        with catch_warnings(record=True):
            ser = Series(values, ["y", 0])
            self._check_roundtrip(ser, func, path=setup_path)

        with catch_warnings(record=True):
            ser = Series(values, [datetime.date.today(), "a"])
            self._check_roundtrip(ser, func, path=setup_path)

        with catch_warnings(record=True):

            ser = Series(values, [0, "y"])
            self._check_roundtrip(ser, func, path=setup_path)

            ser = Series(values, [datetime.datetime.today(), 0])
            self._check_roundtrip(ser, func, path=setup_path)

            ser = Series(values, ["y", 0])
            self._check_roundtrip(ser, func, path=setup_path)

            ser = Series(values, [datetime.date.today(), "a"])
            self._check_roundtrip(ser, func, path=setup_path)

            ser = Series(values, [1.23, "b"])
            self._check_roundtrip(ser, func, path=setup_path)

            ser = Series(values, [1, 1.53])
            self._check_roundtrip(ser, func, path=setup_path)

            ser = Series(values, [1, 5])
            self._check_roundtrip(ser, func, path=setup_path)

            ser = Series(
                values, [datetime.datetime(2012, 1, 1), datetime.datetime(2012, 1, 2)]
            )
            self._check_roundtrip(ser, func, path=setup_path)

    def test_timeseries_preepoch(self, setup_path):

        dr = bdate_range("1/1/1940", "1/1/1960")
        ts = Series(np.random.randn(len(dr)), index=dr)
        try:
            self._check_roundtrip(ts, tm.assert_series_equal, path=setup_path)
        except OverflowError:
            pytest.skip("known failure on some windows platforms")

    @pytest.mark.parametrize(
        "compression", [False, pytest.param(True, marks=td.skip_if_windows_python_3)]
    )
    def test_frame(self, compression, setup_path):

        df = tm.makeDataFrame()

        # put in some random NAs
        df.values[0, 0] = np.nan
        df.values[5, 3] = np.nan

        self._check_roundtrip_table(
            df, tm.assert_frame_equal, path=setup_path, compression=compression
        )
        self._check_roundtrip(
            df, tm.assert_frame_equal, path=setup_path, compression=compression
        )

        tdf = tm.makeTimeDataFrame()
        self._check_roundtrip(
            tdf, tm.assert_frame_equal, path=setup_path, compression=compression
        )

        with ensure_clean_store(setup_path) as store:
            # not consolidated
            df["foo"] = np.random.randn(len(df))
            store["df"] = df
            recons = store["df"]
            assert recons._mgr.is_consolidated()

        # empty
        self._check_roundtrip(df[:0], tm.assert_frame_equal, path=setup_path)

    def test_empty_series_frame(self, setup_path):
        s0 = Series(dtype=object)
        s1 = Series(name="myseries", dtype=object)
        df0 = DataFrame()
        df1 = DataFrame(index=["a", "b", "c"])
        df2 = DataFrame(columns=["d", "e", "f"])

        self._check_roundtrip(s0, tm.assert_series_equal, path=setup_path)
        self._check_roundtrip(s1, tm.assert_series_equal, path=setup_path)
        self._check_roundtrip(df0, tm.assert_frame_equal, path=setup_path)
        self._check_roundtrip(df1, tm.assert_frame_equal, path=setup_path)
        self._check_roundtrip(df2, tm.assert_frame_equal, path=setup_path)

    @pytest.mark.parametrize(
        "dtype", [np.int64, np.float64, object, "m8[ns]", "M8[ns]"]
    )
    def test_empty_series(self, dtype, setup_path):
        s = Series(dtype=dtype)
        self._check_roundtrip(s, tm.assert_series_equal, path=setup_path)

    def test_can_serialize_dates(self, setup_path):

        rng = [x.date() for x in bdate_range("1/1/2000", "1/30/2000")]
        frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

        self._check_roundtrip(frame, tm.assert_frame_equal, path=setup_path)

    def test_store_hierarchical(self, setup_path):
        index = MultiIndex(
            levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
            codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
            names=["foo", "bar"],
        )
        frame = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"])

        self._check_roundtrip(frame, tm.assert_frame_equal, path=setup_path)
        self._check_roundtrip(frame.T, tm.assert_frame_equal, path=setup_path)
        self._check_roundtrip(frame["A"], tm.assert_series_equal, path=setup_path)

        # check that the names are stored
        with ensure_clean_store(setup_path) as store:
            store["frame"] = frame
            recons = store["frame"]
            tm.assert_frame_equal(recons, frame)

    def test_store_index_name(self, setup_path):
        df = tm.makeDataFrame()
        df.index.name = "foo"

        with ensure_clean_store(setup_path) as store:
            store["frame"] = df
            recons = store["frame"]
            tm.assert_frame_equal(recons, df)

    @pytest.mark.parametrize("table_format", ["table", "fixed"])
    def test_store_index_name_numpy_str(self, table_format, setup_path):
        # GH #13492
        idx = Index(
            pd.to_datetime([datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)]),
            name="cols\u05d2",
        )
        idx1 = Index(
            pd.to_datetime([datetime.date(2010, 1, 1), datetime.date(2010, 1, 2)]),
            name="rows\u05d0",
        )
        df = DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1)

        # This used to fail, returning numpy strings instead of python strings.
        with ensure_clean_path(setup_path) as path:
            df.to_hdf(path, "df", format=table_format)
            df2 = read_hdf(path, "df")

            tm.assert_frame_equal(df, df2, check_names=True)

            assert type(df2.index.name) == str
            assert type(df2.columns.name) == str

    def test_store_series_name(self, setup_path):
        df = tm.makeDataFrame()
        series = df["A"]

        with ensure_clean_store(setup_path) as store:
            store["series"] = series
            recons = store["series"]
            tm.assert_series_equal(recons, series)

    @pytest.mark.parametrize(
        "compression", [False, pytest.param(True, marks=td.skip_if_windows_python_3)]
    )
    def test_store_mixed(self, compression, setup_path):
        def _make_one():
            df = tm.makeDataFrame()
            df["obj1"] = "foo"
            df["obj2"] = "bar"
            df["bool1"] = df["A"] > 0
            df["bool2"] = df["B"] > 0
            df["int1"] = 1
            df["int2"] = 2
            return df._consolidate()

        df1 = _make_one()
        df2 = _make_one()

        self._check_roundtrip(df1, tm.assert_frame_equal, path=setup_path)
        self._check_roundtrip(df2, tm.assert_frame_equal, path=setup_path)

        with ensure_clean_store(setup_path) as store:
            store["obj"] = df1
            tm.assert_frame_equal(store["obj"], df1)
            store["obj"] = df2
            tm.assert_frame_equal(store["obj"], df2)

        # check that can store Series of all of these types
        self._check_roundtrip(
            df1["obj1"],
            tm.assert_series_equal,
            path=setup_path,
            compression=compression,
        )
        self._check_roundtrip(
            df1["bool1"],
            tm.assert_series_equal,
            path=setup_path,
            compression=compression,
        )
        self._check_roundtrip(
            df1["int1"],
            tm.assert_series_equal,
            path=setup_path,
            compression=compression,
        )

    @pytest.mark.filterwarnings(
        "ignore:\\nduplicate:pandas.io.pytables.DuplicateWarning"
    )
    def test_select_with_dups(self, setup_path):

        # single dtypes
        df = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"])
        df.index = date_range("20130101 9:30", periods=10, freq="T")

        with ensure_clean_store(setup_path) as store:
            store.append("df", df)

            result = store.select("df")
            expected = df
            tm.assert_frame_equal(result, expected, by_blocks=True)

            result = store.select("df", columns=df.columns)
            expected = df
            tm.assert_frame_equal(result, expected, by_blocks=True)

            result = store.select("df", columns=["A"])
            expected = df.loc[:, ["A"]]
            tm.assert_frame_equal(result, expected)

        # dups across dtypes
        df = concat(
            [
                DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]),
                DataFrame(
                    np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"]
                ),
            ],
            axis=1,
        )
        df.index = date_range("20130101 9:30", periods=10, freq="T")

        with ensure_clean_store(setup_path) as store:
            store.append("df", df)

            result = store.select("df")
            expected = df
            tm.assert_frame_equal(result, expected, by_blocks=True)

            result = store.select("df", columns=df.columns)
            expected = df
            tm.assert_frame_equal(result, expected, by_blocks=True)

            expected = df.loc[:, ["A"]]
            result = store.select("df", columns=["A"])
            tm.assert_frame_equal(result, expected, by_blocks=True)

            expected = df.loc[:, ["B", "A"]]
            result = store.select("df", columns=["B", "A"])
            tm.assert_frame_equal(result, expected, by_blocks=True)

        # duplicates on both index and columns
        with ensure_clean_store(setup_path) as store:
            store.append("df", df)
            store.append("df", df)

            expected = df.loc[:, ["B", "A"]]
            expected = concat([expected, expected])
            result = store.select("df", columns=["B", "A"])
            tm.assert_frame_equal(result, expected, by_blocks=True)

    def test_overwrite_node(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            store["a"] = tm.makeTimeDataFrame()
            ts = tm.makeTimeSeries()
            store["a"] = ts

            tm.assert_series_equal(store["a"], ts)

    def test_select(self, setup_path):

        with ensure_clean_store(setup_path) as store:

            with catch_warnings(record=True):

                # select with columns=
                df = tm.makeTimeDataFrame()
                _maybe_remove(store, "df")
                store.append("df", df)
                result = store.select("df", columns=["A", "B"])
                expected = df.reindex(columns=["A", "B"])
                tm.assert_frame_equal(expected, result)

                # equivalently
                result = store.select("df", [("columns=['A', 'B']")])
                expected = df.reindex(columns=["A", "B"])
                tm.assert_frame_equal(expected, result)

                # with a data column
                _maybe_remove(store, "df")
                store.append("df", df, data_columns=["A"])
                result = store.select("df", ["A > 0"], columns=["A", "B"])
                expected = df[df.A > 0].reindex(columns=["A", "B"])
                tm.assert_frame_equal(expected, result)

                # all columns as data columns
                _maybe_remove(store, "df")
                store.append("df", df, data_columns=True)
                result = store.select("df", ["A > 0"], columns=["A", "B"])
                expected = df[df.A > 0].reindex(columns=["A", "B"])
                tm.assert_frame_equal(expected, result)

                # with a data column, but different columns
                _maybe_remove(store, "df")
                store.append("df", df, data_columns=["A"])
                result = store.select("df", ["A > 0"], columns=["C", "D"])
                expected = df[df.A > 0].reindex(columns=["C", "D"])
                tm.assert_frame_equal(expected, result)

    def test_select_dtypes(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            # with a Timestamp data column (GH #2637)
            df = DataFrame(
                {
                    "ts": bdate_range("2012-01-01", periods=300),
                    "A": np.random.randn(300),
                }
            )
            _maybe_remove(store, "df")
            store.append("df", df, data_columns=["ts", "A"])

            result = store.select("df", "ts>=Timestamp('2012-02-01')")
            expected = df[df.ts >= Timestamp("2012-02-01")]
            tm.assert_frame_equal(expected, result)

            # bool columns (GH #2849)
            df = DataFrame(np.random.randn(5, 2), columns=["A", "B"])
            df["object"] = "foo"
            df.loc[4:5, "object"] = "bar"
            df["boolv"] = df["A"] > 0
            _maybe_remove(store, "df")
            store.append("df", df, data_columns=True)

            expected = df[df.boolv == True].reindex(columns=["A", "boolv"])  # noqa
            for v in [True, "true", 1]:
                result = store.select("df", f"boolv == {v}", columns=["A", "boolv"])
                tm.assert_frame_equal(expected, result)

            expected = df[df.boolv == False].reindex(columns=["A", "boolv"])  # noqa
            for v in [False, "false", 0]:
                result = store.select("df", f"boolv == {v}", columns=["A", "boolv"])
                tm.assert_frame_equal(expected, result)

            # integer index
            df = DataFrame({"A": np.random.rand(20), "B": np.random.rand(20)})
            _maybe_remove(store, "df_int")
            store.append("df_int", df)
            result = store.select("df_int", "index<10 and columns=['A']")
            expected = df.reindex(index=list(df.index)[0:10], columns=["A"])
            tm.assert_frame_equal(expected, result)

            # float index
            df = DataFrame(
                {
                    "A": np.random.rand(20),
                    "B": np.random.rand(20),
                    "index": np.arange(20, dtype="f8"),
                }
            )
            _maybe_remove(store, "df_float")
            store.append("df_float", df)
            result = store.select("df_float", "index<10.0 and columns=['A']")
            expected = df.reindex(index=list(df.index)[0:10], columns=["A"])
            tm.assert_frame_equal(expected, result)

        with ensure_clean_store(setup_path) as store:

            # floats w/o NaN
            df = DataFrame({"cols": range(11), "values": range(11)}, dtype="float64")
            df["cols"] = (df["cols"] + 10).apply(str)

            store.append("df1", df, data_columns=True)
            result = store.select("df1", where="values>2.0")
            expected = df[df["values"] > 2.0]
            tm.assert_frame_equal(expected, result)

            # floats with NaN
            df.iloc[0] = np.nan
            expected = df[df["values"] > 2.0]

            store.append("df2", df, data_columns=True, index=False)
            result = store.select("df2", where="values>2.0")
            tm.assert_frame_equal(expected, result)

            # https://github.com/PyTables/PyTables/issues/282
            # bug in selection when 0th row has a np.nan and an index
            # store.append('df3',df,data_columns=True)
            # result = store.select(
            #     'df3', where='values>2.0')
            # tm.assert_frame_equal(expected, result)

            # a NaN that is not in the first position is ok too
            df = DataFrame({"cols": range(11), "values": range(11)}, dtype="float64")
            df["cols"] = (df["cols"] + 10).apply(str)

            df.iloc[1] = np.nan
            expected = df[df["values"] > 2.0]

            store.append("df4", df, data_columns=True)
            result = store.select("df4", where="values>2.0")
            tm.assert_frame_equal(expected, result)

        # test selection with comparison against numpy scalar
        # GH 11283
        with ensure_clean_store(setup_path) as store:
            df = tm.makeDataFrame()

            expected = df[df["A"] > 0]

            store.append("df", df, data_columns=True)
            np_zero = np.float64(0)  # noqa
            result = store.select("df", where=["A>np_zero"])
            tm.assert_frame_equal(expected, result)

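    # Note: as exercised above, boolean data columns accept several literal
    # spellings in a where string; `boolv == True`, `boolv == "true"` and
    # `boolv == 1` all parse to the same condition (and likewise for False).
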
    def test_select_with_many_inputs(self, setup_path):

        with ensure_clean_store(setup_path) as store:

            df = DataFrame(
                {
                    "ts": bdate_range("2012-01-01", periods=300),
                    "A": np.random.randn(300),
                    "B": range(300),
                    "users": ["a"] * 50
                    + ["b"] * 50
                    + ["c"] * 100
                    + [f"a{i:03d}" for i in range(100)],
                }
            )
            _maybe_remove(store, "df")
            store.append("df", df, data_columns=["ts", "A", "B", "users"])

            # regular select
            result = store.select("df", "ts>=Timestamp('2012-02-01')")
            expected = df[df.ts >= Timestamp("2012-02-01")]
            tm.assert_frame_equal(expected, result)

            # small selector
            result = store.select(
                "df", "ts>=Timestamp('2012-02-01') & users=['a','b','c']"
            )
            expected = df[
                (df.ts >= Timestamp("2012-02-01")) & df.users.isin(["a", "b", "c"])
            ]
            tm.assert_frame_equal(expected, result)

            # big selector along the columns
            selector = ["a", "b", "c"] + [f"a{i:03d}" for i in range(60)]
            result = store.select(
                "df", "ts>=Timestamp('2012-02-01') and users=selector"
            )
            expected = df[(df.ts >= Timestamp("2012-02-01")) & df.users.isin(selector)]
            tm.assert_frame_equal(expected, result)

            selector = range(100, 200)
            result = store.select("df", "B=selector")
            expected = df[df.B.isin(selector)]
            tm.assert_frame_equal(expected, result)
            assert len(result) == 100

            # big selector along the index
            selector = Index(df.ts[0:100].values)
            result = store.select("df", "ts=selector")
            expected = df[df.ts.isin(selector.values)]
            tm.assert_frame_equal(expected, result)
            assert len(result) == 100

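    # Note: a where string may reference a local variable by name (e.g.
    # "users=selector" and "B=selector" above); list-like values are expanded
    # into an isin-style membership filter against the data column, as the
    # expected frames computed with .isin() confirm.
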
    def test_select_iterator(self, setup_path):

        # single table
        with ensure_clean_store(setup_path) as store:

            df = tm.makeTimeDataFrame(500)
            _maybe_remove(store, "df")
            store.append("df", df)

            expected = store.select("df")

            results = list(store.select("df", iterator=True))
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            results = list(store.select("df", chunksize=100))
            assert len(results) == 5
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            results = list(store.select("df", chunksize=150))
            result = concat(results)
            tm.assert_frame_equal(result, expected)

        with ensure_clean_path(setup_path) as path:

            df = tm.makeTimeDataFrame(500)
            df.to_hdf(path, "df_non_table")

            with pytest.raises(TypeError):
                read_hdf(path, "df_non_table", chunksize=100)

            with pytest.raises(TypeError):
                read_hdf(path, "df_non_table", iterator=True)

        with ensure_clean_path(setup_path) as path:

            df = tm.makeTimeDataFrame(500)
            df.to_hdf(path, "df", format="table")

            results = list(read_hdf(path, "df", chunksize=100))
            result = concat(results)

            assert len(results) == 5
            tm.assert_frame_equal(result, df)
            tm.assert_frame_equal(result, read_hdf(path, "df"))

        # multiple

        with ensure_clean_store(setup_path) as store:

            df1 = tm.makeTimeDataFrame(500)
            store.append("df1", df1, data_columns=True)
            df2 = tm.makeTimeDataFrame(500).rename(columns="{}_2".format)
            df2["foo"] = "bar"
            store.append("df2", df2)

            df = concat([df1, df2], axis=1)

            # full selection
            expected = store.select_as_multiple(["df1", "df2"], selector="df1")
            results = list(
                store.select_as_multiple(["df1", "df2"], selector="df1", chunksize=150)
            )
            result = concat(results)
            tm.assert_frame_equal(expected, result)

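    # Note: the chunk counts above follow from ceiling division; with 500
    # rows, chunksize=100 yields exactly 5 frames, while chunksize=150 yields
    # 4 frames (150 + 150 + 150 + 50) that concat back to the full table.
    # Iteration is only supported for format="table", hence the TypeError on
    # the fixed-format "df_non_table" key.
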
    def test_select_iterator_complete_8014(self, setup_path):

        # GH 8014
        # using iterator and where clause
        chunksize = 1e4

        # no iterator
        with ensure_clean_store(setup_path) as store:

            expected = tm.makeTimeDataFrame(100064, "S")
            _maybe_remove(store, "df")
            store.append("df", expected)

            beg_dt = expected.index[0]
            end_dt = expected.index[-1]

            # select w/o iteration and no where clause works
            result = store.select("df")
            tm.assert_frame_equal(expected, result)

            # select w/o iterator and where clause, single term, begin
            # of range, works
            where = f"index >= '{beg_dt}'"
            result = store.select("df", where=where)
            tm.assert_frame_equal(expected, result)

            # select w/o iterator and where clause, single term, end
            # of range, works
            where = f"index <= '{end_dt}'"
            result = store.select("df", where=where)
            tm.assert_frame_equal(expected, result)

            # select w/o iterator and where clause, inclusive range,
            # works
            where = f"index >= '{beg_dt}' & index <= '{end_dt}'"
            result = store.select("df", where=where)
            tm.assert_frame_equal(expected, result)

        # with iterator, full range
        with ensure_clean_store(setup_path) as store:

            expected = tm.makeTimeDataFrame(100064, "S")
            _maybe_remove(store, "df")
            store.append("df", expected)

            beg_dt = expected.index[0]
            end_dt = expected.index[-1]

            # select w/iterator and no where clause works
            results = list(store.select("df", chunksize=chunksize))
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            # select w/iterator and where clause, single term, begin of range
            where = f"index >= '{beg_dt}'"
            results = list(store.select("df", where=where, chunksize=chunksize))
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            # select w/iterator and where clause, single term, end of range
            where = f"index <= '{end_dt}'"
            results = list(store.select("df", where=where, chunksize=chunksize))
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            # select w/iterator and where clause, inclusive range
            where = f"index >= '{beg_dt}' & index <= '{end_dt}'"
            results = list(store.select("df", where=where, chunksize=chunksize))
            result = concat(results)
            tm.assert_frame_equal(expected, result)

    def test_select_iterator_non_complete_8014(self, setup_path):

        # GH 8014
        # using iterator and where clause
        chunksize = 1e4

        # with iterator, non-complete range
        with ensure_clean_store(setup_path) as store:

            expected = tm.makeTimeDataFrame(100064, "S")
            _maybe_remove(store, "df")
            store.append("df", expected)

            beg_dt = expected.index[1]
            end_dt = expected.index[-2]

            # select w/iterator and where clause, single term, begin of range
            where = f"index >= '{beg_dt}'"
            results = list(store.select("df", where=where, chunksize=chunksize))
            result = concat(results)
            rexpected = expected[expected.index >= beg_dt]
            tm.assert_frame_equal(rexpected, result)

            # select w/iterator and where clause, single term, end of range
            where = f"index <= '{end_dt}'"
            results = list(store.select("df", where=where, chunksize=chunksize))
            result = concat(results)
            rexpected = expected[expected.index <= end_dt]
            tm.assert_frame_equal(rexpected, result)

            # select w/iterator and where clause, inclusive range
            where = f"index >= '{beg_dt}' & index <= '{end_dt}'"
            results = list(store.select("df", where=where, chunksize=chunksize))
            result = concat(results)
            rexpected = expected[
                (expected.index >= beg_dt) & (expected.index <= end_dt)
            ]
            tm.assert_frame_equal(rexpected, result)

        # with iterator, empty where
        with ensure_clean_store(setup_path) as store:

            expected = tm.makeTimeDataFrame(100064, "S")
            _maybe_remove(store, "df")
            store.append("df", expected)

            end_dt = expected.index[-1]

            # select w/iterator and an empty where clause, begin of range
            where = f"index > '{end_dt}'"
            results = list(store.select("df", where=where, chunksize=chunksize))
            assert 0 == len(results)

    def test_select_iterator_many_empty_frames(self, setup_path):

        # GH 8014
        # using iterator and where clause can return many empty
        # frames.
        chunksize = int(1e4)

        # with iterator, range limited to the first chunk
        with ensure_clean_store(setup_path) as store:

            expected = tm.makeTimeDataFrame(100000, "S")
            _maybe_remove(store, "df")
            store.append("df", expected)

            beg_dt = expected.index[0]
            end_dt = expected.index[chunksize - 1]

            # select w/iterator and where clause, single term, begin of range
            where = f"index >= '{beg_dt}'"
            results = list(store.select("df", where=where, chunksize=chunksize))
            result = concat(results)
            rexpected = expected[expected.index >= beg_dt]
            tm.assert_frame_equal(rexpected, result)

            # select w/iterator and where clause, single term, end of range
            where = f"index <= '{end_dt}'"
            results = list(store.select("df", where=where, chunksize=chunksize))

            assert len(results) == 1
            result = concat(results)
            rexpected = expected[expected.index <= end_dt]
            tm.assert_frame_equal(rexpected, result)

            # select w/iterator and where clause, inclusive range
            where = f"index >= '{beg_dt}' & index <= '{end_dt}'"
            results = list(store.select("df", where=where, chunksize=chunksize))

            # the where clause restricts the selection to a single chunk
            assert len(results) == 1
            result = concat(results)
            rexpected = expected[
                (expected.index >= beg_dt) & (expected.index <= end_dt)
            ]
            tm.assert_frame_equal(rexpected, result)

            # select w/iterator and a where clause which selects *nothing*;
            # to be consistent with Python idiom this should return []
            # (e.g. `for e in []: print(True)` never prints True)

            where = f"index <= '{beg_dt}' & index >= '{end_dt}'"
            results = list(store.select("df", where=where, chunksize=chunksize))

            # should be []
            assert len(results) == 0

    @pytest.mark.filterwarnings(
        "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning"
    )
    def test_retain_index_attributes(self, setup_path):

        # GH 3499, losing frequency info on index recreation
        df = DataFrame(
            {"A": Series(range(3), index=date_range("2000-1-1", periods=3, freq="H"))}
        )

        with ensure_clean_store(setup_path) as store:
            _maybe_remove(store, "data")
            store.put("data", df, format="table")

            result = store.get("data")
            tm.assert_frame_equal(df, result)

            for attr in ["freq", "tz", "name"]:
                for idx in ["index", "columns"]:
                    assert getattr(getattr(df, idx), attr, None) == getattr(
                        getattr(result, idx), attr, None
                    )

            # try to append a table with a different frequency
            with catch_warnings(record=True):
                df2 = DataFrame(
                    {
                        "A": Series(
                            range(3), index=date_range("2002-1-1", periods=3, freq="D")
                        )
                    }
                )
                store.append("data", df2)

            assert store.get_storer("data").info["index"]["freq"] is None

            # this is ok
            _maybe_remove(store, "df2")
            df2 = DataFrame(
                {
                    "A": Series(
                        range(3),
                        index=[
                            Timestamp("20010101"),
                            Timestamp("20010102"),
                            Timestamp("20020101"),
                        ],
                    )
                }
            )
            store.append("df2", df2)
            df3 = DataFrame(
                {
                    "A": Series(
                        range(3), index=date_range("2002-1-1", periods=3, freq="D")
                    )
                }
            )
            store.append("df2", df3)

    @pytest.mark.filterwarnings(
        "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning"
    )
    def test_retain_index_attributes2(self, setup_path):
        with ensure_clean_path(setup_path) as path:

            with catch_warnings(record=True):

                df = DataFrame(
                    {
                        "A": Series(
                            range(3), index=date_range("2000-1-1", periods=3, freq="H")
                        )
                    }
                )
                df.to_hdf(path, "data", mode="w", append=True)
                df2 = DataFrame(
                    {
                        "A": Series(
                            range(3), index=date_range("2002-1-1", periods=3, freq="D")
                        )
                    }
                )

                df2.to_hdf(path, "data", append=True)

                idx = date_range("2000-1-1", periods=3, freq="H")
                idx.name = "foo"
                df = DataFrame({"A": Series(range(3), index=idx)})
                df.to_hdf(path, "data", mode="w", append=True)

            assert read_hdf(path, "data").index.name == "foo"

            with catch_warnings(record=True):

                idx2 = date_range("2001-1-1", periods=3, freq="H")
                idx2.name = "bar"
                df2 = DataFrame({"A": Series(range(3), index=idx2)})
                df2.to_hdf(path, "data", append=True)

            assert read_hdf(path, "data").index.name is None

    def test_frame_select(self, setup_path):

        df = tm.makeTimeDataFrame()

        with ensure_clean_store(setup_path) as store:
            store.put("frame", df, format="table")
            date = df.index[len(df) // 2]

            crit1 = Term("index>=date")
            assert crit1.env.scope["date"] == date

            crit2 = "columns=['A', 'D']"
            crit3 = "columns=A"

            result = store.select("frame", [crit1, crit2])
            expected = df.loc[date:, ["A", "D"]]
            tm.assert_frame_equal(result, expected)

            result = store.select("frame", [crit3])
            expected = df.loc[:, ["A"]]
            tm.assert_frame_equal(result, expected)

            # invalid terms
            df = tm.makeTimeDataFrame()
            store.append("df_time", df)
            with pytest.raises(ValueError):
                store.select("df_time", "index>0")

            # can't select if not written as table
            # store['frame'] = df
            # with pytest.raises(ValueError):
            #     store.select('frame', [crit1, crit2])

    def test_frame_select_complex(self, setup_path):
        # select via complex criteria

        df = tm.makeTimeDataFrame()
        df["string"] = "foo"
        df.loc[df.index[0:4], "string"] = "bar"

        with ensure_clean_store(setup_path) as store:
            store.put("df", df, format="table", data_columns=["string"])

            # empty
            result = store.select("df", 'index>df.index[3] & string="bar"')
            expected = df.loc[(df.index > df.index[3]) & (df.string == "bar")]
            tm.assert_frame_equal(result, expected)

            result = store.select("df", 'index>df.index[3] & string="foo"')
            expected = df.loc[(df.index > df.index[3]) & (df.string == "foo")]
            tm.assert_frame_equal(result, expected)

            # or
            result = store.select("df", 'index>df.index[3] | string="bar"')
            expected = df.loc[(df.index > df.index[3]) | (df.string == "bar")]
            tm.assert_frame_equal(result, expected)

            result = store.select(
                "df", '(index>df.index[3] & index<=df.index[6]) | string="bar"'
            )
            expected = df.loc[
                ((df.index > df.index[3]) & (df.index <= df.index[6]))
                | (df.string == "bar")
            ]
            tm.assert_frame_equal(result, expected)

            # invert
            result = store.select("df", 'string!="bar"')
            expected = df.loc[df.string != "bar"]
            tm.assert_frame_equal(result, expected)

            # invert not implemented in numexpr :(
            with pytest.raises(NotImplementedError):
                store.select("df", '~(string="bar")')

            # invert ok for filters
            result = store.select("df", "~(columns=['A','B'])")
            expected = df.loc[:, df.columns.difference(["A", "B"])]
            tm.assert_frame_equal(result, expected)

            # in
            result = store.select("df", "index>df.index[3] & columns in ['A','B']")
            expected = df.loc[df.index > df.index[3]].reindex(columns=["A", "B"])
            tm.assert_frame_equal(result, expected)

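    # Note: numexpr cannot evaluate `~` on a condition term, hence the
    # NotImplementedError above; `~` does work as a filter on the columns
    # axis ("~(columns=['A','B'])"), and `!=` is the supported way to invert
    # an equality condition on a data column.
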
    def test_frame_select_complex2(self, setup_path):

        with ensure_clean_path(["parms.hdf", "hist.hdf"]) as paths:

            pp, hh = paths

            # use non-trivial selection criteria
            parms = DataFrame({"A": [1, 1, 2, 2, 3]})
            parms.to_hdf(pp, "df", mode="w", format="table", data_columns=["A"])

            selection = read_hdf(pp, "df", where="A=[2,3]")
            hist = DataFrame(
                np.random.randn(25, 1),
                columns=["data"],
                index=MultiIndex.from_tuples(
                    [(i, j) for i in range(5) for j in range(5)], names=["l1", "l2"]
                ),
            )

            hist.to_hdf(hh, "df", mode="w", format="table")

            expected = read_hdf(hh, "df", where="l1=[2, 3, 4]")

            # scope with list like
            l = selection.index.tolist()  # noqa
            store = HDFStore(hh)
            result = store.select("df", where="l1=l")
            tm.assert_frame_equal(result, expected)
            store.close()

            result = read_hdf(hh, "df", where="l1=l")
            tm.assert_frame_equal(result, expected)

            # index
            index = selection.index  # noqa
            result = read_hdf(hh, "df", where="l1=index")
            tm.assert_frame_equal(result, expected)

            result = read_hdf(hh, "df", where="l1=selection.index")
            tm.assert_frame_equal(result, expected)

            result = read_hdf(hh, "df", where="l1=selection.index.tolist()")
            tm.assert_frame_equal(result, expected)

            result = read_hdf(hh, "df", where="l1=list(selection.index)")
            tm.assert_frame_equal(result, expected)

            # scope with index
            store = HDFStore(hh)

            result = store.select("df", where="l1=index")
            tm.assert_frame_equal(result, expected)

            result = store.select("df", where="l1=selection.index")
            tm.assert_frame_equal(result, expected)

            result = store.select("df", where="l1=selection.index.tolist()")
            tm.assert_frame_equal(result, expected)

            result = store.select("df", where="l1=list(selection.index)")
            tm.assert_frame_equal(result, expected)

            store.close()

    def test_invalid_filtering(self, setup_path):

        # can't use more than one filter (atm)

        df = tm.makeTimeDataFrame()

        with ensure_clean_store(setup_path) as store:
            store.put("df", df, format="table")

            # not implemented
            with pytest.raises(NotImplementedError):
                store.select("df", "columns=['A'] | columns=['B']")

            # in theory we could deal with this
            with pytest.raises(NotImplementedError):
                store.select("df", "columns=['A','B'] & columns=['C']")

    def test_string_select(self, setup_path):
        # GH 2973
        with ensure_clean_store(setup_path) as store:

            df = tm.makeTimeDataFrame()

            # test string ==/!=
            df["x"] = "none"
            df.loc[df.index[2:7], "x"] = ""

            store.append("df", df, data_columns=["x"])

            result = store.select("df", "x=none")
            expected = df[df.x == "none"]
            tm.assert_frame_equal(result, expected)

            result = store.select("df", "x!=none")
            expected = df[df.x != "none"]
            tm.assert_frame_equal(result, expected)

            df2 = df.copy()
            df2.loc[df2.x == "", "x"] = np.nan

            store.append("df2", df2, data_columns=["x"])
            result = store.select("df2", "x!=none")
            expected = df2[isna(df2.x)]
            tm.assert_frame_equal(result, expected)

            # int ==/!=
            df["int"] = 1
            df.loc[df.index[2:7], "int"] = 2

            store.append("df3", df, data_columns=["int"])

            result = store.select("df3", "int=2")
            expected = df[df.int == 2]
            tm.assert_frame_equal(result, expected)

            result = store.select("df3", "int!=2")
            expected = df[df.int != 2]
            tm.assert_frame_equal(result, expected)

    def test_read_column(self, setup_path):

        df = tm.makeTimeDataFrame()

        with ensure_clean_store(setup_path) as store:
            _maybe_remove(store, "df")

            # GH 17912
            # HDFStore.select_column should raise a KeyError
            # exception if the key is not a valid store
            with pytest.raises(KeyError, match="No object named df in the file"):
                store.select_column("df", "index")

            store.append("df", df)
            # error
            with pytest.raises(
                KeyError, match=re.escape("'column [foo] not found in the table'")
            ):
                store.select_column("df", "foo")

            with pytest.raises(Exception):
                store.select_column("df", "index", where=["index>5"])

            # valid
            result = store.select_column("df", "index")
            tm.assert_almost_equal(result.values, Series(df.index).values)
            assert isinstance(result, Series)

            # not a data indexable column
            with pytest.raises(ValueError):
                store.select_column("df", "values_block_0")

            # a data column
            df2 = df.copy()
            df2["string"] = "foo"
            store.append("df2", df2, data_columns=["string"])
            result = store.select_column("df2", "string")
            tm.assert_almost_equal(result.values, df2["string"].values)

            # a data column with NaNs, result excludes the NaNs
            df3 = df.copy()
            df3["string"] = "foo"
            df3.loc[df3.index[4:6], "string"] = np.nan
            store.append("df3", df3, data_columns=["string"])
            result = store.select_column("df3", "string")
            tm.assert_almost_equal(result.values, df3["string"].values)

            # start/stop
            result = store.select_column("df3", "string", start=2)
            tm.assert_almost_equal(result.values, df3["string"].values[2:])

            result = store.select_column("df3", "string", start=-2)
            tm.assert_almost_equal(result.values, df3["string"].values[-2:])

            result = store.select_column("df3", "string", stop=2)
            tm.assert_almost_equal(result.values, df3["string"].values[:2])

            result = store.select_column("df3", "string", stop=-2)
            tm.assert_almost_equal(result.values, df3["string"].values[:-2])

            result = store.select_column("df3", "string", start=2, stop=-2)
            tm.assert_almost_equal(result.values, df3["string"].values[2:-2])

            result = store.select_column("df3", "string", start=-2, stop=2)
            tm.assert_almost_equal(result.values, df3["string"].values[-2:2])

            # GH 10392 - make sure column name is preserved
            df4 = DataFrame({"A": np.random.randn(10), "B": "foo"})
            store.append("df4", df4, data_columns=True)
            expected = df4["B"]
            result = store.select_column("df4", "B")
            tm.assert_series_equal(result, expected)

    def test_coordinates(self, setup_path):
        df = tm.makeTimeDataFrame()

        with ensure_clean_store(setup_path) as store:

            _maybe_remove(store, "df")
            store.append("df", df)

            # all
            c = store.select_as_coordinates("df")
            assert (c.values == np.arange(len(df.index))).all()

            # get coordinates back & test vs frame
            _maybe_remove(store, "df")

            df = DataFrame({"A": range(5), "B": range(5)})
            store.append("df", df)
            c = store.select_as_coordinates("df", ["index<3"])
            assert (c.values == np.arange(3)).all()
            result = store.select("df", where=c)
            expected = df.loc[0:2, :]
            tm.assert_frame_equal(result, expected)

            c = store.select_as_coordinates("df", ["index>=3", "index<=4"])
            assert (c.values == np.arange(2) + 3).all()
            result = store.select("df", where=c)
            expected = df.loc[3:4, :]
            tm.assert_frame_equal(result, expected)
            assert isinstance(c, Index)

            # multiple tables
            _maybe_remove(store, "df1")
            _maybe_remove(store, "df2")
            df1 = tm.makeTimeDataFrame()
            df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
            store.append("df1", df1, data_columns=["A", "B"])
            store.append("df2", df2)

            c = store.select_as_coordinates("df1", ["A>0", "B>0"])
            df1_result = store.select("df1", c)
            df2_result = store.select("df2", c)
            result = concat([df1_result, df2_result], axis=1)

            expected = concat([df1, df2], axis=1)
            expected = expected[(expected.A > 0) & (expected.B > 0)]
            tm.assert_frame_equal(result, expected)

        # pass array/mask as the coordinates
        with ensure_clean_store(setup_path) as store:

            df = DataFrame(
                np.random.randn(1000, 2), index=date_range("20000101", periods=1000)
            )
            store.append("df", df)
            c = store.select_column("df", "index")
            where = c[DatetimeIndex(c).month == 5].index
            expected = df.iloc[where]

            # locations
            result = store.select("df", where=where)
            tm.assert_frame_equal(result, expected)

            # boolean
            result = store.select("df", where=where)
            tm.assert_frame_equal(result, expected)

            # invalid
            with pytest.raises(ValueError):
                store.select("df", where=np.arange(len(df), dtype="float64"))

            with pytest.raises(ValueError):
                store.select("df", where=np.arange(len(df) + 1))

            with pytest.raises(ValueError):
                store.select("df", where=np.arange(len(df)), start=5)

            with pytest.raises(ValueError):
                store.select("df", where=np.arange(len(df)), start=5, stop=10)

            # selection with filter
            selection = date_range("20000101", periods=500)
            result = store.select("df", where="index in selection")
            expected = df[df.index.isin(selection)]
            tm.assert_frame_equal(result, expected)

            # list
            df = DataFrame(np.random.randn(10, 2))
            store.append("df2", df)
            result = store.select("df2", where=[0, 3, 5])
            expected = df.iloc[[0, 3, 5]]
            tm.assert_frame_equal(result, expected)

            # boolean
            where = [True] * 10
            where[-2] = False
            result = store.select("df2", where=where)
            expected = df.loc[where]
            tm.assert_frame_equal(result, expected)

            # start/stop
            result = store.select("df2", start=5, stop=10)
            expected = df[5:10]
            tm.assert_frame_equal(result, expected)

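    # Note: besides a query string, `where` accepts the Index returned by
    # select_as_coordinates, a list of integer row locations, or a boolean
    # mask of the same length as the table, as exercised above; float arrays,
    # wrong-length arrays, or mixing coordinates with start/stop raise
    # ValueError.
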
    def test_append_to_multiple(self, setup_path):
        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
        df2["foo"] = "bar"
        df = concat([df1, df2], axis=1)

        with ensure_clean_store(setup_path) as store:

            # exceptions
            with pytest.raises(ValueError):
                store.append_to_multiple(
                    {"df1": ["A", "B"], "df2": None}, df, selector="df3"
                )

            with pytest.raises(ValueError):
                store.append_to_multiple({"df1": None, "df2": None}, df, selector="df3")

            with pytest.raises(ValueError):
                store.append_to_multiple("df1", df, "df1")

            # regular operation
            store.append_to_multiple(
                {"df1": ["A", "B"], "df2": None}, df, selector="df1"
            )
            result = store.select_as_multiple(
                ["df1", "df2"], where=["A>0", "B>0"], selector="df1"
            )
            expected = df[(df.A > 0) & (df.B > 0)]
            tm.assert_frame_equal(result, expected)

def test_append_to_multiple_dropna(self, setup_path):
|
|
df1 = tm.makeTimeDataFrame()
|
|
df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
|
|
df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
|
|
df = concat([df1, df2], axis=1)
|
|
|
|
with ensure_clean_store(setup_path) as store:
|
|
|
|
# dropna=True should guarantee rows are synchronized
|
|
store.append_to_multiple(
|
|
{"df1": ["A", "B"], "df2": None}, df, selector="df1", dropna=True
|
|
)
|
|
result = store.select_as_multiple(["df1", "df2"])
|
|
expected = df.dropna()
|
|
tm.assert_frame_equal(result, expected)
|
|
tm.assert_index_equal(store.select("df1").index, store.select("df2").index)
|
|
|
|
    @pytest.mark.xfail(
        run=False,
        reason="append_to_multiple with dropna=False is not raising as expected",
    )
    def test_append_to_multiple_dropna_false(self, setup_path):
        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
        df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
        df = concat([df1, df2], axis=1)

        with ensure_clean_store(setup_path) as store:

            # dropna=False shouldn't synchronize row indexes
            store.append_to_multiple(
                {"df1a": ["A", "B"], "df2a": None}, df, selector="df1a", dropna=False
            )

            with pytest.raises(ValueError):
                store.select_as_multiple(["df1a", "df2a"])

            assert not store.select("df1a").index.equals(store.select("df2a").index)

    def test_append_to_multiple_min_itemsize(self, setup_path):
        # GH 11238
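        # min_itemsize maps column names to the minimum string width
        # reserved in each target table, so later appends with longer
        # strings still fit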
        df = DataFrame(
            {
                "IX": np.arange(1, 21),
                "Num": np.arange(1, 21),
                "BigNum": np.arange(1, 21) * 88,
                "Str": ["a" for _ in range(20)],
                "LongStr": ["abcde" for _ in range(20)],
            }
        )
        expected = df.iloc[[0]]

        with ensure_clean_store(setup_path) as store:
            store.append_to_multiple(
                {
                    "index": ["IX"],
                    "nums": ["Num", "BigNum"],
                    "strs": ["Str", "LongStr"],
                },
                df.iloc[[0]],
                "index",
                min_itemsize={"Str": 10, "LongStr": 100, "Num": 2},
            )
            result = store.select_as_multiple(["index", "nums", "strs"])
            tm.assert_frame_equal(result, expected)

    def test_select_as_multiple(self, setup_path):
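        # select_as_multiple reads several tables written in lockstep and
        # concatenates the pieces back together column-wise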

        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
        df2["foo"] = "bar"

        with ensure_clean_store(setup_path) as store:

            # no tables stored
            with pytest.raises(Exception):
                store.select_as_multiple(None, where=["A>0", "B>0"], selector="df1")

            store.append("df1", df1, data_columns=["A", "B"])
            store.append("df2", df2)

            # exceptions
            with pytest.raises(Exception):
                store.select_as_multiple(None, where=["A>0", "B>0"], selector="df1")

            with pytest.raises(Exception):
                store.select_as_multiple([None], where=["A>0", "B>0"], selector="df1")

            msg = "'No object named df3 in the file'"
            with pytest.raises(KeyError, match=msg):
                store.select_as_multiple(
                    ["df1", "df3"], where=["A>0", "B>0"], selector="df1"
                )

            with pytest.raises(KeyError, match=msg):
                store.select_as_multiple(["df3"], where=["A>0", "B>0"], selector="df1")

            with pytest.raises(KeyError, match="'No object named df4 in the file'"):
                store.select_as_multiple(
                    ["df1", "df2"], where=["A>0", "B>0"], selector="df4"
                )

            # default select
            result = store.select("df1", ["A>0", "B>0"])
            expected = store.select_as_multiple(
                ["df1"], where=["A>0", "B>0"], selector="df1"
            )
            tm.assert_frame_equal(result, expected)
            expected = store.select_as_multiple(
                "df1", where=["A>0", "B>0"], selector="df1"
            )
            tm.assert_frame_equal(result, expected)

            # multiple
            result = store.select_as_multiple(
                ["df1", "df2"], where=["A>0", "B>0"], selector="df1"
            )
            expected = concat([df1, df2], axis=1)
            expected = expected[(expected.A > 0) & (expected.B > 0)]
            tm.assert_frame_equal(result, expected)

            # multiple (diff selector)
            result = store.select_as_multiple(
                ["df1", "df2"], where="index>df2.index[4]", selector="df2"
            )
            expected = concat([df1, df2], axis=1)
            expected = expected[5:]
            tm.assert_frame_equal(result, expected)

            # test exception for diff rows
            store.append("df3", tm.makeTimeDataFrame(nper=50))
            with pytest.raises(ValueError):
                store.select_as_multiple(
                    ["df1", "df3"], where=["A>0", "B>0"], selector="df1"
                )

    @pytest.mark.skipif(
        LooseVersion(tables.__version__) < LooseVersion("3.1.0"),
        reason=("tables version does not support fix for nan selection bug: GH 4858"),
    )
    def test_nan_selection_bug_4858(self, setup_path):
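        # GH 4858: the all-NaN first row must not satisfy ``values > 2.0``;
        # only the genuinely matching rows may come back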

        with ensure_clean_store(setup_path) as store:

            df = DataFrame({"cols": range(6), "values": range(6)}, dtype="float64")
            df["cols"] = (df["cols"] + 10).apply(str)
            df.iloc[0] = np.nan

            expected = DataFrame(
                {"cols": ["13.0", "14.0", "15.0"], "values": [3.0, 4.0, 5.0]},
                index=[3, 4, 5],
            )

            # write w/o the index on that particular column
            store.append("df", df, data_columns=True, index=["cols"])
            result = store.select("df", where="values>2.0")
            tm.assert_frame_equal(result, expected)

    def test_start_stop_table(self, setup_path):
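        # start/stop bound the raw row range read from the table
        # (half-open, like ``iloc``); out-of-range bounds simply yield an
        # empty result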

        with ensure_clean_store(setup_path) as store:

            # table
            df = DataFrame({"A": np.random.rand(20), "B": np.random.rand(20)})
            store.append("df", df)

            result = store.select("df", "columns=['A']", start=0, stop=5)
            expected = df.loc[0:4, ["A"]]
            tm.assert_frame_equal(result, expected)

            # out of range
            result = store.select("df", "columns=['A']", start=30, stop=40)
            assert len(result) == 0
            expected = df.loc[30:40, ["A"]]
            tm.assert_frame_equal(result, expected)

    def test_start_stop_multiple(self, setup_path):

        # GH 16209
        with ensure_clean_store(setup_path) as store:

            df = DataFrame({"foo": [1, 2], "bar": [1, 2]})

            store.append_to_multiple(
                {"selector": ["foo"], "data": None}, df, selector="selector"
            )
            result = store.select_as_multiple(
                ["selector", "data"], selector="selector", start=0, stop=1
            )
            expected = df.loc[[0], ["foo", "bar"]]
            tm.assert_frame_equal(result, expected)

    def test_start_stop_fixed(self, setup_path):
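        # fixed-format stores cannot be queried with ``where``, but
        # positional start/stop slicing still works (GH 8287)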

        with ensure_clean_store(setup_path) as store:

            # fixed, GH 8287
            df = DataFrame(
                {"A": np.random.rand(20), "B": np.random.rand(20)},
                index=pd.date_range("20130101", periods=20),
            )
            store.put("df", df)

            result = store.select("df", start=0, stop=5)
            expected = df.iloc[0:5, :]
            tm.assert_frame_equal(result, expected)

            result = store.select("df", start=5, stop=10)
            expected = df.iloc[5:10, :]
            tm.assert_frame_equal(result, expected)

            # out of range
            result = store.select("df", start=30, stop=40)
            expected = df.iloc[30:40, :]
            tm.assert_frame_equal(result, expected)

            # series
            s = df.A
            store.put("s", s)
            result = store.select("s", start=0, stop=5)
            expected = s.iloc[0:5]
            tm.assert_series_equal(result, expected)

            result = store.select("s", start=5, stop=10)
            expected = s.iloc[5:10]
            tm.assert_series_equal(result, expected)

            # sparse; not implemented
            df = tm.makeDataFrame()
            df.iloc[3:5, 1:3] = np.nan
            df.iloc[8:10, -2] = np.nan

    def test_select_filter_corner(self, setup_path):

        df = DataFrame(np.random.randn(50, 100))
        df.index = [f"{c:3d}" for c in df.index]
        df.columns = [f"{c:3d}" for c in df.columns]

        with ensure_clean_store(setup_path) as store:
            store.put("frame", df, format="table")

            crit = "columns=df.columns[:75]"
            result = store.select("frame", [crit])
            tm.assert_frame_equal(result, df.loc[:, df.columns[:75]])

            crit = "columns=df.columns[:75:2]"
            result = store.select("frame", [crit])
            tm.assert_frame_equal(result, df.loc[:, df.columns[:75:2]])

    def test_path_pathlib(self, setup_path):
        df = tm.makeDataFrame()

        result = tm.round_trip_pathlib(
            lambda p: df.to_hdf(p, "df"), lambda p: pd.read_hdf(p, "df")
        )
        tm.assert_frame_equal(df, result)

@pytest.mark.parametrize("start, stop", [(0, 2), (1, 2), (None, None)])
|
|
def test_contiguous_mixed_data_table(self, start, stop, setup_path):
|
|
# GH 17021
|
|
# ValueError when reading a contiguous mixed-data table ft. VLArray
|
|
df = DataFrame(
|
|
{
|
|
"a": Series([20111010, 20111011, 20111012]),
|
|
"b": Series(["ab", "cd", "ab"]),
|
|
}
|
|
)
|
|
|
|
with ensure_clean_store(setup_path) as store:
|
|
store.append("test_dataset", df)
|
|
|
|
result = store.select("test_dataset", start=start, stop=stop)
|
|
tm.assert_frame_equal(df[start:stop], result)
|
|
|
|
    def test_path_pathlib_hdfstore(self, setup_path):
        df = tm.makeDataFrame()

        def writer(path):
            with HDFStore(path) as store:
                df.to_hdf(store, "df")

        def reader(path):
            with HDFStore(path) as store:
                return pd.read_hdf(store, "df")

        result = tm.round_trip_pathlib(writer, reader)
        tm.assert_frame_equal(df, result)

    def test_pickle_path_localpath(self, setup_path):
        df = tm.makeDataFrame()
        result = tm.round_trip_localpath(
            lambda p: df.to_hdf(p, "df"), lambda p: pd.read_hdf(p, "df")
        )
        tm.assert_frame_equal(df, result)

    def test_path_localpath_hdfstore(self, setup_path):
        df = tm.makeDataFrame()

        def writer(path):
            with HDFStore(path) as store:
                df.to_hdf(store, "df")

        def reader(path):
            with HDFStore(path) as store:
                return pd.read_hdf(store, "df")

        result = tm.round_trip_localpath(writer, reader)
        tm.assert_frame_equal(df, result)

    def _check_roundtrip(self, obj, comparator, path, compression=False, **kwargs):
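        # helper: write ``obj`` under a fixed key, read it back, and
        # compare; with compression enabled the module-level default
        # compressor is used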

        options = {}
        if compression:
            options["complib"] = _default_compressor

        with ensure_clean_store(path, "w", **options) as store:
            store["obj"] = obj
            retrieved = store["obj"]
            comparator(retrieved, obj, **kwargs)

    def _check_double_roundtrip(
        self, obj, comparator, path, compression=False, **kwargs
    ):
        options = {}
        if compression:
            # ``compression`` is a boolean flag here, so pass the default
            # compressor rather than the flag itself
            options["complib"] = _default_compressor

        with ensure_clean_store(path, "w", **options) as store:
            store["obj"] = obj
            retrieved = store["obj"]
            comparator(retrieved, obj, **kwargs)
            store["obj"] = retrieved
            again = store["obj"]
            comparator(again, obj, **kwargs)

    def _check_roundtrip_table(self, obj, comparator, path, compression=False):
        options = {}
        if compression:
            options["complib"] = _default_compressor

        with ensure_clean_store(path, "w", **options) as store:
            store.put("obj", obj, format="table")
            retrieved = store["obj"]

            comparator(retrieved, obj)

    def test_multiple_open_close(self, setup_path):
        # gh-4409: open & close multiple times
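        # whether two handles may be open on one file depends on the
        # PyTables file-open policy; under the strict policy a second
        # HDFStore on the same path raises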

        with ensure_clean_path(setup_path) as path:

            df = tm.makeDataFrame()
            df.to_hdf(path, "df", mode="w", format="table")

            # single
            store = HDFStore(path)
            assert "CLOSED" not in store.info()
            assert store.is_open

            store.close()
            assert "CLOSED" in store.info()
            assert not store.is_open

        with ensure_clean_path(setup_path) as path:

            if pytables._table_file_open_policy_is_strict:

                # multiples
                store1 = HDFStore(path)

                with pytest.raises(ValueError):
                    HDFStore(path)

                store1.close()
            else:

                # multiples
                store1 = HDFStore(path)
                store2 = HDFStore(path)

                assert "CLOSED" not in store1.info()
                assert "CLOSED" not in store2.info()
                assert store1.is_open
                assert store2.is_open

                store1.close()
                assert "CLOSED" in store1.info()
                assert not store1.is_open
                assert "CLOSED" not in store2.info()
                assert store2.is_open

                store2.close()
                assert "CLOSED" in store1.info()
                assert "CLOSED" in store2.info()
                assert not store1.is_open
                assert not store2.is_open

                # nested close
                store = HDFStore(path, mode="w")
                store.append("df", df)

                store2 = HDFStore(path)
                store2.append("df2", df)
                store2.close()
                assert "CLOSED" in store2.info()
                assert not store2.is_open

                store.close()
                assert "CLOSED" in store.info()
                assert not store.is_open

                # double closing
                store = HDFStore(path, mode="w")
                store.append("df", df)

                store2 = HDFStore(path)
                store.close()
                assert "CLOSED" in store.info()
                assert not store.is_open

                store2.close()
                assert "CLOSED" in store2.info()
                assert not store2.is_open

        # ops on a closed store
        with ensure_clean_path(setup_path) as path:

            df = tm.makeDataFrame()
            df.to_hdf(path, "df", mode="w", format="table")

            store = HDFStore(path)
            store.close()

            with pytest.raises(ClosedFileError):
                store.keys()

            with pytest.raises(ClosedFileError):
                "df" in store

            with pytest.raises(ClosedFileError):
                len(store)

            with pytest.raises(ClosedFileError):
                store["df"]

            with pytest.raises(AttributeError):
                store.df

            with pytest.raises(ClosedFileError):
                store.select("df")

            with pytest.raises(ClosedFileError):
                store.get("df")

            with pytest.raises(ClosedFileError):
                store.append("df2", df)

            with pytest.raises(ClosedFileError):
                store.put("df3", df)

            with pytest.raises(ClosedFileError):
                store.get_storer("df2")

            with pytest.raises(ClosedFileError):
                store.remove("df2")

            with pytest.raises(ClosedFileError, match="file is not open"):
                store.select("df")

    def test_pytables_native_read(self, datapath, setup_path):
        with ensure_clean_store(
            datapath("io", "data", "legacy_hdf/pytables_native.h5"), mode="r"
        ) as store:
            d2 = store["detector/readout"]
            assert isinstance(d2, DataFrame)

    @pytest.mark.skipif(
        is_platform_windows(), reason="native2 read fails oddly on windows"
    )
    def test_pytables_native2_read(self, datapath, setup_path):
        with ensure_clean_store(
            datapath("io", "data", "legacy_hdf", "pytables_native2.h5"), mode="r"
        ) as store:
            str(store)
            d1 = store["detector"]
            assert isinstance(d1, DataFrame)

    def test_legacy_table_fixed_format_read_py2(self, datapath, setup_path):
        # GH 24510
        # legacy table with fixed format written in Python 2
        with ensure_clean_store(
            datapath("io", "data", "legacy_hdf", "legacy_table_fixed_py2.h5"), mode="r"
        ) as store:
            result = store.select("df")
            expected = DataFrame(
                [[1, 2, 3, "D"]],
                columns=["A", "B", "C", "D"],
                index=Index(["ABC"], name="INDEX_NAME"),
            )
            tm.assert_frame_equal(expected, result)

    def test_legacy_table_fixed_format_read_datetime_py2(self, datapath, setup_path):
        # GH 31750
        # legacy table with fixed format and datetime64 column written in Python 2
        with ensure_clean_store(
            datapath("io", "data", "legacy_hdf", "legacy_table_fixed_datetime_py2.h5"),
            mode="r",
        ) as store:
            result = store.select("df")
            expected = DataFrame(
                [[Timestamp("2020-02-06T18:00")]],
                columns=["A"],
                index=Index(["date"]),
            )
            tm.assert_frame_equal(expected, result)

    def test_legacy_table_read_py2(self, datapath, setup_path):
        # issue: 24925
        # legacy table written in Python 2
        with ensure_clean_store(
            datapath("io", "data", "legacy_hdf", "legacy_table_py2.h5"), mode="r"
        ) as store:
            result = store.select("table")

            expected = DataFrame({"a": ["a", "b"], "b": [2, 3]})
            tm.assert_frame_equal(expected, result)

    def test_copy(self, setup_path):

        with catch_warnings(record=True):

            def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs):
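                # copy the store to a temporary file, then verify that
                # keys, row counts, and (optionally) column indexes
                # survived the copy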
                try:
                    store = HDFStore(f, "r")

                    if new_f is None:
                        import tempfile

                        fd, new_f = tempfile.mkstemp()
                    tstore = store.copy(
                        new_f, keys=keys, propindexes=propindexes, **kwargs
                    )

                    # check keys
                    if keys is None:
                        keys = store.keys()
                    assert set(keys) == set(tstore.keys())

                    # check indices & nrows
                    for k in tstore.keys():
                        if tstore.get_storer(k).is_table:
                            new_t = tstore.get_storer(k)
                            orig_t = store.get_storer(k)

                            assert orig_t.nrows == new_t.nrows

                            # check propindexes
                            if propindexes:
                                for a in orig_t.axes:
                                    if a.is_indexed:
                                        assert new_t[a.name].is_indexed

                finally:
                    safe_close(store)
                    safe_close(tstore)
                    try:
                        os.close(fd)
                    except (OSError, ValueError):
                        pass
                    os.remove(new_f)

            # new table
            df = tm.makeDataFrame()

            with tm.ensure_clean() as path:
                st = HDFStore(path)
                st.append("df", df, data_columns=["A"])
                st.close()
                do_copy(f=path)
                do_copy(f=path, propindexes=False)

    def test_store_datetime_fractional_secs(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456)
            series = Series([0], [dt])
            store["a"] = series
            assert store["a"].index[0] == dt

    def test_tseries_indices_series(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            idx = tm.makeDateIndex(10)
            ser = Series(np.random.randn(len(idx)), idx)
            store["a"] = ser
            result = store["a"]

            tm.assert_series_equal(result, ser)
            assert result.index.freq == ser.index.freq
            tm.assert_class_equal(result.index, ser.index, obj="series index")

            idx = tm.makePeriodIndex(10)
            ser = Series(np.random.randn(len(idx)), idx)
            store["a"] = ser
            result = store["a"]

            tm.assert_series_equal(result, ser)
            assert result.index.freq == ser.index.freq
            tm.assert_class_equal(result.index, ser.index, obj="series index")

    def test_tseries_indices_frame(self, setup_path):

        with ensure_clean_store(setup_path) as store:
            idx = tm.makeDateIndex(10)
            df = DataFrame(np.random.randn(len(idx), 3), index=idx)
            store["a"] = df
            result = store["a"]

            tm.assert_frame_equal(result, df)
            assert result.index.freq == df.index.freq
            tm.assert_class_equal(result.index, df.index, obj="dataframe index")

            idx = tm.makePeriodIndex(10)
            df = DataFrame(np.random.randn(len(idx), 3), idx)
            store["a"] = df
            result = store["a"]

            tm.assert_frame_equal(result, df)
            assert result.index.freq == df.index.freq
            tm.assert_class_equal(result.index, df.index, obj="dataframe index")

    def test_unicode_index(self, setup_path):

        unicode_values = ["\u03c3", "\u03c3\u03c3"]

        # PerformanceWarning
        with catch_warnings(record=True):
            simplefilter("ignore", pd.errors.PerformanceWarning)
            s = Series(np.random.randn(len(unicode_values)), unicode_values)
            self._check_roundtrip(s, tm.assert_series_equal, path=setup_path)

    def test_unicode_longer_encoded(self, setup_path):
        # GH 11234
        char = "\u0394"
        df = DataFrame({"A": [char]})
        with ensure_clean_store(setup_path) as store:
            store.put("df", df, format="table", encoding="utf-8")
            result = store.get("df")
            tm.assert_frame_equal(result, df)

        df = DataFrame({"A": ["a", char], "B": ["b", "b"]})
        with ensure_clean_store(setup_path) as store:
            store.put("df", df, format="table", encoding="utf-8")
            result = store.get("df")
            tm.assert_frame_equal(result, df)

    def test_store_datetime_mixed(self, setup_path):

        df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["a", "b", "c"]})
        ts = tm.makeTimeSeries()
        df["d"] = ts.index[:3]
        self._check_roundtrip(df, tm.assert_frame_equal, path=setup_path)

    # FIXME: don't leave commented-out code
    # def test_cant_write_multiindex_table(self):
    #     # for now, #1848
    #     df = DataFrame(np.random.randn(10, 4),
    #                    index=[np.arange(5).repeat(2),
    #                           np.tile(np.arange(2), 5)])
    #
    #     with pytest.raises(Exception):
    #         store.put('foo', df, format='table')

    def test_append_with_diff_col_name_types_raises_value_error(self, setup_path):
        df = DataFrame(np.random.randn(10, 1))
        df2 = DataFrame({"a": np.random.randn(10)})
        df3 = DataFrame({(1, 2): np.random.randn(10)})
        df4 = DataFrame({("1", 2): np.random.randn(10)})
        df5 = DataFrame({("1", 2, object): np.random.randn(10)})

        with ensure_clean_store(setup_path) as store:
            name = f"df_{tm.rands(10)}"
            store.append(name, df)

            for d in (df2, df3, df4, df5):
                with pytest.raises(ValueError):
                    store.append(name, d)

    def test_query_with_nested_special_character(self, setup_path):
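        # a quoted query value may itself contain special characters such
        # as '&'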
        df = DataFrame(
            {
                "a": ["a", "a", "c", "b", "test & test", "c", "b", "e"],
                "b": [1, 2, 3, 4, 5, 6, 7, 8],
            }
        )
        expected = df[df.a == "test & test"]
        with ensure_clean_store(setup_path) as store:
            store.append("test", df, format="table", data_columns=True)
            result = store.select("test", 'a = "test & test"')
            tm.assert_frame_equal(expected, result)

    def test_categorical(self, setup_path):
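        # categoricals round-trip through the table format; the category
        # values are stored in a hidden ``/meta`` subtable alongside the
        # codes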

        with ensure_clean_store(setup_path) as store:

            # Basic
            _maybe_remove(store, "s")
            s = Series(
                Categorical(
                    ["a", "b", "b", "a", "a", "c"],
                    categories=["a", "b", "c", "d"],
                    ordered=False,
                )
            )
            store.append("s", s, format="table")
            result = store.select("s")
            tm.assert_series_equal(s, result)

            _maybe_remove(store, "s_ordered")
            s = Series(
                Categorical(
                    ["a", "b", "b", "a", "a", "c"],
                    categories=["a", "b", "c", "d"],
                    ordered=True,
                )
            )
            store.append("s_ordered", s, format="table")
            result = store.select("s_ordered")
            tm.assert_series_equal(s, result)

            _maybe_remove(store, "df")
            df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]})
            store.append("df", df, format="table")
            result = store.select("df")
            tm.assert_frame_equal(result, df)

            # Dtypes
            _maybe_remove(store, "si")
            s = Series([1, 1, 2, 2, 3, 4, 5]).astype("category")
            store.append("si", s)
            result = store.select("si")
            tm.assert_series_equal(result, s)

            _maybe_remove(store, "si2")
            s = Series([1, 1, np.nan, 2, 3, 4, 5]).astype("category")
            store.append("si2", s)
            result = store.select("si2")
            tm.assert_series_equal(result, s)

            # Multiple
            _maybe_remove(store, "df2")
            df2 = df.copy()
            df2["s2"] = Series(list("abcdefg")).astype("category")
            store.append("df2", df2)
            result = store.select("df2")
            tm.assert_frame_equal(result, df2)

            # Make sure the metadata is OK
            info = store.info()
            assert "/df2 " in info
            # assert '/df2/meta/values_block_0/meta' in info
            assert "/df2/meta/values_block_1/meta" in info

            # unordered
            _maybe_remove(store, "s2")
            s = Series(
                Categorical(
                    ["a", "b", "b", "a", "a", "c"],
                    categories=["a", "b", "c", "d"],
                    ordered=False,
                )
            )
            store.append("s2", s, format="table")
            result = store.select("s2")
            tm.assert_series_equal(result, s)

            # Query
            _maybe_remove(store, "df3")
            store.append("df3", df, data_columns=["s"])
            expected = df[df.s.isin(["b", "c"])]
            result = store.select("df3", where=['s in ["b","c"]'])
            tm.assert_frame_equal(result, expected)

            expected = df[df.s.isin(["b", "c"])]
            result = store.select("df3", where=['s = ["b","c"]'])
            tm.assert_frame_equal(result, expected)

            expected = df[df.s.isin(["d"])]
            result = store.select("df3", where=['s in ["d"]'])
            tm.assert_frame_equal(result, expected)

            expected = df[df.s.isin(["f"])]
            result = store.select("df3", where=['s in ["f"]'])
            tm.assert_frame_equal(result, expected)

            # Appending with the same categories is ok
            store.append("df3", df)

            df = concat([df, df])
            expected = df[df.s.isin(["b", "c"])]
            result = store.select("df3", where=['s in ["b","c"]'])
            tm.assert_frame_equal(result, expected)

            # Appending must have the same categories
            df3 = df.copy()
            df3["s"] = df3["s"].cat.remove_unused_categories()

            with pytest.raises(ValueError):
                store.append("df3", df3)

            # Remove, and make sure the metadata is removed too (the
            # removal is recursive, so it should be)
            result = store.select("df3/meta/s/meta")
            assert result is not None
            store.remove("df3")

            with pytest.raises(
                KeyError, match="'No object named df3/meta/s/meta in the file'"
            ):
                store.select("df3/meta/s/meta")

    def test_categorical_conversion(self, setup_path):

        # GH13322
        # Check that read_hdf with categorical columns doesn't return rows
        # when the where criteria aren't met.
        obsids = ["ESP_012345_6789", "ESP_987654_3210"]
        imgids = ["APF00006np", "APF0001imm"]
        data = [4.3, 9.8]

        # Test without categories
        df = DataFrame({"obsids": obsids, "imgids": imgids, "data": data})

        # We are expecting an empty DataFrame matching types of df
        expected = df.iloc[[], :]
        with ensure_clean_path(setup_path) as path:
            df.to_hdf(path, "df", format="table", data_columns=True)
            result = read_hdf(path, "df", where="obsids=B")
            tm.assert_frame_equal(result, expected)

        # Test with categories
        df.obsids = df.obsids.astype("category")
        df.imgids = df.imgids.astype("category")

        # We are expecting an empty DataFrame matching types of df
        expected = df.iloc[[], :]
        with ensure_clean_path(setup_path) as path:
            df.to_hdf(path, "df", format="table", data_columns=True)
            result = read_hdf(path, "df", where="obsids=B")
            tm.assert_frame_equal(result, expected)

    def test_categorical_nan_only_columns(self, setup_path):
        # GH18413
        # Check that read_hdf with categorical columns with NaN-only values can
        # be read back.
        df = DataFrame(
            {
                "a": ["a", "b", "c", np.nan],
                "b": [np.nan, np.nan, np.nan, np.nan],
                "c": [1, 2, 3, 4],
                "d": Series([None] * 4, dtype=object),
            }
        )
        df["a"] = df.a.astype("category")
        df["b"] = df.b.astype("category")
        df["d"] = df.b.astype("category")
        expected = df
        with ensure_clean_path(setup_path) as path:
            df.to_hdf(path, "df", format="table", data_columns=True)
            result = read_hdf(path, "df")
            tm.assert_frame_equal(result, expected)

    def test_duplicate_column_name(self, setup_path):
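        # duplicate column names cannot be stored in fixed format, but a
        # table-format round trip preserves them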
        df = DataFrame(columns=["a", "a"], data=[[0, 0]])

        with ensure_clean_path(setup_path) as path:
            with pytest.raises(ValueError):
                df.to_hdf(path, "df", format="fixed")

            df.to_hdf(path, "df", format="table")
            other = read_hdf(path, "df")

            tm.assert_frame_equal(df, other)
            assert df.equals(other)
            assert other.equals(df)

    def test_round_trip_equals(self, setup_path):
        # GH 9330
        df = DataFrame({"B": [1, 2], "A": ["x", "y"]})

        with ensure_clean_path(setup_path) as path:
            df.to_hdf(path, "df", format="table")
            other = read_hdf(path, "df")
            tm.assert_frame_equal(df, other)
            assert df.equals(other)
            assert other.equals(df)

    def test_preserve_timedeltaindex_type(self, setup_path):
        # GH9635
        # Storing TimedeltaIndexed DataFrames in fixed stores did not preserve
        # the type of the index.
        df = DataFrame(np.random.normal(size=(10, 5)))
        df.index = timedelta_range(start="0s", periods=10, freq="1s", name="example")

        with ensure_clean_store(setup_path) as store:

            store["df"] = df
            tm.assert_frame_equal(store["df"], df)

    def test_columns_multiindex_modified(self, setup_path):
        # BUG: 7212
        # read_hdf store.select modified the passed columns parameters
        # when multi-indexed.

        df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
        df.index.name = "letters"
        df = df.set_index(keys="E", append=True)

        data_columns = df.index.names + df.columns.tolist()
        with ensure_clean_path(setup_path) as path:
            df.to_hdf(
                path,
                "df",
                mode="a",
                append=True,
                data_columns=data_columns,
                index=False,
            )
            cols2load = list("BCD")
            cols2load_original = list(cols2load)
            df_loaded = read_hdf(path, "df", columns=cols2load)  # noqa
            assert cols2load_original == cols2load

    @ignore_natural_naming_warning
    def test_to_hdf_with_object_column_names(self, setup_path):
        # GH9057
        # Writing HDF5 table format should only work for string-like
        # column types

        types_should_fail = [
            tm.makeIntIndex,
            tm.makeFloatIndex,
            tm.makeDateIndex,
            tm.makeTimedeltaIndex,
            tm.makePeriodIndex,
        ]
        types_should_run = [
            tm.makeStringIndex,
            tm.makeCategoricalIndex,
            tm.makeUnicodeIndex,
        ]

        for index in types_should_fail:
            df = DataFrame(np.random.randn(10, 2), columns=index(2))
            with ensure_clean_path(setup_path) as path:
                with catch_warnings(record=True):
                    msg = "cannot have non-object label DataIndexableCol"
                    with pytest.raises(ValueError, match=msg):
                        df.to_hdf(path, "df", format="table", data_columns=True)

        for index in types_should_run:
            df = DataFrame(np.random.randn(10, 2), columns=index(2))
            with ensure_clean_path(setup_path) as path:
                with catch_warnings(record=True):
                    df.to_hdf(path, "df", format="table", data_columns=True)
                    result = pd.read_hdf(path, "df", where=f"index = [{df.index[0]}]")
                    assert len(result)

    def test_read_hdf_open_store(self, setup_path):
        # GH10330
        # No check for non-string path_or_buf, and no test of open store
        df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
        df.index.name = "letters"
        df = df.set_index(keys="E", append=True)

        with ensure_clean_path(setup_path) as path:
            df.to_hdf(path, "df", mode="w")
            direct = read_hdf(path, "df")
            store = HDFStore(path, mode="r")
            indirect = read_hdf(store, "df")
            tm.assert_frame_equal(direct, indirect)
            assert store.is_open
            store.close()

    def test_read_hdf_iterator(self, setup_path):
        df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
        df.index.name = "letters"
        df = df.set_index(keys="E", append=True)

        with ensure_clean_path(setup_path) as path:
            df.to_hdf(path, "df", mode="w", format="t")
            direct = read_hdf(path, "df")
            iterator = read_hdf(path, "df", iterator=True)
            assert isinstance(iterator, TableIterator)
            indirect = next(iterator.__iter__())
            tm.assert_frame_equal(direct, indirect)
            iterator.store.close()

    def test_read_hdf_errors(self, setup_path):
        df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))

        with ensure_clean_path(setup_path) as path:
            with pytest.raises(IOError):
                read_hdf(path, "key")

            df.to_hdf(path, "df")
            store = HDFStore(path, mode="r")
            store.close()

            with pytest.raises(IOError):
                read_hdf(store, "df")

    def test_read_hdf_generic_buffer_errors(self):
        with pytest.raises(NotImplementedError):
            read_hdf(BytesIO(b""), "df")

    def test_invalid_complib(self, setup_path):
        df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))
        with tm.ensure_clean(setup_path) as path:
            with pytest.raises(ValueError):
                df.to_hdf(path, "df", complib="foolib")

    # GH10443

    def test_read_nokey(self, setup_path):
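        # read_hdf without an explicit key works only while the file holds
        # exactly one pandas object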
        df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))

        # Categorical dtype not supported for "fixed" format. So no need
        # to test with that dtype in the dataframe here.
        with ensure_clean_path(setup_path) as path:
            df.to_hdf(path, "df", mode="a")
            reread = read_hdf(path)
            tm.assert_frame_equal(df, reread)
            df.to_hdf(path, "df2", mode="a")

            with pytest.raises(ValueError):
                read_hdf(path)

    def test_read_nokey_table(self, setup_path):
        # GH13231
        df = DataFrame({"i": range(5), "c": Series(list("abacd"), dtype="category")})

        with ensure_clean_path(setup_path) as path:
            df.to_hdf(path, "df", mode="a", format="table")
            reread = read_hdf(path)
            tm.assert_frame_equal(df, reread)
            df.to_hdf(path, "df2", mode="a", format="table")

            with pytest.raises(ValueError):
                read_hdf(path)

    def test_read_nokey_empty(self, setup_path):
        with ensure_clean_path(setup_path) as path:
            store = HDFStore(path)
            store.close()

            with pytest.raises(ValueError):
                read_hdf(path)

    def test_read_from_pathlib_path(self, setup_path):

        # GH11773
        expected = DataFrame(
            np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")
        )
        with ensure_clean_path(setup_path) as filename:
            path_obj = Path(filename)

            expected.to_hdf(path_obj, "df", mode="a")
            actual = read_hdf(path_obj, "df")

        tm.assert_frame_equal(expected, actual)

    @td.skip_if_no("py.path")
    def test_read_from_py_localpath(self, setup_path):

        # GH11773
        from py.path import local as LocalPath

        expected = DataFrame(
            np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")
        )
        with ensure_clean_path(setup_path) as filename:
            path_obj = LocalPath(filename)

            expected.to_hdf(path_obj, "df", mode="a")
            actual = read_hdf(path_obj, "df")

        tm.assert_frame_equal(expected, actual)

    def test_query_long_float_literal(self, setup_path):
        # GH 14241
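        # float literals in a query string must keep full precision;
        # values that differ only far past the decimal point are still
        # distinguished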
        df = DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]})

        with ensure_clean_store(setup_path) as store:
            store.append("test", df, format="table", data_columns=True)

            cutoff = 1000000000.0006
            result = store.select("test", f"A < {cutoff:.4f}")
            assert result.empty

            cutoff = 1000000000.0010
            result = store.select("test", f"A > {cutoff:.4f}")
            expected = df.loc[[1, 2], :]
            tm.assert_frame_equal(expected, result)

            exact = 1000000000.0011
            result = store.select("test", f"A == {exact:.4f}")
            expected = df.loc[[1], :]
            tm.assert_frame_equal(expected, result)

    def test_query_compare_column_type(self, setup_path):
        # GH 15492
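        # query constants are coerced to the column's type: non-strings
        # compared to a string column raise TypeError, while strings
        # compared to numeric/datetime columns must be parseable as that
        # type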
        df = DataFrame(
            {
                "date": ["2014-01-01", "2014-01-02"],
                "real_date": date_range("2014-01-01", periods=2),
                "float": [1.1, 1.2],
                "int": [1, 2],
            },
            columns=["date", "real_date", "float", "int"],
        )

        with ensure_clean_store(setup_path) as store:
            store.append("test", df, format="table", data_columns=True)

            ts = Timestamp("2014-01-01")  # noqa
            result = store.select("test", where="real_date > ts")
            expected = df.loc[[1], :]
            tm.assert_frame_equal(expected, result)

            for op in ["<", ">", "=="]:
                # non strings to string column always fail
                for v in [2.1, True, Timestamp("2014-01-01"), pd.Timedelta(1, "s")]:
                    query = f"date {op} v"
                    with pytest.raises(TypeError):
                        store.select("test", where=query)

                # strings to other columns must be convertible to type
                v = "a"
                for col in ["int", "float", "real_date"]:
                    query = f"{col} {op} v"
                    with pytest.raises(ValueError):
                        store.select("test", where=query)

                for v, col in zip(
                    ["1", "1.1", "2014-01-01"], ["int", "float", "real_date"]
                ):
                    query = f"{col} {op} v"
                    result = store.select("test", where=query)

                    if op == "==":
                        expected = df.loc[[0], :]
                    elif op == ">":
                        expected = df.loc[[1], :]
                    else:
                        expected = df.loc[[], :]
                    tm.assert_frame_equal(expected, result)

    @pytest.mark.parametrize("format", ["fixed", "table"])
    def test_read_hdf_series_mode_r(self, format, setup_path):
        # GH 16583
        # Tests that reading a Series saved to an HDF file
        # still works if a mode='r' argument is supplied
        series = tm.makeFloatSeries()
        with ensure_clean_path(setup_path) as path:
            series.to_hdf(path, key="data", format=format)
            result = pd.read_hdf(path, key="data", mode="r")
        tm.assert_series_equal(result, series)

    def test_fspath(self):
        with tm.ensure_clean("foo.h5") as path:
            with HDFStore(path) as store:
                assert os.fspath(store) == str(path)

    def test_read_py2_hdf_file_in_py3(self, datapath):
        # GH 16781

        # tests reading a PeriodIndex DataFrame written in Python2 in Python3

        # the file was generated in Python 2.7 like so:
        #
        # df = DataFrame([1.,2,3], index=pd.PeriodIndex(
        #     ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))
        # df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p')

        expected = DataFrame(
            [1.0, 2, 3],
            index=pd.PeriodIndex(["2015-01-01", "2015-01-02", "2015-01-05"], freq="B"),
        )

        with ensure_clean_store(
            datapath(
                "io", "data", "legacy_hdf", "periodindex_0.20.1_x86_64_darwin_2.7.13.h5"
            ),
            mode="r",
        ) as store:
            result = store["p"]
            tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("where", ["", (), (None,), [], [None]])
|
|
def test_select_empty_where(self, where):
|
|
# GH26610
|
|
|
|
# Using keyword `where` as '' or (), or [None], etc
|
|
# while reading from HDF store raises
|
|
# "SyntaxError: only a single expression is allowed"
|
|
|
|
df = DataFrame([1, 2, 3])
|
|
with ensure_clean_path("empty_where.h5") as path:
|
|
with HDFStore(path) as store:
|
|
store.put("df", df, "t")
|
|
result = pd.read_hdf(store, "df", where=where)
|
|
tm.assert_frame_equal(result, df)
|
|
|
|
    @pytest.mark.parametrize(
        "idx",
        [
            date_range("2019", freq="D", periods=3, tz="UTC"),
            CategoricalIndex(list("abc")),
        ],
    )
    def test_to_hdf_multiindex_extension_dtype(self, idx, setup_path):
        # GH 7775
        mi = MultiIndex.from_arrays([idx, idx])
        df = DataFrame(0, index=mi, columns=["a"])
        with ensure_clean_path(setup_path) as path:
            with pytest.raises(NotImplementedError, match="Saving a MultiIndex"):
                df.to_hdf(path, "df")

    def test_unsupported_hdf_file_error(self, datapath):
        # GH 9539
        data_path = datapath("io", "data", "legacy_hdf/incompatible_dataset.h5")
        message = (
            r"Dataset\(s\) incompatible with Pandas data types, "
            "not table, or no datasets found in HDF5 file."
        )

        with pytest.raises(ValueError, match=message):
            pd.read_hdf(data_path)


@pytest.mark.parametrize("bad_version", [(1, 2), (1,), [], "12", "123"])
|
|
def test_maybe_adjust_name_bad_version_raises(bad_version):
|
|
msg = "Version is incorrect, expected sequence of 3 integers"
|
|
with pytest.raises(ValueError, match=msg):
|
|
_maybe_adjust_name("values_block_0", version=bad_version)
|