"""Tests for Table Schema integration.""" from collections import OrderedDict import json import sys import numpy as np import pytest from pandas.core.dtypes.dtypes import CategoricalDtype, DatetimeTZDtype, PeriodDtype import pandas as pd from pandas import DataFrame import pandas._testing as tm from pandas.io.json._table_schema import ( as_json_table_type, build_table_schema, convert_json_field_to_pandas_type, convert_pandas_type_to_json_field, set_default_names, ) class TestBuildSchema: def setup_method(self, method): self.df = DataFrame( { "A": [1, 2, 3, 4], "B": ["a", "b", "c", "c"], "C": pd.date_range("2016-01-01", freq="d", periods=4), "D": pd.timedelta_range("1H", periods=4, freq="T"), }, index=pd.Index(range(4), name="idx"), ) def test_build_table_schema(self): result = build_table_schema(self.df, version=False) expected = { "fields": [ {"name": "idx", "type": "integer"}, {"name": "A", "type": "integer"}, {"name": "B", "type": "string"}, {"name": "C", "type": "datetime"}, {"name": "D", "type": "duration"}, ], "primaryKey": ["idx"], } assert result == expected result = build_table_schema(self.df) assert "pandas_version" in result def test_series(self): s = pd.Series([1, 2, 3], name="foo") result = build_table_schema(s, version=False) expected = { "fields": [ {"name": "index", "type": "integer"}, {"name": "foo", "type": "integer"}, ], "primaryKey": ["index"], } assert result == expected result = build_table_schema(s) assert "pandas_version" in result def test_series_unnamed(self): result = build_table_schema(pd.Series([1, 2, 3]), version=False) expected = { "fields": [ {"name": "index", "type": "integer"}, {"name": "values", "type": "integer"}, ], "primaryKey": ["index"], } assert result == expected def test_multiindex(self): df = self.df.copy() idx = pd.MultiIndex.from_product([("a", "b"), (1, 2)]) df.index = idx result = build_table_schema(df, version=False) expected = { "fields": [ {"name": "level_0", "type": "string"}, {"name": "level_1", "type": "integer"}, {"name": "A", "type": "integer"}, {"name": "B", "type": "string"}, {"name": "C", "type": "datetime"}, {"name": "D", "type": "duration"}, ], "primaryKey": ["level_0", "level_1"], } assert result == expected df.index.names = ["idx0", None] expected["fields"][0]["name"] = "idx0" expected["primaryKey"] = ["idx0", "level_1"] result = build_table_schema(df, version=False) assert result == expected class TestTableSchemaType: @pytest.mark.parametrize("int_type", [int, np.int16, np.int32, np.int64]) def test_as_json_table_type_int_data(self, int_type): int_data = [1, 2, 3] assert as_json_table_type(np.array(int_data, dtype=int_type).dtype) == "integer" @pytest.mark.parametrize("float_type", [float, np.float16, np.float32, np.float64]) def test_as_json_table_type_float_data(self, float_type): float_data = [1.0, 2.0, 3.0] assert ( as_json_table_type(np.array(float_data, dtype=float_type).dtype) == "number" ) @pytest.mark.parametrize("bool_type", [bool, np.bool_]) def test_as_json_table_type_bool_data(self, bool_type): bool_data = [True, False] assert ( as_json_table_type(np.array(bool_data, dtype=bool_type).dtype) == "boolean" ) @pytest.mark.parametrize( "date_data", [ pd.to_datetime(["2016"]), pd.to_datetime(["2016"], utc=True), pd.Series(pd.to_datetime(["2016"])), pd.Series(pd.to_datetime(["2016"], utc=True)), pd.period_range("2016", freq="A", periods=3), ], ) def test_as_json_table_type_date_data(self, date_data): assert as_json_table_type(date_data.dtype) == "datetime" @pytest.mark.parametrize("str_data", [pd.Series(["a", "b"]), pd.Index(["a", "b"])]) def test_as_json_table_type_string_data(self, str_data): assert as_json_table_type(str_data.dtype) == "string" @pytest.mark.parametrize( "cat_data", [ pd.Categorical(["a"]), pd.Categorical([1]), pd.Series(pd.Categorical([1])), pd.CategoricalIndex([1]), pd.Categorical([1]), ], ) def test_as_json_table_type_categorical_data(self, cat_data): assert as_json_table_type(cat_data.dtype) == "any" # ------ # dtypes # ------ @pytest.mark.parametrize("int_dtype", [int, np.int16, np.int32, np.int64]) def test_as_json_table_type_int_dtypes(self, int_dtype): assert as_json_table_type(int_dtype) == "integer" @pytest.mark.parametrize("float_dtype", [float, np.float16, np.float32, np.float64]) def test_as_json_table_type_float_dtypes(self, float_dtype): assert as_json_table_type(float_dtype) == "number" @pytest.mark.parametrize("bool_dtype", [bool, np.bool_]) def test_as_json_table_type_bool_dtypes(self, bool_dtype): assert as_json_table_type(bool_dtype) == "boolean" @pytest.mark.parametrize( "date_dtype", [ np.datetime64, np.dtype("