Inzynierka/Lib/site-packages/pandas/tests/interchange/test_spec_conformance.py

"""
A verbatim copy (vendored) of the spec tests.
Taken from https://github.com/data-apis/dataframe-api
"""
import ctypes
import math

import pytest
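
# NOTE: these tests rely on a ``df_from_dict`` fixture supplied by the host
# library's conftest.  As a rough, non-authoritative sketch (the exact conftest
# is not part of this vendored file), a pandas-backed fixture matching the
# usage below, including the ``is_categorical`` keyword, could look like:
#
#     import pandas as pd
#     import pytest
#
#     @pytest.fixture
#     def df_from_dict():
#         def maker(dct, is_categorical=False):
#             df = pd.DataFrame(dct)
#             return df.astype("category") if is_categorical else df
#
#         return maker
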
@pytest.mark.parametrize(
    "test_data",
    [
        {"a": ["foo", "bar"], "b": ["baz", "qux"]},
        {"a": [1.5, 2.5, 3.5], "b": [9.2, 10.5, 11.8]},
        {"A": [1, 2, 3, 4], "B": [1, 2, 3, 4]},
    ],
    ids=["str_data", "float_data", "int_data"],
)
def test_only_one_dtype(test_data, df_from_dict):
    columns = list(test_data.keys())
    df = df_from_dict(test_data)
    dfX = df.__dataframe__()

    column_size = len(test_data[columns[0]])
    for column in columns:
        null_count = dfX.get_column_by_name(column).null_count
        assert null_count == 0
        assert isinstance(null_count, int)
        assert dfX.get_column_by_name(column).size() == column_size
        assert dfX.get_column_by_name(column).offset == 0


def test_mixed_dtypes(df_from_dict):
    df = df_from_dict(
        {
            "a": [1, 2, 3],  # dtype kind INT = 0
            "b": [3, 4, 5],  # dtype kind INT = 0
            "c": [1.5, 2.5, 3.5],  # dtype kind FLOAT = 2
            "d": [9, 10, 11],  # dtype kind INT = 0
            "e": [True, False, True],  # dtype kind BOOLEAN = 20
            "f": ["a", "", "c"],  # dtype kind STRING = 21
        }
    )
    dfX = df.__dataframe__()

    # for meanings of dtype[0] see the spec; we cannot import the spec here as this
    # file is expected to be vendored *anywhere*;
    # values for dtype[0] are explained above
    columns = {"a": 0, "b": 0, "c": 2, "d": 0, "e": 20, "f": 21}

    for column, kind in columns.items():
        colX = dfX.get_column_by_name(column)
        assert colX.null_count == 0
        assert isinstance(colX.null_count, int)
        assert colX.size() == 3
        assert colX.offset == 0
        assert colX.dtype[0] == kind

    assert dfX.get_column_by_name("c").dtype[1] == 64


def test_na_float(df_from_dict):
    df = df_from_dict({"a": [1.0, math.nan, 2.0]})
    dfX = df.__dataframe__()
    colX = dfX.get_column_by_name("a")
    assert colX.null_count == 1
    assert isinstance(colX.null_count, int)


def test_noncategorical(df_from_dict):
    df = df_from_dict({"a": [1, 2, 3]})
    dfX = df.__dataframe__()
    colX = dfX.get_column_by_name("a")
    with pytest.raises(TypeError, match=".*categorical.*"):
        colX.describe_categorical


def test_categorical(df_from_dict):
    df = df_from_dict(
        {"weekday": ["Mon", "Tue", "Mon", "Wed", "Mon", "Thu", "Fri", "Sat", "Sun"]},
        is_categorical=True,
    )

    colX = df.__dataframe__().get_column_by_name("weekday")
    categorical = colX.describe_categorical
    assert isinstance(categorical["is_ordered"], bool)
    assert isinstance(categorical["is_dictionary"], bool)


def test_dataframe(df_from_dict):
    df = df_from_dict(
        {"x": [True, True, False], "y": [1, 2, 0], "z": [9.2, 10.5, 11.8]}
    )
    dfX = df.__dataframe__()

    assert dfX.num_columns() == 3
    assert dfX.num_rows() == 3
    assert dfX.num_chunks() == 1
    assert list(dfX.column_names()) == ["x", "y", "z"]
    assert list(dfX.select_columns((0, 2)).column_names()) == list(
        dfX.select_columns_by_name(("x", "z")).column_names()
    )


@pytest.mark.parametrize(["size", "n_chunks"], [(10, 3), (12, 3), (12, 5)])
def test_df_get_chunks(size, n_chunks, df_from_dict):
    df = df_from_dict({"x": list(range(size))})
    dfX = df.__dataframe__()
    chunks = list(dfX.get_chunks(n_chunks))
    assert len(chunks) == n_chunks
    assert sum(chunk.num_rows() for chunk in chunks) == size


@pytest.mark.parametrize(["size", "n_chunks"], [(10, 3), (12, 3), (12, 5)])
def test_column_get_chunks(size, n_chunks, df_from_dict):
    df = df_from_dict({"x": list(range(size))})
    dfX = df.__dataframe__()
    chunks = list(dfX.get_column(0).get_chunks(n_chunks))
    assert len(chunks) == n_chunks
    assert sum(chunk.size() for chunk in chunks) == size


def test_get_columns(df_from_dict):
    df = df_from_dict({"a": [0, 1], "b": [2.5, 3.5]})
    dfX = df.__dataframe__()
    for colX in dfX.get_columns():
        assert colX.size() == 2
        assert colX.num_chunks() == 1
    # for meanings of dtype[0] see the spec; we cannot import the spec here as this
    # file is expected to be vendored *anywhere*
    assert dfX.get_column(0).dtype[0] == 0  # INT
    assert dfX.get_column(1).dtype[0] == 2  # FLOAT


def test_buffer(df_from_dict):
    arr = [0, 1, -1]
    df = df_from_dict({"a": arr})
    dfX = df.__dataframe__()
    colX = dfX.get_column(0)
    bufX = colX.get_buffers()

    dataBuf, dataDtype = bufX["data"]

    assert dataBuf.bufsize > 0
    assert dataBuf.ptr != 0
    device, _ = dataBuf.__dlpack_device__()

    # for meanings of dtype[0] see the spec; we cannot import the spec here as this
    # file is expected to be vendored *anywhere*
    assert dataDtype[0] == 0  # INT

    if device == 1:  # CPU-only as we're going to directly read memory here
        bitwidth = dataDtype[1]
        ctype = {
            8: ctypes.c_int8,
            16: ctypes.c_int16,
            32: ctypes.c_int32,
            64: ctypes.c_int64,
        }[bitwidth]

        for idx, truth in enumerate(arr):
            val = ctype.from_address(dataBuf.ptr + idx * (bitwidth // 8)).value
            assert val == truth, f"Buffer at index {idx} mismatch"
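
# End to end, a consumer turns any object exposing ``__dataframe__`` back into
# its own dataframe type.  For pandas that entry point is
# ``pandas.api.interchange.from_dataframe``; shown below only as an
# illustrative sketch, it is not exercised by these vendored tests:
#
#     import pandas as pd
#     from pandas.api.interchange import from_dataframe
#
#     df = pd.DataFrame({"a": [0, 1, -1]})
#     result = from_dataframe(df)  # accepts anything implementing __dataframe__
#     assert result["a"].tolist() == [0, 1, -1]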