from collections import abc import numpy as np import pytest from pandas import ( CategoricalDtype, DataFrame, MultiIndex, Series, Timestamp, date_range, ) import pandas._testing as tm class TestDataFrameToRecords: def test_to_records_dt64(self): df = DataFrame( [["one", "two", "three"], ["four", "five", "six"]], index=date_range("2012-01-01", "2012-01-02"), ) expected = df.index.values[0] result = df.to_records()["index"][0] assert expected == result def test_to_records_dt64tz_column(self): # GH#32535 dont less tz in to_records df = DataFrame({"A": date_range("2012-01-01", "2012-01-02", tz="US/Eastern")}) result = df.to_records() assert result.dtype["A"] == object val = result[0][1] assert isinstance(val, Timestamp) assert val == df.loc[0, "A"] def test_to_records_with_multindex(self): # GH#3189 index = [ ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], ["one", "two", "one", "two", "one", "two", "one", "two"], ] data = np.zeros((8, 4)) df = DataFrame(data, index=index) r = df.to_records(index=True)["level_0"] assert "bar" in r assert "one" not in r def test_to_records_with_Mapping_type(self): import email from email.parser import Parser abc.Mapping.register(email.message.Message) headers = Parser().parsestr( "From: \n" "To: \n" "Subject: Test message\n" "\n" "Body would go here\n" ) frame = DataFrame.from_records([headers]) all(x in frame for x in ["Type", "Subject", "From"]) def test_to_records_floats(self): df = DataFrame(np.random.rand(10, 10)) df.to_records() def test_to_records_index_name(self): df = DataFrame(np.random.randn(3, 3)) df.index.name = "X" rs = df.to_records() assert "X" in rs.dtype.fields df = DataFrame(np.random.randn(3, 3)) rs = df.to_records() assert "index" in rs.dtype.fields df.index = MultiIndex.from_tuples([("a", "x"), ("a", "y"), ("b", "z")]) df.index.names = ["A", None] rs = df.to_records() assert "level_0" in rs.dtype.fields def test_to_records_with_unicode_index(self): # GH#13172 # unicode_literals conflict with to_records result = DataFrame([{"a": "x", "b": "y"}]).set_index("a").to_records() expected = np.rec.array([("x", "y")], dtype=[("a", "O"), ("b", "O")]) tm.assert_almost_equal(result, expected) def test_to_records_with_unicode_column_names(self): # xref issue: https://github.com/numpy/numpy/issues/2407 # Issue GH#11879. to_records used to raise an exception when used # with column names containing non-ascii characters in Python 2 result = DataFrame(data={"accented_name_é": [1.0]}).to_records() # Note that numpy allows for unicode field names but dtypes need # to be specified using dictionary instead of list of tuples. expected = np.rec.array( [(0, 1.0)], dtype={"names": ["index", "accented_name_é"], "formats": ["=i8", "=f8"]}, ) tm.assert_almost_equal(result, expected) def test_to_records_with_categorical(self): # GH#8626 # dict creation df = DataFrame({"A": list("abc")}, dtype="category") expected = Series(list("abc"), dtype="category", name="A") tm.assert_series_equal(df["A"], expected) # list-like creation df = DataFrame(list("abc"), dtype="category") expected = Series(list("abc"), dtype="category", name=0) tm.assert_series_equal(df[0], expected) # to record array # this coerces result = df.to_records() expected = np.rec.array( [(0, "a"), (1, "b"), (2, "c")], dtype=[("index", "=i8"), ("0", "O")] ) tm.assert_almost_equal(result, expected) @pytest.mark.parametrize( "kwargs,expected", [ # No dtypes --> default to array dtypes. ( {}, np.rec.array( [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")], dtype=[("index", " bool: return key in self.d def keys(self): return self.d.keys() df = DataFrame({"A": [1, 2], "B": [0.2, 1.5], "C": ["a", "bc"]}) dtype_mappings = { "column_dtypes": DictLike(**{"A": np.int8, "B": np.float32}), "index_dtypes": "