Inzynierka/Lib/site-packages/pandas/tests/io/json/test_readlines.py

from io import StringIO
from pathlib import Path
from typing import Iterator

import pytest

import pandas as pd
from pandas import (
    DataFrame,
    read_json,
)
import pandas._testing as tm

from pandas.io.json._json import JsonReader


@pytest.fixture
def lines_json_df():
    """Return a small two-column frame serialized as line-delimited JSON records."""
    frame = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    jsonl = frame.to_json(orient="records", lines=True)
    return jsonl


def test_read_jsonl():
    """Line-delimited JSON parses to one row per line regardless of key order."""
    # GH9180
    # Wrap the payload in StringIO: passing literal JSON text to read_json is
    # deprecated (pandas 2.1+), whereas file-like input works on every version.
    payload = StringIO('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
    result = read_json(payload, lines=True)
    expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
    tm.assert_frame_equal(result, expected)


def test_read_jsonl_engine_pyarrow(datapath, engine):
    """Read the bundled line-delimited data file with the requested engine."""
    json_path = datapath("io", "json", "data", "line_delimited.json")
    expected = DataFrame({"a": [1, 3, 5], "b": [2, 4, 6]})
    result = read_json(json_path, lines=True, engine=engine)
    tm.assert_frame_equal(result, expected)


def test_read_datetime(request, engine):
    """Round-trip a record whose cells hold lists and compare the parsed rows."""
    # GH33787
    if engine == "pyarrow":
        # GH 48893
        reason = "Pyarrow only supports a file path as an input and line delimited json"
        request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))

    df = DataFrame(
        [([1, 2], ["2020-03-05", "2020-04-08T09:58:49+00:00"], "hector")],
        columns=["accounts", "date", "name"],
    )
    json_line = df.to_json(lines=True, orient="records")
    # Feed the text through StringIO: passing literal JSON to read_json is
    # deprecated (pandas 2.1+), whereas file-like input works on every version.
    result = read_json(StringIO(json_line), engine=engine)
    expected = DataFrame(
        [[1, "2020-03-05", "hector"], [2, "2020-04-08T09:58:49+00:00", "hector"]],
        columns=["accounts", "date", "name"],
    )
    tm.assert_frame_equal(result, expected)


def test_read_jsonl_unicode_chars():
    """Non-ASCII characters in line-delimited JSON survive parsing unchanged."""
    # GH15132: non-ascii unicode characters
    # \u201d == RIGHT DOUBLE QUOTATION MARK
    raw = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
    expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])

    # simulate file handle
    result = read_json(StringIO(raw), lines=True)
    tm.assert_frame_equal(result, expected)

    # This second pass used to hand the raw string to read_json directly.
    # Literal JSON input is deprecated (pandas 2.1+), so a fresh buffer is
    # used instead; the payload and expectation are unchanged.
    result = read_json(StringIO(raw), lines=True)
    tm.assert_frame_equal(result, expected)


def test_to_jsonl():
    """to_json(lines=True) emits one record per line and round-trips escapes."""
    # GH9180
    df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
    result = df.to_json(orient="records", lines=True)
    expected = '{"a":1,"b":2}\n{"a":1,"b":2}\n'
    assert result == expected

    df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"])
    result = df.to_json(orient="records", lines=True)
    expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n'
    assert result == expected
    # Read back through StringIO: literal JSON input to read_json is
    # deprecated (pandas 2.1+), whereas file-like input works on every version.
    tm.assert_frame_equal(read_json(StringIO(result), lines=True), df)

    # GH15096: escaped characters in columns and data
    df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])
    result = df.to_json(orient="records", lines=True)
    expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n'
    assert result == expected
    tm.assert_frame_equal(read_json(StringIO(result), lines=True), df)


def test_to_jsonl_count_new_lines():
    """Two records serialized with lines=True contain two newline characters."""
    # GH36888
    frame = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
    serialized = frame.to_json(orient="records", lines=True)
    assert serialized.count("\n") == 2


@pytest.mark.parametrize("chunksize", [1, 1.0])
def test_readjson_chunks(request, lines_json_df, chunksize, engine):
    """Chunked reading concatenates to the same frame as a single read."""
    # Basic test that read_json(chunks=True) gives the same result as
    # read_json(chunks=False)
    # GH17048: memory usage when lines=True

    if engine == "pyarrow":
        # GH 48893
        # The leading space keeps the two implicitly concatenated literals from
        # running together ("...jsonand doesn't...") in the xfail report.
        reason = (
            "Pyarrow only supports a file path as an input and line delimited json"
            " and doesn't support chunksize parameter."
        )
        request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))

    unchunked = read_json(StringIO(lines_json_df), lines=True)
    with read_json(
        StringIO(lines_json_df), lines=True, chunksize=chunksize, engine=engine
    ) as reader:
        chunked = pd.concat(reader)

    tm.assert_frame_equal(chunked, unchunked)


def test_readjson_chunksize_requires_lines(lines_json_df, engine):
    """Requesting chunks without lines=True is rejected with a ValueError."""
    buffer = StringIO(lines_json_df)
    expected_error = "chunksize can only be passed if lines=True"
    # A single ``with`` enters pytest.raises first, so an error raised while
    # building the reader is still caught by it.
    with pytest.raises(ValueError, match=expected_error), read_json(
        buffer, lines=False, chunksize=2, engine=engine
    ) as _:
        pass


def test_readjson_chunks_series(request, engine):
    """A Series read in chunks of one line matches the Series read in one pass."""
    if engine == "pyarrow":
        # GH 48893
        # The leading space keeps the two implicitly concatenated literals from
        # running together ("...jsonand doesn't...") in the xfail report.
        reason = (
            "Pyarrow only supports a file path as an input and line delimited json"
            " and doesn't support chunksize parameter."
        )
        request.node.add_marker(pytest.mark.xfail(reason=reason))

    # Test reading line-format JSON to Series with chunksize param
    s = pd.Series({"A": 1, "B": 2})

    strio = StringIO(s.to_json(lines=True, orient="records"))
    unchunked = read_json(strio, lines=True, typ="Series", engine=engine)

    # A fresh buffer is needed because the first read consumed the previous one.
    strio = StringIO(s.to_json(lines=True, orient="records"))
    with read_json(
        strio, lines=True, typ="Series", chunksize=1, engine=engine
    ) as reader:
        chunked = pd.concat(reader)

    tm.assert_series_equal(chunked, unchunked)


def test_readjson_each_chunk(request, lines_json_df, engine):
    """Check the shape of every intermediate chunk produced with chunksize=2."""
    if engine == "pyarrow":
        # GH 48893
        # The leading space keeps the two implicitly concatenated literals from
        # running together ("...jsonand doesn't...") in the xfail report.
        reason = (
            "Pyarrow only supports a file path as an input and line delimited json"
            " and doesn't support chunksize parameter."
        )
        request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))

    # Other tests check that the final result of read_json(chunksize=True)
    # is correct. This checks the intermediate chunks.
    with read_json(
        StringIO(lines_json_df), lines=True, chunksize=2, engine=engine
    ) as reader:
        chunks = list(reader)
    # Three records split by two: one full chunk plus a one-row remainder.
    assert len(chunks) == 2
    assert chunks[0].shape == (2, 2)
    assert chunks[1].shape == (1, 2)


def test_readjson_chunks_from_file(request, engine):
    """Chunked and unchunked reads of the same on-disk file give equal frames."""
    if engine == "pyarrow":
        # GH 48893
        # The leading space keeps the two implicitly concatenated literals from
        # running together ("...jsonand doesn't...") in the xfail report.
        reason = (
            "Pyarrow only supports a file path as an input and line delimited json"
            " and doesn't support chunksize parameter."
        )
        request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))

    with tm.ensure_clean("test.json") as path:
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
        df.to_json(path, lines=True, orient="records")
        with read_json(path, lines=True, chunksize=1, engine=engine) as reader:
            chunked = pd.concat(reader)
        unchunked = read_json(path, lines=True, engine=engine)
        tm.assert_frame_equal(unchunked, chunked)


@pytest.mark.parametrize("chunksize", [None, 1])
def test_readjson_chunks_closes(chunksize):
    """Leaving the JsonReader context leaves its underlying handle closed."""
    # Keyword arguments handed to JsonReader after the path.
    reader_options = {
        "orient": None,
        "typ": "frame",
        "dtype": True,
        "convert_axes": True,
        "convert_dates": True,
        "keep_default_dates": True,
        "precise_float": False,
        "date_unit": None,
        "encoding": None,
        "lines": True,
        "chunksize": chunksize,
        "compression": None,
        "nrows": None,
    }
    frame = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    with tm.ensure_clean("test.json") as path:
        frame.to_json(path, lines=True, orient="records")
        reader = JsonReader(path, **reader_options)
        with reader:
            reader.read()
        stream_closed = reader.handles.handle.closed
        assert stream_closed, f"didn't close stream with chunksize = {chunksize}"


@pytest.mark.parametrize("chunksize", [0, -1, 2.2, "foo"])
def test_readjson_invalid_chunksize(lines_json_df, chunksize, engine):
    """Chunk sizes that are not integers >= 1 are rejected with a ValueError."""
    buffer = StringIO(lines_json_df)
    expected_error = r"'chunksize' must be an integer >=1"

    # A single ``with`` enters pytest.raises first, so an error raised while
    # building the reader is still caught by it.
    with pytest.raises(ValueError, match=expected_error), read_json(
        buffer, lines=True, chunksize=chunksize, engine=engine
    ) as _:
        pass


@pytest.mark.parametrize("chunksize", [None, 1, 2])
def test_readjson_chunks_multiple_empty_lines(chunksize):
    """Blank lines between records are ignored, with or without chunking."""
    j = """

    {"A":1,"B":4}


    {"A":2,"B":5}


    {"A":3,"B":6}
    """
    orig = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    # Wrap the text in StringIO: passing literal JSON to read_json is
    # deprecated (pandas 2.1+), whereas file-like input works on every version.
    parsed = read_json(StringIO(j), lines=True, chunksize=chunksize)
    if chunksize is None:
        # Without chunksize, read_json returns the frame directly.
        result = parsed
    else:
        # With chunksize, it returns a reader that yields frames.
        with parsed:
            result = pd.concat(parsed)
    tm.assert_frame_equal(orig, result, obj=f"chunksize: {chunksize}")


def test_readjson_unicode(request, monkeypatch, engine):
    """Read a UTF-8 file while the locale's preferred encoding is patched to cp949."""
    if engine == "pyarrow":
        # GH 48893
        # The leading space keeps the two implicitly concatenated literals from
        # running together ("...jsonand doesn't...") in the xfail report.
        reason = (
            "Pyarrow only supports a file path as an input and line delimited json"
            " and doesn't support chunksize parameter."
        )
        request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))

    with tm.ensure_clean("test.json") as path:
        # Pretend the platform default is a non-UTF-8 codec before reading.
        monkeypatch.setattr("locale.getpreferredencoding", lambda do_setlocale: "cp949")
        with open(path, "w", encoding="utf-8") as f:
            f.write('{"£©µÀÆÖÞßéöÿ":["АБВГДабвгд가"]}')

        result = read_json(path, engine=engine)
        expected = DataFrame({"£©µÀÆÖÞßéöÿ": ["АБВГДабвгд가"]})
        tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("nrows", [1, 2])
def test_readjson_nrows(nrows, engine):
    """nrows limits a line-delimited read to the first ``nrows`` records."""
    # GH 33916
    # Test reading line-format JSON to DataFrame with nrows param
    jsonl = """{"a": 1, "b": 2}
        {"a": 3, "b": 4}
        {"a": 5, "b": 6}
        {"a": 7, "b": 8}"""
    # Wrap the text in StringIO: passing literal JSON to read_json is
    # deprecated (pandas 2.1+), whereas file-like input works on every version.
    result = read_json(StringIO(jsonl), lines=True, nrows=nrows)
    expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows]
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("nrows,chunksize", [(2, 2), (4, 2)])
def test_readjson_nrows_chunks(request, nrows, chunksize, engine):
    """nrows caps the total rows produced even when reading in chunks."""
    # GH 33916
    # Test reading line-format JSON to DataFrame with nrows and chunksize param
    if engine == "pyarrow":
        # GH 48893
        # The leading space keeps the two implicitly concatenated literals from
        # running together ("...jsonand doesn't...") in the xfail report.
        reason = (
            "Pyarrow only supports a file path as an input and line delimited json"
            " and doesn't support chunksize parameter."
        )
        request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))

    jsonl = """{"a": 1, "b": 2}
        {"a": 3, "b": 4}
        {"a": 5, "b": 6}
        {"a": 7, "b": 8}"""
    # Wrap the text in StringIO: passing literal JSON to read_json is
    # deprecated (pandas 2.1+), whereas file-like input works on every version.
    with read_json(
        StringIO(jsonl), lines=True, nrows=nrows, chunksize=chunksize, engine=engine
    ) as reader:
        chunked = pd.concat(reader)
    expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows]
    tm.assert_frame_equal(chunked, expected)


def test_readjson_nrows_requires_lines(engine):
    """Passing nrows without lines=True raises a ValueError."""
    # GH 33916
    expected_error = "nrows can only be passed if lines=True"
    payload = """{"a": 1, "b": 2}
        {"a": 3, "b": 4}
        {"a": 5, "b": 6}
        {"a": 7, "b": 8}"""
    with pytest.raises(ValueError, match=expected_error):
        read_json(payload, lines=False, nrows=2, engine=engine)


def test_readjson_lines_chunks_fileurl(request, datapath, engine):
    """Read a line-delimited file through a file:// URL one record per chunk."""
    # GH 27135
    # Test reading line-format JSON from file url
    if engine == "pyarrow":
        # GH 48893
        # The leading space keeps the two implicitly concatenated literals from
        # running together ("...jsonand doesn't...") in the xfail report.
        reason = (
            "Pyarrow only supports a file path as an input and line delimited json"
            " and doesn't support chunksize parameter."
        )
        request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))

    df_list_expected = [
        DataFrame([[1, 2]], columns=["a", "b"], index=[0]),
        DataFrame([[3, 4]], columns=["a", "b"], index=[1]),
        DataFrame([[5, 6]], columns=["a", "b"], index=[2]),
    ]
    os_path = datapath("io", "json", "data", "line_delimited.json")
    file_url = Path(os_path).as_uri()
    with read_json(file_url, lines=True, chunksize=1, engine=engine) as url_reader:
        chunks = list(url_reader)

    # Comparing inside a bare loop would pass vacuously if the reader yielded
    # fewer chunks than expected, so pin the count before checking contents.
    assert len(chunks) == len(df_list_expected)
    for chunk, expected in zip(chunks, df_list_expected):
        tm.assert_frame_equal(chunk, expected)


def test_chunksize_is_incremental():
    """Chunked reads hit the source many times instead of in a single call."""
    # See https://github.com/pandas-dev/pandas/issues/34548

    class CountingSource:
        """File-like wrapper around StringIO that tallies read/iter requests."""

        def __init__(self, contents) -> None:
            self.stringio = StringIO(contents)
            self.read_count = 0

        def read(self, *args):
            self.read_count += 1
            return self.stringio.read(*args)

        def __iter__(self) -> Iterator:
            self.read_count += 1
            return iter(self.stringio)

    four_records = """{"a": 1, "b": 2}
        {"a": 3, "b": 4}
        {"a": 5, "b": 6}
        {"a": 7, "b": 8}\n"""
    source = CountingSource(four_records * 1000)
    chunks = list(read_json(source, lines=True, chunksize=100))
    assert len(chunks) > 1
    assert source.read_count > 10


@pytest.mark.parametrize("orient_", ["split", "index", "table"])
def test_to_json_append_orient(orient_):
    """mode='a' with an orient other than 'records' raises a ValueError."""
    # GH 35849
    expected_error = (
        r"mode='a' \(append\) is only supported when"
        "lines is True and orient is 'records'"
    )
    frame = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
    with pytest.raises(ValueError, match=expected_error):
        frame.to_json(mode="a", orient=orient_)


def test_to_json_append_lines():
    """mode='a' with lines=False raises a ValueError."""
    # GH 35849
    expected_error = (
        r"mode='a' \(append\) is only supported when"
        "lines is True and orient is 'records'"
    )
    frame = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
    with pytest.raises(ValueError, match=expected_error):
        frame.to_json(mode="a", lines=False, orient="records")


@pytest.mark.parametrize("mode_", ["r", "x"])
def test_to_json_append_mode(mode_):
    """A mode other than 'w' or 'a' raises a ValueError naming that mode."""
    # GH 35849
    expected_error = (
        f"mode={mode_} is not a valid option."
        "Only 'w' and 'a' are currently supported."
    )
    frame = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
    with pytest.raises(ValueError, match=expected_error):
        frame.to_json(mode=mode_, lines=False, orient="records")


def test_to_json_append_output_consistent_columns():
    """Append a frame with the same columns and read back the stacked rows."""
    # GH 35849
    first = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
    second = DataFrame({"col1": [3, 4], "col2": ["c", "d"]})
    expected = DataFrame({"col1": [1, 2, 3, 4], "col2": ["a", "b", "c", "d"]})

    with tm.ensure_clean("test.json") as path:
        # The first write uses the default mode; only the second one appends.
        first.to_json(path, lines=True, orient="records")
        second.to_json(path, mode="a", lines=True, orient="records")

        result = read_json(path, lines=True)
        tm.assert_frame_equal(result, expected)


def test_to_json_append_output_inconsistent_columns():
    """Append a frame sharing only one column and read back the combined rows."""
    # GH 35849
    # One shared column (col2), one old-only (col1), one new-only (col3).
    frames = [
        DataFrame({"col1": [1, 2], "col2": ["a", "b"]}),
        DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]}),
    ]
    expected_columns = {
        "col1": [1, 2, None, None],
        "col2": ["a", "b", "e", "f"],
        "col3": [None, None, "!", "#"],
    }
    expected = DataFrame(expected_columns)

    with tm.ensure_clean("test.json") as path:
        # Every write appends to the same file.
        for frame in frames:
            frame.to_json(path, mode="a", lines=True, orient="records")

        result = read_json(path, lines=True)
        tm.assert_frame_equal(result, expected)


def test_to_json_append_output_different_columns():
    """Append four frames with same, differing and new columns; read them back."""
    # GH 35849
    # Listed in the order they are written to the file.
    frames = [
        DataFrame({"col1": [1, 2], "col2": ["a", "b"]}),
        DataFrame({"col1": [3, 4], "col2": ["c", "d"]}),
        DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]}),
        DataFrame({"col4": [True, False]}),
    ]
    expected_columns = {
        "col1": [1, 2, 3, 4, None, None, None, None],
        "col2": ["a", "b", "c", "d", "e", "f", None, None],
        "col3": [None, None, None, None, "!", "#", None, None],
        "col4": [None, None, None, None, None, None, True, False],
    }
    expected = DataFrame(expected_columns).astype({"col4": "float"})

    with tm.ensure_clean("test.json") as path:
        # Every write appends to the same file.
        for frame in frames:
            frame.to_json(path, mode="a", lines=True, orient="records")

        result = read_json(path, lines=True)
        tm.assert_frame_equal(result, expected)


def test_to_json_append_output_different_columns_reordered():
    """Append the four frames in reverse order and check the column order read back."""
    # GH 35849
    # Listed in write order: col4-only frame first, original col1/col2 frame last.
    frames_in_write_order = [
        DataFrame({"col4": [True, False]}),
        DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]}),
        DataFrame({"col1": [3, 4], "col2": ["c", "d"]}),
        DataFrame({"col1": [1, 2], "col2": ["a", "b"]}),
    ]
    expected_columns = {
        "col4": [True, False, None, None, None, None, None, None],
        "col2": [None, None, "e", "f", "c", "d", "a", "b"],
        "col3": [None, None, "!", "#", None, None, None, None],
        "col1": [None, None, None, None, 3, 4, 1, 2],
    }
    expected = DataFrame(expected_columns).astype({"col4": "float"})

    with tm.ensure_clean("test.json") as path:
        # Every write appends to the same file.
        for frame in frames_in_write_order:
            frame.to_json(path, mode="a", lines=True, orient="records")

        result = read_json(path, lines=True)
        tm.assert_frame_equal(result, expected)
first commit 2023-06-02 12:51:02 +02:00			`from io import StringIO`
			`from pathlib import Path`
			`from typing import Iterator`

			`import pytest`

			`import pandas as pd`
			`from pandas import (`
			`DataFrame,`
			`read_json,`
			`)`
			`import pandas._testing as tm`

			`from pandas.io.json._json import JsonReader`


			`@pytest.fixture`
			`def lines_json_df():`
			`df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})`
			`return df.to_json(lines=True, orient="records")`


			`def test_read_jsonl():`
			`# GH9180`
			`result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)`
			`expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])`
			`tm.assert_frame_equal(result, expected)`


			`def test_read_jsonl_engine_pyarrow(datapath, engine):`
			`result = read_json(`
			`datapath("io", "json", "data", "line_delimited.json"),`
			`lines=True,`
			`engine=engine,`
			`)`
			`expected = DataFrame({"a": [1, 3, 5], "b": [2, 4, 6]})`
			`tm.assert_frame_equal(result, expected)`


			`def test_read_datetime(request, engine):`
			`# GH33787`
			`if engine == "pyarrow":`
			`# GH 48893`
			`reason = "Pyarrow only supports a file path as an input and line delimited json"`
			`request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))`

			`df = DataFrame(`
			`[([1, 2], ["2020-03-05", "2020-04-08T09:58:49+00:00"], "hector")],`
			`columns=["accounts", "date", "name"],`
			`)`
			`json_line = df.to_json(lines=True, orient="records")`
			`result = read_json(json_line, engine=engine)`
			`expected = DataFrame(`
			`[[1, "2020-03-05", "hector"], [2, "2020-04-08T09:58:49+00:00", "hector"]],`
			`columns=["accounts", "date", "name"],`
			`)`
			`tm.assert_frame_equal(result, expected)`


			`def test_read_jsonl_unicode_chars():`
			`# GH15132: non-ascii unicode characters`
			`# \u201d == RIGHT DOUBLE QUOTATION MARK`

			`# simulate file handle`
			`json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'`
			`json = StringIO(json)`
			`result = read_json(json, lines=True)`
			`expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])`
			`tm.assert_frame_equal(result, expected)`

			`# simulate string`
			`json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'`
			`result = read_json(json, lines=True)`
			`expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])`
			`tm.assert_frame_equal(result, expected)`


			`def test_to_jsonl():`
			`# GH9180`
			`df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])`
			`result = df.to_json(orient="records", lines=True)`
			`expected = '{"a":1,"b":2}\n{"a":1,"b":2}\n'`
			`assert result == expected`

			`df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"])`
			`result = df.to_json(orient="records", lines=True)`
			`expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n'`
			`assert result == expected`
			`tm.assert_frame_equal(read_json(result, lines=True), df)`

			`# GH15096: escaped characters in columns and data`
			`df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])`
			`result = df.to_json(orient="records", lines=True)`
			`expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n'`
			`assert result == expected`
			`tm.assert_frame_equal(read_json(result, lines=True), df)`


			`def test_to_jsonl_count_new_lines():`
			`# GH36888`
			`df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])`
			`actual_new_lines_count = df.to_json(orient="records", lines=True).count("\n")`
			`expected_new_lines_count = 2`
			`assert actual_new_lines_count == expected_new_lines_count`


			`@pytest.mark.parametrize("chunksize", [1, 1.0])`
			`def test_readjson_chunks(request, lines_json_df, chunksize, engine):`
			`# Basic test that read_json(chunks=True) gives the same result as`
			`# read_json(chunks=False)`
			`# GH17048: memory usage when lines=True`

			`if engine == "pyarrow":`
			`# GH 48893`
			`reason = (`
			`"Pyarrow only supports a file path as an input and line delimited json"`
			`"and doesn't support chunksize parameter."`
			`)`
			`request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))`

			`unchunked = read_json(StringIO(lines_json_df), lines=True)`
			`with read_json(`
			`StringIO(lines_json_df), lines=True, chunksize=chunksize, engine=engine`
			`) as reader:`
			`chunked = pd.concat(reader)`

			`tm.assert_frame_equal(chunked, unchunked)`


			`def test_readjson_chunksize_requires_lines(lines_json_df, engine):`
			`msg = "chunksize can only be passed if lines=True"`
			`with pytest.raises(ValueError, match=msg):`
			`with read_json(`
			`StringIO(lines_json_df), lines=False, chunksize=2, engine=engine`
			`) as _:`
			`pass`


			`def test_readjson_chunks_series(request, engine):`
			`if engine == "pyarrow":`
			`# GH 48893`
			`reason = (`
			`"Pyarrow only supports a file path as an input and line delimited json"`
			`"and doesn't support chunksize parameter."`
			`)`
			`request.node.add_marker(pytest.mark.xfail(reason=reason))`

			`# Test reading line-format JSON to Series with chunksize param`
			`s = pd.Series({"A": 1, "B": 2})`

			`strio = StringIO(s.to_json(lines=True, orient="records"))`
			`unchunked = read_json(strio, lines=True, typ="Series", engine=engine)`

			`strio = StringIO(s.to_json(lines=True, orient="records"))`
			`with read_json(`
			`strio, lines=True, typ="Series", chunksize=1, engine=engine`
			`) as reader:`
			`chunked = pd.concat(reader)`

			`tm.assert_series_equal(chunked, unchunked)`


			`def test_readjson_each_chunk(request, lines_json_df, engine):`
			`if engine == "pyarrow":`
			`# GH 48893`
			`reason = (`
			`"Pyarrow only supports a file path as an input and line delimited json"`
			`"and doesn't support chunksize parameter."`
			`)`
			`request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))`

			`# Other tests check that the final result of read_json(chunksize=True)`
			`# is correct. This checks the intermediate chunks.`
			`with read_json(`
			`StringIO(lines_json_df), lines=True, chunksize=2, engine=engine`
			`) as reader:`
			`chunks = list(reader)`
			`assert chunks[0].shape == (2, 2)`
			`assert chunks[1].shape == (1, 2)`


			`def test_readjson_chunks_from_file(request, engine):`
			`if engine == "pyarrow":`
			`# GH 48893`
			`reason = (`
			`"Pyarrow only supports a file path as an input and line delimited json"`
			`"and doesn't support chunksize parameter."`
			`)`
			`request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))`

			`with tm.ensure_clean("test.json") as path:`
			`df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})`
			`df.to_json(path, lines=True, orient="records")`
			`with read_json(path, lines=True, chunksize=1, engine=engine) as reader:`
			`chunked = pd.concat(reader)`
			`unchunked = read_json(path, lines=True, engine=engine)`
			`tm.assert_frame_equal(unchunked, chunked)`


			`@pytest.mark.parametrize("chunksize", [None, 1])`
			`def test_readjson_chunks_closes(chunksize):`
			`with tm.ensure_clean("test.json") as path:`
			`df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})`
			`df.to_json(path, lines=True, orient="records")`
			`reader = JsonReader(`
			`path,`
			`orient=None,`
			`typ="frame",`
			`dtype=True,`
			`convert_axes=True,`
			`convert_dates=True,`
			`keep_default_dates=True,`
			`precise_float=False,`
			`date_unit=None,`
			`encoding=None,`
			`lines=True,`
			`chunksize=chunksize,`
			`compression=None,`
			`nrows=None,`
			`)`
			`with reader:`
			`reader.read()`
			`assert (`
			`reader.handles.handle.closed`
			`), f"didn't close stream with chunksize = {chunksize}"`


			`@pytest.mark.parametrize("chunksize", [0, -1, 2.2, "foo"])`
			`def test_readjson_invalid_chunksize(lines_json_df, chunksize, engine):`
			`msg = r"'chunksize' must be an integer >=1"`

			`with pytest.raises(ValueError, match=msg):`
			`with read_json(`
			`StringIO(lines_json_df), lines=True, chunksize=chunksize, engine=engine`
			`) as _:`
			`pass`


			`@pytest.mark.parametrize("chunksize", [None, 1, 2])`
			`def test_readjson_chunks_multiple_empty_lines(chunksize):`
			`j = """`

			`{"A":1,"B":4}`



			`{"A":2,"B":5}`







			`{"A":3,"B":6}`
			`"""`
			`orig = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})`
			`test = read_json(j, lines=True, chunksize=chunksize)`
			`if chunksize is not None:`
			`with test:`
			`test = pd.concat(test)`
			`tm.assert_frame_equal(orig, test, obj=f"chunksize: {chunksize}")`


			`def test_readjson_unicode(request, monkeypatch, engine):`
			`if engine == "pyarrow":`
			`# GH 48893`
			`reason = (`
			`"Pyarrow only supports a file path as an input and line delimited json"`
			`"and doesn't support chunksize parameter."`
			`)`
			`request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))`

			`with tm.ensure_clean("test.json") as path:`
			`monkeypatch.setattr("locale.getpreferredencoding", lambda do_setlocale: "cp949")`
			`with open(path, "w", encoding="utf-8") as f:`
			`f.write('{"£©µÀÆÖÞßéöÿ":["АБВГДабвгд가"]}')`

			`result = read_json(path, engine=engine)`
			`expected = DataFrame({"£©µÀÆÖÞßéöÿ": ["АБВГДабвгд가"]})`
			`tm.assert_frame_equal(result, expected)`


			`@pytest.mark.parametrize("nrows", [1, 2])`
			`def test_readjson_nrows(nrows, engine):`
			`# GH 33916`
			`# Test reading line-format JSON to Series with nrows param`
			`jsonl = """{"a": 1, "b": 2}`
			`{"a": 3, "b": 4}`
			`{"a": 5, "b": 6}`
			`{"a": 7, "b": 8}"""`
			`result = read_json(jsonl, lines=True, nrows=nrows)`
			`expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows]`
			`tm.assert_frame_equal(result, expected)`


			`@pytest.mark.parametrize("nrows,chunksize", [(2, 2), (4, 2)])`
			`def test_readjson_nrows_chunks(request, nrows, chunksize, engine):`
			`# GH 33916`
			`# Test reading line-format JSON to Series with nrows and chunksize param`
			`if engine == "pyarrow":`
			`# GH 48893`
			`reason = (`
			`"Pyarrow only supports a file path as an input and line delimited json"`
			`"and doesn't support chunksize parameter."`
			`)`
			`request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))`

			`jsonl = """{"a": 1, "b": 2}`
			`{"a": 3, "b": 4}`
			`{"a": 5, "b": 6}`
			`{"a": 7, "b": 8}"""`
			`with read_json(`
			`jsonl, lines=True, nrows=nrows, chunksize=chunksize, engine=engine`
			`) as reader:`
			`chunked = pd.concat(reader)`
			`expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows]`
			`tm.assert_frame_equal(chunked, expected)`


			`def test_readjson_nrows_requires_lines(engine):`
			`# GH 33916`
			`# Test ValuError raised if nrows is set without setting lines in read_json`
			`jsonl = """{"a": 1, "b": 2}`
			`{"a": 3, "b": 4}`
			`{"a": 5, "b": 6}`
			`{"a": 7, "b": 8}"""`
			`msg = "nrows can only be passed if lines=True"`
			`with pytest.raises(ValueError, match=msg):`
			`read_json(jsonl, lines=False, nrows=2, engine=engine)`


			`def test_readjson_lines_chunks_fileurl(request, datapath, engine):`
			`# GH 27135`
			`# Test reading line-format JSON from file url`
			`if engine == "pyarrow":`
			`# GH 48893`
			`reason = (`
			`"Pyarrow only supports a file path as an input and line delimited json"`
			`"and doesn't support chunksize parameter."`
			`)`
			`request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))`

			`df_list_expected = [`
			`DataFrame([[1, 2]], columns=["a", "b"], index=[0]),`
			`DataFrame([[3, 4]], columns=["a", "b"], index=[1]),`
			`DataFrame([[5, 6]], columns=["a", "b"], index=[2]),`
			`]`
			`os_path = datapath("io", "json", "data", "line_delimited.json")`
			`file_url = Path(os_path).as_uri()`
			`with read_json(file_url, lines=True, chunksize=1, engine=engine) as url_reader:`
			`for index, chuck in enumerate(url_reader):`
			`tm.assert_frame_equal(chuck, df_list_expected[index])`


			`def test_chunksize_is_incremental():`
			`# See https://github.com/pandas-dev/pandas/issues/34548`
			`jsonl = (`
			`"""{"a": 1, "b": 2}`
			`{"a": 3, "b": 4}`
			`{"a": 5, "b": 6}`
			`{"a": 7, "b": 8}\n"""`
			`* 1000`
			`)`

			`class MyReader:`
			`def __init__(self, contents) -> None:`
			`self.read_count = 0`
			`self.stringio = StringIO(contents)`

			`def read(self, *args):`
			`self.read_count += 1`
			`return self.stringio.read(*args)`

			`def __iter__(self) -> Iterator:`
			`self.read_count += 1`
			`return iter(self.stringio)`

			`reader = MyReader(jsonl)`
			`assert len(list(read_json(reader, lines=True, chunksize=100))) > 1`
			`assert reader.read_count > 10`


			`@pytest.mark.parametrize("orient_", ["split", "index", "table"])`
			`def test_to_json_append_orient(orient_):`
			`# GH 35849`
			`# Test ValueError when orient is not 'records'`
			`df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})`
			`msg = (`
			`r"mode='a' \(append\) is only supported when"`
			`"lines is True and orient is 'records'"`
			`)`
			`with pytest.raises(ValueError, match=msg):`
			`df.to_json(mode="a", orient=orient_)`


			`def test_to_json_append_lines():`
			`# GH 35849`
			`# Test ValueError when lines is not True`
			`df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})`
			`msg = (`
			`r"mode='a' \(append\) is only supported when"`
			`"lines is True and orient is 'records'"`
			`)`
			`with pytest.raises(ValueError, match=msg):`
			`df.to_json(mode="a", lines=False, orient="records")`


			`@pytest.mark.parametrize("mode_", ["r", "x"])`
			`def test_to_json_append_mode(mode_):`
			`# GH 35849`
			`# Test ValueError when mode is not supported option`
			`df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})`
			`msg = (`
			`f"mode={mode_} is not a valid option."`
			`"Only 'w' and 'a' are currently supported."`
			`)`
			`with pytest.raises(ValueError, match=msg):`
			`df.to_json(mode=mode_, lines=False, orient="records")`


			`def test_to_json_append_output_consistent_columns():`
			`# GH 35849`
			`# Testing that resulting output reads in as expected.`
			`# Testing same columns, new rows`
			`df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})`
			`df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]})`

			`expected = DataFrame({"col1": [1, 2, 3, 4], "col2": ["a", "b", "c", "d"]})`
			`with tm.ensure_clean("test.json") as path:`
			`# Save dataframes to the same file`
			`df1.to_json(path, lines=True, orient="records")`
			`df2.to_json(path, mode="a", lines=True, orient="records")`

			`# Read path file`
			`result = read_json(path, lines=True)`
			`tm.assert_frame_equal(result, expected)`


			`def test_to_json_append_output_inconsistent_columns():`
			`# GH 35849`
			`# Testing that resulting output reads in as expected.`
			`# Testing one new column, one old column, new rows`
			`df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})`
			`df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]})`

			`expected = DataFrame(`
			`{`
			`"col1": [1, 2, None, None],`
			`"col2": ["a", "b", "e", "f"],`
			`"col3": [None, None, "!", "#"],`
			`}`
			`)`
			`with tm.ensure_clean("test.json") as path:`
			`# Save dataframes to the same file`
			`df1.to_json(path, mode="a", lines=True, orient="records")`
			`df3.to_json(path, mode="a", lines=True, orient="records")`

			`# Read path file`
			`result = read_json(path, lines=True)`
			`tm.assert_frame_equal(result, expected)`


			`def test_to_json_append_output_different_columns():`
			`# GH 35849`
			`# Testing that resulting output reads in as expected.`
			`# Testing same, differing and new columns`
			`df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})`
			`df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]})`
			`df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]})`
			`df4 = DataFrame({"col4": [True, False]})`

			`expected = DataFrame(`
			`{`
			`"col1": [1, 2, 3, 4, None, None, None, None],`
			`"col2": ["a", "b", "c", "d", "e", "f", None, None],`
			`"col3": [None, None, None, None, "!", "#", None, None],`
			`"col4": [None, None, None, None, None, None, True, False],`
			`}`
			`).astype({"col4": "float"})`
			`with tm.ensure_clean("test.json") as path:`
			`# Save dataframes to the same file`
			`df1.to_json(path, mode="a", lines=True, orient="records")`
			`df2.to_json(path, mode="a", lines=True, orient="records")`
			`df3.to_json(path, mode="a", lines=True, orient="records")`
			`df4.to_json(path, mode="a", lines=True, orient="records")`

			`# Read path file`
			`result = read_json(path, lines=True)`
			`tm.assert_frame_equal(result, expected)`


			`def test_to_json_append_output_different_columns_reordered():`
			`# GH 35849`
			`# Testing that resulting output reads in as expected.`
			`# Testing specific result column order.`
			`df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})`
			`df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]})`
			`df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]})`
			`df4 = DataFrame({"col4": [True, False]})`

			`# df4, df3, df2, df1 (in that order)`
			`expected = DataFrame(`
			`{`
			`"col4": [True, False, None, None, None, None, None, None],`
			`"col2": [None, None, "e", "f", "c", "d", "a", "b"],`
			`"col3": [None, None, "!", "#", None, None, None, None],`
			`"col1": [None, None, None, None, 3, 4, 1, 2],`
			`}`
			`).astype({"col4": "float"})`
			`with tm.ensure_clean("test.json") as path:`
			`# Save dataframes to the same file`
			`df4.to_json(path, mode="a", lines=True, orient="records")`
			`df3.to_json(path, mode="a", lines=True, orient="records")`
			`df2.to_json(path, mode="a", lines=True, orient="records")`
			`df1.to_json(path, mode="a", lines=True, orient="records")`

			`# Read path file`
			`result = read_json(path, lines=True)`
			`tm.assert_frame_equal(result, expected)`