first commit

Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions


@@ -0,0 +1,278 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from io import StringIO
import numpy as np
import pytest
from pandas.errors import DtypeWarning
from pandas import (
DataFrame,
concat,
)
import pandas._testing as tm
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
@pytest.mark.parametrize("index_col", [0, "index"])
def test_read_chunksize_with_index(all_parsers, index_col):
parser = all_parsers
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
expected = DataFrame(
[
["foo", 2, 3, 4, 5],
["bar", 7, 8, 9, 10],
["baz", 12, 13, 14, 15],
["qux", 12, 13, 14, 15],
["foo2", 12, 13, 14, 15],
["bar2", 12, 13, 14, 15],
],
columns=["index", "A", "B", "C", "D"],
)
expected = expected.set_index("index")
with parser.read_csv(StringIO(data), index_col=index_col, chunksize=2) as reader:
chunks = list(reader)
tm.assert_frame_equal(chunks[0], expected[:2])
tm.assert_frame_equal(chunks[1], expected[2:4])
tm.assert_frame_equal(chunks[2], expected[4:])
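# Editorial note: a minimal, self-contained sketch (not part of this suite)
# of the chunked-reading pattern these tests exercise; data and names are
# illustrative only.
def _example_chunked_read():
    from io import StringIO

    import pandas as pd

    with pd.read_csv(StringIO("a,b\n1,2\n3,4\n5,6"), chunksize=2) as reader:
        # Each iteration yields a DataFrame with at most `chunksize` rows.
        return [chunk for chunk in reader]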
@pytest.mark.parametrize("chunksize", [1.3, "foo", 0])
def test_read_chunksize_bad(all_parsers, chunksize):
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
parser = all_parsers
msg = r"'chunksize' must be an integer >=1"
with pytest.raises(ValueError, match=msg):
with parser.read_csv(StringIO(data), chunksize=chunksize) as _:
pass
@pytest.mark.parametrize("chunksize", [2, 8])
def test_read_chunksize_and_nrows(all_parsers, chunksize):
# see gh-15755
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
parser = all_parsers
kwargs = {"index_col": 0, "nrows": 5}
expected = parser.read_csv(StringIO(data), **kwargs)
with parser.read_csv(StringIO(data), chunksize=chunksize, **kwargs) as reader:
tm.assert_frame_equal(concat(reader), expected)
def test_read_chunksize_and_nrows_changing_size(all_parsers):
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
parser = all_parsers
kwargs = {"index_col": 0, "nrows": 5}
expected = parser.read_csv(StringIO(data), **kwargs)
with parser.read_csv(StringIO(data), chunksize=8, **kwargs) as reader:
tm.assert_frame_equal(reader.get_chunk(size=2), expected.iloc[:2])
tm.assert_frame_equal(reader.get_chunk(size=4), expected.iloc[2:5])
with pytest.raises(StopIteration, match=""):
reader.get_chunk(size=3)
def test_get_chunk_passed_chunksize(all_parsers):
parser = all_parsers
data = """A,B,C
1,2,3
4,5,6
7,8,9
1,2,3"""
with parser.read_csv(StringIO(data), chunksize=2) as reader:
result = reader.get_chunk()
expected = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kwargs", [{}, {"index_col": 0}])
def test_read_chunksize_compat(all_parsers, kwargs):
# see gh-12185
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), **kwargs)
with parser.read_csv(StringIO(data), chunksize=2, **kwargs) as reader:
tm.assert_frame_equal(concat(reader), result)
def test_read_chunksize_jagged_names(all_parsers):
# see gh-23509
parser = all_parsers
data = "\n".join(["0"] * 7 + [",".join(["0"] * 10)])
expected = DataFrame([[0] + [np.nan] * 9] * 7 + [[0] * 10])
with parser.read_csv(StringIO(data), names=range(10), chunksize=4) as reader:
result = concat(reader)
tm.assert_frame_equal(result, expected)
def test_chunk_begins_with_newline_whitespace(all_parsers):
# see gh-10022
parser = all_parsers
data = "\n hello\nworld\n"
result = parser.read_csv(StringIO(data), header=None)
expected = DataFrame([" hello", "world"])
tm.assert_frame_equal(result, expected)
@pytest.mark.slow
def test_chunks_have_consistent_numerical_type(all_parsers):
parser = all_parsers
integers = [str(i) for i in range(499999)]
data = "a\n" + "\n".join(integers + ["1.0", "2.0"] + integers)
# Coercions should work without warnings.
with tm.assert_produces_warning(None):
result = parser.read_csv(StringIO(data))
assert type(result.a[0]) is np.float64
assert result.a.dtype == float
def test_warn_if_chunks_have_mismatched_type(all_parsers):
warning_type = None
parser = all_parsers
size = 10000
# see gh-3866: if chunks are different types and can't
# be coerced using numerical types, then issue warning.
if parser.engine == "c" and parser.low_memory:
warning_type = DtypeWarning
# Use larger size to hit warning path
size = 499999
integers = [str(i) for i in range(size)]
data = "a\n" + "\n".join(integers + ["a", "b"] + integers)
buf = StringIO(data)
with tm.assert_produces_warning(warning_type):
df = parser.read_csv(buf)
assert df.a.dtype == object
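# Editorial note: a minimal illustration (outside this suite) of the fallback
# the test above relies on: a column mixing non-numeric strings with integers
# comes back with object dtype.
def _example_mixed_type_column():
    from io import StringIO

    import pandas as pd

    return pd.read_csv(StringIO("a\n1\nx\n2"))["a"].dtype  # dtype('O')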
@pytest.mark.parametrize("iterator", [True, False])
def test_empty_with_nrows_chunksize(all_parsers, iterator):
# see gh-9535
parser = all_parsers
expected = DataFrame(columns=["foo", "bar"])
nrows = 10
data = StringIO("foo,bar\n")
if iterator:
with parser.read_csv(data, chunksize=nrows) as reader:
result = next(iter(reader))
else:
result = parser.read_csv(data, nrows=nrows)
tm.assert_frame_equal(result, expected)
def test_read_csv_memory_growth_chunksize(all_parsers):
# see gh-24805
#
# Let's just make sure that we don't crash
# as we iteratively process all chunks.
parser = all_parsers
with tm.ensure_clean() as path:
with open(path, "w") as f:
for i in range(1000):
f.write(str(i) + "\n")
with parser.read_csv(path, chunksize=20) as result:
for _ in result:
pass
def test_chunksize_with_usecols_second_block_shorter(all_parsers):
# GH#21211
parser = all_parsers
data = """1,2,3,4
5,6,7,8
9,10,11
"""
result_chunks = parser.read_csv(
StringIO(data),
names=["a", "b"],
chunksize=2,
usecols=[0, 1],
header=None,
)
expected_frames = [
DataFrame({"a": [1, 5], "b": [2, 6]}),
DataFrame({"a": [9], "b": [10]}, index=[2]),
]
for i, result in enumerate(result_chunks):
tm.assert_frame_equal(result, expected_frames[i])
def test_chunksize_second_block_shorter(all_parsers):
# GH#21211
parser = all_parsers
data = """a,b,c,d
1,2,3,4
5,6,7,8
9,10,11
"""
result_chunks = parser.read_csv(StringIO(data), chunksize=2)
expected_frames = [
DataFrame({"a": [1, 5], "b": [2, 6], "c": [3, 7], "d": [4, 8]}),
DataFrame({"a": [9], "b": [10], "c": [11], "d": [np.nan]}, index=[2]),
]
for i, result in enumerate(result_chunks):
tm.assert_frame_equal(result, expected_frames[i])


@@ -0,0 +1,931 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from datetime import datetime
from inspect import signature
from io import StringIO
import os
from pathlib import Path
import sys
import numpy as np
import pytest
from pandas.compat import PY310
from pandas.errors import (
EmptyDataError,
ParserError,
ParserWarning,
)
from pandas import (
DataFrame,
Index,
Series,
Timestamp,
compat,
)
import pandas._testing as tm
from pandas.io.parsers import TextFileReader
from pandas.io.parsers.c_parser_wrapper import CParserWrapper
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
def test_override_set_noconvert_columns():
# see gh-17351
#
# Usecols needs to be sorted in _set_noconvert_columns based
# on the test_usecols_with_parse_dates test from test_usecols.py
class MyTextFileReader(TextFileReader):
def __init__(self):
self._currow = 0
self.squeeze = False
class MyCParserWrapper(CParserWrapper):
def _set_noconvert_columns(self):
if self.usecols_dtype == "integer":
# self.usecols is a set, which is documented as unordered
# but in practice, a CPython set of integers is sorted.
# In other implementations this assumption does not hold.
# The following code simulates a different order, which
# before GH 17351 would cause the wrong columns to be
# converted via the parse_dates parameter
self.usecols = list(self.usecols)
self.usecols.reverse()
return CParserWrapper._set_noconvert_columns(self)
data = """a,b,c,d,e
0,1,20140101,0900,4
0,1,20140102,1000,4"""
parse_dates = [[1, 2]]
cols = {
"a": [0, 0],
"c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")],
}
expected = DataFrame(cols, columns=["c_d", "a"])
parser = MyTextFileReader()
parser.options = {
"usecols": [0, 2, 3],
"parse_dates": parse_dates,
"delimiter": ",",
}
parser.engine = "c"
parser._engine = MyCParserWrapper(StringIO(data), **parser.options)
result = parser.read()
tm.assert_frame_equal(result, expected)
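# Editorial note: the ordering assumption discussed above, in one line; on
# CPython, small-integer sets happen to iterate in sorted order (an
# implementation detail, not a guarantee), which the subclass deliberately
# breaks by reversing:
#     list({3, 0, 2})  # -> [0, 2, 3] on CPython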
def test_read_csv_local(all_parsers, csv1):
prefix = "file:///" if compat.is_platform_windows() else "file://"
parser = all_parsers
fname = prefix + str(os.path.abspath(csv1))
result = parser.read_csv(fname, index_col=0, parse_dates=True)
expected = DataFrame(
[
[0.980269, 3.685731, -0.364216805298, -1.159738],
[1.047916, -0.041232, -0.16181208307, 0.212549],
[0.498581, 0.731168, -0.537677223318, 1.346270],
[1.120202, 1.567621, 0.00364077397681, 0.675253],
[-0.487094, 0.571455, -1.6116394093, 0.103469],
[0.836649, 0.246462, 0.588542635376, 1.062782],
[-0.157161, 1.340307, 1.1957779562, -1.097007],
],
columns=["A", "B", "C", "D"],
index=Index(
[
datetime(2000, 1, 3),
datetime(2000, 1, 4),
datetime(2000, 1, 5),
datetime(2000, 1, 6),
datetime(2000, 1, 7),
datetime(2000, 1, 10),
datetime(2000, 1, 11),
],
name="index",
),
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
def test_1000_sep(all_parsers):
parser = all_parsers
data = """A|B|C
1|2,334|5
10|13|10.
"""
expected = DataFrame({"A": [1, 10], "B": [2334, 13], "C": [5, 10.0]})
result = parser.read_csv(StringIO(data), sep="|", thousands=",")
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("squeeze", [True, False])
def test_squeeze(all_parsers, squeeze):
data = """\
a,1
b,2
c,3
"""
parser = all_parsers
index = Index(["a", "b", "c"], name=0)
expected = Series([1, 2, 3], name=1, index=index)
result = parser.read_csv_check_warnings(
FutureWarning,
"The squeeze argument has been deprecated "
"and will be removed in a future version. "
'Append .squeeze\\("columns"\\) to the call to squeeze.\n\n',
StringIO(data),
index_col=0,
header=None,
squeeze=squeeze,
)
if not squeeze:
expected = DataFrame(expected)
tm.assert_frame_equal(result, expected)
else:
tm.assert_series_equal(result, expected)
# see gh-8217
#
# Series should not be a view.
assert not result._is_view
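# Editorial note: the replacement that the deprecation message above points
# to, sketched outside the suite:
#     pd.read_csv(buf, index_col=0, header=None).squeeze("columns")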
@xfail_pyarrow
def test_unnamed_columns(all_parsers):
data = """A,B,C,,
1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
parser = all_parsers
expected = DataFrame(
[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]],
dtype=np.int64,
columns=["A", "B", "C", "Unnamed: 3", "Unnamed: 4"],
)
result = parser.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
def test_csv_mixed_type(all_parsers):
data = """A,B,C
a,1,2
b,3,4
c,4,5
"""
parser = all_parsers
expected = DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 4], "C": [2, 4, 5]})
result = parser.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
def test_read_csv_low_memory_no_rows_with_index(all_parsers):
# see gh-21141
parser = all_parsers
if not parser.low_memory:
pytest.skip("This is a low-memory specific test")
data = """A,B,C
1,1,1,2
2,2,3,4
3,3,4,5
"""
result = parser.read_csv(StringIO(data), low_memory=True, index_col=0, nrows=0)
expected = DataFrame(columns=["A", "B", "C"])
tm.assert_frame_equal(result, expected)
def test_read_csv_dataframe(all_parsers, csv1):
parser = all_parsers
result = parser.read_csv(csv1, index_col=0, parse_dates=True)
expected = DataFrame(
[
[0.980269, 3.685731, -0.364216805298, -1.159738],
[1.047916, -0.041232, -0.16181208307, 0.212549],
[0.498581, 0.731168, -0.537677223318, 1.346270],
[1.120202, 1.567621, 0.00364077397681, 0.675253],
[-0.487094, 0.571455, -1.6116394093, 0.103469],
[0.836649, 0.246462, 0.588542635376, 1.062782],
[-0.157161, 1.340307, 1.1957779562, -1.097007],
],
columns=["A", "B", "C", "D"],
index=Index(
[
datetime(2000, 1, 3),
datetime(2000, 1, 4),
datetime(2000, 1, 5),
datetime(2000, 1, 6),
datetime(2000, 1, 7),
datetime(2000, 1, 10),
datetime(2000, 1, 11),
],
name="index",
),
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
@pytest.mark.parametrize("nrows", [3, 3.0])
def test_read_nrows(all_parsers, nrows):
# see gh-10476
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
expected = DataFrame(
[["foo", 2, 3, 4, 5], ["bar", 7, 8, 9, 10], ["baz", 12, 13, 14, 15]],
columns=["index", "A", "B", "C", "D"],
)
parser = all_parsers
result = parser.read_csv(StringIO(data), nrows=nrows)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
@pytest.mark.parametrize("nrows", [1.2, "foo", -1])
def test_read_nrows_bad(all_parsers, nrows):
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
msg = r"'nrows' must be an integer >=0"
parser = all_parsers
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), nrows=nrows)
def test_nrows_skipfooter_errors(all_parsers):
msg = "'skipfooter' not supported with 'nrows'"
data = "a\n1\n2\n3\n4\n5\n6"
parser = all_parsers
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), skipfooter=1, nrows=5)
@xfail_pyarrow
def test_missing_trailing_delimiters(all_parsers):
parser = all_parsers
data = """A,B,C,D
1,2,3,4
1,3,3,
1,4,5"""
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[[1, 2, 3, 4], [1, 3, 3, np.nan], [1, 4, 5, np.nan]],
columns=["A", "B", "C", "D"],
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
def test_skip_initial_space(all_parsers):
data = (
'"09-Apr-2012", "01:10:18.300", 2456026.548822908, 12849, '
"1.00361, 1.12551, 330.65659, 0355626618.16711, 73.48821, "
"314.11625, 1917.09447, 179.71425, 80.000, 240.000, -350, "
"70.06056, 344.98370, 1, 1, -0.689265, -0.692787, "
"0.212036, 14.7674, 41.605, -9999.0, -9999.0, "
"-9999.0, -9999.0, -9999.0, -9999.0, 000, 012, 128"
)
parser = all_parsers
result = parser.read_csv(
StringIO(data),
names=list(range(33)),
header=None,
na_values=["-9999.0"],
skipinitialspace=True,
)
expected = DataFrame(
[
[
"09-Apr-2012",
"01:10:18.300",
2456026.548822908,
12849,
1.00361,
1.12551,
330.65659,
355626618.16711,
73.48821,
314.11625,
1917.09447,
179.71425,
80.0,
240.0,
-350,
70.06056,
344.9837,
1,
1,
-0.689265,
-0.692787,
0.212036,
14.7674,
41.605,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
0,
12,
128,
]
]
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
def test_trailing_delimiters(all_parsers):
# see gh-2442
data = """A,B,C
1,2,3,
4,5,6,
7,8,9,"""
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=False)
expected = DataFrame({"A": [1, 4, 7], "B": [2, 5, 8], "C": [3, 6, 9]})
tm.assert_frame_equal(result, expected)
def test_escapechar(all_parsers):
# https://stackoverflow.com/questions/13824840/feature-request-for-
# pandas-read-csv
data = '''SEARCH_TERM,ACTUAL_URL
"bra tv board","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord"
"tv p\xc3\xa5 hjul","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord"
"SLAGBORD, \\"Bergslagen\\", IKEA:s 1700-tals series","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord"''' # noqa:E501
parser = all_parsers
result = parser.read_csv(
StringIO(data), escapechar="\\", quotechar='"', encoding="utf-8"
)
assert result["SEARCH_TERM"][2] == 'SLAGBORD, "Bergslagen", IKEA:s 1700-tals series'
tm.assert_index_equal(result.columns, Index(["SEARCH_TERM", "ACTUAL_URL"]))
@xfail_pyarrow
def test_ignore_leading_whitespace(all_parsers):
# see gh-3374, gh-6607
parser = all_parsers
data = " a b c\n 1 2 3\n 4 5 6\n 7 8 9"
result = parser.read_csv(StringIO(data), sep=r"\s+")
expected = DataFrame({"a": [1, 4, 7], "b": [2, 5, 8], "c": [3, 6, 9]})
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
@pytest.mark.parametrize("usecols", [None, [0, 1], ["a", "b"]])
def test_uneven_lines_with_usecols(all_parsers, usecols):
# see gh-12203
parser = all_parsers
data = r"""a,b,c
0,1,2
3,4,5,6,7
8,9,10"""
if usecols is None:
# Make sure that an error is still raised
# when the "usecols" parameter is not provided.
msg = r"Expected \d+ fields in line \d+, saw \d+"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data))
else:
expected = DataFrame({"a": [0, 3, 8], "b": [1, 4, 9]})
result = parser.read_csv(StringIO(data), usecols=usecols)
tm.assert_frame_equal(result, expected)
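# Editorial note (illustrative): usecols=[0, 1] sidesteps the ragged-line
# error above because only the selected columns are parsed into the result:
#     pd.read_csv(buf, usecols=[0, 1])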
@xfail_pyarrow
@pytest.mark.parametrize(
"data,kwargs,expected",
[
# First, check to see that the response of parser when faced with no
# provided columns raises the correct error, with or without usecols.
("", {}, None),
("", {"usecols": ["X"]}, None),
(
",,",
{"names": ["Dummy", "X", "Dummy_2"], "usecols": ["X"]},
DataFrame(columns=["X"], index=[0], dtype=np.float64),
),
(
"",
{"names": ["Dummy", "X", "Dummy_2"], "usecols": ["X"]},
DataFrame(columns=["X"]),
),
],
)
def test_read_empty_with_usecols(all_parsers, data, kwargs, expected):
# see gh-12493
parser = all_parsers
if expected is None:
msg = "No columns to parse from file"
with pytest.raises(EmptyDataError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
else:
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
@pytest.mark.parametrize(
"kwargs,expected",
[
# gh-8661, gh-8679: this should ignore six lines, including
# lines with trailing whitespace and blank lines.
(
{
"header": None,
"delim_whitespace": True,
"skiprows": [0, 1, 2, 3, 5, 6],
"skip_blank_lines": True,
},
DataFrame([[1.0, 2.0, 4.0], [5.1, np.nan, 10.0]]),
),
# gh-8983: test skipping set of rows after a row with trailing spaces.
(
{
"delim_whitespace": True,
"skiprows": [1, 2, 3, 5, 6],
"skip_blank_lines": True,
},
DataFrame({"A": [1.0, 5.1], "B": [2.0, np.nan], "C": [4.0, 10]}),
),
],
)
def test_trailing_spaces(all_parsers, kwargs, expected):
data = "A B C \nrandom line with trailing spaces \nskip\n1,2,3\n1,2.,4.\nrandom line with trailing tabs\t\t\t\n \n5.1,NaN,10.0\n" # noqa:E501
parser = all_parsers
result = parser.read_csv(StringIO(data.replace(",", " ")), **kwargs)
tm.assert_frame_equal(result, expected)
def test_raise_on_sep_with_delim_whitespace(all_parsers):
# see gh-6607
data = "a b c\n1 2 3"
parser = all_parsers
with pytest.raises(ValueError, match="you can only specify one"):
parser.read_csv(StringIO(data), sep=r"\s", delim_whitespace=True)
def test_read_filepath_or_buffer(all_parsers):
# see gh-43366
parser = all_parsers
with pytest.raises(TypeError, match="Expected file path name or file-like"):
parser.read_csv(filepath_or_buffer=b"input")
@xfail_pyarrow
@pytest.mark.parametrize("delim_whitespace", [True, False])
def test_single_char_leading_whitespace(all_parsers, delim_whitespace):
# see gh-9710
parser = all_parsers
data = """\
MyColumn
a
b
a
b\n"""
expected = DataFrame({"MyColumn": list("abab")})
result = parser.read_csv(
StringIO(data), skipinitialspace=True, delim_whitespace=delim_whitespace
)
tm.assert_frame_equal(result, expected)
# Skip for now; only one case actually fails, but it's tricky to xfail
@skip_pyarrow
@pytest.mark.parametrize(
"sep,skip_blank_lines,exp_data",
[
(",", True, [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0], [-70.0, 0.4, 1.0]]),
(r"\s+", True, [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0], [-70.0, 0.4, 1.0]]),
(
",",
False,
[
[1.0, 2.0, 4.0],
[np.nan, np.nan, np.nan],
[np.nan, np.nan, np.nan],
[5.0, np.nan, 10.0],
[np.nan, np.nan, np.nan],
[-70.0, 0.4, 1.0],
],
),
],
)
def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data):
parser = all_parsers
data = """\
A,B,C
1,2.,4.
5.,NaN,10.0
-70,.4,1
"""
if sep == r"\s+":
data = data.replace(",", " ")
result = parser.read_csv(StringIO(data), sep=sep, skip_blank_lines=skip_blank_lines)
expected = DataFrame(exp_data, columns=["A", "B", "C"])
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
def test_whitespace_lines(all_parsers):
parser = all_parsers
data = """
\t \t\t
\t
A,B,C
\t 1,2.,4.
5.,NaN,10.0
"""
expected = DataFrame([[1, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"])
result = parser.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
@pytest.mark.parametrize(
"data,expected",
[
(
""" A B C D
a 1 2 3 4
b 1 2 3 4
c 1 2 3 4
""",
DataFrame(
[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
columns=["A", "B", "C", "D"],
index=["a", "b", "c"],
),
),
(
" a b c\n1 2 3 \n4 5 6\n 7 8 9",
DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]),
),
],
)
def test_whitespace_regex_separator(all_parsers, data, expected):
# see gh-6607
parser = all_parsers
result = parser.read_csv(StringIO(data), sep=r"\s+")
tm.assert_frame_equal(result, expected)
def test_sub_character(all_parsers, csv_dir_path):
# see gh-16893
filename = os.path.join(csv_dir_path, "sub_char.csv")
expected = DataFrame([[1, 2, 3]], columns=["a", "\x1ab", "c"])
parser = all_parsers
result = parser.read_csv(filename)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("filename", ["sé-es-vé.csv", "ru-sй.csv", "中文文件名.csv"])
def test_filename_with_special_chars(all_parsers, filename):
# see gh-15086.
parser = all_parsers
df = DataFrame({"a": [1, 2, 3]})
with tm.ensure_clean(filename) as path:
df.to_csv(path, index=False)
result = parser.read_csv(path)
tm.assert_frame_equal(result, df)
def test_read_table_same_signature_as_read_csv(all_parsers):
# GH-34976
parser = all_parsers
table_sign = signature(parser.read_table)
csv_sign = signature(parser.read_csv)
assert table_sign.parameters.keys() == csv_sign.parameters.keys()
assert table_sign.return_annotation == csv_sign.return_annotation
for key, csv_param in csv_sign.parameters.items():
table_param = table_sign.parameters[key]
if key == "sep":
assert csv_param.default == ","
assert table_param.default == "\t"
assert table_param.annotation == csv_param.annotation
assert table_param.kind == csv_param.kind
continue
else:
assert table_param == csv_param
def test_read_table_equivalency_to_read_csv(all_parsers):
# see gh-21948
# As of 0.25.0, read_table is undeprecated
parser = all_parsers
data = "a\tb\n1\t2\n3\t4"
expected = parser.read_csv(StringIO(data), sep="\t")
result = parser.read_table(StringIO(data))
tm.assert_frame_equal(result, expected)
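# Editorial note: read_table is read_csv with sep defaulting to "\t"; the
# signature test above pins that equivalence down parameter by parameter.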
@pytest.mark.skipif(
PY310,
reason="GH41935 This test is leaking only on Python 3.10,"
"causing other tests to fail with a cryptic error.",
)
@pytest.mark.parametrize("read_func", ["read_csv", "read_table"])
def test_read_csv_and_table_sys_setprofile(all_parsers, read_func):
# GH#41069
parser = all_parsers
data = "a b\n0 1"
sys.setprofile(lambda *a, **k: None)
result = getattr(parser, read_func)(StringIO(data))
sys.setprofile(None)
expected = DataFrame({"a b": ["0 1"]})
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
def test_first_row_bom(all_parsers):
# see gh-26545
parser = all_parsers
data = '''\ufeff"Head1" "Head2" "Head3"'''
result = parser.read_csv(StringIO(data), delimiter="\t")
expected = DataFrame(columns=["Head1", "Head2", "Head3"])
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
def test_first_row_bom_unquoted(all_parsers):
# see gh-36343
parser = all_parsers
data = """\ufeffHead1 Head2 Head3"""
result = parser.read_csv(StringIO(data), delimiter="\t")
expected = DataFrame(columns=["Head1", "Head2", "Head3"])
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
@pytest.mark.parametrize("nrows", range(1, 6))
def test_blank_lines_between_header_and_data_rows(all_parsers, nrows):
# GH 28071
ref = DataFrame(
[[np.nan, np.nan], [np.nan, np.nan], [1, 2], [np.nan, np.nan], [3, 4]],
columns=list("ab"),
)
csv = "\nheader\n\na,b\n\n\n1,2\n\n3,4"
parser = all_parsers
df = parser.read_csv(StringIO(csv), header=3, nrows=nrows, skip_blank_lines=False)
tm.assert_frame_equal(df, ref[:nrows])
@xfail_pyarrow
def test_no_header_two_extra_columns(all_parsers):
# GH 26218
column_names = ["one", "two", "three"]
ref = DataFrame([["foo", "bar", "baz"]], columns=column_names)
stream = StringIO("foo,bar,baz,bam,blah")
parser = all_parsers
with tm.assert_produces_warning(ParserWarning):
df = parser.read_csv(stream, header=None, names=column_names, index_col=False)
tm.assert_frame_equal(df, ref)
def test_read_csv_names_not_accepting_sets(all_parsers):
# GH 34946
data = """\
1,2,3
4,5,6\n"""
parser = all_parsers
with pytest.raises(ValueError, match="Names should be an ordered collection."):
parser.read_csv(StringIO(data), names=set("QAZ"))
@xfail_pyarrow
def test_read_table_delim_whitespace_default_sep(all_parsers):
# GH: 35958
f = StringIO("a b c\n1 -2 -3\n4 5 6")
parser = all_parsers
result = parser.read_table(f, delim_whitespace=True)
expected = DataFrame({"a": [1, 4], "b": [-2, 5], "c": [-3, 6]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("delimiter", [",", "\t"])
def test_read_csv_delim_whitespace_non_default_sep(all_parsers, delimiter):
# GH: 35958
f = StringIO("a b c\n1 -2 -3\n4 5 6")
parser = all_parsers
msg = (
"Specified a delimiter with both sep and "
"delim_whitespace=True; you can only specify one."
)
with pytest.raises(ValueError, match=msg):
parser.read_csv(f, delim_whitespace=True, sep=delimiter)
with pytest.raises(ValueError, match=msg):
parser.read_csv(f, delim_whitespace=True, delimiter=delimiter)
def test_read_csv_delimiter_and_sep_no_default(all_parsers):
# GH#39823
f = StringIO("a,b\n1,2")
parser = all_parsers
msg = "Specified a sep and a delimiter; you can only specify one."
with pytest.raises(ValueError, match=msg):
parser.read_csv(f, sep=" ", delimiter=".")
@pytest.mark.parametrize("kwargs", [{"delimiter": "\n"}, {"sep": "\n"}])
def test_read_csv_line_break_as_separator(kwargs, all_parsers):
# GH#43528
parser = all_parsers
data = """a,b,c
1,2,3
"""
msg = (
r"Specified \\n as separator or delimiter. This forces the python engine "
r"which does not accept a line terminator. Hence it is not allowed to use "
r"the line terminator as separator."
)
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
def test_read_csv_posargs_deprecation(all_parsers):
# GH 41485
f = StringIO("a,b\n1,2")
parser = all_parsers
msg = (
"In a future version of pandas all arguments of read_csv "
"except for the argument 'filepath_or_buffer' will be keyword-only"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
parser.read_csv(f, " ")
@pytest.mark.parametrize("delimiter", [",", "\t"])
def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter):
# GH: 35958
f = StringIO("a b c\n1 -2 -3\n4 5 6")
parser = all_parsers
msg = (
"Specified a delimiter with both sep and "
"delim_whitespace=True; you can only specify one."
)
with pytest.raises(ValueError, match=msg):
parser.read_table(f, delim_whitespace=True, sep=delimiter)
with pytest.raises(ValueError, match=msg):
parser.read_table(f, delim_whitespace=True, delimiter=delimiter)
@pytest.mark.parametrize("func", ["read_csv", "read_table"])
def test_names_and_prefix_not_None_raises(all_parsers, func):
# GH#39123
f = StringIO("a,b\n1,2")
parser = all_parsers
msg = "Specified named and prefix; you can only specify one."
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(FutureWarning):
getattr(parser, func)(f, names=["a", "b"], prefix="x")
@pytest.mark.parametrize("func", ["read_csv", "read_table"])
@pytest.mark.parametrize("prefix, names", [(None, ["x0", "x1"]), ("x", None)])
def test_names_and_prefix_explicit_None(all_parsers, names, prefix, func):
# GH42387
f = StringIO("a,b\n1,2")
expected = DataFrame({"x0": ["a", "1"], "x1": ["b", "2"]})
parser = all_parsers
if prefix is not None:
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = getattr(parser, func)(
f, names=names, sep=",", prefix=prefix, header=None
)
else:
result = getattr(parser, func)(
f, names=names, sep=",", prefix=prefix, header=None
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
def test_dict_keys_as_names(all_parsers):
# GH: 36928
data = "1,2"
keys = {"a": int, "b": int}.keys()
parser = all_parsers
result = parser.read_csv(StringIO(data), names=keys)
expected = DataFrame({"a": [1], "b": [2]})
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
def test_encoding_surrogatepass(all_parsers):
# GH39017
parser = all_parsers
content = b"\xed\xbd\xbf"
decoded = content.decode("utf-8", errors="surrogatepass")
expected = DataFrame({decoded: [decoded]}, index=[decoded * 2])
expected.index.name = decoded * 2
with tm.ensure_clean() as path:
Path(path).write_bytes(
content * 2 + b"," + content + b"\n" + content * 2 + b"," + content
)
df = parser.read_csv(path, encoding_errors="surrogatepass", index_col=0)
tm.assert_frame_equal(df, expected)
with pytest.raises(UnicodeDecodeError, match="'utf-8' codec can't decode byte"):
parser.read_csv(path)
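# Editorial note: encoding_errors accepts the standard codec error handlers
# ("strict", "ignore", "replace", "surrogatepass", ...), e.g. (illustrative):
#     pd.read_csv(path, encoding_errors="replace")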
@xfail_pyarrow
@pytest.mark.parametrize("on_bad_lines", ["error", "warn"])
def test_deprecated_bad_lines_warns(all_parsers, csv1, on_bad_lines):
# GH 15122
parser = all_parsers
kwds = {f"{on_bad_lines}_bad_lines": False}
parser.read_csv_check_warnings(
FutureWarning,
f"The {on_bad_lines}_bad_lines argument has been deprecated "
"and will be removed in a future version. "
"Use on_bad_lines in the future.\n\n",
csv1,
**kwds,
)
def test_malformed_second_line(all_parsers):
# see GH14782
parser = all_parsers
data = "\na\nb\n"
result = parser.read_csv(StringIO(data), skip_blank_lines=False, header=1)
expected = DataFrame({"a": ["b"]})
tm.assert_frame_equal(result, expected)
def test_read_table_posargs_deprecation(all_parsers):
# https://github.com/pandas-dev/pandas/issues/41485
data = StringIO("a\tb\n1\t2")
parser = all_parsers
msg = (
"In a future version of pandas all arguments of read_table "
"except for the argument 'filepath_or_buffer' will be keyword-only"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
parser.read_table(data, " ")


@@ -0,0 +1,87 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
import csv
from io import StringIO
import pytest
from pandas import DataFrame
import pandas._testing as tm
from pandas.io.parsers import TextParser
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
@xfail_pyarrow
def test_read_data_list(all_parsers):
parser = all_parsers
kwargs = {"index_col": 0}
data = "A,B,C\nfoo,1,2,3\nbar,4,5,6"
data_list = [["A", "B", "C"], ["foo", "1", "2", "3"], ["bar", "4", "5", "6"]]
expected = parser.read_csv(StringIO(data), **kwargs)
with TextParser(data_list, chunksize=2, **kwargs) as parser:
result = parser.read()
tm.assert_frame_equal(result, expected)
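# Editorial note: TextParser is the internal reader used above; it consumes
# rows that are already split into lists while keeping the read_csv keyword
# surface (chunksize, index_col, skiprows, ...).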
def test_reader_list(all_parsers):
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
parser = all_parsers
kwargs = {"index_col": 0}
lines = list(csv.reader(StringIO(data)))
with TextParser(lines, chunksize=2, **kwargs) as reader:
chunks = list(reader)
expected = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(chunks[0], expected[:2])
tm.assert_frame_equal(chunks[1], expected[2:4])
tm.assert_frame_equal(chunks[2], expected[4:])
def test_reader_list_skiprows(all_parsers):
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
parser = all_parsers
kwargs = {"index_col": 0}
lines = list(csv.reader(StringIO(data)))
with TextParser(lines, chunksize=2, skiprows=[1], **kwargs) as reader:
chunks = list(reader)
expected = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(chunks[0], expected[1:3])
def test_read_csv_parse_simple_list(all_parsers):
parser = all_parsers
data = """foo
bar baz
qux foo
foo
bar"""
result = parser.read_csv(StringIO(data), header=None)
expected = DataFrame(["foo", "bar baz", "qux foo", "foo", "bar"])
tm.assert_frame_equal(result, expected)


@@ -0,0 +1,62 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from io import StringIO
import pytest
from pandas import DataFrame
import pandas._testing as tm
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
@pytest.mark.parametrize(
"data,thousands,decimal",
[
(
"""A|B|C
1|2,334.01|5
10|13|10.
""",
",",
".",
),
(
"""A|B|C
1|2.334,01|5
10|13|10,
""",
".",
",",
),
],
)
def test_1000_sep_with_decimal(all_parsers, data, thousands, decimal):
parser = all_parsers
expected = DataFrame({"A": [1, 10], "B": [2334.01, 13], "C": [5, 10.0]})
result = parser.read_csv(
StringIO(data), sep="|", thousands=thousands, decimal=decimal
)
tm.assert_frame_equal(result, expected)
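# Editorial note (illustrative): the second parametrization above is the
# European convention, where
#     pd.read_csv(buf, sep="|", thousands=".", decimal=",")
# parses "2.334,01" as the float 2334.01.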
def test_euro_decimal_format(all_parsers):
parser = all_parsers
data = """Id;Number1;Number2;Text1;Text2;Number3
1;1521,1541;187101,9543;ABC;poi;4,738797819
2;121,12;14897,76;DEF;uyt;0,377320872
3;878,158;108013,434;GHI;rez;2,735694704"""
result = parser.read_csv(StringIO(data), sep=";", decimal=",")
expected = DataFrame(
[
[1, 1521.1541, 187101.9543, "ABC", "poi", 4.738797819],
[2, 121.12, 14897.76, "DEF", "uyt", 0.377320872],
[3, 878.158, 108013.434, "GHI", "rez", 2.735694704],
],
columns=["Id", "Number1", "Number2", "Text1", "Text2", "Number3"],
)
tm.assert_frame_equal(result, expected)


@@ -0,0 +1,429 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from io import (
BytesIO,
StringIO,
)
import os
import platform
from urllib.error import URLError
import pytest
from pandas.errors import (
EmptyDataError,
ParserError,
)
import pandas.util._test_decorators as td
from pandas import DataFrame
import pandas._testing as tm
# TODO(1.4) Please xfail individual tests at release time
# instead of skip
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
@pytest.mark.network
@tm.network(
url=(
"https://raw.github.com/pandas-dev/pandas/main/"
"pandas/tests/io/parser/data/salaries.csv"
),
check_before_test=True,
)
def test_url(all_parsers, csv_dir_path):
parser = all_parsers
kwargs = {"sep": "\t"}
url = (
"https://raw.github.com/pandas-dev/pandas/main/"
"pandas/tests/io/parser/data/salaries.csv"
)
url_result = parser.read_csv(url, **kwargs)
local_path = os.path.join(csv_dir_path, "salaries.csv")
local_result = parser.read_csv(local_path, **kwargs)
tm.assert_frame_equal(url_result, local_result)
@pytest.mark.slow
def test_local_file(all_parsers, csv_dir_path):
parser = all_parsers
kwargs = {"sep": "\t"}
local_path = os.path.join(csv_dir_path, "salaries.csv")
local_result = parser.read_csv(local_path, **kwargs)
url = "file://localhost/" + local_path
try:
url_result = parser.read_csv(url, **kwargs)
tm.assert_frame_equal(url_result, local_result)
except URLError:
# Fails on some systems.
pytest.skip("Failing on: " + " ".join(platform.uname()))
def test_path_path_lib(all_parsers):
parser = all_parsers
df = tm.makeDataFrame()
result = tm.round_trip_pathlib(df.to_csv, lambda p: parser.read_csv(p, index_col=0))
tm.assert_frame_equal(df, result)
def test_path_local_path(all_parsers):
parser = all_parsers
df = tm.makeDataFrame()
result = tm.round_trip_localpath(
df.to_csv, lambda p: parser.read_csv(p, index_col=0)
)
tm.assert_frame_equal(df, result)
def test_nonexistent_path(all_parsers):
# gh-2428: pls no segfault
# gh-14086: raise more helpful FileNotFoundError
# GH#29233 "File foo" instead of "File b'foo'"
parser = all_parsers
path = f"{tm.rands(10)}.csv"
msg = r"\[Errno 2\]"
with pytest.raises(FileNotFoundError, match=msg) as e:
parser.read_csv(path)
assert path == e.value.filename
@td.skip_if_windows # os.chmod does not work in windows
def test_no_permission(all_parsers):
# GH 23784
parser = all_parsers
msg = r"\[Errno 13\]"
with tm.ensure_clean() as path:
os.chmod(path, 0) # make file unreadable
# verify that this process cannot open the file (not running as sudo)
try:
with open(path):
pass
pytest.skip("Running as sudo.")
except PermissionError:
pass
with pytest.raises(PermissionError, match=msg) as e:
parser.read_csv(path)
assert path == e.value.filename
@pytest.mark.parametrize(
"data,kwargs,expected,msg",
[
# gh-10728: WHITESPACE_LINE
(
"a,b,c\n4,5,6\n ",
{},
DataFrame([[4, 5, 6]], columns=["a", "b", "c"]),
None,
),
# gh-10548: EAT_LINE_COMMENT
(
"a,b,c\n4,5,6\n#comment",
{"comment": "#"},
DataFrame([[4, 5, 6]], columns=["a", "b", "c"]),
None,
),
# EAT_CRNL_NOP
(
"a,b,c\n4,5,6\n\r",
{},
DataFrame([[4, 5, 6]], columns=["a", "b", "c"]),
None,
),
# EAT_COMMENT
(
"a,b,c\n4,5,6#comment",
{"comment": "#"},
DataFrame([[4, 5, 6]], columns=["a", "b", "c"]),
None,
),
# SKIP_LINE
(
"a,b,c\n4,5,6\nskipme",
{"skiprows": [2]},
DataFrame([[4, 5, 6]], columns=["a", "b", "c"]),
None,
),
# EAT_LINE_COMMENT
(
"a,b,c\n4,5,6\n#comment",
{"comment": "#", "skip_blank_lines": False},
DataFrame([[4, 5, 6]], columns=["a", "b", "c"]),
None,
),
# IN_FIELD
(
"a,b,c\n4,5,6\n ",
{"skip_blank_lines": False},
DataFrame([["4", 5, 6], [" ", None, None]], columns=["a", "b", "c"]),
None,
),
# EAT_CRNL
(
"a,b,c\n4,5,6\n\r",
{"skip_blank_lines": False},
DataFrame([[4, 5, 6], [None, None, None]], columns=["a", "b", "c"]),
None,
),
# ESCAPED_CHAR
(
"a,b,c\n4,5,6\n\\",
{"escapechar": "\\"},
None,
"(EOF following escape character)|(unexpected end of data)",
),
# ESCAPE_IN_QUOTED_FIELD
(
'a,b,c\n4,5,6\n"\\',
{"escapechar": "\\"},
None,
"(EOF inside string starting at row 2)|(unexpected end of data)",
),
# IN_QUOTED_FIELD
(
'a,b,c\n4,5,6\n"',
{"escapechar": "\\"},
None,
"(EOF inside string starting at row 2)|(unexpected end of data)",
),
],
ids=[
"whitespace-line",
"eat-line-comment",
"eat-crnl-nop",
"eat-comment",
"skip-line",
"eat-line-comment",
"in-field",
"eat-crnl",
"escaped-char",
"escape-in-quoted-field",
"in-quoted-field",
],
)
def test_eof_states(all_parsers, data, kwargs, expected, msg):
# see gh-10728, gh-10548
parser = all_parsers
if expected is None:
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
else:
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
def test_temporary_file(all_parsers):
# see gh-13398
parser = all_parsers
data = "0 0"
with tm.ensure_clean(mode="w+", return_filelike=True) as new_file:
new_file.write(data)
new_file.flush()
new_file.seek(0)
result = parser.read_csv(new_file, sep=r"\s+", header=None)
expected = DataFrame([[0, 0]])
tm.assert_frame_equal(result, expected)
def test_internal_eof_byte(all_parsers):
# see gh-5500
parser = all_parsers
data = "a,b\n1\x1a,2"
expected = DataFrame([["1\x1a", 2]], columns=["a", "b"])
result = parser.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
def test_internal_eof_byte_to_file(all_parsers):
# see gh-16559
parser = all_parsers
data = b'c1,c2\r\n"test \x1a test", test\r\n'
expected = DataFrame([["test \x1a test", " test"]], columns=["c1", "c2"])
path = f"__{tm.rands(10)}__.csv"
with tm.ensure_clean(path) as path:
with open(path, "wb") as f:
f.write(data)
result = parser.read_csv(path)
tm.assert_frame_equal(result, expected)
def test_file_handle_string_io(all_parsers):
# gh-14418
#
# Don't close user provided file handles.
parser = all_parsers
data = "a,b\n1,2"
fh = StringIO(data)
parser.read_csv(fh)
assert not fh.closed
def test_file_handles_with_open(all_parsers, csv1):
# gh-14418
#
# Don't close user provided file handles.
parser = all_parsers
for mode in ["r", "rb"]:
with open(csv1, mode) as f:
parser.read_csv(f)
assert not f.closed
def test_invalid_file_buffer_class(all_parsers):
# see gh-15337
class InvalidBuffer:
pass
parser = all_parsers
msg = "Invalid file path or buffer object type"
with pytest.raises(ValueError, match=msg):
parser.read_csv(InvalidBuffer())
def test_invalid_file_buffer_mock(all_parsers):
# see gh-15337
parser = all_parsers
msg = "Invalid file path or buffer object type"
class Foo:
pass
with pytest.raises(ValueError, match=msg):
parser.read_csv(Foo())
def test_valid_file_buffer_seems_invalid(all_parsers):
# gh-16135: we want to ensure that "tell" and "seek"
# aren't actually being used when we call `read_csv`
#
# Thus, while the object may look "invalid" (these
# methods are attributes of the `StringIO` class),
# it is still a valid file-object for our purposes.
class NoSeekTellBuffer(StringIO):
def tell(self):
raise AttributeError("No tell method")
def seek(self, pos, whence=0):
raise AttributeError("No seek method")
data = "a\n1"
parser = all_parsers
expected = DataFrame({"a": [1]})
result = parser.read_csv(NoSeekTellBuffer(data))
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("io_class", [StringIO, BytesIO])
@pytest.mark.parametrize("encoding", [None, "utf-8"])
def test_read_csv_file_handle(all_parsers, io_class, encoding):
"""
Test whether read_csv does not close user-provided file handles.
GH 36980
"""
parser = all_parsers
expected = DataFrame({"a": [1], "b": [2]})
content = "a,b\n1,2"
handle = io_class(content.encode("utf-8") if io_class == BytesIO else content)
tm.assert_frame_equal(parser.read_csv(handle, encoding=encoding), expected)
assert not handle.closed
def test_memory_map_compression(all_parsers, compression):
"""
Support memory map for compressed files.
GH 37621
"""
parser = all_parsers
expected = DataFrame({"a": [1], "b": [2]})
with tm.ensure_clean() as path:
expected.to_csv(path, index=False, compression=compression)
tm.assert_frame_equal(
parser.read_csv(path, memory_map=True, compression=compression),
expected,
)
def test_context_manager(all_parsers, datapath):
# make sure that opened files are closed
parser = all_parsers
path = datapath("io", "data", "csv", "iris.csv")
reader = parser.read_csv(path, chunksize=1)
assert not reader.handles.handle.closed
try:
with reader:
next(reader)
assert False
except AssertionError:
assert reader.handles.handle.closed
def test_context_manageri_user_provided(all_parsers, datapath):
# make sure that user-provided handles are not closed
parser = all_parsers
with open(datapath("io", "data", "csv", "iris.csv")) as path:
reader = parser.read_csv(path, chunksize=1)
assert not reader.handles.handle.closed
try:
with reader:
next(reader)
assert False
except AssertionError:
assert not reader.handles.handle.closed
def test_file_descriptor_leak(all_parsers):
# GH 31488
parser = all_parsers
with tm.ensure_clean() as path:
def test():
with pytest.raises(EmptyDataError, match="No columns to parse from file"):
parser.read_csv(path)
td.check_file_leaks(test)()
@td.check_file_leaks
def test_memory_map(all_parsers, csv_dir_path):
mmap_file = os.path.join(csv_dir_path, "test_mmap.csv")
parser = all_parsers
expected = DataFrame(
{"a": [1, 2, 3], "b": ["one", "two", "three"], "c": ["I", "II", "III"]}
)
result = parser.read_csv(mmap_file, memory_map=True)
tm.assert_frame_equal(result, expected)


@@ -0,0 +1,65 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from io import StringIO
import numpy as np
import pytest
from pandas.compat import is_platform_linux
from pandas import DataFrame
import pandas._testing as tm
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
def test_float_parser(all_parsers):
# see gh-9565
parser = all_parsers
data = "45e-1,4.5,45.,inf,-inf"
result = parser.read_csv(StringIO(data), header=None)
expected = DataFrame([[float(s) for s in data.split(",")]])
tm.assert_frame_equal(result, expected)
def test_scientific_no_exponent(all_parsers_all_precisions):
# see gh-12215
df = DataFrame.from_dict({"w": ["2e"], "x": ["3E"], "y": ["42e"], "z": ["632E"]})
data = df.to_csv(index=False)
parser, precision = all_parsers_all_precisions
df_roundtrip = parser.read_csv(StringIO(data), float_precision=precision)
tm.assert_frame_equal(df_roundtrip, df)
@pytest.mark.parametrize("neg_exp", [-617, -100000, -99999999999999999])
def test_very_negative_exponent(all_parsers_all_precisions, neg_exp):
# GH#38753
parser, precision = all_parsers_all_precisions
data = f"data\n10E{neg_exp}"
result = parser.read_csv(StringIO(data), float_precision=precision)
expected = DataFrame({"data": [0.0]})
tm.assert_frame_equal(result, expected)
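# Editorial note: the all_parsers_all_precisions fixture pairs each engine
# with the float_precision modes read_csv accepts (e.g. "high",
# "round_trip"); a single illustrative call:
#     pd.read_csv(buf, float_precision="round_trip")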
@pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999])
def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request):
# GH#38753
parser, precision = all_parsers_all_precisions
data = f"data\n10E{exp}"
result = parser.read_csv(StringIO(data), float_precision=precision)
if precision == "round_trip":
if exp == 999999999999999999 and is_platform_linux():
mark = pytest.mark.xfail(reason="GH38794, on Linux gives object result")
request.node.add_marker(mark)
value = np.inf if exp > 0 else 0.0
expected = DataFrame({"data": [value]})
else:
expected = DataFrame({"data": [f"10E{exp}"]})
tm.assert_frame_equal(result, expected)


@@ -0,0 +1,299 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from datetime import datetime
from io import StringIO
import os
import pytest
from pandas import (
DataFrame,
Index,
MultiIndex,
)
import pandas._testing as tm
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
# GH#43650: Some expected failures with the pyarrow engine can occasionally
# cause a deadlock instead, so we skip these instead of xfailing
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
@pytest.mark.parametrize(
"data,kwargs,expected",
[
(
"""foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
""",
{"index_col": 0, "names": ["index", "A", "B", "C", "D"]},
DataFrame(
[
[2, 3, 4, 5],
[7, 8, 9, 10],
[12, 13, 14, 15],
[12, 13, 14, 15],
[12, 13, 14, 15],
[12, 13, 14, 15],
],
index=Index(["foo", "bar", "baz", "qux", "foo2", "bar2"], name="index"),
columns=["A", "B", "C", "D"],
),
),
(
"""foo,one,2,3,4,5
foo,two,7,8,9,10
foo,three,12,13,14,15
bar,one,12,13,14,15
bar,two,12,13,14,15
""",
{"index_col": [0, 1], "names": ["index1", "index2", "A", "B", "C", "D"]},
DataFrame(
[
[2, 3, 4, 5],
[7, 8, 9, 10],
[12, 13, 14, 15],
[12, 13, 14, 15],
[12, 13, 14, 15],
],
index=MultiIndex.from_tuples(
[
("foo", "one"),
("foo", "two"),
("foo", "three"),
("bar", "one"),
("bar", "two"),
],
names=["index1", "index2"],
),
columns=["A", "B", "C", "D"],
),
),
],
)
def test_pass_names_with_index(all_parsers, data, kwargs, expected):
parser = all_parsers
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("index_col", [[0, 1], [1, 0]])
def test_multi_index_no_level_names(all_parsers, index_col):
data = """index1,index2,A,B,C,D
foo,one,2,3,4,5
foo,two,7,8,9,10
foo,three,12,13,14,15
bar,one,12,13,14,15
bar,two,12,13,14,15
"""
headless_data = "\n".join(data.split("\n")[1:])
names = ["A", "B", "C", "D"]
parser = all_parsers
result = parser.read_csv(
StringIO(headless_data), index_col=index_col, header=None, names=names
)
expected = parser.read_csv(StringIO(data), index_col=index_col)
# No index names in headless data.
expected.index.names = [None] * 2
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
def test_multi_index_no_level_names_implicit(all_parsers):
parser = all_parsers
data = """A,B,C,D
foo,one,2,3,4,5
foo,two,7,8,9,10
foo,three,12,13,14,15
bar,one,12,13,14,15
bar,two,12,13,14,15
"""
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[
[2, 3, 4, 5],
[7, 8, 9, 10],
[12, 13, 14, 15],
[12, 13, 14, 15],
[12, 13, 14, 15],
],
columns=["A", "B", "C", "D"],
index=MultiIndex.from_tuples(
[
("foo", "one"),
("foo", "two"),
("foo", "three"),
("bar", "one"),
("bar", "two"),
]
),
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
@pytest.mark.parametrize(
"data,expected,header",
[
("a,b", DataFrame(columns=["a", "b"]), [0]),
(
"a,b\nc,d",
DataFrame(columns=MultiIndex.from_tuples([("a", "c"), ("b", "d")])),
[0, 1],
),
],
)
@pytest.mark.parametrize("round_trip", [True, False])
def test_multi_index_blank_df(all_parsers, data, expected, header, round_trip):
# see gh-14545
parser = all_parsers
data = expected.to_csv(index=False) if round_trip else data
result = parser.read_csv(StringIO(data), header=header)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
def test_no_unnamed_index(all_parsers):
parser = all_parsers
data = """ id c0 c1 c2
0 1 0 a b
1 2 0 c d
2 2 2 e f
"""
result = parser.read_csv(StringIO(data), sep=" ")
expected = DataFrame(
[[0, 1, 0, "a", "b"], [1, 2, 0, "c", "d"], [2, 2, 2, "e", "f"]],
columns=["Unnamed: 0", "id", "c0", "c1", "c2"],
)
tm.assert_frame_equal(result, expected)
def test_read_duplicate_index_explicit(all_parsers):
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo,12,13,14,15
bar,12,13,14,15
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=0)
expected = DataFrame(
[
[2, 3, 4, 5],
[7, 8, 9, 10],
[12, 13, 14, 15],
[12, 13, 14, 15],
[12, 13, 14, 15],
[12, 13, 14, 15],
],
columns=["A", "B", "C", "D"],
index=Index(["foo", "bar", "baz", "qux", "foo", "bar"], name="index"),
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
def test_read_duplicate_index_implicit(all_parsers):
data = """A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo,12,13,14,15
bar,12,13,14,15
"""
parser = all_parsers
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[
[2, 3, 4, 5],
[7, 8, 9, 10],
[12, 13, 14, 15],
[12, 13, 14, 15],
[12, 13, 14, 15],
[12, 13, 14, 15],
],
columns=["A", "B", "C", "D"],
index=Index(["foo", "bar", "baz", "qux", "foo", "bar"]),
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
def test_read_csv_no_index_name(all_parsers, csv_dir_path):
parser = all_parsers
csv2 = os.path.join(csv_dir_path, "test2.csv")
result = parser.read_csv(csv2, index_col=0, parse_dates=True)
expected = DataFrame(
[
[0.980269, 3.685731, -0.364216805298, -1.159738, "foo"],
[1.047916, -0.041232, -0.16181208307, 0.212549, "bar"],
[0.498581, 0.731168, -0.537677223318, 1.346270, "baz"],
[1.120202, 1.567621, 0.00364077397681, 0.675253, "qux"],
[-0.487094, 0.571455, -1.6116394093, 0.103469, "foo2"],
],
columns=["A", "B", "C", "D", "E"],
index=Index(
[
datetime(2000, 1, 3),
datetime(2000, 1, 4),
datetime(2000, 1, 5),
datetime(2000, 1, 6),
datetime(2000, 1, 7),
]
),
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
def test_empty_with_index(all_parsers):
# see gh-10184
data = "x,y"
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=0)
expected = DataFrame(columns=["y"], index=Index([], name="x"))
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_empty_with_multi_index(all_parsers):
# see gh-10467
data = "x,y,z"
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=["x", "y"])
expected = DataFrame(
columns=["z"], index=MultiIndex.from_arrays([[]] * 2, names=["x", "y"])
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_empty_with_reversed_multi_index(all_parsers):
data = "x,y,z"
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=[1, 0])
expected = DataFrame(
columns=["z"], index=MultiIndex.from_arrays([[]] * 2, names=["y", "x"])
)
tm.assert_frame_equal(result, expected)


@@ -0,0 +1,68 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from io import StringIO
import numpy as np
import pytest
from pandas import (
DataFrame,
option_context,
)
import pandas._testing as tm
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
@xfail_pyarrow
@pytest.mark.parametrize("na_filter", [True, False])
def test_inf_parsing(all_parsers, na_filter):
parser = all_parsers
data = """\
,A
a,inf
b,-inf
c,+Inf
d,-Inf
e,INF
f,-INF
g,+INf
h,-INf
i,inF
j,-inF"""
expected = DataFrame(
{"A": [float("inf"), float("-inf")] * 5},
index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"],
)
result = parser.read_csv(StringIO(data), index_col=0, na_filter=na_filter)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
@pytest.mark.parametrize("na_filter", [True, False])
def test_infinity_parsing(all_parsers, na_filter):
parser = all_parsers
data = """\
,A
a,Infinity
b,-Infinity
c,+Infinity
"""
expected = DataFrame(
{"A": [float("infinity"), float("-infinity"), float("+infinity")]},
index=["a", "b", "c"],
)
result = parser.read_csv(StringIO(data), index_col=0, na_filter=na_filter)
tm.assert_frame_equal(result, expected)
def test_read_csv_with_use_inf_as_na(all_parsers):
# https://github.com/pandas-dev/pandas/issues/35493
parser = all_parsers
data = "1.0\nNaN\n3.0"
with option_context("use_inf_as_na", True):
result = parser.read_csv(StringIO(data), header=None)
expected = DataFrame([1.0, np.nan, 3.0])
tm.assert_frame_equal(result, expected)


@@ -0,0 +1,215 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from io import StringIO
import numpy as np
import pytest
from pandas import (
DataFrame,
Series,
)
import pandas._testing as tm
# GH#43650: Some expected failures with the pyarrow engine can occasionally
# cause a deadlock instead, so we skip these instead of xfailing
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
def test_int_conversion(all_parsers):
data = """A,B
1.0,1
2.0,2
3.0,3
"""
parser = all_parsers
result = parser.read_csv(StringIO(data))
expected = DataFrame([[1.0, 1], [2.0, 2], [3.0, 3]], columns=["A", "B"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"data,kwargs,expected",
[
(
"A,B\nTrue,1\nFalse,2\nTrue,3",
{},
DataFrame([[True, 1], [False, 2], [True, 3]], columns=["A", "B"]),
),
(
"A,B\nYES,1\nno,2\nyes,3\nNo,3\nYes,3",
{"true_values": ["yes", "Yes", "YES"], "false_values": ["no", "NO", "No"]},
DataFrame(
[[True, 1], [False, 2], [True, 3], [False, 3], [True, 3]],
columns=["A", "B"],
),
),
(
"A,B\nTRUE,1\nFALSE,2\nTRUE,3",
{},
DataFrame([[True, 1], [False, 2], [True, 3]], columns=["A", "B"]),
),
(
"A,B\nfoo,bar\nbar,foo",
{"true_values": ["foo"], "false_values": ["bar"]},
DataFrame([[True, False], [False, True]], columns=["A", "B"]),
),
],
)
def test_parse_bool(all_parsers, data, kwargs, expected):
parser = all_parsers
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
def test_parse_integers_above_fp_precision(all_parsers):
data = """Numbers
17007000002000191
17007000002000191
17007000002000191
17007000002000191
17007000002000192
17007000002000192
17007000002000192
17007000002000192
17007000002000192
17007000002000194"""
parser = all_parsers
result = parser.read_csv(StringIO(data))
expected = DataFrame(
{
"Numbers": [
17007000002000191,
17007000002000191,
17007000002000191,
17007000002000191,
17007000002000192,
17007000002000192,
17007000002000192,
17007000002000192,
17007000002000192,
17007000002000194,
]
}
)
tm.assert_frame_equal(result, expected)
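# Editorial note: 17007000002000191 exceeds 2**53, beyond which consecutive
# integers are no longer representable as float64; parsing the column as
# int64 is what keeps the ...191/...192 values above distinct.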
@skip_pyarrow # Flaky
@pytest.mark.parametrize("sep", [" ", r"\s+"])
def test_integer_overflow_bug(all_parsers, sep):
# see gh-2601
data = "65248E10 11\n55555E55 22\n"
parser = all_parsers
result = parser.read_csv(StringIO(data), header=None, sep=sep)
expected = DataFrame([[6.5248e14, 11], [5.5555e59, 22]])
tm.assert_frame_equal(result, expected)
def test_int64_min_issues(all_parsers):
# see gh-2599
parser = all_parsers
data = "A,B\n0,0\n0,"
result = parser.read_csv(StringIO(data))
expected = DataFrame({"A": [0, 0], "B": [0, np.nan]})
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize("conv", [None, np.int64, np.uint64])
def test_int64_overflow(all_parsers, conv):
data = """ID
00013007854817840016671868
00013007854817840016749251
00013007854817840016754630
00013007854817840016781876
00013007854817840017028824
00013007854817840017963235
00013007854817840018860166"""
parser = all_parsers
if conv is None:
# 13007854817840016671868 > UINT64_MAX, so this
# will overflow and return object as the dtype.
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[
"00013007854817840016671868",
"00013007854817840016749251",
"00013007854817840016754630",
"00013007854817840016781876",
"00013007854817840017028824",
"00013007854817840017963235",
"00013007854817840018860166",
],
columns=["ID"],
)
tm.assert_frame_equal(result, expected)
else:
# 13007854817840016671868 > UINT64_MAX, so attempts
# to cast to either int64 or uint64 will result in
# an OverflowError being raised.
msg = (
"(Python int too large to convert to C long)|"
"(long too big to convert)|"
"(int too big to convert)"
)
with pytest.raises(OverflowError, match=msg):
parser.read_csv(StringIO(data), converters={"ID": conv})
@skip_pyarrow
@pytest.mark.parametrize(
"val", [np.iinfo(np.uint64).max, np.iinfo(np.int64).max, np.iinfo(np.int64).min]
)
def test_int64_uint64_range(all_parsers, val):
    # These numbers fall right at the int64 and uint64 boundaries,
    # so they can still be parsed as integers without overflow.
parser = all_parsers
result = parser.read_csv(StringIO(str(val)), header=None)
expected = DataFrame([val])
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize(
"val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1]
)
def test_outside_int64_uint64_range(all_parsers, val):
# These numbers fall just outside the int64-uint64
# range, so they should be parsed as string.
parser = all_parsers
result = parser.read_csv(StringIO(str(val)), header=None)
expected = DataFrame([str(val)])
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize("exp_data", [[str(-1), str(2**63)], [str(2**63), str(-1)]])
def test_numeric_range_too_wide(all_parsers, exp_data):
# No numerical dtype can hold both negative and uint64
# values, so they should be cast as string.
parser = all_parsers
data = "\n".join(exp_data)
expected = DataFrame(exp_data)
result = parser.read_csv(StringIO(data), header=None)
tm.assert_frame_equal(result, expected)
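# Hedged sketch (illustration only): -1 needs a signed dtype while 2**63
# exceeds int64, so no single NumPy integer dtype can hold both and the
# parser keeps the raw strings. `_demo_no_common_int_dtype` is hypothetical.
def _demo_no_common_int_dtype():
    import numpy as np

    assert 2**63 > np.iinfo(np.int64).max  # too large for int64
    assert np.iinfo(np.uint64).min == 0  # uint64 cannot represent -1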
def test_integer_precision(all_parsers):
    # GH 7072
s = """1,1;0;0;0;1;1;3844;3844;3844;1;1;1;1;1;1;0;0;1;1;0;0,,,4321583677327450765
5,1;0;0;0;1;1;843;843;843;1;1;1;1;1;1;0;0;1;1;0;0,64.0,;,4321113141090630389"""
parser = all_parsers
result = parser.read_csv(StringIO(s), header=None)[4]
expected = Series([4321583677327450765, 4321113141090630389], name=4)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,109 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from io import StringIO
import pytest
from pandas import (
DataFrame,
concat,
)
import pandas._testing as tm
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
def test_iterator(all_parsers):
# see gh-6607
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
parser = all_parsers
kwargs = {"index_col": 0}
expected = parser.read_csv(StringIO(data), **kwargs)
with parser.read_csv(StringIO(data), iterator=True, **kwargs) as reader:
first_chunk = reader.read(3)
tm.assert_frame_equal(first_chunk, expected[:3])
last_chunk = reader.read(5)
tm.assert_frame_equal(last_chunk, expected[3:])
def test_iterator2(all_parsers):
parser = all_parsers
data = """A,B,C
foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
with parser.read_csv(StringIO(data), iterator=True) as reader:
result = list(reader)
expected = DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
index=["foo", "bar", "baz"],
columns=["A", "B", "C"],
)
tm.assert_frame_equal(result[0], expected)
def test_iterator_stop_on_chunksize(all_parsers):
# gh-3967: stopping iteration when chunksize is specified
parser = all_parsers
data = """A,B,C
foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
with parser.read_csv(StringIO(data), chunksize=1) as reader:
result = list(reader)
assert len(result) == 3
expected = DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
index=["foo", "bar", "baz"],
columns=["A", "B", "C"],
)
tm.assert_frame_equal(concat(result), expected)
@pytest.mark.parametrize(
"kwargs", [{"iterator": True, "chunksize": 1}, {"iterator": True}, {"chunksize": 1}]
)
def test_iterator_skipfooter_errors(all_parsers, kwargs):
msg = "'skipfooter' not supported for iteration"
parser = all_parsers
data = "a\n1\n2"
with pytest.raises(ValueError, match=msg):
with parser.read_csv(StringIO(data), skipfooter=1, **kwargs) as _:
pass
def test_iteration_open_handle(all_parsers):
parser = all_parsers
kwargs = {"header": None}
with tm.ensure_clean() as path:
with open(path, "w") as f:
f.write("AAA\nBBB\nCCC\nDDD\nEEE\nFFF\nGGG")
with open(path) as f:
for line in f:
if "CCC" in line:
break
result = parser.read_csv(f, **kwargs)
expected = DataFrame({0: ["DDD", "EEE", "FFF", "GGG"]})
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,280 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
import codecs
import csv
from io import StringIO
import os
from pathlib import Path
import warnings
import numpy as np
import pytest
from pandas.errors import (
EmptyDataError,
ParserError,
)
import pandas.util._test_decorators as td
from pandas import DataFrame
import pandas._testing as tm
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
def test_empty_decimal_marker(all_parsers):
data = """A|B|C
1|2,334|5
10|13|10.
"""
# Parsers support only length-1 decimals
msg = "Only length-1 decimal markers supported"
parser = all_parsers
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), decimal="")
def test_bad_stream_exception(all_parsers, csv_dir_path):
# see gh-13652
#
# This test validates that both the Python engine and C engine will
# raise UnicodeDecodeError instead of C engine raising ParserError
# and swallowing the exception that caused read to fail.
path = os.path.join(csv_dir_path, "sauron.SHIFT_JIS.csv")
codec = codecs.lookup("utf-8")
utf8 = codecs.lookup("utf-8")
parser = all_parsers
msg = "'utf-8' codec can't decode byte"
# Stream must be binary UTF8.
with open(path, "rb") as handle, codecs.StreamRecoder(
handle, utf8.encode, utf8.decode, codec.streamreader, codec.streamwriter
) as stream:
with pytest.raises(UnicodeDecodeError, match=msg):
parser.read_csv(stream)
def test_malformed(all_parsers):
# see gh-6607
parser = all_parsers
data = """ignore
A,B,C
1,2,3 # comment
1,2,3,4,5
2,3,4
"""
msg = "Expected 3 fields in line 4, saw 5"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), header=1, comment="#")
@pytest.mark.parametrize("nrows", [5, 3, None])
def test_malformed_chunks(all_parsers, nrows):
data = """ignore
A,B,C
skip
1,2,3
3,5,10 # comment
1,2,3,4,5
2,3,4
"""
parser = all_parsers
msg = "Expected 3 fields in line 6, saw 5"
with parser.read_csv(
StringIO(data), header=1, comment="#", iterator=True, chunksize=1, skiprows=[2]
) as reader:
with pytest.raises(ParserError, match=msg):
reader.read(nrows)
def test_catch_too_many_names(all_parsers):
# see gh-5156
data = """\
1,2,3
4,,6
7,8,9
10,11,12\n"""
parser = all_parsers
msg = (
"Too many columns specified: expected 4 and found 3"
if parser.engine == "c"
else "Number of passed names did not match "
"number of header fields in the file"
)
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=0, names=["a", "b", "c", "d"])
@pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5])
def test_raise_on_no_columns(all_parsers, nrows):
parser = all_parsers
data = "\n" * nrows
msg = "No columns to parse from file"
with pytest.raises(EmptyDataError, match=msg):
parser.read_csv(StringIO(data))
def test_read_csv_raises_on_header_prefix(all_parsers):
# gh-27394
parser = all_parsers
msg = "Argument prefix must be None if argument header is not None"
s = StringIO("0,1\n2,3")
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(FutureWarning):
parser.read_csv(s, header=0, prefix="_X")
def test_unexpected_keyword_parameter_exception(all_parsers):
# GH-34976
parser = all_parsers
msg = "{}\\(\\) got an unexpected keyword argument 'foo'"
with pytest.raises(TypeError, match=msg.format("read_csv")):
parser.read_csv("foo.csv", foo=1)
with pytest.raises(TypeError, match=msg.format("read_table")):
parser.read_table("foo.tsv", foo=1)
@pytest.mark.parametrize(
"kwargs",
[
pytest.param(
{"error_bad_lines": False, "warn_bad_lines": False},
marks=pytest.mark.filterwarnings("ignore"),
),
{"on_bad_lines": "skip"},
],
)
def test_suppress_error_output(all_parsers, capsys, kwargs):
# see gh-15925
parser = all_parsers
data = "a\n1\n1,2,3\n4\n5,6,7"
expected = DataFrame({"a": [1, 4]})
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
captured = capsys.readouterr()
assert captured.err == ""
@pytest.mark.filterwarnings("ignore")
@pytest.mark.parametrize(
"kwargs",
[{}, {"error_bad_lines": True}], # Default is True. # Explicitly pass in.
)
@pytest.mark.parametrize(
"warn_kwargs",
[{}, {"warn_bad_lines": True}, {"warn_bad_lines": False}],
)
def test_error_bad_lines(all_parsers, kwargs, warn_kwargs):
# see gh-15925
parser = all_parsers
kwargs.update(**warn_kwargs)
data = "a\n1\n1,2,3\n4\n5,6,7"
msg = "Expected 1 fields in line 3, saw 3"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
@pytest.mark.parametrize(
"kwargs",
[
pytest.param(
{"error_bad_lines": False, "warn_bad_lines": True},
marks=pytest.mark.filterwarnings("ignore"),
),
{"on_bad_lines": "warn"},
],
)
def test_warn_bad_lines(all_parsers, capsys, kwargs):
# see gh-15925
parser = all_parsers
data = "a\n1\n1,2,3\n4\n5,6,7"
expected = DataFrame({"a": [1, 4]})
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
captured = capsys.readouterr()
assert "Skipping line 3" in captured.err
assert "Skipping line 5" in captured.err
def test_read_csv_wrong_num_columns(all_parsers):
# Too few columns.
data = """A,B,C,D,E,F
1,2,3,4,5,6
6,7,8,9,10,11,12
11,12,13,14,15,16
"""
parser = all_parsers
msg = "Expected 6 fields in line 3, saw 7"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data))
def test_null_byte_char(all_parsers):
# see gh-2741
data = "\x00,foo"
names = ["a", "b"]
parser = all_parsers
if parser.engine == "c":
expected = DataFrame([[np.nan, "foo"]], columns=names)
out = parser.read_csv(StringIO(data), names=names)
tm.assert_frame_equal(out, expected)
else:
msg = "NULL byte detected"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), names=names)
@td.check_file_leaks
def test_open_file(all_parsers):
# GH 39024
parser = all_parsers
if parser.engine == "c":
pytest.skip("'c' engine does not support sep=None with delim_whitespace=False")
with tm.ensure_clean() as path:
file = Path(path)
file.write_bytes(b"\xe4\na\n1")
# should not trigger a ResourceWarning
warnings.simplefilter("always", category=ResourceWarning)
with warnings.catch_warnings(record=True) as record:
with pytest.raises(csv.Error, match="Could not determine delimiter"):
parser.read_csv(file, sep=None, encoding_errors="replace")
assert len(record) == 0, record[0].message
def test_invalid_on_bad_line(all_parsers):
parser = all_parsers
data = "a\n1\n1,2,3\n4\n5,6,7"
with pytest.raises(ValueError, match="Argument abc is invalid for on_bad_lines"):
parser.read_csv(StringIO(data), on_bad_lines="abc")
@pytest.mark.parametrize("error_bad_lines", [True, False])
@pytest.mark.parametrize("warn_bad_lines", [True, False])
def test_conflict_on_bad_line(all_parsers, error_bad_lines, warn_bad_lines):
parser = all_parsers
data = "a\n1\n1,2,3\n4\n5,6,7"
kwds = {"error_bad_lines": error_bad_lines, "warn_bad_lines": warn_bad_lines}
with pytest.raises(
ValueError,
match="Both on_bad_lines and error_bad_lines/warn_bad_lines are set. "
"Please only set on_bad_lines.",
):
parser.read_csv(StringIO(data), on_bad_lines="error", **kwds)
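# Hedged usage sketch of the API exercised above: on_bad_lines="skip" drops
# malformed rows silently, while "warn" reports them to stderr. The helper
# name is illustrative only.
def _demo_on_bad_lines_skip():
    from io import StringIO

    import pandas as pd

    result = pd.read_csv(StringIO("a\n1\n1,2,3\n4"), on_bad_lines="skip")
    assert result["a"].tolist() == [1, 4]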

View File

@ -0,0 +1,55 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from io import StringIO
import pytest
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
def test_verbose_read(all_parsers, capsys):
parser = all_parsers
data = """a,b,c,d
one,1,2,3
one,1,2,3
,1,2,3
one,1,2,3
,1,2,3
,1,2,3
one,1,2,3
two,1,2,3"""
# Engines are verbose in different ways.
parser.read_csv(StringIO(data), verbose=True)
captured = capsys.readouterr()
if parser.engine == "c":
assert "Tokenization took:" in captured.out
assert "Parser memory cleanup took:" in captured.out
else: # Python engine
assert captured.out == "Filled 3 NA values in column a\n"
def test_verbose_read2(all_parsers, capsys):
parser = all_parsers
data = """a,b,c,d
one,1,2,3
two,1,2,3
three,1,2,3
four,1,2,3
five,1,2,3
,1,2,3
seven,1,2,3
eight,1,2,3"""
parser.read_csv(StringIO(data), verbose=True, index_col=0)
captured = capsys.readouterr()
# Engines are verbose in different ways.
if parser.engine == "c":
assert "Tokenization took:" in captured.out
assert "Parser memory cleanup took:" in captured.out
else: # Python engine
assert captured.out == "Filled 1 NA values in column a\n"

View File

@ -0,0 +1,287 @@
from __future__ import annotations
import os
import pytest
from pandas.compat._optional import VERSIONS
from pandas import (
read_csv,
read_table,
)
import pandas._testing as tm
class BaseParser:
engine: str | None = None
low_memory = True
float_precision_choices: list[str | None] = []
def update_kwargs(self, kwargs):
kwargs = kwargs.copy()
kwargs.update({"engine": self.engine, "low_memory": self.low_memory})
return kwargs
def read_csv(self, *args, **kwargs):
kwargs = self.update_kwargs(kwargs)
return read_csv(*args, **kwargs)
def read_csv_check_warnings(
self, warn_type: type[Warning], warn_msg: str, *args, **kwargs
):
# We need to check the stacklevel here instead of in the tests
# since this is where read_csv is called and where the warning
# should point to.
kwargs = self.update_kwargs(kwargs)
with tm.assert_produces_warning(warn_type, match=warn_msg):
return read_csv(*args, **kwargs)
def read_table(self, *args, **kwargs):
kwargs = self.update_kwargs(kwargs)
return read_table(*args, **kwargs)
class CParser(BaseParser):
engine = "c"
float_precision_choices = [None, "high", "round_trip"]
class CParserHighMemory(CParser):
low_memory = False
class CParserLowMemory(CParser):
low_memory = True
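# Hedged usage sketch: update_kwargs merges each engine's settings into the
# kwargs a test passes, so one test body runs unchanged against every engine.
# `_demo_update_kwargs` is an illustrative name, not part of the suite.
def _demo_update_kwargs():
    parser = CParserHighMemory()
    merged = parser.update_kwargs({"sep": ","})
    assert merged == {"sep": ",", "engine": "c", "low_memory": False}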
class PythonParser(BaseParser):
engine = "python"
float_precision_choices = [None]
class PyArrowParser(BaseParser):
engine = "pyarrow"
float_precision_choices = [None]
@pytest.fixture
def csv_dir_path(datapath):
"""
The directory path to the data files needed for parser tests.
"""
return datapath("io", "parser", "data")
@pytest.fixture
def csv1(datapath):
"""
The path to the data file "test1.csv" needed for parser tests.
"""
return os.path.join(datapath("io", "data", "csv"), "test1.csv")
_cParserHighMemory = CParserHighMemory
_cParserLowMemory = CParserLowMemory
_pythonParser = PythonParser
_pyarrowParser = PyArrowParser
_py_parsers_only = [_pythonParser]
_c_parsers_only = [_cParserHighMemory, _cParserLowMemory]
_pyarrow_parsers_only = [pytest.param(_pyarrowParser, marks=pytest.mark.single_cpu)]
_all_parsers = [*_c_parsers_only, *_py_parsers_only, *_pyarrow_parsers_only]
_py_parser_ids = ["python"]
_c_parser_ids = ["c_high", "c_low"]
_pyarrow_parsers_ids = ["pyarrow"]
_all_parser_ids = [*_c_parser_ids, *_py_parser_ids, *_pyarrow_parsers_ids]
@pytest.fixture(params=_all_parsers, ids=_all_parser_ids)
def all_parsers(request):
"""
    Fixture for all of the CSV parsers.
"""
parser = request.param()
if parser.engine == "pyarrow":
pytest.importorskip("pyarrow", VERSIONS["pyarrow"])
        # Try to find a way to disable threads altogether
        # for more stable CI runs
import pyarrow
pyarrow.set_cpu_count(1)
return parser
@pytest.fixture(params=_c_parsers_only, ids=_c_parser_ids)
def c_parser_only(request):
"""
    Fixture for all of the CSV parsers using the C engine.
"""
return request.param()
@pytest.fixture(params=_py_parsers_only, ids=_py_parser_ids)
def python_parser_only(request):
"""
    Fixture for all of the CSV parsers using the Python engine.
"""
return request.param()
@pytest.fixture(params=_pyarrow_parsers_only, ids=_pyarrow_parsers_ids)
def pyarrow_parser_only(request):
"""
    Fixture for all of the CSV parsers using the pyarrow engine.
"""
return request.param()
def _get_all_parser_float_precision_combinations():
"""
Return all allowable parser and float precision
combinations and corresponding ids.
"""
params = []
ids = []
for parser, parser_id in zip(_all_parsers, _all_parser_ids):
if hasattr(parser, "values"):
# Wrapped in pytest.param, get the actual parser back
parser = parser.values[0]
for precision in parser.float_precision_choices:
# Re-wrap in pytest.param for pyarrow
mark = pytest.mark.single_cpu if parser.engine == "pyarrow" else ()
param = pytest.param((parser(), precision), marks=mark)
params.append(param)
ids.append(f"{parser_id}-{precision}")
return {"params": params, "ids": ids}
@pytest.fixture(
params=_get_all_parser_float_precision_combinations()["params"],
ids=_get_all_parser_float_precision_combinations()["ids"],
)
def all_parsers_all_precisions(request):
"""
Fixture for all allowable combinations of parser
and float precision
"""
return request.param
_utf_values = [8, 16, 32]
_encoding_seps = ["", "-", "_"]
_encoding_prefixes = ["utf", "UTF"]
_encoding_fmts = [
f"{prefix}{sep}" + "{0}" for sep in _encoding_seps for prefix in _encoding_prefixes
]
@pytest.fixture(params=_utf_values)
def utf_value(request):
"""
Fixture for all possible integer values for a UTF encoding.
"""
return request.param
@pytest.fixture(params=_encoding_fmts)
def encoding_fmt(request):
"""
Fixture for all possible string formats of a UTF encoding.
"""
return request.param
@pytest.fixture(
params=[
("-1,0", -1.0),
("-1,2e0", -1.2),
("-1e0", -1.0),
("+1e0", 1.0),
("+1e+0", 1.0),
("+1e-1", 0.1),
("+,1e1", 1.0),
("+1,e0", 1.0),
("-,1e1", -1.0),
("-1,e0", -1.0),
("0,1", 0.1),
("1,", 1.0),
(",1", 0.1),
("-,1", -0.1),
("1_,", 1.0),
("1_234,56", 1234.56),
("1_234,56e0", 1234.56),
# negative cases; must not parse as float
("_", "_"),
("-_", "-_"),
("-_1", "-_1"),
("-_1e0", "-_1e0"),
("_1", "_1"),
("_1,", "_1,"),
("_1,_", "_1,_"),
("_1e0", "_1e0"),
("1,2e_1", "1,2e_1"),
("1,2e1_0", "1,2e1_0"),
("1,_2", "1,_2"),
(",1__2", ",1__2"),
(",1e", ",1e"),
("-,1e", "-,1e"),
("1_000,000_000", "1_000,000_000"),
("1,e1_2", "1,e1_2"),
("e11,2", "e11,2"),
("1e11,2", "1e11,2"),
("1,2,2", "1,2,2"),
("1,2_1", "1,2_1"),
("1,2e-10e1", "1,2e-10e1"),
("--1,2", "--1,2"),
("1a_2,1", "1a_2,1"),
("1,2E-1", 0.12),
("1,2E1", 12.0),
]
)
def numeric_decimal(request):
"""
    Fixture for all numeric formats that should be recognized. The first entry
represents the value to read while the second represents the expected result.
"""
return request.param
@pytest.fixture
def pyarrow_xfail(request):
"""
Fixture that xfails a test if the engine is pyarrow.
"""
if "all_parsers" in request.fixturenames:
parser = request.getfixturevalue("all_parsers")
elif "all_parsers_all_precisions" in request.fixturenames:
# Return value is tuple of (engine, precision)
parser = request.getfixturevalue("all_parsers_all_precisions")[0]
else:
return
if parser.engine == "pyarrow":
mark = pytest.mark.xfail(reason="pyarrow doesn't support this.")
request.node.add_marker(mark)
@pytest.fixture
def pyarrow_skip(request):
"""
Fixture that skips a test if the engine is pyarrow.
"""
if "all_parsers" in request.fixturenames:
parser = request.getfixturevalue("all_parsers")
elif "all_parsers_all_precisions" in request.fixturenames:
# Return value is tuple of (engine, precision)
parser = request.getfixturevalue("all_parsers_all_precisions")[0]
else:
return
if parser.engine == "pyarrow":
pytest.skip("pyarrow doesn't support this.")

View File

@ -0,0 +1,310 @@
"""
Tests dtype specification during parsing
for all of the parsers defined in parsers.py
"""
from io import StringIO
import os
import numpy as np
import pytest
from pandas.core.dtypes.dtypes import CategoricalDtype
import pandas as pd
from pandas import (
Categorical,
DataFrame,
Timestamp,
)
import pandas._testing as tm
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
@xfail_pyarrow
@pytest.mark.parametrize(
"dtype",
[
"category",
CategoricalDtype(),
{"a": "category", "b": "category", "c": CategoricalDtype()},
],
)
def test_categorical_dtype(all_parsers, dtype):
# see gh-10153
parser = all_parsers
data = """a,b,c
1,a,3.4
1,a,3.4
2,b,4.5"""
expected = DataFrame(
{
"a": Categorical(["1", "1", "2"]),
"b": Categorical(["a", "a", "b"]),
"c": Categorical(["3.4", "3.4", "4.5"]),
}
)
actual = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(actual, expected)
@skip_pyarrow # Flaky
@pytest.mark.parametrize("dtype", [{"b": "category"}, {1: "category"}])
def test_categorical_dtype_single(all_parsers, dtype):
# see gh-10153
parser = all_parsers
data = """a,b,c
1,a,3.4
1,a,3.4
2,b,4.5"""
expected = DataFrame(
{"a": [1, 1, 2], "b": Categorical(["a", "a", "b"]), "c": [3.4, 3.4, 4.5]}
)
actual = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(actual, expected)
@xfail_pyarrow
def test_categorical_dtype_unsorted(all_parsers):
# see gh-10153
parser = all_parsers
data = """a,b,c
1,b,3.4
1,b,3.4
2,a,4.5"""
expected = DataFrame(
{
"a": Categorical(["1", "1", "2"]),
"b": Categorical(["b", "b", "a"]),
"c": Categorical(["3.4", "3.4", "4.5"]),
}
)
actual = parser.read_csv(StringIO(data), dtype="category")
tm.assert_frame_equal(actual, expected)
@xfail_pyarrow
def test_categorical_dtype_missing(all_parsers):
# see gh-10153
parser = all_parsers
data = """a,b,c
1,b,3.4
1,nan,3.4
2,a,4.5"""
expected = DataFrame(
{
"a": Categorical(["1", "1", "2"]),
"b": Categorical(["b", np.nan, "a"]),
"c": Categorical(["3.4", "3.4", "4.5"]),
}
)
actual = parser.read_csv(StringIO(data), dtype="category")
tm.assert_frame_equal(actual, expected)
@xfail_pyarrow
@pytest.mark.slow
def test_categorical_dtype_high_cardinality_numeric(all_parsers):
# see gh-18186
parser = all_parsers
data = np.sort([str(i) for i in range(524289)])
expected = DataFrame({"a": Categorical(data, ordered=True)})
actual = parser.read_csv(StringIO("a\n" + "\n".join(data)), dtype="category")
actual["a"] = actual["a"].cat.reorder_categories(
np.sort(actual.a.cat.categories), ordered=True
)
tm.assert_frame_equal(actual, expected)
@xfail_pyarrow
def test_categorical_dtype_utf16(all_parsers, csv_dir_path):
# see gh-10153
pth = os.path.join(csv_dir_path, "utf16_ex.txt")
parser = all_parsers
encoding = "utf-16"
sep = "\t"
expected = parser.read_csv(pth, sep=sep, encoding=encoding)
expected = expected.apply(Categorical)
actual = parser.read_csv(pth, sep=sep, encoding=encoding, dtype="category")
tm.assert_frame_equal(actual, expected)
@xfail_pyarrow
def test_categorical_dtype_chunksize_infer_categories(all_parsers):
# see gh-10153
parser = all_parsers
data = """a,b
1,a
1,b
1,b
2,c"""
expecteds = [
DataFrame({"a": [1, 1], "b": Categorical(["a", "b"])}),
DataFrame({"a": [1, 2], "b": Categorical(["b", "c"])}, index=[2, 3]),
]
with parser.read_csv(
StringIO(data), dtype={"b": "category"}, chunksize=2
) as actuals:
for actual, expected in zip(actuals, expecteds):
tm.assert_frame_equal(actual, expected)
@xfail_pyarrow
def test_categorical_dtype_chunksize_explicit_categories(all_parsers):
# see gh-10153
parser = all_parsers
data = """a,b
1,a
1,b
1,b
2,c"""
cats = ["a", "b", "c"]
expecteds = [
DataFrame({"a": [1, 1], "b": Categorical(["a", "b"], categories=cats)}),
DataFrame(
{"a": [1, 2], "b": Categorical(["b", "c"], categories=cats)},
index=[2, 3],
),
]
dtype = CategoricalDtype(cats)
with parser.read_csv(StringIO(data), dtype={"b": dtype}, chunksize=2) as actuals:
for actual, expected in zip(actuals, expecteds):
tm.assert_frame_equal(actual, expected)
def test_categorical_dtype_latin1(all_parsers, csv_dir_path):
# see gh-10153
pth = os.path.join(csv_dir_path, "unicode_series.csv")
parser = all_parsers
encoding = "latin-1"
expected = parser.read_csv(pth, header=None, encoding=encoding)
expected[1] = Categorical(expected[1])
actual = parser.read_csv(pth, header=None, encoding=encoding, dtype={1: "category"})
tm.assert_frame_equal(actual, expected)
@pytest.mark.parametrize("ordered", [False, True])
@pytest.mark.parametrize(
"categories",
[["a", "b", "c"], ["a", "c", "b"], ["a", "b", "c", "d"], ["c", "b", "a"]],
)
def test_categorical_category_dtype(all_parsers, categories, ordered):
parser = all_parsers
data = """a,b
1,a
1,b
1,b
2,c"""
expected = DataFrame(
{
"a": [1, 1, 1, 2],
"b": Categorical(
["a", "b", "b", "c"], categories=categories, ordered=ordered
),
}
)
dtype = {"b": CategoricalDtype(categories=categories, ordered=ordered)}
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
def test_categorical_category_dtype_unsorted(all_parsers):
parser = all_parsers
data = """a,b
1,a
1,b
1,b
2,c"""
dtype = CategoricalDtype(["c", "b", "a"])
expected = DataFrame(
{
"a": [1, 1, 1, 2],
"b": Categorical(["a", "b", "b", "c"], categories=["c", "b", "a"]),
}
)
result = parser.read_csv(StringIO(data), dtype={"b": dtype})
tm.assert_frame_equal(result, expected)
def test_categorical_coerces_numeric(all_parsers):
parser = all_parsers
dtype = {"b": CategoricalDtype([1, 2, 3])}
data = "b\n1\n1\n2\n3"
expected = DataFrame({"b": Categorical([1, 1, 2, 3])})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
@skip_pyarrow # Flaky
def test_categorical_coerces_datetime(all_parsers):
parser = all_parsers
dti = pd.DatetimeIndex(["2017-01-01", "2018-01-01", "2019-01-01"], freq=None)
dtype = {"b": CategoricalDtype(dti)}
data = "b\n2017-01-01\n2018-01-01\n2019-01-01"
expected = DataFrame({"b": Categorical(dtype["b"].categories)})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
def test_categorical_coerces_timestamp(all_parsers):
parser = all_parsers
dtype = {"b": CategoricalDtype([Timestamp("2014")])}
data = "b\n2014-01-01\n2014-01-01T00:00:00"
expected = DataFrame({"b": Categorical([Timestamp("2014")] * 2)})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
def test_categorical_coerces_timedelta(all_parsers):
parser = all_parsers
dtype = {"b": CategoricalDtype(pd.to_timedelta(["1H", "2H", "3H"]))}
data = "b\n1H\n2H\n3H"
expected = DataFrame({"b": Categorical(dtype["b"].categories)})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"data",
[
"b\nTrue\nFalse\nNA\nFalse",
"b\ntrue\nfalse\nNA\nfalse",
"b\nTRUE\nFALSE\nNA\nFALSE",
"b\nTrue\nFalse\nNA\nFALSE",
],
)
def test_categorical_dtype_coerces_boolean(all_parsers, data):
# see gh-20498
parser = all_parsers
dtype = {"b": CategoricalDtype([False, True])}
expected = DataFrame({"b": Categorical([True, False, None, False])})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
def test_categorical_unexpected_categories(all_parsers):
parser = all_parsers
dtype = {"b": CategoricalDtype(["a", "b", "d", "e"])}
data = "b\nd\na\nc\nd" # Unexpected c
expected = DataFrame({"b": Categorical(list("dacd"), dtype=dtype["b"])})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
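# Hedged sketch of the coercion rule the tests above rely on: values outside
# the declared categories become NaN when cast to a CategoricalDtype.
# `_demo_unexpected_category` is an illustrative helper name.
def _demo_unexpected_category():
    import pandas as pd

    dtype = pd.CategoricalDtype(["a", "b"])
    coerced = pd.Series(["a", "c"]).astype(dtype)
    assert coerced.isna().tolist() == [False, True]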

View File

@ -0,0 +1,336 @@
"""
Tests dtype specification during parsing
for all of the parsers defined in parsers.py
"""
from io import StringIO
import numpy as np
import pytest
from pandas.errors import ParserWarning
import pandas as pd
from pandas import (
DataFrame,
Timestamp,
)
import pandas._testing as tm
# TODO(1.4): Change me into xfail at release time
# and xfail individual tests
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
@pytest.mark.parametrize("dtype", [str, object])
@pytest.mark.parametrize("check_orig", [True, False])
def test_dtype_all_columns(all_parsers, dtype, check_orig):
# see gh-3795, gh-6607
parser = all_parsers
df = DataFrame(
np.random.rand(5, 2).round(4),
columns=list("AB"),
index=["1A", "1B", "1C", "1D", "1E"],
)
with tm.ensure_clean("__passing_str_as_dtype__.csv") as path:
df.to_csv(path)
result = parser.read_csv(path, dtype=dtype, index_col=0)
if check_orig:
expected = df.copy()
result = result.astype(float)
else:
expected = df.astype(str)
tm.assert_frame_equal(result, expected)
def test_dtype_per_column(all_parsers):
parser = all_parsers
data = """\
one,two
1,2.5
2,3.5
3,4.5
4,5.5"""
expected = DataFrame(
[[1, "2.5"], [2, "3.5"], [3, "4.5"], [4, "5.5"]], columns=["one", "two"]
)
expected["one"] = expected["one"].astype(np.float64)
expected["two"] = expected["two"].astype(object)
result = parser.read_csv(StringIO(data), dtype={"one": np.float64, 1: str})
tm.assert_frame_equal(result, expected)
def test_invalid_dtype_per_column(all_parsers):
parser = all_parsers
data = """\
one,two
1,2.5
2,3.5
3,4.5
4,5.5"""
with pytest.raises(TypeError, match="data type [\"']foo[\"'] not understood"):
parser.read_csv(StringIO(data), dtype={"one": "foo", 1: "int"})
def test_raise_on_passed_int_dtype_with_nas(all_parsers):
# see gh-2631
parser = all_parsers
data = """YEAR, DOY, a
2001,106380451,10
2001,,11
2001,106380451,67"""
msg = (
"Integer column has NA values"
if parser.engine == "c"
else "Unable to convert column DOY"
)
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), dtype={"DOY": np.int64}, skipinitialspace=True)
def test_dtype_with_converters(all_parsers):
parser = all_parsers
data = """a,b
1.1,2.2
1.2,2.3"""
    # The dtype spec is ignored if converters are specified.
with tm.assert_produces_warning(ParserWarning):
result = parser.read_csv(
StringIO(data), dtype={"a": "i8"}, converters={"a": lambda x: str(x)}
)
expected = DataFrame({"a": ["1.1", "1.2"], "b": [2.2, 2.3]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"dtype", list(np.typecodes["AllInteger"] + np.typecodes["Float"])
)
def test_numeric_dtype(all_parsers, dtype):
data = "0\n1"
parser = all_parsers
expected = DataFrame([0, 1], dtype=dtype)
result = parser.read_csv(StringIO(data), header=None, dtype=dtype)
tm.assert_frame_equal(expected, result)
def test_boolean_dtype(all_parsers):
parser = all_parsers
data = "\n".join(
[
"a",
"True",
"TRUE",
"true",
"1",
"1.0",
"False",
"FALSE",
"false",
"0",
"0.0",
"NaN",
"nan",
"NA",
"null",
"NULL",
]
)
result = parser.read_csv(StringIO(data), dtype="boolean")
expected = DataFrame(
{
"a": pd.array(
[
True,
True,
True,
True,
True,
False,
False,
False,
False,
False,
None,
None,
None,
None,
None,
],
dtype="boolean",
)
}
)
tm.assert_frame_equal(result, expected)
def test_delimiter_with_usecols_and_parse_dates(all_parsers):
# GH#35873
result = all_parsers.read_csv(
StringIO('"dump","-9,1","-9,1",20101010'),
engine="python",
names=["col", "col1", "col2", "col3"],
usecols=["col1", "col2", "col3"],
parse_dates=["col3"],
decimal=",",
)
expected = DataFrame(
{"col1": [-9.1], "col2": [-9.1], "col3": [Timestamp("2010-10-10")]}
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("thousands", ["_", None])
def test_decimal_and_exponential(python_parser_only, numeric_decimal, thousands):
# GH#31920
decimal_number_check(python_parser_only, numeric_decimal, thousands, None)
@pytest.mark.parametrize("thousands", ["_", None])
@pytest.mark.parametrize("float_precision", [None, "legacy", "high", "round_trip"])
def test_1000_sep_decimal_float_precision(
c_parser_only, numeric_decimal, float_precision, thousands
):
    # Test decimal and thousands separator handling across the
    # 'float_precision' options.
decimal_number_check(c_parser_only, numeric_decimal, thousands, float_precision)
text, value = numeric_decimal
text = " " + text + " "
if isinstance(value, str): # the negative cases (parse as text)
value = " " + value + " "
decimal_number_check(c_parser_only, (text, value), thousands, float_precision)
def decimal_number_check(parser, numeric_decimal, thousands, float_precision):
# GH#31920
value = numeric_decimal[0]
if thousands is None and "_" in value:
pytest.skip("Skip test if no thousands sep is defined and sep is in value")
df = parser.read_csv(
StringIO(value),
sep="|",
thousands=thousands,
decimal=",",
header=None,
)
val = df.iloc[0, 0]
assert val == numeric_decimal[1]
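# Hedged mini-example of the convention checked above: with thousands="_"
# and decimal=",", the text "1_234,56" parses to the float 1234.56.
def _demo_decimal_thousands():
    from io import StringIO

    import pandas as pd

    df = pd.read_csv(StringIO("1_234,56"), header=None, thousands="_", decimal=",")
    assert df.iloc[0, 0] == 1234.56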
@pytest.mark.parametrize("float_precision", [None, "legacy", "high", "round_trip"])
def test_skip_whitespace(c_parser_only, float_precision):
DATA = """id\tnum\t
1\t1.2 \t
1\t 2.1\t
2\t 1\t
2\t 1.2 \t
"""
df = c_parser_only.read_csv(
StringIO(DATA),
float_precision=float_precision,
sep="\t",
header=0,
dtype={1: np.float64},
)
tm.assert_series_equal(df.iloc[:, 1], pd.Series([1.2, 2.1, 1.0, 1.2], name="num"))
def test_true_values_cast_to_bool(all_parsers):
# GH#34655
text = """a,b
yes,xxx
no,yyy
1,zzz
0,aaa
"""
parser = all_parsers
result = parser.read_csv(
StringIO(text),
true_values=["yes"],
false_values=["no"],
dtype={"a": "boolean"},
)
expected = DataFrame(
{"a": [True, False, True, False], "b": ["xxx", "yyy", "zzz", "aaa"]}
)
expected["a"] = expected["a"].astype("boolean")
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("dtypes, exp_value", [({}, "1"), ({"a.1": "int64"}, 1)])
def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value):
# GH#35211
parser = all_parsers
data = """a,a\n1,1"""
dtype_dict = {"a": str, **dtypes}
# GH#42462
dtype_dict_copy = dtype_dict.copy()
result = parser.read_csv(StringIO(data), dtype=dtype_dict)
expected = DataFrame({"a": ["1"], "a.1": [exp_value]})
assert dtype_dict == dtype_dict_copy, "dtype dict changed"
tm.assert_frame_equal(result, expected)
def test_dtype_mangle_dup_cols_single_dtype(all_parsers):
# GH#42022
parser = all_parsers
data = """a,a\n1,1"""
result = parser.read_csv(StringIO(data), dtype=str)
expected = DataFrame({"a": ["1"], "a.1": ["1"]})
tm.assert_frame_equal(result, expected)
def test_dtype_multi_index(all_parsers):
# GH 42446
parser = all_parsers
data = "A,B,B\nX,Y,Z\n1,2,3"
result = parser.read_csv(
StringIO(data),
header=list(range(2)),
dtype={
("A", "X"): np.int32,
("B", "Y"): np.int32,
("B", "Z"): np.float32,
},
)
expected = DataFrame(
{
("A", "X"): np.int32([1]),
("B", "Y"): np.int32([2]),
("B", "Z"): np.float32([3]),
}
)
tm.assert_frame_equal(result, expected)
def test_nullable_int_dtype(all_parsers, any_int_ea_dtype):
# GH 25472
parser = all_parsers
dtype = any_int_ea_dtype
data = """a,b,c
,3,5
1,,6
2,4,"""
expected = DataFrame(
{
"a": pd.array([pd.NA, 1, 2], dtype=dtype),
"b": pd.array([3, pd.NA, 4], dtype=dtype),
"c": pd.array([5, 6, pd.NA], dtype=dtype),
}
)
actual = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(actual, expected)

View File

@ -0,0 +1,182 @@
"""
Tests dtype specification during parsing
for all of the parsers defined in parsers.py
"""
from io import StringIO
import numpy as np
import pytest
from pandas import (
Categorical,
DataFrame,
Index,
MultiIndex,
Series,
concat,
)
import pandas._testing as tm
# TODO(1.4): Change me into individual xfails at release time
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
def test_dtype_all_columns_empty(all_parsers):
# see gh-12048
parser = all_parsers
result = parser.read_csv(StringIO("A,B"), dtype=str)
expected = DataFrame({"A": [], "B": []}, index=[], dtype=str)
tm.assert_frame_equal(result, expected)
def test_empty_pass_dtype(all_parsers):
parser = all_parsers
data = "one,two"
result = parser.read_csv(StringIO(data), dtype={"one": "u1"})
expected = DataFrame(
{"one": np.empty(0, dtype="u1"), "two": np.empty(0, dtype=object)},
index=Index([], dtype=object),
)
tm.assert_frame_equal(result, expected)
def test_empty_with_index_pass_dtype(all_parsers):
parser = all_parsers
data = "one,two"
result = parser.read_csv(
StringIO(data), index_col=["one"], dtype={"one": "u1", 1: "f"}
)
expected = DataFrame(
{"two": np.empty(0, dtype="f")}, index=Index([], dtype="u1", name="one")
)
tm.assert_frame_equal(result, expected)
def test_empty_with_multi_index_pass_dtype(all_parsers):
parser = all_parsers
data = "one,two,three"
result = parser.read_csv(
StringIO(data), index_col=["one", "two"], dtype={"one": "u1", 1: "f8"}
)
exp_idx = MultiIndex.from_arrays(
[np.empty(0, dtype="u1"), np.empty(0, dtype=np.float64)],
names=["one", "two"],
)
expected = DataFrame({"three": np.empty(0, dtype=object)}, index=exp_idx)
tm.assert_frame_equal(result, expected)
def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers):
parser = all_parsers
data = "one,one"
result = parser.read_csv(StringIO(data), dtype={"one": "u1", "one.1": "f"})
expected = DataFrame(
{"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")},
index=Index([], dtype=object),
)
tm.assert_frame_equal(result, expected)
def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers):
parser = all_parsers
data = "one,one"
result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"})
expected = DataFrame(
{"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")},
index=Index([], dtype=object),
)
tm.assert_frame_equal(result, expected)
def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers):
# see gh-9424
parser = all_parsers
expected = concat(
[Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")],
axis=1,
)
expected.index = expected.index.astype(object)
data = "one,one"
result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"})
tm.assert_frame_equal(result, expected)
def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers):
# see gh-9424
parser = all_parsers
expected = concat(
[Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")],
axis=1,
)
expected.index = expected.index.astype(object)
with pytest.raises(ValueError, match="Duplicate names"):
data = ""
parser.read_csv(StringIO(data), names=["one", "one"], dtype={0: "u1", 1: "f"})
@pytest.mark.parametrize(
"dtype,expected",
[
(np.float64, DataFrame(columns=["a", "b"], dtype=np.float64)),
(
"category",
DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]),
),
(
{"a": "category", "b": "category"},
DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]),
),
("datetime64[ns]", DataFrame(columns=["a", "b"], dtype="datetime64[ns]")),
(
"timedelta64[ns]",
DataFrame(
{
"a": Series([], dtype="timedelta64[ns]"),
"b": Series([], dtype="timedelta64[ns]"),
},
index=[],
),
),
(
{"a": np.int64, "b": np.int32},
DataFrame(
{"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)},
index=[],
),
),
(
{0: np.int64, 1: np.int32},
DataFrame(
{"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)},
index=[],
),
),
(
{"a": np.int64, 1: np.int32},
DataFrame(
{"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)},
index=[],
),
),
],
)
def test_empty_dtype(all_parsers, dtype, expected):
# see gh-14712
parser = all_parsers
data = "a,b"
result = parser.read_csv(StringIO(data), header=0, dtype=dtype)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,684 @@
"""
Tests that apply specifically to the CParser. Unless specifically stated
as a CParser-specific issue, the goal is to eventually move as many of
these tests as possible out of this module once the Python parser can
accept further arguments when parsing.
"""
from io import (
BytesIO,
StringIO,
TextIOWrapper,
)
import mmap
import os
import tarfile
import numpy as np
import pytest
from pandas.compat import IS64
from pandas.errors import ParserError
import pandas.util._test_decorators as td
from pandas import (
DataFrame,
concat,
)
import pandas._testing as tm
@pytest.mark.parametrize(
"malformed",
["1\r1\r1\r 1\r 1\r", "1\r1\r1\r 1\r 1\r11\r", "1\r1\r1\r 1\r 1\r11\r1\r"],
ids=["words pointer", "stream pointer", "lines pointer"],
)
def test_buffer_overflow(c_parser_only, malformed):
# see gh-9205: test certain malformed input files that cause
# buffer overflows in tokenizer.c
msg = "Buffer overflow caught - possible malformed input file."
parser = c_parser_only
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(malformed))
def test_buffer_rd_bytes(c_parser_only):
# see gh-12098: src->buffer in the C parser can be freed twice leading
# to a segfault if a corrupt gzip file is read with 'read_csv', and the
# buffer is filled more than once before gzip raises an Exception.
data = (
"\x1F\x8B\x08\x00\x00\x00\x00\x00\x00\x03\xED\xC3\x41\x09"
"\x00\x00\x08\x00\xB1\xB7\xB6\xBA\xFE\xA5\xCC\x21\x6C\xB0"
"\xA6\x4D" + "\x55" * 267 + "\x7D\xF7\x00\x91\xE0\x47\x97\x14\x38\x04\x00"
"\x1f\x8b\x08\x00VT\x97V\x00\x03\xed]\xefO"
)
parser = c_parser_only
with tm.assert_produces_warning(RuntimeWarning):
# compression has no effect when passing a non-binary object as input
for _ in range(100):
try:
parser.read_csv(
StringIO(data), compression="gzip", delim_whitespace=True
)
except Exception:
pass
def test_delim_whitespace_custom_terminator(c_parser_only):
# See gh-12912
data = "a b c~1 2 3~4 5 6~7 8 9"
parser = c_parser_only
df = parser.read_csv(StringIO(data), lineterminator="~", delim_whitespace=True)
expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"])
tm.assert_frame_equal(df, expected)
def test_dtype_and_names_error(c_parser_only):
# see gh-8833: passing both dtype and names
# resulting in an error reporting issue
parser = c_parser_only
data = """
1.0 1
2.0 2
3.0 3
"""
# base cases
result = parser.read_csv(StringIO(data), sep=r"\s+", header=None)
expected = DataFrame([[1.0, 1], [2.0, 2], [3.0, 3]])
tm.assert_frame_equal(result, expected)
result = parser.read_csv(StringIO(data), sep=r"\s+", header=None, names=["a", "b"])
expected = DataFrame([[1.0, 1], [2.0, 2], [3.0, 3]], columns=["a", "b"])
tm.assert_frame_equal(result, expected)
# fallback casting
result = parser.read_csv(
StringIO(data), sep=r"\s+", header=None, names=["a", "b"], dtype={"a": np.int32}
)
expected = DataFrame([[1, 1], [2, 2], [3, 3]], columns=["a", "b"])
expected["a"] = expected["a"].astype(np.int32)
tm.assert_frame_equal(result, expected)
data = """
1.0 1
nan 2
3.0 3
"""
# fallback casting, but not castable
with pytest.raises(ValueError, match="cannot safely convert"):
parser.read_csv(
StringIO(data),
sep=r"\s+",
header=None,
names=["a", "b"],
dtype={"a": np.int32},
)
@pytest.mark.parametrize(
"match,kwargs",
[
# For each of these cases, all of the dtypes are valid, just unsupported.
(
(
"the dtype datetime64 is not supported for parsing, "
"pass this column using parse_dates instead"
),
{"dtype": {"A": "datetime64", "B": "float64"}},
),
(
(
"the dtype datetime64 is not supported for parsing, "
"pass this column using parse_dates instead"
),
{"dtype": {"A": "datetime64", "B": "float64"}, "parse_dates": ["B"]},
),
(
"the dtype timedelta64 is not supported for parsing",
{"dtype": {"A": "timedelta64", "B": "float64"}},
),
("the dtype <U8 is not supported for parsing", {"dtype": {"A": "U8"}}),
],
ids=["dt64-0", "dt64-1", "td64", "<U8"],
)
def test_unsupported_dtype(c_parser_only, match, kwargs):
parser = c_parser_only
df = DataFrame(
np.random.rand(5, 2), columns=list("AB"), index=["1A", "1B", "1C", "1D", "1E"]
)
with tm.ensure_clean("__unsupported_dtype__.csv") as path:
df.to_csv(path)
with pytest.raises(TypeError, match=match):
parser.read_csv(path, index_col=0, **kwargs)
@td.skip_if_32bit
@pytest.mark.slow
def test_precise_conversion(c_parser_only):
from decimal import Decimal
parser = c_parser_only
normal_errors = []
precise_errors = []
# test numbers between 1 and 2
for num in np.linspace(1.0, 2.0, num=500):
# 25 decimal digits of precision
text = f"a\n{num:.25}"
normal_val = float(
parser.read_csv(StringIO(text), float_precision="legacy")["a"][0]
)
precise_val = float(
parser.read_csv(StringIO(text), float_precision="high")["a"][0]
)
roundtrip_val = float(
parser.read_csv(StringIO(text), float_precision="round_trip")["a"][0]
)
actual_val = Decimal(text[2:])
def error(val):
return abs(Decimal(f"{val:.100}") - actual_val)
normal_errors.append(error(normal_val))
precise_errors.append(error(precise_val))
# round-trip should match float()
assert roundtrip_val == float(text[2:])
assert sum(precise_errors) <= sum(normal_errors)
assert max(precise_errors) <= max(normal_errors)
def test_usecols_dtypes(c_parser_only):
parser = c_parser_only
data = """\
1,2,3
4,5,6
7,8,9
10,11,12"""
result = parser.read_csv(
StringIO(data),
usecols=(0, 1, 2),
names=("a", "b", "c"),
header=None,
converters={"a": str},
dtype={"b": int, "c": float},
)
result2 = parser.read_csv(
StringIO(data),
usecols=(0, 2),
names=("a", "b", "c"),
header=None,
converters={"a": str},
dtype={"b": int, "c": float},
)
assert (result.dtypes == [object, int, float]).all()
assert (result2.dtypes == [object, float]).all()
def test_disable_bool_parsing(c_parser_only):
# see gh-2090
parser = c_parser_only
data = """A,B,C
Yes,No,Yes
No,Yes,Yes
Yes,,Yes
No,No,No"""
result = parser.read_csv(StringIO(data), dtype=object)
assert (result.dtypes == object).all()
result = parser.read_csv(StringIO(data), dtype=object, na_filter=False)
assert result["B"][2] == ""
def test_custom_lineterminator(c_parser_only):
parser = c_parser_only
data = "a,b,c~1,2,3~4,5,6"
result = parser.read_csv(StringIO(data), lineterminator="~")
expected = parser.read_csv(StringIO(data.replace("~", "\n")))
tm.assert_frame_equal(result, expected)
def test_parse_ragged_csv(c_parser_only):
parser = c_parser_only
data = """1,2,3
1,2,3,4
1,2,3,4,5
1,2
1,2,3,4"""
nice_data = """1,2,3,,
1,2,3,4,
1,2,3,4,5
1,2,,,
1,2,3,4,"""
result = parser.read_csv(
StringIO(data), header=None, names=["a", "b", "c", "d", "e"]
)
expected = parser.read_csv(
StringIO(nice_data), header=None, names=["a", "b", "c", "d", "e"]
)
tm.assert_frame_equal(result, expected)
    # Too many columns would cause a segfault if not handled carefully.
data = "1,2\n3,4,5"
result = parser.read_csv(StringIO(data), header=None, names=range(50))
expected = parser.read_csv(StringIO(data), header=None, names=range(3)).reindex(
columns=range(50)
)
tm.assert_frame_equal(result, expected)
def test_tokenize_CR_with_quoting(c_parser_only):
# see gh-3453
parser = c_parser_only
data = ' a,b,c\r"a,b","e,d","f,f"'
result = parser.read_csv(StringIO(data), header=None)
expected = parser.read_csv(StringIO(data.replace("\r", "\n")), header=None)
tm.assert_frame_equal(result, expected)
result = parser.read_csv(StringIO(data))
expected = parser.read_csv(StringIO(data.replace("\r", "\n")))
tm.assert_frame_equal(result, expected)
@pytest.mark.slow
def test_grow_boundary_at_cap(c_parser_only):
# See gh-12494
#
# Cause of error was that the C parser
# was not increasing the buffer size when
# the desired space would fill the buffer
# to capacity, which would later cause a
# buffer overflow error when checking the
# EOF terminator of the CSV stream.
parser = c_parser_only
def test_empty_header_read(count):
with StringIO("," * count) as s:
expected = DataFrame(columns=[f"Unnamed: {i}" for i in range(count + 1)])
df = parser.read_csv(s)
tm.assert_frame_equal(df, expected)
for cnt in range(1, 101):
test_empty_header_read(cnt)
def test_parse_trim_buffers(c_parser_only):
    # This test is part of a bugfix for gh-13703. It attempts
    # to stress the system memory allocator, to cause it to move the
    # stream buffer and either let the OS reclaim the region, or let
    # other memory requests of the parser otherwise modify the contents
    # of the memory space where it was formerly located.
# This test is designed to cause a `segfault` with unpatched
# `tokenizer.c`. Sometimes the test fails on `segfault`, other
# times it fails due to memory corruption, which causes the
# loaded DataFrame to differ from the expected one.
parser = c_parser_only
# Generate a large mixed-type CSV file on-the-fly (one record is
# approx 1.5KiB).
record_ = (
"""9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.99,Z"""
"""ZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-ZZZZZ,"""
"""ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,9"""
"""99,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,,,9,9,"""
"""9,9,99,99,999,999,ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9."""
"""99,ZZ-ZZZZ,ZZ-ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999."""
"""99,999.99,,,ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZ"""
"""ZZZ,ZZZ-ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZ"""
"""ZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.99,,,,Z"""
"""ZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.99,9,9,9.99,"""
"""9.99,,,,9.99,9.99,,99,,99,9.99,9.99,,,ZZZ,ZZZ,,999.99,,"""
"""999.99,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,"""
""",,,,,ZZZ-ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999"""
""",9.999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9."""
"""999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-ZZZZ,"""
""",9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-ZZZZ,ZZZ-Z"""
"""ZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-ZZZZ,ZZ-ZZZZ,ZZ"""
""",999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,99.99,99.99"""
""",,,9.99,9.99,9.99,9.99,ZZZ-ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-"""
"""9.99,-9.99,-9.99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9"""
""".99,-9.99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,"""
""",,,-9.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9."""
"""99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZ"""
"""ZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.99,ZZ"""
"""-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.99,,,ZZ-ZZZZZ"""
"""ZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ"""
""",9999,999.99,ZZZ-ZZZZ,-9.99,-9.99,ZZZ-ZZZZ,99:99:99,,99"""
""",99,,9.99,,-99.99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9"""
""".99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,,"""
)
# Set the number of lines so that a call to `parser_trim_buffers`
# is triggered: after a couple of full chunks are consumed a
# relatively small 'residual' chunk would cause reallocation
# within the parser.
chunksize, n_lines = 128, 2 * 128 + 15
csv_data = "\n".join([record_] * n_lines) + "\n"
# We will use StringIO to load the CSV from this text buffer.
# pd.read_csv() will iterate over the file in chunks and will
# finally read a residual chunk of really small size.
# Generate the expected output: manually create the dataframe
# by splitting by comma and repeating the `n_lines` times.
row = tuple(val_ if val_ else np.nan for val_ in record_.split(","))
expected = DataFrame(
[row for _ in range(n_lines)], dtype=object, columns=None, index=None
)
# Iterate over the CSV file in chunks of `chunksize` lines
with parser.read_csv(
StringIO(csv_data), header=None, dtype=object, chunksize=chunksize
) as chunks_:
result = concat(chunks_, axis=0, ignore_index=True)
# Check for data corruption if there was no segfault
tm.assert_frame_equal(result, expected)
# This extra test was added to replicate the fault in gh-5291.
# Force 'utf-8' encoding, so that `_string_convert` would take
# a different execution branch.
with parser.read_csv(
StringIO(csv_data),
header=None,
dtype=object,
chunksize=chunksize,
encoding="utf_8",
) as chunks_:
result = concat(chunks_, axis=0, ignore_index=True)
tm.assert_frame_equal(result, expected)
def test_internal_null_byte(c_parser_only):
# see gh-14012
#
# The null byte ('\x00') should not be used as a
# true line terminator, escape character, or comment
# character, only as a placeholder to indicate that
# none was specified.
#
# This test should be moved to test_common.py ONLY when
# Python's csv class supports parsing '\x00'.
parser = c_parser_only
names = ["a", "b", "c"]
data = "1,2,3\n4,\x00,6\n7,8,9"
expected = DataFrame([[1, 2.0, 3], [4, np.nan, 6], [7, 8, 9]], columns=names)
result = parser.read_csv(StringIO(data), names=names)
tm.assert_frame_equal(result, expected)
def test_read_nrows_large(c_parser_only):
    # gh-7626: read only nrows of data for large inputs (> 262144 bytes)
parser = c_parser_only
header_narrow = "\t".join(["COL_HEADER_" + str(i) for i in range(10)]) + "\n"
data_narrow = "\t".join(["somedatasomedatasomedata1" for _ in range(10)]) + "\n"
header_wide = "\t".join(["COL_HEADER_" + str(i) for i in range(15)]) + "\n"
data_wide = "\t".join(["somedatasomedatasomedata2" for _ in range(15)]) + "\n"
test_input = header_narrow + data_narrow * 1050 + header_wide + data_wide * 2
df = parser.read_csv(StringIO(test_input), sep="\t", nrows=1010)
assert df.size == 1010 * 10
def test_float_precision_round_trip_with_text(c_parser_only):
# see gh-15140
parser = c_parser_only
df = parser.read_csv(StringIO("a"), header=None, float_precision="round_trip")
tm.assert_frame_equal(df, DataFrame({0: ["a"]}))
def test_large_difference_in_columns(c_parser_only):
# see gh-14125
parser = c_parser_only
count = 10000
large_row = ("X," * count)[:-1] + "\n"
normal_row = "XXXXXX XXXXXX,111111111111111\n"
test_input = (large_row + normal_row * 6)[:-1]
result = parser.read_csv(StringIO(test_input), header=None, usecols=[0])
rows = test_input.split("\n")
expected = DataFrame([row.split(",")[0] for row in rows])
tm.assert_frame_equal(result, expected)
def test_data_after_quote(c_parser_only):
# see gh-15910
parser = c_parser_only
data = 'a\n1\n"b"a'
result = parser.read_csv(StringIO(data))
expected = DataFrame({"a": ["1", "ba"]})
tm.assert_frame_equal(result, expected)
def test_comment_whitespace_delimited(c_parser_only, capsys):
parser = c_parser_only
test_input = """\
1 2
2 2 3
3 2 3 # 3 fields
4 2 3# 3 fields
5 2 # 2 fields
6 2# 2 fields
7 # 1 field, NaN
8# 1 field, NaN
9 2 3 # skipped line
# comment"""
df = parser.read_csv(
StringIO(test_input),
comment="#",
header=None,
delimiter="\\s+",
skiprows=0,
on_bad_lines="warn",
)
captured = capsys.readouterr()
# skipped lines 2, 3, 4, 9
for line_num in (2, 3, 4, 9):
assert f"Skipping line {line_num}" in captured.err
expected = DataFrame([[1, 2], [5, 2], [6, 2], [7, np.nan], [8, np.nan]])
tm.assert_frame_equal(df, expected)
def test_file_like_no_next(c_parser_only):
# gh-16530: the file-like need not have a "next" or "__next__"
# attribute despite having an "__iter__" attribute.
#
# NOTE: This is only true for the C engine, not Python engine.
class NoNextBuffer(StringIO):
def __next__(self):
raise AttributeError("No next method")
next = __next__
parser = c_parser_only
data = "a\n1"
expected = DataFrame({"a": [1]})
result = parser.read_csv(NoNextBuffer(data))
tm.assert_frame_equal(result, expected)
def test_buffer_rd_bytes_bad_unicode(c_parser_only):
# see gh-22748
t = BytesIO(b"\xB0")
t = TextIOWrapper(t, encoding="ascii", errors="surrogateescape")
msg = "'utf-8' codec can't encode character"
with pytest.raises(UnicodeError, match=msg):
c_parser_only.read_csv(t, encoding="UTF-8")
@pytest.mark.parametrize("tar_suffix", [".tar", ".tar.gz"])
def test_read_tarfile(c_parser_only, csv_dir_path, tar_suffix):
# see gh-16530
#
# Unfortunately, Python's CSV library can't handle
# tarfile objects (expects string, not bytes when
# iterating through a file-like).
parser = c_parser_only
tar_path = os.path.join(csv_dir_path, "tar_csv" + tar_suffix)
with tarfile.open(tar_path, "r") as tar:
data_file = tar.extractfile("tar_data.csv")
out = parser.read_csv(data_file)
expected = DataFrame({"a": [1]})
tm.assert_frame_equal(out, expected)
@pytest.mark.high_memory
def test_bytes_exceed_2gb(c_parser_only):
# see gh-16798
#
# Read from a "CSV" that has a column larger than 2GB.
parser = c_parser_only
if parser.low_memory:
pytest.skip("not a high_memory test")
csv = StringIO("strings\n" + "\n".join(["x" * (1 << 20) for _ in range(2100)]))
df = parser.read_csv(csv)
assert not df.empty
def test_chunk_whitespace_on_boundary(c_parser_only):
# see gh-9735: this issue is C parser-specific (bug when
# parsing whitespace and characters at chunk boundary)
#
# This test case has a field too large for the Python parser / CSV library.
parser = c_parser_only
chunk1 = "a" * (1024 * 256 - 2) + "\na"
chunk2 = "\n a"
result = parser.read_csv(StringIO(chunk1 + chunk2), header=None)
expected = DataFrame(["a" * (1024 * 256 - 2), "a", " a"])
tm.assert_frame_equal(result, expected)
def test_file_handles_mmap(c_parser_only, csv1):
# gh-14418
#
# Don't close user provided file handles.
parser = c_parser_only
with open(csv1) as f:
m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
parser.read_csv(m)
assert not m.closed
m.close()
def test_file_binary_mode(c_parser_only):
# see gh-23779
parser = c_parser_only
expected = DataFrame([[1, 2, 3], [4, 5, 6]])
with tm.ensure_clean() as path:
with open(path, "w") as f:
f.write("1,2,3\n4,5,6")
with open(path, "rb") as f:
result = parser.read_csv(f, header=None)
tm.assert_frame_equal(result, expected)
def test_unix_style_breaks(c_parser_only):
# GH 11020
parser = c_parser_only
with tm.ensure_clean() as path:
with open(path, "w", newline="\n") as f:
f.write("blah\n\ncol_1,col_2,col_3\n\n")
result = parser.read_csv(path, skiprows=2, encoding="utf-8", engine="c")
expected = DataFrame(columns=["col_1", "col_2", "col_3"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("float_precision", [None, "legacy", "high", "round_trip"])
@pytest.mark.parametrize(
"data,thousands,decimal",
[
(
"""A|B|C
1|2,334.01|5
10|13|10.
""",
",",
".",
),
(
"""A|B|C
1|2.334,01|5
10|13|10,
""",
".",
",",
),
],
)
def test_1000_sep_with_decimal(
c_parser_only, data, thousands, decimal, float_precision
):
parser = c_parser_only
expected = DataFrame({"A": [1, 10], "B": [2334.01, 13], "C": [5, 10.0]})
result = parser.read_csv(
StringIO(data),
sep="|",
thousands=thousands,
decimal=decimal,
float_precision=float_precision,
)
tm.assert_frame_equal(result, expected)
def test_float_precision_options(c_parser_only):
# GH 17154, 36228
parser = c_parser_only
s = "foo\n243.164\n"
df = parser.read_csv(StringIO(s))
df2 = parser.read_csv(StringIO(s), float_precision="high")
tm.assert_frame_equal(df, df2)
df3 = parser.read_csv(StringIO(s), float_precision="legacy")
if IS64:
assert not df.iloc[0, 0] == df3.iloc[0, 0]
else:
assert df.iloc[0, 0] == df3.iloc[0, 0]
msg = "Unrecognized float_precision option: junk"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(s), float_precision="junk")
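# Hedged illustration (assumes Python 3.9+ for math.nextafter): the "legacy"
# parser can land one ulp away from the high-precision result, which is all
# the inequality above checks on 64-bit builds.
def _demo_one_ulp_apart():
    import math

    x = 243.164
    assert x != math.nextafter(x, math.inf)  # differs only in the last bit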

View File

@ -0,0 +1,168 @@
"""
Tests that comments are properly handled during parsing
for all of the parsers defined in parsers.py
"""
from io import StringIO
import numpy as np
import pytest
from pandas import DataFrame
import pandas._testing as tm
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
@pytest.mark.parametrize("na_values", [None, ["NaN"]])
def test_comment(all_parsers, na_values):
parser = all_parsers
data = """A,B,C
1,2.,4.#hello world
5.,NaN,10.0
"""
expected = DataFrame(
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
)
result = parser.read_csv(StringIO(data), comment="#", na_values=na_values)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"read_kwargs", [{}, {"lineterminator": "*"}, {"delim_whitespace": True}]
)
def test_line_comment(all_parsers, read_kwargs, request):
parser = all_parsers
data = """# empty
A,B,C
1,2.,4.#hello world
#ignore this line
5.,NaN,10.0
"""
if read_kwargs.get("delim_whitespace"):
data = data.replace(",", " ")
elif read_kwargs.get("lineterminator"):
if parser.engine != "c":
mark = pytest.mark.xfail(
reason="Custom terminator not supported with Python engine"
)
request.node.add_marker(mark)
data = data.replace("\n", read_kwargs.get("lineterminator"))
read_kwargs["comment"] = "#"
result = parser.read_csv(StringIO(data), **read_kwargs)
expected = DataFrame(
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
)
tm.assert_frame_equal(result, expected)
def test_comment_skiprows(all_parsers):
parser = all_parsers
data = """# empty
random line
# second empty line
1,2,3
A,B,C
1,2.,4.
5.,NaN,10.0
"""
# This should ignore the first four lines (including comments).
expected = DataFrame(
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
)
result = parser.read_csv(StringIO(data), comment="#", skiprows=4)
tm.assert_frame_equal(result, expected)
def test_comment_header(all_parsers):
parser = all_parsers
data = """# empty
# second empty line
1,2,3
A,B,C
1,2.,4.
5.,NaN,10.0
"""
# Header should begin at the second non-comment line.
expected = DataFrame(
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
)
result = parser.read_csv(StringIO(data), comment="#", header=1)
tm.assert_frame_equal(result, expected)
def test_comment_skiprows_header(all_parsers):
parser = all_parsers
data = """# empty
# second empty line
# third empty line
X,Y,Z
1,2,3
A,B,C
1,2.,4.
5.,NaN,10.0
"""
# Skiprows should skip the first 4 lines (including comments),
# while header should start from the second non-commented line,
# starting with line 5.
expected = DataFrame(
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
)
result = parser.read_csv(StringIO(data), comment="#", skiprows=4, header=1)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("comment_char", ["#", "~", "&", "^", "*", "@"])
def test_custom_comment_char(all_parsers, comment_char):
parser = all_parsers
data = "a,b,c\n1,2,3#ignore this!\n4,5,6#ignorethistoo"
result = parser.read_csv(
StringIO(data.replace("#", comment_char)), comment=comment_char
)
expected = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("header", ["infer", None])
def test_comment_first_line(all_parsers, header):
# see gh-4623
parser = all_parsers
data = "# notes\na,b,c\n# more notes\n1,2,3"
if header is None:
expected = DataFrame({0: ["a", "1"], 1: ["b", "2"], 2: ["c", "3"]})
else:
expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"])
result = parser.read_csv(StringIO(data), comment="#", header=header)
tm.assert_frame_equal(result, expected)
def test_comment_char_in_default_value(all_parsers, request):
# GH#34002
if all_parsers.engine == "c":
reason = "see gh-34002: works on the python engine but not the c engine"
# NA value containing comment char is interpreted as comment
request.node.add_marker(pytest.mark.xfail(reason=reason, raises=AssertionError))
parser = all_parsers
data = (
"# this is a comment\n"
"col1,col2,col3,col4\n"
"1,2,3,4#inline comment\n"
"4,5#,6,10\n"
"7,8,#N/A,11\n"
)
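    # On the python engine the full field "#N/A" matches na_values before
    # comment stripping, so the last row keeps col4=11; the C engine treats
    # "#" as a comment start first, which is the xfail above.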
result = parser.read_csv(StringIO(data), comment="#", na_values="#N/A")
expected = DataFrame(
{
"col1": [1, 4, 7],
"col2": [2, 5, 8],
"col3": [3.0, np.nan, np.nan],
"col4": [4.0, np.nan, 11.0],
}
)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,176 @@
"""
Tests compressed data parsing functionality for all
of the parsers defined in parsers.py
"""
import os
from pathlib import Path
import zipfile
import pytest
from pandas import DataFrame
import pandas._testing as tm
import pandas.io.common as icom
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
@pytest.fixture(params=[True, False])
def buffer(request):
return request.param
@pytest.fixture
def parser_and_data(all_parsers, csv1):
parser = all_parsers
with open(csv1, "rb") as f:
data = f.read()
expected = parser.read_csv(csv1)
return parser, data, expected
@skip_pyarrow
@pytest.mark.parametrize("compression", ["zip", "infer", "zip2"])
def test_zip(parser_and_data, compression):
parser, data, expected = parser_and_data
with tm.ensure_clean("test_file.zip") as path:
with zipfile.ZipFile(path, mode="w") as tmp:
tmp.writestr("test_file", data)
if compression == "zip2":
with open(path, "rb") as f:
result = parser.read_csv(f, compression="zip")
else:
result = parser.read_csv(path, compression=compression)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize("compression", ["zip", "infer"])
def test_zip_error_multiple_files(parser_and_data, compression):
parser, data, expected = parser_and_data
with tm.ensure_clean("combined_zip.zip") as path:
inner_file_names = ["test_file", "second_file"]
with zipfile.ZipFile(path, mode="w") as tmp:
for file_name in inner_file_names:
tmp.writestr(file_name, data)
with pytest.raises(ValueError, match="Multiple files"):
parser.read_csv(path, compression=compression)
@skip_pyarrow
def test_zip_error_no_files(parser_and_data):
parser, _, _ = parser_and_data
with tm.ensure_clean() as path:
with zipfile.ZipFile(path, mode="w"):
pass
with pytest.raises(ValueError, match="Zero files"):
parser.read_csv(path, compression="zip")
@skip_pyarrow
def test_zip_error_invalid_zip(parser_and_data):
parser, _, _ = parser_and_data
with tm.ensure_clean() as path:
with open(path, "rb") as f:
with pytest.raises(zipfile.BadZipfile, match="File is not a zip file"):
parser.read_csv(f, compression="zip")
@skip_pyarrow
@pytest.mark.parametrize("filename", [None, "test.{ext}"])
def test_compression(parser_and_data, compression_only, buffer, filename):
parser, data, expected = parser_and_data
compress_type = compression_only
ext = icom._compression_to_extension[compress_type]
filename = filename if filename is None else filename.format(ext=ext)
if filename and buffer:
pytest.skip("Cannot deduce compression from buffer of compressed data.")
with tm.ensure_clean(filename=filename) as path:
tm.write_to_compressed(compress_type, path, data)
compression = "infer" if filename else compress_type
if buffer:
with open(path, "rb") as f:
result = parser.read_csv(f, compression=compression)
else:
result = parser.read_csv(path, compression=compression)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize("ext", [None, "gz", "bz2"])
def test_infer_compression(all_parsers, csv1, buffer, ext):
# see gh-9770
parser = all_parsers
kwargs = {"index_col": 0, "parse_dates": True}
expected = parser.read_csv(csv1, **kwargs)
kwargs["compression"] = "infer"
if buffer:
with open(csv1) as f:
result = parser.read_csv(f, **kwargs)
else:
ext = "." + ext if ext else ""
result = parser.read_csv(csv1 + ext, **kwargs)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_compression_utf_encoding(all_parsers, csv_dir_path, utf_value, encoding_fmt):
# see gh-18071, gh-24130
parser = all_parsers
encoding = encoding_fmt.format(utf_value)
path = os.path.join(csv_dir_path, f"utf{utf_value}_ex_small.zip")
result = parser.read_csv(path, encoding=encoding, compression="zip", sep="\t")
expected = DataFrame(
{
"Country": ["Venezuela", "Venezuela"],
"Twitter": ["Hugo Chávez Frías", "Henrique Capriles R."],
}
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize("invalid_compression", ["sfark", "bz3", "zipper"])
def test_invalid_compression(all_parsers, invalid_compression):
parser = all_parsers
compress_kwargs = {"compression": invalid_compression}
msg = f"Unrecognized compression type: {invalid_compression}"
with pytest.raises(ValueError, match=msg):
parser.read_csv("test_file.zip", **compress_kwargs)
def test_ignore_compression_extension(all_parsers):
parser = all_parsers
df = DataFrame({"a": [0, 1]})
with tm.ensure_clean("test.csv") as path_csv:
with tm.ensure_clean("test.csv.zip") as path_zip:
            # make sure to create an uncompressed file with a zip extension
df.to_csv(path_csv, index=False)
Path(path_zip).write_text(Path(path_csv).read_text())
tm.assert_frame_equal(parser.read_csv(path_zip, compression=None), df)

View File

@ -0,0 +1,191 @@
"""
Tests column conversion functionality during parsing
for all of the parsers defined in parsers.py
"""
from io import StringIO
from dateutil.parser import parse
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
)
import pandas._testing as tm
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
def test_converters_type_must_be_dict(all_parsers):
parser = all_parsers
data = """index,A,B,C,D
foo,2,3,4,5
"""
with pytest.raises(TypeError, match="Type converters.+"):
parser.read_csv(StringIO(data), converters=0)
@pytest.mark.parametrize("column", [3, "D"])
@pytest.mark.parametrize(
"converter", [parse, lambda x: int(x.split("/")[2])] # Produce integer.
)
def test_converters(all_parsers, column, converter):
parser = all_parsers
data = """A,B,C,D
a,1,2,01/01/2009
b,3,4,01/02/2009
c,4,5,01/03/2009
"""
result = parser.read_csv(StringIO(data), converters={column: converter})
expected = parser.read_csv(StringIO(data))
expected["D"] = expected["D"].map(converter)
tm.assert_frame_equal(result, expected)
def test_converters_no_implicit_conv(all_parsers):
# see gh-2184
parser = all_parsers
data = """000102,1.2,A\n001245,2,B"""
converters = {0: lambda x: x.strip()}
result = parser.read_csv(StringIO(data), header=None, converters=converters)
    # Column 0 should not be cast to numeric and should remain as object.
expected = DataFrame([["000102", 1.2, "A"], ["001245", 2, "B"]])
tm.assert_frame_equal(result, expected)
def test_converters_euro_decimal_format(all_parsers):
# see gh-583
converters = {}
parser = all_parsers
data = """Id;Number1;Number2;Text1;Text2;Number3
1;1521,1541;187101,9543;ABC;poi;4,7387
2;121,12;14897,76;DEF;uyt;0,3773
3;878,158;108013,434;GHI;rez;2,7356"""
converters["Number1"] = converters["Number2"] = converters[
"Number3"
] = lambda x: float(x.replace(",", "."))
result = parser.read_csv(StringIO(data), sep=";", converters=converters)
expected = DataFrame(
[
[1, 1521.1541, 187101.9543, "ABC", "poi", 4.7387],
[2, 121.12, 14897.76, "DEF", "uyt", 0.3773],
[3, 878.158, 108013.434, "GHI", "rez", 2.7356],
],
columns=["Id", "Number1", "Number2", "Text1", "Text2", "Number3"],
)
tm.assert_frame_equal(result, expected)
def test_converters_corner_with_nans(all_parsers):
parser = all_parsers
data = """id,score,days
1,2,12
2,2-5,
3,,14+
4,6-12,2"""
# Example converters.
def convert_days(x):
x = x.strip()
if not x:
return np.nan
is_plus = x.endswith("+")
if is_plus:
x = int(x[:-1]) + 1
else:
x = int(x)
return x
def convert_days_sentinel(x):
x = x.strip()
if not x:
return np.nan
is_plus = x.endswith("+")
if is_plus:
x = int(x[:-1]) + 1
else:
x = int(x)
return x
def convert_score(x):
x = x.strip()
if not x:
return np.nan
if x.find("-") > 0:
val_min, val_max = map(int, x.split("-"))
val = 0.5 * (val_min + val_max)
else:
val = float(x)
return val
results = []
for day_converter in [convert_days, convert_days_sentinel]:
result = parser.read_csv(
StringIO(data),
converters={"score": convert_score, "days": day_converter},
na_values=["", None],
)
assert pd.isna(result["days"][1])
results.append(result)
tm.assert_frame_equal(results[0], results[1])
def test_converter_index_col_bug(all_parsers):
# see gh-1835
parser = all_parsers
data = "A;B\n1;2\n3;4"
rs = parser.read_csv(
StringIO(data), sep=";", index_col="A", converters={"A": lambda x: x}
)
xp = DataFrame({"B": [2, 4]}, index=Index([1, 3], name="A"))
tm.assert_frame_equal(rs, xp)
def test_converter_multi_index(all_parsers):
# GH 42446
parser = all_parsers
data = "A,B,B\nX,Y,Z\n1,2,3"
result = parser.read_csv(
StringIO(data),
header=list(range(2)),
converters={
("A", "X"): np.int32,
("B", "Y"): np.int32,
("B", "Z"): np.float32,
},
)
expected = DataFrame(
{
("A", "X"): np.int32([1]),
("B", "Y"): np.int32([2]),
("B", "Z"): np.float32([3]),
}
)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,146 @@
"""
Tests that dialects are properly handled during parsing
for all of the parsers defined in parsers.py
"""
import csv
from io import StringIO
import pytest
from pandas.errors import ParserWarning
from pandas import DataFrame
import pandas._testing as tm
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
@pytest.fixture
def custom_dialect():
dialect_name = "weird"
dialect_kwargs = {
"doublequote": False,
"escapechar": "~",
"delimiter": ":",
"skipinitialspace": False,
"quotechar": "~",
"quoting": 3,
}
return dialect_name, dialect_kwargs
def test_dialect(all_parsers):
parser = all_parsers
data = """\
label1,label2,label3
index1,"a,c,e
index2,b,d,f
"""
dia = csv.excel()
dia.quoting = csv.QUOTE_NONE
df = parser.read_csv(StringIO(data), dialect=dia)
data = """\
label1,label2,label3
index1,a,c,e
index2,b,d,f
"""
exp = parser.read_csv(StringIO(data))
exp.replace("a", '"a', inplace=True)
tm.assert_frame_equal(df, exp)
def test_dialect_str(all_parsers):
dialect_name = "mydialect"
parser = all_parsers
data = """\
fruit:vegetable
apple:broccoli
pear:tomato
"""
exp = DataFrame({"fruit": ["apple", "pear"], "vegetable": ["broccoli", "tomato"]})
with tm.with_csv_dialect(dialect_name, delimiter=":"):
df = parser.read_csv(StringIO(data), dialect=dialect_name)
tm.assert_frame_equal(df, exp)
def test_invalid_dialect(all_parsers):
class InvalidDialect:
pass
data = "a\n1"
parser = all_parsers
msg = "Invalid dialect"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), dialect=InvalidDialect)
@pytest.mark.parametrize(
"arg",
[None, "doublequote", "escapechar", "skipinitialspace", "quotechar", "quoting"],
)
@pytest.mark.parametrize("value", ["dialect", "default", "other"])
def test_dialect_conflict_except_delimiter(all_parsers, custom_dialect, arg, value):
# see gh-23761.
dialect_name, dialect_kwargs = custom_dialect
parser = all_parsers
expected = DataFrame({"a": [1], "b": [2]})
data = "a:b\n1:2"
warning_klass = None
kwds = {}
# arg=None tests when we pass in the dialect without any other arguments.
if arg is not None:
if "value" == "dialect": # No conflict --> no warning.
kwds[arg] = dialect_kwargs[arg]
elif "value" == "default": # Default --> no warning.
from pandas.io.parsers.base_parser import parser_defaults
kwds[arg] = parser_defaults[arg]
else: # Non-default + conflict with dialect --> warning.
warning_klass = ParserWarning
kwds[arg] = "blah"
with tm.with_csv_dialect(dialect_name, **dialect_kwargs):
with tm.assert_produces_warning(warning_klass):
result = parser.read_csv(StringIO(data), dialect=dialect_name, **kwds)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"kwargs,warning_klass",
[
({"sep": ","}, None), # sep is default --> sep_override=True
({"sep": "."}, ParserWarning), # sep isn't default --> sep_override=False
({"delimiter": ":"}, None), # No conflict
({"delimiter": None}, None), # Default arguments --> sep_override=True
({"delimiter": ","}, ParserWarning), # Conflict
({"delimiter": "."}, ParserWarning), # Conflict
],
ids=[
"sep-override-true",
"sep-override-false",
"delimiter-no-conflict",
"delimiter-default-arg",
"delimiter-conflict",
"delimiter-conflict2",
],
)
def test_dialect_conflict_delimiter(all_parsers, custom_dialect, kwargs, warning_klass):
# see gh-23761.
dialect_name, dialect_kwargs = custom_dialect
parser = all_parsers
expected = DataFrame({"a": [1], "b": [2]})
data = "a:b\n1:2"
with tm.with_csv_dialect(dialect_name, **dialect_kwargs):
with tm.assert_produces_warning(warning_klass):
result = parser.read_csv(StringIO(data), dialect=dialect_name, **kwargs)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,315 @@
"""
Tests encoding functionality during parsing
for all of the parsers defined in parsers.py
"""
from io import BytesIO
import os
import tempfile
import numpy as np
import pytest
from pandas import (
DataFrame,
read_csv,
)
import pandas._testing as tm
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
@skip_pyarrow
def test_bytes_io_input(all_parsers):
encoding = "cp1255"
parser = all_parsers
data = BytesIO("שלום:1234\n562:123".encode(encoding))
result = parser.read_csv(data, sep=":", encoding=encoding)
expected = DataFrame([[562, 123]], columns=["שלום", "1234"])
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_read_csv_unicode(all_parsers):
parser = all_parsers
data = BytesIO("\u0141aski, Jan;1".encode())
result = parser.read_csv(data, sep=";", encoding="utf-8", header=None)
expected = DataFrame([["\u0141aski, Jan", 1]])
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize("sep", [",", "\t"])
@pytest.mark.parametrize("encoding", ["utf-16", "utf-16le", "utf-16be"])
def test_utf16_bom_skiprows(all_parsers, sep, encoding):
# see gh-2298
parser = all_parsers
data = """skip this
skip this too
A,B,C
1,2,3
4,5,6""".replace(
",", sep
)
path = f"__{tm.rands(10)}__.csv"
kwargs = {"sep": sep, "skiprows": 2}
utf8 = "utf-8"
with tm.ensure_clean(path) as path:
from io import TextIOWrapper
bytes_data = data.encode(encoding)
with open(path, "wb") as f:
f.write(bytes_data)
bytes_buffer = BytesIO(data.encode(utf8))
bytes_buffer = TextIOWrapper(bytes_buffer, encoding=utf8)
result = parser.read_csv(path, encoding=encoding, **kwargs)
expected = parser.read_csv(bytes_buffer, encoding=utf8, **kwargs)
bytes_buffer.close()
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_utf16_example(all_parsers, csv_dir_path):
path = os.path.join(csv_dir_path, "utf16_ex.txt")
parser = all_parsers
result = parser.read_csv(path, encoding="utf-16", sep="\t")
assert len(result) == 50
@skip_pyarrow
def test_unicode_encoding(all_parsers, csv_dir_path):
path = os.path.join(csv_dir_path, "unicode_series.csv")
parser = all_parsers
result = parser.read_csv(path, header=None, encoding="latin-1")
result = result.set_index(0)
got = result[1][1632]
expected = "\xc1 k\xf6ldum klaka (Cold Fever) (1994)"
assert got == expected
@skip_pyarrow
@pytest.mark.parametrize(
"data,kwargs,expected",
[
# Basic test
("a\n1", {}, DataFrame({"a": [1]})),
# "Regular" quoting
('"a"\n1', {"quotechar": '"'}, DataFrame({"a": [1]})),
# Test in a data row instead of header
("b\n1", {"names": ["a"]}, DataFrame({"a": ["b", "1"]})),
# Test in empty data row with skipping
("\n1", {"names": ["a"], "skip_blank_lines": True}, DataFrame({"a": [1]})),
# Test in empty data row without skipping
(
"\n1",
{"names": ["a"], "skip_blank_lines": False},
DataFrame({"a": [np.nan, 1]}),
),
],
)
def test_utf8_bom(all_parsers, data, kwargs, expected):
# see gh-4793
parser = all_parsers
bom = "\ufeff"
utf8 = "utf-8"
def _encode_data_with_bom(_data):
bom_data = (bom + _data).encode(utf8)
return BytesIO(bom_data)
result = parser.read_csv(_encode_data_with_bom(data), encoding=utf8, **kwargs)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_read_csv_utf_aliases(all_parsers, utf_value, encoding_fmt):
# see gh-13549
expected = DataFrame({"mb_num": [4.8], "multibyte": ["test"]})
parser = all_parsers
encoding = encoding_fmt.format(utf_value)
data = "mb_num,multibyte\n4.8,test".encode(encoding)
result = parser.read_csv(BytesIO(data), encoding=encoding)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize(
"file_path,encoding",
[
(("io", "data", "csv", "test1.csv"), "utf-8"),
(("io", "parser", "data", "unicode_series.csv"), "latin-1"),
(("io", "parser", "data", "sauron.SHIFT_JIS.csv"), "shiftjis"),
],
)
def test_binary_mode_file_buffers(
all_parsers, csv_dir_path, file_path, encoding, datapath
):
# gh-23779: Python csv engine shouldn't error on files opened in binary.
# gh-31575: Python csv engine shouldn't error on files opened in raw binary.
parser = all_parsers
fpath = datapath(*file_path)
expected = parser.read_csv(fpath, encoding=encoding)
with open(fpath, encoding=encoding) as fa:
result = parser.read_csv(fa)
assert not fa.closed
tm.assert_frame_equal(expected, result)
with open(fpath, mode="rb") as fb:
result = parser.read_csv(fb, encoding=encoding)
assert not fb.closed
tm.assert_frame_equal(expected, result)
with open(fpath, mode="rb", buffering=0) as fb:
result = parser.read_csv(fb, encoding=encoding)
assert not fb.closed
tm.assert_frame_equal(expected, result)
@skip_pyarrow
@pytest.mark.parametrize("pass_encoding", [True, False])
def test_encoding_temp_file(all_parsers, utf_value, encoding_fmt, pass_encoding):
# see gh-24130
parser = all_parsers
encoding = encoding_fmt.format(utf_value)
expected = DataFrame({"foo": ["bar"]})
with tm.ensure_clean(mode="w+", encoding=encoding, return_filelike=True) as f:
f.write("foo\nbar")
f.seek(0)
result = parser.read_csv(f, encoding=encoding if pass_encoding else None)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_encoding_named_temp_file(all_parsers):
# see gh-31819
parser = all_parsers
encoding = "shift-jis"
title = "てすと"
data = "こむ"
expected = DataFrame({title: [data]})
with tempfile.NamedTemporaryFile() as f:
f.write(f"{title}\n{data}".encode(encoding))
f.seek(0)
result = parser.read_csv(f, encoding=encoding)
tm.assert_frame_equal(result, expected)
assert not f.closed
@pytest.mark.parametrize(
"encoding", ["utf-8", "utf-16", "utf-16-be", "utf-16-le", "utf-32"]
)
def test_parse_encoded_special_characters(encoding):
# GH16218 Verify parsing of data with encoded special characters
# Data contains a Unicode 'FULLWIDTH COLON' (U+FF1A) at position (0,"a")
data = "a\tb\nfoo\t0\nbar\t1\nbaz\t2"
encoded_data = BytesIO(data.encode(encoding))
result = read_csv(encoded_data, delimiter="\t", encoding=encoding)
expected = DataFrame(data=[["foo", 0], ["bar", 1], ["baz", 2]], columns=["a", "b"])
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize("encoding", ["utf-8", None, "utf-16", "cp1255", "latin-1"])
def test_encoding_memory_map(all_parsers, encoding):
# GH40986
parser = all_parsers
expected = DataFrame(
{
"name": ["Raphael", "Donatello", "Miguel Angel", "Leonardo"],
"mask": ["red", "purple", "orange", "blue"],
"weapon": ["sai", "bo staff", "nunchunk", "katana"],
}
)
with tm.ensure_clean() as file:
expected.to_csv(file, index=False, encoding=encoding)
df = parser.read_csv(file, encoding=encoding, memory_map=True)
tm.assert_frame_equal(df, expected)
@skip_pyarrow
def test_chunk_splits_multibyte_char(all_parsers):
"""
Chunk splits a multibyte character with memory_map=True
GH 43540
"""
parser = all_parsers
# DEFAULT_CHUNKSIZE = 262144, defined in parsers.pyx
df = DataFrame(data=["a" * 127] * 2048)
# Put two-bytes utf-8 encoded character "ą" at the end of chunk
# utf-8 encoding of "ą" is b'\xc4\x85'
df.iloc[2047] = "a" * 127 + "ą"
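    # Each row is 127 "a"s plus "\n" (128 bytes), so 2048 rows end exactly at
    # byte 262144 (DEFAULT_CHUNKSIZE); widening the final character to the
    # two-byte "ą" makes it straddle the chunk boundary.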
with tm.ensure_clean("bug-gh43540.csv") as fname:
df.to_csv(fname, index=False, header=False, encoding="utf-8")
dfr = parser.read_csv(fname, header=None, memory_map=True, engine="c")
tm.assert_frame_equal(dfr, df)
@skip_pyarrow
def test_readcsv_memmap_utf8(all_parsers):
"""
GH 43787
Test correct handling of UTF-8 chars when memory_map=True and encoding is UTF-8
"""
lines = []
line_length = 128
start_char = " "
end_char = "\U00010080"
# This for loop creates a list of 128-char strings
# consisting of consecutive Unicode chars
for lnum in range(ord(start_char), ord(end_char), line_length):
line = "".join([chr(c) for c in range(lnum, lnum + 0x80)]) + "\n"
try:
line.encode("utf-8")
except UnicodeEncodeError:
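            # Surrogate code points (U+D800-U+DFFF) cannot be UTF-8 encoded;
            # skip any line that contains them.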
continue
lines.append(line)
parser = all_parsers
df = DataFrame(lines)
with tm.ensure_clean("utf8test.csv") as fname:
df.to_csv(fname, index=False, header=False, encoding="utf-8")
dfr = parser.read_csv(
fname, header=None, memory_map=True, engine="c", encoding="utf-8"
)
tm.assert_frame_equal(df, dfr)
@pytest.mark.usefixtures("pyarrow_xfail")
@pytest.mark.parametrize("mode", ["w+b", "w+t"])
def test_not_readable(all_parsers, mode):
# GH43439
parser = all_parsers
content = b"abcd"
if "t" in mode:
content = "abcd"
with tempfile.SpooledTemporaryFile(mode=mode) as handle:
handle.write(content)
handle.seek(0)
df = parser.read_csv(handle)
expected = DataFrame([], columns=["abcd"])
tm.assert_frame_equal(df, expected)

View File

@ -0,0 +1,668 @@
"""
Tests that the file header is properly handled or inferred
during parsing for all of the parsers defined in parsers.py
"""
from collections import namedtuple
from io import StringIO
import numpy as np
import pytest
from pandas.errors import ParserError
from pandas import (
DataFrame,
Index,
MultiIndex,
)
import pandas._testing as tm
# TODO(1.4): Change me to xfails at release time
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
@skip_pyarrow
def test_read_with_bad_header(all_parsers):
parser = all_parsers
msg = r"but only \d+ lines in file"
with pytest.raises(ValueError, match=msg):
s = StringIO(",,")
parser.read_csv(s, header=[10])
def test_negative_header(all_parsers):
# see gh-27779
parser = all_parsers
data = """1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
with pytest.raises(
ValueError,
match="Passing negative integer to header is invalid. "
"For no header, use header=None instead",
):
parser.read_csv(StringIO(data), header=-1)
@pytest.mark.parametrize("header", [([-1, 2, 4]), ([-5, 0])])
def test_negative_multi_index_header(all_parsers, header):
# see gh-27779
parser = all_parsers
data = """1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
with pytest.raises(
ValueError, match="cannot specify multi-index header with negative integers"
):
parser.read_csv(StringIO(data), header=header)
@pytest.mark.parametrize("header", [True, False])
def test_bool_header_arg(all_parsers, header):
# see gh-6114
parser = all_parsers
data = """\
MyColumn
a
b
a
b"""
msg = "Passing a bool to header is invalid"
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), header=header)
def test_no_header_prefix(all_parsers):
parser = all_parsers
data = """1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = parser.read_csv(StringIO(data), prefix="Field", header=None)
expected = DataFrame(
[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]],
columns=["Field0", "Field1", "Field2", "Field3", "Field4"],
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_header_with_index_col(all_parsers):
parser = all_parsers
data = """foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
names = ["A", "B", "C"]
result = parser.read_csv(StringIO(data), names=names)
expected = DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
index=["foo", "bar", "baz"],
columns=["A", "B", "C"],
)
tm.assert_frame_equal(result, expected)
def test_header_not_first_line(all_parsers):
parser = all_parsers
data = """got,to,ignore,this,line
got,to,ignore,this,line
index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
"""
data2 = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
"""
result = parser.read_csv(StringIO(data), header=2, index_col=0)
expected = parser.read_csv(StringIO(data2), header=0, index_col=0)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_header_multi_index(all_parsers):
parser = all_parsers
expected = tm.makeCustomDataframe(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
data = """\
C0,,C_l0_g0,C_l0_g1,C_l0_g2
C1,,C_l1_g0,C_l1_g1,C_l1_g2
C2,,C_l2_g0,C_l2_g1,C_l2_g2
C3,,C_l3_g0,C_l3_g1,C_l3_g2
R0,R1,,,
R_l0_g0,R_l1_g0,R0C0,R0C1,R0C2
R_l0_g1,R_l1_g1,R1C0,R1C1,R1C2
R_l0_g2,R_l1_g2,R2C0,R2C1,R2C2
R_l0_g3,R_l1_g3,R3C0,R3C1,R3C2
R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2
"""
result = parser.read_csv(StringIO(data), header=[0, 1, 2, 3], index_col=[0, 1])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"kwargs,msg",
[
(
{"index_col": ["foo", "bar"]},
(
"index_col must only contain "
"row numbers when specifying "
"a multi-index header"
),
),
(
{"index_col": [0, 1], "names": ["foo", "bar"]},
("cannot specify names when specifying a multi-index header"),
),
(
{"index_col": [0, 1], "usecols": ["foo", "bar"]},
("cannot specify usecols when specifying a multi-index header"),
),
],
)
def test_header_multi_index_invalid(all_parsers, kwargs, msg):
data = """\
C0,,C_l0_g0,C_l0_g1,C_l0_g2
C1,,C_l1_g0,C_l1_g1,C_l1_g2
C2,,C_l2_g0,C_l2_g1,C_l2_g2
C3,,C_l3_g0,C_l3_g1,C_l3_g2
R0,R1,,,
R_l0_g0,R_l1_g0,R0C0,R0C1,R0C2
R_l0_g1,R_l1_g1,R1C0,R1C1,R1C2
R_l0_g2,R_l1_g2,R2C0,R2C1,R2C2
R_l0_g3,R_l1_g3,R3C0,R3C1,R3C2
R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2
"""
parser = all_parsers
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=[0, 1, 2, 3], **kwargs)
_TestTuple = namedtuple("_TestTuple", ["first", "second"])
@skip_pyarrow
@pytest.mark.parametrize(
"kwargs",
[
{"header": [0, 1]},
{
"skiprows": 3,
"names": [
("a", "q"),
("a", "r"),
("a", "s"),
("b", "t"),
("c", "u"),
("c", "v"),
],
},
{
"skiprows": 3,
"names": [
_TestTuple("a", "q"),
_TestTuple("a", "r"),
_TestTuple("a", "s"),
_TestTuple("b", "t"),
_TestTuple("c", "u"),
_TestTuple("c", "v"),
],
},
],
)
def test_header_multi_index_common_format1(all_parsers, kwargs):
parser = all_parsers
expected = DataFrame(
[[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
index=["one", "two"],
columns=MultiIndex.from_tuples(
[("a", "q"), ("a", "r"), ("a", "s"), ("b", "t"), ("c", "u"), ("c", "v")]
),
)
data = """,a,a,a,b,c,c
,q,r,s,t,u,v
,,,,,,
one,1,2,3,4,5,6
two,7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), index_col=0, **kwargs)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize(
"kwargs",
[
{"header": [0, 1]},
{
"skiprows": 2,
"names": [
("a", "q"),
("a", "r"),
("a", "s"),
("b", "t"),
("c", "u"),
("c", "v"),
],
},
{
"skiprows": 2,
"names": [
_TestTuple("a", "q"),
_TestTuple("a", "r"),
_TestTuple("a", "s"),
_TestTuple("b", "t"),
_TestTuple("c", "u"),
_TestTuple("c", "v"),
],
},
],
)
def test_header_multi_index_common_format2(all_parsers, kwargs):
parser = all_parsers
expected = DataFrame(
[[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
index=["one", "two"],
columns=MultiIndex.from_tuples(
[("a", "q"), ("a", "r"), ("a", "s"), ("b", "t"), ("c", "u"), ("c", "v")]
),
)
data = """,a,a,a,b,c,c
,q,r,s,t,u,v
one,1,2,3,4,5,6
two,7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), index_col=0, **kwargs)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize(
"kwargs",
[
{"header": [0, 1]},
{
"skiprows": 2,
"names": [
("a", "q"),
("a", "r"),
("a", "s"),
("b", "t"),
("c", "u"),
("c", "v"),
],
},
{
"skiprows": 2,
"names": [
_TestTuple("a", "q"),
_TestTuple("a", "r"),
_TestTuple("a", "s"),
_TestTuple("b", "t"),
_TestTuple("c", "u"),
_TestTuple("c", "v"),
],
},
],
)
def test_header_multi_index_common_format3(all_parsers, kwargs):
parser = all_parsers
expected = DataFrame(
[[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
index=["one", "two"],
columns=MultiIndex.from_tuples(
[("a", "q"), ("a", "r"), ("a", "s"), ("b", "t"), ("c", "u"), ("c", "v")]
),
)
expected = expected.reset_index(drop=True)
data = """a,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), index_col=None, **kwargs)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_header_multi_index_common_format_malformed1(all_parsers):
parser = all_parsers
expected = DataFrame(
np.array([[2, 3, 4, 5, 6], [8, 9, 10, 11, 12]], dtype="int64"),
index=Index([1, 7]),
columns=MultiIndex(
levels=[["a", "b", "c"], ["r", "s", "t", "u", "v"]],
codes=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 4]],
names=["a", "q"],
),
)
data = """a,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), header=[0, 1], index_col=0)
tm.assert_frame_equal(expected, result)
@skip_pyarrow
def test_header_multi_index_common_format_malformed2(all_parsers):
parser = all_parsers
expected = DataFrame(
np.array([[2, 3, 4, 5, 6], [8, 9, 10, 11, 12]], dtype="int64"),
index=Index([1, 7]),
columns=MultiIndex(
levels=[["a", "b", "c"], ["r", "s", "t", "u", "v"]],
codes=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 4]],
names=[None, "q"],
),
)
data = """,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), header=[0, 1], index_col=0)
tm.assert_frame_equal(expected, result)
@skip_pyarrow
def test_header_multi_index_common_format_malformed3(all_parsers):
parser = all_parsers
expected = DataFrame(
np.array([[3, 4, 5, 6], [9, 10, 11, 12]], dtype="int64"),
index=MultiIndex(levels=[[1, 7], [2, 8]], codes=[[0, 1], [0, 1]]),
columns=MultiIndex(
levels=[["a", "b", "c"], ["s", "t", "u", "v"]],
codes=[[0, 1, 2, 2], [0, 1, 2, 3]],
names=[None, "q"],
),
)
data = """,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1])
tm.assert_frame_equal(expected, result)
@skip_pyarrow
def test_header_multi_index_blank_line(all_parsers):
# GH 40442
parser = all_parsers
data = [[None, None], [1, 2], [3, 4]]
columns = MultiIndex.from_tuples([("a", "A"), ("b", "B")])
expected = DataFrame(data, columns=columns)
data = "a,b\nA,B\n,\n1,2\n3,4"
result = parser.read_csv(StringIO(data), header=[0, 1])
tm.assert_frame_equal(expected, result)
@skip_pyarrow
@pytest.mark.parametrize(
"data,header", [("1,2,3\n4,5,6", None), ("foo,bar,baz\n1,2,3\n4,5,6", 0)]
)
def test_header_names_backward_compat(all_parsers, data, header):
# see gh-2539
parser = all_parsers
expected = parser.read_csv(StringIO("1,2,3\n4,5,6"), names=["a", "b", "c"])
result = parser.read_csv(StringIO(data), names=["a", "b", "c"], header=header)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize("kwargs", [{}, {"index_col": False}])
def test_read_only_header_no_rows(all_parsers, kwargs):
# See gh-7773
parser = all_parsers
expected = DataFrame(columns=["a", "b", "c"])
result = parser.read_csv(StringIO("a,b,c"), **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"kwargs,names",
[
({}, [0, 1, 2, 3, 4]),
({"prefix": "X"}, ["X0", "X1", "X2", "X3", "X4"]),
(
{"names": ["foo", "bar", "baz", "quux", "panda"]},
["foo", "bar", "baz", "quux", "panda"],
),
],
)
def test_no_header(all_parsers, kwargs, names):
parser = all_parsers
data = """1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
expected = DataFrame(
[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]], columns=names
)
if "prefix" in kwargs.keys():
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = parser.read_csv(StringIO(data), header=None, **kwargs)
else:
result = parser.read_csv(StringIO(data), header=None, **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("header", [["a", "b"], "string_header"])
def test_non_int_header(all_parsers, header):
# see gh-16338
msg = "header must be integer or list of integers"
data = """1,2\n3,4"""
parser = all_parsers
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=header)
@skip_pyarrow
def test_singleton_header(all_parsers):
# see gh-7757
data = """a,b,c\n0,1,2\n1,2,3"""
parser = all_parsers
expected = DataFrame({"a": [0, 1], "b": [1, 2], "c": [2, 3]})
result = parser.read_csv(StringIO(data), header=[0])
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize(
"data,expected",
[
(
"A,A,A,B\none,one,one,two\n0,40,34,0.1",
DataFrame(
[[0, 40, 34, 0.1]],
columns=MultiIndex.from_tuples(
[("A", "one"), ("A", "one.1"), ("A", "one.2"), ("B", "two")]
),
),
),
(
"A,A,A,B\none,one,one.1,two\n0,40,34,0.1",
DataFrame(
[[0, 40, 34, 0.1]],
columns=MultiIndex.from_tuples(
[("A", "one"), ("A", "one.1"), ("A", "one.1.1"), ("B", "two")]
),
),
),
(
"A,A,A,B,B\none,one,one.1,two,two\n0,40,34,0.1,0.1",
DataFrame(
[[0, 40, 34, 0.1, 0.1]],
columns=MultiIndex.from_tuples(
[
("A", "one"),
("A", "one.1"),
("A", "one.1.1"),
("B", "two"),
("B", "two.1"),
]
),
),
),
],
)
def test_mangles_multi_index(all_parsers, data, expected):
# see gh-18062
parser = all_parsers
result = parser.read_csv(StringIO(data), header=[0, 1])
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize("index_col", [None, [0]])
@pytest.mark.parametrize(
"columns", [None, (["", "Unnamed"]), (["Unnamed", ""]), (["Unnamed", "NotUnnamed"])]
)
def test_multi_index_unnamed(all_parsers, index_col, columns):
# see gh-23687
#
# When specifying a multi-index header, make sure that
# we don't error just because one of the rows in our header
# has ALL column names containing the string "Unnamed". The
# correct condition to check is whether the row contains
# ALL columns that did not have names (and instead were given
# placeholder ones).
parser = all_parsers
header = [0, 1]
if index_col is None:
data = ",".join(columns or ["", ""]) + "\n0,1\n2,3\n4,5\n"
else:
data = ",".join([""] + (columns or ["", ""])) + "\n,0,1\n0,2,3\n1,4,5\n"
result = parser.read_csv(StringIO(data), header=header, index_col=index_col)
exp_columns = []
if columns is None:
columns = ["", "", ""]
for i, col in enumerate(columns):
if not col: # Unnamed.
col = f"Unnamed: {i if index_col is None else i + 1}_level_0"
exp_columns.append(col)
columns = MultiIndex.from_tuples(zip(exp_columns, ["0", "1"]))
expected = DataFrame([[2, 3], [4, 5]], columns=columns)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_names_longer_than_header_but_equal_with_data_rows(all_parsers):
# GH#38453
parser = all_parsers
data = """a, b
1,2,3
5,6,4
"""
result = parser.read_csv(StringIO(data), header=0, names=["A", "B", "C"])
expected = DataFrame({"A": [1, 5], "B": [2, 6], "C": [3, 4]})
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_read_csv_multiindex_columns(all_parsers):
# GH#6051
parser = all_parsers
s1 = "Male, Male, Male, Female, Female\nR, R, L, R, R\n.86, .67, .88, .78, .81"
s2 = (
"Male, Male, Male, Female, Female\n"
"R, R, L, R, R\n"
".86, .67, .88, .78, .81\n"
".86, .67, .88, .78, .82"
)
mi = MultiIndex.from_tuples(
[
("Male", "R"),
(" Male", " R"),
(" Male", " L"),
(" Female", " R"),
(" Female", " R.1"),
]
)
expected = DataFrame(
[[0.86, 0.67, 0.88, 0.78, 0.81], [0.86, 0.67, 0.88, 0.78, 0.82]], columns=mi
)
df1 = parser.read_csv(StringIO(s1), header=[0, 1])
tm.assert_frame_equal(df1, expected.iloc[:1])
df2 = parser.read_csv(StringIO(s2), header=[0, 1])
tm.assert_frame_equal(df2, expected)
@skip_pyarrow
def test_read_csv_multi_header_length_check(all_parsers):
# GH#43102
parser = all_parsers
case = """row11,row12,row13
row21,row22, row23
row31,row32
"""
with pytest.raises(
ParserError, match="Header rows must have an equal number of columns."
):
parser.read_csv(StringIO(case), header=[0, 2])
@skip_pyarrow
def test_header_none_and_implicit_index(all_parsers):
# GH#22144
parser = all_parsers
data = "x,1,5\ny,2\nz,3\n"
result = parser.read_csv(StringIO(data), names=["a", "b"], header=None)
expected = DataFrame(
{"a": [1, 2, 3], "b": [5, np.nan, np.nan]}, index=["x", "y", "z"]
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_header_none_and_implicit_index_in_second_row(all_parsers):
# GH#22144
parser = all_parsers
data = "x,1\ny,2,5\nz,3\n"
with pytest.raises(ParserError, match="Expected 2 fields in line 2, saw 3"):
parser.read_csv(StringIO(data), names=["a", "b"], header=None)
@skip_pyarrow
def test_header_none_and_on_bad_lines_skip(all_parsers):
# GH#22144
parser = all_parsers
data = "x,1\ny,2,5\nz,3\n"
result = parser.read_csv(
StringIO(data), names=["a", "b"], header=None, on_bad_lines="skip"
)
expected = DataFrame({"a": ["x", "z"], "b": [1, 3]})
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,354 @@
"""
Tests that the specified index column (a.k.a "index_col")
is properly handled or inferred during parsing for all of
the parsers defined in parsers.py
"""
from io import StringIO
import numpy as np
import pytest
from pandas import (
DataFrame,
Index,
MultiIndex,
)
import pandas._testing as tm
# TODO(1.4): Change me to xfails at release time
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
@pytest.mark.parametrize("with_header", [True, False])
def test_index_col_named(all_parsers, with_header):
parser = all_parsers
no_header = """\
KORD1,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
KORD2,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
KORD3,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
KORD4,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
KORD5,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
KORD6,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000"""
header = "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n"
if with_header:
data = header + no_header
result = parser.read_csv(StringIO(data), index_col="ID")
expected = parser.read_csv(StringIO(data), header=0).set_index("ID")
tm.assert_frame_equal(result, expected)
else:
data = no_header
msg = "Index ID invalid"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), index_col="ID")
def test_index_col_named2(all_parsers):
parser = all_parsers
data = """\
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo
"""
expected = DataFrame(
{"a": [1, 5, 9], "b": [2, 6, 10], "c": [3, 7, 11], "d": [4, 8, 12]},
index=Index(["hello", "world", "foo"], name="message"),
)
names = ["a", "b", "c", "d", "message"]
result = parser.read_csv(StringIO(data), names=names, index_col=["message"])
tm.assert_frame_equal(result, expected)
def test_index_col_is_true(all_parsers):
# see gh-9798
data = "a,b\n1,2"
parser = all_parsers
msg = "The value of index_col couldn't be 'True'"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), index_col=True)
@skip_pyarrow
def test_infer_index_col(all_parsers):
data = """A,B,C
foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
parser = all_parsers
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
index=["foo", "bar", "baz"],
columns=["A", "B", "C"],
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize(
"index_col,kwargs",
[
(None, {"columns": ["x", "y", "z"]}),
(False, {"columns": ["x", "y", "z"]}),
(0, {"columns": ["y", "z"], "index": Index([], name="x")}),
(1, {"columns": ["x", "z"], "index": Index([], name="y")}),
("x", {"columns": ["y", "z"], "index": Index([], name="x")}),
("y", {"columns": ["x", "z"], "index": Index([], name="y")}),
(
[0, 1],
{
"columns": ["z"],
"index": MultiIndex.from_arrays([[]] * 2, names=["x", "y"]),
},
),
(
["x", "y"],
{
"columns": ["z"],
"index": MultiIndex.from_arrays([[]] * 2, names=["x", "y"]),
},
),
(
[1, 0],
{
"columns": ["z"],
"index": MultiIndex.from_arrays([[]] * 2, names=["y", "x"]),
},
),
(
["y", "x"],
{
"columns": ["z"],
"index": MultiIndex.from_arrays([[]] * 2, names=["y", "x"]),
},
),
],
)
def test_index_col_empty_data(all_parsers, index_col, kwargs):
data = "x,y,z"
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=index_col)
expected = DataFrame(**kwargs)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_empty_with_index_col_false(all_parsers):
# see gh-10413
data = "x,y"
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=False)
expected = DataFrame(columns=["x", "y"])
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize(
"index_names",
[
["", ""],
["foo", ""],
["", "bar"],
["foo", "bar"],
["NotReallyUnnamed", "Unnamed: 0"],
],
)
def test_multi_index_naming(all_parsers, index_names):
parser = all_parsers
# We don't want empty index names being replaced with "Unnamed: 0"
data = ",".join(index_names + ["col\na,c,1\na,d,2\nb,c,3\nb,d,4"])
result = parser.read_csv(StringIO(data), index_col=[0, 1])
expected = DataFrame(
{"col": [1, 2, 3, 4]}, index=MultiIndex.from_product([["a", "b"], ["c", "d"]])
)
expected.index.names = [name if name else None for name in index_names]
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_multi_index_naming_not_all_at_beginning(all_parsers):
parser = all_parsers
data = ",Unnamed: 2,\na,c,1\na,d,2\nb,c,3\nb,d,4"
result = parser.read_csv(StringIO(data), index_col=[0, 2])
expected = DataFrame(
{"Unnamed: 2": ["c", "d", "c", "d"]},
index=MultiIndex(
levels=[["a", "b"], [1, 2, 3, 4]], codes=[[0, 0, 1, 1], [0, 1, 2, 3]]
),
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_no_multi_index_level_names_empty(all_parsers):
# GH 10984
parser = all_parsers
midx = MultiIndex.from_tuples([("A", 1, 2), ("A", 1, 2), ("B", 1, 2)])
expected = DataFrame(np.random.randn(3, 3), index=midx, columns=["x", "y", "z"])
with tm.ensure_clean() as path:
expected.to_csv(path)
result = parser.read_csv(path, index_col=[0, 1, 2])
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_header_with_index_col(all_parsers):
# GH 33476
parser = all_parsers
data = """
I11,A,A
I12,B,B
I2,1,3
"""
midx = MultiIndex.from_tuples([("A", "B"), ("A", "B.1")], names=["I11", "I12"])
idx = Index(["I2"])
expected = DataFrame([[1, 3]], index=idx, columns=midx)
result = parser.read_csv(StringIO(data), index_col=0, header=[0, 1])
tm.assert_frame_equal(result, expected)
col_idx = Index(["A", "A.1"])
idx = Index(["I12", "I2"], name="I11")
expected = DataFrame([["B", "B"], ["1", "3"]], index=idx, columns=col_idx)
result = parser.read_csv(StringIO(data), index_col="I11", header=0)
tm.assert_frame_equal(result, expected)
@pytest.mark.slow
def test_index_col_large_csv(all_parsers):
# https://github.com/pandas-dev/pandas/issues/37094
parser = all_parsers
N = 1_000_001
df = DataFrame({"a": range(N), "b": np.random.randn(N)})
with tm.ensure_clean() as path:
df.to_csv(path, index=False)
result = parser.read_csv(path, index_col=[0])
tm.assert_frame_equal(result, df.set_index("a"))
@skip_pyarrow
def test_index_col_multiindex_columns_no_data(all_parsers):
# GH#38292
parser = all_parsers
result = parser.read_csv(
StringIO("a0,a1,a2\nb0,b1,b2\n"), header=[0, 1], index_col=0
)
expected = DataFrame(
[],
columns=MultiIndex.from_arrays(
[["a1", "a2"], ["b1", "b2"]], names=["a0", "b0"]
),
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_index_col_header_no_data(all_parsers):
# GH#38292
parser = all_parsers
result = parser.read_csv(StringIO("a0,a1,a2\n"), header=[0], index_col=0)
expected = DataFrame(
[],
columns=["a1", "a2"],
index=Index([], name="a0"),
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_multiindex_columns_no_data(all_parsers):
# GH#38292
parser = all_parsers
result = parser.read_csv(StringIO("a0,a1,a2\nb0,b1,b2\n"), header=[0, 1])
expected = DataFrame(
[], columns=MultiIndex.from_arrays([["a0", "a1", "a2"], ["b0", "b1", "b2"]])
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_multiindex_columns_index_col_with_data(all_parsers):
# GH#38292
parser = all_parsers
result = parser.read_csv(
StringIO("a0,a1,a2\nb0,b1,b2\ndata,data,data"), header=[0, 1], index_col=0
)
expected = DataFrame(
[["data", "data"]],
columns=MultiIndex.from_arrays(
[["a1", "a2"], ["b1", "b2"]], names=["a0", "b0"]
),
index=Index(["data"]),
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_infer_types_boolean_sum(all_parsers):
# GH#44079
parser = all_parsers
result = parser.read_csv(
StringIO("0,1"),
names=["a", "b"],
index_col=["a"],
dtype={"a": "UInt8"},
)
expected = DataFrame(
data={
"a": [
0,
],
"b": [1],
}
).set_index("a")
    # Not checking the index type here, because the C parser will return an
    # index column of dtype 'object', while the Python parser will return an
    # index column of dtype 'int64'.
tm.assert_frame_equal(result, expected, check_index_type=False)
@skip_pyarrow
@pytest.mark.parametrize("dtype, val", [(object, "01"), ("int64", 1)])
def test_specify_dtype_for_index_col(all_parsers, dtype, val):
# GH#9435
data = "a,b\n01,2"
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col="a", dtype={"a": dtype})
expected = DataFrame({"b": [2]}, index=Index([val], name="a"))
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_multiindex_columns_not_leading_index_col(all_parsers):
# GH#38549
parser = all_parsers
data = """a,b,c,d
e,f,g,h
x,y,1,2
"""
result = parser.read_csv(
StringIO(data),
header=[0, 1],
index_col=1,
)
cols = MultiIndex.from_tuples(
[("a", "e"), ("c", "g"), ("d", "h")], names=["b", "f"]
)
expected = DataFrame([["x", 1, 2]], columns=cols, index=["y"])
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,168 @@
"""
Tests that duplicate columns are handled appropriately when parsed by the
CSV engine. In general, the expected result is that they are either thoroughly
de-duplicated (if mangling requested) or ignored otherwise.
"""
from io import StringIO
import pytest
from pandas import DataFrame
import pandas._testing as tm
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
@skip_pyarrow
@pytest.mark.parametrize("kwargs", [{}, {"mangle_dupe_cols": True}])
def test_basic(all_parsers, kwargs):
# TODO: add test for condition "mangle_dupe_cols=False"
# once it is actually supported (gh-12935)
parser = all_parsers
data = "a,a,b,b,b\n1,2,3,4,5"
result = parser.read_csv(StringIO(data), sep=",", **kwargs)
expected = DataFrame([[1, 2, 3, 4, 5]], columns=["a", "a.1", "b", "b.1", "b.2"])
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_basic_names(all_parsers):
# See gh-7160
parser = all_parsers
data = "a,b,a\n0,1,2\n3,4,5"
expected = DataFrame([[0, 1, 2], [3, 4, 5]], columns=["a", "b", "a.1"])
result = parser.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
def test_basic_names_raise(all_parsers):
# See gh-7160
parser = all_parsers
data = "0,1,2\n3,4,5"
with pytest.raises(ValueError, match="Duplicate names"):
parser.read_csv(StringIO(data), names=["a", "b", "a"])
@skip_pyarrow
@pytest.mark.parametrize(
"data,expected",
[
("a,a,a.1\n1,2,3", DataFrame([[1, 2, 3]], columns=["a", "a.2", "a.1"])),
(
"a,a,a.1,a.1.1,a.1.1.1,a.1.1.1.1\n1,2,3,4,5,6",
DataFrame(
[[1, 2, 3, 4, 5, 6]],
columns=["a", "a.2", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1"],
),
),
(
"a,a,a.3,a.1,a.2,a,a\n1,2,3,4,5,6,7",
DataFrame(
[[1, 2, 3, 4, 5, 6, 7]],
columns=["a", "a.4", "a.3", "a.1", "a.2", "a.5", "a.6"],
),
),
],
)
def test_thorough_mangle_columns(all_parsers, data, expected):
# see gh-17060
parser = all_parsers
result = parser.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize(
"data,names,expected",
[
(
"a,b,b\n1,2,3",
["a.1", "a.1", "a.1.1"],
DataFrame(
[["a", "b", "b"], ["1", "2", "3"]], columns=["a.1", "a.1.1", "a.1.1.1"]
),
),
(
"a,b,c,d,e,f\n1,2,3,4,5,6",
["a", "a", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1"],
DataFrame(
[["a", "b", "c", "d", "e", "f"], ["1", "2", "3", "4", "5", "6"]],
columns=["a", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1", "a.1.1.1.1.1"],
),
),
(
"a,b,c,d,e,f,g\n1,2,3,4,5,6,7",
["a", "a", "a.3", "a.1", "a.2", "a", "a"],
DataFrame(
[
["a", "b", "c", "d", "e", "f", "g"],
["1", "2", "3", "4", "5", "6", "7"],
],
columns=["a", "a.1", "a.3", "a.1.1", "a.2", "a.2.1", "a.3.1"],
),
),
],
)
def test_thorough_mangle_names(all_parsers, data, names, expected):
# see gh-17095
parser = all_parsers
with pytest.raises(ValueError, match="Duplicate names"):
parser.read_csv(StringIO(data), names=names)
@skip_pyarrow
def test_mangled_unnamed_placeholders(all_parsers):
# xref gh-13017
orig_key = "0"
parser = all_parsers
orig_value = [1, 2, 3]
df = DataFrame({orig_key: orig_value})
# This test recursively updates `df`.
for i in range(3):
expected = DataFrame()
for j in range(i + 1):
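            # j == 0 yields "Unnamed: 0"; each later j yields f"Unnamed: 0.{j}",
            # matching read_csv's mangled placeholders for unnamed columns.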
col_name = "Unnamed: 0" + f".{1*j}" * min(j, 1)
expected.insert(loc=0, column=col_name, value=[0, 1, 2])
expected[orig_key] = orig_value
df = parser.read_csv(StringIO(df.to_csv()))
tm.assert_frame_equal(df, expected)
@skip_pyarrow
def test_mangle_dupe_cols_already_exists(all_parsers):
# GH#14704
parser = all_parsers
data = "a,a,a.1,a,a.3,a.1,a.1.1\n1,2,3,4,5,6,7"
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[[1, 2, 3, 4, 5, 6, 7]],
columns=["a", "a.2", "a.1", "a.4", "a.3", "a.1.2", "a.1.1"],
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_mangle_dupe_cols_already_exists_unnamed_col(all_parsers):
# GH#14704
parser = all_parsers
data = ",Unnamed: 0,,Unnamed: 2\n1,2,3,4"
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[[1, 2, 3, 4]],
columns=["Unnamed: 0.1", "Unnamed: 0", "Unnamed: 2.1", "Unnamed: 2"],
)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,154 @@
"""
Tests multithreading behaviour for reading and
parsing files for each parser defined in parsers.py
"""
from contextlib import ExitStack
from io import BytesIO
from multiprocessing.pool import ThreadPool
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame
import pandas._testing as tm
# We'll probably always skip these for pyarrow
# Maybe we'll add our own tests for pyarrow too
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
def _construct_dataframe(num_rows):
"""
Construct a DataFrame for testing.
Parameters
----------
num_rows : int
The number of rows for our DataFrame.
Returns
-------
df : DataFrame
"""
df = DataFrame(np.random.rand(num_rows, 5), columns=list("abcde"))
df["foo"] = "foo"
df["bar"] = "bar"
df["baz"] = "baz"
df["date"] = pd.date_range("20000101 09:00:00", periods=num_rows, freq="s")
df["int"] = np.arange(num_rows, dtype="int64")
return df
@pytest.mark.slow
def test_multi_thread_string_io_read_csv(all_parsers):
# see gh-11786
parser = all_parsers
max_row_range = 10000
num_files = 100
bytes_to_df = [
"\n".join([f"{i:d},{i:d},{i:d}" for i in range(max_row_range)]).encode()
for _ in range(num_files)
]
# Read all files in many threads.
with ExitStack() as stack:
files = [stack.enter_context(BytesIO(b)) for b in bytes_to_df]
pool = stack.enter_context(ThreadPool(8))
results = pool.map(parser.read_csv, files)
first_result = results[0]
for result in results:
tm.assert_frame_equal(first_result, result)
def _generate_multi_thread_dataframe(parser, path, num_rows, num_tasks):
"""
Generate a DataFrame via multi-thread.
Parameters
----------
parser : BaseParser
The parser object to use for reading the data.
path : str
The location of the CSV file to read.
num_rows : int
The number of rows to read per task.
num_tasks : int
The number of tasks to use for reading this DataFrame.
Returns
-------
df : DataFrame
"""
def reader(arg):
"""
Create a reader for part of the CSV.
Parameters
----------
arg : tuple
A tuple of the following:
* start : int
The starting row to start for parsing CSV
* nrows : int
The number of rows to read.
Returns
-------
df : DataFrame
"""
start, nrows = arg
if not start:
return parser.read_csv(
path, index_col=0, header=0, nrows=nrows, parse_dates=["date"]
)
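        # Later tasks skip the header row plus the `start` data rows owned by
        # earlier tasks; with header=None the date column must be referenced
        # by position (column 9 of the written CSV, after the index column).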
return parser.read_csv(
path,
index_col=0,
header=None,
skiprows=int(start) + 1,
nrows=nrows,
parse_dates=[9],
)
tasks = [
(num_rows * i // num_tasks, num_rows // num_tasks) for i in range(num_tasks)
]
with ThreadPool(processes=num_tasks) as pool:
results = pool.map(reader, tasks)
header = results[0].columns
for r in results[1:]:
r.columns = header
final_dataframe = pd.concat(results)
return final_dataframe
@pytest.mark.slow
def test_multi_thread_path_multipart_read_csv(all_parsers):
# see gh-11786
num_tasks = 4
num_rows = 100000
parser = all_parsers
file_name = "__thread_pool_reader__.csv"
df = _construct_dataframe(num_rows)
with tm.ensure_clean(file_name) as path:
df.to_csv(path)
final_dataframe = _generate_multi_thread_dataframe(
parser, path, num_rows, num_tasks
)
tm.assert_frame_equal(df, final_dataframe)

View File

@ -0,0 +1,656 @@
"""
Tests that NA values are properly handled during
parsing for all of the parsers defined in parsers.py
"""
from io import StringIO
import numpy as np
import pytest
from pandas._libs.parsers import STR_NA_VALUES
from pandas import (
DataFrame,
Index,
MultiIndex,
)
import pandas._testing as tm
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
@skip_pyarrow
def test_string_nas(all_parsers):
parser = all_parsers
data = """A,B,C
a,b,c
d,,f
,g,h
"""
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[["a", "b", "c"], ["d", np.nan, "f"], [np.nan, "g", "h"]],
columns=["A", "B", "C"],
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_detect_string_na(all_parsers):
parser = all_parsers
data = """A,B
foo,bar
NA,baz
NaN,nan
"""
expected = DataFrame(
[["foo", "bar"], [np.nan, "baz"], [np.nan, np.nan]], columns=["A", "B"]
)
result = parser.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize(
"na_values",
[
["-999.0", "-999"],
[-999, -999.0],
[-999.0, -999],
["-999.0"],
["-999"],
[-999.0],
[-999],
],
)
@pytest.mark.parametrize(
"data",
[
"""A,B
-999,1.2
2,-999
3,4.5
""",
"""A,B
-999,1.200
2,-999.000
3,4.500
""",
],
)
def test_non_string_na_values(all_parsers, data, na_values):
    # see gh-3611: with an odd float format (e.g. -999.000), we can't match
    # the string "-999.0" exactly, but we still need float matching
parser = all_parsers
expected = DataFrame([[np.nan, 1.2], [2.0, np.nan], [3.0, 4.5]], columns=["A", "B"])
result = parser.read_csv(StringIO(data), na_values=na_values)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_default_na_values(all_parsers):
_NA_VALUES = {
"-1.#IND",
"1.#QNAN",
"1.#IND",
"-1.#QNAN",
"#N/A",
"N/A",
"n/a",
"NA",
"<NA>",
"#NA",
"NULL",
"null",
"NaN",
"nan",
"-NaN",
"-nan",
"#N/A N/A",
"",
}
assert _NA_VALUES == STR_NA_VALUES
parser = all_parsers
nv = len(_NA_VALUES)
def f(i, v):
if i == 0:
buf = ""
elif i > 0:
buf = "".join([","] * i)
buf = f"{buf}{v}"
if i < nv - 1:
joined = "".join([","] * (nv - i - 1))
buf = f"{buf}{joined}"
return buf
data = StringIO("\n".join([f(i, v) for i, v in enumerate(_NA_VALUES)]))
expected = DataFrame(np.nan, columns=range(nv), index=range(nv))
result = parser.read_csv(data, header=None)
tm.assert_frame_equal(result, expected)
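def _sketch_diagonal_na_grid():
    # Illustrative sketch (not part of the original suite): f(i, v) above
    # places each NA string on the diagonal of an otherwise empty grid, so
    # every cell of the resulting frame parses as NaN. For three values:
    values = ["NA", "null", "nan"]
    n = len(values)
    lines = ["," * i + v + "," * (n - i - 1) for i, v in enumerate(values)]
    assert lines == ["NA,,", ",null,", ",,nan"]
    return "\n".join(lines)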
@skip_pyarrow
@pytest.mark.parametrize("na_values", ["baz", ["baz"]])
def test_custom_na_values(all_parsers, na_values):
parser = all_parsers
data = """A,B,C
ignore,this,row
1,NA,3
-1.#IND,5,baz
7,8,NaN
"""
expected = DataFrame(
[[1.0, np.nan, 3], [np.nan, 5, np.nan], [7, 8, np.nan]], columns=["A", "B", "C"]
)
result = parser.read_csv(StringIO(data), na_values=na_values, skiprows=[1])
tm.assert_frame_equal(result, expected)
def test_bool_na_values(all_parsers):
data = """A,B,C
True,False,True
NA,True,False
False,NA,True"""
parser = all_parsers
result = parser.read_csv(StringIO(data))
expected = DataFrame(
{
"A": np.array([True, np.nan, False], dtype=object),
"B": np.array([False, True, np.nan], dtype=object),
"C": [True, False, True],
}
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_na_value_dict(all_parsers):
data = """A,B,C
foo,bar,NA
bar,foo,foo
foo,bar,NA
bar,foo,foo"""
parser = all_parsers
df = parser.read_csv(StringIO(data), na_values={"A": ["foo"], "B": ["bar"]})
expected = DataFrame(
{
"A": [np.nan, "bar", np.nan, "bar"],
"B": [np.nan, "foo", np.nan, "foo"],
"C": [np.nan, "foo", np.nan, "foo"],
}
)
tm.assert_frame_equal(df, expected)
@skip_pyarrow
@pytest.mark.parametrize(
"index_col,expected",
[
(
[0],
DataFrame({"b": [np.nan], "c": [1], "d": [5]}, index=Index([0], name="a")),
),
(
[0, 2],
DataFrame(
{"b": [np.nan], "d": [5]},
index=MultiIndex.from_tuples([(0, 1)], names=["a", "c"]),
),
),
(
["a", "c"],
DataFrame(
{"b": [np.nan], "d": [5]},
index=MultiIndex.from_tuples([(0, 1)], names=["a", "c"]),
),
),
],
)
def test_na_value_dict_multi_index(all_parsers, index_col, expected):
data = """\
a,b,c,d
0,NA,1,5
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), na_values=set(), index_col=index_col)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize(
"kwargs,expected",
[
(
{},
DataFrame(
{
"A": ["a", "b", np.nan, "d", "e", np.nan, "g"],
"B": [1, 2, 3, 4, 5, 6, 7],
"C": ["one", "two", "three", np.nan, "five", np.nan, "seven"],
}
),
),
(
{"na_values": {"A": [], "C": []}, "keep_default_na": False},
DataFrame(
{
"A": ["a", "b", "", "d", "e", "nan", "g"],
"B": [1, 2, 3, 4, 5, 6, 7],
"C": ["one", "two", "three", "nan", "five", "", "seven"],
}
),
),
(
{"na_values": ["a"], "keep_default_na": False},
DataFrame(
{
"A": [np.nan, "b", "", "d", "e", "nan", "g"],
"B": [1, 2, 3, 4, 5, 6, 7],
"C": ["one", "two", "three", "nan", "five", "", "seven"],
}
),
),
(
{"na_values": {"A": [], "C": []}},
DataFrame(
{
"A": ["a", "b", np.nan, "d", "e", np.nan, "g"],
"B": [1, 2, 3, 4, 5, 6, 7],
"C": ["one", "two", "three", np.nan, "five", np.nan, "seven"],
}
),
),
],
)
def test_na_values_keep_default(all_parsers, kwargs, expected):
data = """\
A,B,C
a,1,one
b,2,two
,3,three
d,4,nan
e,5,five
nan,6,
g,7,seven
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_no_na_values_no_keep_default(all_parsers):
    # see gh-4318: passing na_values=None and
    # keep_default_na=False yields "None" as an NA value
data = """\
A,B,C
a,1,None
b,2,two
,3,None
d,4,nan
e,5,five
nan,6,
g,7,seven
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), keep_default_na=False)
expected = DataFrame(
{
"A": ["a", "b", "", "d", "e", "nan", "g"],
"B": [1, 2, 3, 4, 5, 6, 7],
"C": ["None", "two", "None", "nan", "five", "", "seven"],
}
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_no_keep_default_na_dict_na_values(all_parsers):
# see gh-19227
data = "a,b\n,2"
parser = all_parsers
result = parser.read_csv(
StringIO(data), na_values={"b": ["2"]}, keep_default_na=False
)
expected = DataFrame({"a": [""], "b": [np.nan]})
tm.assert_frame_equal(result, expected)
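def _sketch_keep_default_na():
    # Minimal sketch (not part of the original suite) of the na_values /
    # keep_default_na interplay exercised above: custom values extend the
    # default NA set unless keep_default_na=False, which replaces it.
    import pandas as pd

    csv = "a\nNA\nfoo"
    assert pd.read_csv(StringIO(csv))["a"].isna().tolist() == [True, False]
    assert pd.read_csv(StringIO(csv), na_values=["foo"])["a"].isna().tolist() == [
        True,
        True,
    ]
    assert pd.read_csv(StringIO(csv), na_values=["foo"], keep_default_na=False)[
        "a"
    ].isna().tolist() == [False, True]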
@skip_pyarrow
def test_no_keep_default_na_dict_na_scalar_values(all_parsers):
# see gh-19227
#
# Scalar values shouldn't cause the parsing to crash or fail.
data = "a,b\n1,2"
parser = all_parsers
df = parser.read_csv(StringIO(data), na_values={"b": 2}, keep_default_na=False)
expected = DataFrame({"a": [1], "b": [np.nan]})
tm.assert_frame_equal(df, expected)
@skip_pyarrow
@pytest.mark.parametrize("col_zero_na_values", [113125, "113125"])
def test_no_keep_default_na_dict_na_values_diff_reprs(all_parsers, col_zero_na_values):
# see gh-19227
data = """\
113125,"blah","/blaha",kjsdkj,412.166,225.874,214.008
729639,"qwer","",asdfkj,466.681,,252.373
"""
parser = all_parsers
expected = DataFrame(
{
0: [np.nan, 729639.0],
1: [np.nan, "qwer"],
2: ["/blaha", np.nan],
3: ["kjsdkj", "asdfkj"],
4: [412.166, 466.681],
5: ["225.874", ""],
6: [np.nan, 252.373],
}
)
result = parser.read_csv(
StringIO(data),
header=None,
keep_default_na=False,
na_values={2: "", 6: "214.008", 1: "blah", 0: col_zero_na_values},
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize(
"na_filter,row_data",
[
(True, [[1, "A"], [np.nan, np.nan], [3, "C"]]),
(False, [["1", "A"], ["nan", "B"], ["3", "C"]]),
],
)
def test_na_values_na_filter_override(all_parsers, na_filter, row_data):
data = """\
A,B
1,A
nan,B
3,C
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), na_values=["B"], na_filter=na_filter)
expected = DataFrame(row_data, columns=["A", "B"])
tm.assert_frame_equal(result, expected)
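def _sketch_na_filter():
    # Minimal sketch (not part of the original suite): na_filter=False
    # switches NA detection off entirely, so na_values is ignored and every
    # field comes back as the literal string, as in the False case above.
    import pandas as pd

    csv = "a\nnan\n3"
    assert pd.read_csv(StringIO(csv), na_filter=False)["a"].tolist() == ["nan", "3"]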
@skip_pyarrow
def test_na_trailing_columns(all_parsers):
parser = all_parsers
data = """Date,Currency,Symbol,Type,Units,UnitPrice,Cost,Tax
2012-03-14,USD,AAPL,BUY,1000
2012-05-12,USD,SBUX,SELL,500"""
# Trailing columns should be all NaN.
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[
["2012-03-14", "USD", "AAPL", "BUY", 1000, np.nan, np.nan, np.nan],
["2012-05-12", "USD", "SBUX", "SELL", 500, np.nan, np.nan, np.nan],
],
columns=[
"Date",
"Currency",
"Symbol",
"Type",
"Units",
"UnitPrice",
"Cost",
"Tax",
],
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize(
"na_values,row_data",
[
(1, [[np.nan, 2.0], [2.0, np.nan]]),
({"a": 2, "b": 1}, [[1.0, 2.0], [np.nan, np.nan]]),
],
)
def test_na_values_scalar(all_parsers, na_values, row_data):
# see gh-12224
parser = all_parsers
names = ["a", "b"]
data = "1,2\n2,1"
result = parser.read_csv(StringIO(data), names=names, na_values=na_values)
expected = DataFrame(row_data, columns=names)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_na_values_dict_aliasing(all_parsers):
parser = all_parsers
na_values = {"a": 2, "b": 1}
na_values_copy = na_values.copy()
names = ["a", "b"]
data = "1,2\n2,1"
expected = DataFrame([[1.0, 2.0], [np.nan, np.nan]], columns=names)
result = parser.read_csv(StringIO(data), names=names, na_values=na_values)
tm.assert_frame_equal(result, expected)
tm.assert_dict_equal(na_values, na_values_copy)
@skip_pyarrow
def test_na_values_dict_col_index(all_parsers):
# see gh-14203
data = "a\nfoo\n1"
parser = all_parsers
na_values = {0: "foo"}
result = parser.read_csv(StringIO(data), na_values=na_values)
expected = DataFrame({"a": [np.nan, 1]})
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize(
"data,kwargs,expected",
[
(
str(2**63) + "\n" + str(2**63 + 1),
{"na_values": [2**63]},
DataFrame([str(2**63), str(2**63 + 1)]),
),
(str(2**63) + ",1" + "\n,2", {}, DataFrame([[str(2**63), 1], ["", 2]])),
(str(2**63) + "\n1", {"na_values": [2**63]}, DataFrame([np.nan, 1])),
],
)
def test_na_values_uint64(all_parsers, data, kwargs, expected):
# see gh-14983
parser = all_parsers
result = parser.read_csv(StringIO(data), header=None, **kwargs)
tm.assert_frame_equal(result, expected)
def test_empty_na_values_no_default_with_index(all_parsers):
# see gh-15835
data = "a,1\nb,2"
parser = all_parsers
expected = DataFrame({"1": [2]}, index=Index(["b"], name="a"))
result = parser.read_csv(StringIO(data), index_col=0, keep_default_na=False)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize(
"na_filter,index_data", [(False, ["", "5"]), (True, [np.nan, 5.0])]
)
def test_no_na_filter_on_index(all_parsers, na_filter, index_data):
# see gh-5239
#
# Don't parse NA-values in index unless na_filter=True
parser = all_parsers
data = "a,b,c\n1,,3\n4,5,6"
expected = DataFrame({"a": [1, 4], "c": [3, 6]}, index=Index(index_data, name="b"))
result = parser.read_csv(StringIO(data), index_col=[1], na_filter=na_filter)
tm.assert_frame_equal(result, expected)
def test_inf_na_values_with_int_index(all_parsers):
# see gh-17128
parser = all_parsers
data = "idx,col1,col2\n1,3,4\n2,inf,-inf"
    # Don't fail with an OverflowError when there are infs and an integer index column.
out = parser.read_csv(StringIO(data), index_col=[0], na_values=["inf", "-inf"])
expected = DataFrame(
{"col1": [3, np.nan], "col2": [4, np.nan]}, index=Index([1, 2], name="idx")
)
tm.assert_frame_equal(out, expected)
@skip_pyarrow
@pytest.mark.parametrize("na_filter", [True, False])
def test_na_values_with_dtype_str_and_na_filter(all_parsers, na_filter):
# see gh-20377
parser = all_parsers
data = "a,b,c\n1,,3\n4,5,6"
# na_filter=True --> missing value becomes NaN.
# na_filter=False --> missing value remains empty string.
empty = np.nan if na_filter else ""
expected = DataFrame({"a": ["1", "4"], "b": [empty, "5"], "c": ["3", "6"]})
result = parser.read_csv(StringIO(data), na_filter=na_filter, dtype=str)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize(
"data, na_values",
[
("false,1\n,1\ntrue", None),
("false,1\nnull,1\ntrue", None),
("false,1\nnan,1\ntrue", None),
("false,1\nfoo,1\ntrue", "foo"),
("false,1\nfoo,1\ntrue", ["foo"]),
("false,1\nfoo,1\ntrue", {"a": "foo"}),
],
)
def test_cast_NA_to_bool_raises_error(all_parsers, data, na_values):
parser = all_parsers
msg = (
"(Bool column has NA values in column [0a])|"
"(cannot safely convert passed user dtype of "
"bool for object dtyped data in column 0)"
)
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data),
header=None,
names=["a", "b"],
dtype={"a": "bool"},
na_values=na_values,
)
@skip_pyarrow
def test_str_nan_dropped(all_parsers):
# see gh-21131
parser = all_parsers
data = """File: small.csv,,
10010010233,0123,654
foo,,bar
01001000155,4530,898"""
result = parser.read_csv(
StringIO(data),
header=None,
names=["col1", "col2", "col3"],
dtype={"col1": str, "col2": str, "col3": str},
).dropna()
expected = DataFrame(
{
"col1": ["10010010233", "01001000155"],
"col2": ["0123", "4530"],
"col3": ["654", "898"],
},
index=[1, 3],
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_nan_multi_index(all_parsers):
# GH 42446
parser = all_parsers
data = "A,B,B\nX,Y,Z\n1,2,inf"
result = parser.read_csv(
StringIO(data), header=list(range(2)), na_values={("B", "Z"): "inf"}
)
expected = DataFrame(
{
("A", "X"): [1],
("B", "Y"): [2],
("B", "Z"): [np.nan],
}
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
def test_bool_and_nan_to_bool(all_parsers):
# GH#42808
parser = all_parsers
data = """0
NaN
True
False
"""
with pytest.raises(ValueError, match="NA values"):
parser.read_csv(StringIO(data), dtype="bool")
def test_bool_and_nan_to_int(all_parsers):
# GH#42808
parser = all_parsers
data = """0
NaN
True
False
"""
with pytest.raises(ValueError, match="convert|NoneType"):
parser.read_csv(StringIO(data), dtype="int")
def test_bool_and_nan_to_float(all_parsers):
# GH#42808
parser = all_parsers
data = """0
NaN
True
False
"""
result = parser.read_csv(StringIO(data), dtype="float")
expected = DataFrame.from_dict({"0": [np.nan, 1.0, 0.0]})
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,317 @@
"""
Tests the parsers' ability to read and parse non-local files,
which therefore require a network connection to be read.
"""
from io import (
BytesIO,
StringIO,
)
import logging
import numpy as np
import pytest
from pandas.compat import is_ci_environment
import pandas.util._test_decorators as td
from pandas import DataFrame
import pandas._testing as tm
import pandas.io.common as icom
from pandas.io.feather_format import read_feather
from pandas.io.parsers import read_csv
@pytest.mark.network
@tm.network(
url=(
"https://github.com/pandas-dev/pandas/raw/main/"
"pandas/tests/io/parser/data/salaries.csv"
),
check_before_test=True,
)
@pytest.mark.parametrize("mode", ["explicit", "infer"])
@pytest.mark.parametrize("engine", ["python", "c"])
def test_compressed_urls(salaries_table, mode, engine, compression_only):
# test reading compressed urls with various engines and
# extension inference
extension = icom._compression_to_extension[compression_only]
base_url = (
"https://github.com/pandas-dev/pandas/raw/main/"
"pandas/tests/io/parser/data/salaries.csv"
)
url = base_url + extension
if mode != "explicit":
compression_only = mode
url_table = read_csv(url, sep="\t", compression=compression_only, engine=engine)
tm.assert_frame_equal(url_table, salaries_table)
@pytest.mark.network
@tm.network(
url=(
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
"pandas/tests/io/parser/data/unicode_series.csv"
),
check_before_test=True,
)
def test_url_encoding_csv():
"""
read_csv should honor the requested encoding for URLs.
GH 10424
"""
path = (
"https://raw.githubusercontent.com/pandas-dev/pandas/main/"
+ "pandas/tests/io/parser/data/unicode_series.csv"
)
df = read_csv(path, encoding="latin-1", header=None)
assert df.loc[15, 1] == "Á köldum klaka (Cold Fever) (1994)"
@pytest.fixture
def tips_df(datapath):
"""DataFrame with the tips dataset."""
return read_csv(datapath("io", "data", "csv", "tips.csv"))
@pytest.mark.single_cpu
@pytest.mark.usefixtures("s3_resource")
@td.skip_if_not_us_locale()
class TestS3:
@td.skip_if_no("s3fs")
def test_parse_public_s3_bucket(self, tips_df, s3so):
        # This is more of an integration test because of the not-public
        # contents portion; that part could probably be mocked.
for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]:
df = read_csv(
"s3://pandas-test/tips.csv" + ext,
compression=comp,
storage_options=s3so,
)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(df, tips_df)
# Read public file from bucket with not-public contents
df = read_csv("s3://cant_get_it/tips.csv", storage_options=s3so)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(df, tips_df)
def test_parse_public_s3n_bucket(self, tips_df, s3so):
# Read from AWS s3 as "s3n" URL
df = read_csv("s3n://pandas-test/tips.csv", nrows=10, storage_options=s3so)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(tips_df.iloc[:10], df)
def test_parse_public_s3a_bucket(self, tips_df, s3so):
# Read from AWS s3 as "s3a" URL
df = read_csv("s3a://pandas-test/tips.csv", nrows=10, storage_options=s3so)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(tips_df.iloc[:10], df)
def test_parse_public_s3_bucket_nrows(self, tips_df, s3so):
for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]:
df = read_csv(
"s3://pandas-test/tips.csv" + ext,
nrows=10,
compression=comp,
storage_options=s3so,
)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(tips_df.iloc[:10], df)
def test_parse_public_s3_bucket_chunked(self, tips_df, s3so):
# Read with a chunksize
chunksize = 5
for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]:
with read_csv(
"s3://pandas-test/tips.csv" + ext,
chunksize=chunksize,
compression=comp,
storage_options=s3so,
) as df_reader:
assert df_reader.chunksize == chunksize
for i_chunk in [0, 1, 2]:
# Read a couple of chunks and make sure we see them
# properly.
df = df_reader.get_chunk()
assert isinstance(df, DataFrame)
assert not df.empty
true_df = tips_df.iloc[
chunksize * i_chunk : chunksize * (i_chunk + 1)
]
tm.assert_frame_equal(true_df, df)
def test_parse_public_s3_bucket_chunked_python(self, tips_df, s3so):
# Read with a chunksize using the Python parser
chunksize = 5
for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]:
with read_csv(
"s3://pandas-test/tips.csv" + ext,
chunksize=chunksize,
compression=comp,
engine="python",
storage_options=s3so,
) as df_reader:
assert df_reader.chunksize == chunksize
for i_chunk in [0, 1, 2]:
# Read a couple of chunks and make sure we see them properly.
df = df_reader.get_chunk()
assert isinstance(df, DataFrame)
assert not df.empty
true_df = tips_df.iloc[
chunksize * i_chunk : chunksize * (i_chunk + 1)
]
tm.assert_frame_equal(true_df, df)
def test_parse_public_s3_bucket_python(self, tips_df, s3so):
for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]:
df = read_csv(
"s3://pandas-test/tips.csv" + ext,
engine="python",
compression=comp,
storage_options=s3so,
)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(df, tips_df)
def test_infer_s3_compression(self, tips_df, s3so):
for ext in ["", ".gz", ".bz2"]:
df = read_csv(
"s3://pandas-test/tips.csv" + ext,
engine="python",
compression="infer",
storage_options=s3so,
)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(df, tips_df)
def test_parse_public_s3_bucket_nrows_python(self, tips_df, s3so):
for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]:
df = read_csv(
"s3://pandas-test/tips.csv" + ext,
engine="python",
nrows=10,
compression=comp,
storage_options=s3so,
)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(tips_df.iloc[:10], df)
def test_read_s3_fails(self, s3so):
msg = "The specified bucket does not exist"
with pytest.raises(OSError, match=msg):
read_csv("s3://nyqpug/asdf.csv", storage_options=s3so)
# Receive a permission error when trying to read a private bucket.
# It's irrelevant here that this isn't actually a table.
with pytest.raises(OSError, match=msg):
read_csv("s3://cant_get_it/file.csv")
@pytest.mark.xfail(reason="GH#39155 s3fs upgrade", strict=False)
def test_write_s3_csv_fails(self, tips_df, s3so):
# GH 32486
# Attempting to write to an invalid S3 path should raise
import botocore
# GH 34087
# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html
# Catch a ClientError since AWS Service Errors are defined dynamically
error = (FileNotFoundError, botocore.exceptions.ClientError)
with pytest.raises(error, match="The specified bucket does not exist"):
tips_df.to_csv(
"s3://an_s3_bucket_data_doesnt_exit/not_real.csv", storage_options=s3so
)
@pytest.mark.xfail(reason="GH#39155 s3fs upgrade", strict=False)
@td.skip_if_no("pyarrow")
def test_write_s3_parquet_fails(self, tips_df, s3so):
# GH 27679
# Attempting to write to an invalid S3 path should raise
import botocore
# GH 34087
# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html
# Catch a ClientError since AWS Service Errors are defined dynamically
error = (FileNotFoundError, botocore.exceptions.ClientError)
with pytest.raises(error, match="The specified bucket does not exist"):
tips_df.to_parquet(
"s3://an_s3_bucket_data_doesnt_exit/not_real.parquet",
storage_options=s3so,
)
@pytest.mark.single_cpu
def test_read_csv_handles_boto_s3_object(self, s3_resource, tips_file):
# see gh-16135
s3_object = s3_resource.meta.client.get_object(
Bucket="pandas-test", Key="tips.csv"
)
with BytesIO(s3_object["Body"].read()) as buffer:
result = read_csv(buffer, encoding="utf8")
assert isinstance(result, DataFrame)
assert not result.empty
expected = read_csv(tips_file)
tm.assert_frame_equal(result, expected)
@pytest.mark.single_cpu
@pytest.mark.skipif(
is_ci_environment(),
reason="This test can hang in our CI min_versions build "
"and leads to '##[error]The runner has "
"received a shutdown signal...' in GHA. GH: 45651",
)
def test_read_csv_chunked_download(self, s3_resource, caplog, s3so):
# 8 MB, S3FS uses 5MB chunks
import s3fs
df = DataFrame(np.random.randn(100000, 4), columns=list("abcd"))
str_buf = StringIO()
df.to_csv(str_buf)
buf = BytesIO(str_buf.getvalue().encode("utf-8"))
s3_resource.Bucket("pandas-test").put_object(Key="large-file.csv", Body=buf)
# Possibly some state leaking in between tests.
        # If we don't clear this cache, we see `GetObject operation: Forbidden`.
# Presumably the s3fs instance is being cached, with the directory listing
# from *before* we add the large-file.csv in the pandas-test bucket.
s3fs.S3FileSystem.clear_instance_cache()
with caplog.at_level(logging.DEBUG, logger="s3fs"):
read_csv("s3://pandas-test/large-file.csv", nrows=5, storage_options=s3so)
# log of fetch_range (start, stop)
assert (0, 5505024) in (x.args[-2:] for x in caplog.records)
def test_read_s3_with_hash_in_key(self, tips_df, s3so):
# GH 25945
result = read_csv("s3://pandas-test/tips#1.csv", storage_options=s3so)
tm.assert_frame_equal(tips_df, result)
@td.skip_if_no("pyarrow")
def test_read_feather_s3_file_path(self, feather_file, s3so):
# GH 29055
expected = read_feather(feather_file)
res = read_feather(
"s3://pandas-test/simple_dataset.feather", storage_options=s3so
)
tm.assert_frame_equal(expected, res)

File diff suppressed because it is too large

View File

@ -0,0 +1,460 @@
"""
Tests that apply specifically to the Python parser. Unless a test is
specifically marked as a Python-specific issue, the goal is to eventually
move as many of these tests as possible out of this module once the C
parser can accept further arguments when parsing.
"""
from __future__ import annotations
import csv
from io import (
    BytesIO,
    StringIO,
    TextIOWrapper,
)
import pytest
from pandas.errors import (
ParserError,
ParserWarning,
)
from pandas import (
DataFrame,
Index,
MultiIndex,
)
import pandas._testing as tm
def test_default_separator(python_parser_only):
# see gh-17333
#
    # Python's stdlib csv.Sniffer treats "o" as the separator here.
data = "aob\n1o2\n3o4"
parser = python_parser_only
expected = DataFrame({"a": [1, 3], "b": [2, 4]})
result = parser.read_csv(StringIO(data), sep=None)
tm.assert_frame_equal(result, expected)
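def _sketch_sniffer_delimiter():
    # Minimal sketch (not part of the original suite): with sep=None the
    # python engine defers to the stdlib csv.Sniffer, which is why "o" ends
    # up as the delimiter in the test above.
    dialect = csv.Sniffer().sniff("aob\n1o2\n3o4")
    assert dialect.delimiter == "o"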
@pytest.mark.parametrize("skipfooter", ["foo", 1.5, True])
def test_invalid_skipfooter_non_int(python_parser_only, skipfooter):
# see gh-15925 (comment)
data = "a\n1\n2"
parser = python_parser_only
msg = "skipfooter must be an integer"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), skipfooter=skipfooter)
def test_invalid_skipfooter_negative(python_parser_only):
# see gh-15925 (comment)
data = "a\n1\n2"
parser = python_parser_only
msg = "skipfooter cannot be negative"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), skipfooter=-1)
@pytest.mark.parametrize("kwargs", [{"sep": None}, {"delimiter": "|"}])
def test_sniff_delimiter(python_parser_only, kwargs):
data = """index|A|B|C
foo|1|2|3
bar|4|5|6
baz|7|8|9
"""
parser = python_parser_only
result = parser.read_csv(StringIO(data), index_col=0, **kwargs)
expected = DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
columns=["A", "B", "C"],
index=Index(["foo", "bar", "baz"], name="index"),
)
tm.assert_frame_equal(result, expected)
def test_sniff_delimiter_comment(python_parser_only):
data = """# comment line
index|A|B|C
# comment line
foo|1|2|3 # ignore | this
bar|4|5|6
baz|7|8|9
"""
parser = python_parser_only
result = parser.read_csv(StringIO(data), index_col=0, sep=None, comment="#")
expected = DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
columns=["A", "B", "C"],
index=Index(["foo", "bar", "baz"], name="index"),
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("encoding", [None, "utf-8"])
def test_sniff_delimiter_encoding(python_parser_only, encoding):
parser = python_parser_only
data = """ignore this
ignore this too
index|A|B|C
foo|1|2|3
bar|4|5|6
baz|7|8|9
"""
if encoding is not None:
data = data.encode(encoding)
data = BytesIO(data)
data = TextIOWrapper(data, encoding=encoding)
else:
data = StringIO(data)
result = parser.read_csv(data, index_col=0, sep=None, skiprows=2, encoding=encoding)
expected = DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
columns=["A", "B", "C"],
index=Index(["foo", "bar", "baz"], name="index"),
)
tm.assert_frame_equal(result, expected)
def test_single_line(python_parser_only):
# see gh-6607: sniff separator
parser = python_parser_only
result = parser.read_csv(StringIO("1,2"), names=["a", "b"], header=None, sep=None)
expected = DataFrame({"a": [1], "b": [2]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kwargs", [{"skipfooter": 2}, {"nrows": 3}])
def test_skipfooter(python_parser_only, kwargs):
# see gh-6607
data = """A,B,C
1,2,3
4,5,6
7,8,9
want to skip this
also also skip this
"""
parser = python_parser_only
result = parser.read_csv(StringIO(data), **kwargs)
expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["A", "B", "C"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"compression,klass", [("gzip", "GzipFile"), ("bz2", "BZ2File")]
)
def test_decompression_regex_sep(python_parser_only, csv1, compression, klass):
# see gh-6607
parser = python_parser_only
with open(csv1, "rb") as f:
data = f.read()
data = data.replace(b",", b"::")
expected = parser.read_csv(csv1)
module = pytest.importorskip(compression)
klass = getattr(module, klass)
with tm.ensure_clean() as path:
tmp = klass(path, mode="wb")
tmp.write(data)
tmp.close()
result = parser.read_csv(path, sep="::", compression=compression)
tm.assert_frame_equal(result, expected)
def test_read_csv_buglet_4x_multi_index(python_parser_only):
# see gh-6607
data = """ A B C D E
one two three four
a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640
a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744
x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""
parser = python_parser_only
expected = DataFrame(
[
[-0.5109, -2.3358, -0.4645, 0.05076, 0.3640],
[0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
[-0.6662, -0.5243, -0.3580, 0.89145, 2.5838],
],
columns=["A", "B", "C", "D", "E"],
index=MultiIndex.from_tuples(
[("a", "b", 10.0032, 5), ("a", "q", 20, 4), ("x", "q", 30, 3)],
names=["one", "two", "three", "four"],
),
)
result = parser.read_csv(StringIO(data), sep=r"\s+")
tm.assert_frame_equal(result, expected)
def test_read_csv_buglet_4x_multi_index2(python_parser_only):
# see gh-6893
data = " A B C\na b c\n1 3 7 0 3 6\n3 1 4 1 5 9"
parser = python_parser_only
expected = DataFrame.from_records(
[(1, 3, 7, 0, 3, 6), (3, 1, 4, 1, 5, 9)],
columns=list("abcABC"),
index=list("abc"),
)
result = parser.read_csv(StringIO(data), sep=r"\s+")
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("add_footer", [True, False])
def test_skipfooter_with_decimal(python_parser_only, add_footer):
# see gh-6971
data = "1#2\n3#4"
parser = python_parser_only
expected = DataFrame({"a": [1.2, 3.4]})
if add_footer:
# The stray footer line should not mess with the
# casting of the first two lines if we skip it.
kwargs = {"skipfooter": 1}
data += "\nFooter"
else:
kwargs = {}
result = parser.read_csv(StringIO(data), names=["a"], decimal="#", **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"sep", ["::", "#####", "!!!", "123", "#1!c5", "%!c!d", "@@#4:2", "_!pd#_"]
)
@pytest.mark.parametrize(
"encoding", ["utf-16", "utf-16-be", "utf-16-le", "utf-32", "cp037"]
)
def test_encoding_non_utf8_multichar_sep(python_parser_only, sep, encoding):
# see gh-3404
expected = DataFrame({"a": [1], "b": [2]})
parser = python_parser_only
data = "1" + sep + "2"
encoded_data = data.encode(encoding)
result = parser.read_csv(
BytesIO(encoded_data), sep=sep, names=["a", "b"], encoding=encoding
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("quoting", [csv.QUOTE_MINIMAL, csv.QUOTE_NONE])
def test_multi_char_sep_quotes(python_parser_only, quoting):
# see gh-13374
kwargs = {"sep": ",,"}
parser = python_parser_only
data = 'a,,b\n1,,a\n2,,"2,,b"'
if quoting == csv.QUOTE_NONE:
msg = "Expected 2 fields in line 3, saw 3"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), quoting=quoting, **kwargs)
else:
msg = "ignored when a multi-char delimiter is used"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), quoting=quoting, **kwargs)
def test_none_delimiter(python_parser_only, capsys):
# see gh-13374 and gh-17465
parser = python_parser_only
data = "a,b,c\n0,1,2\n3,4,5,6\n7,8,9"
expected = DataFrame({"a": [0, 7], "b": [1, 8], "c": [2, 9]})
# We expect the third line in the data to be
# skipped because it is malformed, but we do
# not expect any errors to occur.
result = parser.read_csv(StringIO(data), header=0, sep=None, on_bad_lines="warn")
tm.assert_frame_equal(result, expected)
captured = capsys.readouterr()
assert "Skipping line 3" in captured.err
@pytest.mark.parametrize("data", ['a\n1\n"b"a', 'a,b,c\ncat,foo,bar\ndog,foo,"baz'])
@pytest.mark.parametrize("skipfooter", [0, 1])
def test_skipfooter_bad_row(python_parser_only, data, skipfooter):
# see gh-13879 and gh-15910
parser = python_parser_only
if skipfooter:
msg = "parsing errors in the skipped footer rows"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), skipfooter=skipfooter)
else:
msg = "unexpected end of data|expected after"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), skipfooter=skipfooter)
def test_malformed_skipfooter(python_parser_only):
parser = python_parser_only
data = """ignore
A,B,C
1,2,3 # comment
1,2,3,4,5
2,3,4
footer
"""
msg = "Expected 3 fields in line 4, saw 5"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), header=1, comment="#", skipfooter=1)
def test_python_engine_file_no_next(python_parser_only):
parser = python_parser_only
class NoNextBuffer:
def __init__(self, csv_data):
self.data = csv_data
def __iter__(self):
return self.data.__iter__()
def read(self):
return self.data
def readline(self):
return self.data
parser.read_csv(NoNextBuffer("a\n1"))
@pytest.mark.parametrize("bad_line_func", [lambda x: ["2", "3"], lambda x: x[:2]])
def test_on_bad_lines_callable(python_parser_only, bad_line_func):
# GH 5686
parser = python_parser_only
data = """a,b
1,2
2,3,4,5,6
3,4
"""
bad_sio = StringIO(data)
result = parser.read_csv(bad_sio, on_bad_lines=bad_line_func)
expected = DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]})
tm.assert_frame_equal(result, expected)
def test_on_bad_lines_callable_write_to_external_list(python_parser_only):
# GH 5686
parser = python_parser_only
data = """a,b
1,2
2,3,4,5,6
3,4
"""
bad_sio = StringIO(data)
lst = []
def bad_line_func(bad_line: list[str]) -> list[str]:
lst.append(bad_line)
return ["2", "3"]
result = parser.read_csv(bad_sio, on_bad_lines=bad_line_func)
expected = DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]})
tm.assert_frame_equal(result, expected)
assert lst == [["2", "3", "4", "5", "6"]]
@pytest.mark.parametrize("bad_line_func", [lambda x: ["foo", "bar"], lambda x: x[:2]])
@pytest.mark.parametrize("sep", [",", "111"])
def test_on_bad_lines_callable_iterator_true(python_parser_only, bad_line_func, sep):
# GH 5686
    # iterator=True takes a separate code path from iterator=False
parser = python_parser_only
data = f"""
0{sep}1
hi{sep}there
foo{sep}bar{sep}baz
good{sep}bye
"""
bad_sio = StringIO(data)
result_iter = parser.read_csv(
bad_sio, on_bad_lines=bad_line_func, chunksize=1, iterator=True, sep=sep
)
expecteds = [
{"0": "hi", "1": "there"},
{"0": "foo", "1": "bar"},
{"0": "good", "1": "bye"},
]
for i, (result, expected) in enumerate(zip(result_iter, expecteds)):
expected = DataFrame(expected, index=range(i, i + 1))
tm.assert_frame_equal(result, expected)
def test_on_bad_lines_callable_dont_swallow_errors(python_parser_only):
# GH 5686
parser = python_parser_only
data = """a,b
1,2
2,3,4,5,6
3,4
"""
bad_sio = StringIO(data)
msg = "This function is buggy."
def bad_line_func(bad_line):
raise ValueError(msg)
with pytest.raises(ValueError, match=msg):
parser.read_csv(bad_sio, on_bad_lines=bad_line_func)
def test_on_bad_lines_callable_not_expected_length(python_parser_only):
# GH 5686
parser = python_parser_only
data = """a,b
1,2
2,3,4,5,6
3,4
"""
bad_sio = StringIO(data)
with tm.assert_produces_warning(ParserWarning, match="Length of header or names"):
result = parser.read_csv(bad_sio, on_bad_lines=lambda x: x)
expected = DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]})
tm.assert_frame_equal(result, expected)
def test_on_bad_lines_callable_returns_none(python_parser_only):
# GH 5686
parser = python_parser_only
data = """a,b
1,2
2,3,4,5,6
3,4
"""
bad_sio = StringIO(data)
result = parser.read_csv(bad_sio, on_bad_lines=lambda x: None)
expected = DataFrame({"a": [1, 3], "b": [2, 4]})
tm.assert_frame_equal(result, expected)
def test_on_bad_lines_index_col_inferred(python_parser_only):
# GH 5686
parser = python_parser_only
data = """a,b
1,2,3
4,5,6
"""
bad_sio = StringIO(data)
result = parser.read_csv(bad_sio, on_bad_lines=lambda x: ["99", "99"])
expected = DataFrame({"a": [2, 5], "b": [3, 6]}, index=[1, 4])
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,161 @@
"""
Tests that quoting specifications are properly handled
during parsing for all of the parsers defined in parsers.py
"""
import csv
from io import StringIO
import pytest
from pandas.errors import ParserError
from pandas import DataFrame
import pandas._testing as tm
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
@pytest.mark.parametrize(
"kwargs,msg",
[
({"quotechar": "foo"}, '"quotechar" must be a(n)? 1-character string'),
(
{"quotechar": None, "quoting": csv.QUOTE_MINIMAL},
"quotechar must be set if quoting enabled",
),
({"quotechar": 2}, '"quotechar" must be string( or None)?, not int'),
],
)
def test_bad_quote_char(all_parsers, kwargs, msg):
data = "1,2,3"
parser = all_parsers
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
@pytest.mark.parametrize(
"quoting,msg",
[
("foo", '"quoting" must be an integer'),
(5, 'bad "quoting" value'), # quoting must be in the range [0, 3]
],
)
def test_bad_quoting(all_parsers, quoting, msg):
data = "1,2,3"
parser = all_parsers
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), quoting=quoting)
def test_quote_char_basic(all_parsers):
parser = all_parsers
data = 'a,b,c\n1,2,"cat"'
expected = DataFrame([[1, 2, "cat"]], columns=["a", "b", "c"])
result = parser.read_csv(StringIO(data), quotechar='"')
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("quote_char", ["~", "*", "%", "$", "@", "P"])
def test_quote_char_various(all_parsers, quote_char):
parser = all_parsers
expected = DataFrame([[1, 2, "cat"]], columns=["a", "b", "c"])
data = 'a,b,c\n1,2,"cat"'
new_data = data.replace('"', quote_char)
result = parser.read_csv(StringIO(new_data), quotechar=quote_char)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("quoting", [csv.QUOTE_MINIMAL, csv.QUOTE_NONE])
@pytest.mark.parametrize("quote_char", ["", None])
def test_null_quote_char(all_parsers, quoting, quote_char):
kwargs = {"quotechar": quote_char, "quoting": quoting}
data = "a,b,c\n1,2,3"
parser = all_parsers
if quoting != csv.QUOTE_NONE:
# Sanity checking.
msg = "quotechar must be set if quoting enabled"
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
else:
expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"])
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"kwargs,exp_data",
[
({}, [[1, 2, "foo"]]), # Test default.
# QUOTE_MINIMAL only applies to CSV writing, so no effect on reading.
({"quotechar": '"', "quoting": csv.QUOTE_MINIMAL}, [[1, 2, "foo"]]),
        # QUOTE_ALL only applies to CSV writing, so no effect on reading.
({"quotechar": '"', "quoting": csv.QUOTE_ALL}, [[1, 2, "foo"]]),
# QUOTE_NONE tells the reader to do no special handling
# of quote characters and leave them alone.
({"quotechar": '"', "quoting": csv.QUOTE_NONE}, [[1, 2, '"foo"']]),
# QUOTE_NONNUMERIC tells the reader to cast
# all non-quoted fields to float
({"quotechar": '"', "quoting": csv.QUOTE_NONNUMERIC}, [[1.0, 2.0, "foo"]]),
],
)
def test_quoting_various(all_parsers, kwargs, exp_data):
data = '1,2,"foo"'
parser = all_parsers
columns = ["a", "b", "c"]
result = parser.read_csv(StringIO(data), names=columns, **kwargs)
expected = DataFrame(exp_data, columns=columns)
tm.assert_frame_equal(result, expected)
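def _sketch_quote_nonnumeric():
    # Minimal sketch (not part of the original suite): of the csv.QUOTE_*
    # constants, QUOTE_NONNUMERIC is the one that changes parsing types on
    # read, casting every unquoted field to float as in the last case above.
    import pandas as pd

    result = pd.read_csv(
        StringIO('1,2,"foo"'), names=["a", "b", "c"], quoting=csv.QUOTE_NONNUMERIC
    )
    assert result["a"].dtype == float
    assert result["c"].tolist() == ["foo"]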
@pytest.mark.parametrize(
"doublequote,exp_data", [(True, [[3, '4 " 5']]), (False, [[3, '4 " 5"']])]
)
def test_double_quote(all_parsers, doublequote, exp_data):
parser = all_parsers
data = 'a,b\n3,"4 "" 5"'
result = parser.read_csv(StringIO(data), quotechar='"', doublequote=doublequote)
expected = DataFrame(exp_data, columns=["a", "b"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("quotechar", ['"', "\u0001"])
def test_quotechar_unicode(all_parsers, quotechar):
# see gh-14477
data = "a\n1"
parser = all_parsers
expected = DataFrame({"a": [1]})
result = parser.read_csv(StringIO(data), quotechar=quotechar)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("balanced", [True, False])
def test_unbalanced_quoting(all_parsers, balanced):
# see gh-22789.
parser = all_parsers
data = 'a,b,c\n1,2,"3'
if balanced:
# Re-balance the quoting and read in without errors.
expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"])
result = parser.read_csv(StringIO(data + '"'))
tm.assert_frame_equal(result, expected)
else:
msg = (
"EOF inside string starting at row 1"
if parser.engine == "c"
else "unexpected end of data"
)
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data))

View File

@ -0,0 +1,932 @@
"""
Tests the 'read_fwf' function in parsers.py. This
test suite is independent of the others because the
engine is set to 'python-fwf' internally.
"""
from datetime import datetime
from io import (
BytesIO,
StringIO,
)
from pathlib import Path
import numpy as np
import pytest
from pandas.errors import EmptyDataError
from pandas import (
DataFrame,
DatetimeIndex,
)
import pandas._testing as tm
import pandas.io.common as icom
from pandas.io.parsers import (
read_csv,
read_fwf,
)
def test_basic():
data = """\
A B C D
201158 360.242940 149.910199 11950.7
201159 444.953632 166.985655 11788.4
201160 364.136849 183.628767 11806.2
201161 413.836124 184.375703 11916.8
201162 502.953953 173.237159 12468.3
"""
result = read_fwf(StringIO(data))
expected = DataFrame(
[
[201158, 360.242940, 149.910199, 11950.7],
[201159, 444.953632, 166.985655, 11788.4],
[201160, 364.136849, 183.628767, 11806.2],
[201161, 413.836124, 184.375703, 11916.8],
[201162, 502.953953, 173.237159, 12468.3],
],
columns=["A", "B", "C", "D"],
)
tm.assert_frame_equal(result, expected)
def test_colspecs():
data = """\
A B C D E
201158 360.242940 149.910199 11950.7
201159 444.953632 166.985655 11788.4
201160 364.136849 183.628767 11806.2
201161 413.836124 184.375703 11916.8
201162 502.953953 173.237159 12468.3
"""
colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)]
result = read_fwf(StringIO(data), colspecs=colspecs)
expected = DataFrame(
[
[2011, 58, 360.242940, 149.910199, 11950.7],
[2011, 59, 444.953632, 166.985655, 11788.4],
[2011, 60, 364.136849, 183.628767, 11806.2],
[2011, 61, 413.836124, 184.375703, 11916.8],
[2011, 62, 502.953953, 173.237159, 12468.3],
],
columns=["A", "B", "C", "D", "E"],
)
tm.assert_frame_equal(result, expected)
def test_widths():
data = """\
A B C D E
2011 58 360.242940 149.910199 11950.7
2011 59 444.953632 166.985655 11788.4
2011 60 364.136849 183.628767 11806.2
2011 61 413.836124 184.375703 11916.8
2011 62 502.953953 173.237159 12468.3
"""
result = read_fwf(StringIO(data), widths=[5, 5, 13, 13, 7])
expected = DataFrame(
[
[2011, 58, 360.242940, 149.910199, 11950.7],
[2011, 59, 444.953632, 166.985655, 11788.4],
[2011, 60, 364.136849, 183.628767, 11806.2],
[2011, 61, 413.836124, 184.375703, 11916.8],
[2011, 62, 502.953953, 173.237159, 12468.3],
],
columns=["A", "B", "C", "D", "E"],
)
tm.assert_frame_equal(result, expected)
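def _sketch_widths_to_colspecs():
    # Minimal sketch (not part of the original suite): widths are a
    # shorthand that read_fwf expands into contiguous colspecs by taking a
    # running sum, so each field starts where the previous one ends.
    widths = [5, 5, 13]
    colspecs, start = [], 0
    for width in widths:
        colspecs.append((start, start + width))
        start += width
    assert colspecs == [(0, 5), (5, 10), (10, 23)]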
def test_non_space_filler():
# From Thomas Kluyver:
#
    # Apparently, some non-space filler characters can appear; this is
    # supported by specifying the 'delimiter' character:
#
# http://publib.boulder.ibm.com/infocenter/dmndhelp/v6r1mx/index.jsp?topic=/com.ibm.wbit.612.help.config.doc/topics/rfixwidth.html
data = """\
A~~~~B~~~~C~~~~~~~~~~~~D~~~~~~~~~~~~E
201158~~~~360.242940~~~149.910199~~~11950.7
201159~~~~444.953632~~~166.985655~~~11788.4
201160~~~~364.136849~~~183.628767~~~11806.2
201161~~~~413.836124~~~184.375703~~~11916.8
201162~~~~502.953953~~~173.237159~~~12468.3
"""
colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)]
result = read_fwf(StringIO(data), colspecs=colspecs, delimiter="~")
expected = DataFrame(
[
[2011, 58, 360.242940, 149.910199, 11950.7],
[2011, 59, 444.953632, 166.985655, 11788.4],
[2011, 60, 364.136849, 183.628767, 11806.2],
[2011, 61, 413.836124, 184.375703, 11916.8],
[2011, 62, 502.953953, 173.237159, 12468.3],
],
columns=["A", "B", "C", "D", "E"],
)
tm.assert_frame_equal(result, expected)
def test_over_specified():
data = """\
A B C D E
201158 360.242940 149.910199 11950.7
201159 444.953632 166.985655 11788.4
201160 364.136849 183.628767 11806.2
201161 413.836124 184.375703 11916.8
201162 502.953953 173.237159 12468.3
"""
colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)]
with pytest.raises(ValueError, match="must specify only one of"):
read_fwf(StringIO(data), colspecs=colspecs, widths=[6, 10, 10, 7])
def test_under_specified():
data = """\
A B C D E
201158 360.242940 149.910199 11950.7
201159 444.953632 166.985655 11788.4
201160 364.136849 183.628767 11806.2
201161 413.836124 184.375703 11916.8
201162 502.953953 173.237159 12468.3
"""
with pytest.raises(ValueError, match="Must specify either"):
read_fwf(StringIO(data), colspecs=None, widths=None)
def test_read_csv_compat():
csv_data = """\
A,B,C,D,E
2011,58,360.242940,149.910199,11950.7
2011,59,444.953632,166.985655,11788.4
2011,60,364.136849,183.628767,11806.2
2011,61,413.836124,184.375703,11916.8
2011,62,502.953953,173.237159,12468.3
"""
expected = read_csv(StringIO(csv_data), engine="python")
fwf_data = """\
A B C D E
201158 360.242940 149.910199 11950.7
201159 444.953632 166.985655 11788.4
201160 364.136849 183.628767 11806.2
201161 413.836124 184.375703 11916.8
201162 502.953953 173.237159 12468.3
"""
colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)]
result = read_fwf(StringIO(fwf_data), colspecs=colspecs)
tm.assert_frame_equal(result, expected)
def test_bytes_io_input():
result = read_fwf(BytesIO("שלום\nשלום".encode()), widths=[2, 2], encoding="utf8")
expected = DataFrame([["של", "ום"]], columns=["של", "ום"])
tm.assert_frame_equal(result, expected)
def test_fwf_colspecs_is_list_or_tuple():
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
msg = "column specifications must be a list or tuple.+"
with pytest.raises(TypeError, match=msg):
read_fwf(StringIO(data), colspecs={"a": 1}, delimiter=",")
def test_fwf_colspecs_is_list_or_tuple_of_two_element_tuples():
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
msg = "Each column specification must be.+"
with pytest.raises(TypeError, match=msg):
read_fwf(StringIO(data), colspecs=[("a", 1)])
@pytest.mark.parametrize(
"colspecs,exp_data",
[
([(0, 3), (3, None)], [[123, 456], [456, 789]]),
([(None, 3), (3, 6)], [[123, 456], [456, 789]]),
([(0, None), (3, None)], [[123456, 456], [456789, 789]]),
([(None, None), (3, 6)], [[123456, 456], [456789, 789]]),
],
)
def test_fwf_colspecs_none(colspecs, exp_data):
# see gh-7079
data = """\
123456
456789
"""
expected = DataFrame(exp_data)
result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"infer_nrows,exp_data",
[
# infer_nrows --> colspec == [(2, 3), (5, 6)]
(1, [[1, 2], [3, 8]]),
# infer_nrows > number of rows
(10, [[1, 2], [123, 98]]),
],
)
def test_fwf_colspecs_infer_nrows(infer_nrows, exp_data):
# see gh-15138
data = """\
1 2
123 98
"""
expected = DataFrame(exp_data)
result = read_fwf(StringIO(data), infer_nrows=infer_nrows, header=None)
tm.assert_frame_equal(result, expected)
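def _sketch_infer_nrows():
    # Minimal sketch (not part of the original suite): without colspecs or
    # widths, read_fwf infers column boundaries from the first infer_nrows
    # data rows (100 by default); scanning both rows here finds the wider
    # columns that the infer_nrows=1 case above misses.
    data = "  1  2\n123 98\n"
    result = read_fwf(StringIO(data), infer_nrows=10, header=None)
    assert result.iloc[1].tolist() == [123, 98]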
def test_fwf_regression():
# see gh-3594
#
# Turns out "T060" is parsable as a datetime slice!
tz_list = [1, 10, 20, 30, 60, 80, 100]
widths = [16] + [8] * len(tz_list)
names = ["SST"] + [f"T{z:03d}" for z in tz_list[1:]]
data = """ 2009164202000 9.5403 9.4105 8.6571 7.8372 6.0612 5.8843 5.5192
2009164203000 9.5435 9.2010 8.6167 7.8176 6.0804 5.8728 5.4869
2009164204000 9.5873 9.1326 8.4694 7.5889 6.0422 5.8526 5.4657
2009164205000 9.5810 9.0896 8.4009 7.4652 6.0322 5.8189 5.4379
2009164210000 9.6034 9.0897 8.3822 7.4905 6.0908 5.7904 5.4039
"""
result = read_fwf(
StringIO(data),
index_col=0,
header=None,
names=names,
widths=widths,
parse_dates=True,
date_parser=lambda s: datetime.strptime(s, "%Y%j%H%M%S"),
)
expected = DataFrame(
[
[9.5403, 9.4105, 8.6571, 7.8372, 6.0612, 5.8843, 5.5192],
[9.5435, 9.2010, 8.6167, 7.8176, 6.0804, 5.8728, 5.4869],
[9.5873, 9.1326, 8.4694, 7.5889, 6.0422, 5.8526, 5.4657],
[9.5810, 9.0896, 8.4009, 7.4652, 6.0322, 5.8189, 5.4379],
[9.6034, 9.0897, 8.3822, 7.4905, 6.0908, 5.7904, 5.4039],
],
index=DatetimeIndex(
[
"2009-06-13 20:20:00",
"2009-06-13 20:30:00",
"2009-06-13 20:40:00",
"2009-06-13 20:50:00",
"2009-06-13 21:00:00",
]
),
columns=["SST", "T010", "T020", "T030", "T060", "T080", "T100"],
)
tm.assert_frame_equal(result, expected)
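def _sketch_day_of_year_format():
    # Minimal sketch (not part of the original suite): the "%Y%j%H%M%S"
    # format used above decodes day-of-year timestamps, so "2009164202000"
    # is day 164 of 2009 (June 13) at 20:20:00.
    parsed = datetime.strptime("2009164202000", "%Y%j%H%M%S")
    assert parsed == datetime(2009, 6, 13, 20, 20)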
def test_fwf_for_uint8():
data = """1421302965.213420 PRI=3 PGN=0xef00 DST=0x17 SRC=0x28 04 154 00 00 00 00 00 127
1421302964.226776 PRI=6 PGN=0xf002 SRC=0x47 243 00 00 255 247 00 00 71""" # noqa:E501
df = read_fwf(
StringIO(data),
colspecs=[(0, 17), (25, 26), (33, 37), (49, 51), (58, 62), (63, 1000)],
names=["time", "pri", "pgn", "dst", "src", "data"],
converters={
"pgn": lambda x: int(x, 16),
"src": lambda x: int(x, 16),
"dst": lambda x: int(x, 16),
"data": lambda x: len(x.split(" ")),
},
)
expected = DataFrame(
[
[1421302965.213420, 3, 61184, 23, 40, 8],
[1421302964.226776, 6, 61442, None, 71, 8],
],
columns=["time", "pri", "pgn", "dst", "src", "data"],
)
expected["dst"] = expected["dst"].astype(object)
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize("comment", ["#", "~", "!"])
def test_fwf_comment(comment):
data = """\
1 2. 4 #hello world
5 NaN 10.0
"""
data = data.replace("#", comment)
colspecs = [(0, 3), (4, 9), (9, 25)]
expected = DataFrame([[1, 2.0, 4], [5, np.nan, 10.0]])
result = read_fwf(StringIO(data), colspecs=colspecs, header=None, comment=comment)
tm.assert_almost_equal(result, expected)
def test_fwf_skip_blank_lines():
data = """
A B C D
201158 360.242940 149.910199 11950.7
201159 444.953632 166.985655 11788.4
201162 502.953953 173.237159 12468.3
"""
result = read_fwf(StringIO(data), skip_blank_lines=True)
expected = DataFrame(
[
[201158, 360.242940, 149.910199, 11950.7],
[201159, 444.953632, 166.985655, 11788.4],
[201162, 502.953953, 173.237159, 12468.3],
],
columns=["A", "B", "C", "D"],
)
tm.assert_frame_equal(result, expected)
data = """\
A B C D
201158 360.242940 149.910199 11950.7
201159 444.953632 166.985655 11788.4
201162 502.953953 173.237159 12468.3
"""
result = read_fwf(StringIO(data), skip_blank_lines=False)
expected = DataFrame(
[
[201158, 360.242940, 149.910199, 11950.7],
[201159, 444.953632, 166.985655, 11788.4],
[np.nan, np.nan, np.nan, np.nan],
[np.nan, np.nan, np.nan, np.nan],
[201162, 502.953953, 173.237159, 12468.3],
],
columns=["A", "B", "C", "D"],
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("thousands", [",", "#", "~"])
def test_fwf_thousands(thousands):
data = """\
1 2,334.0 5
10 13 10.
"""
data = data.replace(",", thousands)
colspecs = [(0, 3), (3, 11), (12, 16)]
expected = DataFrame([[1, 2334.0, 5], [10, 13, 10.0]])
result = read_fwf(
StringIO(data), header=None, colspecs=colspecs, thousands=thousands
)
tm.assert_almost_equal(result, expected)
@pytest.mark.parametrize("header", [True, False])
def test_bool_header_arg(header):
# see gh-6114
data = """\
MyColumn
a
b
a
b"""
msg = "Passing a bool to header is invalid"
with pytest.raises(TypeError, match=msg):
read_fwf(StringIO(data), header=header)
def test_full_file():
# File with all values.
test = """index A B C
2000-01-03T00:00:00 0.980268513777 3 foo
2000-01-04T00:00:00 1.04791624281 -4 bar
2000-01-05T00:00:00 0.498580885705 73 baz
2000-01-06T00:00:00 1.12020151869 1 foo
2000-01-07T00:00:00 0.487094399463 0 bar
2000-01-10T00:00:00 0.836648671666 2 baz
2000-01-11T00:00:00 0.157160753327 34 foo"""
colspecs = ((0, 19), (21, 35), (38, 40), (42, 45))
expected = read_fwf(StringIO(test), colspecs=colspecs)
result = read_fwf(StringIO(test))
tm.assert_frame_equal(result, expected)
def test_full_file_with_missing():
# File with missing values.
test = """index A B C
2000-01-03T00:00:00 0.980268513777 3 foo
2000-01-04T00:00:00 1.04791624281 -4 bar
0.498580885705 73 baz
2000-01-06T00:00:00 1.12020151869 1 foo
2000-01-07T00:00:00 0 bar
2000-01-10T00:00:00 0.836648671666 2 baz
34"""
colspecs = ((0, 19), (21, 35), (38, 40), (42, 45))
expected = read_fwf(StringIO(test), colspecs=colspecs)
result = read_fwf(StringIO(test))
tm.assert_frame_equal(result, expected)
def test_full_file_with_spaces():
# File with spaces in columns.
test = """
Account Name Balance CreditLimit AccountCreated
101 Keanu Reeves 9315.45 10000.00 1/17/1998
312 Gerard Butler 90.00 1000.00 8/6/2003
868 Jennifer Love Hewitt 0 17000.00 5/25/1985
761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006
317 Bill Murray 789.65 5000.00 2/5/2007
""".strip(
"\r\n"
)
colspecs = ((0, 7), (8, 28), (30, 38), (42, 53), (56, 70))
expected = read_fwf(StringIO(test), colspecs=colspecs)
result = read_fwf(StringIO(test))
tm.assert_frame_equal(result, expected)
def test_full_file_with_spaces_and_missing():
# File with spaces and missing values in columns.
test = """
Account Name Balance CreditLimit AccountCreated
101 10000.00 1/17/1998
312 Gerard Butler 90.00 1000.00 8/6/2003
868 5/25/1985
761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006
317 Bill Murray 789.65
""".strip(
"\r\n"
)
colspecs = ((0, 7), (8, 28), (30, 38), (42, 53), (56, 70))
expected = read_fwf(StringIO(test), colspecs=colspecs)
result = read_fwf(StringIO(test))
tm.assert_frame_equal(result, expected)
def test_messed_up_data():
# Completely messed up file.
test = """
Account Name Balance Credit Limit Account Created
101 10000.00 1/17/1998
312 Gerard Butler 90.00 1000.00
761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006
317 Bill Murray 789.65
""".strip(
"\r\n"
)
colspecs = ((2, 10), (15, 33), (37, 45), (49, 61), (64, 79))
expected = read_fwf(StringIO(test), colspecs=colspecs)
result = read_fwf(StringIO(test))
tm.assert_frame_equal(result, expected)
def test_multiple_delimiters():
test = r"""
col1~~~~~col2 col3++++++++++++++++++col4
~~22.....11.0+++foo~~~~~~~~~~Keanu Reeves
33+++122.33\\\bar.........Gerard Butler
++44~~~~12.01 baz~~Jennifer Love Hewitt
~~55 11+++foo++++Jada Pinkett-Smith
..66++++++.03~~~bar Bill Murray
""".strip(
"\r\n"
)
delimiter = " +~.\\"
colspecs = ((0, 4), (7, 13), (15, 19), (21, 41))
expected = read_fwf(StringIO(test), colspecs=colspecs, delimiter=delimiter)
result = read_fwf(StringIO(test), delimiter=delimiter)
tm.assert_frame_equal(result, expected)
def test_variable_width_unicode():
data = """
שלום שלום
ום שלל
של ום
""".strip(
"\r\n"
)
encoding = "utf8"
kwargs = {"header": None, "encoding": encoding}
expected = read_fwf(
BytesIO(data.encode(encoding)), colspecs=[(0, 4), (5, 9)], **kwargs
)
result = read_fwf(BytesIO(data.encode(encoding)), **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("dtype", [{}, {"a": "float64", "b": str, "c": "int32"}])
def test_dtype(dtype):
data = """ a b c
1 2 3.2
3 4 5.2
"""
colspecs = [(0, 5), (5, 10), (10, None)]
result = read_fwf(StringIO(data), colspecs=colspecs, dtype=dtype)
expected = DataFrame(
{"a": [1, 3], "b": [2, 4], "c": [3.2, 5.2]}, columns=["a", "b", "c"]
)
for col, dt in dtype.items():
expected[col] = expected[col].astype(dt)
tm.assert_frame_equal(result, expected)
def test_skiprows_inference():
# see gh-11256
data = """
Text contained in the file header
DataCol1 DataCol2
0.0 1.0
101.6 956.1
""".strip()
skiprows = 2
expected = read_csv(StringIO(data), skiprows=skiprows, delim_whitespace=True)
result = read_fwf(StringIO(data), skiprows=skiprows)
tm.assert_frame_equal(result, expected)
def test_skiprows_by_index_inference():
data = """
To be skipped
Not To Be Skipped
Once more to be skipped
123 34 8 123
456 78 9 456
""".strip()
skiprows = [0, 2]
expected = read_csv(StringIO(data), skiprows=skiprows, delim_whitespace=True)
result = read_fwf(StringIO(data), skiprows=skiprows)
tm.assert_frame_equal(result, expected)
def test_skiprows_inference_empty():
data = """
AA BBB C
12 345 6
78 901 2
""".strip()
msg = "No rows from which to infer column width"
with pytest.raises(EmptyDataError, match=msg):
read_fwf(StringIO(data), skiprows=3)
def test_whitespace_preservation():
# see gh-16772
header = None
csv_data = """
a ,bbb
cc,dd """
fwf_data = """
a bbb
ccdd """
result = read_fwf(
StringIO(fwf_data), widths=[3, 3], header=header, skiprows=[0], delimiter="\n\t"
)
expected = read_csv(StringIO(csv_data), header=header)
tm.assert_frame_equal(result, expected)
def test_default_delimiter():
header = None
csv_data = """
a,bbb
cc,dd"""
fwf_data = """
a \tbbb
cc\tdd """
result = read_fwf(StringIO(fwf_data), widths=[3, 3], header=header, skiprows=[0])
expected = read_csv(StringIO(csv_data), header=header)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("infer", [True, False])
def test_fwf_compression(compression_only, infer):
data = """1111111111
2222222222
3333333333""".strip()
compression = compression_only
extension = icom._compression_to_extension[compression]
kwargs = {"widths": [5, 5], "names": ["one", "two"]}
expected = read_fwf(StringIO(data), **kwargs)
data = bytes(data, encoding="utf-8")
with tm.ensure_clean(filename="tmp." + extension) as path:
tm.write_to_compressed(compression, path, data)
if infer is not None:
kwargs["compression"] = "infer" if infer else compression
result = read_fwf(path, **kwargs)
tm.assert_frame_equal(result, expected)
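def _sketch_compression_infer():
    # Minimal sketch (not part of the original suite): compression="infer"
    # (also the default) picks the codec from the filename extension, which
    # is what the infer=True case above exercises.
    import gzip

    with tm.ensure_clean(filename="tmp.gz") as path:
        with gzip.open(path, "wb") as f:
            f.write(b"12345\n67890")
        result = read_fwf(path, widths=[5], names=["one"])
        assert result["one"].tolist() == [12345, 67890]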
def test_binary_mode():
"""
read_fwf supports opening files in binary mode.
GH 18035.
"""
data = """aas aas aas
bba bab b a"""
df_reference = DataFrame(
[["bba", "bab", "b a"]], columns=["aas", "aas.1", "aas.2"], index=[0]
)
with tm.ensure_clean() as path:
Path(path).write_text(data)
with open(path, "rb") as file:
df = read_fwf(file)
file.seek(0)
tm.assert_frame_equal(df, df_reference)
@pytest.mark.parametrize("memory_map", [True, False])
def test_encoding_mmap(memory_map):
"""
encoding should be working, even when using a memory-mapped file.
GH 23254.
"""
encoding = "iso8859_1"
with tm.ensure_clean() as path:
Path(path).write_bytes(" 1 A Ä 2\n".encode(encoding))
df = read_fwf(
path,
header=None,
widths=[2, 2, 2, 2],
encoding=encoding,
memory_map=memory_map,
)
df_reference = DataFrame([[1, "A", "Ä", 2]])
tm.assert_frame_equal(df, df_reference)
@pytest.mark.parametrize(
"colspecs, names, widths, index_col",
[
(
[(0, 6), (6, 12), (12, 18), (18, None)],
list("abcde"),
None,
None,
),
(
None,
list("abcde"),
[6] * 4,
None,
),
(
[(0, 6), (6, 12), (12, 18), (18, None)],
list("abcde"),
None,
True,
),
(
None,
list("abcde"),
[6] * 4,
False,
),
(
None,
list("abcde"),
[6] * 4,
True,
),
(
[(0, 6), (6, 12), (12, 18), (18, None)],
list("abcde"),
None,
False,
),
],
)
def test_len_colspecs_len_names(colspecs, names, widths, index_col):
# GH#40830
data = """col1 col2 col3 col4
bab ba 2"""
msg = "Length of colspecs must match length of names"
with pytest.raises(ValueError, match=msg):
read_fwf(
StringIO(data),
colspecs=colspecs,
names=names,
widths=widths,
index_col=index_col,
)
@pytest.mark.parametrize(
"colspecs, names, widths, index_col, expected",
[
(
[(0, 6), (6, 12), (12, 18), (18, None)],
list("abc"),
None,
0,
DataFrame(
index=["col1", "ba"],
columns=["a", "b", "c"],
data=[["col2", "col3", "col4"], ["b ba", "2", np.nan]],
),
),
(
[(0, 6), (6, 12), (12, 18), (18, None)],
list("ab"),
None,
[0, 1],
DataFrame(
index=[["col1", "ba"], ["col2", "b ba"]],
columns=["a", "b"],
data=[["col3", "col4"], ["2", np.nan]],
),
),
(
[(0, 6), (6, 12), (12, 18), (18, None)],
list("a"),
None,
[0, 1, 2],
DataFrame(
index=[["col1", "ba"], ["col2", "b ba"], ["col3", "2"]],
columns=["a"],
data=[["col4"], [np.nan]],
),
),
(
None,
list("abc"),
[6] * 4,
0,
DataFrame(
index=["col1", "ba"],
columns=["a", "b", "c"],
data=[["col2", "col3", "col4"], ["b ba", "2", np.nan]],
),
),
(
None,
list("ab"),
[6] * 4,
[0, 1],
DataFrame(
index=[["col1", "ba"], ["col2", "b ba"]],
columns=["a", "b"],
data=[["col3", "col4"], ["2", np.nan]],
),
),
(
None,
list("a"),
[6] * 4,
[0, 1, 2],
DataFrame(
index=[["col1", "ba"], ["col2", "b ba"], ["col3", "2"]],
columns=["a"],
data=[["col4"], [np.nan]],
),
),
],
)
def test_len_colspecs_len_names_with_index_col(
colspecs, names, widths, index_col, expected
):
# GH#40830
data = """col1 col2 col3 col4
bab ba 2"""
result = read_fwf(
StringIO(data),
colspecs=colspecs,
names=names,
widths=widths,
index_col=index_col,
)
tm.assert_frame_equal(result, expected)
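# A minimal sketch of the GH#40830 rule the cases above exercise (assumed
# semantics, for illustration only): names may be shorter than colspecs
# exactly when index_col accounts for the leftover leading columns.
def _example_names_shorter_than_colspecs():
    data = "idx111aaabbb\nidx222cccddd"
    # Three colspecs but two names: with index_col=0 the first parsed
    # column becomes the (unnamed) index, so the lengths reconcile.
    return read_fwf(
        StringIO(data),
        colspecs=[(0, 6), (6, 9), (9, 12)],
        names=["x", "y"],
        index_col=0,
    )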
def test_colspecs_with_comment():
# GH 14135
result = read_fwf(
StringIO("#\nA1K\n"), colspecs=[(1, 2), (2, 3)], comment="#", header=None
)
expected = DataFrame([[1, "K"]], columns=[0, 1])
tm.assert_frame_equal(result, expected)
def test_skip_rows_and_n_rows():
# GH#44021
data = """a\tb
1\t a
2\t b
3\t c
4\t d
5\t e
6\t f
"""
result = read_fwf(StringIO(data), nrows=4, skiprows=[2, 4])
expected = DataFrame({"a": [1, 3, 5, 6], "b": ["a", "c", "e", "f"]})
tm.assert_frame_equal(result, expected)
def test_skiprows_with_iterator():
# GH#10261
data = """0
1
2
3
4
5
6
7
8
9
"""
df_iter = read_fwf(
StringIO(data),
colspecs=[(0, 2)],
names=["a"],
iterator=True,
chunksize=2,
skiprows=[0, 1, 2, 6, 9],
)
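    # Note the expected chunks are uneven (2 rows, then 3, then an empty
    # tail): the skipped rows fall inside the reader's chunk buffering, so
    # the frames do not come out as strict chunksize=2 slices.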
expected_frames = [
DataFrame({"a": [3, 4]}),
DataFrame({"a": [5, 7, 8]}, index=[2, 3, 4]),
DataFrame({"a": []}, index=[], dtype="object"),
]
for i, result in enumerate(df_iter):
tm.assert_frame_equal(result, expected_frames[i])
def test_skiprows_passing_as_positional_deprecated():
# GH#41485
data = """0
1
2
"""
with tm.assert_produces_warning(FutureWarning, match="keyword-only"):
result = read_fwf(StringIO(data), [(0, 2)])
expected = DataFrame({"0": [1, 2]})
tm.assert_frame_equal(result, expected)
def test_names_and_infer_colspecs():
# GH#45337
data = """X Y Z
959.0 345 22.2
"""
result = read_fwf(StringIO(data), skiprows=1, usecols=[0, 2], names=["a", "b"])
expected = DataFrame({"a": [959.0], "b": 22.2})
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,277 @@
"""
Tests that skipped rows are properly handled during
parsing for all of the parsers defined in parsers.py
"""
from datetime import datetime
from io import StringIO
import numpy as np
import pytest
from pandas.errors import EmptyDataError
from pandas import (
DataFrame,
Index,
)
import pandas._testing as tm
# TODO: xfail these once the hanging-test issues have been identified
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
@pytest.mark.parametrize("skiprows", [list(range(6)), 6])
def test_skip_rows_bug(all_parsers, skiprows):
# see gh-505
parser = all_parsers
text = """#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
1/1/2000,1.,2.,3.
1/2/2000,4,5,6
1/3/2000,7,8,9
"""
result = parser.read_csv(
StringIO(text), skiprows=skiprows, header=None, index_col=0, parse_dates=True
)
index = Index(
[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], name=0
)
expected = DataFrame(
np.arange(1.0, 10.0).reshape((3, 3)), columns=[1, 2, 3], index=index
)
tm.assert_frame_equal(result, expected)
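# Quick sketch (not an original test) of the two skiprows forms the
# parametrization above covers: an integer drops that many leading lines,
# while a list drops exactly those 0-based line numbers.
def _example_skiprows_int_vs_list():
    from pandas import read_csv

    data = "x\n0\n1\n2\n3"
    head = read_csv(StringIO(data), skiprows=1)  # drops the "x" line
    picked = read_csv(StringIO(data), skiprows=[1, 3])  # drops "0" and "2"
    return head, picked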
def test_deep_skip_rows(all_parsers):
# see gh-4382
parser = all_parsers
data = "a,b,c\n" + "\n".join(
[",".join([str(i), str(i + 1), str(i + 2)]) for i in range(10)]
)
condensed_data = "a,b,c\n" + "\n".join(
[",".join([str(i), str(i + 1), str(i + 2)]) for i in [0, 1, 2, 3, 4, 6, 8, 9]]
)
result = parser.read_csv(StringIO(data), skiprows=[6, 8])
condensed_result = parser.read_csv(StringIO(condensed_data))
tm.assert_frame_equal(result, condensed_result)
def test_skip_rows_blank(all_parsers):
# see gh-9832
parser = all_parsers
text = """#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
1/1/2000,1.,2.,3.
1/2/2000,4,5,6
1/3/2000,7,8,9
"""
data = parser.read_csv(
StringIO(text), skiprows=6, header=None, index_col=0, parse_dates=True
)
index = Index(
[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], name=0
)
expected = DataFrame(
np.arange(1.0, 10.0).reshape((3, 3)), columns=[1, 2, 3], index=index
)
tm.assert_frame_equal(data, expected)
@pytest.mark.parametrize(
"data,kwargs,expected",
[
(
"""id,text,num_lines
1,"line 11
line 12",2
2,"line 21
line 22",2
3,"line 31",1""",
{"skiprows": [1]},
DataFrame(
[[2, "line 21\nline 22", 2], [3, "line 31", 1]],
columns=["id", "text", "num_lines"],
),
),
(
"a,b,c\n~a\n b~,~e\n d~,~f\n f~\n1,2,~12\n 13\n 14~",
{"quotechar": "~", "skiprows": [2]},
DataFrame([["a\n b", "e\n d", "f\n f"]], columns=["a", "b", "c"]),
),
(
(
"Text,url\n~example\n "
"sentence\n one~,url1\n~"
"example\n sentence\n two~,url2\n~"
"example\n sentence\n three~,url3"
),
{"quotechar": "~", "skiprows": [1, 3]},
DataFrame([["example\n sentence\n two", "url2"]], columns=["Text", "url"]),
),
],
)
def test_skip_row_with_newline(all_parsers, data, kwargs, expected):
# see gh-12775 and gh-10911
parser = all_parsers
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
def test_skip_row_with_quote(all_parsers):
# see gh-12775 and gh-10911
parser = all_parsers
data = """id,text,num_lines
1,"line '11' line 12",2
2,"line '21' line 22",2
3,"line '31' line 32",1"""
exp_data = [[2, "line '21' line 22", 2], [3, "line '31' line 32", 1]]
expected = DataFrame(exp_data, columns=["id", "text", "num_lines"])
result = parser.read_csv(StringIO(data), skiprows=[1])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"data,exp_data",
[
(
"""id,text,num_lines
1,"line \n'11' line 12",2
2,"line \n'21' line 22",2
3,"line \n'31' line 32",1""",
[[2, "line \n'21' line 22", 2], [3, "line \n'31' line 32", 1]],
),
(
"""id,text,num_lines
1,"line '11\n' line 12",2
2,"line '21\n' line 22",2
3,"line '31\n' line 32",1""",
[[2, "line '21\n' line 22", 2], [3, "line '31\n' line 32", 1]],
),
(
"""id,text,num_lines
1,"line '11\n' \r\tline 12",2
2,"line '21\n' \r\tline 22",2
3,"line '31\n' \r\tline 32",1""",
[[2, "line '21\n' \r\tline 22", 2], [3, "line '31\n' \r\tline 32", 1]],
),
],
)
def test_skip_row_with_newline_and_quote(all_parsers, data, exp_data):
# see gh-12775 and gh-10911
parser = all_parsers
result = parser.read_csv(StringIO(data), skiprows=[1])
expected = DataFrame(exp_data, columns=["id", "text", "num_lines"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"line_terminator", ["\n", "\r\n", "\r"] # "LF" # "CRLF" # "CR"
)
def test_skiprows_lineterminator(all_parsers, line_terminator, request):
# see gh-9079
parser = all_parsers
data = "\n".join(
[
"SMOSMANIA ThetaProbe-ML2X ",
"2007/01/01 01:00 0.2140 U M ",
"2007/01/01 02:00 0.2141 M O ",
"2007/01/01 04:00 0.2142 D M ",
]
)
expected = DataFrame(
[
["2007/01/01", "01:00", 0.2140, "U", "M"],
["2007/01/01", "02:00", 0.2141, "M", "O"],
["2007/01/01", "04:00", 0.2142, "D", "M"],
],
columns=["date", "time", "var", "flag", "oflag"],
)
if parser.engine == "python" and line_terminator == "\r":
        mark = pytest.mark.xfail(reason="'CR' is not respected with the Python parser yet")
request.node.add_marker(mark)
data = data.replace("\n", line_terminator)
result = parser.read_csv(
StringIO(data),
skiprows=1,
delim_whitespace=True,
names=["date", "time", "var", "flag", "oflag"],
)
tm.assert_frame_equal(result, expected)
def test_skiprows_infield_quote(all_parsers):
# see gh-14459
parser = all_parsers
data = 'a"\nb"\na\n1'
expected = DataFrame({"a": [1]})
result = parser.read_csv(StringIO(data), skiprows=2)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"kwargs,expected",
[
({}, DataFrame({"1": [3, 5]})),
({"header": 0, "names": ["foo"]}, DataFrame({"foo": [3, 5]})),
],
)
def test_skip_rows_callable(all_parsers, kwargs, expected):
parser = all_parsers
data = "a\n1\n2\n3\n4\n5"
result = parser.read_csv(StringIO(data), skiprows=lambda x: x % 2 == 0, **kwargs)
tm.assert_frame_equal(result, expected)
def test_skip_rows_skip_all(all_parsers):
parser = all_parsers
data = "a\n1\n2\n3\n4\n5"
msg = "No columns to parse from file"
with pytest.raises(EmptyDataError, match=msg):
parser.read_csv(StringIO(data), skiprows=lambda x: True)
def test_skip_rows_bad_callable(all_parsers):
msg = "by zero"
parser = all_parsers
data = "a\n1\n2\n3\n4\n5"
with pytest.raises(ZeroDivisionError, match=msg):
parser.read_csv(StringIO(data), skiprows=lambda x: 1 / 0)
def test_skip_rows_and_n_rows(all_parsers):
# GH#44021
data = """a,b
1,a
2,b
3,c
4,d
5,e
6,f
7,g
8,h
"""
parser = all_parsers
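    # nrows is applied to the rows that survive skiprows: lines 2, 4 and 6
    # are dropped first, and nrows=5 then keeps the five remaining rows.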
result = parser.read_csv(StringIO(data), nrows=5, skiprows=[2, 4, 6])
expected = DataFrame({"a": [1, 3, 5, 7, 8], "b": ["a", "c", "e", "g", "h"]})
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,350 @@
"""
Tests the TextReader class in parsers.pyx, which
is integral to the C engine in parsers.py
"""
from io import (
BytesIO,
StringIO,
)
import os
import numpy as np
import pytest
import pandas._libs.parsers as parser
from pandas._libs.parsers import TextReader
from pandas import DataFrame
import pandas._testing as tm
from pandas.io.parsers import (
TextFileReader,
read_csv,
)
from pandas.io.parsers.c_parser_wrapper import ensure_dtype_objs
class TestTextReader:
@pytest.fixture(autouse=True)
def setup_method(self, datapath):
self.dirpath = datapath("io", "parser", "data")
csv1_dirpath = datapath("io", "data", "csv")
self.csv1 = os.path.join(csv1_dirpath, "test1.csv")
self.csv2 = os.path.join(self.dirpath, "test2.csv")
self.xls1 = os.path.join(self.dirpath, "test.xls")
def test_file_handle(self):
with open(self.csv1, "rb") as f:
reader = TextReader(f)
reader.read()
def test_file_handle_mmap(self):
# this was never using memory_map=True
with open(self.csv1, "rb") as f:
reader = TextReader(f, header=None)
reader.read()
def test_StringIO(self):
with open(self.csv1, "rb") as f:
text = f.read()
src = BytesIO(text)
reader = TextReader(src, header=None)
reader.read()
def test_string_factorize(self):
# should this be optional?
data = "a\nb\na\nb\na"
reader = TextReader(StringIO(data), header=None)
result = reader.read()
assert len(set(map(id, result[0]))) == 2
def test_skipinitialspace(self):
data = "a, b\na, b\na, b\na, b"
reader = TextReader(StringIO(data), skipinitialspace=True, header=None)
result = reader.read()
tm.assert_numpy_array_equal(
result[0], np.array(["a", "a", "a", "a"], dtype=np.object_)
)
tm.assert_numpy_array_equal(
result[1], np.array(["b", "b", "b", "b"], dtype=np.object_)
)
def test_parse_booleans(self):
data = "True\nFalse\nTrue\nTrue"
reader = TextReader(StringIO(data), header=None)
result = reader.read()
assert result[0].dtype == np.bool_
def test_delimit_whitespace(self):
data = 'a b\na\t\t "b"\n"a"\t \t b'
reader = TextReader(StringIO(data), delim_whitespace=True, header=None)
result = reader.read()
tm.assert_numpy_array_equal(
result[0], np.array(["a", "a", "a"], dtype=np.object_)
)
tm.assert_numpy_array_equal(
result[1], np.array(["b", "b", "b"], dtype=np.object_)
)
def test_embedded_newline(self):
data = 'a\n"hello\nthere"\nthis'
reader = TextReader(StringIO(data), header=None)
result = reader.read()
expected = np.array(["a", "hello\nthere", "this"], dtype=np.object_)
tm.assert_numpy_array_equal(result[0], expected)
def test_euro_decimal(self):
data = "12345,67\n345,678"
reader = TextReader(StringIO(data), delimiter=":", decimal=",", header=None)
result = reader.read()
expected = np.array([12345.67, 345.678])
tm.assert_almost_equal(result[0], expected)
def test_integer_thousands(self):
data = "123,456\n12,500"
reader = TextReader(StringIO(data), delimiter=":", thousands=",", header=None)
result = reader.read()
expected = np.array([123456, 12500], dtype=np.int64)
tm.assert_almost_equal(result[0], expected)
def test_integer_thousands_alt(self):
data = "123.456\n12.500"
reader = TextFileReader(
StringIO(data), delimiter=":", thousands=".", header=None
)
result = reader.read()
expected = DataFrame([123456, 12500])
tm.assert_frame_equal(result, expected)
def test_skip_bad_lines(self, capsys):
# too many lines, see #2430 for why
data = "a:b:c\nd:e:f\ng:h:i\nj:k:l:m\nl:m:n\no:p:q:r"
reader = TextReader(StringIO(data), delimiter=":", header=None)
msg = r"Error tokenizing data\. C error: Expected 3 fields in line 4, saw 4"
with pytest.raises(parser.ParserError, match=msg):
reader.read()
reader = TextReader(
StringIO(data), delimiter=":", header=None, on_bad_lines=2 # Skip
)
result = reader.read()
expected = {
0: np.array(["a", "d", "g", "l"], dtype=object),
1: np.array(["b", "e", "h", "m"], dtype=object),
2: np.array(["c", "f", "i", "n"], dtype=object),
}
assert_array_dicts_equal(result, expected)
reader = TextReader(
StringIO(data), delimiter=":", header=None, on_bad_lines=1 # Warn
)
reader.read()
captured = capsys.readouterr()
assert "Skipping line 4" in captured.err
assert "Skipping line 6" in captured.err
def test_header_not_enough_lines(self):
data = "skip this\nskip this\na,b,c\n1,2,3\n4,5,6"
reader = TextReader(StringIO(data), delimiter=",", header=2)
header = reader.header
expected = [["a", "b", "c"]]
assert header == expected
recs = reader.read()
expected = {
0: np.array([1, 4], dtype=np.int64),
1: np.array([2, 5], dtype=np.int64),
2: np.array([3, 6], dtype=np.int64),
}
assert_array_dicts_equal(recs, expected)
def test_escapechar(self):
data = '\\"hello world"\n\\"hello world"\n\\"hello world"'
reader = TextReader(StringIO(data), delimiter=",", header=None, escapechar="\\")
result = reader.read()
expected = {0: np.array(['"hello world"'] * 3, dtype=object)}
assert_array_dicts_equal(result, expected)
def test_eof_has_eol(self):
# handling of new line at EOF
pass
def test_na_substitution(self):
pass
def test_numpy_string_dtype(self):
data = """\
a,1
aa,2
aaa,3
aaaa,4
aaaaa,5"""
def _make_reader(**kwds):
if "dtype" in kwds:
kwds["dtype"] = ensure_dtype_objs(kwds["dtype"])
return TextReader(StringIO(data), delimiter=",", header=None, **kwds)
reader = _make_reader(dtype="S5,i4")
result = reader.read()
assert result[0].dtype == "S5"
ex_values = np.array(["a", "aa", "aaa", "aaaa", "aaaaa"], dtype="S5")
assert (result[0] == ex_values).all()
assert result[1].dtype == "i4"
reader = _make_reader(dtype="S4")
result = reader.read()
assert result[0].dtype == "S4"
ex_values = np.array(["a", "aa", "aaa", "aaaa", "aaaa"], dtype="S4")
assert (result[0] == ex_values).all()
assert result[1].dtype == "S4"
def test_pass_dtype(self):
data = """\
one,two
1,a
2,b
3,c
4,d"""
def _make_reader(**kwds):
if "dtype" in kwds:
kwds["dtype"] = ensure_dtype_objs(kwds["dtype"])
return TextReader(StringIO(data), delimiter=",", **kwds)
reader = _make_reader(dtype={"one": "u1", 1: "S1"})
result = reader.read()
assert result[0].dtype == "u1"
assert result[1].dtype == "S1"
reader = _make_reader(dtype={"one": np.uint8, 1: object})
result = reader.read()
assert result[0].dtype == "u1"
assert result[1].dtype == "O"
reader = _make_reader(dtype={"one": np.dtype("u1"), 1: np.dtype("O")})
result = reader.read()
assert result[0].dtype == "u1"
assert result[1].dtype == "O"
def test_usecols(self):
data = """\
a,b,c
1,2,3
4,5,6
7,8,9
10,11,12"""
def _make_reader(**kwds):
return TextReader(StringIO(data), delimiter=",", **kwds)
reader = _make_reader(usecols=(1, 2))
result = reader.read()
exp = _make_reader().read()
assert len(result) == 2
assert (result[1] == exp[1]).all()
assert (result[2] == exp[2]).all()
def test_cr_delimited(self):
def _test(text, **kwargs):
nice_text = text.replace("\r", "\r\n")
result = TextReader(StringIO(text), **kwargs).read()
expected = TextReader(StringIO(nice_text), **kwargs).read()
assert_array_dicts_equal(result, expected)
data = "a,b,c\r1,2,3\r4,5,6\r7,8,9\r10,11,12"
_test(data, delimiter=",")
data = "a b c\r1 2 3\r4 5 6\r7 8 9\r10 11 12"
_test(data, delim_whitespace=True)
data = "a,b,c\r1,2,3\r4,5,6\r,88,9\r10,11,12"
_test(data, delimiter=",")
sample = (
"A,B,C,D,E,F,G,H,I,J,K,L,M,N,O\r"
"AAAAA,BBBBB,0,0,0,0,0,0,0,0,0,0,0,0,0\r"
",BBBBB,0,0,0,0,0,0,0,0,0,0,0,0,0"
)
_test(sample, delimiter=",")
data = "A B C\r 2 3\r4 5 6"
_test(data, delim_whitespace=True)
data = "A B C\r2 3\r4 5 6"
_test(data, delim_whitespace=True)
def test_empty_field_eof(self):
data = "a,b,c\n1,2,3\n4,,"
result = TextReader(StringIO(data), delimiter=",").read()
expected = {
0: np.array([1, 4], dtype=np.int64),
1: np.array(["2", ""], dtype=object),
2: np.array(["3", ""], dtype=object),
}
assert_array_dicts_equal(result, expected)
# GH5664
a = DataFrame([["b"], [np.nan]], columns=["a"], index=["a", "c"])
b = DataFrame([[1, 1, 1, 0], [1, 1, 1, 0]], columns=list("abcd"), index=[1, 1])
c = DataFrame(
[
[1, 2, 3, 4],
[6, np.nan, np.nan, np.nan],
[8, 9, 10, 11],
[13, 14, np.nan, np.nan],
],
columns=list("abcd"),
index=[0, 5, 7, 12],
)
for _ in range(100):
df = read_csv(StringIO("a,b\nc\n"), skiprows=0, names=["a"], engine="c")
tm.assert_frame_equal(df, a)
df = read_csv(
StringIO("1,1,1,1,0\n" * 2 + "\n" * 2), names=list("abcd"), engine="c"
)
tm.assert_frame_equal(df, b)
df = read_csv(
StringIO("0,1,2,3,4\n5,6\n7,8,9,10,11\n12,13,14"),
names=list("abcd"),
engine="c",
)
tm.assert_frame_equal(df, c)
def test_empty_csv_input(self):
# GH14867
with read_csv(
StringIO(), chunksize=20, header=None, names=["a", "b", "c"]
) as df:
assert isinstance(df, TextFileReader)
def assert_array_dicts_equal(left, right):
for k, v in left.items():
tm.assert_numpy_array_equal(np.asarray(v), np.asarray(right[k]))

View File

@ -0,0 +1,201 @@
"""
Tests that features that are currently unsupported in
either the Python or C parser are actually enforced
and are clearly communicated to the user.
Ultimately, the goal is to remove test cases from this
test suite as new feature support is added to the parsers.
"""
from io import StringIO
import os
from pathlib import Path
import pytest
from pandas.compat import (
is_ci_environment,
is_platform_mac,
is_platform_windows,
)
from pandas.errors import ParserError
import pandas._testing as tm
from pandas.io.parsers import read_csv
import pandas.io.parsers.readers as parsers
@pytest.fixture(params=["python", "python-fwf"], ids=lambda val: val)
def python_engine(request):
return request.param
class TestUnsupportedFeatures:
def test_mangle_dupe_cols_false(self):
# see gh-12935
data = "a b c\n1 2 3"
msg = "is not supported"
for engine in ("c", "python"):
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), engine=engine, mangle_dupe_cols=False)
def test_c_engine(self):
# see gh-6607
data = "a b c\n1 2 3"
msg = "does not support"
# specify C engine with unsupported options (raise)
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), engine="c", sep=None, delim_whitespace=False)
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), engine="c", sep=r"\s")
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), engine="c", sep="\t", quotechar=chr(128))
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), engine="c", skipfooter=1)
# specify C-unsupported options without python-unsupported options
with tm.assert_produces_warning(parsers.ParserWarning):
read_csv(StringIO(data), sep=None, delim_whitespace=False)
with tm.assert_produces_warning(parsers.ParserWarning):
read_csv(StringIO(data), sep=r"\s")
with tm.assert_produces_warning(parsers.ParserWarning):
read_csv(StringIO(data), sep="\t", quotechar=chr(128))
with tm.assert_produces_warning(parsers.ParserWarning):
read_csv(StringIO(data), skipfooter=1)
text = """ A B C D E
one two three four
a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640
a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744
x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""
msg = "Error tokenizing data"
with pytest.raises(ParserError, match=msg):
read_csv(StringIO(text), sep="\\s+")
with pytest.raises(ParserError, match=msg):
read_csv(StringIO(text), engine="c", sep="\\s+")
msg = "Only length-1 thousands markers supported"
data = """A|B|C
1|2,334|5
10|13|10.
"""
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), thousands=",,")
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), thousands="")
msg = "Only length-1 line terminators supported"
data = "a,b,c~~1,2,3~~4,5,6"
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), lineterminator="~~")
def test_python_engine(self, python_engine):
from pandas.io.parsers.readers import _python_unsupported as py_unsupported
data = """1,2,3,,
1,2,3,4,
1,2,3,4,5
1,2,,,
1,2,3,4,"""
for default in py_unsupported:
msg = (
f"The {repr(default)} option is not "
f"supported with the {repr(python_engine)} engine"
)
kwargs = {default: object()}
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), engine=python_engine, **kwargs)
def test_python_engine_file_no_iter(self, python_engine):
# see gh-16530
class NoNextBuffer:
def __init__(self, csv_data):
self.data = csv_data
def __next__(self):
return self.data.__next__()
def read(self):
return self.data
def readline(self):
return self.data
data = "a\n1"
msg = "'NoNextBuffer' object is not iterable|argument 1 must be an iterator"
with pytest.raises(TypeError, match=msg):
read_csv(NoNextBuffer(data), engine=python_engine)
def test_pyarrow_engine(self):
from pandas.io.parsers.readers import _pyarrow_unsupported as pa_unsupported
data = """1,2,3,,
1,2,3,4,
1,2,3,4,5
1,2,,,
1,2,3,4,"""
for default in pa_unsupported:
msg = (
f"The {repr(default)} option is not "
f"supported with the 'pyarrow' engine"
)
kwargs = {default: object()}
default_needs_bool = {"warn_bad_lines", "error_bad_lines"}
if default == "dialect":
kwargs[default] = "excel" # test a random dialect
elif default in default_needs_bool:
kwargs[default] = True
elif default == "on_bad_lines":
kwargs[default] = "warn"
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), engine="pyarrow", **kwargs)
def test_on_bad_lines_callable_python_only(self, all_parsers):
# GH 5686
sio = StringIO("a,b\n1,2")
bad_lines_func = lambda x: x
parser = all_parsers
if all_parsers.engine != "python":
msg = "on_bad_line can only be a callable function if engine='python'"
with pytest.raises(ValueError, match=msg):
parser.read_csv(sio, on_bad_lines=bad_lines_func)
else:
parser.read_csv(sio, on_bad_lines=bad_lines_func)
def test_close_file_handle_on_invalid_usecols(all_parsers):
# GH 45384
parser = all_parsers
error = ValueError
if parser.engine == "pyarrow":
pyarrow = pytest.importorskip("pyarrow")
error = pyarrow.lib.ArrowKeyError
if is_ci_environment() and (is_platform_windows() or is_platform_mac()):
# GH#45547 causes timeouts on windows/mac builds
pytest.skip("GH#45547 causing timeouts on windows/mac builds 2022-01-22")
with tm.ensure_clean("test.csv") as fname:
Path(fname).write_text("col1,col2\na,b\n1,2")
with tm.assert_produces_warning(False):
with pytest.raises(error, match="col3"):
parser.read_csv(fname, usecols=["col1", "col2", "col3"])
# unlink fails on windows if file handles still point to it
os.unlink(fname)
def test_invalid_file_inputs(all_parsers):
# GH#45957
parser = all_parsers
if parser.engine == "python":
pytest.skip("Python engine supports lists.")
with pytest.raises(ValueError, match="Invalid"):
parser.read_csv([])

View File

@ -0,0 +1,155 @@
"""
Tests the usecols functionality during parsing
for all of the parsers defined in parsers.py
"""
from io import StringIO
import pytest
from pandas import (
DataFrame,
Index,
Timestamp,
)
import pandas._testing as tm
_msg_validate_usecols_arg = (
"'usecols' must either be list-like "
"of all strings, all unicode, all "
"integers or a callable."
)
_msg_validate_usecols_names = (
"Usecols do not match columns, columns expected but not found: {0}"
)
# TODO(1.4): Change these to xfails whenever parse_dates support (which was
# intentionally disabled to keep PR sizes small) is added back
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
@pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]])
def test_usecols_with_parse_dates(all_parsers, usecols):
# see gh-9755
data = """a,b,c,d,e
0,1,20140101,0900,4
0,1,20140102,1000,4"""
parser = all_parsers
parse_dates = [[1, 2]]
cols = {
"a": [0, 0],
"c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")],
}
expected = DataFrame(cols, columns=["c_d", "a"])
result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates)
tm.assert_frame_equal(result, expected)
def test_usecols_with_parse_dates2(all_parsers):
# see gh-13604
parser = all_parsers
data = """2008-02-07 09:40,1032.43
2008-02-07 09:50,1042.54
2008-02-07 10:00,1051.65"""
names = ["date", "values"]
usecols = names[:]
parse_dates = [0]
index = Index(
[
Timestamp("2008-02-07 09:40"),
Timestamp("2008-02-07 09:50"),
Timestamp("2008-02-07 10:00"),
],
name="date",
)
cols = {"values": [1032.43, 1042.54, 1051.65]}
expected = DataFrame(cols, index=index)
result = parser.read_csv(
StringIO(data),
parse_dates=parse_dates,
index_col=0,
usecols=usecols,
header=None,
names=names,
)
tm.assert_frame_equal(result, expected)
def test_usecols_with_parse_dates3(all_parsers):
# see gh-14792
parser = all_parsers
data = """a,b,c,d,e,f,g,h,i,j
2016/09/21,1,1,2,3,4,5,6,7,8"""
usecols = list("abcdefghij")
parse_dates = [0]
cols = {
"a": Timestamp("2016-09-21"),
"b": [1],
"c": [1],
"d": [2],
"e": [3],
"f": [4],
"g": [5],
"h": [6],
"i": [7],
"j": [8],
}
expected = DataFrame(cols, columns=usecols)
result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates)
tm.assert_frame_equal(result, expected)
def test_usecols_with_parse_dates4(all_parsers):
data = "a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8"
usecols = list("abcdefghij")
parse_dates = [[0, 1]]
parser = all_parsers
cols = {
"a_b": "2016/09/21 1",
"c": [1],
"d": [2],
"e": [3],
"f": [4],
"g": [5],
"h": [6],
"i": [7],
"j": [8],
}
expected = DataFrame(cols, columns=["a_b"] + list("cdefghij"))
result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]])
@pytest.mark.parametrize(
"names",
[
list("abcde"), # Names span all columns in original data.
list("acd"), # Names span only the selected columns.
],
)
def test_usecols_with_parse_dates_and_names(all_parsers, usecols, names):
# see gh-9755
s = """0,1,20140101,0900,4
0,1,20140102,1000,4"""
parse_dates = [[1, 2]]
parser = all_parsers
cols = {
"a": [0, 0],
"c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")],
}
expected = DataFrame(cols, columns=["c_d", "a"])
result = parser.read_csv(
StringIO(s), names=names, parse_dates=parse_dates, usecols=usecols
)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,97 @@
"""
Tests the usecols functionality during parsing
for all of the parsers defined in parsers.py
"""
from io import StringIO
import pytest
from pandas import DataFrame
import pandas._testing as tm
_msg_validate_usecols_arg = (
"'usecols' must either be list-like "
"of all strings, all unicode, all "
"integers or a callable."
)
_msg_validate_usecols_names = (
"Usecols do not match columns, columns expected but not found: {0}"
)
def test_usecols_with_unicode_strings(all_parsers):
# see gh-13219
data = """AAA,BBB,CCC,DDD
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
parser = all_parsers
exp_data = {
"AAA": {
0: 0.056674972999999997,
1: 2.6132309819999997,
2: 3.5689350380000002,
},
"BBB": {0: 8, 1: 2, 2: 7},
}
expected = DataFrame(exp_data)
result = parser.read_csv(StringIO(data), usecols=["AAA", "BBB"])
tm.assert_frame_equal(result, expected)
def test_usecols_with_single_byte_unicode_strings(all_parsers):
# see gh-13219
data = """A,B,C,D
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
parser = all_parsers
exp_data = {
"A": {
0: 0.056674972999999997,
1: 2.6132309819999997,
2: 3.5689350380000002,
},
"B": {0: 8, 1: 2, 2: 7},
}
expected = DataFrame(exp_data)
result = parser.read_csv(StringIO(data), usecols=["A", "B"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("usecols", [["AAA", b"BBB"], [b"AAA", "BBB"]])
def test_usecols_with_mixed_encoding_strings(all_parsers, usecols):
data = """AAA,BBB,CCC,DDD
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
parser = all_parsers
with pytest.raises(ValueError, match=_msg_validate_usecols_arg):
parser.read_csv(StringIO(data), usecols=usecols)
@pytest.mark.parametrize("usecols", [["あああ", "いい"], ["あああ", "いい"]])
def test_usecols_with_multi_byte_characters(all_parsers, usecols):
data = """あああ,いい,ううう,ええええ
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
parser = all_parsers
exp_data = {
"あああ": {
0: 0.056674972999999997,
1: 2.6132309819999997,
2: 3.5689350380000002,
},
"いい": {0: 8, 1: 2, 2: 7},
}
expected = DataFrame(exp_data)
result = parser.read_csv(StringIO(data), usecols=usecols)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,418 @@
"""
Tests the usecols functionality during parsing
for all of the parsers defined in parsers.py
"""
from io import StringIO
import numpy as np
import pytest
from pandas import (
DataFrame,
Index,
)
import pandas._testing as tm
_msg_validate_usecols_arg = (
"'usecols' must either be list-like "
"of all strings, all unicode, all "
"integers or a callable."
)
_msg_validate_usecols_names = (
"Usecols do not match columns, columns expected but not found: {0}"
)
# TODO(1.4): Change to xfails at release time
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
def test_raise_on_mixed_dtype_usecols(all_parsers):
# See gh-12678
data = """a,b,c
1000,2000,3000
4000,5000,6000
"""
usecols = [0, "b", 2]
parser = all_parsers
with pytest.raises(ValueError, match=_msg_validate_usecols_arg):
parser.read_csv(StringIO(data), usecols=usecols)
@pytest.mark.parametrize("usecols", [(1, 2), ("b", "c")])
def test_usecols(all_parsers, usecols):
data = """\
a,b,c
1,2,3
4,5,6
7,8,9
10,11,12"""
parser = all_parsers
result = parser.read_csv(StringIO(data), usecols=usecols)
expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=["b", "c"])
tm.assert_frame_equal(result, expected)
def test_usecols_with_names(all_parsers):
data = """\
a,b,c
1,2,3
4,5,6
7,8,9
10,11,12"""
parser = all_parsers
names = ["foo", "bar"]
result = parser.read_csv(StringIO(data), names=names, usecols=[1, 2], header=0)
expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=names)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"names,usecols", [(["b", "c"], [1, 2]), (["a", "b", "c"], ["b", "c"])]
)
def test_usecols_relative_to_names(all_parsers, names, usecols):
data = """\
1,2,3
4,5,6
7,8,9
10,11,12"""
parser = all_parsers
result = parser.read_csv(StringIO(data), names=names, header=None, usecols=usecols)
expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=["b", "c"])
tm.assert_frame_equal(result, expected)
def test_usecols_relative_to_names2(all_parsers):
# see gh-5766
data = """\
1,2,3
4,5,6
7,8,9
10,11,12"""
parser = all_parsers
result = parser.read_csv(
StringIO(data), names=["a", "b"], header=None, usecols=[0, 1]
)
expected = DataFrame([[1, 2], [4, 5], [7, 8], [10, 11]], columns=["a", "b"])
tm.assert_frame_equal(result, expected)
def test_usecols_name_length_conflict(all_parsers):
data = """\
1,2,3
4,5,6
7,8,9
10,11,12"""
parser = all_parsers
msg = "Number of passed names did not match number of header fields in the file"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), names=["a", "b"], header=None, usecols=[1])
def test_usecols_single_string(all_parsers):
# see gh-20558
parser = all_parsers
data = """foo, bar, baz
1000, 2000, 3000
4000, 5000, 6000"""
with pytest.raises(ValueError, match=_msg_validate_usecols_arg):
parser.read_csv(StringIO(data), usecols="foo")
@pytest.mark.parametrize(
"data", ["a,b,c,d\n1,2,3,4\n5,6,7,8", "a,b,c,d\n1,2,3,4,\n5,6,7,8,"]
)
def test_usecols_index_col_false(all_parsers, data):
# see gh-9082
parser = all_parsers
usecols = ["a", "c", "d"]
expected = DataFrame({"a": [1, 5], "c": [3, 7], "d": [4, 8]})
result = parser.read_csv(StringIO(data), usecols=usecols, index_col=False)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("index_col", ["b", 0])
@pytest.mark.parametrize("usecols", [["b", "c"], [1, 2]])
def test_usecols_index_col_conflict(all_parsers, usecols, index_col):
# see gh-4201: test that index_col as integer reflects usecols
parser = all_parsers
data = "a,b,c,d\nA,a,1,one\nB,b,2,two"
expected = DataFrame({"c": [1, 2]}, index=Index(["a", "b"], name="b"))
result = parser.read_csv(StringIO(data), usecols=usecols, index_col=index_col)
tm.assert_frame_equal(result, expected)
def test_usecols_index_col_conflict2(all_parsers):
# see gh-4201: test that index_col as integer reflects usecols
parser = all_parsers
data = "a,b,c,d\nA,a,1,one\nB,b,2,two"
expected = DataFrame({"b": ["a", "b"], "c": [1, 2], "d": ("one", "two")})
expected = expected.set_index(["b", "c"])
result = parser.read_csv(
StringIO(data), usecols=["b", "c", "d"], index_col=["b", "c"]
)
tm.assert_frame_equal(result, expected)
def test_usecols_implicit_index_col(all_parsers):
# see gh-2654
parser = all_parsers
data = "a,b,c\n4,apple,bat,5.7\n8,orange,cow,10"
result = parser.read_csv(StringIO(data), usecols=["a", "b"])
expected = DataFrame({"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8])
tm.assert_frame_equal(result, expected)
def test_usecols_index_col_middle(all_parsers):
# GH#9098
parser = all_parsers
data = """a,b,c,d
1,2,3,4
"""
result = parser.read_csv(StringIO(data), usecols=["b", "c", "d"], index_col="c")
expected = DataFrame({"b": [2], "d": [4]}, index=Index([3], name="c"))
tm.assert_frame_equal(result, expected)
def test_usecols_index_col_end(all_parsers):
# GH#9098
parser = all_parsers
data = """a,b,c,d
1,2,3,4
"""
result = parser.read_csv(StringIO(data), usecols=["b", "c", "d"], index_col="d")
expected = DataFrame({"b": [2], "c": [3]}, index=Index([4], name="d"))
tm.assert_frame_equal(result, expected)
def test_usecols_regex_sep(all_parsers):
# see gh-2733
parser = all_parsers
data = "a b c\n4 apple bat 5.7\n8 orange cow 10"
result = parser.read_csv(StringIO(data), sep=r"\s+", usecols=("a", "b"))
expected = DataFrame({"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8])
tm.assert_frame_equal(result, expected)
def test_usecols_with_whitespace(all_parsers):
parser = all_parsers
data = "a b c\n4 apple bat 5.7\n8 orange cow 10"
result = parser.read_csv(StringIO(data), delim_whitespace=True, usecols=("a", "b"))
expected = DataFrame({"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"usecols,expected",
[
# Column selection by index.
([0, 1], DataFrame(data=[[1000, 2000], [4000, 5000]], columns=["2", "0"])),
# Column selection by name.
(
["0", "1"],
DataFrame(data=[[2000, 3000], [5000, 6000]], columns=["0", "1"]),
),
],
)
def test_usecols_with_integer_like_header(all_parsers, usecols, expected):
parser = all_parsers
data = """2,0,1
1000,2000,3000
4000,5000,6000"""
result = parser.read_csv(StringIO(data), usecols=usecols)
tm.assert_frame_equal(result, expected)
def test_empty_usecols(all_parsers):
data = "a,b,c\n1,2,3\n4,5,6"
expected = DataFrame()
parser = all_parsers
result = parser.read_csv(StringIO(data), usecols=set())
tm.assert_frame_equal(result, expected)
def test_np_array_usecols(all_parsers):
# see gh-12546
parser = all_parsers
data = "a,b,c\n1,2,3"
usecols = np.array(["a", "b"])
expected = DataFrame([[1, 2]], columns=usecols)
result = parser.read_csv(StringIO(data), usecols=usecols)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"usecols,expected",
[
(
lambda x: x.upper() in ["AAA", "BBB", "DDD"],
DataFrame(
{
"AaA": {
0: 0.056674972999999997,
1: 2.6132309819999997,
2: 3.5689350380000002,
},
"bBb": {0: 8, 1: 2, 2: 7},
"ddd": {0: "a", 1: "b", 2: "a"},
}
),
),
(lambda x: False, DataFrame()),
],
)
def test_callable_usecols(all_parsers, usecols, expected):
# see gh-14154
data = """AaA,bBb,CCC,ddd
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
parser = all_parsers
result = parser.read_csv(StringIO(data), usecols=usecols)
tm.assert_frame_equal(result, expected)
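# Sketch of the callable protocol exercised above (illustrative only): the
# function is called once per header name and a column is kept when it
# returns True.
def _example_callable_usecols():
    from pandas import read_csv

    data = "AaA,bBb,CCC\n1,2,3"
    # Keeps only the columns whose name starts with "a", case-insensitively.
    return read_csv(StringIO(data), usecols=lambda name: name.lower().startswith("a"))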
@pytest.mark.parametrize("usecols", [["a", "c"], lambda x: x in ["a", "c"]])
def test_incomplete_first_row(all_parsers, usecols):
# see gh-6710
data = "1,2\n1,2,3"
parser = all_parsers
names = ["a", "b", "c"]
expected = DataFrame({"a": [1, 1], "c": [np.nan, 3]})
result = parser.read_csv(StringIO(data), names=names, usecols=usecols)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"data,usecols,kwargs,expected",
[
# see gh-8985
(
"19,29,39\n" * 2 + "10,20,30,40",
[0, 1, 2],
{"header": None},
DataFrame([[19, 29, 39], [19, 29, 39], [10, 20, 30]]),
),
# see gh-9549
(
("A,B,C\n1,2,3\n3,4,5\n1,2,4,5,1,6\n1,2,3,,,1,\n1,2,3\n5,6,7"),
["A", "B", "C"],
{},
DataFrame(
{
"A": [1, 3, 1, 1, 1, 5],
"B": [2, 4, 2, 2, 2, 6],
"C": [3, 5, 4, 3, 3, 7],
}
),
),
],
)
def test_uneven_length_cols(all_parsers, data, usecols, kwargs, expected):
# see gh-8985
parser = all_parsers
result = parser.read_csv(StringIO(data), usecols=usecols, **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"usecols,kwargs,expected,msg",
[
(
["a", "b", "c", "d"],
{},
DataFrame({"a": [1, 5], "b": [2, 6], "c": [3, 7], "d": [4, 8]}),
None,
),
(
["a", "b", "c", "f"],
{},
None,
_msg_validate_usecols_names.format(r"\['f'\]"),
),
(["a", "b", "f"], {}, None, _msg_validate_usecols_names.format(r"\['f'\]")),
(
["a", "b", "f", "g"],
{},
None,
_msg_validate_usecols_names.format(r"\[('f', 'g'|'g', 'f')\]"),
),
# see gh-14671
(
None,
{"header": 0, "names": ["A", "B", "C", "D"]},
DataFrame({"A": [1, 5], "B": [2, 6], "C": [3, 7], "D": [4, 8]}),
None,
),
(
["A", "B", "C", "f"],
{"header": 0, "names": ["A", "B", "C", "D"]},
None,
_msg_validate_usecols_names.format(r"\['f'\]"),
),
(
["A", "B", "f"],
{"names": ["A", "B", "C", "D"]},
None,
_msg_validate_usecols_names.format(r"\['f'\]"),
),
],
)
def test_raises_on_usecols_names_mismatch(all_parsers, usecols, kwargs, expected, msg):
data = "a,b,c,d\n1,2,3,4\n5,6,7,8"
kwargs.update(usecols=usecols)
parser = all_parsers
if expected is None:
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
else:
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("usecols", [["A", "C"], [0, 2]])
def test_usecols_subset_names_mismatch_orig_columns(all_parsers, usecols):
data = "a,b,c,d\n1,2,3,4\n5,6,7,8"
names = ["A", "B", "C", "D"]
parser = all_parsers
result = parser.read_csv(StringIO(data), header=0, names=names, usecols=usecols)
expected = DataFrame({"A": [1, 5], "C": [3, 7]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("names", [None, ["a", "b"]])
def test_usecols_indices_out_of_bounds(all_parsers, names):
# GH#25623
parser = all_parsers
data = """
a,b
1,2
"""
with tm.assert_produces_warning(
FutureWarning, check_stacklevel=False, raise_on_extra_warnings=False
):
result = parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0)
expected = DataFrame({"a": [1], "b": [None]})
if names is None and parser.engine == "python":
expected = DataFrame({"a": [1]})
tm.assert_frame_equal(result, expected)