first commit

2025-07-04 23:32:37 +00:00 · 2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions
--- a/.venv/Lib/site-packages/pandas/tests/io/parser/conftest.py
+++ b/.venv/Lib/site-packages/pandas/tests/io/parser/conftest.py
@ -0,0 +1,287 @@
+from __future__ import annotations
+
+import os
+
+import pytest
+
+from pandas.compat._optional import VERSIONS
+
+from pandas import (
+    read_csv,
+    read_table,
+)
+import pandas._testing as tm
+
+
+class BaseParser:
+    """
+    Thin wrapper around the pandas CSV readers that pins parser settings.
+
+    Subclasses override ``engine``, ``low_memory`` and
+    ``float_precision_choices``. Every read helper forwards its arguments to
+    the matching pandas function after forcing ``engine`` and ``low_memory``.
+    """
+
+    # Passed to the readers as the ``engine`` keyword.
+    engine: str | None = None
+    # Passed to the readers as the ``low_memory`` keyword.
+    low_memory = True
+    # ``float_precision`` values to pair with this parser; iterated by
+    # ``_get_all_parser_float_precision_combinations``.
+    float_precision_choices: list[str | None] = []
+
+    def update_kwargs(self, kwargs):
+        """
+        Return a new dict holding ``kwargs`` with ``engine`` and
+        ``low_memory`` overridden by this parser's settings.
+
+        The dict passed in is not modified.
+        """
+        return {**kwargs, "engine": self.engine, "low_memory": self.low_memory}
+
+    def read_csv(self, *args, **kwargs):
+        """
+        Call ``pandas.read_csv`` with this parser's settings applied.
+        """
+        return read_csv(*args, **self.update_kwargs(kwargs))
+
+    def read_csv_check_warnings(
+        self, warn_type: type[Warning], warn_msg: str, *args, **kwargs
+    ):
+        """
+        Call ``pandas.read_csv`` and assert that it emits ``warn_type`` with a
+        message matching ``warn_msg``; return the reader's result.
+        """
+        # The stacklevel check has to happen here rather than in the tests:
+        # this is the frame that calls read_csv, so it is the one the
+        # warning should point to.
+        merged = self.update_kwargs(kwargs)
+        with tm.assert_produces_warning(warn_type, match=warn_msg):
+            return read_csv(*args, **merged)
+
+    def read_table(self, *args, **kwargs):
+        """
+        Call ``pandas.read_table`` with this parser's settings applied.
+        """
+        return read_table(*args, **self.update_kwargs(kwargs))
+
+
+class CParser(BaseParser):
+    """Parser wrapper that reads with the ``"c"`` engine."""
+
+    float_precision_choices = [None, "high", "round_trip"]
+    engine = "c"
+
+
+class CParserHighMemory(CParser):
+    """C-engine parser wrapper that passes ``low_memory=False`` to the readers."""
+
+    low_memory = False
+
+
+class CParserLowMemory(CParser):
+    """
+    C-engine parser wrapper that passes ``low_memory=True`` to the readers.
+
+    The value equals the ``BaseParser`` default; it is restated here so the
+    class mirrors ``CParserHighMemory``.
+    """
+
+    low_memory = True
+
+
+class PythonParser(BaseParser):
+    """Parser wrapper that reads with the ``"python"`` engine."""
+
+    float_precision_choices = [None]
+    engine = "python"
+
+
+class PyArrowParser(BaseParser):
+    """Parser wrapper that reads with the ``"pyarrow"`` engine."""
+
+    float_precision_choices = [None]
+    engine = "pyarrow"
+
+
+@pytest.fixture
+def csv_dir_path(datapath):
+    """
+    Directory holding the data files used by the parser tests, resolved
+    through the ``datapath`` fixture.
+    """
+    data_dir = datapath("io", "parser", "data")
+    return data_dir
+
+
+@pytest.fixture
+def csv1(datapath):
+    """
+    Path to the "test1.csv" data file used by the parser tests.
+    """
+    csv_dir = datapath("io", "data", "csv")
+    return os.path.join(csv_dir, "test1.csv")
+
+
+# Short aliases for the parser classes used to build fixture parameters.
+_cParserHighMemory = CParserHighMemory
+_cParserLowMemory = CParserLowMemory
+_pythonParser = PythonParser
+_pyarrowParser = PyArrowParser
+
+# Per-engine parameter lists, each followed by its test ids. The ids must
+# stay in the same order as the parameters they label.
+_c_parsers_only = [_cParserHighMemory, _cParserLowMemory]
+_c_parser_ids = ["c_high", "c_low"]
+
+_py_parsers_only = [_pythonParser]
+_py_parser_ids = ["python"]
+
+# The pyarrow parser is wrapped so that it carries the ``single_cpu`` mark.
+_pyarrow_parsers_only = [pytest.param(_pyarrowParser, marks=pytest.mark.single_cpu)]
+_pyarrow_parsers_ids = ["pyarrow"]
+
+# Every engine: the C variants first, then python, then pyarrow.
+_all_parsers = _c_parsers_only + _py_parsers_only + _pyarrow_parsers_only
+_all_parser_ids = _c_parser_ids + _py_parser_ids + _pyarrow_parsers_ids
+
+
+@pytest.fixture(params=_all_parsers, ids=_all_parser_ids)
+def all_parsers(request):
+    """
+    Provide an instance of every CSV parser wrapper in turn.
+
+    For the pyarrow engine the test is skipped unless pyarrow can be imported
+    at the version listed in ``VERSIONS``.
+    """
+    parser = request.param()
+    if parser.engine != "pyarrow":
+        return parser
+
+    pytest.importorskip("pyarrow", VERSIONS["pyarrow"])
+    # TODO: find a way to disable threads altogether for more stable CI
+    # runs; until then the pyarrow CPU count is set to 1.
+    import pyarrow
+
+    pyarrow.set_cpu_count(1)
+    return parser
+
+
+@pytest.fixture(params=_c_parsers_only, ids=_c_parser_ids)
+def c_parser_only(request):
+    """
+    Provide an instance of each C-engine parser wrapper in turn.
+    """
+    parser_cls = request.param
+    return parser_cls()
+
+
+@pytest.fixture(params=_py_parsers_only, ids=_py_parser_ids)
+def python_parser_only(request):
+    """
+    Provide an instance of the Python-engine parser wrapper.
+    """
+    parser_cls = request.param
+    return parser_cls()
+
+
+@pytest.fixture(params=_pyarrow_parsers_only, ids=_pyarrow_parsers_ids)
+def pyarrow_parser_only(request):
+    """
+    Fixture all of the CSV parsers using the Pyarrow engine.
+
+    The requesting test is skipped when pyarrow cannot be imported at the
+    version listed in ``VERSIONS``, matching what ``all_parsers`` does for
+    the pyarrow engine.
+    """
+    # Skip instead of failing inside read_csv when pyarrow is missing or
+    # too old.
+    pytest.importorskip("pyarrow", VERSIONS["pyarrow"])
+    return request.param()
+
+
+def _get_all_parser_float_precision_combinations():
+    """
+    Build every allowed (parser instance, float precision) pairing.
+
+    Returns a dict with two parallel lists: ``"params"`` holds the
+    ``pytest.param`` objects and ``"ids"`` the matching
+    ``"<parser id>-<precision>"`` labels.
+    """
+    params = []
+    ids = []
+    for entry, entry_id in zip(_all_parsers, _all_parser_ids):
+        # Entries wrapped in pytest.param expose the parser class through
+        # ``.values``; plain entries are the class itself.
+        parser_cls = entry.values[0] if hasattr(entry, "values") else entry
+        # The pyarrow mark is re-applied because the wrapper was just removed.
+        marks = pytest.mark.single_cpu if parser_cls.engine == "pyarrow" else ()
+        for precision in parser_cls.float_precision_choices:
+            params.append(pytest.param((parser_cls(), precision), marks=marks))
+            ids.append(f"{entry_id}-{precision}")
+
+    return {"params": params, "ids": ids}
+
+
+# Build the parameter and id lists once so both keyword arguments below come
+# from the same call (and the same parser instances).
+_parser_precision_combinations = _get_all_parser_float_precision_combinations()
+
+
+@pytest.fixture(
+    params=_parser_precision_combinations["params"],
+    ids=_parser_precision_combinations["ids"],
+)
+def all_parsers_all_precisions(request):
+    """
+    Fixture for all allowable combinations of parser
+    and float precision.
+
+    Returns the ``(parser, precision)`` tuple for the current parameter.
+    For the pyarrow engine the test is skipped when pyarrow cannot be
+    imported at the version listed in ``VERSIONS``, matching ``all_parsers``.
+    """
+    parser = request.param[0]
+    if parser.engine == "pyarrow":
+        # Skip instead of failing inside read_csv when pyarrow is missing
+        # or too old.
+        pytest.importorskip("pyarrow", VERSIONS["pyarrow"])
+    return request.param
+
+
+# Integer suffixes of the UTF encodings under test.
+_utf_values = [8, 16, 32]
+
+# Building blocks for the encoding-name templates.
+_encoding_prefixes = ["utf", "UTF"]
+_encoding_seps = ["", "-", "_"]
+
+# One template per (separator, prefix) pair, separators varying slowest.
+# Each keeps a literal "{0}" placeholder for later ``str.format`` use.
+_encoding_fmts = [
+    prefix + sep + "{0}" for sep in _encoding_seps for prefix in _encoding_prefixes
+]
+
+
+@pytest.fixture(params=_utf_values)
+def utf_value(request):
+    """
+    Provide each integer UTF encoding value from ``_utf_values`` in turn.
+    """
+    value = request.param
+    return value
+
+
+@pytest.fixture(params=_encoding_fmts)
+def encoding_fmt(request):
+    """
+    Provide each UTF encoding name template from ``_encoding_fmts`` in turn.
+    """
+    fmt = request.param
+    return fmt
+
+
+@pytest.fixture(
+    params=[
+        # positive cases; the expected value is a float.
+        # NOTE(review): the expected values imply "," is the decimal mark and
+        # "_" the thousands separator -- confirm against the consuming tests.
+        ("-1,0", -1.0),
+        ("-1,2e0", -1.2),
+        ("-1e0", -1.0),
+        ("+1e0", 1.0),
+        ("+1e+0", 1.0),
+        ("+1e-1", 0.1),
+        ("+,1e1", 1.0),
+        ("+1,e0", 1.0),
+        ("-,1e1", -1.0),
+        ("-1,e0", -1.0),
+        ("0,1", 0.1),
+        ("1,", 1.0),
+        (",1", 0.1),
+        ("-,1", -0.1),
+        ("1_,", 1.0),
+        ("1_234,56", 1234.56),
+        ("1_234,56e0", 1234.56),
+        # negative cases; must not parse as float
+        # (the expected value is the input string, unchanged)
+        ("_", "_"),
+        ("-_", "-_"),
+        ("-_1", "-_1"),
+        ("-_1e0", "-_1e0"),
+        ("_1", "_1"),
+        ("_1,", "_1,"),
+        ("_1,_", "_1,_"),
+        ("_1e0", "_1e0"),
+        ("1,2e_1", "1,2e_1"),
+        ("1,2e1_0", "1,2e1_0"),
+        ("1,_2", "1,_2"),
+        (",1__2", ",1__2"),
+        (",1e", ",1e"),
+        ("-,1e", "-,1e"),
+        ("1_000,000_000", "1_000,000_000"),
+        ("1,e1_2", "1,e1_2"),
+        ("e11,2", "e11,2"),
+        ("1e11,2", "1e11,2"),
+        ("1,2,2", "1,2,2"),
+        ("1,2_1", "1,2_1"),
+        ("1,2e-10e1", "1,2e-10e1"),
+        ("--1,2", "--1,2"),
+        ("1a_2,1", "1a_2,1"),
+        # positive cases again: upper-case "E" exponent marker
+        ("1,2E-1", 0.12),
+        ("1,2E1", 12.0),
+    ]
+)
+def numeric_decimal(request):
+    """
+    Fixture of (input text, expected result) pairs for numeric parsing.
+
+    The first entry of each pair is the value to read and the second is the
+    expected result: a float for formats that should be recognized as
+    numbers, or the unchanged input string for formats that must not be
+    parsed as a float.
+    """
+    return request.param
+
+
+@pytest.fixture
+def pyarrow_xfail(request):
+    """
+    Mark the requesting test as an expected failure when it runs with the
+    pyarrow engine.
+
+    The parser is looked up through ``all_parsers`` or
+    ``all_parsers_all_precisions``; tests using neither are left untouched.
+    """
+    active = request.fixturenames
+    if "all_parsers" in active:
+        parser = request.getfixturevalue("all_parsers")
+    elif "all_parsers_all_precisions" in active:
+        # This fixture yields a (parser, precision) tuple.
+        combo = request.getfixturevalue("all_parsers_all_precisions")
+        parser = combo[0]
+    else:
+        parser = None
+
+    if parser is not None and parser.engine == "pyarrow":
+        request.node.add_marker(
+            pytest.mark.xfail(reason="pyarrow doesn't support this.")
+        )
+
+
+@pytest.fixture
+def pyarrow_skip(request):
+    """
+    Skip the requesting test when it runs with the pyarrow engine.
+
+    The parser is looked up through ``all_parsers`` or
+    ``all_parsers_all_precisions``; tests using neither are left untouched.
+    """
+    active = request.fixturenames
+    if "all_parsers" in active:
+        parser = request.getfixturevalue("all_parsers")
+    elif "all_parsers_all_precisions" in active:
+        # This fixture yields a (parser, precision) tuple.
+        combo = request.getfixturevalue("all_parsers_all_precisions")
+        parser = combo[0]
+    else:
+        parser = None
+
+    if parser is not None and parser.engine == "pyarrow":
+        pytest.skip("pyarrow doesn't support this.")