mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-07-03 06:31:28 +00:00
first commit
This commit is contained in:
146
.venv/Lib/site-packages/pandas/tests/io/parser/test_dialect.py
Normal file
146
.venv/Lib/site-packages/pandas/tests/io/parser/test_dialect.py
Normal file
@ -0,0 +1,146 @@
|
||||
"""
|
||||
Tests that dialects are properly handled during parsing
|
||||
for all of the parsers defined in parsers.py
|
||||
"""
|
||||
|
||||
import csv
|
||||
from io import StringIO
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas.errors import ParserWarning
|
||||
|
||||
from pandas import DataFrame
|
||||
import pandas._testing as tm
|
||||
|
||||
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def custom_dialect():
|
||||
dialect_name = "weird"
|
||||
dialect_kwargs = {
|
||||
"doublequote": False,
|
||||
"escapechar": "~",
|
||||
"delimiter": ":",
|
||||
"skipinitialspace": False,
|
||||
"quotechar": "~",
|
||||
"quoting": 3,
|
||||
}
|
||||
return dialect_name, dialect_kwargs
|
||||
|
||||
|
||||
def test_dialect(all_parsers):
|
||||
parser = all_parsers
|
||||
data = """\
|
||||
label1,label2,label3
|
||||
index1,"a,c,e
|
||||
index2,b,d,f
|
||||
"""
|
||||
|
||||
dia = csv.excel()
|
||||
dia.quoting = csv.QUOTE_NONE
|
||||
df = parser.read_csv(StringIO(data), dialect=dia)
|
||||
|
||||
data = """\
|
||||
label1,label2,label3
|
||||
index1,a,c,e
|
||||
index2,b,d,f
|
||||
"""
|
||||
exp = parser.read_csv(StringIO(data))
|
||||
exp.replace("a", '"a', inplace=True)
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
|
||||
def test_dialect_str(all_parsers):
|
||||
dialect_name = "mydialect"
|
||||
parser = all_parsers
|
||||
data = """\
|
||||
fruit:vegetable
|
||||
apple:broccoli
|
||||
pear:tomato
|
||||
"""
|
||||
exp = DataFrame({"fruit": ["apple", "pear"], "vegetable": ["broccoli", "tomato"]})
|
||||
|
||||
with tm.with_csv_dialect(dialect_name, delimiter=":"):
|
||||
df = parser.read_csv(StringIO(data), dialect=dialect_name)
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
|
||||
def test_invalid_dialect(all_parsers):
|
||||
class InvalidDialect:
|
||||
pass
|
||||
|
||||
data = "a\n1"
|
||||
parser = all_parsers
|
||||
msg = "Invalid dialect"
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
parser.read_csv(StringIO(data), dialect=InvalidDialect)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"arg",
|
||||
[None, "doublequote", "escapechar", "skipinitialspace", "quotechar", "quoting"],
|
||||
)
|
||||
@pytest.mark.parametrize("value", ["dialect", "default", "other"])
|
||||
def test_dialect_conflict_except_delimiter(all_parsers, custom_dialect, arg, value):
|
||||
# see gh-23761.
|
||||
dialect_name, dialect_kwargs = custom_dialect
|
||||
parser = all_parsers
|
||||
|
||||
expected = DataFrame({"a": [1], "b": [2]})
|
||||
data = "a:b\n1:2"
|
||||
|
||||
warning_klass = None
|
||||
kwds = {}
|
||||
|
||||
# arg=None tests when we pass in the dialect without any other arguments.
|
||||
if arg is not None:
|
||||
if "value" == "dialect": # No conflict --> no warning.
|
||||
kwds[arg] = dialect_kwargs[arg]
|
||||
elif "value" == "default": # Default --> no warning.
|
||||
from pandas.io.parsers.base_parser import parser_defaults
|
||||
|
||||
kwds[arg] = parser_defaults[arg]
|
||||
else: # Non-default + conflict with dialect --> warning.
|
||||
warning_klass = ParserWarning
|
||||
kwds[arg] = "blah"
|
||||
|
||||
with tm.with_csv_dialect(dialect_name, **dialect_kwargs):
|
||||
with tm.assert_produces_warning(warning_klass):
|
||||
result = parser.read_csv(StringIO(data), dialect=dialect_name, **kwds)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"kwargs,warning_klass",
|
||||
[
|
||||
({"sep": ","}, None), # sep is default --> sep_override=True
|
||||
({"sep": "."}, ParserWarning), # sep isn't default --> sep_override=False
|
||||
({"delimiter": ":"}, None), # No conflict
|
||||
({"delimiter": None}, None), # Default arguments --> sep_override=True
|
||||
({"delimiter": ","}, ParserWarning), # Conflict
|
||||
({"delimiter": "."}, ParserWarning), # Conflict
|
||||
],
|
||||
ids=[
|
||||
"sep-override-true",
|
||||
"sep-override-false",
|
||||
"delimiter-no-conflict",
|
||||
"delimiter-default-arg",
|
||||
"delimiter-conflict",
|
||||
"delimiter-conflict2",
|
||||
],
|
||||
)
|
||||
def test_dialect_conflict_delimiter(all_parsers, custom_dialect, kwargs, warning_klass):
|
||||
# see gh-23761.
|
||||
dialect_name, dialect_kwargs = custom_dialect
|
||||
parser = all_parsers
|
||||
|
||||
expected = DataFrame({"a": [1], "b": [2]})
|
||||
data = "a:b\n1:2"
|
||||
|
||||
with tm.with_csv_dialect(dialect_name, **dialect_kwargs):
|
||||
with tm.assert_produces_warning(warning_klass):
|
||||
result = parser.read_csv(StringIO(data), dialect=dialect_name, **kwargs)
|
||||
tm.assert_frame_equal(result, expected)
|
Reference in New Issue
Block a user