Mirror of https://github.com/aykhans/AzSuicideDataVisualization.git (synced 2025-07-02 14:27:31 +00:00)

Commit: first commit
.venv/Lib/site-packages/altair/utils/tests/test_core.py (new file, 265 lines)
@@ -0,0 +1,265 @@
import types

import numpy as np
import pandas as pd
import pytest

import altair as alt
from .. import parse_shorthand, update_nested, infer_encoding_types
from ..core import infer_dtype

FAKE_CHANNELS_MODULE = '''
"""Fake channels module for utility tests."""

from altair.utils import schemapi


class FieldChannel(object):
    def __init__(self, shorthand, **kwargs):
        kwargs['shorthand'] = shorthand
        return super(FieldChannel, self).__init__(**kwargs)


class ValueChannel(object):
    def __init__(self, value, **kwargs):
        kwargs['value'] = value
        return super(ValueChannel, self).__init__(**kwargs)


class X(FieldChannel, schemapi.SchemaBase):
    _schema = {}
    _encoding_name = "x"


class XValue(ValueChannel, schemapi.SchemaBase):
    _schema = {}
    _encoding_name = "x"


class Y(FieldChannel, schemapi.SchemaBase):
    _schema = {}
    _encoding_name = "y"


class YValue(ValueChannel, schemapi.SchemaBase):
    _schema = {}
    _encoding_name = "y"


class StrokeWidth(FieldChannel, schemapi.SchemaBase):
    _schema = {}
    _encoding_name = "strokeWidth"


class StrokeWidthValue(ValueChannel, schemapi.SchemaBase):
    _schema = {}
    _encoding_name = "strokeWidth"
'''


@pytest.mark.parametrize(
    "value,expected_type",
    [
        ([1, 2, 3], "integer"),
        ([1.0, 2.0, 3.0], "floating"),
        ([1, 2.0, 3], "mixed-integer-float"),
        (["a", "b", "c"], "string"),
        (["a", "b", np.nan], "mixed"),
    ],
)
def test_infer_dtype(value, expected_type):
    assert infer_dtype(value) == expected_type


def test_parse_shorthand():
    def check(s, **kwargs):
        assert parse_shorthand(s) == kwargs

    check("")

    # Fields alone
    check("foobar", field="foobar")
    check("blah:(fd ", field="blah:(fd ")

    # Fields with type
    check("foobar:quantitative", type="quantitative", field="foobar")
    check("foobar:nominal", type="nominal", field="foobar")
    check("foobar:ordinal", type="ordinal", field="foobar")
    check("foobar:temporal", type="temporal", field="foobar")
    check("foobar:geojson", type="geojson", field="foobar")

    check("foobar:Q", type="quantitative", field="foobar")
    check("foobar:N", type="nominal", field="foobar")
    check("foobar:O", type="ordinal", field="foobar")
    check("foobar:T", type="temporal", field="foobar")
    check("foobar:G", type="geojson", field="foobar")

    # Fields with aggregate and/or type
    check("average(foobar)", field="foobar", aggregate="average")
    check("min(foobar):temporal", type="temporal", field="foobar", aggregate="min")
    check("sum(foobar):Q", type="quantitative", field="foobar", aggregate="sum")

    # check that invalid arguments are not split-out
    check("invalid(blah)", field="invalid(blah)")
    check("blah:invalid", field="blah:invalid")
    check("invalid(blah):invalid", field="invalid(blah):invalid")

    # check parsing in presence of strange characters
    check(
        "average(a b:(c\nd):Q",
        aggregate="average",
        field="a b:(c\nd",
        type="quantitative",
    )

    # special case: count doesn't need an argument
    check("count()", aggregate="count", type="quantitative")
    check("count():O", aggregate="count", type="ordinal")

    # time units:
    check("month(x)", field="x", timeUnit="month", type="temporal")
    check("year(foo):O", field="foo", timeUnit="year", type="ordinal")
    check("date(date):quantitative", field="date", timeUnit="date", type="quantitative")
    check(
        "yearmonthdate(field)", field="field", timeUnit="yearmonthdate", type="temporal"
    )


def test_parse_shorthand_with_data():
    def check(s, data, **kwargs):
        assert parse_shorthand(s, data) == kwargs

    data = pd.DataFrame(
        {
            "x": [1, 2, 3, 4, 5],
            "y": ["A", "B", "C", "D", "E"],
            "z": pd.date_range("2018-01-01", periods=5, freq="D"),
            "t": pd.date_range("2018-01-01", periods=5, freq="D").tz_localize("UTC"),
        }
    )

    check("x", data, field="x", type="quantitative")
    check("y", data, field="y", type="nominal")
    check("z", data, field="z", type="temporal")
    check("t", data, field="t", type="temporal")
    check("count(x)", data, field="x", aggregate="count", type="quantitative")
    check("count()", data, aggregate="count", type="quantitative")
    check("month(z)", data, timeUnit="month", field="z", type="temporal")
    check("month(t)", data, timeUnit="month", field="t", type="temporal")


def test_parse_shorthand_all_aggregates():
    aggregates = alt.Root._schema["definitions"]["AggregateOp"]["enum"]
    for aggregate in aggregates:
        shorthand = "{aggregate}(field):Q".format(aggregate=aggregate)
        assert parse_shorthand(shorthand) == {
            "aggregate": aggregate,
            "field": "field",
            "type": "quantitative",
        }


def test_parse_shorthand_all_timeunits():
    timeUnits = []
    for loc in ["Local", "Utc"]:
        for typ in ["Single", "Multi"]:
            defn = loc + typ + "TimeUnit"
            timeUnits.extend(alt.Root._schema["definitions"][defn]["enum"])
    for timeUnit in timeUnits:
        shorthand = "{timeUnit}(field):Q".format(timeUnit=timeUnit)
        assert parse_shorthand(shorthand) == {
            "timeUnit": timeUnit,
            "field": "field",
            "type": "quantitative",
        }


def test_parse_shorthand_window_count():
    shorthand = "count()"
    dct = parse_shorthand(
        shorthand,
        parse_aggregates=False,
        parse_window_ops=True,
        parse_timeunits=False,
        parse_types=False,
    )
    assert dct == {"op": "count"}


def test_parse_shorthand_all_window_ops():
    window_ops = alt.Root._schema["definitions"]["WindowOnlyOp"]["enum"]
    aggregates = alt.Root._schema["definitions"]["AggregateOp"]["enum"]
    for op in window_ops + aggregates:
        shorthand = "{op}(field)".format(op=op)
        dct = parse_shorthand(
            shorthand,
            parse_aggregates=False,
            parse_window_ops=True,
            parse_timeunits=False,
            parse_types=False,
        )
        assert dct == {"field": "field", "op": op}


def test_update_nested():
    original = {"x": {"b": {"foo": 2}, "c": 4}}
    update = {"x": {"b": {"foo": 5}, "d": 6}, "y": 40}

    output = update_nested(original, update, copy=True)
    assert output is not original
    assert output == {"x": {"b": {"foo": 5}, "c": 4, "d": 6}, "y": 40}

    output2 = update_nested(original, update)
    assert output2 is original
    assert output == output2


@pytest.fixture
def channels():
    channels = types.ModuleType("channels")
    exec(FAKE_CHANNELS_MODULE, channels.__dict__)
    return channels


def _getargs(*args, **kwargs):
    return args, kwargs


def test_infer_encoding_types(channels):
    expected = dict(
        x=channels.X("xval"),
        y=channels.YValue("yval"),
        strokeWidth=channels.StrokeWidthValue(value=4),
    )

    # All positional args
    args, kwds = _getargs(
        channels.X("xval"), channels.YValue("yval"), channels.StrokeWidthValue(4)
    )
    assert infer_encoding_types(args, kwds, channels) == expected

    # All keyword args
    args, kwds = _getargs(x="xval", y=alt.value("yval"), strokeWidth=alt.value(4))
    assert infer_encoding_types(args, kwds, channels) == expected

    # Mixed positional & keyword
    args, kwds = _getargs(
        channels.X("xval"), channels.YValue("yval"), strokeWidth=alt.value(4)
    )
    assert infer_encoding_types(args, kwds, channels) == expected


def test_infer_encoding_types_with_condition(channels):
    args, kwds = _getargs(
        x=alt.condition("pred1", alt.value(1), alt.value(2)),
        y=alt.condition("pred2", alt.value(1), "yval"),
        strokeWidth=alt.condition("pred3", "sval", alt.value(2)),
    )
    expected = dict(
        x=channels.XValue(2, condition=channels.XValue(1, test="pred1")),
        y=channels.Y("yval", condition=channels.YValue(1, test="pred2")),
        strokeWidth=channels.StrokeWidthValue(
            2, condition=channels.StrokeWidth("sval", test="pred3")
        ),
    )
    assert infer_encoding_types(args, kwds, channels) == expected
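
For orientation, the shorthand grammar pinned down by test_parse_shorthand can be exercised directly. A minimal sketch, assuming altair 4.x, where parse_shorthand is re-exported from altair.utils:

# Sketch only: decompose "aggregate(field):type" shorthand.
from altair.utils import parse_shorthand

print(parse_shorthand("sum(foobar):Q"))
# -> {'aggregate': 'sum', 'field': 'foobar', 'type': 'quantitative'}

# Unrecognized aggregates or type codes stay inside the field name:
print(parse_shorthand("invalid(blah):invalid"))
# -> {'field': 'invalid(blah):invalid'}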
.venv/Lib/site-packages/altair/utils/tests/test_data.py (new file, 139 lines)
@@ -0,0 +1,139 @@
import os

import pytest
import pandas as pd
from toolz import pipe

from ..data import limit_rows, MaxRowsError, sample, to_values, to_json, to_csv


def _create_dataframe(N):
    data = pd.DataFrame({"x": range(N), "y": range(N)})
    return data


def _create_data_with_values(N):
    data = {"values": [{"x": i, "y": i + 1} for i in range(N)]}
    return data


def test_limit_rows():
    """Test the limit_rows data transformer."""
    data = _create_dataframe(10)
    result = limit_rows(data, max_rows=20)
    assert data is result
    with pytest.raises(MaxRowsError):
        pipe(data, limit_rows(max_rows=5))
    data = _create_data_with_values(10)
    result = pipe(data, limit_rows(max_rows=20))
    assert data is result
    with pytest.raises(MaxRowsError):
        limit_rows(data, max_rows=5)


def test_sample():
    """Test the sample data transformer."""
    data = _create_dataframe(20)
    result = pipe(data, sample(n=10))
    assert len(result) == 10
    assert isinstance(result, pd.DataFrame)
    data = _create_data_with_values(20)
    result = sample(data, n=10)
    assert isinstance(result, dict)
    assert "values" in result
    assert len(result["values"]) == 10
    data = _create_dataframe(20)
    result = pipe(data, sample(frac=0.5))
    assert len(result) == 10
    assert isinstance(result, pd.DataFrame)
    data = _create_data_with_values(20)
    result = sample(data, frac=0.5)
    assert isinstance(result, dict)
    assert "values" in result
    assert len(result["values"]) == 10


def test_to_values():
    """Test the to_values data transformer."""
    data = _create_dataframe(10)
    result = pipe(data, to_values)
    assert result == {"values": data.to_dict(orient="records")}


def test_type_error():
    """Ensure that TypeError is raised for types other than dict/DataFrame."""
    for f in (sample, limit_rows, to_values):
        with pytest.raises(TypeError):
            pipe(0, f)


def test_dataframe_to_json():
    """Test to_json

    - make certain the filename is deterministic
    - make certain the file contents match the data
    """
    data = _create_dataframe(10)
    try:
        result1 = pipe(data, to_json)
        result2 = pipe(data, to_json)
        filename = result1["url"]
        output = pd.read_json(filename)
    finally:
        os.remove(filename)

    assert result1 == result2
    assert output.equals(data)


def test_dict_to_json():
    """Test to_json

    - make certain the filename is deterministic
    - make certain the file contents match the data
    """
    data = _create_data_with_values(10)
    try:
        result1 = pipe(data, to_json)
        result2 = pipe(data, to_json)
        filename = result1["url"]
        output = pd.read_json(filename).to_dict(orient="records")
    finally:
        os.remove(filename)

    assert result1 == result2
    assert data == {"values": output}


def test_dataframe_to_csv():
    """Test to_csv with dataframe input

    - make certain the filename is deterministic
    - make certain the file contents match the data
    """
    data = _create_dataframe(10)
    try:
        result1 = pipe(data, to_csv)
        result2 = pipe(data, to_csv)
        filename = result1["url"]
        output = pd.read_csv(filename)
    finally:
        os.remove(filename)

    assert result1 == result2
    assert output.equals(data)


def test_dict_to_csv():
    """Test to_csv with dict input

    - make certain the filename is deterministic
    - make certain the file contents match the data
    """
    data = _create_data_with_values(10)
    try:
        result1 = pipe(data, to_csv)
        result2 = pipe(data, to_csv)
        filename = result1["url"]
        output = pd.read_csv(filename).to_dict(orient="records")
    finally:
        os.remove(filename)

    assert result1 == result2
    assert data == {"values": output}
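
The transformers above are curried, which is what makes the pipe() idiom in these tests work. A minimal sketch of the composition, assuming altair 4.x and toolz:

# Sketch only: partially applied transformers chain into a pipeline.
import pandas as pd
from toolz import pipe
from altair.utils.data import limit_rows, sample, to_values

df = pd.DataFrame({"x": range(100), "y": range(100)})
values = pipe(df, limit_rows(max_rows=1000), sample(n=10), to_values)
print(len(values["values"]))  # -> 10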
.venv/Lib/site-packages/altair/utils/tests/test_deprecation.py (new file, 24 lines)
@@ -0,0 +1,24 @@
import pytest

import altair as alt
from altair.utils import AltairDeprecationWarning
from altair.utils.deprecation import _deprecate, deprecated


def test_deprecated_class():
    OldChart = _deprecate(alt.Chart, "OldChart")
    with pytest.warns(AltairDeprecationWarning) as record:
        OldChart()
    assert "alt.OldChart" in record[0].message.args[0]
    assert "alt.Chart" in record[0].message.args[0]


def test_deprecation_decorator():
    @deprecated(message="func is deprecated")
    def func(x):
        return x + 1

    with pytest.warns(AltairDeprecationWarning) as record:
        y = func(1)
    assert y == 2
    assert record[0].message.args[0] == "func is deprecated"
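
A minimal sketch of the decorator under test, assuming the altair 4.x internals imported above:

# Sketch only: wrap a function so calls emit AltairDeprecationWarning.
from altair.utils.deprecation import deprecated

@deprecated(message="old_func is deprecated; use new_func instead")
def old_func(x):  # hypothetical function, for illustration
    return x + 1

old_func(1)  # warns with the message above, then returns 2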
.venv/Lib/site-packages/altair/utils/tests/test_execeval.py (new file, 30 lines)
@@ -0,0 +1,30 @@
from ..execeval import eval_block

HAS_RETURN = """
x = 4
y = 2 * x
3 * y
"""

NO_RETURN = """
x = 4
y = 2 * x
z = 3 * y
"""


def test_eval_block_with_return():
    _globals = {}
    result = eval_block(HAS_RETURN, _globals)
    assert result == 24
    assert _globals["x"] == 4
    assert _globals["y"] == 8


def test_eval_block_without_return():
    _globals = {}
    result = eval_block(NO_RETURN, _globals)
    assert result is None
    assert _globals["x"] == 4
    assert _globals["y"] == 8
    assert _globals["z"] == 24
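
eval_block behaves like a notebook cell: it executes a block of code in the given namespace and returns the value of a trailing expression, if there is one. A minimal sketch, assuming the altair 4.x internals imported above:

# Sketch only: a trailing expression's value is returned, assignments are not.
from altair.utils.execeval import eval_block

ns = {}
result = eval_block("a = 10\na * 2", ns)
print(result)   # -> 20
print(ns["a"])  # -> 10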
.venv/Lib/site-packages/altair/utils/tests/test_html.py (new file, 52 lines)
@@ -0,0 +1,52 @@
import pytest

from ..html import spec_to_html


@pytest.fixture
def spec():
    return {
        "data": {"url": "data.json"},
        "mark": "point",
        "encoding": {
            "x": {"field": "x", "type": "quantitative"},
            "y": {"field": "y", "type": "quantitative"},
        },
    }


@pytest.mark.parametrize("requirejs", [True, False])
@pytest.mark.parametrize("fullhtml", [True, False])
def test_spec_to_html(requirejs, fullhtml, spec):
    # We can't test that the html actually renders, but we'll test aspects of
    # it to make certain that the keywords are respected.
    vegaembed_version = ("3.12",)
    vegalite_version = ("3.0",)
    vega_version = "4.0"

    html = spec_to_html(
        spec,
        mode="vega-lite",
        requirejs=requirejs,
        fullhtml=fullhtml,
        vegalite_version=vegalite_version,
        vegaembed_version=vegaembed_version,
        vega_version=vega_version,
    )
    html = html.strip()

    if fullhtml:
        assert html.startswith("<!DOCTYPE html>")
        assert html.endswith("</html>")
    else:
        assert html.startswith("<style>")
        assert html.endswith("</script>")

    if requirejs:
        assert "require(" in html
    else:
        assert "require(" not in html

    assert "vega-lite@{}".format(vegalite_version) in html
    assert "vega@{}".format(vega_version) in html
    assert "vega-embed@{}".format(vegaembed_version) in html
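
A minimal sketch of the call this test parametrizes, assuming the altair 4.x internals imported above and illustrative version strings:

# Sketch only: render a Vega-Lite spec into a standalone HTML document.
from altair.utils.html import spec_to_html

html = spec_to_html(
    {"mark": "point", "data": {"url": "data.json"}},
    mode="vega-lite",
    requirejs=False,
    fullhtml=True,
    vega_version="5",        # illustrative versions, not pinned by the test
    vegalite_version="4",
    vegaembed_version="6",
)
print(html.strip()[:15])  # -> "<!DOCTYPE html>" when fullhtml=True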
.venv/Lib/site-packages/altair/utils/tests/test_mimebundle.py (new file, 207 lines)
@@ -0,0 +1,207 @@
import pytest

import altair as alt
from ..mimebundle import spec_to_mimebundle


@pytest.fixture
def require_altair_saver():
    try:
        import altair_saver  # noqa: F401
    except ImportError:
        pytest.skip("altair_saver not importable; cannot run saver tests")


@pytest.fixture
def vegalite_spec():
    return {
        "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
        "description": "A simple bar chart with embedded data.",
        "data": {
            "values": [
                {"a": "A", "b": 28},
                {"a": "B", "b": 55},
                {"a": "C", "b": 43},
                {"a": "D", "b": 91},
                {"a": "E", "b": 81},
                {"a": "F", "b": 53},
                {"a": "G", "b": 19},
                {"a": "H", "b": 87},
                {"a": "I", "b": 52},
            ]
        },
        "mark": "bar",
        "encoding": {
            "x": {"field": "a", "type": "ordinal"},
            "y": {"field": "b", "type": "quantitative"},
        },
    }


@pytest.fixture
def vega_spec():
    return {
        "$schema": "https://vega.github.io/schema/vega/v5.json",
        "axes": [
            {
                "aria": False,
                "domain": False,
                "grid": True,
                "gridScale": "x",
                "labels": False,
                "maxExtent": 0,
                "minExtent": 0,
                "orient": "left",
                "scale": "y",
                "tickCount": {"signal": "ceil(height/40)"},
                "ticks": False,
                "zindex": 0,
            },
            {
                "grid": False,
                "labelAlign": "right",
                "labelAngle": 270,
                "labelBaseline": "middle",
                "orient": "bottom",
                "scale": "x",
                "title": "a",
                "zindex": 0,
            },
            {
                "grid": False,
                "labelOverlap": True,
                "orient": "left",
                "scale": "y",
                "tickCount": {"signal": "ceil(height/40)"},
                "title": "b",
                "zindex": 0,
            },
        ],
        "background": "white",
        "data": [
            {
                "name": "source_0",
                "values": [
                    {"a": "A", "b": 28},
                    {"a": "B", "b": 55},
                    {"a": "C", "b": 43},
                    {"a": "D", "b": 91},
                    {"a": "E", "b": 81},
                    {"a": "F", "b": 53},
                    {"a": "G", "b": 19},
                    {"a": "H", "b": 87},
                    {"a": "I", "b": 52},
                ],
            },
            {
                "name": "data_0",
                "source": "source_0",
                "transform": [
                    {
                        "expr": 'isValid(datum["b"]) && isFinite(+datum["b"])',
                        "type": "filter",
                    }
                ],
            },
        ],
        "description": "A simple bar chart with embedded data.",
        "height": 200,
        "marks": [
            {
                "encode": {
                    "update": {
                        "ariaRoleDescription": {"value": "bar"},
                        "description": {
                            "signal": '"a: " + (isValid(datum["a"]) ? datum["a"] : ""+datum["a"]) + "; b: " + (format(datum["b"], ""))'
                        },
                        "fill": {"value": "#4c78a8"},
                        "width": {"band": 1, "scale": "x"},
                        "x": {"field": "a", "scale": "x"},
                        "y": {"field": "b", "scale": "y"},
                        "y2": {"scale": "y", "value": 0},
                    }
                },
                "from": {"data": "data_0"},
                "name": "marks",
                "style": ["bar"],
                "type": "rect",
            }
        ],
        "padding": 5,
        "scales": [
            {
                "domain": {"data": "data_0", "field": "a", "sort": True},
                "name": "x",
                "paddingInner": 0.1,
                "paddingOuter": 0.05,
                "range": {"step": {"signal": "x_step"}},
                "type": "band",
            },
            {
                "domain": {"data": "data_0", "field": "b"},
                "name": "y",
                "nice": True,
                "range": [{"signal": "height"}, 0],
                "type": "linear",
                "zero": True,
            },
        ],
        "signals": [
            {"name": "x_step", "value": 20},
            {
                "name": "width",
                "update": "bandspace(domain('x').length, 0.1, 0.05) * x_step",
            },
        ],
        "style": "cell",
    }


def test_vegalite_to_vega_mimebundle(require_altair_saver, vegalite_spec, vega_spec):
    # temporary fix for https://github.com/vega/vega-lite/issues/7776
    def delete_none(axes):
        for axis in axes:
            for key, value in list(axis.items()):
                if value is None:
                    del axis[key]
        return axes

    bundle = spec_to_mimebundle(
        spec=vegalite_spec,
        format="vega",
        mode="vega-lite",
        vega_version=alt.VEGA_VERSION,
        vegalite_version=alt.VEGALITE_VERSION,
        vegaembed_version=alt.VEGAEMBED_VERSION,
    )

    bundle["application/vnd.vega.v5+json"]["axes"] = delete_none(
        bundle["application/vnd.vega.v5+json"]["axes"]
    )
    assert bundle == {"application/vnd.vega.v5+json": vega_spec}


def test_spec_to_vegalite_mimebundle(vegalite_spec):
    bundle = spec_to_mimebundle(
        spec=vegalite_spec,
        mode="vega-lite",
        format="vega-lite",
        vegalite_version=alt.VEGALITE_VERSION,
    )
    assert bundle == {"application/vnd.vegalite.v4+json": vegalite_spec}


def test_spec_to_vega_mimebundle(vega_spec):
    bundle = spec_to_mimebundle(
        spec=vega_spec, mode="vega", format="vega", vega_version=alt.VEGA_VERSION
    )
    assert bundle == {"application/vnd.vega.v5+json": vega_spec}


def test_spec_to_json_mimebundle(vegalite_spec):
    bundle = spec_to_mimebundle(
        spec=vegalite_spec,
        mode="vega-lite",
        format="json",
    )
    assert bundle == {"application/json": vegalite_spec}
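
A minimal sketch of the dispatch these tests cover, assuming the altair 4.x internals imported above: the (mode, format) pair selects the MIME key, and format="json" simply wraps the spec.

# Sketch only: build a MIME bundle from a Vega-Lite spec.
import altair as alt
from altair.utils.mimebundle import spec_to_mimebundle

spec = {"mark": "bar", "data": {"values": [{"a": "A", "b": 28}]}}
bundle = spec_to_mimebundle(
    spec=spec,
    mode="vega-lite",
    format="vega-lite",
    vegalite_version=alt.VEGALITE_VERSION,
)
print(list(bundle))  # -> ['application/vnd.vegalite.v4+json'] under Vega-Lite v4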
.venv/Lib/site-packages/altair/utils/tests/test_plugin_registry.py (new file, 123 lines)
@@ -0,0 +1,123 @@
from ..plugin_registry import PluginRegistry
from typing import Callable


class TypedCallableRegistry(PluginRegistry[Callable[[int], int]]):
    pass


class GeneralCallableRegistry(PluginRegistry):
    _global_settings = {"global_setting": None}

    @property
    def global_setting(self):
        return self._global_settings["global_setting"]

    @global_setting.setter
    def global_setting(self, val):
        self._global_settings["global_setting"] = val


def test_plugin_registry():
    plugins = TypedCallableRegistry()

    assert plugins.names() == []
    assert plugins.active == ""
    assert plugins.get() is None
    assert repr(plugins) == "TypedCallableRegistry(active='', registered=[])"

    plugins.register("new_plugin", lambda x: x ** 2)
    assert plugins.names() == ["new_plugin"]
    assert plugins.active == ""
    assert plugins.get() is None
    assert repr(plugins) == (
        "TypedCallableRegistry(active='', " "registered=['new_plugin'])"
    )

    plugins.enable("new_plugin")
    assert plugins.names() == ["new_plugin"]
    assert plugins.active == "new_plugin"
    assert plugins.get()(3) == 9
    assert repr(plugins) == (
        "TypedCallableRegistry(active='new_plugin', " "registered=['new_plugin'])"
    )


def test_plugin_registry_extra_options():
    plugins = GeneralCallableRegistry()

    plugins.register("metadata_plugin", lambda x, p=2: x ** p)
    plugins.enable("metadata_plugin")
    assert plugins.get()(3) == 9

    plugins.enable("metadata_plugin", p=3)
    assert plugins.active == "metadata_plugin"
    assert plugins.get()(3) == 27

    # enabling without changing name
    plugins.enable(p=2)
    assert plugins.active == "metadata_plugin"
    assert plugins.get()(3) == 9


def test_plugin_registry_global_settings():
    plugins = GeneralCallableRegistry()

    # we need some default plugin, but we won't do anything with it
    plugins.register("default", lambda x: x)
    plugins.enable("default")

    # default value of the global flag
    assert plugins.global_setting is None

    # enabling changes the global state, not the options
    plugins.enable(global_setting=True)
    assert plugins.global_setting is True
    assert plugins._options == {}

    # context manager changes global state temporarily
    with plugins.enable(global_setting="temp"):
        assert plugins.global_setting == "temp"
        assert plugins._options == {}
    assert plugins.global_setting is True
    assert plugins._options == {}


def test_plugin_registry_context():
    plugins = GeneralCallableRegistry()

    plugins.register("default", lambda x, p=2: x ** p)

    # At first there is no plugin enabled
    assert plugins.active == ""
    assert plugins.options == {}

    # Make sure the context is set and reset correctly
    with plugins.enable("default", p=6):
        assert plugins.active == "default"
        assert plugins.options == {"p": 6}

    assert plugins.active == ""
    assert plugins.options == {}

    # Make sure the context is reset even if there is an error
    try:
        with plugins.enable("default", p=6):
            assert plugins.active == "default"
            assert plugins.options == {"p": 6}
            raise ValueError()
    except ValueError:
        pass

    assert plugins.active == ""
    assert plugins.options == {}

    # Enabling without specifying name uses current name
    plugins.enable("default", p=2)

    with plugins.enable(p=6):
        assert plugins.active == "default"
        assert plugins.options == {"p": 6}

    assert plugins.active == "default"
    assert plugins.options == {"p": 2}
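
A minimal sketch of the registry contract these tests pin down, assuming the altair 4.x internals imported above: register() stores a plugin, enable() activates it, and enable() also works as a context manager that restores the previous state on exit.

# Sketch only: a throwaway registry subclass, mirroring the tests.
from altair.utils.plugin_registry import PluginRegistry

class CallableRegistry(PluginRegistry):  # hypothetical subclass
    pass

reg = CallableRegistry()
reg.register("square", lambda x: x ** 2)
reg.register("cube", lambda x: x ** 3)
reg.enable("square")
print(reg.get()(2))  # -> 4

with reg.enable("cube"):
    print(reg.get()(2))  # -> 8
print(reg.get()(2))  # -> 4; "square" is restored on exit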
.venv/Lib/site-packages/altair/utils/tests/test_schemapi.py (new file, 351 lines)
@@ -0,0 +1,351 @@
# The contents of this file are automatically written by
# tools/generate_schema_wrapper.py. Do not modify directly.
import copy
import io
import json
import jsonschema
import pickle
import pytest

import numpy as np

from ..schemapi import (
    UndefinedType,
    SchemaBase,
    Undefined,
    _FromDict,
    SchemaValidationError,
)

# Make tests inherit from _TestSchema, so that when we test from_dict it won't
# try to use SchemaBase objects defined elsewhere as wrappers.


class _TestSchema(SchemaBase):
    @classmethod
    def _default_wrapper_classes(cls):
        return _TestSchema.__subclasses__()


class MySchema(_TestSchema):
    _schema = {
        "definitions": {
            "StringMapping": {
                "type": "object",
                "additionalProperties": {"type": "string"},
            },
            "StringArray": {"type": "array", "items": {"type": "string"}},
        },
        "properties": {
            "a": {"$ref": "#/definitions/StringMapping"},
            "a2": {"type": "object", "additionalProperties": {"type": "number"}},
            "b": {"$ref": "#/definitions/StringArray"},
            "b2": {"type": "array", "items": {"type": "number"}},
            "c": {"type": ["string", "number"]},
            "d": {
                "anyOf": [
                    {"$ref": "#/definitions/StringMapping"},
                    {"$ref": "#/definitions/StringArray"},
                ]
            },
            "e": {"items": [{"type": "string"}, {"type": "string"}]},
        },
    }


class StringMapping(_TestSchema):
    _schema = {"$ref": "#/definitions/StringMapping"}
    _rootschema = MySchema._schema


class StringArray(_TestSchema):
    _schema = {"$ref": "#/definitions/StringArray"}
    _rootschema = MySchema._schema


class Derived(_TestSchema):
    _schema = {
        "definitions": {
            "Foo": {"type": "object", "properties": {"d": {"type": "string"}}},
            "Bar": {"type": "string", "enum": ["A", "B"]},
        },
        "type": "object",
        "additionalProperties": False,
        "properties": {
            "a": {"type": "integer"},
            "b": {"type": "string"},
            "c": {"$ref": "#/definitions/Foo"},
        },
    }


class Foo(_TestSchema):
    _schema = {"$ref": "#/definitions/Foo"}
    _rootschema = Derived._schema


class Bar(_TestSchema):
    _schema = {"$ref": "#/definitions/Bar"}
    _rootschema = Derived._schema


class SimpleUnion(_TestSchema):
    _schema = {"anyOf": [{"type": "integer"}, {"type": "string"}]}


class DefinitionUnion(_TestSchema):
    _schema = {"anyOf": [{"$ref": "#/definitions/Foo"}, {"$ref": "#/definitions/Bar"}]}
    _rootschema = Derived._schema


class SimpleArray(_TestSchema):
    _schema = {
        "type": "array",
        "items": {"anyOf": [{"type": "integer"}, {"type": "string"}]},
    }


class InvalidProperties(_TestSchema):
    _schema = {
        "type": "object",
        "properties": {"for": {}, "as": {}, "vega-lite": {}, "$schema": {}},
    }


def test_construct_multifaceted_schema():
    dct = {
        "a": {"foo": "bar"},
        "a2": {"foo": 42},
        "b": ["a", "b", "c"],
        "b2": [1, 2, 3],
        "c": 42,
        "d": ["x", "y", "z"],
        "e": ["a", "b"],
    }

    myschema = MySchema.from_dict(dct)
    assert myschema.to_dict() == dct

    myschema2 = MySchema(**dct)
    assert myschema2.to_dict() == dct

    assert isinstance(myschema.a, StringMapping)
    assert isinstance(myschema.a2, dict)
    assert isinstance(myschema.b, StringArray)
    assert isinstance(myschema.b2, list)
    assert isinstance(myschema.d, StringArray)


def test_schema_cases():
    assert Derived(a=4, b="yo").to_dict() == {"a": 4, "b": "yo"}
    assert Derived(a=4, c={"d": "hey"}).to_dict() == {"a": 4, "c": {"d": "hey"}}
    assert Derived(a=4, b="5", c=Foo(d="val")).to_dict() == {
        "a": 4,
        "b": "5",
        "c": {"d": "val"},
    }
    assert Foo(d="hello", f=4).to_dict() == {"d": "hello", "f": 4}

    assert Derived().to_dict() == {}
    assert Foo().to_dict() == {}

    with pytest.raises(jsonschema.ValidationError):
        # a needs to be an integer
        Derived(a="yo").to_dict()

    with pytest.raises(jsonschema.ValidationError):
        # Foo.d needs to be a string
        Derived(c=Foo(4)).to_dict()

    with pytest.raises(jsonschema.ValidationError):
        # no additional properties allowed
        Derived(foo="bar").to_dict()


def test_round_trip():
    D = {"a": 4, "b": "yo"}
    assert Derived.from_dict(D).to_dict() == D

    D = {"a": 4, "c": {"d": "hey"}}
    assert Derived.from_dict(D).to_dict() == D

    D = {"a": 4, "b": "5", "c": {"d": "val"}}
    assert Derived.from_dict(D).to_dict() == D

    D = {"d": "hello", "f": 4}
    assert Foo.from_dict(D).to_dict() == D


def test_from_dict():
    D = {"a": 4, "b": "5", "c": {"d": "val"}}
    obj = Derived.from_dict(D)
    assert obj.a == 4
    assert obj.b == "5"
    assert isinstance(obj.c, Foo)


def test_simple_type():
    assert SimpleUnion(4).to_dict() == 4


def test_simple_array():
    assert SimpleArray([4, 5, "six"]).to_dict() == [4, 5, "six"]
    assert SimpleArray.from_dict(list("abc")).to_dict() == list("abc")


def test_definition_union():
    obj = DefinitionUnion.from_dict("A")
    assert isinstance(obj, Bar)
    assert obj.to_dict() == "A"

    obj = DefinitionUnion.from_dict("B")
    assert isinstance(obj, Bar)
    assert obj.to_dict() == "B"

    obj = DefinitionUnion.from_dict({"d": "yo"})
    assert isinstance(obj, Foo)
    assert obj.to_dict() == {"d": "yo"}


def test_invalid_properties():
    dct = {"for": 2, "as": 3, "vega-lite": 4, "$schema": 5}
    invalid = InvalidProperties.from_dict(dct)
    assert invalid["for"] == 2
    assert invalid["as"] == 3
    assert invalid["vega-lite"] == 4
    assert invalid["$schema"] == 5
    assert invalid.to_dict() == dct


def test_undefined_singleton():
    assert Undefined is UndefinedType()


@pytest.fixture
def dct():
    return {
        "a": {"foo": "bar"},
        "a2": {"foo": 42},
        "b": ["a", "b", "c"],
        "b2": [1, 2, 3],
        "c": 42,
        "d": ["x", "y", "z"],
    }


def test_copy_method(dct):
    myschema = MySchema.from_dict(dct)

    # Make sure copy is deep
    copy = myschema.copy(deep=True)
    copy["a"]["foo"] = "new value"
    copy["b"] = ["A", "B", "C"]
    copy["c"] = 164
    assert myschema.to_dict() == dct

    # If we ignore a value, changing the copy changes the original
    copy = myschema.copy(deep=True, ignore=["a"])
    copy["a"]["foo"] = "new value"
    copy["b"] = ["A", "B", "C"]
    copy["c"] = 164
    mydct = myschema.to_dict()
    assert mydct["a"]["foo"] == "new value"
    assert mydct["b"][0] == dct["b"][0]
    assert mydct["c"] == dct["c"]

    # If copy is not deep, then changing copy below top level changes original
    copy = myschema.copy(deep=False)
    copy["a"]["foo"] = "baz"
    copy["b"] = ["A", "B", "C"]
    copy["c"] = 164
    mydct = myschema.to_dict()
    assert mydct["a"]["foo"] == "baz"
    assert mydct["b"] == dct["b"]
    assert mydct["c"] == dct["c"]


def test_copy_module(dct):
    myschema = MySchema.from_dict(dct)

    cp = copy.deepcopy(myschema)
    cp["a"]["foo"] = "new value"
    cp["b"] = ["A", "B", "C"]
    cp["c"] = 164
    assert myschema.to_dict() == dct


def test_attribute_error():
    m = MySchema()
    with pytest.raises(AttributeError) as err:
        m.invalid_attribute
    assert str(err.value) == (
        "'MySchema' object has no attribute " "'invalid_attribute'"
    )


def test_to_from_json(dct):
    json_str = MySchema.from_dict(dct).to_json()
    new_dct = MySchema.from_json(json_str).to_dict()

    assert new_dct == dct


def test_to_from_pickle(dct):
    myschema = MySchema.from_dict(dct)
    output = io.BytesIO()
    pickle.dump(myschema, output)
    output.seek(0)
    myschema_new = pickle.load(output)

    assert myschema_new.to_dict() == dct


def test_class_with_no_schema():
    class BadSchema(SchemaBase):
        pass

    with pytest.raises(ValueError) as err:
        BadSchema(4)
    assert str(err.value).startswith("Cannot instantiate object")


@pytest.mark.parametrize("use_json", [True, False])
def test_hash_schema(use_json):
    classes = _TestSchema._default_wrapper_classes()

    for cls in classes:
        hsh1 = _FromDict.hash_schema(cls._schema, use_json=use_json)
        hsh2 = _FromDict.hash_schema(cls._schema, use_json=use_json)
        assert hsh1 == hsh2
        assert hash(hsh1) == hash(hsh2)


def test_schema_validation_error():
    try:
        MySchema(a={"foo": 4})
        the_err = None
    except jsonschema.ValidationError as err:
        the_err = err

    assert isinstance(the_err, SchemaValidationError)
    message = str(the_err)

    assert message.startswith("Invalid specification")
    assert "test_schemapi.MySchema->a" in message
    assert "validating {!r}".format(the_err.validator) in message
    assert the_err.message in message


def test_serialize_numpy_types():
    m = MySchema(
        a={"date": np.datetime64("2019-01-01")},
        a2={"int64": np.int64(1), "float64": np.float64(2)},
        b2=np.arange(4),
    )
    out = m.to_json()
    dct = json.loads(out)
    assert dct == {
        "a": {"date": "2019-01-01T00:00:00"},
        "a2": {"int64": 1, "float64": 2},
        "b2": [0, 1, 2, 3],
    }
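
A minimal sketch of the SchemaBase pattern these tests rely on, assuming the altair 4.x internals imported above: a subclass binds a JSON schema, keyword arguments become attributes, and to_dict() validates the result against the schema.

# Sketch only: a hypothetical wrapper class, not part of altair.
from altair.utils.schemapi import SchemaBase

class Point(SchemaBase):
    _schema = {
        "type": "object",
        "properties": {"x": {"type": "number"}, "y": {"type": "number"}},
    }

print(Point(x=1, y=2).to_dict())  # -> {'x': 1, 'y': 2}
Point(x="oops").to_dict()         # raises a jsonschema ValidationError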
.venv/Lib/site-packages/altair/utils/tests/test_server.py (new file, 10 lines)
@@ -0,0 +1,10 @@
"""
Test http server
"""

from altair.utils.server import serve, MockServer


def test_serve():
    html = "<html><title>Title</title><body><p>Content</p></body></html>"
    serve(html, open_browser=False, http_server=MockServer)
.venv/Lib/site-packages/altair/utils/tests/test_utils.py (new file, 192 lines)
@@ -0,0 +1,192 @@
import pytest
import warnings
import json

import numpy as np
import pandas as pd

from .. import infer_vegalite_type, sanitize_dataframe


def test_infer_vegalite_type():
    def _check(arr, typ):
        assert infer_vegalite_type(arr) == typ

    _check(np.arange(5, dtype=float), "quantitative")
    _check(np.arange(5, dtype=int), "quantitative")
    _check(np.zeros(5, dtype=bool), "nominal")
    _check(pd.date_range("2012", "2013"), "temporal")
    _check(pd.timedelta_range(365, periods=12), "temporal")

    nulled = pd.Series(np.random.randint(10, size=10))
    nulled[0] = None
    _check(nulled, "quantitative")
    _check(["a", "b", "c"], "nominal")

    if hasattr(pytest, "warns"):  # added in pytest 2.8
        with pytest.warns(UserWarning):
            _check([], "nominal")
    else:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            _check([], "nominal")


def test_sanitize_dataframe():
    # create a dataframe with various types
    df = pd.DataFrame(
        {
            "s": list("abcde"),
            "f": np.arange(5, dtype=float),
            "i": np.arange(5, dtype=int),
            "b": np.array([True, False, True, True, False]),
            "d": pd.date_range("2012-01-01", periods=5, freq="H"),
            "c": pd.Series(list("ababc"), dtype="category"),
            "c2": pd.Series([1, "A", 2.5, "B", None], dtype="category"),
            "o": pd.Series([np.array(i) for i in range(5)]),
            "p": pd.date_range("2012-01-01", periods=5, freq="H").tz_localize("UTC"),
        }
    )

    # add some nulls
    df.iloc[0, df.columns.get_loc("s")] = None
    df.iloc[0, df.columns.get_loc("f")] = np.nan
    df.iloc[0, df.columns.get_loc("d")] = pd.NaT
    df.iloc[0, df.columns.get_loc("o")] = np.array(np.nan)

    # JSON serialize. This will fail on non-sanitized dataframes
    print(df[["s", "c2"]])
    df_clean = sanitize_dataframe(df)
    print(df_clean[["s", "c2"]])
    print(df_clean[["s", "c2"]].to_dict())
    s = json.dumps(df_clean.to_dict(orient="records"))
    print(s)

    # Re-construct pandas dataframe
    df2 = pd.read_json(s)

    # Re-order the columns to match df
    df2 = df2[df.columns]

    # Re-apply original types
    for col in df:
        if str(df[col].dtype).startswith("datetime"):
            # astype(datetime) introduces time-zone issues:
            # to_datetime() does not.
            utc = isinstance(df[col].dtype, pd.core.dtypes.dtypes.DatetimeTZDtype)
            df2[col] = pd.to_datetime(df2[col], utc=utc)
        else:
            df2[col] = df2[col].astype(df[col].dtype)

    # pandas doesn't properly recognize np.array(np.nan), so change it here
    df.iloc[0, df.columns.get_loc("o")] = np.nan
    assert df.equals(df2)


def test_sanitize_dataframe_colnames():
    df = pd.DataFrame(np.arange(12).reshape(4, 3))

    # Test that RangeIndex is converted to strings
    df = sanitize_dataframe(df)
    assert [isinstance(col, str) for col in df.columns]

    # Test that non-string columns result in an error
    df.columns = [4, "foo", "bar"]
    with pytest.raises(ValueError) as err:
        sanitize_dataframe(df)
    assert str(err.value).startswith("Dataframe contains invalid column name: 4.")


def test_sanitize_dataframe_timedelta():
    df = pd.DataFrame({"r": pd.timedelta_range(start="1 day", periods=4)})
    with pytest.raises(ValueError) as err:
        sanitize_dataframe(df)
    assert str(err.value).startswith('Field "r" has type "timedelta')


def test_sanitize_dataframe_infs():
    df = pd.DataFrame({"x": [0, 1, 2, np.inf, -np.inf, np.nan]})
    df_clean = sanitize_dataframe(df)
    assert list(df_clean.dtypes) == [object]
    assert list(df_clean["x"]) == [0, 1, 2, None, None, None]


@pytest.mark.skipif(
    not hasattr(pd, "Int64Dtype"),
    reason="Nullable integers not supported in pandas v{}".format(pd.__version__),
)
def test_sanitize_nullable_integers():

    df = pd.DataFrame(
        {
            "int_np": [1, 2, 3, 4, 5],
            "int64": pd.Series([1, 2, 3, None, 5], dtype="UInt8"),
            "int64_nan": pd.Series([1, 2, 3, float("nan"), 5], dtype="Int64"),
            "float": [1.0, 2.0, 3.0, 4, 5.0],
            "float_null": [1, 2, None, 4, 5],
            "float_inf": [1, 2, None, 4, (float("inf"))],
        }
    )

    df_clean = sanitize_dataframe(df)
    assert {col.dtype.name for _, col in df_clean.iteritems()} == {"object"}

    result_python = {col_name: list(col) for col_name, col in df_clean.iteritems()}
    assert result_python == {
        "int_np": [1, 2, 3, 4, 5],
        "int64": [1, 2, 3, None, 5],
        "int64_nan": [1, 2, 3, None, 5],
        "float": [1.0, 2.0, 3.0, 4.0, 5.0],
        "float_null": [1.0, 2.0, None, 4.0, 5.0],
        "float_inf": [1.0, 2.0, None, 4.0, None],
    }


@pytest.mark.skipif(
    not hasattr(pd, "StringDtype"),
    reason="dedicated String dtype not supported in pandas v{}".format(pd.__version__),
)
def test_sanitize_string_dtype():
    df = pd.DataFrame(
        {
            "string_object": ["a", "b", "c", "d"],
            "string_string": pd.array(["a", "b", "c", "d"], dtype="string"),
            "string_object_null": ["a", "b", None, "d"],
            "string_string_null": pd.array(["a", "b", None, "d"], dtype="string"),
        }
    )

    df_clean = sanitize_dataframe(df)
    assert {col.dtype.name for _, col in df_clean.iteritems()} == {"object"}

    result_python = {col_name: list(col) for col_name, col in df_clean.iteritems()}
    assert result_python == {
        "string_object": ["a", "b", "c", "d"],
        "string_string": ["a", "b", "c", "d"],
        "string_object_null": ["a", "b", None, "d"],
        "string_string_null": ["a", "b", None, "d"],
    }


@pytest.mark.skipif(
    not hasattr(pd, "BooleanDtype"),
    reason="Nullable boolean dtype not supported in pandas v{}".format(pd.__version__),
)
def test_sanitize_boolean_dtype():
    df = pd.DataFrame(
        {
            "bool_none": pd.array([True, False, None], dtype="boolean"),
            "none": pd.array([None, None, None], dtype="boolean"),
            "bool": pd.array([True, False, True], dtype="boolean"),
        }
    )

    df_clean = sanitize_dataframe(df)
    assert {col.dtype.name for _, col in df_clean.iteritems()} == {"object"}

    result_python = {col_name: list(col) for col_name, col in df_clean.iteritems()}
    assert result_python == {
        "bool_none": [True, False, None],
        "none": [None, None, None],
        "bool": [True, False, True],
    }
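
A minimal sketch of the guarantee these tests spell out, assuming altair 4.x: sanitize_dataframe returns columns that serialize cleanly to JSON, with NaN/inf mapped to None and timestamps to ISO strings.

# Sketch only: sanitize, then JSON-serialize without errors.
import json
import numpy as np
import pandas as pd
from altair.utils import sanitize_dataframe

df = pd.DataFrame({"x": [1.0, np.nan, np.inf], "d": pd.date_range("2012", periods=3)})
clean = sanitize_dataframe(df)
print(json.dumps(clean.to_dict(orient="records")))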