first commit

Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

View File

@@ -0,0 +1,265 @@
import types
import numpy as np
import pandas as pd
import pytest
import altair as alt
from .. import parse_shorthand, update_nested, infer_encoding_types
from ..core import infer_dtype
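
# A minimal stand-in for altair's schema-generated channels module. The
# `channels` fixture below exec's this source into a fresh module object, so
# infer_encoding_types can be tested without depending on the real channel
# classes.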
FAKE_CHANNELS_MODULE = '''
"""Fake channels module for utility tests."""
from altair.utils import schemapi
class FieldChannel(object):
def __init__(self, shorthand, **kwargs):
kwargs['shorthand'] = shorthand
return super(FieldChannel, self).__init__(**kwargs)
class ValueChannel(object):
def __init__(self, value, **kwargs):
kwargs['value'] = value
return super(ValueChannel, self).__init__(**kwargs)
class X(FieldChannel, schemapi.SchemaBase):
_schema = {}
_encoding_name = "x"
class XValue(ValueChannel, schemapi.SchemaBase):
_schema = {}
_encoding_name = "x"
class Y(FieldChannel, schemapi.SchemaBase):
_schema = {}
_encoding_name = "y"
class YValue(ValueChannel, schemapi.SchemaBase):
_schema = {}
_encoding_name = "y"
class StrokeWidth(FieldChannel, schemapi.SchemaBase):
_schema = {}
_encoding_name = "strokeWidth"
class StrokeWidthValue(ValueChannel, schemapi.SchemaBase):
_schema = {}
_encoding_name = "strokeWidth"
'''
@pytest.mark.parametrize(
"value,expected_type",
[
([1, 2, 3], "integer"),
([1.0, 2.0, 3.0], "floating"),
([1, 2.0, 3], "mixed-integer-float"),
(["a", "b", "c"], "string"),
(["a", "b", np.nan], "mixed"),
],
)
def test_infer_dtype(value, expected_type):
assert infer_dtype(value) == expected_type
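
# parse_shorthand splits Vega-Lite shorthand strings of the general form
# "aggregate(field):type" into their component parts, e.g.
#   "sum(foobar):Q" -> {"aggregate": "sum", "field": "foobar",
#                       "type": "quantitative"}
# Tokens that don't match a known aggregate, timeUnit, or type are treated as
# part of a plain field name.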
def test_parse_shorthand():
def check(s, **kwargs):
assert parse_shorthand(s) == kwargs
check("")
# Fields alone
check("foobar", field="foobar")
check("blah:(fd ", field="blah:(fd ")
# Fields with type
check("foobar:quantitative", type="quantitative", field="foobar")
check("foobar:nominal", type="nominal", field="foobar")
check("foobar:ordinal", type="ordinal", field="foobar")
check("foobar:temporal", type="temporal", field="foobar")
check("foobar:geojson", type="geojson", field="foobar")
check("foobar:Q", type="quantitative", field="foobar")
check("foobar:N", type="nominal", field="foobar")
check("foobar:O", type="ordinal", field="foobar")
check("foobar:T", type="temporal", field="foobar")
check("foobar:G", type="geojson", field="foobar")
# Fields with aggregate and/or type
check("average(foobar)", field="foobar", aggregate="average")
check("min(foobar):temporal", type="temporal", field="foobar", aggregate="min")
check("sum(foobar):Q", type="quantitative", field="foobar", aggregate="sum")
    # check that invalid arguments are not split out
check("invalid(blah)", field="invalid(blah)")
check("blah:invalid", field="blah:invalid")
check("invalid(blah):invalid", field="invalid(blah):invalid")
# check parsing in presence of strange characters
check(
"average(a b:(c\nd):Q",
aggregate="average",
field="a b:(c\nd",
type="quantitative",
)
# special case: count doesn't need an argument
check("count()", aggregate="count", type="quantitative")
check("count():O", aggregate="count", type="ordinal")
# time units:
check("month(x)", field="x", timeUnit="month", type="temporal")
check("year(foo):O", field="foo", timeUnit="year", type="ordinal")
check("date(date):quantitative", field="date", timeUnit="date", type="quantitative")
check(
"yearmonthdate(field)", field="field", timeUnit="yearmonthdate", type="temporal"
)
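
# When a DataFrame is passed alongside the shorthand, parse_shorthand infers
# the encoding type from the column's dtype whenever the shorthand itself
# doesn't specify one.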
def test_parse_shorthand_with_data():
def check(s, data, **kwargs):
assert parse_shorthand(s, data) == kwargs
data = pd.DataFrame(
{
"x": [1, 2, 3, 4, 5],
"y": ["A", "B", "C", "D", "E"],
"z": pd.date_range("2018-01-01", periods=5, freq="D"),
"t": pd.date_range("2018-01-01", periods=5, freq="D").tz_localize("UTC"),
}
)
check("x", data, field="x", type="quantitative")
check("y", data, field="y", type="nominal")
check("z", data, field="z", type="temporal")
check("t", data, field="t", type="temporal")
check("count(x)", data, field="x", aggregate="count", type="quantitative")
check("count()", data, aggregate="count", type="quantitative")
check("month(z)", data, timeUnit="month", field="z", type="temporal")
check("month(t)", data, timeUnit="month", field="t", type="temporal")
def test_parse_shorthand_all_aggregates():
aggregates = alt.Root._schema["definitions"]["AggregateOp"]["enum"]
for aggregate in aggregates:
shorthand = "{aggregate}(field):Q".format(aggregate=aggregate)
assert parse_shorthand(shorthand) == {
"aggregate": aggregate,
"field": "field",
"type": "quantitative",
}
def test_parse_shorthand_all_timeunits():
timeUnits = []
for loc in ["Local", "Utc"]:
for typ in ["Single", "Multi"]:
defn = loc + typ + "TimeUnit"
timeUnits.extend(alt.Root._schema["definitions"][defn]["enum"])
for timeUnit in timeUnits:
shorthand = "{timeUnit}(field):Q".format(timeUnit=timeUnit)
assert parse_shorthand(shorthand) == {
"timeUnit": timeUnit,
"field": "field",
"type": "quantitative",
}
def test_parse_shorthand_window_count():
shorthand = "count()"
dct = parse_shorthand(
shorthand,
parse_aggregates=False,
parse_window_ops=True,
parse_timeunits=False,
parse_types=False,
)
assert dct == {"op": "count"}
def test_parse_shorthand_all_window_ops():
window_ops = alt.Root._schema["definitions"]["WindowOnlyOp"]["enum"]
aggregates = alt.Root._schema["definitions"]["AggregateOp"]["enum"]
for op in window_ops + aggregates:
shorthand = "{op}(field)".format(op=op)
dct = parse_shorthand(
shorthand,
parse_aggregates=False,
parse_window_ops=True,
parse_timeunits=False,
parse_types=False,
)
assert dct == {"field": "field", "op": op}
def test_update_nested():
original = {"x": {"b": {"foo": 2}, "c": 4}}
update = {"x": {"b": {"foo": 5}, "d": 6}, "y": 40}
output = update_nested(original, update, copy=True)
assert output is not original
assert output == {"x": {"b": {"foo": 5}, "c": 4, "d": 6}, "y": 40}
output2 = update_nested(original, update)
assert output2 is original
assert output == output2
@pytest.fixture
def channels():
channels = types.ModuleType("channels")
exec(FAKE_CHANNELS_MODULE, channels.__dict__)
return channels
def _getargs(*args, **kwargs):
return args, kwargs
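
# infer_encoding_types resolves positional channel objects and bare keyword
# arguments against the given channels module: the keyword x="xval" becomes
# channels.X("xval"), and x=alt.value(...) becomes channels.XValue(...).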
def test_infer_encoding_types(channels):
expected = dict(
x=channels.X("xval"),
y=channels.YValue("yval"),
strokeWidth=channels.StrokeWidthValue(value=4),
)
# All positional args
args, kwds = _getargs(
channels.X("xval"), channels.YValue("yval"), channels.StrokeWidthValue(4)
)
assert infer_encoding_types(args, kwds, channels) == expected
# All keyword args
args, kwds = _getargs(x="xval", y=alt.value("yval"), strokeWidth=alt.value(4))
assert infer_encoding_types(args, kwds, channels) == expected
# Mixed positional & keyword
args, kwds = _getargs(
channels.X("xval"), channels.YValue("yval"), strokeWidth=alt.value(4)
)
assert infer_encoding_types(args, kwds, channels) == expected
def test_infer_encoding_types_with_condition(channels):
args, kwds = _getargs(
x=alt.condition("pred1", alt.value(1), alt.value(2)),
y=alt.condition("pred2", alt.value(1), "yval"),
strokeWidth=alt.condition("pred3", "sval", alt.value(2)),
)
expected = dict(
x=channels.XValue(2, condition=channels.XValue(1, test="pred1")),
y=channels.Y("yval", condition=channels.YValue(1, test="pred2")),
strokeWidth=channels.StrokeWidthValue(
2, condition=channels.StrokeWidth("sval", test="pred3")
),
)
assert infer_encoding_types(args, kwds, channels) == expected

View File

@@ -0,0 +1,139 @@
import os
import pytest
import pandas as pd
from toolz import pipe
from ..data import limit_rows, MaxRowsError, sample, to_values, to_json, to_csv
def _create_dataframe(N):
data = pd.DataFrame({"x": range(N), "y": range(N)})
return data
def _create_data_with_values(N):
data = {"values": [{"x": i, "y": i + 1} for i in range(N)]}
return data
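
# The data transformers are curried, so calling one with only keyword
# arguments (e.g. limit_rows(max_rows=5)) returns a partial to be applied to
# the data later; that is what makes the toolz.pipe style below work.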
def test_limit_rows():
"""Test the limit_rows data transformer."""
data = _create_dataframe(10)
result = limit_rows(data, max_rows=20)
assert data is result
with pytest.raises(MaxRowsError):
pipe(data, limit_rows(max_rows=5))
data = _create_data_with_values(10)
result = pipe(data, limit_rows(max_rows=20))
assert data is result
with pytest.raises(MaxRowsError):
limit_rows(data, max_rows=5)
def test_sample():
"""Test the sample data transformer."""
data = _create_dataframe(20)
result = pipe(data, sample(n=10))
assert len(result) == 10
assert isinstance(result, pd.DataFrame)
data = _create_data_with_values(20)
result = sample(data, n=10)
assert isinstance(result, dict)
assert "values" in result
assert len(result["values"]) == 10
data = _create_dataframe(20)
result = pipe(data, sample(frac=0.5))
assert len(result) == 10
assert isinstance(result, pd.DataFrame)
data = _create_data_with_values(20)
result = sample(data, frac=0.5)
assert isinstance(result, dict)
assert "values" in result
assert len(result["values"]) == 10
def test_to_values():
"""Test the to_values data transformer."""
data = _create_dataframe(10)
result = pipe(data, to_values)
assert result == {"values": data.to_dict(orient="records")}
def test_type_error():
"""Ensure that TypeError is raised for types other than dict/DataFrame."""
for f in (sample, limit_rows, to_values):
with pytest.raises(TypeError):
pipe(0, f)
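
# to_json and to_csv write the data to disk and return a {"url": filename}
# specification; the filename is derived from a hash of the data's contents,
# which is why the repeated calls below must yield identical results.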
def test_dataframe_to_json():
"""Test to_json
- make certain the filename is deterministic
- make certain the file contents match the data
"""
data = _create_dataframe(10)
try:
result1 = pipe(data, to_json)
result2 = pipe(data, to_json)
filename = result1["url"]
output = pd.read_json(filename)
finally:
os.remove(filename)
assert result1 == result2
assert output.equals(data)
def test_dict_to_json():
"""Test to_json
- make certain the filename is deterministic
- make certain the file contents match the data
"""
data = _create_data_with_values(10)
try:
result1 = pipe(data, to_json)
result2 = pipe(data, to_json)
filename = result1["url"]
output = pd.read_json(filename).to_dict(orient="records")
finally:
os.remove(filename)
assert result1 == result2
assert data == {"values": output}
def test_dataframe_to_csv():
"""Test to_csv with dataframe input
- make certain the filename is deterministic
- make certain the file contents match the data
"""
data = _create_dataframe(10)
try:
result1 = pipe(data, to_csv)
result2 = pipe(data, to_csv)
filename = result1["url"]
output = pd.read_csv(filename)
finally:
os.remove(filename)
assert result1 == result2
assert output.equals(data)
def test_dict_to_csv():
"""Test to_csv with dict input
- make certain the filename is deterministic
- make certain the file contents match the data
"""
data = _create_data_with_values(10)
try:
result1 = pipe(data, to_csv)
result2 = pipe(data, to_csv)
filename = result1["url"]
output = pd.read_csv(filename).to_dict(orient="records")
finally:
os.remove(filename)
assert result1 == result2
assert data == {"values": output}

View File

@@ -0,0 +1,24 @@
import pytest
import altair as alt
from altair.utils import AltairDeprecationWarning
from altair.utils.deprecation import _deprecate, deprecated
def test_deprecated_class():
OldChart = _deprecate(alt.Chart, "OldChart")
with pytest.warns(AltairDeprecationWarning) as record:
OldChart()
assert "alt.OldChart" in record[0].message.args[0]
assert "alt.Chart" in record[0].message.args[0]
def test_deprecation_decorator():
@deprecated(message="func is deprecated")
def func(x):
return x + 1
with pytest.warns(AltairDeprecationWarning) as record:
y = func(1)
assert y == 2
assert record[0].message.args[0] == "func is deprecated"

View File

@@ -0,0 +1,30 @@
from ..execeval import eval_block
HAS_RETURN = """
x = 4
y = 2 * x
3 * y
"""
NO_RETURN = """
x = 4
y = 2 * x
z = 3 * y
"""
def test_eval_block_with_return():
_globals = {}
result = eval_block(HAS_RETURN, _globals)
assert result == 24
assert _globals["x"] == 4
assert _globals["y"] == 8
def test_eval_block_without_return():
_globals = {}
result = eval_block(NO_RETURN, _globals)
assert result is None
assert _globals["x"] == 4
assert _globals["y"] == 8
assert _globals["z"] == 24

View File

@@ -0,0 +1,52 @@
import pytest
from ..html import spec_to_html
@pytest.fixture
def spec():
return {
"data": {"url": "data.json"},
"mark": "point",
"encoding": {
"x": {"field": "x", "type": "quantitative"},
"y": {"field": "y", "type": "quantitative"},
},
}
@pytest.mark.parametrize("requirejs", [True, False])
@pytest.mark.parametrize("fullhtml", [True, False])
def test_spec_to_html(requirejs, fullhtml, spec):
# We can't test that the html actually renders, but we'll test aspects of
# it to make certain that the keywords are respected.
vegaembed_version = ("3.12",)
vegalite_version = ("3.0",)
vega_version = "4.0"
html = spec_to_html(
spec,
mode="vega-lite",
requirejs=requirejs,
fullhtml=fullhtml,
vegalite_version=vegalite_version,
vegaembed_version=vegaembed_version,
vega_version=vega_version,
)
html = html.strip()
if fullhtml:
assert html.startswith("<!DOCTYPE html>")
assert html.endswith("</html>")
else:
assert html.startswith("<style>")
assert html.endswith("</script>")
if requirejs:
assert "require(" in html
else:
assert "require(" not in html
assert "vega-lite@{}".format(vegalite_version) in html
assert "vega@{}".format(vega_version) in html
assert "vega-embed@{}".format(vegaembed_version) in html

View File

@@ -0,0 +1,207 @@
import pytest
import altair as alt
from ..mimebundle import spec_to_mimebundle
@pytest.fixture
def require_altair_saver():
try:
import altair_saver # noqa: F401
except ImportError:
pytest.skip("altair_saver not importable; cannot run saver tests")
@pytest.fixture
def vegalite_spec():
return {
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"description": "A simple bar chart with embedded data.",
"data": {
"values": [
{"a": "A", "b": 28},
{"a": "B", "b": 55},
{"a": "C", "b": 43},
{"a": "D", "b": 91},
{"a": "E", "b": 81},
{"a": "F", "b": 53},
{"a": "G", "b": 19},
{"a": "H", "b": 87},
{"a": "I", "b": 52},
]
},
"mark": "bar",
"encoding": {
"x": {"field": "a", "type": "ordinal"},
"y": {"field": "b", "type": "quantitative"},
},
}
@pytest.fixture
def vega_spec():
return {
"$schema": "https://vega.github.io/schema/vega/v5.json",
"axes": [
{
"aria": False,
"domain": False,
"grid": True,
"gridScale": "x",
"labels": False,
"maxExtent": 0,
"minExtent": 0,
"orient": "left",
"scale": "y",
"tickCount": {"signal": "ceil(height/40)"},
"ticks": False,
"zindex": 0,
},
{
"grid": False,
"labelAlign": "right",
"labelAngle": 270,
"labelBaseline": "middle",
"orient": "bottom",
"scale": "x",
"title": "a",
"zindex": 0,
},
{
"grid": False,
"labelOverlap": True,
"orient": "left",
"scale": "y",
"tickCount": {"signal": "ceil(height/40)"},
"title": "b",
"zindex": 0,
},
],
"background": "white",
"data": [
{
"name": "source_0",
"values": [
{"a": "A", "b": 28},
{"a": "B", "b": 55},
{"a": "C", "b": 43},
{"a": "D", "b": 91},
{"a": "E", "b": 81},
{"a": "F", "b": 53},
{"a": "G", "b": 19},
{"a": "H", "b": 87},
{"a": "I", "b": 52},
],
},
{
"name": "data_0",
"source": "source_0",
"transform": [
{
"expr": 'isValid(datum["b"]) && isFinite(+datum["b"])',
"type": "filter",
}
],
},
],
"description": "A simple bar chart with embedded data.",
"height": 200,
"marks": [
{
"encode": {
"update": {
"ariaRoleDescription": {"value": "bar"},
"description": {
"signal": '"a: " + (isValid(datum["a"]) ? datum["a"] : ""+datum["a"]) + "; b: " + (format(datum["b"], ""))'
},
"fill": {"value": "#4c78a8"},
"width": {"band": 1, "scale": "x"},
"x": {"field": "a", "scale": "x"},
"y": {"field": "b", "scale": "y"},
"y2": {"scale": "y", "value": 0},
}
},
"from": {"data": "data_0"},
"name": "marks",
"style": ["bar"],
"type": "rect",
}
],
"padding": 5,
"scales": [
{
"domain": {"data": "data_0", "field": "a", "sort": True},
"name": "x",
"paddingInner": 0.1,
"paddingOuter": 0.05,
"range": {"step": {"signal": "x_step"}},
"type": "band",
},
{
"domain": {"data": "data_0", "field": "b"},
"name": "y",
"nice": True,
"range": [{"signal": "height"}, 0],
"type": "linear",
"zero": True,
},
],
"signals": [
{"name": "x_step", "value": 20},
{
"name": "width",
"update": "bandspace(domain('x').length, 0.1, 0.05) * x_step",
},
],
"style": "cell",
}
def test_vegalite_to_vega_mimebundle(require_altair_saver, vegalite_spec, vega_spec):
    # temporary fix for https://github.com/vega/vega-lite/issues/7776
def delete_none(axes):
for axis in axes:
for key, value in list(axis.items()):
if value is None:
del axis[key]
return axes
bundle = spec_to_mimebundle(
spec=vegalite_spec,
format="vega",
mode="vega-lite",
vega_version=alt.VEGA_VERSION,
vegalite_version=alt.VEGALITE_VERSION,
vegaembed_version=alt.VEGAEMBED_VERSION,
)
bundle["application/vnd.vega.v5+json"]["axes"] = delete_none(
bundle["application/vnd.vega.v5+json"]["axes"]
)
assert bundle == {"application/vnd.vega.v5+json": vega_spec}
def test_spec_to_vegalite_mimebundle(vegalite_spec):
bundle = spec_to_mimebundle(
spec=vegalite_spec,
mode="vega-lite",
format="vega-lite",
vegalite_version=alt.VEGALITE_VERSION,
)
assert bundle == {"application/vnd.vegalite.v4+json": vegalite_spec}
def test_spec_to_vega_mimebundle(vega_spec):
bundle = spec_to_mimebundle(
spec=vega_spec, mode="vega", format="vega", vega_version=alt.VEGA_VERSION
)
assert bundle == {"application/vnd.vega.v5+json": vega_spec}
def test_spec_to_json_mimebundle(vegalite_spec):
bundle = spec_to_mimebundle(
spec=vegalite_spec,
mode="vega-lite",
format="json",
)
assert bundle == {"application/json": vegalite_spec}

View File

@@ -0,0 +1,123 @@
from ..plugin_registry import PluginRegistry
from typing import Callable
class TypedCallableRegistry(PluginRegistry[Callable[[int], int]]):
pass
class GeneralCallableRegistry(PluginRegistry):
_global_settings = {"global_setting": None}
@property
def global_setting(self):
return self._global_settings["global_setting"]
@global_setting.setter
def global_setting(self, val):
self._global_settings["global_setting"] = val
def test_plugin_registry():
plugins = TypedCallableRegistry()
assert plugins.names() == []
assert plugins.active == ""
assert plugins.get() is None
assert repr(plugins) == "TypedCallableRegistry(active='', registered=[])"
plugins.register("new_plugin", lambda x: x ** 2)
assert plugins.names() == ["new_plugin"]
assert plugins.active == ""
assert plugins.get() is None
assert repr(plugins) == (
"TypedCallableRegistry(active='', " "registered=['new_plugin'])"
)
plugins.enable("new_plugin")
assert plugins.names() == ["new_plugin"]
assert plugins.active == "new_plugin"
assert plugins.get()(3) == 9
assert repr(plugins) == (
"TypedCallableRegistry(active='new_plugin', " "registered=['new_plugin'])"
)
def test_plugin_registry_extra_options():
plugins = GeneralCallableRegistry()
plugins.register("metadata_plugin", lambda x, p=2: x ** p)
plugins.enable("metadata_plugin")
assert plugins.get()(3) == 9
plugins.enable("metadata_plugin", p=3)
assert plugins.active == "metadata_plugin"
assert plugins.get()(3) == 27
# enabling without changing name
plugins.enable(p=2)
assert plugins.active == "metadata_plugin"
assert plugins.get()(3) == 9
def test_plugin_registry_global_settings():
plugins = GeneralCallableRegistry()
# we need some default plugin, but we won't do anything with it
plugins.register("default", lambda x: x)
plugins.enable("default")
# default value of the global flag
assert plugins.global_setting is None
# enabling changes the global state, not the options
plugins.enable(global_setting=True)
assert plugins.global_setting is True
assert plugins._options == {}
# context manager changes global state temporarily
with plugins.enable(global_setting="temp"):
assert plugins.global_setting == "temp"
assert plugins._options == {}
assert plugins.global_setting is True
assert plugins._options == {}
def test_plugin_registry_context():
plugins = GeneralCallableRegistry()
plugins.register("default", lambda x, p=2: x ** p)
# At first there is no plugin enabled
assert plugins.active == ""
assert plugins.options == {}
# Make sure the context is set and reset correctly
with plugins.enable("default", p=6):
assert plugins.active == "default"
assert plugins.options == {"p": 6}
assert plugins.active == ""
assert plugins.options == {}
# Make sure the context is reset even if there is an error
try:
with plugins.enable("default", p=6):
assert plugins.active == "default"
assert plugins.options == {"p": 6}
raise ValueError()
except ValueError:
pass
assert plugins.active == ""
assert plugins.options == {}
# Enabling without specifying name uses current name
plugins.enable("default", p=2)
with plugins.enable(p=6):
assert plugins.active == "default"
assert plugins.options == {"p": 6}
assert plugins.active == "default"
assert plugins.options == {"p": 2}

View File

@@ -0,0 +1,351 @@
# The contents of this file are automatically written by
# tools/generate_schema_wrapper.py. Do not modify directly.
import copy
import io
import json
import jsonschema
import pickle
import pytest
import numpy as np
from ..schemapi import (
UndefinedType,
SchemaBase,
Undefined,
_FromDict,
SchemaValidationError,
)
# Make tests inherit from _TestSchema, so that when we test from_dict it won't
# try to use SchemaBase objects defined elsewhere as wrappers.
class _TestSchema(SchemaBase):
@classmethod
def _default_wrapper_classes(cls):
return _TestSchema.__subclasses__()
class MySchema(_TestSchema):
_schema = {
"definitions": {
"StringMapping": {
"type": "object",
"additionalProperties": {"type": "string"},
},
"StringArray": {"type": "array", "items": {"type": "string"}},
},
"properties": {
"a": {"$ref": "#/definitions/StringMapping"},
"a2": {"type": "object", "additionalProperties": {"type": "number"}},
"b": {"$ref": "#/definitions/StringArray"},
"b2": {"type": "array", "items": {"type": "number"}},
"c": {"type": ["string", "number"]},
"d": {
"anyOf": [
{"$ref": "#/definitions/StringMapping"},
{"$ref": "#/definitions/StringArray"},
]
},
"e": {"items": [{"type": "string"}, {"type": "string"}]},
},
}
class StringMapping(_TestSchema):
_schema = {"$ref": "#/definitions/StringMapping"}
_rootschema = MySchema._schema
class StringArray(_TestSchema):
_schema = {"$ref": "#/definitions/StringArray"}
_rootschema = MySchema._schema
class Derived(_TestSchema):
_schema = {
"definitions": {
"Foo": {"type": "object", "properties": {"d": {"type": "string"}}},
"Bar": {"type": "string", "enum": ["A", "B"]},
},
"type": "object",
"additionalProperties": False,
"properties": {
"a": {"type": "integer"},
"b": {"type": "string"},
"c": {"$ref": "#/definitions/Foo"},
},
}
class Foo(_TestSchema):
_schema = {"$ref": "#/definitions/Foo"}
_rootschema = Derived._schema
class Bar(_TestSchema):
_schema = {"$ref": "#/definitions/Bar"}
_rootschema = Derived._schema
class SimpleUnion(_TestSchema):
_schema = {"anyOf": [{"type": "integer"}, {"type": "string"}]}
class DefinitionUnion(_TestSchema):
_schema = {"anyOf": [{"$ref": "#/definitions/Foo"}, {"$ref": "#/definitions/Bar"}]}
_rootschema = Derived._schema
class SimpleArray(_TestSchema):
_schema = {
"type": "array",
"items": {"anyOf": [{"type": "integer"}, {"type": "string"}]},
}
class InvalidProperties(_TestSchema):
_schema = {
"type": "object",
"properties": {"for": {}, "as": {}, "vega-lite": {}, "$schema": {}},
}
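
# from_dict should resolve $ref-based properties to their matching wrapper
# classes (StringMapping, StringArray) while leaving inline-schema properties
# as ordinary dicts and lists.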
def test_construct_multifaceted_schema():
dct = {
"a": {"foo": "bar"},
"a2": {"foo": 42},
"b": ["a", "b", "c"],
"b2": [1, 2, 3],
"c": 42,
"d": ["x", "y", "z"],
"e": ["a", "b"],
}
myschema = MySchema.from_dict(dct)
assert myschema.to_dict() == dct
myschema2 = MySchema(**dct)
assert myschema2.to_dict() == dct
assert isinstance(myschema.a, StringMapping)
assert isinstance(myschema.a2, dict)
assert isinstance(myschema.b, StringArray)
assert isinstance(myschema.b2, list)
assert isinstance(myschema.d, StringArray)
def test_schema_cases():
assert Derived(a=4, b="yo").to_dict() == {"a": 4, "b": "yo"}
assert Derived(a=4, c={"d": "hey"}).to_dict() == {"a": 4, "c": {"d": "hey"}}
assert Derived(a=4, b="5", c=Foo(d="val")).to_dict() == {
"a": 4,
"b": "5",
"c": {"d": "val"},
}
assert Foo(d="hello", f=4).to_dict() == {"d": "hello", "f": 4}
assert Derived().to_dict() == {}
assert Foo().to_dict() == {}
with pytest.raises(jsonschema.ValidationError):
# a needs to be an integer
Derived(a="yo").to_dict()
with pytest.raises(jsonschema.ValidationError):
# Foo.d needs to be a string
Derived(c=Foo(4)).to_dict()
with pytest.raises(jsonschema.ValidationError):
# no additional properties allowed
Derived(foo="bar").to_dict()
def test_round_trip():
D = {"a": 4, "b": "yo"}
assert Derived.from_dict(D).to_dict() == D
D = {"a": 4, "c": {"d": "hey"}}
assert Derived.from_dict(D).to_dict() == D
D = {"a": 4, "b": "5", "c": {"d": "val"}}
assert Derived.from_dict(D).to_dict() == D
D = {"d": "hello", "f": 4}
assert Foo.from_dict(D).to_dict() == D
def test_from_dict():
D = {"a": 4, "b": "5", "c": {"d": "val"}}
obj = Derived.from_dict(D)
assert obj.a == 4
assert obj.b == "5"
assert isinstance(obj.c, Foo)
def test_simple_type():
assert SimpleUnion(4).to_dict() == 4
def test_simple_array():
assert SimpleArray([4, 5, "six"]).to_dict() == [4, 5, "six"]
assert SimpleArray.from_dict(list("abc")).to_dict() == list("abc")
def test_definition_union():
obj = DefinitionUnion.from_dict("A")
assert isinstance(obj, Bar)
assert obj.to_dict() == "A"
obj = DefinitionUnion.from_dict("B")
assert isinstance(obj, Bar)
assert obj.to_dict() == "B"
obj = DefinitionUnion.from_dict({"d": "yo"})
assert isinstance(obj, Foo)
assert obj.to_dict() == {"d": "yo"}
def test_invalid_properties():
dct = {"for": 2, "as": 3, "vega-lite": 4, "$schema": 5}
invalid = InvalidProperties.from_dict(dct)
assert invalid["for"] == 2
assert invalid["as"] == 3
assert invalid["vega-lite"] == 4
assert invalid["$schema"] == 5
assert invalid.to_dict() == dct
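
# Undefined is the sentinel marking "property not set"; UndefinedType is
# written so that constructing it again hands back the same instance, which
# is what makes identity checks like the one below reliable.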
def test_undefined_singleton():
assert Undefined is UndefinedType()
@pytest.fixture
def dct():
return {
"a": {"foo": "bar"},
"a2": {"foo": 42},
"b": ["a", "b", "c"],
"b2": [1, 2, 3],
"c": 42,
"d": ["x", "y", "z"],
}
def test_copy_method(dct):
myschema = MySchema.from_dict(dct)
# Make sure copy is deep
copy = myschema.copy(deep=True)
copy["a"]["foo"] = "new value"
copy["b"] = ["A", "B", "C"]
copy["c"] = 164
assert myschema.to_dict() == dct
# If we ignore a value, changing the copy changes the original
copy = myschema.copy(deep=True, ignore=["a"])
copy["a"]["foo"] = "new value"
copy["b"] = ["A", "B", "C"]
copy["c"] = 164
mydct = myschema.to_dict()
assert mydct["a"]["foo"] == "new value"
assert mydct["b"][0] == dct["b"][0]
assert mydct["c"] == dct["c"]
# If copy is not deep, then changing copy below top level changes original
copy = myschema.copy(deep=False)
copy["a"]["foo"] = "baz"
copy["b"] = ["A", "B", "C"]
copy["c"] = 164
mydct = myschema.to_dict()
assert mydct["a"]["foo"] == "baz"
assert mydct["b"] == dct["b"]
assert mydct["c"] == dct["c"]
def test_copy_module(dct):
myschema = MySchema.from_dict(dct)
cp = copy.deepcopy(myschema)
cp["a"]["foo"] = "new value"
cp["b"] = ["A", "B", "C"]
cp["c"] = 164
assert myschema.to_dict() == dct
def test_attribute_error():
m = MySchema()
with pytest.raises(AttributeError) as err:
m.invalid_attribute
assert str(err.value) == (
"'MySchema' object has no attribute " "'invalid_attribute'"
)
def test_to_from_json(dct):
json_str = MySchema.from_dict(dct).to_json()
new_dct = MySchema.from_json(json_str).to_dict()
assert new_dct == dct
def test_to_from_pickle(dct):
myschema = MySchema.from_dict(dct)
output = io.BytesIO()
pickle.dump(myschema, output)
output.seek(0)
myschema_new = pickle.load(output)
assert myschema_new.to_dict() == dct
def test_class_with_no_schema():
class BadSchema(SchemaBase):
pass
with pytest.raises(ValueError) as err:
BadSchema(4)
assert str(err.value).startswith("Cannot instantiate object")
@pytest.mark.parametrize("use_json", [True, False])
def test_hash_schema(use_json):
classes = _TestSchema._default_wrapper_classes()
for cls in classes:
hsh1 = _FromDict.hash_schema(cls._schema, use_json=use_json)
hsh2 = _FromDict.hash_schema(cls._schema, use_json=use_json)
assert hsh1 == hsh2
assert hash(hsh1) == hash(hsh2)
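
# SchemaValidationError subclasses jsonschema.ValidationError (hence the
# except clause below), adding a friendlier message that points into the
# object hierarchy, e.g. test_schemapi.MySchema->a.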
def test_schema_validation_error():
try:
MySchema(a={"foo": 4})
the_err = None
except jsonschema.ValidationError as err:
the_err = err
assert isinstance(the_err, SchemaValidationError)
message = str(the_err)
assert message.startswith("Invalid specification")
assert "test_schemapi.MySchema->a" in message
assert "validating {!r}".format(the_err.validator) in message
assert the_err.message in message
def test_serialize_numpy_types():
m = MySchema(
a={"date": np.datetime64("2019-01-01")},
a2={"int64": np.int64(1), "float64": np.float64(2)},
b2=np.arange(4),
)
out = m.to_json()
dct = json.loads(out)
assert dct == {
"a": {"date": "2019-01-01T00:00:00"},
"a2": {"int64": 1, "float64": 2},
"b2": [0, 1, 2, 3],
}

View File

@@ -0,0 +1,10 @@
"""
Test http server
"""
from altair.utils.server import serve, MockServer
def test_serve():
html = "<html><title>Title</title><body><p>Content</p></body></html>"
serve(html, open_browser=False, http_server=MockServer)

View File

@@ -0,0 +1,192 @@
import pytest
import warnings
import json
import numpy as np
import pandas as pd
from .. import infer_vegalite_type, sanitize_dataframe
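
# infer_vegalite_type maps array-like data onto Vega-Lite encoding types
# (quantitative/nominal/temporal), emitting a warning and falling back to
# "nominal" when the type cannot be determined.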
def test_infer_vegalite_type():
def _check(arr, typ):
assert infer_vegalite_type(arr) == typ
_check(np.arange(5, dtype=float), "quantitative")
_check(np.arange(5, dtype=int), "quantitative")
_check(np.zeros(5, dtype=bool), "nominal")
_check(pd.date_range("2012", "2013"), "temporal")
_check(pd.timedelta_range(365, periods=12), "temporal")
nulled = pd.Series(np.random.randint(10, size=10))
nulled[0] = None
_check(nulled, "quantitative")
_check(["a", "b", "c"], "nominal")
if hasattr(pytest, "warns"): # added in pytest 2.8
with pytest.warns(UserWarning):
_check([], "nominal")
else:
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
_check([], "nominal")
def test_sanitize_dataframe():
# create a dataframe with various types
df = pd.DataFrame(
{
"s": list("abcde"),
"f": np.arange(5, dtype=float),
"i": np.arange(5, dtype=int),
"b": np.array([True, False, True, True, False]),
"d": pd.date_range("2012-01-01", periods=5, freq="H"),
"c": pd.Series(list("ababc"), dtype="category"),
"c2": pd.Series([1, "A", 2.5, "B", None], dtype="category"),
"o": pd.Series([np.array(i) for i in range(5)]),
"p": pd.date_range("2012-01-01", periods=5, freq="H").tz_localize("UTC"),
}
)
# add some nulls
df.iloc[0, df.columns.get_loc("s")] = None
df.iloc[0, df.columns.get_loc("f")] = np.nan
df.iloc[0, df.columns.get_loc("d")] = pd.NaT
df.iloc[0, df.columns.get_loc("o")] = np.array(np.nan)
# JSON serialize. This will fail on non-sanitized dataframes
print(df[["s", "c2"]])
df_clean = sanitize_dataframe(df)
print(df_clean[["s", "c2"]])
print(df_clean[["s", "c2"]].to_dict())
s = json.dumps(df_clean.to_dict(orient="records"))
print(s)
# Re-construct pandas dataframe
df2 = pd.read_json(s)
# Re-order the columns to match df
df2 = df2[df.columns]
# Re-apply original types
for col in df:
if str(df[col].dtype).startswith("datetime"):
# astype(datetime) introduces time-zone issues:
# to_datetime() does not.
utc = isinstance(df[col].dtype, pd.core.dtypes.dtypes.DatetimeTZDtype)
df2[col] = pd.to_datetime(df2[col], utc=utc)
else:
df2[col] = df2[col].astype(df[col].dtype)
# pandas doesn't properly recognize np.array(np.nan), so change it here
df.iloc[0, df.columns.get_loc("o")] = np.nan
assert df.equals(df2)
def test_sanitize_dataframe_colnames():
df = pd.DataFrame(np.arange(12).reshape(4, 3))
# Test that RangeIndex is converted to strings
df = sanitize_dataframe(df)
    assert all(isinstance(col, str) for col in df.columns)
# Test that non-string columns result in an error
df.columns = [4, "foo", "bar"]
with pytest.raises(ValueError) as err:
sanitize_dataframe(df)
assert str(err.value).startswith("Dataframe contains invalid column name: 4.")
def test_sanitize_dataframe_timedelta():
df = pd.DataFrame({"r": pd.timedelta_range(start="1 day", periods=4)})
with pytest.raises(ValueError) as err:
sanitize_dataframe(df)
assert str(err.value).startswith('Field "r" has type "timedelta')
def test_sanitize_dataframe_infs():
df = pd.DataFrame({"x": [0, 1, 2, np.inf, -np.inf, np.nan]})
df_clean = sanitize_dataframe(df)
assert list(df_clean.dtypes) == [object]
assert list(df_clean["x"]) == [0, 1, 2, None, None, None]
@pytest.mark.skipif(
not hasattr(pd, "Int64Dtype"),
reason="Nullable integers not supported in pandas v{}".format(pd.__version__),
)
def test_sanitize_nullable_integers():
df = pd.DataFrame(
{
"int_np": [1, 2, 3, 4, 5],
"int64": pd.Series([1, 2, 3, None, 5], dtype="UInt8"),
"int64_nan": pd.Series([1, 2, 3, float("nan"), 5], dtype="Int64"),
"float": [1.0, 2.0, 3.0, 4, 5.0],
"float_null": [1, 2, None, 4, 5],
"float_inf": [1, 2, None, 4, (float("inf"))],
}
)
df_clean = sanitize_dataframe(df)
assert {col.dtype.name for _, col in df_clean.iteritems()} == {"object"}
result_python = {col_name: list(col) for col_name, col in df_clean.iteritems()}
assert result_python == {
"int_np": [1, 2, 3, 4, 5],
"int64": [1, 2, 3, None, 5],
"int64_nan": [1, 2, 3, None, 5],
"float": [1.0, 2.0, 3.0, 4.0, 5.0],
"float_null": [1.0, 2.0, None, 4.0, 5.0],
"float_inf": [1.0, 2.0, None, 4.0, None],
}
@pytest.mark.skipif(
not hasattr(pd, "StringDtype"),
reason="dedicated String dtype not supported in pandas v{}".format(pd.__version__),
)
def test_sanitize_string_dtype():
df = pd.DataFrame(
{
"string_object": ["a", "b", "c", "d"],
"string_string": pd.array(["a", "b", "c", "d"], dtype="string"),
"string_object_null": ["a", "b", None, "d"],
"string_string_null": pd.array(["a", "b", None, "d"], dtype="string"),
}
)
df_clean = sanitize_dataframe(df)
assert {col.dtype.name for _, col in df_clean.iteritems()} == {"object"}
result_python = {col_name: list(col) for col_name, col in df_clean.iteritems()}
assert result_python == {
"string_object": ["a", "b", "c", "d"],
"string_string": ["a", "b", "c", "d"],
"string_object_null": ["a", "b", None, "d"],
"string_string_null": ["a", "b", None, "d"],
}
@pytest.mark.skipif(
not hasattr(pd, "BooleanDtype"),
reason="Nullable boolean dtype not supported in pandas v{}".format(pd.__version__),
)
def test_sanitize_boolean_dtype():
df = pd.DataFrame(
{
"bool_none": pd.array([True, False, None], dtype="boolean"),
"none": pd.array([None, None, None], dtype="boolean"),
"bool": pd.array([True, False, True], dtype="boolean"),
}
)
df_clean = sanitize_dataframe(df)
assert {col.dtype.name for _, col in df_clean.iteritems()} == {"object"}
result_python = {col_name: list(col) for col_name, col in df_clean.iteritems()}
assert result_python == {
"bool_none": [True, False, None],
"none": [None, None, None],
"bool": [True, False, True],
}