first commit

This commit is contained in:
Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

View File

@@ -0,0 +1,238 @@
# ___
# \./ DANGER: This project implements some code generation
# .--.O.--. techniques involving string concatenation.
# \/ \/ If you look at it, you might die.
#
r"""
Installation
************
.. code-block:: bash
pip install fastjsonschema
Only Python 3.3 and higher is supported.
About
*****
``fastjsonschema`` implements validation of JSON documents by JSON schema.
The library implements JSON schema drafts 04, 06 and 07. The main purpose is
to have a really fast implementation. See some numbers:
* Probably most popular ``jsonschema`` can take up to 5 seconds for valid inputs
and 1.2 seconds for invalid inputs.
* Second most popular ``json-spec`` is even worse with up to 7.2 and 1.7 seconds.
* Last ``validictory``, now deprecated, is much better with 370 or 23 milliseconds,
but it does not follow all standards and it can be still slow for some purposes.
With this library you can gain big improvements as ``fastjsonschema`` takes
only about 25 milliseconds for valid inputs and 2 milliseconds for invalid ones.
Pretty amazing, right? :-)
Technically it works by generating the most stupid code on the fly which is fast but
is hard to write by hand. The best efficiency is achieved when compiled once and used
many times, of course. It works similarly to regular expressions. But you can also
generate the code to the file which is even slightly faster.
You can measure the performance on your own computer or server with the included script:
.. code-block:: bash
$ make performance
fast_compiled valid ==> 0.0464646
fast_compiled invalid ==> 0.0030227
fast_file valid ==> 0.0461219
fast_file invalid ==> 0.0030608
fast_not_compiled valid ==> 11.4627202
fast_not_compiled invalid ==> 2.5726230
jsonschema valid ==> 7.5844927
jsonschema invalid ==> 1.9204665
jsonschema_compiled valid ==> 0.6938364
jsonschema_compiled invalid ==> 0.0359244
jsonspec valid ==> 9.0715843
jsonspec invalid ==> 2.1650488
validictory valid ==> 0.4874793
validictory invalid ==> 0.0232244
This library follows and implements `JSON schema draft-04, draft-06, and draft-07
<http://json-schema.org>`_. The standard is not always perfectly clear, so I also recommend
checking out `understanding JSON schema <https://spacetelescope.github.io/understanding-json-schema>`_.
Note that there are some differences compared to JSON schema standard:
* Regular expressions are full Python ones, not only what JSON schema allows. It's easier
to allow everything and also it's faster to compile without limits. So keep in mind that when
you will use a more advanced regular expression, it may not work with other library or in
other languages.
* Because Python matches new line for a dollar in regular expressions (``a$`` matches ``a`` and ``a\\n``),
instead of ``$`` is used ``\Z`` and all dollars in your regular expression are changed to ``\\Z``
as well. When you want to use dollar as regular character, you have to escape it (``\$``).
* JSON schema says you can use keyword ``default`` for providing default values. This implementation
uses that and always returns transformed input data.
API
***
"""
from functools import partial, update_wrapper
from .draft04 import CodeGeneratorDraft04
from .draft06 import CodeGeneratorDraft06
from .draft07 import CodeGeneratorDraft07
from .exceptions import JsonSchemaException, JsonSchemaValueException, JsonSchemaDefinitionException
from .ref_resolver import RefResolver
from .version import VERSION
# Names exported as the public API of the package: the version string, the
# exception classes and the three entry points defined below.
__all__ = (
    'VERSION',
    'JsonSchemaException',
    'JsonSchemaValueException',
    'JsonSchemaDefinitionException',
    'validate',
    'compile',
    'compile_to_code',
)
def validate(definition, data, handlers={}, formats={}, use_default=True):
    """
    One-shot validation: compile ``definition`` and immediately apply the
    resulting validator to ``data``.

    Meant for lazy programmers or for use cases where validation is needed
    only once, so there is nothing to gain from compiling first. Use it only
    when you do not care about performance, because the schema is compiled
    again on every call (even though it will still be faster than
    alternative implementations).

    .. code-block:: python

        import fastjsonschema

        fastjsonschema.validate({'type': 'string'}, 'hello')
        # same as: compile({'type': 'string'})('hello')

    All arguments except ``data`` are forwarded unchanged to :any:`compile`;
    the return value (and any raised exception) is whatever the compiled
    validator produces for ``data``.

    Preferred is to use :any:`compile` function.
    """
    validator = compile(definition, handlers, formats, use_default)
    return validator(data)
#TODO: Change use_default to False when upgrading to version 3.
# pylint: disable=redefined-builtin,dangerous-default-value,exec-used
def compile(definition, handlers={}, formats={}, use_default=True):
    """
    Generates validation function for validating JSON schema passed in ``definition``.
    Example:

    .. code-block:: python

        import fastjsonschema

        validate = fastjsonschema.compile({'type': 'string'})
        validate('hello')

    This implementation supports keyword ``default`` (can be turned off
    by passing `use_default=False`):

    .. code-block:: python

        validate = fastjsonschema.compile({
            'type': 'object',
            'properties': {
                'a': {'type': 'number', 'default': 42},
            },
        })

        data = validate({})
        assert data == {'a': 42}

    Supported implementations are draft-04, draft-06 and draft-07. Which version
    should be used is determined by ``$schema`` in your ``definition``. When not
    specified, the latest implementation is used (draft-07).

    .. code-block:: python

        validate = fastjsonschema.compile({
            '$schema': 'http://json-schema.org/draft-04/schema',
            'type': 'number',
        })

    You can pass mapping from URI to function that should be used to retrieve
    remote schemes used in your ``definition`` in parameter ``handlers``.

    Also, you can pass mapping for custom formats. Key is the name of your
    formatter and value can be regular expression which will be compiled or
    callback returning `bool` (or you can raise your own exception).

    .. code-block:: python

        validate = fastjsonschema.compile(definition, formats={
            'foo': r'foo|bar',
            'bar': lambda value: value in ('foo', 'bar'),
        })

    Exception :any:`JsonSchemaDefinitionException` is raised when generating the
    code fails (bad definition).

    Exception :any:`JsonSchemaValueException` is raised from generated function when
    validation fails (data do not follow the definition).
    """
    resolver, code_generator = _factory(definition, handlers, formats, use_default)
    namespace = code_generator.global_state
    # The generated source is executed with ``namespace`` as its globals only
    # (no separate locals), so the generated functions land in that dict and
    # can recursively call each other.
    # NOTE(review): ``exec`` runs code derived from ``definition``; only
    # compile schemas from sources you trust.
    exec(code_generator.func_code, namespace)
    validator = namespace[resolver.get_scope_name()]
    if not formats:
        return validator
    # Bind the user's formats to the validator while keeping the metadata
    # (name, docstring, ...) of the generated function.
    return update_wrapper(partial(validator, custom_formats=formats), validator)
# pylint: disable=dangerous-default-value
def compile_to_code(definition, handlers={}, formats={}, use_default=True):
    """
    Generates validation code for validating JSON schema passed in ``definition``.
    Example:

    .. code-block:: python

        import fastjsonschema

        code = fastjsonschema.compile_to_code({'type': 'string'})
        with open('your_file.py', 'w') as f:
            f.write(code)

    You can also use it as a script. The schema is parsed with ``json.loads``,
    so it has to be valid JSON (double-quoted strings):

    .. code-block:: bash

        echo '{"type": "string"}' | python3 -m fastjsonschema > your_file.py
        python3 -m fastjsonschema '{"type": "string"}' > your_file.py

    The returned source consists of a ``VERSION = "..."`` line, the code of
    the global state and the code of the validation functions, separated by
    newlines.

    Exception :any:`JsonSchemaDefinitionException` is raised when generating the
    code fails (bad definition).
    """
    _, code_generator = _factory(definition, handlers, formats, use_default)
    version_line = 'VERSION = "' + VERSION + '"'
    return '\n'.join((
        version_line,
        code_generator.global_state_code,
        code_generator.func_code,
    ))
def _factory(definition, handlers, formats={}, use_default=True):
    """
    Build the reference resolver and the draft-specific code generator for
    ``definition`` and return them as a ``(resolver, code_generator)`` pair.

    The resolver always starts with a fresh, empty ``store``; the generator
    class is picked by :func:`_get_code_generator_class`.
    """
    resolver = RefResolver.from_schema(definition, handlers=handlers, store={})
    generator_class = _get_code_generator_class(definition)
    code_generator = generator_class(
        definition,
        resolver=resolver,
        formats=formats,
        use_default=use_default,
    )
    return resolver, code_generator
def _get_code_generator_class(schema):
    """
    Pick the code generator class matching the draft named in ``$schema``.

    A schema whose ``$schema`` value contains ``draft-04`` or ``draft-06``
    gets the corresponding generator; everything else, including schemas
    without ``$schema``, falls back to draft-07.
    """
    # Since draft-06 a schema can be just a boolean value, which carries no
    # ``$schema`` key to inspect.
    declared_version = schema.get('$schema', '') if isinstance(schema, dict) else ''
    known_drafts = (
        ('draft-04', CodeGeneratorDraft04),
        ('draft-06', CodeGeneratorDraft06),
    )
    for marker, generator_class in known_drafts:
        if marker in declared_version:
            return generator_class
    return CodeGeneratorDraft07

View File

@@ -0,0 +1,19 @@
import json
import sys
from . import compile_to_code
def main():
    """
    Command line entry point: print validation code for a JSON schema.

    The schema is taken from the single command line argument when exactly
    one is given, otherwise it is read from standard input. It is parsed as
    JSON and the code returned by ``compile_to_code`` is written to standard
    output.
    """
    raw_definition = sys.argv[1] if len(sys.argv) == 2 else sys.stdin.read()
    print(compile_to_code(json.loads(raw_definition)))


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,592 @@
import decimal
import re
from .exceptions import JsonSchemaDefinitionException
from .generator import CodeGenerator, enforce_list
# Maps every JSON schema type name to the Python type name(s) that
# ``generate_type`` splices, as source text, into the ``isinstance(...)``
# check of the generated code.
JSON_TYPE_TO_PYTHON_TYPE = {
    'null': 'NoneType',
    'boolean': 'bool',
    'number': 'int, float',
    'integer': 'int',
    'string': 'str',
    'array': 'list, tuple',
    'object': 'dict',
}

# Used by ``generate_pattern`` to replace ``$`` with ``\Z`` in user patterns.
DOLLAR_FINDER = re.compile(r"(?<!\\)\$") # Finds any un-escaped $ (including inside []-sets)
# pylint: disable=too-many-instance-attributes,too-many-public-methods
class CodeGeneratorDraft04(CodeGenerator):
# pylint: disable=line-too-long
# I was thinking about using ipaddress module instead of regexps for example, but it's big
# difference in performance. With a module I got this difference: over 100 ms with a module
# vs. 9 ms with a regex! Other modules are also ineffective or not available in standard
# library. Some regexps are not 100% precise but good enough, fast and without dependencies.
FORMAT_REGEXS = {
'date-time': r'^\d{4}-[01]\d-[0-3]\d(t|T)[0-2]\d:[0-5]\d:[0-5]\d(?:\.\d+)?(?:[+-][0-2]\d:[0-5]\d|[+-][0-2]\d[0-5]\d|z|Z)\Z',
'email': r'^[^@]+@[^@]+\.[^@]+\Z',
'hostname': r'^(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])\.)*([A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9\-]{0,61}[A-Za-z0-9])\Z',
'ipv4': r'^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\Z',
'ipv6': r'^(?:(?:[0-9A-Fa-f]{1,4}:){6}(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))|::(?:[0-9A-Fa-f]{1,4}:){5}(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))|(?:[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){4}(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))|(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){3}(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))|(?:(?:[0-9A-Fa-f]{1,4}:){,2}[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){2}(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))|(?:(?:[0-9A-Fa-f]{1,4}:){,3}[0-9A-Fa-f]{1,4})?::[0-9A-Fa-f]{1,4}:(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))|(?:(?:[0-9A-Fa-f]{1,4}:){,4}[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))|(?:(?:[0-9A-Fa-f]{1,4}:){,5}[0-9A-Fa-f]{1,4})?::[0-9A-Fa-f]{1,4}|(?:(?:[0-9A-Fa-f]{1,4}:){,6}[0-9A-Fa-f]{1,4})?::)\Z',
'uri': r'^\w+:(\/?\/?)[^\s]+\Z',
}
def __init__(self, definition, resolver=None, formats={}, use_default=True):
    """
    Set up the generator and register one generate function per keyword
    handled by this draft.

    ``formats`` is the mapping of custom formats consulted by
    ``generate_format`` (and exposed through ``global_state``);
    ``use_default`` controls whether ``default`` values are written into the
    data by ``generate_properties`` and ``generate_items``.
    """
    super().__init__(definition, resolver)
    self._custom_formats = formats
    self._use_default = use_default
    # Running number that gives every anyOf/oneOf block its own counter
    # variable in the generated code.
    self._any_or_one_of_count = 0
    keyword_generators = (
        ('type', self.generate_type),
        ('enum', self.generate_enum),
        ('allOf', self.generate_all_of),
        ('anyOf', self.generate_any_of),
        ('oneOf', self.generate_one_of),
        ('not', self.generate_not),
        ('minLength', self.generate_min_length),
        ('maxLength', self.generate_max_length),
        ('pattern', self.generate_pattern),
        ('format', self.generate_format),
        ('minimum', self.generate_minimum),
        ('maximum', self.generate_maximum),
        ('multipleOf', self.generate_multiple_of),
        ('minItems', self.generate_min_items),
        ('maxItems', self.generate_max_items),
        ('uniqueItems', self.generate_unique_items),
        ('items', self.generate_items),
        ('minProperties', self.generate_min_properties),
        ('maxProperties', self.generate_max_properties),
        ('required', self.generate_required),
        # Check dependencies before properties generates default values.
        ('dependencies', self.generate_dependencies),
        ('properties', self.generate_properties),
        ('patternProperties', self.generate_pattern_properties),
        ('additionalProperties', self.generate_additional_properties),
    )
    self._json_keywords_to_function.update(keyword_generators)
@property
def global_state(self):
    """
    Global state of the parent generator extended with the user supplied
    custom formats under the ``custom_formats`` key.
    """
    state = super().global_state
    state['custom_formats'] = self._custom_formats
    return state
def generate_type(self):
    """
    Validation of type. Can be one type or list of types.

    .. code-block:: python

        {'type': 'string'}
        {'type': ['string', 'number']}

    Raises :any:`JsonSchemaDefinitionException` for an unknown type name.
    """
    allowed = enforce_list(self._definition['type'])
    try:
        python_types = ', '.join(JSON_TYPE_TO_PYTHON_TYPE[json_type] for json_type in allowed)
    except KeyError as exc:
        raise JsonSchemaDefinitionException('Unknown type: {}'.format(exc))

    # ``bool`` is a subclass of ``int`` in Python, so booleans have to be
    # rejected explicitly when only numeric types are allowed.
    wants_numeric = 'number' in allowed or 'integer' in allowed
    if wants_numeric and 'boolean' not in allowed:
        bool_guard = ' or isinstance({variable}, bool)'.format(variable=self._variable)
    else:
        bool_guard = ''

    with self.l('if not isinstance({variable}, ({})){}:', python_types, bool_guard):
        self.exc('{name} must be {}', ' or '.join(allowed), rule='type')
def generate_enum(self):
    """
    Means that only value specified in the enum is valid.

    .. code-block:: python

        {
            'enum': ['a', 'b'],
        }

    Raises :any:`JsonSchemaDefinitionException` when ``enum`` is not an array.
    """
    choices = self._definition['enum']
    if isinstance(choices, (list, tuple)):
        with self.l('if {variable} not in {enum}:'):
            self.exc('{name} must be one of {}', self.e(choices), rule='enum')
        return
    raise JsonSchemaDefinitionException('enum must be an array')
def generate_all_of(self):
    """
    Means that value have to be valid by all of those definitions. It's like put it in
    one big definition.

    .. code-block:: python

        {
            'allOf': [
                {'type': 'number'},
                {'minimum': 5},
            ],
        }

    Valid values for this definition are 5, 6, 7, ... but not 4 or 'abc' for example.
    """
    # Every sub-definition is generated against the very same variable.
    for sub_definition in self._definition['allOf']:
        self.generate_func_code_block(
            sub_definition,
            self._variable,
            self._variable_name,
            clear_variables=True,
        )
def generate_any_of(self):
    """
    Means that value have to be valid by any of those definitions. It can also be valid
    by all of them.

    .. code-block:: python

        {
            'anyOf': [
                {'type': 'number', 'minimum': 10},
                {'type': 'number', 'maximum': 5},
            ],
        }

    Valid values for this definition are 3, 4, 5, 10, 11, ... but not 8 for example.
    """
    self._any_or_one_of_count += 1
    # Snapshot the number now: nested definitions may bump the shared counter.
    block_id = self._any_or_one_of_count
    self.l('{variable}_any_of_count{count} = 0', count=block_id)

    for sub_definition in self._definition['anyOf']:
        # Once one alternative has passed there is no need to pay for
        # another expensive try-except.
        with self.l('if not {variable}_any_of_count{count}:', count=block_id, optimize=False):
            with self.l('try:', optimize=False):
                self.generate_func_code_block(
                    sub_definition,
                    self._variable,
                    self._variable_name,
                    clear_variables=True,
                )
                self.l('{variable}_any_of_count{count} += 1', count=block_id)
            self.l('except JsonSchemaValueException: pass')

    with self.l('if not {variable}_any_of_count{count}:', count=block_id, optimize=False):
        self.exc('{name} must be valid by one of anyOf definition', rule='anyOf')
def generate_one_of(self):
    """
    Means that value have to be valid by only one of those definitions. It can't be valid
    by two or more of them.

    .. code-block:: python

        {
            'oneOf': [
                {'type': 'number', 'multipleOf': 3},
                {'type': 'number', 'multipleOf': 5},
            ],
        }

    Valid values for this definition are 3, 5, 6, ... but not 15 for example.
    """
    self._any_or_one_of_count += 1
    # Snapshot the number now: nested definitions may bump the shared counter.
    block_id = self._any_or_one_of_count
    self.l('{variable}_one_of_count{count} = 0', count=block_id)

    for sub_definition in self._definition['oneOf']:
        # Two matches already mean failure (exactly one is required), so the
        # remaining expensive try-except blocks can be skipped.
        with self.l('if {variable}_one_of_count{count} < 2:', count=block_id, optimize=False):
            with self.l('try:', optimize=False):
                self.generate_func_code_block(
                    sub_definition,
                    self._variable,
                    self._variable_name,
                    clear_variables=True,
                )
                self.l('{variable}_one_of_count{count} += 1', count=block_id)
            self.l('except JsonSchemaValueException: pass')

    with self.l('if {variable}_one_of_count{count} != 1:', count=block_id):
        self.exc('{name} must be valid exactly by one of oneOf definition', rule='oneOf')
def generate_not(self):
    """
    Means that value have not to be valid by this definition.

    .. code-block:: python

        {'not': {'type': 'null'}}

    Valid values for this definition are 'hello', 42, {} ... but not None.

    Since draft 06 definition can be boolean. False means nothing, True
    means everything is invalid. An empty definition (``{}``) accepts every
    value, so negating it rejects every value as well.
    """
    not_definition = self._definition['not']
    if not_definition is True:
        self.exc('{name} must not be there', rule='not')
    elif not_definition is False:
        return
    elif not not_definition:
        # The empty schema is satisfied by everything, including falsy data
        # such as 0, '' or [], so the negation has to fail unconditionally
        # rather than only for truthy values.
        self.exc('{name} must not be valid by not definition', rule='not')
    else:
        # The value is valid only when validating it against the nested
        # definition raises.
        with self.l('try:', optimize=False):
            self.generate_func_code_block(not_definition, self._variable, self._variable_name)
        self.l('except JsonSchemaValueException: pass')
        with self.l('else:'):
            self.exc('{name} must not be valid by not definition', rule='not')
def generate_min_length(self):
    """
    Strings must have at least ``minLength`` characters; other types are
    not checked.

    Raises :any:`JsonSchemaDefinitionException` when ``minLength`` is not
    an integer.
    """
    with self.l('if isinstance({variable}, str):'):
        self.create_variable_with_length()
        lower_bound = self._definition['minLength']
        if not isinstance(lower_bound, int):
            raise JsonSchemaDefinitionException('minLength must be a number')
        with self.l('if {variable}_len < {minLength}:'):
            self.exc('{name} must be longer than or equal to {minLength} characters', rule='minLength')
def generate_max_length(self):
    """
    Strings must have at most ``maxLength`` characters; other types are
    not checked.

    Raises :any:`JsonSchemaDefinitionException` when ``maxLength`` is not
    an integer.
    """
    with self.l('if isinstance({variable}, str):'):
        self.create_variable_with_length()
        upper_bound = self._definition['maxLength']
        if not isinstance(upper_bound, int):
            raise JsonSchemaDefinitionException('maxLength must be a number')
        with self.l('if {variable}_len > {maxLength}:'):
            self.exc('{name} must be shorter than or equal to {maxLength} characters', rule='maxLength')
def generate_pattern(self):
    """
    Strings must match the regular expression in ``pattern`` (searched
    anywhere in the string); other types are not checked.

    Un-escaped ``$`` is compiled as ``\\Z`` so that it does not match before
    a trailing new line.
    """
    with self.l('if isinstance({variable}, str):'):
        raw_pattern = self._definition['pattern']
        # Escaped form that can sit inside the double-quoted error message.
        message_pattern = raw_pattern.replace('\\', '\\\\').replace('"', '\\"')
        # The compiled regex is stored under the original pattern text,
        # which is also the key used by the generated lookup below.
        self._compile_regexps[raw_pattern] = re.compile(DOLLAR_FINDER.sub(r'\\Z', raw_pattern))
        with self.l('if not REGEX_PATTERNS[{}].search({variable}):', repr(raw_pattern)):
            self.exc('{name} must match pattern {}', message_pattern, rule='pattern')
def generate_format(self):
    """
    Means that value have to be in specified format. For example date, email or other.

    .. code-block:: python

        {'format': 'email'}

    Valid value for this definition is user@example.com but not @username

    Raises :any:`JsonSchemaDefinitionException` for an unknown format.
    """
    with self.l('if isinstance({variable}, str):'):
        wanted = self._definition['format']
        # Custom formats are looked up first - user is allowed to override
        # default formats.
        if wanted in self._custom_formats:
            user_format = self._custom_formats[wanted]
            if not isinstance(user_format, str):
                # Anything that is not a regex string is called as a check.
                with self.l('if not custom_formats["{}"]({variable}):', wanted):
                    self.exc('{name} must be {}', wanted, rule='format')
            else:
                self._generate_format(wanted, wanted + '_re_pattern', user_format)
        elif wanted in self.FORMAT_REGEXS:
            self._generate_format(wanted, wanted + '_re_pattern', self.FORMAT_REGEXS[wanted])
        # Format regex is used only in meta schemas.
        elif wanted == 'regex':
            with self.l('try:', optimize=False):
                self.l('re.compile({variable})')
            with self.l('except Exception:'):
                self.exc('{name} must be a valid regex', rule='format')
        else:
            raise JsonSchemaDefinitionException('Unknown format: {}'.format(wanted))
def _generate_format(self, format_name, regexp_name, regexp):
    """
    Generate a regex based format check, but only when the current
    definition asks for ``format_name``.

    ``regexp`` is compiled and stored under ``regexp_name`` unless an entry
    with that name already exists.
    """
    if self._definition['format'] != format_name:
        return
    if regexp_name not in self._compile_regexps:
        self._compile_regexps[regexp_name] = re.compile(regexp)
    with self.l('if not REGEX_PATTERNS["{}"].match({variable}):', regexp_name):
        self.exc('{name} must be {}', format_name, rule='format')
def generate_minimum(self):
    """
    Numbers must not be smaller than ``minimum``; with a truthy
    ``exclusiveMinimum`` they must be strictly bigger.

    Raises :any:`JsonSchemaDefinitionException` when ``minimum`` is not a number.
    """
    with self.l('if isinstance({variable}, (int, float)):'):
        if not isinstance(self._definition['minimum'], (int, float)):
            raise JsonSchemaDefinitionException('minimum must be a number')
        if self._definition.get('exclusiveMinimum', False):
            condition = 'if {variable} <= {minimum}:'
            message = '{name} must be bigger than {minimum}'
        else:
            condition = 'if {variable} < {minimum}:'
            message = '{name} must be bigger than or equal to {minimum}'
        with self.l(condition):
            self.exc(message, rule='minimum')
def generate_maximum(self):
    """
    Numbers must not be bigger than ``maximum``; with a truthy
    ``exclusiveMaximum`` they must be strictly smaller.

    Raises :any:`JsonSchemaDefinitionException` when ``maximum`` is not a number.
    """
    with self.l('if isinstance({variable}, (int, float)):'):
        if not isinstance(self._definition['maximum'], (int, float)):
            raise JsonSchemaDefinitionException('maximum must be a number')
        if self._definition.get('exclusiveMaximum', False):
            condition = 'if {variable} >= {maximum}:'
            message = '{name} must be smaller than {maximum}'
        else:
            condition = 'if {variable} > {maximum}:'
            message = '{name} must be smaller than or equal to {maximum}'
        with self.l(condition):
            self.exc(message, rule='maximum')
def generate_multiple_of(self):
    """
    Numbers must be a multiple of ``multipleOf``, i.e. dividing by it must
    give a whole number.

    Raises :any:`JsonSchemaDefinitionException` when ``multipleOf`` is not
    a number.
    """
    with self.l('if isinstance({variable}, (int, float)):'):
        divisor = self._definition['multipleOf']
        if not isinstance(divisor, (int, float)):
            raise JsonSchemaDefinitionException('multipleOf must be a number')
        if not isinstance(divisor, float):
            self.l('quotient = {variable} / {multipleOf}')
        else:
            # Float divisors need decimals for a proper check, because for
            # example 19.01 / 0.01 = 1901.0000000000002.
            self._extra_imports_lines.append('from decimal import Decimal')
            self._extra_imports_objects['Decimal'] = decimal.Decimal
            self.l('quotient = Decimal(repr({variable})) / Decimal(repr({multipleOf}))')
        with self.l('if int(quotient) != quotient:'):
            self.exc('{name} must be multiple of {multipleOf}', rule='multipleOf')
def generate_min_items(self):
    """
    Arrays must contain at least ``minItems`` items; other types are not
    checked.

    Raises :any:`JsonSchemaDefinitionException` when ``minItems`` is not an
    integer.
    """
    self.create_variable_is_list()
    with self.l('if {variable}_is_list:'):
        lower_bound = self._definition['minItems']
        if not isinstance(lower_bound, int):
            raise JsonSchemaDefinitionException('minItems must be a number')
        self.create_variable_with_length()
        with self.l('if {variable}_len < {minItems}:'):
            self.exc('{name} must contain at least {minItems} items', rule='minItems')
def generate_max_items(self):
    """
    Arrays must contain at most ``maxItems`` items; other types are not
    checked.

    Raises :any:`JsonSchemaDefinitionException` when ``maxItems`` is not an
    integer.
    """
    self.create_variable_is_list()
    with self.l('if {variable}_is_list:'):
        upper_bound = self._definition['maxItems']
        if not isinstance(upper_bound, int):
            raise JsonSchemaDefinitionException('maxItems must be a number')
        self.create_variable_with_length()
        with self.l('if {variable}_len > {maxItems}:'):
            self.exc('{name} must contain less than or equal to {maxItems} items', rule='maxItems')
def generate_unique_items(self):
    """
    Arrays must not contain the same item twice; other types are not checked.

    The generated check compares the length of the array with the size of a
    ``set`` built from its items, which was the fastest of the measured
    variants. With Python 3.4 module ``timeit`` recommended this solutions:

    .. code-block:: python

        >>> timeit.timeit("len(x) > len(set(x))", "x=range(100)+range(100)", number=100000)
        0.5839540958404541
        >>> timeit.timeit("len({}.fromkeys(x)) == len(x)", "x=range(100)+range(100)", number=100000)
        0.7094449996948242
        >>> timeit.timeit("seen = set(); any(i in seen or seen.add(i) for i in x)", "x=range(100)+range(100)", number=100000)
        2.0819358825683594
        >>> timeit.timeit("np.unique(x).size == len(x)", "x=range(100)+range(100); import numpy as np", number=100000)
        2.1439831256866455
    """
    self.create_variable_is_list()
    with self.l('if {variable}_is_list:'):
        # Helper emitted into the generated code. It turns an item into
        # something hashable so it can be put into a set: mappings become a
        # frozenset of (key, converted value) pairs, lists become tuples of
        # converted values, and booleans become their string form (so that
        # True/False are not treated as equal to 1/0).
        self.l(
            'def fn(var): '
            'return frozenset(dict((k, fn(v)) '
            'for k, v in var.items()).items()) '
            'if hasattr(var, "items") else tuple(fn(v) '
            'for v in var) '
            'if isinstance(var, (dict, list)) else str(var) '
            'if isinstance(var, bool) else var')
        self.create_variable_with_length()
        # Fewer distinct items than items means at least one duplicate.
        with self.l('if {variable}_len > len(set(fn({variable}_x) for {variable}_x in {variable})):'):
            self.exc('{name} must contain unique items', rule='uniqueItems')
def generate_items(self):
    """
    Means array is valid only when all items are valid by this definition.

    .. code-block:: python

        {
            'items': [
                {'type': 'integer'},
                {'type': 'string'},
            ],
        }

    With a list of definitions (as above) each definition is applied to the
    item at the same position, so the first item must be an integer and the
    second one a string; items beyond the list are governed by
    ``additionalItems``. With a single definition every item of the array
    has to be valid by it.

    Since draft 06 definition can be also boolean. True means nothing, False
    means everything is invalid.
    """
    items_definition = self._definition['items']
    if items_definition is True:
        return

    self.create_variable_is_list()
    with self.l('if {variable}_is_list:'):
        self.create_variable_with_length()
        if items_definition is False:
            # No item is allowed: any non-empty array is invalid.
            with self.l('if {variable}:'):
                self.exc('{name} must not be there', rule='items')
        elif isinstance(items_definition, list):
            # Positional form: the definition at index ``idx`` validates the
            # item at index ``idx``, but only when the array is long enough.
            for idx, item_definition in enumerate(items_definition):
                with self.l('if {variable}_len > {}:', idx):
                    self.l('{variable}__{0} = {variable}[{0}]', idx)
                    self.generate_func_code_block(
                        item_definition,
                        '{}__{}'.format(self._variable, idx),
                        '{}[{}]'.format(self._variable_name, idx),
                    )
                # The item is missing: append its default value instead.
                if self._use_default and isinstance(item_definition, dict) and 'default' in item_definition:
                    self.l('else: {variable}.append({})', repr(item_definition['default']))

            if 'additionalItems' in self._definition:
                if self._definition['additionalItems'] is False:
                    # Nothing may follow the positionally defined items.
                    with self.l('if {variable}_len > {}:', len(items_definition)):
                        self.exc('{name} must contain only specified items', rule='items')
                else:
                    # The remaining items are validated by ``additionalItems``;
                    # enumeration starts at the first extra index.
                    with self.l('for {variable}_x, {variable}_item in enumerate({variable}[{0}:], {0}):', len(items_definition)):
                        self.generate_func_code_block(
                            self._definition['additionalItems'],
                            '{}_item'.format(self._variable),
                            '{}[{{{}_x}}]'.format(self._variable_name, self._variable),
                        )
        else:
            # Single definition applied to every item; an empty definition
            # generates no check at all.
            if items_definition:
                with self.l('for {variable}_x, {variable}_item in enumerate({variable}):'):
                    self.generate_func_code_block(
                        items_definition,
                        '{}_item'.format(self._variable),
                        '{}[{{{}_x}}]'.format(self._variable_name, self._variable),
                    )
def generate_min_properties(self):
    """
    Objects must contain at least ``minProperties`` properties; other types
    are not checked.

    Raises :any:`JsonSchemaDefinitionException` when ``minProperties`` is
    not an integer.
    """
    self.create_variable_is_dict()
    with self.l('if {variable}_is_dict:'):
        lower_bound = self._definition['minProperties']
        if not isinstance(lower_bound, int):
            raise JsonSchemaDefinitionException('minProperties must be a number')
        self.create_variable_with_length()
        with self.l('if {variable}_len < {minProperties}:'):
            self.exc('{name} must contain at least {minProperties} properties', rule='minProperties')
def generate_max_properties(self):
    """
    Objects must contain at most ``maxProperties`` properties; other types
    are not checked.

    Raises :any:`JsonSchemaDefinitionException` when ``maxProperties`` is
    not an integer.
    """
    self.create_variable_is_dict()
    with self.l('if {variable}_is_dict:'):
        upper_bound = self._definition['maxProperties']
        if not isinstance(upper_bound, int):
            raise JsonSchemaDefinitionException('maxProperties must be a number')
        self.create_variable_with_length()
        with self.l('if {variable}_len > {maxProperties}:'):
            self.exc('{name} must contain less than or equal to {maxProperties} properties', rule='maxProperties')
def generate_required(self):
    """
    Objects must contain every property listed in ``required``; other types
    are not checked.

    Raises :any:`JsonSchemaDefinitionException` when ``required`` is not an
    array.
    """
    self.create_variable_is_dict()
    with self.l('if {variable}_is_dict:'):
        required_names = self._definition['required']
        if not isinstance(required_names, (list, tuple)):
            raise JsonSchemaDefinitionException('required must be an array')
        self.create_variable_with_length()
        with self.l('if not all(prop in {variable} for prop in {required}):'):
            self.exc('{name} must contain {} properties', self.e(required_names), rule='required')
def generate_properties(self):
    """
    Means object with defined keys.

    .. code-block:: python

        {
            'properties': {
                'key': {'type': 'number'},
            },
        }

    Valid object is containing key called 'key' and value any number.

    Raises :any:`JsonSchemaDefinitionException` when a property definition
    is neither an object nor a boolean.
    """
    self.create_variable_is_dict()
    with self.l('if {variable}_is_dict:'):
        self.create_variable_keys()
        for key, prop_definition in self._definition['properties'].items():
            # Reduce the key to characters usable in a variable name.
            identifier = re.sub(r'($[^a-zA-Z]|[^a-zA-Z0-9])', '', key)
            if not isinstance(prop_definition, (dict, bool)):
                raise JsonSchemaDefinitionException('{}[{}] must be object'.format(self._variable, identifier))

            prop_variable = '{}__{}'.format(self._variable, identifier)
            with self.l('if "{}" in {variable}_keys:', self.e(key)):
                # A handled key is taken out of the set of remaining keys.
                self.l('{variable}_keys.remove("{}")', self.e(key))
                self.l('{variable}__{0} = {variable}["{1}"]', identifier, self.e(key))
                self.generate_func_code_block(
                    prop_definition,
                    prop_variable,
                    '{}.{}'.format(self._variable_name, self.e(key)),
                    clear_variables=True,
                )

            has_default = isinstance(prop_definition, dict) and 'default' in prop_definition
            if self._use_default and has_default:
                # The key is missing in the data: store the default value.
                self.l('else: {variable}["{}"] = {}', self.e(key), repr(prop_definition['default']))
def generate_pattern_properties(self):
    """
    Means object with defined keys as patterns.

    .. code-block:: python

        {
            'patternProperties': {
                '^x': {'type': 'number'},
            },
        }

    Valid object is containing key starting with a 'x' and value any number.

    As for the ``pattern`` keyword, un-escaped ``$`` in a pattern is compiled
    as ``\\Z`` so that it does not match before a trailing new line.
    """
    self.create_variable_is_dict()
    with self.l('if {variable}_is_dict:'):
        self.create_variable_keys()
        pattern_definitions = self._definition['patternProperties']
        for pattern in pattern_definitions:
            # Same end-of-string handling as in ``generate_pattern``. Both
            # methods store the compiled regex under the raw pattern text, so
            # they have to agree on how that text is compiled.
            self._compile_regexps[pattern] = re.compile(DOLLAR_FINDER.sub(r'\\Z', pattern))
        with self.l('for {variable}_key, {variable}_val in {variable}.items():'):
            for pattern, definition in pattern_definitions.items():
                with self.l('if REGEX_PATTERNS[{}].search({variable}_key):', repr(pattern)):
                    # A matched key is taken out of the set of remaining keys.
                    with self.l('if {variable}_key in {variable}_keys:'):
                        self.l('{variable}_keys.remove({variable}_key)')
                    self.generate_func_code_block(
                        definition,
                        '{}_val'.format(self._variable),
                        '{}.{{{}_key}}'.format(self._variable_name, self._variable),
                        clear_variables=True,
                    )
def generate_additional_properties(self):
    """
    Means object with keys with values defined by definition.

    .. code-block:: python

        {
            'properties': {
                'key': {'type': 'number'},
            }
            'additionalProperties': {'type': 'string'},
        }

    Valid object is containing key called 'key' and it's value any number and
    any other key with any string.

    ``True`` or an empty definition allows any additional property; a falsy
    definition (e.g. ``False``) forbids them all.
    """
    self.create_variable_is_dict()
    with self.l('if {variable}_is_dict:'):
        self.create_variable_keys()
        extra_definition = self._definition["additionalProperties"]
        if extra_definition is True or extra_definition == {}:
            return
        if not extra_definition:
            # Nothing extra is allowed: any remaining key is an error.
            with self.l('if {variable}_keys:'):
                self.exc('{name} must not contain "+str({variable}_keys)+" properties', rule='additionalProperties')
        else:
            declared_keys = list(self._definition.get("properties", {}))
            with self.l('for {variable}_key in {variable}_keys:'):
                # Keys declared in ``properties`` are not additional ones.
                with self.l('if {variable}_key not in {}:', declared_keys):
                    self.l('{variable}_value = {variable}.get({variable}_key)')
                    self.generate_func_code_block(
                        extra_definition,
                        '{}_value'.format(self._variable),
                        '{}.{{{}_key}}'.format(self._variable_name, self._variable),
                    )
def generate_dependencies(self):
    """
    Means when object has property, it needs to have also other property.

    .. code-block:: python

        {
            'dependencies': {
                'bar': ['foo'],
            },
        }

    Valid object is containing only foo, both bar and foo or none of them, but not
    object with only bar.

    Since draft 06 definition can be boolean or empty array. True and empty array
    means nothing, False means that key cannot be there at all.
    """
    self.create_variable_is_dict()
    with self.l('if {variable}_is_dict:'):
        is_empty = True
        for key, values in self._definition["dependencies"].items():
            # Nothing to check for this key.
            if values == [] or values is True:
                continue
            is_empty = False
            with self.l('if "{}" in {variable}:', self.e(key)):
                if values is False:
                    # The key goes into generated source, so it is escaped
                    # like everywhere else in this method; a raw ``"`` in it
                    # would otherwise break the generated code.
                    self.exc('{} in {name} must not be there', self.e(key), rule='dependencies')
                elif isinstance(values, list):
                    # Every listed property has to be present as well.
                    for value in values:
                        with self.l('if "{}" not in {variable}:', self.e(value)):
                            self.exc('{name} missing dependency {} for {}', self.e(value), self.e(key), rule='dependencies')
                else:
                    # Schema dependency: the whole object has to be valid by
                    # the nested definition.
                    self.generate_func_code_block(values, self._variable, self._variable_name, clear_variables=True)
        if is_empty:
            # Keep the generated ``if`` block syntactically valid.
            self.l('pass')

View File

@@ -0,0 +1,185 @@
from .draft04 import CodeGeneratorDraft04, JSON_TYPE_TO_PYTHON_TYPE
from .exceptions import JsonSchemaDefinitionException
from .generator import enforce_list
class CodeGeneratorDraft06(CodeGeneratorDraft04):
FORMAT_REGEXS = dict(CodeGeneratorDraft04.FORMAT_REGEXS, **{
'json-pointer': r'^(/(([^/~])|(~[01]))*)*\Z',
'uri-reference': r'^(\w+:(\/?\/?))?[^#\\\s]*(#[^\\\s]*)?\Z',
'uri-template': (
r'^(?:(?:[^\x00-\x20\"\'<>%\\^`{|}]|%[0-9a-f]{2})|'
r'\{[+#./;?&=,!@|]?(?:[a-z0-9_]|%[0-9a-f]{2})+'
r'(?::[1-9][0-9]{0,3}|\*)?(?:,(?:[a-z0-9_]|%[0-9a-f]{2})+'
r'(?::[1-9][0-9]{0,3}|\*)?)*\})*\Z'
),
})
def __init__(self, definition, resolver=None, formats={}, use_default=True):
    """
    Initialise the parent generator and additionally register the generate
    functions for the keywords handled by this class.
    """
    super().__init__(definition, resolver, formats, use_default)
    added_keyword_generators = (
        ('exclusiveMinimum', self.generate_exclusive_minimum),
        ('exclusiveMaximum', self.generate_exclusive_maximum),
        ('propertyNames', self.generate_property_names),
        ('contains', self.generate_contains),
        ('const', self.generate_const),
    )
    self._json_keywords_to_function.update(added_keyword_generators)
def _generate_func_code_block(self, definition):
if isinstance(definition, bool):
self.generate_boolean_schema()
elif '$ref' in definition:
# needed because ref overrides any sibling keywords
self.generate_ref()
else:
self.run_generate_functions(definition)
def generate_boolean_schema(self):
"""
Means that schema can be specified by boolean.
True means everything is valid, False everything is invalid.
"""
if self._definition is False:
self.exc('{name} must not be there')
def generate_type(self):
"""
Validation of type. Can be one type or list of types.
Since draft 06 a float without fractional part is an integer.
.. code-block:: python
{'type': 'string'}
{'type': ['string', 'number']}
"""
types = enforce_list(self._definition['type'])
try:
python_types = ', '.join(JSON_TYPE_TO_PYTHON_TYPE[t] for t in types)
except KeyError as exc:
raise JsonSchemaDefinitionException('Unknown type: {}'.format(exc))
extra = ''
if 'integer' in types:
extra += ' and not (isinstance({variable}, float) and {variable}.is_integer())'.format(
variable=self._variable,
)
if ('number' in types or 'integer' in types) and 'boolean' not in types:
extra += ' or isinstance({variable}, bool)'.format(variable=self._variable)
with self.l('if not isinstance({variable}, ({})){}:', python_types, extra):
self.exc('{name} must be {}', ' or '.join(types), rule='type')
def generate_exclusive_minimum(self):
with self.l('if isinstance({variable}, (int, float)):'):
if not isinstance(self._definition['exclusiveMinimum'], (int, float)):
raise JsonSchemaDefinitionException('exclusiveMinimum must be an integer or a float')
with self.l('if {variable} <= {exclusiveMinimum}:'):
self.exc('{name} must be bigger than {exclusiveMinimum}', rule='exclusiveMinimum')
def generate_exclusive_maximum(self):
with self.l('if isinstance({variable}, (int, float)):'):
if not isinstance(self._definition['exclusiveMaximum'], (int, float)):
raise JsonSchemaDefinitionException('exclusiveMaximum must be an integer or a float')
with self.l('if {variable} >= {exclusiveMaximum}:'):
self.exc('{name} must be smaller than {exclusiveMaximum}', rule='exclusiveMaximum')
def generate_property_names(self):
"""
Means that keys of object must to follow this definition.
.. code-block:: python
{
'propertyNames': {
'maxLength': 3,
},
}
Valid keys of object for this definition are foo, bar, ... but not foobar for example.
"""
property_names_definition = self._definition.get('propertyNames', {})
if property_names_definition is True:
pass
elif property_names_definition is False:
self.create_variable_keys()
with self.l('if {variable}_keys:'):
self.exc('{name} must not be there', rule='propertyNames')
else:
self.create_variable_is_dict()
with self.l('if {variable}_is_dict:'):
self.create_variable_with_length()
with self.l('if {variable}_len != 0:'):
self.l('{variable}_property_names = True')
with self.l('for {variable}_key in {variable}:'):
with self.l('try:'):
self.generate_func_code_block(
property_names_definition,
'{}_key'.format(self._variable),
self._variable_name,
clear_variables=True,
)
with self.l('except JsonSchemaValueException:'):
self.l('{variable}_property_names = False')
with self.l('if not {variable}_property_names:'):
self.exc('{name} must be named by propertyName definition', rule='propertyNames')
def generate_contains(self):
"""
Means that array must contain at least one defined item.
.. code-block:: python
{
'contains': {
'type': 'number',
},
}
Valid array is any with at least one number.
"""
self.create_variable_is_list()
with self.l('if {variable}_is_list:'):
contains_definition = self._definition['contains']
if contains_definition is False:
self.exc('{name} is always invalid', rule='contains')
elif contains_definition is True:
with self.l('if not {variable}:'):
self.exc('{name} must not be empty', rule='contains')
else:
self.l('{variable}_contains = False')
with self.l('for {variable}_key in {variable}:'):
with self.l('try:'):
self.generate_func_code_block(
contains_definition,
'{}_key'.format(self._variable),
self._variable_name,
clear_variables=True,
)
self.l('{variable}_contains = True')
self.l('break')
self.l('except JsonSchemaValueException: pass')
with self.l('if not {variable}_contains:'):
self.exc('{name} must contain one of contains definition', rule='contains')
def generate_const(self):
"""
Means that value is valid when is equeal to const definition.
.. code-block:: python
{
'const': 42,
}
Only valid value is 42 in this example.
"""
const = self._definition['const']
if isinstance(const, str):
const = '"{}"'.format(self.e(const))
with self.l('if {variable} != {}:', const):
self.exc('{name} must be same as const definition: {definition_rule}', rule='const')

View File

@@ -0,0 +1,116 @@
from .draft06 import CodeGeneratorDraft06
class CodeGeneratorDraft07(CodeGeneratorDraft06):
    """
    Code generator for draft 07 schemas, built on top of the draft 06 generator.

    Registers generators for ``if``/``then``/``else``, ``contentEncoding`` and
    ``contentMediaType`` and extends the known format regexes.
    """

    FORMAT_REGEXS = dict(CodeGeneratorDraft06.FORMAT_REGEXS, **{
        'date': r'^(?P<year>\d{4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})\Z',
        'iri': r'^\w+:(\/?\/?)[^\s]+\Z',
        'iri-reference': r'^(\w+:(\/?\/?))?[^#\\\s]*(#[^\\\s]*)?\Z',
        'idn-email': r'^[^@]+@[^@]+\.[^@]+\Z',
        #'idn-hostname': r'',
        'relative-json-pointer': r'^(?:0|[1-9][0-9]*)(?:#|(?:\/(?:[^~/]|~0|~1)*)*)\Z',
        #'regex': r'',
        'time': (
            r'^(?P<hour>\d{1,2}):(?P<minute>\d{1,2})'
            r'(?::(?P<second>\d{1,2})(?:\.(?P<microsecond>\d{1,6}))?'
            r'([zZ]|[+-]\d\d:\d\d)?)?\Z'
        ),
    })

    def __init__(self, definition, resolver=None, formats={}, use_default=True):
        # ``formats`` is only forwarded to the parent class; it is not mutated here.
        super().__init__(definition, resolver, formats, use_default)
        # pylint: disable=duplicate-code
        self._json_keywords_to_function.update((
            ('if', self.generate_if_then_else),
            ('contentEncoding', self.generate_content_encoding),
            ('contentMediaType', self.generate_content_media_type),
        ))

    def _generate_branch(self, definition):
        """
        Generate code for one ``if``/``then``/``else`` sub-schema.

        Emits ``pass`` when the sub-schema produced no code (for example ``{}``
        or ``True``) so the surrounding generated block stays syntactically valid.
        """
        lines_before = len(self._code)
        self.generate_func_code_block(
            definition,
            self._variable,
            self._variable_name,
            clear_variables=True
        )
        if len(self._code) == lines_before:
            self.l('pass')

    def generate_if_then_else(self):
        """
        Implementation of if-then-else.

        .. code-block:: python

            {
                'if': {
                    'exclusiveMaximum': 0,
                },
                'then': {
                    'minimum': -10,
                },
                'else': {
                    'multipleOf': 2,
                },
            }

        Valid values are any between -10 and 0 or any multiplication of two.
        """
        # The ``if`` schema runs inside ``try``; a validation error selects the
        # ``else`` schema and success selects the ``then`` schema.
        with self.l('try:', optimize=False):
            self._generate_branch(self._definition['if'])
        with self.l('except JsonSchemaValueException:'):
            if 'else' in self._definition:
                self._generate_branch(self._definition['else'])
            else:
                self.l('pass')
        if 'then' in self._definition:
            with self.l('else:'):
                self._generate_branch(self._definition['then'])

    def generate_content_encoding(self):
        """
        Means decoding value when it's encoded by base64.

        .. code-block:: python

            {
                'contentEncoding': 'base64',
            }
        """
        if self._definition['contentEncoding'] == 'base64':
            with self.l('if isinstance({variable}, str):'):
                with self.l('try:'):
                    self.l('import base64')
                    self.l('{variable} = base64.b64decode({variable})')
                with self.l('except Exception:'):
                    self.exc('{name} must be encoded by base64', rule='contentEncoding')
                # NOTE(review): after ``b64decode`` the value is ``bytes``, so this
                # comparison with a ``str`` literal looks like it can never be
                # true; kept unchanged, confirm the intent.
                with self.l('if {variable} == "":'):
                    self.exc('contentEncoding must be base64', rule='contentEncoding')

    def generate_content_media_type(self):
        """
        Means loading value when it's specified as JSON.

        .. code-block:: python

            {
                'contentMediaType': 'application/json',
            }
        """
        if self._definition['contentMediaType'] == 'application/json':
            # Bytes (e.g. the result of a previous base64 decoding) are turned
            # into text first, then the text is parsed as JSON.
            with self.l('if isinstance({variable}, bytes):'):
                with self.l('try:'):
                    self.l('{variable} = {variable}.decode("utf-8")')
                with self.l('except Exception:'):
                    self.exc('{name} must encoded by utf8', rule='contentMediaType')
            with self.l('if isinstance({variable}, str):'):
                with self.l('try:'):
                    self.l('import json')
                    self.l('{variable} = json.loads({variable})')
                with self.l('except Exception:'):
                    self.exc('{name} must be valid JSON', rule='contentMediaType')

View File

@@ -0,0 +1,51 @@
import re
# Separator pattern: one or more of ``.``, ``[`` and ``]``; used to split a
# dotted/indexed name such as ``data.property[index]`` into its parts.
SPLIT_RE = re.compile(r'[\.\[\]]+')
class JsonSchemaException(ValueError):
    """
    Root of the ``fastjsonschema`` exception hierarchy.

    Being a ``ValueError`` subclass, it is also caught by ``except ValueError``.
    """
class JsonSchemaValueException(JsonSchemaException):
    """
    Exception raised by validation function. Available properties:

     * ``message`` containing human-readable information what is wrong (e.g. ``data.property[index] must be smaller than or equal to 42``),
     * invalid ``value`` (e.g. ``60``),
     * ``name`` of a path in the data structure (e.g. ``data.property[index]``),
     * ``path`` as an array in the data structure (e.g. ``['data', 'property', 'index']``),
     * the whole ``definition`` which the ``value`` has to fulfil (e.g. ``{'type': 'number', 'maximum': 42}``),
     * ``rule`` which the ``value`` is breaking (e.g. ``maximum``)
     * and ``rule_definition`` (e.g. ``42``).

    .. versionchanged:: 2.14.0
        Added all extra properties.
    """

    def __init__(self, message, value=None, name=None, definition=None, rule=None):
        super().__init__(message)
        self.message = message
        self.value = value
        self.name = name
        self.definition = definition
        self.rule = rule

    @property
    def path(self):
        """
        ``name`` split on ``.``, ``[`` and ``]`` into a list of non-empty parts.

        Returns an empty list when no ``name`` was given.
        """
        if self.name is None:
            # ``name`` defaults to None; splitting None would raise TypeError.
            return []
        return [item for item in SPLIT_RE.split(self.name) if item != '']

    @property
    def rule_definition(self):
        """
        Value stored under ``rule`` in ``definition``, or None when either is
        missing or falsy.
        """
        if not self.rule or not self.definition:
            return None
        return self.definition.get(self.rule)
class JsonSchemaDefinitionException(JsonSchemaException):
    """
    Raised while generating the validation function, when the schema
    definition itself cannot be processed.
    """

View File

@@ -0,0 +1,312 @@
from collections import OrderedDict
import re
from .exceptions import JsonSchemaValueException, JsonSchemaDefinitionException
from .indent import indent
from .ref_resolver import RefResolver
def enforce_list(variable):
    """
    Return ``variable`` unchanged when it is a list, otherwise wrap it in a
    new single-item list.
    """
    return variable if isinstance(variable, list) else [variable]
# pylint: disable=too-many-instance-attributes,too-many-public-methods
class CodeGenerator:
    """
    This class is not supposed to be used directly. Anything
    inside of this class can be changed without notice.

    This class generates code of validation function from JSON
    schema object as string. Example:

    .. code-block:: python

        CodeGenerator(json_schema_definition).func_code
    """

    INDENT = 4  # spaces

    def __init__(self, definition, resolver=None):
        self._code = []
        self._compile_regexps = {}
        self._custom_formats = {}

        # Any extra library should be here to be imported only once.
        # Lines are imports to be printed in the file and objects
        # key-value pair to pass to compile function directly.
        self._extra_imports_lines = []
        self._extra_imports_objects = {}

        self._variables = set()
        self._indent = 0
        self._indent_last_line = None
        self._variable = None
        self._variable_name = None
        self._root_definition = definition
        self._definition = None

        # map schema URIs to validation function names for functions
        # that are not yet generated, but need to be generated
        self._needed_validation_functions = {}
        # validation function names that are already done
        self._validation_functions_done = set()

        if resolver is None:
            resolver = RefResolver.from_schema(definition, store={})
        self._resolver = resolver

        # add main function to `self._needed_validation_functions`
        self._needed_validation_functions[self._resolver.get_uri()] = self._resolver.get_scope_name()

        self._json_keywords_to_function = OrderedDict()

    @property
    def func_code(self):
        """
        Returns generated code of whole validation function as string.
        """
        self._generate_func_code()

        return '\n'.join(self._code)

    @property
    def global_state(self):
        """
        Returns global variables for generating function from ``func_code``. Includes
        compiled regular expressions and imports, so it does not have to do it every
        time when validation function is called.
        """
        self._generate_func_code()

        return dict(
            **self._extra_imports_objects,
            REGEX_PATTERNS=self._compile_regexps,
            re=re,
            JsonSchemaValueException=JsonSchemaValueException,
        )

    @property
    def global_state_code(self):
        """
        Returns global variables for generating function from ``func_code`` as code.
        Includes compiled regular expressions and imports.
        """
        self._generate_func_code()

        if not self._compile_regexps:
            # No patterns were collected, so ``re`` and REGEX_PATTERNS are left out.
            return '\n'.join(self._extra_imports_lines + [
                'from fastjsonschema import JsonSchemaValueException',
                '',
                '',
            ])
        return '\n'.join(self._extra_imports_lines + [
            'import re',
            'from fastjsonschema import JsonSchemaValueException',
            '',
            '',
            'REGEX_PATTERNS = ' + serialize_regexes(self._compile_regexps),
            '',
        ])

    def _generate_func_code(self):
        """
        Run the generation once; later calls reuse the already generated lines.
        """
        if not self._code:
            self.generate_func_code()

    def generate_func_code(self):
        """
        Creates base code of validation function and calls helper
        for creating code by definition.
        """
        self.l('NoneType = type(None)')
        # Generate parts that are referenced and not yet generated
        while self._needed_validation_functions:
            # During generation of validation function, could be needed to generate
            # new one that is added again to `_needed_validation_functions`.
            # Therefore usage of while instead of for loop.
            uri, name = self._needed_validation_functions.popitem()
            self.generate_validation_function(uri, name)

    def generate_validation_function(self, uri, name):
        """
        Generate validation function for given uri with given name
        """
        self._validation_functions_done.add(uri)
        self.l('')
        with self._resolver.resolving(uri) as definition:
            with self.l('def {}(data, custom_formats={{}}):', name):
                self.generate_func_code_block(definition, 'data', 'data', clear_variables=True)
                self.l('return data')

    def generate_func_code_block(self, definition, variable, variable_name, clear_variables=False):
        """
        Creates validation rules for current definition.

        ``definition``, ``variable`` and ``variable_name`` become the current
        generation context for the duration of the call; the previous context
        is restored afterwards, also when generation raises. With
        ``clear_variables`` the set of already created helper variables is
        replaced by an empty one for the nested block and restored afterwards.
        """
        backup = self._definition, self._variable, self._variable_name
        self._definition, self._variable, self._variable_name = definition, variable, variable_name
        if clear_variables:
            backup_variables = self._variables
            self._variables = set()

        try:
            self._generate_func_code_block(definition)
        finally:
            self._definition, self._variable, self._variable_name = backup
            if clear_variables:
                self._variables = backup_variables

    def _generate_func_code_block(self, definition):
        """
        Generate code for ``definition``, which must be a dict here.

        Raises ``JsonSchemaDefinitionException`` for any other type.
        """
        if not isinstance(definition, dict):
            raise JsonSchemaDefinitionException("definition must be an object")
        if '$ref' in definition:
            # needed because ref overrides any sibling keywords
            self.generate_ref()
        else:
            self.run_generate_functions(definition)

    def run_generate_functions(self, definition):
        """
        Call the registered generator of every keyword present in ``definition``,
        in registration order.
        """
        for key, func in self._json_keywords_to_function.items():
            if key in definition:
                func()

    def generate_ref(self):
        """
        Ref can be link to remote or local definition.

        .. code-block:: python

            {'$ref': 'http://json-schema.org/draft-04/schema#'}
            {
                'properties': {
                    'foo': {'type': 'integer'},
                    'bar': {'$ref': '#/properties/foo'}
                }
            }
        """
        with self._resolver.in_scope(self._definition['$ref']):
            name = self._resolver.get_scope_name()
            uri = self._resolver.get_uri()
            if uri not in self._validation_functions_done:
                self._needed_validation_functions[uri] = name
            # call validation function
            self.l('{}({variable}, custom_formats)', name)

    # pylint: disable=invalid-name
    @indent
    def l(self, line, *args, **kwds):
        """
        Short-cut of line. Used for inserting line. It's formatted with parameters
        ``variable``, ``variable_name`` (as ``name`` for short-cut), all keys from
        current JSON schema ``definition`` and also passed arguments in ``args``
        and named ``kwds``.

        .. code-block:: python

            self.l('if {variable} not in {enum}: raise JsonSchemaValueException("Wrong!")')

        When you want to indent block, use it as context manager. For example:

        .. code-block:: python

            with self.l('if {variable} not in {enum}:'):
                self.l('raise JsonSchemaValueException("Wrong!")')
        """
        spaces = ' ' * self.INDENT * self._indent

        name = self._variable_name
        if name and '{' in name:
            # The name contains placeholders; make the generated code fill
            # them in from its local variables when it runs.
            name = '"+"{}".format(**locals())+"'.format(self._variable_name)

        context = dict(
            self._definition or {},
            variable=self._variable,
            name=name,
            **kwds
        )
        line = line.format(*args, **context)
        # Every generated statement has to stay on one physical line.
        line = line.replace('\n', '\\n').replace('\r', '\\r')
        self._code.append(spaces + line)
        return line

    def e(self, string):
        """
        Short-cut of escape. Used for inserting user values into a string message.

        Backslashes and double quotes are escaped so that the value can be put
        between double quotes in the generated code without ending the literal
        early or being read as an escape sequence.

        .. code-block:: python

            self.l('raise JsonSchemaValueException("Variable: {}")', self.e(variable))
        """
        # Backslashes first, otherwise the ones added for quotes get doubled.
        return str(string).replace('\\', '\\\\').replace('"', '\\"')

    def exc(self, msg, *args, rule=None):
        """
        Short-cut for creating raising exception in the code.
        """
        msg = 'raise JsonSchemaValueException("'+msg+'", value={variable}, name="{name}", definition={definition}, rule={rule})'
        definition_rule = self.e(self._definition.get(rule) if isinstance(self._definition, dict) else None)
        self.l(msg, *args, definition=repr(self._definition), rule=repr(rule), definition_rule=definition_rule)

    def create_variable_with_length(self):
        """
        Append code for creating variable with length of that variable
        (for example length of list or dictionary) with name ``{variable}_len``.
        It can be called several times and always it's done only when that variable
        still does not exist.
        """
        variable_name = '{}_len'.format(self._variable)
        if variable_name in self._variables:
            return
        self._variables.add(variable_name)
        self.l('{variable}_len = len({variable})')

    def create_variable_keys(self):
        """
        Append code for creating variable with keys of that variable (dictionary)
        with a name ``{variable}_keys``. Similar to `create_variable_with_length`.
        """
        variable_name = '{}_keys'.format(self._variable)
        if variable_name in self._variables:
            return
        self._variables.add(variable_name)
        self.l('{variable}_keys = set({variable}.keys())')

    def create_variable_is_list(self):
        """
        Append code for creating variable with bool if it's instance of list
        with a name ``{variable}_is_list``. Similar to `create_variable_with_length`.
        """
        variable_name = '{}_is_list'.format(self._variable)
        if variable_name in self._variables:
            return
        self._variables.add(variable_name)
        self.l('{variable}_is_list = isinstance({variable}, (list, tuple))')

    def create_variable_is_dict(self):
        """
        Append code for creating variable with bool if it's instance of dict
        with a name ``{variable}_is_dict``. Similar to `create_variable_with_length`.
        """
        variable_name = '{}_is_dict'.format(self._variable)
        if variable_name in self._variables:
            return
        self._variables.add(variable_name)
        self.l('{variable}_is_dict = isinstance({variable}, dict)')
def serialize_regexes(patterns_dict):
    """
    Render a dict of compiled patterns as the source text of a dict literal,
    one ``key: re.compile(...)`` entry per line.
    """
    # Unfortunately using `pprint.pformat` is causing errors
    # specially with big regexes
    entries = []
    for key, compiled in patterns_dict.items():
        entries.append(repr(key) + ": " + repr_regex(compiled))
    body = ",\n ".join(entries)
    return '{\n ' + body + "\n}"
def repr_regex(regex):
    """
    Return source code of a ``re.compile`` call for the compiled ``regex``,
    listing those of the A, I, DEBUG, L, M, S and X flags that are set on it.
    """
    flag_names = ("A", "I", "DEBUG", "L", "M", "S", "X")
    active = ["re." + flag for flag in flag_names if regex.flags & getattr(re, flag)]
    suffix = ", " + " | ".join(active) if active else ""
    return "re.compile({!r}{})".format(regex.pattern, suffix)

View File

@@ -0,0 +1,28 @@
def indent(func):
    """
    Decorator for allowing to use method as normal method or with
    context manager for auto-indenting code blocks.

    The wrapper accepts an extra keyword-only ``optimize`` argument (default
    True) and returns an ``Indent`` context manager for the emitted line.
    """
    import functools

    @functools.wraps(func)  # keep the decorated method's name and docstring
    def wrapper(self, line, *args, optimize=True, **kwds):
        last_line = self._indent_last_line
        line = func(self, line, *args, **kwds)
        # When two blocks have the same condition (such as value has to be dict),
        # do the check only once and keep it under one block.
        if optimize and last_line == line:
            self._code.pop()
        self._indent_last_line = line
        return Indent(self, line)
    return wrapper
class Indent:
    """
    Context manager returned by methods decorated with ``indent``.

    Entering raises the owner's ``_indent`` by one; leaving lowers it again
    and records this block's line as the owner's ``_indent_last_line``.
    """

    def __init__(self, instance, line):
        self.instance = instance
        self.line = line

    def __enter__(self):
        owner = self.instance
        owner._indent = owner._indent + 1

    def __exit__(self, type_, value, traceback):
        owner = self.instance
        owner._indent = owner._indent - 1
        owner._indent_last_line = self.line

View File

@@ -0,0 +1,175 @@
"""
JSON Schema URI resolution scopes and dereferencing
https://tools.ietf.org/id/draft-zyp-json-schema-04.html#rfc.section.7
Code adapted from https://github.com/Julian/jsonschema
"""
import contextlib
import json
import re
from urllib import parse as urlparse
from urllib.parse import unquote
from urllib.request import urlopen
from .exceptions import JsonSchemaDefinitionException
def get_id(schema):
    """
    Return the schema identifier: the ``$id`` value when present, otherwise
    the ``id`` value, otherwise an empty string.
    """
    if '$id' in schema:
        return schema['$id']
    return schema.get('id', '')
def resolve_path(schema, fragment):
    """
    Return definition from path.

    Path is unescaped according https://tools.ietf.org/html/rfc6901

    ``fragment`` is a ``/``-separated path; leading slashes are ignored and
    an empty fragment returns ``schema`` itself. Inside a list the path part
    is used as an integer index.

    Raises ``JsonSchemaDefinitionException`` when a part cannot be resolved,
    including a list index that is not an integer or is out of range.
    """
    fragment = fragment.lstrip('/')
    parts = unquote(fragment).split('/') if fragment else []
    for part in parts:
        part = part.replace('~1', '/').replace('~0', '~')

        if isinstance(schema, list):
            try:
                schema = schema[int(part)]
            except (ValueError, IndexError) as exc:
                # Report a bad index the same way as a missing key.
                raise JsonSchemaDefinitionException('Unresolvable ref: {}'.format(part)) from exc
        elif part in schema:
            schema = schema[part]
        else:
            raise JsonSchemaDefinitionException('Unresolvable ref: {}'.format(part))

    return schema
def normalize(uri):
    """
    Return ``uri`` after a split/re-join round trip through ``urllib.parse``.
    """
    split_result = urlparse.urlsplit(uri)
    return split_result.geturl()
def resolve_remote(uri, handlers):
    """
    Resolve a remote ``uri``.

    ``handlers`` maps a URI scheme to a callable taking the URI and returning
    the schema. When the scheme of ``uri`` has a handler, its result is
    returned; otherwise the document is fetched and parsed as JSON.

    Raises ``JsonSchemaDefinitionException`` when the fetched document cannot
    be decoded.

    .. note::

        urllib library is used to fetch requests from the remote ``uri``
        if handlers does not define otherwise.
    """
    scheme = urlparse.urlsplit(uri).scheme
    if scheme in handlers:
        return handlers[scheme](uri)

    # NOTE(security): the URI is handed to ``urlopen`` as given; when schemas
    # may come from untrusted sources, consider supplying ``handlers`` for the
    # schemes that should be allowed.
    with urlopen(uri) as response:  # closes the connection on every exit path
        encoding = response.info().get_content_charset() or 'utf-8'
        try:
            return json.loads(response.read().decode(encoding))
        except ValueError as exc:
            raise JsonSchemaDefinitionException('{} failed to decode: {}'.format(uri, exc)) from exc
class RefResolver:
    """
    Resolver of JSON references (``$ref``) with a store of known schemas.
    """

    # pylint: disable=dangerous-default-value,too-many-arguments
    def __init__(self, base_uri, schema, store={}, cache=True, handlers={}):
        """
        `base_uri` is URI of the referring document from the `schema`.
        `store` is a dictionary that will be used to cache the fetched schemas
        (if `cache=True`).

        Please notice that you can have caching problems when compiling schemas
        with colliding `$ref`. To force overwriting use `cache=False` or
        explicitly pass the `store` argument (with a brand new dictionary)
        """
        self.base_uri = base_uri
        self.resolution_scope = base_uri
        self.schema = schema
        self.store = store
        self.cache = cache
        self.handlers = handlers
        self.walk(schema)

    @classmethod
    def from_schema(cls, schema, handlers={}, **kwargs):
        """
        Build a resolver whose base URI is the id of ``schema`` (an empty
        string when ``schema`` is not a dict).
        """
        base_uri = get_id(schema) if isinstance(schema, dict) else ''
        return cls(base_uri, schema, handlers=handlers, **kwargs)

    @contextlib.contextmanager
    def in_scope(self, scope: str):
        """
        Context manager joining ``scope`` onto the current resolution scope
        and putting the previous scope back on exit.
        """
        previous_scope = self.resolution_scope
        self.resolution_scope = urlparse.urljoin(previous_scope, scope)
        try:
            yield
        finally:
            self.resolution_scope = previous_scope

    @contextlib.contextmanager
    def resolving(self, ref: str):
        """
        Context manager which resolves a JSON ``ref``, enters the resolution
        scope of this ref and yields the referenced definition.
        """
        target = urlparse.urljoin(self.resolution_scope, ref)
        uri, fragment = urlparse.urldefrag(target)
        store_key = normalize(uri)

        if uri and store_key in self.store:
            schema = self.store[store_key]
        elif not uri or uri == self.base_uri:
            schema = self.schema
        else:
            schema = resolve_remote(uri, self.handlers)
            if self.cache:
                self.store[store_key] = schema

        previous = (self.base_uri, self.schema)
        self.base_uri = uri
        self.schema = schema
        try:
            with self.in_scope(uri):
                yield resolve_path(schema, fragment)
        finally:
            self.base_uri, self.schema = previous

    def get_uri(self):
        """
        Return the current resolution scope in normalized form.
        """
        return normalize(self.resolution_scope)

    def get_scope_name(self):
        """
        Get current scope and return it as a valid function name.
        """
        scope = unquote(self.resolution_scope)
        for token, replacement in (('~1', '_'), ('~0', '_'), ('"', '')):
            scope = scope.replace(token, replacement)
        name = re.sub(r'($[^a-zA-Z]|[^a-zA-Z0-9])', '_', 'validate_' + scope)
        return name.lower().rstrip('_')

    def _walk_children(self, node):
        """
        Walk every dict found directly among the values of ``node``.
        """
        for child in node.values():
            if isinstance(child, dict):
                self.walk(child)

    def walk(self, node: dict):
        """
        Walk through the schema: make string ``$ref`` values absolute against
        the current scope and register nodes carrying an ``id``/``$id`` in the
        store.
        """
        if isinstance(node, bool):
            return
        if '$ref' in node and isinstance(node['$ref'], str):
            node['$ref'] = urlparse.urljoin(self.resolution_scope, node['$ref'])
            return
        if ('$id' in node or 'id' in node) and isinstance(get_id(node), str):
            with self.in_scope(get_id(node)):
                self.store[normalize(self.resolution_scope)] = node
                self._walk_children(node)
            return
        self._walk_children(node)

View File

@@ -0,0 +1 @@
# Version string; presumably the release version of the package (confirm against packaging metadata).
VERSION = '2.15.3'