first commit

This commit is contained in:
Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

View File

@@ -0,0 +1,207 @@
"""The Jupyter notebook format
Use this module to read or write notebook files as particular nbformat versions.
"""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
from traitlets.log import get_logger
from . import v1, v2, v3, v4
from ._version import __version__, version_info
from .sentinel import Sentinel
# Names exported by ``from nbformat import *``.
__all__ = [
    "versions",
    "validate",
    "ValidationError",
    "convert",
    "from_dict",
    "NotebookNode",
    "current_nbformat",
    "current_nbformat_minor",
    "NBFormatError",
    "NO_CONVERT",
    "reads",
    "read",
    "writes",
    "write",
    "version_info",
    "__version__",
]
# Maps each major nbformat version number to the subpackage implementing it;
# ``writes`` below looks up the serializer module here.
versions = {
    1: v1,
    2: v2,
    3: v3,
    4: v4,
}
from . import reader
from .converter import convert
from .notebooknode import NotebookNode, from_dict
from .v4 import nbformat as current_nbformat
from .v4 import nbformat_minor as current_nbformat_minor
from .validator import ValidationError, validate
class NBFormatError(ValueError):
    """A notebook-format error; a plain ``ValueError`` subclass with no extra state."""
# no-conversion singleton
# The read/write helpers in this module compare their version argument against
# this object by identity (``is`` / ``is not``) to decide whether to convert.
NO_CONVERT = Sentinel(
    "NO_CONVERT",
    __name__,
    """Value to prevent nbformat to convert notebooks to most recent version.
""",
)
def reads(s, as_version, capture_validation_error=None, **kwargs):
    """Parse a notebook from a string and return it in the requested version.

    The input may hold a notebook of any version; it is converted to
    ``as_version`` unless conversion is disabled. Validation failures are
    logged rather than raised.

    Parameters
    ----------
    s : unicode
        The raw unicode string to read the notebook from.
    as_version : int
        The notebook format version to return. Pass ``nbformat.NO_CONVERT``
        to keep the notebook in the version it was read as.
    capture_validation_error : dict, optional
        If a dict is given and validation fails, the ValidationError instance
        is stored in it under the key ``"ValidationError"``.
    **kwargs
        Forwarded to ``reader.reads``.

    Returns
    -------
    nb : NotebookNode
        The notebook that was read.
    """
    notebook = reader.reads(s, **kwargs)
    if as_version is not NO_CONVERT:
        notebook = convert(notebook, as_version)

    try:
        validate(notebook)
    except ValidationError as err:
        # Invalid notebooks are still returned; the problem is only reported.
        get_logger().error("Notebook JSON is invalid: %s", err)
        if isinstance(capture_validation_error, dict):
            capture_validation_error["ValidationError"] = err
    return notebook
def writes(nb, version=NO_CONVERT, capture_validation_error=None, **kwargs):
    """Serialize a notebook to a JSON string in the given nbformat version.

    Validation failures are logged rather than raised.

    Parameters
    ----------
    nb : NotebookNode
        The notebook to write.
    version : int, optional
        The nbformat version to write. When omitted (or ``nbformat.NO_CONVERT``),
        the notebook's own version is used and no conversion is performed.
    capture_validation_error : dict, optional
        If a dict is given and validation fails, the ValidationError instance
        is stored in it under the key ``"ValidationError"``.
    **kwargs
        Forwarded to the version module's ``writes_json``.

    Returns
    -------
    s : unicode
        The notebook as a JSON string.
    """
    if version is NO_CONVERT:
        # Keep the notebook as-is; only discover which serializer to use.
        version, _ = reader.get_version(nb)
    else:
        nb = convert(nb, version)

    try:
        validate(nb)
    except ValidationError as err:
        get_logger().error("Notebook JSON is invalid: %s", err)
        if isinstance(capture_validation_error, dict):
            capture_validation_error["ValidationError"] = err

    serializer = versions[version]
    return serializer.writes_json(nb, **kwargs)
def read(fp, as_version, capture_validation_error=None, **kwargs):
    """Read a notebook from a file object or path as the given version.

    The file may hold a notebook of any version; see :func:`reads` for the
    conversion and validation behaviour.

    Parameters
    ----------
    fp : file or str
        A file-like object with a ``read`` method returning unicode, or a
        path to a file (opened as UTF-8 text).
    as_version : int
        The notebook format version to return. Pass ``nbformat.NO_CONVERT``
        to prevent conversion.
    capture_validation_error : dict, optional
        If a dict is given and validation fails, the ValidationError instance
        is stored in it under the key ``"ValidationError"``.

    Returns
    -------
    nb : NotebookNode
        The notebook that was read.
    """
    try:
        text = fp.read()
    except AttributeError:
        # ``fp`` has no usable ``read``: treat it as a filesystem path.
        with open(fp, encoding="utf-8") as handle:
            return reads(handle.read(), as_version, capture_validation_error, **kwargs)
    else:
        return reads(text, as_version, capture_validation_error, **kwargs)
def write(nb, fp, version=NO_CONVERT, capture_validation_error=None, **kwargs):
    """Write a notebook to a file object or path in the given nbformat version.

    The output always ends with a newline; one is appended when the
    serialized notebook lacks it.

    Parameters
    ----------
    nb : NotebookNode
        The notebook to write.
    fp : file or str
        A file-like object whose ``write`` accepts unicode, or a path to
        write to (opened as UTF-8 text).
    version : int, optional
        The nbformat version to write. When omitted (or ``nbformat.NO_CONVERT``),
        the notebook's own version is used and no conversion is performed.
    capture_validation_error : dict, optional
        If a dict is given and validation fails, the ValidationError instance
        is stored in it under the key ``"ValidationError"``.
    """
    text = writes(nb, version, capture_validation_error, **kwargs)
    if isinstance(text, bytes):
        text = text.decode("utf8")

    def _emit(stream):
        # Write the payload, then a trailing newline if it is missing.
        stream.write(text)
        if not text.endswith("\n"):
            stream.write("\n")

    try:
        _emit(fp)
    except AttributeError:
        # ``fp`` is not file-like: treat it as a filesystem path.
        with open(fp, "w", encoding="utf-8") as handle:
            _emit(handle)

View File

@@ -0,0 +1,40 @@
"""
A simple utility to import something by its string name.
Vendored from ipython_genutils
"""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
def import_item(name):
    """Import and return ``bar`` given the string ``foo.bar``.

    Calling ``bar = import_item("foo.bar")`` is the functional equivalent of
    executing the code ``from foo import bar``. An un-dotted name is imported
    as a top-level module.

    Parameters
    ----------
    name : string
        The fully qualified name of the module, package or object to import.

    Returns
    -------
    obj : object
        The imported module, or the attribute named by the last dotted
        component of ``name``.

    Raises
    ------
    ImportError
        If the parent module cannot be imported, or it has no attribute with
        the requested name (the original AttributeError is chained as the
        cause).
    """
    package, dot, obj = name.rpartition(".")
    if not dot:
        # called with un-dotted string
        return __import__(name)

    # called with 'foo.bar....'; a non-empty fromlist makes __import__ return
    # the innermost module instead of the top-level package.
    module = __import__(package, fromlist=[obj])
    try:
        return getattr(module, obj)
    except AttributeError as e:
        raise ImportError("No module named %s" % obj) from e

View File

@@ -0,0 +1,379 @@
"""
Vendoring of old ipython_genutils Struct
"""
"""A dict subclass that supports attribute style access.
Can probably be replaced by types.SimpleNamespace from Python 3.3
"""
# Only the Struct class is exported by ``from ... import *``.
__all__ = ["Struct"]
class Struct(dict):
    """A dict subclass with attribute style access.

    This dict subclass has a few extra features:

    * Attribute style access.
    * Protection of class members (like keys, items) when using attribute
      style access.
    * The ability to restrict assignment to only existing keys.
    * Intelligent merging.
    * Overloaded operators.
    """

    _allownew = True

    def __init__(self, *args, **kw):
        """Initialize with a dictionary, another Struct, or data.

        Parameters
        ----------
        args : dict, Struct
            Initialize with one dict or Struct
        kw : dict
            Initialize with key, value pairs.

        Examples
        --------
        >>> s = Struct(a=10,b=30)
        >>> s.a
        10
        >>> s.b
        30
        >>> s2 = Struct(s,c=30)
        >>> sorted(s2.keys())
        ['a', 'b', 'c']
        """
        # Bypass our own __setattr__, which would store the flag as a key.
        object.__setattr__(self, "_allownew", True)
        dict.__init__(self, *args, **kw)

    def __setitem__(self, key, value):
        """Set an item with check for allownew.

        Raises
        ------
        KeyError
            If new keys are disallowed and ``key`` is not already present.

        Examples
        --------
        >>> s = Struct()
        >>> s['a'] = 10
        >>> s.allow_new_attr(False)
        >>> s['a'] = 10
        >>> s['a']
        10
        >>> try:
        ...     s['b'] = 20
        ... except KeyError:
        ...     print('this is not allowed')
        ...
        this is not allowed
        """
        if not self._allownew and key not in self:
            # Wrap the key in a 1-tuple so tuple keys format instead of
            # breaking the %-interpolation.
            raise KeyError("can't create new attribute %s when allow_new_attr(False)" % (key,))
        dict.__setitem__(self, key, value)

    def __setattr__(self, key, value):
        """Set an attr with protection of class members.

        This calls :meth:`self.__setitem__` but convert :exc:`KeyError` to
        :exc:`AttributeError`.

        Examples
        --------
        >>> s = Struct()
        >>> s.a = 10
        >>> s.a
        10
        >>> try:
        ...     s.get = 10
        ... except AttributeError:
        ...     print("you can't set a class member")
        ...
        you can't set a class member
        """
        # If key is an str it might be a class member or instance var
        if isinstance(key, str):
            # I can't simply call hasattr here because it calls getattr, which
            # calls self.__getattr__, which returns True for keys in
            # self._data.  But I only want keys in the class and in
            # self.__dict__
            if key in self.__dict__ or hasattr(Struct, key):
                raise AttributeError("attr %s is a protected member of class Struct." % key)
        try:
            self.__setitem__(key, value)
        except KeyError as e:
            raise AttributeError(e) from e

    def __getattr__(self, key):
        """Get an attr by calling :meth:`dict.__getitem__`.

        Like :meth:`__setattr__`, this method converts :exc:`KeyError` to
        :exc:`AttributeError`.

        Examples
        --------
        >>> s = Struct(a=10)
        >>> s.a
        10
        >>> type(s.get)
        <... 'builtin_function_or_method'>
        >>> try:
        ...     s.b
        ... except AttributeError:
        ...     print("I don't have that key")
        ...
        I don't have that key
        """
        try:
            result = self[key]
        except KeyError:
            raise AttributeError(key)
        else:
            return result

    def __iadd__(self, other):
        """s += s2 is a shorthand for s.merge(s2).

        Examples
        --------
        >>> s = Struct(a=10,b=30)
        >>> s2 = Struct(a=20,c=40)
        >>> s += s2
        >>> sorted(s.keys())
        ['a', 'b', 'c']
        """
        self.merge(other)
        return self

    def __add__(self, other):
        """s + s2 -> New Struct made from s.merge(s2).

        Examples
        --------
        >>> s1 = Struct(a=10,b=30)
        >>> s2 = Struct(a=20,c=40)
        >>> s = s1 + s2
        >>> sorted(s.keys())
        ['a', 'b', 'c']
        """
        sout = self.copy()
        sout.merge(other)
        return sout

    def __sub__(self, other):
        """s1 - s2 -> remove keys in s2 from s1.

        Examples
        --------
        >>> s1 = Struct(a=10,b=30)
        >>> s2 = Struct(a=40)
        >>> s = s1 - s2
        >>> s
        {'b': 30}
        """
        sout = self.copy()
        sout -= other
        return sout

    def __isub__(self, other):
        """Inplace remove keys from self that are in other.

        Examples
        --------
        >>> s1 = Struct(a=10,b=30)
        >>> s2 = Struct(a=40)
        >>> s1 -= s2
        >>> s1
        {'b': 30}
        """
        for k in other.keys():
            if k in self:
                del self[k]
        return self

    def __dict_invert(self, data):
        """Helper function for merge.

        Takes a dictionary whose values are lists and returns a dict with
        the elements of each list as keys and the original keys as values.
        A string value is treated as a whitespace-separated list.
        """
        outdict = {}
        for k, lst in data.items():
            if isinstance(lst, str):
                lst = lst.split()
            for entry in lst:
                outdict[entry] = k
        return outdict

    def dict(self):
        """Return this Struct itself (it already is a dict)."""
        return self

    def copy(self):
        """Return a copy as a Struct.

        Examples
        --------
        >>> s = Struct(a=10,b=30)
        >>> s2 = s.copy()
        >>> type(s2) is Struct
        True
        """
        return Struct(dict.copy(self))

    def hasattr(self, key):
        """hasattr function available as a method.

        Implemented like has_key: only keys count, not class members.

        Examples
        --------
        >>> s = Struct(a=10)
        >>> s.hasattr('a')
        True
        >>> s.hasattr('b')
        False
        >>> s.hasattr('get')
        False
        """
        return key in self

    def allow_new_attr(self, allow=True):
        """Set whether new attributes can be created in this Struct.

        This can be used to catch typos by verifying that the attribute user
        tries to change already exists in this Struct.
        """
        object.__setattr__(self, "_allownew", allow)

    def merge(self, __loc_data__=None, __conflict_solve=None, **kw):
        """Merge two Structs with customizable conflict resolution.

        This is similar to :meth:`update`, but much more flexible. First, a
        dict is made from data+key=value pairs. When merging this dict with
        the Struct S, the optional dictionary 'conflict' is used to decide
        what to do.

        If conflict is not given, the default behavior is to preserve any keys
        with their current value (the opposite of the :meth:`update` method's
        behavior).

        Parameters
        ----------
        __loc_data__ : dict, Struct, optional
            The data to merge into self. May be omitted when only keyword
            pairs are merged.
        __conflict_solve : dict
            The conflict policy dict.  The keys are binary functions used to
            resolve the conflict and the values are lists of strings naming
            the keys the conflict resolution function applies to.  Instead of
            a list of strings a space separated string can be used, like
            'a b c'.
        kw : dict
            Additional key, value pairs to merge in

        Notes
        -----
        The `__conflict_solve` dict is a dictionary of binary functions which will be used to
        solve key conflicts.  Here is an example::

            __conflict_solve = dict(
                func1=['a','b','c'],
                func2=['d','e']
            )

        In this case, the function :func:`func1` will be used to resolve
        keys 'a', 'b' and 'c' and the function :func:`func2` will be used for
        keys 'd' and 'e'.  This could also be written as::

            __conflict_solve = dict(func1='a b c',func2='d e')

        These functions will be called for each key they apply to with the
        form::

            func1(self['a'], other['a'])

        The return value is used as the final merged value.

        As a convenience, merge() provides five (the most commonly needed)
        pre-defined policies: preserve, update, add, add_flip and add_s. The
        easiest explanation is their implementation::

            preserve = lambda old,new: old
            update   = lambda old,new: new
            add      = lambda old,new: old + new
            add_flip = lambda old,new: new + old  # note change of order!
            add_s    = lambda old,new: old + ' ' + new  # only for str!

        You can use those five words (as strings) as keys instead
        of defining them as functions, and the merge method will substitute
        the appropriate functions for you.

        For more complicated conflict resolution policies, you still need to
        construct your own functions.

        Examples
        --------
        This show the default policy:

        >>> s = Struct(a=10,b=30)
        >>> s2 = Struct(a=20,c=40)
        >>> s.merge(s2)
        >>> sorted(s.items())
        [('a', 10), ('b', 30), ('c', 40)]

        Now, show how to specify a conflict dict:

        >>> s = Struct(a=10,b=30)
        >>> s2 = Struct(a=20,b=40)
        >>> conflict = {'update':'a','add':'b'}
        >>> s.merge(s2,conflict)
        >>> sorted(s.items())
        [('a', 20), ('b', 70)]
        """
        # ``dict(None, **kw)`` raises TypeError, so the documented default of
        # None must be handled explicitly.
        if __loc_data__ is None:
            data_dict = dict(**kw)
        else:
            data_dict = dict(__loc_data__, **kw)

        # policies for conflict resolution: two argument functions which return
        # the value that will go in the new struct
        preserve = lambda old, new: old  # noqa
        update = lambda old, new: new  # noqa
        add = lambda old, new: old + new  # noqa
        add_flip = lambda old, new: new + old  # noqa  # note change of order!
        add_s = lambda old, new: old + " " + new  # noqa

        # default policy is to keep current keys when there's a conflict
        conflict_solve = dict.fromkeys(self, preserve)

        # the conflict_solve dictionary is given by the user 'inverted': we
        # need a name-function mapping, it comes as a function -> names
        # dict. Make a local copy (b/c we'll make changes), replace user
        # strings for the five builtin policies and invert it.
        if __conflict_solve:
            inv_conflict_solve_user = __conflict_solve.copy()
            for name, func in [
                ("preserve", preserve),
                ("update", update),
                ("add", add),
                ("add_flip", add_flip),
                ("add_s", add_s),
            ]:
                if name in inv_conflict_solve_user:
                    inv_conflict_solve_user[func] = inv_conflict_solve_user[name]
                    del inv_conflict_solve_user[name]
            conflict_solve.update(self.__dict_invert(inv_conflict_solve_user))
        for key in data_dict:
            if key not in self:
                self[key] = data_dict[key]
            else:
                self[key] = conflict_solve[key](self[key], data_dict[key])

View File

@@ -0,0 +1,3 @@
# Make sure to update package.json, too!
# Version components as a tuple of ints.
version_info = (5, 4, 0)
# Dotted string form derived from version_info (here "5.4.0").
__version__ = ".".join(map(str, version_info))

View File

@@ -0,0 +1,74 @@
"""API for converting notebooks between versions."""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
from . import versions
from .reader import get_version
from .validator import ValidationError
def convert(nb, to_version):
    """Convert a notebook node object to a specific major version.

    Conversion proceeds one major version at a time, recursing until the
    target is reached. It assumes every major version from 1 up to the latest
    is implemented (no gaps) and ignores minor revisions.

    Parameters
    ----------
    nb : NotebookNode
    to_version : int
        Major revision to convert the notebook to.  Can either be an upgrade or
        a downgrade.

    Raises
    ------
    ValueError
        Notebook failed to convert.
    ValueError
        The version specified is invalid or doesn't exist.
    ValidationError
        Conversion failed due to missing expected attributes.
    """
    version, _minor = get_version(nb)

    # Already at the requested version: nothing to do.
    if version == to_version:
        return nb

    if to_version not in versions:
        raise ValueError(
            "Cannot convert notebook to v%d because that version doesn't exist" % (to_version)
        )

    # Pick the single step that moves one version closer to the target. The
    # newer of the two version modules always provides the conversion function.
    if to_version > version:
        step_version = version + 1
        convert_function = versions[step_version].upgrade
    else:
        step_version = version - 1
        convert_function = versions[version].downgrade

    try:
        converted = convert_function(nb)
        # Guard against a conversion that silently left the version unchanged,
        # which would otherwise recurse forever.
        if converted.get("nbformat", 1) == version:
            raise ValueError(
                "Failed to convert notebook from v%d to v%d." % (version, step_version)
            )
    except AttributeError as e:
        raise ValidationError(
            f"Notebook could not be converted from version {version} to version {step_version} because it's missing a key: {e}"
        )

    # Recursively convert until target version is reached.
    return convert(converted, to_version)

View File

@@ -0,0 +1,13 @@
"""Tests for nbformat corpus"""
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
from .. import words
def test_generate_corpus_id(recwarn):
    """Generated corpus ids are long enough, differ between calls, and emit no warnings."""
    sample = words.generate_corpus_id()
    assert len(sample) > 7
    # 1 in 4294967296 (2^32) times this will fail
    left = words.generate_corpus_id()
    right = words.generate_corpus_id()
    assert left != right
    assert len(recwarn) == 0

View File

@@ -0,0 +1,5 @@
import uuid
def generate_corpus_id():
    """Return the first 8 hexadecimal characters of a freshly generated UUID4."""
    random_hex = uuid.uuid4().hex
    return random_hex[:8]

View File

@@ -0,0 +1,229 @@
"""Deprecated API for working with notebooks
- use nbformat for read/write/validate public API
- use nbformat.vX directly for Python API for composing notebooks
"""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
import re
import warnings
# Module-level statement: runs when this module is imported, warning callers
# that the whole ``nbformat.current`` API is deprecated.
warnings.warn(
    """nbformat.current is deprecated.
- use nbformat for read/write/validate public API
- use nbformat.vX directly to composing notebooks of a particular version
"""
)
from traitlets.log import get_logger
from nbformat import v3 as _v_latest
from nbformat.v3 import (
NotebookNode,
nbformat,
nbformat_minor,
nbformat_schema,
new_author,
new_code_cell,
new_heading_cell,
new_metadata,
new_notebook,
new_output,
new_text_cell,
new_worksheet,
parse_filename,
to_notebook_json,
)
from . import versions
from .converter import convert
from .reader import reads as reader_reads
from .validator import ValidationError, validate
# Names exported by ``from nbformat.current import *``: the v3 composition
# helpers imported above plus the deprecated functions defined below.
__all__ = [
    "NotebookNode",
    "new_code_cell",
    "new_text_cell",
    "new_notebook",
    "new_output",
    "new_worksheet",
    "parse_filename",
    "new_metadata",
    "new_author",
    "new_heading_cell",
    "nbformat",
    "nbformat_minor",
    "nbformat_schema",
    "to_notebook_json",
    "convert",
    "validate",
    "NBFormatError",
    "parse_py",
    "reads_json",
    "writes_json",
    "reads_py",
    "writes_py",
    "reads",
    "writes",
    "read",
    "write",
]
# In this deprecated module the "current" format is the v3 one imported above.
current_nbformat = nbformat
current_nbformat_minor = nbformat_minor
# Dotted module name of the v3 implementation.
current_nbformat_module = _v_latest.__name__
class NBFormatError(ValueError):
    """A notebook-format error; raised by ``reads_py`` for unsupported versions."""
def _warn_format():
warnings.warn(
"""Non-JSON file support in nbformat is deprecated.
Use nbconvert to create files of other formats."""
)
def parse_py(s, **kwargs):
    """Extract the nbformat version from a ``# <nbformat>X.Y</nbformat>`` marker.

    Parameters
    ----------
    s : str
        Python-format notebook source to scan.
    **kwargs
        Accepted for API compatibility; unused.

    Returns
    -------
    (nbformat, nbformat_minor, s) : tuple
        Major and minor versions as ints, followed by the unmodified input.
        Without a well-formed marker both default to the current format; a
        marker without a minor part defaults only the minor version.
    """
    # Dot-separated runs of digits only. The previous character class
    # ``[\.\d+]*`` also accepted '+' and stray dots (e.g. "3.+" or "3.."),
    # which then crashed in int().
    pattern = r"# <nbformat>(?P<nbformat>\d+(?:\.\d+)*)</nbformat>"
    m = re.search(pattern, s)
    if m is None:
        return current_nbformat, current_nbformat_minor, s

    digits = m.group("nbformat").split(".")
    nbf = int(digits[0])
    nbm = int(digits[1]) if len(digits) > 1 else current_nbformat_minor
    return nbf, nbm, s
def reads_json(nbjson, **kwargs):
    """DEPRECATED, use reads

    Warns, then delegates to :func:`reads`. Note that ``**kwargs`` is
    accepted but not forwarded.
    """
    warnings.warn("reads_json is deprecated, use reads")
    notebook = reads(nbjson)
    return notebook
def writes_json(nb, **kwargs):
    """DEPRECATED, use writes

    Warns, then delegates to :func:`writes`, forwarding ``**kwargs``.
    """
    warnings.warn("writes_json is deprecated, use writes")
    serialized = writes(nb, **kwargs)
    return serialized
def reads_py(s, **kwargs):
    """DEPRECATED: use nbconvert

    Read a Python-format notebook string using the v2 or v3 reader selected
    by the version marker found in ``s``.

    Raises
    ------
    NBFormatError
        If the detected major version is neither 2 nor 3.
    """
    _warn_format()
    nbf, nbm, s = parse_py(s, **kwargs)
    # Only these two formats provide a Python-source reader.
    if nbf not in (2, 3):
        raise NBFormatError("Unsupported PY nbformat version: %i" % nbf)
    return versions[nbf].to_notebook_py(s, **kwargs)
def writes_py(nb, **kwargs):
    """DEPRECATED: use nbconvert

    Warns, then serializes ``nb`` with the v3 Python-format writer.
    """
    _warn_format()
    v3_module = versions[3]
    return v3_module.writes_py(nb, **kwargs)
# High level API
def reads(s, format="DEPRECATED", version=current_nbformat, **kwargs):
    """Read a notebook from a string and return the NotebookNode object.

    Notebooks of any version are accepted; the result is converted to
    ``version`` (by default this module's current format). Validation
    failures are logged rather than raised.

    Parameters
    ----------
    s : unicode
        The raw unicode string to read the notebook from.
    format : str
        Deprecated; any value other than "DEPRECATED" or "json" triggers a
        deprecation warning.
    version : int
        The nbformat version to convert the notebook to.

    Returns
    -------
    nb : NotebookNode
        The notebook that was read.
    """
    format_is_supported = format in {"DEPRECATED", "json"}
    if not format_is_supported:
        _warn_format()

    notebook = convert(reader_reads(s, **kwargs), version)
    try:
        validate(notebook)
    except ValidationError as err:
        get_logger().error("Notebook JSON is invalid: %s", err)
    return notebook
def writes(nb, format="DEPRECATED", version=current_nbformat, **kwargs):
    """Write a notebook to a JSON string in the given nbformat version.

    The notebook is converted to ``version`` (by default this module's
    current format) before serialization. Validation failures are logged
    rather than raised.

    Parameters
    ----------
    nb : NotebookNode
        The notebook to write.
    format : str
        Deprecated; any value other than "DEPRECATED" or "json" triggers a
        deprecation warning.
    version : int
        The nbformat version to write.
        Used for downgrading notebooks.

    Returns
    -------
    s : unicode
        The notebook string.
    """
    format_is_supported = format in {"DEPRECATED", "json"}
    if not format_is_supported:
        _warn_format()

    nb = convert(nb, version)
    try:
        validate(nb)
    except ValidationError as err:
        get_logger().error("Notebook JSON is invalid: %s", err)

    serializer = versions[version]
    return serializer.writes_json(nb, **kwargs)
def read(fp, format="DEPRECATED", **kwargs):
    """Read a notebook from a file and return the NotebookNode object.

    The file's full contents are passed to :func:`reads`, which handles
    notebooks of any version. The ``format`` argument is not forwarded.

    Parameters
    ----------
    fp : file
        Any file-like object with a read method.

    Returns
    -------
    nb : NotebookNode
        The notebook that was read.
    """
    contents = fp.read()
    return reads(contents, **kwargs)
def write(nb, fp, format="DEPRECATED", **kwargs):
    """Write a notebook to a file object via :func:`writes`.

    The ``format`` argument is not forwarded. Bytes output is decoded as
    UTF-8 before being written.

    Parameters
    ----------
    nb : NotebookNode
        The notebook to write.
    fp : file
        Any file-like object with a write method.

    Returns
    -------
    The return value of ``fp.write``.
    """
    payload = writes(nb, **kwargs)
    text = payload.decode("utf8") if isinstance(payload, bytes) else payload
    return fp.write(text)

View File

@@ -0,0 +1,98 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
"""
Common validator wrapper to provide a uniform usage of other schema validation
libraries.
"""
import os
import fastjsonschema
import jsonschema
from fastjsonschema import JsonSchemaException as _JsonSchemaException
from jsonschema import Draft4Validator as _JsonSchemaValidator
from jsonschema import ErrorTree, ValidationError
class JsonSchemaValidator:
    """Validator wrapper backed by the ``jsonschema`` Draft 4 validator."""

    name = "jsonschema"

    def __init__(self, schema):
        """Build the underlying validator once for ``schema``."""
        self._schema = schema
        self._default_validator = _JsonSchemaValidator(schema)  # Default
        self._validator = self._default_validator

    def validate(self, data):
        """Validate ``data``; the underlying validator raises on failure."""
        self._default_validator.validate(data)

    def iter_errors(self, data, schema=None):
        """Return the validation errors for ``data``.

        When ``schema`` is given it replaces the instance's schema for this
        call only.
        """
        validator = self._default_validator
        if schema is None:
            return validator.iter_errors(data)
        if not hasattr(validator, "evolve"):
            # Validators without ``evolve`` take the schema as an argument.
            return validator.iter_errors(data, schema)
        return validator.evolve(schema=schema).iter_errors(data)

    def error_tree(self, errors):
        """Wrap ``errors`` in a ``jsonschema`` ``ErrorTree``."""
        return ErrorTree(errors=errors)
class FastJsonSchemaValidator(JsonSchemaValidator):
    """Validator wrapper that validates with a compiled ``fastjsonschema`` function."""

    name = "fastjsonschema"

    def __init__(self, schema):
        """Set up the jsonschema fallback, then compile ``schema`` for fast checks."""
        super().__init__(schema)
        self._validator = fastjsonschema.compile(schema)

    def validate(self, data):
        """Validate ``data``, re-raising failures as jsonschema ``ValidationError``."""
        try:
            self._validator(data)
        except _JsonSchemaException as error:
            raise ValidationError(str(error), schema_path=error.path)

    def iter_errors(self, data, schema=None):
        """Return a list of validation errors for ``data``.

        With an explicit ``schema`` the jsonschema-based parent handles the
        call. Otherwise the list is empty on success, or holds the single
        error reported by the compiled validator.
        """
        if schema is not None:
            return super().iter_errors(data, schema)

        try:
            self._validator(data)
        except _JsonSchemaException as error:
            return [ValidationError(str(error), schema_path=error.path)]
        return []

    def error_tree(self, errors):
        """Not supported for fastjsonschema; always raises NotImplementedError."""
        # fastjsonschema's exceptions don't contain the same information that the jsonschema ValidationErrors
        # do. This method is primarily used for introspecting metadata schema failures so that we can strip
        # them if asked to do so in `nbformat.validate`.
        # Another way forward for compatibility: we could distill both validator errors into a custom collection
        # for this data. Since implementation details of ValidationError is used elsewhere, we would probably
        # just use this data for schema introspection.
        raise NotImplementedError("JSON schema error introspection not enabled for fastjsonschema")
# (validator name, backing module, wrapper class) triples, in lookup order.
_VALIDATOR_MAP = [
    ("fastjsonschema", fastjsonschema, FastJsonSchemaValidator),
    ("jsonschema", jsonschema, JsonSchemaValidator),
]
# The recognised validator names, taken from the first field of each triple.
VALIDATORS = [item[0] for item in _VALIDATOR_MAP]
def _validator_for_name(validator_name):
    """Return the validator wrapper class registered under ``validator_name``.

    Returns None when the name is known but its backing module is falsy.

    Raises
    ------
    ValueError
        If ``validator_name`` is not one of ``VALIDATORS``.
    """
    if validator_name not in VALIDATORS:
        raise ValueError(
            f"Invalid validator '{validator_name}' value!\nValid values are: {VALIDATORS}"
        )
    candidates = (
        validator_cls
        for name, module, validator_cls in _VALIDATOR_MAP
        if module and validator_name == name
    )
    return next(candidates, None)
def get_current_validator():
    """
    Return the validator class selected by the ``NBFORMAT_VALIDATOR``
    environment variable, defaulting to "fastjsonschema" when it is unset.
    """
    selected = os.getenv("NBFORMAT_VALIDATOR", "fastjsonschema")
    return _validator_for_name(selected)

View File

@@ -0,0 +1,50 @@
"""NotebookNode - adding attribute access to dicts"""
from collections.abc import Mapping
from ._struct import Struct
class NotebookNode(Struct):
    """A dict-like node with attribute-access"""

    def __setitem__(self, key, value):
        """Store ``value``, first converting plain mappings to NotebookNode."""
        needs_wrapping = isinstance(value, Mapping) and not isinstance(value, NotebookNode)
        if needs_wrapping:
            value = from_dict(value)
        super().__setitem__(key, value)

    def update(self, *args, **kwargs):
        """
        A dict-like update method based on CPython's MutableMapping `update`
        method.

        Every assignment goes through ``__setitem__``, so nested mappings are
        converted as they are stored.
        """
        if len(args) > 1:
            raise TypeError("update expected at most 1 arguments, got %d" % len(args))

        if args:
            (other,) = args
            if isinstance(other, Mapping):
                for key in other:
                    self[key] = other[key]
            elif hasattr(other, "keys"):
                for key in other.keys():
                    self[key] = other[key]
            else:
                # Fall back to an iterable of (key, value) pairs.
                for key, value in other:
                    self[key] = value

        for key, value in kwargs.items():
            self[key] = value
def from_dict(d):
    """Convert dict to dict-like NotebookNode

    Recursively converts any dict in the container to a NotebookNode.
    Tuples and lists both come back as lists; any other value is returned
    unchanged. This does not check that the contents of the dictionary make
    a valid notebook or part of a notebook.
    """
    if isinstance(d, dict):
        converted = {}
        for key, value in d.items():
            converted[key] = from_dict(value)
        return NotebookNode(converted)
    if isinstance(d, (tuple, list)):
        return list(map(from_dict, d))
    return d

View File

@@ -0,0 +1,99 @@
"""API for reading notebooks of different versions"""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
import json
from .validator import ValidationError
class NotJSONError(ValueError):
    """Raised by ``parse_json`` when the input cannot be parsed as JSON."""
def parse_json(s, **kwargs):
    """Parse a JSON string and return the decoded value.

    ``**kwargs`` is forwarded to ``json.loads``.

    Raises
    ------
    NotJSONError
        If ``json.loads`` raises ValueError; the original error is chained.
    """
    try:
        return json.loads(s, **kwargs)
    except ValueError as e:
        # Limit the error message to 80 characters.  Display whatever JSON will fit.
        message = "Notebook does not appear to be JSON: %r" % s
        raise NotJSONError(message[:77] + "...") from e
# High level API
def get_version(nb):
    """Get the version of a notebook.

    Parameters
    ----------
    nb : dict
        NotebookNode or dict containing notebook data.

    Returns
    -------
    Tuple containing major (int) and minor (int) version numbers.
    A missing ``nbformat`` defaults to 1; a missing ``nbformat_minor`` to 0.
    """
    return (nb.get("nbformat", 1), nb.get("nbformat_minor", 0))
def reads(s, **kwargs):
    """Read a notebook from a json string and return the
    NotebookNode object.

    The notebook is built by the module for its own major version; no
    version conversion is performed.

    Parameters
    ----------
    s : unicode | bytes
        The raw string or bytes object to read the notebook from.
    **kwargs
        Forwarded to ``parse_json``.

    Returns
    -------
    nb : NotebookNode
        The notebook that was read.

    Raises
    ------
    ValidationError
        Notebook JSON for a given version is missing an expected key and cannot be read.
    NBFormatError
        Specified major version is invalid or unsupported.
    """
    # Imported here rather than at module top, presumably to avoid a circular
    # import with the package root.
    from . import NBFormatError, versions

    nb_dict = parse_json(s, **kwargs)
    major, minor = get_version(nb_dict)
    if major not in versions:
        raise NBFormatError("Unsupported nbformat version %s" % major)

    try:
        return versions[major].to_notebook_json(nb_dict, minor=minor)
    except AttributeError as e:
        raise ValidationError(f"The notebook is invalid and is missing an expected key: {e}")
def read(fp, **kwargs):
    """Read a notebook from a file and return the NotebookNode object.

    The file's full contents are passed to :func:`reads`; no version
    conversion is performed.

    Parameters
    ----------
    fp : file
        Any file-like object with a read method.

    Returns
    -------
    nb : NotebookNode
        The notebook that was read.
    """
    contents = fp.read()
    return reads(contents, **kwargs)

View File

@@ -0,0 +1,15 @@
"""Sentinel class for constants with useful reprs"""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
# A named marker object whose repr is "<module>.<name>". The class has no
# docstring of its own, so an instance's ``__doc__`` is None unless a
# docstring is passed to the constructor.
class Sentinel:
    def __init__(self, name, module, docstring=None):
        """Record the sentinel's name and owning module; optionally set its ``__doc__``."""
        self.name = name
        self.module = module
        # Only a truthy docstring is installed on the instance.
        if docstring:
            self.__doc__ = docstring

    def __repr__(self):
        """Return ``<module>.<name>``."""
        module_text = str(self.module)
        return module_text + "." + self.name

View File

@@ -0,0 +1,626 @@
"""Utilities for signing notebooks"""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
import hashlib
import os
import sys
import typing as t
from collections import OrderedDict
from contextlib import contextmanager
from datetime import datetime
from hmac import HMAC
try:
import sqlite3
except ImportError:
try:
from pysqlite2 import dbapi2 as sqlite3 # type:ignore[no-redef]
except ImportError:
sqlite3 = None # type:ignore[assignment]
from base64 import encodebytes
from jupyter_core.application import JupyterApp, base_flags
from traitlets import (
Any,
Bool,
Bytes,
Callable,
Enum,
Instance,
Integer,
Unicode,
default,
observe,
)
from traitlets.config import LoggingConfigurable, MultipleInstanceError
from . import NO_CONVERT, __version__, read, reads
# Start from the hash algorithm names listed in hashlib.algorithms_guaranteed.
algorithms_set = hashlib.algorithms_guaranteed
# The shake algorithms are not compatible with hmac
# due to required length argument in digests
algorithms = [a for a in algorithms_set if not a.startswith("shake_")]
class SignatureStore:
    """Base class for a signature store."""

    def store_signature(self, digest, algorithm):
        """Store a signature; subclasses must implement this.

        Should not raise if the signature is already stored.
        """
        raise NotImplementedError()

    def check_signature(self, digest, algorithm):
        """Report whether a signature is known; subclasses must implement this.

        Return True for a known signature, False for unknown.
        """
        raise NotImplementedError()

    def remove_signature(self, digest, algorithm):
        """Delete a signature; subclasses must implement this.

        Should not raise if the signature is not stored.
        """
        raise NotImplementedError()

    def close(self):
        """Close any open connections this store may use.

        If the store maintains any open connections (e.g. to a database),
        they should be closed. The base implementation does nothing.
        """
        return None
class MemorySignatureStore(SignatureStore):
    """Non-persistent storage of signatures in memory."""

    cache_size = 65535

    def __init__(self):
        # We really only want an ordered set, but the stdlib has OrderedDict,
        # and it's easy to use a dict as a set. Entries are kept oldest-first.
        self.data = OrderedDict()

    def store_signature(self, digest, algorithm):
        """Record the signature as the most recent entry, then cull if needed."""
        key = (digest, algorithm)
        self.data[key] = None
        # Make sure a re-stored signature ends up at the newest end.
        self.data.move_to_end(key)
        self._maybe_cull()

    def _maybe_cull(self):
        """Once cache_size signatures are stored, delete the oldest 25%"""
        if len(self.data) >= self.cache_size:
            for _ in range(len(self.data) // 4):
                self.data.popitem(last=False)

    def check_signature(self, digest, algorithm):
        """Return True if the signature is stored, marking it most recent."""
        key = (digest, algorithm)
        if key not in self.data:
            return False
        self.data.move_to_end(key)
        return True

    def remove_signature(self, digest, algorithm):
        """Forget the signature; a missing signature is ignored."""
        self.data.pop((digest, algorithm), None)
class SQLiteSignatureStore(SignatureStore, LoggingConfigurable):
    """Store signatures in an SQLite database.

    Each signature is one row of the ``nbsignatures`` table, holding the
    algorithm, the digest and a ``last_seen`` timestamp.  The timestamp is
    refreshed whenever a signature is stored or successfully checked, and it
    decides which rows are dropped once more than ``cache_size`` rows exist.
    """

    # 64k entries ~ 12MB
    cache_size = Integer(
        65535,
        help=(
            "The number of notebook signatures to cache.\n"
            "When the number of signatures exceeds this value,\n"
            "the oldest 25% of signatures will be culled.\n"
        ),
    ).tag(config=True)

    def __init__(self, db_file, **kwargs):
        """Open the signature database stored at *db_file*.

        Parameters
        ----------
        db_file : str
            Path of the SQLite file, or ``":memory:"``.
        **kwargs
            Passed through to the parent constructors.
        """
        super().__init__(**kwargs)
        self.db_file = db_file
        self.db = self._connect_db(db_file)

    def close(self):
        """Close the database connection, if there is one."""
        if self.db is not None:
            self.db.close()

    def _connect_db(self, db_file):
        """Connect to *db_file* and make sure the schema exists.

        If an on-disk database cannot be opened or initialised, the file is
        renamed to ``<db_file>.bak`` and a fresh database is created in its
        place.  If that also fails, an in-memory database is used instead and
        ``self.db_file`` is updated to ``":memory:"``.  A failure while
        opening ``":memory:"`` itself is re-raised.

        Returns
        -------
        The open ``sqlite3`` connection.
        """
        kwargs: t.Dict[str, t.Any] = dict(
            detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES
        )
        db = None
        try:
            db = sqlite3.connect(db_file, **kwargs)
            self.init_db(db)
        except (sqlite3.DatabaseError, sqlite3.OperationalError):
            if db_file != ":memory:":
                old_db_location = db_file + ".bak"
                if db is not None:
                    db.close()
                self.log.warning(
                    (
                        "The signatures database cannot be opened; maybe it is corrupted or encrypted. "
                        "You may need to rerun your notebooks to ensure that they are trusted to run Javascript. "
                        "The old signatures database has been renamed to %s and a new one has been created."
                    ),
                    old_db_location,
                )
                try:
                    os.rename(db_file, old_db_location)
                    db = sqlite3.connect(db_file, **kwargs)
                    self.init_db(db)
                except (sqlite3.DatabaseError, sqlite3.OperationalError, OSError):
                    if db is not None:
                        db.close()
                    self.log.warning(
                        "Failed commiting signatures database to disk. "
                        "You may need to move the database file to a non-networked file system, "
                        "using config option `NotebookNotary.db_file`. "
                        "Using in-memory signatures database for the remainder of this session."
                    )
                    self.db_file = ":memory:"
                    db = sqlite3.connect(":memory:", **kwargs)
                    self.init_db(db)
            else:
                raise
        return db

    def init_db(self, db):
        """Create the ``nbsignatures`` table and its lookup index if missing."""
        db.execute(
            "\n"
            "CREATE TABLE IF NOT EXISTS nbsignatures\n"
            "(\n"
            "id integer PRIMARY KEY AUTOINCREMENT,\n"
            "algorithm text,\n"
            "signature text,\n"
            "path text,\n"
            "last_seen timestamp\n"
            ")"
        )
        db.execute(
            "\n"
            "CREATE INDEX IF NOT EXISTS algosig ON nbsignatures(algorithm, signature)\n"
        )
        db.commit()

    def store_signature(self, digest, algorithm):
        """Insert the signature, or refresh ``last_seen`` if already present.

        Afterwards the table is culled if it holds more than ``cache_size``
        rows.  Does nothing when there is no database connection.
        """
        if self.db is None:
            return
        if not self.check_signature(digest, algorithm):
            self.db.execute(
                "\n"
                "INSERT INTO nbsignatures (algorithm, signature, last_seen)\n"
                "VALUES (?, ?, ?)\n",
                (algorithm, digest, datetime.utcnow()),
            )
        else:
            self.db.execute(
                "UPDATE nbsignatures SET last_seen = ? WHERE\n"
                "algorithm = ? AND\n"
                "signature = ?;\n",
                (datetime.utcnow(), algorithm, digest),
            )
        self.db.commit()

        # Check size and cull old entries if necessary
        (n,) = self.db.execute("SELECT Count(*) FROM nbsignatures").fetchone()
        if n > self.cache_size:
            self.cull_db()

    def check_signature(self, digest, algorithm):
        """Return True if the signature is stored, refreshing its ``last_seen``.

        Returns False for an unknown signature or when there is no database
        connection.
        """
        if self.db is None:
            return False
        r = self.db.execute(
            "SELECT id FROM nbsignatures WHERE\n"
            "algorithm = ? AND\n"
            "signature = ?;\n",
            (algorithm, digest),
        ).fetchone()
        if r is None:
            return False
        self.db.execute(
            "UPDATE nbsignatures SET last_seen = ? WHERE\n"
            "algorithm = ? AND\n"
            "signature = ?;\n",
            (datetime.utcnow(), algorithm, digest),
        )
        self.db.commit()
        return True

    def remove_signature(self, digest, algorithm):
        """Delete the signature; a no-op when it is not stored."""
        # Same guard as store_signature/check_signature, for consistency.
        if self.db is None:
            return
        self.db.execute(
            "DELETE FROM nbsignatures WHERE\n"
            "algorithm = ? AND\n"
            "signature = ?;\n",
            (algorithm, digest),
        )
        self.db.commit()

    def cull_db(self):
        """Cull oldest 25% of the trusted signatures when the size limit is reached

        Keeps the ``0.75 * cache_size`` most recently seen rows (at least one)
        and deletes every other row.
        """
        if self.db is None:
            return
        self.db.execute(
            "DELETE FROM nbsignatures WHERE id IN (\n"
            "SELECT id FROM nbsignatures ORDER BY last_seen DESC LIMIT -1 OFFSET ?\n"
            ");\n",
            (max(int(0.75 * self.cache_size), 1),),
        )
        # The DELETE runs inside an implicit transaction; commit it here so
        # the cull is not lost if the connection is closed before some later
        # operation happens to commit.
        self.db.commit()
def yield_everything(obj):
    """Flatten a JSON-like object into a stream of byte strings.

    This lets any JSONable object be fed to an HMAC digester piece by piece,
    without serializing the whole structure first.

    - dicts contribute each key (visited in sorted order) followed by the
      flattened value; keys must be ``str``
    - lists and tuples contribute their flattened elements in order
    - strings are UTF-8 encoded
    - anything else is passed through ``str()`` and UTF-8 encoded
    """
    if isinstance(obj, dict):
        for name in sorted(obj):
            item = obj[name]
            assert isinstance(name, str)
            yield name.encode()
            yield from yield_everything(item)
        return
    if isinstance(obj, (list, tuple)):
        for member in obj:
            yield from yield_everything(member)
        return
    text = obj if isinstance(obj, str) else str(obj)
    yield text.encode("utf8")
def yield_code_cells(nb):
    """Iterate over the code cells of a notebook, whatever its nbformat.

    For nbformat >= 4 the cells come from ``nb["cells"]``; for nbformat 3
    they come from the ``cells`` of every worksheet in ``nb["worksheets"]``.
    Any other version yields nothing.
    """
    version = nb.nbformat
    if version >= 4:
        cell_groups = [nb["cells"]]
    elif version == 3:
        cell_groups = (sheet["cells"] for sheet in nb["worksheets"])
    else:
        cell_groups = ()
    for group in cell_groups:
        for cell in group:
            if cell["cell_type"] == "code":
                yield cell
@contextmanager
def signature_removed(nb):
    """Temporarily strip the ``signature`` entry from a notebook's metadata.

    Used so that a previously stored signature does not take part in
    computing the notebook's signature.  On exit, including on error, the
    entry is put back if one was removed.
    """
    stashed = nb["metadata"].pop("signature", None)
    try:
        yield
    finally:
        had_signature = stashed is not None
        if had_signature:
            nb["metadata"]["signature"] = stashed
class NotebookNotary(LoggingConfigurable):
    """A class for computing and verifying notebook signatures.

    A notebook's signature is an HMAC digest of its whole content, keyed with
    a secret.  Signatures of trusted notebooks are remembered in a signature
    store created by ``store_factory``.
    """

    data_dir = Unicode(help="""The storage directory for notary secret and database.""").tag(
        config=True
    )

    @default("data_dir")
    def _data_dir_default(self):
        """Take the data directory from a JupyterApp."""
        app = None
        try:
            if JupyterApp.initialized():
                app = JupyterApp.instance()
        except MultipleInstanceError:
            pass
        if app is None:
            # create an app, without the global instance
            app = JupyterApp()
            app.initialize(argv=[])
        return app.data_dir

    store_factory = Callable(
        help=(
            "A callable returning the storage backend for notebook signatures.\n"
            "The default uses an SQLite database."
        )
    ).tag(config=True)

    @default("store_factory")
    def _store_factory_default(self):
        """Build the default factory: SQLite if available, else in-memory."""

        def factory():
            if sqlite3 is None:
                self.log.warning("Missing SQLite3, all notebooks will be untrusted!")
                return MemorySignatureStore()
            return SQLiteSignatureStore(self.db_file)

        return factory

    db_file = Unicode(
        help=(
            "The sqlite file in which to store notebook signatures.\n"
            "By default, this will be in your Jupyter data directory.\n"
            "You can set it to ':memory:' to disable sqlite writing to the filesystem.\n"
        )
    ).tag(config=True)

    @default("db_file")
    def _db_file_default(self):
        """Use ``nbsignatures.db`` in ``data_dir``, or memory if unset."""
        if not self.data_dir:
            return ":memory:"
        return os.path.join(self.data_dir, "nbsignatures.db")

    algorithm = Enum(
        algorithms, default_value="sha256", help="""The hashing algorithm used to sign notebooks."""
    ).tag(config=True)

    @observe("algorithm")
    def _algorithm_changed(self, change):
        """Keep ``digestmod`` in sync with the selected algorithm name."""
        self.digestmod = getattr(hashlib, change["new"])

    # The hashlib constructor matching ``algorithm``.
    digestmod = Any()

    @default("digestmod")
    def _digestmod_default(self):
        return getattr(hashlib, self.algorithm)

    secret_file = Unicode(help="""The file where the secret key is stored.""").tag(config=True)

    @default("secret_file")
    def _secret_file_default(self):
        """Use ``notebook_secret`` in ``data_dir``, or "" if unset."""
        if not self.data_dir:
            return ""
        return os.path.join(self.data_dir, "notebook_secret")

    secret = Bytes(help="""The secret key with which notebooks are signed.""").tag(config=True)

    @default("secret")
    def _secret_default(self):
        """Load the key from ``secret_file``, generating it if missing."""
        # note : this assumes an Application is running
        if os.path.exists(self.secret_file):
            with open(self.secret_file, "rb") as f:
                return f.read()
        else:
            secret = encodebytes(os.urandom(1024))
            self._write_secret_file(secret)
            return secret

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.store = self.store_factory()

    def _write_secret_file(self, secret):
        """Write *secret* to ``secret_file`` and return it.

        A newly created file gets owner-only permissions from the start, so
        the key is never readable by other users, not even in the window
        between writing and ``chmod``.  The ``chmod`` is still attempted
        afterwards to tighten a file that already existed; a failure there is
        logged, not raised.
        """
        self.log.info("Writing notebook-signing key to %s", self.secret_file)
        # O_BINARY only exists on Windows, where it prevents newline
        # translation of the key bytes.
        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0)
        fd = os.open(self.secret_file, flags, 0o600)
        with os.fdopen(fd, "wb") as f:
            f.write(secret)
        try:
            os.chmod(self.secret_file, 0o600)
        except OSError:
            self.log.warning("Could not set permissions on %s", self.secret_file)
        return secret

    def compute_signature(self, nb):
        """Compute a notebook's signature

        by hashing the entire contents of the notebook via HMAC digest.

        Returns the hex digest as a string.
        """
        hmac = HMAC(self.secret, digestmod=self.digestmod)
        # don't include the previous hash in the content to hash
        with signature_removed(nb):
            # sign the whole thing
            for b in yield_everything(nb):
                hmac.update(b)
        return hmac.hexdigest()

    def check_signature(self, nb):
        """Check whether a notebook's signature is trusted.

        The signature of *nb* is computed with the current secret and
        algorithm and looked up in the signature store.

        Returns True if the store knows the signature, False otherwise.
        Notebooks with nbformat < 3 are never trusted.
        """
        if nb.nbformat < 3:
            return False
        signature = self.compute_signature(nb)
        return self.store.check_signature(signature, self.algorithm)

    def sign(self, nb):
        """Sign a notebook, indicating that its output is trusted on this machine

        Stores hash algorithm and hmac digest in a local database of trusted notebooks.
        Does nothing for notebooks with nbformat < 3.
        """
        if nb.nbformat < 3:
            return
        signature = self.compute_signature(nb)
        self.store.store_signature(signature, self.algorithm)

    def unsign(self, nb):
        """Ensure that a notebook is untrusted

        by removing its signature from the trusted database, if present.
        """
        signature = self.compute_signature(nb)
        self.store.remove_signature(signature, self.algorithm)

    def mark_cells(self, nb, trusted):
        """Mark cells as trusted if the notebook's signature can be verified

        Sets ``cell.metadata.trusted = True | False`` on all code cells,
        depending on the *trusted* parameter. This will typically be the return
        value from ``self.check_signature(nb)``.

        This function is the inverse of check_cells
        """
        if nb.nbformat < 3:
            return
        for cell in yield_code_cells(nb):
            cell["metadata"]["trusted"] = trusted

    def _check_cell(self, cell, nbformat_version):
        """Do we trust an individual cell?

        Return True if:

        - cell is explicitly trusted
        - cell has no potentially unsafe rich output

        If a cell has no output, or only simple print statements,
        it will always be trusted.

        Side effect: the ``trusted`` entry is removed from the cell's metadata.
        """
        # explicitly trusted
        if cell["metadata"].pop("trusted", False):
            return True

        # explicitly safe output
        if nbformat_version >= 4:
            unsafe_output_types = ["execute_result", "display_data"]
            safe_keys = {"output_type", "execution_count", "metadata"}
        else:  # v3
            unsafe_output_types = ["pyout", "display_data"]
            safe_keys = {"output_type", "prompt_number", "metadata"}

        for output in cell["outputs"]:
            output_type = output["output_type"]
            if output_type in unsafe_output_types:
                # if there are any data keys not in the safe list
                output_keys = set(output)
                if output_keys.difference(safe_keys):
                    return False

        return True

    def check_cells(self, nb):
        """Return whether all code cells are trusted.

        A cell is trusted if the 'trusted' field in its metadata is truthy, or
        if it has no potentially unsafe outputs.
        If there are no code cells, return True.
        Notebooks with nbformat < 3 are never trusted.

        This function is the inverse of mark_cells.
        """
        if nb.nbformat < 3:
            return False
        trusted = True
        # Deliberately no early exit: _check_cell also pops the ``trusted``
        # flag from each cell's metadata, so every code cell must be visited.
        for cell in yield_code_cells(nb):
            # only distrust a cell if it actually has some output to distrust
            if not self._check_cell(cell, nb.nbformat):
                trusted = False

        return trusted
# Command-line flags of TrustNotebookApp: a ``reset`` flag plus the base
# flags, which take precedence on a name clash because they are merged last.
trust_flags = {}
trust_flags["reset"] = (
    {"TrustNotebookApp": {"reset": True}},
    "Delete the trusted notebook cache.\n"
    "All previously signed notebooks will become untrusted.\n",
)
trust_flags.update(base_flags)
class TrustNotebookApp(JupyterApp):
    # Command-line application that signs notebooks given as file arguments
    # (or one notebook read from stdin), or resets the trust state.

    version = __version__
    description = (
        "Sign one or more Jupyter notebooks with your key,\n"
        "to trust their dynamic (HTML, Javascript) output.\n"
        "Otherwise, you will have to re-execute the notebook to see output.\n"
    )

    # This command line tool should use the same config file as the notebook
    @default("config_file_name")
    def _config_file_name_default(self):
        return "jupyter_notebook_config"

    examples = "\njupyter trust mynotebook.ipynb and_this_one.ipynb\n"

    flags = trust_flags

    reset = Bool(
        False,
        help=(
            "If True, delete the trusted signature cache.\n"
            "After reset, all previously signed notebooks will become untrusted.\n"
        ),
    ).tag(config=True)

    notary = Instance(NotebookNotary)

    @default("notary")
    def _notary_default(self):
        return NotebookNotary(parent=self, data_dir=self.data_dir)

    def sign_notebook_file(self, notebook_path):
        """Load the notebook at *notebook_path* and sign it.

        Logs an error and exits with status 1 when the file does not exist.
        """
        missing = not os.path.exists(notebook_path)
        if missing:
            self.log.error("Notebook missing: %s" % notebook_path)
            self.exit(1)
        with open(notebook_path, encoding="utf8") as handle:
            nb = read(handle, NO_CONVERT)
        self.sign_notebook(nb, notebook_path)

    def sign_notebook(self, nb, notebook_path="<stdin>"):
        """Sign an already loaded notebook unless its signature is known.

        *notebook_path* is only used in the printed messages.
        """
        if not self.notary.check_signature(nb):
            print("Signing notebook: %s" % notebook_path)
            self.notary.sign(nb)
        else:
            print("Notebook already signed: %s" % notebook_path)

    def generate_new_key(self):
        """Replace the notebook signing key with fresh random bytes."""
        print("Generating new notebook key: %s" % self.notary.secret_file)
        fresh_key = os.urandom(1024)
        self.notary._write_secret_file(fresh_key)

    def start(self):
        """Run the command.

        With ``reset`` set: delete the signature database file if it exists,
        generate a new key, and stop.  Otherwise sign every notebook named in
        ``extra_args``, or a single notebook read from stdin when no paths
        were given.
        """
        if self.reset:
            if os.path.exists(self.notary.db_file):
                print("Removing trusted signature cache: %s" % self.notary.db_file)
                os.remove(self.notary.db_file)
            self.generate_new_key()
            return

        if self.extra_args:
            for notebook_path in self.extra_args:
                self.sign_notebook_file(notebook_path)
            return

        self.log.debug("Reading notebook from stdin")
        nb_s = sys.stdin.read()
        assert isinstance(nb_s, str)
        nb = reads(nb_s, NO_CONVERT)
        self.sign_notebook(nb, "<stdin>")
# Entry point of the trust command: ``main`` is the application's
# ``launch_instance`` and is also called when this module runs as a script.
main = TrustNotebookApp.launch_instance
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,20 @@
"""The main module for the v1 notebook format."""
# -----------------------------------------------------------------------------
# Copyright (C) 2008-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Imports
# -----------------------------------------------------------------------------
from .convert import upgrade
from .nbbase import NotebookNode, new_code_cell, new_notebook, new_text_cell
from .nbjson import reads as read_json
from .nbjson import reads as reads_json
from .nbjson import to_notebook as to_notebook_json
from .nbjson import writes as write_json
from .nbjson import writes as writes_json

View File

@@ -0,0 +1,16 @@
"""Convert notebook to the v1 format."""
# -----------------------------------------------------------------------------
# Copyright (C) 2008-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Code
# -----------------------------------------------------------------------------
def upgrade(nb, orig_version=None):
    """Reject every request to convert a notebook to the v1 format.

    Raises
    ------
    ValueError
        Always; both arguments are ignored.
    """
    message = "Cannot convert to v1 notebook format"
    raise ValueError(message)

View File

@@ -0,0 +1,69 @@
"""The basic dict based notebook format.
Authors:
* Brian Granger
"""
# -----------------------------------------------------------------------------
# Copyright (C) 2008-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Imports
# -----------------------------------------------------------------------------
from .._struct import Struct
# -----------------------------------------------------------------------------
# Code
# -----------------------------------------------------------------------------
class NotebookNode(Struct):
    """Node type for every nested element of a v1 notebook; adds nothing to Struct."""
def from_dict(d):
    """Recursively convert plain containers into notebook structures.

    Dicts become ``NotebookNode`` objects, tuples and lists become lists of
    converted items, and any other value is returned unchanged.
    """
    if isinstance(d, dict):
        node = NotebookNode()
        for key, value in d.items():
            node[key] = from_dict(value)
        return node
    if isinstance(d, (tuple, list)):
        return list(map(from_dict, d))
    return d
def new_code_cell(code=None, prompt_number=None):
    """Build a v1 code cell.

    ``code`` (coerced to ``str``) and ``prompt_number`` (coerced to ``int``)
    are only set on the cell when they are not None.
    """
    cell = NotebookNode()
    cell.cell_type = "code"
    for attr, raw, coerce in (
        ("code", code, str),
        ("prompt_number", prompt_number, int),
    ):
        if raw is not None:
            setattr(cell, attr, coerce(raw))
    return cell
def new_text_cell(text=None):
    """Build a v1 text cell; ``text`` is stored as ``str`` unless it is None."""
    cell = NotebookNode()
    has_text = text is not None
    if has_text:
        cell.text = str(text)
    cell.cell_type = "text"
    return cell
def new_notebook(cells=None):
    """Build a v1 notebook holding ``cells`` (a new empty list when None)."""
    nb = NotebookNode()
    nb.cells = [] if cells is None else cells
    return nb

View File

@@ -0,0 +1,52 @@
"""Read and write notebooks in JSON format.
Authors:
* Brian Granger
"""
# -----------------------------------------------------------------------------
# Copyright (C) 2008-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Imports
# -----------------------------------------------------------------------------
import json
from .nbbase import from_dict
from .rwbase import NotebookReader, NotebookWriter
# -----------------------------------------------------------------------------
# Code
# -----------------------------------------------------------------------------
class JSONReader(NotebookReader):
    """Reader that parses JSON text into v1 notebook structures."""

    def reads(self, s, **kwargs):
        """Parse the JSON string ``s``; ``kwargs`` are passed to ``json.loads``."""
        raw = json.loads(s, **kwargs)
        return self.to_notebook(raw, **kwargs)

    def to_notebook(self, d, **kwargs):
        """Convert from a raw JSON dict to a nested NotebookNode structure."""
        converted = from_dict(d)
        return converted
class JSONWriter(NotebookWriter):
    """Writer that serializes a v1 notebook to JSON text."""

    def writes(self, nb, **kwargs):
        """Return ``nb`` as JSON; ``indent`` is always forced to 4."""
        options = dict(kwargs, indent=4)
        return json.dumps(nb, **options)
# Module-level API: functions bound to one shared reader and one shared
# writer instance.
_reader = JSONReader()
_writer = JSONWriter()
reads = _reader.reads
read = _reader.read
to_notebook = _reader.to_notebook
write = _writer.write
writes = _writer.writes

View File

@@ -0,0 +1,41 @@
"""Base classes and function for readers and writers.
Authors:
* Brian Granger
"""
# -----------------------------------------------------------------------------
# Copyright (C) 2008-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Imports
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Code
# -----------------------------------------------------------------------------
class NotebookReader:
    """Base class for notebook readers; subclasses implement ``reads``."""

    def reads(self, s, **kwargs):
        """Read a notebook from a string.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        # The message names the method to override (it used to say "loads").
        raise NotImplementedError("reads must be implemented in a subclass")

    def read(self, fp, **kwargs):
        """Read a notebook from a file like object"""
        return self.reads(fp.read(), **kwargs)
class NotebookWriter:
    """Base class for notebook writers; subclasses implement ``writes``."""

    def writes(self, nb, **kwargs):
        """Write a notebook to a string.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        # The message names the method to override (it used to say "loads").
        raise NotImplementedError("writes must be implemented in a subclass")

    def write(self, nb, fp, **kwargs):
        """Write a notebook to a file like object"""
        return fp.write(self.writes(nb, **kwargs))

View File

@@ -0,0 +1,91 @@
"""The main API for the v2 notebook format.
Authors:
* Brian Granger
"""
# -----------------------------------------------------------------------------
# Copyright (C) 2008-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Imports
# -----------------------------------------------------------------------------
import os
from .convert import downgrade, upgrade
from .nbbase import (
NotebookNode,
new_author,
new_code_cell,
new_metadata,
new_notebook,
new_output,
new_text_cell,
new_worksheet,
)
from .nbjson import reads as read_json
from .nbjson import reads as reads_json
from .nbjson import to_notebook as to_notebook_json
from .nbjson import writes as write_json
from .nbjson import writes as writes_json
from .nbpy import reads as read_py
from .nbpy import reads as reads_py
from .nbpy import to_notebook as to_notebook_py
from .nbpy import writes as write_py
from .nbpy import writes as writes_py
# Implementation removed, vulnerable to DoS attacks
from .nbxml import reads as read_xml
from .nbxml import reads as reads_xml
from .nbxml import to_notebook as to_notebook_xml
# -----------------------------------------------------------------------------
# Code
# -----------------------------------------------------------------------------
# Major and minor version numbers of the notebook format implemented here.
nbformat = 2
nbformat_minor = 0
def parse_filename(fname):
    """Parse a notebook filename.

    Split a notebook filename into the notebook name and the format
    (json/py) implied by its extension:

    * notebook.ipynb -> (notebook.ipynb, notebook, json)
    * notebook.json  -> (notebook.json, notebook, json)
    * notebook.py    -> (notebook.py, notebook, py)
    * notebook       -> (notebook.ipynb, notebook, json)

    Parameters
    ----------
    fname : unicode
        The notebook filename. It may carry one of the recognised
        extensions (.ipynb, .json, .py); with any other or no extension,
        the whole name is taken as the notebook name and .ipynb is appended.

    Returns
    -------
    (fname, name, format) : (unicode, unicode, unicode)
        The filename, notebook name and format.
    """
    stem, ext = os.path.splitext(fname)
    format_by_ext = {".ipynb": "json", ".json": "json", ".py": "py"}
    if ext in format_by_ext:
        return fname, stem, format_by_ext[ext]
    return fname + ".ipynb", fname, "json"

View File

@@ -0,0 +1,62 @@
"""Code for converting notebooks to and from the v2 format.
Authors:
* Brian Granger
* Jonathan Frederic
"""
# -----------------------------------------------------------------------------
# Copyright (C) 2008-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Imports
# -----------------------------------------------------------------------------
from .nbbase import new_code_cell, new_notebook, new_text_cell, new_worksheet
# -----------------------------------------------------------------------------
# Code
# -----------------------------------------------------------------------------
def upgrade(nb, from_version=1):
    """Convert a notebook to the v2 format.

    All cells of the v1 notebook end up in a single worksheet: code cells
    become v2 code cells, text cells become markdown cells.

    Parameters
    ----------
    nb : NotebookNode
        The Python representation of the notebook to convert.
    from_version : int
        The version of the notebook to convert from.

    Raises
    ------
    ValueError
        If ``from_version`` is not 1.
    """
    if from_version != 1:
        raise ValueError("Cannot convert a notebook from v%s to v2" % from_version)

    newnb = new_notebook()
    ws = new_worksheet()
    for cell in nb.cells:
        if cell.cell_type == "code":
            newcell = new_code_cell(
                input=cell.get("code"), prompt_number=cell.get("prompt_number")
            )
        elif cell.cell_type == "text":
            newcell = new_text_cell("markdown", source=cell.get("text"))
        else:
            # Only code and text cells are converted.  Skip anything else
            # rather than failing on an unbound ``newcell`` or appending the
            # previous cell a second time.
            continue
        ws.cells.append(newcell)
    newnb.worksheets.append(ws)
    return newnb
def downgrade(nb):
    """Convert a v2 notebook to v1 (unsupported).

    Parameters
    ----------
    nb : NotebookNode
        The Python representation of the notebook to convert.

    Raises
    ------
    Exception
        Always.
    """
    reason = "Downgrade from notebook v2 to v1 is not supported."
    raise Exception(reason)

View File

@@ -0,0 +1,187 @@
"""The basic dict based notebook format.
The Python representation of a notebook is a nested structure of
dictionary subclasses that support attribute access.
The functions in this module are merely
helpers to build the structs in the right form.
Authors:
* Brian Granger
"""
# -----------------------------------------------------------------------------
# Copyright (C) 2008-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Imports
# -----------------------------------------------------------------------------
from .._struct import Struct
# -----------------------------------------------------------------------------
# Code
# -----------------------------------------------------------------------------
class NotebookNode(Struct):
    """Node type for every nested element of a v2 notebook; adds nothing to Struct."""
def from_dict(d):
    """Recursively convert plain containers into notebook structures.

    Dicts become ``NotebookNode`` objects, tuples and lists become lists of
    converted items, and any other value is returned unchanged.
    """
    if isinstance(d, dict):
        node = NotebookNode()
        for key, value in d.items():
            node[key] = from_dict(value)
        return node
    if isinstance(d, (tuple, list)):
        return list(map(from_dict, d))
    return d
def new_output(
    output_type=None,
    output_text=None,
    output_png=None,
    output_html=None,
    output_svg=None,
    output_latex=None,
    output_json=None,
    output_javascript=None,
    output_jpeg=None,
    prompt_number=None,
    etype=None,
    evalue=None,
    traceback=None,
):
    """Create a new output node for a code cell.

    Only arguments that are not None are stored.  Which ones are considered
    depends on ``output_type``:

    * any type except "pyerr": the ``output_*`` representations, stored
      without the ``output_`` prefix (png/jpeg as ``bytes``, the rest as
      ``str``)
    * "pyout": additionally ``prompt_number``
    * "pyerr": only ``etype``, ``evalue`` and ``traceback``
    """
    output = NotebookNode()
    if output_type is not None:
        output.output_type = str(output_type)

    if output_type != "pyerr":
        representations = (
            ("text", output_text, str),
            ("png", output_png, bytes),
            ("jpeg", output_jpeg, bytes),
            ("html", output_html, str),
            ("svg", output_svg, str),
            ("latex", output_latex, str),
            ("json", output_json, str),
            ("javascript", output_javascript, str),
        )
        for attr, raw, coerce in representations:
            if raw is not None:
                setattr(output, attr, coerce(raw))

    if output_type == "pyout" and prompt_number is not None:
        output.prompt_number = int(prompt_number)

    if output_type == "pyerr":
        if etype is not None:
            output.etype = str(etype)
        if evalue is not None:
            output.evalue = str(evalue)
        if traceback is not None:
            output.traceback = [str(frame) for frame in list(traceback)]

    return output
def new_code_cell(input=None, prompt_number=None, outputs=None, language="python", collapsed=False):
    """Create a new v2 code cell.

    ``language``, ``input``, ``prompt_number`` and ``collapsed`` are coerced
    to their types and stored only when not None; ``outputs`` defaults to a
    new empty list.
    """
    cell = NotebookNode()
    cell.cell_type = "code"
    for attr, raw, coerce in (
        ("language", language, str),
        ("input", input, str),
        ("prompt_number", prompt_number, int),
    ):
        if raw is not None:
            setattr(cell, attr, coerce(raw))
    cell.outputs = [] if outputs is None else outputs
    if collapsed is not None:
        cell.collapsed = bool(collapsed)
    return cell
def new_text_cell(cell_type, source=None, rendered=None):
    """Create a new text cell of the given ``cell_type``.

    ``source`` and ``rendered`` are stored as ``str`` when not None.
    """
    cell = NotebookNode()
    for attr, raw in (("source", source), ("rendered", rendered)):
        if raw is not None:
            setattr(cell, attr, str(raw))
    cell.cell_type = cell_type
    return cell
def new_worksheet(name=None, cells=None):
    """Create a worksheet with an optional name and a list of cells.

    ``cells`` is copied into a new list; None gives an empty list.
    """
    ws = NotebookNode()
    if name is not None:
        ws.name = str(name)
    ws.cells = [] if cells is None else list(cells)
    return ws
def new_notebook(metadata=None, worksheets=None):
    """Create a v2 notebook from optional metadata and worksheets.

    ``worksheets`` is copied into a new list (empty when None).  ``metadata``
    is wrapped in a ``NotebookNode``; when None, ``new_metadata()`` is used.
    """
    nb = NotebookNode()
    nb.nbformat = 2
    nb.worksheets = [] if worksheets is None else list(worksheets)
    nb.metadata = new_metadata() if metadata is None else NotebookNode(metadata)
    return nb
def new_metadata(name=None, authors=None, license=None, created=None, modified=None, gistid=None):
    """Create a new metadata node.

    Every argument that is not None is stored: ``authors`` as a list, all
    others as ``str``.
    """
    metadata = NotebookNode()
    entries = (
        ("name", name, str),
        ("authors", authors, list),
        ("created", created, str),
        ("modified", modified, str),
        ("license", license, str),
        ("gistid", gistid, str),
    )
    for attr, raw, coerce in entries:
        if raw is not None:
            setattr(metadata, attr, coerce(raw))
    return metadata
def new_author(name=None, email=None, affiliation=None, url=None):
    """Create a new author node; each non-None argument is stored as ``str``."""
    author = NotebookNode()
    details = (
        ("name", name),
        ("email", email),
        ("affiliation", affiliation),
        ("url", url),
    )
    for attr, raw in details:
        if raw is not None:
            setattr(author, attr, str(raw))
    return author

View File

@@ -0,0 +1,72 @@
"""Read and write notebooks in JSON format.
Authors:
* Brian Granger
"""
# -----------------------------------------------------------------------------
# Copyright (C) 2008-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Imports
# -----------------------------------------------------------------------------
import copy
import json
from .nbbase import from_dict
from .rwbase import (
NotebookReader,
NotebookWriter,
rejoin_lines,
restore_bytes,
split_lines,
)
# -----------------------------------------------------------------------------
# Code
# -----------------------------------------------------------------------------
class BytesEncoder(json.JSONEncoder):
    """A JSON encoder that accepts b64 (and other *ascii*) bytestrings."""

    def default(self, obj):
        """Encode ``bytes`` as ASCII text; defer everything else to the base class."""
        if not isinstance(obj, bytes):
            return super().default(obj)
        return obj.decode("ascii")
class JSONReader(NotebookReader):
    """Reader that parses JSON text into v2 notebook structures."""

    def reads(self, s, **kwargs):
        """Parse the JSON string ``s``; ``kwargs`` are passed to ``json.loads``."""
        raw = json.loads(s, **kwargs)
        return self.to_notebook(raw, **kwargs)

    def to_notebook(self, d, **kwargs):
        """Convert a raw JSON dict: ``from_dict``, then ``rejoin_lines``, then ``restore_bytes``."""
        node = from_dict(d)
        node = rejoin_lines(node)
        return restore_bytes(node)
class JSONWriter(NotebookWriter):
    """Writer that serializes a v2 notebook to JSON text."""

    def writes(self, nb, **kwargs):
        """Return ``nb`` as JSON text.

        ``cls``, ``indent`` and ``sort_keys`` are always overridden.  Unless
        ``split_lines=False`` is passed, ``split_lines`` is applied to a deep
        copy of ``nb`` first.
        """
        should_split = kwargs.pop("split_lines", True)
        kwargs.update(cls=BytesEncoder, indent=1, sort_keys=True)
        payload = split_lines(copy.deepcopy(nb)) if should_split else nb
        return json.dumps(payload, **kwargs)
# Module-level API: functions bound to one shared reader and one shared
# writer instance.
_reader = JSONReader()
_writer = JSONWriter()
reads = _reader.reads
read = _reader.read
to_notebook = _reader.to_notebook
write = _writer.write
writes = _writer.writes

View File

@@ -0,0 +1,151 @@
"""Read and write notebooks as regular .py files.
Authors:
* Brian Granger
"""
# -----------------------------------------------------------------------------
# Copyright (C) 2008-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Imports
# -----------------------------------------------------------------------------
import re
from typing import List
from .nbbase import new_code_cell, new_notebook, new_text_cell, new_worksheet
from .rwbase import NotebookReader, NotebookWriter
# -----------------------------------------------------------------------------
# Code
# -----------------------------------------------------------------------------
# Matches a comment line containing ``coding:`` or ``coding=`` followed by an
# encoding name; PyReader skips such lines when parsing.
_encoding_declaration_re = re.compile(r"^#.*coding[:=]\s*([-\w.]+)")
class PyReaderError(Exception):
    """Exception type for errors in reading a notebook from ``.py`` text."""
class PyReader(NotebookReader):
    """Reader for notebooks stored as ``.py`` files with cell marker comments.

    A line starting with ``# <codecell>``, ``# <htmlcell>`` or
    ``# <markdowncell>`` starts a new cell of that kind.  Lines before the
    first marker are treated as a code cell.
    """

    def reads(self, s, **kwargs):
        """Read a notebook from the string ``s``."""
        return self.to_notebook(s, **kwargs)

    def to_notebook(self, s, **kwargs):
        """Parse ``s`` into a notebook with a single worksheet.

        ``# <nbformat>`` lines and encoding declarations are skipped.  Cells
        that turn out empty are dropped.
        """
        lines = s.splitlines()
        cells = []
        cell_lines: List[str] = []
        state = "codecell"
        for line in lines:
            if line.startswith("# <nbformat>") or _encoding_declaration_re.match(line):
                pass
            elif line.startswith("# <codecell>"):
                cell = self.new_cell(state, cell_lines)
                if cell is not None:
                    cells.append(cell)
                state = "codecell"
                cell_lines = []
            elif line.startswith("# <htmlcell>"):
                cell = self.new_cell(state, cell_lines)
                if cell is not None:
                    cells.append(cell)
                state = "htmlcell"
                cell_lines = []
            elif line.startswith("# <markdowncell>"):
                cell = self.new_cell(state, cell_lines)
                if cell is not None:
                    cells.append(cell)
                state = "markdowncell"
                cell_lines = []
            else:
                cell_lines.append(line)
        # Flush the last cell whatever its kind.  This used to happen only
        # for code cells, so a trailing html/markdown cell was silently lost.
        if cell_lines:
            cell = self.new_cell(state, cell_lines)
            if cell is not None:
                cells.append(cell)
        ws = new_worksheet(cells=cells)
        nb = new_notebook(worksheets=[ws])
        return nb

    def new_cell(self, state, lines):
        """Build the cell for parser ``state`` from its ``lines``.

        Returns None when the cell content is empty or ``state`` is unknown.
        """
        if state == "codecell":
            input = "\n".join(lines)
            input = input.strip("\n")
            if input:
                return new_code_cell(input=input)
        elif state == "htmlcell":
            text = self._remove_comments(lines)
            if text:
                return new_text_cell("html", source=text)
        elif state == "markdowncell":
            text = self._remove_comments(lines)
            if text:
                return new_text_cell("markdown", source=text)
        return None

    def _remove_comments(self, lines):
        """Strip the two-character comment prefix from commented lines.

        Lines starting with ``#`` lose their first two characters; other
        lines are kept as they are.  Leading and trailing newlines of the
        joined text are removed.
        """
        new_lines = []
        for line in lines:
            if line.startswith("#"):
                new_lines.append(line[2:])
            else:
                new_lines.append(line)
        text = "\n".join(new_lines)
        text = text.strip("\n")
        return text

    def split_lines_into_blocks(self, lines):
        """Yield the source of each top-level statement found in ``lines``.

        A single line is yielded unchanged.  Otherwise the lines are parsed
        with ``ast`` and split at the first line of every top-level statement.
        """
        if len(lines) == 1:
            yield lines[0]
            # Plain ``return`` ends the generator.  Raising StopIteration
            # here would surface as RuntimeError under PEP 479.
            return
        import ast

        source = "\n".join(lines)
        code = ast.parse(source)
        starts = [x.lineno - 1 for x in code.body]
        for i in range(len(starts) - 1):
            yield "\n".join(lines[starts[i] : starts[i + 1]]).strip("\n")
        yield "\n".join(lines[starts[-1] :]).strip("\n")
class PyWriter(NotebookWriter):
    """Serialize a notebook to the ``.py`` export format."""

    @staticmethod
    def _commented(marker, text):
        """Return the lines of a text cell: marker, blank, ``# ``-prefixed body, blank."""
        return [marker, ""] + ["# " + line for line in text.splitlines()] + [""]

    def writes(self, nb, **kwargs):
        """Return ``nb`` rendered as ``.py`` text.

        Code cells are written verbatim under a ``# <codecell>`` marker;
        html and markdown cells are written as comment lines under their
        own marker.  Cells whose content is missing, and cells of any other
        type, are skipped.  ``kwargs`` is accepted but not used.
        """
        out = ["# -*- coding: utf-8 -*-", "# <nbformat>2</nbformat>", ""]
        for worksheet in nb.worksheets:
            for cell in worksheet.cells:
                kind = cell.cell_type
                if kind == "code":
                    body = cell.get("input")
                    if body is not None:
                        out.extend(["# <codecell>", ""])
                        out.extend(body.splitlines())
                        out.append("")
                elif kind == "html":
                    body = cell.get("source")
                    if body is not None:
                        out.extend(self._commented("# <htmlcell>", body))
                elif kind == "markdown":
                    body = cell.get("source")
                    if body is not None:
                        out.extend(self._commented("# <markdowncell>", body))
        out.append("")
        return "\n".join(out)
# Module-level convenience API: the functions below are bound methods of one
# shared PyReader instance and one shared PyWriter instance.
_reader = PyReader()
_writer = PyWriter()
reads = _reader.reads
read = _reader.read
to_notebook = _reader.to_notebook
write = _writer.write
writes = _writer.writes

View File

@@ -0,0 +1,30 @@
"""REMOVED: Read and write notebook files as XML.
"""
# Message carried by the exception that every function in this module raises.
REMOVED_MSG = """\
Reading notebooks as XML has been removed to harden security and avoid
possible denial-of-service attacks.
The XML notebook format was deprecated before the Jupyter (previously IPython)
Notebook was ever released. We are not aware of anyone using it, so we have
removed it.
If you were using this code, and you need to continue using it, feel free to
fork an earlier version of the nbformat package and maintain it yourself.
The issue which prompted this removal is:
https://github.com/jupyter/nbformat/issues/132
"""
def reads(s, **kwargs):
    """Removed XML entry point: always raises ``Exception(REMOVED_MSG)``."""
    raise Exception(REMOVED_MSG)
def read(fp, **kwargs):
    """Removed XML entry point: always raises ``Exception(REMOVED_MSG)``."""
    raise Exception(REMOVED_MSG)
def to_notebook(root, **kwargs):
    """Removed XML entry point: always raises ``Exception(REMOVED_MSG)``."""
    raise Exception(REMOVED_MSG)

View File

@@ -0,0 +1,164 @@
"""Base classes and utilities for readers and writers.
Authors:
* Brian Granger
"""
# -----------------------------------------------------------------------------
# Copyright (C) 2008-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Imports
# -----------------------------------------------------------------------------
from base64 import decodebytes, encodebytes
# -----------------------------------------------------------------------------
# Code
# -----------------------------------------------------------------------------
def restore_bytes(nb):
    """Turn the ``png``/``jpeg`` text of code-cell outputs back into bytes.

    Each value is ASCII-encoded in place; no base64 decoding is performed.
    Returns the same ``nb`` object.
    """
    code_cells = (
        cell for ws in nb.worksheets for cell in ws.cells if cell.cell_type == "code"
    )
    for cell in code_cells:
        for output in cell.outputs:
            for key in ("png", "jpeg"):
                if key in output:
                    setattr(output, key, getattr(output, key).encode("ascii"))
    return nb
# Output keys that are likely to have multiline values; split_lines and
# rejoin_lines convert these between a single string and a list of lines.
_multiline_outputs = ["text", "html", "svg", "latex", "javascript", "json"]
def rejoin_lines(nb):
    """Join list-valued text fields back into newline-separated strings.

    This reverses ``split_lines(nb)``.  Only values that are lists are
    touched, so text that was never split passes through unchanged.  The
    notebook is modified in place and returned.
    """

    def _join(node, keys):
        # Replace each list value under ``keys`` by its "\n"-joined string.
        for key in keys:
            value = node.get(key, None)
            if isinstance(value, list):
                node[key] = "\n".join(value)

    for ws in nb.worksheets:
        for cell in ws.cells:
            if cell.cell_type != "code":
                # Every non-code cell is treated as a text cell.
                _join(cell, ["source", "rendered"])
                continue
            if "input" in cell and isinstance(cell.input, list):
                cell.input = "\n".join(cell.input)
            for output in cell.outputs:
                _join(output, _multiline_outputs)
    return nb
def split_lines(nb):
    """Split string-valued text fields into lists of lines.

    ``rejoin_lines(nb)`` reverses this.  Only values that are ``str`` are
    touched.  The notebook is modified in place and returned.
    """

    def _split(node, keys):
        # Replace each string value under ``keys`` by its list of lines.
        for key in keys:
            value = node.get(key, None)
            if isinstance(value, str):
                node[key] = value.splitlines()

    for ws in nb.worksheets:
        for cell in ws.cells:
            if cell.cell_type != "code":
                # Every non-code cell is treated as a text cell.
                _split(cell, ["source", "rendered"])
                continue
            if "input" in cell and isinstance(cell.input, str):
                cell.input = cell.input.splitlines()
            for output in cell.outputs:
                _split(output, _multiline_outputs)
    return nb
# b64 encode/decode are never actually used, because all bytes objects in
# the notebook are already b64-encoded, and we don't need/want to double-encode
def base64_decode(nb):
    """Base64-decode the ``png``/``jpeg`` data of every code-cell output.

    String values are ASCII-encoded first, then passed to ``decodebytes``.
    The notebook is modified in place and returned.

    Note: This is never used
    """
    for ws in nb.worksheets:
        for cell in ws.cells:
            if cell.cell_type != "code":
                continue
            for output in cell.outputs:
                for key in ("png", "jpeg"):
                    if key not in output:
                        continue
                    if isinstance(getattr(output, key), str):
                        setattr(output, key, getattr(output, key).encode("ascii"))
                    setattr(output, key, decodebytes(getattr(output, key)))
    return nb
def base64_encode(nb):
    """Base64-encode the ``png``/``jpeg`` bytes of every code-cell output.

    Each value is replaced by the ASCII-decoded result of ``encodebytes``,
    i.e. a ``str``.  The notebook is modified in place and returned.

    Note: This is never used
    """
    for ws in nb.worksheets:
        for cell in ws.cells:
            if cell.cell_type != "code":
                continue
            for output in cell.outputs:
                for key in ("png", "jpeg"):
                    if key in output:
                        encoded = encodebytes(getattr(output, key))
                        setattr(output, key, encoded.decode("ascii"))
    return nb
class NotebookReader:
    """A class for reading notebooks."""

    def reads(self, s, **kwargs):
        """Read a notebook from a string.

        Subclasses must override this; the base implementation raises
        ``NotImplementedError``.
        """
        raise NotImplementedError("reads must be implemented in a subclass")

    def read(self, fp, **kwargs):
        """Read a notebook from a file like object.

        The whole content of ``fp`` is read and handed to ``reads``.
        """
        # Delegate to ``reads``: calling ``self.read`` here recursed without
        # end, passing the already-read string back in as ``fp``.
        return self.reads(fp.read(), **kwargs)
class NotebookWriter:
    """A class for writing notebooks."""

    def writes(self, nb, **kwargs):
        """Write a notebook to a string.

        Subclasses must override this; the base implementation raises
        ``NotImplementedError``.
        """
        # The message previously named "loads", a method that does not exist.
        raise NotImplementedError("writes must be implemented in a subclass")

    def write(self, nb, fp, **kwargs):
        """Write a notebook to a file like object.

        Returns whatever ``fp.write`` returns.
        """
        return fp.write(self.writes(nb, **kwargs))

View File

@@ -0,0 +1,99 @@
"""The main API for the v3 notebook format.
"""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
# Public names of the v3 API; each one is imported or defined further down
# in this module.
__all__ = [
"NotebookNode",
"new_code_cell",
"new_text_cell",
"new_notebook",
"new_output",
"new_worksheet",
"new_metadata",
"new_author",
"new_heading_cell",
"nbformat",
"nbformat_minor",
"nbformat_schema",
"reads_json",
"writes_json",
"read_json",
"write_json",
"to_notebook_json",
"reads_py",
"writes_py",
"read_py",
"write_py",
"to_notebook_py",
"downgrade",
"upgrade",
"parse_filename",
]
import os
from .convert import downgrade, upgrade
from .nbbase import (
NotebookNode,
nbformat,
nbformat_minor,
nbformat_schema,
new_author,
new_code_cell,
new_heading_cell,
new_metadata,
new_notebook,
new_output,
new_text_cell,
new_worksheet,
)
from .nbjson import reads as read_json
from .nbjson import reads as reads_json
from .nbjson import to_notebook as to_notebook_json
from .nbjson import writes as write_json
from .nbjson import writes as writes_json
from .nbpy import reads as read_py
from .nbpy import reads as reads_py
from .nbpy import to_notebook as to_notebook_py
from .nbpy import writes as write_py
from .nbpy import writes as writes_py
def parse_filename(fname):
    """Parse a notebook filename into (filename, name, format).

    The extension selects the format:

    * notebook.ipynb -> (notebook.ipynb, notebook, json)
    * notebook.json  -> (notebook.json, notebook, json)
    * notebook.py    -> (notebook.py, notebook, py)
    * notebook       -> (notebook.ipynb, notebook, json)

    Any other extension is treated as part of the name: ``.ipynb`` is
    appended and the format is ``json``.

    Parameters
    ----------
    fname : unicode
        The notebook filename, with a known extension (.ipynb, .json, .py)
        or without one, in which case .ipynb is assumed.

    Returns
    -------
    (fname, name, format) : (unicode, unicode, unicode)
        The filename, notebook name and format.
    """
    stem, ext = os.path.splitext(fname)
    known_formats = {".ipynb": "json", ".json": "json", ".py": "py"}
    if ext in known_formats:
        return fname, stem, known_formats[ext]
    return fname + ".ipynb", fname, "json"

View File

@@ -0,0 +1,90 @@
"""Code for converting notebooks to and from the v2 format."""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
from .nbbase import nbformat, nbformat_minor
def _unbytes(obj):
"""There should be no bytes objects in a notebook
v2 stores png/jpeg as b64 ascii bytes
"""
if isinstance(obj, dict):
for k, v in obj.items():
obj[k] = _unbytes(v)
elif isinstance(obj, list):
for i, v in enumerate(obj):
obj[i] = _unbytes(v)
elif isinstance(obj, bytes):
# only valid bytes are b64-encoded ascii
obj = obj.decode("ascii")
return obj
def upgrade(nb, from_version=2, from_minor=0):
    """Convert a notebook to v3.

    Parameters
    ----------
    nb : NotebookNode
        The Python representation of the notebook to convert.
    from_version : int
        The original version of the notebook to convert.
    from_minor : int
        The original minor version of the notebook to convert (only
        relevant for v >= 3).

    Returns
    -------
    The converted notebook.

    Raises
    ------
    ValueError
        If ``from_version`` is neither 2 nor 3.
    """
    if from_version not in (2, 3):
        raise ValueError(
            "Cannot convert a notebook directly from v%s to v3. "
            "Try using the nbformat.convert module." % from_version
        )

    if from_version == 3:
        # Same major version: only the minor number may need recording.
        if from_minor != nbformat_minor:
            nb.orig_nbformat_minor = from_minor
        nb.nbformat_minor = nbformat_minor
        return nb

    # Mark the original nbformat so consumers know it has been converted.
    nb.nbformat = nbformat
    nb.nbformat_minor = nbformat_minor
    nb.orig_nbformat = 2
    nb = _unbytes(nb)
    for worksheet in nb["worksheets"]:
        for cell in worksheet["cells"]:
            cell.setdefault("metadata", {})
    return nb
def heading_to_md(cell):
    """Rewrite a heading cell in place as the equivalent markdown cell.

    The ``level`` key (default 1) is removed and becomes that many ``#``
    characters, followed by a space, in front of the source.
    """
    cell.cell_type = "markdown"
    depth = cell.pop("level", 1)
    prefix = "#" * depth + " "
    cell.source = prefix + cell.source
def raw_to_md(cell):
    """Relabel a raw cell in place as markdown; its content is untouched."""
    cell.cell_type = "markdown"
def downgrade(nb):
    """Convert a v3 notebook to v2.

    Heading and raw cells are turned into markdown cells.  A notebook whose
    ``nbformat`` is not 3 is returned untouched.

    Parameters
    ----------
    nb : NotebookNode
        The Python representation of the notebook to convert.

    Returns
    -------
    The same notebook object, modified in place.
    """
    if nb.nbformat != 3:
        return nb
    nb.nbformat = 2
    every_cell = (cell for ws in nb.worksheets for cell in ws.cells)
    for cell in every_cell:
        kind = cell.cell_type
        if kind == "heading":
            heading_to_md(cell)
        elif kind == "raw":
            raw_to_md(cell)
    return nb

View File

@@ -0,0 +1,237 @@
"""The basic dict based notebook format.
The Python representation of a notebook is a nested structure of
dictionary subclasses that support attribute access.
The functions in this module are merely
helpers to build the structs in the right form.
"""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
import warnings
from .._struct import Struct
# -----------------------------------------------------------------------------
# Code
# -----------------------------------------------------------------------------
# Change this when incrementing the nbformat version
nbformat = 3
nbformat_minor = 0
# Keyed by (major, minor) version; the value is presumably the name of the
# JSON schema file for that version -- confirm against the validator.
nbformat_schema = {(3, 0): "nbformat.v3.schema.json"}
class NotebookNode(Struct):
    """Node type used for every part of a notebook; adds nothing to ``Struct``."""
def from_dict(d):
    """Recursively convert plain containers into notebook nodes.

    Dicts become ``NotebookNode`` instances, tuples and lists become lists,
    and their items are converted the same way.  Any other value is
    returned unchanged.
    """
    if isinstance(d, dict):
        node = NotebookNode()
        for key, value in d.items():
            node[key] = from_dict(value)
        return node
    if isinstance(d, (tuple, list)):
        return list(map(from_dict, d))
    return d
def str_passthrough(obj):
    """Return ``obj`` unchanged after asserting that it is a ``str``.

    Used to be cast_unicode; kept temporarily to make sure there is no
    further breakage.
    """
    assert isinstance(obj, str)
    return obj
def cast_str(obj):
    """Return ``obj`` as a ``str``, decoding ``bytes`` with a warning.

    ``bytes`` input is decoded as ASCII with undecodable bytes replaced,
    and a ``UserWarning`` is issued because the content is probably
    corrupted.  Any other input is asserted to be ``str`` and returned
    unchanged.
    """
    if isinstance(obj, bytes):
        # really this should never happen, it should
        # have been base64 encoded before.
        warnings.warn(
            # The two literals used to be joined without a space, which
            # produced "values.The content".
            "A notebook got bytes instead of likely base64 encoded values. "
            "The content will likely be corrupted.",
            UserWarning,
            # Attribute the warning to the caller of this function's caller.
            stacklevel=3,
        )
        return obj.decode("ascii", "replace")
    else:
        assert isinstance(obj, str)
        return obj
def new_output(
    output_type,
    output_text=None,
    output_png=None,
    output_html=None,
    output_svg=None,
    output_latex=None,
    output_json=None,
    output_javascript=None,
    output_jpeg=None,
    prompt_number=None,
    ename=None,
    evalue=None,
    traceback=None,
    stream=None,
    metadata=None,
):
    """Create a new output, to go in the ``cell.outputs`` list of a code cell.

    Only the fields relevant to ``output_type`` are set, and only when the
    matching argument is not ``None``.  Raises ``TypeError`` if ``metadata``
    is given and is not a dict.
    """
    output = NotebookNode()
    output.output_type = str(output_type)

    if metadata is None:
        metadata = {}
    if not isinstance(metadata, dict):
        raise TypeError("metadata must be dict")
    if output_type in {"pyout", "display_data"}:
        output.metadata = metadata

    if output_type != "pyerr":
        # (key, value, converter), in the order the keys are inserted.
        payload = (
            ("text", output_text, str_passthrough),
            ("png", output_png, cast_str),
            ("jpeg", output_jpeg, cast_str),
            ("html", output_html, str_passthrough),
            ("svg", output_svg, str_passthrough),
            ("latex", output_latex, str_passthrough),
            ("json", output_json, str_passthrough),
            ("javascript", output_javascript, str_passthrough),
        )
        for key, value, convert in payload:
            if value is not None:
                setattr(output, key, convert(value))

    if output_type == "pyout" and prompt_number is not None:
        output.prompt_number = int(prompt_number)

    if output_type == "pyerr":
        if ename is not None:
            output.ename = str_passthrough(ename)
        if evalue is not None:
            output.evalue = str_passthrough(evalue)
        if traceback is not None:
            frames = list(traceback)
            output.traceback = [str_passthrough(frame) for frame in frames]

    if output_type == "stream":
        if stream is None:
            output.stream = "stdout"
        else:
            output.stream = str_passthrough(stream)

    return output
def new_code_cell(
    input=None, prompt_number=None, outputs=None, language="python", collapsed=False, metadata=None
):
    """Create a new code cell with input and output.

    ``language``, ``input``, ``prompt_number`` and ``collapsed`` are only
    set when not ``None``; ``outputs`` defaults to an empty list and
    ``metadata`` to an empty node.
    """
    cell = NotebookNode()
    cell.cell_type = "code"
    if language is not None:
        cell.language = str_passthrough(language)
    if input is not None:
        cell.input = str_passthrough(input)
    if prompt_number is not None:
        cell.prompt_number = int(prompt_number)
    cell.outputs = [] if outputs is None else outputs
    if collapsed is not None:
        cell.collapsed = bool(collapsed)
    cell.metadata = NotebookNode(metadata or {})
    return cell
def new_text_cell(cell_type, source=None, rendered=None, metadata=None):
    """Create a new text cell of the given ``cell_type``.

    ``rendered`` is accepted but not stored.
    """
    # VERSIONHACK: plaintext -> raw
    # handle never-released plaintext name for raw cells
    kind = "raw" if cell_type == "plaintext" else cell_type
    cell = NotebookNode()
    if source is not None:
        cell.source = str_passthrough(source)
    cell.metadata = NotebookNode(metadata or {})
    cell.cell_type = kind
    return cell
def new_heading_cell(source=None, level=1, rendered=None, metadata=None):
    """Create a new heading cell with a given integer level.

    ``rendered`` is accepted but not stored.
    """
    cell = NotebookNode()
    cell.cell_type = "heading"
    if source is not None:
        cell.source = str_passthrough(source)
    cell.level = int(level)
    cell.metadata = NotebookNode(metadata or {})
    return cell
def new_worksheet(name=None, cells=None, metadata=None):
    """Create a worksheet holding a copy of the given list of cells.

    ``name`` is accepted but not stored.
    """
    ws = NotebookNode()
    ws.cells = [] if cells is None else list(cells)
    ws.metadata = NotebookNode(metadata or {})
    return ws
def new_notebook(name=None, metadata=None, worksheets=None):
    """Create a notebook from a list of worksheets.

    The notebook is stamped with the module's ``nbformat`` and
    ``nbformat_minor``.  When ``name`` is given it is stored in the
    notebook metadata.
    """
    nb = NotebookNode()
    nb.nbformat = nbformat
    nb.nbformat_minor = nbformat_minor
    nb.worksheets = [] if worksheets is None else list(worksheets)
    nb.metadata = new_metadata() if metadata is None else NotebookNode(metadata)
    if name is not None:
        nb.metadata.name = str_passthrough(name)
    return nb
def new_metadata(name=None, authors=None, license=None, created=None, modified=None, gistid=None):
    """Create a new metadata node; only arguments that are not ``None`` are stored."""
    metadata = NotebookNode()
    if name is not None:
        metadata.name = str_passthrough(name)
    if authors is not None:
        metadata.authors = list(authors)
    # Remaining string fields, in the order they are inserted.
    string_fields = (
        ("created", created),
        ("modified", modified),
        ("license", license),
        ("gistid", gistid),
    )
    for key, value in string_fields:
        if value is not None:
            setattr(metadata, key, str_passthrough(value))
    return metadata
def new_author(name=None, email=None, affiliation=None, url=None):
    """Create a new author node; only arguments that are not ``None`` are stored."""
    author = NotebookNode()
    fields = (
        ("name", name),
        ("email", email),
        ("affiliation", affiliation),
        ("url", url),
    )
    for key, value in fields:
        if value is not None:
            setattr(author, key, str_passthrough(value))
    return author

View File

@@ -0,0 +1,367 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"description": "IPython Notebook v3.0 JSON schema.",
"type": "object",
"additionalProperties": false,
"required": ["metadata", "nbformat_minor", "nbformat", "worksheets"],
"properties": {
"metadata": {
"description": "Notebook root-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"kernel_info": {
"description": "Kernel information.",
"type": "object",
"required": ["name", "language"],
"properties": {
"name": {
"description": "Name of the kernel specification.",
"type": "string"
},
"language": {
"description": "The programming language which this kernel runs.",
"type": "string"
},
"codemirror_mode": {
"description": "The codemirror mode to use for code in this language.",
"type": "string"
}
}
},
"signature": {
"description": "Hash of the notebook.",
"type": "string"
}
}
},
"nbformat_minor": {
"description": "Notebook format (minor number). Incremented for backward compatible changes to the notebook format.",
"type": "integer",
"minimum": 0
},
"nbformat": {
"description": "Notebook format (major number). Incremented between backwards incompatible changes to the notebook format.",
"type": "integer",
"minimum": 3,
"maximum": 3
},
"orig_nbformat": {
"description": "Original notebook format (major number) before converting the notebook between versions.",
"type": "integer",
"minimum": 1
},
"orig_nbformat_minor": {
"description": "Original notebook format (minor number) before converting the notebook between versions.",
"type": "integer",
"minimum": 0
},
"worksheets": {
"description": "Array of worksheets",
"type": "array",
"items": { "$ref": "#/definitions/worksheet" }
}
},
"definitions": {
"worksheet": {
"additionalProperties": false,
"required": ["cells"],
"properties": {
"cells": {
"description": "Array of cells of the current notebook.",
"type": "array",
"items": {
"type": "object",
"oneOf": [
{ "$ref": "#/definitions/raw_cell" },
{ "$ref": "#/definitions/markdown_cell" },
{ "$ref": "#/definitions/heading_cell" },
{ "$ref": "#/definitions/code_cell" }
]
}
},
"metadata": {
"type": "object",
"description": "metadata of the current worksheet"
}
}
},
"raw_cell": {
"description": "Notebook raw nbconvert cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "source"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["raw"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"format": {
"description": "Raw cell metadata format for nbconvert.",
"type": "string"
},
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
}
},
"source": { "$ref": "#/definitions/misc/source" }
}
},
"markdown_cell": {
"description": "Notebook markdown cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "source"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["markdown", "html"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
},
"additionalProperties": true
},
"source": { "$ref": "#/definitions/misc/source" }
}
},
"heading_cell": {
"description": "Notebook heading cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "source", "level"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["heading"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true
},
"source": { "$ref": "#/definitions/misc/source" },
"level": {
"description": "Level of heading cells.",
"type": "integer",
"minimum": 1
}
}
},
"code_cell": {
"description": "Notebook code cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "input", "outputs", "language"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["code"]
},
"language": {
"description": "The cell's language (always Python)",
"type": "string"
},
"collapsed": {
"description": "Whether the cell is collapsed/expanded.",
"type": "boolean"
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true
},
"input": { "$ref": "#/definitions/misc/source" },
"outputs": {
"description": "Execution, display, or stream outputs.",
"type": "array",
"items": { "$ref": "#/definitions/output" }
},
"prompt_number": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
}
}
},
"output": {
"type": "object",
"oneOf": [
{ "$ref": "#/definitions/pyout" },
{ "$ref": "#/definitions/display_data" },
{ "$ref": "#/definitions/stream" },
{ "$ref": "#/definitions/pyerr" }
]
},
"pyout": {
"description": "Result of executing a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "prompt_number"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["pyout"]
},
"prompt_number": {
"description": "A result's prompt number.",
"type": ["integer"],
"minimum": 0
},
"text": { "$ref": "#/definitions/misc/multiline_string" },
"latex": { "$ref": "#/definitions/misc/multiline_string" },
"png": { "$ref": "#/definitions/misc/multiline_string" },
"jpeg": { "$ref": "#/definitions/misc/multiline_string" },
"svg": { "$ref": "#/definitions/misc/multiline_string" },
"html": { "$ref": "#/definitions/misc/multiline_string" },
"javascript": { "$ref": "#/definitions/misc/multiline_string" },
"json": { "$ref": "#/definitions/misc/multiline_string" },
"pdf": { "$ref": "#/definitions/misc/multiline_string" },
"metadata": { "$ref": "#/definitions/misc/output_metadata" }
},
"patternProperties": {
"^[a-zA-Z0-9]+/[a-zA-Z0-9\\-\\+\\.]+$": {
"description": "mimetype output (e.g. text/plain), represented as either an array of strings or a string.",
"$ref": "#/definitions/misc/multiline_string"
}
}
},
"display_data": {
"description": "Data displayed as a result of code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["display_data"]
},
"text": { "$ref": "#/definitions/misc/multiline_string" },
"latex": { "$ref": "#/definitions/misc/multiline_string" },
"png": { "$ref": "#/definitions/misc/multiline_string" },
"jpeg": { "$ref": "#/definitions/misc/multiline_string" },
"svg": { "$ref": "#/definitions/misc/multiline_string" },
"html": { "$ref": "#/definitions/misc/multiline_string" },
"javascript": { "$ref": "#/definitions/misc/multiline_string" },
"json": { "$ref": "#/definitions/misc/multiline_string" },
"pdf": { "$ref": "#/definitions/misc/multiline_string" },
"metadata": { "$ref": "#/definitions/misc/output_metadata" }
},
"patternProperties": {
"[a-zA-Z0-9]+/[a-zA-Z0-9\\-\\+\\.]+$": {
"description": "mimetype output (e.g. text/plain), represented as either an array of strings or a string.",
"$ref": "#/definitions/misc/multiline_string"
}
}
},
"stream": {
"description": "Stream output from a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "stream", "text"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["stream"]
},
"stream": {
"description": "The stream type/destination.",
"type": "string"
},
"text": {
"description": "The stream's text output, represented as an array of strings.",
"$ref": "#/definitions/misc/multiline_string"
}
}
},
"pyerr": {
"description": "Output of an error that occurred during code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "ename", "evalue", "traceback"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["pyerr"]
},
"ename": {
"description": "The name of the error.",
"type": "string"
},
"evalue": {
"description": "The value, or message, of the error.",
"type": "string"
},
"traceback": {
"description": "The error's traceback, represented as an array of strings.",
"type": "array",
"items": { "type": "string" }
}
}
},
"misc": {
"metadata_name": {
"description": "The cell's name. If present, must be a non-empty string.",
"type": "string",
"pattern": "^.+$"
},
"metadata_tags": {
"description": "The cell's tags. Tags must be unique, and must not contain commas.",
"type": "array",
"uniqueItems": true,
"items": {
"type": "string",
"pattern": "^[^,]+$"
}
},
"source": {
"description": "Contents of the cell, represented as an array of lines.",
"$ref": "#/definitions/misc/multiline_string"
},
"prompt_number": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
},
"mimetype": {
"patternProperties": {
"^[a-zA-Z0-9\\-\\+]+/[a-zA-Z0-9\\-\\+]+": {
"description": "The cell's mimetype output (e.g. text/plain), represented as either an array of strings or a string.",
"$ref": "#/definitions/misc/multiline_string"
}
}
},
"output_metadata": {
"description": "Cell output metadata.",
"type": "object",
"additionalProperties": true
},
"multiline_string": {
"oneOf": [
{ "type": "string" },
{
"type": "array",
"items": { "type": "string" }
}
]
}
}
}
}

View File

@@ -0,0 +1,59 @@
"""Read and write notebooks in JSON format."""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
import copy
import json
from .nbbase import from_dict
from .rwbase import (
NotebookReader,
NotebookWriter,
rejoin_lines,
split_lines,
strip_transient,
)
class BytesEncoder(json.JSONEncoder):
    """A JSON encoder that accepts b64 (and other *ascii*) bytestrings."""

    def default(self, obj):
        """Encode ``bytes`` as ASCII text; defer everything else to the base class."""
        if not isinstance(obj, bytes):
            return super().default(obj)
        return obj.decode("ascii")
class JSONReader(NotebookReader):
    """Reader for notebooks stored as JSON text."""

    def reads(self, s, **kwargs):
        """Parse the JSON string ``s`` into a notebook.

        ``kwargs`` is forwarded to ``json.loads`` and to ``to_notebook``;
        the result is then passed through ``strip_transient``.
        """
        parsed = json.loads(s, **kwargs)
        notebook = self.to_notebook(parsed, **kwargs)
        return strip_transient(notebook)

    def to_notebook(self, d, **kwargs):
        """Convert the parsed JSON object ``d`` into notebook nodes with lines rejoined."""
        node = from_dict(d)
        return rejoin_lines(node)
class JSONWriter(NotebookWriter):
    """Writer that serializes notebooks to JSON text."""

    def writes(self, nb, **kwargs):
        """Return ``nb`` as a JSON string.

        A deep copy is serialized, so ``nb`` itself is not modified.  The
        encoder class, indent, key sorting and separators are fixed here
        and override any caller-supplied values.  Pass ``split_lines=False``
        to keep multiline text as single strings; all other ``kwargs`` go
        to ``json.dumps``.
        """
        kwargs.update(
            cls=BytesEncoder,
            indent=1,
            sort_keys=True,
            separators=(",", ": "),
        )
        should_split = kwargs.pop("split_lines", True)
        notebook = strip_transient(copy.deepcopy(nb))
        if should_split:
            notebook = split_lines(notebook)
        return json.dumps(notebook, **kwargs)
# Module-level convenience API: the functions below are bound methods of one
# shared JSONReader instance and one shared JSONWriter instance.
_reader = JSONReader()
_writer = JSONWriter()
reads = _reader.reads
read = _reader.read
to_notebook = _reader.to_notebook
write = _writer.write
writes = _writer.writes

View File

@@ -0,0 +1,212 @@
"""Read and write notebooks as regular .py files.
Authors:
* Brian Granger
"""
# -----------------------------------------------------------------------------
# Copyright (C) 2008-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Imports
# -----------------------------------------------------------------------------
import re
from typing import List
from .nbbase import (
nbformat,
nbformat_minor,
new_code_cell,
new_heading_cell,
new_notebook,
new_text_cell,
new_worksheet,
)
from .rwbase import NotebookReader, NotebookWriter
# -----------------------------------------------------------------------------
# Code
# -----------------------------------------------------------------------------
# Matches a comment line that carries a source-encoding declaration
# (e.g. "# -*- coding: utf-8 -*-"); group 1 captures the encoding name.
_encoding_declaration_re = re.compile(r"^#.*coding[:=]\s*([-\w.]+)")
class PyReaderError(Exception):
    """Exception type declared for the ``.py`` notebook reader.

    NOTE(review): nothing in the visible reader code raises it.
    """
class PyReader(NotebookReader):
def reads(self, s, **kwargs):
return self.to_notebook(s, **kwargs)
def to_notebook(self, s, **kwargs):
lines = s.splitlines()
cells = []
cell_lines: List[str] = []
kwargs = {}
state = "codecell"
for line in lines:
if line.startswith("# <nbformat>") or _encoding_declaration_re.match(line):
pass
elif line.startswith("# <codecell>"):
cell = self.new_cell(state, cell_lines, **kwargs)
if cell is not None:
cells.append(cell)
state = "codecell"
cell_lines = []
kwargs = {}
elif line.startswith("# <htmlcell>"):
cell = self.new_cell(state, cell_lines, **kwargs)
if cell is not None:
cells.append(cell)
state = "htmlcell"
cell_lines = []
kwargs = {}
elif line.startswith("# <markdowncell>"):
cell = self.new_cell(state, cell_lines, **kwargs)
if cell is not None:
cells.append(cell)
state = "markdowncell"
cell_lines = []
kwargs = {}
# VERSIONHACK: plaintext -> raw
elif line.startswith("# <rawcell>") or line.startswith("# <plaintextcell>"):
cell = self.new_cell(state, cell_lines, **kwargs)
if cell is not None:
cells.append(cell)
state = "rawcell"
cell_lines = []
kwargs = {}
elif line.startswith("# <headingcell"):
cell = self.new_cell(state, cell_lines, **kwargs)
if cell is not None:
cells.append(cell)
cell_lines = []
m = re.match(r"# <headingcell level=(?P<level>\d)>", line)
if m is not None:
state = "headingcell"
kwargs = {}
kwargs["level"] = int(m.group("level"))
else:
state = "codecell"
kwargs = {}
cell_lines = []
else:
cell_lines.append(line)
if cell_lines and state == "codecell":
cell = self.new_cell(state, cell_lines)
if cell is not None:
cells.append(cell)
ws = new_worksheet(cells=cells)
nb = new_notebook(worksheets=[ws])
return nb
def new_cell(self, state, lines, **kwargs):
if state == "codecell":
input = "\n".join(lines)
input = input.strip("\n")
if input:
return new_code_cell(input=input)
elif state == "htmlcell":
text = self._remove_comments(lines)
if text:
return new_text_cell("html", source=text)
elif state == "markdowncell":
text = self._remove_comments(lines)
if text:
return new_text_cell("markdown", source=text)
elif state == "rawcell":
text = self._remove_comments(lines)
if text:
return new_text_cell("raw", source=text)
elif state == "headingcell":
text = self._remove_comments(lines)
level = kwargs.get("level", 1)
if text:
return new_heading_cell(source=text, level=level)
def _remove_comments(self, lines):
new_lines = []
for line in lines:
if line.startswith("#"):
new_lines.append(line[2:])
else:
new_lines.append(line)
text = "\n".join(new_lines)
text = text.strip("\n")
return text
def split_lines_into_blocks(self, lines):
    """Yield the source of each top-level statement found in ``lines``.

    Parameters
    ----------
    lines : list of str
        Python source, one entry per line (no line endings).

    Yields
    ------
    str
        A single line is yielded unchanged. Otherwise the lines are parsed
        with :mod:`ast` and one chunk is yielded per top-level statement,
        running from that statement's first line up to the next statement,
        with surrounding newlines stripped. Nothing is yielded when the
        source contains no statements.

    Raises
    ------
    SyntaxError
        If the joined lines are not valid Python.
    """
    if len(lines) == 1:
        yield lines[0]
        # A plain ``return`` ends the generator; raising StopIteration here
        # would be turned into RuntimeError (PEP 479).
        return
    import ast

    source = "\n".join(lines)
    code = ast.parse(source)
    starts = [node.lineno - 1 for node in code.body]
    if not starts:
        # Empty or comment-only input: there is no statement to emit.
        return
    for begin, end in zip(starts, starts[1:]):
        yield "\n".join(lines[begin:end]).strip("\n")
    yield "\n".join(lines[starts[-1] :]).strip("\n")
class PyWriter(NotebookWriter):
    """Writer that renders a notebook as a Python script with cell markers."""

    def writes(self, nb, **kwargs):
        """Serialize ``nb`` to the text of a marked-up ``.py`` script.

        The script starts with a coding line and an ``<nbformat>`` marker.
        Each cell of each worksheet is then written as a ``# <...cell>``
        marker line, a blank line, the cell text and another blank line.
        Code cells are written verbatim from ``input``; html, markdown, raw
        and heading cells are written from ``source`` with every line
        prefixed by ``"# "``. Cells whose text is ``None`` and cells of any
        other type are skipped. ``kwargs`` is accepted but not used.

        Returns
        -------
        str
            The script text, lines joined with ``"\\n"``.
        """
        script = ["# -*- coding: utf-8 -*-"]
        script += ["# <nbformat>%i.%i</nbformat>" % (nbformat, nbformat_minor), ""]

        # Cell types emitted as commented text with a fixed marker line.
        comment_markers = {
            "html": "# <htmlcell>",
            "markdown": "# <markdowncell>",
            "raw": "# <rawcell>",
        }

        for worksheet in nb.worksheets:
            for cell in worksheet.cells:
                kind = cell.cell_type

                if kind == "code":
                    code = cell.get("input")
                    if code is not None:
                        script += ["# <codecell>", ""]
                        script += code.splitlines()
                        script.append("")
                    continue

                if kind in comment_markers:
                    text = cell.get("source")
                    if text is None:
                        continue
                    marker = comment_markers[kind]
                elif kind == "heading":
                    text = cell.get("source")
                    heading_level = cell.get("level", 1)
                    if text is None:
                        continue
                    marker = "# <headingcell level=%s>" % heading_level
                else:
                    continue

                script += [marker, ""]
                script += ["# " + row for row in text.splitlines()]
                script.append("")

        script.append("")
        return "\n".join(script)
# Module-level reader/writer instances; their bound methods are exposed below
# as this module's function-style API.
_reader = PyReader()
_writer = PyWriter()
reads = _reader.reads
read = _reader.read
to_notebook = _reader.to_notebook
write = _writer.write
writes = _writer.writes

View File

@@ -0,0 +1,183 @@
"""Base classes and utilities for readers and writers."""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
from base64 import decodebytes, encodebytes
def restore_bytes(nb):
    """Turn the ``png``/``jpeg`` text of code-cell outputs back into bytes.

    Each such value is encoded as ASCII with ``errors="replace"``. This does
    not touch base64: the values stay base64-encoded, only their type
    changes. The notebook is modified in place and returned.

    Note: this is never used
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != "code":
                continue
            for out in cell.outputs:
                for image_key in ("png", "jpeg"):
                    if image_key in out:
                        as_text = getattr(out, image_key)
                        setattr(out, image_key, as_text.encode("ascii", "replace"))
    return nb
# Output keys that are likely to hold multiline values; these are the keys
# that split_lines() / rejoin_lines() convert on code-cell outputs.
_multiline_outputs = ["text", "html", "svg", "latex", "javascript", "json"]
# FIXME: workaround for old splitlines()
def _join_lines(lines):
"""join lines that have been written by splitlines()
Has logic to protect against `splitlines()`, which
should have been `splitlines(True)`
"""
if lines and lines[0].endswith(("\n", "\r")):
# created by splitlines(True)
return "".join(lines)
else:
# created by splitlines()
return "\n".join(lines)
def rejoin_lines(nb):
    """Rejoin multiline text that was stored as lists of lines.

    This is the inverse of ``split_lines(nb)``. Only values that are lists
    are rejoined; values that are already strings pass through unchanged.
    For code cells the ``input`` and the multiline output keys are handled;
    for every other cell type ``source`` and ``rendered`` are handled.

    Used when reading JSON files that may have been passed through
    ``split_lines``. The notebook is modified in place and returned.
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != "code":
                # text, heading cell
                for field in ["source", "rendered"]:
                    value = cell.get(field, None)
                    if isinstance(value, list):
                        cell[field] = _join_lines(value)
                continue

            if "input" in cell and isinstance(cell.input, list):
                cell.input = _join_lines(cell.input)
            for out in cell.outputs:
                for field in _multiline_outputs:
                    value = out.get(field, None)
                    if isinstance(value, list):
                        out[field] = _join_lines(value)
    return nb
def split_lines(nb):
    """Split likely-multiline text into lists of lines, keeping line endings.

    This makes the written file friendlier to line-based version control;
    ``rejoin_lines(nb)`` reverses it. Only string values are split. For code
    cells the ``input`` and the multiline output keys are handled; for every
    other cell type ``source`` and ``rendered`` are handled.

    Used when writing JSON files. The notebook is modified in place and
    returned.
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != "code":
                # text, heading cell
                for field in ["source", "rendered"]:
                    value = cell.get(field, None)
                    if isinstance(value, str):
                        cell[field] = value.splitlines(True)
                continue

            if "input" in cell and isinstance(cell.input, str):
                cell.input = cell.input.splitlines(True)
            for out in cell.outputs:
                for field in _multiline_outputs:
                    value = out.get(field, None)
                    if isinstance(value, str):
                        out[field] = value.splitlines(True)
    return nb
# b64 encode/decode are never actually used, because all bytes objects in
# the notebook are already b64-encoded, and we don't need/want to double-encode
def base64_decode(nb):
    """Replace base64 ``png``/``jpeg`` output values with their decoded bytes.

    String values are first encoded to ASCII bytes, then base64-decoded.
    Only outputs of code cells are touched. The notebook is modified in
    place and returned.

    Note: This is never used
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != "code":
                continue
            for out in cell.outputs:
                for image_key in ("png", "jpeg"):
                    if image_key not in out:
                        continue
                    if isinstance(getattr(out, image_key), str):
                        setattr(out, image_key, getattr(out, image_key).encode("ascii"))
                    setattr(out, image_key, decodebytes(getattr(out, image_key)))
    return nb
def base64_encode(nb):
    """Base64-encode the ``png``/``jpeg`` bytes of code-cell outputs.

    The encoded values are stored as ASCII text (unicode strings). The
    notebook is modified in place and returned.

    Note: This is never used
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != "code":
                continue
            for out in cell.outputs:
                for image_key in ("png", "jpeg"):
                    if image_key in out:
                        encoded = encodebytes(getattr(out, image_key))
                        setattr(out, image_key, encoded.decode("ascii"))
    return nb
def strip_transient(nb):
    """Remove transient values that should not be stored in files.

    Drops ``orig_nbformat`` and ``orig_nbformat_minor`` from the notebook and
    ``trusted`` from every cell's metadata. This should be called in *both*
    read and write. The notebook is modified in place and returned.
    """
    for transient_key in ("orig_nbformat", "orig_nbformat_minor"):
        nb.pop(transient_key, None)

    every_cell = (cell for sheet in nb["worksheets"] for cell in sheet["cells"])
    for cell in every_cell:
        metadata = cell.get("metadata", {})
        metadata.pop("trusted", None)
        # Also strip cell.trusted even though it shouldn't be used, since
        # that is where the transient value used to be stored.
        cell.pop("trusted", None)
    return nb
class NotebookReader:
    """Base class for notebook readers.

    Subclasses implement :meth:`reads`; :meth:`read` is built on top of it.
    """

    def reads(self, s, **kwargs):
        """Read a notebook from a string.

        Raises
        ------
        NotImplementedError
            Always, in this base class; subclasses must override it.
        """
        # The message names the method that actually has to be overridden.
        raise NotImplementedError("reads must be implemented in a subclass")

    def read(self, fp, **kwargs):
        """Read a notebook from a file-like object.

        The whole content of ``fp`` is read and handed to :meth:`reads`
        together with ``kwargs``; its result is returned.
        """
        nbs = fp.read()
        return self.reads(nbs, **kwargs)
class NotebookWriter:
    """Base class for notebook writers.

    Subclasses implement :meth:`writes`; :meth:`write` is built on top of it.
    """

    def writes(self, nb, **kwargs):
        """Write a notebook to a string.

        Raises
        ------
        NotImplementedError
            Always, in this base class; subclasses must override it.
        """
        # The message names the method that actually has to be overridden.
        raise NotImplementedError("writes must be implemented in a subclass")

    def write(self, nb, fp, **kwargs):
        """Write a notebook to a file-like object.

        The notebook is serialized with :meth:`writes` (receiving ``kwargs``)
        and the result is passed to ``fp.write``, whose return value is
        returned.
        """
        nbs = self.writes(nb, **kwargs)
        return fp.write(nbs)

View File

@@ -0,0 +1,39 @@
"""The main API for the v4 notebook format."""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
# Public names of the v4 API. ``new_raw_cell`` is imported from ``.nbbase``
# alongside the other cell constructors, so it is exported here as well.
__all__ = [
    "nbformat",
    "nbformat_minor",
    "nbformat_schema",
    "new_code_cell",
    "new_markdown_cell",
    "new_raw_cell",
    "new_notebook",
    "new_output",
    "output_from_msg",
    "reads",
    "writes",
    "to_notebook",
    "downgrade",
    "upgrade",
]
from .nbbase import (
nbformat,
nbformat_minor,
nbformat_schema,
new_code_cell,
new_markdown_cell,
new_notebook,
new_output,
new_raw_cell,
output_from_msg,
)
from .nbjson import reads, to_notebook, writes
# Format-explicit aliases for the functions imported from ``.nbjson`` above.
reads_json = reads
writes_json = writes
to_notebook_json = to_notebook
from .convert import downgrade, upgrade

View File

@@ -0,0 +1,293 @@
"""Code for converting notebooks to and from v3."""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
import json
import re
from traitlets.log import get_logger
from nbformat import v3
from .. import validator
from .nbbase import NotebookNode, nbformat, nbformat_minor, random_cell_id
def _warn_if_invalid(nb, version):
    """Validate ``nb`` against ``version`` and log any validation error.

    A ``ValidationError`` is reported through the logger and not re-raised,
    so conversion carries on with an invalid notebook.
    """
    # Imported at call time rather than at module import time.
    from nbformat import ValidationError, validate

    try:
        validate(nb, version=version)
    except ValidationError as err:
        log = get_logger()
        log.error("Notebook JSON is not valid v%i: %s", version, err)
def upgrade(nb, from_version=None, from_minor=None):
    """Convert a notebook to latest v4.

    Parameters
    ----------
    nb : NotebookNode
        The Python representation of the notebook to convert.
    from_version : int
        The original version of the notebook to convert. Read from
        ``nb["nbformat"]`` when not given.
    from_minor : int
        The original minor version of the notebook to convert. Read from
        ``nb["nbformat_minor"]`` when not given. It is only consulted when
        ``from_version`` is 4.

    Returns
    -------
    NotebookNode
        The same ``nb`` object, converted in place.

    Raises
    ------
    ValidationError
        If a v4 notebook has no ``nbformat_minor`` and none was passed.
    ValueError
        If ``from_version`` is neither 3 nor 4.
    """
    if not from_version:
        from_version = nb["nbformat"]
    if not from_minor:
        if "nbformat_minor" in nb:
            from_minor = nb["nbformat_minor"]
        elif from_version == 4:
            raise validator.ValidationError(
                "The notebook does not include the nbformat minor which is needed"
            )
        else:
            # The minor version is only used by the v4 -> v4 path below, so a
            # missing value must not block conversion from other versions.
            from_minor = 0

    if from_version == 3:
        # Validate the notebook before conversion
        _warn_if_invalid(nb, from_version)

        # Mark the original nbformat so consumers know it has been converted
        orig_nbformat = nb.pop("orig_nbformat", None)
        orig_nbformat_minor = nb.pop("orig_nbformat_minor", None)
        nb.metadata.orig_nbformat = orig_nbformat or 3
        nb.metadata.orig_nbformat_minor = orig_nbformat_minor or 0

        # Mark the new format
        nb.nbformat = nbformat
        nb.nbformat_minor = nbformat_minor

        # remove worksheet(s)
        nb["cells"] = cells = []
        # In the unlikely event of multiple worksheets,
        # they will be flattened
        for ws in nb.pop("worksheets", []):
            # upgrade each cell
            for cell in ws["cells"]:
                cells.append(upgrade_cell(cell))
        # upgrade metadata
        nb.metadata.pop("name", "")
        nb.metadata.pop("signature", "")
        # Validate the converted notebook before returning it
        _warn_if_invalid(nb, nbformat)
        return nb
    elif from_version == 4:
        if from_minor == nbformat_minor:
            return nb

        # other versions migration code e.g.
        # if from_minor < 3:
        # if from_minor < 4:

        if from_minor < 5:
            # Notebooks older than 4.5 get a generated id on every cell.
            for cell in nb.cells:
                cell.id = random_cell_id()

        nb.metadata.orig_nbformat_minor = from_minor
        nb.nbformat_minor = nbformat_minor

        return nb
    else:
        raise ValueError(
            "Cannot convert a notebook directly from v%s to v4.  "
            "Try using the nbformat.convert module." % from_version
        )
def upgrade_cell(cell):
    """Upgrade a single cell from v3 to v4, in place.

    Every cell gets a ``metadata`` entry (if missing) and a fresh ``id``.

    code cell:
        - ``language`` is removed
        - ``collapsed`` moves into the cell metadata
        - ``input`` becomes ``source``
        - ``prompt_number`` becomes ``execution_count``
        - outputs are upgraded
    heading cell:
        - becomes a markdown cell whose source is a single line prefixed
          with ``level`` hash characters
    html cell:
        - becomes a markdown cell

    Returns the same ``cell`` object.
    """
    cell.setdefault("metadata", NotebookNode())
    cell.id = random_cell_id()

    kind = cell.cell_type
    if kind == "html":
        # Technically, this exists. It will never happen in practice.
        cell.cell_type = "markdown"
    elif kind == "heading":
        cell.cell_type = "markdown"
        level = cell.pop("level", 1)
        hashes = "#" * level
        # Heading text is collapsed onto one line.
        single_line = " ".join(cell.get("source", "").splitlines())
        cell.source = "{hashes} {single_line}".format(
            hashes=hashes,
            single_line=single_line,
        )
    elif kind == "code":
        cell.pop("language", "")
        if "collapsed" in cell:
            cell.metadata["collapsed"] = cell.pop("collapsed")
        cell.source = cell.pop("input", "")
        cell.execution_count = cell.pop("prompt_number", None)
        cell.outputs = upgrade_outputs(cell.outputs)
    return cell
def downgrade_cell(cell):
    """Downgrade a single cell from v4 to v3, in place.

    code cell:
        - ``language`` is set to ``"python"``
        - ``source`` becomes ``input``
        - ``execution_count`` becomes ``prompt_number``
        - ``collapsed`` moves out of the metadata (default ``False``)
        - outputs are downgraded
    markdown cell:
        - a single-line source starting with ``#`` becomes a heading cell
          whose level is the number of leading hash characters

    ``id`` and ``attachments`` are removed from every cell. Returns the same
    ``cell`` object.
    """
    kind = cell.cell_type
    if kind == "code":
        cell.language = "python"
        cell.input = cell.pop("source", "")
        cell.prompt_number = cell.pop("execution_count", None)
        cell.collapsed = cell.metadata.pop("collapsed", False)
        cell.outputs = downgrade_outputs(cell.outputs)
    elif kind == "markdown":
        text = cell.get("source", "")
        if "\n" not in text and text.startswith("#"):
            heading = re.match(r"(#+)\s*(.*)", text)
            assert heading is not None
            hashes, title = heading.groups()
            cell.cell_type = "heading"
            cell.source = title
            cell.level = len(hashes)

    for v4_only_key in ("id", "attachments"):
        cell.pop(v4_only_key, None)
    return cell
# Maps the short v3 output keys (aliases) to the mime types that replace them
# as keys in v4; used in both directions by to_mime_key() and from_mime_key().
_mime_map = {
"text": "text/plain",
"html": "text/html",
"svg": "image/svg+xml",
"png": "image/png",
"jpeg": "image/jpeg",
"latex": "text/latex",
"json": "application/json",
"javascript": "application/javascript",
}
def to_mime_key(d):
    """Rename the v3 alias keys of ``d`` to plain mime-type keys, in place.

    Keys that are not v3 aliases are left untouched. Returns ``d``.
    """
    for short_name, mime_type in _mime_map.items():
        if short_name not in d:
            continue
        d[mime_type] = d.pop(short_name)
    return d
def from_mime_key(d):
    """Return a new dict holding the entries of ``d`` under their v3 aliases.

    Only mime types that have a v3 alias are carried over; ``d`` itself is
    not modified.
    """
    return {
        short_name: d[mime_type]
        for short_name, mime_type in _mime_map.items()
        if mime_type in d
    }
def upgrade_output(output):
    """Upgrade a single code cell output from v3 to v4, in place.

    - pyout -> execute_result
    - pyerr -> error
    - output.type -> output.data.mime/type
    - mime-type keys
    - stream.stream -> stream.name

    For ``pyout`` and ``display_data`` outputs every key other than
    ``output_type``, ``execution_count`` and ``metadata`` is moved into the
    ``data`` sub-dict. JSON text is parsed and ASCII image bytes are decoded
    to text there. Returns the same ``output`` object.
    """
    kind = output["output_type"]
    if kind in {"pyout", "display_data"}:
        output.setdefault("metadata", NotebookNode())
        if kind == "pyout":
            output["output_type"] = "execute_result"
            output["execution_count"] = output.pop("prompt_number", None)

        # move output data into data sub-dict
        data = {}
        for key in list(output):
            if key in {"output_type", "execution_count", "metadata"}:
                continue
            data[key] = output.pop(key)
        to_mime_key(data)
        to_mime_key(output.metadata)
        if "application/json" in data:
            data["application/json"] = json.loads(data["application/json"])
        # promote ascii bytes (from v2) to unicode
        for key in ("image/png", "image/jpeg"):
            if key in data and isinstance(data[key], bytes):
                data[key] = data[key].decode("ascii")
        # Attach the bundle only once it is fully converted, so the result
        # does not depend on whether the output container keeps a reference
        # to this dict or copies it on assignment.
        output["data"] = data
    elif kind == "pyerr":
        output["output_type"] = "error"
    elif kind == "stream":
        output["name"] = output.pop("stream", "stdout")
    return output
def downgrade_output(output):
    """Downgrade a single code cell output from v4 to v3, in place.

    - pyout <- execute_result
    - pyerr <- error
    - output.data.mime/type -> output.type
    - un-mime-type keys
    - stream.stream <- stream.name

    For ``execute_result`` and ``display_data`` outputs the ``data`` bundle
    is promoted to top-level keys under their v3 aliases, with JSON data
    serialized back to text. Returns the same ``output`` object.
    """
    kind = output["output_type"]
    if kind in {"execute_result", "display_data"}:
        if kind == "execute_result":
            output["output_type"] = "pyout"
            output["prompt_number"] = output.pop("execution_count", None)

        # promote data dict to top-level output namespace
        bundle = output.pop("data", {})
        if "application/json" in bundle:
            bundle["application/json"] = json.dumps(bundle["application/json"])
        output.update(from_mime_key(bundle))
        # NOTE(review): from_mime_key() returns a new dict and this result is
        # discarded, so the metadata keys are left as they are; confirm
        # whether the metadata was meant to be converted too.
        from_mime_key(output.get("metadata", {}))
    elif kind == "error":
        output["output_type"] = "pyerr"
    elif kind == "stream":
        output["stream"] = output.pop("name")
    return output
def upgrade_outputs(outputs):
    """Upgrade every output of a code cell from v3 to v4.

    Returns a new list holding the result of ``upgrade_output`` for each
    entry, in order.
    """
    upgraded = []
    for single_output in outputs:
        upgraded.append(upgrade_output(single_output))
    return upgraded
def downgrade_outputs(outputs):
    """Downgrade every output of a code cell from v4 to v3.

    Returns a new list holding the result of ``downgrade_output`` for each
    entry, in order.
    """
    downgraded = []
    for single_output in outputs:
        downgraded.append(downgrade_output(single_output))
    return downgraded
def downgrade(nb):
    """Convert a v4 notebook to v3.

    Parameters
    ----------
    nb : NotebookNode
        The Python representation of the notebook to convert.

    Returns
    -------
    NotebookNode
        The same ``nb`` object. It is returned untouched when its major
        version is not 4; otherwise it is converted in place, with all cells
        placed in a single worksheet.
    """
    if nb.nbformat != nbformat:
        return nb

    # Validate the notebook before conversion
    _warn_if_invalid(nb, nbformat)

    nb.nbformat = v3.nbformat
    nb.nbformat_minor = v3.nbformat_minor
    v3_cells = []
    for v4_cell in nb.pop("cells"):
        v3_cells.append(downgrade_cell(v4_cell))
    nb.worksheets = [v3.new_worksheet(cells=v3_cells)]
    nb.metadata.setdefault("name", "")

    # Validate the converted notebook before returning it
    _warn_if_invalid(nb, v3.nbformat)

    # Record the original format at the top level after validating, taking
    # it from the metadata when it is stored there.
    metadata = nb.metadata
    nb.orig_nbformat = metadata.pop("orig_nbformat", nbformat)
    nb.orig_nbformat_minor = metadata.pop("orig_nbformat_minor", nbformat_minor)
    return nb

View File

@@ -0,0 +1,173 @@
"""Python API for composing notebook elements
The Python representation of a notebook is a nested structure of
dictionary subclasses that support attribute access.
The functions in this module are merely helpers to build the structs
in the right form.
"""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
from ..corpus.words import generate_corpus_id as random_cell_id
from ..notebooknode import NotebookNode
# Change the nbformat_minor and nbformat_schema variables when incrementing the
# nbformat version
# Current major version of the notebook format implemented by this module.
nbformat = 4
# Current minor version of the notebook format implemented by this module.
nbformat_minor = 5
# Schema file names keyed by (major, minor) version tuples.
# (None, None) means the current version.
nbformat_schema = {
(None, None): "nbformat.v4.schema.json",
(4, 0): "nbformat.v4.0.schema.json",
(4, 1): "nbformat.v4.1.schema.json",
(4, 2): "nbformat.v4.2.schema.json",
(4, 3): "nbformat.v4.3.schema.json",
(4, 4): "nbformat.v4.4.schema.json",
(4, 5): "nbformat.v4.5.schema.json",
}
def validate(node, ref=None):
    """Validate a v4 node.

    Delegates to the package-level ``validate`` with ``version`` fixed to
    this module's major version, passing ``ref`` through, and returns its
    result.
    """
    # Imported at call time rather than at module import time.
    from .. import validate as package_validate

    outcome = package_validate(node, ref=ref, version=nbformat)
    return outcome
def new_output(output_type, data=None, **kwargs):
    """Create a new output, to go in the ``cell.outputs`` list of a code cell.

    Parameters
    ----------
    output_type : str
        ``"stream"``, ``"display_data"``, ``"execute_result"`` or ``"error"``
        get type-specific default fields; any other value gets none.
    data : optional
        When not ``None``, stored as ``output.data`` after ``kwargs`` have
        been applied.
    **kwargs
        Extra fields; they override the defaults.

    Returns
    -------
    NotebookNode
        The new output, after it has been passed to ``validate``.
    """
    output = NotebookNode(output_type=output_type)

    # populate defaults:
    if output_type == "stream":
        output.name = "stdout"
        output.text = ""
    elif output_type in ("display_data", "execute_result"):
        output.metadata = NotebookNode()
        output.data = NotebookNode()
        if output_type == "execute_result":
            output.execution_count = None
    elif output_type == "error":
        output.ename = "NotImplementedError"
        output.evalue = ""
        output.traceback = []

    # load from args:
    output.update(kwargs)
    if data is not None:
        output.data = data

    # validate
    validate(output, output_type)
    return output
def output_from_msg(msg):
    """Create a NotebookNode for an output from a kernel's IOPub message.

    The message type is read from ``msg["header"]["msg_type"]`` and the
    fields for that type are copied from ``msg["content"]``.

    Returns
    -------
    NotebookNode: the output as a notebook node.

    Raises
    ------
    ValueError: if the message is not an output message.
    """
    # Content fields copied into the output, per output message type.
    content_fields = {
        "execute_result": ("metadata", "data", "execution_count"),
        "stream": ("name", "text"),
        "display_data": ("metadata", "data"),
        "error": ("ename", "evalue", "traceback"),
    }

    msg_type = msg["header"]["msg_type"]
    content = msg["content"]

    if msg_type not in content_fields:
        raise ValueError("Unrecognized output msg type: %r" % msg_type)

    copied = {field: content[field] for field in content_fields[msg_type]}
    return new_output(output_type=msg_type, **copied)
def new_code_cell(source="", **kwargs):
    """Create a new code cell.

    The cell starts with a generated ``id``, empty metadata, no execution
    count, the given ``source`` and no outputs. ``kwargs`` are applied on
    top of these defaults, and the cell is passed to ``validate`` before
    being returned.
    """
    defaults = {
        "id": random_cell_id(),
        "cell_type": "code",
        "metadata": NotebookNode(),
        "execution_count": None,
        "source": source,
        "outputs": [],
    }
    cell = NotebookNode(**defaults)
    cell.update(kwargs)

    validate(cell, "code_cell")
    return cell
def new_markdown_cell(source="", **kwargs):
    """Create a new markdown cell.

    The cell starts with a generated ``id``, the given ``source`` and empty
    metadata. ``kwargs`` are applied on top of these defaults, and the cell
    is passed to ``validate`` before being returned.
    """
    defaults = {
        "id": random_cell_id(),
        "cell_type": "markdown",
        "source": source,
        "metadata": NotebookNode(),
    }
    cell = NotebookNode(**defaults)
    cell.update(kwargs)

    validate(cell, "markdown_cell")
    return cell
def new_raw_cell(source="", **kwargs):
    """Create a new raw cell.

    The cell starts with a generated ``id``, the given ``source`` and empty
    metadata. ``kwargs`` are applied on top of these defaults, and the cell
    is passed to ``validate`` before being returned.
    """
    defaults = {
        "id": random_cell_id(),
        "cell_type": "raw",
        "source": source,
        "metadata": NotebookNode(),
    }
    cell = NotebookNode(**defaults)
    cell.update(kwargs)

    validate(cell, "raw_cell")
    return cell
def new_notebook(**kwargs):
    """Create a new notebook.

    The notebook starts with this module's major and minor format version,
    empty metadata and no cells. ``kwargs`` are applied on top of these
    defaults, and the notebook is passed to ``validate`` before being
    returned.
    """
    defaults = {
        "nbformat": nbformat,
        "nbformat_minor": nbformat_minor,
        "metadata": NotebookNode(),
        "cells": [],
    }
    nb = NotebookNode(**defaults)
    nb.update(kwargs)

    validate(nb)
    return nb

View File

@@ -0,0 +1,383 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"description": "IPython Notebook v4.0 JSON schema.",
"type": "object",
"additionalProperties": false,
"required": ["metadata", "nbformat_minor", "nbformat", "cells"],
"properties": {
"metadata": {
"description": "Notebook root-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"kernelspec": {
"description": "Kernel information.",
"type": "object",
"required": ["name", "display_name"],
"properties": {
"name": {
"description": "Name of the kernel specification.",
"type": "string"
},
"display_name": {
"description": "Name to display in UI.",
"type": "string"
}
}
},
"language_info": {
"description": "Kernel information.",
"type": "object",
"required": ["name"],
"properties": {
"name": {
"description": "The programming language which this kernel runs.",
"type": "string"
},
"codemirror_mode": {
"description": "The codemirror mode to use for code in this language.",
"oneOf": [{ "type": "string" }, { "type": "object" }]
},
"file_extension": {
"description": "The file extension for files in this language.",
"type": "string"
},
"mimetype": {
"description": "The mimetype corresponding to files in this language.",
"type": "string"
},
"pygments_lexer": {
"description": "The pygments lexer to use for code in this language.",
"type": "string"
}
}
},
"orig_nbformat": {
"description": "Original notebook format (major number) before converting the notebook between versions. This should never be written to a file.",
"type": "integer",
"minimum": 1
}
}
},
"nbformat_minor": {
"description": "Notebook format (minor number). Incremented for backward compatible changes to the notebook format.",
"type": "integer",
"minimum": 0
},
"nbformat": {
"description": "Notebook format (major number). Incremented between backwards incompatible changes to the notebook format.",
"type": "integer",
"minimum": 4,
"maximum": 4
},
"cells": {
"description": "Array of cells of the current notebook.",
"type": "array",
"items": { "$ref": "#/definitions/cell" }
}
},
"definitions": {
"cell": {
"type": "object",
"oneOf": [
{ "$ref": "#/definitions/raw_cell" },
{ "$ref": "#/definitions/markdown_cell" },
{ "$ref": "#/definitions/code_cell" }
]
},
"raw_cell": {
"description": "Notebook raw nbconvert cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "metadata", "source"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["raw"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"format": {
"description": "Raw cell metadata format for nbconvert.",
"type": "string"
},
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
}
},
"attachments": { "$ref": "#/definitions/misc/attachments" },
"source": { "$ref": "#/definitions/misc/source" }
}
},
"markdown_cell": {
"description": "Notebook markdown cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "metadata", "source"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["markdown"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
},
"additionalProperties": true
},
"attachments": { "$ref": "#/definitions/misc/attachments" },
"source": { "$ref": "#/definitions/misc/source" }
}
},
"code_cell": {
"description": "Notebook code cell.",
"type": "object",
"additionalProperties": false,
"required": [
"cell_type",
"metadata",
"source",
"outputs",
"execution_count"
],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["code"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"collapsed": {
"description": "Whether the cell is collapsed/expanded.",
"type": "boolean"
},
"scrolled": {
"description": "Whether the cell's output is scrolled, unscrolled, or autoscrolled.",
"enum": [true, false, "auto"]
},
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
}
},
"source": { "$ref": "#/definitions/misc/source" },
"outputs": {
"description": "Execution, display, or stream outputs.",
"type": "array",
"items": { "$ref": "#/definitions/output" }
},
"execution_count": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
}
}
},
"unrecognized_cell": {
"description": "Unrecognized cell from a future minor-revision to the notebook format.",
"type": "object",
"additionalProperties": true,
"required": ["cell_type", "metadata"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"not": {
"enum": ["markdown", "code", "raw"]
}
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
},
"additionalProperties": true
}
}
},
"output": {
"type": "object",
"oneOf": [
{ "$ref": "#/definitions/execute_result" },
{ "$ref": "#/definitions/display_data" },
{ "$ref": "#/definitions/stream" },
{ "$ref": "#/definitions/error" }
]
},
"execute_result": {
"description": "Result of executing a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "data", "metadata", "execution_count"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["execute_result"]
},
"execution_count": {
"description": "A result's prompt number.",
"type": ["integer", "null"],
"minimum": 0
},
"data": { "$ref": "#/definitions/misc/mimebundle" },
"metadata": { "$ref": "#/definitions/misc/output_metadata" }
}
},
"display_data": {
"description": "Data displayed as a result of code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "data", "metadata"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["display_data"]
},
"data": { "$ref": "#/definitions/misc/mimebundle" },
"metadata": { "$ref": "#/definitions/misc/output_metadata" }
}
},
"stream": {
"description": "Stream output from a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "name", "text"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["stream"]
},
"name": {
"description": "The name of the stream (stdout, stderr).",
"type": "string"
},
"text": {
"description": "The stream's text output, represented as an array of strings.",
"$ref": "#/definitions/misc/multiline_string"
}
}
},
"error": {
"description": "Output of an error that occurred during code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "ename", "evalue", "traceback"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["error"]
},
"ename": {
"description": "The name of the error.",
"type": "string"
},
"evalue": {
"description": "The value, or message, of the error.",
"type": "string"
},
"traceback": {
"description": "The error's traceback, represented as an array of strings.",
"type": "array",
"items": { "type": "string" }
}
}
},
"unrecognized_output": {
"description": "Unrecognized output from a future minor-revision to the notebook format.",
"type": "object",
"additionalProperties": true,
"required": ["output_type"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"not": {
"enum": ["execute_result", "display_data", "stream", "error"]
}
}
}
},
"misc": {
"metadata_name": {
"description": "The cell's name. If present, must be a non-empty string.",
"type": "string",
"pattern": "^.+$"
},
"metadata_tags": {
"description": "The cell's tags. Tags must be unique, and must not contain commas.",
"type": "array",
"uniqueItems": true,
"items": {
"type": "string",
"pattern": "^[^,]+$"
}
},
"attachments": {
"description": "Media attachments (e.g. inline images), stored as mimebundle keyed by filename.",
"type": "object",
"patternProperties": {
".*": {
"description": "The attachment's data stored as a mimebundle.",
"$ref": "#/definitions/misc/mimebundle"
}
}
},
"source": {
"description": "Contents of the cell, represented as an array of lines.",
"$ref": "#/definitions/misc/multiline_string"
},
"execution_count": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
},
"mimebundle": {
"description": "A mime-type keyed dictionary of data",
"type": "object",
"additionalProperties": {
"description": "mimetype output (e.g. text/plain), represented as either an array of strings or a string.",
"$ref": "#/definitions/misc/multiline_string"
},
"patternProperties": {
"^application/(.*\\+)?json$": {
"description": "Mimetypes with JSON output, can be any type"
}
}
},
"output_metadata": {
"description": "Cell output metadata.",
"type": "object",
"additionalProperties": true
},
"multiline_string": {
"oneOf": [
{ "type": "string" },
{
"type": "array",
"items": { "type": "string" }
}
]
}
}
}
}

View File

@@ -0,0 +1,383 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"description": "IPython Notebook v4.1 JSON schema.",
"type": "object",
"additionalProperties": false,
"required": ["metadata", "nbformat_minor", "nbformat", "cells"],
"properties": {
"metadata": {
"description": "Notebook root-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"kernelspec": {
"description": "Kernel information.",
"type": "object",
"required": ["name", "display_name"],
"properties": {
"name": {
"description": "Name of the kernel specification.",
"type": "string"
},
"display_name": {
"description": "Name to display in UI.",
"type": "string"
}
}
},
"language_info": {
"description": "Kernel information.",
"type": "object",
"required": ["name"],
"properties": {
"name": {
"description": "The programming language which this kernel runs.",
"type": "string"
},
"codemirror_mode": {
"description": "The codemirror mode to use for code in this language.",
"oneOf": [{ "type": "string" }, { "type": "object" }]
},
"file_extension": {
"description": "The file extension for files in this language.",
"type": "string"
},
"mimetype": {
"description": "The mimetype corresponding to files in this language.",
"type": "string"
},
"pygments_lexer": {
"description": "The pygments lexer to use for code in this language.",
"type": "string"
}
}
},
"orig_nbformat": {
"description": "Original notebook format (major number) before converting the notebook between versions. This should never be written to a file.",
"type": "integer",
"minimum": 1
}
}
},
"nbformat_minor": {
"description": "Notebook format (minor number). Incremented for backward compatible changes to the notebook format.",
"type": "integer",
"minimum": 1
},
"nbformat": {
"description": "Notebook format (major number). Incremented between backwards incompatible changes to the notebook format.",
"type": "integer",
"minimum": 4,
"maximum": 4
},
"cells": {
"description": "Array of cells of the current notebook.",
"type": "array",
"items": { "$ref": "#/definitions/cell" }
}
},
"definitions": {
"cell": {
"type": "object",
"oneOf": [
{ "$ref": "#/definitions/raw_cell" },
{ "$ref": "#/definitions/markdown_cell" },
{ "$ref": "#/definitions/code_cell" }
]
},
"raw_cell": {
"description": "Notebook raw nbconvert cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "metadata", "source"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["raw"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"format": {
"description": "Raw cell metadata format for nbconvert.",
"type": "string"
},
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
}
},
"attachments": { "$ref": "#/definitions/misc/attachments" },
"source": { "$ref": "#/definitions/misc/source" }
}
},
"markdown_cell": {
"description": "Notebook markdown cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "metadata", "source"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["markdown"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
},
"additionalProperties": true
},
"attachments": { "$ref": "#/definitions/misc/attachments" },
"source": { "$ref": "#/definitions/misc/source" }
}
},
"code_cell": {
"description": "Notebook code cell.",
"type": "object",
"additionalProperties": false,
"required": [
"cell_type",
"metadata",
"source",
"outputs",
"execution_count"
],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["code"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"collapsed": {
"description": "Whether the cell is collapsed/expanded.",
"type": "boolean"
},
"scrolled": {
"description": "Whether the cell's output is scrolled, unscrolled, or autoscrolled.",
"enum": [true, false, "auto"]
},
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
}
},
"source": { "$ref": "#/definitions/misc/source" },
"outputs": {
"description": "Execution, display, or stream outputs.",
"type": "array",
"items": { "$ref": "#/definitions/output" }
},
"execution_count": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
}
}
},
"unrecognized_cell": {
"description": "Unrecognized cell from a future minor-revision to the notebook format.",
"type": "object",
"additionalProperties": true,
"required": ["cell_type", "metadata"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"not": {
"enum": ["markdown", "code", "raw"]
}
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
},
"additionalProperties": true
}
}
},
"output": {
"type": "object",
"oneOf": [
{ "$ref": "#/definitions/execute_result" },
{ "$ref": "#/definitions/display_data" },
{ "$ref": "#/definitions/stream" },
{ "$ref": "#/definitions/error" }
]
},
"execute_result": {
"description": "Result of executing a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "data", "metadata", "execution_count"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["execute_result"]
},
"execution_count": {
"description": "A result's prompt number.",
"type": ["integer", "null"],
"minimum": 0
},
"data": { "$ref": "#/definitions/misc/mimebundle" },
"metadata": { "$ref": "#/definitions/misc/output_metadata" }
}
},
"display_data": {
"description": "Data displayed as a result of code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "data", "metadata"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["display_data"]
},
"data": { "$ref": "#/definitions/misc/mimebundle" },
"metadata": { "$ref": "#/definitions/misc/output_metadata" }
}
},
"stream": {
"description": "Stream output from a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "name", "text"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["stream"]
},
"name": {
"description": "The name of the stream (stdout, stderr).",
"type": "string"
},
"text": {
"description": "The stream's text output, represented as an array of strings.",
"$ref": "#/definitions/misc/multiline_string"
}
}
},
"error": {
"description": "Output of an error that occurred during code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "ename", "evalue", "traceback"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["error"]
},
"ename": {
"description": "The name of the error.",
"type": "string"
},
"evalue": {
"description": "The value, or message, of the error.",
"type": "string"
},
"traceback": {
"description": "The error's traceback, represented as an array of strings.",
"type": "array",
"items": { "type": "string" }
}
}
},
"unrecognized_output": {
"description": "Unrecognized output from a future minor-revision to the notebook format.",
"type": "object",
"additionalProperties": true,
"required": ["output_type"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"not": {
"enum": ["execute_result", "display_data", "stream", "error"]
}
}
}
},
"misc": {
"metadata_name": {
"description": "The cell's name. If present, must be a non-empty string.",
"type": "string",
"pattern": "^.+$"
},
"metadata_tags": {
"description": "The cell's tags. Tags must be unique, and must not contain commas.",
"type": "array",
"uniqueItems": true,
"items": {
"type": "string",
"pattern": "^[^,]+$"
}
},
"attachments": {
"description": "Media attachments (e.g. inline images), stored as mimebundle keyed by filename.",
"type": "object",
"patternProperties": {
".*": {
"description": "The attachment's data stored as a mimebundle.",
"$ref": "#/definitions/misc/mimebundle"
}
}
},
"source": {
"description": "Contents of the cell, represented as an array of lines.",
"$ref": "#/definitions/misc/multiline_string"
},
"execution_count": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
},
"mimebundle": {
"description": "A mime-type keyed dictionary of data",
"type": "object",
"additionalProperties": {
"description": "mimetype output (e.g. text/plain), represented as either an array of strings or a string.",
"$ref": "#/definitions/misc/multiline_string"
},
"patternProperties": {
"^application/(.*\\+)?json$": {
"description": "Mimetypes with JSON output, can be any type"
}
}
},
"output_metadata": {
"description": "Cell output metadata.",
"type": "object",
"additionalProperties": true
},
"multiline_string": {
"oneOf": [
{ "type": "string" },
{
"type": "array",
"items": { "type": "string" }
}
]
}
}
}
}

View File

@@ -0,0 +1,400 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"description": "Jupyter Notebook v4.2 JSON schema.",
"type": "object",
"additionalProperties": false,
"required": ["metadata", "nbformat_minor", "nbformat", "cells"],
"properties": {
"metadata": {
"description": "Notebook root-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"kernelspec": {
"description": "Kernel information.",
"type": "object",
"required": ["name", "display_name"],
"properties": {
"name": {
"description": "Name of the kernel specification.",
"type": "string"
},
"display_name": {
"description": "Name to display in UI.",
"type": "string"
}
}
},
"language_info": {
"description": "Kernel information.",
"type": "object",
"required": ["name"],
"properties": {
"name": {
"description": "The programming language which this kernel runs.",
"type": "string"
},
"codemirror_mode": {
"description": "The codemirror mode to use for code in this language.",
"oneOf": [{ "type": "string" }, { "type": "object" }]
},
"file_extension": {
"description": "The file extension for files in this language.",
"type": "string"
},
"mimetype": {
"description": "The mimetype corresponding to files in this language.",
"type": "string"
},
"pygments_lexer": {
"description": "The pygments lexer to use for code in this language.",
"type": "string"
}
}
},
"orig_nbformat": {
"description": "Original notebook format (major number) before converting the notebook between versions. This should never be written to a file.",
"type": "integer",
"minimum": 1
},
"title": {
"description": "The title of the notebook document",
"type": "string"
},
"authors": {
"description": "The author(s) of the notebook document",
"type": "array",
"item": {
"type": "object",
"properties": {
"name": {
"type": "string"
}
},
"additionalProperties": true
}
}
}
},
"nbformat_minor": {
"description": "Notebook format (minor number). Incremented for backward compatible changes to the notebook format.",
"type": "integer",
"minimum": 2
},
"nbformat": {
"description": "Notebook format (major number). Incremented between backwards incompatible changes to the notebook format.",
"type": "integer",
"minimum": 4,
"maximum": 4
},
"cells": {
"description": "Array of cells of the current notebook.",
"type": "array",
"items": { "$ref": "#/definitions/cell" }
}
},
"definitions": {
"cell": {
"type": "object",
"oneOf": [
{ "$ref": "#/definitions/raw_cell" },
{ "$ref": "#/definitions/markdown_cell" },
{ "$ref": "#/definitions/code_cell" }
]
},
"raw_cell": {
"description": "Notebook raw nbconvert cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "metadata", "source"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["raw"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"format": {
"description": "Raw cell metadata format for nbconvert.",
"type": "string"
},
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
}
},
"attachments": { "$ref": "#/definitions/misc/attachments" },
"source": { "$ref": "#/definitions/misc/source" }
}
},
"markdown_cell": {
"description": "Notebook markdown cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "metadata", "source"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["markdown"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
},
"additionalProperties": true
},
"attachments": { "$ref": "#/definitions/misc/attachments" },
"source": { "$ref": "#/definitions/misc/source" }
}
},
"code_cell": {
"description": "Notebook code cell.",
"type": "object",
"additionalProperties": false,
"required": [
"cell_type",
"metadata",
"source",
"outputs",
"execution_count"
],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["code"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"collapsed": {
"description": "Whether the cell is collapsed/expanded.",
"type": "boolean"
},
"scrolled": {
"description": "Whether the cell's output is scrolled, unscrolled, or autoscrolled.",
"enum": [true, false, "auto"]
},
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
}
},
"source": { "$ref": "#/definitions/misc/source" },
"outputs": {
"description": "Execution, display, or stream outputs.",
"type": "array",
"items": { "$ref": "#/definitions/output" }
},
"execution_count": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
}
}
},
"unrecognized_cell": {
"description": "Unrecognized cell from a future minor-revision to the notebook format.",
"type": "object",
"additionalProperties": true,
"required": ["cell_type", "metadata"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"not": {
"enum": ["markdown", "code", "raw"]
}
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
},
"additionalProperties": true
}
}
},
"output": {
"type": "object",
"oneOf": [
{ "$ref": "#/definitions/execute_result" },
{ "$ref": "#/definitions/display_data" },
{ "$ref": "#/definitions/stream" },
{ "$ref": "#/definitions/error" }
]
},
"execute_result": {
"description": "Result of executing a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "data", "metadata", "execution_count"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["execute_result"]
},
"execution_count": {
"description": "A result's prompt number.",
"type": ["integer", "null"],
"minimum": 0
},
"data": { "$ref": "#/definitions/misc/mimebundle" },
"metadata": { "$ref": "#/definitions/misc/output_metadata" }
}
},
"display_data": {
"description": "Data displayed as a result of code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "data", "metadata"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["display_data"]
},
"data": { "$ref": "#/definitions/misc/mimebundle" },
"metadata": { "$ref": "#/definitions/misc/output_metadata" }
}
},
"stream": {
"description": "Stream output from a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "name", "text"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["stream"]
},
"name": {
"description": "The name of the stream (stdout, stderr).",
"type": "string"
},
"text": {
"description": "The stream's text output, represented as an array of strings.",
"$ref": "#/definitions/misc/multiline_string"
}
}
},
"error": {
"description": "Output of an error that occurred during code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "ename", "evalue", "traceback"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["error"]
},
"ename": {
"description": "The name of the error.",
"type": "string"
},
"evalue": {
"description": "The value, or message, of the error.",
"type": "string"
},
"traceback": {
"description": "The error's traceback, represented as an array of strings.",
"type": "array",
"items": { "type": "string" }
}
}
},
"unrecognized_output": {
"description": "Unrecognized output from a future minor-revision to the notebook format.",
"type": "object",
"additionalProperties": true,
"required": ["output_type"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"not": {
"enum": ["execute_result", "display_data", "stream", "error"]
}
}
}
},
"misc": {
"metadata_name": {
"description": "The cell's name. If present, must be a non-empty string. Must be unique across all the cells of a given notebook.",
"type": "string",
"pattern": "^.+$"
},
"metadata_tags": {
"description": "The cell's tags. Tags must be unique, and must not contain commas.",
"type": "array",
"uniqueItems": true,
"items": {
"type": "string",
"pattern": "^[^,]+$"
}
},
"attachments": {
"description": "Media attachments (e.g. inline images), stored as mimebundle keyed by filename.",
"type": "object",
"patternProperties": {
".*": {
"description": "The attachment's data stored as a mimebundle.",
"$ref": "#/definitions/misc/mimebundle"
}
}
},
"source": {
"description": "Contents of the cell, represented as an array of lines.",
"$ref": "#/definitions/misc/multiline_string"
},
"execution_count": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
},
"mimebundle": {
"description": "A mime-type keyed dictionary of data",
"type": "object",
"additionalProperties": {
"description": "mimetype output (e.g. text/plain), represented as either an array of strings or a string.",
"$ref": "#/definitions/misc/multiline_string"
},
"patternProperties": {
"^application/(.*\\+)?json$": {
"description": "Mimetypes with JSON output, can be any type"
}
}
},
"output_metadata": {
"description": "Cell output metadata.",
"type": "object",
"additionalProperties": true
},
"multiline_string": {
"oneOf": [
{ "type": "string" },
{
"type": "array",
"items": { "type": "string" }
}
]
}
}
}
}

View File

@@ -0,0 +1,431 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"description": "Jupyter Notebook v4.3 JSON schema.",
"type": "object",
"additionalProperties": false,
"required": ["metadata", "nbformat_minor", "nbformat", "cells"],
"properties": {
"metadata": {
"description": "Notebook root-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"kernelspec": {
"description": "Kernel information.",
"type": "object",
"required": ["name", "display_name"],
"properties": {
"name": {
"description": "Name of the kernel specification.",
"type": "string"
},
"display_name": {
"description": "Name to display in UI.",
"type": "string"
}
}
},
"language_info": {
"description": "Kernel information.",
"type": "object",
"required": ["name"],
"properties": {
"name": {
"description": "The programming language which this kernel runs.",
"type": "string"
},
"codemirror_mode": {
"description": "The codemirror mode to use for code in this language.",
"oneOf": [{ "type": "string" }, { "type": "object" }]
},
"file_extension": {
"description": "The file extension for files in this language.",
"type": "string"
},
"mimetype": {
"description": "The mimetype corresponding to files in this language.",
"type": "string"
},
"pygments_lexer": {
"description": "The pygments lexer to use for code in this language.",
"type": "string"
}
}
},
"orig_nbformat": {
"description": "Original notebook format (major number) before converting the notebook between versions. This should never be written to a file.",
"type": "integer",
"minimum": 1
},
"title": {
"description": "The title of the notebook document",
"type": "string"
},
"authors": {
"description": "The author(s) of the notebook document",
"type": "array",
"item": {
"type": "object",
"properties": {
"name": {
"type": "string"
}
},
"additionalProperties": true
}
}
}
},
"nbformat_minor": {
"description": "Notebook format (minor number). Incremented for backward compatible changes to the notebook format.",
"type": "integer",
"minimum": 3
},
"nbformat": {
"description": "Notebook format (major number). Incremented between backwards incompatible changes to the notebook format.",
"type": "integer",
"minimum": 4,
"maximum": 4
},
"cells": {
"description": "Array of cells of the current notebook.",
"type": "array",
"items": { "$ref": "#/definitions/cell" }
}
},
"definitions": {
"cell": {
"type": "object",
"oneOf": [
{ "$ref": "#/definitions/raw_cell" },
{ "$ref": "#/definitions/markdown_cell" },
{ "$ref": "#/definitions/code_cell" }
]
},
"raw_cell": {
"description": "Notebook raw nbconvert cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "metadata", "source"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["raw"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"format": {
"description": "Raw cell metadata format for nbconvert.",
"type": "string"
},
"jupyter": {
"description": "Official Jupyter Metadata for Raw Cells",
"type": "object",
"additionalProperties": true,
"source_hidden": {
"description": "Whether the source is hidden.",
"type": "boolean"
}
},
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
}
},
"attachments": { "$ref": "#/definitions/misc/attachments" },
"source": { "$ref": "#/definitions/misc/source" }
}
},
"markdown_cell": {
"description": "Notebook markdown cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "metadata", "source"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["markdown"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" },
"jupyter": {
"description": "Official Jupyter Metadata for Markdown Cells",
"type": "object",
"additionalProperties": true,
"source_hidden": {
"description": "Whether the source is hidden.",
"type": "boolean"
}
}
},
"additionalProperties": true
},
"attachments": { "$ref": "#/definitions/misc/attachments" },
"source": { "$ref": "#/definitions/misc/source" }
}
},
"code_cell": {
"description": "Notebook code cell.",
"type": "object",
"additionalProperties": false,
"required": [
"cell_type",
"metadata",
"source",
"outputs",
"execution_count"
],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["code"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"jupyter": {
"description": "Official Jupyter Metadata for Code Cells",
"type": "object",
"additionalProperties": true,
"source_hidden": {
"description": "Whether the source is hidden.",
"type": "boolean"
},
"outputs_hidden": {
"description": "Whether the outputs are hidden.",
"type": "boolean"
}
},
"collapsed": {
"description": "Whether the cell is collapsed/expanded.",
"type": "boolean"
},
"scrolled": {
"description": "Whether the cell's output is scrolled, unscrolled, or autoscrolled.",
"enum": [true, false, "auto"]
},
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
}
},
"source": { "$ref": "#/definitions/misc/source" },
"outputs": {
"description": "Execution, display, or stream outputs.",
"type": "array",
"items": { "$ref": "#/definitions/output" }
},
"execution_count": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
}
}
},
"unrecognized_cell": {
"description": "Unrecognized cell from a future minor-revision to the notebook format.",
"type": "object",
"additionalProperties": true,
"required": ["cell_type", "metadata"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"not": {
"enum": ["markdown", "code", "raw"]
}
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
},
"additionalProperties": true
}
}
},
"output": {
"type": "object",
"oneOf": [
{ "$ref": "#/definitions/execute_result" },
{ "$ref": "#/definitions/display_data" },
{ "$ref": "#/definitions/stream" },
{ "$ref": "#/definitions/error" }
]
},
"execute_result": {
"description": "Result of executing a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "data", "metadata", "execution_count"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["execute_result"]
},
"execution_count": {
"description": "A result's prompt number.",
"type": ["integer", "null"],
"minimum": 0
},
"data": { "$ref": "#/definitions/misc/mimebundle" },
"metadata": { "$ref": "#/definitions/misc/output_metadata" }
}
},
"display_data": {
"description": "Data displayed as a result of code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "data", "metadata"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["display_data"]
},
"data": { "$ref": "#/definitions/misc/mimebundle" },
"metadata": { "$ref": "#/definitions/misc/output_metadata" }
}
},
"stream": {
"description": "Stream output from a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "name", "text"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["stream"]
},
"name": {
"description": "The name of the stream (stdout, stderr).",
"type": "string"
},
"text": {
"description": "The stream's text output, represented as an array of strings.",
"$ref": "#/definitions/misc/multiline_string"
}
}
},
"error": {
"description": "Output of an error that occurred during code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "ename", "evalue", "traceback"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["error"]
},
"ename": {
"description": "The name of the error.",
"type": "string"
},
"evalue": {
"description": "The value, or message, of the error.",
"type": "string"
},
"traceback": {
"description": "The error's traceback, represented as an array of strings.",
"type": "array",
"items": { "type": "string" }
}
}
},
"unrecognized_output": {
"description": "Unrecognized output from a future minor-revision to the notebook format.",
"type": "object",
"additionalProperties": true,
"required": ["output_type"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"not": {
"enum": ["execute_result", "display_data", "stream", "error"]
}
}
}
},
"misc": {
"metadata_name": {
"description": "The cell's name. If present, must be a non-empty string. Must be unique across all the cells of a given notebook.",
"type": "string",
"pattern": "^.+$"
},
"metadata_tags": {
"description": "The cell's tags. Tags must be unique, and must not contain commas.",
"type": "array",
"uniqueItems": true,
"items": {
"type": "string",
"pattern": "^[^,]+$"
}
},
"attachments": {
"description": "Media attachments (e.g. inline images), stored as mimebundle keyed by filename.",
"type": "object",
"patternProperties": {
".*": {
"description": "The attachment's data stored as a mimebundle.",
"$ref": "#/definitions/misc/mimebundle"
}
}
},
"source": {
"description": "Contents of the cell, represented as an array of lines.",
"$ref": "#/definitions/misc/multiline_string"
},
"execution_count": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
},
"mimebundle": {
"description": "A mime-type keyed dictionary of data",
"type": "object",
"additionalProperties": {
"description": "mimetype output (e.g. text/plain), represented as either an array of strings or a string.",
"$ref": "#/definitions/misc/multiline_string"
},
"patternProperties": {
"^application/(.*\\+)?json$": {
"description": "Mimetypes with JSON output, can be any type"
}
}
},
"output_metadata": {
"description": "Cell output metadata.",
"type": "object",
"additionalProperties": true
},
"multiline_string": {
"oneOf": [
{ "type": "string" },
{
"type": "array",
"items": { "type": "string" }
}
]
}
}
}
}

View File

@@ -0,0 +1,459 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"description": "Jupyter Notebook v4.4 JSON schema.",
"type": "object",
"additionalProperties": false,
"required": ["metadata", "nbformat_minor", "nbformat", "cells"],
"properties": {
"metadata": {
"description": "Notebook root-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"kernelspec": {
"description": "Kernel information.",
"type": "object",
"required": ["name", "display_name"],
"properties": {
"name": {
"description": "Name of the kernel specification.",
"type": "string"
},
"display_name": {
"description": "Name to display in UI.",
"type": "string"
}
}
},
"language_info": {
"description": "Information about the programming language the kernel runs.",
"type": "object",
"required": ["name"],
"properties": {
"name": {
"description": "The programming language which this kernel runs.",
"type": "string"
},
"codemirror_mode": {
"description": "The codemirror mode to use for code in this language.",
"oneOf": [{ "type": "string" }, { "type": "object" }]
},
"file_extension": {
"description": "The file extension for files in this language.",
"type": "string"
},
"mimetype": {
"description": "The mimetype corresponding to files in this language.",
"type": "string"
},
"pygments_lexer": {
"description": "The pygments lexer to use for code in this language.",
"type": "string"
}
}
},
"orig_nbformat": {
"description": "Original notebook format (major number) before converting the notebook between versions. This should never be written to a file.",
"type": "integer",
"minimum": 1
},
"title": {
"description": "The title of the notebook document",
"type": "string"
},
"authors": {
"description": "The author(s) of the notebook document",
"type": "array",
"item": {
"type": "object",
"properties": {
"name": {
"type": "string"
}
},
"additionalProperties": true
}
}
}
},
"nbformat_minor": {
"description": "Notebook format (minor number). Incremented for backward compatible changes to the notebook format.",
"type": "integer",
"minimum": 4
},
"nbformat": {
"description": "Notebook format (major number). Incremented between backwards incompatible changes to the notebook format.",
"type": "integer",
"minimum": 4,
"maximum": 4
},
"cells": {
"description": "Array of cells of the current notebook.",
"type": "array",
"items": { "$ref": "#/definitions/cell" }
}
},
"definitions": {
"cell": {
"type": "object",
"oneOf": [
{ "$ref": "#/definitions/raw_cell" },
{ "$ref": "#/definitions/markdown_cell" },
{ "$ref": "#/definitions/code_cell" }
]
},
"raw_cell": {
"description": "Notebook raw nbconvert cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "metadata", "source"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["raw"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"format": {
"description": "Raw cell metadata format for nbconvert.",
"type": "string"
},
"jupyter": {
"description": "Official Jupyter Metadata for Raw Cells",
"type": "object",
"additionalProperties": true,
"source_hidden": {
"description": "Whether the source is hidden.",
"type": "boolean"
}
},
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
}
},
"attachments": { "$ref": "#/definitions/misc/attachments" },
"source": { "$ref": "#/definitions/misc/source" }
}
},
"markdown_cell": {
"description": "Notebook markdown cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "metadata", "source"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["markdown"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" },
"jupyter": {
"description": "Official Jupyter Metadata for Markdown Cells",
"type": "object",
"additionalProperties": true,
"source_hidden": {
"description": "Whether the source is hidden.",
"type": "boolean"
}
}
},
"additionalProperties": true
},
"attachments": { "$ref": "#/definitions/misc/attachments" },
"source": { "$ref": "#/definitions/misc/source" }
}
},
"code_cell": {
"description": "Notebook code cell.",
"type": "object",
"additionalProperties": false,
"required": [
"cell_type",
"metadata",
"source",
"outputs",
"execution_count"
],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["code"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"jupyter": {
"description": "Official Jupyter Metadata for Code Cells",
"type": "object",
"additionalProperties": true,
"source_hidden": {
"description": "Whether the source is hidden.",
"type": "boolean"
},
"outputs_hidden": {
"description": "Whether the outputs are hidden.",
"type": "boolean"
}
},
"execution": {
"description": "Execution time for the code in the cell. This tracks the time at which messages are received from the iopub or shell channels.",
"type": "object",
"properties": {
"iopub.execute_input": {
"description": "header.date (in ISO 8601 format) of iopub channel's execute_input message. It indicates the time at which the kernel broadcasts an execute_input message to connected frontends",
"type": "string"
},
"iopub.status.busy": {
"description": "header.date (in ISO 8601 format) of iopub channel's kernel status message when the status is 'busy'",
"type": "string"
},
"shell.execute_reply": {
"description": "header.date (in ISO 8601 format) of the shell channel's execute_reply message. It indicates the time at which the execute_reply message was created",
"type": "string"
},
"iopub.status.idle": {
"description": "header.date (in ISO 8601 format) of iopub channel's kernel status message when the status is 'idle'. It indicates the time at which kernel finished processing the associated request",
"type": "string"
}
},
"additionalProperties": true,
"patternProperties": {
"^.*$": {
"type": "string"
}
}
},
"collapsed": {
"description": "Whether the cell's output is collapsed/expanded.",
"type": "boolean"
},
"scrolled": {
"description": "Whether the cell's output is scrolled, unscrolled, or autoscrolled.",
"enum": [true, false, "auto"]
},
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
}
},
"source": { "$ref": "#/definitions/misc/source" },
"outputs": {
"description": "Execution, display, or stream outputs.",
"type": "array",
"items": { "$ref": "#/definitions/output" }
},
"execution_count": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
}
}
},
"unrecognized_cell": {
"description": "Unrecognized cell from a future minor-revision to the notebook format.",
"type": "object",
"additionalProperties": true,
"required": ["cell_type", "metadata"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"not": {
"enum": ["markdown", "code", "raw"]
}
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
},
"additionalProperties": true
}
}
},
"output": {
"type": "object",
"oneOf": [
{ "$ref": "#/definitions/execute_result" },
{ "$ref": "#/definitions/display_data" },
{ "$ref": "#/definitions/stream" },
{ "$ref": "#/definitions/error" }
]
},
"execute_result": {
"description": "Result of executing a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "data", "metadata", "execution_count"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["execute_result"]
},
"execution_count": {
"description": "A result's prompt number.",
"type": ["integer", "null"],
"minimum": 0
},
"data": { "$ref": "#/definitions/misc/mimebundle" },
"metadata": { "$ref": "#/definitions/misc/output_metadata" }
}
},
"display_data": {
"description": "Data displayed as a result of code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "data", "metadata"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["display_data"]
},
"data": { "$ref": "#/definitions/misc/mimebundle" },
"metadata": { "$ref": "#/definitions/misc/output_metadata" }
}
},
"stream": {
"description": "Stream output from a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "name", "text"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["stream"]
},
"name": {
"description": "The name of the stream (stdout, stderr).",
"type": "string"
},
"text": {
"description": "The stream's text output, represented as either a string or an array of strings.",
"$ref": "#/definitions/misc/multiline_string"
}
}
},
"error": {
"description": "Output of an error that occurred during code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "ename", "evalue", "traceback"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["error"]
},
"ename": {
"description": "The name of the error.",
"type": "string"
},
"evalue": {
"description": "The value, or message, of the error.",
"type": "string"
},
"traceback": {
"description": "The error's traceback, represented as an array of strings.",
"type": "array",
"items": { "type": "string" }
}
}
},
"unrecognized_output": {
"description": "Unrecognized output from a future minor-revision to the notebook format.",
"type": "object",
"additionalProperties": true,
"required": ["output_type"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"not": {
"enum": ["execute_result", "display_data", "stream", "error"]
}
}
}
},
"misc": {
"metadata_name": {
"description": "The cell's name. If present, must be a non-empty string. Cell names are expected to be unique across all the cells in a given notebook. This criterion cannot be checked by the json schema and must be established by an additional check.",
"type": "string",
"pattern": "^.+$"
},
"metadata_tags": {
"description": "The cell's tags. Tags must be unique, and must not contain commas.",
"type": "array",
"uniqueItems": true,
"items": {
"type": "string",
"pattern": "^[^,]+$"
}
},
"attachments": {
"description": "Media attachments (e.g. inline images), stored as mimebundle keyed by filename.",
"type": "object",
"patternProperties": {
".*": {
"description": "The attachment's data stored as a mimebundle.",
"$ref": "#/definitions/misc/mimebundle"
}
}
},
"source": {
"description": "Contents of the cell, represented as an array of lines.",
"$ref": "#/definitions/misc/multiline_string"
},
"execution_count": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
},
"mimebundle": {
"description": "A mime-type keyed dictionary of data",
"type": "object",
"additionalProperties": {
"description": "mimetype output (e.g. text/plain), represented as either an array of strings or a string.",
"$ref": "#/definitions/misc/multiline_string"
},
"patternProperties": {
"^application/(.*\\+)?json$": {
"description": "Mimetypes with JSON output, can be any type"
}
}
},
"output_metadata": {
"description": "Cell output metadata.",
"type": "object",
"additionalProperties": true
},
"multiline_string": {
"oneOf": [
{ "type": "string" },
{
"type": "array",
"items": { "type": "string" }
}
]
}
}
}
}

View File

@@ -0,0 +1,471 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"description": "Jupyter Notebook v4.5 JSON schema.",
"type": "object",
"additionalProperties": false,
"required": ["metadata", "nbformat_minor", "nbformat", "cells"],
"properties": {
"metadata": {
"description": "Notebook root-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"kernelspec": {
"description": "Kernel information.",
"type": "object",
"required": ["name", "display_name"],
"properties": {
"name": {
"description": "Name of the kernel specification.",
"type": "string"
},
"display_name": {
"description": "Name to display in UI.",
"type": "string"
}
}
},
"language_info": {
"description": "Information about the programming language the kernel runs.",
"type": "object",
"required": ["name"],
"properties": {
"name": {
"description": "The programming language which this kernel runs.",
"type": "string"
},
"codemirror_mode": {
"description": "The codemirror mode to use for code in this language.",
"oneOf": [{ "type": "string" }, { "type": "object" }]
},
"file_extension": {
"description": "The file extension for files in this language.",
"type": "string"
},
"mimetype": {
"description": "The mimetype corresponding to files in this language.",
"type": "string"
},
"pygments_lexer": {
"description": "The pygments lexer to use for code in this language.",
"type": "string"
}
}
},
"orig_nbformat": {
"description": "Original notebook format (major number) before converting the notebook between versions. This should never be written to a file.",
"type": "integer",
"minimum": 1
},
"title": {
"description": "The title of the notebook document",
"type": "string"
},
"authors": {
"description": "The author(s) of the notebook document",
"type": "array",
"item": {
"type": "object",
"properties": {
"name": {
"type": "string"
}
},
"additionalProperties": true
}
}
}
},
"nbformat_minor": {
"description": "Notebook format (minor number). Incremented for backward compatible changes to the notebook format.",
"type": "integer",
"minimum": 5
},
"nbformat": {
"description": "Notebook format (major number). Incremented between backwards incompatible changes to the notebook format.",
"type": "integer",
"minimum": 4,
"maximum": 4
},
"cells": {
"description": "Array of cells of the current notebook.",
"type": "array",
"items": { "$ref": "#/definitions/cell" }
}
},
"definitions": {
"cell_id": {
"description": "A string field representing the identifier of this particular cell.",
"type": "string",
"pattern": "^[a-zA-Z0-9-_]+$",
"minLength": 1,
"maxLength": 64
},
"cell": {
"type": "object",
"oneOf": [
{ "$ref": "#/definitions/raw_cell" },
{ "$ref": "#/definitions/markdown_cell" },
{ "$ref": "#/definitions/code_cell" }
]
},
"raw_cell": {
"description": "Notebook raw nbconvert cell.",
"type": "object",
"additionalProperties": false,
"required": ["id", "cell_type", "metadata", "source"],
"properties": {
"id": { "$ref": "#/definitions/cell_id" },
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["raw"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"format": {
"description": "Raw cell metadata format for nbconvert.",
"type": "string"
},
"jupyter": {
"description": "Official Jupyter Metadata for Raw Cells",
"type": "object",
"additionalProperties": true,
"source_hidden": {
"description": "Whether the source is hidden.",
"type": "boolean"
}
},
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
}
},
"attachments": { "$ref": "#/definitions/misc/attachments" },
"source": { "$ref": "#/definitions/misc/source" }
}
},
"markdown_cell": {
"description": "Notebook markdown cell.",
"type": "object",
"additionalProperties": false,
"required": ["id", "cell_type", "metadata", "source"],
"properties": {
"id": { "$ref": "#/definitions/cell_id" },
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["markdown"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" },
"jupyter": {
"description": "Official Jupyter Metadata for Markdown Cells",
"type": "object",
"additionalProperties": true,
"source_hidden": {
"description": "Whether the source is hidden.",
"type": "boolean"
}
}
},
"additionalProperties": true
},
"attachments": { "$ref": "#/definitions/misc/attachments" },
"source": { "$ref": "#/definitions/misc/source" }
}
},
"code_cell": {
"description": "Notebook code cell.",
"type": "object",
"additionalProperties": false,
"required": [
"id",
"cell_type",
"metadata",
"source",
"outputs",
"execution_count"
],
"properties": {
"id": { "$ref": "#/definitions/cell_id" },
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["code"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"jupyter": {
"description": "Official Jupyter Metadata for Code Cells",
"type": "object",
"additionalProperties": true,
"source_hidden": {
"description": "Whether the source is hidden.",
"type": "boolean"
},
"outputs_hidden": {
"description": "Whether the outputs are hidden.",
"type": "boolean"
}
},
"execution": {
"description": "Execution time for the code in the cell. This tracks the time at which messages are received from the iopub or shell channels.",
"type": "object",
"properties": {
"iopub.execute_input": {
"description": "header.date (in ISO 8601 format) of iopub channel's execute_input message. It indicates the time at which the kernel broadcasts an execute_input message to connected frontends",
"type": "string"
},
"iopub.status.busy": {
"description": "header.date (in ISO 8601 format) of iopub channel's kernel status message when the status is 'busy'",
"type": "string"
},
"shell.execute_reply": {
"description": "header.date (in ISO 8601 format) of the shell channel's execute_reply message. It indicates the time at which the execute_reply message was created",
"type": "string"
},
"iopub.status.idle": {
"description": "header.date (in ISO 8601 format) of iopub channel's kernel status message when the status is 'idle'. It indicates the time at which kernel finished processing the associated request",
"type": "string"
}
},
"additionalProperties": true,
"patternProperties": {
"^.*$": {
"type": "string"
}
}
},
"collapsed": {
"description": "Whether the cell's output is collapsed/expanded.",
"type": "boolean"
},
"scrolled": {
"description": "Whether the cell's output is scrolled, unscrolled, or autoscrolled.",
"enum": [true, false, "auto"]
},
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
}
},
"source": { "$ref": "#/definitions/misc/source" },
"outputs": {
"description": "Execution, display, or stream outputs.",
"type": "array",
"items": { "$ref": "#/definitions/output" }
},
"execution_count": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
}
}
},
"unrecognized_cell": {
"description": "Unrecognized cell from a future minor-revision to the notebook format.",
"type": "object",
"additionalProperties": true,
"required": ["cell_type", "metadata"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"not": {
"enum": ["markdown", "code", "raw"]
}
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
},
"additionalProperties": true
}
}
},
"output": {
"type": "object",
"oneOf": [
{ "$ref": "#/definitions/execute_result" },
{ "$ref": "#/definitions/display_data" },
{ "$ref": "#/definitions/stream" },
{ "$ref": "#/definitions/error" }
]
},
"execute_result": {
"description": "Result of executing a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "data", "metadata", "execution_count"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["execute_result"]
},
"execution_count": {
"description": "A result's prompt number.",
"type": ["integer", "null"],
"minimum": 0
},
"data": { "$ref": "#/definitions/misc/mimebundle" },
"metadata": { "$ref": "#/definitions/misc/output_metadata" }
}
},
"display_data": {
"description": "Data displayed as a result of code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "data", "metadata"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["display_data"]
},
"data": { "$ref": "#/definitions/misc/mimebundle" },
"metadata": { "$ref": "#/definitions/misc/output_metadata" }
}
},
"stream": {
"description": "Stream output from a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "name", "text"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["stream"]
},
"name": {
"description": "The name of the stream (stdout, stderr).",
"type": "string"
},
"text": {
"description": "The stream's text output, represented as either a string or an array of strings.",
"$ref": "#/definitions/misc/multiline_string"
}
}
},
"error": {
"description": "Output of an error that occurred during code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "ename", "evalue", "traceback"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["error"]
},
"ename": {
"description": "The name of the error.",
"type": "string"
},
"evalue": {
"description": "The value, or message, of the error.",
"type": "string"
},
"traceback": {
"description": "The error's traceback, represented as an array of strings.",
"type": "array",
"items": { "type": "string" }
}
}
},
"unrecognized_output": {
"description": "Unrecognized output from a future minor-revision to the notebook format.",
"type": "object",
"additionalProperties": true,
"required": ["output_type"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"not": {
"enum": ["execute_result", "display_data", "stream", "error"]
}
}
}
},
"misc": {
"metadata_name": {
"description": "The cell's name. If present, must be a non-empty string. Cell names are expected to be unique across all the cells in a given notebook. This criterion cannot be checked by the json schema and must be established by an additional check.",
"type": "string",
"pattern": "^.+$"
},
"metadata_tags": {
"description": "The cell's tags. Tags must be unique, and must not contain commas.",
"type": "array",
"uniqueItems": true,
"items": {
"type": "string",
"pattern": "^[^,]+$"
}
},
"attachments": {
"description": "Media attachments (e.g. inline images), stored as mimebundle keyed by filename.",
"type": "object",
"patternProperties": {
".*": {
"description": "The attachment's data stored as a mimebundle.",
"$ref": "#/definitions/misc/mimebundle"
}
}
},
"source": {
"description": "Contents of the cell, represented as an array of lines.",
"$ref": "#/definitions/misc/multiline_string"
},
"execution_count": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
},
"mimebundle": {
"description": "A mime-type keyed dictionary of data",
"type": "object",
"additionalProperties": {
"description": "mimetype output (e.g. text/plain), represented as either an array of strings or a string.",
"$ref": "#/definitions/misc/multiline_string"
},
"patternProperties": {
"^application/(.*\\+)?json$": {
"description": "Mimetypes with JSON output, can be any type"
}
}
},
"output_metadata": {
"description": "Cell output metadata.",
"type": "object",
"additionalProperties": true
},
"multiline_string": {
"oneOf": [
{ "type": "string" },
{
"type": "array",
"items": { "type": "string" }
}
]
}
}
}
}

View File

@@ -0,0 +1,471 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"description": "Jupyter Notebook v4.5 JSON schema.",
"type": "object",
"additionalProperties": false,
"required": ["metadata", "nbformat_minor", "nbformat", "cells"],
"properties": {
"metadata": {
"description": "Notebook root-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"kernelspec": {
"description": "Kernel information.",
"type": "object",
"required": ["name", "display_name"],
"properties": {
"name": {
"description": "Name of the kernel specification.",
"type": "string"
},
"display_name": {
"description": "Name to display in UI.",
"type": "string"
}
}
},
"language_info": {
"description": "Information about the programming language the kernel runs.",
"type": "object",
"required": ["name"],
"properties": {
"name": {
"description": "The programming language which this kernel runs.",
"type": "string"
},
"codemirror_mode": {
"description": "The codemirror mode to use for code in this language.",
"oneOf": [{ "type": "string" }, { "type": "object" }]
},
"file_extension": {
"description": "The file extension for files in this language.",
"type": "string"
},
"mimetype": {
"description": "The mimetype corresponding to files in this language.",
"type": "string"
},
"pygments_lexer": {
"description": "The pygments lexer to use for code in this language.",
"type": "string"
}
}
},
"orig_nbformat": {
"description": "Original notebook format (major number) before converting the notebook between versions. This should never be written to a file.",
"type": "integer",
"minimum": 1
},
"title": {
"description": "The title of the notebook document",
"type": "string"
},
"authors": {
"description": "The author(s) of the notebook document",
"type": "array",
"item": {
"type": "object",
"properties": {
"name": {
"type": "string"
}
},
"additionalProperties": true
}
}
}
},
"nbformat_minor": {
"description": "Notebook format (minor number). Incremented for backward compatible changes to the notebook format.",
"type": "integer",
"minimum": 5
},
"nbformat": {
"description": "Notebook format (major number). Incremented between backwards incompatible changes to the notebook format.",
"type": "integer",
"minimum": 4,
"maximum": 4
},
"cells": {
"description": "Array of cells of the current notebook.",
"type": "array",
"items": { "$ref": "#/definitions/cell" }
}
},
"definitions": {
"cell_id": {
"description": "A string field representing the identifier of this particular cell.",
"type": "string",
"pattern": "^[a-zA-Z0-9-_]+$",
"minLength": 1,
"maxLength": 64
},
"cell": {
"type": "object",
"oneOf": [
{ "$ref": "#/definitions/raw_cell" },
{ "$ref": "#/definitions/markdown_cell" },
{ "$ref": "#/definitions/code_cell" }
]
},
"raw_cell": {
"description": "Notebook raw nbconvert cell.",
"type": "object",
"additionalProperties": false,
"required": ["id", "cell_type", "metadata", "source"],
"properties": {
"id": { "$ref": "#/definitions/cell_id" },
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["raw"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"format": {
"description": "Raw cell metadata format for nbconvert.",
"type": "string"
},
"jupyter": {
"description": "Official Jupyter Metadata for Raw Cells",
"type": "object",
"additionalProperties": true,
"source_hidden": {
"description": "Whether the source is hidden.",
"type": "boolean"
}
},
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
}
},
"attachments": { "$ref": "#/definitions/misc/attachments" },
"source": { "$ref": "#/definitions/misc/source" }
}
},
"markdown_cell": {
"description": "Notebook markdown cell.",
"type": "object",
"additionalProperties": false,
"required": ["id", "cell_type", "metadata", "source"],
"properties": {
"id": { "$ref": "#/definitions/cell_id" },
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["markdown"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" },
"jupyter": {
"description": "Official Jupyter Metadata for Markdown Cells",
"type": "object",
"additionalProperties": true,
"source_hidden": {
"description": "Whether the source is hidden.",
"type": "boolean"
}
}
},
"additionalProperties": true
},
"attachments": { "$ref": "#/definitions/misc/attachments" },
"source": { "$ref": "#/definitions/misc/source" }
}
},
"code_cell": {
"description": "Notebook code cell.",
"type": "object",
"additionalProperties": false,
"required": [
"id",
"cell_type",
"metadata",
"source",
"outputs",
"execution_count"
],
"properties": {
"id": { "$ref": "#/definitions/cell_id" },
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["code"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"jupyter": {
"description": "Official Jupyter Metadata for Code Cells",
"type": "object",
"additionalProperties": true,
"source_hidden": {
"description": "Whether the source is hidden.",
"type": "boolean"
},
"outputs_hidden": {
"description": "Whether the outputs are hidden.",
"type": "boolean"
}
},
"execution": {
"description": "Execution time for the code in the cell. This tracks the time at which messages are received from the iopub or shell channels.",
"type": "object",
"properties": {
"iopub.execute_input": {
"description": "header.date (in ISO 8601 format) of iopub channel's execute_input message. It indicates the time at which the kernel broadcasts an execute_input message to connected frontends",
"type": "string"
},
"iopub.status.busy": {
"description": "header.date (in ISO 8601 format) of iopub channel's kernel status message when the status is 'busy'",
"type": "string"
},
"shell.execute_reply": {
"description": "header.date (in ISO 8601 format) of the shell channel's execute_reply message. It indicates the time at which the execute_reply message was created",
"type": "string"
},
"iopub.status.idle": {
"description": "header.date (in ISO 8601 format) of iopub channel's kernel status message when the status is 'idle'. It indicates the time at which kernel finished processing the associated request",
"type": "string"
}
},
"additionalProperties": true,
"patternProperties": {
"^.*$": {
"type": "string"
}
}
},
"collapsed": {
"description": "Whether the cell's output is collapsed/expanded.",
"type": "boolean"
},
"scrolled": {
"description": "Whether the cell's output is scrolled, unscrolled, or autoscrolled.",
"enum": [true, false, "auto"]
},
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
}
},
"source": { "$ref": "#/definitions/misc/source" },
"outputs": {
"description": "Execution, display, or stream outputs.",
"type": "array",
"items": { "$ref": "#/definitions/output" }
},
"execution_count": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
}
}
},
"unrecognized_cell": {
"description": "Unrecognized cell from a future minor-revision to the notebook format.",
"type": "object",
"additionalProperties": true,
"required": ["cell_type", "metadata"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"not": {
"enum": ["markdown", "code", "raw"]
}
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/misc/metadata_name" },
"tags": { "$ref": "#/definitions/misc/metadata_tags" }
},
"additionalProperties": true
}
}
},
"output": {
"type": "object",
"oneOf": [
{ "$ref": "#/definitions/execute_result" },
{ "$ref": "#/definitions/display_data" },
{ "$ref": "#/definitions/stream" },
{ "$ref": "#/definitions/error" }
]
},
"execute_result": {
"description": "Result of executing a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "data", "metadata", "execution_count"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["execute_result"]
},
"execution_count": {
"description": "A result's prompt number.",
"type": ["integer", "null"],
"minimum": 0
},
"data": { "$ref": "#/definitions/misc/mimebundle" },
"metadata": { "$ref": "#/definitions/misc/output_metadata" }
}
},
"display_data": {
"description": "Data displayed as a result of code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "data", "metadata"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["display_data"]
},
"data": { "$ref": "#/definitions/misc/mimebundle" },
"metadata": { "$ref": "#/definitions/misc/output_metadata" }
}
},
"stream": {
"description": "Stream output from a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "name", "text"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["stream"]
},
"name": {
"description": "The name of the stream (stdout, stderr).",
"type": "string"
},
"text": {
"description": "The stream's text output, represented as either a string or an array of strings.",
"$ref": "#/definitions/misc/multiline_string"
}
}
},
"error": {
"description": "Output of an error that occurred during code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "ename", "evalue", "traceback"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["error"]
},
"ename": {
"description": "The name of the error.",
"type": "string"
},
"evalue": {
"description": "The value, or message, of the error.",
"type": "string"
},
"traceback": {
"description": "The error's traceback, represented as an array of strings.",
"type": "array",
"items": { "type": "string" }
}
}
},
"unrecognized_output": {
"description": "Unrecognized output from a future minor-revision to the notebook format.",
"type": "object",
"additionalProperties": true,
"required": ["output_type"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"not": {
"enum": ["execute_result", "display_data", "stream", "error"]
}
}
}
},
"misc": {
"metadata_name": {
"description": "The cell's name. If present, must be a non-empty string. Cell names are expected to be unique across all the cells in a given notebook. This criterion cannot be checked by the json schema and must be established by an additional check.",
"type": "string",
"pattern": "^.+$"
},
"metadata_tags": {
"description": "The cell's tags. Tags must be unique, and must not contain commas.",
"type": "array",
"uniqueItems": true,
"items": {
"type": "string",
"pattern": "^[^,]+$"
}
},
"attachments": {
"description": "Media attachments (e.g. inline images), stored as mimebundle keyed by filename.",
"type": "object",
"patternProperties": {
".*": {
"description": "The attachment's data stored as a mimebundle.",
"$ref": "#/definitions/misc/mimebundle"
}
}
},
"source": {
"description": "Contents of the cell, represented as an array of lines.",
"$ref": "#/definitions/misc/multiline_string"
},
"execution_count": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
},
"mimebundle": {
"description": "A mime-type keyed dictionary of data",
"type": "object",
"additionalProperties": {
"description": "mimetype output (e.g. text/plain), represented as either an array of strings or a string.",
"$ref": "#/definitions/misc/multiline_string"
},
"patternProperties": {
"^application/(.*\\+)?json$": {
"description": "Mimetypes with JSON output, can be any type"
}
}
},
"output_metadata": {
"description": "Cell output metadata.",
"type": "object",
"additionalProperties": true
},
"multiline_string": {
"oneOf": [
{ "type": "string" },
{
"type": "array",
"items": { "type": "string" }
}
]
}
}
}
}

View File

@@ -0,0 +1,69 @@
"""Read and write notebooks in JSON format."""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
import copy
import json
from ..notebooknode import from_dict
from .rwbase import (
NotebookReader,
NotebookWriter,
rejoin_lines,
split_lines,
strip_transient,
)
class BytesEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``bytes`` values as ASCII text.

    Meant for base64 (and other pure-ASCII) bytestrings; every other
    unsupported type is handed to the stock encoder.
    """

    def default(self, obj):
        """Decode ``bytes`` as ASCII; defer anything else to the base class."""
        if not isinstance(obj, bytes):
            return super().default(obj)
        return obj.decode("ascii")
class JSONReader(NotebookReader):
    """Reader that parses JSON text into an in-memory notebook."""

    def reads(self, s, **kwargs):
        """Parse the JSON string ``s`` and return the resulting notebook.

        ``kwargs`` are forwarded to ``json.loads`` and also to ``to_notebook``.
        """
        parsed = json.loads(s, **kwargs)
        return self.to_notebook(parsed, **kwargs)

    def to_notebook(self, d, **kwargs):
        """Convert a disk-format notebook dict to the in-memory form.

        Multi-line values are rejoined into strings and transient values are
        scrubbed. ``kwargs`` is accepted but not used here.
        """
        return strip_transient(rejoin_lines(from_dict(d)))
class JSONWriter(NotebookWriter):
    """Writer that serializes a notebook to JSON text."""

    def writes(self, nb, **kwargs):
        """Return ``nb`` serialized as a JSON string.

        The encoder class, indent, key sorting and separators are always
        forced; ``ensure_ascii`` defaults to ``False`` but may be overridden.
        The ``split_lines`` keyword (default ``True``) controls whether
        multi-line strings are split into lists before dumping. Any other
        keyword is passed through to ``json.dumps``.
        """
        should_split = kwargs.pop("split_lines", True)
        kwargs.setdefault("ensure_ascii", False)
        kwargs.update(
            cls=BytesEncoder,
            indent=1,
            sort_keys=True,
            separators=(",", ": "),
        )
        # work on a copy so the caller's in-memory notebook is untouched
        payload = copy.deepcopy(nb)
        if should_split:
            payload = split_lines(payload)
        payload = strip_transient(payload)
        return json.dumps(payload, **kwargs)
# Module-level convenience API bound to shared reader/writer instances.
_reader = JSONReader()
reads = _reader.reads
read = _reader.read
to_notebook = _reader.to_notebook

_writer = JSONWriter()
writes = _writer.writes
write = _writer.write

View File

@@ -0,0 +1,132 @@
"""Base classes and utilities for readers and writers."""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
def _is_json_mime(mime):
"""Is a key a JSON mime-type that should be left alone?"""
return mime == "application/json" or (
mime.startswith("application/") and mime.endswith("+json")
)
def _rejoin_mimebundle(data):
"""Rejoin the multi-line string fields in a mimebundle (in-place)"""
for key, value in list(data.items()):
if (
not _is_json_mime(key)
and isinstance(value, list)
and all(isinstance(line, str) for line in value)
):
data[key] = "".join(value)
return data
def rejoin_lines(nb):
    """Rejoin multi-line text that was split into lists back into strings.

    This undoes ``split_lines(nb)``. Only values that are actually lists are
    joined, so text that was never split passes through unchanged.

    Used when reading JSON files that may have been through ``split_lines``.
    The notebook is modified in place and returned.
    """
    for cell in nb.cells:
        if "source" in cell and isinstance(cell.source, list):
            cell.source = "".join(cell.source)

        for attachment in cell.get("attachments", {}).values():
            _rejoin_mimebundle(attachment)

        if cell.get("cell_type", None) != "code":
            continue

        for output in cell.get("outputs", []):
            kind = output.get("output_type", "")
            if kind in {"execute_result", "display_data"}:
                _rejoin_mimebundle(output.get("data", {}))
            elif kind and isinstance(output.get("text", ""), list):
                # any other typed output: only a split ``text`` field is rejoined
                output.text = "".join(output.text)
    return nb
_non_text_split_mimes = {
"application/javascript",
"image/svg+xml",
}
def _split_mimebundle(data):
"""Split multi-line string fields in a mimebundle (in-place)"""
for key, value in list(data.items()):
if isinstance(value, str) and (key.startswith("text/") or key in _non_text_split_mimes):
data[key] = value.splitlines(True)
return data
def split_lines(nb):
    """Split likely multi-line text into lists of strings.

    Produces file output friendlier to line-based VCS; ``rejoin_lines(nb)``
    reverses the effect.

    Used when writing JSON files. The notebook is modified in place and
    returned.
    """
    for cell in nb.cells:
        text = cell.get("source", None)
        if isinstance(text, str):
            cell["source"] = text.splitlines(True)

        for attachment in cell.get("attachments", {}).values():
            _split_mimebundle(attachment)

        if cell.cell_type != "code":
            continue

        for output in cell.outputs:
            if output.output_type in {"execute_result", "display_data"}:
                _split_mimebundle(output.get("data", {}))
            elif output.output_type == "stream" and isinstance(output.text, str):
                output.text = output.text.splitlines(True)
    return nb
def strip_transient(nb):
    """Remove transient values that should not be stored in files.

    Drops ``orig_nbformat``, ``orig_nbformat_minor`` and ``signature`` from
    the notebook metadata and ``trusted`` from every cell's metadata.

    This should be called in *both* read and write. The notebook is modified
    in place and returned.
    """
    notebook_meta = nb.metadata
    for transient_key in ("orig_nbformat", "orig_nbformat_minor", "signature"):
        notebook_meta.pop(transient_key, None)
    for cell in nb.cells:
        cell.metadata.pop("trusted", None)
    return nb
class NotebookReader:
    """Base class for notebook readers."""

    def reads(self, s, **kwargs):
        """Read a notebook from the string ``s``; subclasses must override."""
        raise NotImplementedError("reads must be implemented in a subclass")

    def read(self, fp, **kwargs):
        """Read a notebook from the file-like object ``fp``.

        The whole content of ``fp`` is read and passed to ``reads`` together
        with ``kwargs``.
        """
        return self.reads(fp.read(), **kwargs)
class NotebookWriter:
    """Base class for notebook writers."""

    def writes(self, nb, **kwargs):
        """Serialize the notebook ``nb`` to a string; subclasses must override."""
        raise NotImplementedError("writes must be implemented in a subclass")

    def write(self, nb, fp, **kwargs):
        """Write the notebook ``nb`` to the file-like object ``fp``.

        ``nb`` is serialized with ``writes`` (receiving ``kwargs``) and the
        result of ``fp.write`` is returned.
        """
        return fp.write(self.writes(nb, **kwargs))

View File

@@ -0,0 +1,404 @@
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
import json
import os
import pprint
from traitlets.log import get_logger
from ._imports import import_item
from .corpus.words import generate_corpus_id
from .json_compat import ValidationError, _validator_for_name, get_current_validator
from .reader import get_version
# Cache of instantiated validators, filled by ``get_validator`` and keyed by
# ``(validator name, version, version_minor)``.
validators = {}
def _relax_additional_properties(obj):
"""relax any `additionalProperties`"""
if isinstance(obj, dict):
for key, value in obj.items():
if key == "additionalProperties":
value = True
else:
value = _relax_additional_properties(value)
obj[key] = value
elif isinstance(obj, list):
for i, value in enumerate(obj):
obj[i] = _relax_additional_properties(value)
return obj
def _allow_undefined(schema):
schema["definitions"]["cell"]["oneOf"].append({"$ref": "#/definitions/unrecognized_cell"})
schema["definitions"]["output"]["oneOf"].append({"$ref": "#/definitions/unrecognized_output"})
return schema
def get_validator(version=None, version_minor=None, relax_add_props=False, name=None):
    """Load the JSON schema into a Validator.

    Parameters
    ----------
    version : int, optional
        Major notebook format version. Defaults to ``current_nbformat``.
    version_minor : int, optional
        Minor notebook format version. Defaults to the ``nbformat_minor`` of
        the ``nbformat.v<version>`` module (``0`` if the module has none).
    relax_add_props : bool, optional
        When true, return a validator built from a schema in which every
        ``additionalProperties`` entry has been relaxed to ``True``.
    name : str, optional
        Name of the validator implementation to use. When empty, the current
        validator is used.

    Returns
    -------
    The validator instance, or ``None`` when no schema is available for the
    requested version (``_get_schema_json`` raised ``AttributeError``).
    """
    if version is None:
        from . import current_nbformat

        version = current_nbformat

    v = import_item("nbformat.v%s" % version)
    current_minor = getattr(v, "nbformat_minor", 0)
    if version_minor is None:
        version_minor = current_minor

    if name:
        current_validator = _validator_for_name(name)
    else:
        current_validator = get_current_validator()

    if relax_add_props:
        try:
            schema_json = _get_schema_json(v, version=version, version_minor=version_minor)
        except AttributeError:
            return None

        # this allows properties to be added for intermediate
        # representations while validating for all other kinds of errors
        schema_json = _relax_additional_properties(schema_json)
        # Deliberately NOT stored in ``validators``: the cache key carries no
        # trace of ``relax_add_props``, so caching the relaxed validator there
        # would make later strict requests silently return the relaxed one.
        return current_validator(schema_json)

    version_tuple = (current_validator.name, version, version_minor)

    if version_tuple not in validators:
        try:
            schema_json = _get_schema_json(v, version=version, version_minor=version_minor)
        except AttributeError:
            return None

        if current_minor < version_minor:
            # notebook from the future, relax all `additionalProperties: False` requirements
            schema_json = _relax_additional_properties(schema_json)
            # and allow undefined cell types and outputs
            schema_json = _allow_undefined(schema_json)

        validators[version_tuple] = current_validator(schema_json)

    return validators[version_tuple]
def _get_schema_json(v, version=None, version_minor=None):
"""
Gets the json schema from a given imported library and nbformat version.
"""
if (version, version_minor) in v.nbformat_schema:
schema_path = os.path.join(
os.path.dirname(v.__file__), v.nbformat_schema[(version, version_minor)]
)
elif version_minor > v.nbformat_minor:
# load the latest schema
schema_path = os.path.join(os.path.dirname(v.__file__), v.nbformat_schema[(None, None)])
else:
raise AttributeError("Cannot find appropriate nbformat schema file.")
with open(schema_path) as f:
schema_json = json.load(f)
return schema_json
def isvalid(nbjson, ref=None, version=None, version_minor=None):
    """Report whether the given notebook JSON passes ``validate``.

    Returns True if ``validate`` accepts the input and False if it raises
    ``ValidationError``.

    To see the individual errors that were encountered, use the `validate`
    function instead.
    """
    try:
        validate(nbjson, ref, version, version_minor)
    except ValidationError:
        return False
    return True
def _format_as_index(indices):
"""
(from jsonschema._utils.format_as_index, copied to avoid relying on private API)
Construct a single string containing indexing operations for the indices.
For example, [1, 2, "foo"] -> [1][2]["foo"]
"""
if not indices:
return ""
return "[%s]" % "][".join(repr(index) for index in indices)
_ITEM_LIMIT = 16
_STR_LIMIT = 64
def _truncate_obj(obj):
"""Truncate objects for use in validation tracebacks
Cell and output lists are squashed, as are long strings, lists, and dicts.
"""
if isinstance(obj, dict):
truncated_dict = {k: _truncate_obj(v) for k, v in list(obj.items())[:_ITEM_LIMIT]}
if isinstance(truncated_dict.get("cells"), list):
truncated_dict["cells"] = ["...%i cells..." % len(obj["cells"])]
if isinstance(truncated_dict.get("outputs"), list):
truncated_dict["outputs"] = ["...%i outputs..." % len(obj["outputs"])]
if len(obj) > _ITEM_LIMIT:
truncated_dict["..."] = "%i keys truncated" % (len(obj) - _ITEM_LIMIT)
return truncated_dict
elif isinstance(obj, list):
truncated_list = [_truncate_obj(item) for item in obj[:_ITEM_LIMIT]]
if len(obj) > _ITEM_LIMIT:
truncated_list.append("...%i items truncated..." % (len(obj) - _ITEM_LIMIT))
return truncated_list
elif isinstance(obj, str):
truncated_str = obj[:_STR_LIMIT]
if len(obj) > _STR_LIMIT:
truncated_str += "..."
return truncated_str
else:
return obj
class NotebookValidationError(ValidationError):
    """Schema ValidationError wrapper with a truncated representation.

    Keeps tracebacks short by not dumping the full schema and notebook.
    Attributes not found on the wrapper are looked up on the wrapped error.
    """

    def __init__(self, original, ref=None):
        """Wrap ``original``; ``ref`` is used unless ``original`` has its own ``ref``."""
        self.original = original
        self.ref = getattr(original, "ref", ref)
        self.message = original.message

    def __getattr__(self, key):
        """Fall back to the wrapped error for attributes missing on the wrapper."""
        return getattr(self.original, key)

    def __str__(self):
        """Custom str for validation errors.

        Avoids dumping the full schema and notebook to logs.
        """
        wrapped = self.original
        instance = _truncate_obj(wrapped.instance)
        summary = wrapped.message
        failed_in = "Failed validating %r in %s%s:" % (
            wrapped.validator,
            self.ref or "notebook",
            # drop the last path element before formatting the schema location
            _format_as_index(list(wrapped.relative_schema_path)[:-1]),
        )
        on_instance = "On instance%s:" % _format_as_index(wrapped.relative_path)
        rendered = pprint.pformat(instance, width=78)
        return "\n".join([summary, "", failed_in, "", on_instance, rendered])

    __unicode__ = __str__
def better_validation_error(error, version, version_minor):
    """Get better ValidationError on oneOf failures.

    oneOf errors aren't informative.
    If it's a cell type or output_type error,
    try validating directly based on the type for a better error message.

    Parameters
    ----------
    error : ValidationError
        The error to improve.
    version, version_minor : int
        Notebook format version used for the targeted re-validation.

    Returns
    -------
    NotebookValidationError
        Wrapper around the most specific error that could be found; its
        ``ref`` names the definition the instance was re-validated against,
        when there was one.
    """
    schema_path = error.schema_path
    key = schema_path[-1] if schema_path else None
    ref = None
    # the last schema path element is only inspected when it is a string
    if isinstance(key, str) and key.endswith("Of"):
        instance = error.instance
        if isinstance(instance, dict):
            type_name = None
            suffix = ""
            if "cell_type" in instance:
                type_name, suffix = instance["cell_type"], "_cell"
            elif "output_type" in instance:
                type_name = instance["output_type"]
            # An invalid notebook may carry a non-string type (e.g. a number);
            # it cannot name a definition, so keep the original error rather
            # than fail with a TypeError while building the ref.
            if isinstance(type_name, str):
                ref = type_name + suffix

        if ref:
            try:
                validate(
                    error.instance,
                    ref,
                    version=version,
                    version_minor=version_minor,
                )
            except ValidationError as sub_error:
                # keep extending relative path
                error.relative_path.extend(sub_error.relative_path)
                sub_error.relative_path = error.relative_path
                better = better_validation_error(sub_error, version, version_minor)
                if better.ref is None:
                    better.ref = ref
                return better
            except Exception:
                # if it fails for some reason,
                # let the original error through
                pass
    return NotebookValidationError(error, ref)
def validate(
    nbdict=None,
    ref=None,
    version=None,
    version_minor=None,
    relax_add_props=False,
    nbjson=None,
    repair_duplicate_cell_ids=True,
    strip_invalid_metadata=False,
):
    """Checks whether the given notebook dict-like object
    conforms to the relevant notebook format schema.

    Parameters
    ----------
    nbdict : dict-like
        The notebook (or, when ``ref`` is given, the fragment) to validate.
    ref : str, optional
        Name of a schema definition to validate against instead of the whole
        notebook schema.
    version, version_minor : int, optional
        Format version to validate against. Without ``ref`` they default to
        the version read from ``nbdict``; with ``ref`` and no ``version``,
        version 1.0 is assumed.
    relax_add_props : bool, optional
        Passed through to ``iter_validate``.
    nbjson : dict-like, optional
        Backwards-compatible alias for ``nbdict``.
    repair_duplicate_cell_ids : bool, optional
        For whole notebooks of format 4.5 or later: generate ids for cells
        lacking one and replace duplicate ids (logging a warning) instead of
        raising. The notebook is modified in place.
    strip_invalid_metadata : bool, optional
        Passed through to ``iter_validate``.

    Raises
    ------
    TypeError
        If neither ``nbdict`` nor ``nbjson`` is given.
    ValidationError
        If not valid; the first error reported by ``iter_validate`` is raised.
        Also raised for duplicate cell ids when ``repair_duplicate_cell_ids``
        is false.
    """

    # backwards compatibility for nbjson argument
    if nbdict is not None:
        pass
    elif nbjson is not None:
        nbdict = nbjson
    else:
        raise TypeError("validate() missing 1 required argument: 'nbdict'")

    if ref is None:
        # if ref is not specified, we have a whole notebook, so we can get the version
        nbdict_version, nbdict_version_minor = get_version(nbdict)
        if version is None:
            version = nbdict_version
        if version_minor is None:
            version_minor = nbdict_version_minor
    else:
        # if ref is specified, and we don't have a version number, assume we're validating against 1.0
        if version is None:
            version, version_minor = 1, 0

    notebook_supports_cell_ids = ref is None and version >= 4 and version_minor >= 5
    if notebook_supports_cell_ids and repair_duplicate_cell_ids:
        # Auto-generate cell ids for cells that are missing them.
        for cell in nbdict["cells"]:
            if "id" not in cell:
                # Generate cell ids if any are missing
                cell["id"] = generate_corpus_id()

    for error in iter_validate(
        nbdict,
        ref=ref,
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        strip_invalid_metadata=strip_invalid_metadata,
    ):
        raise error

    if notebook_supports_cell_ids:
        # if we support cell ids check for uniqueness when validating the whole notebook
        seen_ids = set()
        for cell in nbdict["cells"]:
            cell_id = cell["id"]
            if cell_id in seen_ids:
                if not repair_duplicate_cell_ids:
                    raise ValidationError(f"Non-unique cell id '{cell_id}' detected.")
                # Best effort to repair if we find a duplicate id
                cell["id"] = generate_corpus_id()
                get_logger().warning(
                    "Non-unique cell id '{}' detected. Corrected to '{}'.".format(
                        cell_id, cell["id"]
                    )
                )
            # Record the id the cell actually ends up with, so that a later
            # cell sharing a freshly generated replacement id is detected too.
            seen_ids.add(cell["id"])
def _strip_invalid_metadata(nbdict, error_tree):
    """Remove metadata keys that failed validation from ``nbdict`` (in-place).

    ``error_tree`` is the validator's error tree for ``nbdict``. Offending
    top-level notebook metadata keys and top-level cell metadata keys are
    popped; everything else is left alone.
    """
    if "metadata" in error_tree:
        for key in error_tree["metadata"]:
            nbdict["metadata"].pop(key, None)

    if "cells" not in error_tree:
        return

    cells = nbdict.get("cells")
    if not isinstance(cells, list):
        # malformed ``cells`` container: there are no per-cell entries to clean
        return

    for cell_idx, cell in enumerate(cells):
        # Cells don't report individual metadata keys as having failed validation
        # Instead it reports that it failed to validate against each cell-type definition.
        # We have to delve into why those definitions failed to uncover which metadata
        # keys are misbehaving.
        cell_errors = error_tree["cells"][cell_idx].errors
        if "oneOf" not in cell_errors or not isinstance(cell, dict):
            continue

        one_of_error = cell_errors["oneOf"]
        # ``.get``: an invalid cell may lack ``cell_type`` altogether, in which
        # case no definition matches below and the cell is skipped
        intended_cell_type = cell.get("cell_type")
        schemas_by_index = [sub_schema["$ref"] for sub_schema in one_of_error.schema["oneOf"]]
        cell_type_definition_name = f"#/definitions/{intended_cell_type}_cell"
        if cell_type_definition_name not in schemas_by_index:
            continue

        schema_index = schemas_by_index.index(cell_type_definition_name)
        for error in one_of_error.context:
            rel_path = error.relative_path
            error_for_intended_schema = error.schema_path[0] == schema_index
            is_top_level_metadata_key = len(rel_path) == 2 and rel_path[0] == "metadata"
            if error_for_intended_schema and is_top_level_metadata_key:
                cell["metadata"].pop(rel_path[1], None)


def iter_validate(
    nbdict=None,
    ref=None,
    version=None,
    version_minor=None,
    relax_add_props=False,
    nbjson=None,
    strip_invalid_metadata=False,
):
    """Checks whether the given notebook dict-like object conforms to the
    relevant notebook format schema.

    Returns a generator of all ValidationErrors if not valid.

    Parameters
    ----------
    nbdict : dict-like
        The notebook (or, when ``ref`` is given, the fragment) to validate.
    ref : str, optional
        Name of a schema definition to validate against instead of the whole
        notebook schema.
    version, version_minor : int, optional
        Format version to validate against; when ``version`` is None both are
        read from ``nbdict``.
    relax_add_props : bool, optional
        Passed through to ``get_validator``.
    nbjson : dict-like, optional
        Backwards-compatible alias for ``nbdict``.
    strip_invalid_metadata : bool, optional
        When validating a whole notebook that has errors, remove the metadata
        keys that failed validation from ``nbdict`` (in place) and validate
        again; only the errors of that second pass are yielded.

    Raises
    ------
    TypeError
        If neither ``nbdict`` nor ``nbjson`` is given (raised on first
        iteration, since this is a generator).
    """
    # backwards compatibility for nbjson argument
    if nbdict is not None:
        pass
    elif nbjson is not None:
        nbdict = nbjson
    else:
        raise TypeError("iter_validate() missing 1 required argument: 'nbdict'")

    if version is None:
        version, version_minor = get_version(nbdict)

    validator = get_validator(version, version_minor, relax_add_props=relax_add_props)

    if validator is None:
        # no validator
        yield ValidationError("No schema for validating v%s notebooks" % version)
        return

    if ref:
        errors = validator.iter_errors(nbdict, {"$ref": "#/definitions/%s" % ref})
    else:
        errors = list(validator.iter_errors(nbdict))

        if errors and strip_invalid_metadata:
            if validator.name == "fastjsonschema":
                # the error tree used below is built with the jsonschema validator
                validator = get_validator(
                    version, version_minor, relax_add_props=relax_add_props, name="jsonschema"
                )
                errors = list(validator.iter_errors(nbdict))

            _strip_invalid_metadata(nbdict, validator.error_tree(errors))

            # Validate one more time to ensure that us removing metadata
            # didn't cause another complex validation issue in the schema.
            # Also to ensure that higher-level errors produced by individual metadata validation
            # failures are removed.
            errors = validator.iter_errors(nbdict)

    for error in errors:
        yield better_validation_error(error, version, version_minor)