"""This module defines TemplateExporter, a highly configurable converter that uses Jinja2 to export notebook files into different formats. """ # Copyright (c) IPython Development Team. # Distributed under the terms of the Modified BSD License. import json import os import uuid import warnings from pathlib import Path from jinja2 import ( BaseLoader, ChoiceLoader, DictLoader, Environment, FileSystemLoader, TemplateNotFound, ) from jupyter_core.paths import jupyter_path from traitlets import Bool, Dict, HasTraits, List, Unicode, default, observe, validate from traitlets.config import Config from traitlets.utils.importstring import import_item from nbconvert import filters from .exporter import Exporter # Jinja2 extensions to load. JINJA_EXTENSIONS = ["jinja2.ext.loopcontrols"] ROOT = os.path.dirname(__file__) DEV_MODE = os.path.exists(os.path.join(ROOT, "../../setup.py")) and os.path.exists( os.path.join(ROOT, "../../share") ) default_filters = { "indent": filters.indent, "markdown2html": filters.markdown2html, "markdown2asciidoc": filters.markdown2asciidoc, "ansi2html": filters.ansi2html, "filter_data_type": filters.DataTypeFilter, "get_lines": filters.get_lines, "highlight2html": filters.Highlight2HTML, "highlight2latex": filters.Highlight2Latex, "ipython2python": filters.ipython2python, "posix_path": filters.posix_path, "markdown2latex": filters.markdown2latex, "markdown2rst": filters.markdown2rst, "comment_lines": filters.comment_lines, "strip_ansi": filters.strip_ansi, "strip_dollars": filters.strip_dollars, "strip_files_prefix": filters.strip_files_prefix, "html2text": filters.html2text, "add_anchor": filters.add_anchor, "ansi2latex": filters.ansi2latex, "wrap_text": filters.wrap_text, "escape_latex": filters.escape_latex, "citation2latex": filters.citation2latex, "path2url": filters.path2url, "add_prompts": filters.add_prompts, "ascii_only": filters.ascii_only, "prevent_list_blocks": filters.prevent_list_blocks, "get_metadata": filters.get_metadata, "convert_pandoc": filters.convert_pandoc, "json_dumps": json.dumps, # browsers will parse , closing a script tag early # Since JSON allows escaping forward slash, this will still be parsed by JSON "escape_html_script": lambda x: x.replace("", "<\\/script>"), "strip_trailing_newline": filters.strip_trailing_newline, "text_base64": filters.text_base64, } # copy of https://github.com/jupyter/jupyter_server/blob/b62458a7f5ad6b5246d2f142258dedaa409de5d9/jupyter_server/config_manager.py#L19 def recursive_update(target, new): """Recursively update one dictionary using another. None values will delete their keys. """ for k, v in new.items(): if isinstance(v, dict): if k not in target: target[k] = {} recursive_update(target[k], v) if not target[k]: # Prune empty subdicts del target[k] elif v is None: target.pop(k, None) else: target[k] = v return target # return for convenience # define function at the top level to avoid pickle errors def deprecated(msg): warnings.warn(msg, DeprecationWarning) class ExtensionTolerantLoader(BaseLoader): """A template loader which optionally adds a given extension when searching. Constructor takes two arguments: *loader* is another Jinja loader instance to wrap. *extension* is the extension, which will be added to the template name if finding the template without it fails. This should include the dot, e.g. '.tpl'. """ def __init__(self, loader, extension): self.loader = loader self.extension = extension def get_source(self, environment, template): try: return self.loader.get_source(environment, template) except TemplateNotFound: if template.endswith(self.extension): raise TemplateNotFound(template) return self.loader.get_source(environment, template + self.extension) def list_templates(self): return self.loader.list_templates() class TemplateExporter(Exporter): """ Exports notebooks into other file formats. Uses Jinja 2 templating engine to output new formats. Inherit from this class if you are creating a new template type along with new filters/preprocessors. If the filters/ preprocessors provided by default suffice, there is no need to inherit from this class. Instead, override the template_file and file_extension traits via a config file. Filters available by default for templates: {filters} """ # finish the docstring __doc__ = __doc__.format(filters="- " + "\n - ".join(sorted(default_filters.keys()))) _template_cached = None def _invalidate_template_cache(self, change=None): self._template_cached = None @property def template(self): if self._template_cached is None: self._template_cached = self._load_template() return self._template_cached _environment_cached = None def _invalidate_environment_cache(self, change=None): self._environment_cached = None self._invalidate_template_cache() @property def environment(self): if self._environment_cached is None: self._environment_cached = self._create_environment() return self._environment_cached @property def default_config(self): c = Config( { "RegexRemovePreprocessor": {"enabled": True}, "TagRemovePreprocessor": {"enabled": True}, } ) c.merge(super().default_config) return c template_name = Unicode(help="Name of the template to use").tag( config=True, affects_template=True ) template_file = Unicode(None, allow_none=True, help="Name of the template file to use").tag( config=True, affects_template=True ) raw_template = Unicode("", help="raw template string").tag(affects_environment=True) enable_async = Bool(False, help="Enable Jinja async template execution").tag( affects_environment=True ) _last_template_file = "" _raw_template_key = "" @validate("template_name") def _template_name_validate(self, change): template_name = change["value"] if template_name and template_name.endswith(".tpl"): warnings.warn( f"5.x style template name passed '{self.template_name}'. Use --template-name for the template directory with a index..j2 file and/or --template-file to denote a different template.", DeprecationWarning, ) directory, self.template_file = os.path.split(self.template_name) if directory: directory, template_name = os.path.split(directory) if directory: if os.path.isabs(directory): self.extra_template_basedirs = [directory] return template_name @observe("template_file") def _template_file_changed(self, change): new = change["new"] if new == "default": self.template_file = self.default_template return # check if template_file is a file path # rather than a name already on template_path full_path = os.path.abspath(new) if os.path.isfile(full_path): directory, self.template_file = os.path.split(full_path) self.extra_template_paths = [directory] + self.extra_template_paths # While not strictly an invalid template file name, the extension hints that there isn't a template directory involved if self.template_file.endswith(".tpl"): warnings.warn( f"5.x style template file passed '{new}'. Use --template-name for the template directory with a index..j2 file and/or --template-file to denote a different template.", DeprecationWarning, ) @default("template_file") def _template_file_default(self): if self.template_extension: return "index" + self.template_extension @observe("raw_template") def _raw_template_changed(self, change): if not change["new"]: self.template_file = self._last_template_file self._invalidate_template_cache() template_paths = List(["."]).tag(config=True, affects_environment=True) extra_template_basedirs = List().tag(config=True, affects_environment=True) extra_template_paths = List([]).tag(config=True, affects_environment=True) @default("extra_template_basedirs") def _default_extra_template_basedirs(self): return [os.getcwd()] # Extension that the template files use. template_extension = Unicode().tag(config=True, affects_environment=True) template_data_paths = List( jupyter_path("nbconvert", "templates"), help="Path where templates can be installed too." ).tag(affects_environment=True) # Extension that the template files use. template_extension = Unicode().tag(config=True, affects_environment=True) @default("template_extension") def _template_extension_default(self): if self.file_extension: return self.file_extension + ".j2" else: return self.file_extension exclude_input = Bool( False, help="This allows you to exclude code cell inputs from all templates if set to True." ).tag(config=True) exclude_input_prompt = Bool( False, help="This allows you to exclude input prompts from all templates if set to True." ).tag(config=True) exclude_output = Bool( False, help="This allows you to exclude code cell outputs from all templates if set to True.", ).tag(config=True) exclude_output_prompt = Bool( False, help="This allows you to exclude output prompts from all templates if set to True." ).tag(config=True) exclude_output_stdin = Bool( True, help="This allows you to exclude output of stdin stream from lab template if set to True.", ).tag(config=True) exclude_code_cell = Bool( False, help="This allows you to exclude code cells from all templates if set to True." ).tag(config=True) exclude_markdown = Bool( False, help="This allows you to exclude markdown cells from all templates if set to True." ).tag(config=True) exclude_raw = Bool( False, help="This allows you to exclude raw cells from all templates if set to True." ).tag(config=True) exclude_unknown = Bool( False, help="This allows you to exclude unknown cells from all templates if set to True." ).tag(config=True) extra_loaders = List( help="Jinja loaders to find templates. Will be tried in order " "before the default FileSystem ones.", ).tag(affects_environment=True) filters = Dict( help="""Dictionary of filters, by name and namespace, to add to the Jinja environment.""" ).tag(config=True, affects_environment=True) raw_mimetypes = List( help="""formats of raw cells to be included in this Exporter's output.""" ).tag(config=True) @default("raw_mimetypes") def _raw_mimetypes_default(self): return [self.output_mimetype, ""] # TODO: passing config is wrong, but changing this revealed more complicated issues def __init__(self, config=None, **kw): """ Public constructor Parameters ---------- config : config User configuration instance. extra_loaders : list[of Jinja Loaders] ordered list of Jinja loader to find templates. Will be tried in order before the default FileSystem ones. template_file : str (optional, kw arg) Template to use when exporting. """ super().__init__(config=config, **kw) self.observe( self._invalidate_environment_cache, list(self.traits(affects_environment=True)) ) self.observe(self._invalidate_template_cache, list(self.traits(affects_template=True))) def _load_template(self): """Load the Jinja template object from the template file This is triggered by various trait changes that would change the template. """ # this gives precedence to a raw_template if present with self.hold_trait_notifications(): if self.template_file != self._raw_template_key: self._last_template_file = self.template_file if self.raw_template: self.template_file = self._raw_template_key if not self.template_file: raise ValueError("No template_file specified!") # First try to load the # template by name with extension added, then try loading the template # as if the name is explicitly specified. template_file = self.template_file self.log.debug("Attempting to load template %s", template_file) self.log.debug(" template_paths: %s", os.pathsep.join(self.template_paths)) return self.environment.get_template(template_file) def from_notebook_node(self, nb, resources=None, **kw): """ Convert a notebook from a notebook node instance. Parameters ---------- nb : :class:`~nbformat.NotebookNode` Notebook node resources : dict Additional resources that can be accessed read/write by preprocessors and filters. """ nb_copy, resources = super().from_notebook_node(nb, resources, **kw) resources.setdefault("raw_mimetypes", self.raw_mimetypes) resources["global_content_filter"] = { "include_code": not self.exclude_code_cell, "include_markdown": not self.exclude_markdown, "include_raw": not self.exclude_raw, "include_unknown": not self.exclude_unknown, "include_input": not self.exclude_input, "include_output": not self.exclude_output, "include_output_stdin": not self.exclude_output_stdin, "include_input_prompt": not self.exclude_input_prompt, "include_output_prompt": not self.exclude_output_prompt, "no_prompt": self.exclude_input_prompt and self.exclude_output_prompt, } # Top level variables are passed to the template_exporter here. output = self.template.render(nb=nb_copy, resources=resources) output = output.lstrip("\r\n") return output, resources def _register_filter(self, environ, name, jinja_filter): """ Register a filter. A filter is a function that accepts and acts on one string. The filters are accessible within the Jinja templating engine. Parameters ---------- name : str name to give the filter in the Jinja engine filter : filter """ if jinja_filter is None: raise TypeError("filter") isclass = isinstance(jinja_filter, type) constructed = not isclass # Handle filter's registration based on it's type if constructed and isinstance(jinja_filter, (str,)): # filter is a string, import the namespace and recursively call # this register_filter method filter_cls = import_item(jinja_filter) return self._register_filter(environ, name, filter_cls) if constructed and hasattr(jinja_filter, "__call__"): # noqa # filter is a function, no need to construct it. environ.filters[name] = jinja_filter return jinja_filter elif isclass and issubclass(jinja_filter, HasTraits): # filter is configurable. Make sure to pass in new default for # the enabled flag if one was specified. filter_instance = jinja_filter(parent=self) self._register_filter(environ, name, filter_instance) elif isclass: # filter is not configurable, construct it filter_instance = jinja_filter() self._register_filter(environ, name, filter_instance) else: # filter is an instance of something without a __call__ # attribute. raise TypeError("filter") def register_filter(self, name, jinja_filter): """ Register a filter. A filter is a function that accepts and acts on one string. The filters are accessible within the Jinja templating engine. Parameters ---------- name : str name to give the filter in the Jinja engine filter : filter """ return self._register_filter(self.environment, name, jinja_filter) def default_filters(self): """Override in subclasses to provide extra filters. This should return an iterable of 2-tuples: (name, class-or-function). You should call the method on the parent class and include the filters it provides. If a name is repeated, the last filter provided wins. Filters from user-supplied config win over filters provided by classes. """ return default_filters.items() def _create_environment(self): """ Create the Jinja templating environment. """ paths = self.template_paths self.log.debug("Template paths:\n\t%s", "\n\t".join(paths)) loaders = self.extra_loaders + [ ExtensionTolerantLoader(FileSystemLoader(paths), self.template_extension), DictLoader({self._raw_template_key: self.raw_template}), ] environment = Environment( loader=ChoiceLoader(loaders), extensions=JINJA_EXTENSIONS, enable_async=self.enable_async, ) environment.globals["uuid4"] = uuid.uuid4 # Add default filters to the Jinja2 environment for key, value in self.default_filters(): self._register_filter(environment, key, value) # Load user filters. Overwrite existing filters if need be. if self.filters: for key, user_filter in self.filters.items(): self._register_filter(environment, key, user_filter) return environment def _init_preprocessors(self): super()._init_preprocessors() conf = self._get_conf() preprocessors = conf.get("preprocessors", {}) # preprocessors is a dict for three reasons # * We rely on recursive_update, which can only merge dicts, lists will be overwritten # * We can use the key with numerical prefixing to guarantee ordering (/etc/*.d/XY-file style) # * We can disable preprocessors by overwriting the value with None for _, preprocessor in sorted(preprocessors.items(), key=lambda x: x[0]): if preprocessor is not None: kwargs = preprocessor.copy() preprocessor_cls = kwargs.pop("type") preprocessor_cls = import_item(preprocessor_cls) if preprocessor_cls.__name__ in self.config: kwargs.update(self.config[preprocessor_cls.__name__]) preprocessor = preprocessor_cls(**kwargs) self.register_preprocessor(preprocessor) def _get_conf(self): conf = {} # the configuration once all conf files are merged for path in map(Path, self.template_paths): conf_path = path / "conf.json" if conf_path.exists(): with conf_path.open() as f: conf = recursive_update(conf, json.load(f)) return conf @default("template_paths") def _template_paths(self, prune=True, root_dirs=None): paths = [] root_dirs = self.get_prefix_root_dirs() template_names = self.get_template_names() for template_name in template_names: for base_dir in self.extra_template_basedirs: path = os.path.join(base_dir, template_name) if not prune or os.path.exists(path): paths.append(path) for root_dir in root_dirs: base_dir = os.path.join(root_dir, "nbconvert", "templates") path = os.path.join(base_dir, template_name) if not prune or os.path.exists(path): paths.append(path) for root_dir in root_dirs: # we include root_dir for when we want to be very explicit, e.g. # {% extends 'nbconvert/templates/classic/base.html' %} paths.append(root_dir) # we include base_dir for when we want to be explicit, but less than root_dir, e.g. # {% extends 'classic/base.html' %} base_dir = os.path.join(root_dir, "nbconvert", "templates") paths.append(base_dir) compatibility_dir = os.path.join(root_dir, "nbconvert", "templates", "compatibility") paths.append(compatibility_dir) additional_paths = [] for path in self.template_data_paths: if not prune or os.path.exists(path): additional_paths.append(path) return paths + self.extra_template_paths + additional_paths @classmethod def get_compatibility_base_template_conf(cls, name): # Hard-coded base template confs to use for backwards compatibility for 5.x-only templates if name == "display_priority": return dict(base_template="base") if name == "full": return dict(base_template="classic", mimetypes={"text/html": True}) def get_template_names(self): # finds a list of template names where each successive template name is the base template template_names = [] root_dirs = self.get_prefix_root_dirs() base_template = self.template_name merged_conf = {} # the configuration once all conf files are merged while base_template is not None: template_names.append(base_template) conf = {} found_at_least_one = False for base_dir in self.extra_template_basedirs: template_dir = os.path.join(base_dir, base_template) if os.path.exists(template_dir): found_at_least_one = True conf_file = os.path.join(template_dir, "conf.json") if os.path.exists(conf_file): with open(conf_file) as f: conf = recursive_update(json.load(f), conf) for root_dir in root_dirs: template_dir = os.path.join(root_dir, "nbconvert", "templates", base_template) if os.path.exists(template_dir): found_at_least_one = True conf_file = os.path.join(template_dir, "conf.json") if os.path.exists(conf_file): with open(conf_file) as f: conf = recursive_update(json.load(f), conf) if not found_at_least_one: # Check for backwards compatibility template names for root_dir in root_dirs: compatibility_file = base_template + ".tpl" compatibility_path = os.path.join( root_dir, "nbconvert", "templates", "compatibility", compatibility_file ) if os.path.exists(compatibility_path): found_at_least_one = True warnings.warn( f"5.x template name passed '{self.template_name}'. Use 'lab' or 'classic' for new template usage.", DeprecationWarning, ) self.template_file = compatibility_file conf = self.get_compatibility_base_template_conf(base_template) self.template_name = conf.get("base_template") break if not found_at_least_one: paths = "\n\t".join(root_dirs) raise ValueError( "No template sub-directory with name %r found in the following paths:\n\t%s" % (base_template, paths) ) merged_conf = recursive_update(dict(conf), merged_conf) base_template = conf.get("base_template") conf = merged_conf mimetypes = [mimetype for mimetype, enabled in conf.get("mimetypes", {}).items() if enabled] if self.output_mimetype and self.output_mimetype not in mimetypes and mimetypes: supported_mimetypes = "\n\t".join(mimetypes) raise ValueError( "Unsupported mimetype %r for template %r, mimetypes supported are: \n\t%s" % (self.output_mimetype, self.template_name, supported_mimetypes) ) return template_names def get_prefix_root_dirs(self): # We look at the usual jupyter locations, and for development purposes also # relative to the package directory (first entry, meaning with highest precedence) root_dirs = [] if DEV_MODE: root_dirs.append(os.path.abspath(os.path.join(ROOT, "..", "..", "share", "jupyter"))) root_dirs.extend(jupyter_path()) return root_dirs def _init_resources(self, resources): resources = super()._init_resources(resources) resources["deprecated"] = deprecated return resources