2022-05-23 00:16:32 +04:00

160 lines
5.6 KiB
Python

"""Export to PDF via a headless browser"""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
import asyncio
import concurrent.futures
import os
import tempfile
from traitlets import Bool, default
from .html import HTMLExporter
class WebPDFExporter(HTMLExporter):
    """Exporter that writes PDF files by printing HTML in a headless browser.

    This inherits from :class:`HTMLExporter`. It creates the HTML using the
    template machinery (``webpdf`` template by default), and then runs
    pyppeteer to print that HTML to a PDF.
    """

    export_from_notebook = "PDF via HTML"

    allow_chromium_download = Bool(
        False,
        help="Whether to allow downloading Chromium if no suitable version is found on the system.",
    ).tag(config=True)

    paginate = Bool(
        True,
        help=(
            "\n"
            "Split generated notebook into multiple pages.\n"
            "If False, a PDF with one long page will be generated.\n"
            "Set to True to match behavior of LaTeX based PDF generator\n"
        ),
    ).tag(config=True)

    # The intermediate document produced by the parent exporter is HTML; the
    # ``.pdf`` output extension is set on ``resources`` in
    # ``from_notebook_node`` once the PDF has been generated.
    output_mimetype = "text/html"

    @default("file_extension")
    def _file_extension_default(self):
        """Return the default file extension, ``.html``."""
        return ".html"

    @default("template_name")
    def _template_name_default(self):
        """Return the default template name, ``webpdf``."""
        return "webpdf"

    disable_sandbox = Bool(
        False,
        help=(
            "\n"
            "Disable chromium security sandbox when converting to PDF.\n"
            "WARNING: This could cause arbitrary code execution in specific circumstances,\n"
            "where JS in your notebook can execute serverside code! Please use with\n"
            "caution.\n"
            "``https://github.com/puppeteer/puppeteer/blob/main@%7B2020-12-14T17:22:24Z%7D/docs/troubleshooting.md#setting-up-chrome-linux-sandbox``\n"
            "has more information.\n"
            "This is required for webpdf to work inside most container environments.\n"
        ),
    ).tag(config=True)

    def _check_launch_reqs(self):
        """Return pyppeteer's ``launch`` function after checking prerequisites.

        Raises
        ------
        RuntimeError
            If pyppeteer cannot be imported, or if ``check_chromium()``
            reports no usable Chromium and ``allow_chromium_download`` is
            False.
        """
        # Imported lazily so the module can be loaded without the optional
        # ``webpdf`` dependencies installed.
        try:
            from pyppeteer import launch
            from pyppeteer.util import check_chromium
        except ModuleNotFoundError as e:
            raise RuntimeError(
                "Pyppeteer is not installed to support Web PDF conversion. "
                "Please install `nbconvert[webpdf]` to enable."
            ) from e

        if not self.allow_chromium_download and not check_chromium():
            raise RuntimeError(
                "No suitable chromium executable found on the system. "
                "Please use '--allow-chromium-download' to allow downloading one."
            )
        return launch

    def run_pyppeteer(self, html):
        """Print an HTML document to PDF with pyppeteer and return the result.

        The HTML is written to a temporary ``.html`` file, loaded in a browser
        started through pyppeteer, and printed with ``page.pdf``. The
        coroutine runs on a new event loop in a worker thread; this method
        blocks until it finishes.

        Parameters
        ----------
        html : str
            The HTML source to render; it is encoded as UTF-8 on disk.

        Returns
        -------
        The value returned by ``page.pdf`` (the PDF data).

        Raises
        ------
        RuntimeError
            Propagated from ``_check_launch_reqs`` when pyppeteer or Chromium
            is unavailable. Any exception raised while driving the browser is
            propagated as well, after the browser and the temporary file have
            been cleaned up.
        """
        # Evaluated in the page to measure the rendered body; one extra pixel
        # is added to each dimension (see the comment where it is used).
        measure_body_js = (
            "() => {\n"
            "const rect = document.body.getBoundingClientRect();\n"
            "return {\n"
            "width: Math.ceil(rect.width) + 1,\n"
            "height: Math.ceil(rect.height) + 1,\n"
            "}\n"
            "}"
        )

        async def main(temp_file):
            args = ["--no-sandbox"] if self.disable_sandbox else []
            browser = await self._check_launch_reqs()(
                handleSIGINT=False, handleSIGTERM=False, handleSIGHUP=False, args=args
            )
            # Always close the browser, even when navigation or printing
            # fails; otherwise the browser would be left running after an
            # error.
            try:
                page = await browser.newPage()
                await page.emulateMedia("screen")
                await page.waitFor(100)
                await page.goto(f"file://{temp_file.name}", waitUntil="networkidle0")
                await page.waitFor(100)

                pdf_params = {"printBackground": True}
                if not self.paginate:
                    # Floating point precision errors can cause the printed
                    # PDF to spill over onto a new page by a pixel fraction,
                    # hence the rounded-up, padded dimensions.
                    dimensions = await page.evaluate(measure_body_js)
                    width = dimensions["width"]
                    height = dimensions["height"]
                    # 200 inches is the maximum size for Adobe Acrobat Reader.
                    pdf_params.update(
                        {
                            "width": min(width, 200 * 72),
                            "height": min(height, 200 * 72),
                        }
                    )
                return await page.pdf(pdf_params)
            finally:
                await browser.close()

        # TODO: when dropping Python 3.6, use
        #   pdf_data = pool.submit(asyncio.run, main(temp_file)).result()
        def run_coroutine(coro):
            # NOTE(review): the loop is intentionally not closed here.
            # pyppeteer may keep a reference to it for its own exit-time
            # cleanup -- confirm before adding ``loop.close()``.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return loop.run_until_complete(coro)

        # Create a temporary file to pass the HTML code to Chromium:
        # Unfortunately, tempfile on Windows does not allow for an already open
        # file to be opened by a separate process. So we must close it first
        # before calling Chromium. We also specify delete=False to ensure the
        # file is not deleted after closing (the default behavior).
        temp_file = tempfile.NamedTemporaryFile(suffix=".html", delete=False)
        try:
            # The write is inside the ``try`` so the file is removed even if
            # encoding or writing the HTML fails.
            with temp_file:
                temp_file.write(html.encode("utf-8"))

            # The context manager shuts the executor down once the single
            # task has completed, releasing its worker thread.
            with concurrent.futures.ThreadPoolExecutor() as pool:
                pdf_data = pool.submit(run_coroutine, main(temp_file)).result()
        finally:
            # Ensure the file is deleted even if pyppeteer raises an exception
            os.unlink(temp_file.name)
        return pdf_data

    def from_notebook_node(self, nb, resources=None, **kw):
        """Convert a notebook node to PDF.

        The notebook is rendered to HTML by the parent exporter and that HTML
        is then printed to PDF via :meth:`run_pyppeteer`.

        Parameters
        ----------
        nb
            The notebook node to convert; passed through to the parent
            exporter.
        resources : optional
            Resources passed through to the parent exporter.
        **kw
            Extra keyword arguments forwarded to the parent exporter.

        Returns
        -------
        tuple
            ``(pdf_data, resources)`` where ``resources["output_extension"]``
            has been set to ``".pdf"``.

        Raises
        ------
        RuntimeError
            If pyppeteer or a suitable Chromium is unavailable.
        """
        # Fail fast, before spending time rendering the HTML.
        self._check_launch_reqs()
        html, resources = super().from_notebook_node(nb, resources=resources, **kw)

        self.log.info("Building PDF")
        pdf_data = self.run_pyppeteer(html)
        self.log.info("PDF successfully created")

        # convert output extension to pdf
        # the writer above required it to be html
        resources["output_extension"] = ".pdf"

        return pdf_data, resources