first commit

This commit is contained in:
Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

View File

@ -0,0 +1,18 @@
"""
tinycss2
========
tinycss2 is a low-level CSS parser and generator: it can parse strings, return
Python objects representing tokens and blocks, and generate CSS strings
corresponding to these objects.
"""
from .bytes import parse_stylesheet_bytes # noqa
from .parser import ( # noqa
parse_declaration_list, parse_one_component_value, parse_one_declaration,
parse_one_rule, parse_rule_list, parse_stylesheet)
from .serializer import serialize, serialize_identifier # noqa
from .tokenizer import parse_component_value_list # noqa
# Package version string, published under both conventional names.
__version__ = '1.1.1'
VERSION = __version__

View File

@ -0,0 +1,879 @@
"""
Data structures for the CSS abstract syntax tree.
"""
from webencodings import ascii_lower
from .serializer import _serialize_to, serialize_identifier, serialize_name
class Node:
    """Base class of every node type; it is never instantiated directly.

    .. attribute:: type

        Each child class has a :attr:`type` class attribute
        with a unique string value.
        This allows checking for the node type with code like:

        .. code-block:: python

            if node.type == 'whitespace':

        instead of the more verbose:

        .. code-block:: python

            from tinycss2.ast import WhitespaceToken
            if isinstance(node, WhitespaceToken):

    Every node also has these attributes and methods,
    which are not repeated for brevity:

    .. attribute:: source_line

        The line number of the start of the node in the CSS source.
        Starts at 1.

    .. attribute:: source_column

        The column number within :attr:`source_line` of the start of the node
        in the CSS source.
        Starts at 1.

    .. automethod:: serialize

    """
    __slots__ = ['source_line', 'source_column']

    def __init__(self, source_line, source_column):
        self.source_line = source_line
        self.source_column = source_column

    def __repr__(self):
        # Child classes define ``repr_format``, a format string that receives
        # this node as ``self``. The former ``str is bytes`` (Python 2)
        # variant that encoded the result to UTF-8 could never be selected on
        # Python 3, so only the text variant is kept.
        return self.repr_format.format(self=self)

    def serialize(self):
        """Serialize this node to CSS syntax and return a Unicode string."""
        chunks = []
        self._serialize_to(chunks.append)
        return ''.join(chunks)

    def _serialize_to(self, write):
        """Serialize this node to CSS syntax, writing chunks as Unicode string
        by calling the provided :obj:`write` callback.

        Must be overridden by every child class.

        """
        raise NotImplementedError  # pragma: no cover
class ParseError(Node):
    """A syntax error of some sort. May occur anywhere in the tree.

    Syntax errors are not fatal in the parser
    to allow for different error handling behaviors.
    For example, an error in a Selector list makes the whole rule invalid,
    but an error in a Media Query list only replaces one comma-separated query
    with ``not all``.

    .. autoattribute:: type

    .. attribute:: kind

        Machine-readable string indicating the type of error.
        Example: ``'bad-url'``.

    .. attribute:: message

        Human-readable explanation of the error, as a string.
        Could be translated, expanded to include details, etc.

    """
    __slots__ = ['kind', 'message']
    type = 'error'
    repr_format = '<{self.__class__.__name__} {self.kind}>'

    def __init__(self, line, column, kind, message):
        Node.__init__(self, line, column)
        self.kind = kind
        self.message = message

    def _serialize_to(self, write):
        """Write the CSS text standing for this error.

        :raises TypeError: if :attr:`kind` has no known serialization.

        """
        kind = self.kind
        if kind == 'bad-string':
            write('"[bad string]\n')
        elif kind == 'bad-url':
            write('url([bad url])')
        elif kind in (')', ']', '}'):
            # A closing-bracket error is written back as that bracket.
            # Tuple membership is used on purpose: a substring test against
            # ')]}' would also accept '' or multi-character kinds like ')]'.
            write(kind)
        elif kind in ('eof-in-string', 'eof-in-url'):
            # Nothing is written for end-of-file errors.
            pass
        else:  # pragma: no cover
            raise TypeError('Can not serialize %r' % self)
class Comment(Node):
    """A CSS comment.

    Passing ``skip_comments=True`` to functions such as
    :func:`~tinycss2.parse_component_value_list` makes them ignore comments.

    .. autoattribute:: type

    .. attribute:: value

        The text found between ``/*`` and ``*/``, as a string.

    """
    __slots__ = ['value']
    type = 'comment'
    repr_format = '<{self.__class__.__name__} {self.value}>'

    def __init__(self, line, column, value):
        super().__init__(line, column)
        self.value = value

    def _serialize_to(self, write):
        # Opening marker, comment text, closing marker: one chunk each.
        for chunk in ('/*', self.value, '*/'):
            write(chunk)
class WhitespaceToken(Node):
    """A :diagram:`whitespace-token`.

    .. autoattribute:: type

    .. attribute:: value

        The run of whitespace exactly as it appeared in the original CSS
        source, as a string.

    """
    __slots__ = ['value']
    type = 'whitespace'
    repr_format = '<{self.__class__.__name__}>'

    def __init__(self, line, column, value):
        super().__init__(line, column)
        self.value = value

    def _serialize_to(self, write):
        # Whitespace is emitted unchanged.
        write(self.value)
class LiteralToken(Node):
    r"""Token that represents one or more characters as in the CSS source.

    .. autoattribute:: type

    .. attribute:: value

        A string of one to four characters.

    An instance compares equal to its :attr:`value`,
    which makes these two checks equivalent:

    .. code-block:: python

        if node == ';':
        if node.type == 'literal' and node.value == ';':

    This single class covers what `the specification`_ defines as separate
    token types:

    .. _the specification: https://drafts.csswg.org/css-syntax-3/

    * *<colon-token>* ``:``
    * *<semicolon-token>* ``;``
    * *<comma-token>* ``,``
    * *<cdc-token>* ``-->``
    * *<cdo-token>* ``<!--``
    * *<include-match-token>* ``~=``
    * *<dash-match-token>* ``|=``
    * *<prefix-match-token>* ``^=``
    * *<suffix-match-token>* ``$=``
    * *<substring-match-token>* ``*=``
    * *<column-token>* ``||``
    * *<delim-token>* (a single ASCII character not part of any other token)

    """
    __slots__ = ['value']
    type = 'literal'
    repr_format = '<{self.__class__.__name__} {self.value}>'

    def __init__(self, line, column, value):
        super().__init__(line, column)
        self.value = value

    def __eq__(self, other):
        # Equal to itself, and to anything its value is equal to.
        return self is other or self.value == other

    def __ne__(self, other):
        return not (self == other)

    def _serialize_to(self, write):
        write(self.value)
class IdentToken(Node):
    """An :diagram:`ident-token`.

    .. autoattribute:: type

    .. attribute:: value

        The unescaped value, as a Unicode string.

    .. attribute:: lower_value

        :attr:`value` normalized to *ASCII lower case*,
        see :func:`~webencodings.ascii_lower`.
        Compare this attribute, not :attr:`value`, against CSS keywords.

    """
    __slots__ = ['value', 'lower_value']
    type = 'ident'
    repr_format = '<{self.__class__.__name__} {self.value}>'

    def __init__(self, line, column, value):
        super().__init__(line, column)
        self.value = value
        try:
            lowered = ascii_lower(value)
        except UnicodeEncodeError:
            # The value could not be lower-cased; keep it as it is.
            lowered = value
        self.lower_value = lowered

    def _serialize_to(self, write):
        write(serialize_identifier(self.value))
class AtKeywordToken(Node):
    """An :diagram:`at-keyword-token`.

    .. code-block:: text

        '@' <value>

    .. autoattribute:: type

    .. attribute:: value

        The unescaped value, as a Unicode string, without the preceding ``@``.

    .. attribute:: lower_value

        :attr:`value` normalized to *ASCII lower case*,
        see :func:`~webencodings.ascii_lower`.
        Compare this attribute, not :attr:`value`, against CSS at-keywords.

        .. code-block:: python

            if node.type == 'at-keyword' and node.lower_value == 'import':

    """
    __slots__ = ['value', 'lower_value']
    type = 'at-keyword'
    repr_format = '<{self.__class__.__name__} @{self.value}>'

    def __init__(self, line, column, value):
        super().__init__(line, column)
        self.value = value
        try:
            lowered = ascii_lower(value)
        except UnicodeEncodeError:
            # The value could not be lower-cased; keep it as it is.
            lowered = value
        self.lower_value = lowered

    def _serialize_to(self, write):
        write('@')
        write(serialize_identifier(self.value))
class HashToken(Node):
    r"""A :diagram:`hash-token`.

    .. code-block:: text

        '#' <value>

    .. autoattribute:: type

    .. attribute:: value

        The unescaped value, as a Unicode string, without the preceding ``#``.

    .. attribute:: is_identifier

        A boolean, true if the CSS source for this token
        was ``#`` followed by a valid identifier.
        (Only such hash tokens are valid ID selectors.)

    """
    __slots__ = ['value', 'is_identifier']
    type = 'hash'
    repr_format = '<{self.__class__.__name__} #{self.value}>'

    def __init__(self, line, column, value, is_identifier):
        super().__init__(line, column)
        self.is_identifier = is_identifier
        self.value = value

    def _serialize_to(self, write):
        write('#')
        # Identifier-like values use the identifier serializer,
        # every other value uses the name serializer.
        to_css = serialize_identifier if self.is_identifier else serialize_name
        write(to_css(self.value))
class StringToken(Node):
    """A :diagram:`string-token`.

    .. code-block:: text

        '"' <value> '"'

    .. autoattribute:: type

    .. attribute:: value

        The unescaped value, as a Unicode string, without the quotes.

    .. attribute:: representation

        The text that is written verbatim when this token is serialized.

    """
    __slots__ = ['value', 'representation']
    type = 'string'
    repr_format = '<{self.__class__.__name__} {self.representation}>'

    def __init__(self, line, column, value, representation):
        super().__init__(line, column)
        self.representation = representation
        self.value = value

    def _serialize_to(self, write):
        # The stored representation is emitted as is.
        write(self.representation)
class URLToken(Node):
    """An :diagram:`url-token`.

    .. code-block:: text

        'url(' <value> ')'

    .. autoattribute:: type

    .. attribute:: value

        The unescaped URL, as a Unicode string, without the ``url(`` and ``)``
        markers.

    .. attribute:: representation

        The text that is written verbatim when this token is serialized.

    """
    __slots__ = ['value', 'representation']
    type = 'url'
    repr_format = '<{self.__class__.__name__} {self.representation}>'

    def __init__(self, line, column, value, representation):
        super().__init__(line, column)
        self.representation = representation
        self.value = value

    def _serialize_to(self, write):
        # The stored representation is emitted as is.
        write(self.representation)
class UnicodeRangeToken(Node):
    """A `unicode-range token <https://www.w3.org/TR/css-syntax-3/#urange>`_.

    .. autoattribute:: type

    .. attribute:: start

        The start of the range, as an integer between 0 and 1114111.

    .. attribute:: end

        The end of the range, as an integer between 0 and 1114111.
        Same as :attr:`start` if the source only specified one value.

    """
    __slots__ = ['start', 'end']
    type = 'unicode-range'
    repr_format = '<{self.__class__.__name__} {self.start} {self.end}>'

    def __init__(self, line, column, start, end):
        super().__init__(line, column)
        self.start, self.end = start, end

    def _serialize_to(self, write):
        # A single code point gets the short ``U+X`` form,
        # a real range the ``U+X-Y`` form (upper-case hexadecimal).
        if self.end != self.start:
            write('U+%X-%X' % (self.start, self.end))
        else:
            write('U+%X' % self.start)
class NumberToken(Node):
    """A :diagram:`number-token`.

    .. autoattribute:: type

    .. attribute:: value

        The numeric value as a :class:`float`.

    .. attribute:: int_value

        The numeric value as an :class:`int`
        if :attr:`is_integer` is true, :obj:`None` otherwise.

    .. attribute:: is_integer

        Whether the token was syntactically an integer, as a boolean.

    .. attribute:: representation

        The CSS representation of the value, as a Unicode string.

    """
    __slots__ = ['value', 'int_value', 'is_integer', 'representation']
    type = 'number'
    repr_format = '<{self.__class__.__name__} {self.representation}>'

    def __init__(self, line, column, value, int_value, representation):
        super().__init__(line, column)
        self.representation = representation
        self.value = value
        self.int_value = int_value
        # An integer token is recognized by the presence of an int value.
        self.is_integer = int_value is not None

    def _serialize_to(self, write):
        write(self.representation)
class PercentageToken(Node):
    """A :diagram:`percentage-token`.

    .. code-block:: text

        <representation> '%'

    .. autoattribute:: type

    .. attribute:: value

        The numeric value as a :class:`float`.

    .. attribute:: int_value

        The numeric value as an :class:`int`
        if the token was syntactically an integer,
        or :obj:`None`.

    .. attribute:: is_integer

        Whether the token's value was syntactically an integer, as a boolean.

    .. attribute:: representation

        The CSS representation of the value without the unit,
        as a Unicode string.

    """
    __slots__ = ['value', 'int_value', 'is_integer', 'representation']
    type = 'percentage'
    repr_format = '<{self.__class__.__name__} {self.representation}%>'

    def __init__(self, line, column, value, int_value, representation):
        super().__init__(line, column)
        self.representation = representation
        self.value = value
        self.int_value = int_value
        # An integer token is recognized by the presence of an int value.
        self.is_integer = int_value is not None

    def _serialize_to(self, write):
        write(self.representation)
        write('%')
class DimensionToken(Node):
    """A :diagram:`dimension-token`.

    .. code-block:: text

        <representation> <unit>

    .. autoattribute:: type

    .. attribute:: value

        The numeric value as a :class:`float`.

    .. attribute:: int_value

        The numeric value as an :class:`int`
        if the token was syntactically an integer,
        or :obj:`None`.

    .. attribute:: is_integer

        Whether the token's value was syntactically an integer, as a boolean.

    .. attribute:: representation

        The CSS representation of the value without the unit,
        as a Unicode string.

    .. attribute:: unit

        The unescaped unit, as a Unicode string.

    .. attribute:: lower_unit

        Same as :attr:`unit` but normalized to *ASCII lower case*,
        see :func:`~webencodings.ascii_lower`.
        This is the value to use when comparing to a CSS unit.

        .. code-block:: python

            if node.type == 'dimension' and node.lower_unit == 'px':

    """
    __slots__ = ['value', 'int_value', 'is_integer', 'representation',
                 'unit', 'lower_unit']
    type = 'dimension'
    repr_format = ('<{self.__class__.__name__} '
                   '{self.representation}{self.unit}>')

    def __init__(self, line, column, value, int_value, representation, unit):
        Node.__init__(self, line, column)
        self.value = value
        self.int_value = int_value
        self.is_integer = int_value is not None
        self.representation = representation
        self.unit = unit
        try:
            self.lower_unit = ascii_lower(unit)
        except UnicodeEncodeError:
            # Same fallback as IdentToken and AtKeywordToken in this module:
            # a unit that cannot be lower-cased is kept unchanged instead of
            # making the token impossible to build.
            self.lower_unit = unit

    def _serialize_to(self, write):
        write(self.representation)
        # Disambiguate with scientific notation: a unit that is 'e'/'E' or
        # starts with 'e-'/'E-' gets its first letter written as the escape
        # '\65 ' so it cannot be read as part of the number.
        unit = self.unit
        if unit in ('e', 'E') or unit.startswith(('e-', 'E-')):
            write('\\65 ')
            write(serialize_name(unit[1:]))
        else:
            write(serialize_identifier(unit))
class ParenthesesBlock(Node):
    """A :diagram:`()-block`.

    .. code-block:: text

        '(' <content> ')'

    .. autoattribute:: type

    .. attribute:: content

        What the block contains, as list of :term:`component values`.
        The list does not include the ``(`` and ``)`` markers themselves.

    """
    __slots__ = ['content']
    type = '() block'
    repr_format = '<{self.__class__.__name__} ( … )>'

    def __init__(self, line, column, content):
        super().__init__(line, column)
        self.content = content

    def _serialize_to(self, write):
        # Markers are written around the serialized content.
        write('(')
        _serialize_to(self.content, write)
        write(')')
class SquareBracketsBlock(Node):
    """A :diagram:`[]-block`.

    .. code-block:: text

        '[' <content> ']'

    .. autoattribute:: type

    .. attribute:: content

        What the block contains, as list of :term:`component values`.
        The list does not include the ``[`` and ``]`` markers themselves.

    """
    __slots__ = ['content']
    type = '[] block'
    repr_format = '<{self.__class__.__name__} [ … ]>'

    def __init__(self, line, column, content):
        super().__init__(line, column)
        self.content = content

    def _serialize_to(self, write):
        # Markers are written around the serialized content.
        write('[')
        _serialize_to(self.content, write)
        write(']')
class CurlyBracketsBlock(Node):
    """A :diagram:`{}-block`.

    .. code-block:: text

        '{' <content> '}'

    .. autoattribute:: type

    .. attribute:: content

        What the block contains, as list of :term:`component values`.
        The list does not include the ``{`` and ``}`` markers themselves.

    """
    __slots__ = ['content']
    type = '{} block'
    repr_format = '<{self.__class__.__name__} {{ … }}>'

    def __init__(self, line, column, content):
        super().__init__(line, column)
        self.content = content

    def _serialize_to(self, write):
        # Markers are written around the serialized content.
        write('{')
        _serialize_to(self.content, write)
        write('}')
class FunctionBlock(Node):
    """A :diagram:`function-block`.

    .. code-block:: text

        <name> '(' <arguments> ')'

    .. autoattribute:: type

    .. attribute:: name

        The unescaped name of the function, as a Unicode string.

    .. attribute:: lower_name

        Same as :attr:`name` but normalized to *ASCII lower case*,
        see :func:`~webencodings.ascii_lower`.
        This is the value to use when comparing to a CSS function name.

    .. attribute:: arguments

        The arguments of the function, as list of :term:`component values`.
        The ``(`` and ``)`` markers themselves are not represented in the list.
        Commas are not special, but represented as :obj:`LiteralToken` objects
        in the list.

    """
    __slots__ = ['name', 'lower_name', 'arguments']
    type = 'function'
    repr_format = '<{self.__class__.__name__} {self.name}( … )>'

    def __init__(self, line, column, name, arguments):
        Node.__init__(self, line, column)
        self.name = name
        try:
            self.lower_name = ascii_lower(name)
        except UnicodeEncodeError:
            # Same fallback as IdentToken and AtKeywordToken in this module:
            # a name that cannot be lower-cased is kept unchanged.
            self.lower_name = name
        self.arguments = arguments

    def _serialize_to(self, write):
        write(serialize_identifier(self.name))
        write('(')
        _serialize_to(self.arguments, write)
        # Walk down the chain of "last argument" nodes through nested
        # functions. If the chain ends on an 'eof-in-string' error, the
        # closing parenthesis is not written (presumably because the
        # unterminated string would swallow it when the output is parsed
        # again -- confirm against the tokenizer).
        # The ``function.arguments`` check stops the walk at a function
        # without arguments, at any depth: indexing ``[-1]`` on an empty
        # list used to raise IndexError for input such as ``a(b())``.
        function = self
        while isinstance(function, FunctionBlock) and function.arguments:
            last_argument = function.arguments[-1]
            if (isinstance(last_argument, ParseError) and
                    last_argument.kind == 'eof-in-string'):
                return
            function = last_argument
        write(')')
class Declaration(Node):
    """A (property or descriptor) :diagram:`declaration`.

    .. code-block:: text

        <name> ':' <value>
        <name> ':' <value> '!important'

    .. autoattribute:: type

    .. attribute:: name

        The unescaped name, as a Unicode string.

    .. attribute:: lower_name

        :attr:`name` normalized to *ASCII lower case*,
        see :func:`~webencodings.ascii_lower`.
        Compare this attribute, not :attr:`name`, against
        a CSS property or descriptor name.

        .. code-block:: python

            if node.type == 'declaration' and node.lower_name == 'color':

    .. attribute:: value

        The declaration value as a list of :term:`component values`:
        anything between ``:`` and
        the end of the declaration, or ``!important``.

    .. attribute:: important

        A boolean, true if the declaration had an ``!important`` marker.
        It is up to the consumer to reject declarations that do not accept
        this flag, such as non-property descriptor declarations.

    """
    __slots__ = ['name', 'lower_name', 'value', 'important']
    type = 'declaration'
    repr_format = '<{self.__class__.__name__} {self.name}: …>'

    def __init__(self, line, column, name, lower_name, value, important):
        super().__init__(line, column)
        self.name, self.lower_name = name, lower_name
        self.value = value
        self.important = important

    def _serialize_to(self, write):
        # Name, colon and value; the marker is appended only when flagged.
        write(serialize_identifier(self.name))
        write(':')
        _serialize_to(self.value, write)
        if not self.important:
            return
        write('!important')
class QualifiedRule(Node):
    """A :diagram:`qualified rule`.

    .. code-block:: text

        <prelude> '{' <content> '}'

    How a qualified rule is interpreted depends on its context.
    At the top-level of a stylesheet
    or in a conditional rule such as ``@media``,
    they are **style rules** where the :attr:`prelude` is a Selectors list
    and the :attr:`content` is a list of property declarations.

    .. autoattribute:: type

    .. attribute:: prelude

        The rule's prelude, the part before the {} block,
        as a list of :term:`component values`.

    .. attribute:: content

        The rule's content, the part inside the {} block,
        as a list of :term:`component values`.

    """
    __slots__ = ['prelude', 'content']
    type = 'qualified-rule'
    repr_format = '<{self.__class__.__name__} {{ … }}>'

    def __init__(self, line, column, prelude, content):
        super().__init__(line, column)
        self.prelude, self.content = prelude, content

    def _serialize_to(self, write):
        # The prelude comes first, then the content wrapped in curly brackets.
        _serialize_to(self.prelude, write)
        write('{')
        _serialize_to(self.content, write)
        write('}')
class AtRule(Node):
    """An :diagram:`at-rule`.

    .. code-block:: text

        @<at_keyword> <prelude> '{' <content> '}'
        @<at_keyword> <prelude> ';'

    How an at-rule is interpreted depends on its at-keyword
    as well as its context.
    Most types of at-rules (ie. at-keyword values)
    are only allowed in some context,
    and must either end with a {} block or a semicolon.

    .. autoattribute:: type

    .. attribute:: at_keyword

        The unescaped value of the rule's at-keyword,
        without the ``@`` symbol, as a Unicode string.

    .. attribute:: lower_at_keyword

        :attr:`at_keyword` normalized to *ASCII lower case*,
        see :func:`~webencodings.ascii_lower`.
        Compare this attribute, not :attr:`at_keyword`,
        against CSS at-keywords.

        .. code-block:: python

            if node.type == 'at-rule' and node.lower_at_keyword == 'import':

    .. attribute:: prelude

        The rule's prelude, the part before the {} block or semicolon,
        as a list of :term:`component values`.

    .. attribute:: content

        The rule's content, if any.
        The block's content as a list of :term:`component values`
        for at-rules with a {} block,
        or :obj:`None` for at-rules ending with a semicolon.

    """
    __slots__ = ['at_keyword', 'lower_at_keyword', 'prelude', 'content']
    type = 'at-rule'
    repr_format = '<{self.__class__.__name__} @{self.at_keyword}{{ … }}>'

    def __init__(self, line, column,
                 at_keyword, lower_at_keyword, prelude, content):
        super().__init__(line, column)
        self.at_keyword, self.lower_at_keyword = at_keyword, lower_at_keyword
        self.prelude, self.content = prelude, content

    def _serialize_to(self, write):
        write('@')
        write(serialize_identifier(self.at_keyword))
        _serialize_to(self.prelude, write)
        if self.content is not None:
            # Block form: the content goes between curly brackets.
            write('{')
            _serialize_to(self.content, write)
            write('}')
        else:
            # Statement form: no block, just a terminating semicolon.
            write(';')

View File

@ -0,0 +1,113 @@
from webencodings import UTF8, decode, lookup
from .parser import parse_stylesheet
def decode_stylesheet_bytes(css_bytes, protocol_encoding=None,
                            environment_encoding=None):
    """Determine the character encoding of a CSS stylesheet and decode it.

    The candidates are tried in this order: the protocol encoding label,
    a leading ``@charset`` rule, the environment encoding, and finally UTF-8.
    The bytes are handed to :func:`webencodings.decode` together with the
    first candidate that is usable.

    :type css_bytes: :obj:`bytes`
    :param css_bytes: A CSS byte string.
    :type protocol_encoding: :obj:`str`
    :param protocol_encoding:
        The encoding label, if any, defined by HTTP or equivalent protocol.
        (e.g. via the ``charset`` parameter of the ``Content-Type`` header.)
    :type environment_encoding: :class:`webencodings.Encoding`
    :param environment_encoding:
        The `environment encoding
        <https://www.w3.org/TR/css-syntax/#environment-encoding>`_, if any.
    :returns:
        A 2-tuple of a decoded Unicode string and the
        :class:`webencodings.Encoding` object that was used.

    """
    # https://drafts.csswg.org/css-syntax/#the-input-byte-stream
    charset_prefix = b'@charset "'

    if protocol_encoding:
        encoding = lookup(protocol_encoding)
        if encoding:
            return decode(css_bytes, encoding)

    if css_bytes.startswith(charset_prefix):
        label_start = len(charset_prefix)
        # The closing quote is only searched in the first 100 bytes; the
        # limit is arbitrary and merely bounds the length of the label.
        label_end = css_bytes.find(b'"', label_start, 100)
        if label_end != -1 and css_bytes.startswith(b'";', label_end):
            label = css_bytes[label_start:label_end].decode('latin1')
            encoding = lookup(label)
            if encoding:
                # A UTF-16 label found this way is replaced by UTF-8.
                if encoding.name in ('utf-16be', 'utf-16le'):
                    encoding = UTF8
                return decode(css_bytes, encoding)

    # Nothing usable so far: environment encoding if given, else UTF-8.
    return decode(css_bytes, environment_encoding or UTF8)
def parse_stylesheet_bytes(css_bytes, protocol_encoding=None,
                           environment_encoding=None,
                           skip_comments=False, skip_whitespace=False):
    """Parse :diagram:`stylesheet` from bytes,
    determining the character encoding as web browsers do.

    This is the entry point to use when reading a file or fetching a URL.
    The bytes are first decoded with :func:`decode_stylesheet_bytes`,
    which looks at the initial bytes
    (a :abbr:`BOM (Byte Order Mark)` or a ``@charset`` rule)
    as well as the parameters, with UTF-8 as the ultimate fallback.
    The decoded text is then parsed as a stylesheet.

    :type css_bytes: :obj:`bytes`
    :param css_bytes: A CSS byte string.
    :type protocol_encoding: :obj:`str`
    :param protocol_encoding:
        The encoding label, if any, defined by HTTP or equivalent protocol.
        (e.g. via the ``charset`` parameter of the ``Content-Type`` header.)
    :type environment_encoding: :class:`webencodings.Encoding`
    :param environment_encoding:
        The `environment encoding`_, if any.
    :type skip_comments: :obj:`bool`
    :param skip_comments:
        Ignore CSS comments at the top-level of the stylesheet.
    :type skip_whitespace: :obj:`bool`
    :param skip_whitespace:
        Ignore whitespace at the top-level of the stylesheet.
        Whitespace is still preserved
        in the :attr:`~tinycss2.ast.QualifiedRule.prelude`
        and the :attr:`~tinycss2.ast.QualifiedRule.content` of rules.
    :returns:
        A ``(rules, encoding)`` tuple.

        * ``rules`` is a list of
          :class:`~tinycss2.ast.QualifiedRule`,
          :class:`~tinycss2.ast.AtRule`,
          :class:`~tinycss2.ast.Comment` (if ``skip_comments`` is false),
          :class:`~tinycss2.ast.WhitespaceToken`
          (if ``skip_whitespace`` is false),
          and :class:`~tinycss2.ast.ParseError` objects.
        * ``encoding`` is the :class:`webencodings.Encoding` object
          that was used.
          If ``rules`` contains an ``@import`` rule, this is
          the `environment encoding`_ for the imported stylesheet.

    .. _environment encoding:
            https://www.w3.org/TR/css-syntax/#environment-encoding

    .. code-block:: python

        response = urlopen('http://example.net/foo.css')
        rules, encoding = parse_stylesheet_bytes(
            css_bytes=response.read(),
            protocol_encoding=response.info().get_content_charset(),
        )
        for rule in rules:
            ...

    """
    decoded = decode_stylesheet_bytes(
        css_bytes, protocol_encoding, environment_encoding)
    css_unicode, encoding = decoded
    rules = parse_stylesheet(css_unicode, skip_comments, skip_whitespace)
    return rules, encoding

View File

@ -0,0 +1,336 @@
import collections
import re
from colorsys import hls_to_rgb
from .parser import parse_one_component_value
class RGBA(collections.namedtuple('RGBA', 'red green blue alpha')):
    """An RGBA color.

    A named tuple of four floats in the 0..1 range:
    ``(red, green, blue, alpha)``.

    .. attribute:: red

        The red channel, by name. Same as ``rgba[0]``.

    .. attribute:: green

        The green channel, by name. Same as ``rgba[1]``.

    .. attribute:: blue

        The blue channel, by name. Same as ``rgba[2]``.

    .. attribute:: alpha

        The alpha channel, by name. Same as ``rgba[3]``.

    """
def parse_color(input):
    """Parse a color value as defined in `CSS Color Level 3
    <https://www.w3.org/TR/css-color-3/>`_.

    :type input: :obj:`str` or a :term:`component value <component values>`
    :param input:
        A string, which is parsed as one component value,
        or an already parsed component value (a single node).
    :returns:
        * :obj:`None` if the input is not a valid color value.
          (No exception is raised.)
        * The string ``'currentColor'`` for the ``currentColor`` keyword
        * Or a :class:`RGBA` object for every other values
          (including keywords, HSL and HSLA.)
          The alpha channel is clipped to [0, 1]
          but red, green, or blue can be out of range
          (eg. ``rgb(-10%, 120%, 0%)`` is represented as
          ``(-0.1, 1.2, 0, 1)``.)

    """
    token = (
        parse_one_component_value(input, skip_comments=True)
        if isinstance(input, str) else input)
    token_type = token.type

    if token_type == 'ident':
        return _COLOR_KEYWORDS.get(token.lower_value)

    if token_type == 'hash':
        for multiplier, match_hex in _HASH_REGEXPS:
            match = match_hex(token.value)
            if not match:
                continue
            # Each captured group is one channel; short forms repeat their
            # single digit (multiplier 2) to build a two-digit hex number.
            channels = [
                int(digits * multiplier, 16) / 255
                for digits in match.groups()]
            if len(channels) == 3:
                channels.append(1.)
            return RGBA(*channels)
        return None

    if token_type != 'function':
        return None

    args = _parse_comma_separated(token.arguments)
    if not args:
        return None

    name = token.lower_name
    if name == 'rgb':
        return _parse_rgb(args, alpha=1.)
    if name == 'hsl':
        return _parse_hsl(args, alpha=1.)
    if name in ('rgba', 'hsla'):
        # The fourth argument is the alpha value, the first three are
        # the channels.
        alpha = _parse_alpha(args[3:])
        if alpha is None:
            return None
        parse_channels = _parse_rgb if name == 'rgba' else _parse_hsl
        return parse_channels(args[:3], alpha)
    return None
def _parse_alpha(args):
"""Parse a list of one alpha value.
If args is a list of a single INTEGER or NUMBER token,
return its value clipped to the 0..1 range. Otherwise, return None.
"""
if len(args) == 1 and args[0].type == 'number':
return min(1, max(0, args[0].value))
def _parse_rgb(args, alpha):
    """Parse a list of RGB channels.

    ``args`` is accepted when it holds exactly three percentage tokens or
    exactly three integer number tokens. The result is then an
    :class:`RGBA` whose color channels are the values divided by 100
    (percentages) or by 255 (integers) and whose alpha is ``alpha``.
    Return None for anything else.

    """
    token_types = [arg.type for arg in args]
    if token_types == ['percentage'] * 3:
        red, green, blue = [arg.value / 100 for arg in args]
        return RGBA(red, green, blue, alpha)
    if token_types == ['number'] * 3 and all(arg.is_integer for arg in args):
        red, green, blue = [arg.int_value / 255 for arg in args]
        return RGBA(red, green, blue, alpha)
    return None
def _parse_hsl(args, alpha):
    """Parse a list of HSL channels.

    ``args`` is accepted when it holds one integer number token (the hue,
    divided by 360) followed by two percentage tokens (saturation and
    lightness). The result is then an :class:`RGBA` computed with
    :func:`colorsys.hls_to_rgb` and carrying ``alpha``.
    Return None for anything else.

    """
    if [arg.type for arg in args] != ['number', 'percentage', 'percentage']:
        return None
    hue, saturation, lightness = args
    if not hue.is_integer:
        return None
    # colorsys takes hue, lightness, saturation -- in that order.
    red, green, blue = hls_to_rgb(
        hue.int_value / 360, lightness.value / 100, saturation.value / 100)
    return RGBA(red, green, blue, alpha)
def _parse_comma_separated(tokens):
"""Parse a list of tokens (typically the content of a function token)
as arguments made of a single token each, separated by mandatory commas,
with optional white space around each argument.
return the argument list without commas or white space;
or None if the function token content do not match the description above.
"""
tokens = [token for token in tokens
if token.type not in ('whitespace', 'comment')]
if not tokens:
return []
if len(tokens) % 2 == 1 and all(token == ',' for token in tokens[1::2]):
return tokens[::2]
_HASH_REGEXPS = (
(2, re.compile('^{}$'.format(4 * '([\\da-f])'), re.I).match),
(1, re.compile('^{}$'.format(4 * '([\\da-f]{2})'), re.I).match),
(2, re.compile('^{}$'.format(3 * '([\\da-f])'), re.I).match),
(1, re.compile('^{}$'.format(3 * '([\\da-f]{2})'), re.I).match),
)
# Color keyword table: (keyword, (r, g, b)) pairs, each channel in 0..255.
# It is concatenated with _EXTENDED_COLOR_KEYWORDS and converted to RGBA
# values (channels in 0..1) when _COLOR_KEYWORDS is built.
_BASIC_COLOR_KEYWORDS = [
    ('black', (0, 0, 0)),
    ('silver', (192, 192, 192)),
    ('gray', (128, 128, 128)),
    ('white', (255, 255, 255)),
    ('maroon', (128, 0, 0)),
    ('red', (255, 0, 0)),
    ('purple', (128, 0, 128)),
    ('fuchsia', (255, 0, 255)),
    ('green', (0, 128, 0)),
    ('lime', (0, 255, 0)),
    ('olive', (128, 128, 0)),
    ('yellow', (255, 255, 0)),
    ('navy', (0, 0, 128)),
    ('blue', (0, 0, 255)),
    ('teal', (0, 128, 128)),
    ('aqua', (0, 255, 255)),
]
# Color keyword table: (keyword, (r, g, b)) pairs, each channel in 0..255,
# sorted by keyword. Both the "gray" and "grey" spellings are listed.
# It is concatenated after _BASIC_COLOR_KEYWORDS when _COLOR_KEYWORDS is
# built; keywords present in both tables carry the same values.
_EXTENDED_COLOR_KEYWORDS = [
    ('aliceblue', (240, 248, 255)),
    ('antiquewhite', (250, 235, 215)),
    ('aqua', (0, 255, 255)),
    ('aquamarine', (127, 255, 212)),
    ('azure', (240, 255, 255)),
    ('beige', (245, 245, 220)),
    ('bisque', (255, 228, 196)),
    ('black', (0, 0, 0)),
    ('blanchedalmond', (255, 235, 205)),
    ('blue', (0, 0, 255)),
    ('blueviolet', (138, 43, 226)),
    ('brown', (165, 42, 42)),
    ('burlywood', (222, 184, 135)),
    ('cadetblue', (95, 158, 160)),
    ('chartreuse', (127, 255, 0)),
    ('chocolate', (210, 105, 30)),
    ('coral', (255, 127, 80)),
    ('cornflowerblue', (100, 149, 237)),
    ('cornsilk', (255, 248, 220)),
    ('crimson', (220, 20, 60)),
    ('cyan', (0, 255, 255)),
    ('darkblue', (0, 0, 139)),
    ('darkcyan', (0, 139, 139)),
    ('darkgoldenrod', (184, 134, 11)),
    ('darkgray', (169, 169, 169)),
    ('darkgreen', (0, 100, 0)),
    ('darkgrey', (169, 169, 169)),
    ('darkkhaki', (189, 183, 107)),
    ('darkmagenta', (139, 0, 139)),
    ('darkolivegreen', (85, 107, 47)),
    ('darkorange', (255, 140, 0)),
    ('darkorchid', (153, 50, 204)),
    ('darkred', (139, 0, 0)),
    ('darksalmon', (233, 150, 122)),
    ('darkseagreen', (143, 188, 143)),
    ('darkslateblue', (72, 61, 139)),
    ('darkslategray', (47, 79, 79)),
    ('darkslategrey', (47, 79, 79)),
    ('darkturquoise', (0, 206, 209)),
    ('darkviolet', (148, 0, 211)),
    ('deeppink', (255, 20, 147)),
    ('deepskyblue', (0, 191, 255)),
    ('dimgray', (105, 105, 105)),
    ('dimgrey', (105, 105, 105)),
    ('dodgerblue', (30, 144, 255)),
    ('firebrick', (178, 34, 34)),
    ('floralwhite', (255, 250, 240)),
    ('forestgreen', (34, 139, 34)),
    ('fuchsia', (255, 0, 255)),
    ('gainsboro', (220, 220, 220)),
    ('ghostwhite', (248, 248, 255)),
    ('gold', (255, 215, 0)),
    ('goldenrod', (218, 165, 32)),
    ('gray', (128, 128, 128)),
    ('green', (0, 128, 0)),
    ('greenyellow', (173, 255, 47)),
    ('grey', (128, 128, 128)),
    ('honeydew', (240, 255, 240)),
    ('hotpink', (255, 105, 180)),
    ('indianred', (205, 92, 92)),
    ('indigo', (75, 0, 130)),
    ('ivory', (255, 255, 240)),
    ('khaki', (240, 230, 140)),
    ('lavender', (230, 230, 250)),
    ('lavenderblush', (255, 240, 245)),
    ('lawngreen', (124, 252, 0)),
    ('lemonchiffon', (255, 250, 205)),
    ('lightblue', (173, 216, 230)),
    ('lightcoral', (240, 128, 128)),
    ('lightcyan', (224, 255, 255)),
    ('lightgoldenrodyellow', (250, 250, 210)),
    ('lightgray', (211, 211, 211)),
    ('lightgreen', (144, 238, 144)),
    ('lightgrey', (211, 211, 211)),
    ('lightpink', (255, 182, 193)),
    ('lightsalmon', (255, 160, 122)),
    ('lightseagreen', (32, 178, 170)),
    ('lightskyblue', (135, 206, 250)),
    ('lightslategray', (119, 136, 153)),
    ('lightslategrey', (119, 136, 153)),
    ('lightsteelblue', (176, 196, 222)),
    ('lightyellow', (255, 255, 224)),
    ('lime', (0, 255, 0)),
    ('limegreen', (50, 205, 50)),
    ('linen', (250, 240, 230)),
    ('magenta', (255, 0, 255)),
    ('maroon', (128, 0, 0)),
    ('mediumaquamarine', (102, 205, 170)),
    ('mediumblue', (0, 0, 205)),
    ('mediumorchid', (186, 85, 211)),
    ('mediumpurple', (147, 112, 219)),
    ('mediumseagreen', (60, 179, 113)),
    ('mediumslateblue', (123, 104, 238)),
    ('mediumspringgreen', (0, 250, 154)),
    ('mediumturquoise', (72, 209, 204)),
    ('mediumvioletred', (199, 21, 133)),
    ('midnightblue', (25, 25, 112)),
    ('mintcream', (245, 255, 250)),
    ('mistyrose', (255, 228, 225)),
    ('moccasin', (255, 228, 181)),
    ('navajowhite', (255, 222, 173)),
    ('navy', (0, 0, 128)),
    ('oldlace', (253, 245, 230)),
    ('olive', (128, 128, 0)),
    ('olivedrab', (107, 142, 35)),
    ('orange', (255, 165, 0)),
    ('orangered', (255, 69, 0)),
    ('orchid', (218, 112, 214)),
    ('palegoldenrod', (238, 232, 170)),
    ('palegreen', (152, 251, 152)),
    ('paleturquoise', (175, 238, 238)),
    ('palevioletred', (219, 112, 147)),
    ('papayawhip', (255, 239, 213)),
    ('peachpuff', (255, 218, 185)),
    ('peru', (205, 133, 63)),
    ('pink', (255, 192, 203)),
    ('plum', (221, 160, 221)),
    ('powderblue', (176, 224, 230)),
    ('purple', (128, 0, 128)),
    ('red', (255, 0, 0)),
    ('rosybrown', (188, 143, 143)),
    ('royalblue', (65, 105, 225)),
    ('saddlebrown', (139, 69, 19)),
    ('salmon', (250, 128, 114)),
    ('sandybrown', (244, 164, 96)),
    ('seagreen', (46, 139, 87)),
    ('seashell', (255, 245, 238)),
    ('sienna', (160, 82, 45)),
    ('silver', (192, 192, 192)),
    ('skyblue', (135, 206, 235)),
    ('slateblue', (106, 90, 205)),
    ('slategray', (112, 128, 144)),
    ('slategrey', (112, 128, 144)),
    ('snow', (255, 250, 250)),
    ('springgreen', (0, 255, 127)),
    ('steelblue', (70, 130, 180)),
    ('tan', (210, 180, 140)),
    ('teal', (0, 128, 128)),
    ('thistle', (216, 191, 216)),
    ('tomato', (255, 99, 71)),
    ('turquoise', (64, 224, 208)),
    ('violet', (238, 130, 238)),
    ('wheat', (245, 222, 179)),
    ('white', (255, 255, 255)),
    ('whitesmoke', (245, 245, 245)),
    ('yellow', (255, 255, 0)),
    ('yellowgreen', (154, 205, 50)),
]
# Keywords that are not plain opaque colors: each maps either to a string
# marker or to an RGBA value with channels in 0..1.
_SPECIAL_COLOR_KEYWORDS = {
    'currentcolor': 'currentColor',
    'transparent': RGBA(0., 0., 0., 0.),
}

# Lookup table used for ident tokens: lower-case keyword -> RGBA value
# (channels in 0..1) or string marker. The special keywords come first,
# then every keyword of the basic and extended tables as an opaque color.
_COLOR_KEYWORDS = dict(_SPECIAL_COLOR_KEYWORDS)
_COLOR_KEYWORDS.update({
    # 255 maps to 1, 0 to 0, the rest is linear.
    keyword: RGBA(r / 255., g / 255., b / 255., 1.)
    for keyword, (r, g, b) in _BASIC_COLOR_KEYWORDS + _EXTENDED_COLOR_KEYWORDS
})

View File

@ -0,0 +1,100 @@
import re
from .parser import _next_significant, _to_token_iterator
def parse_nth(input):
    """Parse `<An+B> <https://drafts.csswg.org/css-syntax-3/#anb>`_,
    as found in `:nth-child()
    <https://drafts.csswg.org/selectors/#nth-child-pseudo>`_
    and related Selector pseudo-classes.

    Although tinycss2 does not include a full Selector parser,
    this bit of syntax is included as it is particularly tricky to define
    on top of a CSS tokenizer.

    :type input: :obj:`str` or :term:`iterable`
    :param input: A string or an iterable of :term:`component values`.
    :returns:
        A ``(a, b)`` tuple of integers, or :obj:`None` if the input is invalid.

    """
    tokens = _to_token_iterator(input, skip_comments=True)
    token = _next_significant(tokens)
    if token is None:
        return None

    token_type = token.type
    if token_type == 'number' and token.is_integer:
        # A lone integer is the B part; A is zero.
        return parse_end(tokens, 0, token.int_value)
    elif token_type == 'dimension' and token.is_integer:
        unit = token.lower_unit
        if unit == 'n':
            return parse_b(tokens, token.int_value)
        elif unit == 'n-':
            return parse_signless_b(tokens, token.int_value, -1)
        else:
            # The unit itself can carry B: "3n-2" is a single dimension.
            match = N_DASH_DIGITS_RE.match(unit)
            if match:
                return parse_end(tokens, token.int_value, int(match.group(1)))
    elif token_type == 'ident':
        ident = token.lower_value
        if ident == 'even':
            return parse_end(tokens, 2, 0)
        elif ident == 'odd':
            return parse_end(tokens, 2, 1)
        elif ident == 'n':
            return parse_b(tokens, 1)
        elif ident == '-n':
            return parse_b(tokens, -1)
        elif ident == 'n-':
            return parse_signless_b(tokens, 1, -1)
        elif ident == '-n-':
            return parse_signless_b(tokens, -1, -1)
        elif ident.startswith('-'):
            # startswith() rather than ident[0]: safe on an empty value.
            match = N_DASH_DIGITS_RE.match(ident[1:])
            if match:
                return parse_end(tokens, -1, int(match.group(1)))
        else:
            match = N_DASH_DIGITS_RE.match(ident)
            if match:
                return parse_end(tokens, 1, int(match.group(1)))
    elif token == '+':
        # Whitespace after an initial '+' is invalid, so the very next token
        # is inspected. A default is given to next() so that an input ending
        # right after the '+' is reported as invalid (None) instead of
        # leaking StopIteration to the caller.
        token = next(tokens, None)
        if token is not None and token.type == 'ident':
            ident = token.lower_value
            if ident == 'n':
                return parse_b(tokens, 1)
            elif ident == 'n-':
                return parse_signless_b(tokens, 1, -1)
            else:
                match = N_DASH_DIGITS_RE.match(ident)
                if match:
                    return parse_end(tokens, 1, int(match.group(1)))
    return None
def parse_b(tokens, a):
    """Parse what follows the ``An`` part of ``An+B``.

    :returns:
        ``(a, 0)`` when nothing significant is left, the result of parsing
        the ``B`` part otherwise, or :obj:`None` if the input is invalid.

    """
    token = _next_significant(tokens)
    if token is None:
        return (a, 0)
    if token == '+':
        return parse_signless_b(tokens, a, 1)
    if token == '-':
        return parse_signless_b(tokens, a, -1)
    # Without a separate sign token, B must be an integer written with
    # an explicit sign, as in "n +3".
    has_signed_integer = (
        token.type == 'number' and token.is_integer and
        token.representation[0] in '-+')
    if has_signed_integer:
        return parse_end(tokens, a, token.int_value)
    return None
def parse_signless_b(tokens, a, b_sign):
    """Parse the unsigned integer that must follow a separate sign.

    :param tokens: An iterator yielding :term:`component values`.
    :param a: The already parsed ``A`` value.
    :param b_sign: ``1`` or ``-1``, the sign that was seen before the digits.
    :returns:
        An ``(a, b)`` tuple, or :obj:`None` if the input is invalid,
        including when the input ends right after the sign.

    """
    token = _next_significant(tokens)
    # _next_significant() returns None at the end of the input (for example
    # "n -"): that is invalid input, not an AttributeError.
    if (token is not None and token.type == 'number' and token.is_integer
            and token.representation[0] not in '-+'):
        return parse_end(tokens, a, b_sign * token.int_value)
    return None
def parse_end(tokens, a, b):
    """Return ``(a, b)`` if no significant token is left, else :obj:`None`."""
    leftover = _next_significant(tokens)
    if leftover is not None:
        return None
    return (a, b)
# Matches "n-<digits>" and captures the signed B part ("-<digits>").
# The end is anchored with \Z rather than $: $ also matches just before a
# trailing newline, which an identifier can contain through an escape.
N_DASH_DIGITS_RE = re.compile(r'^n(-[0-9]+)\Z')

View File

@ -0,0 +1,377 @@
from .ast import AtRule, Declaration, ParseError, QualifiedRule
from .tokenizer import parse_component_value_list
def _to_token_iterator(input, skip_comments=False):
"""Iterate component values out of string or component values iterable.
:type input: :obj:`str` or :term:`iterable`
:param input: A string or an iterable of :term:`component values`.
:type skip_comments: :obj:`bool`
:param skip_comments: If the input is a string, ignore all CSS comments.
:returns: An iterator yielding :term:`component values`.
"""
# Accept ASCII-only byte strings on Python 2, with implicit conversion.
if isinstance(input, str):
input = parse_component_value_list(input, skip_comments)
return iter(input)
def _next_significant(tokens):
"""Return the next significant (neither whitespace or comment) token.
:type tokens: :term:`iterator`
:param tokens: An iterator yielding :term:`component values`.
:returns: A :term:`component value`, or :obj:`None`.
"""
for token in tokens:
if token.type not in ('whitespace', 'comment'):
return token
def parse_one_component_value(input, skip_comments=False):
    """Parse a single :diagram:`component value`.

    This is used e.g. for an attribute value
    referred to by ``attr(foo length)``.

    :type input: :obj:`str` or :term:`iterable`
    :param input: A string or an iterable of :term:`component values`.
    :type skip_comments: :obj:`bool`
    :param skip_comments: If the input is a string, ignore all CSS comments.
    :returns:
        A :term:`component value` (that is neither whitespace or comment),
        or a :class:`~tinycss2.ast.ParseError`.

    """
    tokens = _to_token_iterator(input, skip_comments)
    first, second = _next_significant(tokens), _next_significant(tokens)
    if first is None:
        return ParseError(1, 1, 'empty', 'Input is empty')
    if second is None:
        return first
    # Anything significant after the first value makes the input invalid.
    return ParseError(
        second.source_line, second.source_column, 'extra-input',
        'Got more than one token')
def parse_one_declaration(input, skip_comments=False):
    """Parse a single :diagram:`declaration`.

    This is used e.g. for a declaration in an `@supports
    <https://drafts.csswg.org/css-conditional/#at-supports>`_ test.

    Any whitespace or comment before the ``:`` colon is dropped.

    :type input: :obj:`str` or :term:`iterable`
    :param input: A string or an iterable of :term:`component values`.
    :type skip_comments: :obj:`bool`
    :param skip_comments: If the input is a string, ignore all CSS comments.
    :returns:
        A :class:`~tinycss2.ast.Declaration`
        or :class:`~tinycss2.ast.ParseError`.

    """
    tokens = _to_token_iterator(input, skip_comments)
    name_token = _next_significant(tokens)
    if name_token is not None:
        return _parse_declaration(name_token, tokens)
    return ParseError(1, 1, 'empty', 'Input is empty')
def _parse_declaration(first_token, tokens):
    """Parse a declaration.

    Consume :obj:`tokens` until the end of the declaration or the first error.

    The declaration is flagged important when its last two significant
    tokens are ``!`` followed by an ``important`` identifier (compared
    case-insensitively). Those tokens, and anything after them, are removed
    from the value.

    :type first_token: :term:`component value`
    :param first_token: The first component value of the rule.
    :type tokens: :term:`iterator`
    :param tokens: An iterator yielding :term:`component values`.
    :returns:
        A :class:`~tinycss2.ast.Declaration`
        or :class:`~tinycss2.ast.ParseError`.

    """
    name = first_token
    if name.type != 'ident':
        return ParseError(name.source_line, name.source_column, 'invalid',
                          'Expected <ident> for declaration name, got %s.'
                          % name.type)

    colon = _next_significant(tokens)
    if colon is None:
        return ParseError(name.source_line, name.source_column, 'invalid',
                          "Expected ':' after declaration name, got EOF")
    elif colon != ':':
        return ParseError(colon.source_line, colon.source_column, 'invalid',
                          "Expected ':' after declaration name, got %s."
                          % colon.type)

    value = []
    # 'value': plain value tokens, 'bang': just after a '!',
    # 'important': just after '!' then 'important'.
    state = 'value'
    bang_position = None
    for i, token in enumerate(tokens):
        if token == '!':
            # Any '!' (re)starts the detection, whatever came before:
            # in "! !important" the second '!' is the one that counts.
            # Every token is appended to value, so i is also its index there.
            state = 'bang'
            bang_position = i
        elif (state == 'bang' and token.type == 'ident'
                and token.lower_value == 'important'):
            state = 'important'
        elif token.type not in ('whitespace', 'comment'):
            # Another significant token: an earlier '!' or '!important'
            # was not at the end of the declaration after all.
            state = 'value'
        value.append(token)

    important = state == 'important'
    if important:
        del value[bang_position:]

    return Declaration(name.source_line, name.source_column, name.value,
                       name.lower_value, value, important)
def _consume_declaration_in_list(first_token, tokens):
    """Like :func:`_parse_declaration`, but stop at the first ``;``.

    The tokens up to the ``;`` are always consumed from :obj:`tokens`,
    even when the declaration turns out to be invalid early on.

    """
    declaration_tokens = []
    for token in tokens:
        if token != ';':
            declaration_tokens.append(token)
            continue
        break
    return _parse_declaration(first_token, iter(declaration_tokens))
def parse_declaration_list(input, skip_comments=False, skip_whitespace=False):
    """Parse a :diagram:`declaration list` (which may also contain at-rules).

    This is used e.g. for the :attr:`~tinycss2.ast.QualifiedRule.content`
    of a style rule or ``@page`` rule,
    or for the ``style`` attribute of an HTML element.

    In contexts that don't expect any at-rule,
    all :class:`~tinycss2.ast.AtRule` objects
    should simply be rejected as invalid.

    :type input: :obj:`str` or :term:`iterable`
    :param input: A string or an iterable of :term:`component values`.
    :type skip_comments: :obj:`bool`
    :param skip_comments:
        Ignore CSS comments at the top-level of the list.
        If the input is a string, ignore all comments.
    :type skip_whitespace: :obj:`bool`
    :param skip_whitespace:
        Ignore whitespace at the top-level of the list.
        Whitespace is still preserved
        in the :attr:`~tinycss2.ast.Declaration.value` of declarations
        and the :attr:`~tinycss2.ast.AtRule.prelude`
        and :attr:`~tinycss2.ast.AtRule.content` of at-rules.
    :returns:
        A list of
        :class:`~tinycss2.ast.Declaration`,
        :class:`~tinycss2.ast.AtRule`,
        :class:`~tinycss2.ast.Comment` (if ``skip_comments`` is false),
        :class:`~tinycss2.ast.WhitespaceToken`
        (if ``skip_whitespace`` is false),
        and :class:`~tinycss2.ast.ParseError` objects

    """
    tokens = _to_token_iterator(input, skip_comments)
    items = []
    for token in tokens:
        kind = token.type
        if kind == 'whitespace':
            if skip_whitespace:
                continue
            items.append(token)
        elif kind == 'comment':
            if skip_comments:
                continue
            items.append(token)
        elif kind == 'at-keyword':
            items.append(_consume_at_rule(token, tokens))
        elif token != ';':
            # A stray ';' is dropped; anything else starts a declaration.
            items.append(_consume_declaration_in_list(token, tokens))
    return items
def parse_one_rule(input, skip_comments=False):
    """Parse a single :diagram:`qualified rule` or :diagram:`at-rule`.

    This would be used e.g. by `insertRule()
    <https://drafts.csswg.org/cssom/#dom-cssstylesheet-insertrule>`_
    in an implementation of CSSOM.

    Any whitespace or comment before or after the rule is dropped.

    :type input: :obj:`str` or :term:`iterable`
    :param input: A string or an iterable of :term:`component values`.
    :type skip_comments: :obj:`bool`
    :param skip_comments:
        If the input is a string, ignore all CSS comments.
    :returns:
        A :class:`~tinycss2.ast.QualifiedRule`,
        :class:`~tinycss2.ast.AtRule`,
        or :class:`~tinycss2.ast.ParseError` objects.

    """
    tokens = _to_token_iterator(input, skip_comments)
    first = _next_significant(tokens)
    if first is None:
        return ParseError(1, 1, 'empty', 'Input is empty')

    rule = _consume_rule(first, tokens)
    trailing = _next_significant(tokens)
    if trailing is None:
        return rule
    message = ('Expected a single rule, got %s after the first rule.'
               % trailing.type)
    return ParseError(
        trailing.source_line, trailing.source_column, 'extra-input', message)
def parse_rule_list(input, skip_comments=False, skip_whitespace=False):
    """Parse a non-top-level :diagram:`rule list`.

    This is used for parsing the :attr:`~tinycss2.ast.AtRule.content`
    of nested rules like ``@media``.
    This differs from :func:`parse_stylesheet` in that
    top-level ``<!--`` and ``-->`` tokens are not ignored.

    :type input: :obj:`str` or :term:`iterable`
    :param input: A string or an iterable of :term:`component values`.
    :type skip_comments: :obj:`bool`
    :param skip_comments:
        Ignore CSS comments at the top-level of the list.
        If the input is a string, ignore all comments.
    :type skip_whitespace: :obj:`bool`
    :param skip_whitespace:
        Ignore whitespace at the top-level of the list.
        Whitespace is still preserved
        in the :attr:`~tinycss2.ast.QualifiedRule.prelude`
        and the :attr:`~tinycss2.ast.QualifiedRule.content` of rules.
    :returns:
        A list of
        :class:`~tinycss2.ast.QualifiedRule`,
        :class:`~tinycss2.ast.AtRule`,
        :class:`~tinycss2.ast.Comment` (if ``skip_comments`` is false),
        :class:`~tinycss2.ast.WhitespaceToken`
        (if ``skip_whitespace`` is false),
        and :class:`~tinycss2.ast.ParseError` objects.

    """
    tokens = _to_token_iterator(input, skip_comments)
    rules = []
    for token in tokens:
        kind = token.type
        if kind == 'whitespace':
            keep = not skip_whitespace
        elif kind == 'comment':
            keep = not skip_comments
        else:
            # Anything else starts a rule, which consumes further tokens.
            rules.append(_consume_rule(token, tokens))
            continue
        if keep:
            rules.append(token)
    return rules
def parse_stylesheet(input, skip_comments=False, skip_whitespace=False):
    """Parse :diagram:`stylesheet` from text.

    This is used e.g. for a ``<style>`` HTML element.

    This differs from :func:`parse_rule_list` in that
    top-level ``<!--`` and ``-->`` tokens are ignored.
    This is a legacy quirk for the ``<style>`` HTML element.

    :type input: :obj:`str` or :term:`iterable`
    :param input: A string or an iterable of :term:`component values`.
    :type skip_comments: :obj:`bool`
    :param skip_comments:
        Ignore CSS comments at the top-level of the stylesheet.
        If the input is a string, ignore all comments.
    :type skip_whitespace: :obj:`bool`
    :param skip_whitespace:
        Ignore whitespace at the top-level of the stylesheet.
        Whitespace is still preserved
        in the :attr:`~tinycss2.ast.QualifiedRule.prelude`
        and the :attr:`~tinycss2.ast.QualifiedRule.content` of rules.
    :returns:
        A list of
        :class:`~tinycss2.ast.QualifiedRule`,
        :class:`~tinycss2.ast.AtRule`,
        :class:`~tinycss2.ast.Comment` (if ``skip_comments`` is false),
        :class:`~tinycss2.ast.WhitespaceToken`
        (if ``skip_whitespace`` is false),
        and :class:`~tinycss2.ast.ParseError` objects.

    """
    tokens = _to_token_iterator(input, skip_comments)
    rules = []
    for token in tokens:
        kind = token.type
        if kind == 'whitespace':
            if skip_whitespace:
                continue
            rules.append(token)
        elif kind == 'comment':
            if skip_comments:
                continue
            rules.append(token)
        elif token not in ('<!--', '-->'):
            # The HTML comment delimiters are silently dropped.
            rules.append(_consume_rule(token, tokens))
    return rules
def _consume_rule(first_token, tokens):
    """Parse a qualified rule or at-rule.

    Consume just enough of :obj:`tokens` for this rule.

    :type first_token: :term:`component value`
    :param first_token: The first component value of the rule.
    :type tokens: :term:`iterator`
    :param tokens: An iterator yielding :term:`component values`.
    :returns:
        A :class:`~tinycss2.ast.QualifiedRule`,
        :class:`~tinycss2.ast.AtRule`,
        or :class:`~tinycss2.ast.ParseError`.

    """
    if first_token.type == 'at-keyword':
        return _consume_at_rule(first_token, tokens)

    prelude = []
    block = None
    if first_token.type == '{} block':
        # The rule has an empty prelude.
        block = first_token
    else:
        prelude.append(first_token)
        # Everything before the first {} block belongs to the prelude.
        for token in tokens:
            if token.type == '{} block':
                block = token
                break
            prelude.append(token)
        if block is None:
            last = prelude[-1]
            return ParseError(
                last.source_line, last.source_column, 'invalid',
                'EOF reached before {} block for a qualified rule.')
    return QualifiedRule(first_token.source_line, first_token.source_column,
                         prelude, block.content)
def _consume_at_rule(at_keyword, tokens):
    """Parse an at-rule.

    Consume just enough of :obj:`tokens` for this rule:
    up to and including the first ``{}`` block or ``;``,
    or everything if there is neither.

    :type at_keyword: :class:`AtKeywordToken`
    :param at_keyword: The at-rule keyword token starting this rule.
    :type tokens: :term:`iterator`
    :param tokens: An iterator yielding :term:`component values`.
    :returns:
        A :class:`~tinycss2.ast.AtRule`, whose content is :obj:`None`
        when the rule has no ``{}`` block.

    """
    prelude = []
    content = None
    for token in tokens:
        if token.type == '{} block':
            content = token.content
        elif token != ';':
            prelude.append(token)
            continue
        # Reached the block or the ';' that ends the rule.
        break
    return AtRule(at_keyword.source_line, at_keyword.source_column,
                  at_keyword.value, at_keyword.lower_value, prelude, content)

View File

@ -0,0 +1,141 @@
def serialize(nodes):
    """Serialize nodes to CSS syntax.

    This should be used for :term:`component values`
    instead of just :meth:`tinycss2.ast.Node.serialize` on each node
    as it takes care of corner cases such as ``;`` between declarations,
    and consecutive identifiers
    that would otherwise parse back as the same token.

    :type nodes: :term:`iterable`
    :param nodes: An iterable of :class:`tinycss2.ast.Node` objects.
    :returns: A :obj:`string <str>` representing the nodes.

    """
    pieces = []
    collect = pieces.append
    _serialize_to(nodes, collect)
    return ''.join(pieces)
def serialize_identifier(value):
    """Serialize any string as a CSS identifier

    :type value: :obj:`str`
    :param value: A string representing a CSS value.
    :returns:
        A :obj:`string <str>` that would parse as an
        :class:`tinycss2.ast.IdentToken` whose
        :attr:`tinycss2.ast.IdentToken.value` attribute equals the passed
        ``value`` argument.
        The empty string is returned unchanged: there is nothing to escape,
        and no identifier has an empty value.

    """
    if not value:
        # Guard the value[0] lookups below against an empty string.
        return ''

    if value == '-':
        # A lone hyphen is not an identifier unless escaped.
        return r'\-'

    if value[:2] == '--':
        return '--' + serialize_name(value[2:])

    if value[0] == '-':
        result = '-'
        value = value[1:]
    else:
        result = ''
    # The first character (after an optional hyphen) is more restricted
    # than the following ones: in particular a digit must be escaped.
    c = value[0]
    result += (
        c if c in ('abcdefghijklmnopqrstuvwxyz_'
                   'ABCDEFGHIJKLMNOPQRSTUVWXYZ') or ord(c) > 0x7F else
        r'\A ' if c == '\n' else
        r'\D ' if c == '\r' else
        r'\C ' if c == '\f' else
        '\\%X ' % ord(c) if c in '0123456789' else
        '\\' + c
    )
    result += serialize_name(value[1:])
    return result
def serialize_name(value):
    """Escape a string so that it can be written as part of a CSS name.

    ASCII letters, digits, ``-``, ``_`` and non-ASCII characters are kept.
    Newline, carriage return and form feed become hexadecimal escapes.
    Any other character is prefixed with a backslash.

    """
    unescaped = ('abcdefghijklmnopqrstuvwxyz-_0123456789'
                 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    hex_escapes = {'\n': r'\A ', '\r': r'\D ', '\f': r'\C '}
    pieces = []
    for char in value:
        if char in unescaped or ord(char) > 0x7F:
            pieces.append(char)
        else:
            pieces.append(hex_escapes.get(char, '\\' + char))
    return ''.join(pieces)
def serialize_string_value(value):
    """Escape a string for use between double quotes in CSS.

    Double quotes and backslashes are backslash-escaped.
    Newline, carriage return and form feed become hexadecimal escapes.
    Every other character is kept as is.

    """
    escapes = {
        '"': r'\"',
        '\\': r'\\',
        '\n': r'\A ',
        '\r': r'\D ',
        '\f': r'\C ',
    }
    return ''.join(escapes.get(char, char) for char in value)
def serialize_url(value):
    """Escape a string for use as an unquoted ``url()`` value.

    Quotes, backslashes, spaces and parentheses are backslash-escaped.
    Tab, newline, carriage return and form feed become hexadecimal escapes.
    Every other character is kept as is.

    """
    escapes = {
        "'": r"\'",
        '"': r'\"',
        '\\': r'\\',
        ' ': r'\ ',
        '\t': r'\9 ',
        '\n': r'\A ',
        '\r': r'\D ',
        '\f': r'\C ',
        '(': r'\(',
        ')': r'\)',
    }
    return ''.join(escapes.get(char, char) for char in value)
# https://drafts.csswg.org/css-syntax/#serialization-tables
def _serialize_to(nodes, write):
    """Serialize an iterable of nodes to CSS syntax.

    Write chunks as a string by calling the provided :obj:`write` callback.

    """
    bad_pairs = BAD_PAIRS
    previous_type = None
    for node in nodes:
        # Literal tokens are identified by their value rather than their type.
        current_type = node.value if node.type == 'literal' else node.type
        if (previous_type, current_type) in bad_pairs:
            # Keep the two tokens apart with an empty comment.
            write('/**/')
        elif previous_type == '\\':
            newline_follows = (
                current_type == 'whitespace' and
                node.value.startswith('\n'))
            if not newline_follows:
                write('\n')
        node._serialize_to(write)
        if current_type == 'declaration':
            write(';')
        previous_type = current_type
# Pairs of (previous, next) serialization types between which
# _serialize_to() writes an empty comment.
BAD_PAIRS = (
    {(a, b)
     for a in ('ident', 'at-keyword', 'hash', 'dimension', '#', '-',
               'number')
     for b in ('ident', 'function', 'url', 'number', 'percentage',
               'dimension', 'unicode-range')}
    | {(a, b)
       for a in ('ident', 'at-keyword', 'hash', 'dimension')
       for b in ('-', '-->')}
    | {(a, b)
       for a in ('#', '-', 'number', '@')
       for b in ('ident', 'function', 'url')}
    | {(a, b)
       for a in ('unicode-range', '.', '+')
       for b in ('number', 'percentage', 'dimension')}
    | {('@', b) for b in ('ident', 'function', 'url', 'unicode-range', '-')}
    | {('unicode-range', b) for b in ('ident', 'function', '?')}
    | {(a, '=') for a in '$*^~|'}
    | {('ident', '() block'), ('|', '|'), ('/', '*')}
)

View File

@ -0,0 +1,424 @@
import re
import sys
from webencodings import ascii_lower
from .ast import (
AtKeywordToken, Comment, CurlyBracketsBlock, DimensionToken, FunctionBlock,
HashToken, IdentToken, LiteralToken, NumberToken, ParenthesesBlock,
ParseError, PercentageToken, SquareBracketsBlock, StringToken,
UnicodeRangeToken, URLToken, WhitespaceToken)
from .serializer import serialize_string_value, serialize_url
# A CSS number: optional sign, optional "digits." prefix, digits, then an
# optional exponent. The two groups capture the "digits." prefix and the
# exponent; the tokenizer treats the number as an integer when both are empty.
_NUMBER_RE = re.compile(r'[-+]?([0-9]*\.)?[0-9]+([eE][+-]?[0-9]+)?')
# The part of a hexadecimal escape that follows the backslash: one to six
# hex digits (captured), then at most one space, newline or tab.
_HEX_ESCAPE_RE = re.compile(r'([0-9A-Fa-f]{1,6})[ \n\t]?')
def parse_component_value_list(css, skip_comments=False):
    """Parse a list of component values.

    The input is scanned in one pass. Functions and ``{}``, ``[]``, ``()``
    blocks are nested by switching the list that new tokens are appended to;
    the enclosing lists are kept on an explicit stack. No error is recorded
    for blocks that are still open when the input ends.

    :type css: :obj:`str`
    :param css: A CSS string.
    :type skip_comments: :obj:`bool`
    :param skip_comments:
        Ignore CSS comments.
        The return values (and recursively its blocks and functions)
        will not contain any :class:`~tinycss2.ast.Comment` object.
    :returns: A list of :term:`component values`.

    """
    # Preprocessing: NUL becomes U+FFFD and every newline flavor (CRLF, CR,
    # form feed) becomes a single '\n'.
    css = (css.replace('\0', '\uFFFD')
           # This turns out to be faster than a regexp:
           .replace('\r\n', '\n').replace('\r', '\n').replace('\f', '\n'))
    length = len(css)
    token_start_pos = pos = 0  # Character index in the css source.
    line = 1  # First line is line 1.
    last_newline = -1
    root = tokens = []
    end_char = None  # Pop the stack when encountering this character.
    stack = []  # Stack of nested blocks: (tokens, end_char) tuples.

    while pos < length:
        # Update the line number from the newlines found in the span of the
        # previous token, that is css[token_start_pos:pos].
        newline = css.rfind('\n', token_start_pos, pos)
        if newline != -1:
            line += 1 + css.count('\n', token_start_pos, newline)
            last_newline = newline
        # First character in a line is in column 1.
        column = pos - last_newline
        token_start_pos = pos
        c = css[pos]

        # The branches of this first chain all end with ``continue``.
        if c in ' \n\t':
            # A run of whitespace is a single token.
            pos += 1
            while css.startswith((' ', '\n', '\t'), pos):
                pos += 1
            value = css[token_start_pos:pos]
            tokens.append(WhitespaceToken(line, column, value))
            continue
        elif (c in 'Uu' and pos + 2 < length and css[pos + 1] == '+' and
                css[pos + 2] in '0123456789abcdefABCDEF?'):
            start, end, pos = _consume_unicode_range(css, pos + 2)
            tokens.append(UnicodeRangeToken(line, column, start, end))
            continue
        elif css.startswith('-->', pos):  # Check before identifiers
            tokens.append(LiteralToken(line, column, '-->'))
            pos += 3
            continue
        elif _is_ident_start(css, pos):
            value, pos = _consume_ident(css, pos)
            if not css.startswith('(', pos):  # Not a function
                tokens.append(IdentToken(line, column, value))
                continue
            pos += 1  # Skip the '('
            if ascii_lower(value) == 'url':
                # Look past whitespace to see whether the argument is quoted.
                url_pos = pos
                while css.startswith((' ', '\n', '\t'), url_pos):
                    url_pos += 1
                # Only an unquoted argument makes a URL token. With a quoted
                # one, url( is handled below like any other function.
                if url_pos >= length or css[url_pos] not in ('"', "'"):
                    value, pos, error = _consume_url(css, pos)
                    if value is not None:
                        repr = 'url({})'.format(serialize_url(value))
                        if error is not None:
                            # The input ended early: trim the end of the
                            # representation accordingly.
                            error_key = error[0]
                            # NOTE(review): _consume_url is only called here
                            # for unquoted URLs, so 'eof-in-string' looks
                            # unreachable from this call site - confirm.
                            if error_key == 'eof-in-string':
                                repr = repr[:-2]
                            else:
                                assert error_key == 'eof-in-url'
                                repr = repr[:-1]
                        tokens.append(URLToken(line, column, value, repr))
                    if error is not None:
                        tokens.append(ParseError(line, column, *error))
                    continue
            # Function: following tokens go to its arguments until the
            # matching ')'.
            arguments = []
            tokens.append(FunctionBlock(line, column, value, arguments))
            stack.append((tokens, end_char))
            end_char = ')'
            tokens = arguments
            continue

        match = _NUMBER_RE.match(css, pos)
        if match:
            pos = match.end()
            repr_ = css[token_start_pos:pos]
            value = float(repr_)
            # Integer only if the regex matched neither a fractional prefix
            # nor an exponent (both groups empty).
            int_value = int(repr_) if not any(match.groups()) else None
            if pos < length and _is_ident_start(css, pos):
                # A number directly followed by an identifier (the unit).
                unit, pos = _consume_ident(css, pos)
                tokens.append(DimensionToken(
                    line, column, value, int_value, repr_, unit))
            elif css.startswith('%', pos):
                pos += 1
                tokens.append(PercentageToken(
                    line, column, value, int_value, repr_))
            else:
                tokens.append(NumberToken(
                    line, column, value, int_value, repr_))
        elif c == '@':
            pos += 1
            if pos < length and _is_ident_start(css, pos):
                value, pos = _consume_ident(css, pos)
                tokens.append(AtKeywordToken(line, column, value))
            else:
                # A lone '@' is a plain literal.
                tokens.append(LiteralToken(line, column, '@'))
        elif c == '#':
            pos += 1
            if pos < length and (
                    css[pos] in '0123456789abcdefghijklmnopqrstuvwxyz'
                                '-_ABCDEFGHIJKLMNOPQRSTUVWXYZ' or
                    ord(css[pos]) > 0x7F or  # Non-ASCII
                    # Valid escape:
                    (css[pos] == '\\' and not css.startswith('\\\n', pos))):
                # Record whether what follows '#' would also be a valid
                # identifier (a hash may start with a digit, for example).
                is_identifier = _is_ident_start(css, pos)
                value, pos = _consume_ident(css, pos)
                tokens.append(HashToken(line, column, value, is_identifier))
            else:
                tokens.append(LiteralToken(line, column, '#'))
        elif c == '{':
            # Open a block: following tokens go to its content until the
            # matching closing character.
            content = []
            tokens.append(CurlyBracketsBlock(line, column, content))
            stack.append((tokens, end_char))
            end_char = '}'
            tokens = content
            pos += 1
        elif c == '[':
            content = []
            tokens.append(SquareBracketsBlock(line, column, content))
            stack.append((tokens, end_char))
            end_char = ']'
            tokens = content
            pos += 1
        elif c == '(':
            content = []
            tokens.append(ParenthesesBlock(line, column, content))
            stack.append((tokens, end_char))
            end_char = ')'
            tokens = content
            pos += 1
        elif c == end_char:  # Matching }, ] or )
            # The top-level end_char is None (never equal to a character),
            # so we never get here if the stack is empty.
            tokens, end_char = stack.pop()
            pos += 1
        elif c in '}])':
            # A closing character that does not match the innermost block.
            tokens.append(ParseError(line, column, c, 'Unmatched ' + c))
            pos += 1
        elif c in ('"', "'"):
            value, pos, error = _consume_quoted_string(css, pos)
            if value is not None:
                # The representation always uses double quotes; the closing
                # one is dropped when the string was not terminated.
                repr = '"{}"'.format(serialize_string_value(value))
                if error is not None:
                    repr = repr[:-1]
                tokens.append(StringToken(line, column, value, repr))
            if error is not None:
                tokens.append(ParseError(line, column, *error))
        elif css.startswith('/*', pos):  # Comment
            pos = css.find('*/', pos + 2)
            if pos == -1:
                # Unterminated comment: it extends to the end of the input.
                if not skip_comments:
                    tokens.append(
                        Comment(line, column, css[token_start_pos + 2:]))
                break
            if not skip_comments:
                tokens.append(
                    Comment(line, column, css[token_start_pos + 2:pos]))
            pos += 2
        elif css.startswith('<!--', pos):
            tokens.append(LiteralToken(line, column, '<!--'))
            pos += 4
        elif css.startswith('||', pos):
            tokens.append(LiteralToken(line, column, '||'))
            pos += 2
        elif c in '~|^$*':
            # These combine with a following '=' into a two-character
            # literal such as '~='.
            pos += 1
            if css.startswith('=', pos):
                pos += 1
                tokens.append(LiteralToken(line, column, c + '='))
            else:
                tokens.append(LiteralToken(line, column, c))
        else:
            # Any other character is a one-character literal.
            tokens.append(LiteralToken(line, column, c))
            pos += 1
    return root
def _is_name_start(css, pos):
"""Return true if the given character is a name-start code point."""
# https://www.w3.org/TR/css-syntax-3/#name-start-code-point
c = css[pos]
return (
c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_' or
ord(c) > 0x7F)
def _is_ident_start(css, pos):
    """Return True if the given position is the start of a CSS identifier."""
    # https://drafts.csswg.org/css-syntax/#would-start-an-identifier
    if _is_name_start(css, pos):
        return True
    first = css[pos]
    if first == '\\':
        # A backslash starts an identifier unless a newline follows it.
        return not css.startswith('\\\n', pos)
    if first != '-':
        return False
    # After a hyphen: a name-start code point, another hyphen
    # or a valid escape.
    after = pos + 1
    if after < len(css) and (_is_name_start(css, after) or css[after] == '-'):
        return True
    return css.startswith('\\', after) and not css.startswith('\\\n', after)
def _consume_ident(css, pos):
"""Return (unescaped_value, new_pos).
Assumes pos starts at a valid identifier. See :func:`_is_ident_start`.
"""
# http://dev.w3.org/csswg/css-syntax/#consume-a-name
chunks = []
length = len(css)
start_pos = pos
while pos < length:
c = css[pos]
if c in ('abcdefghijklmnopqrstuvwxyz-_0123456789'
'ABCDEFGHIJKLMNOPQRSTUVWXYZ') or ord(c) > 0x7F:
pos += 1
elif c == '\\' and not css.startswith('\\\n', pos):
# Valid escape
chunks.append(css[start_pos:pos])
c, pos = _consume_escape(css, pos + 1)
chunks.append(c)
start_pos = pos
else:
break
chunks.append(css[start_pos:pos])
return ''.join(chunks), pos
def _consume_quoted_string(css, pos):
"""Return (unescaped_value, new_pos)."""
# https://drafts.csswg.org/css-syntax/#consume-a-string-token
error = None
quote = css[pos]
assert quote in ('"', "'")
pos += 1
chunks = []
length = len(css)
start_pos = pos
while pos < length:
c = css[pos]
if c == quote:
chunks.append(css[start_pos:pos])
pos += 1
break
elif c == '\\':
chunks.append(css[start_pos:pos])
pos += 1
if pos < length:
if css[pos] == '\n': # Ignore escaped newlines
pos += 1
else:
c, pos = _consume_escape(css, pos)
chunks.append(c)
# else: Escaped EOF, do nothing
start_pos = pos
elif c == '\n': # Unescaped newline
return None, pos, ('bad-string', 'Bad string token')
else:
pos += 1
else:
error = ('eof-in-string', 'EOF in string')
chunks.append(css[start_pos:pos])
return ''.join(chunks), pos, error
def _consume_escape(css, pos):
    r"""Return (unescaped_char, new_pos).

    Assumes a valid escape: pos is just after '\' and not followed by '\n'.

    A hexadecimal escape for zero, for a surrogate (U+D800 to U+DFFF) or
    for a value above the maximum code point gives U+FFFD, as does an
    escape at the very end of the input.

    """
    # https://drafts.csswg.org/css-syntax/#consume-an-escaped-character
    hex_match = _HEX_ESCAPE_RE.match(css, pos)
    if hex_match:
        codepoint = int(hex_match.group(1), 16)
        if (codepoint == 0 or codepoint > sys.maxunicode or
                0xD800 <= codepoint <= 0xDFFF):
            # chr() would accept a surrogate, but the resulting lone
            # surrogate is not a valid character and cannot be encoded.
            char = '\uFFFD'
        else:
            char = chr(codepoint)
        return char, hex_match.end()
    elif pos < len(css):
        # Any other character stands for itself.
        return css[pos], pos + 1
    else:
        return '\uFFFD', pos
def _consume_url(css, pos):
"""Return (unescaped_url, new_pos)
The given pos is assumed to be just after the '(' of 'url('.
"""
error = None
length = len(css)
# https://drafts.csswg.org/css-syntax/#consume-a-url-token
# Skip whitespace
while css.startswith((' ', '\n', '\t'), pos):
pos += 1
if pos >= length: # EOF
return '', pos, ('eof-in-url', 'EOF in URL')
c = css[pos]
if c in ('"', "'"):
value, pos, error = _consume_quoted_string(css, pos)
elif c == ')':
return '', pos + 1, error
else:
chunks = []
start_pos = pos
while 1:
if pos >= length: # EOF
chunks.append(css[start_pos:pos])
return ''.join(chunks), pos, ('eof-in-url', 'EOF in URL')
c = css[pos]
if c == ')':
chunks.append(css[start_pos:pos])
pos += 1
return ''.join(chunks), pos, error
elif c in ' \n\t':
chunks.append(css[start_pos:pos])
value = ''.join(chunks)
pos += 1
break
elif c == '\\' and not css.startswith('\\\n', pos):
# Valid escape
chunks.append(css[start_pos:pos])
c, pos = _consume_escape(css, pos + 1)
chunks.append(c)
start_pos = pos
elif (c in
'"\'('
# https://drafts.csswg.org/css-syntax/#non-printable-character
'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0e'
'\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19'
'\x1a\x1b\x1c\x1d\x1e\x1f\x7f'):
value = None # Parse error
pos += 1
break
else:
pos += 1
if value is not None:
while css.startswith((' ', '\n', '\t'), pos):
pos += 1
if pos < length:
if css[pos] == ')':
return value, pos + 1, error
else:
if error is None:
error = ('eof-in-url', 'EOF in URL')
return value, pos, error
# https://drafts.csswg.org/css-syntax/#consume-the-remnants-of-a-bad-url0
while pos < length:
if css.startswith('\\)', pos):
pos += 2
elif css[pos] == ')':
pos += 1
break
else:
pos += 1
return None, pos, ('bad-url', 'bad URL token')
def _consume_unicode_range(css, pos):
"""Return (range, new_pos)
The given pos is assume to be just after the '+' of 'U+' or 'u+'.
"""
# https://drafts.csswg.org/css-syntax/#consume-a-unicode-range-token
length = len(css)
start_pos = pos
max_pos = min(pos + 6, length)
while pos < max_pos and css[pos] in '0123456789abcdefABCDEF':
pos += 1
start = css[start_pos:pos]
start_pos = pos
# Same max_pos as before: total of hex digits and question marks <= 6
while pos < max_pos and css[pos] == '?':
pos += 1
question_marks = pos - start_pos
if question_marks:
end = start + 'F' * question_marks
start = start + '0' * question_marks
elif (pos + 1 < length and css[pos] == '-' and
css[pos + 1] in '0123456789abcdefABCDEF'):
pos += 1
start_pos = pos
max_pos = min(pos + 6, length)
while pos < max_pos and css[pos] in '0123456789abcdefABCDEF':
pos += 1
end = css[start_pos:pos]
else:
end = start
return int(start, 16), int(end, 16), pos