2022-05-23 00:16:32 +04:00

155 lines
4.8 KiB
Python

import re
from .utils import validator
ip_middle_octet = r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5]))"
ip_last_octet = r"(?:\.(?:0|[1-9]\d?|1\d\d|2[0-4]\d|25[0-5]))"
regex = re.compile( # noqa: W605
r"^"
# protocol identifier
r"(?:(?:https?|ftp)://)"
# user:pass authentication
r"(?:[-a-z\u00a1-\uffff0-9._~%!$&'()*+,;=:]+"
r"(?::[-a-z0-9._~%!$&'()*+,;=:]*)?@)?"
r"(?:"
r"(?P<private_ip>"
# IP address exclusion
# private & local networks
r"(?:(?:10|127)" + ip_middle_octet + r"{2}" + ip_last_octet + r")|"
r"(?:(?:169\.254|192\.168)" + ip_middle_octet + ip_last_octet + r")|"
r"(?:172\.(?:1[6-9]|2\d|3[0-1])" + ip_middle_octet + ip_last_octet + r"))"
r"|"
# private & local hosts
r"(?P<private_host>"
r"(?:localhost))"
r"|"
# IP address dotted notation octets
# excludes loopback network 0.0.0.0
# excludes reserved space >= 224.0.0.0
# excludes network & broadcast addresses
# (first & last IP address of each class)
r"(?P<public_ip>"
r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])"
r"" + ip_middle_octet + r"{2}"
r"" + ip_last_octet + r")"
r"|"
# IPv6 RegEx from https://stackoverflow.com/a/17871737
r"\[("
# 1:2:3:4:5:6:7:8
r"([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|"
# 1:: 1:2:3:4:5:6:7::
r"([0-9a-fA-F]{1,4}:){1,7}:|"
# 1::8 1:2:3:4:5:6::8 1:2:3:4:5:6::8
r"([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|"
# 1::7:8 1:2:3:4:5::7:8 1:2:3:4:5::8
r"([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|"
# 1::6:7:8 1:2:3:4::6:7:8 1:2:3:4::8
r"([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|"
# 1::5:6:7:8 1:2:3::5:6:7:8 1:2:3::8
r"([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|"
# 1::4:5:6:7:8 1:2::4:5:6:7:8 1:2::8
r"([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|"
# 1::3:4:5:6:7:8 1::3:4:5:6:7:8 1::8
r"[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|"
# ::2:3:4:5:6:7:8 ::2:3:4:5:6:7:8 ::8 ::
r":((:[0-9a-fA-F]{1,4}){1,7}|:)|"
# fe80::7:8%eth0 fe80::7:8%1
# (link-local IPv6 addresses with zone index)
r"fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|"
r"::(ffff(:0{1,4}){0,1}:){0,1}"
r"((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}"
# ::255.255.255.255 ::ffff:255.255.255.255 ::ffff:0:255.255.255.255
# (IPv4-mapped IPv6 addresses and IPv4-translated addresses)
r"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|"
r"([0-9a-fA-F]{1,4}:){1,4}:"
r"((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}"
# 2001:db8:3:4::192.0.2.33 64:ff9b::192.0.2.33
# (IPv4-Embedded IPv6 Address)
r"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])"
r")\]|"
# host name
r"(?:(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*"
r"[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)"
# domain name
r"(?:\.(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*"
r"[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)*"
# TLD identifier
r"(?:\.(?:(?:xn--[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]{2,})|"
r"[a-z\u00a1-\uffff\U00010000-\U0010ffff]{2,}))"
r")"
# port number
r"(?::\d{2,5})?"
# resource path
r"(?:/[-a-z\u00a1-\uffff\U00010000-\U0010ffff0-9._~%!$&'()*+,;=:@/]*)?"
# query string
r"(?:\?\S*)?"
# fragment
r"(?:#\S*)?"
r"$",
re.UNICODE | re.IGNORECASE
)
pattern = re.compile(regex)
@validator
def url(value, public=False):
"""
Return whether or not given value is a valid URL.
If the value is valid URL this function returns ``True``, otherwise
:class:`~validators.utils.ValidationFailure`.
This validator is based on the wonderful `URL validator of dperini`_.
.. _URL validator of dperini:
https://gist.github.com/dperini/729294
Examples::
>>> url('http://foobar.dk')
True
>>> url('ftp://foobar.dk')
True
>>> url('http://10.0.0.1')
True
>>> url('http://foobar.d')
ValidationFailure(func=url, ...)
>>> url('http://10.0.0.1', public=True)
ValidationFailure(func=url, ...)
.. versionadded:: 0.2
.. versionchanged:: 0.10.2
Added support for various exotic URLs and fixed various false
positives.
.. versionchanged:: 0.10.3
Added ``public`` parameter.
.. versionchanged:: 0.11.0
Made the regular expression this function uses case insensitive.
.. versionchanged:: 0.11.3
Added support for URLs containing localhost
:param value: URL address string to validate
:param public: (default=False) Set True to only allow a public IP address
"""
result = pattern.match(value)
if not public:
return result
return result and not any(
(result.groupdict().get(key) for key in ('private_ip', 'private_host'))
)