first commit

commit d660f2a4ca
Author: Ayxan
Date: 2022-05-23 00:16:32 +04:00
24786 changed files with 4428337 additions and 0 deletions


@@ -0,0 +1,19 @@
from pandas.util._decorators import ( # noqa:F401
Appender,
Substitution,
cache_readonly,
)
from pandas.core.util.hashing import ( # noqa:F401
hash_array,
hash_pandas_object,
)
def __getattr__(name):
if name == "testing":
import pandas.util.testing
return pandas.util.testing
else:
raise AttributeError(f"module 'pandas.util' has no attribute '{name}'")


@@ -0,0 +1,497 @@
from __future__ import annotations
from functools import wraps
import inspect
from textwrap import dedent
from typing import (
Any,
Callable,
Mapping,
cast,
)
import warnings
from pandas._libs.properties import cache_readonly # noqa:F401
from pandas._typing import F
def deprecate(
name: str,
alternative: Callable[..., Any],
version: str,
alt_name: str | None = None,
klass: type[Warning] | None = None,
stacklevel: int = 2,
msg: str | None = None,
) -> Callable[[F], F]:
"""
Return a new function that emits a deprecation warning on use.
To use this method for a deprecated function, another function
`alternative` with the same signature must exist. The deprecated
    function will emit a deprecation warning, and its docstring will
    contain the deprecation directive with the provided version so it
    can be detected for future removal.
Parameters
----------
name : str
Name of function to deprecate.
alternative : func
Function to use instead.
version : str
Version of pandas in which the method has been deprecated.
alt_name : str, optional
Name to use in preference of alternative.__name__.
klass : Warning, default FutureWarning
stacklevel : int, default 2
msg : str
The message to display in the warning.
Default is '{name} is deprecated. Use {alt_name} instead.'
"""
alt_name = alt_name or alternative.__name__
klass = klass or FutureWarning
warning_msg = msg or f"{name} is deprecated, use {alt_name} instead."
@wraps(alternative)
def wrapper(*args, **kwargs) -> Callable[..., Any]:
warnings.warn(warning_msg, klass, stacklevel=stacklevel)
return alternative(*args, **kwargs)
# adding deprecated directive to the docstring
msg = msg or f"Use `{alt_name}` instead."
doc_error_msg = (
"deprecate needs a correctly formatted docstring in "
"the target function (should have a one liner short "
"summary, and opening quotes should be in their own "
f"line). Found:\n{alternative.__doc__}"
)
# when python is running in optimized mode (i.e. `-OO`), docstrings are
# removed, so we check that a docstring with correct formatting is used
# but we allow empty docstrings
if alternative.__doc__:
if alternative.__doc__.count("\n") < 3:
raise AssertionError(doc_error_msg)
empty1, summary, empty2, doc = alternative.__doc__.split("\n", 3)
if empty1 or empty2 and not summary:
raise AssertionError(doc_error_msg)
wrapper.__doc__ = dedent(
f"""
{summary.strip()}
.. deprecated:: {version}
{msg}
{dedent(doc)}"""
)
# error: Incompatible return value type (got "Callable[[VarArg(Any), KwArg(Any)],
# Callable[...,Any]]", expected "Callable[[F], F]")
return wrapper # type: ignore[return-value]
def deprecate_kwarg(
old_arg_name: str,
new_arg_name: str | None,
mapping: Mapping[Any, Any] | Callable[[Any], Any] | None = None,
stacklevel: int = 2,
) -> Callable[[F], F]:
"""
Decorator to deprecate a keyword argument of a function.
Parameters
----------
old_arg_name : str
Name of argument in function to deprecate
new_arg_name : str or None
Name of preferred argument in function. Use None to raise warning that
``old_arg_name`` keyword is deprecated.
mapping : dict or callable
If mapping is present, use it to translate old arguments to
new arguments. A callable must do its own value checking;
values not found in a dict will be forwarded unchanged.
Examples
--------
The following deprecates 'cols', using 'columns' instead
>>> @deprecate_kwarg(old_arg_name='cols', new_arg_name='columns')
... def f(columns=''):
... print(columns)
...
>>> f(columns='should work ok')
should work ok
>>> f(cols='should raise warning') # doctest: +SKIP
FutureWarning: cols is deprecated, use columns instead
warnings.warn(msg, FutureWarning)
should raise warning
    >>> f(cols='should error', columns="can't pass both")  # doctest: +SKIP
TypeError: Can only specify 'cols' or 'columns', not both
>>> @deprecate_kwarg('old', 'new', {'yes': True, 'no': False})
... def f(new=False):
... print('yes!' if new else 'no!')
...
>>> f(old='yes') # doctest: +SKIP
FutureWarning: old='yes' is deprecated, use new=True instead
warnings.warn(msg, FutureWarning)
yes!
To raise a warning that a keyword will be removed entirely in the future
>>> @deprecate_kwarg(old_arg_name='cols', new_arg_name=None)
... def f(cols='', another_param=''):
... print(cols)
...
>>> f(cols='should raise warning') # doctest: +SKIP
    FutureWarning: the 'cols' keyword is deprecated and will be removed in a
    future version. Please take steps to stop the use of 'cols'
should raise warning
>>> f(another_param='should not raise warning') # doctest: +SKIP
should not raise warning
>>> f(cols='should raise warning', another_param='') # doctest: +SKIP
    FutureWarning: the 'cols' keyword is deprecated and will be removed in a
    future version. Please take steps to stop the use of 'cols'
should raise warning
"""
if mapping is not None and not hasattr(mapping, "get") and not callable(mapping):
raise TypeError(
"mapping from old to new argument values must be dict or callable!"
)
def _deprecate_kwarg(func: F) -> F:
@wraps(func)
def wrapper(*args, **kwargs) -> Callable[..., Any]:
old_arg_value = kwargs.pop(old_arg_name, None)
if old_arg_value is not None:
if new_arg_name is None:
msg = (
f"the {repr(old_arg_name)} keyword is deprecated and "
"will be removed in a future version. Please take "
f"steps to stop the use of {repr(old_arg_name)}"
)
warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
kwargs[old_arg_name] = old_arg_value
return func(*args, **kwargs)
elif mapping is not None:
if callable(mapping):
new_arg_value = mapping(old_arg_value)
else:
new_arg_value = mapping.get(old_arg_value, old_arg_value)
msg = (
f"the {old_arg_name}={repr(old_arg_value)} keyword is "
"deprecated, use "
f"{new_arg_name}={repr(new_arg_value)} instead."
)
else:
new_arg_value = old_arg_value
msg = (
f"the {repr(old_arg_name)}' keyword is deprecated, "
f"use {repr(new_arg_name)} instead."
)
warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
if kwargs.get(new_arg_name) is not None:
msg = (
f"Can only specify {repr(old_arg_name)} "
f"or {repr(new_arg_name)}, not both."
)
raise TypeError(msg)
else:
kwargs[new_arg_name] = new_arg_value
return func(*args, **kwargs)
return cast(F, wrapper)
return _deprecate_kwarg
def _format_argument_list(allow_args: list[str]):
"""
    Convert the allow_args argument of the `deprecate_nonkeyword_arguments`
    function to a string describing it, to be inserted into the warning
    message.
    Parameters
    ----------
    allow_args : list of str
        The list of argument names that may be passed positionally, as
        resolved by `deprecate_nonkeyword_arguments`; must not be None.
Returns
-------
s : str
The substring describing the argument list in best way to be
inserted to the warning message.
Examples
--------
    `_format_argument_list([])` -> ''
    `_format_argument_list(['a'])` -> " except for the argument 'a'"
    `_format_argument_list(['a', 'b'])` -> " except for the arguments 'a' and 'b'"
    `_format_argument_list(['a', 'b', 'c'])` ->
        " except for the arguments 'a', 'b' and 'c'"
"""
if "self" in allow_args:
allow_args.remove("self")
if not allow_args:
return ""
elif len(allow_args) == 1:
return f" except for the argument '{allow_args[0]}'"
else:
last = allow_args[-1]
args = ", ".join(["'" + x + "'" for x in allow_args[:-1]])
return f" except for the arguments {args} and '{last}'"
def future_version_msg(version: str | None) -> str:
"""Specify which version of pandas the deprecation will take place in."""
if version is None:
return "In a future version of pandas"
else:
return f"Starting with pandas version {version}"
def deprecate_nonkeyword_arguments(
version: str | None,
allowed_args: list[str] | None = None,
stacklevel: int = 2,
) -> Callable[[F], F]:
"""
Decorator to deprecate a use of non-keyword arguments of a function.
Parameters
----------
version : str, optional
The version in which positional arguments will become
keyword-only. If None, then the warning message won't
specify any particular version.
allowed_args : list, optional
In case of list, it must be the list of names of some
first arguments of the decorated functions that are
OK to be given as positional arguments. In case of None value,
defaults to list of all arguments not having the
default value.
stacklevel : int, default=2
The stack level for warnings.warn
"""
def decorate(func):
if allowed_args is not None:
allow_args = allowed_args
else:
spec = inspect.getfullargspec(func)
# We must have some defaults if we are deprecating default-less
assert spec.defaults is not None # for mypy
allow_args = spec.args[: -len(spec.defaults)]
num_allow_args = len(allow_args)
msg = (
f"{future_version_msg(version)} all arguments of "
f"{func.__qualname__}{{arguments}} will be keyword-only."
)
@wraps(func)
def wrapper(*args, **kwargs):
arguments = _format_argument_list(allow_args)
if len(args) > num_allow_args:
warnings.warn(
msg.format(arguments=arguments),
FutureWarning,
stacklevel=stacklevel,
)
return func(*args, **kwargs)
return wrapper
return decorate
def rewrite_axis_style_signature(
name: str, extra_params: list[tuple[str, Any]]
) -> Callable[..., Any]:
def decorate(func: F) -> F:
@wraps(func)
def wrapper(*args, **kwargs) -> Callable[..., Any]:
return func(*args, **kwargs)
kind = inspect.Parameter.POSITIONAL_OR_KEYWORD
params = [
inspect.Parameter("self", kind),
inspect.Parameter(name, kind, default=None),
inspect.Parameter("index", kind, default=None),
inspect.Parameter("columns", kind, default=None),
inspect.Parameter("axis", kind, default=None),
]
for pname, default in extra_params:
params.append(inspect.Parameter(pname, kind, default=default))
sig = inspect.Signature(params)
# https://github.com/python/typing/issues/598
# error: "F" has no attribute "__signature__"
func.__signature__ = sig # type: ignore[attr-defined]
return cast(F, wrapper)
return decorate
def doc(*docstrings: str | Callable, **params) -> Callable[[F], F]:
"""
    A decorator that takes docstring templates, concatenates them, and
    performs string substitution on the result.
    This decorator adds a variable "_docstring_components" to the wrapped
    callable to keep track of the original docstring templates for potential
    future use. If a component should be treated as a template, it is saved
    as a string; otherwise it is saved as a callable, and its __doc__ is
    later dedented to obtain the docstring.
Parameters
----------
*docstrings : str or callable
The string / docstring / docstring template to be appended in order
after default docstring under callable.
**params
The string which would be used to format docstring template.
"""
def decorator(decorated: F) -> F:
# collecting docstring and docstring templates
docstring_components: list[str | Callable] = []
if decorated.__doc__:
docstring_components.append(dedent(decorated.__doc__))
for docstring in docstrings:
if hasattr(docstring, "_docstring_components"):
# error: Item "str" of "Union[str, Callable[..., Any]]" has no attribute
# "_docstring_components"
# error: Item "function" of "Union[str, Callable[..., Any]]" has no
# attribute "_docstring_components"
docstring_components.extend(
docstring._docstring_components # type: ignore[union-attr]
)
elif isinstance(docstring, str) or docstring.__doc__:
docstring_components.append(docstring)
# formatting templates and concatenating docstring
decorated.__doc__ = "".join(
[
component.format(**params)
if isinstance(component, str)
else dedent(component.__doc__ or "")
for component in docstring_components
]
)
# error: "F" has no attribute "_docstring_components"
decorated._docstring_components = ( # type: ignore[attr-defined]
docstring_components
)
return decorated
return decorator
# Substitution and Appender are derived from matplotlib.docstring (1.1.0)
# module https://matplotlib.org/users/license.html
class Substitution:
"""
A decorator to take a function's docstring and perform string
substitution on it.
This decorator should be robust even if func.__doc__ is None
(for example, if -OO was passed to the interpreter)
Usage: construct a docstring.Substitution with a sequence or
dictionary suitable for performing substitution; then
decorate a suitable function with the constructed object. e.g.
sub_author_name = Substitution(author='Jason')
@sub_author_name
def some_function(x):
"%(author)s wrote this function"
# note that some_function.__doc__ is now "Jason wrote this function"
One can also use positional arguments.
sub_first_last_names = Substitution('Edgar Allen', 'Poe')
@sub_first_last_names
def some_function(x):
"%s %s wrote the Raven"
"""
def __init__(self, *args, **kwargs):
if args and kwargs:
raise AssertionError("Only positional or keyword args are allowed")
self.params = args or kwargs
def __call__(self, func: F) -> F:
func.__doc__ = func.__doc__ and func.__doc__ % self.params
return func
def update(self, *args, **kwargs) -> None:
"""
Update self.params with supplied args.
"""
if isinstance(self.params, dict):
self.params.update(*args, **kwargs)
class Appender:
"""
A function decorator that will append an addendum to the docstring
of the target function.
This decorator should be robust even if func.__doc__ is None
(for example, if -OO was passed to the interpreter).
Usage: construct a docstring.Appender with a string to be joined to
the original docstring. An optional 'join' parameter may be supplied
which will be used to join the docstring and addendum. e.g.
add_copyright = Appender("Copyright (c) 2009", join='\n')
@add_copyright
def my_dog(has='fleas'):
"This docstring will have a copyright below"
pass
"""
addendum: str | None
def __init__(self, addendum: str | None, join: str = "", indents: int = 0):
if indents > 0:
self.addendum = indent(addendum, indents=indents)
else:
self.addendum = addendum
self.join = join
def __call__(self, func: F) -> F:
func.__doc__ = func.__doc__ if func.__doc__ else ""
self.addendum = self.addendum if self.addendum else ""
docitems = [func.__doc__, self.addendum]
func.__doc__ = dedent(self.join.join(docitems))
return func
def indent(text: str | None, indents: int = 1) -> str:
if not text or not isinstance(text, str):
return ""
jointext = "".join(["\n"] + [" "] * indents)
return jointext.join(text.split("\n"))


@@ -0,0 +1,193 @@
from __future__ import annotations
import numpy as np
import pandas as pd
class TablePlotter:
"""
    Lay out some DataFrames in a vertical/horizontal layout for explanation.
    Used in merging.rst
"""
def __init__(
self,
cell_width: float = 0.37,
cell_height: float = 0.25,
font_size: float = 7.5,
):
self.cell_width = cell_width
self.cell_height = cell_height
self.font_size = font_size
def _shape(self, df: pd.DataFrame) -> tuple[int, int]:
"""
Calculate table shape considering index levels.
"""
row, col = df.shape
return row + df.columns.nlevels, col + df.index.nlevels
def _get_cells(self, left, right, vertical) -> tuple[int, int]:
"""
Calculate appropriate figure size based on left and right data.
"""
if vertical:
# calculate required number of cells
vcells = max(sum(self._shape(df)[0] for df in left), self._shape(right)[0])
hcells = max(self._shape(df)[1] for df in left) + self._shape(right)[1]
else:
vcells = max([self._shape(df)[0] for df in left] + [self._shape(right)[0]])
hcells = sum([self._shape(df)[1] for df in left] + [self._shape(right)[1]])
return hcells, vcells
def plot(self, left, right, labels=None, vertical: bool = True):
"""
Plot left / right DataFrames in specified layout.
Parameters
----------
left : list of DataFrames before operation is applied
right : DataFrame of operation result
labels : list of str to be drawn as titles of left DataFrames
vertical : bool, default True
If True, use vertical layout. If False, use horizontal layout.
"""
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
if not isinstance(left, list):
left = [left]
left = [self._conv(df) for df in left]
right = self._conv(right)
hcells, vcells = self._get_cells(left, right, vertical)
if vertical:
figsize = self.cell_width * hcells, self.cell_height * vcells
else:
# include margin for titles
figsize = self.cell_width * hcells, self.cell_height * vcells
fig = plt.figure(figsize=figsize)
if vertical:
gs = gridspec.GridSpec(len(left), hcells)
# left
max_left_cols = max(self._shape(df)[1] for df in left)
max_left_rows = max(self._shape(df)[0] for df in left)
for i, (l, label) in enumerate(zip(left, labels)):
ax = fig.add_subplot(gs[i, 0:max_left_cols])
self._make_table(ax, l, title=label, height=1.0 / max_left_rows)
# right
ax = plt.subplot(gs[:, max_left_cols:])
self._make_table(ax, right, title="Result", height=1.05 / vcells)
fig.subplots_adjust(top=0.9, bottom=0.05, left=0.05, right=0.95)
else:
max_rows = max(self._shape(df)[0] for df in left + [right])
height = 1.0 / np.max(max_rows)
gs = gridspec.GridSpec(1, hcells)
# left
i = 0
for df, label in zip(left, labels):
sp = self._shape(df)
ax = fig.add_subplot(gs[0, i : i + sp[1]])
self._make_table(ax, df, title=label, height=height)
i += sp[1]
# right
ax = plt.subplot(gs[0, i:])
self._make_table(ax, right, title="Result", height=height)
fig.subplots_adjust(top=0.85, bottom=0.05, left=0.05, right=0.95)
return fig
def _conv(self, data):
"""
        Convert each input to a format appropriate for table output.
"""
if isinstance(data, pd.Series):
if data.name is None:
data = data.to_frame(name="")
else:
data = data.to_frame()
data = data.fillna("NaN")
return data
def _insert_index(self, data):
# insert is destructive
data = data.copy()
idx_nlevels = data.index.nlevels
if idx_nlevels == 1:
data.insert(0, "Index", data.index)
else:
for i in range(idx_nlevels):
data.insert(i, f"Index{i}", data.index._get_level_values(i))
col_nlevels = data.columns.nlevels
if col_nlevels > 1:
col = data.columns._get_level_values(0)
values = [
data.columns._get_level_values(i)._values for i in range(1, col_nlevels)
]
col_df = pd.DataFrame(values)
data.columns = col_df.columns
data = pd.concat([col_df, data])
data.columns = col
return data
def _make_table(self, ax, df, title: str, height: float | None = None):
if df is None:
ax.set_visible(False)
return
import pandas.plotting as plotting
idx_nlevels = df.index.nlevels
col_nlevels = df.columns.nlevels
        # must be converted here to get index levels for colorization
df = self._insert_index(df)
tb = plotting.table(ax, df, loc=9)
tb.set_fontsize(self.font_size)
if height is None:
height = 1.0 / (len(df) + 1)
props = tb.properties()
for (r, c), cell in props["celld"].items():
if c == -1:
cell.set_visible(False)
elif r < col_nlevels and c < idx_nlevels:
cell.set_visible(False)
elif r < col_nlevels or c < idx_nlevels:
cell.set_facecolor("#AAAAAA")
cell.set_height(height)
ax.set_title(title, size=self.font_size)
ax.axis("off")
if __name__ == "__main__":
import matplotlib.pyplot as plt
p = TablePlotter()
df1 = pd.DataFrame({"A": [10, 11, 12], "B": [20, 21, 22], "C": [30, 31, 32]})
df2 = pd.DataFrame({"A": [10, 12], "C": [30, 32]})
p.plot([df1, df2], pd.concat([df1, df2]), labels=["df1", "df2"], vertical=True)
plt.show()
df3 = pd.DataFrame({"X": [10, 12], "Z": [30, 32]})
p.plot(
[df1, df3], pd.concat([df1, df3], axis=1), labels=["df1", "df2"], vertical=False
)
plt.show()
idx = pd.MultiIndex.from_tuples(
[(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")]
)
col = pd.MultiIndex.from_tuples([(1, "A"), (1, "B")])
df3 = pd.DataFrame({"v1": [1, 2, 3, 4, 5, 6], "v2": [5, 6, 7, 8, 9, 10]}, index=idx)
df3.columns = col
p.plot(df3, df3, labels=["df3"])
plt.show()


@@ -0,0 +1,45 @@
from __future__ import annotations
import contextlib
import inspect
import os
@contextlib.contextmanager
def rewrite_exception(old_name: str, new_name: str):
"""
Rewrite the message of an exception.
"""
try:
yield
except Exception as err:
if not err.args:
raise
msg = str(err.args[0])
msg = msg.replace(old_name, new_name)
args: tuple[str, ...] = (msg,)
if len(err.args) > 1:
args = args + err.args[1:]
err.args = args
raise
def find_stack_level() -> int:
"""
Find the first place in the stack that is not inside pandas
(tests notwithstanding).
"""
stack = inspect.stack()
import pandas as pd
pkg_dir = os.path.dirname(pd.__file__)
test_dir = os.path.join(pkg_dir, "tests")
for n in range(len(stack)):
fname = stack[n].filename
if fname.startswith(pkg_dir) and not fname.startswith(test_dir):
continue
else:
break
return n


@@ -0,0 +1,160 @@
from __future__ import annotations
import codecs
import json
import locale
import os
import platform
import struct
import sys
from pandas._typing import JSONSerializable
from pandas.compat._optional import (
VERSIONS,
get_version,
import_optional_dependency,
)
def _get_commit_hash() -> str | None:
"""
Use vendored versioneer code to get git hash, which handles
git worktree correctly.
"""
from pandas._version import get_versions
versions = get_versions()
return versions["full-revisionid"]
def _get_sys_info() -> dict[str, JSONSerializable]:
"""
Returns system information as a JSON serializable dictionary.
"""
uname_result = platform.uname()
language_code, encoding = locale.getlocale()
return {
"commit": _get_commit_hash(),
"python": ".".join([str(i) for i in sys.version_info]),
"python-bits": struct.calcsize("P") * 8,
"OS": uname_result.system,
"OS-release": uname_result.release,
"Version": uname_result.version,
"machine": uname_result.machine,
"processor": uname_result.processor,
"byteorder": sys.byteorder,
"LC_ALL": os.environ.get("LC_ALL"),
"LANG": os.environ.get("LANG"),
"LOCALE": {"language-code": language_code, "encoding": encoding},
}
def _get_dependency_info() -> dict[str, JSONSerializable]:
"""
Returns dependency information as a JSON serializable dictionary.
"""
deps = [
"pandas",
# required
"numpy",
"pytz",
"dateutil",
# install / build,
"pip",
"setuptools",
"Cython",
# test
"pytest",
"hypothesis",
# docs
"sphinx",
# Other, need a min version
"blosc",
"feather",
"xlsxwriter",
"lxml.etree",
"html5lib",
"pymysql",
"psycopg2",
"jinja2",
# Other, not imported.
"IPython",
"pandas_datareader",
]
deps.extend(list(VERSIONS))
result: dict[str, JSONSerializable] = {}
for modname in deps:
mod = import_optional_dependency(modname, errors="ignore")
result[modname] = get_version(mod) if mod else None
return result
def show_versions(as_json: str | bool = False) -> None:
"""
Provide useful information, important for bug reports.
    It comprises info about the hosting operating system, the pandas version,
    and the versions of other installed related packages.
Parameters
----------
as_json : str or bool, default False
* If False, outputs info in a human readable form to the console.
* If str, it will be considered as a path to a file.
Info will be written to that file in JSON format.
* If True, outputs info in JSON format to the console.
"""
sys_info = _get_sys_info()
deps = _get_dependency_info()
if as_json:
j = {"system": sys_info, "dependencies": deps}
if as_json is True:
sys.stdout.writelines(json.dumps(j, indent=2))
else:
assert isinstance(as_json, str) # needed for mypy
with codecs.open(as_json, "wb", encoding="utf8") as f:
json.dump(j, f, indent=2)
else:
assert isinstance(sys_info["LOCALE"], dict) # needed for mypy
language_code = sys_info["LOCALE"]["language-code"]
encoding = sys_info["LOCALE"]["encoding"]
sys_info["LOCALE"] = f"{language_code}.{encoding}"
maxlen = max(len(x) for x in deps)
print("\nINSTALLED VERSIONS")
print("------------------")
for k, v in sys_info.items():
print(f"{k:<{maxlen}}: {v}")
print("")
for k, v in deps.items():
print(f"{k:<{maxlen}}: {v}")
def main() -> int:
from optparse import OptionParser
parser = OptionParser()
parser.add_option(
"-j",
"--json",
metavar="FILE",
nargs=1,
help="Save output as JSON into file, pass in '-' to output to stdout",
)
(options, args) = parser.parse_args()
if options.json == "-":
options.json = True
show_versions(as_json=options.json)
return 0
if __name__ == "__main__":
sys.exit(main())


@@ -0,0 +1,308 @@
"""
This module provides decorator functions which can be applied to test objects
in order to skip those objects when certain conditions occur. A sample use case
is to detect if the platform is missing ``matplotlib``. If so, any test objects
which require ``matplotlib`` and decorated with ``@td.skip_if_no_mpl`` will be
skipped by ``pytest`` during the execution of the test suite.
To illustrate, after importing this module:
import pandas.util._test_decorators as td
The decorators can be applied to classes:
@td.skip_if_some_reason
class Foo:
...
Or individual functions:
@td.skip_if_some_reason
def test_foo():
...
For more information, refer to the ``pytest`` documentation on ``skipif``.
"""
from __future__ import annotations
from contextlib import contextmanager
import locale
from typing import Callable
import warnings
import numpy as np
import pytest
from pandas._config import get_option
from pandas.compat import (
IS64,
is_platform_windows,
)
from pandas.compat._optional import import_optional_dependency
from pandas.core.computation.expressions import (
NUMEXPR_INSTALLED,
USE_NUMEXPR,
)
from pandas.util.version import Version
def safe_import(mod_name: str, min_version: str | None = None):
"""
Parameters
----------
mod_name : str
Name of the module to be imported
min_version : str, default None
Minimum required version of the specified mod_name
Returns
-------
object
The imported module if successful, or False
"""
with warnings.catch_warnings():
# Suppress warnings that we can't do anything about,
# e.g. from aiohttp
warnings.filterwarnings(
"ignore",
category=DeprecationWarning,
module="aiohttp",
message=".*decorator is deprecated since Python 3.8.*",
)
# fastparquet import accesses pd.Int64Index
warnings.filterwarnings(
"ignore",
category=FutureWarning,
module="fastparquet",
message=".*Int64Index.*",
)
try:
mod = __import__(mod_name)
except ImportError:
return False
if not min_version:
return mod
else:
import sys
try:
version = getattr(sys.modules[mod_name], "__version__")
except AttributeError:
# xlrd uses a capitalized attribute name
version = getattr(sys.modules[mod_name], "__VERSION__")
if version and Version(version) >= Version(min_version):
return mod
return False
def _skip_if_no_mpl():
mod = safe_import("matplotlib")
if mod:
mod.use("Agg")
else:
return True
def _skip_if_has_locale():
lang, _ = locale.getlocale()
if lang is not None:
return True
def _skip_if_not_us_locale():
lang, _ = locale.getlocale()
if lang != "en_US":
return True
def _skip_if_no_scipy() -> bool:
return not (
safe_import("scipy.stats")
and safe_import("scipy.sparse")
and safe_import("scipy.interpolate")
and safe_import("scipy.signal")
)
# TODO(pytest#7469): return type, _pytest.mark.structures.MarkDecorator is not public
# https://github.com/pytest-dev/pytest/issues/7469
def skip_if_installed(package: str):
"""
Skip a test if a package is installed.
Parameters
----------
package : str
The name of the package.
"""
return pytest.mark.skipif(
safe_import(package), reason=f"Skipping because {package} is installed."
)
# TODO(pytest#7469): return type, _pytest.mark.structures.MarkDecorator is not public
# https://github.com/pytest-dev/pytest/issues/7469
def skip_if_no(package: str, min_version: str | None = None):
"""
Generic function to help skip tests when required packages are not
present on the testing system.
This function returns a pytest mark with a skip condition that will be
evaluated during test collection. An attempt will be made to import the
    specified ``package`` and optionally ensure it meets the ``min_version``.
The mark can be used as either a decorator for a test function or to be
applied to parameters in pytest.mark.parametrize calls or parametrized
fixtures.
If the import and version check are unsuccessful, then the test function
(or test case when used in conjunction with parametrization) will be
skipped.
Parameters
----------
package: str
The name of the required package.
min_version: str or None, default None
Optional minimum version of the package.
Returns
-------
_pytest.mark.structures.MarkDecorator
a pytest.mark.skipif to use as either a test decorator or a
parametrization mark.
"""
msg = f"Could not import '{package}'"
if min_version:
msg += f" satisfying a min_version of {min_version}"
return pytest.mark.skipif(
not safe_import(package, min_version=min_version), reason=msg
)
skip_if_no_mpl = pytest.mark.skipif(
_skip_if_no_mpl(), reason="Missing matplotlib dependency"
)
skip_if_mpl = pytest.mark.skipif(not _skip_if_no_mpl(), reason="matplotlib is present")
skip_if_32bit = pytest.mark.skipif(not IS64, reason="skipping for 32 bit")
skip_if_windows = pytest.mark.skipif(is_platform_windows(), reason="Running on Windows")
skip_if_has_locale = pytest.mark.skipif(
_skip_if_has_locale(), reason=f"Specific locale is set {locale.getlocale()[0]}"
)
skip_if_not_us_locale = pytest.mark.skipif(
_skip_if_not_us_locale(), reason=f"Specific locale is set {locale.getlocale()[0]}"
)
skip_if_no_scipy = pytest.mark.skipif(
_skip_if_no_scipy(), reason="Missing SciPy requirement"
)
skip_if_no_ne = pytest.mark.skipif(
not USE_NUMEXPR,
reason=f"numexpr enabled->{USE_NUMEXPR}, installed->{NUMEXPR_INSTALLED}",
)
# TODO(pytest#7469): return type, _pytest.mark.structures.MarkDecorator is not public
# https://github.com/pytest-dev/pytest/issues/7469
def skip_if_np_lt(ver_str: str, *args, reason: str | None = None):
if reason is None:
reason = f"NumPy {ver_str} or greater required"
return pytest.mark.skipif(
Version(np.__version__) < Version(ver_str),
*args,
reason=reason,
)
def parametrize_fixture_doc(*args):
"""
    Intended for use as a decorator for a parametrized fixture,
    this function formats the initial fixture docstring by replacing
    placeholders {0}, {1}, etc. with the parameters passed as arguments.
Parameters
----------
args: iterable
Positional arguments for docstring.
Returns
-------
function
        The decorated fixture function with its docstring formatted.
"""
def documented_fixture(fixture):
fixture.__doc__ = fixture.__doc__.format(*args)
return fixture
return documented_fixture
def check_file_leaks(func) -> Callable:
"""
Decorate a test function to check that we are not leaking file descriptors.
"""
with file_leak_context():
return func
@contextmanager
def file_leak_context():
"""
ContextManager analogue to check_file_leaks.
"""
psutil = safe_import("psutil")
if not psutil or is_platform_windows():
# Checking for file leaks can hang on Windows CI
yield
else:
proc = psutil.Process()
flist = proc.open_files()
conns = proc.connections()
try:
yield
finally:
flist2 = proc.open_files()
            # on some builds open_files includes file position, which we _don't_
            # expect to remain unchanged, so we need to compare excluding that
flist_ex = [(x.path, x.fd) for x in flist]
flist2_ex = [(x.path, x.fd) for x in flist2]
assert flist2_ex == flist_ex, (flist2, flist)
conns2 = proc.connections()
assert conns2 == conns, (conns2, conns)
def async_mark():
try:
import_optional_dependency("pytest_asyncio")
async_mark = pytest.mark.asyncio
except ImportError:
async_mark = pytest.mark.skip(reason="Missing dependency pytest-asyncio")
return async_mark
def mark_array_manager_not_yet_implemented(request):
mark = pytest.mark.xfail(reason="Not yet implemented for ArrayManager")
request.node.add_marker(mark)
skip_array_manager_not_yet_implemented = pytest.mark.xfail(
get_option("mode.data_manager") == "array",
reason="Not yet implemented for ArrayManager",
)
skip_array_manager_invalid_test = pytest.mark.skipif(
get_option("mode.data_manager") == "array",
reason="Test that relies on BlockManager internals or specific behaviour",
)


@@ -0,0 +1,33 @@
"""
Entrypoint for testing from the top-level namespace.
"""
import os
import sys
PKG = os.path.dirname(os.path.dirname(__file__))
def test(extra_args=None):
"""
Run the pandas test suite using pytest.
"""
try:
import pytest
except ImportError as err:
raise ImportError("Need pytest>=5.0.1 to run tests") from err
try:
import hypothesis # noqa:F401
except ImportError as err:
raise ImportError("Need hypothesis>=3.58 to run tests") from err
cmd = ["--skip-slow", "--skip-network", "--skip-db"]
if extra_args:
if not isinstance(extra_args, list):
extra_args = [extra_args]
cmd = extra_args
cmd += [PKG]
joined = " ".join(cmd)
print(f"running: pytest {joined}")
sys.exit(pytest.main(cmd))
__all__ = ["test"]


@@ -0,0 +1,520 @@
"""
Module that contains many useful utilities
for validating data or function arguments
"""
from __future__ import annotations
from typing import (
Iterable,
Sequence,
)
import warnings
import numpy as np
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.common import (
is_bool,
is_integer,
)
def _check_arg_length(fname, args, max_fname_arg_count, compat_args):
"""
    Checks whether 'args' has at most as many entries as 'compat_args'. Raises
    a TypeError if that is not the case, mirroring Python's behaviour when a
    function is called with too many arguments.
"""
if max_fname_arg_count < 0:
raise ValueError("'max_fname_arg_count' must be non-negative")
if len(args) > len(compat_args):
max_arg_count = len(compat_args) + max_fname_arg_count
actual_arg_count = len(args) + max_fname_arg_count
argument = "argument" if max_arg_count == 1 else "arguments"
raise TypeError(
f"{fname}() takes at most {max_arg_count} {argument} "
f"({actual_arg_count} given)"
)
def _check_for_default_values(fname, arg_val_dict, compat_args):
"""
Check that the keys in `arg_val_dict` are mapped to their
default values as specified in `compat_args`.
Note that this function is to be called only when it has been
checked that arg_val_dict.keys() is a subset of compat_args
"""
for key in arg_val_dict:
# try checking equality directly with '=' operator,
# as comparison may have been overridden for the left
# hand object
try:
v1 = arg_val_dict[key]
v2 = compat_args[key]
# check for None-ness otherwise we could end up
# comparing a numpy array vs None
if (v1 is not None and v2 is None) or (v1 is None and v2 is not None):
match = False
else:
match = v1 == v2
if not is_bool(match):
raise ValueError("'match' is not a boolean")
# could not compare them directly, so try comparison
# using the 'is' operator
except ValueError:
match = arg_val_dict[key] is compat_args[key]
if not match:
raise ValueError(
f"the '{key}' parameter is not supported in "
f"the pandas implementation of {fname}()"
)
def validate_args(fname, args, max_fname_arg_count, compat_args):
"""
    Checks whether the `*args` tuple passed into a function has at most
    `len(compat_args)` elements and whether all of these elements are
    set to their default values.
Parameters
----------
fname : str
The name of the function being passed the `*args` parameter
args : tuple
The `*args` parameter passed into a function
max_fname_arg_count : int
The maximum number of arguments that the function `fname`
can accept, excluding those in `args`. Used for displaying
appropriate error messages. Must be non-negative.
compat_args : dict
A dictionary of keys and their associated default values.
In order to accommodate buggy behaviour in some versions of `numpy`,
where a signature displayed keyword arguments but then passed those
arguments **positionally** internally when calling downstream
implementations, a dict ensures that the original
order of the keyword arguments is enforced.
Raises
------
TypeError
If `args` contains more values than there are `compat_args`
ValueError
If `args` contains values that do not correspond to those
of the default values specified in `compat_args`
"""
_check_arg_length(fname, args, max_fname_arg_count, compat_args)
# We do this so that we can provide a more informative
# error message about the parameters that we are not
# supporting in the pandas implementation of 'fname'
kwargs = dict(zip(compat_args, args))
_check_for_default_values(fname, kwargs, compat_args)
def _check_for_invalid_keys(fname, kwargs, compat_args):
"""
Checks whether 'kwargs' contains any keys that are not
in 'compat_args' and raises a TypeError if there is one.
"""
# set(dict) --> set of the dictionary's keys
diff = set(kwargs) - set(compat_args)
if diff:
bad_arg = list(diff)[0]
raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'")
def validate_kwargs(fname, kwargs, compat_args):
"""
Checks whether parameters passed to the **kwargs argument in a
function `fname` are valid parameters as specified in `*compat_args`
and whether or not they are set to their default values.
Parameters
----------
fname : str
The name of the function being passed the `**kwargs` parameter
kwargs : dict
The `**kwargs` parameter passed into `fname`
compat_args: dict
A dictionary of keys that `kwargs` is allowed to have and their
associated default values
Raises
------
TypeError if `kwargs` contains keys not in `compat_args`
ValueError if `kwargs` contains keys in `compat_args` that do not
map to the default values specified in `compat_args`
"""
kwds = kwargs.copy()
_check_for_invalid_keys(fname, kwargs, compat_args)
_check_for_default_values(fname, kwds, compat_args)
def validate_args_and_kwargs(fname, args, kwargs, max_fname_arg_count, compat_args):
"""
Checks whether parameters passed to the *args and **kwargs argument in a
function `fname` are valid parameters as specified in `*compat_args`
and whether or not they are set to their default values.
Parameters
----------
fname: str
The name of the function being passed the `**kwargs` parameter
args: tuple
The `*args` parameter passed into a function
kwargs: dict
The `**kwargs` parameter passed into `fname`
max_fname_arg_count: int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
appropriate error messages. Must be non-negative.
compat_args: dict
A dictionary of keys that `kwargs` is allowed to
have and their associated default values.
Raises
------
TypeError if `args` contains more values than there are
`compat_args` OR `kwargs` contains keys not in `compat_args`
    ValueError if `args` contains values not at the default value (`None`)
        OR `kwargs` contains keys in `compat_args` that do not map to the
        default values specified in `compat_args`
See Also
--------
validate_args : Purely args validation.
validate_kwargs : Purely kwargs validation.
"""
# Check that the total number of arguments passed in (i.e.
# args and kwargs) does not exceed the length of compat_args
_check_arg_length(
fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
)
# Check there is no overlap with the positional and keyword
# arguments, similar to what is done in actual Python functions
args_dict = dict(zip(compat_args, args))
for key in args_dict:
if key in kwargs:
raise TypeError(
f"{fname}() got multiple values for keyword argument '{key}'"
)
kwargs.update(args_dict)
validate_kwargs(fname, kwargs, compat_args)
def validate_bool_kwarg(value, arg_name, none_allowed=True, int_allowed=False):
"""
Ensure that argument passed in arg_name can be interpreted as boolean.
Parameters
----------
value : bool
Value to be validated.
arg_name : str
Name of the argument. To be reflected in the error message.
none_allowed : bool, default True
Whether to consider None to be a valid boolean.
int_allowed : bool, default False
Whether to consider integer value to be a valid boolean.
Returns
-------
value
The same value as input.
Raises
------
ValueError
If the value is not a valid boolean.
"""
good_value = is_bool(value)
if none_allowed:
good_value = good_value or value is None
if int_allowed:
good_value = good_value or isinstance(value, int)
if not good_value:
raise ValueError(
f'For argument "{arg_name}" expected type bool, received '
f"type {type(value).__name__}."
)
return value
def validate_axis_style_args(data, args, kwargs, arg_name, method_name):
"""
Argument handler for mixed index, columns / axis functions
In an attempt to handle both `.method(index, columns)`, and
`.method(arg, axis=.)`, we have to do some bad things to argument
parsing. This translates all arguments to `{index=., columns=.}` style.
Parameters
----------
data : DataFrame
args : tuple
All positional arguments from the user
kwargs : dict
All keyword arguments from the user
arg_name, method_name : str
Used for better error messages
Returns
-------
kwargs : dict
A dictionary of keyword arguments. Doesn't modify ``kwargs``
inplace, so update them with the return value here.
Examples
--------
>>> df = pd.DataFrame(range(2))
>>> validate_axis_style_args(df, (str.upper,), {'columns': id},
... 'mapper', 'rename')
{'columns': <built-in function id>, 'index': <method 'upper' of 'str' objects>}
This emits a warning
>>> validate_axis_style_args(df, (str.upper, id), {},
... 'mapper', 'rename')
{'index': <method 'upper' of 'str' objects>, 'columns': <built-in function id>}
"""
# TODO: Change to keyword-only args and remove all this
out = {}
# Goal: fill 'out' with index/columns-style arguments
# like out = {'index': foo, 'columns': bar}
# Start by validating for consistency
if "axis" in kwargs and any(x in kwargs for x in data._AXIS_TO_AXIS_NUMBER):
msg = "Cannot specify both 'axis' and any of 'index' or 'columns'."
raise TypeError(msg)
# First fill with explicit values provided by the user...
if arg_name in kwargs:
if args:
msg = f"{method_name} got multiple values for argument '{arg_name}'"
raise TypeError(msg)
axis = data._get_axis_name(kwargs.get("axis", 0))
out[axis] = kwargs[arg_name]
# More user-provided arguments, now from kwargs
for k, v in kwargs.items():
try:
ax = data._get_axis_name(k)
except ValueError:
pass
else:
out[ax] = v
# All user-provided kwargs have been handled now.
# Now we supplement with positional arguments, emitting warnings
# when there's ambiguity and raising when there's conflicts
if len(args) == 0:
pass # It's up to the function to decide if this is valid
elif len(args) == 1:
axis = data._get_axis_name(kwargs.get("axis", 0))
out[axis] = args[0]
elif len(args) == 2:
if "axis" in kwargs:
# Unambiguously wrong
msg = "Cannot specify both 'axis' and any of 'index' or 'columns'"
raise TypeError(msg)
msg = (
f"Interpreting call\n\t'.{method_name}(a, b)' as "
f"\n\t'.{method_name}(index=a, columns=b)'.\nUse named "
"arguments to remove any ambiguity. In the future, using "
"positional arguments for 'index' or 'columns' will raise "
"a 'TypeError'."
)
warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
out[data._get_axis_name(0)] = args[0]
out[data._get_axis_name(1)] = args[1]
else:
msg = f"Cannot specify all of '{arg_name}', 'index', 'columns'."
raise TypeError(msg)
return out
def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True):
"""
Validate the keyword arguments to 'fillna'.
This checks that exactly one of 'value' and 'method' is specified.
If 'method' is specified, this validates that it's a valid method.
Parameters
----------
value, method : object
The 'value' and 'method' keyword arguments for 'fillna'.
validate_scalar_dict_value : bool, default True
Whether to validate that 'value' is a scalar or dict. Specifically,
validate that it is not a list or tuple.
Returns
-------
value, method : object
"""
from pandas.core.missing import clean_fill_method
if value is None and method is None:
raise ValueError("Must specify a fill 'value' or 'method'.")
elif value is None and method is not None:
method = clean_fill_method(method)
elif value is not None and method is None:
if validate_scalar_dict_value and isinstance(value, (list, tuple)):
raise TypeError(
'"value" parameter must be a scalar or dict, but '
f'you passed a "{type(value).__name__}"'
)
elif value is not None and method is not None:
raise ValueError("Cannot specify both 'value' and 'method'.")
return value, method
def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
"""
Validate percentiles (used by describe and quantile).
This function checks if the given float or iterable of floats is a valid percentile
otherwise raises a ValueError.
Parameters
----------
q: float or iterable of floats
A single percentile or an iterable of percentiles.
Returns
-------
ndarray
An ndarray of the percentiles if valid.
Raises
------
ValueError if percentiles are not in given interval([0, 1]).
"""
q_arr = np.asarray(q)
# Don't change this to an f-string. The string formatting
# is too expensive for cases where we don't need it.
msg = "percentiles should all be in the interval [0, 1]. Try {} instead."
if q_arr.ndim == 0:
if not 0 <= q_arr <= 1:
raise ValueError(msg.format(q_arr / 100.0))
else:
if not all(0 <= qs <= 1 for qs in q_arr):
raise ValueError(msg.format(q_arr / 100.0))
return q_arr
def validate_ascending(
ascending: bool | int | Sequence[bool | int] = True,
):
"""Validate ``ascending`` kwargs for ``sort_index`` method."""
kwargs = {"none_allowed": False, "int_allowed": True}
if not isinstance(ascending, (list, tuple)):
return validate_bool_kwarg(ascending, "ascending", **kwargs)
return [validate_bool_kwarg(item, "ascending", **kwargs) for item in ascending]
def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
"""
Check that the `closed` argument is among [None, "left", "right"]
Parameters
----------
closed : {None, "left", "right"}
Returns
-------
left_closed : bool
right_closed : bool
Raises
------
ValueError : if argument is not among valid values
"""
left_closed = False
right_closed = False
if closed is None:
left_closed = True
right_closed = True
elif closed == "left":
left_closed = True
elif closed == "right":
right_closed = True
else:
raise ValueError("Closed has to be either 'left', 'right' or None")
return left_closed, right_closed
def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
"""
Check that the `inclusive` argument is among {"both", "neither", "left", "right"}.
Parameters
----------
inclusive : {"both", "neither", "left", "right"}
Returns
-------
left_right_inclusive : tuple[bool, bool]
Raises
------
ValueError : if argument is not among valid values
"""
left_right_inclusive: tuple[bool, bool] | None = None
if isinstance(inclusive, str):
left_right_inclusive = {
"both": (True, True),
"left": (True, False),
"right": (False, True),
"neither": (False, False),
}.get(inclusive)
if left_right_inclusive is None:
raise ValueError(
"Inclusive has to be either 'both', 'neither', 'left' or 'right'"
)
return left_right_inclusive
def validate_insert_loc(loc: int, length: int) -> int:
"""
Check that we have an integer between -length and length, inclusive.
Standardize negative loc to within [0, length].
The exceptions we raise on failure match np.insert.
"""
if not is_integer(loc):
raise TypeError(f"loc must be an integer between -{length} and {length}")
if loc < 0:
loc += length
if not 0 <= loc <= length:
raise IndexError(f"loc must be an integer between -{length} and {length}")
return loc


@@ -0,0 +1,14 @@
import warnings
from pandas.util._exceptions import find_stack_level
from pandas._testing import * # noqa:F401,F403,PDF014
warnings.warn(
(
"pandas.util.testing is deprecated. Use the functions in the "
"public API at pandas.testing instead."
),
FutureWarning,
stacklevel=find_stack_level(),
)


@@ -0,0 +1,579 @@
# Vendored from https://github.com/pypa/packaging/blob/main/packaging/_structures.py
# and https://github.com/pypa/packaging/blob/main/packaging/version.py
# changeset ae891fd74d6dd4c6063bb04f2faeadaac6fc6313
# 04/30/2021
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
from __future__ import annotations
import collections
import itertools
import re
from typing import (
Callable,
Iterator,
SupportsInt,
Tuple,
Union,
)
import warnings
__all__ = ["parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"]
class InfinityType:
def __repr__(self) -> str:
return "Infinity"
def __hash__(self) -> int:
return hash(repr(self))
def __lt__(self, other: object) -> bool:
return False
def __le__(self, other: object) -> bool:
return False
def __eq__(self, other: object) -> bool:
return isinstance(other, type(self))
def __ne__(self, other: object) -> bool:
return not isinstance(other, type(self))
def __gt__(self, other: object) -> bool:
return True
def __ge__(self, other: object) -> bool:
return True
def __neg__(self: object) -> NegativeInfinityType:
return NegativeInfinity
Infinity = InfinityType()
class NegativeInfinityType:
def __repr__(self) -> str:
return "-Infinity"
def __hash__(self) -> int:
return hash(repr(self))
def __lt__(self, other: object) -> bool:
return True
def __le__(self, other: object) -> bool:
return True
def __eq__(self, other: object) -> bool:
return isinstance(other, type(self))
def __ne__(self, other: object) -> bool:
return not isinstance(other, type(self))
def __gt__(self, other: object) -> bool:
return False
def __ge__(self, other: object) -> bool:
return False
def __neg__(self: object) -> InfinityType:
return Infinity
NegativeInfinity = NegativeInfinityType()
InfiniteTypes = Union[InfinityType, NegativeInfinityType]
PrePostDevType = Union[InfiniteTypes, Tuple[str, int]]
SubLocalType = Union[InfiniteTypes, int, str]
LocalType = Union[
NegativeInfinityType,
Tuple[
Union[
SubLocalType,
Tuple[SubLocalType, str],
Tuple[NegativeInfinityType, SubLocalType],
],
...,
],
]
CmpKey = Tuple[
int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType
]
LegacyCmpKey = Tuple[int, Tuple[str, ...]]
VersionComparisonMethod = Callable[
[Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool
]
_Version = collections.namedtuple(
"_Version", ["epoch", "release", "dev", "pre", "post", "local"]
)
def parse(version: str) -> LegacyVersion | Version:
"""
Parse the given version string and return either a :class:`Version` object
or a :class:`LegacyVersion` object depending on if the given version is
a valid PEP 440 version or a legacy version.
"""
try:
return Version(version)
except InvalidVersion:
return LegacyVersion(version)
class InvalidVersion(ValueError):
"""
    An invalid version was found; users should refer to PEP 440.
"""
class _BaseVersion:
_key: CmpKey | LegacyCmpKey
def __hash__(self) -> int:
return hash(self._key)
# Please keep the duplicated `isinstance` check
# in the six comparisons hereunder
# unless you find a way to avoid adding overhead function calls.
def __lt__(self, other: _BaseVersion) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key < other._key
def __le__(self, other: _BaseVersion) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key <= other._key
def __eq__(self, other: object) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key == other._key
def __ge__(self, other: _BaseVersion) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key >= other._key
def __gt__(self, other: _BaseVersion) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key > other._key
def __ne__(self, other: object) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key != other._key
class LegacyVersion(_BaseVersion):
def __init__(self, version: str) -> None:
self._version = str(version)
self._key = _legacy_cmpkey(self._version)
warnings.warn(
"Creating a LegacyVersion has been deprecated and will be "
"removed in the next major release.",
DeprecationWarning,
)
def __str__(self) -> str:
return self._version
def __repr__(self) -> str:
return f"<LegacyVersion('{self}')>"
@property
def public(self) -> str:
return self._version
@property
def base_version(self) -> str:
return self._version
@property
def epoch(self) -> int:
return -1
@property
def release(self) -> None:
return None
@property
def pre(self) -> None:
return None
@property
def post(self) -> None:
return None
@property
def dev(self) -> None:
return None
@property
def local(self) -> None:
return None
@property
def is_prerelease(self) -> bool:
return False
@property
def is_postrelease(self) -> bool:
return False
@property
def is_devrelease(self) -> bool:
return False
_legacy_version_component_re = re.compile(r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE)
_legacy_version_replacement_map = {
"pre": "c",
"preview": "c",
"-": "final-",
"rc": "c",
"dev": "@",
}
def _parse_version_parts(s: str) -> Iterator[str]:
for part in _legacy_version_component_re.split(s):
part = _legacy_version_replacement_map.get(part, part)
if not part or part == ".":
continue
if part[:1] in "0123456789":
# pad for numeric comparison
yield part.zfill(8)
else:
yield "*" + part
# ensure that alpha/beta/candidate are before final
yield "*final"
def _legacy_cmpkey(version: str) -> LegacyCmpKey:
    # We hardcode an epoch of -1 here. A PEP 440 version can only have an epoch
    # greater than or equal to 0. This will effectively put the LegacyVersion,
    # which uses the de facto standard originally implemented by setuptools,
    # before all PEP 440 versions.
epoch = -1
    # This scheme is taken from pkg_resources.parse_version of setuptools,
    # prior to its adoption of the packaging library.
parts: list[str] = []
for part in _parse_version_parts(version.lower()):
if part.startswith("*"):
# remove "-" before a prerelease tag
if part < "*final":
while parts and parts[-1] == "*final-":
parts.pop()
# remove trailing zeros from each series of numeric parts
while parts and parts[-1] == "00000000":
parts.pop()
parts.append(part)
return epoch, tuple(parts)
# Deliberately not anchored to the start and end of the string, to make it
# easier for 3rd party code to reuse
VERSION_PATTERN = r"""
v?
(?:
(?:(?P<epoch>[0-9]+)!)? # epoch
(?P<release>[0-9]+(?:\.[0-9]+)*) # release segment
(?P<pre> # pre-release
[-_\.]?
(?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
[-_\.]?
(?P<pre_n>[0-9]+)?
)?
(?P<post> # post release
(?:-(?P<post_n1>[0-9]+))
|
(?:
[-_\.]?
(?P<post_l>post|rev|r)
[-_\.]?
(?P<post_n2>[0-9]+)?
)
)?
(?P<dev> # dev release
[-_\.]?
(?P<dev_l>dev)
[-_\.]?
(?P<dev_n>[0-9]+)?
)?
)
(?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))? # local version
"""
class Version(_BaseVersion):
_regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
def __init__(self, version: str) -> None:
# Validate the version and parse it into pieces
match = self._regex.search(version)
if not match:
raise InvalidVersion(f"Invalid version: '{version}'")
# Store the parsed out pieces of the version
self._version = _Version(
epoch=int(match.group("epoch")) if match.group("epoch") else 0,
release=tuple(int(i) for i in match.group("release").split(".")),
pre=_parse_letter_version(match.group("pre_l"), match.group("pre_n")),
post=_parse_letter_version(
match.group("post_l"), match.group("post_n1") or match.group("post_n2")
),
dev=_parse_letter_version(match.group("dev_l"), match.group("dev_n")),
local=_parse_local_version(match.group("local")),
)
# Generate a key which will be used for sorting
self._key = _cmpkey(
self._version.epoch,
self._version.release,
self._version.pre,
self._version.post,
self._version.dev,
self._version.local,
)
def __repr__(self) -> str:
return f"<Version('{self}')>"
def __str__(self) -> str:
parts = []
# Epoch
if self.epoch != 0:
parts.append(f"{self.epoch}!")
# Release segment
parts.append(".".join([str(x) for x in self.release]))
# Pre-release
if self.pre is not None:
parts.append("".join([str(x) for x in self.pre]))
# Post-release
if self.post is not None:
parts.append(f".post{self.post}")
# Development release
if self.dev is not None:
parts.append(f".dev{self.dev}")
# Local version segment
if self.local is not None:
parts.append(f"+{self.local}")
return "".join(parts)
@property
def epoch(self) -> int:
_epoch: int = self._version.epoch
return _epoch
@property
def release(self) -> tuple[int, ...]:
_release: tuple[int, ...] = self._version.release
return _release
@property
def pre(self) -> tuple[str, int] | None:
_pre: tuple[str, int] | None = self._version.pre
return _pre
@property
def post(self) -> int | None:
return self._version.post[1] if self._version.post else None
@property
def dev(self) -> int | None:
return self._version.dev[1] if self._version.dev else None
@property
def local(self) -> str | None:
if self._version.local:
return ".".join([str(x) for x in self._version.local])
else:
return None
@property
def public(self) -> str:
return str(self).split("+", 1)[0]
@property
def base_version(self) -> str:
parts = []
# Epoch
if self.epoch != 0:
parts.append(f"{self.epoch}!")
# Release segment
parts.append(".".join([str(x) for x in self.release]))
return "".join(parts)
@property
def is_prerelease(self) -> bool:
return self.dev is not None or self.pre is not None
@property
def is_postrelease(self) -> bool:
return self.post is not None
@property
def is_devrelease(self) -> bool:
return self.dev is not None
@property
def major(self) -> int:
return self.release[0] if len(self.release) >= 1 else 0
@property
def minor(self) -> int:
return self.release[1] if len(self.release) >= 2 else 0
@property
def micro(self) -> int:
return self.release[2] if len(self.release) >= 3 else 0
def _parse_letter_version(
letter: str, number: str | bytes | SupportsInt
) -> tuple[str, int] | None:
if letter:
# We consider there to be an implicit 0 in a pre-release if there is
# not a numeral associated with it.
if number is None:
number = 0
# We normalize any letters to their lower case form
letter = letter.lower()
# We consider some words to be alternate spellings of other words and
# in those cases we want to normalize the spellings to our preferred
# spelling.
if letter == "alpha":
letter = "a"
elif letter == "beta":
letter = "b"
elif letter in ["c", "pre", "preview"]:
letter = "rc"
elif letter in ["rev", "r"]:
letter = "post"
return letter, int(number)
if not letter and number:
# We assume if we are given a number, but we are not given a letter
# then this is using the implicit post release syntax (e.g. 1.0-1)
letter = "post"
return letter, int(number)
return None
_local_version_separators = re.compile(r"[\._-]")
def _parse_local_version(local: str) -> LocalType | None:
"""
Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
"""
if local is not None:
return tuple(
part.lower() if not part.isdigit() else int(part)
for part in _local_version_separators.split(local)
)
return None
def _cmpkey(
epoch: int,
release: tuple[int, ...],
pre: tuple[str, int] | None,
post: tuple[str, int] | None,
dev: tuple[str, int] | None,
local: tuple[SubLocalType] | None,
) -> CmpKey:
    # When we compare a release version, we want to compare it with all of the
    # trailing zeros removed. So we'll reverse the list, drop all the
    # now-leading zeros until we come to something non-zero, then re-reverse
    # the rest back into the correct order, and make it a tuple to use as our
    # sorting key.
_release = tuple(
reversed(list(itertools.dropwhile(lambda x: x == 0, reversed(release))))
)
# We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
# We'll do this by abusing the pre segment, but we _only_ want to do this
# if there is not a pre or a post segment. If we have one of those then
# the normal sorting rules will handle this case correctly.
if pre is None and post is None and dev is not None:
_pre: PrePostDevType = NegativeInfinity
# Versions without a pre-release (except as noted above) should sort after
# those with one.
elif pre is None:
_pre = Infinity
else:
_pre = pre
# Versions without a post segment should sort before those with one.
if post is None:
_post: PrePostDevType = NegativeInfinity
else:
_post = post
# Versions without a development segment should sort after those with one.
if dev is None:
_dev: PrePostDevType = Infinity
else:
_dev = dev
if local is None:
# Versions without a local segment should sort before those with one.
_local: LocalType = NegativeInfinity
else:
# Versions with a local segment need that segment parsed to implement
# the sorting rules in PEP440.
# - Alpha numeric segments sort before numeric segments
# - Alpha numeric segments sort lexicographically
# - Numeric segments sort numerically
# - Shorter versions sort before longer versions when the prefixes
# match exactly
_local = tuple(
(i, "") if isinstance(i, int) else (NegativeInfinity, i) for i in local
)
return epoch, _release, _pre, _post, _dev, _local