mirror of https://github.com/aykhans/AzSuicideDataVisualization.git
first commit
211 .venv/Lib/site-packages/pandas/core/computation/align.py Normal file
@@ -0,0 +1,211 @@
"""
Core eval alignment algorithms.
"""
from __future__ import annotations

from functools import (
    partial,
    wraps,
)
from typing import (
    TYPE_CHECKING,
    Sequence,
)
import warnings

import numpy as np

from pandas.errors import PerformanceWarning
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.generic import (
    ABCDataFrame,
    ABCSeries,
)

from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.computation.common import result_type_many

if TYPE_CHECKING:
    from pandas.core.generic import NDFrame
    from pandas.core.indexes.api import Index


def _align_core_single_unary_op(
    term,
) -> tuple[partial | type[NDFrame], dict[str, Index] | None]:

    typ: partial | type[NDFrame]
    axes: dict[str, Index] | None = None

    if isinstance(term.value, np.ndarray):
        typ = partial(np.asanyarray, dtype=term.value.dtype)
    else:
        typ = type(term.value)
        if hasattr(term.value, "axes"):
            axes = _zip_axes_from_type(typ, term.value.axes)

    return typ, axes


def _zip_axes_from_type(
    typ: type[NDFrame], new_axes: Sequence[Index]
) -> dict[str, Index]:
    return {name: new_axes[i] for i, name in enumerate(typ._AXIS_ORDERS)}


def _any_pandas_objects(terms) -> bool:
    """
    Check a sequence of terms for instances of PandasObject.
    """
    return any(isinstance(term.value, PandasObject) for term in terms)


def _filter_special_cases(f):
    @wraps(f)
    def wrapper(terms):
        # single unary operand
        if len(terms) == 1:
            return _align_core_single_unary_op(terms[0])

        term_values = (term.value for term in terms)

        # we don't have any pandas objects
        if not _any_pandas_objects(terms):
            return result_type_many(*term_values), None

        return f(terms)

    return wrapper


@_filter_special_cases
def _align_core(terms):
    term_index = [i for i, term in enumerate(terms) if hasattr(term.value, "axes")]
    term_dims = [terms[i].value.ndim for i in term_index]

    from pandas import Series

    ndims = Series(dict(zip(term_index, term_dims)))

    # initial axes are the axes of the largest-axis'd term
    biggest = terms[ndims.idxmax()].value
    typ = biggest._constructor
    axes = biggest.axes
    naxes = len(axes)
    gt_than_one_axis = naxes > 1

    for value in (terms[i].value for i in term_index):
        is_series = isinstance(value, ABCSeries)
        is_series_and_gt_one_axis = is_series and gt_than_one_axis

        for axis, items in enumerate(value.axes):
            if is_series_and_gt_one_axis:
                ax, itm = naxes - 1, value.index
            else:
                ax, itm = axis, items

            if not axes[ax].is_(itm):
                axes[ax] = axes[ax].join(itm, how="outer")

    for i, ndim in ndims.items():
        for axis, items in zip(range(ndim), axes):
            ti = terms[i].value

            if hasattr(ti, "reindex"):
                transpose = isinstance(ti, ABCSeries) and naxes > 1
                reindexer = axes[naxes - 1] if transpose else items

                term_axis_size = len(ti.axes[axis])
                reindexer_size = len(reindexer)

                ordm = np.log10(max(1, abs(reindexer_size - term_axis_size)))
                if ordm >= 1 and reindexer_size >= 10000:
                    w = (
                        f"Alignment difference on axis {axis} is larger "
                        f"than an order of magnitude on term {repr(terms[i].name)}, "
                        f"by more than {ordm:.4g}; performance may suffer."
                    )
                    warnings.warn(
                        w, category=PerformanceWarning, stacklevel=find_stack_level()
                    )

                f = partial(ti.reindex, reindexer, axis=axis, copy=False)

                terms[i].update(f())

        terms[i].update(terms[i].value.values)

    return typ, _zip_axes_from_type(typ, axes)


def align_terms(terms):
    """
    Align a set of terms.
    """
    try:
        # flatten the parse tree (a nested list, really)
        terms = list(com.flatten(terms))
    except TypeError:
        # can't iterate so it must just be a constant or single variable
        if isinstance(terms.value, (ABCSeries, ABCDataFrame)):
            typ = type(terms.value)
            return typ, _zip_axes_from_type(typ, terms.value.axes)
        return np.result_type(terms.type), None

    # if all resolved variables are numeric scalars
    if all(term.is_scalar for term in terms):
        return result_type_many(*(term.value for term in terms)).type, None

    # perform the main alignment
    typ, axes = _align_core(terms)
    return typ, axes


def reconstruct_object(typ, obj, axes, dtype):
    """
    Reconstruct an object given its type, raw value, and possibly empty
    (None) axes.

    Parameters
    ----------
    typ : object
        A type
    obj : object
        The value to use in the type constructor
    axes : dict
        The axes to use to construct the resulting pandas object

    Returns
    -------
    ret : typ
        An object of type ``typ`` with the value `obj` and possible axes
        `axes`.
    """
    try:
        typ = typ.type
    except AttributeError:
        pass

    res_t = np.result_type(obj.dtype, dtype)

    if not isinstance(typ, partial) and issubclass(typ, PandasObject):
        return typ(obj, dtype=res_t, **axes)

    # special case for pathological things like ~True/~False
    if hasattr(res_t, "type") and typ == np.bool_ and res_t != np.bool_:
        ret_value = res_t.type(obj)
    else:
        ret_value = typ(obj).astype(res_t)
        # The condition is to distinguish 0-dim array (returned in case of
        # scalar) and 1 element array
        # e.g. np.array(0) and np.array([0])
        if (
            len(obj.shape) == 1
            and len(obj) == 1
            and not isinstance(ret_value, np.ndarray)
        ):
            ret_value = np.array([ret_value]).astype(res_t)

    return ret_value
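# --- Editor's note (illustrative sketch, not part of the upstream pandas source) ---
# The alignment above is what lets ``pd.eval`` combine pandas objects whose
# indexes differ: axes are outer-joined before the engine runs, so the result
# behaves like the equivalent binary op. Observable behavior, assuming only
# pandas is installed:
#
#   >>> import pandas as pd
#   >>> s1 = pd.Series([1, 2], index=["a", "b"])
#   >>> s2 = pd.Series([10, 20], index=["b", "c"])
#   >>> pd.eval("s1 + s2")   # indexes are outer-joined, mismatches become NaN
#   a     NaN
#   b    12.0
#   c     NaN
#   dtype: float64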
2 .venv/Lib/site-packages/pandas/core/computation/api.py Normal file
@@ -0,0 +1,2 @@
__all__ = ["eval"]
from pandas.core.computation.eval import eval
10 .venv/Lib/site-packages/pandas/core/computation/check.py Normal file
@@ -0,0 +1,10 @@
from pandas.compat._optional import import_optional_dependency

ne = import_optional_dependency("numexpr", errors="warn")
NUMEXPR_INSTALLED = ne is not None
if NUMEXPR_INSTALLED:
    NUMEXPR_VERSION = ne.__version__
else:
    NUMEXPR_VERSION = None

__all__ = ["NUMEXPR_INSTALLED", "NUMEXPR_VERSION"]
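# --- Editor's note (illustrative sketch, not part of the upstream pandas source) ---
# These flags are how the rest of pandas discovers whether numexpr is usable:
#
#   >>> from pandas.core.computation.check import NUMEXPR_INSTALLED, NUMEXPR_VERSION
#   >>> NUMEXPR_INSTALLED    # False (with a warning at import) if numexpr is missing
#   True
#   >>> NUMEXPR_VERSION      # None when numexpr is not installed, else its version string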
26 .venv/Lib/site-packages/pandas/core/computation/common.py Normal file
@@ -0,0 +1,26 @@
from functools import reduce

import numpy as np

from pandas._config import get_option


def ensure_decoded(s):
    """
    If we have bytes, decode them to unicode.
    """
    if isinstance(s, (np.bytes_, bytes)):
        s = s.decode(get_option("display.encoding"))
    return s


def result_type_many(*arrays_and_dtypes):
    """
    Wrapper around numpy.result_type which overcomes the NPY_MAXARGS (32)
    argument limit.
    """
    try:
        return np.result_type(*arrays_and_dtypes)
    except ValueError:
        # we have > NPY_MAXARGS terms in our expression
        return reduce(np.result_type, arrays_and_dtypes)
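# --- Editor's note (illustrative sketch, not part of the upstream pandas source) ---
# ``np.result_type`` accepts at most NPY_MAXARGS (32) arguments, so a long
# expression would overflow it; ``result_type_many`` falls back to a pairwise
# reduce, which promotes identically:
#
#   >>> import numpy as np
#   >>> from pandas.core.computation.common import result_type_many
#   >>> args = [np.int8] * 40 + [np.float32]   # 41 dtypes, > NPY_MAXARGS
#   >>> # np.result_type(*args) would raise ValueError here
#   >>> result_type_many(*args)
#   dtype('float32')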
143 .venv/Lib/site-packages/pandas/core/computation/engines.py Normal file
@@ -0,0 +1,143 @@
"""
Engine classes for :func:`~pandas.eval`
"""
from __future__ import annotations

import abc

from pandas.core.computation.align import (
    align_terms,
    reconstruct_object,
)
from pandas.core.computation.expr import Expr
from pandas.core.computation.ops import (
    MATHOPS,
    REDUCTIONS,
)

import pandas.io.formats.printing as printing

_ne_builtins = frozenset(MATHOPS + REDUCTIONS)


class NumExprClobberingError(NameError):
    pass


def _check_ne_builtin_clash(expr: Expr) -> None:
    """
    Attempt to prevent foot-shooting in a helpful way.

    Parameters
    ----------
    expr : Expr
        Terms can contain names that overlap with the numexpr builtins.
    """
    names = expr.names
    overlap = names & _ne_builtins

    if overlap:
        s = ", ".join([repr(x) for x in overlap])
        raise NumExprClobberingError(
            f'Variables in expression "{expr}" overlap with builtins: ({s})'
        )


class AbstractEngine(metaclass=abc.ABCMeta):
    """Object serving as a base class for all engines."""

    has_neg_frac = False

    def __init__(self, expr):
        self.expr = expr
        self.aligned_axes = None
        self.result_type = None

    def convert(self) -> str:
        """
        Convert an expression for evaluation.

        Defaults to return the expression as a string.
        """
        return printing.pprint_thing(self.expr)

    def evaluate(self) -> object:
        """
        Run the engine on the expression.

        This method performs alignment which is necessary no matter what engine
        is being used, thus its implementation is in the base class.

        Returns
        -------
        object
            The result of the passed expression.
        """
        if not self._is_aligned:
            self.result_type, self.aligned_axes = align_terms(self.expr.terms)

        # make sure no names in resolvers and locals/globals clash
        res = self._evaluate()
        return reconstruct_object(
            self.result_type, res, self.aligned_axes, self.expr.terms.return_type
        )

    @property
    def _is_aligned(self) -> bool:
        return self.aligned_axes is not None and self.result_type is not None

    @abc.abstractmethod
    def _evaluate(self):
        """
        Return an evaluated expression.

        Parameters
        ----------
        env : Scope
            The local and global environment in which to evaluate an
            expression.

        Notes
        -----
        Must be implemented by subclasses.
        """
        pass


class NumExprEngine(AbstractEngine):
    """NumExpr engine class"""

    has_neg_frac = True

    def _evaluate(self):
        import numexpr as ne

        # convert the expression to a valid numexpr expression
        s = self.convert()

        env = self.expr.env
        scope = env.full_scope
        _check_ne_builtin_clash(self.expr)
        return ne.evaluate(s, local_dict=scope)


class PythonEngine(AbstractEngine):
    """
    Evaluate an expression in Python space.

    Mostly for testing purposes.
    """

    has_neg_frac = False

    def evaluate(self):
        return self.expr()

    def _evaluate(self) -> None:
        pass


ENGINES: dict[str, type[AbstractEngine]] = {
    "numexpr": NumExprEngine,
    "python": PythonEngine,
}
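# --- Editor's note (illustrative sketch, not part of the upstream pandas source) ---
# ``pd.eval`` looks the engine up in the ``ENGINES`` mapping above. The same
# dispatch can be exercised directly; this is internal API and may differ
# across pandas versions:
#
#   >>> from pandas.core.computation.expr import Expr
#   >>> from pandas.core.computation.engines import ENGINES
#   >>> expr = Expr("1 + 2", engine="python", parser="pandas")
#   >>> ENGINES["python"](expr).evaluate()
#   3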
408 .venv/Lib/site-packages/pandas/core/computation/eval.py Normal file
@@ -0,0 +1,408 @@
"""
Top level ``eval`` module.
"""
from __future__ import annotations

import tokenize
import warnings

from pandas._libs.lib import no_default
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_bool_kwarg

from pandas.core.computation.engines import ENGINES
from pandas.core.computation.expr import (
    PARSERS,
    Expr,
)
from pandas.core.computation.ops import BinOp
from pandas.core.computation.parsing import tokenize_string
from pandas.core.computation.scope import ensure_scope

from pandas.io.formats.printing import pprint_thing


def _check_engine(engine: str | None) -> str:
    """
    Make sure a valid engine is passed.

    Parameters
    ----------
    engine : str
        String to validate.

    Raises
    ------
    KeyError
        * If an invalid engine is passed.
    ImportError
        * If numexpr was requested but doesn't exist.

    Returns
    -------
    str
        Engine name.
    """
    from pandas.core.computation.check import NUMEXPR_INSTALLED
    from pandas.core.computation.expressions import USE_NUMEXPR

    if engine is None:
        engine = "numexpr" if USE_NUMEXPR else "python"

    if engine not in ENGINES:
        valid_engines = list(ENGINES.keys())
        raise KeyError(
            f"Invalid engine '{engine}' passed, valid engines are {valid_engines}"
        )

    # TODO: validate this in a more general way (thinking of future engines
    # that won't necessarily be import-able)
    # Could potentially be done on engine instantiation
    if engine == "numexpr" and not NUMEXPR_INSTALLED:
        raise ImportError(
            "'numexpr' is not installed or an unsupported version. Cannot use "
            "engine='numexpr' for query/eval if 'numexpr' is not installed"
        )

    return engine


def _check_parser(parser: str):
    """
    Make sure a valid parser is passed.

    Parameters
    ----------
    parser : str

    Raises
    ------
    KeyError
        * If an invalid parser is passed
    """
    if parser not in PARSERS:
        raise KeyError(
            f"Invalid parser '{parser}' passed, valid parsers are {PARSERS.keys()}"
        )


def _check_resolvers(resolvers):
    if resolvers is not None:
        for resolver in resolvers:
            if not hasattr(resolver, "__getitem__"):
                name = type(resolver).__name__
                raise TypeError(
                    f"Resolver of type '{name}' does not "
                    "implement the __getitem__ method"
                )


def _check_expression(expr):
    """
    Make sure an expression is not an empty string

    Parameters
    ----------
    expr : object
        An object that can be converted to a string

    Raises
    ------
    ValueError
        * If expr is an empty string
    """
    if not expr:
        raise ValueError("expr cannot be an empty string")


def _convert_expression(expr) -> str:
    """
    Convert an object to an expression.

    This function converts an object to an expression (a unicode string) and
    checks to make sure it isn't empty after conversion. This is used to
    convert operators to their string representation for recursive calls to
    :func:`~pandas.eval`.

    Parameters
    ----------
    expr : object
        The object to be converted to a string.

    Returns
    -------
    str
        The string representation of an object.

    Raises
    ------
    ValueError
        * If the expression is empty.
    """
    s = pprint_thing(expr)
    _check_expression(s)
    return s


def _check_for_locals(expr: str, stack_level: int, parser: str):

    at_top_of_stack = stack_level == 0
    not_pandas_parser = parser != "pandas"

    if not_pandas_parser:
        msg = "The '@' prefix is only supported by the pandas parser"
    elif at_top_of_stack:
        msg = (
            "The '@' prefix is not allowed in top-level eval calls.\n"
            "please refer to your variables by name without the '@' prefix."
        )

    if at_top_of_stack or not_pandas_parser:
        for toknum, tokval in tokenize_string(expr):
            if toknum == tokenize.OP and tokval == "@":
                raise SyntaxError(msg)
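# --- Editor's note (illustrative sketch, not part of the upstream pandas source) ---
# The guard above is why ``@variable`` syntax is rejected in top-level calls
# (and with the 'python' parser) but works inside ``DataFrame.query``/``eval``,
# where the stack level is non-zero:
#
#   >>> import pandas as pd
#   >>> x = 2
#   >>> pd.eval("@x + 1")          # raises SyntaxError: The '@' prefix is not
#   ...                            # allowed in top-level eval calls. ...
#   >>> df = pd.DataFrame({"a": [1, 2, 3]})
#   >>> df.query("a > @x")         # inside query, '@x' resolves the local x
#      a
#   2  3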
def eval(
    expr: str | BinOp,  # we leave BinOp out of the docstr bc it isn't for users
    parser: str = "pandas",
    engine: str | None = None,
    truediv=no_default,
    local_dict=None,
    global_dict=None,
    resolvers=(),
    level=0,
    target=None,
    inplace=False,
):
    """
    Evaluate a Python expression as a string using various backends.

    The following arithmetic operations are supported: ``+``, ``-``, ``*``,
    ``/``, ``**``, ``%``, ``//`` (python engine only) along with the following
    boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not).
    Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`,
    :keyword:`or`, and :keyword:`not` with the same semantics as the
    corresponding bitwise operators. :class:`~pandas.Series` and
    :class:`~pandas.DataFrame` objects are supported and behave as they would
    with plain ol' Python evaluation.

    Parameters
    ----------
    expr : str
        The expression to evaluate. This string cannot contain any Python
        `statements
        <https://docs.python.org/3/reference/simple_stmts.html#simple-statements>`__,
        only Python `expressions
        <https://docs.python.org/3/reference/simple_stmts.html#expression-statements>`__.
    parser : {'pandas', 'python'}, default 'pandas'
        The parser to use to construct the syntax tree from the expression. The
        default of ``'pandas'`` parses code slightly differently than standard
        Python. Alternatively, you can parse an expression using the
        ``'python'`` parser to retain strict Python semantics. See the
        :ref:`enhancing performance <enhancingperf.eval>` documentation for
        more details.
    engine : {'python', 'numexpr'}, default 'numexpr'

        The engine used to evaluate the expression. Supported engines are

        - None : tries to use ``numexpr``, falls back to ``python``
        - ``'numexpr'``: This default engine evaluates pandas objects using
          numexpr for large speed ups in complex expressions
          with large frames.
        - ``'python'``: Performs operations as if you had ``eval``'d in top
          level python. This engine is generally not that useful.

        More backends may be available in the future.

    truediv : bool, optional
        Whether to use true division, like in Python >= 3.

        .. deprecated:: 1.0.0

    local_dict : dict or None, optional
        A dictionary of local variables, taken from locals() by default.
    global_dict : dict or None, optional
        A dictionary of global variables, taken from globals() by default.
    resolvers : list of dict-like or None, optional
        A list of objects implementing the ``__getitem__`` special method that
        you can use to inject an additional collection of namespaces to use for
        variable lookup. For example, this is used in the
        :meth:`~DataFrame.query` method to inject the
        ``DataFrame.index`` and ``DataFrame.columns``
        variables that refer to their respective :class:`~pandas.DataFrame`
        instance attributes.
    level : int, optional
        The number of prior stack frames to traverse and add to the current
        scope. Most users will **not** need to change this parameter.
    target : object, optional, default None
        This is the target object for assignment. It is used when there is
        variable assignment in the expression. If so, then `target` must
        support item assignment with string keys, and if a copy is being
        returned, it must also support `.copy()`.
    inplace : bool, default False
        If `target` is provided, and the expression mutates `target`, whether
        to modify `target` inplace. Otherwise, return a copy of `target` with
        the mutation.

    Returns
    -------
    ndarray, numeric scalar, DataFrame, Series, or None
        The completion value of evaluating the given code or None if ``inplace=True``.

    Raises
    ------
    ValueError
        There are many instances where such an error can be raised:

        - `target=None`, but the expression is multiline.
        - The expression is multiline, but not all of the lines have item
          assignment. An example of such an arrangement is this:

          a = b + 1
          a + 2

          Here, there are expressions on different lines, making it multiline,
          but the last line has no variable assigned to the output of `a + 2`.
        - `inplace=True`, but the expression is missing item assignment.
        - Item assignment is provided, but the `target` does not support
          string item assignment.
        - Item assignment is provided and `inplace=False`, but the `target`
          does not support the `.copy()` method

    See Also
    --------
    DataFrame.query : Evaluates a boolean expression to query the columns
        of a frame.
    DataFrame.eval : Evaluate a string describing operations on
        DataFrame columns.

    Notes
    -----
    The ``dtype`` of any objects involved in an arithmetic ``%`` operation are
    recursively cast to ``float64``.

    See the :ref:`enhancing performance <enhancingperf.eval>` documentation for
    more details.

    Examples
    --------
    >>> df = pd.DataFrame({"animal": ["dog", "pig"], "age": [10, 20]})
    >>> df
      animal  age
    0    dog   10
    1    pig   20

    We can add a new column using ``pd.eval``:

    >>> pd.eval("double_age = df.age * 2", target=df)
      animal  age  double_age
    0    dog   10          20
    1    pig   20          40
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    if truediv is not no_default:
        warnings.warn(
            (
                "The `truediv` parameter in pd.eval is deprecated and "
                "will be removed in a future version."
            ),
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    exprs: list[str | BinOp]
    if isinstance(expr, str):
        _check_expression(expr)
        exprs = [e.strip() for e in expr.splitlines() if e.strip() != ""]
    else:
        # ops.BinOp; for internal compat, not intended to be passed by users
        exprs = [expr]
    multi_line = len(exprs) > 1

    if multi_line and target is None:
        raise ValueError(
            "multi-line expressions are only valid in the "
            "context of data, use DataFrame.eval"
        )
    engine = _check_engine(engine)
    _check_parser(parser)
    _check_resolvers(resolvers)

    ret = None
    first_expr = True
    target_modified = False

    for expr in exprs:
        expr = _convert_expression(expr)
        _check_for_locals(expr, level, parser)

        # get our (possibly passed-in) scope
        env = ensure_scope(
            level + 1,
            global_dict=global_dict,
            local_dict=local_dict,
            resolvers=resolvers,
            target=target,
        )

        parsed_expr = Expr(expr, engine=engine, parser=parser, env=env)

        # construct the engine and evaluate the parsed expression
        eng = ENGINES[engine]
        eng_inst = eng(parsed_expr)
        ret = eng_inst.evaluate()

        if parsed_expr.assigner is None:
            if multi_line:
                raise ValueError(
                    "Multi-line expressions are only valid "
                    "if all expressions contain an assignment"
                )
            elif inplace:
                raise ValueError("Cannot operate inplace if there is no assignment")

        # assign if needed
        assigner = parsed_expr.assigner
        if env.target is not None and assigner is not None:
            target_modified = True

            # if returning a copy, copy only on the first assignment
            if not inplace and first_expr:
                try:
                    target = env.target.copy()
                except AttributeError as err:
                    raise ValueError("Cannot return a copy of the target") from err
            else:
                target = env.target

            # TypeError is most commonly raised (e.g. int, list), but you
            # get IndexError if you try to do this assignment on np.ndarray.
            # we will ignore numpy warnings here; e.g. if trying
            # to use a non-numeric indexer
            try:
                with warnings.catch_warnings(record=True):
                    # TODO: Filter the warnings we actually care about here.
                    target[assigner] = ret
            except (TypeError, IndexError) as err:
                raise ValueError("Cannot assign expression output to target") from err

            if not resolvers:
                resolvers = ({assigner: ret},)
            else:
                # existing resolver needs updated to handle
                # case of mutating existing column in copy
                for resolver in resolvers:
                    if assigner in resolver:
                        resolver[assigner] = ret
                        break
                else:
                    resolvers += ({assigner: ret},)

            ret = None
            first_expr = False

    # We want to exclude `inplace=None` as being False.
    if inplace is False:
        return target if target_modified else ret
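# --- Editor's note (illustrative sketch, not part of the upstream pandas source) ---
# Multi-line expressions require a ``target`` and every line must assign. An
# earlier assignment is pushed into the resolvers, so later lines can refer to
# it; with ``inplace=False`` (the default) the mutated copy is returned:
#
#   >>> import pandas as pd
#   >>> df = pd.DataFrame({"a": [1, 2]})
#   >>> pd.eval("b = df.a * 2\nc = b + 1", target=df)
#      a  b  c
#   0  1  2  3
#   1  2  4  5
#   >>> "b" in df.columns    # the original is untouched; a copy was returned
#   False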
842 .venv/Lib/site-packages/pandas/core/computation/expr.py Normal file
@@ -0,0 +1,842 @@
"""
:func:`~pandas.eval` parsers.
"""
from __future__ import annotations

import ast
from functools import (
    partial,
    reduce,
)
from keyword import iskeyword
import tokenize
from typing import (
    Callable,
    TypeVar,
)

import numpy as np

from pandas.compat import PY39

import pandas.core.common as com
from pandas.core.computation.ops import (
    ARITH_OPS_SYMS,
    BOOL_OPS_SYMS,
    CMP_OPS_SYMS,
    LOCAL_TAG,
    MATHOPS,
    REDUCTIONS,
    UNARY_OPS_SYMS,
    BinOp,
    Constant,
    Div,
    FuncNode,
    Op,
    Term,
    UnaryOp,
    UndefinedVariableError,
    is_term,
)
from pandas.core.computation.parsing import (
    clean_backtick_quoted_toks,
    tokenize_string,
)
from pandas.core.computation.scope import Scope

import pandas.io.formats.printing as printing


def _rewrite_assign(tok: tuple[int, str]) -> tuple[int, str]:
    """
    Rewrite the assignment operator for PyTables expressions that use ``=``
    as a substitute for ``==``.

    Parameters
    ----------
    tok : tuple of int, str
        ints correspond to the all caps constants in the tokenize module

    Returns
    -------
    tuple of int, str
        Either the input token or the replacement values
    """
    toknum, tokval = tok
    return toknum, "==" if tokval == "=" else tokval


def _replace_booleans(tok: tuple[int, str]) -> tuple[int, str]:
    """
    Replace ``&`` with ``and`` and ``|`` with ``or`` so that bitwise
    precedence is changed to boolean precedence.

    Parameters
    ----------
    tok : tuple of int, str
        ints correspond to the all caps constants in the tokenize module

    Returns
    -------
    tuple of int, str
        Either the input token or the replacement values
    """
    toknum, tokval = tok
    if toknum == tokenize.OP:
        if tokval == "&":
            return tokenize.NAME, "and"
        elif tokval == "|":
            return tokenize.NAME, "or"
        return toknum, tokval
    return toknum, tokval


def _replace_locals(tok: tuple[int, str]) -> tuple[int, str]:
    """
    Replace local variables with a syntactically valid name.

    Parameters
    ----------
    tok : tuple of int, str
        ints correspond to the all caps constants in the tokenize module

    Returns
    -------
    tuple of int, str
        Either the input token or the replacement values

    Notes
    -----
    This is somewhat of a hack in that we rewrite a string such as ``'@a'`` as
    ``'__pd_eval_local_a'`` by telling the tokenizer that ``__pd_eval_local_``
    is a ``tokenize.OP`` and to replace the ``'@'`` symbol with it.
    """
    toknum, tokval = tok
    if toknum == tokenize.OP and tokval == "@":
        return tokenize.OP, LOCAL_TAG
    return toknum, tokval


def _compose2(f, g):
    """
    Compose 2 callables.
    """
    return lambda *args, **kwargs: f(g(*args, **kwargs))


def _compose(*funcs):
    """
    Compose 2 or more callables.
    """
    assert len(funcs) > 1, "At least 2 callables must be passed to compose"
    return reduce(_compose2, funcs)


def _preparse(
    source: str,
    f=_compose(
        _replace_locals, _replace_booleans, _rewrite_assign, clean_backtick_quoted_toks
    ),
) -> str:
    """
    Compose a collection of tokenization functions.

    Parameters
    ----------
    source : str
        A Python source code string
    f : callable
        This takes a tuple of (toknum, tokval) as its argument and returns a
        tuple with the same structure but possibly different elements. Defaults
        to the composition of ``_rewrite_assign``, ``_replace_booleans``, and
        ``_replace_locals``.

    Returns
    -------
    str
        Valid Python source code

    Notes
    -----
    The `f` parameter can be any callable that takes *and* returns input of the
    form ``(toknum, tokval)``, where ``toknum`` is one of the constants from
    the ``tokenize`` module and ``tokval`` is a string.
    """
    assert callable(f), "f must be callable"
    return tokenize.untokenize(f(x) for x in tokenize_string(source))
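# --- Editor's note (illustrative sketch, not part of the upstream pandas source) ---
# The token-level rewrites above are easiest to see on a round trip (the exact
# whitespace in the untokenized output may vary):
#
#   >>> _preparse("a & b")      # '&' becomes 'and' (boolean precedence)
#   >>> _preparse("@x + 1")     # '@x' becomes the tagged name '__pd_eval_local_x'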
def _is_type(t):
    """
    Factory for a type checking function of type ``t`` or tuple of types.
    """
    return lambda x: isinstance(x.value, t)


_is_list = _is_type(list)
_is_str = _is_type(str)


# partition all AST nodes
_all_nodes = frozenset(
    node
    for node in (getattr(ast, name) for name in dir(ast))
    if isinstance(node, type) and issubclass(node, ast.AST)
)


def _filter_nodes(superclass, all_nodes=_all_nodes):
    """
    Filter out AST nodes that are subclasses of ``superclass``.
    """
    node_names = (node.__name__ for node in all_nodes if issubclass(node, superclass))
    return frozenset(node_names)


_all_node_names = frozenset(map(lambda x: x.__name__, _all_nodes))
_mod_nodes = _filter_nodes(ast.mod)
_stmt_nodes = _filter_nodes(ast.stmt)
_expr_nodes = _filter_nodes(ast.expr)
_expr_context_nodes = _filter_nodes(ast.expr_context)
_boolop_nodes = _filter_nodes(ast.boolop)
_operator_nodes = _filter_nodes(ast.operator)
_unary_op_nodes = _filter_nodes(ast.unaryop)
_cmp_op_nodes = _filter_nodes(ast.cmpop)
_comprehension_nodes = _filter_nodes(ast.comprehension)
_handler_nodes = _filter_nodes(ast.excepthandler)
_arguments_nodes = _filter_nodes(ast.arguments)
_keyword_nodes = _filter_nodes(ast.keyword)
_alias_nodes = _filter_nodes(ast.alias)

if not PY39:
    _slice_nodes = _filter_nodes(ast.slice)


# nodes that we don't support directly but are needed for parsing
_hacked_nodes = frozenset(["Assign", "Module", "Expr"])


_unsupported_expr_nodes = frozenset(
    [
        "Yield",
        "GeneratorExp",
        "IfExp",
        "DictComp",
        "SetComp",
        "Repr",
        "Lambda",
        "Set",
        "AST",
        "Is",
        "IsNot",
    ]
)

# these nodes are low priority or won't ever be supported (e.g., AST)
_unsupported_nodes = (
    _stmt_nodes
    | _mod_nodes
    | _handler_nodes
    | _arguments_nodes
    | _keyword_nodes
    | _alias_nodes
    | _expr_context_nodes
    | _unsupported_expr_nodes
) - _hacked_nodes

# we're rewriting assignment in some cases to be equality comparison,
# and we don't want `stmt` and friends in there, so get only the classes whose
# names are capitalized
_base_supported_nodes = (_all_node_names - _unsupported_nodes) | _hacked_nodes
intersection = _unsupported_nodes & _base_supported_nodes
_msg = f"cannot both support and not support {intersection}"
assert not intersection, _msg


def _node_not_implemented(node_name: str) -> Callable[..., None]:
    """
    Return a function that raises a NotImplementedError with a passed node name.
    """

    def f(self, *args, **kwargs):
        raise NotImplementedError(f"'{node_name}' nodes are not implemented")

    return f


# should be bound by BaseExprVisitor but that creates a circular dependency:
# _T is used in disallow, but disallow is used to define BaseExprVisitor
# https://github.com/microsoft/pyright/issues/2315
_T = TypeVar("_T")


def disallow(nodes: set[str]) -> Callable[[type[_T]], type[_T]]:
    """
    Decorator to disallow certain nodes from parsing. Raises a
    NotImplementedError instead.

    Returns
    -------
    callable
    """

    def disallowed(cls: type[_T]) -> type[_T]:
        # error: "Type[_T]" has no attribute "unsupported_nodes"
        cls.unsupported_nodes = ()  # type: ignore[attr-defined]
        for node in nodes:
            new_method = _node_not_implemented(node)
            name = f"visit_{node}"
            # error: "Type[_T]" has no attribute "unsupported_nodes"
            cls.unsupported_nodes += (name,)  # type: ignore[attr-defined]
            setattr(cls, name, new_method)
        return cls

    return disallowed


def _op_maker(op_class, op_symbol):
    """
    Return a function to create an op class with its symbol already passed.

    Returns
    -------
    callable
    """

    def f(self, node, *args, **kwargs):
        """
        Return a partial function with an Op subclass with an operator already passed.

        Returns
        -------
        callable
        """
        return partial(op_class, op_symbol, *args, **kwargs)

    return f


_op_classes = {"binary": BinOp, "unary": UnaryOp}


def add_ops(op_classes):
    """
    Decorator to add default implementation of ops.
    """

    def f(cls):
        for op_attr_name, op_class in op_classes.items():
            ops = getattr(cls, f"{op_attr_name}_ops")
            ops_map = getattr(cls, f"{op_attr_name}_op_nodes_map")
            for op in ops:
                op_node = ops_map[op]
                if op_node is not None:
                    made_op = _op_maker(op_class, op)
                    setattr(cls, f"visit_{op_node}", made_op)
        return cls

    return f


@disallow(_unsupported_nodes)
@add_ops(_op_classes)
class BaseExprVisitor(ast.NodeVisitor):
    """
    Custom ast walker. Parsers of other engines should subclass this class
    if necessary.

    Parameters
    ----------
    env : Scope
    engine : str
    parser : str
    preparser : callable
    """

    const_type: type[Term] = Constant
    term_type = Term

    binary_ops = CMP_OPS_SYMS + BOOL_OPS_SYMS + ARITH_OPS_SYMS
    binary_op_nodes = (
        "Gt",
        "Lt",
        "GtE",
        "LtE",
        "Eq",
        "NotEq",
        "In",
        "NotIn",
        "BitAnd",
        "BitOr",
        "And",
        "Or",
        "Add",
        "Sub",
        "Mult",
        None,
        "Pow",
        "FloorDiv",
        "Mod",
    )
    binary_op_nodes_map = dict(zip(binary_ops, binary_op_nodes))

    unary_ops = UNARY_OPS_SYMS
    unary_op_nodes = "UAdd", "USub", "Invert", "Not"
    unary_op_nodes_map = {k: v for k, v in zip(unary_ops, unary_op_nodes)}

    rewrite_map = {
        ast.Eq: ast.In,
        ast.NotEq: ast.NotIn,
        ast.In: ast.In,
        ast.NotIn: ast.NotIn,
    }

    unsupported_nodes: tuple[str, ...]

    def __init__(self, env, engine, parser, preparser=_preparse):
        self.env = env
        self.engine = engine
        self.parser = parser
        self.preparser = preparser
        self.assigner = None

    def visit(self, node, **kwargs):
        if isinstance(node, str):
            clean = self.preparser(node)
            try:
                node = ast.fix_missing_locations(ast.parse(clean))
            except SyntaxError as e:
                if any(iskeyword(x) for x in clean.split()):
                    e.msg = "Python keyword not valid identifier in numexpr query"
                raise e

        method = "visit_" + type(node).__name__
        visitor = getattr(self, method)
        return visitor(node, **kwargs)

    def visit_Module(self, node, **kwargs):
        if len(node.body) != 1:
            raise SyntaxError("only a single expression is allowed")
        expr = node.body[0]
        return self.visit(expr, **kwargs)

    def visit_Expr(self, node, **kwargs):
        return self.visit(node.value, **kwargs)

    def _rewrite_membership_op(self, node, left, right):
        # the kind of the operator (is actually an instance)
        op_instance = node.op
        op_type = type(op_instance)

        # must be two terms and the comparison operator must be ==/!=/in/not in
        if is_term(left) and is_term(right) and op_type in self.rewrite_map:

            left_list, right_list = map(_is_list, (left, right))
            left_str, right_str = map(_is_str, (left, right))

            # if there are any strings or lists in the expression
            if left_list or right_list or left_str or right_str:
                op_instance = self.rewrite_map[op_type]()

                # pop the string variable out of locals and replace it with a list
                # of one string, kind of a hack
                if right_str:
                    name = self.env.add_tmp([right.value])
                    right = self.term_type(name, self.env)

                if left_str:
                    name = self.env.add_tmp([left.value])
                    left = self.term_type(name, self.env)

        op = self.visit(op_instance)
        return op, op_instance, left, right

    def _maybe_transform_eq_ne(self, node, left=None, right=None):
        if left is None:
            left = self.visit(node.left, side="left")
        if right is None:
            right = self.visit(node.right, side="right")
        op, op_class, left, right = self._rewrite_membership_op(node, left, right)
        return op, op_class, left, right

    def _maybe_downcast_constants(self, left, right):
        f32 = np.dtype(np.float32)
        if (
            left.is_scalar
            and hasattr(left, "value")
            and not right.is_scalar
            and right.return_type == f32
        ):
            # right is a float32 array, left is a scalar
            name = self.env.add_tmp(np.float32(left.value))
            left = self.term_type(name, self.env)
        if (
            right.is_scalar
            and hasattr(right, "value")
            and not left.is_scalar
            and left.return_type == f32
        ):
            # left is a float32 array, right is a scalar
            name = self.env.add_tmp(np.float32(right.value))
            right = self.term_type(name, self.env)

        return left, right

    def _maybe_eval(self, binop, eval_in_python):
        # eval `in` and `not in` (for now) in "partial" python space
        # things that can be evaluated in "eval" space will be turned into
        # temporary variables. for example,
        # [1,2] in a + 2 * b
        # in that case a + 2 * b will be evaluated using numexpr, and the "in"
        # call will be evaluated using isin (in python space)
        return binop.evaluate(
            self.env, self.engine, self.parser, self.term_type, eval_in_python
        )

    def _maybe_evaluate_binop(
        self,
        op,
        op_class,
        lhs,
        rhs,
        eval_in_python=("in", "not in"),
        maybe_eval_in_python=("==", "!=", "<", ">", "<=", ">="),
    ):
        res = op(lhs, rhs)

        if res.has_invalid_return_type:
            raise TypeError(
                f"unsupported operand type(s) for {res.op}: "
                f"'{lhs.type}' and '{rhs.type}'"
            )

        if self.engine != "pytables" and (
            res.op in CMP_OPS_SYMS
            and getattr(lhs, "is_datetime", False)
            or getattr(rhs, "is_datetime", False)
        ):
            # all date ops must be done in python bc numexpr doesn't work
            # well with NaT
            return self._maybe_eval(res, self.binary_ops)

        if res.op in eval_in_python:
            # "in"/"not in" ops are always evaluated in python
            return self._maybe_eval(res, eval_in_python)
        elif self.engine != "pytables":
            if (
                getattr(lhs, "return_type", None) == object
                or getattr(rhs, "return_type", None) == object
            ):
                # evaluate "==" and "!=" in python if either of our operands
                # has an object return type
                return self._maybe_eval(res, eval_in_python + maybe_eval_in_python)
        return res

    def visit_BinOp(self, node, **kwargs):
        op, op_class, left, right = self._maybe_transform_eq_ne(node)
        left, right = self._maybe_downcast_constants(left, right)
        return self._maybe_evaluate_binop(op, op_class, left, right)

    def visit_Div(self, node, **kwargs):
        return lambda lhs, rhs: Div(lhs, rhs)

    def visit_UnaryOp(self, node, **kwargs):
        op = self.visit(node.op)
        operand = self.visit(node.operand)
        return op(operand)

    def visit_Name(self, node, **kwargs):
        return self.term_type(node.id, self.env, **kwargs)

    def visit_NameConstant(self, node, **kwargs):
        return self.const_type(node.value, self.env)

    def visit_Num(self, node, **kwargs):
        return self.const_type(node.n, self.env)

    def visit_Constant(self, node, **kwargs):
        return self.const_type(node.n, self.env)

    def visit_Str(self, node, **kwargs):
        name = self.env.add_tmp(node.s)
        return self.term_type(name, self.env)

    def visit_List(self, node, **kwargs):
        name = self.env.add_tmp([self.visit(e)(self.env) for e in node.elts])
        return self.term_type(name, self.env)

    visit_Tuple = visit_List

    def visit_Index(self, node, **kwargs):
        """df.index[4]"""
        return self.visit(node.value)

    def visit_Subscript(self, node, **kwargs):
        from pandas import eval as pd_eval

        value = self.visit(node.value)
        slobj = self.visit(node.slice)
        result = pd_eval(
            slobj, local_dict=self.env, engine=self.engine, parser=self.parser
        )
        try:
            # a Term instance
            v = value.value[result]
        except AttributeError:
            # an Op instance
            lhs = pd_eval(
                value, local_dict=self.env, engine=self.engine, parser=self.parser
            )
            v = lhs[result]
        name = self.env.add_tmp(v)
        return self.term_type(name, env=self.env)

    def visit_Slice(self, node, **kwargs):
        """df.index[slice(4,6)]"""
        lower = node.lower
        if lower is not None:
            lower = self.visit(lower).value
        upper = node.upper
        if upper is not None:
            upper = self.visit(upper).value
        step = node.step
        if step is not None:
            step = self.visit(step).value

        return slice(lower, upper, step)

    def visit_Assign(self, node, **kwargs):
        """
        support a single assignment node, like

        c = a + b

        set the assigner at the top level, must be a Name node which
        might or might not exist in the resolvers

        """
        if len(node.targets) != 1:
            raise SyntaxError("can only assign a single expression")
        if not isinstance(node.targets[0], ast.Name):
            raise SyntaxError("left hand side of an assignment must be a single name")
        if self.env.target is None:
            raise ValueError("cannot assign without a target object")

        try:
            assigner = self.visit(node.targets[0], **kwargs)
        except UndefinedVariableError:
            assigner = node.targets[0].id

        self.assigner = getattr(assigner, "name", assigner)
        if self.assigner is None:
            raise SyntaxError(
                "left hand side of an assignment must be a single resolvable name"
            )

        return self.visit(node.value, **kwargs)

    def visit_Attribute(self, node, **kwargs):
        attr = node.attr
        value = node.value

        ctx = node.ctx
        if isinstance(ctx, ast.Load):
            # resolve the value
            resolved = self.visit(value).value
            try:
                v = getattr(resolved, attr)
                name = self.env.add_tmp(v)
                return self.term_type(name, self.env)
            except AttributeError:
                # something like datetime.datetime where scope is overridden
                if isinstance(value, ast.Name) and value.id == attr:
                    return resolved
                raise

        raise ValueError(f"Invalid Attribute context {type(ctx).__name__}")

    def visit_Call(self, node, side=None, **kwargs):

        if isinstance(node.func, ast.Attribute) and node.func.attr != "__call__":
            res = self.visit_Attribute(node.func)
        elif not isinstance(node.func, ast.Name):
            raise TypeError("Only named functions are supported")
        else:
            try:
                res = self.visit(node.func)
            except UndefinedVariableError:
                # Check if this is a supported function name
                try:
                    res = FuncNode(node.func.id)
                except ValueError:
                    # Raise original error
                    raise

        if res is None:
            # error: "expr" has no attribute "id"
            raise ValueError(
                f"Invalid function call {node.func.id}"  # type: ignore[attr-defined]
            )
        if hasattr(res, "value"):
            res = res.value

        if isinstance(res, FuncNode):

            new_args = [self.visit(arg) for arg in node.args]

            if node.keywords:
                raise TypeError(
                    f'Function "{res.name}" does not support keyword arguments'
                )

            return res(*new_args)

        else:

            new_args = [self.visit(arg).value for arg in node.args]

            for key in node.keywords:
                if not isinstance(key, ast.keyword):
                    # error: "expr" has no attribute "id"
                    raise ValueError(
                        "keyword error in function call "  # type: ignore[attr-defined]
                        f"'{node.func.id}'"
                    )

                if key.arg:
                    kwargs[key.arg] = self.visit(key.value).value

            name = self.env.add_tmp(res(*new_args, **kwargs))
            return self.term_type(name=name, env=self.env)

    def translate_In(self, op):
        return op

    def visit_Compare(self, node, **kwargs):
        ops = node.ops
        comps = node.comparators

        # base case: we have something like a CMP b
        if len(comps) == 1:
            op = self.translate_In(ops[0])
            binop = ast.BinOp(op=op, left=node.left, right=comps[0])
            return self.visit(binop)

        # recursive case: we have a chained comparison, a CMP b CMP c, etc.
        left = node.left
        values = []
        for op, comp in zip(ops, comps):
            new_node = self.visit(
                ast.Compare(comparators=[comp], left=left, ops=[self.translate_In(op)])
            )
            left = comp
            values.append(new_node)
        return self.visit(ast.BoolOp(op=ast.And(), values=values))

    def _try_visit_binop(self, bop):
        if isinstance(bop, (Op, Term)):
            return bop
        return self.visit(bop)

    def visit_BoolOp(self, node, **kwargs):
        def visitor(x, y):
            lhs = self._try_visit_binop(x)
            rhs = self._try_visit_binop(y)

            op, op_class, lhs, rhs = self._maybe_transform_eq_ne(node, lhs, rhs)
            return self._maybe_evaluate_binop(op, node.op, lhs, rhs)

        operands = node.values
        return reduce(visitor, operands)


_python_not_supported = frozenset(["Dict", "BoolOp", "In", "NotIn"])
_numexpr_supported_calls = frozenset(REDUCTIONS + MATHOPS)


@disallow(
    (_unsupported_nodes | _python_not_supported)
    - (_boolop_nodes | frozenset(["BoolOp", "Attribute", "In", "NotIn", "Tuple"]))
)
class PandasExprVisitor(BaseExprVisitor):
    def __init__(
        self,
        env,
        engine,
        parser,
        preparser=partial(
            _preparse,
            f=_compose(_replace_locals, _replace_booleans, clean_backtick_quoted_toks),
        ),
    ):
        super().__init__(env, engine, parser, preparser)


@disallow(_unsupported_nodes | _python_not_supported | frozenset(["Not"]))
class PythonExprVisitor(BaseExprVisitor):
    def __init__(self, env, engine, parser, preparser=lambda x: x):
        super().__init__(env, engine, parser, preparser=preparser)


class Expr:
    """
    Object encapsulating an expression.

    Parameters
    ----------
    expr : str
    engine : str, optional, default 'numexpr'
    parser : str, optional, default 'pandas'
    env : Scope, optional, default None
    level : int, optional, default 2
    """

    env: Scope
    engine: str
    parser: str

    def __init__(
        self,
        expr,
        engine: str = "numexpr",
        parser: str = "pandas",
        env: Scope | None = None,
        level: int = 0,
    ):
        self.expr = expr
        self.env = env or Scope(level=level + 1)
        self.engine = engine
        self.parser = parser
        self._visitor = PARSERS[parser](self.env, self.engine, self.parser)
        self.terms = self.parse()

    @property
    def assigner(self):
        return getattr(self._visitor, "assigner", None)

    def __call__(self):
        return self.terms(self.env)

    def __repr__(self) -> str:
        return printing.pprint_thing(self.terms)

    def __len__(self) -> int:
        return len(self.expr)

    def parse(self):
        """
        Parse an expression.
        """
        return self._visitor.visit(self.expr)

    @property
    def names(self):
        """
        Get the names in an expression.
        """
        if is_term(self.terms):
            return frozenset([self.terms.name])
        return frozenset(term.name for term in com.flatten(self.terms))


PARSERS = {"python": PythonExprVisitor, "pandas": PandasExprVisitor}
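# --- Editor's note (illustrative sketch, not part of the upstream pandas source) ---
# ``Expr`` ties a parser (syntax) to an engine (execution). A sketch using the
# pure-python pieces, avoiding the numexpr dependency; internal API, may differ
# across pandas versions:
#
#   >>> from pandas.core.computation.expr import Expr
#   >>> e = Expr("1 + 2 * 3", engine="python", parser="pandas")
#   >>> e()           # evaluate the parsed term tree in python space
#   7
#   >>> e.assigner    # None: there is no assignment in this expression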
284 .venv/Lib/site-packages/pandas/core/computation/expressions.py Normal file
@@ -0,0 +1,284 @@
"""
Expressions
-----------

Offer fast expression evaluation through numexpr

"""
from __future__ import annotations

import operator
import warnings

import numpy as np

from pandas._config import get_option

from pandas._typing import FuncType

from pandas.core.computation.check import NUMEXPR_INSTALLED
from pandas.core.ops import roperator

if NUMEXPR_INSTALLED:
    import numexpr as ne

_TEST_MODE: bool | None = None
_TEST_RESULT: list[bool] = []
USE_NUMEXPR = NUMEXPR_INSTALLED
_evaluate: FuncType | None = None
_where: FuncType | None = None

# the set of dtypes that we will allow to pass to numexpr
_ALLOWED_DTYPES = {
    "evaluate": {"int64", "int32", "float64", "float32", "bool"},
    "where": {"int64", "float64", "bool"},
}

# the minimum number of elements (product of shape) for which we will use numexpr
_MIN_ELEMENTS = 1_000_000


def set_use_numexpr(v=True):
    # set/unset to use numexpr
    global USE_NUMEXPR
    if NUMEXPR_INSTALLED:
        USE_NUMEXPR = v

    # choose what we are going to do
    global _evaluate, _where

    _evaluate = _evaluate_numexpr if USE_NUMEXPR else _evaluate_standard
    _where = _where_numexpr if USE_NUMEXPR else _where_standard


def set_numexpr_threads(n=None):
    # if we are using numexpr, set the threads to n
    # otherwise reset
    if NUMEXPR_INSTALLED and USE_NUMEXPR:
        if n is None:
            n = ne.detect_number_of_cores()
        ne.set_num_threads(n)
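# --- Editor's note (illustrative sketch, not part of the upstream pandas source) ---
# These module-level switches are also driven by the public option
# ``compute.use_numexpr`` (see the ``set_use_numexpr(get_option(...))`` call
# further down). A direct sketch:
#
#   >>> from pandas.core.computation import expressions as expr
#   >>> expr.set_use_numexpr(False)    # force the standard (pure-numpy) path
#   >>> expr.set_numexpr_threads(4)    # no-op while numexpr is disabled
#   >>> expr.set_use_numexpr(True)     # re-enable (if numexpr is installed)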
||||
def _evaluate_standard(op, op_str, a, b):
|
||||
"""
|
||||
Standard evaluation.
|
||||
"""
|
||||
if _TEST_MODE:
|
||||
_store_test_result(False)
|
||||
return op(a, b)
|
||||
|
||||
|
||||
def _can_use_numexpr(op, op_str, a, b, dtype_check):
|
||||
"""return a boolean if we WILL be using numexpr"""
|
||||
if op_str is not None:
|
||||
|
||||
# required min elements (otherwise we are adding overhead)
|
||||
if a.size > _MIN_ELEMENTS:
|
||||
# check for dtype compatibility
|
||||
dtypes: set[str] = set()
|
||||
for o in [a, b]:
|
||||
# ndarray and Series Case
|
||||
if hasattr(o, "dtype"):
|
||||
dtypes |= {o.dtype.name}
|
||||
|
||||
# allowed are a superset
|
||||
if not len(dtypes) or _ALLOWED_DTYPES[dtype_check] >= dtypes:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _evaluate_numexpr(op, op_str, a, b):
|
||||
result = None
|
||||
|
||||
if _can_use_numexpr(op, op_str, a, b, "evaluate"):
|
||||
is_reversed = op.__name__.strip("_").startswith("r")
|
||||
if is_reversed:
|
||||
# we were originally called by a reversed op method
|
||||
a, b = b, a
|
||||
|
||||
a_value = a
|
||||
b_value = b
|
||||
|
||||
try:
|
||||
result = ne.evaluate(
|
||||
f"a_value {op_str} b_value",
|
||||
local_dict={"a_value": a_value, "b_value": b_value},
|
||||
casting="safe",
|
||||
)
|
||||
except TypeError:
|
||||
# numexpr raises eg for array ** array with integers
|
||||
# (https://github.com/pydata/numexpr/issues/379)
|
||||
pass
|
||||
except NotImplementedError:
|
||||
if _bool_arith_fallback(op_str, a, b):
|
||||
pass
|
||||
else:
|
||||
raise
|
||||
|
||||
if is_reversed:
|
||||
# reverse order to original for fallback
|
||||
a, b = b, a
|
||||
|
||||
if _TEST_MODE:
|
||||
_store_test_result(result is not None)
|
||||
|
||||
if result is None:
|
||||
result = _evaluate_standard(op, op_str, a, b)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
_op_str_mapping = {
|
||||
operator.add: "+",
|
||||
roperator.radd: "+",
|
||||
operator.mul: "*",
|
||||
roperator.rmul: "*",
|
||||
operator.sub: "-",
|
||||
roperator.rsub: "-",
|
||||
operator.truediv: "/",
|
||||
roperator.rtruediv: "/",
|
||||
# floordiv not supported by numexpr 2.x
|
||||
operator.floordiv: None,
|
||||
roperator.rfloordiv: None,
|
||||
# we require Python semantics for mod of negative for backwards compatibility
|
||||
# see https://github.com/pydata/numexpr/issues/365
|
||||
# so sticking with unaccelerated for now GH#36552
|
||||
operator.mod: None,
|
||||
roperator.rmod: None,
|
||||
operator.pow: "**",
|
||||
roperator.rpow: "**",
|
||||
operator.eq: "==",
|
||||
operator.ne: "!=",
|
||||
operator.le: "<=",
|
||||
operator.lt: "<",
|
||||
operator.ge: ">=",
|
||||
operator.gt: ">",
|
||||
operator.and_: "&",
|
||||
roperator.rand_: "&",
|
||||
operator.or_: "|",
|
||||
roperator.ror_: "|",
|
||||
operator.xor: "^",
|
||||
roperator.rxor: "^",
|
||||
divmod: None,
|
||||
roperator.rdivmod: None,
|
||||
}
|
||||
|
||||
|
||||
def _where_standard(cond, a, b):
|
||||
# Caller is responsible for extracting ndarray if necessary
|
||||
return np.where(cond, a, b)
|
||||
|
||||
|
||||
def _where_numexpr(cond, a, b):
|
||||
# Caller is responsible for extracting ndarray if necessary
|
||||
result = None
|
||||
|
||||
if _can_use_numexpr(None, "where", a, b, "where"):
|
||||
|
||||
result = ne.evaluate(
|
||||
"where(cond_value, a_value, b_value)",
|
||||
local_dict={"cond_value": cond, "a_value": a, "b_value": b},
|
||||
casting="safe",
|
||||
)
|
||||
|
||||
if result is None:
|
||||
result = _where_standard(cond, a, b)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
# initialize the module state from the global option
|
||||
set_use_numexpr(get_option("compute.use_numexpr"))
|
||||
|
||||
|
||||
def _has_bool_dtype(x):
|
||||
try:
|
||||
return x.dtype == bool
|
||||
except AttributeError:
|
||||
return isinstance(x, (bool, np.bool_))
|
||||
|
||||
|
||||
_BOOL_OP_UNSUPPORTED = {"+": "|", "*": "&", "-": "^"}
|
||||
|
||||
|
||||
def _bool_arith_fallback(op_str, a, b):
|
||||
"""
|
||||
Check if we should fallback to the python `_evaluate_standard` in case
|
||||
of an unsupported operation by numexpr, which is the case for some
|
||||
boolean ops.
|
||||
"""
|
||||
if _has_bool_dtype(a) and _has_bool_dtype(b):
|
||||
if op_str in _BOOL_OP_UNSUPPORTED:
|
||||
warnings.warn(
|
||||
f"evaluating in Python space because the {repr(op_str)} "
|
||||
"operator is not supported by numexpr for the bool dtype, "
|
||||
f"use {repr(_BOOL_OP_UNSUPPORTED[op_str])} instead."
|
||||
)
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def evaluate(op, a, b, use_numexpr: bool = True):
|
||||
"""
|
||||
Evaluate and return the expression of the op on a and b.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
op : the actual operator
|
||||
a : left operand
|
||||
b : right operand
|
||||
use_numexpr : bool, default True
|
||||
Whether to try to use numexpr.
|
||||
"""
|
||||
op_str = _op_str_mapping[op]
|
||||
if op_str is not None:
|
||||
if use_numexpr:
|
||||
# error: "None" not callable
|
||||
return _evaluate(op, op_str, a, b) # type: ignore[misc]
|
||||
return _evaluate_standard(op, op_str, a, b)
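
# Hedged usage sketch (not part of pandas itself): large array arithmetic in
# pandas funnels through `evaluate`, but it can also be called directly.
#
#   import operator
#   import numpy as np
#   from pandas.core.computation import expressions as expr
#
#   a = np.random.rand(2_000_000)
#   b = np.random.rand(2_000_000)
#   out = expr.evaluate(operator.add, a, b)                     # numexpr if eligible
#   ref = expr.evaluate(operator.add, a, b, use_numexpr=False)  # plain Python op
#   np.testing.assert_allclose(out, ref)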
|
||||
|
||||
|
||||
def where(cond, a, b, use_numexpr=True):
|
||||
"""
|
||||
Evaluate the where condition cond on a and b.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cond : np.ndarray[bool]
|
||||
a : return if cond is True
|
||||
b : return if cond is False
|
||||
use_numexpr : bool, default True
|
||||
Whether to try to use numexpr.
|
||||
"""
|
||||
assert _where is not None
|
||||
return _where(cond, a, b) if use_numexpr else _where_standard(cond, a, b)
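
# Hedged sketch: `where` mirrors np.where semantics, optionally accelerated.
#
#   import numpy as np
#   from pandas.core.computation import expressions as expr
#
#   cond = np.random.rand(2_000_000) > 0.5
#   a, b = np.zeros(2_000_000), np.ones(2_000_000)
#   expr.where(cond, a, b)   # same values as np.where(cond, a, b)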
|
||||
|
||||
|
||||
def set_test_mode(v: bool = True) -> None:
|
||||
"""
|
||||
Enable test mode, which keeps track of whether numexpr was used.
|
||||
|
||||
Stores an additional ``True`` for every successful use of evaluate with
|
||||
numexpr since the last ``get_test_result``.
|
||||
"""
|
||||
global _TEST_MODE, _TEST_RESULT
|
||||
_TEST_MODE = v
|
||||
_TEST_RESULT = []
|
||||
|
||||
|
||||
def _store_test_result(used_numexpr: bool) -> None:
|
||||
global _TEST_RESULT
|
||||
if used_numexpr:
|
||||
_TEST_RESULT.append(used_numexpr)
|
||||
|
||||
|
||||
def get_test_result() -> list[bool]:
|
||||
"""
|
||||
Get the accumulated test results and reset them.
|
||||
"""
|
||||
global _TEST_RESULT
|
||||
res = _TEST_RESULT
|
||||
_TEST_RESULT = []
|
||||
return res
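
# Hedged sketch of the test-mode round trip:
#
#   import operator
#   import numpy as np
#   from pandas.core.computation import expressions as expr
#
#   expr.set_test_mode(True)
#   expr.evaluate(operator.add, np.ones(2_000_000), np.ones(2_000_000))
#   expr.get_test_result()   # [True] if the numexpr path was taken, else []
#   expr.set_test_mode(False)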
|
624
.venv/Lib/site-packages/pandas/core/computation/ops.py
Normal file
@ -0,0 +1,624 @@
|
||||
"""
|
||||
Operator classes for eval.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from functools import partial
|
||||
import operator
|
||||
from typing import (
|
||||
Callable,
|
||||
Iterable,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs.tslibs import Timestamp
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_list_like,
|
||||
is_scalar,
|
||||
)
|
||||
|
||||
import pandas.core.common as com
|
||||
from pandas.core.computation.common import (
|
||||
ensure_decoded,
|
||||
result_type_many,
|
||||
)
|
||||
from pandas.core.computation.scope import DEFAULT_GLOBALS
|
||||
|
||||
from pandas.io.formats.printing import (
|
||||
pprint_thing,
|
||||
pprint_thing_encoded,
|
||||
)
|
||||
|
||||
REDUCTIONS = ("sum", "prod")
|
||||
|
||||
_unary_math_ops = (
|
||||
"sin",
|
||||
"cos",
|
||||
"exp",
|
||||
"log",
|
||||
"expm1",
|
||||
"log1p",
|
||||
"sqrt",
|
||||
"sinh",
|
||||
"cosh",
|
||||
"tanh",
|
||||
"arcsin",
|
||||
"arccos",
|
||||
"arctan",
|
||||
"arccosh",
|
||||
"arcsinh",
|
||||
"arctanh",
|
||||
"abs",
|
||||
"log10",
|
||||
"floor",
|
||||
"ceil",
|
||||
)
|
||||
_binary_math_ops = ("arctan2",)
|
||||
|
||||
MATHOPS = _unary_math_ops + _binary_math_ops
|
||||
|
||||
|
||||
LOCAL_TAG = "__pd_eval_local_"
|
||||
|
||||
|
||||
class UndefinedVariableError(NameError):
|
||||
"""
|
||||
NameError subclass for local variables.
|
||||
"""
|
||||
|
||||
def __init__(self, name: str, is_local: bool | None = None):
|
||||
base_msg = f"{repr(name)} is not defined"
|
||||
if is_local:
|
||||
msg = f"local variable {base_msg}"
|
||||
else:
|
||||
msg = f"name {base_msg}"
|
||||
super().__init__(msg)
|
||||
|
||||
|
||||
class Term:
|
||||
def __new__(cls, name, env, side=None, encoding=None):
|
||||
klass = Constant if not isinstance(name, str) else cls
|
||||
# error: Argument 2 for "super" not an instance of argument 1
|
||||
supr_new = super(Term, klass).__new__ # type: ignore[misc]
|
||||
return supr_new(klass)
|
||||
|
||||
is_local: bool
|
||||
|
||||
def __init__(self, name, env, side=None, encoding=None):
|
||||
# name is a str for Term, but may be something else for subclasses
|
||||
self._name = name
|
||||
self.env = env
|
||||
self.side = side
|
||||
tname = str(name)
|
||||
self.is_local = tname.startswith(LOCAL_TAG) or tname in DEFAULT_GLOBALS
|
||||
self._value = self._resolve_name()
|
||||
self.encoding = encoding
|
||||
|
||||
@property
|
||||
def local_name(self) -> str:
|
||||
return self.name.replace(LOCAL_TAG, "")
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return pprint_thing(self.name)
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
return self.value
|
||||
|
||||
def evaluate(self, *args, **kwargs):
|
||||
return self
|
||||
|
||||
def _resolve_name(self):
|
||||
res = self.env.resolve(self.local_name, is_local=self.is_local)
|
||||
self.update(res)
|
||||
|
||||
if hasattr(res, "ndim") and res.ndim > 2:
|
||||
raise NotImplementedError(
|
||||
"N-dimensional objects, where N > 2, are not supported with eval"
|
||||
)
|
||||
return res
|
||||
|
||||
def update(self, value):
|
||||
"""
|
||||
search order for local (i.e., @variable) variables:
|
||||
|
||||
scope, key_variable
|
||||
[('locals', 'local_name'),
|
||||
('globals', 'local_name'),
|
||||
('locals', 'key'),
|
||||
('globals', 'key')]
|
||||
"""
|
||||
key = self.name
|
||||
|
||||
# if it's a variable name (otherwise a constant)
|
||||
if isinstance(key, str):
|
||||
self.env.swapkey(self.local_name, key, new_value=value)
|
||||
|
||||
self.value = value
|
||||
|
||||
@property
|
||||
def is_scalar(self) -> bool:
|
||||
return is_scalar(self._value)
|
||||
|
||||
@property
|
||||
def type(self):
|
||||
try:
|
||||
# potentially very slow for large, mixed dtype frames
|
||||
return self._value.values.dtype
|
||||
except AttributeError:
|
||||
try:
|
||||
# ndarray
|
||||
return self._value.dtype
|
||||
except AttributeError:
|
||||
# scalar
|
||||
return type(self._value)
|
||||
|
||||
return_type = type
|
||||
|
||||
@property
|
||||
def raw(self) -> str:
|
||||
return f"{type(self).__name__}(name={repr(self.name)}, type={self.type})"
|
||||
|
||||
@property
|
||||
def is_datetime(self) -> bool:
|
||||
try:
|
||||
t = self.type.type
|
||||
except AttributeError:
|
||||
t = self.type
|
||||
|
||||
return issubclass(t, (datetime, np.datetime64))
|
||||
|
||||
@property
|
||||
def value(self):
|
||||
return self._value
|
||||
|
||||
@value.setter
|
||||
def value(self, new_value):
|
||||
self._value = new_value
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
return self._name
|
||||
|
||||
@property
|
||||
def ndim(self) -> int:
|
||||
return self._value.ndim
|
||||
|
||||
|
||||
class Constant(Term):
|
||||
def __init__(self, value, env, side=None, encoding=None):
|
||||
super().__init__(value, env, side=side, encoding=encoding)
|
||||
|
||||
def _resolve_name(self):
|
||||
return self._name
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
return self.value
|
||||
|
||||
def __repr__(self) -> str:
|
||||
# in Python 2, str() of a float could truncate to fewer digits than repr()
|
||||
return repr(self.name)
|
||||
|
||||
|
||||
_bool_op_map = {"not": "~", "and": "&", "or": "|"}
|
||||
|
||||
|
||||
class Op:
|
||||
"""
|
||||
Hold an operator of arbitrary arity.
|
||||
"""
|
||||
|
||||
op: str
|
||||
|
||||
def __init__(self, op: str, operands: Iterable[Term | Op], encoding=None):
|
||||
self.op = _bool_op_map.get(op, op)
|
||||
self.operands = operands
|
||||
self.encoding = encoding
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self.operands)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
"""
|
||||
Print a generic n-ary operator and its operands using infix notation.
|
||||
"""
|
||||
# recurse over the operands
|
||||
parened = (f"({pprint_thing(opr)})" for opr in self.operands)
|
||||
return pprint_thing(f" {self.op} ".join(parened))
|
||||
|
||||
@property
|
||||
def return_type(self):
|
||||
# clobber types to bool if the op is a boolean operator
|
||||
if self.op in (CMP_OPS_SYMS + BOOL_OPS_SYMS):
|
||||
return np.bool_
|
||||
return result_type_many(*(term.type for term in com.flatten(self)))
|
||||
|
||||
@property
|
||||
def has_invalid_return_type(self) -> bool:
|
||||
types = self.operand_types
|
||||
obj_dtype_set = frozenset([np.dtype("object")])
|
||||
return self.return_type == object and types - obj_dtype_set
|
||||
|
||||
@property
|
||||
def operand_types(self):
|
||||
return frozenset(term.type for term in com.flatten(self))
|
||||
|
||||
@property
|
||||
def is_scalar(self) -> bool:
|
||||
return all(operand.is_scalar for operand in self.operands)
|
||||
|
||||
@property
|
||||
def is_datetime(self) -> bool:
|
||||
try:
|
||||
t = self.return_type.type
|
||||
except AttributeError:
|
||||
t = self.return_type
|
||||
|
||||
return issubclass(t, (datetime, np.datetime64))
|
||||
|
||||
|
||||
def _in(x, y):
|
||||
"""
|
||||
Compute the vectorized membership of ``x in y`` if possible, otherwise
|
||||
use Python.
|
||||
"""
|
||||
try:
|
||||
return x.isin(y)
|
||||
except AttributeError:
|
||||
if is_list_like(x):
|
||||
try:
|
||||
return y.isin(x)
|
||||
except AttributeError:
|
||||
pass
|
||||
return x in y
|
||||
|
||||
|
||||
def _not_in(x, y):
|
||||
"""
|
||||
Compute the vectorized membership of ``x not in y`` if possible,
|
||||
otherwise use Python.
|
||||
"""
|
||||
try:
|
||||
return ~x.isin(y)
|
||||
except AttributeError:
|
||||
if is_list_like(x):
|
||||
try:
|
||||
return ~y.isin(x)
|
||||
except AttributeError:
|
||||
pass
|
||||
return x not in y
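
# Hedged illustration: both helpers prefer the vectorized ``isin`` and only
# fall back to Python membership for plain scalars and containers.
#
#   import pandas as pd
#
#   s = pd.Series([1, 2, 3])
#   _in(s, [1, 3])        # boolean Series: [True, False, True]
#   _not_in(s, [1, 3])    # boolean Series: [False, True, False]
#   _in(2, (1, 2, 3))     # True, via plain Python ``in``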
|
||||
|
||||
|
||||
CMP_OPS_SYMS = (">", "<", ">=", "<=", "==", "!=", "in", "not in")
|
||||
_cmp_ops_funcs = (
|
||||
operator.gt,
|
||||
operator.lt,
|
||||
operator.ge,
|
||||
operator.le,
|
||||
operator.eq,
|
||||
operator.ne,
|
||||
_in,
|
||||
_not_in,
|
||||
)
|
||||
_cmp_ops_dict = dict(zip(CMP_OPS_SYMS, _cmp_ops_funcs))
|
||||
|
||||
BOOL_OPS_SYMS = ("&", "|", "and", "or")
|
||||
_bool_ops_funcs = (operator.and_, operator.or_, operator.and_, operator.or_)
|
||||
_bool_ops_dict = dict(zip(BOOL_OPS_SYMS, _bool_ops_funcs))
|
||||
|
||||
ARITH_OPS_SYMS = ("+", "-", "*", "/", "**", "//", "%")
|
||||
_arith_ops_funcs = (
|
||||
operator.add,
|
||||
operator.sub,
|
||||
operator.mul,
|
||||
operator.truediv,
|
||||
operator.pow,
|
||||
operator.floordiv,
|
||||
operator.mod,
|
||||
)
|
||||
_arith_ops_dict = dict(zip(ARITH_OPS_SYMS, _arith_ops_funcs))
|
||||
|
||||
SPECIAL_CASE_ARITH_OPS_SYMS = ("**", "//", "%")
|
||||
_special_case_arith_ops_funcs = (operator.pow, operator.floordiv, operator.mod)
|
||||
_special_case_arith_ops_dict = dict(
|
||||
zip(SPECIAL_CASE_ARITH_OPS_SYMS, _special_case_arith_ops_funcs)
|
||||
)
|
||||
|
||||
_binary_ops_dict = {}
|
||||
|
||||
for d in (_cmp_ops_dict, _bool_ops_dict, _arith_ops_dict):
|
||||
_binary_ops_dict.update(d)
|
||||
|
||||
|
||||
def _cast_inplace(terms, acceptable_dtypes, dtype):
|
||||
"""
|
||||
Cast an expression inplace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
terms : Op
|
||||
The expression that should be cast.
|
||||
acceptable_dtypes : list of acceptable numpy.dtype
|
||||
Will not cast if the term's dtype is in this list.
|
||||
dtype : str or numpy.dtype
|
||||
The dtype to cast to.
|
||||
"""
|
||||
dt = np.dtype(dtype)
|
||||
for term in terms:
|
||||
if term.type in acceptable_dtypes:
|
||||
continue
|
||||
|
||||
try:
|
||||
new_value = term.value.astype(dt)
|
||||
except AttributeError:
|
||||
new_value = dt.type(term.value)
|
||||
term.update(new_value)
|
||||
|
||||
|
||||
def is_term(obj) -> bool:
|
||||
return isinstance(obj, Term)
|
||||
|
||||
|
||||
class BinOp(Op):
|
||||
"""
|
||||
Hold a binary operator and its operands.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
op : str
|
||||
lhs : Term or Op
|
||||
rhs : Term or Op
|
||||
"""
|
||||
|
||||
def __init__(self, op: str, lhs, rhs):
|
||||
super().__init__(op, (lhs, rhs))
|
||||
self.lhs = lhs
|
||||
self.rhs = rhs
|
||||
|
||||
self._disallow_scalar_only_bool_ops()
|
||||
|
||||
self.convert_values()
|
||||
|
||||
try:
|
||||
self.func = _binary_ops_dict[op]
|
||||
except KeyError as err:
|
||||
# has to be made a list for python3
|
||||
keys = list(_binary_ops_dict.keys())
|
||||
raise ValueError(
|
||||
f"Invalid binary operator {repr(op)}, valid operators are {keys}"
|
||||
) from err
|
||||
|
||||
def __call__(self, env):
|
||||
"""
|
||||
Recursively evaluate an expression in Python space.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
env : Scope
|
||||
|
||||
Returns
|
||||
-------
|
||||
object
|
||||
The result of an evaluated expression.
|
||||
"""
|
||||
# recurse over the left/right nodes
|
||||
left = self.lhs(env)
|
||||
right = self.rhs(env)
|
||||
|
||||
return self.func(left, right)
|
||||
|
||||
def evaluate(self, env, engine: str, parser, term_type, eval_in_python):
|
||||
"""
|
||||
Evaluate a binary operation *before* being passed to the engine.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
env : Scope
|
||||
engine : str
|
||||
parser : str
|
||||
term_type : type
|
||||
eval_in_python : list
|
||||
|
||||
Returns
|
||||
-------
|
||||
term_type
|
||||
The "pre-evaluated" expression as an instance of ``term_type``
|
||||
"""
|
||||
if engine == "python":
|
||||
res = self(env)
|
||||
else:
|
||||
# recurse over the left/right nodes
|
||||
|
||||
left = self.lhs.evaluate(
|
||||
env,
|
||||
engine=engine,
|
||||
parser=parser,
|
||||
term_type=term_type,
|
||||
eval_in_python=eval_in_python,
|
||||
)
|
||||
|
||||
right = self.rhs.evaluate(
|
||||
env,
|
||||
engine=engine,
|
||||
parser=parser,
|
||||
term_type=term_type,
|
||||
eval_in_python=eval_in_python,
|
||||
)
|
||||
|
||||
# base cases
|
||||
if self.op in eval_in_python:
|
||||
res = self.func(left.value, right.value)
|
||||
else:
|
||||
from pandas.core.computation.eval import eval
|
||||
|
||||
res = eval(self, local_dict=env, engine=engine, parser=parser)
|
||||
|
||||
name = env.add_tmp(res)
|
||||
return term_type(name, env=env)
|
||||
|
||||
def convert_values(self):
|
||||
"""
|
||||
Convert datetimes to a comparable value in an expression.
|
||||
"""
|
||||
|
||||
def stringify(value):
|
||||
encoder: Callable
|
||||
if self.encoding is not None:
|
||||
encoder = partial(pprint_thing_encoded, encoding=self.encoding)
|
||||
else:
|
||||
encoder = pprint_thing
|
||||
return encoder(value)
|
||||
|
||||
lhs, rhs = self.lhs, self.rhs
|
||||
|
||||
if is_term(lhs) and lhs.is_datetime and is_term(rhs) and rhs.is_scalar:
|
||||
v = rhs.value
|
||||
if isinstance(v, (int, float)):
|
||||
v = stringify(v)
|
||||
v = Timestamp(ensure_decoded(v))
|
||||
if v.tz is not None:
|
||||
v = v.tz_convert("UTC")
|
||||
self.rhs.update(v)
|
||||
|
||||
if is_term(rhs) and rhs.is_datetime and is_term(lhs) and lhs.is_scalar:
|
||||
v = lhs.value
|
||||
if isinstance(v, (int, float)):
|
||||
v = stringify(v)
|
||||
v = Timestamp(ensure_decoded(v))
|
||||
if v.tz is not None:
|
||||
v = v.tz_convert("UTC")
|
||||
self.lhs.update(v)
|
||||
|
||||
def _disallow_scalar_only_bool_ops(self):
|
||||
rhs = self.rhs
|
||||
lhs = self.lhs
|
||||
|
||||
# GH#24883 unwrap dtype if necessary to ensure we have a type object
|
||||
rhs_rt = rhs.return_type
|
||||
rhs_rt = getattr(rhs_rt, "type", rhs_rt)
|
||||
lhs_rt = lhs.return_type
|
||||
lhs_rt = getattr(lhs_rt, "type", lhs_rt)
|
||||
if (
|
||||
(lhs.is_scalar or rhs.is_scalar)
|
||||
and self.op in _bool_ops_dict
|
||||
and (
|
||||
not (
|
||||
issubclass(rhs_rt, (bool, np.bool_))
|
||||
and issubclass(lhs_rt, (bool, np.bool_))
|
||||
)
|
||||
)
|
||||
):
|
||||
raise NotImplementedError("cannot evaluate scalar only bool ops")
|
||||
|
||||
|
||||
def isnumeric(dtype) -> bool:
|
||||
return issubclass(np.dtype(dtype).type, np.number)
|
||||
|
||||
|
||||
class Div(BinOp):
|
||||
"""
|
||||
Div operator to special case casting.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
lhs, rhs : Term or Op
|
||||
The Terms or Ops in the ``/`` expression.
|
||||
"""
|
||||
|
||||
def __init__(self, lhs, rhs):
|
||||
super().__init__("/", lhs, rhs)
|
||||
|
||||
if not isnumeric(lhs.return_type) or not isnumeric(rhs.return_type):
|
||||
raise TypeError(
|
||||
f"unsupported operand type(s) for {self.op}: "
|
||||
f"'{lhs.return_type}' and '{rhs.return_type}'"
|
||||
)
|
||||
|
||||
# do not upcast float32s to float64 unnecessarily
|
||||
acceptable_dtypes = [np.float32, np.float_]
|
||||
_cast_inplace(com.flatten(self), acceptable_dtypes, np.float_)
|
||||
|
||||
|
||||
UNARY_OPS_SYMS = ("+", "-", "~", "not")
|
||||
_unary_ops_funcs = (operator.pos, operator.neg, operator.invert, operator.invert)
|
||||
_unary_ops_dict = dict(zip(UNARY_OPS_SYMS, _unary_ops_funcs))
|
||||
|
||||
|
||||
class UnaryOp(Op):
|
||||
"""
|
||||
Hold a unary operator and its operands.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
op : str
|
||||
The token used to represent the operator.
|
||||
operand : Term or Op
|
||||
The Term or Op operand to the operator.
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
* If no function associated with the passed operator token is found.
|
||||
"""
|
||||
|
||||
def __init__(self, op: str, operand):
|
||||
super().__init__(op, (operand,))
|
||||
self.operand = operand
|
||||
|
||||
try:
|
||||
self.func = _unary_ops_dict[op]
|
||||
except KeyError as err:
|
||||
raise ValueError(
|
||||
f"Invalid unary operator {repr(op)}, "
|
||||
f"valid operators are {UNARY_OPS_SYMS}"
|
||||
) from err
|
||||
|
||||
def __call__(self, env):
|
||||
operand = self.operand(env)
|
||||
return self.func(operand)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return pprint_thing(f"{self.op}({self.operand})")
|
||||
|
||||
@property
|
||||
def return_type(self) -> np.dtype:
|
||||
operand = self.operand
|
||||
if operand.return_type == np.dtype("bool"):
|
||||
return np.dtype("bool")
|
||||
if isinstance(operand, Op) and (
|
||||
operand.op in _cmp_ops_dict or operand.op in _bool_ops_dict
|
||||
):
|
||||
return np.dtype("bool")
|
||||
return np.dtype("int")
|
||||
|
||||
|
||||
class MathCall(Op):
|
||||
def __init__(self, func, args):
|
||||
super().__init__(func.name, args)
|
||||
self.func = func
|
||||
|
||||
def __call__(self, env):
|
||||
# error: "Op" not callable
|
||||
operands = [op(env) for op in self.operands] # type: ignore[operator]
|
||||
with np.errstate(all="ignore"):
|
||||
return self.func.func(*operands)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
operands = map(str, self.operands)
|
||||
return pprint_thing(f"{self.op}({','.join(operands)})")
|
||||
|
||||
|
||||
class FuncNode:
|
||||
def __init__(self, name: str):
|
||||
if name not in MATHOPS:
|
||||
raise ValueError(f'"{name}" is not a supported function')
|
||||
self.name = name
|
||||
self.func = getattr(np, name)
|
||||
|
||||
def __call__(self, *args):
|
||||
return MathCall(self, args)
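
# Hedged sketch: in an eval string, a call like ``sin(x)`` resolves to
# FuncNode("sin"), which wraps np.sin and is applied through MathCall.
#
#   import numpy as np
#   import pandas as pd
#
#   x = np.linspace(0.0, np.pi, 5)
#   pd.eval("sin(x)", engine="python", local_dict={"x": x})   # == np.sin(x)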
|
195
.venv/Lib/site-packages/pandas/core/computation/parsing.py
Normal file
@ -0,0 +1,195 @@
|
||||
"""
|
||||
:func:`~pandas.eval` source string parsing functions
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from io import StringIO
|
||||
from keyword import iskeyword
|
||||
import token
|
||||
import tokenize
|
||||
from typing import (
|
||||
Hashable,
|
||||
Iterator,
|
||||
)
|
||||
|
||||
# A token value Python's tokenizer probably will never use.
|
||||
BACKTICK_QUOTED_STRING = 100
|
||||
|
||||
|
||||
def create_valid_python_identifier(name: str) -> str:
|
||||
"""
|
||||
Create valid Python identifiers from any string.
|
||||
|
||||
Check if name contains any special characters. If it contains any
|
||||
special characters, the special characters will be replaced by
|
||||
a special string and a prefix is added.
|
||||
|
||||
Raises
|
||||
------
|
||||
SyntaxError
|
||||
If the returned name is not a valid Python identifier, raise an exception.
This can happen if there is a hashtag in the name, as the tokenizer will
then terminate and not find the backtick.
It also happens for characters that fall outside the range (U+0001..U+007F).
|
||||
"""
|
||||
if name.isidentifier() and not iskeyword(name):
|
||||
return name
|
||||
|
||||
# Create a dict with the special characters and their replacement string.
|
||||
# EXACT_TOKEN_TYPES contains these special characters
|
||||
# token.tok_name contains a readable description of the replacement string.
|
||||
special_characters_replacements = {
|
||||
char: f"_{token.tok_name[tokval]}_"
|
||||
for char, tokval in (tokenize.EXACT_TOKEN_TYPES.items())
|
||||
}
|
||||
special_characters_replacements.update(
|
||||
{
|
||||
" ": "_",
|
||||
"?": "_QUESTIONMARK_",
|
||||
"!": "_EXCLAMATIONMARK_",
|
||||
"$": "_DOLLARSIGN_",
|
||||
"€": "_EUROSIGN_",
|
||||
"°": "_DEGREESIGN_",
|
||||
# Including quotes works, but there are exceptions.
|
||||
"'": "_SINGLEQUOTE_",
|
||||
'"': "_DOUBLEQUOTE_",
|
||||
# Currently not possible. Terminates parser and won't find backtick.
|
||||
# "#": "_HASH_",
|
||||
}
|
||||
)
|
||||
|
||||
name = "".join([special_characters_replacements.get(char, char) for char in name])
|
||||
name = "BACKTICK_QUOTED_STRING_" + name
|
||||
|
||||
if not name.isidentifier():
|
||||
raise SyntaxError(f"Could not convert '{name}' to a valid Python identifier.")
|
||||
|
||||
return name
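
# Hedged example of the replacement scheme (prefix plus escaped specials):
#
#   create_valid_python_identifier("my col!")
#   # -> 'BACKTICK_QUOTED_STRING_my_col_EXCLAMATIONMARK_'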
|
||||
|
||||
|
||||
def clean_backtick_quoted_toks(tok: tuple[int, str]) -> tuple[int, str]:
|
||||
"""
|
||||
Clean up a column name if surrounded by backticks.
|
||||
|
||||
Backtick quoted strings are indicated by a certain tokval value. If a string
is a backtick quoted token it will be processed by
:func:`create_valid_python_identifier` so that the parser can find this
string when the query is executed.
In this case the tok will get the NAME tokval.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
tok : tuple of int, str
|
||||
ints correspond to the all caps constants in the tokenize module
|
||||
|
||||
Returns
|
||||
-------
|
||||
tok : Tuple[int, str]
Either the input token or the replacement values
|
||||
"""
|
||||
toknum, tokval = tok
|
||||
if toknum == BACKTICK_QUOTED_STRING:
|
||||
return tokenize.NAME, create_valid_python_identifier(tokval)
|
||||
return toknum, tokval
|
||||
|
||||
|
||||
def clean_column_name(name: Hashable) -> Hashable:
|
||||
"""
|
||||
Function to emulate the cleaning of a backtick quoted name.
|
||||
|
||||
The purpose of this function is to see what happens to the name of an
identifier if it goes through the process of being parsed as Python code
inside a backtick quoted string and then being cleaned
(stripped of any special characters).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name : hashable
|
||||
Name to be cleaned.
|
||||
|
||||
Returns
|
||||
-------
|
||||
name : hashable
|
||||
Returns the name after tokenizing and cleaning.
|
||||
|
||||
Notes
|
||||
-----
|
||||
In some cases, a name cannot be converted to a valid Python identifier.
In that case :func:`tokenize_string` raises a SyntaxError and we simply
return the name unmodified.
|
||||
|
||||
If such a name is used in the query string (which makes the query call
impossible), an error will be raised by :func:`tokenize_backtick_quoted_string`
instead; that error is not caught here and propagates to the user level.
|
||||
"""
|
||||
try:
|
||||
tokenized = tokenize_string(f"`{name}`")
|
||||
tokval = next(tokenized)[1]
|
||||
return create_valid_python_identifier(tokval)
|
||||
except SyntaxError:
|
||||
return name
|
||||
|
||||
|
||||
def tokenize_backtick_quoted_string(
|
||||
token_generator: Iterator[tokenize.TokenInfo], source: str, string_start: int
|
||||
) -> tuple[int, str]:
|
||||
"""
|
||||
Creates a token from a backtick quoted string.
|
||||
|
||||
Moves the token_generator forwards till right after the next backtick.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
token_generator : Iterator[tokenize.TokenInfo]
|
||||
The generator that yields the tokens of the source string (Tuple[int, str]).
|
||||
The generator is at the first token after the backtick (`)
|
||||
|
||||
source : str
|
||||
The Python source code string.
|
||||
|
||||
string_start : int
|
||||
This is the start of backtick quoted string inside the source string.
|
||||
|
||||
Returns
|
||||
-------
|
||||
tok: Tuple[int, str]
|
||||
The token that represents the backtick quoted string.
|
||||
The integer is equal to BACKTICK_QUOTED_STRING (100).
|
||||
"""
|
||||
for _, tokval, start, _, _ in token_generator:
|
||||
if tokval == "`":
|
||||
string_end = start[1]
|
||||
break
|
||||
|
||||
return BACKTICK_QUOTED_STRING, source[string_start:string_end]
|
||||
|
||||
|
||||
def tokenize_string(source: str) -> Iterator[tuple[int, str]]:
|
||||
"""
|
||||
Tokenize a Python source code string.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
source : str
|
||||
The Python source code string.
|
||||
|
||||
Returns
|
||||
-------
|
||||
tok_generator : Iterator[Tuple[int, str]]
|
||||
An iterator yielding all tokens with only toknum and tokval (Tuple[int, str]).
|
||||
"""
|
||||
line_reader = StringIO(source).readline
|
||||
token_generator = tokenize.generate_tokens(line_reader)
|
||||
|
||||
# Loop over all tokens till a backtick (`) is found.
|
||||
# Then, take all tokens till the next backtick to form a backtick quoted string
|
||||
for toknum, tokval, start, _, _ in token_generator:
|
||||
if tokval == "`":
|
||||
try:
|
||||
yield tokenize_backtick_quoted_string(
|
||||
token_generator, source, string_start=start[1] + 1
|
||||
)
|
||||
except Exception as err:
|
||||
raise SyntaxError(f"Failed to parse backticks in '{source}'.") from err
|
||||
else:
|
||||
yield toknum, tokval
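
# Hedged end-to-end sketch: this tokenizer is what lets DataFrame.query
# reference columns whose names are not valid Python identifiers.
#
#   import pandas as pd
#
#   df = pd.DataFrame({"my col!": [1, 2, 3]})
#   df.query("`my col!` > 1")   # rows where the backticked column exceeds 1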
|
653
.venv/Lib/site-packages/pandas/core/computation/pytables.py
Normal file
@ -0,0 +1,653 @@
|
||||
""" manage PyTables query interface via Expressions """
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
from functools import partial
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs.tslibs import (
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
)
|
||||
from pandas._typing import npt
|
||||
from pandas.compat.chainmap import DeepChainMap
|
||||
|
||||
from pandas.core.dtypes.common import is_list_like
|
||||
|
||||
import pandas.core.common as com
|
||||
from pandas.core.computation import (
|
||||
expr,
|
||||
ops,
|
||||
scope as _scope,
|
||||
)
|
||||
from pandas.core.computation.common import ensure_decoded
|
||||
from pandas.core.computation.expr import BaseExprVisitor
|
||||
from pandas.core.computation.ops import (
|
||||
UndefinedVariableError,
|
||||
is_term,
|
||||
)
|
||||
from pandas.core.construction import extract_array
|
||||
from pandas.core.indexes.base import Index
|
||||
|
||||
from pandas.io.formats.printing import (
|
||||
pprint_thing,
|
||||
pprint_thing_encoded,
|
||||
)
|
||||
|
||||
|
||||
class PyTablesScope(_scope.Scope):
|
||||
__slots__ = ("queryables",)
|
||||
|
||||
queryables: dict[str, Any]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
level: int,
|
||||
global_dict=None,
|
||||
local_dict=None,
|
||||
queryables: dict[str, Any] | None = None,
|
||||
):
|
||||
super().__init__(level + 1, global_dict=global_dict, local_dict=local_dict)
|
||||
self.queryables = queryables or {}
|
||||
|
||||
|
||||
class Term(ops.Term):
|
||||
env: PyTablesScope
|
||||
|
||||
def __new__(cls, name, env, side=None, encoding=None):
|
||||
if isinstance(name, str):
|
||||
klass = cls
|
||||
else:
|
||||
klass = Constant
|
||||
return object.__new__(klass)
|
||||
|
||||
def __init__(self, name, env: PyTablesScope, side=None, encoding=None):
|
||||
super().__init__(name, env, side=side, encoding=encoding)
|
||||
|
||||
def _resolve_name(self):
|
||||
# the lhs must be one of the queryables
|
||||
if self.side == "left":
|
||||
# Note: The behavior of __new__ ensures that self.name is a str here
|
||||
if self.name not in self.env.queryables:
|
||||
raise NameError(f"name {repr(self.name)} is not defined")
|
||||
return self.name
|
||||
|
||||
# resolve the rhs (and allow it to be None)
|
||||
try:
|
||||
return self.env.resolve(self.name, is_local=False)
|
||||
except UndefinedVariableError:
|
||||
return self.name
|
||||
|
||||
# read-only property overwriting read/write property
|
||||
@property # type: ignore[misc]
|
||||
def value(self):
|
||||
return self._value
|
||||
|
||||
|
||||
class Constant(Term):
|
||||
def __init__(self, value, env: PyTablesScope, side=None, encoding=None):
|
||||
assert isinstance(env, PyTablesScope), type(env)
|
||||
super().__init__(value, env, side=side, encoding=encoding)
|
||||
|
||||
def _resolve_name(self):
|
||||
return self._name
|
||||
|
||||
|
||||
class BinOp(ops.BinOp):
|
||||
|
||||
_max_selectors = 31
|
||||
|
||||
op: str
|
||||
queryables: dict[str, Any]
|
||||
condition: str | None
|
||||
|
||||
def __init__(self, op: str, lhs, rhs, queryables: dict[str, Any], encoding):
|
||||
super().__init__(op, lhs, rhs)
|
||||
self.queryables = queryables
|
||||
self.encoding = encoding
|
||||
self.condition = None
|
||||
|
||||
def _disallow_scalar_only_bool_ops(self):
|
||||
pass
|
||||
|
||||
def prune(self, klass):
|
||||
def pr(left, right):
|
||||
"""create and return a new specialized BinOp from myself"""
|
||||
if left is None:
|
||||
return right
|
||||
elif right is None:
|
||||
return left
|
||||
|
||||
k = klass
|
||||
if isinstance(left, ConditionBinOp):
|
||||
if isinstance(right, ConditionBinOp):
|
||||
k = JointConditionBinOp
|
||||
elif isinstance(left, k):
|
||||
return left
|
||||
elif isinstance(right, k):
|
||||
return right
|
||||
|
||||
elif isinstance(left, FilterBinOp):
|
||||
if isinstance(right, FilterBinOp):
|
||||
k = JointFilterBinOp
|
||||
elif isinstance(left, k):
|
||||
return left
|
||||
elif isinstance(right, k):
|
||||
return right
|
||||
|
||||
return k(
|
||||
self.op, left, right, queryables=self.queryables, encoding=self.encoding
|
||||
).evaluate()
|
||||
|
||||
left, right = self.lhs, self.rhs
|
||||
|
||||
if is_term(left) and is_term(right):
|
||||
res = pr(left.value, right.value)
|
||||
elif not is_term(left) and is_term(right):
|
||||
res = pr(left.prune(klass), right.value)
|
||||
elif is_term(left) and not is_term(right):
|
||||
res = pr(left.value, right.prune(klass))
|
||||
elif not (is_term(left) or is_term(right)):
|
||||
res = pr(left.prune(klass), right.prune(klass))
|
||||
|
||||
return res
|
||||
|
||||
def conform(self, rhs):
|
||||
"""inplace conform rhs"""
|
||||
if not is_list_like(rhs):
|
||||
rhs = [rhs]
|
||||
if isinstance(rhs, np.ndarray):
|
||||
rhs = rhs.ravel()
|
||||
return rhs
|
||||
|
||||
@property
|
||||
def is_valid(self) -> bool:
|
||||
"""return True if this is a valid field"""
|
||||
return self.lhs in self.queryables
|
||||
|
||||
@property
|
||||
def is_in_table(self) -> bool:
|
||||
"""
|
||||
return True if this is a valid column name for generation (e.g. an
|
||||
actual column in the table)
|
||||
"""
|
||||
return self.queryables.get(self.lhs) is not None
|
||||
|
||||
@property
|
||||
def kind(self):
|
||||
"""the kind of my field"""
|
||||
return getattr(self.queryables.get(self.lhs), "kind", None)
|
||||
|
||||
@property
|
||||
def meta(self):
|
||||
"""the meta of my field"""
|
||||
return getattr(self.queryables.get(self.lhs), "meta", None)
|
||||
|
||||
@property
|
||||
def metadata(self):
|
||||
"""the metadata of my field"""
|
||||
return getattr(self.queryables.get(self.lhs), "metadata", None)
|
||||
|
||||
def generate(self, v) -> str:
|
||||
"""create and return the op string for this TermValue"""
|
||||
val = v.tostring(self.encoding)
|
||||
return f"({self.lhs} {self.op} {val})"
|
||||
|
||||
def convert_value(self, v) -> TermValue:
|
||||
"""
|
||||
convert the expression that is in the term to something that is
|
||||
accepted by pytables
|
||||
"""
|
||||
|
||||
def stringify(value):
|
||||
if self.encoding is not None:
|
||||
return pprint_thing_encoded(value, encoding=self.encoding)
|
||||
return pprint_thing(value)
|
||||
|
||||
kind = ensure_decoded(self.kind)
|
||||
meta = ensure_decoded(self.meta)
|
||||
if kind == "datetime64" or kind == "datetime":
|
||||
if isinstance(v, (int, float)):
|
||||
v = stringify(v)
|
||||
v = ensure_decoded(v)
|
||||
v = Timestamp(v)
|
||||
if v.tz is not None:
|
||||
v = v.tz_convert("UTC")
|
||||
return TermValue(v, v.value, kind)
|
||||
elif kind == "timedelta64" or kind == "timedelta":
|
||||
if isinstance(v, str):
|
||||
v = Timedelta(v).value
|
||||
else:
|
||||
v = Timedelta(v, unit="s").value
|
||||
return TermValue(int(v), v, kind)
|
||||
elif meta == "category":
|
||||
metadata = extract_array(self.metadata, extract_numpy=True)
|
||||
result: npt.NDArray[np.intp] | np.intp | int
|
||||
if v not in metadata:
|
||||
result = -1
|
||||
else:
|
||||
result = metadata.searchsorted(v, side="left")
|
||||
return TermValue(result, result, "integer")
|
||||
elif kind == "integer":
|
||||
v = int(float(v))
|
||||
return TermValue(v, v, kind)
|
||||
elif kind == "float":
|
||||
v = float(v)
|
||||
return TermValue(v, v, kind)
|
||||
elif kind == "bool":
|
||||
if isinstance(v, str):
|
||||
v = v.strip().lower() not in [
"false",
"f",
"no",
"n",
"none",
"0",
"[]",
"{}",
"",
]
|
||||
else:
|
||||
v = bool(v)
|
||||
return TermValue(v, v, kind)
|
||||
elif isinstance(v, str):
|
||||
# string quoting
|
||||
return TermValue(v, stringify(v), "string")
|
||||
else:
|
||||
raise TypeError(f"Cannot compare {v} of type {type(v)} to {kind} column")
|
||||
|
||||
def convert_values(self):
|
||||
pass
|
||||
|
||||
|
||||
class FilterBinOp(BinOp):
|
||||
filter: tuple[Any, Any, Index] | None = None
|
||||
|
||||
def __repr__(self) -> str:
|
||||
if self.filter is None:
|
||||
return "Filter: Not Initialized"
|
||||
return pprint_thing(f"[Filter : [{self.filter[0]}] -> [{self.filter[1]}]")
|
||||
|
||||
def invert(self):
|
||||
"""invert the filter"""
|
||||
if self.filter is not None:
|
||||
self.filter = (
|
||||
self.filter[0],
|
||||
self.generate_filter_op(invert=True),
|
||||
self.filter[2],
|
||||
)
|
||||
return self
|
||||
|
||||
def format(self):
|
||||
"""return the actual filter format"""
|
||||
return [self.filter]
|
||||
|
||||
def evaluate(self):
|
||||
|
||||
if not self.is_valid:
|
||||
raise ValueError(f"query term is not valid [{self}]")
|
||||
|
||||
rhs = self.conform(self.rhs)
|
||||
values = list(rhs)
|
||||
|
||||
if self.is_in_table:
|
||||
|
||||
# if too many values to create the expression, use a filter instead
|
||||
if self.op in ["==", "!="] and len(values) > self._max_selectors:
|
||||
|
||||
filter_op = self.generate_filter_op()
|
||||
self.filter = (self.lhs, filter_op, Index(values))
|
||||
|
||||
return self
|
||||
return None
|
||||
|
||||
# equality conditions
|
||||
if self.op in ["==", "!="]:
|
||||
|
||||
filter_op = self.generate_filter_op()
|
||||
self.filter = (self.lhs, filter_op, Index(values))
|
||||
|
||||
else:
|
||||
raise TypeError(
|
||||
f"passing a filterable condition to a non-table indexer [{self}]"
|
||||
)
|
||||
|
||||
return self
|
||||
|
||||
def generate_filter_op(self, invert: bool = False):
|
||||
if (self.op == "!=" and not invert) or (self.op == "==" and invert):
|
||||
return lambda axis, vals: ~axis.isin(vals)
|
||||
else:
|
||||
return lambda axis, vals: axis.isin(vals)
|
||||
|
||||
|
||||
class JointFilterBinOp(FilterBinOp):
|
||||
def format(self):
|
||||
raise NotImplementedError("unable to collapse Joint Filters")
|
||||
|
||||
def evaluate(self):
|
||||
return self
|
||||
|
||||
|
||||
class ConditionBinOp(BinOp):
|
||||
def __repr__(self) -> str:
|
||||
return pprint_thing(f"[Condition : [{self.condition}]]")
|
||||
|
||||
def invert(self):
|
||||
"""invert the condition"""
|
||||
# if self.condition is not None:
|
||||
# self.condition = "~(%s)" % self.condition
|
||||
# return self
|
||||
raise NotImplementedError(
|
||||
"cannot use an invert condition when passing to numexpr"
|
||||
)
|
||||
|
||||
def format(self):
|
||||
"""return the actual ne format"""
|
||||
return self.condition
|
||||
|
||||
def evaluate(self):
|
||||
|
||||
if not self.is_valid:
|
||||
raise ValueError(f"query term is not valid [{self}]")
|
||||
|
||||
# convert values if we are in the table
|
||||
if not self.is_in_table:
|
||||
return None
|
||||
|
||||
rhs = self.conform(self.rhs)
|
||||
values = [self.convert_value(v) for v in rhs]
|
||||
|
||||
# equality conditions
|
||||
if self.op in ["==", "!="]:
|
||||
|
||||
# too many values to create the expression?
|
||||
if len(values) <= self._max_selectors:
|
||||
vs = [self.generate(v) for v in values]
|
||||
self.condition = f"({' | '.join(vs)})"
|
||||
|
||||
# use a filter after reading
|
||||
else:
|
||||
return None
|
||||
else:
|
||||
self.condition = self.generate(values[0])
|
||||
|
||||
return self
|
||||
|
||||
|
||||
class JointConditionBinOp(ConditionBinOp):
|
||||
def evaluate(self):
|
||||
self.condition = f"({self.lhs.condition} {self.op} {self.rhs.condition})"
|
||||
return self
|
||||
|
||||
|
||||
class UnaryOp(ops.UnaryOp):
|
||||
def prune(self, klass):
|
||||
|
||||
if self.op != "~":
|
||||
raise NotImplementedError("UnaryOp only support invert type ops")
|
||||
|
||||
operand = self.operand
|
||||
operand = operand.prune(klass)
|
||||
|
||||
if operand is not None and (
|
||||
issubclass(klass, ConditionBinOp)
|
||||
and operand.condition is not None
|
||||
or not issubclass(klass, ConditionBinOp)
|
||||
and issubclass(klass, FilterBinOp)
|
||||
and operand.filter is not None
|
||||
):
|
||||
return operand.invert()
|
||||
return None
|
||||
|
||||
|
||||
class PyTablesExprVisitor(BaseExprVisitor):
|
||||
const_type = Constant
|
||||
term_type = Term
|
||||
|
||||
def __init__(self, env, engine, parser, **kwargs):
|
||||
super().__init__(env, engine, parser)
|
||||
for bin_op in self.binary_ops:
|
||||
bin_node = self.binary_op_nodes_map[bin_op]
|
||||
setattr(
|
||||
self,
|
||||
f"visit_{bin_node}",
|
||||
lambda node, bin_op=bin_op: partial(BinOp, bin_op, **kwargs),
|
||||
)
|
||||
|
||||
def visit_UnaryOp(self, node, **kwargs):
|
||||
if isinstance(node.op, (ast.Not, ast.Invert)):
|
||||
return UnaryOp("~", self.visit(node.operand))
|
||||
elif isinstance(node.op, ast.USub):
|
||||
return self.const_type(-self.visit(node.operand).value, self.env)
|
||||
elif isinstance(node.op, ast.UAdd):
|
||||
raise NotImplementedError("Unary addition not supported")
|
||||
|
||||
def visit_Index(self, node, **kwargs):
|
||||
return self.visit(node.value).value
|
||||
|
||||
def visit_Assign(self, node, **kwargs):
|
||||
cmpr = ast.Compare(
|
||||
ops=[ast.Eq()], left=node.targets[0], comparators=[node.value]
|
||||
)
|
||||
return self.visit(cmpr)
|
||||
|
||||
def visit_Subscript(self, node, **kwargs):
|
||||
# only allow simple subscripts
|
||||
|
||||
value = self.visit(node.value)
|
||||
slobj = self.visit(node.slice)
|
||||
try:
|
||||
value = value.value
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
if isinstance(slobj, Term):
|
||||
# In py39 np.ndarray lookups with Term containing int raise
|
||||
slobj = slobj.value
|
||||
|
||||
try:
|
||||
return self.const_type(value[slobj], self.env)
|
||||
except TypeError as err:
|
||||
raise ValueError(
|
||||
f"cannot subscript {repr(value)} with {repr(slobj)}"
|
||||
) from err
|
||||
|
||||
def visit_Attribute(self, node, **kwargs):
|
||||
attr = node.attr
|
||||
value = node.value
|
||||
|
||||
ctx = type(node.ctx)
|
||||
if ctx == ast.Load:
|
||||
# resolve the value
|
||||
resolved = self.visit(value)
|
||||
|
||||
# try to get the value to see if we are another expression
|
||||
try:
|
||||
resolved = resolved.value
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
return self.term_type(getattr(resolved, attr), self.env)
|
||||
except AttributeError:
|
||||
|
||||
# something like datetime.datetime where scope is overridden
|
||||
if isinstance(value, ast.Name) and value.id == attr:
|
||||
return resolved
|
||||
|
||||
raise ValueError(f"Invalid Attribute context {ctx.__name__}")
|
||||
|
||||
def translate_In(self, op):
|
||||
return ast.Eq() if isinstance(op, ast.In) else op
|
||||
|
||||
def _rewrite_membership_op(self, node, left, right):
|
||||
return self.visit(node.op), node.op, left, right
|
||||
|
||||
|
||||
def _validate_where(w):
|
||||
"""
|
||||
Validate that the where statement is of the right type.
|
||||
|
||||
The type may either be String, Expr, or list-like of Exprs.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
w : String term expression, Expr, or list-like of Exprs.
|
||||
|
||||
Returns
|
||||
-------
|
||||
where : The original where clause if the check was successful.
|
||||
|
||||
Raises
|
||||
------
|
||||
TypeError : An invalid data type was passed in for w (e.g. dict).
|
||||
"""
|
||||
if not (isinstance(w, (PyTablesExpr, str)) or is_list_like(w)):
|
||||
raise TypeError(
|
||||
"where must be passed as a string, PyTablesExpr, "
|
||||
"or list-like of PyTablesExpr"
|
||||
)
|
||||
|
||||
return w
|
||||
|
||||
|
||||
class PyTablesExpr(expr.Expr):
|
||||
"""
|
||||
Hold a pytables-like expression, comprised of possibly multiple 'terms'.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
where : string term expression, PyTablesExpr, or list-like of PyTablesExprs
|
||||
queryables : a "kinds" map (dict of column name -> kind), or None if column
|
||||
is non-indexable
|
||||
encoding : an encoding that will encode the query terms
|
||||
|
||||
Returns
|
||||
-------
|
||||
a PyTablesExpr object
|
||||
|
||||
Examples
|
||||
--------
|
||||
'index>=date'
|
||||
"columns=['A', 'D']"
|
||||
'columns=A'
|
||||
'columns==A'
|
||||
"~(columns=['A','B'])"
|
||||
'index>df.index[3] & string="bar"'
|
||||
'(index>df.index[3] & index<=df.index[6]) | string="bar"'
|
||||
"ts>=Timestamp('2012-02-01')"
|
||||
"major_axis>=20130101"
|
||||
"""
|
||||
|
||||
_visitor: PyTablesExprVisitor | None
|
||||
env: PyTablesScope
|
||||
expr: str
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
where,
|
||||
queryables: dict[str, Any] | None = None,
|
||||
encoding=None,
|
||||
scope_level: int = 0,
|
||||
):
|
||||
|
||||
where = _validate_where(where)
|
||||
|
||||
self.encoding = encoding
|
||||
self.condition = None
|
||||
self.filter = None
|
||||
self.terms = None
|
||||
self._visitor = None
|
||||
|
||||
# capture the environment if needed
|
||||
local_dict: DeepChainMap[Any, Any] = DeepChainMap()
|
||||
|
||||
if isinstance(where, PyTablesExpr):
|
||||
local_dict = where.env.scope
|
||||
_where = where.expr
|
||||
|
||||
elif is_list_like(where):
|
||||
where = list(where)
|
||||
for idx, w in enumerate(where):
|
||||
if isinstance(w, PyTablesExpr):
|
||||
local_dict = w.env.scope
|
||||
else:
|
||||
w = _validate_where(w)
|
||||
where[idx] = w
|
||||
_where = " & ".join([f"({w})" for w in com.flatten(where)])
|
||||
else:
|
||||
# _validate_where ensures we otherwise have a string
|
||||
_where = where
|
||||
|
||||
self.expr = _where
|
||||
self.env = PyTablesScope(scope_level + 1, local_dict=local_dict)
|
||||
|
||||
if queryables is not None and isinstance(self.expr, str):
|
||||
self.env.queryables.update(queryables)
|
||||
self._visitor = PyTablesExprVisitor(
|
||||
self.env,
|
||||
queryables=queryables,
|
||||
parser="pytables",
|
||||
engine="pytables",
|
||||
encoding=encoding,
|
||||
)
|
||||
self.terms = self.parse()
|
||||
|
||||
def __repr__(self) -> str:
|
||||
if self.terms is not None:
|
||||
return pprint_thing(self.terms)
|
||||
return pprint_thing(self.expr)
|
||||
|
||||
def evaluate(self):
|
||||
"""create and return the numexpr condition and filter"""
|
||||
try:
|
||||
self.condition = self.terms.prune(ConditionBinOp)
|
||||
except AttributeError as err:
|
||||
raise ValueError(
|
||||
f"cannot process expression [{self.expr}], [{self}] "
|
||||
"is not a valid condition"
|
||||
) from err
|
||||
try:
|
||||
self.filter = self.terms.prune(FilterBinOp)
|
||||
except AttributeError as err:
|
||||
raise ValueError(
|
||||
f"cannot process expression [{self.expr}], [{self}] "
|
||||
"is not a valid filter"
|
||||
) from err
|
||||
|
||||
return self.condition, self.filter
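
# Hedged sketch of how this class is exercised through the public HDFStore
# API; "store.h5" and the column names are illustrative, and PyTables must
# be installed.
#
#   import pandas as pd
#
#   df = pd.DataFrame({"A": range(10), "B": list("ababababab")})
#   with pd.HDFStore("store.h5") as store:
#       store.put("df", df, format="table", data_columns=True)
#       # the where string becomes a PyTablesExpr; evaluate() splits it into
#       # a numexpr condition and/or a post-read filter
#       store.select("df", where="A > 5 & B == 'a'")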
|
||||
|
||||
|
||||
class TermValue:
|
||||
"""hold a term value the we use to construct a condition/filter"""
|
||||
|
||||
def __init__(self, value, converted, kind: str):
|
||||
assert isinstance(kind, str), kind
|
||||
self.value = value
|
||||
self.converted = converted
|
||||
self.kind = kind
|
||||
|
||||
def tostring(self, encoding) -> str:
|
||||
"""quote the string if not encoded else encode and return"""
|
||||
if self.kind == "string":
|
||||
if encoding is not None:
|
||||
return str(self.converted)
|
||||
return f'"{self.converted}"'
|
||||
elif self.kind == "float":
|
||||
# python 2 str(float) is not always
|
||||
# round-trippable so use repr()
|
||||
return repr(self.converted)
|
||||
return str(self.converted)
|
||||
|
||||
|
||||
def maybe_expression(s) -> bool:
|
||||
"""loose checking if s is a pytables-acceptable expression"""
|
||||
if not isinstance(s, str):
|
||||
return False
|
||||
ops = PyTablesExprVisitor.binary_ops + PyTablesExprVisitor.unary_ops + ("=",)
|
||||
|
||||
# make sure we have an op at least
|
||||
return any(op in s for op in ops)
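
# Hedged examples of the loose check:
#
#   maybe_expression("A > 5")    # True  -- contains a binary operator
#   maybe_expression("column")   # False -- no operator present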
|
330
.venv/Lib/site-packages/pandas/core/computation/scope.py
Normal file
@ -0,0 +1,330 @@
|
||||
"""
|
||||
Module for scope operations
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import inspect
|
||||
from io import StringIO
|
||||
import itertools
|
||||
import pprint
|
||||
import struct
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs.tslibs import Timestamp
|
||||
from pandas.compat.chainmap import DeepChainMap
|
||||
|
||||
|
||||
def ensure_scope(
|
||||
level: int, global_dict=None, local_dict=None, resolvers=(), target=None, **kwargs
|
||||
) -> Scope:
|
||||
"""Ensure that we are grabbing the correct scope."""
|
||||
return Scope(
|
||||
level + 1,
|
||||
global_dict=global_dict,
|
||||
local_dict=local_dict,
|
||||
resolvers=resolvers,
|
||||
target=target,
|
||||
)
|
||||
|
||||
|
||||
def _replacer(x) -> str:
|
||||
"""
|
||||
Replace a number with its hexadecimal representation. Used to tag
|
||||
temporary variables with their calling scope's id.
|
||||
"""
|
||||
# get the hex repr of the binary char and remove 0x and pad by pad_size
|
||||
# zeros
|
||||
try:
|
||||
hexin = ord(x)
|
||||
except TypeError:
|
||||
# bytes literals masquerade as ints when iterating in py3
|
||||
hexin = x
|
||||
|
||||
return hex(hexin)
|
||||
|
||||
|
||||
def _raw_hex_id(obj) -> str:
|
||||
"""Return the padded hexadecimal id of ``obj``."""
|
||||
# interpret as a pointer since that's really what id returns
|
||||
packed = struct.pack("@P", id(obj))
|
||||
return "".join([_replacer(x) for x in packed])
|
||||
|
||||
|
||||
DEFAULT_GLOBALS = {
|
||||
"Timestamp": Timestamp,
|
||||
"datetime": datetime.datetime,
|
||||
"True": True,
|
||||
"False": False,
|
||||
"list": list,
|
||||
"tuple": tuple,
|
||||
"inf": np.inf,
|
||||
"Inf": np.inf,
|
||||
}
|
||||
|
||||
|
||||
def _get_pretty_string(obj) -> str:
|
||||
"""
|
||||
Return a prettier version of obj.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
obj : object
|
||||
Object to pretty print
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
Pretty print object repr
|
||||
"""
|
||||
sio = StringIO()
|
||||
pprint.pprint(obj, stream=sio)
|
||||
return sio.getvalue()
|
||||
|
||||
|
||||
class Scope:
|
||||
"""
|
||||
Object to hold scope, with a few bells to deal with some custom syntax
|
||||
and contexts added by pandas.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
level : int
|
||||
global_dict : dict or None, optional, default None
|
||||
local_dict : dict or Scope or None, optional, default None
|
||||
resolvers : list-like or None, optional, default None
|
||||
target : object
|
||||
|
||||
Attributes
|
||||
----------
|
||||
level : int
|
||||
scope : DeepChainMap
|
||||
target : object
|
||||
temps : dict
|
||||
"""
|
||||
|
||||
__slots__ = ["level", "scope", "target", "resolvers", "temps"]
|
||||
level: int
|
||||
scope: DeepChainMap
|
||||
resolvers: DeepChainMap
|
||||
temps: dict
|
||||
|
||||
def __init__(
|
||||
self, level: int, global_dict=None, local_dict=None, resolvers=(), target=None
|
||||
):
|
||||
self.level = level + 1
|
||||
|
||||
# shallow copy because we don't want to keep filling this up with what
|
||||
# was there before if there are multiple calls to Scope/_ensure_scope
|
||||
self.scope = DeepChainMap(DEFAULT_GLOBALS.copy())
|
||||
self.target = target
|
||||
|
||||
if isinstance(local_dict, Scope):
|
||||
self.scope.update(local_dict.scope)
|
||||
if local_dict.target is not None:
|
||||
self.target = local_dict.target
|
||||
self._update(local_dict.level)
|
||||
|
||||
frame = sys._getframe(self.level)
|
||||
|
||||
try:
|
||||
# shallow copy here because we don't want to replace what's in
|
||||
# scope when we align terms (alignment accesses the underlying
|
||||
# numpy array of pandas objects)
|
||||
scope_global = self.scope.new_child((global_dict or frame.f_globals).copy())
|
||||
self.scope = DeepChainMap(scope_global)
|
||||
if not isinstance(local_dict, Scope):
|
||||
scope_local = self.scope.new_child(
|
||||
(local_dict or frame.f_locals).copy()
|
||||
)
|
||||
self.scope = DeepChainMap(scope_local)
|
||||
finally:
|
||||
del frame
|
||||
|
||||
# assumes that resolvers are going from outermost scope to inner
|
||||
if isinstance(local_dict, Scope):
|
||||
resolvers += tuple(local_dict.resolvers.maps)
|
||||
self.resolvers = DeepChainMap(*resolvers)
|
||||
self.temps = {}
|
||||
|
||||
def __repr__(self) -> str:
|
||||
scope_keys = _get_pretty_string(list(self.scope.keys()))
|
||||
res_keys = _get_pretty_string(list(self.resolvers.keys()))
|
||||
return f"{type(self).__name__}(scope={scope_keys}, resolvers={res_keys})"
|
||||
|
||||
@property
|
||||
def has_resolvers(self) -> bool:
|
||||
"""
|
||||
Return whether we have any extra scope.
|
||||
|
||||
For example, DataFrames pass their columns as resolvers during calls to
|
||||
``DataFrame.eval()`` and ``DataFrame.query()``.
|
||||
|
||||
Returns
|
||||
-------
|
||||
hr : bool
|
||||
"""
|
||||
return bool(len(self.resolvers))
|
||||
|
||||
def resolve(self, key: str, is_local: bool):
|
||||
"""
|
||||
Resolve a variable name in a possibly local context.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
key : str
|
||||
A variable name
|
||||
is_local : bool
|
||||
Flag indicating whether the variable is local or not (prefixed with
|
||||
the '@' symbol)
|
||||
|
||||
Returns
|
||||
-------
|
||||
value : object
|
||||
The value of a particular variable
|
||||
"""
|
||||
try:
|
||||
# only look for locals in outer scope
|
||||
if is_local:
|
||||
return self.scope[key]
|
||||
|
||||
# not a local variable so check in resolvers if we have them
|
||||
if self.has_resolvers:
|
||||
return self.resolvers[key]
|
||||
|
||||
# if we're here that means that we have no locals and we also have
|
||||
# no resolvers
|
||||
assert not is_local and not self.has_resolvers
|
||||
return self.scope[key]
|
||||
except KeyError:
|
||||
try:
|
||||
# last ditch effort we look in temporaries
|
||||
# these are created when parsing indexing expressions
|
||||
# e.g., df[df > 0]
|
||||
return self.temps[key]
|
||||
except KeyError as err:
|
||||
# runtime import because ops imports from scope
|
||||
from pandas.core.computation.ops import UndefinedVariableError
|
||||
|
||||
raise UndefinedVariableError(key, is_local) from err
|
||||
|
||||
def swapkey(self, old_key: str, new_key: str, new_value=None) -> None:
|
||||
"""
|
||||
Replace a variable name, with a potentially new value.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
old_key : str
|
||||
Current variable name to replace
|
||||
new_key : str
|
||||
New variable name to replace `old_key` with
|
||||
new_value : object
|
||||
Value to be replaced along with the possible renaming
|
||||
"""
|
||||
if self.has_resolvers:
|
||||
maps = self.resolvers.maps + self.scope.maps
|
||||
else:
|
||||
maps = self.scope.maps
|
||||
|
||||
maps.append(self.temps)
|
||||
|
||||
for mapping in maps:
|
||||
if old_key in mapping:
|
||||
mapping[new_key] = new_value
|
||||
return
|
||||
|
||||
def _get_vars(self, stack, scopes: list[str]) -> None:
|
||||
"""
|
||||
Get specifically scoped variables from a list of stack frames.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
stack : list
|
||||
A list of stack frames as returned by ``inspect.stack()``
|
||||
scopes : sequence of strings
|
||||
A sequence containing valid stack frame attribute names that
|
||||
evaluate to a dictionary. For example, ('locals', 'globals')
|
||||
"""
|
||||
variables = itertools.product(scopes, stack)
|
||||
for scope, (frame, _, _, _, _, _) in variables:
|
||||
try:
|
||||
d = getattr(frame, "f_" + scope)
|
||||
self.scope = DeepChainMap(self.scope.new_child(d))
|
||||
finally:
|
||||
# won't remove it, but DECREF it
|
||||
# in Py3 this probably isn't necessary since frame won't be
|
||||
# scope after the loop
|
||||
del frame
|
||||
|
||||
def _update(self, level: int) -> None:
|
||||
"""
|
||||
Update the current scope by going back `level` levels.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
level : int
|
||||
"""
|
||||
sl = level + 1
|
||||
|
||||
# add sl frames to the scope starting with the
|
||||
# most distant and overwriting with more current
|
||||
# makes sure that we can capture variable scope
|
||||
stack = inspect.stack()
|
||||
|
||||
try:
|
||||
self._get_vars(stack[:sl], scopes=["locals"])
|
||||
finally:
|
||||
del stack[:], stack
|
||||
|
||||
def add_tmp(self, value) -> str:
|
||||
"""
|
||||
Add a temporary variable to the scope.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
value : object
|
||||
An arbitrary object to be assigned to a temporary variable.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
The name of the temporary variable created.
|
||||
"""
|
||||
name = f"{type(value).__name__}_{self.ntemps}_{_raw_hex_id(self)}"
|
||||
|
||||
# add to inner most scope
|
||||
assert name not in self.temps
|
||||
self.temps[name] = value
|
||||
assert name in self.temps
|
||||
|
||||
# only increment if the variable gets put in the scope
|
||||
return name
|
||||
|
||||
@property
|
||||
def ntemps(self) -> int:
|
||||
"""The number of temporary variables in this scope"""
|
||||
return len(self.temps)
|
||||
|
||||
@property
|
||||
def full_scope(self) -> DeepChainMap:
|
||||
"""
|
||||
Return the full scope for use with passing to engines transparently
|
||||
as a mapping.
|
||||
|
||||
Returns
|
||||
-------
|
||||
vars : DeepChainMap
|
||||
All variables in this scope.
|
||||
"""
|
||||
# error: Unsupported operand types for + ("List[Dict[Any, Any]]" and
|
||||
# "List[Mapping[Any, Any]]")
|
||||
# error: Unsupported operand types for + ("List[Dict[Any, Any]]" and
|
||||
# "List[Mapping[str, Any]]")
|
||||
maps = (
|
||||
[self.temps]
|
||||
+ self.resolvers.maps # type: ignore[operator]
|
||||
+ self.scope.maps # type: ignore[operator]
|
||||
)
|
||||
return DeepChainMap(*maps)
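
# Hedged sketch: Scope is what lets query/eval resolve '@'-prefixed locals
# from the calling frame.
#
#   import pandas as pd
#
#   df = pd.DataFrame({"A": range(5)})
#   threshold = 2
#   df.query("A > @threshold")   # 'threshold' is resolved through Scope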
|