first commit

Author: Ayxan
Date:   2022-05-23 00:16:32 +04:00
Commit: d660f2a4ca

24786 changed files with 4428337 additions and 0 deletions


@@ -0,0 +1,475 @@
"""
Arithmetic operations for PandasObjects
This is not a public API.
"""
from __future__ import annotations
import operator
from typing import TYPE_CHECKING
import warnings
import numpy as np
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op # noqa:F401
from pandas._typing import Level
from pandas.util._decorators import Appender
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.common import (
is_array_like,
is_list_like,
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCSeries,
)
from pandas.core.dtypes.missing import isna
from pandas.core import (
algorithms,
roperator,
)
from pandas.core.ops.array_ops import ( # noqa:F401
arithmetic_op,
comp_method_OBJECT_ARRAY,
comparison_op,
get_array_op,
logical_op,
maybe_prepare_scalar_for_op,
)
from pandas.core.ops.common import ( # noqa:F401
get_op_result_name,
unpack_zerodim_and_defer,
)
from pandas.core.ops.docstrings import (
_flex_comp_doc_FRAME,
_op_descriptions,
make_flex_doc,
)
from pandas.core.ops.invalid import invalid_comparison # noqa:F401
from pandas.core.ops.mask_ops import ( # noqa: F401
kleene_and,
kleene_or,
kleene_xor,
)
from pandas.core.ops.methods import add_flex_arithmetic_methods # noqa:F401
from pandas.core.roperator import ( # noqa:F401
radd,
rand_,
rdiv,
rdivmod,
rfloordiv,
rmod,
rmul,
ror_,
rpow,
rsub,
rtruediv,
rxor,
)
if TYPE_CHECKING:
from pandas import (
DataFrame,
Series,
)
# -----------------------------------------------------------------------------
# constants
ARITHMETIC_BINOPS: set[str] = {
"add",
"sub",
"mul",
"pow",
"mod",
"floordiv",
"truediv",
"divmod",
"radd",
"rsub",
"rmul",
"rpow",
"rmod",
"rfloordiv",
"rtruediv",
"rdivmod",
}
COMPARISON_BINOPS: set[str] = {"eq", "ne", "lt", "gt", "le", "ge"}
# -----------------------------------------------------------------------------
# Masking NA values and fallbacks for operations numpy does not support
def fill_binop(left, right, fill_value):
"""
If a non-None fill_value is given, replace null entries in left and right
with this value, but only in positions where _one_ of left/right is null,
not both.
Parameters
----------
left : array-like
right : array-like
fill_value : object
Returns
-------
left : array-like
right : array-like
Notes
-----
Makes copies if fill_value is not None and NAs are present.
"""
if fill_value is not None:
left_mask = isna(left)
right_mask = isna(right)
# one but not both
mask = left_mask ^ right_mask
if left_mask.any():
# Avoid making a copy if we can
left = left.copy()
left[left_mask & mask] = fill_value
if right_mask.any():
# Avoid making a copy if we can
right = right.copy()
right[right_mask & mask] = fill_value
return left, right
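# Illustrative doctest-style sketch (not from the original module; assumes
# ``import pandas as pd`` and ``import numpy as np``): fill_binop only fills
# positions where exactly one operand is NA; positions where both are NA stay NA.
# >>> l, r = fill_binop(pd.Series([1.0, np.nan, np.nan]),
# ...                   pd.Series([np.nan, 2.0, np.nan]), fill_value=0)
# >>> l.tolist(), r.tolist()
# ([1.0, 0.0, nan], [0.0, 2.0, nan])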
# -----------------------------------------------------------------------------
# Series
def align_method_SERIES(left: Series, right, align_asobject: bool = False):
"""align lhs and rhs Series"""
# ToDo: Different from align_method_FRAME, list, tuple and ndarray
# are not coerced here
# because Series has inconsistencies described in #13637
if isinstance(right, ABCSeries):
# avoid repeated alignment
if not left.index.equals(right.index):
if align_asobject:
# to keep original value's dtype for bool ops
left = left.astype(object)
right = right.astype(object)
left, right = left.align(right, copy=False)
return left, right
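# Illustrative sketch (not from the original module; assumes
# ``import pandas as pd``): differently-indexed Series are outer-aligned
# before the op.
# >>> s1 = pd.Series([1, 2], index=["a", "b"])
# >>> s2 = pd.Series([3, 4], index=["b", "c"])
# >>> lhs, rhs = align_method_SERIES(s1, s2)
# >>> list(lhs.index), list(rhs.index)
# (['a', 'b', 'c'], ['a', 'b', 'c'])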
def flex_method_SERIES(op):
name = op.__name__.strip("_")
doc = make_flex_doc(name, "series")
@Appender(doc)
def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
# validate axis
if axis is not None:
self._get_axis_number(axis)
res_name = get_op_result_name(self, other)
if isinstance(other, ABCSeries):
return self._binop(other, op, level=level, fill_value=fill_value)
elif isinstance(other, (np.ndarray, list, tuple)):
if len(other) != len(self):
raise ValueError("Lengths must be equal")
other = self._constructor(other, self.index)
result = self._binop(other, op, level=level, fill_value=fill_value)
result.name = res_name
return result
else:
if fill_value is not None:
self = self.fillna(fill_value)
return op(self, other)
flex_wrapper.__name__ = name
return flex_wrapper
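# Illustrative sketch (not from the original module; assumes
# ``import pandas as pd``): the wrapper produced here is what backs the Series
# flex methods such as ``Series.add``, where ``fill_value`` is applied via
# fill_binop before the op; output indicative.
# >>> pd.Series([1.0, np.nan]).add(pd.Series([1.0, 1.0]), fill_value=0)
# 0    2.0
# 1    1.0
# dtype: float64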
# -----------------------------------------------------------------------------
# DataFrame
def align_method_FRAME(
left, right, axis, flex: bool | None = False, level: Level = None
):
"""
Convert rhs to meet lhs dims if input is list, tuple or np.ndarray.
Parameters
----------
left : DataFrame
right : Any
axis : int, str, or None
flex : bool or None, default False
Whether this is a flex op, in which case we reindex.
None indicates not to check for alignment.
level : int or level name, default None
Returns
-------
left : DataFrame
right : Any
"""
def to_series(right):
msg = "Unable to coerce to Series, length must be {req_len}: given {given_len}"
if axis is not None and left._get_axis_name(axis) == "index":
if len(left.index) != len(right):
raise ValueError(
msg.format(req_len=len(left.index), given_len=len(right))
)
right = left._constructor_sliced(right, index=left.index)
else:
if len(left.columns) != len(right):
raise ValueError(
msg.format(req_len=len(left.columns), given_len=len(right))
)
right = left._constructor_sliced(right, index=left.columns)
return right
if isinstance(right, np.ndarray):
if right.ndim == 1:
right = to_series(right)
elif right.ndim == 2:
if right.shape == left.shape:
right = left._constructor(right, index=left.index, columns=left.columns)
elif right.shape[0] == left.shape[0] and right.shape[1] == 1:
# Broadcast across columns
right = np.broadcast_to(right, left.shape)
right = left._constructor(right, index=left.index, columns=left.columns)
elif right.shape[1] == left.shape[1] and right.shape[0] == 1:
# Broadcast along rows
right = to_series(right[0, :])
else:
raise ValueError(
"Unable to coerce to DataFrame, shape "
f"must be {left.shape}: given {right.shape}"
)
elif right.ndim > 2:
raise ValueError(
"Unable to coerce to Series/DataFrame, "
f"dimension must be <= 2: {right.shape}"
)
elif is_list_like(right) and not isinstance(right, (ABCSeries, ABCDataFrame)):
# GH 36702. Raise when attempting arithmetic with list of array-like.
if any(is_array_like(el) for el in right):
raise ValueError(
f"Unable to coerce list of {type(right[0])} to Series/DataFrame"
)
# GH17901
right = to_series(right)
if flex is not None and isinstance(right, ABCDataFrame):
if not left._indexed_same(right):
if flex:
left, right = left.align(right, join="outer", level=level, copy=False)
else:
raise ValueError(
"Can only compare identically-labeled DataFrame objects"
)
elif isinstance(right, ABCSeries):
# axis=1 is default for DataFrame-with-Series op
axis = left._get_axis_number(axis) if axis is not None else 1
if not flex:
if not left.axes[axis].equals(right.index):
warnings.warn(
"Automatic reindexing on DataFrame vs Series comparisons "
"is deprecated and will raise ValueError in a future version. "
"Do `left, right = left.align(right, axis=1, copy=False)` "
"before e.g. `left == right`",
FutureWarning,
stacklevel=find_stack_level(),
)
left, right = left.align(
right, join="outer", axis=axis, level=level, copy=False
)
right = _maybe_align_series_as_frame(left, right, axis)
return left, right
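# Illustrative sketch (not from the original module; assumes
# ``import pandas as pd``): a list whose length matches the columns is coerced
# to a Series and then broadcast to the frame's shape by
# _maybe_align_series_as_frame.
# >>> df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
# >>> _, rhs = align_method_FRAME(df, [10, 20], axis=None, flex=True)
# >>> rhs.shape
# (2, 2)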
def should_reindex_frame_op(
left: DataFrame, right, op, axis, default_axis, fill_value, level
) -> bool:
"""
Check if this is an operation between DataFrames that will need to reindex.
"""
assert isinstance(left, ABCDataFrame)
if op is operator.pow or op is roperator.rpow:
# GH#32685 pow has special semantics for operating with null values
return False
if not isinstance(right, ABCDataFrame):
return False
if fill_value is None and level is None and axis is default_axis:
# TODO: any other cases we should handle here?
# Intersection is always unique so we have to check the unique columns
left_uniques = left.columns.unique()
right_uniques = right.columns.unique()
cols = left_uniques.intersection(right_uniques)
if len(cols) and not (cols.equals(left_uniques) and cols.equals(right_uniques)):
# TODO: is there a shortcut available when len(cols) == 0?
return True
return False
def frame_arith_method_with_reindex(left: DataFrame, right: DataFrame, op) -> DataFrame:
"""
For DataFrame-with-DataFrame operations that require reindexing,
operate only on shared columns, then reindex.
Parameters
----------
left : DataFrame
right : DataFrame
op : binary operator
Returns
-------
DataFrame
"""
# GH#31623, only operate on shared columns
cols, lcols, rcols = left.columns.join(
right.columns, how="inner", level=None, return_indexers=True
)
new_left = left.iloc[:, lcols]
new_right = right.iloc[:, rcols]
result = op(new_left, new_right)
# Do the join on the columns instead of using align_method_FRAME
# to avoid constructing two potentially large/sparse DataFrames
join_columns, _, _ = left.columns.join(
right.columns, how="outer", level=None, return_indexers=True
)
if result.columns.has_duplicates:
# Avoid reindexing with a duplicate axis.
# https://github.com/pandas-dev/pandas/issues/35194
indexer, _ = result.columns.get_indexer_non_unique(join_columns)
indexer = algorithms.unique1d(indexer)
result = result._reindex_with_indexers(
{1: [join_columns, indexer]}, allow_dups=True
)
else:
result = result.reindex(join_columns, axis=1)
return result
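# Illustrative sketch (not from the original module; assumes
# ``import pandas as pd``): only the shared column "B" is actually computed;
# the remaining columns are reindexed in as all-NA.
# >>> df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
# >>> df2 = pd.DataFrame({"B": [10, 20], "C": [30, 40]})
# >>> res = frame_arith_method_with_reindex(df1, df2, operator.add)
# >>> res.columns.tolist(), res["B"].tolist()
# (['A', 'B', 'C'], [13, 24])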
def _maybe_align_series_as_frame(frame: DataFrame, series: Series, axis: int):
"""
If the Series operand is not EA-dtype, we can broadcast to 2D and operate
blockwise.
"""
rvalues = series._values
if not isinstance(rvalues, np.ndarray):
# TODO(EA2D): no need to special-case with 2D EAs
if rvalues.dtype == "datetime64[ns]" or rvalues.dtype == "timedelta64[ns]":
# We can losslessly+cheaply cast to ndarray
rvalues = np.asarray(rvalues)
else:
return series
if axis == 0:
rvalues = rvalues.reshape(-1, 1)
else:
rvalues = rvalues.reshape(1, -1)
rvalues = np.broadcast_to(rvalues, frame.shape)
return type(frame)(rvalues, index=frame.index, columns=frame.columns)
def flex_arith_method_FRAME(op):
op_name = op.__name__.strip("_")
default_axis = "columns"
na_op = get_array_op(op)
doc = make_flex_doc(op_name, "dataframe")
@Appender(doc)
def f(self, other, axis=default_axis, level=None, fill_value=None):
if should_reindex_frame_op(
self, other, op, axis, default_axis, fill_value, level
):
return frame_arith_method_with_reindex(self, other, op)
if isinstance(other, ABCSeries) and fill_value is not None:
# TODO: We could allow this in cases where we end up going
# through the DataFrame path
raise NotImplementedError(f"fill_value {fill_value} not supported.")
axis = self._get_axis_number(axis) if axis is not None else 1
other = maybe_prepare_scalar_for_op(other, self.shape)
self, other = align_method_FRAME(self, other, axis, flex=True, level=level)
if isinstance(other, ABCDataFrame):
# Another DataFrame
new_data = self._combine_frame(other, na_op, fill_value)
elif isinstance(other, ABCSeries):
new_data = self._dispatch_frame_op(other, op, axis=axis)
else:
# in this case we always have `np.ndim(other) == 0`
if fill_value is not None:
self = self.fillna(fill_value)
new_data = self._dispatch_frame_op(other, op)
return self._construct_result(new_data)
f.__name__ = op_name
return f
def flex_comp_method_FRAME(op):
op_name = op.__name__.strip("_")
default_axis = "columns" # because we are "flex"
doc = _flex_comp_doc_FRAME.format(
op_name=op_name, desc=_op_descriptions[op_name]["desc"]
)
@Appender(doc)
def f(self, other, axis=default_axis, level=None):
axis = self._get_axis_number(axis) if axis is not None else 1
self, other = align_method_FRAME(self, other, axis, flex=True, level=level)
new_data = self._dispatch_frame_op(other, op, axis=axis)
return self._construct_result(new_data)
f.__name__ = op_name
return f
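# Illustrative sketch (not from the original module; assumes
# ``import pandas as pd``): the wrapper produced here backs flex comparisons
# such as ``DataFrame.gt``, which align on both axes before comparing.
# >>> df = pd.DataFrame({"a": [1, 2]})
# >>> df.gt(pd.DataFrame({"a": [2, 1]}))["a"].tolist()
# [False, True]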


@@ -0,0 +1,522 @@
"""
Functions for arithmetic and comparison operations on NumPy arrays and
ExtensionArrays.
"""
import datetime
from functools import partial
import operator
from typing import Any
import numpy as np
from pandas._libs import (
NaT,
Timedelta,
Timestamp,
lib,
ops as libops,
)
from pandas._libs.tslibs import BaseOffset
from pandas._typing import (
ArrayLike,
Shape,
)
from pandas.core.dtypes.cast import (
construct_1d_object_array_from_listlike,
find_common_type,
)
from pandas.core.dtypes.common import (
ensure_object,
is_bool_dtype,
is_integer_dtype,
is_list_like,
is_numeric_v_string_like,
is_object_dtype,
is_scalar,
)
from pandas.core.dtypes.generic import (
ABCExtensionArray,
ABCIndex,
ABCSeries,
)
from pandas.core.dtypes.missing import (
isna,
notna,
)
import pandas.core.computation.expressions as expressions
from pandas.core.construction import ensure_wrapped_if_datetimelike
from pandas.core.ops import (
missing,
roperator,
)
from pandas.core.ops.dispatch import should_extension_dispatch
from pandas.core.ops.invalid import invalid_comparison
def comp_method_OBJECT_ARRAY(op, x, y):
if isinstance(y, list):
y = construct_1d_object_array_from_listlike(y)
if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)):
if not is_object_dtype(y.dtype):
y = y.astype(np.object_)
if isinstance(y, (ABCSeries, ABCIndex)):
y = y._values
if x.shape != y.shape:
raise ValueError("Shapes must match", x.shape, y.shape)
result = libops.vec_compare(x.ravel(), y.ravel(), op)
else:
result = libops.scalar_compare(x.ravel(), y, op)
return result.reshape(x.shape)
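# Illustrative sketch (not from the original module): object-dtype comparisons
# go through the cython helpers instead of raw numpy ufuncs; output indicative.
# >>> comp_method_OBJECT_ARRAY(operator.eq, np.array(["a", "b", None], dtype=object), "a")
# array([ True, False, False])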
def _masked_arith_op(x: np.ndarray, y, op):
"""
If the given arithmetic operation fails, attempt it again on
only the non-null elements of the input array(s).
Parameters
----------
x : np.ndarray
y : np.ndarray, Series, Index
op : binary operator
"""
# For Series `x` is 1D so ravel() is a no-op; calling it anyway makes
# the logic valid for both Series and DataFrame ops.
xrav = x.ravel()
assert isinstance(x, np.ndarray), type(x)
if isinstance(y, np.ndarray):
dtype = find_common_type([x.dtype, y.dtype])
result = np.empty(x.size, dtype=dtype)
if len(x) != len(y):
raise ValueError(x.shape, y.shape)
else:
ymask = notna(y)
# NB: ravel() is only safe since y is ndarray; for e.g. PeriodIndex
# we would get int64 dtype, see GH#19956
yrav = y.ravel()
mask = notna(xrav) & ymask.ravel()
# See GH#5284, GH#5035, GH#19448 for historical reference
if mask.any():
result[mask] = op(xrav[mask], yrav[mask])
else:
if not is_scalar(y):
raise TypeError(
f"Cannot broadcast np.ndarray with operand of type { type(y) }"
)
# mask is only meaningful for x
result = np.empty(x.size, dtype=x.dtype)
mask = notna(xrav)
# 1 ** np.nan is 1. So we have to unmask those.
if op is pow:
mask = np.where(x == 1, False, mask)
elif op is roperator.rpow:
mask = np.where(y == 1, False, mask)
if mask.any():
result[mask] = op(xrav[mask], y)
np.putmask(result, ~mask, np.nan)
result = result.reshape(x.shape) # 2D compat
return result
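# Illustrative sketch (not from the original module): the masked fallback
# applies the op only to non-NA positions of an object-dtype array and puts
# NaN everywhere else; output indicative.
# >>> _masked_arith_op(np.array([1, None, 3], dtype=object), 2, operator.add)
# array([3, nan, 5], dtype=object)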
def _na_arithmetic_op(left: np.ndarray, right, op, is_cmp: bool = False):
"""
Return the result of evaluating op on the passed in values.
If native types are not compatible, try coercion to object dtype.
Parameters
----------
left : np.ndarray
right : np.ndarray or scalar
Excludes DataFrame, Series, Index, ExtensionArray.
is_cmp : bool, default False
Whether this is a comparison operation.
Returns
-------
array-like
Raises
------
TypeError : invalid operation
"""
if isinstance(right, str):
# can never use numexpr
func = op
else:
func = partial(expressions.evaluate, op)
try:
result = func(left, right)
except TypeError:
if not is_cmp and (is_object_dtype(left.dtype) or is_object_dtype(right)):
# For object dtype, fallback to a masked operation (only operating
# on the non-missing values)
# Don't do this for comparisons, as that will handle complex numbers
# incorrectly, see GH#32047
result = _masked_arith_op(left, right, op)
else:
raise
if is_cmp and (is_scalar(result) or result is NotImplemented):
# numpy returned a scalar instead of operating element-wise
# e.g. numeric array vs str
# TODO: can remove this after dropping some future numpy version?
return invalid_comparison(left, right, op)
return missing.dispatch_fill_zeros(op, left, right, result)
def arithmetic_op(left: ArrayLike, right: Any, op):
"""
Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ...
Note: the caller is responsible for ensuring that numpy warnings are
suppressed (with np.errstate(all="ignore")) if needed.
Parameters
----------
left : np.ndarray or ExtensionArray
right : object
Cannot be a DataFrame or Index. Series is *not* excluded.
op : {operator.add, operator.sub, ...}
Or one of the reversed variants from roperator.
Returns
-------
ndarray or ExtensionArray
Or a 2-tuple of these in the case of divmod or rdivmod.
"""
# NB: We assume that extract_array and ensure_wrapped_if_datetimelike
# have already been called on `left` and `right`,
# and `maybe_prepare_scalar_for_op` has already been called on `right`
# We need to special-case datetime64/timedelta64 dtypes (e.g. because numpy
# casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390)
if (
should_extension_dispatch(left, right)
or isinstance(right, (Timedelta, BaseOffset, Timestamp))
or right is NaT
):
# Timedelta/Timestamp and other custom scalars are included in the check
# because numexpr will fail on it, see GH#31457
res_values = op(left, right)
else:
# TODO we should handle EAs consistently and move this check before the if/else
# (https://github.com/pandas-dev/pandas/issues/41165)
_bool_arith_check(op, left, right)
res_values = _na_arithmetic_op(left, right, op)
return res_values
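# Illustrative sketch (not from the original module): plain numpy operands take
# the _na_arithmetic_op / numexpr path; output indicative.
# >>> arithmetic_op(np.array([1, 2, 3]), np.array([10, 20, 30]), operator.add)
# array([11, 22, 33])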
def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike:
"""
Evaluate a comparison operation `==`, `!=`, `>=`, `>`, `<=`, or `<`.
Note: the caller is responsible for ensuring that numpy warnings are
suppressed (with np.errstate(all="ignore")) if needed.
Parameters
----------
left : np.ndarray or ExtensionArray
right : object
Cannot be a DataFrame, Series, or Index.
op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le}
Returns
-------
ndarray or ExtensionArray
"""
# NB: We assume extract_array has already been called on left and right
lvalues = ensure_wrapped_if_datetimelike(left)
rvalues = ensure_wrapped_if_datetimelike(right)
rvalues = lib.item_from_zerodim(rvalues)
if isinstance(rvalues, list):
# We don't catch tuple here bc we may be comparing e.g. MultiIndex
# to a tuple that represents a single entry, see test_compare_tuple_strs
rvalues = np.asarray(rvalues)
if isinstance(rvalues, (np.ndarray, ABCExtensionArray)):
# TODO: make this treatment consistent across ops and classes.
# We are not catching all listlikes here (e.g. frozenset, tuple)
# The ambiguous case is object-dtype. See GH#27803
if len(lvalues) != len(rvalues):
raise ValueError(
"Lengths must match to compare", lvalues.shape, rvalues.shape
)
if should_extension_dispatch(lvalues, rvalues) or (
(isinstance(rvalues, (Timedelta, BaseOffset, Timestamp)) or right is NaT)
and not is_object_dtype(lvalues.dtype)
):
# Call the method on lvalues
res_values = op(lvalues, rvalues)
elif is_scalar(rvalues) and isna(rvalues): # TODO: but not pd.NA?
# numpy does not like comparisons vs None
if op is operator.ne:
res_values = np.ones(lvalues.shape, dtype=bool)
else:
res_values = np.zeros(lvalues.shape, dtype=bool)
elif is_numeric_v_string_like(lvalues, rvalues):
# GH#36377 going through the numexpr path would incorrectly raise
return invalid_comparison(lvalues, rvalues, op)
elif is_object_dtype(lvalues.dtype) or isinstance(rvalues, str):
res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)
else:
res_values = _na_arithmetic_op(lvalues, rvalues, op, is_cmp=True)
return res_values
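# Illustrative sketch (not from the original module): comparing against a
# scalar NA (e.g. None) short-circuits to all-False, except for ``ne``.
# >>> comparison_op(np.array([1.0, 2.0]), None, operator.eq)
# array([False, False])
# >>> comparison_op(np.array([1.0, 2.0]), None, operator.ne)
# array([ True,  True])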
def na_logical_op(x: np.ndarray, y, op):
try:
# For exposition, write:
# yarr = isinstance(y, np.ndarray)
# yint = is_integer(y) or (yarr and y.dtype.kind == "i")
# ybool = is_bool(y) or (yarr and y.dtype.kind == "b")
# xint = x.dtype.kind == "i"
# xbool = x.dtype.kind == "b"
# Then Cases where this goes through without raising include:
# (xint or xbool) and (yint or ybool)
result = op(x, y)
except TypeError:
if isinstance(y, np.ndarray):
# bool-bool dtype operations should be OK, should not get here
assert not (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype))
x = ensure_object(x)
y = ensure_object(y)
result = libops.vec_binop(x.ravel(), y.ravel(), op)
else:
# let null fall thru
assert lib.is_scalar(y)
if not isna(y):
y = bool(y)
try:
result = libops.scalar_binop(x, y, op)
except (
TypeError,
ValueError,
AttributeError,
OverflowError,
NotImplementedError,
) as err:
typ = type(y).__name__
raise TypeError(
f"Cannot perform '{op.__name__}' with a dtyped [{x.dtype}] array "
f"and scalar of type [{typ}]"
) from err
return result.reshape(x.shape)
def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike:
"""
Evaluate a logical operation `|`, `&`, or `^`.
Parameters
----------
left : np.ndarray or ExtensionArray
right : object
Cannot be a DataFrame, Series, or Index.
op : {operator.and_, operator.or_, operator.xor}
Or one of the reversed variants from roperator.
Returns
-------
ndarray or ExtensionArray
"""
fill_int = lambda x: x
def fill_bool(x, left=None):
# if `left` is specifically not-boolean, we do not cast to bool
if x.dtype.kind in ["c", "f", "O"]:
# dtypes that can hold NA
mask = isna(x)
if mask.any():
x = x.astype(object)
x[mask] = False
if left is None or is_bool_dtype(left.dtype):
x = x.astype(bool)
return x
is_self_int_dtype = is_integer_dtype(left.dtype)
right = lib.item_from_zerodim(right)
if is_list_like(right) and not hasattr(right, "dtype"):
# e.g. list, tuple
right = construct_1d_object_array_from_listlike(right)
# NB: We assume extract_array has already been called on left and right
lvalues = ensure_wrapped_if_datetimelike(left)
rvalues = right
if should_extension_dispatch(lvalues, rvalues):
# Call the method on lvalues
res_values = op(lvalues, rvalues)
else:
if isinstance(rvalues, np.ndarray):
is_other_int_dtype = is_integer_dtype(rvalues.dtype)
rvalues = rvalues if is_other_int_dtype else fill_bool(rvalues, lvalues)
else:
# i.e. scalar
is_other_int_dtype = lib.is_integer(rvalues)
# For int vs int `^`, `|`, `&` are bitwise operators and return
# integer dtypes. Otherwise these are boolean ops
filler = fill_int if is_self_int_dtype and is_other_int_dtype else fill_bool
res_values = na_logical_op(lvalues, rvalues, op)
# error: Cannot call function of unknown type
res_values = filler(res_values) # type: ignore[operator]
return res_values
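# Illustrative sketch (not from the original module): bool-vs-bool operands
# keep a boolean result, while int-vs-int operands keep the bitwise integer
# result; outputs indicative.
# >>> logical_op(np.array([True, False, True]), np.array([True, True, False]), operator.and_)
# array([ True, False, False])
# >>> logical_op(np.array([1, 2, 3]), np.array([3, 3, 3]), operator.and_)
# array([1, 2, 3])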
def get_array_op(op):
"""
Return a binary array operation corresponding to the given operator op.
Parameters
----------
op : function
Binary operator from operator or roperator module.
Returns
-------
functools.partial
"""
if isinstance(op, partial):
# We get here via dispatch_to_series in DataFrame case
# e.g. test_rolling_consistency_var_debiasing_factors
return op
op_name = op.__name__.strip("_").lstrip("r")
if op_name == "arith_op":
# Reached via DataFrame._combine_frame i.e. flex methods
# e.g. test_df_add_flex_filled_mixed_dtypes
return op
if op_name in {"eq", "ne", "lt", "le", "gt", "ge"}:
return partial(comparison_op, op=op)
elif op_name in {"and", "or", "xor", "rand", "ror", "rxor"}:
return partial(logical_op, op=op)
elif op_name in {
"add",
"sub",
"mul",
"truediv",
"floordiv",
"mod",
"divmod",
"pow",
}:
return partial(arithmetic_op, op=op)
else:
raise NotImplementedError(op_name)
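# Illustrative sketch (not from the original module): get_array_op wraps a
# plain operator into the matching array-level function above.
# >>> array_add = get_array_op(operator.add)
# >>> array_add(np.array([1, 2]), np.array([3, 4]))
# array([4, 6])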
def maybe_prepare_scalar_for_op(obj, shape: Shape):
"""
Cast non-pandas objects to pandas types to unify behavior of arithmetic
and comparison operations.
Parameters
----------
obj: object
shape : tuple[int]
Returns
-------
out : object
Notes
-----
Be careful to call this *after* determining the `name` attribute to be
attached to the result of the arithmetic operation.
"""
if type(obj) is datetime.timedelta:
# GH#22390 cast up to Timedelta to rely on Timedelta
# implementation; otherwise operation against numeric-dtype
# raises TypeError
return Timedelta(obj)
elif type(obj) is datetime.datetime:
# cast up to Timestamp to rely on Timestamp implementation, see Timedelta above
return Timestamp(obj)
elif isinstance(obj, np.datetime64):
# GH#28080 numpy casts integer-dtype to datetime64 when doing
# array[int] + datetime64, which we do not allow
if isna(obj):
from pandas.core.arrays import DatetimeArray
# Avoid possible ambiguities with pd.NaT
obj = obj.astype("datetime64[ns]")
right = np.broadcast_to(obj, shape)
return DatetimeArray(right)
return Timestamp(obj)
elif isinstance(obj, np.timedelta64):
if isna(obj):
from pandas.core.arrays import TimedeltaArray
# wrapping timedelta64("NaT") in Timedelta returns NaT,
# which would incorrectly be treated as a datetime-NaT, so
# we broadcast and wrap in a TimedeltaArray
obj = obj.astype("timedelta64[ns]")
right = np.broadcast_to(obj, shape)
return TimedeltaArray(right)
# In particular non-nanosecond timedelta64 needs to be cast to
# nanoseconds, or else we get undesired behavior like
# np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D')
return Timedelta(obj)
return obj
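# Illustrative sketch (not from the original module): stdlib and numpy scalars
# are upcast to their pandas equivalents so the pandas implementations take
# over; output indicative.
# >>> maybe_prepare_scalar_for_op(datetime.timedelta(days=1), (3,))
# Timedelta('1 days 00:00:00')
# >>> maybe_prepare_scalar_for_op(np.datetime64("2022-05-23"), (3,))
# Timestamp('2022-05-23 00:00:00')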
_BOOL_OP_NOT_ALLOWED = {
operator.truediv,
roperator.rtruediv,
operator.floordiv,
roperator.rfloordiv,
operator.pow,
roperator.rpow,
}
def _bool_arith_check(op, a, b):
"""
In contrast to numpy, pandas raises an error for certain operations
with booleans.
"""
if op in _BOOL_OP_NOT_ALLOWED:
if is_bool_dtype(a.dtype) and (
is_bool_dtype(b) or isinstance(b, (bool, np.bool_))
):
op_name = op.__name__.strip("_").lstrip("r")
raise NotImplementedError(
f"operator '{op_name}' not implemented for bool dtypes"
)


@@ -0,0 +1,140 @@
"""
Boilerplate functions used in defining binary operations.
"""
from functools import wraps
from typing import Callable
from pandas._libs.lib import item_from_zerodim
from pandas._libs.missing import is_matching_na
from pandas._typing import F
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCIndex,
ABCSeries,
)
def unpack_zerodim_and_defer(name: str) -> Callable[[F], F]:
"""
Boilerplate for pandas conventions in arithmetic and comparison methods.
Parameters
----------
name : str
Returns
-------
decorator
"""
def wrapper(method: F) -> F:
return _unpack_zerodim_and_defer(method, name)
return wrapper
def _unpack_zerodim_and_defer(method, name: str):
"""
Boilerplate for pandas conventions in arithmetic and comparison methods.
Ensure method returns NotImplemented when operating against "senior"
classes. Ensure zero-dimensional ndarrays are always unpacked.
Parameters
----------
method : binary method
name : str
Returns
-------
method
"""
is_cmp = name.strip("__") in {"eq", "ne", "lt", "le", "gt", "ge"}
@wraps(method)
def new_method(self, other):
if is_cmp and isinstance(self, ABCIndex) and isinstance(other, ABCSeries):
# For comparison ops, Index does *not* defer to Series
pass
else:
for cls in [ABCDataFrame, ABCSeries, ABCIndex]:
if isinstance(self, cls):
break
if isinstance(other, cls):
return NotImplemented
other = item_from_zerodim(other)
return method(self, other)
return new_method
def get_op_result_name(left, right):
"""
Find the appropriate name to pin to an operation result. This result
should always be either an Index or a Series.
Parameters
----------
left : {Series, Index}
right : object
Returns
-------
name : object
Usually a string
"""
if isinstance(right, (ABCSeries, ABCIndex)):
name = _maybe_match_name(left, right)
else:
name = left.name
return name
def _maybe_match_name(a, b):
"""
Try to find a name to attach to the result of an operation between
a and b. If only one of these has a `name` attribute, return that
name. Otherwise return a consensus name if they match or None if
they have different names.
Parameters
----------
a : object
b : object
Returns
-------
name : str or None
See Also
--------
pandas.core.common.consensus_name_attr
"""
a_has = hasattr(a, "name")
b_has = hasattr(b, "name")
if a_has and b_has:
try:
if a.name == b.name:
return a.name
elif is_matching_na(a.name, b.name):
# e.g. both are np.nan
return a.name
else:
return None
except TypeError:
# pd.NA
if is_matching_na(a.name, b.name):
return a.name
return None
except ValueError:
# e.g. np.int64(1) vs (np.int64(1), np.int64(2))
return None
elif a_has:
return a.name
elif b_has:
return b.name
return None
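# Illustrative sketch (not from the original module; assumes
# ``import pandas as pd``): matching names are kept, mismatched names yield None.
# >>> _maybe_match_name(pd.Series([1], name="x"), pd.Series([2], name="x"))
# 'x'
# >>> _maybe_match_name(pd.Series([1], name="x"), pd.Series([2], name="y")) is None
# True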


@@ -0,0 +1,24 @@
"""
Functions for deciding when to dispatch operations to ExtensionArray methods.
"""
from typing import Any
from pandas._typing import ArrayLike
from pandas.core.dtypes.generic import ABCExtensionArray
def should_extension_dispatch(left: ArrayLike, right: Any) -> bool:
"""
Identify cases where Series operation should dispatch to ExtensionArray method.
Parameters
----------
left : np.ndarray or ExtensionArray
right : object
Returns
-------
bool
"""
return isinstance(left, ABCExtensionArray) or isinstance(right, ABCExtensionArray)
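# Illustrative sketch (not from the original module; assumes
# ``import numpy as np`` and ``import pandas as pd``): an ExtensionArray on
# either side triggers dispatch to the EA method.
# >>> should_extension_dispatch(np.array([1, 2]), pd.array([1, 2], dtype="Int64"))
# True
# >>> should_extension_dispatch(np.array([1, 2]), np.array([3, 4]))
# False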


@@ -0,0 +1,749 @@
"""
Templating for ops docstrings
"""
from __future__ import annotations
def make_flex_doc(op_name: str, typ: str) -> str:
"""
Make the appropriate substitutions for the given operation and class-typ
into either _flex_doc_SERIES or _flex_doc_FRAME to return the docstring
to attach to a generated method.
Parameters
----------
op_name : str {'__add__', '__sub__', ... '__eq__', '__ne__', ...}
typ : str {'series', 'dataframe'}
Returns
-------
doc : str
"""
op_name = op_name.replace("__", "")
op_desc = _op_descriptions[op_name]
op_desc_op = op_desc["op"]
assert op_desc_op is not None # for mypy
if op_name.startswith("r"):
equiv = "other " + op_desc_op + " " + typ
elif op_name == "divmod":
equiv = f"{op_name}({typ}, other)"
else:
equiv = typ + " " + op_desc_op + " other"
if typ == "series":
base_doc = _flex_doc_SERIES
if op_desc["reverse"]:
base_doc += _see_also_reverse_SERIES.format(
reverse=op_desc["reverse"], see_also_desc=op_desc["see_also_desc"]
)
doc_no_examples = base_doc.format(
desc=op_desc["desc"],
op_name=op_name,
equiv=equiv,
series_returns=op_desc["series_returns"],
)
ser_example = op_desc["series_examples"]
if ser_example:
doc = doc_no_examples + ser_example
else:
doc = doc_no_examples
elif typ == "dataframe":
base_doc = _flex_doc_FRAME
doc = base_doc.format(
desc=op_desc["desc"],
op_name=op_name,
equiv=equiv,
reverse=op_desc["reverse"],
)
else:
raise AssertionError("Invalid typ argument.")
return doc
_common_examples_algebra_SERIES = """
Examples
--------
>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd'])
>>> a
a 1.0
b 1.0
c 1.0
d NaN
dtype: float64
>>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e'])
>>> b
a 1.0
b NaN
d 1.0
e NaN
dtype: float64"""
_common_examples_comparison_SERIES = """
Examples
--------
>>> a = pd.Series([1, 1, 1, np.nan, 1], index=['a', 'b', 'c', 'd', 'e'])
>>> a
a 1.0
b 1.0
c 1.0
d NaN
e 1.0
dtype: float64
>>> b = pd.Series([0, 1, 2, np.nan, 1], index=['a', 'b', 'c', 'd', 'f'])
>>> b
a 0.0
b 1.0
c 2.0
d NaN
f 1.0
dtype: float64"""
_add_example_SERIES = (
_common_examples_algebra_SERIES
+ """
>>> a.add(b, fill_value=0)
a 2.0
b 1.0
c 1.0
d 1.0
e NaN
dtype: float64
"""
)
_sub_example_SERIES = (
_common_examples_algebra_SERIES
+ """
>>> a.subtract(b, fill_value=0)
a 0.0
b 1.0
c 1.0
d -1.0
e NaN
dtype: float64
"""
)
_mul_example_SERIES = (
_common_examples_algebra_SERIES
+ """
>>> a.multiply(b, fill_value=0)
a 1.0
b 0.0
c 0.0
d 0.0
e NaN
dtype: float64
"""
)
_div_example_SERIES = (
_common_examples_algebra_SERIES
+ """
>>> a.divide(b, fill_value=0)
a 1.0
b inf
c inf
d 0.0
e NaN
dtype: float64
"""
)
_floordiv_example_SERIES = (
_common_examples_algebra_SERIES
+ """
>>> a.floordiv(b, fill_value=0)
a 1.0
b NaN
c NaN
d 0.0
e NaN
dtype: float64
"""
)
_divmod_example_SERIES = (
_common_examples_algebra_SERIES
+ """
>>> a.divmod(b, fill_value=0)
(a 1.0
b NaN
c NaN
d 0.0
e NaN
dtype: float64,
a 0.0
b NaN
c NaN
d 0.0
e NaN
dtype: float64)
"""
)
_mod_example_SERIES = (
_common_examples_algebra_SERIES
+ """
>>> a.mod(b, fill_value=0)
a 0.0
b NaN
c NaN
d 0.0
e NaN
dtype: float64
"""
)
_pow_example_SERIES = (
_common_examples_algebra_SERIES
+ """
>>> a.pow(b, fill_value=0)
a 1.0
b 1.0
c 1.0
d 0.0
e NaN
dtype: float64
"""
)
_ne_example_SERIES = (
_common_examples_algebra_SERIES
+ """
>>> a.ne(b, fill_value=0)
a False
b True
c True
d True
e True
dtype: bool
"""
)
_eq_example_SERIES = (
_common_examples_algebra_SERIES
+ """
>>> a.eq(b, fill_value=0)
a True
b False
c False
d False
e False
dtype: bool
"""
)
_lt_example_SERIES = (
_common_examples_comparison_SERIES
+ """
>>> a.lt(b, fill_value=0)
a False
b False
c True
d False
e False
f True
dtype: bool
"""
)
_le_example_SERIES = (
_common_examples_comparison_SERIES
+ """
>>> a.le(b, fill_value=0)
a False
b True
c True
d False
e False
f True
dtype: bool
"""
)
_gt_example_SERIES = (
_common_examples_comparison_SERIES
+ """
>>> a.gt(b, fill_value=0)
a True
b False
c False
d False
e True
f False
dtype: bool
"""
)
_ge_example_SERIES = (
_common_examples_comparison_SERIES
+ """
>>> a.ge(b, fill_value=0)
a True
b True
c False
d False
e True
f False
dtype: bool
"""
)
_returns_series = """Series\n The result of the operation."""
_returns_tuple = """2-Tuple of Series\n The result of the operation."""
_op_descriptions: dict[str, dict[str, str | None]] = {
# Arithmetic Operators
"add": {
"op": "+",
"desc": "Addition",
"reverse": "radd",
"series_examples": _add_example_SERIES,
"series_returns": _returns_series,
},
"sub": {
"op": "-",
"desc": "Subtraction",
"reverse": "rsub",
"series_examples": _sub_example_SERIES,
"series_returns": _returns_series,
},
"mul": {
"op": "*",
"desc": "Multiplication",
"reverse": "rmul",
"series_examples": _mul_example_SERIES,
"series_returns": _returns_series,
"df_examples": None,
},
"mod": {
"op": "%",
"desc": "Modulo",
"reverse": "rmod",
"series_examples": _mod_example_SERIES,
"series_returns": _returns_series,
},
"pow": {
"op": "**",
"desc": "Exponential power",
"reverse": "rpow",
"series_examples": _pow_example_SERIES,
"series_returns": _returns_series,
"df_examples": None,
},
"truediv": {
"op": "/",
"desc": "Floating division",
"reverse": "rtruediv",
"series_examples": _div_example_SERIES,
"series_returns": _returns_series,
"df_examples": None,
},
"floordiv": {
"op": "//",
"desc": "Integer division",
"reverse": "rfloordiv",
"series_examples": _floordiv_example_SERIES,
"series_returns": _returns_series,
"df_examples": None,
},
"divmod": {
"op": "divmod",
"desc": "Integer division and modulo",
"reverse": "rdivmod",
"series_examples": _divmod_example_SERIES,
"series_returns": _returns_tuple,
"df_examples": None,
},
# Comparison Operators
"eq": {
"op": "==",
"desc": "Equal to",
"reverse": None,
"series_examples": _eq_example_SERIES,
"series_returns": _returns_series,
},
"ne": {
"op": "!=",
"desc": "Not equal to",
"reverse": None,
"series_examples": _ne_example_SERIES,
"series_returns": _returns_series,
},
"lt": {
"op": "<",
"desc": "Less than",
"reverse": None,
"series_examples": _lt_example_SERIES,
"series_returns": _returns_series,
},
"le": {
"op": "<=",
"desc": "Less than or equal to",
"reverse": None,
"series_examples": _le_example_SERIES,
"series_returns": _returns_series,
},
"gt": {
"op": ">",
"desc": "Greater than",
"reverse": None,
"series_examples": _gt_example_SERIES,
"series_returns": _returns_series,
},
"ge": {
"op": ">=",
"desc": "Greater than or equal to",
"reverse": None,
"series_examples": _ge_example_SERIES,
"series_returns": _returns_series,
},
}
_py_num_ref = """see
`Python documentation
<https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types>`_
for more details"""
_op_names = list(_op_descriptions.keys())
for key in _op_names:
reverse_op = _op_descriptions[key]["reverse"]
if reverse_op is not None:
_op_descriptions[reverse_op] = _op_descriptions[key].copy()
_op_descriptions[reverse_op]["reverse"] = key
_op_descriptions[key][
"see_also_desc"
] = f"Reverse of the {_op_descriptions[key]['desc']} operator, {_py_num_ref}"
_op_descriptions[reverse_op][
"see_also_desc"
] = f"Element-wise {_op_descriptions[key]['desc']}, {_py_num_ref}"
_flex_doc_SERIES = """
Return {desc} of series and other, element-wise (binary operator `{op_name}`).
Equivalent to ``{equiv}``, but with support to substitute a fill_value for
missing data in either one of the inputs.
Parameters
----------
other : Series or scalar value
fill_value : None or float value, default None (NaN)
Fill existing missing (NaN) values, and any new element needed for
successful Series alignment, with this value before computation.
If data in both corresponding Series locations is missing
the result of filling (at that location) will be missing.
level : int or name
Broadcast across a level, matching Index values on the
passed MultiIndex level.
Returns
-------
{series_returns}
"""
_see_also_reverse_SERIES = """
See Also
--------
Series.{reverse} : {see_also_desc}.
"""
_flex_doc_FRAME = """
Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
Equivalent to ``{equiv}``, but with support to substitute a fill_value
for missing data in one of the inputs. With reverse version, `{reverse}`.
Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to
arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`.
Parameters
----------
other : scalar, sequence, Series, or DataFrame
Any single or multiple element data structure, or list-like object.
axis : {{0 or 'index', 1 or 'columns'}}
Whether to compare by the index (0 or 'index') or columns
(1 or 'columns'). For Series input, axis to match Series index on.
level : int or label
Broadcast across a level, matching Index values on the
passed MultiIndex level.
fill_value : float or None, default None
Fill existing missing (NaN) values, and any new element needed for
successful DataFrame alignment, with this value before computation.
If data in both corresponding DataFrame locations is missing
the result will be missing.
Returns
-------
DataFrame
Result of the arithmetic operation.
See Also
--------
DataFrame.add : Add DataFrames.
DataFrame.sub : Subtract DataFrames.
DataFrame.mul : Multiply DataFrames.
DataFrame.div : Divide DataFrames (float division).
DataFrame.truediv : Divide DataFrames (float division).
DataFrame.floordiv : Divide DataFrames (integer division).
DataFrame.mod : Calculate modulo (remainder after division).
DataFrame.pow : Calculate exponential power.
Notes
-----
Mismatched indices will be unioned together.
Examples
--------
>>> df = pd.DataFrame({{'angles': [0, 3, 4],
... 'degrees': [360, 180, 360]}},
... index=['circle', 'triangle', 'rectangle'])
>>> df
angles degrees
circle 0 360
triangle 3 180
rectangle 4 360
Add a scalar with the operator version, which returns the same
results.
>>> df + 1
angles degrees
circle 1 361
triangle 4 181
rectangle 5 361
>>> df.add(1)
angles degrees
circle 1 361
triangle 4 181
rectangle 5 361
Divide by constant with reverse version.
>>> df.div(10)
angles degrees
circle 0.0 36.0
triangle 0.3 18.0
rectangle 0.4 36.0
>>> df.rdiv(10)
angles degrees
circle inf 0.027778
triangle 3.333333 0.055556
rectangle 2.500000 0.027778
Subtract a list and Series by axis with operator version.
>>> df - [1, 2]
angles degrees
circle -1 358
triangle 2 178
rectangle 3 358
>>> df.sub([1, 2], axis='columns')
angles degrees
circle -1 358
triangle 2 178
rectangle 3 358
>>> df.sub(pd.Series([1, 1, 1], index=['circle', 'triangle', 'rectangle']),
... axis='index')
angles degrees
circle -1 359
triangle 2 179
rectangle 3 359
Multiply a DataFrame of different shape with operator version.
>>> other = pd.DataFrame({{'angles': [0, 3, 4]}},
... index=['circle', 'triangle', 'rectangle'])
>>> other
angles
circle 0
triangle 3
rectangle 4
>>> df * other
angles degrees
circle 0 NaN
triangle 9 NaN
rectangle 16 NaN
>>> df.mul(other, fill_value=0)
angles degrees
circle 0 0.0
triangle 9 0.0
rectangle 16 0.0
Divide by a MultiIndex by level.
>>> df_multindex = pd.DataFrame({{'angles': [0, 3, 4, 4, 5, 6],
... 'degrees': [360, 180, 360, 360, 540, 720]}},
... index=[['A', 'A', 'A', 'B', 'B', 'B'],
... ['circle', 'triangle', 'rectangle',
... 'square', 'pentagon', 'hexagon']])
>>> df_multindex
angles degrees
A circle 0 360
triangle 3 180
rectangle 4 360
B square 4 360
pentagon 5 540
hexagon 6 720
>>> df.div(df_multindex, level=1, fill_value=0)
angles degrees
A circle NaN 1.0
triangle 1.0 1.0
rectangle 1.0 1.0
B square 0.0 0.0
pentagon 0.0 0.0
hexagon 0.0 0.0
"""
_flex_comp_doc_FRAME = """
Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison
operators.
Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis
(rows or columns) and level for comparison.
Parameters
----------
other : scalar, sequence, Series, or DataFrame
Any single or multiple element data structure, or list-like object.
axis : {{0 or 'index', 1 or 'columns'}}, default 'columns'
Whether to compare by the index (0 or 'index') or columns
(1 or 'columns').
level : int or label
Broadcast across a level, matching Index values on the passed
MultiIndex level.
Returns
-------
DataFrame of bool
Result of the comparison.
See Also
--------
DataFrame.eq : Compare DataFrames for equality elementwise.
DataFrame.ne : Compare DataFrames for inequality elementwise.
DataFrame.le : Compare DataFrames for less than inequality
or equality elementwise.
DataFrame.lt : Compare DataFrames for strictly less than
inequality elementwise.
DataFrame.ge : Compare DataFrames for greater than inequality
or equality elementwise.
DataFrame.gt : Compare DataFrames for strictly greater than
inequality elementwise.
Notes
-----
Mismatched indices will be unioned together.
`NaN` values are considered different (i.e. `NaN` != `NaN`).
Examples
--------
>>> df = pd.DataFrame({{'cost': [250, 150, 100],
... 'revenue': [100, 250, 300]}},
... index=['A', 'B', 'C'])
>>> df
cost revenue
A 250 100
B 150 250
C 100 300
Comparison with a scalar, using either the operator or method:
>>> df == 100
cost revenue
A False True
B False False
C True False
>>> df.eq(100)
cost revenue
A False True
B False False
C True False
When `other` is a :class:`Series`, the columns of a DataFrame are aligned
with the index of `other` and broadcast:
>>> df != pd.Series([100, 250], index=["cost", "revenue"])
cost revenue
A True True
B True False
C False True
Use the method to control the broadcast axis:
>>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis='index')
cost revenue
A True False
B True True
C True True
D True True
When comparing to an arbitrary sequence, the number of columns must
match the number of elements in `other`:
>>> df == [250, 100]
cost revenue
A True True
B False False
C False False
Use the method to control the axis:
>>> df.eq([250, 250, 100], axis='index')
cost revenue
A True False
B False True
C True False
Compare to a DataFrame of different shape.
>>> other = pd.DataFrame({{'revenue': [300, 250, 100, 150]}},
... index=['A', 'B', 'C', 'D'])
>>> other
revenue
A 300
B 250
C 100
D 150
>>> df.gt(other)
cost revenue
A False False
B False False
C False True
D False False
Compare to a MultiIndex by level.
>>> df_multindex = pd.DataFrame({{'cost': [250, 150, 100, 150, 300, 220],
... 'revenue': [100, 250, 300, 200, 175, 225]}},
... index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'],
... ['A', 'B', 'C', 'A', 'B', 'C']])
>>> df_multindex
cost revenue
Q1 A 250 100
B 150 250
C 100 300
Q2 A 150 200
B 300 175
C 220 225
>>> df.le(df_multindex, level=1)
cost revenue
Q1 A True True
B True True
C True True
Q2 A False True
B True False
C True False
"""


@@ -0,0 +1,56 @@
"""
Templates for invalid operations.
"""
import operator
import numpy as np
def invalid_comparison(left, right, op):
"""
If a comparison has mismatched types and is not necessarily meaningful,
follow python3 conventions by:
- returning all-False for equality
- returning all-True for inequality
- raising TypeError otherwise
Parameters
----------
left : array-like
right : scalar, array-like
op : operator.{eq, ne, lt, le, gt, ge}
Raises
------
TypeError : on ordering comparisons (lt, le, gt, ge)
"""
if op is operator.eq:
res_values = np.zeros(left.shape, dtype=bool)
elif op is operator.ne:
res_values = np.ones(left.shape, dtype=bool)
else:
typ = type(right).__name__
raise TypeError(f"Invalid comparison between dtype={left.dtype} and {typ}")
return res_values
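# Illustrative sketch (not from the original module): mismatched-type equality
# checks return all-False / all-True, while ordering comparisons raise.
# >>> invalid_comparison(np.array([1, 2]), "a", operator.eq)
# array([False, False])
# >>> invalid_comparison(np.array([1, 2]), "a", operator.ne)
# array([ True,  True])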
def make_invalid_op(name: str):
"""
Return a binary method that always raises a TypeError.
Parameters
----------
name : str
Returns
-------
invalid_op : function
"""
def invalid_op(self, other=None):
typ = type(self).__name__
raise TypeError(f"cannot perform {name} with this index type: {typ}")
invalid_op.__name__ = name
return invalid_op


@@ -0,0 +1,189 @@
"""
Ops for masked arrays.
"""
from __future__ import annotations
import numpy as np
from pandas._libs import (
lib,
missing as libmissing,
)
def kleene_or(
left: bool | np.ndarray | libmissing.NAType,
right: bool | np.ndarray | libmissing.NAType,
left_mask: np.ndarray | None,
right_mask: np.ndarray | None,
):
"""
Boolean ``or`` using Kleene logic.
Values are NA where we have ``NA | NA`` or ``NA | False``.
``NA | True`` is considered True.
Parameters
----------
left, right : ndarray, NA, or bool
The values of the array.
left_mask, right_mask : ndarray, optional
The masks. Only one of these may be None, which implies that
the associated `left` or `right` value is a scalar.
Returns
-------
result, mask: ndarray[bool]
The result of the logical or, and the new mask.
"""
# To reduce the number of cases, we ensure that `left` & `left_mask`
# always come from an array, not a scalar. This is safe, since
# A | B == B | A
if left_mask is None:
return kleene_or(right, left, right_mask, left_mask)
if not isinstance(left, np.ndarray):
raise TypeError("Either `left` or `right` need to be a np.ndarray.")
raise_for_nan(right, method="or")
if right is libmissing.NA:
result = left.copy()
else:
result = left | right
if right_mask is not None:
# output is unknown where (False & NA), (NA & False), (NA & NA)
left_false = ~(left | left_mask)
right_false = ~(right | right_mask)
mask = (
(left_false & right_mask)
| (right_false & left_mask)
| (left_mask & right_mask)
)
else:
if right is True:
mask = np.zeros_like(left_mask)
elif right is libmissing.NA:
mask = (~left & ~left_mask) | left_mask
else:
# False
mask = left_mask.copy()
return result, mask
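# Illustrative sketch (not from the original module): ``True | NA`` is True,
# while ``False | NA`` and ``NA | NA`` stay NA (masked); output indicative.
# >>> values = np.array([True, False, False])
# >>> mask = np.array([False, False, True])   # third entry is NA
# >>> kleene_or(values, libmissing.NA, mask, None)
# (array([ True, False, False]), array([False,  True,  True]))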
def kleene_xor(
left: bool | np.ndarray | libmissing.NAType,
right: bool | np.ndarray | libmissing.NAType,
left_mask: np.ndarray | None,
right_mask: np.ndarray | None,
):
"""
Boolean ``xor`` using Kleene logic.
This is the same as ``or``, with the following adjustments
* True, True -> False
* True, NA -> NA
Parameters
----------
left, right : ndarray, NA, or bool
The values of the array.
left_mask, right_mask : ndarray, optional
The masks. Only one of these may be None, which implies that
the associated `left` or `right` value is a scalar.
Returns
-------
result, mask: ndarray[bool]
The result of the logical xor, and the new mask.
"""
# To reduce the number of cases, we ensure that `left` & `left_mask`
# always come from an array, not a scalar. This is safe, since
# A ^ B == B ^ A
if left_mask is None:
return kleene_xor(right, left, right_mask, left_mask)
if not isinstance(left, np.ndarray):
raise TypeError("Either `left` or `right` need to be a np.ndarray.")
raise_for_nan(right, method="xor")
if right is libmissing.NA:
result = np.zeros_like(left)
else:
result = left ^ right
if right_mask is None:
if right is libmissing.NA:
mask = np.ones_like(left_mask)
else:
mask = left_mask.copy()
else:
mask = left_mask | right_mask
return result, mask
def kleene_and(
left: bool | libmissing.NAType | np.ndarray,
right: bool | libmissing.NAType | np.ndarray,
left_mask: np.ndarray | None,
right_mask: np.ndarray | None,
):
"""
Boolean ``and`` using Kleene logic.
Values are ``NA`` for ``NA & NA`` or ``True & NA``.
Parameters
----------
left, right : ndarray, NA, or bool
The values of the array.
left_mask, right_mask : ndarray, optional
The masks. Only one of these may be None, which implies that
the associated `left` or `right` value is a scalar.
Returns
-------
result, mask: ndarray[bool]
The result of the logical and, and the new mask.
"""
# To reduce the number of cases, we ensure that `left` & `left_mask`
# always come from an array, not a scalar. This is safe, since
# A & B == B & A
if left_mask is None:
return kleene_and(right, left, right_mask, left_mask)
if not isinstance(left, np.ndarray):
raise TypeError("Either `left` or `right` need to be a np.ndarray.")
raise_for_nan(right, method="and")
if right is libmissing.NA:
result = np.zeros_like(left)
else:
result = left & right
if right_mask is None:
# Scalar `right`
if right is libmissing.NA:
mask = (left & ~left_mask) | left_mask
else:
mask = left_mask.copy()
if right is False:
# unmask everything
mask[:] = False
else:
# unmask where either left or right is False
left_false = ~(left | left_mask)
right_false = ~(right | right_mask)
mask = (left_mask & ~right_false) | (right_mask & ~left_false)
return result, mask
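# Illustrative sketch (not from the original module): ``anything & False`` is
# known to be False and is unmasked, while ``NA & True`` stays NA.
# >>> values = np.array([True, False, True])
# >>> mask = np.array([False, False, True])   # third entry is NA
# >>> kleene_and(values, False, mask, None)[1]
# array([False, False, False])
# >>> kleene_and(values, True, mask, None)[1]
# array([False, False,  True])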
def raise_for_nan(value, method: str):
if lib.is_float(value) and np.isnan(value):
raise ValueError(f"Cannot perform logical '{method}' with floating NaN")


@@ -0,0 +1,122 @@
"""
Functions to generate methods and pin them to the appropriate classes.
"""
import operator
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCSeries,
)
from pandas.core.ops import roperator
def _get_method_wrappers(cls):
"""
Find the appropriate operation-wrappers to use when defining flex/special
arithmetic, boolean, and comparison operations with the given class.
Parameters
----------
cls : class
Returns
-------
arith_flex : function or None
comp_flex : function or None
"""
# TODO: make these non-runtime imports once the relevant functions
# are no longer in __init__
from pandas.core.ops import (
flex_arith_method_FRAME,
flex_comp_method_FRAME,
flex_method_SERIES,
)
if issubclass(cls, ABCSeries):
# Just Series
arith_flex = flex_method_SERIES
comp_flex = flex_method_SERIES
elif issubclass(cls, ABCDataFrame):
arith_flex = flex_arith_method_FRAME
comp_flex = flex_comp_method_FRAME
return arith_flex, comp_flex
def add_flex_arithmetic_methods(cls):
"""
Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``)
to the class.
Parameters
----------
cls : class
flex methods will be defined and pinned to this class
"""
flex_arith_method, flex_comp_method = _get_method_wrappers(cls)
new_methods = _create_methods(cls, flex_arith_method, flex_comp_method)
new_methods.update(
{
"multiply": new_methods["mul"],
"subtract": new_methods["sub"],
"divide": new_methods["div"],
}
)
# opt out of bool flex methods for now
assert not any(kname in new_methods for kname in ("ror_", "rxor", "rand_"))
_add_methods(cls, new_methods=new_methods)
def _create_methods(cls, arith_method, comp_method):
# creates actual flex methods based upon arithmetic, and comp method
# constructors.
have_divmod = issubclass(cls, ABCSeries)
# divmod is available for Series
new_methods = {}
new_methods.update(
{
"add": arith_method(operator.add),
"radd": arith_method(roperator.radd),
"sub": arith_method(operator.sub),
"mul": arith_method(operator.mul),
"truediv": arith_method(operator.truediv),
"floordiv": arith_method(operator.floordiv),
"mod": arith_method(operator.mod),
"pow": arith_method(operator.pow),
"rmul": arith_method(roperator.rmul),
"rsub": arith_method(roperator.rsub),
"rtruediv": arith_method(roperator.rtruediv),
"rfloordiv": arith_method(roperator.rfloordiv),
"rpow": arith_method(roperator.rpow),
"rmod": arith_method(roperator.rmod),
}
)
new_methods["div"] = new_methods["truediv"]
new_methods["rdiv"] = new_methods["rtruediv"]
if have_divmod:
# divmod doesn't have an op that is supported by numexpr
new_methods["divmod"] = arith_method(divmod)
new_methods["rdivmod"] = arith_method(roperator.rdivmod)
new_methods.update(
{
"eq": comp_method(operator.eq),
"ne": comp_method(operator.ne),
"lt": comp_method(operator.lt),
"gt": comp_method(operator.gt),
"le": comp_method(operator.le),
"ge": comp_method(operator.ge),
}
)
new_methods = {k.strip("_"): v for k, v in new_methods.items()}
return new_methods
def _add_methods(cls, new_methods):
for name, method in new_methods.items():
setattr(cls, name, method)
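# Illustrative sketch (not from the original module; ``SomeSeriesClass`` is a
# hypothetical Series-like class this function has been applied to): the long
# aliases reuse the very same function objects created above.
# >>> SomeSeriesClass.multiply is SomeSeriesClass.mul
# True
# >>> SomeSeriesClass.divide is SomeSeriesClass.truediv
# True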


@@ -0,0 +1,181 @@
"""
Missing data handling for arithmetic operations.
In particular, pandas conventions regarding division by zero differ
from numpy in the following ways:
1) np.array([-1, 0, 1], dtype=dtype1) // np.array([0, 0, 0], dtype=dtype2)
gives [nan, nan, nan] for most dtype combinations, and [0, 0, 0] for
the remaining pairs
(the remaining being dtype1==dtype2==intN or dtype1==dtype2==uintN).
pandas convention is to return [-inf, nan, inf] for all dtype
combinations.
Note: the numpy behavior described here is py3-specific.
2) np.array([-1, 0, 1], dtype=dtype1) % np.array([0, 0, 0], dtype=dtype2)
gives precisely the same results as the // operation.
pandas convention is to return [nan, nan, nan] for all dtype
combinations.
3) divmod behavior consistent with 1) and 2).
"""
import operator
import numpy as np
from pandas.core.dtypes.common import (
is_float_dtype,
is_integer_dtype,
is_scalar,
)
from pandas.core.ops import roperator
def _fill_zeros(result, x, y):
"""
For reversed ops the caller passes x and y already flipped.
A float-dtype result is returned unchanged. Otherwise, if y is an
integer scalar or array containing 0's, the corresponding entries of
the result are replaced with np.nan (entries that are already NaN are
left alone) and the result is returned.
"""
if is_float_dtype(result.dtype):
return result
is_variable_type = hasattr(y, "dtype")
is_scalar_type = is_scalar(y)
if not is_variable_type and not is_scalar_type:
return result
if is_scalar_type:
y = np.array(y)
if is_integer_dtype(y.dtype):
ymask = y == 0
if ymask.any():
# GH#7325, mask and nans must be broadcastable
mask = ymask & ~np.isnan(result)
# GH#9308 doing ravel on result and mask can improve putmask perf,
# but can also make unwanted copies.
result = result.astype("float64", copy=False)
np.putmask(result, mask, np.nan)
return result
def mask_zero_div_zero(x, y, result: np.ndarray) -> np.ndarray:
"""
Set results of 0 // 0 to np.nan, regardless of the dtypes
of the numerator or the denominator.
Parameters
----------
x : ndarray
y : ndarray
result : ndarray
Returns
-------
ndarray
The filled result.
Examples
--------
>>> x = np.array([1, 0, -1], dtype=np.int64)
>>> x
array([ 1, 0, -1])
>>> y = 0 # int 0; numpy behavior is different with float
>>> result = x // y
>>> result # raw numpy result does not fill division by zero
array([0, 0, 0])
>>> mask_zero_div_zero(x, y, result)
array([ inf, nan, -inf])
"""
if not hasattr(y, "dtype"):
# e.g. scalar, tuple
y = np.array(y)
if not hasattr(x, "dtype"):
# e.g. scalar, tuple
x = np.array(x)
zmask = y == 0
if zmask.any():
# Flip sign if necessary for -0.0
zneg_mask = zmask & np.signbit(y)
zpos_mask = zmask & ~zneg_mask
x_lt0 = x < 0
x_gt0 = x > 0
nan_mask = zmask & (x == 0)
with np.errstate(invalid="ignore"):
neginf_mask = (zpos_mask & x_lt0) | (zneg_mask & x_gt0)
posinf_mask = (zpos_mask & x_gt0) | (zneg_mask & x_lt0)
if nan_mask.any() or neginf_mask.any() or posinf_mask.any():
# Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN
result = result.astype("float64", copy=False)
result[nan_mask] = np.nan
result[posinf_mask] = np.inf
result[neginf_mask] = -np.inf
return result
def dispatch_fill_zeros(op, left, right, result):
"""
Call _fill_zeros with the appropriate fill value depending on the operation,
with special logic for divmod and rdivmod.
Parameters
----------
op : function (operator.add, operator.div, ...)
left : object (np.ndarray for non-reversed ops)
right : object (np.ndarray for reversed ops)
result : ndarray
Returns
-------
result : np.ndarray
Notes
-----
For divmod and rdivmod, the `result` parameter and returned `result`
is a 2-tuple of ndarray objects.
"""
if op is divmod:
result = (
mask_zero_div_zero(left, right, result[0]),
_fill_zeros(result[1], left, right),
)
elif op is roperator.rdivmod:
result = (
mask_zero_div_zero(right, left, result[0]),
_fill_zeros(result[1], right, left),
)
elif op is operator.floordiv:
# Note: no need to do this for truediv; in py3 numpy behaves the way
# we want.
result = mask_zero_div_zero(left, right, result)
elif op is roperator.rfloordiv:
# Note: no need to do this for rtruediv; in py3 numpy behaves the way
# we want.
result = mask_zero_div_zero(right, left, result)
elif op is operator.mod:
result = _fill_zeros(result, left, right)
elif op is roperator.rmod:
result = _fill_zeros(result, right, left)
return result
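# Illustrative sketch (not from the original module): integer floor division by
# zero is patched from numpy's all-zero result to the convention described in
# the module docstring; output indicative.
# >>> left = np.array([1, 0, -1])
# >>> right = np.array([0, 0, 0])
# >>> with np.errstate(divide="ignore"):
# ...     raw = left // right          # numpy gives array([0, 0, 0]) here
# >>> dispatch_fill_zeros(operator.floordiv, left, right, raw)
# array([ inf,  nan, -inf])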