mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-07-03 22:57:06 +00:00
first commit
This commit is contained in:
475
.venv/Lib/site-packages/pandas/core/ops/__init__.py
Normal file
475
.venv/Lib/site-packages/pandas/core/ops/__init__.py
Normal file
@ -0,0 +1,475 @@
|
||||
"""
|
||||
Arithmetic operations for PandasObjects
|
||||
|
||||
This is not a public API.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import operator
|
||||
from typing import TYPE_CHECKING
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op # noqa:F401
|
||||
from pandas._typing import Level
|
||||
from pandas.util._decorators import Appender
|
||||
from pandas.util._exceptions import find_stack_level
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_array_like,
|
||||
is_list_like,
|
||||
)
|
||||
from pandas.core.dtypes.generic import (
|
||||
ABCDataFrame,
|
||||
ABCSeries,
|
||||
)
|
||||
from pandas.core.dtypes.missing import isna
|
||||
|
||||
from pandas.core import (
|
||||
algorithms,
|
||||
roperator,
|
||||
)
|
||||
from pandas.core.ops.array_ops import ( # noqa:F401
|
||||
arithmetic_op,
|
||||
comp_method_OBJECT_ARRAY,
|
||||
comparison_op,
|
||||
get_array_op,
|
||||
logical_op,
|
||||
maybe_prepare_scalar_for_op,
|
||||
)
|
||||
from pandas.core.ops.common import ( # noqa:F401
|
||||
get_op_result_name,
|
||||
unpack_zerodim_and_defer,
|
||||
)
|
||||
from pandas.core.ops.docstrings import (
|
||||
_flex_comp_doc_FRAME,
|
||||
_op_descriptions,
|
||||
make_flex_doc,
|
||||
)
|
||||
from pandas.core.ops.invalid import invalid_comparison # noqa:F401
|
||||
from pandas.core.ops.mask_ops import ( # noqa: F401
|
||||
kleene_and,
|
||||
kleene_or,
|
||||
kleene_xor,
|
||||
)
|
||||
from pandas.core.ops.methods import add_flex_arithmetic_methods # noqa:F401
|
||||
from pandas.core.roperator import ( # noqa:F401
|
||||
radd,
|
||||
rand_,
|
||||
rdiv,
|
||||
rdivmod,
|
||||
rfloordiv,
|
||||
rmod,
|
||||
rmul,
|
||||
ror_,
|
||||
rpow,
|
||||
rsub,
|
||||
rtruediv,
|
||||
rxor,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# constants
|
||||
# Names of the flex arithmetic methods: every forward operation together
# with its reflected ("r"-prefixed) counterpart.
ARITHMETIC_BINOPS: set[str] = {
    prefix + base
    for base in (
        "add",
        "sub",
        "mul",
        "pow",
        "mod",
        "floordiv",
        "truediv",
        "divmod",
    )
    for prefix in ("", "r")
}

# Names of the flex comparison methods.
COMPARISON_BINOPS: set[str] = set(("eq", "ne", "lt", "gt", "le", "ge"))
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Masking NA values and fallbacks for operations numpy does not support
|
||||
|
||||
|
||||
def fill_binop(left, right, fill_value):
    """
    Fill one-sided missing values in a pair of operands.

    When ``fill_value`` is not None, null entries of ``left`` and ``right``
    are replaced by it, but only at positions where exactly one of the two
    operands is null.  Positions where both are null are left untouched.

    Parameters
    ----------
    left : array-like
    right : array-like
    fill_value : object

    Returns
    -------
    left : array-like
    right : array-like

    Notes
    -----
    An operand is copied only when ``fill_value`` is not None and that
    operand contains at least one null; otherwise it is returned as passed.
    """
    if fill_value is None:
        return left, right

    left_null = isna(left)
    right_null = isna(right)

    # True where exactly one side is missing
    one_sided = left_null ^ right_null

    if left_null.any():
        # copy so the caller's data is never modified in place
        left = left.copy()
        left[left_null & one_sided] = fill_value

    if right_null.any():
        right = right.copy()
        right[right_null & one_sided] = fill_value

    return left, right
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Series
|
||||
|
||||
|
||||
def align_method_SERIES(left: Series, right, align_asobject: bool = False):
    """
    Align the left and right operands of a Series operation.

    Alignment happens only when ``right`` is a Series whose index differs
    from ``left.index``; any other ``right`` is returned unchanged.

    Parameters
    ----------
    left : Series
    right : object
    align_asobject : bool, default False
        Cast both operands to object dtype before aligning, which keeps the
        original values' dtype for bool ops.

    Returns
    -------
    left : Series
    right : object
    """
    # TODO: unlike align_method_FRAME, list, tuple and ndarray are not
    #  coerced here because Series has inconsistencies described in #13637
    if not isinstance(right, ABCSeries):
        return left, right

    if left.index.equals(right.index):
        # already aligned; avoid repeated alignment
        return left, right

    if align_asobject:
        left = left.astype(object)
        right = right.astype(object)

    left, right = left.align(right, copy=False)
    return left, right
|
||||
|
||||
|
||||
def flex_method_SERIES(op):
    """
    Build the flex (method-style) version of a binary operator for Series.

    Parameters
    ----------
    op : binary operator
        Its ``__name__`` with surrounding underscores stripped becomes the
        name of the returned method.

    Returns
    -------
    function
        ``flex_wrapper(self, other, level=None, fill_value=None, axis=0)``
    """
    name = op.__name__.strip("_")
    doc = make_flex_doc(name, "series")

    @Appender(doc)
    def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
        if axis is not None:
            # called only for validation; the result is not used
            self._get_axis_number(axis)

        res_name = get_op_result_name(self, other)

        if isinstance(other, ABCSeries):
            return self._binop(other, op, level=level, fill_value=fill_value)

        if isinstance(other, (np.ndarray, list, tuple)):
            if len(other) != len(self):
                raise ValueError("Lengths must be equal")
            # wrap the raw values on our own index so that _binop applies
            as_series = self._constructor(other, self.index)
            result = self._binop(as_series, op, level=level, fill_value=fill_value)
            result.name = res_name
            return result

        # anything else is handed to the plain operator
        if fill_value is not None:
            self = self.fillna(fill_value)
        return op(self, other)

    flex_wrapper.__name__ = name
    return flex_wrapper
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# DataFrame
|
||||
|
||||
|
||||
def align_method_FRAME(
|
||||
left, right, axis, flex: bool | None = False, level: Level = None
|
||||
):
|
||||
"""
|
||||
Convert rhs to meet lhs dims if input is list, tuple or np.ndarray.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
left : DataFrame
|
||||
right : Any
|
||||
axis : int, str, or None
|
||||
flex : bool or None, default False
|
||||
Whether this is a flex op, in which case we reindex.
|
||||
None indicates not to check for alignment.
|
||||
level : int or level name, default None
|
||||
|
||||
Returns
|
||||
-------
|
||||
left : DataFrame
|
||||
right : Any
|
||||
"""
|
||||
|
||||
def to_series(right):
|
||||
msg = "Unable to coerce to Series, length must be {req_len}: given {given_len}"
|
||||
if axis is not None and left._get_axis_name(axis) == "index":
|
||||
if len(left.index) != len(right):
|
||||
raise ValueError(
|
||||
msg.format(req_len=len(left.index), given_len=len(right))
|
||||
)
|
||||
right = left._constructor_sliced(right, index=left.index)
|
||||
else:
|
||||
if len(left.columns) != len(right):
|
||||
raise ValueError(
|
||||
msg.format(req_len=len(left.columns), given_len=len(right))
|
||||
)
|
||||
right = left._constructor_sliced(right, index=left.columns)
|
||||
return right
|
||||
|
||||
if isinstance(right, np.ndarray):
|
||||
|
||||
if right.ndim == 1:
|
||||
right = to_series(right)
|
||||
|
||||
elif right.ndim == 2:
|
||||
if right.shape == left.shape:
|
||||
right = left._constructor(right, index=left.index, columns=left.columns)
|
||||
|
||||
elif right.shape[0] == left.shape[0] and right.shape[1] == 1:
|
||||
# Broadcast across columns
|
||||
right = np.broadcast_to(right, left.shape)
|
||||
right = left._constructor(right, index=left.index, columns=left.columns)
|
||||
|
||||
elif right.shape[1] == left.shape[1] and right.shape[0] == 1:
|
||||
# Broadcast along rows
|
||||
right = to_series(right[0, :])
|
||||
|
||||
else:
|
||||
raise ValueError(
|
||||
"Unable to coerce to DataFrame, shape "
|
||||
f"must be {left.shape}: given {right.shape}"
|
||||
)
|
||||
|
||||
elif right.ndim > 2:
|
||||
raise ValueError(
|
||||
"Unable to coerce to Series/DataFrame, "
|
||||
f"dimension must be <= 2: {right.shape}"
|
||||
)
|
||||
|
||||
elif is_list_like(right) and not isinstance(right, (ABCSeries, ABCDataFrame)):
|
||||
# GH 36702. Raise when attempting arithmetic with list of array-like.
|
||||
if any(is_array_like(el) for el in right):
|
||||
raise ValueError(
|
||||
f"Unable to coerce list of {type(right[0])} to Series/DataFrame"
|
||||
)
|
||||
# GH17901
|
||||
right = to_series(right)
|
||||
|
||||
if flex is not None and isinstance(right, ABCDataFrame):
|
||||
if not left._indexed_same(right):
|
||||
if flex:
|
||||
left, right = left.align(right, join="outer", level=level, copy=False)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Can only compare identically-labeled DataFrame objects"
|
||||
)
|
||||
elif isinstance(right, ABCSeries):
|
||||
# axis=1 is default for DataFrame-with-Series op
|
||||
axis = left._get_axis_number(axis) if axis is not None else 1
|
||||
|
||||
if not flex:
|
||||
if not left.axes[axis].equals(right.index):
|
||||
warnings.warn(
|
||||
"Automatic reindexing on DataFrame vs Series comparisons "
|
||||
"is deprecated and will raise ValueError in a future version. "
|
||||
"Do `left, right = left.align(right, axis=1, copy=False)` "
|
||||
"before e.g. `left == right`",
|
||||
FutureWarning,
|
||||
stacklevel=find_stack_level(),
|
||||
)
|
||||
|
||||
left, right = left.align(
|
||||
right, join="outer", axis=axis, level=level, copy=False
|
||||
)
|
||||
right = _maybe_align_series_as_frame(left, right, axis)
|
||||
|
||||
return left, right
|
||||
|
||||
|
||||
def should_reindex_frame_op(
    left: DataFrame, right, op, axis, default_axis, fill_value, level
) -> bool:
    """
    Check if this is an operation between DataFrames that will need to reindex.

    Parameters
    ----------
    left : DataFrame
    right : object
    op : binary operator
    axis : object
        Axis requested by the caller.
    default_axis : object
        Axis the flex method uses by default; the shortcut is considered
        only when ``axis`` equals it.
    fill_value : object
    level : object

    Returns
    -------
    bool
        True when both operands are DataFrames, no fill_value/level is
        given, the default axis is used, and the operands share some but
        not all of their unique column labels.
    """
    assert isinstance(left, ABCDataFrame)

    if op is operator.pow or op is roperator.rpow:
        # GH#32685 pow has special semantics for operating with null values
        return False

    if not isinstance(right, ABCDataFrame):
        return False

    # Compare by value, not identity: an equal axis label that is a distinct
    # string object (e.g. one built at runtime) must still qualify.
    if fill_value is None and level is None and axis == default_axis:
        # TODO: any other cases we should handle here?

        # Intersection is always unique so we have to check the unique columns
        left_uniques = left.columns.unique()
        right_uniques = right.columns.unique()
        cols = left_uniques.intersection(right_uniques)
        if len(cols) and not (cols.equals(left_uniques) and cols.equals(right_uniques)):
            # TODO: is there a shortcut available when len(cols) == 0?
            return True

    return False
|
||||
|
||||
|
||||
def frame_arith_method_with_reindex(left: DataFrame, right: DataFrame, op) -> DataFrame:
|
||||
"""
|
||||
For DataFrame-with-DataFrame operations that require reindexing,
|
||||
operate only on shared columns, then reindex.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
left : DataFrame
|
||||
right : DataFrame
|
||||
op : binary operator
|
||||
|
||||
Returns
|
||||
-------
|
||||
DataFrame
|
||||
"""
|
||||
# GH#31623, only operate on shared columns
|
||||
cols, lcols, rcols = left.columns.join(
|
||||
right.columns, how="inner", level=None, return_indexers=True
|
||||
)
|
||||
|
||||
new_left = left.iloc[:, lcols]
|
||||
new_right = right.iloc[:, rcols]
|
||||
result = op(new_left, new_right)
|
||||
|
||||
# Do the join on the columns instead of using align_method_FRAME
|
||||
# to avoid constructing two potentially large/sparse DataFrames
|
||||
join_columns, _, _ = left.columns.join(
|
||||
right.columns, how="outer", level=None, return_indexers=True
|
||||
)
|
||||
|
||||
if result.columns.has_duplicates:
|
||||
# Avoid reindexing with a duplicate axis.
|
||||
# https://github.com/pandas-dev/pandas/issues/35194
|
||||
indexer, _ = result.columns.get_indexer_non_unique(join_columns)
|
||||
indexer = algorithms.unique1d(indexer)
|
||||
result = result._reindex_with_indexers(
|
||||
{1: [join_columns, indexer]}, allow_dups=True
|
||||
)
|
||||
else:
|
||||
result = result.reindex(join_columns, axis=1)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def _maybe_align_series_as_frame(frame: DataFrame, series: Series, axis: int):
    """
    Broadcast a Series operand to a DataFrame shaped like ``frame``.

    If the Series operand is not EA-dtype, we can broadcast to 2D and
    operate blockwise.  Series whose values are not an ndarray are returned
    unchanged, except for datetime64[ns] / timedelta64[ns] values, which are
    converted to an ndarray first.

    Parameters
    ----------
    frame : DataFrame
    series : Series
    axis : int
        0 lays the values out as a column (one per row of ``frame``);
        any other value lays them out as a row.

    Returns
    -------
    DataFrame or Series
    """
    values = series._values

    if not isinstance(values, np.ndarray):
        # TODO(EA2D): no need to special-case with 2D EAs
        castable = (
            values.dtype == "datetime64[ns]" or values.dtype == "timedelta64[ns]"
        )
        if not castable:
            return series
        # We can losslessly+cheaply cast to ndarray
        values = np.asarray(values)

    column_or_row = (-1, 1) if axis == 0 else (1, -1)
    values = values.reshape(*column_or_row)

    broadcasted = np.broadcast_to(values, frame.shape)
    return type(frame)(broadcasted, index=frame.index, columns=frame.columns)
|
||||
|
||||
|
||||
def flex_arith_method_FRAME(op):
    """
    Build the flex (method-style) arithmetic method for DataFrame.

    Parameters
    ----------
    op : binary operator
        Its ``__name__`` with surrounding underscores stripped becomes the
        name of the returned method.

    Returns
    -------
    function
        ``f(self, other, axis="columns", level=None, fill_value=None)``
    """
    op_name = op.__name__.strip("_")
    default_axis = "columns"

    na_op = get_array_op(op)
    doc = make_flex_doc(op_name, "dataframe")

    @Appender(doc)
    def f(self, other, axis=default_axis, level=None, fill_value=None):
        needs_reindex = should_reindex_frame_op(
            self, other, op, axis, default_axis, fill_value, level
        )
        if needs_reindex:
            return frame_arith_method_with_reindex(self, other, op)

        if fill_value is not None and isinstance(other, ABCSeries):
            # TODO: We could allow this in cases where we end up going
            #  through the DataFrame path
            raise NotImplementedError(f"fill_value {fill_value} not supported.")

        axis = 1 if axis is None else self._get_axis_number(axis)

        other = maybe_prepare_scalar_for_op(other, self.shape)
        self, other = align_method_FRAME(self, other, axis, flex=True, level=level)

        if isinstance(other, ABCDataFrame):
            # Another DataFrame
            new_data = self._combine_frame(other, na_op, fill_value)
        elif isinstance(other, ABCSeries):
            new_data = self._dispatch_frame_op(other, op, axis=axis)
        else:
            # in this case we always have `np.ndim(other) == 0`
            if fill_value is not None:
                self = self.fillna(fill_value)
            new_data = self._dispatch_frame_op(other, op)

        return self._construct_result(new_data)

    f.__name__ = op_name
    return f
|
||||
|
||||
|
||||
def flex_comp_method_FRAME(op):
    """
    Build the flex (method-style) comparison method for DataFrame.

    Parameters
    ----------
    op : binary operator
        Its ``__name__`` with surrounding underscores stripped becomes the
        name of the returned method and selects its description.

    Returns
    -------
    function
        ``f(self, other, axis="columns", level=None)``
    """
    op_name = op.__name__.strip("_")
    default_axis = "columns"  # because we are "flex"

    description = _op_descriptions[op_name]["desc"]
    doc = _flex_comp_doc_FRAME.format(op_name=op_name, desc=description)

    @Appender(doc)
    def f(self, other, axis=default_axis, level=None):
        axis = 1 if axis is None else self._get_axis_number(axis)

        self, other = align_method_FRAME(self, other, axis, flex=True, level=level)

        return self._construct_result(self._dispatch_frame_op(other, op, axis=axis))

    f.__name__ = op_name
    return f
|
522
.venv/Lib/site-packages/pandas/core/ops/array_ops.py
Normal file
522
.venv/Lib/site-packages/pandas/core/ops/array_ops.py
Normal file
@ -0,0 +1,522 @@
|
||||
"""
|
||||
Functions for arithmetic and comparison operations on NumPy arrays and
|
||||
ExtensionArrays.
|
||||
"""
|
||||
import datetime
|
||||
from functools import partial
|
||||
import operator
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs import (
|
||||
NaT,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
lib,
|
||||
ops as libops,
|
||||
)
|
||||
from pandas._libs.tslibs import BaseOffset
|
||||
from pandas._typing import (
|
||||
ArrayLike,
|
||||
Shape,
|
||||
)
|
||||
|
||||
from pandas.core.dtypes.cast import (
|
||||
construct_1d_object_array_from_listlike,
|
||||
find_common_type,
|
||||
)
|
||||
from pandas.core.dtypes.common import (
|
||||
ensure_object,
|
||||
is_bool_dtype,
|
||||
is_integer_dtype,
|
||||
is_list_like,
|
||||
is_numeric_v_string_like,
|
||||
is_object_dtype,
|
||||
is_scalar,
|
||||
)
|
||||
from pandas.core.dtypes.generic import (
|
||||
ABCExtensionArray,
|
||||
ABCIndex,
|
||||
ABCSeries,
|
||||
)
|
||||
from pandas.core.dtypes.missing import (
|
||||
isna,
|
||||
notna,
|
||||
)
|
||||
|
||||
import pandas.core.computation.expressions as expressions
|
||||
from pandas.core.construction import ensure_wrapped_if_datetimelike
|
||||
from pandas.core.ops import (
|
||||
missing,
|
||||
roperator,
|
||||
)
|
||||
from pandas.core.ops.dispatch import should_extension_dispatch
|
||||
from pandas.core.ops.invalid import invalid_comparison
|
||||
|
||||
|
||||
def comp_method_OBJECT_ARRAY(op, x, y):
    """
    Compare an object-dtype array against an array-like or a scalar.

    Parameters
    ----------
    op : comparison operator
    x : np.ndarray
    y : list, np.ndarray, Series, Index, or scalar
        Lists are converted to a 1D object array.  Array-likes are cast to
        object dtype and must have the same shape as ``x``.

    Returns
    -------
    np.ndarray
        Comparison result reshaped to ``x.shape``.

    Raises
    ------
    ValueError
        If ``y`` is array-like and its shape differs from ``x.shape``.
    """
    if isinstance(y, list):
        y = construct_1d_object_array_from_listlike(y)

    if not isinstance(y, (np.ndarray, ABCSeries, ABCIndex)):
        # anything else is compared elementwise against x as a single value
        return libops.scalar_compare(x.ravel(), y, op).reshape(x.shape)

    if not is_object_dtype(y.dtype):
        y = y.astype(np.object_)

    if isinstance(y, (ABCSeries, ABCIndex)):
        y = y._values

    if x.shape != y.shape:
        raise ValueError("Shapes must match", x.shape, y.shape)

    return libops.vec_compare(x.ravel(), y.ravel(), op).reshape(x.shape)
|
||||
|
||||
|
||||
def _masked_arith_op(x: np.ndarray, y, op):
    """
    Apply ``op`` only to the non-null elements of the input(s); every other
    position of the result is set to NaN.

    Intended as the retry path after the plain arithmetic operation failed.

    Parameters
    ----------
    x : np.ndarray
    y : np.ndarray or scalar
    op : binary operator

    Returns
    -------
    np.ndarray
        Same shape as ``x``.

    Raises
    ------
    ValueError
        If ``y`` is an ndarray whose length differs from ``x``.
    TypeError
        If ``y`` is neither an ndarray nor a scalar.
    """
    # For Series `x` is 1D so ravel() is a no-op; calling it anyway makes
    # the logic valid for both Series and DataFrame ops.
    flat_x = x.ravel()
    assert isinstance(x, np.ndarray), type(x)

    if isinstance(y, np.ndarray):
        common = find_common_type([x.dtype, y.dtype])
        out = np.empty(x.size, dtype=common)

        if len(x) != len(y):
            raise ValueError(x.shape, y.shape)
        y_valid = notna(y)

        # NB: ravel() is only safe since y is ndarray; for e.g. PeriodIndex
        # we would get int64 dtype, see GH#19956
        flat_y = y.ravel()
        valid = notna(flat_x) & y_valid.ravel()

        # See GH#5284, GH#5035, GH#19448 for historical reference
        if valid.any():
            out[valid] = op(flat_x[valid], flat_y[valid])

    else:
        if not is_scalar(y):
            raise TypeError(
                f"Cannot broadcast np.ndarray with operand of type { type(y) }"
            )

        # mask is only meaningful for x
        out = np.empty(x.size, dtype=x.dtype)
        valid = notna(flat_x)

        # 1 ** np.nan is 1. So we have to unmask those.
        # NOTE(review): `pow` below is the builtin, which is a different
        # object from `operator.pow` -- confirm which callers rely on it.
        if op is pow:
            valid = np.where(x == 1, False, valid)
        elif op is roperator.rpow:
            valid = np.where(y == 1, False, valid)

        if valid.any():
            out[valid] = op(flat_x[valid], y)

    np.putmask(out, ~valid, np.nan)
    return out.reshape(x.shape)  # 2D compat
|
||||
|
||||
|
||||
def _na_arithmetic_op(left: np.ndarray, right, op, is_cmp: bool = False):
    """
    Return the result of evaluating op on the passed in values.

    If native types are not compatible, try coercion to object dtype.

    Parameters
    ----------
    left : np.ndarray
    right : np.ndarray or scalar
        Excludes DataFrame, Series, Index, ExtensionArray.
    op : binary operator
    is_cmp : bool, default False
        If this is a comparison operation.

    Returns
    -------
    array-like

    Raises
    ------
    TypeError : invalid operation
    """
    # a str operand can never use numexpr, so call the operator directly
    func = op if isinstance(right, str) else partial(expressions.evaluate, op)

    try:
        result = func(left, right)
    except TypeError:
        if is_cmp or not (is_object_dtype(left.dtype) or is_object_dtype(right)):
            raise
        # For object dtype, fallback to a masked operation (only operating
        #  on the non-missing values)
        # Don't do this for comparisons, as that will handle complex numbers
        #  incorrectly, see GH#32047
        result = _masked_arith_op(left, right, op)

    if is_cmp and (is_scalar(result) or result is NotImplemented):
        # numpy returned a scalar instead of operating element-wise
        # e.g. numeric array vs str
        # TODO: can remove this after dropping some future numpy version?
        return invalid_comparison(left, right, op)

    return missing.dispatch_fill_zeros(op, left, right, result)
|
||||
|
||||
|
||||
def arithmetic_op(left: ArrayLike, right: Any, op):
    """
    Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ...

    Note: the caller is responsible for ensuring that numpy warnings are
    suppressed (with np.errstate(all="ignore")) if needed.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame or Index.  Series is *not* excluded.
    op : {operator.add, operator.sub, ...}
        Or one of the reversed variants from roperator.

    Returns
    -------
    ndarray or ExtensionArray
        Or a 2-tuple of these in the case of divmod or rdivmod.
    """
    # NB: We assume that extract_array and ensure_wrapped_if_datetimelike
    #  have already been called on `left` and `right`,
    #  and `maybe_prepare_scalar_for_op` has already been called on `right`
    # We need to special-case datetime64/timedelta64 dtypes (e.g. because numpy
    # casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390)

    # Timedelta/Timestamp and other custom scalars are included in the check
    # because numexpr will fail on it, see GH#31457
    call_operator_directly = (
        should_extension_dispatch(left, right)
        or isinstance(right, (Timedelta, BaseOffset, Timestamp))
        or right is NaT
    )
    if call_operator_directly:
        return op(left, right)

    # TODO we should handle EAs consistently and move this check before the if/else
    # (https://github.com/pandas-dev/pandas/issues/41165)
    _bool_arith_check(op, left, right)

    return _na_arithmetic_op(left, right, op)
|
||||
|
||||
|
||||
def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike:
    """
    Evaluate a comparison operation `==`, `!=`, `>=`, `>`, `<=`, or `<`.

    Note: the caller is responsible for ensuring that numpy warnings are
    suppressed (with np.errstate(all="ignore")) if needed.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le}

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``right`` is a list, ndarray or ExtensionArray whose length
        differs from that of ``left``.
    """
    # NB: We assume extract_array has already been called on left and right
    lvalues = ensure_wrapped_if_datetimelike(left)
    rvalues = lib.item_from_zerodim(ensure_wrapped_if_datetimelike(right))

    if isinstance(rvalues, list):
        # We don't catch tuple here bc we may be comparing e.g. MultiIndex
        #  to a tuple that represents a single entry, see test_compare_tuple_strs
        rvalues = np.asarray(rvalues)

    if isinstance(rvalues, (np.ndarray, ABCExtensionArray)):
        # TODO: make this treatment consistent across ops and classes.
        #  We are not catching all listlikes here (e.g. frozenset, tuple)
        #  The ambiguous case is object-dtype.  See GH#27803
        if len(lvalues) != len(rvalues):
            raise ValueError(
                "Lengths must match to compare", lvalues.shape, rvalues.shape
            )

    if should_extension_dispatch(lvalues, rvalues) or (
        (isinstance(rvalues, (Timedelta, BaseOffset, Timestamp)) or right is NaT)
        and not is_object_dtype(lvalues.dtype)
    ):
        # Call the method on lvalues
        return op(lvalues, rvalues)

    if is_scalar(rvalues) and isna(rvalues):  # TODO: but not pd.NA?
        # numpy does not like comparisons vs None: only `!=` is all-True,
        # every other comparison is all-False
        return np.full(lvalues.shape, op is operator.ne, dtype=bool)

    if is_numeric_v_string_like(lvalues, rvalues):
        # GH#36377 going through the numexpr path would incorrectly raise
        return invalid_comparison(lvalues, rvalues, op)

    if is_object_dtype(lvalues.dtype) or isinstance(rvalues, str):
        return comp_method_OBJECT_ARRAY(op, lvalues, rvalues)

    return _na_arithmetic_op(lvalues, rvalues, op, is_cmp=True)
|
||||
|
||||
|
||||
def na_logical_op(x: np.ndarray, y, op):
    """
    Apply a logical operator, retrying through the object-dtype helpers
    when the direct call raises TypeError.

    Parameters
    ----------
    x : np.ndarray
    y : np.ndarray or scalar
    op : binary operator

    Returns
    -------
    np.ndarray
        Result reshaped to ``x.shape``.

    Raises
    ------
    TypeError
        If the scalar fallback fails as well.
    """
    # errors from the scalar fallback that are re-raised as TypeError
    fallback_errors = (
        TypeError,
        ValueError,
        AttributeError,
        OverflowError,
        NotImplementedError,
    )

    try:
        # For exposition, write:
        #  yarr = isinstance(y, np.ndarray)
        #  yint = is_integer(y) or (yarr and y.dtype.kind == "i")
        #  ybool = is_bool(y) or (yarr and y.dtype.kind == "b")
        #  xint = x.dtype.kind == "i"
        #  xbool = x.dtype.kind == "b"
        # Then Cases where this goes through without raising include:
        #  (xint or xbool) and (yint or bool)
        result = op(x, y)
    except TypeError:
        if isinstance(y, np.ndarray):
            # bool-bool dtype operations should be OK, should not get here
            assert not (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype))
            x = ensure_object(x)
            y = ensure_object(y)
            result = libops.vec_binop(x.ravel(), y.ravel(), op)
        else:
            assert lib.is_scalar(y)
            # let null fall thru; everything else is coerced to bool
            if not isna(y):
                y = bool(y)
            try:
                result = libops.scalar_binop(x, y, op)
            except fallback_errors as err:
                typ = type(y).__name__
                raise TypeError(
                    f"Cannot perform '{op.__name__}' with a dtyped [{x.dtype}] array "
                    f"and scalar of type [{typ}]"
                ) from err

    return result.reshape(x.shape)
|
||||
|
||||
|
||||
def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike:
    """
    Evaluate a logical operation `|`, `&`, or `^`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.and_, operator.or_, operator.xor}
        Or one of the reversed variants from roperator.

    Returns
    -------
    ndarray or ExtensionArray
    """

    def fill_int(x):
        # integer results are returned unchanged
        return x

    def fill_bool(x, left=None):
        # if `left` is specifically not-boolean, we do not cast to bool
        if x.dtype.kind in ("c", "f", "O"):
            # dtypes that can hold NA: treat missing entries as False
            missing_mask = isna(x)
            if missing_mask.any():
                x = x.astype(object)
                x[missing_mask] = False

        if left is None or is_bool_dtype(left.dtype):
            x = x.astype(bool)
        return x

    is_self_int_dtype = is_integer_dtype(left.dtype)

    right = lib.item_from_zerodim(right)
    if is_list_like(right) and not hasattr(right, "dtype"):
        # e.g. list, tuple
        right = construct_1d_object_array_from_listlike(right)

    # NB: We assume extract_array has already been called on left and right
    lvalues = ensure_wrapped_if_datetimelike(left)
    rvalues = right

    if should_extension_dispatch(lvalues, rvalues):
        # Call the method on lvalues
        return op(lvalues, rvalues)

    if isinstance(rvalues, np.ndarray):
        is_other_int_dtype = is_integer_dtype(rvalues.dtype)
        if not is_other_int_dtype:
            rvalues = fill_bool(rvalues, lvalues)
    else:
        # i.e. scalar
        is_other_int_dtype = lib.is_integer(rvalues)

    # For int vs int `^`, `|`, `&` are bitwise operators and return
    #   integer dtypes.  Otherwise these are boolean ops
    if is_self_int_dtype and is_other_int_dtype:
        filler = fill_int
    else:
        filler = fill_bool

    return filler(na_logical_op(lvalues, rvalues, op))
|
||||
|
||||
|
||||
def get_array_op(op):
    """
    Return a binary array operation corresponding to the given operator op.

    Parameters
    ----------
    op : function
        Binary operator from operator or roperator module.

    Returns
    -------
    functools.partial

    Raises
    ------
    NotImplementedError
        If the operator's name is not a known comparison, logical or
        arithmetic operation.
    """
    if isinstance(op, partial):
        # We get here via dispatch_to_series in DataFrame case
        #  e.g. test_rolling_consistency_var_debiasing_factors
        return op

    # drop the dunder/trailing underscores and the reflected-op "r" prefix
    op_name = op.__name__.strip("_").lstrip("r")
    if op_name == "arith_op":
        # Reached via DataFrame._combine_frame i.e. flex methods
        #  e.g. test_df_add_flex_filled_mixed_dtypes
        return op

    comparison_names = {"eq", "ne", "lt", "le", "gt", "ge"}
    logical_names = {"and", "or", "xor", "rand", "ror", "rxor"}
    arithmetic_names = {
        "add",
        "sub",
        "mul",
        "truediv",
        "floordiv",
        "mod",
        "divmod",
        "pow",
    }

    if op_name in comparison_names:
        return partial(comparison_op, op=op)
    if op_name in logical_names:
        return partial(logical_op, op=op)
    if op_name in arithmetic_names:
        return partial(arithmetic_op, op=op)
    raise NotImplementedError(op_name)
|
||||
|
||||
|
||||
def maybe_prepare_scalar_for_op(obj, shape: Shape):
    """
    Cast non-pandas objects to pandas types to unify behavior of arithmetic
    and comparison operations.

    Parameters
    ----------
    obj: object
    shape : tuple[int]
        Shape to broadcast to when a NaT-valued ``np.datetime64`` or
        ``np.timedelta64`` has to be turned into an array.

    Returns
    -------
    out : object

    Notes
    -----
    Be careful to call this *after* determining the `name` attribute to be
    attached to the result of the arithmetic operation.
    """
    exact_type = type(obj)

    if exact_type is datetime.timedelta:
        # GH#22390  cast up to Timedelta to rely on Timedelta
        # implementation; otherwise operation against numeric-dtype
        # raises TypeError
        return Timedelta(obj)

    if exact_type is datetime.datetime:
        # cast up to Timestamp to rely on Timestamp implementation, see Timedelta above
        return Timestamp(obj)

    if isinstance(obj, np.datetime64):
        # GH#28080 numpy casts integer-dtype to datetime64 when doing
        #  array[int] + datetime64, which we do not allow
        if not isna(obj):
            return Timestamp(obj)

        from pandas.core.arrays import DatetimeArray

        # Avoid possible ambiguities with pd.NaT
        nat_ns = obj.astype("datetime64[ns]")
        return DatetimeArray(np.broadcast_to(nat_ns, shape))

    if isinstance(obj, np.timedelta64):
        if not isna(obj):
            # In particular non-nanosecond timedelta64 needs to be cast to
            #  nanoseconds, or else we get undesired behavior like
            #  np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D')
            return Timedelta(obj)

        from pandas.core.arrays import TimedeltaArray

        # wrapping timedelta64("NaT") in Timedelta returns NaT,
        #  which would incorrectly be treated as a datetime-NaT, so
        #  we broadcast and wrap in a TimedeltaArray
        nat_ns = obj.astype("timedelta64[ns]")
        return TimedeltaArray(np.broadcast_to(nat_ns, shape))

    return obj
|
||||
|
||||
|
||||
_BOOL_OP_NOT_ALLOWED = {
|
||||
operator.truediv,
|
||||
roperator.rtruediv,
|
||||
operator.floordiv,
|
||||
roperator.rfloordiv,
|
||||
operator.pow,
|
||||
roperator.rpow,
|
||||
}
|
||||
|
||||
|
||||
def _bool_arith_check(op, a, b):
    """
    Reject division and power operations between two boolean operands.

    numpy evaluates these operations on booleans; pandas raises instead.

    Parameters
    ----------
    op : callable
        The binary operator about to be applied.
    a : object
        Left operand; its ``dtype`` attribute is inspected.
    b : object
        Right operand; either something ``is_bool_dtype`` recognises or a
        ``bool`` / ``np.bool_`` scalar.

    Raises
    ------
    NotImplementedError
        If ``op`` is in ``_BOOL_OP_NOT_ALLOWED`` and both operands are boolean.
    """
    if op not in _BOOL_OP_NOT_ALLOWED:
        return
    if not is_bool_dtype(a.dtype):
        return

    other_is_bool = is_bool_dtype(b) or isinstance(b, (bool, np.bool_))
    if other_is_bool:
        # report the forward operator name, e.g. "rtruediv" -> "truediv"
        op_name = op.__name__.strip("_").lstrip("r")
        raise NotImplementedError(
            f"operator '{op_name}' not implemented for bool dtypes"
        )
|
140
.venv/Lib/site-packages/pandas/core/ops/common.py
Normal file
140
.venv/Lib/site-packages/pandas/core/ops/common.py
Normal file
@ -0,0 +1,140 @@
|
||||
"""
|
||||
Boilerplate functions used in defining binary operations.
|
||||
"""
|
||||
from functools import wraps
|
||||
from typing import Callable
|
||||
|
||||
from pandas._libs.lib import item_from_zerodim
|
||||
from pandas._libs.missing import is_matching_na
|
||||
from pandas._typing import F
|
||||
|
||||
from pandas.core.dtypes.generic import (
|
||||
ABCDataFrame,
|
||||
ABCIndex,
|
||||
ABCSeries,
|
||||
)
|
||||
|
||||
|
||||
def unpack_zerodim_and_defer(name: str) -> Callable[[F], F]:
    """
    Build a decorator applying the pandas binary-method conventions.

    Parameters
    ----------
    name : str
        Name of the method being decorated; forwarded unchanged to
        ``_unpack_zerodim_and_defer``.

    Returns
    -------
    decorator
        Callable that takes a binary method and returns the wrapped method.
    """

    def decorator(method: F) -> F:
        # all of the actual work happens in the private helper
        return _unpack_zerodim_and_defer(method, name)

    return decorator
|
||||
|
||||
|
||||
def _unpack_zerodim_and_defer(method, name: str):
|
||||
"""
|
||||
Boilerplate for pandas conventions in arithmetic and comparison methods.
|
||||
|
||||
Ensure method returns NotImplemented when operating against "senior"
|
||||
classes. Ensure zero-dimensional ndarrays are always unpacked.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
method : binary method
|
||||
name : str
|
||||
|
||||
Returns
|
||||
-------
|
||||
method
|
||||
"""
|
||||
is_cmp = name.strip("__") in {"eq", "ne", "lt", "le", "gt", "ge"}
|
||||
|
||||
@wraps(method)
|
||||
def new_method(self, other):
|
||||
|
||||
if is_cmp and isinstance(self, ABCIndex) and isinstance(other, ABCSeries):
|
||||
# For comparison ops, Index does *not* defer to Series
|
||||
pass
|
||||
else:
|
||||
for cls in [ABCDataFrame, ABCSeries, ABCIndex]:
|
||||
if isinstance(self, cls):
|
||||
break
|
||||
if isinstance(other, cls):
|
||||
return NotImplemented
|
||||
|
||||
other = item_from_zerodim(other)
|
||||
|
||||
return method(self, other)
|
||||
|
||||
return new_method
|
||||
|
||||
|
||||
def get_op_result_name(left, right):
    """
    Determine the name to pin on the result of a binary operation.

    Parameters
    ----------
    left : {Series, Index}
    right : object

    Returns
    -------
    name : object
        Usually a string. ``left.name`` unless ``right`` is itself a Series
        or Index, in which case the two names are reconciled.
    """
    if not isinstance(right, (ABCSeries, ABCIndex)):
        # only ``left`` is a named pandas object, so its name wins
        return left.name
    return _maybe_match_name(left, right)
|
||||
|
||||
|
||||
def _maybe_match_name(a, b):
|
||||
"""
|
||||
Try to find a name to attach to the result of an operation between
|
||||
a and b. If only one of these has a `name` attribute, return that
|
||||
name. Otherwise return a consensus name if they match or None if
|
||||
they have different names.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
a : object
|
||||
b : object
|
||||
|
||||
Returns
|
||||
-------
|
||||
name : str or None
|
||||
|
||||
See Also
|
||||
--------
|
||||
pandas.core.common.consensus_name_attr
|
||||
"""
|
||||
a_has = hasattr(a, "name")
|
||||
b_has = hasattr(b, "name")
|
||||
if a_has and b_has:
|
||||
try:
|
||||
if a.name == b.name:
|
||||
return a.name
|
||||
elif is_matching_na(a.name, b.name):
|
||||
# e.g. both are np.nan
|
||||
return a.name
|
||||
else:
|
||||
return None
|
||||
except TypeError:
|
||||
# pd.NA
|
||||
if is_matching_na(a.name, b.name):
|
||||
return a.name
|
||||
return None
|
||||
except ValueError:
|
||||
# e.g. np.int64(1) vs (np.int64(1), np.int64(2))
|
||||
return None
|
||||
elif a_has:
|
||||
return a.name
|
||||
elif b_has:
|
||||
return b.name
|
||||
return None
|
24
.venv/Lib/site-packages/pandas/core/ops/dispatch.py
Normal file
24
.venv/Lib/site-packages/pandas/core/ops/dispatch.py
Normal file
@ -0,0 +1,24 @@
|
||||
"""
|
||||
Functions for defining unary operations.
|
||||
"""
|
||||
from typing import Any
|
||||
|
||||
from pandas._typing import ArrayLike
|
||||
|
||||
from pandas.core.dtypes.generic import ABCExtensionArray
|
||||
|
||||
|
||||
def should_extension_dispatch(left: ArrayLike, right: Any) -> bool:
    """
    Decide whether a Series operation should dispatch to an ExtensionArray method.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object

    Returns
    -------
    bool
        True when at least one of the operands is an ExtensionArray.
    """
    return any(isinstance(operand, ABCExtensionArray) for operand in (left, right))
|
749
.venv/Lib/site-packages/pandas/core/ops/docstrings.py
Normal file
749
.venv/Lib/site-packages/pandas/core/ops/docstrings.py
Normal file
@ -0,0 +1,749 @@
|
||||
"""
|
||||
Templating for ops docstrings
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
def make_flex_doc(op_name: str, typ: str) -> str:
    """
    Render the docstring for a generated flex method.

    The operation's entry in ``_op_descriptions`` is substituted into either
    ``_flex_doc_SERIES`` or ``_flex_doc_FRAME``, depending on ``typ``.

    Parameters
    ----------
    op_name : str {'__add__', '__sub__', ... '__eq__', '__ne__', ...}
        Double underscores are removed before the lookup.
    typ : str {'series', 'dataframe'}

    Returns
    -------
    doc : str

    Raises
    ------
    AssertionError
        If ``typ`` is neither 'series' nor 'dataframe'.
    """
    op_name = op_name.replace("__", "")
    op_desc = _op_descriptions[op_name]

    op_desc_op = op_desc["op"]
    assert op_desc_op is not None  # for mypy

    # spell out the plain-operator expression the method is equivalent to
    if op_name.startswith("r"):
        equiv = " ".join(("other", op_desc_op, typ))
    elif op_name == "divmod":
        equiv = f"{op_name}({typ}, other)"
    else:
        equiv = " ".join((typ, op_desc_op, "other"))

    if typ == "series":
        template = _flex_doc_SERIES
        if op_desc["reverse"]:
            # append the See Also section pointing at the reversed operation
            template += _see_also_reverse_SERIES.format(
                reverse=op_desc["reverse"], see_also_desc=op_desc["see_also_desc"]
            )
        doc = template.format(
            desc=op_desc["desc"],
            op_name=op_name,
            equiv=equiv,
            series_returns=op_desc["series_returns"],
        )
        examples = op_desc["series_examples"]
        if examples:
            doc = doc + examples
        return doc

    if typ == "dataframe":
        return _flex_doc_FRAME.format(
            desc=op_desc["desc"],
            op_name=op_name,
            equiv=equiv,
            reverse=op_desc["reverse"],
        )

    raise AssertionError("Invalid typ argument.")
|
||||
|
||||
|
||||
_common_examples_algebra_SERIES = """
|
||||
Examples
|
||||
--------
|
||||
>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd'])
|
||||
>>> a
|
||||
a 1.0
|
||||
b 1.0
|
||||
c 1.0
|
||||
d NaN
|
||||
dtype: float64
|
||||
>>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e'])
|
||||
>>> b
|
||||
a 1.0
|
||||
b NaN
|
||||
d 1.0
|
||||
e NaN
|
||||
dtype: float64"""
|
||||
|
||||
_common_examples_comparison_SERIES = """
|
||||
Examples
|
||||
--------
|
||||
>>> a = pd.Series([1, 1, 1, np.nan, 1], index=['a', 'b', 'c', 'd', 'e'])
|
||||
>>> a
|
||||
a 1.0
|
||||
b 1.0
|
||||
c 1.0
|
||||
d NaN
|
||||
e 1.0
|
||||
dtype: float64
|
||||
>>> b = pd.Series([0, 1, 2, np.nan, 1], index=['a', 'b', 'c', 'd', 'f'])
|
||||
>>> b
|
||||
a 0.0
|
||||
b 1.0
|
||||
c 2.0
|
||||
d NaN
|
||||
f 1.0
|
||||
dtype: float64"""
|
||||
|
||||
_add_example_SERIES = (
|
||||
_common_examples_algebra_SERIES
|
||||
+ """
|
||||
>>> a.add(b, fill_value=0)
|
||||
a 2.0
|
||||
b 1.0
|
||||
c 1.0
|
||||
d 1.0
|
||||
e NaN
|
||||
dtype: float64
|
||||
"""
|
||||
)
|
||||
|
||||
_sub_example_SERIES = (
|
||||
_common_examples_algebra_SERIES
|
||||
+ """
|
||||
>>> a.subtract(b, fill_value=0)
|
||||
a 0.0
|
||||
b 1.0
|
||||
c 1.0
|
||||
d -1.0
|
||||
e NaN
|
||||
dtype: float64
|
||||
"""
|
||||
)
|
||||
|
||||
_mul_example_SERIES = (
|
||||
_common_examples_algebra_SERIES
|
||||
+ """
|
||||
>>> a.multiply(b, fill_value=0)
|
||||
a 1.0
|
||||
b 0.0
|
||||
c 0.0
|
||||
d 0.0
|
||||
e NaN
|
||||
dtype: float64
|
||||
"""
|
||||
)
|
||||
|
||||
_div_example_SERIES = (
|
||||
_common_examples_algebra_SERIES
|
||||
+ """
|
||||
>>> a.divide(b, fill_value=0)
|
||||
a 1.0
|
||||
b inf
|
||||
c inf
|
||||
d 0.0
|
||||
e NaN
|
||||
dtype: float64
|
||||
"""
|
||||
)
|
||||
|
||||
_floordiv_example_SERIES = (
|
||||
_common_examples_algebra_SERIES
|
||||
+ """
|
||||
>>> a.floordiv(b, fill_value=0)
|
||||
a 1.0
|
||||
b NaN
|
||||
c NaN
|
||||
d 0.0
|
||||
e NaN
|
||||
dtype: float64
|
||||
"""
|
||||
)
|
||||
|
||||
_divmod_example_SERIES = (
|
||||
_common_examples_algebra_SERIES
|
||||
+ """
|
||||
>>> a.divmod(b, fill_value=0)
|
||||
(a 1.0
|
||||
b NaN
|
||||
c NaN
|
||||
d 0.0
|
||||
e NaN
|
||||
dtype: float64,
|
||||
a 0.0
|
||||
b NaN
|
||||
c NaN
|
||||
d 0.0
|
||||
e NaN
|
||||
dtype: float64)
|
||||
"""
|
||||
)
|
||||
|
||||
_mod_example_SERIES = (
|
||||
_common_examples_algebra_SERIES
|
||||
+ """
|
||||
>>> a.mod(b, fill_value=0)
|
||||
a 0.0
|
||||
b NaN
|
||||
c NaN
|
||||
d 0.0
|
||||
e NaN
|
||||
dtype: float64
|
||||
"""
|
||||
)
|
||||
_pow_example_SERIES = (
|
||||
_common_examples_algebra_SERIES
|
||||
+ """
|
||||
>>> a.pow(b, fill_value=0)
|
||||
a 1.0
|
||||
b 1.0
|
||||
c 1.0
|
||||
d 0.0
|
||||
e NaN
|
||||
dtype: float64
|
||||
"""
|
||||
)
|
||||
|
||||
_ne_example_SERIES = (
|
||||
_common_examples_algebra_SERIES
|
||||
+ """
|
||||
>>> a.ne(b, fill_value=0)
|
||||
a False
|
||||
b True
|
||||
c True
|
||||
d True
|
||||
e True
|
||||
dtype: bool
|
||||
"""
|
||||
)
|
||||
|
||||
_eq_example_SERIES = (
|
||||
_common_examples_algebra_SERIES
|
||||
+ """
|
||||
>>> a.eq(b, fill_value=0)
|
||||
a True
|
||||
b False
|
||||
c False
|
||||
d False
|
||||
e False
|
||||
dtype: bool
|
||||
"""
|
||||
)
|
||||
|
||||
_lt_example_SERIES = (
|
||||
_common_examples_comparison_SERIES
|
||||
+ """
|
||||
>>> a.lt(b, fill_value=0)
|
||||
a False
|
||||
b False
|
||||
c True
|
||||
d False
|
||||
e False
|
||||
f True
|
||||
dtype: bool
|
||||
"""
|
||||
)
|
||||
|
||||
_le_example_SERIES = (
|
||||
_common_examples_comparison_SERIES
|
||||
+ """
|
||||
>>> a.le(b, fill_value=0)
|
||||
a False
|
||||
b True
|
||||
c True
|
||||
d False
|
||||
e False
|
||||
f True
|
||||
dtype: bool
|
||||
"""
|
||||
)
|
||||
|
||||
_gt_example_SERIES = (
|
||||
_common_examples_comparison_SERIES
|
||||
+ """
|
||||
>>> a.gt(b, fill_value=0)
|
||||
a True
|
||||
b False
|
||||
c False
|
||||
d False
|
||||
e True
|
||||
f False
|
||||
dtype: bool
|
||||
"""
|
||||
)
|
||||
|
||||
_ge_example_SERIES = (
|
||||
_common_examples_comparison_SERIES
|
||||
+ """
|
||||
>>> a.ge(b, fill_value=0)
|
||||
a True
|
||||
b True
|
||||
c False
|
||||
d False
|
||||
e True
|
||||
f False
|
||||
dtype: bool
|
||||
"""
|
||||
)
|
||||
|
||||
_returns_series = """Series\n The result of the operation."""
|
||||
|
||||
_returns_tuple = """2-Tuple of Series\n The result of the operation."""
|
||||
|
||||
_op_descriptions: dict[str, dict[str, str | None]] = {
|
||||
# Arithmetic Operators
|
||||
"add": {
|
||||
"op": "+",
|
||||
"desc": "Addition",
|
||||
"reverse": "radd",
|
||||
"series_examples": _add_example_SERIES,
|
||||
"series_returns": _returns_series,
|
||||
},
|
||||
"sub": {
|
||||
"op": "-",
|
||||
"desc": "Subtraction",
|
||||
"reverse": "rsub",
|
||||
"series_examples": _sub_example_SERIES,
|
||||
"series_returns": _returns_series,
|
||||
},
|
||||
"mul": {
|
||||
"op": "*",
|
||||
"desc": "Multiplication",
|
||||
"reverse": "rmul",
|
||||
"series_examples": _mul_example_SERIES,
|
||||
"series_returns": _returns_series,
|
||||
"df_examples": None,
|
||||
},
|
||||
"mod": {
|
||||
"op": "%",
|
||||
"desc": "Modulo",
|
||||
"reverse": "rmod",
|
||||
"series_examples": _mod_example_SERIES,
|
||||
"series_returns": _returns_series,
|
||||
},
|
||||
"pow": {
|
||||
"op": "**",
|
||||
"desc": "Exponential power",
|
||||
"reverse": "rpow",
|
||||
"series_examples": _pow_example_SERIES,
|
||||
"series_returns": _returns_series,
|
||||
"df_examples": None,
|
||||
},
|
||||
"truediv": {
|
||||
"op": "/",
|
||||
"desc": "Floating division",
|
||||
"reverse": "rtruediv",
|
||||
"series_examples": _div_example_SERIES,
|
||||
"series_returns": _returns_series,
|
||||
"df_examples": None,
|
||||
},
|
||||
"floordiv": {
|
||||
"op": "//",
|
||||
"desc": "Integer division",
|
||||
"reverse": "rfloordiv",
|
||||
"series_examples": _floordiv_example_SERIES,
|
||||
"series_returns": _returns_series,
|
||||
"df_examples": None,
|
||||
},
|
||||
"divmod": {
|
||||
"op": "divmod",
|
||||
"desc": "Integer division and modulo",
|
||||
"reverse": "rdivmod",
|
||||
"series_examples": _divmod_example_SERIES,
|
||||
"series_returns": _returns_tuple,
|
||||
"df_examples": None,
|
||||
},
|
||||
# Comparison Operators
|
||||
"eq": {
|
||||
"op": "==",
|
||||
"desc": "Equal to",
|
||||
"reverse": None,
|
||||
"series_examples": _eq_example_SERIES,
|
||||
"series_returns": _returns_series,
|
||||
},
|
||||
"ne": {
|
||||
"op": "!=",
|
||||
"desc": "Not equal to",
|
||||
"reverse": None,
|
||||
"series_examples": _ne_example_SERIES,
|
||||
"series_returns": _returns_series,
|
||||
},
|
||||
"lt": {
|
||||
"op": "<",
|
||||
"desc": "Less than",
|
||||
"reverse": None,
|
||||
"series_examples": _lt_example_SERIES,
|
||||
"series_returns": _returns_series,
|
||||
},
|
||||
"le": {
|
||||
"op": "<=",
|
||||
"desc": "Less than or equal to",
|
||||
"reverse": None,
|
||||
"series_examples": _le_example_SERIES,
|
||||
"series_returns": _returns_series,
|
||||
},
|
||||
"gt": {
|
||||
"op": ">",
|
||||
"desc": "Greater than",
|
||||
"reverse": None,
|
||||
"series_examples": _gt_example_SERIES,
|
||||
"series_returns": _returns_series,
|
||||
},
|
||||
"ge": {
|
||||
"op": ">=",
|
||||
"desc": "Greater than or equal to",
|
||||
"reverse": None,
|
||||
"series_examples": _ge_example_SERIES,
|
||||
"series_returns": _returns_series,
|
||||
},
|
||||
}
|
||||
|
||||
_py_num_ref = """see
|
||||
`Python documentation
|
||||
<https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types>`_
|
||||
for more details"""
|
||||
# Give every operator that names a reversed counterpart a mirrored entry, and
# cross-link the two entries through their ``see_also_desc`` text.
_op_names = list(_op_descriptions.keys())
for key in _op_names:
    reverse_op = _op_descriptions[key]["reverse"]
    if reverse_op is None:
        # entries declared with "reverse": None get no mirrored entry
        continue

    # shallow copy of the forward entry whose "reverse" points back at ``key``
    _op_descriptions[reverse_op] = {**_op_descriptions[key], "reverse": key}
    _op_descriptions[key]["see_also_desc"] = (
        f"Reverse of the {_op_descriptions[key]['desc']} operator, {_py_num_ref}"
    )
    _op_descriptions[reverse_op]["see_also_desc"] = (
        f"Element-wise {_op_descriptions[key]['desc']}, {_py_num_ref}"
    )
|
||||
|
||||
_flex_doc_SERIES = """
|
||||
Return {desc} of series and other, element-wise (binary operator `{op_name}`).
|
||||
|
||||
Equivalent to ``{equiv}``, but with support to substitute a fill_value for
|
||||
missing data in either one of the inputs.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
other : Series or scalar value
|
||||
fill_value : None or float value, default None (NaN)
|
||||
Fill existing missing (NaN) values, and any new element needed for
|
||||
successful Series alignment, with this value before computation.
|
||||
If data in both corresponding Series locations is missing
|
||||
the result of filling (at that location) will be missing.
|
||||
level : int or name
|
||||
Broadcast across a level, matching Index values on the
|
||||
passed MultiIndex level.
|
||||
|
||||
Returns
|
||||
-------
|
||||
{series_returns}
|
||||
"""
|
||||
|
||||
_see_also_reverse_SERIES = """
|
||||
See Also
|
||||
--------
|
||||
Series.{reverse} : {see_also_desc}.
|
||||
"""
|
||||
|
||||
_flex_doc_FRAME = """
|
||||
Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
|
||||
|
||||
Equivalent to ``{equiv}``, but with support to substitute a fill_value
|
||||
for missing data in one of the inputs. With reverse version, `{reverse}`.
|
||||
|
||||
Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to
|
||||
arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
other : scalar, sequence, Series, or DataFrame
|
||||
Any single or multiple element data structure, or list-like object.
|
||||
axis : {{0 or 'index', 1 or 'columns'}}
|
||||
Whether to compare by the index (0 or 'index') or columns
|
||||
(1 or 'columns'). For Series input, axis to match Series index on.
|
||||
level : int or label
|
||||
Broadcast across a level, matching Index values on the
|
||||
passed MultiIndex level.
|
||||
fill_value : float or None, default None
|
||||
Fill existing missing (NaN) values, and any new element needed for
|
||||
successful DataFrame alignment, with this value before computation.
|
||||
If data in both corresponding DataFrame locations is missing
|
||||
the result will be missing.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DataFrame
|
||||
Result of the arithmetic operation.
|
||||
|
||||
See Also
|
||||
--------
|
||||
DataFrame.add : Add DataFrames.
|
||||
DataFrame.sub : Subtract DataFrames.
|
||||
DataFrame.mul : Multiply DataFrames.
|
||||
DataFrame.div : Divide DataFrames (float division).
|
||||
DataFrame.truediv : Divide DataFrames (float division).
|
||||
DataFrame.floordiv : Divide DataFrames (integer division).
|
||||
DataFrame.mod : Calculate modulo (remainder after division).
|
||||
DataFrame.pow : Calculate exponential power.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Mismatched indices will be unioned together.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> df = pd.DataFrame({{'angles': [0, 3, 4],
|
||||
... 'degrees': [360, 180, 360]}},
|
||||
... index=['circle', 'triangle', 'rectangle'])
|
||||
>>> df
|
||||
angles degrees
|
||||
circle 0 360
|
||||
triangle 3 180
|
||||
rectangle 4 360
|
||||
|
||||
Add a scalar with operator version which return the same
|
||||
results.
|
||||
|
||||
>>> df + 1
|
||||
angles degrees
|
||||
circle 1 361
|
||||
triangle 4 181
|
||||
rectangle 5 361
|
||||
|
||||
>>> df.add(1)
|
||||
angles degrees
|
||||
circle 1 361
|
||||
triangle 4 181
|
||||
rectangle 5 361
|
||||
|
||||
Divide by constant with reverse version.
|
||||
|
||||
>>> df.div(10)
|
||||
angles degrees
|
||||
circle 0.0 36.0
|
||||
triangle 0.3 18.0
|
||||
rectangle 0.4 36.0
|
||||
|
||||
>>> df.rdiv(10)
|
||||
angles degrees
|
||||
circle inf 0.027778
|
||||
triangle 3.333333 0.055556
|
||||
rectangle 2.500000 0.027778
|
||||
|
||||
Subtract a list and Series by axis with operator version.
|
||||
|
||||
>>> df - [1, 2]
|
||||
angles degrees
|
||||
circle -1 358
|
||||
triangle 2 178
|
||||
rectangle 3 358
|
||||
|
||||
>>> df.sub([1, 2], axis='columns')
|
||||
angles degrees
|
||||
circle -1 358
|
||||
triangle 2 178
|
||||
rectangle 3 358
|
||||
|
||||
>>> df.sub(pd.Series([1, 1, 1], index=['circle', 'triangle', 'rectangle']),
|
||||
... axis='index')
|
||||
angles degrees
|
||||
circle -1 359
|
||||
triangle 2 179
|
||||
rectangle 3 359
|
||||
|
||||
Multiply a DataFrame of different shape with operator version.
|
||||
|
||||
>>> other = pd.DataFrame({{'angles': [0, 3, 4]}},
|
||||
... index=['circle', 'triangle', 'rectangle'])
|
||||
>>> other
|
||||
angles
|
||||
circle 0
|
||||
triangle 3
|
||||
rectangle 4
|
||||
|
||||
>>> df * other
|
||||
angles degrees
|
||||
circle 0 NaN
|
||||
triangle 9 NaN
|
||||
rectangle 16 NaN
|
||||
|
||||
>>> df.mul(other, fill_value=0)
|
||||
angles degrees
|
||||
circle 0 0.0
|
||||
triangle 9 0.0
|
||||
rectangle 16 0.0
|
||||
|
||||
Divide by a MultiIndex by level.
|
||||
|
||||
>>> df_multindex = pd.DataFrame({{'angles': [0, 3, 4, 4, 5, 6],
|
||||
... 'degrees': [360, 180, 360, 360, 540, 720]}},
|
||||
... index=[['A', 'A', 'A', 'B', 'B', 'B'],
|
||||
... ['circle', 'triangle', 'rectangle',
|
||||
... 'square', 'pentagon', 'hexagon']])
|
||||
>>> df_multindex
|
||||
angles degrees
|
||||
A circle 0 360
|
||||
triangle 3 180
|
||||
rectangle 4 360
|
||||
B square 4 360
|
||||
pentagon 5 540
|
||||
hexagon 6 720
|
||||
|
||||
>>> df.div(df_multindex, level=1, fill_value=0)
|
||||
angles degrees
|
||||
A circle NaN 1.0
|
||||
triangle 1.0 1.0
|
||||
rectangle 1.0 1.0
|
||||
B square 0.0 0.0
|
||||
pentagon 0.0 0.0
|
||||
hexagon 0.0 0.0
|
||||
"""
|
||||
|
||||
_flex_comp_doc_FRAME = """
|
||||
Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
|
||||
|
||||
Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison
|
||||
operators.
|
||||
|
||||
Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis
|
||||
(rows or columns) and level for comparison.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
other : scalar, sequence, Series, or DataFrame
|
||||
Any single or multiple element data structure, or list-like object.
|
||||
axis : {{0 or 'index', 1 or 'columns'}}, default 'columns'
|
||||
Whether to compare by the index (0 or 'index') or columns
|
||||
(1 or 'columns').
|
||||
level : int or label
|
||||
Broadcast across a level, matching Index values on the passed
|
||||
MultiIndex level.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DataFrame of bool
|
||||
Result of the comparison.
|
||||
|
||||
See Also
|
||||
--------
|
||||
DataFrame.eq : Compare DataFrames for equality elementwise.
|
||||
DataFrame.ne : Compare DataFrames for inequality elementwise.
|
||||
DataFrame.le : Compare DataFrames for less than inequality
|
||||
or equality elementwise.
|
||||
DataFrame.lt : Compare DataFrames for strictly less than
|
||||
inequality elementwise.
|
||||
DataFrame.ge : Compare DataFrames for greater than inequality
|
||||
or equality elementwise.
|
||||
DataFrame.gt : Compare DataFrames for strictly greater than
|
||||
inequality elementwise.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Mismatched indices will be unioned together.
|
||||
`NaN` values are considered different (i.e. `NaN` != `NaN`).
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> df = pd.DataFrame({{'cost': [250, 150, 100],
|
||||
... 'revenue': [100, 250, 300]}},
|
||||
... index=['A', 'B', 'C'])
|
||||
>>> df
|
||||
cost revenue
|
||||
A 250 100
|
||||
B 150 250
|
||||
C 100 300
|
||||
|
||||
Comparison with a scalar, using either the operator or method:
|
||||
|
||||
>>> df == 100
|
||||
cost revenue
|
||||
A False True
|
||||
B False False
|
||||
C True False
|
||||
|
||||
>>> df.eq(100)
|
||||
cost revenue
|
||||
A False True
|
||||
B False False
|
||||
C True False
|
||||
|
||||
When `other` is a :class:`Series`, the columns of a DataFrame are aligned
|
||||
with the index of `other` and broadcast:
|
||||
|
||||
>>> df != pd.Series([100, 250], index=["cost", "revenue"])
|
||||
cost revenue
|
||||
A True True
|
||||
B True False
|
||||
C False True
|
||||
|
||||
Use the method to control the broadcast axis:
|
||||
|
||||
>>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis='index')
|
||||
cost revenue
|
||||
A True False
|
||||
B True True
|
||||
C True True
|
||||
D True True
|
||||
|
||||
When comparing to an arbitrary sequence, the number of columns must
|
||||
match the number elements in `other`:
|
||||
|
||||
>>> df == [250, 100]
|
||||
cost revenue
|
||||
A True True
|
||||
B False False
|
||||
C False False
|
||||
|
||||
Use the method to control the axis:
|
||||
|
||||
>>> df.eq([250, 250, 100], axis='index')
|
||||
cost revenue
|
||||
A True False
|
||||
B False True
|
||||
C True False
|
||||
|
||||
Compare to a DataFrame of different shape.
|
||||
|
||||
>>> other = pd.DataFrame({{'revenue': [300, 250, 100, 150]}},
|
||||
... index=['A', 'B', 'C', 'D'])
|
||||
>>> other
|
||||
revenue
|
||||
A 300
|
||||
B 250
|
||||
C 100
|
||||
D 150
|
||||
|
||||
>>> df.gt(other)
|
||||
cost revenue
|
||||
A False False
|
||||
B False False
|
||||
C False True
|
||||
D False False
|
||||
|
||||
Compare to a MultiIndex by level.
|
||||
|
||||
>>> df_multindex = pd.DataFrame({{'cost': [250, 150, 100, 150, 300, 220],
|
||||
... 'revenue': [100, 250, 300, 200, 175, 225]}},
|
||||
... index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'],
|
||||
... ['A', 'B', 'C', 'A', 'B', 'C']])
|
||||
>>> df_multindex
|
||||
cost revenue
|
||||
Q1 A 250 100
|
||||
B 150 250
|
||||
C 100 300
|
||||
Q2 A 150 200
|
||||
B 300 175
|
||||
C 220 225
|
||||
|
||||
>>> df.le(df_multindex, level=1)
|
||||
cost revenue
|
||||
Q1 A True True
|
||||
B True True
|
||||
C True True
|
||||
Q2 A False True
|
||||
B True False
|
||||
C True False
|
||||
"""
|
56
.venv/Lib/site-packages/pandas/core/ops/invalid.py
Normal file
56
.venv/Lib/site-packages/pandas/core/ops/invalid.py
Normal file
@ -0,0 +1,56 @@
|
||||
"""
|
||||
Templates for invalid operations.
|
||||
"""
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def invalid_comparison(left, right, op):
    """
    Produce the result of a comparison between mismatched types.

    Follows python3 conventions:

    - equality yields all-False
    - inequality yields all-True
    - any other operator raises TypeError

    Parameters
    ----------
    left : array-like
        Its ``shape`` determines the shape of the result.
    right : scalar, array-like
    op : operator.{eq, ne, lt, le, gt, ge}

    Returns
    -------
    np.ndarray[bool]

    Raises
    ------
    TypeError : on ordering comparisons (anything but eq / ne)
    """
    if op is not operator.eq and op is not operator.ne:
        typ = type(right).__name__
        raise TypeError(f"Invalid comparison between dtype={left.dtype} and {typ}")

    # eq -> every element False, ne -> every element True
    return np.full(left.shape, op is operator.ne, dtype=bool)
|
||||
|
||||
|
||||
def make_invalid_op(name: str):
    """
    Create a binary method that unconditionally raises TypeError.

    Parameters
    ----------
    name : str
        Operation name; used in the error message and as the ``__name__``
        of the returned function.

    Returns
    -------
    invalid_op : function
    """

    def invalid_op(self, other=None):
        raise TypeError(
            f"cannot perform {name} with this index type: {type(self).__name__}"
        )

    invalid_op.__name__ = name
    return invalid_op
|
189
.venv/Lib/site-packages/pandas/core/ops/mask_ops.py
Normal file
189
.venv/Lib/site-packages/pandas/core/ops/mask_ops.py
Normal file
@ -0,0 +1,189 @@
|
||||
"""
|
||||
Ops for masked arrays.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs import (
|
||||
lib,
|
||||
missing as libmissing,
|
||||
)
|
||||
|
||||
|
||||
def kleene_or(
    left: bool | np.ndarray | libmissing.NAType,
    right: bool | np.ndarray | libmissing.NAType,
    left_mask: np.ndarray | None,
    right_mask: np.ndarray | None,
):
    """
    Boolean ``or`` using Kleene logic.

    Values are NA where we have ``NA | NA`` or ``NA | False``.
    ``NA | True`` is considered True.

    Parameters
    ----------
    left, right : ndarray, NA, or bool
        The values of the array.
    left_mask, right_mask : ndarray, optional
        The masks. Only one of these may be None, which implies that
        the associated `left` or `right` value is a scalar.

    Returns
    -------
    result, mask: ndarray[bool]
        The result of the logical or, and the new mask.

    Raises
    ------
    TypeError
        If, after the operands are ordered, ``left`` is not an ndarray.
    ValueError
        If ``right`` is a floating NaN (raised by ``raise_for_nan``).
    """
    # To reduce the number of cases, we ensure that `left` & `left_mask`
    # always come from an array, not a scalar. This is safe, since
    # A | B == B | A
    if left_mask is None:
        return kleene_or(right, left, right_mask, left_mask)

    if not isinstance(left, np.ndarray):
        raise TypeError("Either `left` or `right` need to be a np.ndarray.")

    raise_for_nan(right, method="or")

    if isinstance(right, np.bool_):
        # The scalar branch below tests ``right is True`` by identity, which
        # a numpy boolean scalar never satisfies; without this conversion
        # np.True_ would be handled like False and NA | True would stay NA.
        right = bool(right)

    if right is libmissing.NA:
        result = left.copy()
    else:
        result = left | right

    if right_mask is not None:
        # output is unknown where (False | NA), (NA | False), (NA | NA)
        left_false = ~(left | left_mask)
        right_false = ~(right | right_mask)
        mask = (
            (left_false & right_mask)
            | (right_false & left_mask)
            | (left_mask & right_mask)
        )
    else:
        if right is True:
            # anything | True is True, so nothing remains unknown
            mask = np.zeros_like(left_mask)
        elif right is libmissing.NA:
            # unknown wherever left is a valid False or is itself NA
            mask = (~left & ~left_mask) | left_mask
        else:
            # False
            mask = left_mask.copy()

    return result, mask
|
||||
|
||||
|
||||
def kleene_xor(
    left: bool | np.ndarray | libmissing.NAType,
    right: bool | np.ndarray | libmissing.NAType,
    left_mask: np.ndarray | None,
    right_mask: np.ndarray | None,
):
    """
    Boolean ``xor`` using Kleene logic.

    This is the same as ``or``, with the following adjustments

    * True, True -> False
    * True, NA   -> NA

    Parameters
    ----------
    left, right : ndarray, NA, or bool
        The values of the array.
    left_mask, right_mask : ndarray, optional
        The masks. Only one of these may be None, which implies that
        the associated `left` or `right` value is a scalar.

    Returns
    -------
    result, mask: ndarray[bool]
        The result of the logical xor, and the new mask.
    """
    # xor is commutative (A ^ B == B ^ A), so swap the operands when needed
    # to guarantee that `left` & `left_mask` come from an array.
    if left_mask is None:
        return kleene_xor(right, left, right_mask, left_mask)

    if not isinstance(left, np.ndarray):
        raise TypeError("Either `left` or `right` need to be a np.ndarray.")

    raise_for_nan(right, method="xor")

    right_is_na = right is libmissing.NA
    result = np.zeros_like(left) if right_is_na else left ^ right

    if right_mask is not None:
        # array operand: unknown wherever either side is unknown
        mask = left_mask | right_mask
    elif right_is_na:
        # scalar NA: every position becomes unknown
        mask = np.ones_like(left_mask)
    else:
        # scalar bool: unknown positions are exactly those of `left`
        mask = left_mask.copy()

    return result, mask
|
||||
|
||||
|
||||
def kleene_and(
    left: bool | libmissing.NAType | np.ndarray,
    right: bool | libmissing.NAType | np.ndarray,
    left_mask: np.ndarray | None,
    right_mask: np.ndarray | None,
):
    """
    Boolean ``and`` using Kleene logic.

    Values are ``NA`` for ``NA & NA`` or ``True & NA``.

    Parameters
    ----------
    left, right : ndarray, NA, or bool
        The values of the array.
    left_mask, right_mask : ndarray, optional
        The masks. Only one of these may be None, which implies that
        the associated `left` or `right` value is a scalar.

    Returns
    -------
    result, mask: ndarray[bool]
        The result of the logical and, and the new mask.

    Raises
    ------
    TypeError
        If, after the operands are ordered, ``left`` is not an ndarray.
    ValueError
        If ``right`` is a floating NaN (raised by ``raise_for_nan``).
    """
    # To reduce the number of cases, we ensure that `left` & `left_mask`
    # always come from an array, not a scalar. This is safe, since
    # A & B == B & A
    if left_mask is None:
        return kleene_and(right, left, right_mask, left_mask)

    if not isinstance(left, np.ndarray):
        raise TypeError("Either `left` or `right` need to be a np.ndarray.")
    raise_for_nan(right, method="and")

    if isinstance(right, np.bool_):
        # The scalar branch below tests ``right is False`` by identity, which
        # a numpy boolean scalar never satisfies; without this conversion
        # NA & np.False_ would stay NA instead of becoming False.
        right = bool(right)

    if right is libmissing.NA:
        result = np.zeros_like(left)
    else:
        result = left & right

    if right_mask is None:
        # Scalar `right`
        if right is libmissing.NA:
            # unknown wherever left is a valid True or is itself NA
            mask = (left & ~left_mask) | left_mask

        else:
            mask = left_mask.copy()
            if right is False:
                # unmask everything
                mask[:] = False
    else:
        # unmask where either left or right is False
        left_false = ~(left | left_mask)
        right_false = ~(right | right_mask)
        mask = (left_mask & ~right_false) | (right_mask & ~left_false)

    return result, mask
|
||||
|
||||
|
||||
def raise_for_nan(value, method: str):
    """
    Reject a floating NaN operand of a logical operation.

    Parameters
    ----------
    value : object
        The operand to check.
    method : str
        Name of the logical operation; only used in the error message.

    Raises
    ------
    ValueError
        If `value` is a float (per ``lib.is_float``) and is NaN.
    """
    if not lib.is_float(value):
        # Only floats can be NaN; anything else passes through.
        return
    if np.isnan(value):
        raise ValueError(f"Cannot perform logical '{method}' with floating NaN")
|
122
.venv/Lib/site-packages/pandas/core/ops/methods.py
Normal file
122
.venv/Lib/site-packages/pandas/core/ops/methods.py
Normal file
@ -0,0 +1,122 @@
|
||||
"""
|
||||
Functions to generate methods and pin them to the appropriate classes.
|
||||
"""
|
||||
import operator
|
||||
|
||||
from pandas.core.dtypes.generic import (
|
||||
ABCDataFrame,
|
||||
ABCSeries,
|
||||
)
|
||||
|
||||
from pandas.core.ops import roperator
|
||||
|
||||
|
||||
def _get_method_wrappers(cls):
    """
    Select the flex-method factories that match the given class.

    Parameters
    ----------
    cls : class
        Checked with ``issubclass`` against ABCSeries and ABCDataFrame.

    Returns
    -------
    arith_flex : function
        Factory used for the flex arithmetic methods.
    comp_flex : function
        Factory used for the flex comparison methods.

    Notes
    -----
    A class that is neither a Series nor a DataFrame subclass matches no
    branch, so the return statement fails with UnboundLocalError.
    """
    # TODO: make these non-runtime imports once the relevant functions
    #  are no longer in __init__
    from pandas.core.ops import (
        flex_arith_method_FRAME,
        flex_comp_method_FRAME,
        flex_method_SERIES,
    )

    if issubclass(cls, ABCSeries):
        # Series uses a single factory for both kinds of method.
        arith_flex = comp_flex = flex_method_SERIES
    elif issubclass(cls, ABCDataFrame):
        arith_flex, comp_flex = flex_arith_method_FRAME, flex_comp_method_FRAME
    return arith_flex, comp_flex
|
||||
|
||||
|
||||
def add_flex_arithmetic_methods(cls):
    """
    Build the flex arithmetic and comparison methods and attach them to `cls`.

    The methods come from ``_create_methods``; the long-form aliases
    ``multiply``, ``subtract`` and ``divide`` are added for ``mul``, ``sub``
    and ``div`` before everything is pinned with ``_add_methods``.

    Parameters
    ----------
    cls : class
        flex methods will be defined and pinned to this class
    """
    arith_wrapper, comp_wrapper = _get_method_wrappers(cls)
    new_methods = _create_methods(cls, arith_wrapper, comp_wrapper)

    # Long-form names share the function object of their short counterpart.
    for alias, short_name in (
        ("multiply", "mul"),
        ("subtract", "sub"),
        ("divide", "div"),
    ):
        new_methods[alias] = new_methods[short_name]

    # opt out of bool flex methods for now
    assert all(kname not in new_methods for kname in ("ror_", "rxor", "rand_"))

    _add_methods(cls, new_methods=new_methods)
|
||||
|
||||
|
||||
def _create_methods(cls, arith_method, comp_method):
    """
    Build the mapping of flex method names to method objects.

    Parameters
    ----------
    cls : class
        Only used to decide whether ``divmod``/``rdivmod`` are included
        (they are for ABCSeries subclasses).
    arith_method : callable
        Called once per arithmetic operator; its return value is stored.
    comp_method : callable
        Called once per comparison operator; its return value is stored.

    Returns
    -------
    dict
        Method name (surrounding underscores stripped) -> method object.
        ``div`` and ``rdiv`` reuse the ``truediv`` and ``rtruediv`` objects.
    """
    # divmod is available for Series
    include_divmod = issubclass(cls, ABCSeries)

    arith_ops = (
        ("add", operator.add),
        ("radd", roperator.radd),
        ("sub", operator.sub),
        ("mul", operator.mul),
        ("truediv", operator.truediv),
        ("floordiv", operator.floordiv),
        ("mod", operator.mod),
        ("pow", operator.pow),
        ("rmul", roperator.rmul),
        ("rsub", roperator.rsub),
        ("rtruediv", roperator.rtruediv),
        ("rfloordiv", roperator.rfloordiv),
        ("rpow", roperator.rpow),
        ("rmod", roperator.rmod),
    )
    comp_ops = (
        ("eq", operator.eq),
        ("ne", operator.ne),
        ("lt", operator.lt),
        ("gt", operator.gt),
        ("le", operator.le),
        ("ge", operator.ge),
    )

    methods = {name: arith_method(op) for name, op in arith_ops}
    methods["div"] = methods["truediv"]
    methods["rdiv"] = methods["rtruediv"]

    if include_divmod:
        # divmod doesn't have an op that is supported by numexpr
        methods["divmod"] = arith_method(divmod)
        methods["rdivmod"] = arith_method(roperator.rdivmod)

    for name, op in comp_ops:
        methods[name] = comp_method(op)

    return {name.strip("_"): method for name, method in methods.items()}
|
||||
|
||||
|
||||
def _add_methods(cls, new_methods):
    """
    Set every entry of `new_methods` as an attribute on `cls`.

    Parameters
    ----------
    cls : class
        Object that receives the attributes.
    new_methods : dict
        Attribute name -> value to assign.
    """
    for entry in new_methods.items():
        # entry is a (name, method) pair
        setattr(cls, *entry)
|
181
.venv/Lib/site-packages/pandas/core/ops/missing.py
Normal file
181
.venv/Lib/site-packages/pandas/core/ops/missing.py
Normal file
@ -0,0 +1,181 @@
|
||||
"""
|
||||
Missing data handling for arithmetic operations.
|
||||
|
||||
In particular, pandas conventions regarding division by zero differ
|
||||
from numpy in the following ways:
|
||||
1) np.array([-1, 0, 1], dtype=dtype1) // np.array([0, 0, 0], dtype=dtype2)
|
||||
gives [nan, nan, nan] for most dtype combinations, and [0, 0, 0] for
|
||||
the remaining pairs
|
||||
(the remaining being dtype1==dtype2==intN and dtype1==dtype2==uintN).
|
||||
|
||||
pandas convention is to return [-inf, nan, inf] for all dtype
|
||||
combinations.
|
||||
|
||||
Note: the numpy behavior described here is py3-specific.
|
||||
|
||||
2) np.array([-1, 0, 1], dtype=dtype1) % np.array([0, 0, 0], dtype=dtype2)
|
||||
gives precisely the same results as the // operation.
|
||||
|
||||
pandas convention is to return [nan, nan, nan] for all dtype
|
||||
combinations.
|
||||
|
||||
3) divmod behavior consistent with 1) and 2).
|
||||
"""
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_float_dtype,
|
||||
is_integer_dtype,
|
||||
is_scalar,
|
||||
)
|
||||
|
||||
from pandas.core.ops import roperator
|
||||
|
||||
|
||||
def _fill_zeros(result, x, y):
    """
    Set entries of `result` to NaN where the integer operand `y` is zero.

    `result` is returned untouched when it already has a float dtype, when
    `y` is neither a scalar nor an object with a ``dtype``, when `y` is not
    of integer dtype, or when `y` contains no zeros. Otherwise `result` is
    cast to float64 and NaN is written wherever `y` is zero.

    Parameters
    ----------
    result : ndarray
        Outcome of the operation.
    x : object
        Not used by this function.
    y : scalar or object with a ``dtype``
        Operand inspected for zeros.

    Returns
    -------
    ndarray
    """
    if is_float_dtype(result.dtype):
        return result

    has_dtype = hasattr(y, "dtype")
    y_is_scalar = is_scalar(y)
    if not (has_dtype or y_is_scalar):
        return result

    if y_is_scalar:
        y = np.array(y)

    if not is_integer_dtype(y.dtype):
        return result

    zero_positions = y == 0
    if not zero_positions.any():
        return result

    # GH#7325, mask and nans must be broadcastable
    fill_positions = zero_positions & ~np.isnan(result)

    # GH#9308 doing ravel on result and mask can improve putmask perf,
    # but can also make unwanted copies.
    filled = result.astype("float64", copy=False)
    np.putmask(filled, fill_positions, np.nan)
    return filled
|
||||
|
||||
|
||||
def mask_zero_div_zero(x, y, result: np.ndarray) -> np.ndarray:
    """
    Fill the positions of `result` that correspond to division by zero.

    Where `y` is zero, the result becomes NaN if `x` is zero as well, and
    +inf or -inf otherwise, with the sign following the signs of `x` and of
    the zero in `y` (``-0.0`` flips it). This is done regardless of the
    dtypes of the numerator or the denominator.

    Parameters
    ----------
    x : ndarray
    y : ndarray
    result : ndarray

    Returns
    -------
    ndarray
        The filled result.

    Examples
    --------
    >>> x = np.array([1, 0, -1], dtype=np.int64)
    >>> x
    array([ 1,  0, -1])
    >>> y = 0       # int 0; numpy behavior is different with float
    >>> result = x // y
    >>> result      # raw numpy result does not fill division by zero
    array([0, 0, 0])
    >>> mask_zero_div_zero(x, y, result)
    array([ inf,  nan, -inf])
    """
    # e.g. scalar, tuple
    denom = y if hasattr(y, "dtype") else np.array(y)
    numer = x if hasattr(x, "dtype") else np.array(x)

    denom_is_zero = denom == 0
    if not denom_is_zero.any():
        return result

    # Flip sign if necessary for -0.0
    denom_neg_zero = denom_is_zero & np.signbit(denom)
    denom_pos_zero = denom_is_zero & ~denom_neg_zero

    numer_negative = numer < 0
    numer_positive = numer > 0
    to_nan = denom_is_zero & (numer == 0)
    with np.errstate(invalid="ignore"):
        to_neginf = (denom_pos_zero & numer_negative) | (
            denom_neg_zero & numer_positive
        )
        to_posinf = (denom_pos_zero & numer_positive) | (
            denom_neg_zero & numer_negative
        )

    if not (to_nan.any() or to_neginf.any() or to_posinf.any()):
        return result

    # Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN
    filled = result.astype("float64", copy=False)
    filled[to_nan] = np.nan
    filled[to_posinf] = np.inf
    filled[to_neginf] = -np.inf
    return filled
|
||||
|
||||
|
||||
def dispatch_fill_zeros(op, left, right, result):
    """
    Apply the division-by-zero fix-up that matches the operation.

    Floor division results go through ``mask_zero_div_zero`` and modulo
    results through ``_fill_zeros``. For the reversed operators the operands
    are swapped before they are passed on. Any other `op` returns `result`
    unchanged.

    Parameters
    ----------
    op : function (operator.floordiv, operator.mod, divmod, ...)
    left : object (np.ndarray for non-reversed ops)
    right : object (np.ndarray for reversed ops)
    result : ndarray

    Returns
    -------
    result : np.ndarray

    Notes
    -----
    For divmod and rdivmod, the `result` parameter and returned `result`
    is a 2-tuple of ndarray objects.
    """
    if op is divmod:
        return (
            mask_zero_div_zero(left, right, result[0]),
            _fill_zeros(result[1], left, right),
        )
    if op is roperator.rdivmod:
        return (
            mask_zero_div_zero(right, left, result[0]),
            _fill_zeros(result[1], right, left),
        )

    # Note: no need to do this for truediv/rtruediv; in py3 numpy behaves
    # the way we want.
    if op is operator.floordiv:
        return mask_zero_div_zero(left, right, result)
    if op is roperator.rfloordiv:
        return mask_zero_div_zero(right, left, result)

    if op is operator.mod:
        return _fill_zeros(result, left, right)
    if op is roperator.rmod:
        return _fill_zeros(result, right, left)

    return result
|
Reference in New Issue
Block a user