Mirror of https://github.com/aykhans/AzSuicideDataVisualization.git
Synced 2025-07-03 22:57:06 +00:00

Commit: first commit
.venv/Lib/site-packages/pandas/core/window/__init__.py (new file, 13 lines)
@@ -0,0 +1,13 @@
from pandas.core.window.ewm import (  # noqa:F401
    ExponentialMovingWindow,
    ExponentialMovingWindowGroupby,
)
from pandas.core.window.expanding import (  # noqa:F401
    Expanding,
    ExpandingGroupby,
)
from pandas.core.window.rolling import (  # noqa:F401
    Rolling,
    RollingGroupby,
    Window,
)
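
These re-exports are the classes returned by the public windowing accessors. A minimal sketch of how each class is reached through the DataFrame API (assumes pandas is installed; the weighted Window additionally needs SciPy):

import pandas as pd

df = pd.DataFrame({"x": [1.0, 2.0, 3.0, 4.0]})

# Each accessor returns one of the classes re-exported above.
print(type(df.rolling(2)).__name__)              # Rolling
print(type(df.expanding()).__name__)             # Expanding
print(type(df.ewm(alpha=0.5)).__name__)          # ExponentialMovingWindow
print(type(df.rolling(2, win_type="triang")).__name__)  # Window (needs SciPy)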
.venv/Lib/site-packages/pandas/core/window/common.py (new file, 167 lines)
@@ -0,0 +1,167 @@
"""Common utility functions for rolling operations"""
from collections import defaultdict
from typing import cast

import numpy as np

from pandas.core.dtypes.generic import (
    ABCDataFrame,
    ABCSeries,
)

from pandas.core.indexes.api import MultiIndex


def flex_binary_moment(arg1, arg2, f, pairwise=False):

    if isinstance(arg1, ABCSeries) and isinstance(arg2, ABCSeries):
        X, Y = prep_binary(arg1, arg2)
        return f(X, Y)

    elif isinstance(arg1, ABCDataFrame):
        from pandas import DataFrame

        def dataframe_from_int_dict(data, frame_template):
            result = DataFrame(data, index=frame_template.index)
            if len(result.columns) > 0:
                result.columns = frame_template.columns[result.columns]
            return result

        results = {}
        if isinstance(arg2, ABCDataFrame):
            if pairwise is False:
                if arg1 is arg2:
                    # special case in order to handle duplicate column names
                    for i in range(len(arg1.columns)):
                        results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i])
                    return dataframe_from_int_dict(results, arg1)
                else:
                    if not arg1.columns.is_unique:
                        raise ValueError("'arg1' columns are not unique")
                    if not arg2.columns.is_unique:
                        raise ValueError("'arg2' columns are not unique")
                    X, Y = arg1.align(arg2, join="outer")
                    X, Y = prep_binary(X, Y)
                    res_columns = arg1.columns.union(arg2.columns)
                    for col in res_columns:
                        if col in X and col in Y:
                            results[col] = f(X[col], Y[col])
                    return DataFrame(results, index=X.index, columns=res_columns)
            elif pairwise is True:
                results = defaultdict(dict)
                for i in range(len(arg1.columns)):
                    for j in range(len(arg2.columns)):
                        if j < i and arg2 is arg1:
                            # Symmetric case
                            results[i][j] = results[j][i]
                        else:
                            results[i][j] = f(
                                *prep_binary(arg1.iloc[:, i], arg2.iloc[:, j])
                            )

                from pandas import concat

                result_index = arg1.index.union(arg2.index)
                if len(result_index):

                    # construct result frame
                    result = concat(
                        [
                            concat(
                                [results[i][j] for j in range(len(arg2.columns))],
                                ignore_index=True,
                            )
                            for i in range(len(arg1.columns))
                        ],
                        ignore_index=True,
                        axis=1,
                    )
                    result.columns = arg1.columns

                    # set the index and reorder
                    if arg2.columns.nlevels > 1:
                        # mypy needs to know columns is a MultiIndex, Index doesn't
                        # have levels attribute
                        arg2.columns = cast(MultiIndex, arg2.columns)
                        # GH 21157: Equivalent to MultiIndex.from_product(
                        #     [result_index], <unique combinations of arg2.columns.levels>,
                        # )
                        # A normal MultiIndex.from_product will produce too many
                        # combinations.
                        result_level = np.tile(
                            result_index, len(result) // len(result_index)
                        )
                        arg2_levels = (
                            np.repeat(
                                arg2.columns.get_level_values(i),
                                len(result) // len(arg2.columns),
                            )
                            for i in range(arg2.columns.nlevels)
                        )
                        result_names = list(arg2.columns.names) + [result_index.name]
                        result.index = MultiIndex.from_arrays(
                            [*arg2_levels, result_level], names=result_names
                        )
                        # GH 34440
                        num_levels = len(result.index.levels)
                        new_order = [num_levels - 1] + list(range(num_levels - 1))
                        result = result.reorder_levels(new_order).sort_index()
                    else:
                        result.index = MultiIndex.from_product(
                            [range(len(arg2.columns)), range(len(result_index))]
                        )
                        result = result.swaplevel(1, 0).sort_index()
                        result.index = MultiIndex.from_product(
                            [result_index] + [arg2.columns]
                        )
                else:

                    # empty result
                    result = DataFrame(
                        index=MultiIndex(
                            levels=[arg1.index, arg2.columns], codes=[[], []]
                        ),
                        columns=arg2.columns,
                        dtype="float64",
                    )

                # reset our index names to arg1 names
                # reset our column names to arg2 names
                # careful not to mutate the original names
                result.columns = result.columns.set_names(arg1.columns.names)
                result.index = result.index.set_names(
                    result_index.names + arg2.columns.names
                )

                return result
            else:
                results = {
                    i: f(*prep_binary(arg1.iloc[:, i], arg2))
                    for i in range(len(arg1.columns))
                }
                return dataframe_from_int_dict(results, arg1)

    else:
        return flex_binary_moment(arg2, arg1, f)


def zsqrt(x):
    with np.errstate(all="ignore"):
        result = np.sqrt(x)
        mask = x < 0

    if isinstance(x, ABCDataFrame):
        if mask._values.any():
            result[mask] = 0
    else:
        if mask.any():
            result[mask] = 0

    return result


def prep_binary(arg1, arg2):
    # mask out values, this also makes a common index...
    X = arg1 + 0 * arg2
    Y = arg2 + 0 * arg1
    return X, Y
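
flex_binary_moment is the dispatcher behind the pairwise cov/corr paths, and prep_binary is the alignment helper it relies on. A minimal sketch of both, assuming pandas is installed (prep_binary is a private helper of this vendored module and may change between versions):

import pandas as pd

df = pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [2.0, 1.0, 4.0]})

# pairwise=True is routed through flex_binary_moment: the result is a
# MultiIndexed DataFrame with the original index on the first level and
# the columns of `other` on the second.
print(df.expanding().cov(df, pairwise=True))

# prep_binary aligns two inputs onto their union index by adding 0 * other,
# so positions missing from either input become NaN in both outputs.
from pandas.core.window.common import prep_binary

s1 = pd.Series([1.0, 2.0], index=[0, 1])
s2 = pd.Series([10.0], index=[1])
X, Y = prep_binary(s1, s2)
print(X)  # index [0, 1]; position 0 is NaN because s2 has no value there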
.venv/Lib/site-packages/pandas/core/window/doc.py (new file, 125 lines)
@@ -0,0 +1,125 @@
"""Any shareable docstring components for rolling/expanding/ewm"""
from textwrap import dedent

from pandas.core.shared_docs import _shared_docs

_shared_docs = dict(**_shared_docs)


def create_section_header(header: str) -> str:
    """Create numpydoc section header"""
    return "\n".join((header, "-" * len(header))) + "\n"


template_header = "\nCalculate the {window_method} {aggregation_description}.\n\n"

template_returns = dedent(
    """
    Series or DataFrame
        Return type is the same as the original object with ``np.float64`` dtype.\n
    """
).replace("\n", "", 1)

template_see_also = dedent(
    """
    pandas.Series.{window_method} : Calling {window_method} with Series data.
    pandas.DataFrame.{window_method} : Calling {window_method} with DataFrames.
    pandas.Series.{agg_method} : Aggregating {agg_method} for Series.
    pandas.DataFrame.{agg_method} : Aggregating {agg_method} for DataFrame.\n
    """
).replace("\n", "", 1)

args_compat = dedent(
    """
    *args
        For NumPy compatibility and will not have an effect on the result.\n
    """
).replace("\n", "", 1)

kwargs_compat = dedent(
    """
    **kwargs
        For NumPy compatibility and will not have an effect on the result.\n
    """
).replace("\n", "", 1)

kwargs_scipy = dedent(
    """
    **kwargs
        Keyword arguments to configure the ``SciPy`` weighted window type.\n
    """
).replace("\n", "", 1)

window_apply_parameters = dedent(
    """
    func : function
        Must produce a single value from an ndarray input if ``raw=True``
        or a single value from a Series if ``raw=False``. Can also accept a
        Numba JIT function with ``engine='numba'`` specified.

        .. versionchanged:: 1.0.0

    raw : bool, default False
        * ``False`` : passes each row or column as a Series to the
          function.
        * ``True`` : the passed function will receive ndarray
          objects instead.
          If you are just applying a NumPy reduction function this will
          achieve much better performance.

    engine : str, default None
        * ``'cython'`` : Runs rolling apply through C-extensions from cython.
        * ``'numba'`` : Runs rolling apply through JIT compiled code from numba.
          Only available when ``raw`` is set to ``True``.
        * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba``

          .. versionadded:: 1.0.0

    engine_kwargs : dict, default None
        * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
        * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
          and ``parallel`` dictionary keys. The values must either be ``True`` or
          ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
          ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` and will be
          applied to both the ``func`` and the ``apply`` rolling aggregation.

          .. versionadded:: 1.0.0

    args : tuple, default None
        Positional arguments to be passed into func.

    kwargs : dict, default None
        Keyword arguments to be passed into func.\n
    """
).replace("\n", "", 1)

numba_notes = (
    "See :ref:`window.numba_engine` and :ref:`enhancingperf.numba` for "
    "extended documentation and performance considerations for the Numba engine.\n\n"
)


def window_agg_numba_parameters(version: str = "1.3") -> str:
    return (
        dedent(
            """
        engine : str, default None
            * ``'cython'`` : Runs the operation through C-extensions from cython.
            * ``'numba'`` : Runs the operation through JIT compiled code from numba.
            * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba``

              .. versionadded:: {version}.0

        engine_kwargs : dict, default None
            * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
            * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
              and ``parallel`` dictionary keys. The values must either be ``True`` or
              ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
              ``{{'nopython': True, 'nogil': False, 'parallel': False}}``

              .. versionadded:: {version}.0\n
        """
        )
        .replace("\n", "", 1)
        .replace("{version}", version)
    )
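
These fragments are concatenated into method docstrings by the @doc decorator (used throughout expanding.py below). A minimal sketch of the composition, reusing only names defined in this file (runnable against the vendored module):

from pandas.core.window.doc import (
    create_section_header,
    template_header,
)

# create_section_header produces a numpydoc underlined heading.
print(create_section_header("Returns"), end="")
# Returns
# -------

# template_header is a plain format string; @doc fills the same fields.
print(template_header.format(
    window_method="expanding", aggregation_description="sum"
))
# Calculate the expanding sum.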
.venv/Lib/site-packages/pandas/core/window/ewm.py (new file, 1025 lines)
File diff suppressed because it is too large.
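
The suppressed file defines ExponentialMovingWindow and ExponentialMovingWindowGroupby (re-exported in __init__.py above). A minimal usage sketch; the printed values match the aggregate docstring example in expanding.py below:

import pandas as pd

s = pd.Series([1.0, 2.0, 3.0])
# adjust=True (the default) uses weights (1 - alpha) ** i, renormalized
# at every step.
print(s.ewm(alpha=0.5).mean())
# 0    1.000000
# 1    1.666667
# 2    2.428571
# dtype: float64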
.venv/Lib/site-packages/pandas/core/window/expanding.py (new file, 807 lines)
@@ -0,0 +1,807 @@
from __future__ import annotations

from textwrap import dedent
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
)

from pandas._typing import (
    Axis,
    WindowingRankType,
)

if TYPE_CHECKING:
    from pandas import DataFrame, Series
    from pandas.core.generic import NDFrame

from pandas.compat.numpy import function as nv
from pandas.util._decorators import doc

from pandas.core.indexers.objects import (
    BaseIndexer,
    ExpandingIndexer,
    GroupbyIndexer,
)
from pandas.core.window.doc import (
    _shared_docs,
    args_compat,
    create_section_header,
    kwargs_compat,
    numba_notes,
    template_header,
    template_returns,
    template_see_also,
    window_agg_numba_parameters,
    window_apply_parameters,
)
from pandas.core.window.rolling import (
    BaseWindowGroupby,
    RollingAndExpandingMixin,
)


class Expanding(RollingAndExpandingMixin):
    """
    Provide expanding window calculations.

    Parameters
    ----------
    min_periods : int, default 1
        Minimum number of observations in window required to have a value;
        otherwise, result is ``np.nan``.

    center : bool, default False
        If False, set the window labels as the right edge of the window index.

        If True, set the window labels as the center of the window index.

        .. deprecated:: 1.1.0

    axis : int or str, default 0
        If ``0`` or ``'index'``, roll across the rows.

        If ``1`` or ``'columns'``, roll across the columns.

    method : str {'single', 'table'}, default 'single'
        Execute the rolling operation per single column or row (``'single'``)
        or over the entire object (``'table'``).

        This argument is only implemented when specifying ``engine='numba'``
        in the method call.

        .. versionadded:: 1.3.0

    Returns
    -------
    ``Expanding`` subclass

    See Also
    --------
    rolling : Provides rolling window calculations.
    ewm : Provides exponential weighted functions.

    Notes
    -----
    See :ref:`Windowing Operations <window.expanding>` for further usage details
    and examples.

    Examples
    --------
    >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
    >>> df
         B
    0  0.0
    1  1.0
    2  2.0
    3  NaN
    4  4.0

    **min_periods**

    Expanding sum with 1 vs 3 observations needed to calculate a value.

    >>> df.expanding(1).sum()
         B
    0  0.0
    1  1.0
    2  3.0
    3  3.0
    4  7.0
    >>> df.expanding(3).sum()
         B
    0  NaN
    1  NaN
    2  3.0
    3  3.0
    4  7.0
    """

    _attributes: list[str] = ["min_periods", "center", "axis", "method"]

    def __init__(
        self,
        obj: NDFrame,
        min_periods: int = 1,
        center=None,
        axis: Axis = 0,
        method: str = "single",
        selection=None,
    ):
        super().__init__(
            obj=obj,
            min_periods=min_periods,
            center=center,
            axis=axis,
            method=method,
            selection=selection,
        )

    def _get_window_indexer(self) -> BaseIndexer:
        """
        Return an indexer class that will compute the window start and end bounds
        """
        return ExpandingIndexer()

    @doc(
        _shared_docs["aggregate"],
        see_also=dedent(
            """
        See Also
        --------
        pandas.DataFrame.aggregate : Similar DataFrame method.
        pandas.Series.aggregate : Similar Series method.
        """
        ),
        examples=dedent(
            """
        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        >>> df
           A  B  C
        0  1  4  7
        1  2  5  8
        2  3  6  9

        >>> df.ewm(alpha=0.5).mean()
                  A         B         C
        0  1.000000  4.000000  7.000000
        1  1.666667  4.666667  7.666667
        2  2.428571  5.428571  8.428571
        """
        ),
        klass="Series/Dataframe",
        axis="",
    )
    def aggregate(self, func, *args, **kwargs):
        return super().aggregate(func, *args, **kwargs)

    agg = aggregate

    @doc(
        template_header,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="expanding",
        aggregation_description="count of non NaN observations",
        agg_method="count",
    )
    def count(self):
        return super().count()

    @doc(
        template_header,
        create_section_header("Parameters"),
        window_apply_parameters,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="expanding",
        aggregation_description="custom aggregation function",
        agg_method="apply",
    )
    def apply(
        self,
        func: Callable[..., Any],
        raw: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        args: tuple[Any, ...] | None = None,
        kwargs: dict[str, Any] | None = None,
    ):
        return super().apply(
            func,
            raw=raw,
            engine=engine,
            engine_kwargs=engine_kwargs,
            args=args,
            kwargs=kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        args_compat,
        window_agg_numba_parameters(),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="expanding",
        aggregation_description="sum",
        agg_method="sum",
    )
    def sum(
        self,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_expanding_func("sum", args, kwargs)
        return super().sum(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        args_compat,
        window_agg_numba_parameters(),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="expanding",
        aggregation_description="maximum",
        agg_method="max",
    )
    def max(
        self,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_expanding_func("max", args, kwargs)
        return super().max(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        args_compat,
        window_agg_numba_parameters(),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="expanding",
        aggregation_description="minimum",
        agg_method="min",
    )
    def min(
        self,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_expanding_func("min", args, kwargs)
        return super().min(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        args_compat,
        window_agg_numba_parameters(),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="expanding",
        aggregation_description="mean",
        agg_method="mean",
    )
    def mean(
        self,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_expanding_func("mean", args, kwargs)
        return super().mean(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        window_agg_numba_parameters(),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="expanding",
        aggregation_description="median",
        agg_method="median",
    )
    def median(
        self,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        return super().median(engine=engine, engine_kwargs=engine_kwargs, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        args_compat,
        window_agg_numba_parameters("1.4"),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.std : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.std` is different
        than the default ``ddof`` of 0 in :func:`numpy.std`.

        A minimum of one period is required for the rolling calculation.\n
        """
        ).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])

        >>> s.expanding(3).std()
        0         NaN
        1         NaN
        2    0.577350
        3    0.957427
        4    0.894427
        5    0.836660
        6    0.786796
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="standard deviation",
        agg_method="std",
    )
    def std(
        self,
        ddof: int = 1,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_expanding_func("std", args, kwargs)
        return super().std(
            ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        args_compat,
        window_agg_numba_parameters("1.4"),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.var : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.var` is different
        than the default ``ddof`` of 0 in :func:`numpy.var`.

        A minimum of one period is required for the rolling calculation.\n
        """
        ).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])

        >>> s.expanding(3).var()
        0         NaN
        1         NaN
        2    0.333333
        3    0.916667
        4    0.800000
        5    0.700000
        6    0.619048
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="variance",
        agg_method="var",
    )
    def var(
        self,
        ddof: int = 1,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        nv.validate_expanding_func("var", args, kwargs)
        return super().var(
            ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        args_compat,
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        "A minimum of one period is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([0, 1, 2, 3])

        >>> s.expanding().sem()
        0         NaN
        1    0.707107
        2    0.707107
        3    0.745356
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="standard error of mean",
        agg_method="sem",
    )
    def sem(self, ddof: int = 1, *args, **kwargs):
        return super().sem(ddof=ddof, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.skew : Third moment of a probability density.\n",
        template_see_also,
        create_section_header("Notes"),
        "A minimum of three periods is required for the rolling calculation.\n",
        window_method="expanding",
        aggregation_description="unbiased skewness",
        agg_method="skew",
    )
    def skew(self, **kwargs):
        return super().skew(**kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.kurtosis : Reference SciPy method.\n",
        template_see_also,
        create_section_header("Notes"),
        "A minimum of four periods is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        The example below will show a rolling calculation with a window size of
        four matching the equivalent function call using `scipy.stats`.

        >>> arr = [1, 2, 3, 4, 999]
        >>> import scipy.stats
        >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}")
        -1.200000
        >>> print(f"{{scipy.stats.kurtosis(arr, bias=False):.6f}}")
        4.999874
        >>> s = pd.Series(arr)
        >>> s.expanding(4).kurt()
        0         NaN
        1         NaN
        2         NaN
        3   -1.200000
        4    4.999874
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="Fisher's definition of kurtosis without bias",
        agg_method="kurt",
    )
    def kurt(self, **kwargs):
        return super().kurt(**kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        quantile : float
            Quantile to compute. 0 <= quantile <= 1.
        interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}}
            This optional parameter specifies the interpolation method to use,
            when the desired quantile lies between two data points `i` and `j`:

                * linear: `i + (j - i) * fraction`, where `fraction` is the
                  fractional part of the index surrounded by `i` and `j`.
                * lower: `i`.
                * higher: `j`.
                * nearest: `i` or `j` whichever is nearest.
                * midpoint: (`i` + `j`) / 2.
        """
        ).replace("\n", "", 1),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="expanding",
        aggregation_description="quantile",
        agg_method="quantile",
    )
    def quantile(
        self,
        quantile: float,
        interpolation: str = "linear",
        **kwargs,
    ):
        return super().quantile(
            quantile=quantile,
            interpolation=interpolation,
            **kwargs,
        )

    @doc(
        template_header,
        ".. versionadded:: 1.4.0 \n\n",
        create_section_header("Parameters"),
        dedent(
            """
        method : {{'average', 'min', 'max'}}, default 'average'
            How to rank the group of records that have the same value (i.e. ties):

            * average: average rank of the group
            * min: lowest rank in the group
            * max: highest rank in the group

        ascending : bool, default True
            Whether or not the elements should be ranked in ascending order.
        pct : bool, default False
            Whether or not to display the returned rankings in percentile
            form.
        """
        ).replace("\n", "", 1),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([1, 4, 2, 3, 5, 3])
        >>> s.expanding().rank()
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    3.5
        dtype: float64

        >>> s.expanding().rank(method="max")
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    4.0
        dtype: float64

        >>> s.expanding().rank(method="min")
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    3.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="rank",
        agg_method="rank",
    )
    def rank(
        self,
        method: WindowingRankType = "average",
        ascending: bool = True,
        pct: bool = False,
        **kwargs,
    ):
        return super().rank(
            method=method,
            ascending=ascending,
            pct=pct,
            **kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        ddof : int, default 1
            Delta Degrees of Freedom. The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="expanding",
        aggregation_description="sample covariance",
        agg_method="cov",
    )
    def cov(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        **kwargs,
    ):
        return super().cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        """
        ).replace("\n", "", 1),
        kwargs_compat,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        dedent(
            """
        cov : Similar method to calculate covariance.
        numpy.corrcoef : NumPy Pearson's correlation calculation.
        """
        ).replace("\n", "", 1),
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        This function uses Pearson's definition of correlation
        (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient).

        When `other` is not specified, the output will be self correlation (e.g.
        all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
        set to `True`.

        Function will return ``NaN`` for correlations of equal valued sequences;
        this is the result of a 0/0 division error.

        When `pairwise` is set to `False`, only matching columns between `self` and
        `other` will be used.

        When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame
        with the original index on the first level, and the `other` DataFrame
        columns on the second level.

        In the case of missing elements, only complete pairwise observations
        will be used.
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="correlation",
        agg_method="corr",
    )
    def corr(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        **kwargs,
    ):
        return super().corr(other=other, pairwise=pairwise, ddof=ddof, **kwargs)


class ExpandingGroupby(BaseWindowGroupby, Expanding):
    """
    Provide an expanding groupby implementation.
    """

    _attributes = Expanding._attributes + BaseWindowGroupby._attributes

    def _get_window_indexer(self) -> GroupbyIndexer:
        """
        Return an indexer class that will compute the window start and end bounds

        Returns
        -------
        GroupbyIndexer
        """
        window_indexer = GroupbyIndexer(
            groupby_indices=self._grouper.indices,
            window_indexer=ExpandingIndexer,
        )
        return window_indexer
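
A short usage sketch of the class above, mirroring the min_periods example from its own docstring (assumes pandas and NumPy are installed):

import numpy as np
import pandas as pd

df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})

# min_periods=3: the first two labels have fewer than three observations.
print(df.expanding(3).sum())
#      B
# 0  NaN
# 1  NaN
# 2  3.0
# 3  3.0
# 4  7.0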
.venv/Lib/site-packages/pandas/core/window/numba_.py (new file, 364 lines)
@@ -0,0 +1,364 @@
from __future__ import annotations

import functools
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
)

import numpy as np

from pandas._typing import Scalar
from pandas.compat._optional import import_optional_dependency

from pandas.core.util.numba_ import (
    NUMBA_FUNC_CACHE,
    get_jit_arguments,
    jit_user_function,
)


def generate_numba_apply_func(
    kwargs: dict[str, Any],
    func: Callable[..., Scalar],
    engine_kwargs: dict[str, bool] | None,
    name: str,
):
    """
    Generate a numba jitted apply function specified by values from engine_kwargs.

    1. jit the user's function
    2. Return a rolling apply function with the jitted function inline

    Configurations specified in engine_kwargs apply to both the user's
    function _AND_ the rolling apply function.

    Parameters
    ----------
    kwargs : dict
        **kwargs to be passed into the function
    func : function
        function to be applied to each window and will be JITed
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit
    name : str
        name of the caller (Rolling/Expanding)

    Returns
    -------
    Numba function
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs)

    cache_key = (func, f"{name}_apply_single")
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    numba_func = jit_user_function(func, nopython, nogil, parallel)
    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def roll_apply(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
        *args: Any,
    ) -> np.ndarray:
        result = np.empty(len(begin))
        for i in numba.prange(len(result)):
            start = begin[i]
            stop = end[i]
            window = values[start:stop]
            count_nan = np.sum(np.isnan(window))
            if len(window) - count_nan >= minimum_periods:
                result[i] = numba_func(window, *args)
            else:
                result[i] = np.nan
        return result

    return roll_apply


def generate_numba_ewm_func(
    engine_kwargs: dict[str, bool] | None,
    com: float,
    adjust: bool,
    ignore_na: bool,
    deltas: np.ndarray,
    normalize: bool,
):
    """
    Generate a numba jitted ewm mean or sum function specified by values
    from engine_kwargs.

    Parameters
    ----------
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit
    com : float
    adjust : bool
    ignore_na : bool
    deltas : numpy.ndarray
    normalize : bool

    Returns
    -------
    Numba function
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs)

    str_key = "ewm_mean" if normalize else "ewm_sum"
    cache_key = (lambda x: x, str_key)
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def ewm(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
    ) -> np.ndarray:
        result = np.empty(len(values))
        alpha = 1.0 / (1.0 + com)
        old_wt_factor = 1.0 - alpha
        new_wt = 1.0 if adjust else alpha

        for i in numba.prange(len(begin)):
            start = begin[i]
            stop = end[i]
            window = values[start:stop]
            sub_result = np.empty(len(window))

            weighted = window[0]
            nobs = int(not np.isnan(weighted))
            sub_result[0] = weighted if nobs >= minimum_periods else np.nan
            old_wt = 1.0

            for j in range(1, len(window)):
                cur = window[j]
                is_observation = not np.isnan(cur)
                nobs += is_observation
                if not np.isnan(weighted):

                    if is_observation or not ignore_na:
                        if normalize:
                            # note that len(deltas) = len(vals) - 1 and deltas[i]
                            # is to be used in conjunction with vals[i+1]
                            old_wt *= old_wt_factor ** deltas[start + j - 1]
                        else:
                            weighted = old_wt_factor * weighted
                        if is_observation:
                            if normalize:
                                # avoid numerical errors on constant series
                                if weighted != cur:
                                    weighted = old_wt * weighted + new_wt * cur
                                    if normalize:
                                        weighted = weighted / (old_wt + new_wt)
                                if adjust:
                                    old_wt += new_wt
                                else:
                                    old_wt = 1.0
                            else:
                                weighted += cur
                elif is_observation:
                    weighted = cur

                sub_result[j] = weighted if nobs >= minimum_periods else np.nan

            result[start:stop] = sub_result

        return result

    return ewm


def generate_numba_table_func(
    kwargs: dict[str, Any],
    func: Callable[..., np.ndarray],
    engine_kwargs: dict[str, bool] | None,
    name: str,
):
    """
    Generate a numba jitted function to apply window calculations table-wise.

    Func will be passed a M window size x N number of columns array, and
    must return a 1 x N number of columns array. Func is intended to operate
    row-wise, but the result will be transposed for axis=1.

    1. jit the user's function
    2. Return a rolling apply function with the jitted function inline

    Parameters
    ----------
    kwargs : dict
        **kwargs to be passed into the function
    func : function
        function to be applied to each window and will be JITed
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit
    name : str
        caller (Rolling/Expanding) and original method name for numba cache key

    Returns
    -------
    Numba function
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs)

    cache_key = (func, f"{name}_table")
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    numba_func = jit_user_function(func, nopython, nogil, parallel)
    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def roll_table(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
        *args: Any,
    ):
        result = np.empty(values.shape)
        min_periods_mask = np.empty(values.shape)
        for i in numba.prange(len(result)):
            start = begin[i]
            stop = end[i]
            window = values[start:stop]
            count_nan = np.sum(np.isnan(window), axis=0)
            sub_result = numba_func(window, *args)
            nan_mask = len(window) - count_nan >= minimum_periods
            min_periods_mask[i, :] = nan_mask
            result[i, :] = sub_result
        result = np.where(min_periods_mask, result, np.nan)
        return result

    return roll_table


# This function will no longer be needed once numba supports
# axis for all np.nan* agg functions
# https://github.com/numba/numba/issues/1269
@functools.lru_cache(maxsize=None)
def generate_manual_numpy_nan_agg_with_axis(nan_func):
    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=True, nogil=True, parallel=True)
    def nan_agg_with_axis(table):
        result = np.empty(table.shape[1])
        for i in numba.prange(table.shape[1]):
            partition = table[:, i]
            result[i] = nan_func(partition)
        return result

    return nan_agg_with_axis


def generate_numba_ewm_table_func(
    engine_kwargs: dict[str, bool] | None,
    com: float,
    adjust: bool,
    ignore_na: bool,
    deltas: np.ndarray,
    normalize: bool,
):
    """
    Generate a numba jitted ewm mean or sum function applied table wise specified
    by values from engine_kwargs.

    Parameters
    ----------
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit
    com : float
    adjust : bool
    ignore_na : bool
    deltas : numpy.ndarray
    normalize : bool

    Returns
    -------
    Numba function
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs)

    str_key = "ewm_mean_table" if normalize else "ewm_sum_table"
    cache_key = (lambda x: x, str_key)
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def ewm_table(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
    ) -> np.ndarray:
        alpha = 1.0 / (1.0 + com)
        old_wt_factor = 1.0 - alpha
        new_wt = 1.0 if adjust else alpha
        old_wt = np.ones(values.shape[1])

        result = np.empty(values.shape)
        weighted = values[0].copy()
        nobs = (~np.isnan(weighted)).astype(np.int64)
        result[0] = np.where(nobs >= minimum_periods, weighted, np.nan)
        for i in range(1, len(values)):
            cur = values[i]
            is_observations = ~np.isnan(cur)
            nobs += is_observations.astype(np.int64)
            for j in numba.prange(len(cur)):
                if not np.isnan(weighted[j]):
                    if is_observations[j] or not ignore_na:
                        if normalize:
                            # note that len(deltas) = len(vals) - 1 and deltas[i]
                            # is to be used in conjunction with vals[i+1]
                            old_wt[j] *= old_wt_factor ** deltas[i - 1]
                        else:
                            weighted[j] = old_wt_factor * weighted[j]
                        if is_observations[j]:
                            if normalize:
                                # avoid numerical errors on constant series
                                if weighted[j] != cur[j]:
                                    weighted[j] = (
                                        old_wt[j] * weighted[j] + new_wt * cur[j]
                                    )
                                    if normalize:
                                        weighted[j] = weighted[j] / (old_wt[j] + new_wt)
                                if adjust:
                                    old_wt[j] += new_wt
                                else:
                                    old_wt[j] = 1.0
                            else:
                                weighted[j] += cur[j]
                elif is_observations[j]:
                    weighted[j] = cur[j]

            result[i] = np.where(nobs >= minimum_periods, weighted, np.nan)

        return result

    return ewm_table
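
These generators are reached through the engine='numba' path of the rolling and expanding apply methods. A minimal usage sketch (requires the optional numba dependency; the first call pays JIT compilation, repeat calls hit NUMBA_FUNC_CACHE):

import numpy as np
import pandas as pd

s = pd.Series(np.arange(10.0))

# raw=True is required for engine="numba"; the engine_kwargs shown are the
# documented defaults.
out = s.rolling(3).apply(
    lambda window: np.nanmean(window),
    raw=True,
    engine="numba",
    engine_kwargs={"nopython": True, "nogil": False, "parallel": False},
)
print(out)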
.venv/Lib/site-packages/pandas/core/window/online.py (new file, 122 lines)
@@ -0,0 +1,122 @@
from typing import (
    TYPE_CHECKING,
    Dict,
    Optional,
)

import numpy as np

from pandas.compat._optional import import_optional_dependency

from pandas.core.util.numba_ import (
    NUMBA_FUNC_CACHE,
    get_jit_arguments,
)


def generate_online_numba_ewma_func(engine_kwargs: Optional[Dict[str, bool]]):
    """
    Generate a numba jitted online ewma function specified by values
    from engine_kwargs.

    Parameters
    ----------
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit

    Returns
    -------
    Numba function
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs)

    cache_key = (lambda x: x, "online_ewma")
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def online_ewma(
        values: np.ndarray,
        deltas: np.ndarray,
        minimum_periods: int,
        old_wt_factor: float,
        new_wt: float,
        old_wt: np.ndarray,
        adjust: bool,
        ignore_na: bool,
    ):
        """
        Compute online exponentially weighted mean per column over 2D values.

        Takes the first observation as is, then computes the subsequent
        exponentially weighted mean accounting for minimum periods.
        """
        result = np.empty(values.shape)
        weighted_avg = values[0]
        nobs = (~np.isnan(weighted_avg)).astype(np.int64)
        result[0] = np.where(nobs >= minimum_periods, weighted_avg, np.nan)

        for i in range(1, len(values)):
            cur = values[i]
            is_observations = ~np.isnan(cur)
            nobs += is_observations.astype(np.int64)
            for j in numba.prange(len(cur)):
                if not np.isnan(weighted_avg[j]):
                    if is_observations[j] or not ignore_na:

                        # note that len(deltas) = len(vals) - 1 and deltas[i] is to be
                        # used in conjunction with vals[i+1]; deltas is indexed by the
                        # time-step counter i (not the column counter j), matching
                        # ewm_table in numba_.py
                        old_wt[j] *= old_wt_factor ** deltas[i - 1]
                        if is_observations[j]:
                            # avoid numerical errors on constant series
                            if weighted_avg[j] != cur[j]:
                                weighted_avg[j] = (
                                    (old_wt[j] * weighted_avg[j]) + (new_wt * cur[j])
                                ) / (old_wt[j] + new_wt)
                            if adjust:
                                old_wt[j] += new_wt
                            else:
                                old_wt[j] = 1.0
                elif is_observations[j]:
                    weighted_avg[j] = cur[j]

            result[i] = np.where(nobs >= minimum_periods, weighted_avg, np.nan)

        return result, old_wt

    return online_ewma


class EWMMeanState:
    def __init__(self, com, adjust, ignore_na, axis, shape):
        alpha = 1.0 / (1.0 + com)
        self.axis = axis
        self.shape = shape
        self.adjust = adjust
        self.ignore_na = ignore_na
        self.new_wt = 1.0 if adjust else alpha
        self.old_wt_factor = 1.0 - alpha
        self.old_wt = np.ones(self.shape[self.axis - 1])
        self.last_ewm = None

    def run_ewm(self, weighted_avg, deltas, min_periods, ewm_func):
        result, old_wt = ewm_func(
            weighted_avg,
            deltas,
            min_periods,
            self.old_wt_factor,
            self.new_wt,
            self.old_wt,
            self.adjust,
            self.ignore_na,
        )
        self.old_wt = old_wt
        self.last_ewm = result[-1]
        return result

    def reset(self):
        self.old_wt = np.ones(self.shape[self.axis - 1])
        self.last_ewm = None
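
EWMMeanState and the generated kernel back pandas' experimental online EWM API. A minimal sketch, with caveats: .online() exists from pandas 1.3, requires the optional numba dependency, and only mean() is implemented:

import pandas as pd

df = pd.DataFrame({"a": [0.0, 1.0, 2.0]})
online = df.ewm(com=0.5).online()  # experimental; numba required

print(online.mean())                  # first batch seeds EWMMeanState
new = pd.DataFrame({"a": [3.0]}, index=[3])
print(online.mean(update=new))        # incremental update reuses the state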
.venv/Lib/site-packages/pandas/core/window/rolling.py (new file, 2650 lines)
File diff suppressed because it is too large.
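
The suppressed file carries Rolling, Window, RollingGroupby, and the shared base classes (BaseWindowGroupby, RollingAndExpandingMixin) that expanding.py above builds on. A minimal sketch of the entry point (assumes pandas is installed):

import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0])
print(s.rolling(window=2).mean())
# 0    NaN
# 1    1.5
# 2    2.5
# 3    3.5
# dtype: float64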