first commit

Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions


@@ -0,0 +1,151 @@
"""
compat
======
Cross-compatible functions for different versions of Python.
Other items:
* platform checker
"""
import os
import platform
import sys
from pandas._typing import F
from pandas.compat.numpy import (
is_numpy_dev,
np_version_under1p19,
np_version_under1p20,
)
from pandas.compat.pyarrow import (
pa_version_under1p01,
pa_version_under2p0,
pa_version_under3p0,
pa_version_under4p0,
)
PY39 = sys.version_info >= (3, 9)
PY310 = sys.version_info >= (3, 10)
PYPY = platform.python_implementation() == "PyPy"
IS64 = sys.maxsize > 2**32
def set_function_name(f: F, name: str, cls) -> F:
"""
Bind the name/qualname attributes of the function.
"""
f.__name__ = name
f.__qualname__ = f"{cls.__name__}.{name}"
f.__module__ = cls.__module__
return f
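# A short sketch of the helper above: rebinding metadata so an injected
# method reports its owning class. ``Demo`` is a hypothetical example
# class, not pandas API.
class Demo:
    pass

def _impl(self):
    return 42

Demo.answer = set_function_name(_impl, "answer", Demo)
assert Demo.answer.__qualname__ == "Demo.answer"
assert Demo().answer() == 42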
def is_platform_little_endian() -> bool:
"""
Checking if the running platform is little endian.
Returns
-------
bool
True if the running platform is little endian.
"""
return sys.byteorder == "little"
def is_platform_windows() -> bool:
"""
Checking if the running platform is windows.
Returns
-------
bool
True if the running platform is windows.
"""
return sys.platform in ["win32", "cygwin"]
def is_platform_linux() -> bool:
"""
Checking if the running platform is linux.
Returns
-------
bool
True if the running platform is linux.
"""
return sys.platform == "linux"
def is_platform_mac() -> bool:
"""
Checking if the running platform is mac.
Returns
-------
bool
True if the running platform is mac.
"""
return sys.platform == "darwin"
def is_platform_arm() -> bool:
"""
    Checking if the running platform uses ARM architecture.
Returns
-------
bool
True if the running platform uses ARM architecture.
"""
return platform.machine() in ("arm64", "aarch64") or platform.machine().startswith(
"armv"
)
def is_ci_environment() -> bool:
"""
    Checking if pandas is running in a continuous integration environment
    via the PANDAS_CI environment variable.
Returns
-------
bool
        True if running in a continuous integration environment.
"""
return os.environ.get("PANDAS_CI", "0") == "1"
def get_lzma_file():
"""
Importing the `LZMAFile` class from the `lzma` module.
Returns
-------
class
The `LZMAFile` class from the `lzma` module.
Raises
------
RuntimeError
        If the `lzma` module is not available.
"""
try:
import lzma
except ImportError:
raise RuntimeError(
"lzma module not available. "
"A Python re-install with the proper dependencies, "
"might be required to solve this issue."
)
return lzma.LZMAFile
__all__ = [
"is_numpy_dev",
"np_version_under1p19",
"np_version_under1p20",
"pa_version_under1p01",
"pa_version_under2p0",
"pa_version_under3p0",
"pa_version_under4p0",
]
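# A minimal sketch of a call site consuming the flags defined above
# (``pick_default_int_dtype`` is a hypothetical helper, not pandas API).
def pick_default_int_dtype() -> str:
    # On 32-bit builds a C long is 32 bits, so prefer the smaller dtype.
    return "int64" if IS64 else "int32"

if is_platform_windows() and PY310:
    # gate Windows-specific behavior that only applies on Python 3.10+
    pass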


@@ -0,0 +1,166 @@
from __future__ import annotations
import importlib
import sys
import types
import warnings
from pandas.util.version import Version
# Update install.rst when updating versions!
VERSIONS = {
"bs4": "4.8.2",
"blosc": "1.20.1",
"bottleneck": "1.3.1",
"brotli": "0.7.0",
"fastparquet": "0.4.0",
"fsspec": "0.7.4",
"html5lib": "1.1",
"gcsfs": "0.6.0",
"jinja2": "2.11",
"lxml.etree": "4.5.0",
"markupsafe": "2.0.1",
"matplotlib": "3.3.2",
"numba": "0.50.1",
"numexpr": "2.7.1",
"odfpy": "1.4.1",
"openpyxl": "3.0.3",
"pandas_gbq": "0.14.0",
"psycopg2": "2.8.4", # (dt dec pq3 ext lo64)
"pymysql": "0.10.1",
"pyarrow": "1.0.1",
"pyreadstat": "1.1.0",
"pytest": "6.0",
"pyxlsb": "1.0.6",
"s3fs": "0.4.0",
"scipy": "1.4.1",
"snappy": "0.6.0",
"sqlalchemy": "1.4.0",
"tables": "3.6.1",
"tabulate": "0.8.7",
"xarray": "0.15.1",
"xlrd": "2.0.1",
"xlwt": "1.3.0",
"xlsxwriter": "1.2.2",
"zstandard": "0.15.2",
}
# A mapping from import name to package name (on PyPI) for packages where
# these two names are different.
INSTALL_MAPPING = {
"bs4": "beautifulsoup4",
"bottleneck": "Bottleneck",
"brotli": "brotlipy",
"jinja2": "Jinja2",
"lxml.etree": "lxml",
"odf": "odfpy",
"pandas_gbq": "pandas-gbq",
"snappy": "python-snappy",
"sqlalchemy": "SQLAlchemy",
"tables": "pytables",
}
def get_version(module: types.ModuleType) -> str:
version = getattr(module, "__version__", None)
if version is None:
# xlrd uses a capitalized attribute name
version = getattr(module, "__VERSION__", None)
if version is None:
if module.__name__ == "brotli":
            # brotli doesn't contain attributes to confirm its version
return ""
if module.__name__ == "snappy":
            # snappy doesn't contain attributes to confirm its version
# See https://github.com/andrix/python-snappy/pull/119
return ""
raise ImportError(f"Can't determine version for {module.__name__}")
if module.__name__ == "psycopg2":
        # psycopg2 appends " (dt dec pq3 ext lo64)" to its version
version = version.split()[0]
return version
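# A short sketch of the helper above: most modules report __version__,
# and the psycopg2 build suffix is stripped. The fake module is
# illustrative only.
import types
_fake = types.ModuleType("psycopg2")
_fake.__version__ = "2.8.4 (dt dec pq3 ext lo64)"
assert get_version(_fake) == "2.8.4"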
def import_optional_dependency(
name: str,
extra: str = "",
errors: str = "raise",
min_version: str | None = None,
):
"""
Import an optional dependency.
    By default, if a dependency is missing, an ImportError with a nice
    message will be raised. If a dependency is present but too old,
    we raise.
Parameters
----------
name : str
The module name.
extra : str
Additional text to include in the ImportError message.
errors : str {'raise', 'warn', 'ignore'}
What to do when a dependency is not found or its version is too old.
* raise : Raise an ImportError
        * warn : Only applicable when a module's version is too old.
Warns that the version is too old and returns None
* ignore: If the module is not installed, return None, otherwise,
return the module, even if the version is too old.
It's expected that users validate the version locally when
using ``errors="ignore"`` (see. ``io/html.py``)
min_version : str, default None
Specify a minimum version that is different from the global pandas
minimum version required.
Returns
-------
maybe_module : Optional[ModuleType]
The imported module, when found and the version is correct.
        None is returned when the package is not found and `errors`
        is ``'ignore'``, or when the package's version is too old and
        `errors` is ``'warn'``.
"""
assert errors in {"warn", "raise", "ignore"}
package_name = INSTALL_MAPPING.get(name)
install_name = package_name if package_name is not None else name
msg = (
f"Missing optional dependency '{install_name}'. {extra} "
f"Use pip or conda to install {install_name}."
)
try:
module = importlib.import_module(name)
except ImportError:
if errors == "raise":
raise ImportError(msg)
else:
return None
    # Handle submodules: if we have a submodule, grab the parent module from sys.modules
parent = name.split(".")[0]
if parent != name:
install_name = parent
module_to_get = sys.modules[install_name]
else:
module_to_get = module
minimum_version = min_version if min_version is not None else VERSIONS.get(parent)
if minimum_version:
version = get_version(module_to_get)
if version and Version(version) < Version(minimum_version):
msg = (
f"Pandas requires version '{minimum_version}' or newer of '{parent}' "
f"(version '{version}' currently installed)."
)
if errors == "warn":
warnings.warn(msg, UserWarning)
return None
elif errors == "raise":
raise ImportError(msg)
return module
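# A minimal usage sketch of the function above; the degraded code path
# and the pyarrow pin are illustrative, not pandas policy.
try:
    openpyxl = import_optional_dependency("openpyxl")
except ImportError as exc:
    print(exc)  # "Missing optional dependency 'openpyxl'. ..."

# Tolerate an outdated version: warn once and return None.
maybe_xlrd = import_optional_dependency("xlrd", errors="warn")
if maybe_xlrd is None:
    pass  # take a degraded code path

# Require a newer version than the global minimum in VERSIONS.
pa = import_optional_dependency("pyarrow", min_version="4.0.0")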


@@ -0,0 +1,35 @@
from typing import (
ChainMap,
TypeVar,
)
_KT = TypeVar("_KT")
_VT = TypeVar("_VT")
class DeepChainMap(ChainMap[_KT, _VT]):
"""
Variant of ChainMap that allows direct updates to inner scopes.
    Only works when all passed mappings are mutable.
"""
def __setitem__(self, key: _KT, value: _VT) -> None:
for mapping in self.maps:
if key in mapping:
mapping[key] = value
return
self.maps[0][key] = value
def __delitem__(self, key: _KT) -> None:
"""
Raises
------
KeyError
If `key` doesn't exist.
"""
for mapping in self.maps:
if key in mapping:
del mapping[key]
return
raise KeyError(key)
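# A short behavioral sketch of the class above, contrasted with stock
# ChainMap, which always writes to maps[0].
inner, outer = {"x": 1}, {"y": 2}
dcm = DeepChainMap(inner, outer)
dcm["y"] = 99        # updates the inner scope that already owns "y"
assert outer["y"] == 99
dcm["z"] = 3         # unknown key falls through to the first mapping
assert inner["z"] == 3
del dcm["y"]         # deletes from whichever mapping holds the key
assert "y" not in outer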


@@ -0,0 +1,34 @@
""" support numpy compatibility across versions """
import numpy as np
from pandas.util.version import Version
# numpy versioning
_np_version = np.__version__
_nlv = Version(_np_version)
np_version_under1p19 = _nlv < Version("1.19")
np_version_under1p20 = _nlv < Version("1.20")
np_version_under1p22 = _nlv < Version("1.22")
np_version_gte1p22 = _nlv >= Version("1.22")
is_numpy_dev = _nlv.dev is not None
_min_numpy_ver = "1.18.5"
if is_numpy_dev or not np_version_under1p22:
np_percentile_argname = "method"
else:
np_percentile_argname = "interpolation"
if _nlv < Version(_min_numpy_ver):
raise ImportError(
f"this version of pandas is incompatible with numpy < {_min_numpy_ver}\n"
f"your numpy version is {_np_version}.\n"
f"Please upgrade numpy to >= {_min_numpy_ver} to use this pandas version"
)
__all__ = [
"np",
"_np_version",
"is_numpy_dev",
]
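# A minimal sketch of how the compat name above is used: numpy 1.22
# renamed ``np.percentile``'s ``interpolation`` keyword to ``method``,
# and ``np_percentile_argname`` lets one call site span both.
_arr = np.array([1.0, 2.0, 3.0, 4.0])
_result = np.percentile(_arr, 50, **{np_percentile_argname: "lower"})
assert _result == 2.0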


@@ -0,0 +1,409 @@
"""
For compatibility with numpy libraries, pandas functions or methods have to
accept '*args' and '**kwargs' parameters to accommodate numpy arguments that
are not actually used or respected in the pandas implementation.
To ensure that users do not abuse these parameters, validation is performed in
'validators.py' to make sure that any extra parameters passed correspond ONLY
to those in the numpy signature. Part of that validation includes whether or
not the user attempted to pass in non-default values for these extraneous
parameters. As we want to discourage users from relying on these parameters
when calling the pandas implementation, we want them only to pass in the
default values for these parameters.
This module provides a set of commonly used default arguments for functions and
methods that are spread throughout the codebase. This module will make it
easier to adjust to future upstream changes in the analogous numpy signatures.
"""
from __future__ import annotations
from typing import Any
from numpy import ndarray
from pandas._libs.lib import (
is_bool,
is_integer,
)
from pandas.errors import UnsupportedFunctionCall
from pandas.util._validators import (
validate_args,
validate_args_and_kwargs,
validate_kwargs,
)
class CompatValidator:
def __init__(
self,
defaults,
fname=None,
method: str | None = None,
max_fname_arg_count=None,
):
self.fname = fname
self.method = method
self.defaults = defaults
self.max_fname_arg_count = max_fname_arg_count
def __call__(
self,
args,
kwargs,
fname=None,
max_fname_arg_count=None,
method: str | None = None,
) -> None:
if args or kwargs:
fname = self.fname if fname is None else fname
max_fname_arg_count = (
self.max_fname_arg_count
if max_fname_arg_count is None
else max_fname_arg_count
)
method = self.method if method is None else method
if method == "args":
validate_args(fname, args, max_fname_arg_count, self.defaults)
elif method == "kwargs":
validate_kwargs(fname, kwargs, self.defaults)
elif method == "both":
validate_args_and_kwargs(
fname, args, kwargs, max_fname_arg_count, self.defaults
)
else:
raise ValueError(f"invalid validation method '{method}'")
ARGMINMAX_DEFAULTS = {"out": None}
validate_argmin = CompatValidator(
ARGMINMAX_DEFAULTS, fname="argmin", method="both", max_fname_arg_count=1
)
validate_argmax = CompatValidator(
ARGMINMAX_DEFAULTS, fname="argmax", method="both", max_fname_arg_count=1
)
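# A short sketch of the validator contract: all-default arguments pass
# silently, while any non-default value raises from
# pandas.util._validators (ValueError for bad values, TypeError for
# unknown keywords).
validate_argmax((), {"out": None})   # defaults only: accepted
validate_argmax((None,), {})         # one positional default: accepted
try:
    validate_argmax((), {"out": 5})  # any non-default value
except (ValueError, TypeError) as exc:
    print(exc)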
def process_skipna(skipna, args):
if isinstance(skipna, ndarray) or skipna is None:
args = (skipna,) + args
skipna = True
return skipna, args
def validate_argmin_with_skipna(skipna, args, kwargs):
"""
If 'Series.argmin' is called via the 'numpy' library, the third parameter
in its signature is 'out', which takes either an ndarray or 'None', so
check if the 'skipna' parameter is either an instance of ndarray or is
None, since 'skipna' itself should be a boolean
"""
skipna, args = process_skipna(skipna, args)
validate_argmin(args, kwargs)
return skipna
def validate_argmax_with_skipna(skipna, args, kwargs):
"""
If 'Series.argmax' is called via the 'numpy' library, the third parameter
in its signature is 'out', which takes either an ndarray or 'None', so
check if the 'skipna' parameter is either an instance of ndarray or is
None, since 'skipna' itself should be a boolean
"""
skipna, args = process_skipna(skipna, args)
validate_argmax(args, kwargs)
return skipna
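# A short sketch of the disambiguation above: when numpy forwards its
# ``out`` argument positionally, it lands in the ``skipna`` slot, so it
# is shifted back into ``args`` and ``skipna`` is restored to its
# default of True.
_skipna, _args = process_skipna(None, ())
assert _skipna is True and _args == (None,)
_skipna, _args = process_skipna(False, ())  # a real boolean passes through
assert _skipna is False and _args == ()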
ARGSORT_DEFAULTS: dict[str, int | str | None] = {}
ARGSORT_DEFAULTS["axis"] = -1
ARGSORT_DEFAULTS["kind"] = "quicksort"
ARGSORT_DEFAULTS["order"] = None
ARGSORT_DEFAULTS["kind"] = None
validate_argsort = CompatValidator(
ARGSORT_DEFAULTS, fname="argsort", max_fname_arg_count=0, method="both"
)
# two different signatures of argsort, this second validation for when the
# `kind` param is supported
ARGSORT_DEFAULTS_KIND: dict[str, int | None] = {}
ARGSORT_DEFAULTS_KIND["axis"] = -1
ARGSORT_DEFAULTS_KIND["order"] = None
validate_argsort_kind = CompatValidator(
ARGSORT_DEFAULTS_KIND, fname="argsort", max_fname_arg_count=0, method="both"
)
def validate_argsort_with_ascending(ascending, args, kwargs):
"""
If 'Categorical.argsort' is called via the 'numpy' library, the first
parameter in its signature is 'axis', which takes either an integer or
'None', so check if the 'ascending' parameter has either integer type or is
None, since 'ascending' itself should be a boolean
"""
if is_integer(ascending) or ascending is None:
args = (ascending,) + args
ascending = True
validate_argsort_kind(args, kwargs, max_fname_arg_count=3)
return ascending
CLIP_DEFAULTS: dict[str, Any] = {"out": None}
validate_clip = CompatValidator(
CLIP_DEFAULTS, fname="clip", method="both", max_fname_arg_count=3
)
def validate_clip_with_axis(axis, args, kwargs):
"""
If 'NDFrame.clip' is called via the numpy library, the third parameter in
    its signature is 'out', which can take an ndarray, so check if the 'axis'
parameter is an instance of ndarray, since 'axis' itself should either be
an integer or None
"""
if isinstance(axis, ndarray):
args = (axis,) + args
axis = None
validate_clip(args, kwargs)
return axis
CUM_FUNC_DEFAULTS: dict[str, Any] = {}
CUM_FUNC_DEFAULTS["dtype"] = None
CUM_FUNC_DEFAULTS["out"] = None
validate_cum_func = CompatValidator(
CUM_FUNC_DEFAULTS, method="both", max_fname_arg_count=1
)
validate_cumsum = CompatValidator(
CUM_FUNC_DEFAULTS, fname="cumsum", method="both", max_fname_arg_count=1
)
def validate_cum_func_with_skipna(skipna, args, kwargs, name):
"""
If this function is called via the 'numpy' library, the third parameter in
its signature is 'dtype', which takes either a 'numpy' dtype or 'None', so
check if the 'skipna' parameter is a boolean or not
"""
if not is_bool(skipna):
args = (skipna,) + args
skipna = True
validate_cum_func(args, kwargs, fname=name)
return skipna
ALLANY_DEFAULTS: dict[str, bool | None] = {}
ALLANY_DEFAULTS["dtype"] = None
ALLANY_DEFAULTS["out"] = None
ALLANY_DEFAULTS["keepdims"] = False
ALLANY_DEFAULTS["axis"] = None
validate_all = CompatValidator(
ALLANY_DEFAULTS, fname="all", method="both", max_fname_arg_count=1
)
validate_any = CompatValidator(
ALLANY_DEFAULTS, fname="any", method="both", max_fname_arg_count=1
)
LOGICAL_FUNC_DEFAULTS = {"out": None, "keepdims": False}
validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method="kwargs")
MINMAX_DEFAULTS = {"axis": None, "out": None, "keepdims": False}
validate_min = CompatValidator(
MINMAX_DEFAULTS, fname="min", method="both", max_fname_arg_count=1
)
validate_max = CompatValidator(
MINMAX_DEFAULTS, fname="max", method="both", max_fname_arg_count=1
)
RESHAPE_DEFAULTS: dict[str, str] = {"order": "C"}
validate_reshape = CompatValidator(
RESHAPE_DEFAULTS, fname="reshape", method="both", max_fname_arg_count=1
)
REPEAT_DEFAULTS: dict[str, Any] = {"axis": None}
validate_repeat = CompatValidator(
REPEAT_DEFAULTS, fname="repeat", method="both", max_fname_arg_count=1
)
ROUND_DEFAULTS: dict[str, Any] = {"out": None}
validate_round = CompatValidator(
ROUND_DEFAULTS, fname="round", method="both", max_fname_arg_count=1
)
SORT_DEFAULTS: dict[str, int | str | None] = {}
SORT_DEFAULTS["axis"] = -1
SORT_DEFAULTS["kind"] = "quicksort"
SORT_DEFAULTS["order"] = None
validate_sort = CompatValidator(SORT_DEFAULTS, fname="sort", method="kwargs")
STAT_FUNC_DEFAULTS: dict[str, Any | None] = {}
STAT_FUNC_DEFAULTS["dtype"] = None
STAT_FUNC_DEFAULTS["out"] = None
SUM_DEFAULTS = STAT_FUNC_DEFAULTS.copy()
SUM_DEFAULTS["axis"] = None
SUM_DEFAULTS["keepdims"] = False
SUM_DEFAULTS["initial"] = None
PROD_DEFAULTS = STAT_FUNC_DEFAULTS.copy()
PROD_DEFAULTS["axis"] = None
PROD_DEFAULTS["keepdims"] = False
PROD_DEFAULTS["initial"] = None
MEDIAN_DEFAULTS = STAT_FUNC_DEFAULTS.copy()
MEDIAN_DEFAULTS["overwrite_input"] = False
MEDIAN_DEFAULTS["keepdims"] = False
STAT_FUNC_DEFAULTS["keepdims"] = False
validate_stat_func = CompatValidator(STAT_FUNC_DEFAULTS, method="kwargs")
validate_sum = CompatValidator(
SUM_DEFAULTS, fname="sum", method="both", max_fname_arg_count=1
)
validate_prod = CompatValidator(
PROD_DEFAULTS, fname="prod", method="both", max_fname_arg_count=1
)
validate_mean = CompatValidator(
STAT_FUNC_DEFAULTS, fname="mean", method="both", max_fname_arg_count=1
)
validate_median = CompatValidator(
MEDIAN_DEFAULTS, fname="median", method="both", max_fname_arg_count=1
)
STAT_DDOF_FUNC_DEFAULTS: dict[str, bool | None] = {}
STAT_DDOF_FUNC_DEFAULTS["dtype"] = None
STAT_DDOF_FUNC_DEFAULTS["out"] = None
STAT_DDOF_FUNC_DEFAULTS["keepdims"] = False
validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, method="kwargs")
TAKE_DEFAULTS: dict[str, str | None] = {}
TAKE_DEFAULTS["out"] = None
TAKE_DEFAULTS["mode"] = "raise"
validate_take = CompatValidator(TAKE_DEFAULTS, fname="take", method="kwargs")
def validate_take_with_convert(convert, args, kwargs):
"""
If this function is called via the 'numpy' library, the third parameter in
its signature is 'axis', which takes either an ndarray or 'None', so check
if the 'convert' parameter is either an instance of ndarray or is None
"""
if isinstance(convert, ndarray) or convert is None:
args = (convert,) + args
convert = True
validate_take(args, kwargs, max_fname_arg_count=3, method="both")
return convert
TRANSPOSE_DEFAULTS = {"axes": None}
validate_transpose = CompatValidator(
TRANSPOSE_DEFAULTS, fname="transpose", method="both", max_fname_arg_count=0
)
def validate_window_func(name, args, kwargs) -> None:
numpy_args = ("axis", "dtype", "out")
msg = (
f"numpy operations are not valid with window objects. "
f"Use .{name}() directly instead "
)
if len(args) > 0:
raise UnsupportedFunctionCall(msg)
for arg in numpy_args:
if arg in kwargs:
raise UnsupportedFunctionCall(msg)
def validate_rolling_func(name, args, kwargs) -> None:
numpy_args = ("axis", "dtype", "out")
msg = (
f"numpy operations are not valid with window objects. "
f"Use .rolling(...).{name}() instead "
)
if len(args) > 0:
raise UnsupportedFunctionCall(msg)
for arg in numpy_args:
if arg in kwargs:
raise UnsupportedFunctionCall(msg)
def validate_expanding_func(name, args, kwargs) -> None:
numpy_args = ("axis", "dtype", "out")
msg = (
f"numpy operations are not valid with window objects. "
f"Use .expanding(...).{name}() instead "
)
if len(args) > 0:
raise UnsupportedFunctionCall(msg)
for arg in numpy_args:
if arg in kwargs:
raise UnsupportedFunctionCall(msg)
def validate_groupby_func(name, args, kwargs, allowed=None) -> None:
"""
'args' and 'kwargs' should be empty, except for allowed kwargs because all
of their necessary parameters are explicitly listed in the function
signature
"""
if allowed is None:
allowed = []
kwargs = set(kwargs) - set(allowed)
if len(args) + len(kwargs) > 0:
raise UnsupportedFunctionCall(
"numpy operations are not valid with groupby. "
f"Use .groupby(...).{name}() instead"
)
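# A short sketch of the check above: leftover positional or keyword
# arguments (beyond the explicit allow-list) are rejected with a
# pointer to the groupby method.
validate_groupby_func("sum", (), {"numeric_only": True}, allowed=["numeric_only"])
try:
    validate_groupby_func("sum", (0,), {})
except UnsupportedFunctionCall as exc:
    print(exc)  # ... Use .groupby(...).sum() instead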
RESAMPLER_NUMPY_OPS = ("min", "max", "sum", "prod", "mean", "std", "var")
def validate_resampler_func(method: str, args, kwargs) -> None:
"""
'args' and 'kwargs' should be empty because all of their necessary
parameters are explicitly listed in the function signature
"""
if len(args) + len(kwargs) > 0:
if method in RESAMPLER_NUMPY_OPS:
raise UnsupportedFunctionCall(
"numpy operations are not valid with resample. "
f"Use .resample(...).{method}() instead"
)
else:
raise TypeError("too many arguments passed in")
def validate_minmax_axis(axis: int | None, ndim: int = 1) -> None:
"""
Ensure that the axis argument passed to min, max, argmin, or argmax is zero
or None, as otherwise it will be incorrectly ignored.
Parameters
----------
axis : int or None
ndim : int, default 1
Raises
------
ValueError
"""
if axis is None:
return
if axis >= ndim or (axis < 0 and ndim + axis < 0):
raise ValueError(f"`axis` must be fewer than the number of dimensions ({ndim})")


@@ -0,0 +1,303 @@
"""
Support pre-0.12 series pickle compatibility.
"""
from __future__ import annotations
import contextlib
import copy
import io
import pickle as pkl
from typing import TYPE_CHECKING
import warnings
import numpy as np
from pandas._libs.arrays import NDArrayBacked
from pandas._libs.tslibs import BaseOffset
from pandas import Index
from pandas.core.arrays import (
DatetimeArray,
PeriodArray,
TimedeltaArray,
)
from pandas.core.internals import BlockManager
if TYPE_CHECKING:
from pandas import (
DataFrame,
Series,
)
def load_reduce(self):
stack = self.stack
args = stack.pop()
func = stack[-1]
try:
stack[-1] = func(*args)
return
except TypeError as err:
# If we have a deprecated function,
# try to replace and try again.
msg = "_reconstruct: First argument must be a sub-type of ndarray"
if msg in str(err):
try:
cls = args[0]
stack[-1] = object.__new__(cls)
return
except TypeError:
pass
elif args and isinstance(args[0], type) and issubclass(args[0], BaseOffset):
# TypeError: object.__new__(Day) is not safe, use Day.__new__()
cls = args[0]
stack[-1] = cls.__new__(*args)
return
elif args and issubclass(args[0], PeriodArray):
cls = args[0]
stack[-1] = NDArrayBacked.__new__(*args)
return
raise
_sparse_msg = """\
Loading a saved '{cls}' as a {new} with sparse values.
'{cls}' is now removed. You should re-save this dataset in its new format.
"""
class _LoadSparseSeries:
# To load a SparseSeries as a Series[Sparse]
# https://github.com/python/mypy/issues/1020
# error: Incompatible return type for "__new__" (returns "Series", but must return
# a subtype of "_LoadSparseSeries")
def __new__(cls) -> Series: # type: ignore[misc]
from pandas import Series
warnings.warn(
_sparse_msg.format(cls="SparseSeries", new="Series"),
FutureWarning,
stacklevel=6,
)
return Series(dtype=object)
class _LoadSparseFrame:
# To load a SparseDataFrame as a DataFrame[Sparse]
# https://github.com/python/mypy/issues/1020
# error: Incompatible return type for "__new__" (returns "DataFrame", but must
# return a subtype of "_LoadSparseFrame")
def __new__(cls) -> DataFrame: # type: ignore[misc]
from pandas import DataFrame
warnings.warn(
_sparse_msg.format(cls="SparseDataFrame", new="DataFrame"),
FutureWarning,
stacklevel=6,
)
return DataFrame()
# If classes are moved, provide compat here.
_class_locations_map = {
("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"),
# 15477
("pandas.core.base", "FrozenNDArray"): ("numpy", "ndarray"),
("pandas.core.indexes.frozen", "FrozenNDArray"): ("numpy", "ndarray"),
("pandas.core.base", "FrozenList"): ("pandas.core.indexes.frozen", "FrozenList"),
# 10890
("pandas.core.series", "TimeSeries"): ("pandas.core.series", "Series"),
("pandas.sparse.series", "SparseTimeSeries"): (
"pandas.core.sparse.series",
"SparseSeries",
),
# 12588, extensions moving
("pandas._sparse", "BlockIndex"): ("pandas._libs.sparse", "BlockIndex"),
("pandas.tslib", "Timestamp"): ("pandas._libs.tslib", "Timestamp"),
# 18543 moving period
("pandas._period", "Period"): ("pandas._libs.tslibs.period", "Period"),
("pandas._libs.period", "Period"): ("pandas._libs.tslibs.period", "Period"),
# 18014 moved __nat_unpickle from _libs.tslib-->_libs.tslibs.nattype
("pandas.tslib", "__nat_unpickle"): (
"pandas._libs.tslibs.nattype",
"__nat_unpickle",
),
("pandas._libs.tslib", "__nat_unpickle"): (
"pandas._libs.tslibs.nattype",
"__nat_unpickle",
),
# 15998 top-level dirs moving
("pandas.sparse.array", "SparseArray"): (
"pandas.core.arrays.sparse",
"SparseArray",
),
("pandas.sparse.series", "SparseSeries"): (
"pandas.compat.pickle_compat",
"_LoadSparseSeries",
),
("pandas.sparse.frame", "SparseDataFrame"): (
"pandas.core.sparse.frame",
"_LoadSparseFrame",
),
("pandas.indexes.base", "_new_Index"): ("pandas.core.indexes.base", "_new_Index"),
("pandas.indexes.base", "Index"): ("pandas.core.indexes.base", "Index"),
("pandas.indexes.numeric", "Int64Index"): (
"pandas.core.indexes.numeric",
"Int64Index",
),
("pandas.indexes.range", "RangeIndex"): ("pandas.core.indexes.range", "RangeIndex"),
("pandas.indexes.multi", "MultiIndex"): ("pandas.core.indexes.multi", "MultiIndex"),
("pandas.tseries.index", "_new_DatetimeIndex"): (
"pandas.core.indexes.datetimes",
"_new_DatetimeIndex",
),
("pandas.tseries.index", "DatetimeIndex"): (
"pandas.core.indexes.datetimes",
"DatetimeIndex",
),
("pandas.tseries.period", "PeriodIndex"): (
"pandas.core.indexes.period",
"PeriodIndex",
),
# 19269, arrays moving
("pandas.core.categorical", "Categorical"): ("pandas.core.arrays", "Categorical"),
# 19939, add timedeltaindex, float64index compat from 15998 move
("pandas.tseries.tdi", "TimedeltaIndex"): (
"pandas.core.indexes.timedeltas",
"TimedeltaIndex",
),
("pandas.indexes.numeric", "Float64Index"): (
"pandas.core.indexes.numeric",
"Float64Index",
),
("pandas.core.sparse.series", "SparseSeries"): (
"pandas.compat.pickle_compat",
"_LoadSparseSeries",
),
("pandas.core.sparse.frame", "SparseDataFrame"): (
"pandas.compat.pickle_compat",
"_LoadSparseFrame",
),
}
# Our Unpickler subclass overrides methods and some dispatcher
# functions for compat; it relies on a non-public class of the pickle
# module.
class Unpickler(pkl._Unpickler):
def find_class(self, module, name):
# override superclass
key = (module, name)
module, name = _class_locations_map.get(key, key)
return super().find_class(module, name)
Unpickler.dispatch = copy.copy(Unpickler.dispatch)
Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce
def load_newobj(self):
args = self.stack.pop()
cls = self.stack[-1]
# compat
if issubclass(cls, Index):
obj = object.__new__(cls)
elif issubclass(cls, DatetimeArray) and not args:
arr = np.array([], dtype="M8[ns]")
obj = cls.__new__(cls, arr, arr.dtype)
elif issubclass(cls, TimedeltaArray) and not args:
arr = np.array([], dtype="m8[ns]")
obj = cls.__new__(cls, arr, arr.dtype)
elif cls is BlockManager and not args:
obj = cls.__new__(cls, (), [], False)
else:
obj = cls.__new__(cls, *args)
self.stack[-1] = obj
Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj
def load_newobj_ex(self):
kwargs = self.stack.pop()
args = self.stack.pop()
cls = self.stack.pop()
# compat
if issubclass(cls, Index):
obj = object.__new__(cls)
else:
obj = cls.__new__(cls, *args, **kwargs)
self.append(obj)
try:
Unpickler.dispatch[pkl.NEWOBJ_EX[0]] = load_newobj_ex
except (AttributeError, KeyError):
pass
def load(fh, encoding: str | None = None, is_verbose: bool = False):
"""
    Load a pickle, with a provided encoding.
Parameters
----------
fh : a filelike object
encoding : an optional encoding
is_verbose : show exception output
"""
try:
fh.seek(0)
if encoding is not None:
up = Unpickler(fh, encoding=encoding)
else:
up = Unpickler(fh)
# "Unpickler" has no attribute "is_verbose" [attr-defined]
up.is_verbose = is_verbose # type: ignore[attr-defined]
return up.load()
except (ValueError, TypeError):
raise
def loads(
bytes_object: bytes,
*,
fix_imports: bool = True,
encoding: str = "ASCII",
errors: str = "strict",
):
"""
Analogous to pickle._loads.
"""
fd = io.BytesIO(bytes_object)
return Unpickler(
fd, fix_imports=fix_imports, encoding=encoding, errors=errors
).load()
@contextlib.contextmanager
def patch_pickle():
"""
Temporarily patch pickle to use our unpickler.
"""
orig_loads = pkl.loads
try:
setattr(pkl, "loads", loads)
yield
finally:
setattr(pkl, "loads", orig_loads)


@@ -0,0 +1,24 @@
""" support pyarrow compatibility across versions """
from pandas.util.version import Version
try:
import pyarrow as pa
_pa_version = pa.__version__
_palv = Version(_pa_version)
pa_version_under1p01 = _palv < Version("1.0.1")
pa_version_under2p0 = _palv < Version("2.0.0")
pa_version_under3p0 = _palv < Version("3.0.0")
pa_version_under4p0 = _palv < Version("4.0.0")
pa_version_under5p0 = _palv < Version("5.0.0")
pa_version_under6p0 = _palv < Version("6.0.0")
pa_version_under7p0 = _palv < Version("7.0.0")
except ImportError:
pa_version_under1p01 = True
pa_version_under2p0 = True
pa_version_under3p0 = True
pa_version_under4p0 = True
pa_version_under5p0 = True
pa_version_under6p0 = True
pa_version_under7p0 = True
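# A minimal sketch of how call sites elsewhere gate a feature on these
# flags (``to_arrow_safe`` is a hypothetical helper, not pandas API).
def to_arrow_safe(values):
    if pa_version_under2p0:
        raise ImportError("pyarrow>=2.0.0 is required for this feature")
    import pyarrow as pa
    return pa.array(values)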