mirror of https://github.com/aykhans/AzSuicideDataVisualization.git (synced 2025-07-03 22:57:06 +00:00)
first commit
60  .venv/Lib/site-packages/pandas/core/internals/__init__.py  Normal file
@@ -0,0 +1,60 @@
from pandas.core.internals.api import make_block
from pandas.core.internals.array_manager import (
    ArrayManager,
    SingleArrayManager,
)
from pandas.core.internals.base import (
    DataManager,
    SingleDataManager,
)
from pandas.core.internals.blocks import (  # io.pytables, io.packers
    Block,
    DatetimeTZBlock,
    ExtensionBlock,
    NumericBlock,
    ObjectBlock,
)
from pandas.core.internals.concat import concatenate_managers
from pandas.core.internals.managers import (
    BlockManager,
    SingleBlockManager,
    create_block_manager_from_blocks,
)

__all__ = [
    "Block",
    "CategoricalBlock",
    "NumericBlock",
    "DatetimeTZBlock",
    "ExtensionBlock",
    "ObjectBlock",
    "make_block",
    "DataManager",
    "ArrayManager",
    "BlockManager",
    "SingleDataManager",
    "SingleBlockManager",
    "SingleArrayManager",
    "concatenate_managers",
    # this is preserved here for downstream compatibility (GH-33892)
    "create_block_manager_from_blocks",
]


def __getattr__(name: str):
    import warnings

    from pandas.util._exceptions import find_stack_level

    if name == "CategoricalBlock":
        warnings.warn(
            "CategoricalBlock is deprecated and will be removed in a future version. "
            "Use ExtensionBlock instead.",
            DeprecationWarning,
            stacklevel=find_stack_level(),
        )
        from pandas.core.internals.blocks import CategoricalBlock

        return CategoricalBlock

    raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'")
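The module-level `__getattr__` above uses PEP 562 lazy attribute access: `CategoricalBlock` stays importable for downstream code while emitting a deprecation warning. A minimal sketch of what a downstream import sees (assuming a pandas build where this shim is still present; from-imports also go through the module `__getattr__` hook):

import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Accessing the deprecated name triggers the hook and the warning.
    from pandas.core.internals import CategoricalBlock

assert any(issubclass(w.category, DeprecationWarning) for w in caught)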
97  .venv/Lib/site-packages/pandas/core/internals/api.py  Normal file
@@ -0,0 +1,97 @@
"""
This is a pseudo-public API for downstream libraries. We ask that downstream
authors

1) Try to avoid using internals directly altogether, and failing that,
2) Use only functions exposed here (or in core.internals)

"""
from __future__ import annotations

import numpy as np

from pandas._libs.internals import BlockPlacement
from pandas._typing import Dtype

from pandas.core.dtypes.common import (
    is_datetime64tz_dtype,
    is_period_dtype,
    pandas_dtype,
)

from pandas.core.arrays import DatetimeArray
from pandas.core.construction import extract_array
from pandas.core.internals.blocks import (
    Block,
    DatetimeTZBlock,
    ExtensionBlock,
    check_ndim,
    ensure_block_shape,
    extract_pandas_array,
    get_block_type,
    maybe_coerce_values,
)


def make_block(
    values, placement, klass=None, ndim=None, dtype: Dtype | None = None
) -> Block:
    """
    This is a pseudo-public analogue to blocks.new_block.

    We ask that downstream libraries use this rather than any fully-internal
    APIs, including but not limited to:

    - core.internals.blocks.make_block
    - Block.make_block
    - Block.make_block_same_class
    - Block.__init__
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    values, dtype = extract_pandas_array(values, dtype, ndim)

    if klass is ExtensionBlock and is_period_dtype(values.dtype):
        # GH-44681 changed PeriodArray to be stored in the 2D
        # NDArrayBackedExtensionBlock instead of ExtensionBlock
        # -> still allow ExtensionBlock to be passed in this case for back compat
        klass = None

    if klass is None:
        dtype = dtype or values.dtype
        klass = get_block_type(dtype)

    elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype):
        # pyarrow calls get here
        values = DatetimeArray._simple_new(values, dtype=dtype)

    if not isinstance(placement, BlockPlacement):
        placement = BlockPlacement(placement)

    ndim = maybe_infer_ndim(values, placement, ndim)
    if is_datetime64tz_dtype(values.dtype) or is_period_dtype(values.dtype):
        # GH#41168 ensure we can pass 1D dt64tz values
        # More generally, any EA dtype that isn't is_1d_only_ea_dtype
        values = extract_array(values, extract_numpy=True)
        values = ensure_block_shape(values, ndim)

    check_ndim(values, placement, ndim)
    values = maybe_coerce_values(values)
    return klass(values, ndim=ndim, placement=placement)


def maybe_infer_ndim(values, placement: BlockPlacement, ndim: int | None) -> int:
    """
    If `ndim` is not provided, infer it from placement and values.
    """
    if ndim is None:
        # GH#38134 Block constructor now assumes ndim is not None
        if not isinstance(values.dtype, np.dtype):
            if len(placement) != 1:
                ndim = 1
            else:
                ndim = 2
        else:
            ndim = values.ndim
    return ndim
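A short sketch of the pseudo-public entry point in use. Note that Block values follow the internal (n_block_columns, n_rows) layout, and the exact block class returned is version-dependent:

import numpy as np

from pandas.core.internals.api import make_block

# 2D float values occupying column positions 0 and 1 of a 3-row frame;
# klass, ndim, and dtype are all inferred from values and placement.
values = np.arange(6, dtype=np.float64).reshape(2, 3)
blk = make_block(values, placement=[0, 1])
print(type(blk).__name__, blk.shape, blk.dtype)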
1367  .venv/Lib/site-packages/pandas/core/internals/array_manager.py  Normal file
File diff suppressed because it is too large
226  .venv/Lib/site-packages/pandas/core/internals/base.py  Normal file
@@ -0,0 +1,226 @@
"""
Base class for the internal managers. Both BlockManager and ArrayManager
inherit from this class.
"""
from __future__ import annotations

from typing import (
    TypeVar,
    final,
)

import numpy as np

from pandas._typing import (
    ArrayLike,
    DtypeObj,
    Shape,
)
from pandas.errors import AbstractMethodError

from pandas.core.dtypes.cast import (
    find_common_type,
    np_can_hold_element,
)

from pandas.core.base import PandasObject
from pandas.core.indexes.api import (
    Index,
    default_index,
)

T = TypeVar("T", bound="DataManager")


class DataManager(PandasObject):

    # TODO share more methods/attributes

    axes: list[Index]

    @property
    def items(self) -> Index:
        raise AbstractMethodError(self)

    @final
    def __len__(self) -> int:
        return len(self.items)

    @property
    def ndim(self) -> int:
        return len(self.axes)

    @property
    def shape(self) -> Shape:
        return tuple(len(ax) for ax in self.axes)

    @final
    def _validate_set_axis(self, axis: int, new_labels: Index) -> None:
        # Caller is responsible for ensuring we have an Index object.
        old_len = len(self.axes[axis])
        new_len = len(new_labels)

        if axis == 1 and len(self.items) == 0:
            # If we are setting the index on a DataFrame with no columns,
            # it is OK to change the length.
            pass

        elif new_len != old_len:
            raise ValueError(
                f"Length mismatch: Expected axis has {old_len} elements, new "
                f"values have {new_len} elements"
            )

    def reindex_indexer(
        self: T,
        new_axis,
        indexer,
        axis: int,
        fill_value=None,
        allow_dups: bool = False,
        copy: bool = True,
        consolidate: bool = True,
        only_slice: bool = False,
    ) -> T:
        raise AbstractMethodError(self)

    @final
    def reindex_axis(
        self: T,
        new_index: Index,
        axis: int,
        fill_value=None,
        consolidate: bool = True,
        only_slice: bool = False,
    ) -> T:
        """
        Conform data manager to new index.
        """
        new_index, indexer = self.axes[axis].reindex(new_index)

        return self.reindex_indexer(
            new_index,
            indexer,
            axis=axis,
            fill_value=fill_value,
            copy=False,
            consolidate=consolidate,
            only_slice=only_slice,
        )

    def _equal_values(self: T, other: T) -> bool:
        """
        To be implemented by the subclasses. Only check the column values
        assuming shape and indexes have already been checked.
        """
        raise AbstractMethodError(self)

    @final
    def equals(self, other: object) -> bool:
        """
        Implementation for DataFrame.equals
        """
        if not isinstance(other, DataManager):
            return False

        self_axes, other_axes = self.axes, other.axes
        if len(self_axes) != len(other_axes):
            return False
        if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)):
            return False

        return self._equal_values(other)

    def apply(
        self: T,
        f,
        align_keys: list[str] | None = None,
        ignore_failures: bool = False,
        **kwargs,
    ) -> T:
        raise AbstractMethodError(self)

    @final
    def isna(self: T, func) -> T:
        return self.apply("apply", func=func)

    # --------------------------------------------------------------------
    # Consolidation: No-ops for all but BlockManager

    def is_consolidated(self) -> bool:
        return True

    def consolidate(self: T) -> T:
        return self

    def _consolidate_inplace(self) -> None:
        return


class SingleDataManager(DataManager):
    ndim = 1

    @final
    @property
    def array(self) -> ArrayLike:
        """
        Quick access to the backing array of the Block or SingleArrayManager.
        """
        # error: "SingleDataManager" has no attribute "arrays"; maybe "array"
        return self.arrays[0]  # type: ignore[attr-defined]

    def setitem_inplace(self, indexer, value) -> None:
        """
        Set values with indexer.

        For Single[Block/Array]Manager, this backs s[indexer] = value

        This is an inplace version of `setitem()`, mutating the manager/values
        in place, not returning a new Manager (and Block), and thus never changing
        the dtype.
        """
        arr = self.array

        # EAs will do this validation in their own __setitem__ methods.
        if isinstance(arr, np.ndarray):
            # Note: checking for ndarray instead of np.dtype means we exclude
            # dt64/td64, which do their own validation.
            value = np_can_hold_element(arr.dtype, value)

        arr[indexer] = value

    def grouped_reduce(self, func, ignore_failures: bool = False):
        """
        ignore_failures : bool, default False
            Not used; for compatibility with ArrayManager/BlockManager.
        """
        arr = self.array
        res = func(arr)
        index = default_index(len(res))

        mgr = type(self).from_array(res, index)
        return mgr

    @classmethod
    def from_array(cls, arr: ArrayLike, index: Index):
        raise AbstractMethodError(cls)


def interleaved_dtype(dtypes: list[DtypeObj]) -> DtypeObj | None:
    """
    Find the common dtype for `dtypes`.

    Parameters
    ----------
    dtypes : list[DtypeObj]

    Returns
    -------
    dtype : np.dtype, ExtensionDtype, or None
        None is returned when `dtypes` is empty.
    """
    if not len(dtypes):
        return None

    return find_common_type(dtypes)
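A quick sketch of `interleaved_dtype`, which defers to `find_common_type`; the exact promotions are version-dependent, so treat the commented results as indicative:

import numpy as np

from pandas.core.internals.base import interleaved_dtype

# Mixed numeric dtypes promote to the common numeric type.
print(interleaved_dtype([np.dtype("int64"), np.dtype("float64")]))  # float64
# Anything mixed with object falls back to object.
print(interleaved_dtype([np.dtype("int64"), np.dtype("object")]))   # object
# An empty list has no common dtype.
print(interleaved_dtype([]))                                        # None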
2256  .venv/Lib/site-packages/pandas/core/internals/blocks.py  Normal file
File diff suppressed because it is too large
652  .venv/Lib/site-packages/pandas/core/internals/concat.py  Normal file
@@ -0,0 +1,652 @@
from __future__ import annotations

import itertools
from typing import (
    TYPE_CHECKING,
    Sequence,
    cast,
)

import numpy as np

from pandas._libs import (
    NaT,
    internals as libinternals,
)
from pandas._typing import (
    ArrayLike,
    DtypeObj,
    Manager,
    Shape,
)
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.cast import (
    ensure_dtype_can_hold_na,
    find_common_type,
)
from pandas.core.dtypes.common import (
    is_1d_only_ea_dtype,
    is_1d_only_ea_obj,
    is_datetime64tz_dtype,
    is_dtype_equal,
)
from pandas.core.dtypes.concat import (
    cast_to_common_type,
    concat_compat,
)
from pandas.core.dtypes.dtypes import ExtensionDtype

from pandas.core.arrays import (
    DatetimeArray,
    ExtensionArray,
)
from pandas.core.construction import ensure_wrapped_if_datetimelike
from pandas.core.internals.array_manager import (
    ArrayManager,
    NullArrayProxy,
)
from pandas.core.internals.blocks import (
    ensure_block_shape,
    new_block_2d,
)
from pandas.core.internals.managers import BlockManager

if TYPE_CHECKING:
    from pandas import Index
    from pandas.core.internals.blocks import Block


def _concatenate_array_managers(
    mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool
) -> Manager:
    """
    Concatenate array managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (ArrayManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    ArrayManager
    """
    # reindex all arrays
    mgrs = []
    for mgr, indexers in mgrs_indexers:
        axis1_made_copy = False
        for ax, indexer in indexers.items():
            mgr = mgr.reindex_indexer(
                axes[ax], indexer, axis=ax, allow_dups=True, use_na_proxy=True
            )
            if ax == 1 and indexer is not None:
                axis1_made_copy = True
        if copy and concat_axis == 0 and not axis1_made_copy:
            # for concat_axis 1 we will always get a copy through concat_arrays
            mgr = mgr.copy()
        mgrs.append(mgr)

    if concat_axis == 1:
        # concatting along the rows -> concat the reindexed arrays
        # TODO(ArrayManager) doesn't yet preserve the correct dtype
        arrays = [
            concat_arrays([mgrs[i].arrays[j] for i in range(len(mgrs))])
            for j in range(len(mgrs[0].arrays))
        ]
    else:
        # concatting along the columns -> combine reindexed arrays in a single manager
        assert concat_axis == 0
        arrays = list(itertools.chain.from_iterable([mgr.arrays for mgr in mgrs]))

    new_mgr = ArrayManager(arrays, [axes[1], axes[0]], verify_integrity=False)
    return new_mgr


def concat_arrays(to_concat: list) -> ArrayLike:
    """
    Alternative for concat_compat but specialized for use in the ArrayManager.

    Differences: only deals with 1D arrays (no axis keyword), assumes
    ensure_wrapped_if_datetimelike and does not skip empty arrays to determine
    the dtype.
    In addition ensures that all NullArrayProxies get replaced with actual
    arrays.

    Parameters
    ----------
    to_concat : list of arrays

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # ignore the all-NA proxies to determine the resulting dtype
    to_concat_no_proxy = [x for x in to_concat if not isinstance(x, NullArrayProxy)]

    dtypes = {x.dtype for x in to_concat_no_proxy}
    single_dtype = len(dtypes) == 1

    if single_dtype:
        target_dtype = to_concat_no_proxy[0].dtype
    elif all(x.kind in ["i", "u", "b"] and isinstance(x, np.dtype) for x in dtypes):
        # GH#42092
        target_dtype = np.find_common_type(list(dtypes), [])
    else:
        target_dtype = find_common_type([arr.dtype for arr in to_concat_no_proxy])

    if target_dtype.kind in ["m", "M"]:
        # for datetimelike use DatetimeArray/TimedeltaArray concatenation
        # don't use arr.astype(target_dtype, copy=False), because that doesn't
        # work for DatetimeArray/TimedeltaArray (returns ndarray)
        to_concat = [
            arr.to_array(target_dtype) if isinstance(arr, NullArrayProxy) else arr
            for arr in to_concat
        ]
        return type(to_concat_no_proxy[0])._concat_same_type(to_concat, axis=0)

    to_concat = [
        arr.to_array(target_dtype)
        if isinstance(arr, NullArrayProxy)
        else cast_to_common_type(arr, target_dtype)
        for arr in to_concat
    ]

    if isinstance(to_concat[0], ExtensionArray):
        cls = type(to_concat[0])
        return cls._concat_same_type(to_concat)

    result = np.concatenate(to_concat)

    # TODO decide on exact behaviour (we shouldn't do this only for empty result)
    # see https://github.com/pandas-dev/pandas/issues/39817
    if len(result) == 0:
        # all empties -> check for bool to not coerce to float
        kinds = {obj.dtype.kind for obj in to_concat_no_proxy}
        if len(kinds) != 1:
            if "b" in kinds:
                result = result.astype(object)
    return result


def concatenate_managers(
    mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool
) -> Manager:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    # TODO(ArrayManager) this assumes that all managers are of the same type
    if isinstance(mgrs_indexers[0][0], ArrayManager):
        return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy)

    # Assertions disabled for performance
    # for tup in mgrs_indexers:
    #     # caller is responsible for ensuring this
    #     indexers = tup[1]
    #     assert concat_axis not in indexers

    if concat_axis == 0:
        return _concat_managers_axis0(mgrs_indexers, axes, copy)

    mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)

    # Assertion disabled for performance
    # assert all(not x[1] for x in mgrs_indexers)

    concat_plans = [_get_mgr_concatenation_plan(mgr) for mgr, _ in mgrs_indexers]
    concat_plan = _combine_concat_plans(concat_plans)
    blocks = []

    for placement, join_units in concat_plan:
        unit = join_units[0]
        blk = unit.block

        if len(join_units) == 1:
            values = blk.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            fastpath = True
        elif _is_uniform_join_units(join_units):
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension:
                # _is_uniform_join_units ensures a single dtype, so
                # we can use np.concatenate, which is more performant
                # than concat_compat
                values = np.concatenate(vals, axis=1)
            else:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals, axis=1)
                values = ensure_block_shape(values, ndim=2)

            values = ensure_wrapped_if_datetimelike(values)

            fastpath = blk.values.dtype == values.dtype
        else:
            values = _concatenate_join_units(join_units, copy=copy)
            fastpath = False

        if fastpath:
            b = blk.make_block_same_class(values, placement=placement)
        else:
            b = new_block_2d(values, placement=placement)

        blocks.append(b)

    return BlockManager(tuple(blocks), axes)


def _concat_managers_axis0(
    mgrs_indexers, axes: list[Index], copy: bool
) -> BlockManager:
    """
    concat_managers specialized to concat_axis=0, with reindexing already
    having been done in _maybe_reindex_columns_na_proxy.
    """
    had_reindexers = {
        i: len(mgrs_indexers[i][1]) > 0 for i in range(len(mgrs_indexers))
    }
    mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)

    mgrs = [x[0] for x in mgrs_indexers]

    offset = 0
    blocks = []
    for i, mgr in enumerate(mgrs):
        # If we already reindexed, then we definitely don't need another copy
        made_copy = had_reindexers[i]

        for blk in mgr.blocks:
            if made_copy:
                nb = blk.copy(deep=False)
            elif copy:
                nb = blk.copy()
            else:
                # by slicing instead of copy(deep=False), we get a new array
                # object, see test_concat_copy
                nb = blk.getitem_block(slice(None))
            nb._mgr_locs = nb._mgr_locs.add(offset)
            blocks.append(nb)

        offset += len(mgr.items)
    return BlockManager(tuple(blocks), axes)


def _maybe_reindex_columns_na_proxy(
    axes: list[Index], mgrs_indexers: list[tuple[BlockManager, dict[int, np.ndarray]]]
) -> list[tuple[BlockManager, dict[int, np.ndarray]]]:
    """
    Reindex along columns so that all of the BlockManagers being concatenated
    have matching columns.

    Columns added in this reindexing have dtype=np.void, indicating they
    should be ignored when choosing a column's final dtype.
    """
    new_mgrs_indexers: list[tuple[BlockManager, dict[int, np.ndarray]]] = []

    for mgr, indexers in mgrs_indexers:
        # For axis=0 (i.e. columns) we use_na_proxy and only_slice, so this
        # is a cheap reindexing.
        for i, indexer in indexers.items():
            mgr = mgr.reindex_indexer(
                axes[i],
                indexers[i],
                axis=i,
                copy=False,
                only_slice=True,  # only relevant for i==0
                allow_dups=True,
                use_na_proxy=True,  # only relevant for i==0
            )
        new_mgrs_indexers.append((mgr, {}))

    return new_mgrs_indexers


def _get_mgr_concatenation_plan(mgr: BlockManager):
    """
    Construct concatenation plan for given block manager.

    Parameters
    ----------
    mgr : BlockManager

    Returns
    -------
    plan : list of (BlockPlacement, JoinUnit) tuples
    """
    # Calculate post-reindex shape, save for item axis which will be separate
    # for each block anyway.
    mgr_shape_list = list(mgr.shape)
    mgr_shape = tuple(mgr_shape_list)

    if mgr.is_single_block:
        blk = mgr.blocks[0]
        return [(blk.mgr_locs, JoinUnit(blk, mgr_shape))]

    blknos = mgr.blknos
    blklocs = mgr.blklocs

    plan = []
    for blkno, placements in libinternals.get_blkno_placements(blknos, group=False):

        assert placements.is_slice_like
        assert blkno != -1

        shape_list = list(mgr_shape)
        shape_list[0] = len(placements)
        shape = tuple(shape_list)

        blk = mgr.blocks[blkno]
        ax0_blk_indexer = blklocs[placements.indexer]

        unit_no_ax0_reindexing = (
            len(placements) == len(blk.mgr_locs)
            and
            # Fastpath detection of join unit not
            # needing to reindex its block: no ax0
            # reindexing took place and block
            # placement was sequential before.
            (
                (blk.mgr_locs.is_slice_like and blk.mgr_locs.as_slice.step == 1)
                or
                # Slow-ish detection: all indexer locs
                # are sequential (and length match is
                # checked above).
                (np.diff(ax0_blk_indexer) == 1).all()
            )
        )

        if not unit_no_ax0_reindexing:
            # create block from subset of columns
            blk = blk.getitem_block(ax0_blk_indexer)

        # Assertions disabled for performance
        # assert blk._mgr_locs.as_slice == placements.as_slice
        # assert blk.shape[0] == shape[0]
        unit = JoinUnit(blk, shape)

        plan.append((placements, unit))

    return plan


class JoinUnit:
    def __init__(self, block: Block, shape: Shape):
        # Passing shape explicitly is required for cases when block is None.
        self.block = block
        self.shape = shape

    def __repr__(self) -> str:
        return f"{type(self).__name__}({repr(self.block)})"

    @cache_readonly
    def is_na(self) -> bool:
        blk = self.block
        if blk.dtype.kind == "V":
            return True
        return False

    def get_reindexed_values(self, empty_dtype: DtypeObj) -> ArrayLike:
        values: ArrayLike

        if self.is_na:
            return make_na_array(empty_dtype, self.shape)

        else:

            if not self.block._can_consolidate:
                # preserve these for validation in concat_compat
                return self.block.values

            # No dtype upcasting is done here, it will be performed during
            # concatenation itself.
            values = self.block.values

        return values


def make_na_array(dtype: DtypeObj, shape: Shape) -> ArrayLike:
    """
    Construct an np.ndarray or ExtensionArray of the given dtype and shape
    holding all-NA values.
    """
    if is_datetime64tz_dtype(dtype):
        # NaT here is analogous to dtype.na_value below
        i8values = np.full(shape, NaT.value)
        return DatetimeArray(i8values, dtype=dtype)

    elif is_1d_only_ea_dtype(dtype):
        dtype = cast(ExtensionDtype, dtype)
        cls = dtype.construct_array_type()

        missing_arr = cls._from_sequence([], dtype=dtype)
        nrows = shape[-1]
        taker = -1 * np.ones((nrows,), dtype=np.intp)
        return missing_arr.take(taker, allow_fill=True, fill_value=dtype.na_value)
    elif isinstance(dtype, ExtensionDtype):
        # TODO: no tests get here, a handful would if we disabled
        # the dt64tz special-case above (which is faster)
        cls = dtype.construct_array_type()
        missing_arr = cls._empty(shape=shape, dtype=dtype)
        missing_arr[:] = dtype.na_value
        return missing_arr
    else:
        # NB: we should never get here with dtype integer or bool;
        # if we did, the missing_arr.fill would cast to gibberish
        missing_arr = np.empty(shape, dtype=dtype)
        fill_value = _dtype_to_na_value(dtype)
        missing_arr.fill(fill_value)
        return missing_arr


def _concatenate_join_units(join_units: list[JoinUnit], copy: bool) -> ArrayLike:
    """
    Concatenate values from several join units along axis=1.
    """

    empty_dtype = _get_empty_dtype(join_units)

    to_concat = [ju.get_reindexed_values(empty_dtype=empty_dtype) for ju in join_units]

    if len(to_concat) == 1:
        # Only one block, nothing to concatenate.
        concat_values = to_concat[0]
        if copy:
            if isinstance(concat_values, np.ndarray):
                # non-reindexed (=not yet copied) arrays are made into a view
                # in JoinUnit.get_reindexed_values
                if concat_values.base is not None:
                    concat_values = concat_values.copy()
            else:
                concat_values = concat_values.copy()

    elif any(is_1d_only_ea_obj(t) for t in to_concat):
        # TODO(EA2D): special case not needed if all EAs used HybridBlocks
        # NB: we are still assuming here that Hybrid blocks have shape (1, N)
        # concatting with at least one EA means we are concatting a single column
        # the non-EA values are 2D arrays with shape (1, n)

        # error: No overload variant of "__getitem__" of "ExtensionArray" matches
        # argument type "Tuple[int, slice]"
        to_concat = [
            t if is_1d_only_ea_obj(t) else t[0, :]  # type: ignore[call-overload]
            for t in to_concat
        ]
        concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True)
        concat_values = ensure_block_shape(concat_values, 2)

    else:
        concat_values = concat_compat(to_concat, axis=1)

    return concat_values


def _dtype_to_na_value(dtype: DtypeObj):
    """
    Find the NA value to go with this dtype.
    """
    if isinstance(dtype, ExtensionDtype):
        return dtype.na_value
    elif dtype.kind in ["m", "M"]:
        return dtype.type("NaT")
    elif dtype.kind in ["f", "c"]:
        return dtype.type("NaN")
    elif dtype.kind == "b":
        # different from missing.na_value_for_dtype
        return None
    elif dtype.kind in ["i", "u"]:
        return np.nan
    elif dtype.kind == "O":
        return np.nan
    raise NotImplementedError


def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj:
    """
    Return the dtype to use when concatenating the specified units.

    Returns
    -------
    dtype
    """
    if len(join_units) == 1:
        blk = join_units[0].block
        return blk.dtype

    if _is_uniform_reindex(join_units):
        empty_dtype = join_units[0].block.dtype
        return empty_dtype

    needs_can_hold_na = any(unit.is_na for unit in join_units)

    dtypes = [unit.block.dtype for unit in join_units if not unit.is_na]

    dtype = find_common_type(dtypes)
    if needs_can_hold_na:
        dtype = ensure_dtype_can_hold_na(dtype)
    return dtype


def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool:
    """
    Check if the join units consist of blocks of uniform type that can
    be concatenated using Block.concat_same_type instead of the generic
    _concatenate_join_units (which uses `concat_compat`).
    """
    first = join_units[0].block
    if first.dtype.kind == "V":
        return False
    return (
        # exclude cases where a) ju.block is None or b) we have e.g. Int64+int64
        all(type(ju.block) is type(first) for ju in join_units)
        and
        # e.g. DatetimeLikeBlock can be dt64 or td64, but these are not uniform
        all(
            is_dtype_equal(ju.block.dtype, first.dtype)
            # GH#42092 we only want the dtype_equal check for non-numeric blocks
            # (for now, may change but that would need a deprecation)
            or ju.block.dtype.kind in ["b", "i", "u"]
            for ju in join_units
        )
        and
        # no blocks that would get missing values (can lead to type upcasts)
        # unless we're an extension dtype.
        all(not ju.is_na or ju.block.is_extension for ju in join_units)
        and
        # only use this path when there is something to concatenate
        len(join_units) > 1
    )


def _is_uniform_reindex(join_units) -> bool:
    return (
        # TODO: should this be ju.block._can_hold_na?
        all(ju.block.is_extension for ju in join_units)
        and len({ju.block.dtype.name for ju in join_units}) == 1
    )


def _trim_join_unit(join_unit: JoinUnit, length: int) -> JoinUnit:
    """
    Reduce join_unit's shape along item axis to length.

    Extra items that didn't fit are returned as a separate block.
    """
    extra_block = join_unit.block.getitem_block(slice(length, None))
    join_unit.block = join_unit.block.getitem_block(slice(length))

    extra_shape = (join_unit.shape[0] - length,) + join_unit.shape[1:]
    join_unit.shape = (length,) + join_unit.shape[1:]

    return JoinUnit(block=extra_block, shape=extra_shape)


def _combine_concat_plans(plans):
    """
    Combine multiple concatenation plans into one.

    existing_plan is updated in-place.
    """
    if len(plans) == 1:
        for p in plans[0]:
            yield p[0], [p[1]]

    else:
        # singleton list so we can modify it as a side-effect within _next_or_none
        num_ended = [0]

        def _next_or_none(seq):
            retval = next(seq, None)
            if retval is None:
                num_ended[0] += 1
            return retval

        plans = list(map(iter, plans))
        next_items = list(map(_next_or_none, plans))

        while num_ended[0] != len(next_items):
            if num_ended[0] > 0:
                raise ValueError("Plan shapes are not aligned")

            placements, units = zip(*next_items)

            lengths = list(map(len, placements))
            min_len, max_len = min(lengths), max(lengths)

            if min_len == max_len:
                yield placements[0], units
                next_items[:] = map(_next_or_none, plans)
            else:
                yielded_placement = None
                yielded_units = [None] * len(next_items)
                for i, (plc, unit) in enumerate(next_items):
                    yielded_units[i] = unit
                    if len(plc) > min_len:
                        # _trim_join_unit updates unit in place, so only
                        # placement needs to be sliced to skip min_len.
                        next_items[i] = (plc[min_len:], _trim_join_unit(unit, min_len))
                    else:
                        yielded_placement = plc
                        next_items[i] = _next_or_none(plans[i])

                yield yielded_placement, yielded_units
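From the user side, `pd.concat` is what ultimately drives `concatenate_managers`; a minimal sketch of the all-NA fill path (`_get_empty_dtype` plus `make_na_array`) observed through public API:

import pandas as pd

left = pd.DataFrame({"a": [1, 2]})
right = pd.DataFrame({"b": [3.0, 4.0]})

# Each frame lacks the other's column, so the missing positions are filled
# with NA and the int64 column "a" is upcast to float64 to hold NaN.
out = pd.concat([left, right], ignore_index=True)
print(out.dtypes)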
1075  .venv/Lib/site-packages/pandas/core/internals/construction.py  Normal file
File diff suppressed because it is too large
2157  .venv/Lib/site-packages/pandas/core/internals/managers.py  Normal file
File diff suppressed because it is too large
147  .venv/Lib/site-packages/pandas/core/internals/ops.py  Normal file
@@ -0,0 +1,147 @@
from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    Iterator,
    NamedTuple,
)

from pandas._typing import ArrayLike

if TYPE_CHECKING:
    from pandas._libs.internals import BlockPlacement

    from pandas.core.internals.blocks import Block
    from pandas.core.internals.managers import BlockManager


class BlockPairInfo(NamedTuple):
    lvals: ArrayLike
    rvals: ArrayLike
    locs: BlockPlacement
    left_ea: bool
    right_ea: bool
    rblk: Block


def _iter_block_pairs(
    left: BlockManager, right: BlockManager
) -> Iterator[BlockPairInfo]:
    # At this point we have already checked the parent DataFrames for
    # assert rframe._indexed_same(lframe)

    for blk in left.blocks:
        locs = blk.mgr_locs
        blk_vals = blk.values

        left_ea = blk_vals.ndim == 1

        rblks = right._slice_take_blocks_ax0(locs.indexer, only_slice=True)

        # Assertions are disabled for performance, but should hold:
        # if left_ea:
        #     assert len(locs) == 1, locs
        #     assert len(rblks) == 1, rblks
        #     assert rblks[0].shape[0] == 1, rblks[0].shape

        for rblk in rblks:
            right_ea = rblk.values.ndim == 1

            lvals, rvals = _get_same_shape_values(blk, rblk, left_ea, right_ea)
            info = BlockPairInfo(lvals, rvals, locs, left_ea, right_ea, rblk)
            yield info


def operate_blockwise(
    left: BlockManager, right: BlockManager, array_op
) -> BlockManager:
    # At this point we have already checked the parent DataFrames for
    # assert rframe._indexed_same(lframe)

    res_blks: list[Block] = []
    for lvals, rvals, locs, left_ea, right_ea, rblk in _iter_block_pairs(left, right):
        res_values = array_op(lvals, rvals)
        if left_ea and not right_ea and hasattr(res_values, "reshape"):
            res_values = res_values.reshape(1, -1)
        nbs = rblk._split_op_result(res_values)

        # Assertions are disabled for performance, but should hold:
        # if right_ea or left_ea:
        #     assert len(nbs) == 1
        # else:
        #     assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape)

        _reset_block_mgr_locs(nbs, locs)

        res_blks.extend(nbs)

    # Assertions are disabled for performance, but should hold:
    # slocs = {y for nb in res_blks for y in nb.mgr_locs.as_array}
    # nlocs = sum(len(nb.mgr_locs.as_array) for nb in res_blks)
    # assert nlocs == len(left.items), (nlocs, len(left.items))
    # assert len(slocs) == nlocs, (len(slocs), nlocs)
    # assert slocs == set(range(nlocs)), slocs

    new_mgr = type(right)(tuple(res_blks), axes=right.axes, verify_integrity=False)
    return new_mgr


def _reset_block_mgr_locs(nbs: list[Block], locs):
    """
    Reset mgr_locs to correspond to our original DataFrame.
    """
    for nb in nbs:
        nblocs = locs[nb.mgr_locs.indexer]
        nb.mgr_locs = nblocs
        # Assertions are disabled for performance, but should hold:
        # assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape)
        # assert all(x in locs.as_array for x in nb.mgr_locs.as_array)


def _get_same_shape_values(
    lblk: Block, rblk: Block, left_ea: bool, right_ea: bool
) -> tuple[ArrayLike, ArrayLike]:
    """
    Slice lblk.values to align with rblk. Squeeze if we have EAs.
    """
    lvals = lblk.values
    rvals = rblk.values

    # Require that the indexing into lvals be slice-like
    assert rblk.mgr_locs.is_slice_like, rblk.mgr_locs

    # TODO(EA2D): with 2D EAs only this first clause would be needed
    if not (left_ea or right_ea):
        # error: No overload variant of "__getitem__" of "ExtensionArray" matches
        # argument type "Tuple[Union[ndarray, slice], slice]"
        lvals = lvals[rblk.mgr_locs.indexer, :]  # type: ignore[call-overload]
        assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
    elif left_ea and right_ea:
        assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
    elif right_ea:
        # lvals are 2D, rvals are 1D

        # error: No overload variant of "__getitem__" of "ExtensionArray" matches
        # argument type "Tuple[Union[ndarray, slice], slice]"
        lvals = lvals[rblk.mgr_locs.indexer, :]  # type: ignore[call-overload]
        assert lvals.shape[0] == 1, lvals.shape
        lvals = lvals[0, :]
    else:
        # lvals are 1D, rvals are 2D
        assert rvals.shape[0] == 1, rvals.shape
        # error: No overload variant of "__getitem__" of "ExtensionArray" matches
        # argument type "Tuple[int, slice]"
        rvals = rvals[0, :]  # type: ignore[call-overload]

    return lvals, rvals


def blockwise_all(left: BlockManager, right: BlockManager, op) -> bool:
    """
    Blockwise `all` reduction.
    """
    for info in _iter_block_pairs(left, right):
        res = op(info.lvals, info.rvals)
        if not res:
            return False
    return True
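For context, arithmetic between identically indexed DataFrames is dispatched through `operate_blockwise`, pairing each left block with the matching right blocks rather than operating column by column; a minimal sketch using only public API:

import pandas as pd

df1 = pd.DataFrame({"x": [1, 2], "y": [3.0, 4.0]})
df2 = pd.DataFrame({"x": [10, 20], "y": [0.5, 0.25]})

# Same index and columns -> the operation runs block-by-block under the hood.
print(df1 + df2)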