first commit

Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions


@@ -0,0 +1,41 @@
from pandas.core.arrays.base import (
ExtensionArray,
ExtensionOpsMixin,
ExtensionScalarOpsMixin,
)
from pandas.core.arrays.boolean import BooleanArray
from pandas.core.arrays.categorical import Categorical
from pandas.core.arrays.datetimes import DatetimeArray
from pandas.core.arrays.floating import FloatingArray
from pandas.core.arrays.integer import IntegerArray
from pandas.core.arrays.interval import IntervalArray
from pandas.core.arrays.masked import BaseMaskedArray
from pandas.core.arrays.numpy_ import PandasArray
from pandas.core.arrays.period import (
PeriodArray,
period_array,
)
from pandas.core.arrays.sparse import SparseArray
from pandas.core.arrays.string_ import StringArray
from pandas.core.arrays.string_arrow import ArrowStringArray
from pandas.core.arrays.timedeltas import TimedeltaArray
__all__ = [
"ExtensionArray",
"ExtensionOpsMixin",
"ExtensionScalarOpsMixin",
"ArrowStringArray",
"BaseMaskedArray",
"BooleanArray",
"Categorical",
"DatetimeArray",
"FloatingArray",
"IntegerArray",
"IntervalArray",
"PandasArray",
"PeriodArray",
"period_array",
"SparseArray",
"StringArray",
"TimedeltaArray",
]
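
The arrays exported here are normally constructed through pd.array rather than instantiated directly. A minimal usage sketch (not part of the diff; assumes a standard pandas install):

import pandas as pd

bools = pd.array([True, None], dtype="boolean")      # BooleanArray
ints = pd.array([1, None, 3], dtype="Int64")         # IntegerArray
cats = pd.array(["a", "b", "a"], dtype="category")   # Categorical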


@@ -0,0 +1,141 @@
import json
import numpy as np
import pyarrow
from pandas.core.arrays.interval import VALID_CLOSED
def pyarrow_array_to_numpy_and_mask(arr, dtype):
"""
Convert a primitive pyarrow.Array to a numpy array and boolean mask based
on the buffers of the Array.
At the moment pyarrow.BooleanArray is not supported.
Parameters
----------
arr : pyarrow.Array
dtype : numpy.dtype
Returns
-------
(data, mask)
Tuple of two numpy arrays with the raw data (with specified dtype) and
a boolean mask (validity mask, so False means missing)
"""
dtype = np.dtype(dtype)
buflist = arr.buffers()
# Since Arrow buffers might contain padding and the data might be offset,
# the buffer gets sliced here before handing it to numpy.
# See also https://github.com/pandas-dev/pandas/issues/40896
offset = arr.offset * dtype.itemsize
length = len(arr) * dtype.itemsize
data_buf = buflist[1][offset : offset + length]
data = np.frombuffer(data_buf, dtype=dtype)
bitmask = buflist[0]
if bitmask is not None:
mask = pyarrow.BooleanArray.from_buffers(
pyarrow.bool_(), len(arr), [None, bitmask], offset=arr.offset
)
mask = np.asarray(mask)
else:
mask = np.ones(len(arr), dtype=bool)
return data, mask
class ArrowPeriodType(pyarrow.ExtensionType):
def __init__(self, freq):
# attributes need to be set first before calling
# super init (as that calls serialize)
self._freq = freq
pyarrow.ExtensionType.__init__(self, pyarrow.int64(), "pandas.period")
@property
def freq(self):
return self._freq
def __arrow_ext_serialize__(self):
metadata = {"freq": self.freq}
return json.dumps(metadata).encode()
@classmethod
def __arrow_ext_deserialize__(cls, storage_type, serialized):
metadata = json.loads(serialized.decode())
return ArrowPeriodType(metadata["freq"])
def __eq__(self, other):
if isinstance(other, pyarrow.BaseExtensionType):
return type(self) == type(other) and self.freq == other.freq
else:
return NotImplemented
def __hash__(self):
return hash((str(self), self.freq))
def to_pandas_dtype(self):
import pandas as pd
return pd.PeriodDtype(freq=self.freq)
# register the type with a dummy instance
_period_type = ArrowPeriodType("D")
pyarrow.register_extension_type(_period_type)
class ArrowIntervalType(pyarrow.ExtensionType):
def __init__(self, subtype, closed):
# attributes need to be set first before calling
# super init (as that calls serialize)
assert closed in VALID_CLOSED
self._closed = closed
if not isinstance(subtype, pyarrow.DataType):
subtype = pyarrow.type_for_alias(str(subtype))
self._subtype = subtype
storage_type = pyarrow.struct([("left", subtype), ("right", subtype)])
pyarrow.ExtensionType.__init__(self, storage_type, "pandas.interval")
@property
def subtype(self):
return self._subtype
@property
def closed(self):
return self._closed
def __arrow_ext_serialize__(self):
metadata = {"subtype": str(self.subtype), "closed": self.closed}
return json.dumps(metadata).encode()
@classmethod
def __arrow_ext_deserialize__(cls, storage_type, serialized):
metadata = json.loads(serialized.decode())
subtype = pyarrow.type_for_alias(metadata["subtype"])
closed = metadata["closed"]
return ArrowIntervalType(subtype, closed)
def __eq__(self, other):
if isinstance(other, pyarrow.BaseExtensionType):
return (
type(self) == type(other)
and self.subtype == other.subtype
and self.closed == other.closed
)
else:
return NotImplemented
def __hash__(self):
return hash((str(self), str(self.subtype), self.closed))
def to_pandas_dtype(self):
import pandas as pd
return pd.IntervalDtype(self.subtype.to_pandas_dtype(), self.closed)
# register the type with a dummy instance
_interval_type = ArrowIntervalType(pyarrow.int64(), "left")
pyarrow.register_extension_type(_interval_type)
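
For illustration, a sketch of the mask-splitting helper above (assuming pyarrow is installed and the function is in scope; the slot under a null holds whatever bytes sit in the Arrow buffer):

import numpy as np
import pyarrow as pa

arr = pa.array([1, None, 3], type=pa.int64())
data, mask = pyarrow_array_to_numpy_and_mask(arr, np.int64)
# mask is the validity mask: array([ True, False,  True])
# data has dtype int64; data[1] is unspecified because that slot is null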


@@ -0,0 +1,496 @@
from __future__ import annotations
from functools import wraps
from typing import (
TYPE_CHECKING,
Any,
Literal,
Sequence,
TypeVar,
cast,
overload,
)
import numpy as np
from pandas._libs import lib
from pandas._libs.arrays import NDArrayBacked
from pandas._typing import (
ArrayLike,
Dtype,
F,
PositionalIndexer2D,
PositionalIndexerTuple,
ScalarIndexer,
SequenceIndexer,
Shape,
TakeIndexer,
npt,
type_t,
)
from pandas.errors import AbstractMethodError
from pandas.util._decorators import doc
from pandas.util._validators import (
validate_bool_kwarg,
validate_fillna_kwargs,
validate_insert_loc,
)
from pandas.core.dtypes.common import (
is_dtype_equal,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
DatetimeTZDtype,
ExtensionDtype,
PeriodDtype,
)
from pandas.core.dtypes.missing import array_equivalent
from pandas.core import missing
from pandas.core.algorithms import (
take,
unique,
value_counts,
)
from pandas.core.array_algos.quantile import quantile_with_mask
from pandas.core.array_algos.transforms import shift
from pandas.core.arrays.base import ExtensionArray
from pandas.core.construction import extract_array
from pandas.core.indexers import check_array_indexer
from pandas.core.sorting import nargminmax
NDArrayBackedExtensionArrayT = TypeVar(
"NDArrayBackedExtensionArrayT", bound="NDArrayBackedExtensionArray"
)
if TYPE_CHECKING:
from pandas._typing import (
NumpySorter,
NumpyValueArrayLike,
)
def ravel_compat(meth: F) -> F:
"""
Decorator to ravel a 2D array before passing it to a cython operation,
then reshape the result to our own shape.
"""
@wraps(meth)
def method(self, *args, **kwargs):
if self.ndim == 1:
return meth(self, *args, **kwargs)
flags = self._ndarray.flags
flat = self.ravel("K")
result = meth(flat, *args, **kwargs)
order = "F" if flags.f_contiguous else "C"
return result.reshape(self.shape, order=order)
return cast(F, method)
class NDArrayBackedExtensionArray(NDArrayBacked, ExtensionArray):
"""
ExtensionArray that is backed by a single NumPy ndarray.
"""
_ndarray: np.ndarray
def _box_func(self, x):
"""
Wrap numpy type in our dtype.type if necessary.
"""
return x
def _validate_scalar(self, value):
# used by NDArrayBackedExtensionIndex.insert
raise AbstractMethodError(self)
# ------------------------------------------------------------------------
def view(self, dtype: Dtype | None = None) -> ArrayLike:
# We handle datetime64, datetime64tz, timedelta64, and period
# dtypes here. Everything else we pass through to the underlying
# ndarray.
if dtype is None or dtype is self.dtype:
return self._from_backing_data(self._ndarray)
if isinstance(dtype, type):
# we sometimes pass non-dtype objects, e.g np.ndarray;
# pass those through to the underlying ndarray
return self._ndarray.view(dtype)
dtype = pandas_dtype(dtype)
arr = self._ndarray
if isinstance(dtype, (PeriodDtype, DatetimeTZDtype)):
cls = dtype.construct_array_type()
return cls(arr.view("i8"), dtype=dtype)
elif dtype == "M8[ns]":
from pandas.core.arrays import DatetimeArray
return DatetimeArray(arr.view("i8"), dtype=dtype)
elif dtype == "m8[ns]":
from pandas.core.arrays import TimedeltaArray
return TimedeltaArray(arr.view("i8"), dtype=dtype)
# error: Argument "dtype" to "view" of "_ArrayOrScalarCommon" has incompatible
# type "Union[ExtensionDtype, dtype[Any]]"; expected "Union[dtype[Any], None,
# type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int,
# Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]"
return arr.view(dtype=dtype) # type: ignore[arg-type]
def take(
self: NDArrayBackedExtensionArrayT,
indices: TakeIndexer,
*,
allow_fill: bool = False,
fill_value: Any = None,
axis: int = 0,
) -> NDArrayBackedExtensionArrayT:
if allow_fill:
fill_value = self._validate_scalar(fill_value)
new_data = take(
self._ndarray,
indices,
allow_fill=allow_fill,
fill_value=fill_value,
axis=axis,
)
return self._from_backing_data(new_data)
# ------------------------------------------------------------------------
def equals(self, other) -> bool:
if type(self) is not type(other):
return False
if not is_dtype_equal(self.dtype, other.dtype):
return False
return bool(array_equivalent(self._ndarray, other._ndarray))
def _values_for_argsort(self) -> np.ndarray:
return self._ndarray
# Signature of "argmin" incompatible with supertype "ExtensionArray"
def argmin(self, axis: int = 0, skipna: bool = True): # type:ignore[override]
# override base class by adding axis keyword
validate_bool_kwarg(skipna, "skipna")
if not skipna and self.isna().any():
raise NotImplementedError
return nargminmax(self, "argmin", axis=axis)
# Signature of "argmax" incompatible with supertype "ExtensionArray"
def argmax(self, axis: int = 0, skipna: bool = True): # type:ignore[override]
# override base class by adding axis keyword
validate_bool_kwarg(skipna, "skipna")
if not skipna and self.isna().any():
raise NotImplementedError
return nargminmax(self, "argmax", axis=axis)
def unique(self: NDArrayBackedExtensionArrayT) -> NDArrayBackedExtensionArrayT:
new_data = unique(self._ndarray)
return self._from_backing_data(new_data)
@classmethod
@doc(ExtensionArray._concat_same_type)
def _concat_same_type(
cls: type[NDArrayBackedExtensionArrayT],
to_concat: Sequence[NDArrayBackedExtensionArrayT],
axis: int = 0,
) -> NDArrayBackedExtensionArrayT:
dtypes = {str(x.dtype) for x in to_concat}
if len(dtypes) != 1:
raise ValueError("to_concat must have the same dtype (tz)", dtypes)
new_values = [x._ndarray for x in to_concat]
new_values = np.concatenate(new_values, axis=axis)
# error: Argument 1 to "_from_backing_data" of "NDArrayBackedExtensionArray" has
# incompatible type "List[ndarray]"; expected "ndarray"
return to_concat[0]._from_backing_data(new_values) # type: ignore[arg-type]
@doc(ExtensionArray.searchsorted)
def searchsorted(
self,
value: NumpyValueArrayLike | ExtensionArray,
side: Literal["left", "right"] = "left",
sorter: NumpySorter = None,
) -> npt.NDArray[np.intp] | np.intp:
npvalue = self._validate_searchsorted_value(value)
return self._ndarray.searchsorted(npvalue, side=side, sorter=sorter)
def _validate_searchsorted_value(
self, value: NumpyValueArrayLike | ExtensionArray
) -> NumpyValueArrayLike:
if isinstance(value, ExtensionArray):
return value.to_numpy()
else:
return value
@doc(ExtensionArray.shift)
def shift(self, periods=1, fill_value=None, axis=0):
fill_value = self._validate_shift_value(fill_value)
new_values = shift(self._ndarray, periods, axis, fill_value)
return self._from_backing_data(new_values)
def _validate_shift_value(self, fill_value):
# TODO(2.0): after deprecation in datetimelikearraymixin is enforced,
# we can remove this and use validate_fill_value directly
return self._validate_scalar(fill_value)
def __setitem__(self, key, value):
key = check_array_indexer(self, key)
value = self._validate_setitem_value(value)
self._ndarray[key] = value
def _validate_setitem_value(self, value):
return value
@overload
def __getitem__(self, key: ScalarIndexer) -> Any:
...
@overload
def __getitem__(
self: NDArrayBackedExtensionArrayT,
key: SequenceIndexer | PositionalIndexerTuple,
) -> NDArrayBackedExtensionArrayT:
...
def __getitem__(
self: NDArrayBackedExtensionArrayT,
key: PositionalIndexer2D,
) -> NDArrayBackedExtensionArrayT | Any:
if lib.is_integer(key):
# fast-path
result = self._ndarray[key]
if self.ndim == 1:
return self._box_func(result)
return self._from_backing_data(result)
# error: Incompatible types in assignment (expression has type "ExtensionArray",
# variable has type "Union[int, slice, ndarray]")
key = extract_array(key, extract_numpy=True) # type: ignore[assignment]
key = check_array_indexer(self, key)
result = self._ndarray[key]
if lib.is_scalar(result):
return self._box_func(result)
result = self._from_backing_data(result)
return result
def _fill_mask_inplace(
self, method: str, limit, mask: npt.NDArray[np.bool_]
) -> None:
# (for now) when self.ndim == 2, we assume axis=0
func = missing.get_fill_func(method, ndim=self.ndim)
func(self._ndarray.T, limit=limit, mask=mask.T)
return
@doc(ExtensionArray.fillna)
def fillna(
self: NDArrayBackedExtensionArrayT, value=None, method=None, limit=None
) -> NDArrayBackedExtensionArrayT:
value, method = validate_fillna_kwargs(
value, method, validate_scalar_dict_value=False
)
mask = self.isna()
# error: Argument 2 to "check_value_size" has incompatible type
# "ExtensionArray"; expected "ndarray"
value = missing.check_value_size(
value, mask, len(self) # type: ignore[arg-type]
)
if mask.any():
if method is not None:
# TODO: check value is None
# (for now) when self.ndim == 2, we assume axis=0
func = missing.get_fill_func(method, ndim=self.ndim)
new_values, _ = func(self._ndarray.T.copy(), limit=limit, mask=mask.T)
new_values = new_values.T
                # TODO: PandasArray didn't use to copy; need tests for this
new_values = self._from_backing_data(new_values)
else:
# fill with value
new_values = self.copy()
new_values[mask] = value
else:
# We validate the fill_value even if there is nothing to fill
if value is not None:
self._validate_setitem_value(value)
new_values = self.copy()
return new_values
# ------------------------------------------------------------------------
# Reductions
def _wrap_reduction_result(self, axis: int | None, result):
if axis is None or self.ndim == 1:
return self._box_func(result)
return self._from_backing_data(result)
# ------------------------------------------------------------------------
# __array_function__ methods
def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
"""
Analogue to np.putmask(self, mask, value)
Parameters
----------
mask : np.ndarray[bool]
value : scalar or listlike
Raises
------
TypeError
If value cannot be cast to self.dtype.
"""
value = self._validate_setitem_value(value)
np.putmask(self._ndarray, mask, value)
def _where(
self: NDArrayBackedExtensionArrayT, mask: np.ndarray, value
) -> NDArrayBackedExtensionArrayT:
"""
Analogue to np.where(mask, self, value)
Parameters
----------
mask : np.ndarray[bool]
value : scalar or listlike
Raises
------
TypeError
If value cannot be cast to self.dtype.
"""
value = self._validate_setitem_value(value)
res_values = np.where(mask, self._ndarray, value)
return self._from_backing_data(res_values)
# ------------------------------------------------------------------------
# Index compat methods
def insert(
self: NDArrayBackedExtensionArrayT, loc: int, item
) -> NDArrayBackedExtensionArrayT:
"""
Make new ExtensionArray inserting new item at location. Follows
Python list.append semantics for negative values.
Parameters
----------
loc : int
item : object
Returns
-------
type(self)
"""
loc = validate_insert_loc(loc, len(self))
code = self._validate_scalar(item)
new_vals = np.concatenate(
(
self._ndarray[:loc],
np.asarray([code], dtype=self._ndarray.dtype),
self._ndarray[loc:],
)
)
return self._from_backing_data(new_vals)
# ------------------------------------------------------------------------
# Additional array methods
# These are not part of the EA API, but we implement them because
# pandas assumes they're there.
def value_counts(self, dropna: bool = True):
"""
Return a Series containing counts of unique values.
Parameters
----------
dropna : bool, default True
Don't include counts of NA values.
Returns
-------
Series
"""
if self.ndim != 1:
raise NotImplementedError
from pandas import (
Index,
Series,
)
if dropna:
# error: Unsupported operand type for ~ ("ExtensionArray")
values = self[~self.isna()]._ndarray # type: ignore[operator]
else:
values = self._ndarray
result = value_counts(values, sort=False, dropna=dropna)
index_arr = self._from_backing_data(np.asarray(result.index._data))
index = Index(index_arr, name=result.index.name)
return Series(result._values, index=index, name=result.name)
def _quantile(
self: NDArrayBackedExtensionArrayT,
qs: npt.NDArray[np.float64],
interpolation: str,
) -> NDArrayBackedExtensionArrayT:
# TODO: disable for Categorical if not ordered?
# asarray needed for Sparse, see GH#24600
mask = np.asarray(self.isna())
mask = np.atleast_2d(mask)
arr = np.atleast_2d(self._ndarray)
# TODO: something NDArrayBacked-specific instead of _values_for_factorize[1]?
fill_value = self._values_for_factorize()[1]
res_values = quantile_with_mask(arr, mask, fill_value, qs, interpolation)
result = type(self)._from_factorized(res_values, self)
if self.ndim == 1:
assert result.shape == (1, len(qs)), result.shape
result = result[0]
return result
# ------------------------------------------------------------------------
# numpy-like methods
@classmethod
def _empty(
cls: type_t[NDArrayBackedExtensionArrayT], shape: Shape, dtype: ExtensionDtype
) -> NDArrayBackedExtensionArrayT:
"""
Analogous to np.empty(shape, dtype=dtype)
Parameters
----------
shape : tuple[int]
dtype : ExtensionDtype
"""
# The base implementation uses a naive approach to find the dtype
# for the backing ndarray
arr = cls._from_sequence([], dtype=dtype)
backing = np.empty(shape, dtype=arr._ndarray.dtype)
return arr._from_backing_data(backing)
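
Concrete subclasses such as DatetimeArray and Categorical inherit these methods. A short sketch of the take/insert semantics defined above (a usage illustration, not part of the commit):

import pandas as pd

dta = pd.array(pd.date_range("2022-05-23", periods=3))   # a DatetimeArray
# allow_fill=True routes fill_value through _validate_scalar before the take
dta.take([0, -1], allow_fill=True, fill_value=pd.NaT)
# insert() validates the scalar, then concatenates the backing ndarray
dta.insert(1, pd.Timestamp("2022-05-24"))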


@@ -0,0 +1,193 @@
"""
Helper functions to generate range-like data for DatetimeArray
(and possibly TimedeltaArray/PeriodArray)
"""
from __future__ import annotations
import numpy as np
from pandas._libs.lib import i8max
from pandas._libs.tslibs import (
BaseOffset,
OutOfBoundsDatetime,
Timedelta,
Timestamp,
iNaT,
)
def generate_regular_range(
start: Timestamp | Timedelta,
end: Timestamp | Timedelta,
periods: int,
freq: BaseOffset,
):
"""
Generate a range of dates or timestamps with the spans between dates
described by the given `freq` DateOffset.
Parameters
----------
start : Timedelta, Timestamp or None
First point of produced date range.
end : Timedelta, Timestamp or None
Last point of produced date range.
periods : int
Number of periods in produced date range.
freq : Tick
Describes space between dates in produced date range.
Returns
-------
ndarray[np.int64] Representing nanoseconds.
"""
istart = start.value if start is not None else None
iend = end.value if end is not None else None
stride = freq.nanos
if periods is None:
b = istart
# cannot just use e = Timestamp(end) + 1 because arange breaks when
# stride is too large, see GH10887
e = b + (iend - b) // stride * stride + stride // 2 + 1
elif istart is not None:
b = istart
e = _generate_range_overflow_safe(b, periods, stride, side="start")
elif iend is not None:
e = iend + stride
b = _generate_range_overflow_safe(e, periods, stride, side="end")
else:
raise ValueError(
"at least 'start' or 'end' should be specified if a 'period' is given."
)
with np.errstate(over="raise"):
# If the range is sufficiently large, np.arange may overflow
# and incorrectly return an empty array if not caught.
try:
values = np.arange(b, e, stride, dtype=np.int64)
except FloatingPointError:
xdr = [b]
while xdr[-1] != e:
xdr.append(xdr[-1] + stride)
values = np.array(xdr[:-1], dtype=np.int64)
return values
def _generate_range_overflow_safe(
endpoint: int, periods: int, stride: int, side: str = "start"
) -> int:
"""
Calculate the second endpoint for passing to np.arange, checking
to avoid an integer overflow. Catch OverflowError and re-raise
as OutOfBoundsDatetime.
Parameters
----------
endpoint : int
nanosecond timestamp of the known endpoint of the desired range
periods : int
number of periods in the desired range
stride : int
nanoseconds between periods in the desired range
side : {'start', 'end'}
which end of the range `endpoint` refers to
Returns
-------
other_end : int
Raises
------
OutOfBoundsDatetime
"""
# GH#14187 raise instead of incorrectly wrapping around
assert side in ["start", "end"]
i64max = np.uint64(i8max)
msg = f"Cannot generate range with {side}={endpoint} and periods={periods}"
with np.errstate(over="raise"):
# if periods * strides cannot be multiplied within the *uint64* bounds,
# we cannot salvage the operation by recursing, so raise
try:
addend = np.uint64(periods) * np.uint64(np.abs(stride))
except FloatingPointError as err:
raise OutOfBoundsDatetime(msg) from err
if np.abs(addend) <= i64max:
# relatively easy case without casting concerns
return _generate_range_overflow_safe_signed(endpoint, periods, stride, side)
elif (endpoint > 0 and side == "start" and stride > 0) or (
endpoint < 0 and side == "end" and stride > 0
):
# no chance of not-overflowing
raise OutOfBoundsDatetime(msg)
elif side == "end" and endpoint > i64max and endpoint - stride <= i64max:
        # in generate_regular_range we added `stride`, thereby overflowing
# the bounds. Adjust to fix this.
return _generate_range_overflow_safe(
endpoint - stride, periods - 1, stride, side
)
# split into smaller pieces
mid_periods = periods // 2
remaining = periods - mid_periods
assert 0 < remaining < periods, (remaining, periods, endpoint, stride)
midpoint = _generate_range_overflow_safe(endpoint, mid_periods, stride, side)
return _generate_range_overflow_safe(midpoint, remaining, stride, side)
def _generate_range_overflow_safe_signed(
endpoint: int, periods: int, stride: int, side: str
) -> int:
"""
A special case for _generate_range_overflow_safe where `periods * stride`
can be calculated without overflowing int64 bounds.
"""
assert side in ["start", "end"]
if side == "end":
stride *= -1
with np.errstate(over="raise"):
addend = np.int64(periods) * np.int64(stride)
try:
# easy case with no overflows
result = np.int64(endpoint) + addend
if result == iNaT:
# Putting this into a DatetimeArray/TimedeltaArray
# would incorrectly be interpreted as NaT
raise OverflowError
# error: Incompatible return value type (got "signedinteger[_64Bit]",
# expected "int")
return result # type: ignore[return-value]
except (FloatingPointError, OverflowError):
# with endpoint negative and addend positive we risk
# FloatingPointError; with reversed signed we risk OverflowError
pass
# if stride and endpoint had opposite signs, then endpoint + addend
# should never overflow. so they must have the same signs
assert (stride > 0 and endpoint >= 0) or (stride < 0 and endpoint <= 0)
if stride > 0:
# watch out for very special case in which we just slightly
# exceed implementation bounds, but when passing the result to
# np.arange will get a result slightly within the bounds
# error: Incompatible types in assignment (expression has type
# "unsignedinteger[_64Bit]", variable has type "signedinteger[_64Bit]")
result = np.uint64(endpoint) + np.uint64(addend) # type: ignore[assignment]
i64max = np.uint64(i8max)
assert result > i64max
if result <= i64max + np.uint64(stride):
# error: Incompatible return value type (got "unsignedinteger", expected
# "int")
return result # type: ignore[return-value]
raise OutOfBoundsDatetime(
f"Cannot generate range with {side}={endpoint} and periods={periods}"
)
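
A worked sketch of generate_regular_range (assuming the function is in scope and that freq is a Tick offset exposing .nanos):

import pandas as pd
from pandas.tseries.frequencies import to_offset

start = pd.Timestamp("2022-05-23")
freq = to_offset("D")   # Day is a Tick, so freq.nanos is the stride in ns
vals = generate_regular_range(start, None, 3, freq)
# -> three int64 nanosecond positions spaced 86_400_000_000_000 ns apart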

File diff suppressed because it is too large


@@ -0,0 +1,535 @@
from __future__ import annotations
import numbers
from typing import (
TYPE_CHECKING,
overload,
)
import numpy as np
from pandas._libs import (
lib,
missing as libmissing,
)
from pandas._typing import (
ArrayLike,
AstypeArg,
Dtype,
DtypeObj,
npt,
type_t,
)
from pandas.core.dtypes.common import (
is_bool_dtype,
is_float_dtype,
is_integer_dtype,
is_list_like,
is_numeric_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
ExtensionDtype,
register_extension_dtype,
)
from pandas.core.dtypes.missing import isna
from pandas.core import ops
from pandas.core.arrays import ExtensionArray
from pandas.core.arrays.masked import (
BaseMaskedArray,
BaseMaskedDtype,
)
if TYPE_CHECKING:
import pyarrow
@register_extension_dtype
class BooleanDtype(BaseMaskedDtype):
"""
Extension dtype for boolean data.
.. versionadded:: 1.0.0
.. warning::
BooleanDtype is considered experimental. The implementation and
parts of the API may change without warning.
Attributes
----------
None
Methods
-------
None
Examples
--------
>>> pd.BooleanDtype()
BooleanDtype
"""
name = "boolean"
# https://github.com/python/mypy/issues/4125
# error: Signature of "type" incompatible with supertype "BaseMaskedDtype"
@property
def type(self) -> type: # type: ignore[override]
return np.bool_
@property
def kind(self) -> str:
return "b"
@property
def numpy_dtype(self) -> np.dtype:
return np.dtype("bool")
@classmethod
def construct_array_type(cls) -> type_t[BooleanArray]:
"""
Return the array type associated with this dtype.
Returns
-------
type
"""
return BooleanArray
def __repr__(self) -> str:
return "BooleanDtype"
@property
def _is_boolean(self) -> bool:
return True
@property
def _is_numeric(self) -> bool:
return True
def __from_arrow__(
self, array: pyarrow.Array | pyarrow.ChunkedArray
) -> BooleanArray:
"""
Construct BooleanArray from pyarrow Array/ChunkedArray.
"""
import pyarrow
if array.type != pyarrow.bool_():
raise TypeError(f"Expected array of boolean type, got {array.type} instead")
if isinstance(array, pyarrow.Array):
chunks = [array]
else:
# pyarrow.ChunkedArray
chunks = array.chunks
results = []
for arr in chunks:
buflist = arr.buffers()
data = pyarrow.BooleanArray.from_buffers(
arr.type, len(arr), [None, buflist[1]], offset=arr.offset
).to_numpy(zero_copy_only=False)
if arr.null_count != 0:
mask = pyarrow.BooleanArray.from_buffers(
arr.type, len(arr), [None, buflist[0]], offset=arr.offset
).to_numpy(zero_copy_only=False)
mask = ~mask
else:
mask = np.zeros(len(arr), dtype=bool)
bool_arr = BooleanArray(data, mask)
results.append(bool_arr)
if not results:
return BooleanArray(
np.array([], dtype=np.bool_), np.array([], dtype=np.bool_)
)
else:
return BooleanArray._concat_same_type(results)
def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
# Handle only boolean + np.bool_ -> boolean, since other cases like
# Int64 + boolean -> Int64 will be handled by the other type
if all(
isinstance(t, BooleanDtype)
or (isinstance(t, np.dtype) and (np.issubdtype(t, np.bool_)))
for t in dtypes
):
return BooleanDtype()
else:
return None
def coerce_to_array(
values, mask=None, copy: bool = False
) -> tuple[np.ndarray, np.ndarray]:
"""
Coerce the input values array to numpy arrays with a mask.
Parameters
----------
values : 1D list-like
mask : bool 1D array, optional
copy : bool, default False
if True, copy the input
Returns
-------
tuple of (values, mask)
"""
if isinstance(values, BooleanArray):
if mask is not None:
raise ValueError("cannot pass mask for BooleanArray input")
values, mask = values._data, values._mask
if copy:
values = values.copy()
mask = mask.copy()
return values, mask
mask_values = None
if isinstance(values, np.ndarray) and values.dtype == np.bool_:
if copy:
values = values.copy()
elif isinstance(values, np.ndarray) and is_numeric_dtype(values.dtype):
mask_values = isna(values)
values_bool = np.zeros(len(values), dtype=bool)
values_bool[~mask_values] = values[~mask_values].astype(bool)
if not np.all(
values_bool[~mask_values].astype(values.dtype) == values[~mask_values]
):
raise TypeError("Need to pass bool-like values")
values = values_bool
else:
values_object = np.asarray(values, dtype=object)
inferred_dtype = lib.infer_dtype(values_object, skipna=True)
integer_like = ("floating", "integer", "mixed-integer-float")
if inferred_dtype not in ("boolean", "empty") + integer_like:
raise TypeError("Need to pass bool-like values")
mask_values = isna(values_object)
values = np.zeros(len(values), dtype=bool)
values[~mask_values] = values_object[~mask_values].astype(bool)
        # if the values were integer-like, validate they were actually 0/1's
if (inferred_dtype in integer_like) and not (
np.all(
values[~mask_values].astype(float)
== values_object[~mask_values].astype(float)
)
):
raise TypeError("Need to pass bool-like values")
if mask is None and mask_values is None:
mask = np.zeros(len(values), dtype=bool)
elif mask is None:
mask = mask_values
else:
if isinstance(mask, np.ndarray) and mask.dtype == np.bool_:
if mask_values is not None:
mask = mask | mask_values
else:
if copy:
mask = mask.copy()
else:
mask = np.array(mask, dtype=bool)
if mask_values is not None:
mask = mask | mask_values
if values.shape != mask.shape:
raise ValueError("values.shape and mask.shape must match")
return values, mask
class BooleanArray(BaseMaskedArray):
"""
Array of boolean (True/False) data with missing values.
This is a pandas Extension array for boolean data, under the hood
represented by 2 numpy arrays: a boolean array with the data and
a boolean array with the mask (True indicating missing).
BooleanArray implements Kleene logic (sometimes called three-value
logic) for logical operations. See :ref:`boolean.kleene` for more.
    To construct a BooleanArray from generic array-like input, use
:func:`pandas.array` specifying ``dtype="boolean"`` (see examples
below).
.. versionadded:: 1.0.0
.. warning::
BooleanArray is considered experimental. The implementation and
parts of the API may change without warning.
Parameters
----------
values : numpy.ndarray
A 1-d boolean-dtype array with the data.
mask : numpy.ndarray
A 1-d boolean-dtype array indicating missing values (True
indicates missing).
copy : bool, default False
Whether to copy the `values` and `mask` arrays.
Attributes
----------
None
Methods
-------
None
Returns
-------
BooleanArray
Examples
--------
    Create a BooleanArray with :func:`pandas.array`:
>>> pd.array([True, False, None], dtype="boolean")
<BooleanArray>
[True, False, <NA>]
Length: 3, dtype: boolean
"""
# The value used to fill '_data' to avoid upcasting
_internal_fill_value = False
# Fill values used for any/all
_truthy_value = True
_falsey_value = False
_TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"}
_FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"}
def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
if not (isinstance(values, np.ndarray) and values.dtype == np.bool_):
raise TypeError(
"values should be boolean numpy array. Use "
"the 'pd.array' function instead"
)
self._dtype = BooleanDtype()
super().__init__(values, mask, copy=copy)
@property
def dtype(self) -> BooleanDtype:
return self._dtype
@classmethod
def _from_sequence(
cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
) -> BooleanArray:
if dtype:
assert dtype == "boolean"
values, mask = coerce_to_array(scalars, copy=copy)
return BooleanArray(values, mask)
@classmethod
def _from_sequence_of_strings(
cls,
strings: list[str],
*,
dtype: Dtype | None = None,
copy: bool = False,
true_values: list[str] | None = None,
false_values: list[str] | None = None,
) -> BooleanArray:
true_values_union = cls._TRUE_VALUES.union(true_values or [])
false_values_union = cls._FALSE_VALUES.union(false_values or [])
def map_string(s):
if isna(s):
return s
elif s in true_values_union:
return True
elif s in false_values_union:
return False
else:
raise ValueError(f"{s} cannot be cast to bool")
scalars = [map_string(x) for x in strings]
return cls._from_sequence(scalars, dtype=dtype, copy=copy)
_HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_)
def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]:
return coerce_to_array(value)
@overload
def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
...
@overload
def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
...
@overload
def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
...
def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
"""
Cast to a NumPy array or ExtensionArray with 'dtype'.
Parameters
----------
dtype : str or dtype
Typecode or data-type to which the array is cast.
copy : bool, default True
Whether to copy the data, even if not necessary. If False,
a copy is made only if the old dtype does not match the
new dtype.
Returns
-------
ndarray or ExtensionArray
NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype.
Raises
------
TypeError
            if incompatible type with a BooleanDtype, equivalent of same_kind
casting
"""
dtype = pandas_dtype(dtype)
if isinstance(dtype, ExtensionDtype):
return super().astype(dtype, copy)
if is_bool_dtype(dtype):
# astype_nansafe converts np.nan to True
if self._hasna:
raise ValueError("cannot convert float NaN to bool")
else:
return self._data.astype(dtype, copy=copy)
# for integer, error if there are missing values
if is_integer_dtype(dtype) and self._hasna:
raise ValueError("cannot convert NA to integer")
# for float dtype, ensure we use np.nan before casting (numpy cannot
# deal with pd.NA)
na_value = self._na_value
if is_float_dtype(dtype):
na_value = np.nan
# coerce
return self.to_numpy(dtype=dtype, na_value=na_value, copy=False)
def _values_for_argsort(self) -> np.ndarray:
"""
Return values for sorting.
Returns
-------
ndarray
The transformed values should maintain the ordering between values
within the array.
See Also
--------
ExtensionArray.argsort : Return the indices that would sort this array.
"""
data = self._data.copy()
data[self._mask] = -1
return data
def _logical_method(self, other, op):
assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}
other_is_booleanarray = isinstance(other, BooleanArray)
other_is_scalar = lib.is_scalar(other)
mask = None
if other_is_booleanarray:
other, mask = other._data, other._mask
elif is_list_like(other):
other = np.asarray(other, dtype="bool")
if other.ndim > 1:
raise NotImplementedError("can only perform ops with 1-d structures")
other, mask = coerce_to_array(other, copy=False)
elif isinstance(other, np.bool_):
other = other.item()
if other_is_scalar and other is not libmissing.NA and not lib.is_bool(other):
raise TypeError(
"'other' should be pandas.NA or a bool. "
f"Got {type(other).__name__} instead."
)
if not other_is_scalar and len(self) != len(other):
raise ValueError("Lengths must match to compare")
if op.__name__ in {"or_", "ror_"}:
result, mask = ops.kleene_or(self._data, other, self._mask, mask)
elif op.__name__ in {"and_", "rand_"}:
result, mask = ops.kleene_and(self._data, other, self._mask, mask)
elif op.__name__ in {"xor", "rxor"}:
result, mask = ops.kleene_xor(self._data, other, self._mask, mask)
# error: Argument 2 to "BooleanArray" has incompatible type "Optional[Any]";
# expected "ndarray"
return BooleanArray(result, mask) # type: ignore[arg-type]
def _arith_method(self, other, op):
mask = None
op_name = op.__name__
if isinstance(other, BooleanArray):
other, mask = other._data, other._mask
elif is_list_like(other):
other = np.asarray(other)
if other.ndim > 1:
raise NotImplementedError("can only perform ops with 1-d structures")
if len(self) != len(other):
raise ValueError("Lengths must match")
# nans propagate
if mask is None:
mask = self._mask
if other is libmissing.NA:
mask |= True
else:
mask = self._mask | mask
if other is libmissing.NA:
# if other is NA, the result will be all NA and we can't run the
# actual op, so we need to choose the resulting dtype manually
if op_name in {"floordiv", "rfloordiv", "mod", "rmod", "pow", "rpow"}:
dtype = "int8"
elif op_name in {"truediv", "rtruediv"}:
dtype = "float64"
else:
dtype = "bool"
result = np.zeros(len(self._data), dtype=dtype)
else:
if op_name in {"pow", "rpow"} and isinstance(other, np.bool_):
# Avoid DeprecationWarning: In future, it will be an error
# for 'np.bool_' scalars to be interpreted as an index
other = bool(other)
with np.errstate(all="ignore"):
result = op(self._data, other)
# divmod returns a tuple
if op_name == "divmod":
div, mod = result
return (
self._maybe_mask_result(div, mask, other, "floordiv"),
self._maybe_mask_result(mod, mask, other, "mod"),
)
return self._maybe_mask_result(result, mask, other, op_name)
def __abs__(self):
return self.copy()
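
The Kleene logic wired up in _logical_method can be seen directly (a brief sketch):

import pandas as pd

a = pd.array([True, False, None], dtype="boolean")
a | True    # [True, True, True]    -- NA | True is True
a & True    # [True, False, <NA>]   -- NA & True stays NA
a & False   # [False, False, False] -- NA & False is False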

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,375 @@
from __future__ import annotations
from typing import overload
import numpy as np
from pandas._libs import (
lib,
missing as libmissing,
)
from pandas._typing import (
ArrayLike,
AstypeArg,
DtypeObj,
npt,
)
from pandas.util._decorators import cache_readonly
from pandas.core.dtypes.cast import astype_nansafe
from pandas.core.dtypes.common import (
is_bool_dtype,
is_datetime64_dtype,
is_float_dtype,
is_integer_dtype,
is_object_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
ExtensionDtype,
register_extension_dtype,
)
from pandas.core.arrays import ExtensionArray
from pandas.core.arrays.numeric import (
NumericArray,
NumericDtype,
)
from pandas.core.tools.numeric import to_numeric
class FloatingDtype(NumericDtype):
"""
An ExtensionDtype to hold a single size of floating dtype.
These specific implementations are subclasses of the non-public
    FloatingDtype. For example, we have Float32Dtype to represent float32.
The attributes name & type are set when these subclasses are created.
"""
def __repr__(self) -> str:
return f"{self.name}Dtype()"
@property
def _is_numeric(self) -> bool:
return True
@classmethod
def construct_array_type(cls) -> type[FloatingArray]:
"""
Return the array type associated with this dtype.
Returns
-------
type
"""
return FloatingArray
def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
# for now only handle other floating types
if not all(isinstance(t, FloatingDtype) for t in dtypes):
return None
np_dtype = np.find_common_type(
# error: Item "ExtensionDtype" of "Union[Any, ExtensionDtype]" has no
# attribute "numpy_dtype"
[t.numpy_dtype for t in dtypes], # type: ignore[union-attr]
[],
)
if np.issubdtype(np_dtype, np.floating):
return FLOAT_STR_TO_DTYPE[str(np_dtype)]
return None
def coerce_to_array(
values, dtype=None, mask=None, copy: bool = False
) -> tuple[np.ndarray, np.ndarray]:
"""
Coerce the input values array to numpy arrays with a mask.
Parameters
----------
values : 1D list-like
dtype : float dtype
mask : bool 1D array, optional
copy : bool, default False
if True, copy the input
Returns
-------
tuple of (values, mask)
"""
# if values is floating numpy array, preserve its dtype
if dtype is None and hasattr(values, "dtype"):
if is_float_dtype(values.dtype):
dtype = values.dtype
if dtype is not None:
if isinstance(dtype, str) and dtype.startswith("Float"):
# Avoid DeprecationWarning from NumPy about np.dtype("Float64")
# https://github.com/numpy/numpy/pull/7476
dtype = dtype.lower()
if not issubclass(type(dtype), FloatingDtype):
try:
dtype = FLOAT_STR_TO_DTYPE[str(np.dtype(dtype))]
except KeyError as err:
raise ValueError(f"invalid dtype specified {dtype}") from err
if isinstance(values, FloatingArray):
values, mask = values._data, values._mask
if dtype is not None:
values = values.astype(dtype.numpy_dtype, copy=False)
if copy:
values = values.copy()
mask = mask.copy()
return values, mask
values = np.array(values, copy=copy)
if is_object_dtype(values.dtype):
inferred_type = lib.infer_dtype(values, skipna=True)
if inferred_type == "empty":
pass
elif inferred_type not in [
"floating",
"integer",
"mixed-integer",
"integer-na",
"mixed-integer-float",
]:
raise TypeError(f"{values.dtype} cannot be converted to a FloatingDtype")
elif is_bool_dtype(values) and is_float_dtype(dtype):
values = np.array(values, dtype=float, copy=copy)
elif not (is_integer_dtype(values) or is_float_dtype(values)):
raise TypeError(f"{values.dtype} cannot be converted to a FloatingDtype")
if values.ndim != 1:
raise TypeError("values must be a 1D list-like")
if mask is None:
mask = libmissing.is_numeric_na(values)
else:
assert len(mask) == len(values)
if not mask.ndim == 1:
raise TypeError("mask must be a 1D list-like")
# infer dtype if needed
if dtype is None:
dtype = np.dtype("float64")
else:
dtype = dtype.type
# if we are float, let's make sure that we can
# safely cast
# we copy as need to coerce here
# TODO should this be a safe cast?
if mask.any():
values = values.copy()
values[mask] = np.nan
values = values.astype(dtype, copy=False) # , casting="safe")
return values, mask
class FloatingArray(NumericArray):
"""
Array of floating (optional missing) values.
.. versionadded:: 1.2.0
.. warning::
FloatingArray is currently experimental, and its API or internal
implementation may change without warning. Especially the behaviour
regarding NaN (distinct from NA missing values) is subject to change.
We represent a FloatingArray with 2 numpy arrays:
- data: contains a numpy float array of the appropriate dtype
- mask: a boolean array holding a mask on the data, True is missing
    To construct a FloatingArray from generic array-like input, use
:func:`pandas.array` with one of the float dtypes (see examples).
See :ref:`integer_na` for more.
Parameters
----------
values : numpy.ndarray
A 1-d float-dtype array.
mask : numpy.ndarray
A 1-d boolean-dtype array indicating missing values.
copy : bool, default False
Whether to copy the `values` and `mask`.
Attributes
----------
None
Methods
-------
None
Returns
-------
FloatingArray
Examples
--------
    Create a FloatingArray with :func:`pandas.array`:
>>> pd.array([0.1, None, 0.3], dtype=pd.Float32Dtype())
<FloatingArray>
[0.1, <NA>, 0.3]
Length: 3, dtype: Float32
String aliases for the dtypes are also available. They are capitalized.
>>> pd.array([0.1, None, 0.3], dtype="Float32")
<FloatingArray>
[0.1, <NA>, 0.3]
Length: 3, dtype: Float32
"""
# The value used to fill '_data' to avoid upcasting
_internal_fill_value = 0.0
# Fill values used for any/all
_truthy_value = 1.0
_falsey_value = 0.0
@cache_readonly
def dtype(self) -> FloatingDtype:
return FLOAT_STR_TO_DTYPE[str(self._data.dtype)]
def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
if not (isinstance(values, np.ndarray) and values.dtype.kind == "f"):
raise TypeError(
"values should be floating numpy array. Use "
"the 'pd.array' function instead"
)
if values.dtype == np.float16:
# If we don't raise here, then accessing self.dtype would raise
raise TypeError("FloatingArray does not support np.float16 dtype.")
super().__init__(values, mask, copy=copy)
@classmethod
def _from_sequence(
cls, scalars, *, dtype=None, copy: bool = False
) -> FloatingArray:
values, mask = coerce_to_array(scalars, dtype=dtype, copy=copy)
return FloatingArray(values, mask)
@classmethod
def _from_sequence_of_strings(
cls, strings, *, dtype=None, copy: bool = False
) -> FloatingArray:
scalars = to_numeric(strings, errors="raise")
return cls._from_sequence(scalars, dtype=dtype, copy=copy)
def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]:
return coerce_to_array(value, dtype=self.dtype)
@overload
def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
...
@overload
def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
...
@overload
def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
...
def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
"""
Cast to a NumPy array or ExtensionArray with 'dtype'.
Parameters
----------
dtype : str or dtype
Typecode or data-type to which the array is cast.
copy : bool, default True
Whether to copy the data, even if not necessary. If False,
a copy is made only if the old dtype does not match the
new dtype.
Returns
-------
ndarray or ExtensionArray
NumPy ndarray, or BooleanArray, IntegerArray or FloatingArray with
'dtype' for its dtype.
Raises
------
TypeError
            if incompatible type with a FloatingDtype, equivalent of same_kind
casting
"""
dtype = pandas_dtype(dtype)
if isinstance(dtype, ExtensionDtype):
return super().astype(dtype, copy=copy)
# coerce
if is_float_dtype(dtype):
# In astype, we consider dtype=float to also mean na_value=np.nan
kwargs = {"na_value": np.nan}
elif is_datetime64_dtype(dtype):
# error: Dict entry 0 has incompatible type "str": "datetime64"; expected
# "str": "float"
kwargs = {"na_value": np.datetime64("NaT")} # type: ignore[dict-item]
else:
kwargs = {}
# error: Argument 2 to "to_numpy" of "BaseMaskedArray" has incompatible
# type "**Dict[str, float]"; expected "bool"
data = self.to_numpy(dtype=dtype, **kwargs) # type: ignore[arg-type]
return astype_nansafe(data, dtype, copy=False)
def _values_for_argsort(self) -> np.ndarray:
return self._data
_dtype_docstring = """
An ExtensionDtype for {dtype} data.
This dtype uses ``pd.NA`` as missing value indicator.
Attributes
----------
None
Methods
-------
None
"""
# create the Dtype
@register_extension_dtype
class Float32Dtype(FloatingDtype):
type = np.float32
name = "Float32"
__doc__ = _dtype_docstring.format(dtype="float32")
@register_extension_dtype
class Float64Dtype(FloatingDtype):
type = np.float64
name = "Float64"
__doc__ = _dtype_docstring.format(dtype="float64")
FLOAT_STR_TO_DTYPE = {
"float32": Float32Dtype(),
"float64": Float64Dtype(),
}
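
A short sketch of the two astype paths above: ExtensionDtype targets stay masked, while plain float targets substitute np.nan for pd.NA:

import pandas as pd

arr = pd.array([0.1, None, 0.3], dtype="Float64")
arr.astype("Float32")   # masked FloatingArray path; <NA> is preserved
arr.astype("float64")   # plain ndarray path; the <NA> slot becomes np.nan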


@@ -0,0 +1,497 @@
from __future__ import annotations
from typing import overload
import numpy as np
from pandas._libs import (
lib,
missing as libmissing,
)
from pandas._typing import (
ArrayLike,
AstypeArg,
Dtype,
DtypeObj,
npt,
)
from pandas.util._decorators import cache_readonly
from pandas.core.dtypes.base import (
ExtensionDtype,
register_extension_dtype,
)
from pandas.core.dtypes.common import (
is_bool_dtype,
is_datetime64_dtype,
is_float_dtype,
is_integer_dtype,
is_object_dtype,
is_string_dtype,
pandas_dtype,
)
from pandas.core.arrays import ExtensionArray
from pandas.core.arrays.masked import BaseMaskedDtype
from pandas.core.arrays.numeric import (
NumericArray,
NumericDtype,
)
from pandas.core.tools.numeric import to_numeric
class _IntegerDtype(NumericDtype):
"""
An ExtensionDtype to hold a single size & kind of integer dtype.
These specific implementations are subclasses of the non-public
    _IntegerDtype. For example, we have Int8Dtype to represent signed 8-bit integers.
The attributes name & type are set when these subclasses are created.
"""
def __repr__(self) -> str:
sign = "U" if self.is_unsigned_integer else ""
return f"{sign}Int{8 * self.itemsize}Dtype()"
@cache_readonly
def is_signed_integer(self) -> bool:
return self.kind == "i"
@cache_readonly
def is_unsigned_integer(self) -> bool:
return self.kind == "u"
@property
def _is_numeric(self) -> bool:
return True
@classmethod
def construct_array_type(cls) -> type[IntegerArray]:
"""
Return the array type associated with this dtype.
Returns
-------
type
"""
return IntegerArray
def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
# we only handle nullable EA dtypes and numeric numpy dtypes
if not all(
isinstance(t, BaseMaskedDtype)
or (
isinstance(t, np.dtype)
and (np.issubdtype(t, np.number) or np.issubdtype(t, np.bool_))
)
for t in dtypes
):
return None
np_dtype = np.find_common_type(
# error: List comprehension has incompatible type List[Union[Any,
# dtype, ExtensionDtype]]; expected List[Union[dtype, None, type,
# _SupportsDtype, str, Tuple[Any, Union[int, Sequence[int]]],
# List[Any], _DtypeDict, Tuple[Any, Any]]]
[
t.numpy_dtype # type: ignore[misc]
if isinstance(t, BaseMaskedDtype)
else t
for t in dtypes
],
[],
)
if np.issubdtype(np_dtype, np.integer):
return INT_STR_TO_DTYPE[str(np_dtype)]
elif np.issubdtype(np_dtype, np.floating):
from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE
return FLOAT_STR_TO_DTYPE[str(np_dtype)]
return None
def safe_cast(values, dtype, copy: bool):
"""
Safely cast the values to the dtype if they
are equivalent, meaning floats must be equivalent to the
ints.
"""
try:
return values.astype(dtype, casting="safe", copy=copy)
except TypeError as err:
casted = values.astype(dtype, copy=copy)
if (casted == values).all():
return casted
raise TypeError(
f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}"
) from err
def coerce_to_array(
values, dtype, mask=None, copy: bool = False
) -> tuple[np.ndarray, np.ndarray]:
"""
Coerce the input values array to numpy arrays with a mask.
Parameters
----------
values : 1D list-like
dtype : integer dtype
mask : bool 1D array, optional
copy : bool, default False
if True, copy the input
Returns
-------
tuple of (values, mask)
"""
# if values is integer numpy array, preserve its dtype
if dtype is None and hasattr(values, "dtype"):
if is_integer_dtype(values.dtype):
dtype = values.dtype
if dtype is not None:
if isinstance(dtype, str) and (
dtype.startswith("Int") or dtype.startswith("UInt")
):
# Avoid DeprecationWarning from NumPy about np.dtype("Int64")
# https://github.com/numpy/numpy/pull/7476
dtype = dtype.lower()
if not issubclass(type(dtype), _IntegerDtype):
try:
dtype = INT_STR_TO_DTYPE[str(np.dtype(dtype))]
except KeyError as err:
raise ValueError(f"invalid dtype specified {dtype}") from err
if isinstance(values, IntegerArray):
values, mask = values._data, values._mask
if dtype is not None:
values = values.astype(dtype.numpy_dtype, copy=False)
if copy:
values = values.copy()
mask = mask.copy()
return values, mask
values = np.array(values, copy=copy)
inferred_type = None
if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
inferred_type = lib.infer_dtype(values, skipna=True)
if inferred_type == "empty":
pass
elif inferred_type not in [
"floating",
"integer",
"mixed-integer",
"integer-na",
"mixed-integer-float",
"string",
"unicode",
]:
raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype")
elif is_bool_dtype(values) and is_integer_dtype(dtype):
values = np.array(values, dtype=int, copy=copy)
elif not (is_integer_dtype(values) or is_float_dtype(values)):
raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype")
if values.ndim != 1:
raise TypeError("values must be a 1D list-like")
if mask is None:
mask = libmissing.is_numeric_na(values)
else:
assert len(mask) == len(values)
if mask.ndim != 1:
raise TypeError("mask must be a 1D list-like")
# infer dtype if needed
if dtype is None:
dtype = np.dtype("int64")
else:
dtype = dtype.type
# if we are float, let's make sure that we can
# safely cast
# we copy as need to coerce here
if mask.any():
values = values.copy()
values[mask] = 1
if inferred_type in ("string", "unicode"):
# casts from str are always safe since they raise
# a ValueError if the str cannot be parsed into an int
values = values.astype(dtype, copy=copy)
else:
values = safe_cast(values, dtype, copy=False)
return values, mask
class IntegerArray(NumericArray):
"""
Array of integer (optional missing) values.
.. versionchanged:: 1.0.0
Now uses :attr:`pandas.NA` as the missing value rather
than :attr:`numpy.nan`.
.. warning::
IntegerArray is currently experimental, and its API or internal
implementation may change without warning.
We represent an IntegerArray with 2 numpy arrays:
- data: contains a numpy integer array of the appropriate dtype
- mask: a boolean array holding a mask on the data, True is missing
To construct an IntegerArray from generic array-like input, use
:func:`pandas.array` with one of the integer dtypes (see examples).
See :ref:`integer_na` for more.
Parameters
----------
values : numpy.ndarray
A 1-d integer-dtype array.
mask : numpy.ndarray
A 1-d boolean-dtype array indicating missing values.
copy : bool, default False
Whether to copy the `values` and `mask`.
Attributes
----------
None
Methods
-------
None
Returns
-------
IntegerArray
Examples
--------
Create an IntegerArray with :func:`pandas.array`.
>>> int_array = pd.array([1, None, 3], dtype=pd.Int32Dtype())
>>> int_array
<IntegerArray>
[1, <NA>, 3]
Length: 3, dtype: Int32
String aliases for the dtypes are also available. They are capitalized.
>>> pd.array([1, None, 3], dtype='Int32')
<IntegerArray>
[1, <NA>, 3]
Length: 3, dtype: Int32
>>> pd.array([1, None, 3], dtype='UInt16')
<IntegerArray>
[1, <NA>, 3]
Length: 3, dtype: UInt16
"""
# The value used to fill '_data' to avoid upcasting
_internal_fill_value = 1
# Fill values used for any/all
_truthy_value = 1
_falsey_value = 0
@cache_readonly
def dtype(self) -> _IntegerDtype:
return INT_STR_TO_DTYPE[str(self._data.dtype)]
def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
if not (isinstance(values, np.ndarray) and values.dtype.kind in ["i", "u"]):
raise TypeError(
"values should be integer numpy array. Use "
"the 'pd.array' function instead"
)
super().__init__(values, mask, copy=copy)
@classmethod
def _from_sequence(
cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
) -> IntegerArray:
values, mask = coerce_to_array(scalars, dtype=dtype, copy=copy)
return IntegerArray(values, mask)
@classmethod
def _from_sequence_of_strings(
cls, strings, *, dtype: Dtype | None = None, copy: bool = False
) -> IntegerArray:
scalars = to_numeric(strings, errors="raise")
return cls._from_sequence(scalars, dtype=dtype, copy=copy)
def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]:
return coerce_to_array(value, dtype=self.dtype)
@overload
def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
...
@overload
def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
...
@overload
def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
...
def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
"""
Cast to a NumPy array or ExtensionArray with 'dtype'.
Parameters
----------
dtype : str or dtype
Typecode or data-type to which the array is cast.
copy : bool, default True
Whether to copy the data, even if not necessary. If False,
a copy is made only if the old dtype does not match the
new dtype.
Returns
-------
ndarray or ExtensionArray
NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype.
Raises
------
TypeError
if incompatible type with an IntegerDtype, equivalent of same_kind
casting
"""
dtype = pandas_dtype(dtype)
if isinstance(dtype, ExtensionDtype):
return super().astype(dtype, copy=copy)
na_value: float | np.datetime64 | lib.NoDefault
# coerce
if is_float_dtype(dtype):
# In astype, we consider dtype=float to also mean na_value=np.nan
na_value = np.nan
elif is_datetime64_dtype(dtype):
na_value = np.datetime64("NaT")
else:
na_value = lib.no_default
return self.to_numpy(dtype=dtype, na_value=na_value, copy=False)
def _values_for_argsort(self) -> np.ndarray:
"""
Return values for sorting.
Returns
-------
ndarray
The transformed values should maintain the ordering between values
within the array.
See Also
--------
ExtensionArray.argsort : Return the indices that would sort this array.
"""
data = self._data.copy()
if self._mask.any():
data[self._mask] = data.min() - 1
return data
_dtype_docstring = """
An ExtensionDtype for {dtype} integer data.
.. versionchanged:: 1.0.0
Now uses :attr:`pandas.NA` as its missing value,
rather than :attr:`numpy.nan`.
Attributes
----------
None
Methods
-------
None
"""
# create the Dtype
@register_extension_dtype
class Int8Dtype(_IntegerDtype):
type = np.int8
name = "Int8"
__doc__ = _dtype_docstring.format(dtype="int8")
@register_extension_dtype
class Int16Dtype(_IntegerDtype):
type = np.int16
name = "Int16"
__doc__ = _dtype_docstring.format(dtype="int16")
@register_extension_dtype
class Int32Dtype(_IntegerDtype):
type = np.int32
name = "Int32"
__doc__ = _dtype_docstring.format(dtype="int32")
@register_extension_dtype
class Int64Dtype(_IntegerDtype):
type = np.int64
name = "Int64"
__doc__ = _dtype_docstring.format(dtype="int64")
@register_extension_dtype
class UInt8Dtype(_IntegerDtype):
type = np.uint8
name = "UInt8"
__doc__ = _dtype_docstring.format(dtype="uint8")
@register_extension_dtype
class UInt16Dtype(_IntegerDtype):
type = np.uint16
name = "UInt16"
__doc__ = _dtype_docstring.format(dtype="uint16")
@register_extension_dtype
class UInt32Dtype(_IntegerDtype):
type = np.uint32
name = "UInt32"
__doc__ = _dtype_docstring.format(dtype="uint32")
@register_extension_dtype
class UInt64Dtype(_IntegerDtype):
type = np.uint64
name = "UInt64"
__doc__ = _dtype_docstring.format(dtype="uint64")
INT_STR_TO_DTYPE: dict[str, _IntegerDtype] = {
"int8": Int8Dtype(),
"int16": Int16Dtype(),
"int32": Int32Dtype(),
"int64": Int64Dtype(),
"uint8": UInt8Dtype(),
"uint16": UInt16Dtype(),
"uint32": UInt32Dtype(),
"uint64": UInt64Dtype(),
}
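
coerce_to_array and safe_cast above only accept values that cast losslessly to the target integer dtype; a sketch:

import pandas as pd

pd.array([1, 2, None], dtype="Int64")   # IntegerArray, missing value is <NA>
pd.array([1.0, 2.0], dtype="Int64")     # equivalent floats are safely cast
# pd.array([1.5], dtype="Int64")        # TypeError: cannot safely cast non-equivalent ...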

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,202 @@
from __future__ import annotations
import datetime
import numbers
from typing import (
TYPE_CHECKING,
TypeVar,
)
import numpy as np
from pandas._libs import (
Timedelta,
missing as libmissing,
)
from pandas.compat.numpy import function as nv
from pandas.core.dtypes.common import (
is_float,
is_float_dtype,
is_integer,
is_integer_dtype,
is_list_like,
pandas_dtype,
)
from pandas.core.arrays.masked import (
BaseMaskedArray,
BaseMaskedDtype,
)
if TYPE_CHECKING:
import pyarrow
T = TypeVar("T", bound="NumericArray")
class NumericDtype(BaseMaskedDtype):
def __from_arrow__(
self, array: pyarrow.Array | pyarrow.ChunkedArray
) -> BaseMaskedArray:
"""
Construct IntegerArray/FloatingArray from pyarrow Array/ChunkedArray.
"""
import pyarrow
from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask
array_class = self.construct_array_type()
pyarrow_type = pyarrow.from_numpy_dtype(self.type)
if not array.type.equals(pyarrow_type):
# test_from_arrow_type_error raise for string, but allow
# through itemsize conversion GH#31896
rt_dtype = pandas_dtype(array.type.to_pandas_dtype())
if rt_dtype.kind not in ["i", "u", "f"]:
# Could allow "c" or potentially disallow float<->int conversion,
# but at the moment we specifically test that uint<->int works
raise TypeError(
f"Expected array of {self} type, got {array.type} instead"
)
array = array.cast(pyarrow_type)
if isinstance(array, pyarrow.Array):
chunks = [array]
else:
# pyarrow.ChunkedArray
chunks = array.chunks
results = []
for arr in chunks:
data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=self.type)
num_arr = array_class(data.copy(), ~mask, copy=False)
results.append(num_arr)
if not results:
return array_class(
np.array([], dtype=self.numpy_dtype), np.array([], dtype=np.bool_)
)
elif len(results) == 1:
# avoid additional copy in _concat_same_type
return results[0]
else:
return array_class._concat_same_type(results)
class NumericArray(BaseMaskedArray):
"""
Base class for IntegerArray and FloatingArray.
"""
def _arith_method(self, other, op):
op_name = op.__name__
omask = None
if getattr(other, "ndim", 0) > 1:
raise NotImplementedError("can only perform ops with 1-d structures")
if isinstance(other, NumericArray):
other, omask = other._data, other._mask
elif is_list_like(other):
other = np.asarray(other)
if other.ndim > 1:
raise NotImplementedError("can only perform ops with 1-d structures")
if len(self) != len(other):
raise ValueError("Lengths must match")
if not (is_float_dtype(other) or is_integer_dtype(other)):
raise TypeError("can only perform ops with numeric values")
elif isinstance(other, (datetime.timedelta, np.timedelta64)):
other = Timedelta(other)
else:
if not (is_float(other) or is_integer(other) or other is libmissing.NA):
raise TypeError("can only perform ops with numeric values")
if omask is None:
mask = self._mask.copy()
if other is libmissing.NA:
mask |= True
else:
mask = self._mask | omask
if op_name == "pow":
# 1 ** x is 1.
mask = np.where((self._data == 1) & ~self._mask, False, mask)
# x ** 0 is 1.
if omask is not None:
mask = np.where((other == 0) & ~omask, False, mask)
elif other is not libmissing.NA:
mask = np.where(other == 0, False, mask)
elif op_name == "rpow":
# 1 ** x is 1.
if omask is not None:
mask = np.where((other == 1) & ~omask, False, mask)
elif other is not libmissing.NA:
mask = np.where(other == 1, False, mask)
# x ** 0 is 1.
mask = np.where((self._data == 0) & ~self._mask, False, mask)
if other is libmissing.NA:
result = np.ones_like(self._data)
if "truediv" in op_name and self.dtype.kind != "f":
# The actual data here doesn't matter since the mask
# will be all-True, but since this is division, we want
# to end up with floating dtype.
result = result.astype(np.float64)
else:
with np.errstate(all="ignore"):
result = op(self._data, other)
# divmod returns a tuple
if op_name == "divmod":
div, mod = result
return (
self._maybe_mask_result(div, mask, other, "floordiv"),
self._maybe_mask_result(mod, mask, other, "mod"),
)
return self._maybe_mask_result(result, mask, other, op_name)
_HANDLED_TYPES = (np.ndarray, numbers.Number)
def __neg__(self):
return type(self)(-self._data, self._mask.copy())
def __pos__(self):
return self.copy()
def __abs__(self):
return type(self)(abs(self._data), self._mask.copy())
def round(self: T, decimals: int = 0, *args, **kwargs) -> T:
"""
Round each value in the array to the given number of decimals.
Parameters
----------
decimals : int, default 0
Number of decimal places to round to. If decimals is negative,
it specifies the number of positions to the left of the decimal point.
*args, **kwargs
Additional arguments and keywords have no effect but might be
accepted for compatibility with NumPy.
Returns
-------
NumericArray
Rounded values of the NumericArray.
See Also
--------
numpy.around : Round values of an np.array.
DataFrame.round : Round values of a DataFrame.
Series.round : Round values of a Series.
"""
nv.validate_round(args, kwargs)
values = np.round(self._data, decimals=decimals, **kwargs)
return type(self)(values, self._mask.copy())
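The pow/rpow special cases in _arith_method are observable from the public API; a small sketch, assuming nullable integer support is available:

import pandas as pd

base = pd.array([1, 2, None], dtype="Int64")

# 1 ** x is 1 even for missing x, so the first slot is unmasked;
# the other slots stay missing because the right operand is NA.
print(base ** pd.NA)   # [1, <NA>, <NA>]

# x ** 0 is 1 regardless of the mask on x.
print(base ** 0)       # [1, 1, 1]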

View File

@@ -0,0 +1,428 @@
from __future__ import annotations
import numpy as np
from pandas._libs import lib
from pandas._typing import (
Dtype,
NpDtype,
Scalar,
npt,
)
from pandas.compat.numpy import function as nv
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
from pandas.core.dtypes.dtypes import PandasDtype
from pandas.core.dtypes.missing import isna
from pandas.core import (
arraylike,
nanops,
ops,
)
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.construction import ensure_wrapped_if_datetimelike
from pandas.core.strings.object_array import ObjectStringArrayMixin
class PandasArray(
OpsMixin,
NDArrayBackedExtensionArray,
ObjectStringArrayMixin,
):
"""
A pandas ExtensionArray for NumPy data.
This is mostly for internal compatibility, and is not especially
useful on its own.
Parameters
----------
values : ndarray
The NumPy ndarray to wrap. Must be 1-dimensional.
copy : bool, default False
Whether to copy `values`.
Attributes
----------
None
Methods
-------
None
"""
# If you're wondering why pd.Series(cls) doesn't put the array in an
# ExtensionBlock, search for `ABCPandasArray`. We check for
# that _typ to ensure that users don't unnecessarily use EAs inside
# pandas internals, which turns off things like block consolidation.
_typ = "npy_extension"
__array_priority__ = 1000
_ndarray: np.ndarray
_dtype: PandasDtype
# ------------------------------------------------------------------------
# Constructors
def __init__(self, values: np.ndarray | PandasArray, copy: bool = False):
if isinstance(values, type(self)):
values = values._ndarray
if not isinstance(values, np.ndarray):
raise ValueError(
f"'values' must be a NumPy array, not {type(values).__name__}"
)
if values.ndim == 0:
# Technically we support 2, but do not advertise that fact.
raise ValueError("PandasArray must be 1-dimensional.")
if copy:
values = values.copy()
dtype = PandasDtype(values.dtype)
super().__init__(values, dtype)
@classmethod
def _from_sequence(
cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
) -> PandasArray:
if isinstance(dtype, PandasDtype):
dtype = dtype._dtype
# error: Argument "dtype" to "asarray" has incompatible type
# "Union[ExtensionDtype, str, dtype[Any], dtype[floating[_64Bit]], Type[object],
# None]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
# Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
# _DTypeDict, Tuple[Any, Any]]]"
result = np.asarray(scalars, dtype=dtype) # type: ignore[arg-type]
if (
result.ndim > 1
and not hasattr(scalars, "dtype")
and (dtype is None or dtype == object)
):
# e.g. list-of-tuples
result = construct_1d_object_array_from_listlike(scalars)
if copy and result is scalars:
result = result.copy()
return cls(result)
@classmethod
def _from_factorized(cls, values, original) -> PandasArray:
return cls(values)
def _from_backing_data(self, arr: np.ndarray) -> PandasArray:
return type(self)(arr)
# ------------------------------------------------------------------------
# Data
@property
def dtype(self) -> PandasDtype:
return self._dtype
# ------------------------------------------------------------------------
# NumPy Array Interface
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
return np.asarray(self._ndarray, dtype=dtype)
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
# Lightly modified version of
# https://numpy.org/doc/stable/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html
# The primary modification is not boxing scalar return values
# in PandasArray, since pandas' ExtensionArrays are 1-d.
out = kwargs.get("out", ())
result = ops.maybe_dispatch_ufunc_to_dunder_op(
self, ufunc, method, *inputs, **kwargs
)
if result is not NotImplemented:
return result
if method == "reduce":
result = arraylike.dispatch_reduction_ufunc(
self, ufunc, method, *inputs, **kwargs
)
if result is not NotImplemented:
# e.g. tests.series.test_ufunc.TestNumpyReductions
return result
# Defer to the implementation of the ufunc on unwrapped values.
inputs = tuple(x._ndarray if isinstance(x, PandasArray) else x for x in inputs)
if out:
kwargs["out"] = tuple(
x._ndarray if isinstance(x, PandasArray) else x for x in out
)
result = getattr(ufunc, method)(*inputs, **kwargs)
if ufunc.nout > 1:
# multiple return values; re-box array-like results
return tuple(type(self)(x) for x in result)
elif method == "at":
# no return value
return None
elif method == "reduce":
if isinstance(result, np.ndarray):
# e.g. test_np_reduce_2d
return type(self)(result)
# e.g. test_np_max_nested_tuples
return result
else:
# one return value; re-box array-like results
return type(self)(result)
# ------------------------------------------------------------------------
# Pandas ExtensionArray Interface
def isna(self) -> np.ndarray:
return isna(self._ndarray)
def _validate_scalar(self, fill_value):
if fill_value is None:
# Primarily for subclasses
fill_value = self.dtype.na_value
return fill_value
def _values_for_factorize(self) -> tuple[np.ndarray, int]:
return self._ndarray, -1
# ------------------------------------------------------------------------
# Reductions
def any(
self,
*,
axis: int | None = None,
out=None,
keepdims: bool = False,
skipna: bool = True,
):
nv.validate_any((), {"out": out, "keepdims": keepdims})
result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna)
return self._wrap_reduction_result(axis, result)
def all(
self,
*,
axis: int | None = None,
out=None,
keepdims: bool = False,
skipna: bool = True,
):
nv.validate_all((), {"out": out, "keepdims": keepdims})
result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna)
return self._wrap_reduction_result(axis, result)
def min(self, *, axis: int | None = None, skipna: bool = True, **kwargs) -> Scalar:
nv.validate_min((), kwargs)
result = nanops.nanmin(
values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna
)
return self._wrap_reduction_result(axis, result)
def max(self, *, axis: int | None = None, skipna: bool = True, **kwargs) -> Scalar:
nv.validate_max((), kwargs)
result = nanops.nanmax(
values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna
)
return self._wrap_reduction_result(axis, result)
def sum(
self, *, axis: int | None = None, skipna: bool = True, min_count=0, **kwargs
) -> Scalar:
nv.validate_sum((), kwargs)
result = nanops.nansum(
self._ndarray, axis=axis, skipna=skipna, min_count=min_count
)
return self._wrap_reduction_result(axis, result)
def prod(
self, *, axis: int | None = None, skipna: bool = True, min_count=0, **kwargs
) -> Scalar:
nv.validate_prod((), kwargs)
result = nanops.nanprod(
self._ndarray, axis=axis, skipna=skipna, min_count=min_count
)
return self._wrap_reduction_result(axis, result)
def mean(
self,
*,
axis: int | None = None,
dtype: NpDtype | None = None,
out=None,
keepdims: bool = False,
skipna: bool = True,
):
nv.validate_mean((), {"dtype": dtype, "out": out, "keepdims": keepdims})
result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna)
return self._wrap_reduction_result(axis, result)
def median(
self,
*,
axis: int | None = None,
out=None,
overwrite_input: bool = False,
keepdims: bool = False,
skipna: bool = True,
):
nv.validate_median(
(), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims}
)
result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
return self._wrap_reduction_result(axis, result)
def std(
self,
*,
axis: int | None = None,
dtype: NpDtype | None = None,
out=None,
ddof=1,
keepdims: bool = False,
skipna: bool = True,
):
nv.validate_stat_ddof_func(
(), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std"
)
result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
return self._wrap_reduction_result(axis, result)
def var(
self,
*,
axis: int | None = None,
dtype: NpDtype | None = None,
out=None,
ddof=1,
keepdims: bool = False,
skipna: bool = True,
):
nv.validate_stat_ddof_func(
(), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="var"
)
result = nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
return self._wrap_reduction_result(axis, result)
def sem(
self,
*,
axis: int | None = None,
dtype: NpDtype | None = None,
out=None,
ddof=1,
keepdims: bool = False,
skipna: bool = True,
):
nv.validate_stat_ddof_func(
(), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="sem"
)
result = nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
return self._wrap_reduction_result(axis, result)
def kurt(
self,
*,
axis: int | None = None,
dtype: NpDtype | None = None,
out=None,
keepdims: bool = False,
skipna: bool = True,
):
nv.validate_stat_ddof_func(
(), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="kurt"
)
result = nanops.nankurt(self._ndarray, axis=axis, skipna=skipna)
return self._wrap_reduction_result(axis, result)
def skew(
self,
*,
axis: int | None = None,
dtype: NpDtype | None = None,
out=None,
keepdims: bool = False,
skipna: bool = True,
):
nv.validate_stat_ddof_func(
(), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="skew"
)
result = nanops.nanskew(self._ndarray, axis=axis, skipna=skipna)
return self._wrap_reduction_result(axis, result)
# ------------------------------------------------------------------------
# Additional Methods
def to_numpy(
self,
dtype: npt.DTypeLike | None = None,
copy: bool = False,
na_value=lib.no_default,
) -> np.ndarray:
result = np.asarray(self._ndarray, dtype=dtype)
if (copy or na_value is not lib.no_default) and result is self._ndarray:
result = result.copy()
if na_value is not lib.no_default:
result[self.isna()] = na_value
return result
# ------------------------------------------------------------------------
# Ops
def __invert__(self) -> PandasArray:
return type(self)(~self._ndarray)
def __neg__(self) -> PandasArray:
return type(self)(-self._ndarray)
def __pos__(self) -> PandasArray:
return type(self)(+self._ndarray)
def __abs__(self) -> PandasArray:
return type(self)(abs(self._ndarray))
def _cmp_method(self, other, op):
if isinstance(other, PandasArray):
other = other._ndarray
other = ops.maybe_prepare_scalar_for_op(other, (len(self),))
pd_op = ops.get_array_op(op)
other = ensure_wrapped_if_datetimelike(other)
with np.errstate(all="ignore"):
result = pd_op(self._ndarray, other)
if op is divmod or op is ops.rdivmod:
a, b = result
if isinstance(a, np.ndarray):
# for e.g. op vs TimedeltaArray, we may already
# have an ExtensionArray, in which case we do not wrap
return self._wrap_ndarray_result(a), self._wrap_ndarray_result(b)
return a, b
if isinstance(result, np.ndarray):
# for e.g. multiplication vs TimedeltaArray, we may already
# have an ExtensionArray, in which case we do not wrap
return self._wrap_ndarray_result(result)
return result
_arith_method = _cmp_method
def _wrap_ndarray_result(self, result: np.ndarray):
# If we have timedelta64[ns] result, return a TimedeltaArray instead
# of a PandasArray
if result.dtype == "timedelta64[ns]":
from pandas.core.arrays import TimedeltaArray
return TimedeltaArray._simple_new(result)
return type(self)(result)
# ------------------------------------------------------------------------
# String methods interface
_str_na_value = np.nan
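A short sketch of how PandasArray round-trips through NumPy ufuncs and the nanops-backed reductions above, assuming pd.arrays.PandasArray is the public alias for this class:

import numpy as np
import pandas as pd

arr = pd.arrays.PandasArray(np.array([1.0, 2.0, 3.0]))

# Element-wise ufuncs re-box ndarray results in PandasArray.
print(np.add(arr, 1))   # <PandasArray> [2.0, 3.0, 4.0]

# Reductions return plain scalars via _wrap_reduction_result.
print(arr.sum())        # 6.0
print(arr.mean())       # 2.0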

File diff suppressed because it is too large

View File

@@ -0,0 +1,13 @@
# flake8: noqa: F401
from pandas.core.arrays.sparse.accessor import (
SparseAccessor,
SparseFrameAccessor,
)
from pandas.core.arrays.sparse.array import (
BlockIndex,
IntIndex,
SparseArray,
make_sparse_index,
)
from pandas.core.arrays.sparse.dtype import SparseDtype
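A minimal sketch of the public surface re-exported here, assuming the pd.arrays namespace is available:

import pandas as pd

sp = pd.arrays.SparseArray([0, 0, 1, 2])

# Only the non-fill values are physically stored.
print(sp.dtype)       # Sparse[int64, 0]
print(sp.sp_values)   # [1 2]
print(sp.density)     # 0.5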

View File

@@ -0,0 +1,386 @@
"""Sparse accessor"""
import numpy as np
from pandas.compat._optional import import_optional_dependency
from pandas.core.dtypes.cast import find_common_type
from pandas.core.accessor import (
PandasDelegate,
delegate_names,
)
from pandas.core.arrays.sparse.array import SparseArray
from pandas.core.arrays.sparse.dtype import SparseDtype
class BaseAccessor:
_validation_msg = "Can only use the '.sparse' accessor with Sparse data."
def __init__(self, data=None):
self._parent = data
self._validate(data)
def _validate(self, data):
raise NotImplementedError
@delegate_names(
SparseArray, ["npoints", "density", "fill_value", "sp_values"], typ="property"
)
class SparseAccessor(BaseAccessor, PandasDelegate):
"""
Accessor for Series with sparse data, including conversions from other sparse matrix data types.
"""
def _validate(self, data):
if not isinstance(data.dtype, SparseDtype):
raise AttributeError(self._validation_msg)
def _delegate_property_get(self, name, *args, **kwargs):
return getattr(self._parent.array, name)
def _delegate_method(self, name, *args, **kwargs):
if name == "from_coo":
return self.from_coo(*args, **kwargs)
elif name == "to_coo":
return self.to_coo(*args, **kwargs)
else:
raise ValueError
@classmethod
def from_coo(cls, A, dense_index=False):
"""
Create a Series with sparse values from a scipy.sparse.coo_matrix.
Parameters
----------
A : scipy.sparse.coo_matrix
dense_index : bool, default False
If False (default), the index consists of only the
coords of the non-null entries of the original coo_matrix.
If True, the index consists of the full sorted
(row, col) coordinates of the coo_matrix.
Returns
-------
s : Series
A Series with sparse values.
Examples
--------
>>> from scipy import sparse
>>> A = sparse.coo_matrix(
... ([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(3, 4)
... )
>>> A
<3x4 sparse matrix of type '<class 'numpy.float64'>'
with 3 stored elements in COOrdinate format>
>>> A.todense()
matrix([[0., 0., 1., 2.],
[3., 0., 0., 0.],
[0., 0., 0., 0.]])
>>> ss = pd.Series.sparse.from_coo(A)
>>> ss
0 2 1.0
3 2.0
1 0 3.0
dtype: Sparse[float64, nan]
"""
from pandas import Series
from pandas.core.arrays.sparse.scipy_sparse import coo_to_sparse_series
result = coo_to_sparse_series(A, dense_index=dense_index)
result = Series(result.array, index=result.index, copy=False)
return result
def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False):
"""
Create a scipy.sparse.coo_matrix from a Series with MultiIndex.
Use row_levels and column_levels to determine the row and column
coordinates respectively. row_levels and column_levels are the names
(labels) or numbers of the levels. {row_levels, column_levels} must be
a partition of the MultiIndex level names (or numbers).
Parameters
----------
row_levels : tuple/list
column_levels : tuple/list
sort_labels : bool, default False
Sort the row and column labels before forming the sparse matrix.
When `row_levels` and/or `column_levels` refer to a single level,
set to `True` for a faster execution.
Returns
-------
y : scipy.sparse.coo_matrix
rows : list (row labels)
columns : list (column labels)
Examples
--------
>>> s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan])
>>> s.index = pd.MultiIndex.from_tuples(
... [
... (1, 2, "a", 0),
... (1, 2, "a", 1),
... (1, 1, "b", 0),
... (1, 1, "b", 1),
... (2, 1, "b", 0),
... (2, 1, "b", 1)
... ],
... names=["A", "B", "C", "D"],
... )
>>> s
A B C D
1 2 a 0 3.0
1 NaN
1 b 0 1.0
1 3.0
2 1 b 0 NaN
1 NaN
dtype: float64
>>> ss = s.astype("Sparse")
>>> ss
A B C D
1 2 a 0 3.0
1 NaN
1 b 0 1.0
1 3.0
2 1 b 0 NaN
1 NaN
dtype: Sparse[float64, nan]
>>> A, rows, columns = ss.sparse.to_coo(
... row_levels=["A", "B"], column_levels=["C", "D"], sort_labels=True
... )
>>> A
<3x4 sparse matrix of type '<class 'numpy.float64'>'
with 3 stored elements in COOrdinate format>
>>> A.todense()
matrix([[0., 0., 1., 3.],
[3., 0., 0., 0.],
[0., 0., 0., 0.]])
>>> rows
[(1, 1), (1, 2), (2, 1)]
>>> columns
[('a', 0), ('a', 1), ('b', 0), ('b', 1)]
"""
from pandas.core.arrays.sparse.scipy_sparse import sparse_series_to_coo
A, rows, columns = sparse_series_to_coo(
self._parent, row_levels, column_levels, sort_labels=sort_labels
)
return A, rows, columns
def to_dense(self):
"""
Convert a Series from sparse values to dense.
.. versionadded:: 0.25.0
Returns
-------
Series
A Series with the same values, stored as a dense array.
Examples
--------
>>> series = pd.Series(pd.arrays.SparseArray([0, 1, 0]))
>>> series
0 0
1 1
2 0
dtype: Sparse[int64, 0]
>>> series.sparse.to_dense()
0 0
1 1
2 0
dtype: int64
"""
from pandas import Series
return Series(
self._parent.array.to_dense(),
index=self._parent.index,
name=self._parent.name,
)
class SparseFrameAccessor(BaseAccessor, PandasDelegate):
"""
DataFrame accessor for sparse data.
.. versionadded:: 0.25.0
"""
def _validate(self, data):
dtypes = data.dtypes
if not all(isinstance(t, SparseDtype) for t in dtypes):
raise AttributeError(self._validation_msg)
@classmethod
def from_spmatrix(cls, data, index=None, columns=None):
"""
Create a new DataFrame from a scipy sparse matrix.
.. versionadded:: 0.25.0
Parameters
----------
data : scipy.sparse.spmatrix
Must be convertible to csc format.
index, columns : Index, optional
Row and column labels to use for the resulting DataFrame.
Defaults to a RangeIndex.
Returns
-------
DataFrame
Each column of the DataFrame is stored as a
:class:`arrays.SparseArray`.
Examples
--------
>>> import scipy.sparse
>>> mat = scipy.sparse.eye(3)
>>> pd.DataFrame.sparse.from_spmatrix(mat)
0 1 2
0 1.0 0.0 0.0
1 0.0 1.0 0.0
2 0.0 0.0 1.0
"""
from pandas._libs.sparse import IntIndex
from pandas import DataFrame
data = data.tocsc()
index, columns = cls._prep_index(data, index, columns)
n_rows, n_columns = data.shape
# We need to make sure indices are sorted, as we create
# IntIndex with no input validation (i.e. check_integrity=False).
# Indices may already be sorted in scipy in which case this adds
# a small overhead.
data.sort_indices()
indices = data.indices
indptr = data.indptr
array_data = data.data
dtype = SparseDtype(array_data.dtype, 0)
arrays = []
for i in range(n_columns):
sl = slice(indptr[i], indptr[i + 1])
idx = IntIndex(n_rows, indices[sl], check_integrity=False)
arr = SparseArray._simple_new(array_data[sl], idx, dtype)
arrays.append(arr)
return DataFrame._from_arrays(
arrays, columns=columns, index=index, verify_integrity=False
)
def to_dense(self):
"""
Convert a DataFrame with sparse values to dense.
.. versionadded:: 0.25.0
Returns
-------
DataFrame
A DataFrame with the same values stored as dense arrays.
Examples
--------
>>> df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1, 0])})
>>> df.sparse.to_dense()
A
0 0
1 1
2 0
"""
from pandas import DataFrame
data = {k: v.array.to_dense() for k, v in self._parent.items()}
return DataFrame(data, index=self._parent.index, columns=self._parent.columns)
def to_coo(self):
"""
Return the contents of the frame as a sparse SciPy COO matrix.
.. versionadded:: 0.25.0
Returns
-------
coo_matrix : scipy.sparse.spmatrix
If the caller is heterogeneous and contains booleans or objects,
the result will be of dtype=object. See Notes.
Notes
-----
The dtype will be the lowest-common-denominator type (implicit
upcasting); that is to say if the dtypes (even of numeric types)
are mixed, the one that accommodates all will be chosen.
e.g. If the dtypes are float16 and float32, dtype will be upcast to
float32. By numpy.find_common_type convention, mixing int64 and
uint64 will result in a float64 dtype.
"""
import_optional_dependency("scipy")
from scipy.sparse import coo_matrix
dtype = find_common_type(self._parent.dtypes.to_list())
if isinstance(dtype, SparseDtype):
dtype = dtype.subtype
cols, rows, data = [], [], []
for col, (_, ser) in enumerate(self._parent.iteritems()):
sp_arr = ser.array
if sp_arr.fill_value != 0:
raise ValueError("fill value must be 0 when converting to COO matrix")
row = sp_arr.sp_index.indices
cols.append(np.repeat(col, len(row)))
rows.append(row)
data.append(sp_arr.sp_values.astype(dtype, copy=False))
cols = np.concatenate(cols)
rows = np.concatenate(rows)
data = np.concatenate(data)
return coo_matrix((data, (rows, cols)), shape=self._parent.shape)
@property
def density(self) -> float:
"""
Ratio of non-sparse points to total (dense) data points.
"""
tmp = np.mean([column.array.density for _, column in self._parent.items()])
return tmp
@staticmethod
def _prep_index(data, index, columns):
from pandas.core.indexes.api import (
default_index,
ensure_index,
)
N, K = data.shape
if index is None:
index = default_index(N)
else:
index = ensure_index(index)
if columns is None:
columns = default_index(K)
else:
columns = ensure_index(columns)
if len(columns) != K:
raise ValueError(f"Column length mismatch: {len(columns)} vs. {K}")
if len(index) != N:
raise ValueError(f"Index length mismatch: {len(index)} vs. {N}")
return index, columns
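A round-trip sketch for the DataFrame accessor defined above, assuming scipy is installed:

import scipy.sparse
import pandas as pd

mat = scipy.sparse.eye(3)
df = pd.DataFrame.sparse.from_spmatrix(mat)

# density averages the per-column densities.
print(df.sparse.density)    # 0.3333333333333333

# to_coo requires fill_value == 0 for every column, which holds here.
coo = df.sparse.to_coo()
print(coo.shape, coo.nnz)   # (3, 3) 3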

File diff suppressed because it is too large

View File

@@ -0,0 +1,414 @@
"""Sparse Dtype"""
from __future__ import annotations
import re
from typing import (
TYPE_CHECKING,
Any,
)
import warnings
import numpy as np
from pandas._typing import (
Dtype,
DtypeObj,
type_t,
)
from pandas.errors import PerformanceWarning
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.base import (
ExtensionDtype,
register_extension_dtype,
)
from pandas.core.dtypes.cast import astype_nansafe
from pandas.core.dtypes.common import (
is_bool_dtype,
is_object_dtype,
is_scalar,
is_string_dtype,
pandas_dtype,
)
from pandas.core.dtypes.missing import (
isna,
na_value_for_dtype,
)
if TYPE_CHECKING:
from pandas.core.arrays.sparse.array import SparseArray
@register_extension_dtype
class SparseDtype(ExtensionDtype):
"""
Dtype for data stored in :class:`SparseArray`.
This dtype implements the pandas ExtensionDtype interface.
Parameters
----------
dtype : str, ExtensionDtype, numpy.dtype, type, default numpy.float64
The dtype of the underlying array storing the non-fill value values.
fill_value : scalar, optional
The scalar value not stored in the SparseArray. By default, this
depends on `dtype`.
=========== ==========
dtype na_value
=========== ==========
float ``np.nan``
int ``0``
bool ``False``
datetime64 ``pd.NaT``
timedelta64 ``pd.NaT``
=========== ==========
The default value may be overridden by specifying a `fill_value`.
Attributes
----------
None
Methods
-------
None
"""
# We include `_is_na_fill_value` in the metadata to avoid hash collisions
# between SparseDtype(float, 0.0) and SparseDtype(float, nan).
# Without is_na_fill_value in the comparison, those would be equal since
# hash(nan) is (sometimes?) 0.
_metadata = ("_dtype", "_fill_value", "_is_na_fill_value")
def __init__(self, dtype: Dtype = np.float64, fill_value: Any = None):
if isinstance(dtype, type(self)):
if fill_value is None:
fill_value = dtype.fill_value
dtype = dtype.subtype
dtype = pandas_dtype(dtype)
if is_string_dtype(dtype):
dtype = np.dtype("object")
if fill_value is None:
fill_value = na_value_for_dtype(dtype)
self._dtype = dtype
self._fill_value = fill_value
self._check_fill_value()
def __hash__(self):
# Python3 doesn't inherit __hash__ when a base class overrides
# __eq__, so we explicitly do it here.
return super().__hash__()
def __eq__(self, other: Any) -> bool:
# We have to override __eq__ to handle NA values in _metadata.
# The base class does simple == checks, which fail for NA.
if isinstance(other, str):
try:
other = self.construct_from_string(other)
except TypeError:
return False
if isinstance(other, type(self)):
subtype = self.subtype == other.subtype
if self._is_na_fill_value:
# this case is complicated by two things:
# SparseDtype(float, float(nan)) == SparseDtype(float, np.nan)
# SparseDtype(float, np.nan) != SparseDtype(float, pd.NaT)
# i.e. we want to treat any floating-point NaN as equal, but
# not a floating-point NaN and a datetime NaT.
fill_value = (
other._is_na_fill_value
and isinstance(self.fill_value, type(other.fill_value))
or isinstance(other.fill_value, type(self.fill_value))
)
else:
fill_value = self.fill_value == other.fill_value
return subtype and fill_value
return False
@property
def fill_value(self):
"""
The fill value of the array.
Converting the SparseArray to a dense ndarray will fill the
array with this value.
.. warning::
It's possible to end up with a SparseArray that has ``fill_value``
values in ``sp_values``. This can occur, for example, when setting
``SparseArray.fill_value`` directly.
"""
return self._fill_value
def _check_fill_value(self):
if not is_scalar(self._fill_value):
raise ValueError(
f"fill_value must be a scalar. Got {self._fill_value} instead"
)
# TODO: Right now we can use a Sparse boolean array
# with any fill_value. Here was an attempt
# to allow only three values: True, False or nan,
# but plenty of tests failed.
# see pull 44955
# if self._is_boolean and not (
# is_bool(self._fill_value) or isna(self._fill_value)
# ):
# raise ValueError(
# "fill_value must be True, False or nan "
# f"for boolean type. Got {self._fill_value} instead"
# )
@property
def _is_na_fill_value(self) -> bool:
return isna(self.fill_value)
@property
def _is_numeric(self) -> bool:
return not is_object_dtype(self.subtype)
@property
def _is_boolean(self) -> bool:
return is_bool_dtype(self.subtype)
@property
def kind(self):
"""
The sparse kind. Either 'integer' or 'block'.
"""
return self.subtype.kind
@property
def type(self):
return self.subtype.type
@property
def subtype(self):
return self._dtype
@property
def name(self):
return f"Sparse[{self.subtype.name}, {repr(self.fill_value)}]"
def __repr__(self) -> str:
return self.name
@classmethod
def construct_array_type(cls) -> type_t[SparseArray]:
"""
Return the array type associated with this dtype.
Returns
-------
type
"""
from pandas.core.arrays.sparse.array import SparseArray
return SparseArray
@classmethod
def construct_from_string(cls, string: str) -> SparseDtype:
"""
Construct a SparseDtype from a string form.
Parameters
----------
string : str
Can take the following forms.
string dtype
================ ============================
'int' SparseDtype[np.int64, 0]
'Sparse' SparseDtype[np.float64, nan]
'Sparse[int]' SparseDtype[np.int64, 0]
'Sparse[int, 0]' SparseDtype[np.int64, 0]
================ ============================
It is not possible to specify non-default fill values
with a string. An argument like ``'Sparse[int, 1]'``
will raise a ``TypeError`` because the default fill value
for integers is 0.
Returns
-------
SparseDtype
"""
if not isinstance(string, str):
raise TypeError(
f"'construct_from_string' expects a string, got {type(string)}"
)
msg = f"Cannot construct a 'SparseDtype' from '{string}'"
if string.startswith("Sparse"):
try:
sub_type, has_fill_value = cls._parse_subtype(string)
except ValueError as err:
raise TypeError(msg) from err
else:
result = SparseDtype(sub_type)
msg = (
f"Cannot construct a 'SparseDtype' from '{string}'.\n\nIt "
"looks like the fill_value in the string is not "
"the default for the dtype. Non-default fill_values "
"are not supported. Use the 'SparseDtype()' "
"constructor instead."
)
if has_fill_value and str(result) != string:
raise TypeError(msg)
return result
else:
raise TypeError(msg)
@staticmethod
def _parse_subtype(dtype: str) -> tuple[str, bool]:
"""
Parse a string to get the subtype
Parameters
----------
dtype : str
A string like
* Sparse[subtype]
* Sparse[subtype, fill_value]
Returns
-------
subtype : str
Raises
------
ValueError
When the subtype cannot be extracted.
"""
xpr = re.compile(r"Sparse\[(?P<subtype>[^,]*)(, )?(?P<fill_value>.*?)?\]$")
m = xpr.match(dtype)
has_fill_value = False
if m:
subtype = m.groupdict()["subtype"]
has_fill_value = bool(m.groupdict()["fill_value"])
elif dtype == "Sparse":
subtype = "float64"
else:
raise ValueError(f"Cannot parse {dtype}")
return subtype, has_fill_value
@classmethod
def is_dtype(cls, dtype: object) -> bool:
dtype = getattr(dtype, "dtype", dtype)
if isinstance(dtype, str) and dtype.startswith("Sparse"):
sub_type, _ = cls._parse_subtype(dtype)
dtype = np.dtype(sub_type)
elif isinstance(dtype, cls):
return True
return isinstance(dtype, np.dtype) or dtype == "Sparse"
def update_dtype(self, dtype) -> SparseDtype:
"""
Convert the SparseDtype to a new dtype.
This takes care of converting the ``fill_value``.
Parameters
----------
dtype : Union[str, numpy.dtype, SparseDtype]
The new dtype to use.
* For a SparseDtype, it is simply returned
* For a NumPy dtype (or str), the current fill value
is converted to the new dtype, and a SparseDtype
with `dtype` and the new fill value is returned.
Returns
-------
SparseDtype
A new SparseDtype with the correct `dtype` and fill value
for that `dtype`.
Raises
------
ValueError
When the current fill value cannot be converted to the
new `dtype` (e.g. trying to convert ``np.nan`` to an
integer dtype).
Examples
--------
>>> SparseDtype(int, 0).update_dtype(float)
Sparse[float64, 0.0]
>>> SparseDtype(int, 1).update_dtype(SparseDtype(float, np.nan))
Sparse[float64, nan]
"""
cls = type(self)
dtype = pandas_dtype(dtype)
if not isinstance(dtype, cls):
if not isinstance(dtype, np.dtype):
raise TypeError("sparse arrays of extension dtypes not supported")
fill_value = astype_nansafe(np.array(self.fill_value), dtype).item()
dtype = cls(dtype, fill_value=fill_value)
return dtype
@property
def _subtype_with_str(self):
"""
Whether the SparseDtype's subtype should be considered ``str``.
Typically, pandas will store string data in an object-dtype array.
When converting values to a dtype, e.g. in ``.astype``, we need to
be more specific, we need the actual underlying type.
Returns
-------
type or numpy.dtype
Examples
--------
>>> SparseDtype(int, 1)._subtype_with_str
dtype('int64')
>>> SparseDtype(object, 1)._subtype_with_str
dtype('O')
>>> dtype = SparseDtype(str, '')
>>> dtype.subtype
dtype('O')
>>> dtype._subtype_with_str
<class 'str'>
"""
if isinstance(self.fill_value, str):
return type(self.fill_value)
return self.subtype
def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
# TODO for now only handle SparseDtypes and numpy dtypes => extend
# with other compatible extension dtypes
if any(
isinstance(x, ExtensionDtype) and not isinstance(x, SparseDtype)
for x in dtypes
):
return None
fill_values = [x.fill_value for x in dtypes if isinstance(x, SparseDtype)]
fill_value = fill_values[0]
# np.nan isn't a singleton, so we may end up with multiple
# NaNs here, so we ignore the all NA case too.
if not (len(set(fill_values)) == 1 or isna(fill_values).all()):
warnings.warn(
"Concatenating sparse arrays with multiple fill "
f"values: '{fill_values}'. Picking the first and "
"converting the rest.",
PerformanceWarning,
stacklevel=find_stack_level(),
)
np_dtypes = [x.subtype if isinstance(x, SparseDtype) else x for x in dtypes]
return SparseDtype(np.find_common_type(np_dtypes, []), fill_value=fill_value)
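A sketch of the string-parsing rules implemented by construct_from_string and _parse_subtype, using only public names:

import numpy as np
from pandas import SparseDtype

# Strings resolve to a subtype with its default fill value.
print(SparseDtype.construct_from_string("Sparse[int]"))   # Sparse[int64, 0]

# Non-default fill values cannot be spelled as strings...
try:
    SparseDtype.construct_from_string("Sparse[int, 1]")
except TypeError:
    print("non-default fill_value rejected")

# ...but the constructor accepts them directly.
print(SparseDtype(np.int64, fill_value=1))   # Sparse[int64, 1]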

View File

@@ -0,0 +1,211 @@
"""
Interaction with scipy.sparse matrices.
Currently only includes to_coo helpers.
"""
from __future__ import annotations
from typing import (
TYPE_CHECKING,
Iterable,
)
import numpy as np
from pandas._libs import lib
from pandas._typing import (
IndexLabel,
npt,
)
from pandas.core.dtypes.missing import notna
from pandas.core.algorithms import factorize
from pandas.core.indexes.api import MultiIndex
from pandas.core.series import Series
if TYPE_CHECKING:
import scipy.sparse
def _check_is_partition(parts: Iterable, whole: Iterable):
whole = set(whole)
parts = [set(x) for x in parts]
if set.intersection(*parts) != set():
raise ValueError("Is not a partition because intersection is not null.")
if set.union(*parts) != whole:
raise ValueError("Is not a partition because union is not the whole.")
def _levels_to_axis(
ss,
levels: tuple[int] | list[int],
valid_ilocs: npt.NDArray[np.intp],
sort_labels: bool = False,
) -> tuple[npt.NDArray[np.intp], list[IndexLabel]]:
"""
For a MultiIndexed sparse Series `ss`, return `ax_coords` and `ax_labels`,
where `ax_coords` are the coordinates along one of the two axes of the
destination sparse matrix, and `ax_labels` are the labels from `ss`' Index
which correspond to these coordinates.
Parameters
----------
ss : Series
levels : tuple/list
valid_ilocs : numpy.ndarray
Array of integer positions of valid values for the sparse matrix in ss.
sort_labels : bool, default False
Sort the axis labels before forming the sparse matrix. When `levels`
refers to a single level, set to True for a faster execution.
Returns
-------
ax_coords : numpy.ndarray (axis coordinates)
ax_labels : list (axis labels)
"""
# Since the labels are sorted in `Index.levels`, when we wish to sort and
# there is only one level of the MultiIndex for this axis, the desired
# output can be obtained in the following simpler, more efficient way.
if sort_labels and len(levels) == 1:
ax_coords = ss.index.codes[levels[0]][valid_ilocs]
ax_labels = ss.index.levels[levels[0]]
else:
levels_values = lib.fast_zip(
[ss.index.get_level_values(lvl).values for lvl in levels]
)
codes, ax_labels = factorize(levels_values, sort=sort_labels)
ax_coords = codes[valid_ilocs]
ax_labels = ax_labels.tolist()
return ax_coords, ax_labels
def _to_ijv(
ss,
row_levels: tuple[int] | list[int] = (0,),
column_levels: tuple[int] | list[int] = (1,),
sort_labels: bool = False,
) -> tuple[
np.ndarray,
npt.NDArray[np.intp],
npt.NDArray[np.intp],
list[IndexLabel],
list[IndexLabel],
]:
"""
For an arbitrary MultiIndexed sparse Series return (v, i, j, ilabels,
jlabels) where (v, (i, j)) is suitable for passing to scipy.sparse.coo
constructor, and ilabels and jlabels are the row and column labels
respectively.
Parameters
----------
ss : Series
row_levels : tuple/list
column_levels : tuple/list
sort_labels : bool, default False
Sort the row and column labels before forming the sparse matrix.
When `row_levels` and/or `column_levels` refer to a single level,
set to `True` for a faster execution.
Returns
-------
values : numpy.ndarray
Valid values to populate a sparse matrix, extracted from
ss.
i_coords : numpy.ndarray (row coordinates of the values)
j_coords : numpy.ndarray (column coordinates of the values)
i_labels : list (row labels)
j_labels : list (column labels)
"""
# index and column levels must be a partition of the index
_check_is_partition([row_levels, column_levels], range(ss.index.nlevels))
# From the sparse Series, get the integer indices and data for valid sparse
# entries.
sp_vals = ss.array.sp_values
na_mask = notna(sp_vals)
values = sp_vals[na_mask]
valid_ilocs = ss.array.sp_index.indices[na_mask]
i_coords, i_labels = _levels_to_axis(
ss, row_levels, valid_ilocs, sort_labels=sort_labels
)
j_coords, j_labels = _levels_to_axis(
ss, column_levels, valid_ilocs, sort_labels=sort_labels
)
return values, i_coords, j_coords, i_labels, j_labels
def sparse_series_to_coo(
ss: Series,
row_levels: Iterable[int] = (0,),
column_levels: Iterable[int] = (1,),
sort_labels: bool = False,
) -> tuple[scipy.sparse.coo_matrix, list[IndexLabel], list[IndexLabel]]:
"""
Convert a sparse Series to a scipy.sparse.coo_matrix using index
levels row_levels, column_levels as the row and column
labels respectively. Returns the sparse_matrix, row and column labels.
"""
import scipy.sparse
if ss.index.nlevels < 2:
raise ValueError("to_coo requires MultiIndex with nlevels >= 2.")
if not ss.index.is_unique:
raise ValueError(
"Duplicate index entries are not allowed in to_coo transformation."
)
# to keep things simple, only rely on integer indexing (not labels)
row_levels = [ss.index._get_level_number(x) for x in row_levels]
column_levels = [ss.index._get_level_number(x) for x in column_levels]
v, i, j, rows, columns = _to_ijv(
ss, row_levels=row_levels, column_levels=column_levels, sort_labels=sort_labels
)
sparse_matrix = scipy.sparse.coo_matrix(
(v, (i, j)), shape=(len(rows), len(columns))
)
return sparse_matrix, rows, columns
def coo_to_sparse_series(
A: scipy.sparse.coo_matrix, dense_index: bool = False
) -> Series:
"""
Convert a scipy.sparse.coo_matrix to a Series with sparse values.
Parameters
----------
A : scipy.sparse.coo_matrix
dense_index : bool, default False
Returns
-------
Series
Raises
------
TypeError
If A is not a coo_matrix.
"""
from pandas import SparseDtype
try:
ser = Series(A.data, MultiIndex.from_arrays((A.row, A.col)))
except AttributeError as err:
raise TypeError(
f"Expected coo_matrix. Got {type(A).__name__} instead."
) from err
ser = ser.sort_index()
ser = ser.astype(SparseDtype(ser.dtype))
if dense_index:
# is there a better constructor method to use here?
i = range(A.shape[0])
j = range(A.shape[1])
ind = MultiIndex.from_product([i, j])
ser = ser.reindex(ind)
return ser
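The dense_index switch above is reachable through the public Series.sparse.from_coo accessor; a small sketch, assuming scipy is installed:

import scipy.sparse
import pandas as pd

A = scipy.sparse.coo_matrix(([1.0, 2.0], ([0, 1], [1, 0])), shape=(2, 3))

# Default: the index holds only the coordinates of stored values.
print(len(pd.Series.sparse.from_coo(A)))                    # 2

# dense_index=True reindexes to the full (row, col) product.
print(len(pd.Series.sparse.from_coo(A, dense_index=True)))  # 6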

View File

@@ -0,0 +1,572 @@
from __future__ import annotations
from typing import (
TYPE_CHECKING,
Any,
)
import numpy as np
from pandas._config import get_option
from pandas._libs import (
lib,
missing as libmissing,
)
from pandas._libs.arrays import NDArrayBacked
from pandas._typing import (
Dtype,
Scalar,
type_t,
)
from pandas.compat import pa_version_under1p01
from pandas.compat.numpy import function as nv
from pandas.core.dtypes.base import (
ExtensionDtype,
register_extension_dtype,
)
from pandas.core.dtypes.common import (
is_array_like,
is_bool_dtype,
is_dtype_equal,
is_integer_dtype,
is_object_dtype,
is_string_dtype,
pandas_dtype,
)
from pandas.core import ops
from pandas.core.array_algos import masked_reductions
from pandas.core.arrays import (
FloatingArray,
IntegerArray,
PandasArray,
)
from pandas.core.arrays.base import ExtensionArray
from pandas.core.arrays.floating import FloatingDtype
from pandas.core.arrays.integer import _IntegerDtype
from pandas.core.construction import extract_array
from pandas.core.indexers import check_array_indexer
from pandas.core.missing import isna
if TYPE_CHECKING:
import pyarrow
@register_extension_dtype
class StringDtype(ExtensionDtype):
"""
Extension dtype for string data.
.. versionadded:: 1.0.0
.. warning::
StringDtype is considered experimental. The implementation and
parts of the API may change without warning.
In particular, StringDtype.na_value may change to no longer be
``numpy.nan``.
Parameters
----------
storage : {"python", "pyarrow"}, optional
If not given, the value of ``pd.options.mode.string_storage``.
Attributes
----------
None
Methods
-------
None
Examples
--------
>>> pd.StringDtype()
string[python]
>>> pd.StringDtype(storage="pyarrow")
string[pyarrow]
"""
name = "string"
#: StringDtype.na_value uses pandas.NA
na_value = libmissing.NA
_metadata = ("storage",)
def __init__(self, storage=None):
if storage is None:
storage = get_option("mode.string_storage")
if storage not in {"python", "pyarrow"}:
raise ValueError(
f"Storage must be 'python' or 'pyarrow'. Got {storage} instead."
)
if storage == "pyarrow" and pa_version_under1p01:
raise ImportError(
"pyarrow>=1.0.0 is required for PyArrow backed StringArray."
)
self.storage = storage
@property
def type(self) -> type[str]:
return str
@classmethod
def construct_from_string(cls, string):
"""
Construct a StringDtype from a string.
Parameters
----------
string : str
The type of the name. The storage type will be taken from `string`.
Valid options and their storage types are
========================== ==============================================
string result storage
========================== ==============================================
``'string'`` pd.options.mode.string_storage, default python
``'string[python]'`` python
``'string[pyarrow]'`` pyarrow
========================== ==============================================
Returns
-------
StringDtype
Raises
------
TypeError
If the string is not a valid option.
"""
if not isinstance(string, str):
raise TypeError(
f"'construct_from_string' expects a string, got {type(string)}"
)
if string == "string":
return cls()
elif string == "string[python]":
return cls(storage="python")
elif string == "string[pyarrow]":
return cls(storage="pyarrow")
else:
raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")
def __eq__(self, other: Any) -> bool:
if isinstance(other, str) and other == "string":
return True
return super().__eq__(other)
def __hash__(self) -> int:
# custom __eq__ so have to override __hash__
return super().__hash__()
# https://github.com/pandas-dev/pandas/issues/36126
# error: Signature of "construct_array_type" incompatible with supertype
# "ExtensionDtype"
def construct_array_type( # type: ignore[override]
self,
) -> type_t[BaseStringArray]:
"""
Return the array type associated with this dtype.
Returns
-------
type
"""
from pandas.core.arrays.string_arrow import ArrowStringArray
if self.storage == "python":
return StringArray
else:
return ArrowStringArray
def __repr__(self):
return f"string[{self.storage}]"
def __str__(self):
return self.name
def __from_arrow__(
self, array: pyarrow.Array | pyarrow.ChunkedArray
) -> BaseStringArray:
"""
Construct StringArray from pyarrow Array/ChunkedArray.
"""
if self.storage == "pyarrow":
from pandas.core.arrays.string_arrow import ArrowStringArray
return ArrowStringArray(array)
else:
import pyarrow
if isinstance(array, pyarrow.Array):
chunks = [array]
else:
# pyarrow.ChunkedArray
chunks = array.chunks
results = []
for arr in chunks:
# using _from_sequence to ensure None is converted to NA
str_arr = StringArray._from_sequence(np.array(arr))
results.append(str_arr)
if results:
return StringArray._concat_same_type(results)
else:
return StringArray(np.array([], dtype="object"))
class BaseStringArray(ExtensionArray):
pass
class StringArray(BaseStringArray, PandasArray):
"""
Extension array for string data.
.. versionadded:: 1.0.0
.. warning::
StringArray is considered experimental. The implementation and
parts of the API may change without warning.
Parameters
----------
values : array-like
The array of data.
.. warning::
Currently, this expects an object-dtype ndarray
where the elements are Python strings or :attr:`pandas.NA`.
This may change without warning in the future. Use
:meth:`pandas.array` with ``dtype="string"`` for a stable way of
creating a `StringArray` from any sequence.
copy : bool, default False
Whether to copy the array of data.
Attributes
----------
None
Methods
-------
None
See Also
--------
array
The recommended function for creating a StringArray.
Series.str
The string methods are available on Series backed by
a StringArray.
Notes
-----
StringArray returns a BooleanArray for comparison methods.
Examples
--------
>>> pd.array(['This is', 'some text', None, 'data.'], dtype="string")
<StringArray>
['This is', 'some text', <NA>, 'data.']
Length: 4, dtype: string
Unlike arrays instantiated with ``dtype="object"``, ``StringArray``
will convert the values to strings.
>>> pd.array(['1', 1], dtype="object")
<PandasArray>
['1', 1]
Length: 2, dtype: object
>>> pd.array(['1', 1], dtype="string")
<StringArray>
['1', '1']
Length: 2, dtype: string
However, instantiating StringArrays directly with non-strings will raise an error.
For comparison methods, `StringArray` returns a :class:`pandas.BooleanArray`:
>>> pd.array(["a", None, "c"], dtype="string") == "a"
<BooleanArray>
[True, <NA>, False]
Length: 3, dtype: boolean
"""
# undo the PandasArray hack
_typ = "extension"
def __init__(self, values, copy=False):
values = extract_array(values)
super().__init__(values, copy=copy)
# error: Incompatible types in assignment (expression has type "StringDtype",
# variable has type "PandasDtype")
NDArrayBacked.__init__(self, self._ndarray, StringDtype(storage="python"))
if not isinstance(values, type(self)):
self._validate()
def _validate(self):
"""Validate that we only store NA or strings."""
if len(self._ndarray) and not lib.is_string_array(self._ndarray, skipna=True):
raise ValueError("StringArray requires a sequence of strings or pandas.NA")
if self._ndarray.dtype != "object":
raise ValueError(
"StringArray requires a sequence of strings or pandas.NA. Got "
f"'{self._ndarray.dtype}' dtype instead."
)
@classmethod
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False):
if dtype and not (isinstance(dtype, str) and dtype == "string"):
dtype = pandas_dtype(dtype)
assert isinstance(dtype, StringDtype) and dtype.storage == "python"
from pandas.core.arrays.masked import BaseMaskedArray
if isinstance(scalars, BaseMaskedArray):
# avoid costly conversion to object dtype
na_values = scalars._mask
result = scalars._data
result = lib.ensure_string_array(result, copy=copy, convert_na_value=False)
result[na_values] = StringDtype.na_value
else:
# convert non-na-likes to str, and nan-likes to StringDtype.na_value
result = lib.ensure_string_array(
scalars, na_value=StringDtype.na_value, copy=copy
)
# Manually creating new array avoids the validation step in the __init__, so is
# faster. Refactor need for validation?
new_string_array = cls.__new__(cls)
NDArrayBacked.__init__(new_string_array, result, StringDtype(storage="python"))
return new_string_array
@classmethod
def _from_sequence_of_strings(
cls, strings, *, dtype: Dtype | None = None, copy=False
):
return cls._from_sequence(strings, dtype=dtype, copy=copy)
@classmethod
def _empty(cls, shape, dtype) -> StringArray:
values = np.empty(shape, dtype=object)
values[:] = libmissing.NA
return cls(values).astype(dtype, copy=False)
def __arrow_array__(self, type=None):
"""
Convert myself into a pyarrow Array.
"""
import pyarrow as pa
if type is None:
type = pa.string()
values = self._ndarray.copy()
values[self.isna()] = None
return pa.array(values, type=type, from_pandas=True)
def _values_for_factorize(self):
arr = self._ndarray.copy()
mask = self.isna()
arr[mask] = -1
return arr, -1
def __setitem__(self, key, value):
value = extract_array(value, extract_numpy=True)
if isinstance(value, type(self)):
# extract_array doesn't extract PandasArray subclasses
value = value._ndarray
key = check_array_indexer(self, key)
scalar_key = lib.is_scalar(key)
scalar_value = lib.is_scalar(value)
if scalar_key and not scalar_value:
raise ValueError("setting an array element with a sequence.")
# validate new items
if scalar_value:
if isna(value):
value = StringDtype.na_value
elif not isinstance(value, str):
raise ValueError(
f"Cannot set non-string value '{value}' into a StringArray."
)
else:
if not is_array_like(value):
value = np.asarray(value, dtype=object)
if len(value) and not lib.is_string_array(value, skipna=True):
raise ValueError("Must provide strings.")
super().__setitem__(key, value)
def astype(self, dtype, copy: bool = True):
dtype = pandas_dtype(dtype)
if is_dtype_equal(dtype, self.dtype):
if copy:
return self.copy()
return self
elif isinstance(dtype, _IntegerDtype):
arr = self._ndarray.copy()
mask = self.isna()
arr[mask] = 0
values = arr.astype(dtype.numpy_dtype)
return IntegerArray(values, mask, copy=False)
elif isinstance(dtype, FloatingDtype):
arr = self.copy()
mask = self.isna()
arr[mask] = "0"
values = arr.astype(dtype.numpy_dtype)
return FloatingArray(values, mask, copy=False)
elif isinstance(dtype, ExtensionDtype):
return super().astype(dtype, copy=copy)
elif np.issubdtype(dtype, np.floating):
arr = self._ndarray.copy()
mask = self.isna()
arr[mask] = 0
values = arr.astype(dtype)
values[mask] = np.nan
return values
return super().astype(dtype, copy)
def _reduce(
self, name: str, *, skipna: bool = True, axis: int | None = 0, **kwargs
):
if name in ["min", "max"]:
return getattr(self, name)(skipna=skipna, axis=axis)
raise TypeError(f"Cannot perform reduction '{name}' with string dtype")
def min(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
nv.validate_min((), kwargs)
result = masked_reductions.min(
values=self.to_numpy(), mask=self.isna(), skipna=skipna
)
return self._wrap_reduction_result(axis, result)
def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
nv.validate_max((), kwargs)
result = masked_reductions.max(
values=self.to_numpy(), mask=self.isna(), skipna=skipna
)
return self._wrap_reduction_result(axis, result)
def value_counts(self, dropna: bool = True):
from pandas import value_counts
result = value_counts(self._ndarray, dropna=dropna).astype("Int64")
result.index = result.index.astype(self.dtype)
return result
def memory_usage(self, deep: bool = False) -> int:
result = self._ndarray.nbytes
if deep:
return result + lib.memory_usage_of_objects(self._ndarray)
return result
def _cmp_method(self, other, op):
from pandas.arrays import BooleanArray
if isinstance(other, StringArray):
other = other._ndarray
mask = isna(self) | isna(other)
valid = ~mask
if not lib.is_scalar(other):
if len(other) != len(self):
# prevent improper broadcasting when other is 2D
raise ValueError(
f"Lengths of operands do not match: {len(self)} != {len(other)}"
)
other = np.asarray(other)
other = other[valid]
if op.__name__ in ops.ARITHMETIC_BINOPS:
result = np.empty_like(self._ndarray, dtype="object")
result[mask] = StringDtype.na_value
result[valid] = op(self._ndarray[valid], other)
return StringArray(result)
else:
# logical
result = np.zeros(len(self._ndarray), dtype="bool")
result[valid] = op(self._ndarray[valid], other)
return BooleanArray(result, mask)
_arith_method = _cmp_method
# ------------------------------------------------------------------------
# String methods interface
# error: Incompatible types in assignment (expression has type "NAType",
# base class "PandasArray" defined the type as "float")
_str_na_value = StringDtype.na_value # type: ignore[assignment]
def _str_map(
self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
):
from pandas.arrays import BooleanArray
if dtype is None:
dtype = StringDtype(storage="python")
if na_value is None:
na_value = self.dtype.na_value
mask = isna(self)
arr = np.asarray(self)
if is_integer_dtype(dtype) or is_bool_dtype(dtype):
constructor: type[IntegerArray] | type[BooleanArray]
if is_integer_dtype(dtype):
constructor = IntegerArray
else:
constructor = BooleanArray
na_value_is_na = isna(na_value)
if na_value_is_na:
na_value = 1
result = lib.map_infer_mask(
arr,
f,
mask.view("uint8"),
convert=False,
na_value=na_value,
# error: Argument 1 to "dtype" has incompatible type
# "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected
# "Type[object]"
dtype=np.dtype(dtype), # type: ignore[arg-type]
)
if not na_value_is_na:
mask[:] = False
return constructor(result, mask)
elif is_string_dtype(dtype) and not is_object_dtype(dtype):
# i.e. StringDtype
result = lib.map_infer_mask(
arr, f, mask.view("uint8"), convert=False, na_value=na_value
)
return StringArray(result)
else:
# This is when the result type is object. We reach this when
# -> We know the result type is truly object (e.g. .encode returns bytes
# or .findall returns a list).
# -> We don't know the result type. E.g. `.get` can return anything.
return lib.map_infer_mask(arr, f, mask.view("uint8"))
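A short sketch of the astype branches and the BooleanArray-returning comparisons described above, using the public string dtype:

import pandas as pd

s = pd.array(["1", "2", None], dtype="string")

# The _IntegerDtype branch returns a masked IntegerArray.
print(s.astype("Int64"))   # [1, 2, <NA>]

# Comparisons mask the NA positions and return a BooleanArray.
print(s == "1")            # [True, False, <NA>]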

View File

@@ -0,0 +1,862 @@
from __future__ import annotations
from collections.abc import Callable # noqa: PDF001
import re
from typing import (
TYPE_CHECKING,
Any,
Union,
cast,
overload,
)
import numpy as np
from pandas._libs import (
lib,
missing as libmissing,
)
from pandas._typing import (
Dtype,
NpDtype,
PositionalIndexer,
Scalar,
ScalarIndexer,
SequenceIndexer,
TakeIndexer,
npt,
)
from pandas.compat import (
pa_version_under1p01,
pa_version_under2p0,
pa_version_under3p0,
pa_version_under4p0,
)
from pandas.util._decorators import doc
from pandas.core.dtypes.common import (
is_array_like,
is_bool_dtype,
is_dtype_equal,
is_integer,
is_integer_dtype,
is_object_dtype,
is_scalar,
is_string_dtype,
pandas_dtype,
)
from pandas.core.dtypes.missing import isna
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays.base import ExtensionArray
from pandas.core.arrays.boolean import BooleanDtype
from pandas.core.arrays.integer import Int64Dtype
from pandas.core.arrays.numeric import NumericDtype
from pandas.core.arrays.string_ import (
BaseStringArray,
StringDtype,
)
from pandas.core.indexers import (
check_array_indexer,
unpack_tuple_and_ellipses,
validate_indices,
)
from pandas.core.strings.object_array import ObjectStringArrayMixin
if not pa_version_under1p01:
import pyarrow as pa
import pyarrow.compute as pc
ARROW_CMP_FUNCS = {
"eq": pc.equal,
"ne": pc.not_equal,
"lt": pc.less,
"gt": pc.greater,
"le": pc.less_equal,
"ge": pc.greater_equal,
}
if TYPE_CHECKING:
from pandas import Series
ArrowStringScalarOrNAT = Union[str, libmissing.NAType]
def _chk_pyarrow_available() -> None:
if pa_version_under1p01:
msg = "pyarrow>=1.0.0 is required for PyArrow backed StringArray."
raise ImportError(msg)
# TODO: Inherit directly from BaseStringArrayMethods. Currently we inherit from
# ObjectStringArrayMixin because we want to have the object-dtype based methods as
# fallback for the ones that pyarrow doesn't yet support
class ArrowStringArray(OpsMixin, BaseStringArray, ObjectStringArrayMixin):
"""
Extension array for string data in a ``pyarrow.ChunkedArray``.
.. versionadded:: 1.2.0
.. warning::
ArrowStringArray is considered experimental. The implementation and
parts of the API may change without warning.
Parameters
----------
values : pyarrow.Array or pyarrow.ChunkedArray
The array of data.
Attributes
----------
None
Methods
-------
None
See Also
--------
    array
        The recommended function for creating an ArrowStringArray.
    Series.str
        The string methods are available on Series backed by
        an ArrowStringArray.
Notes
-----
ArrowStringArray returns a BooleanArray for comparison methods.
Examples
--------
>>> pd.array(['This is', 'some text', None, 'data.'], dtype="string[pyarrow]")
<ArrowStringArray>
['This is', 'some text', <NA>, 'data.']
Length: 4, dtype: string
"""
def __init__(self, values):
self._dtype = StringDtype(storage="pyarrow")
if isinstance(values, pa.Array):
self._data = pa.chunked_array([values])
elif isinstance(values, pa.ChunkedArray):
self._data = values
else:
raise ValueError(f"Unsupported type '{type(values)}' for ArrowStringArray")
if not pa.types.is_string(self._data.type):
raise ValueError(
"ArrowStringArray requires a PyArrow (chunked) array of string type"
)
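    # Illustrative construction (a sketch; assumes pyarrow is installed):
    #
    #   import pyarrow as pa
    #   arr = ArrowStringArray(pa.array(["a", "b", None]))
    #   arr.dtype  # -> string[pyarrow]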
@classmethod
def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False):
from pandas.core.arrays.masked import BaseMaskedArray
_chk_pyarrow_available()
if dtype and not (isinstance(dtype, str) and dtype == "string"):
dtype = pandas_dtype(dtype)
assert isinstance(dtype, StringDtype) and dtype.storage == "pyarrow"
if isinstance(scalars, BaseMaskedArray):
# avoid costly conversion to object dtype in ensure_string_array and
# numerical issues with Float32Dtype
na_values = scalars._mask
result = scalars._data
result = lib.ensure_string_array(result, copy=copy, convert_na_value=False)
return cls(pa.array(result, mask=na_values, type=pa.string()))
# convert non-na-likes to str
result = lib.ensure_string_array(scalars, copy=copy)
return cls(pa.array(result, type=pa.string(), from_pandas=True))
@classmethod
def _from_sequence_of_strings(
cls, strings, dtype: Dtype | None = None, copy: bool = False
):
return cls._from_sequence(strings, dtype=dtype, copy=copy)
@property
def dtype(self) -> StringDtype:
"""
An instance of 'string[pyarrow]'.
"""
return self._dtype
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
"""Correctly construct numpy arrays when passed to `np.asarray()`."""
return self.to_numpy(dtype=dtype)
def __arrow_array__(self, type=None):
"""Convert myself to a pyarrow Array or ChunkedArray."""
return self._data
def to_numpy(
self,
dtype: npt.DTypeLike | None = None,
copy: bool = False,
na_value=lib.no_default,
) -> np.ndarray:
"""
Convert to a NumPy ndarray.
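
        Examples
        --------
        A minimal illustration (assumes pyarrow is installed):

        >>> arr = pd.array(["a", "b", None], dtype="string[pyarrow]")
        >>> arr.to_numpy()
        array(['a', 'b', <NA>], dtype=object)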
"""
# TODO: copy argument is ignored
result = np.array(self._data, dtype=dtype)
if self._data.null_count > 0:
if na_value is lib.no_default:
if dtype and np.issubdtype(dtype, np.floating):
return result
na_value = self._dtype.na_value
mask = self.isna()
result[mask] = na_value
return result
def __len__(self) -> int:
"""
Length of this array.
Returns
-------
length : int
"""
return len(self._data)
@doc(ExtensionArray.factorize)
def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]:
encoded = self._data.dictionary_encode()
indices = pa.chunked_array(
[c.indices for c in encoded.chunks], type=encoded.type.index_type
).to_pandas()
if indices.dtype.kind == "f":
indices[np.isnan(indices)] = na_sentinel
indices = indices.astype(np.int64, copy=False)
if encoded.num_chunks:
uniques = type(self)(encoded.chunk(0).dictionary)
else:
uniques = type(self)(pa.array([], type=encoded.type.value_type))
return indices.values, uniques
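    # Illustrative sketch: dictionary_encode() assigns dictionary values in
    # order of first appearance, so factorizing ["b", "a", "b"] gives
    #   codes   -> array([0, 1, 0])
    #   uniques -> <ArrowStringArray> ['b', 'a']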
@classmethod
def _concat_same_type(cls, to_concat) -> ArrowStringArray:
"""
Concatenate multiple ArrowStringArray.
Parameters
----------
to_concat : sequence of ArrowStringArray
Returns
-------
ArrowStringArray
"""
return cls(
pa.chunked_array(
[array for ea in to_concat for array in ea._data.iterchunks()]
)
)
@overload
def __getitem__(self, item: ScalarIndexer) -> ArrowStringScalarOrNAT:
...
@overload
def __getitem__(self: ArrowStringArray, item: SequenceIndexer) -> ArrowStringArray:
...
def __getitem__(
self: ArrowStringArray, item: PositionalIndexer
) -> ArrowStringArray | ArrowStringScalarOrNAT:
"""Select a subset of self.
Parameters
----------
item : int, slice, or ndarray
* int: The position in 'self' to get.
* slice: A slice object, where 'start', 'stop', and 'step' are
integers or None
* ndarray: A 1-d boolean NumPy ndarray the same length as 'self'
Returns
-------
item : scalar or ExtensionArray
Notes
-----
For scalar ``item``, return a scalar value suitable for the array's
type. This should be an instance of ``self.dtype.type``.
For slice ``key``, return an instance of ``ExtensionArray``, even
if the slice is length 0 or 1.
For a boolean mask, return an instance of ``ExtensionArray``, filtered
to the values where ``item`` is True.
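
        Examples
        --------
        A minimal illustration (assumes pyarrow is installed):

        >>> arr = pd.array(["a", "b", None], dtype="string[pyarrow]")
        >>> arr[0]
        'a'
        >>> arr[1:]
        <ArrowStringArray>
        ['b', <NA>]
        Length: 2, dtype: string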
"""
item = check_array_indexer(self, item)
if isinstance(item, np.ndarray):
if not len(item):
return type(self)(pa.chunked_array([], type=pa.string()))
elif is_integer_dtype(item.dtype):
return self.take(item)
elif is_bool_dtype(item.dtype):
return type(self)(self._data.filter(item))
else:
raise IndexError(
"Only integers, slices and integer or "
"boolean arrays are valid indices."
)
elif isinstance(item, tuple):
item = unpack_tuple_and_ellipses(item)
# error: Non-overlapping identity check (left operand type:
# "Union[Union[int, integer[Any]], Union[slice, List[int],
# ndarray[Any, Any]]]", right operand type: "ellipsis")
if item is Ellipsis: # type: ignore[comparison-overlap]
# TODO: should be handled by pyarrow?
item = slice(None)
if is_scalar(item) and not is_integer(item):
# e.g. "foo" or 2.5
# exception message copied from numpy
raise IndexError(
r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis "
r"(`None`) and integer or boolean arrays are valid indices"
)
# We are not an array indexer, so maybe e.g. a slice or integer
# indexer. We dispatch to pyarrow.
value = self._data[item]
if isinstance(value, pa.ChunkedArray):
return type(self)(value)
else:
return self._as_pandas_scalar(value)
def _as_pandas_scalar(self, arrow_scalar: pa.Scalar):
scalar = arrow_scalar.as_py()
if scalar is None:
return self._dtype.na_value
else:
return scalar
@property
def nbytes(self) -> int:
"""
The number of bytes needed to store this object in memory.
"""
return self._data.nbytes
def isna(self) -> np.ndarray:
"""
Boolean NumPy array indicating if each value is missing.
This should return a 1-D array the same length as 'self'.
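
        Examples
        --------
        A minimal illustration (assumes pyarrow is installed):

        >>> pd.array(["a", None], dtype="string[pyarrow]").isna()
        array([False,  True])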
"""
# TODO: Implement .to_numpy for ChunkedArray
return self._data.is_null().to_pandas().values
def copy(self) -> ArrowStringArray:
"""
Return a shallow copy of the array.
Underlying ChunkedArray is immutable, so a deep copy is unnecessary.
Returns
-------
ArrowStringArray
"""
return type(self)(self._data)
def _cmp_method(self, other, op):
from pandas.arrays import BooleanArray
pc_func = ARROW_CMP_FUNCS[op.__name__]
if isinstance(other, ArrowStringArray):
result = pc_func(self._data, other._data)
elif isinstance(other, (np.ndarray, list)):
result = pc_func(self._data, other)
elif is_scalar(other):
try:
result = pc_func(self._data, pa.scalar(other))
except (pa.lib.ArrowNotImplementedError, pa.lib.ArrowInvalid):
mask = isna(self) | isna(other)
valid = ~mask
result = np.zeros(len(self), dtype="bool")
result[valid] = op(np.array(self)[valid], other)
return BooleanArray(result, mask)
else:
return NotImplemented
# TODO(ARROW-9429): Add a .to_numpy() to ChunkedArray
return BooleanArray._from_sequence(result.to_pandas().values)
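    # Illustrative sketch of the comparison semantics: missing values propagate
    # into the resulting masked BooleanArray, e.g.
    #   pd.array(["a", None], dtype="string[pyarrow]") == "a"
    #   -> <BooleanArray> [True, <NA>]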
def insert(self, loc: int, item):
if not isinstance(item, str) and item is not libmissing.NA:
raise TypeError("Scalar must be NA or str")
return super().insert(loc, item)
def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None:
"""Set one or more values inplace.
Parameters
----------
key : int, ndarray, or slice
When called from, e.g. ``Series.__setitem__``, ``key`` will be
one of
* scalar int
* ndarray of integers.
* boolean ndarray
* slice object
value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
            value or values to be set at ``key``.
Returns
-------
None
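
        Examples
        --------
        A minimal illustration (assumes pyarrow is installed):

        >>> arr = pd.array(["a", "b"], dtype="string[pyarrow]")
        >>> arr[0] = "c"
        >>> arr
        <ArrowStringArray>
        ['c', 'b']
        Length: 2, dtype: string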
"""
key = check_array_indexer(self, key)
if is_integer(key):
key = cast(int, key)
if not is_scalar(value):
raise ValueError("Must pass scalars with scalar indexer")
elif isna(value):
value = None
elif not isinstance(value, str):
raise ValueError("Scalar must be NA or str")
# Slice data and insert in-between
new_data = [
*self._data[0:key].chunks,
pa.array([value], type=pa.string()),
*self._data[(key + 1) :].chunks,
]
self._data = pa.chunked_array(new_data)
else:
# Convert to integer indices and iteratively assign.
# TODO: Make a faster variant of this in Arrow upstream.
# This is probably extremely slow.
# Convert all possible input key types to an array of integers
if isinstance(key, slice):
key_array = np.array(range(len(self))[key])
elif is_bool_dtype(key):
# TODO(ARROW-9430): Directly support setitem(booleans)
key_array = np.argwhere(key).flatten()
else:
# TODO(ARROW-9431): Directly support setitem(integers)
key_array = np.asanyarray(key)
if is_scalar(value):
value = np.broadcast_to(value, len(key_array))
else:
value = np.asarray(value)
if len(key_array) != len(value):
raise ValueError("Length of indexer and values mismatch")
for k, v in zip(key_array, value):
self[k] = v
def take(
self,
indices: TakeIndexer,
allow_fill: bool = False,
fill_value: Any = None,
):
"""
Take elements from an array.
Parameters
----------
indices : sequence of int or one-dimensional np.ndarray of int
Indices to be taken.
allow_fill : bool, default False
How to handle negative values in `indices`.
* False: negative values in `indices` indicate positional indices
from the right (the default). This is similar to
:func:`numpy.take`.
* True: negative values in `indices` indicate
              missing values. These values are set to `fill_value`. Any other
              negative values raise a ``ValueError``.
fill_value : any, optional
Fill value to use for NA-indices when `allow_fill` is True.
This may be ``None``, in which case the default NA value for
the type, ``self.dtype.na_value``, is used.
For many ExtensionArrays, there will be two representations of
`fill_value`: a user-facing "boxed" scalar, and a low-level
physical NA value. `fill_value` should be the user-facing version,
and the implementation should handle translating that to the
physical version for processing the take if necessary.
Returns
-------
ExtensionArray
Raises
------
IndexError
When the indices are out of bounds for the array.
ValueError
When `indices` contains negative values other than ``-1``
and `allow_fill` is True.
See Also
--------
numpy.take
api.extensions.take
Notes
-----
ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
``iloc``, when `indices` is a sequence of values. Additionally,
it's called by :meth:`Series.reindex`, or any other method
that causes realignment, with a `fill_value`.
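
        Examples
        --------
        A minimal illustration (assumes pyarrow is installed):

        >>> arr = pd.array(["a", "b", "c"], dtype="string[pyarrow]")
        >>> arr.take([0, -1], allow_fill=True)
        <ArrowStringArray>
        ['a', <NA>]
        Length: 2, dtype: string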
"""
# TODO: Remove once we got rid of the (indices < 0) check
if not is_array_like(indices):
indices_array = np.asanyarray(indices)
else:
# error: Incompatible types in assignment (expression has type
# "Sequence[int]", variable has type "ndarray")
indices_array = indices # type: ignore[assignment]
if len(self._data) == 0 and (indices_array >= 0).any():
raise IndexError("cannot do a non-empty take")
if indices_array.size > 0 and indices_array.max() >= len(self._data):
raise IndexError("out of bounds value in 'indices'.")
if allow_fill:
fill_mask = indices_array < 0
if fill_mask.any():
validate_indices(indices_array, len(self._data))
# TODO(ARROW-9433): Treat negative indices as NULL
indices_array = pa.array(indices_array, mask=fill_mask)
result = self._data.take(indices_array)
if isna(fill_value):
return type(self)(result)
# TODO: ArrowNotImplementedError: Function fill_null has no
# kernel matching input types (array[string], scalar[string])
result = type(self)(result)
result[fill_mask] = fill_value
return result
# return type(self)(pc.fill_null(result, pa.scalar(fill_value)))
else:
# Nothing to fill
return type(self)(self._data.take(indices))
else: # allow_fill=False
# TODO(ARROW-9432): Treat negative indices as indices from the right.
if (indices_array < 0).any():
# Don't modify in-place
indices_array = np.copy(indices_array)
indices_array[indices_array < 0] += len(self._data)
return type(self)(self._data.take(indices_array))
def isin(self, values):
if pa_version_under2p0:
return super().isin(values)
value_set = [
pa_scalar.as_py()
for pa_scalar in [pa.scalar(value, from_pandas=True) for value in values]
if pa_scalar.type in (pa.string(), pa.null())
]
# for an empty value_set pyarrow 3.0.0 segfaults and pyarrow 2.0.0 returns True
        # for null values, so we short-circuit to return an all-False array.
if not len(value_set):
return np.zeros(len(self), dtype=bool)
kwargs = {}
if pa_version_under3p0:
            # in pyarrow 2.0.0 skip_null is a required keyword (though ignored);
            # in pyarrow 3.0.0+ passing it raises "unexpected keyword argument"
kwargs["skip_null"] = True
result = pc.is_in(self._data, value_set=pa.array(value_set), **kwargs)
        # pyarrow 2.0.0 returned nulls, so we explicitly specify dtype to convert nulls
# to False
return np.array(result, dtype=np.bool_)
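    # Illustrative sketch: membership tests return a plain boolean ndarray and
    # missing values count as "not in", e.g.
    #   pd.array(["a", None], dtype="string[pyarrow]").isin(["a"])
    #   -> array([ True, False])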
def value_counts(self, dropna: bool = True) -> Series:
"""
Return a Series containing counts of each unique value.
Parameters
----------
dropna : bool, default True
Don't include counts of missing values.
Returns
-------
counts : Series
See Also
--------
Series.value_counts
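
        Examples
        --------
        A minimal illustration (assumes pyarrow is installed):

        >>> pd.array(["a", "a", None], dtype="string[pyarrow]").value_counts()
        a    2
        dtype: Int64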
"""
from pandas import (
Index,
Series,
)
vc = self._data.value_counts()
values = vc.field(0)
counts = vc.field(1)
if dropna and self._data.null_count > 0:
mask = values.is_valid()
values = values.filter(mask)
counts = counts.filter(mask)
# No missing values so we can adhere to the interface and return a numpy array.
counts = np.array(counts)
index = Index(type(self)(values))
return Series(counts, index=index).astype("Int64")
def astype(self, dtype, copy: bool = True):
dtype = pandas_dtype(dtype)
if is_dtype_equal(dtype, self.dtype):
if copy:
return self.copy()
return self
elif isinstance(dtype, NumericDtype):
data = self._data.cast(pa.from_numpy_dtype(dtype.numpy_dtype))
return dtype.__from_arrow__(data)
return super().astype(dtype, copy=copy)
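    # Illustrative sketch: casts to masked numeric dtypes stay in pyarrow rather
    # than taking an object-dtype detour, e.g.
    #   pd.array(["1", "2"], dtype="string[pyarrow]").astype("Int64")
    #   -> <IntegerArray> [1, 2]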
# ------------------------------------------------------------------------
# String methods interface
# error: Cannot determine type of 'na_value'
_str_na_value = StringDtype.na_value # type: ignore[has-type]
def _str_map(
self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
):
        # TODO: de-duplicate with the StringArray method. This method is more or
        # less a copy-and-paste of that one.
from pandas.arrays import (
BooleanArray,
IntegerArray,
)
if dtype is None:
dtype = self.dtype
if na_value is None:
na_value = self.dtype.na_value
mask = isna(self)
arr = np.asarray(self)
if is_integer_dtype(dtype) or is_bool_dtype(dtype):
constructor: type[IntegerArray] | type[BooleanArray]
if is_integer_dtype(dtype):
constructor = IntegerArray
else:
constructor = BooleanArray
na_value_is_na = isna(na_value)
if na_value_is_na:
na_value = 1
result = lib.map_infer_mask(
arr,
f,
mask.view("uint8"),
convert=False,
na_value=na_value,
# error: Argument 1 to "dtype" has incompatible type
# "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected
# "Type[object]"
dtype=np.dtype(dtype), # type: ignore[arg-type]
)
if not na_value_is_na:
mask[:] = False
return constructor(result, mask)
elif is_string_dtype(dtype) and not is_object_dtype(dtype):
# i.e. StringDtype
result = lib.map_infer_mask(
arr, f, mask.view("uint8"), convert=False, na_value=na_value
)
result = pa.array(result, mask=mask, type=pa.string(), from_pandas=True)
return type(self)(result)
else:
# This is when the result type is object. We reach this when
# -> We know the result type is truly object (e.g. .encode returns bytes
# or .findall returns a list).
# -> We don't know the result type. E.g. `.get` can return anything.
return lib.map_infer_mask(arr, f, mask.view("uint8"))
def _str_contains(self, pat, case=True, flags=0, na=np.nan, regex: bool = True):
if flags:
return super()._str_contains(pat, case, flags, na, regex)
if regex:
if pa_version_under4p0 or case is False:
return super()._str_contains(pat, case, flags, na, regex)
else:
result = pc.match_substring_regex(self._data, pat)
else:
if case:
result = pc.match_substring(self._data, pat)
else:
result = pc.match_substring(pc.utf8_upper(self._data), pat.upper())
result = BooleanDtype().__from_arrow__(result)
if not isna(na):
result[isna(result)] = bool(na)
return result
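    # Illustrative sketch: with pyarrow>=4.0, case=True and no flags, regex
    # matching runs natively via pc.match_substring_regex; for values
    # ["ab", None], self._str_contains("a") -> <BooleanArray> [True, <NA>]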
def _str_startswith(self, pat: str, na=None):
if pa_version_under4p0:
return super()._str_startswith(pat, na)
pat = "^" + re.escape(pat)
return self._str_contains(pat, na=na, regex=True)
def _str_endswith(self, pat: str, na=None):
if pa_version_under4p0:
return super()._str_endswith(pat, na)
pat = re.escape(pat) + "$"
return self._str_contains(pat, na=na, regex=True)
def _str_replace(
self,
pat: str | re.Pattern,
repl: str | Callable,
n: int = -1,
case: bool = True,
flags: int = 0,
regex: bool = True,
):
if (
pa_version_under4p0
or isinstance(pat, re.Pattern)
or callable(repl)
or not case
or flags
):
return super()._str_replace(pat, repl, n, case, flags, regex)
func = pc.replace_substring_regex if regex else pc.replace_substring
result = func(self._data, pattern=pat, replacement=repl, max_replacements=n)
return type(self)(result)
def _str_match(
self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None
):
if pa_version_under4p0:
return super()._str_match(pat, case, flags, na)
if not pat.startswith("^"):
pat = "^" + pat
return self._str_contains(pat, case, flags, na, regex=True)
def _str_fullmatch(self, pat, case: bool = True, flags: int = 0, na: Scalar = None):
if pa_version_under4p0:
return super()._str_fullmatch(pat, case, flags, na)
        if not pat.endswith("$") or pat.endswith("\\$"):
            # not anchored yet, or the trailing "$" is an escaped literal "$",
            # so append the end-of-string anchor
            pat = pat + "$"
return self._str_match(pat, case, flags, na)
def _str_isalnum(self):
result = pc.utf8_is_alnum(self._data)
return BooleanDtype().__from_arrow__(result)
def _str_isalpha(self):
result = pc.utf8_is_alpha(self._data)
return BooleanDtype().__from_arrow__(result)
def _str_isdecimal(self):
result = pc.utf8_is_decimal(self._data)
return BooleanDtype().__from_arrow__(result)
def _str_isdigit(self):
result = pc.utf8_is_digit(self._data)
return BooleanDtype().__from_arrow__(result)
def _str_islower(self):
result = pc.utf8_is_lower(self._data)
return BooleanDtype().__from_arrow__(result)
def _str_isnumeric(self):
result = pc.utf8_is_numeric(self._data)
return BooleanDtype().__from_arrow__(result)
def _str_isspace(self):
if pa_version_under2p0:
return super()._str_isspace()
result = pc.utf8_is_space(self._data)
return BooleanDtype().__from_arrow__(result)
def _str_istitle(self):
result = pc.utf8_is_title(self._data)
return BooleanDtype().__from_arrow__(result)
def _str_isupper(self):
result = pc.utf8_is_upper(self._data)
return BooleanDtype().__from_arrow__(result)
def _str_len(self):
if pa_version_under4p0:
return super()._str_len()
result = pc.utf8_length(self._data)
return Int64Dtype().__from_arrow__(result)
def _str_lower(self):
return type(self)(pc.utf8_lower(self._data))
def _str_upper(self):
return type(self)(pc.utf8_upper(self._data))
def _str_strip(self, to_strip=None):
if pa_version_under4p0:
return super()._str_strip(to_strip)
if to_strip is None:
result = pc.utf8_trim_whitespace(self._data)
else:
result = pc.utf8_trim(self._data, characters=to_strip)
return type(self)(result)
def _str_lstrip(self, to_strip=None):
if pa_version_under4p0:
return super()._str_lstrip(to_strip)
if to_strip is None:
result = pc.utf8_ltrim_whitespace(self._data)
else:
result = pc.utf8_ltrim(self._data, characters=to_strip)
return type(self)(result)
def _str_rstrip(self, to_strip=None):
if pa_version_under4p0:
return super()._str_rstrip(to_strip)
if to_strip is None:
result = pc.utf8_rtrim_whitespace(self._data)
else:
result = pc.utf8_rtrim(self._data, characters=to_strip)
return type(self)(result)

File diff suppressed because it is too large