first commit

2025-07-02 06:22:25 +00:00 · 2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions
--- a/.venv/Lib/site-packages/pandas/tests/arrays/sparse/init.py
+++ b/.venv/Lib/site-packages/pandas/tests/arrays/sparse/init.py
--- a/.venv/Lib/site-packages/pandas/tests/arrays/sparse/test_accessor.py
+++ b/.venv/Lib/site-packages/pandas/tests/arrays/sparse/test_accessor.py
@ -0,0 +1,159 @@
+import string
+
+import numpy as np
+import pytest
+
+import pandas.util._test_decorators as td
+
+import pandas as pd
+import pandas._testing as tm
+from pandas.core.arrays.sparse import (
+    SparseArray,
+    SparseDtype,
+)
+
+
+class TestSeriesAccessor:
+    # TODO: collect other Series accessor tests
+    def test_to_dense(self):
+        s = pd.Series([0, 1, 0, 10], dtype="Sparse[int64]")
+        result = s.sparse.to_dense()
+        expected = pd.Series([0, 1, 0, 10])
+        tm.assert_series_equal(result, expected)
+
+
+class TestFrameAccessor:
+    def test_accessor_raises(self):
+        df = pd.DataFrame({"A": [0, 1]})
+        with pytest.raises(AttributeError, match="sparse"):
+            df.sparse
+
+    @pytest.mark.parametrize("format", ["csc", "csr", "coo"])
+    @pytest.mark.parametrize("labels", [None, list(string.ascii_letters[:10])])
+    @pytest.mark.parametrize("dtype", ["float64", "int64"])
+    @td.skip_if_no_scipy
+    def test_from_spmatrix(self, format, labels, dtype):
+        import scipy.sparse
+
+        sp_dtype = SparseDtype(dtype, np.array(0, dtype=dtype).item())
+
+        mat = scipy.sparse.eye(10, format=format, dtype=dtype)
+        result = pd.DataFrame.sparse.from_spmatrix(mat, index=labels, columns=labels)
+        expected = pd.DataFrame(
+            np.eye(10, dtype=dtype), index=labels, columns=labels
+        ).astype(sp_dtype)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("format", ["csc", "csr", "coo"])
+    @td.skip_if_no_scipy
+    def test_from_spmatrix_including_explicit_zero(self, format):
+        import scipy.sparse
+
+        mat = scipy.sparse.random(10, 2, density=0.5, format=format)
+        mat.data[0] = 0
+        result = pd.DataFrame.sparse.from_spmatrix(mat)
+        dtype = SparseDtype("float64", 0.0)
+        expected = pd.DataFrame(mat.todense()).astype(dtype)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "columns",
+        [["a", "b"], pd.MultiIndex.from_product([["A"], ["a", "b"]]), ["a", "a"]],
+    )
+    @td.skip_if_no_scipy
+    def test_from_spmatrix_columns(self, columns):
+        import scipy.sparse
+
+        dtype = SparseDtype("float64", 0.0)
+
+        mat = scipy.sparse.random(10, 2, density=0.5)
+        result = pd.DataFrame.sparse.from_spmatrix(mat, columns=columns)
+        expected = pd.DataFrame(mat.toarray(), columns=columns).astype(dtype)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "colnames", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2), ("x", "x"), (0, 0)]
+    )
+    @td.skip_if_no_scipy
+    def test_to_coo(self, colnames):
+        import scipy.sparse
+
+        df = pd.DataFrame(
+            {colnames[0]: [0, 1, 0], colnames[1]: [1, 0, 0]}, dtype="Sparse[int64, 0]"
+        )
+        result = df.sparse.to_coo()
+        expected = scipy.sparse.coo_matrix(np.asarray(df))
+        assert (result != expected).nnz == 0
+
+    @pytest.mark.parametrize("fill_value", [1, np.nan])
+    @td.skip_if_no_scipy
+    def test_to_coo_nonzero_fill_val_raises(self, fill_value):
+        df = pd.DataFrame(
+            {
+                "A": SparseArray(
+                    [fill_value, fill_value, fill_value, 2], fill_value=fill_value
+                ),
+                "B": SparseArray(
+                    [fill_value, 2, fill_value, fill_value], fill_value=fill_value
+                ),
+            }
+        )
+        with pytest.raises(ValueError, match="fill value must be 0"):
+            df.sparse.to_coo()
+
+    def test_to_dense(self):
+        df = pd.DataFrame(
+            {
+                "A": SparseArray([1, 0], dtype=SparseDtype("int64", 0)),
+                "B": SparseArray([1, 0], dtype=SparseDtype("int64", 1)),
+                "C": SparseArray([1.0, 0.0], dtype=SparseDtype("float64", 0.0)),
+            },
+            index=["b", "a"],
+        )
+        result = df.sparse.to_dense()
+        expected = pd.DataFrame(
+            {"A": [1, 0], "B": [1, 0], "C": [1.0, 0.0]}, index=["b", "a"]
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_density(self):
+        df = pd.DataFrame(
+            {
+                "A": SparseArray([1, 0, 2, 1], fill_value=0),
+                "B": SparseArray([0, 1, 1, 1], fill_value=0),
+            }
+        )
+        res = df.sparse.density
+        expected = 0.75
+        assert res == expected
+
+    @pytest.mark.parametrize("dtype", ["int64", "float64"])
+    @pytest.mark.parametrize("dense_index", [True, False])
+    @td.skip_if_no_scipy
+    def test_series_from_coo(self, dtype, dense_index):
+        import scipy.sparse
+
+        A = scipy.sparse.eye(3, format="coo", dtype=dtype)
+        result = pd.Series.sparse.from_coo(A, dense_index=dense_index)
+        index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
+        expected = pd.Series(SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index)
+        if dense_index:
+            expected = expected.reindex(pd.MultiIndex.from_product(index.levels))
+
+        tm.assert_series_equal(result, expected)
+
+    @td.skip_if_no_scipy
+    def test_series_from_coo_incorrect_format_raises(self):
+        # gh-26554
+        import scipy.sparse
+
+        m = scipy.sparse.csr_matrix(np.array([[0, 1], [0, 0]]))
+        with pytest.raises(
+            TypeError, match="Expected coo_matrix. Got csr_matrix instead."
+        ):
+            pd.Series.sparse.from_coo(m)
+
+    def test_with_column_named_sparse(self):
+        # https://github.com/pandas-dev/pandas/issues/30758
+        df = pd.DataFrame({"sparse": pd.arrays.SparseArray([1, 2])})
+        assert isinstance(df.sparse, pd.core.arrays.sparse.accessor.SparseFrameAccessor)
--- a/.venv/Lib/site-packages/pandas/tests/arrays/sparse/test_arithmetics.py
+++ b/.venv/Lib/site-packages/pandas/tests/arrays/sparse/test_arithmetics.py
@ -0,0 +1,530 @@
+import operator
+
+import numpy as np
+import pytest
+
+from pandas.compat import np_version_under1p20
+
+import pandas as pd
+import pandas._testing as tm
+from pandas.core import ops
+from pandas.core.arrays.sparse import (
+    SparseArray,
+    SparseDtype,
+)
+
+
+@pytest.fixture(params=["integer", "block"])
+def kind(request):
+    """kind kwarg to pass to SparseArray/SparseSeries
+
+    Parametrized over "integer" and "block"; returns the current parameter.
+    """
+    return request.param
+
+
+@pytest.fixture(params=[True, False])
+def mix(request):
+    """Boolean fixture (True, then False) returning the current parameter."""
+    # whether to operate op(sparse, dense) instead of op(sparse, sparse)
+    return request.param
+
+
+class TestSparseArrayArithmetics:
+    """Arithmetic, comparison and logical operator tests for SparseArray.
+
+    Most checks densify the sparse result with ``to_dense()`` and compare it
+    against the same operation applied to the dense inputs.
+    """
+
+    # Constructor for the dense reference inputs.
+    _base = np.array
+    # Sparse container under test.
+    _klass = SparseArray
+
+    def _assert(self, a, b):
+        """Assert that two dense arrays are equal."""
+        # NOTE(review): the original note says tm.assert_sp_array_equal has to
+        # be used (see GH #45126), yet the dense ndarray comparison is what is
+        # called here -- confirm against the issue before changing.
+        tm.assert_numpy_array_equal(a, b)
+
+    def _check_numeric_ops(self, a, b, a_dense, b_dense, mix: bool, op):
+        """Check ``op`` on sparse operands against the dense Series result.
+
+        ``a``/``b`` are the operands under test and ``a_dense``/``b_dense``
+        their dense counterparts. With ``mix`` true the right operand is the
+        dense ``b_dense`` instead of ``b``. Raises NotImplementedError when
+        neither dense operand is an ndarray.
+        """
+        # Check that arithmetic behavior matches non-Sparse Series arithmetic
+
+        if isinstance(a_dense, np.ndarray):
+            expected = op(pd.Series(a_dense), b_dense).values
+        elif isinstance(b_dense, np.ndarray):
+            expected = op(a_dense, pd.Series(b_dense)).values
+        else:
+            raise NotImplementedError
+
+        # Only the sparse computation runs with floating-point warnings
+        # suppressed; the expected value above is computed outside this block.
+        with np.errstate(invalid="ignore", divide="ignore"):
+            if mix:
+                result = op(a, b_dense).to_dense()
+            else:
+                result = op(a, b).to_dense()
+
+        self._assert(result, expected)
+
+    def _check_bool_result(self, res):
+        """Assert ``res`` is a ``_klass`` with bool subtype and bool fill value."""
+        assert isinstance(res, self._klass)
+        assert isinstance(res.dtype, SparseDtype)
+        assert res.dtype.subtype == np.bool_
+        assert isinstance(res.fill_value, bool)
+
+    def _check_comparison_ops(self, a, b, a_dense, b_dense):
+        """Check all six comparison operators for sparse/sparse and sparse/dense.
+
+        Each result must pass ``_check_bool_result`` and, once densified,
+        equal the same comparison applied to ``a_dense`` and ``b_dense``.
+        """
+        with np.errstate(invalid="ignore"):
+            # Unfortunately, wrapping the computation of each expected
+            # value in its own np.errstate() is too tedious.
+            #
+            # sparse & sparse
+            self._check_bool_result(a == b)
+            self._assert((a == b).to_dense(), a_dense == b_dense)
+
+            self._check_bool_result(a != b)
+            self._assert((a != b).to_dense(), a_dense != b_dense)
+
+            self._check_bool_result(a >= b)
+            self._assert((a >= b).to_dense(), a_dense >= b_dense)
+
+            self._check_bool_result(a <= b)
+            self._assert((a <= b).to_dense(), a_dense <= b_dense)
+
+            self._check_bool_result(a > b)
+            self._assert((a > b).to_dense(), a_dense > b_dense)
+
+            self._check_bool_result(a < b)
+            self._assert((a < b).to_dense(), a_dense < b_dense)
+
+            # sparse & dense
+            self._check_bool_result(a == b_dense)
+            self._assert((a == b_dense).to_dense(), a_dense == b_dense)
+
+            self._check_bool_result(a != b_dense)
+            self._assert((a != b_dense).to_dense(), a_dense != b_dense)
+
+            self._check_bool_result(a >= b_dense)
+            self._assert((a >= b_dense).to_dense(), a_dense >= b_dense)
+
+            self._check_bool_result(a <= b_dense)
+            self._assert((a <= b_dense).to_dense(), a_dense <= b_dense)
+
+            self._check_bool_result(a > b_dense)
+            self._assert((a > b_dense).to_dense(), a_dense > b_dense)
+
+            self._check_bool_result(a < b_dense)
+            self._assert((a < b_dense).to_dense(), a_dense < b_dense)
+
+    def _check_logical_ops(self, a, b, a_dense, b_dense):
+        """Check ``&`` and ``|`` for sparse/sparse and sparse/dense operands."""
+        # sparse & sparse
+        self._check_bool_result(a & b)
+        self._assert((a & b).to_dense(), a_dense & b_dense)
+
+        self._check_bool_result(a | b)
+        self._assert((a | b).to_dense(), a_dense | b_dense)
+        # sparse & dense
+        self._check_bool_result(a & b_dense)
+        self._assert((a & b_dense).to_dense(), a_dense & b_dense)
+
+        self._check_bool_result(a | b_dense)
+        self._assert((a | b_dense).to_dense(), a_dense | b_dense)
+
+    @pytest.mark.parametrize("scalar", [0, 1, 3])
+    @pytest.mark.parametrize("fill_value", [None, 0, 2])
+    def test_float_scalar(
+        self, kind, mix, all_arithmetic_functions, fill_value, scalar, request
+    ):
+        """Arithmetic between a float SparseArray and an integer scalar."""
+        op = all_arithmetic_functions
+
+        # With numpy < 1.20 the test is marked xfail for floordiv with a zero
+        # scalar and for rfloordiv with a non-zero scalar (GH#38172).
+        if np_version_under1p20:
+            if op in [operator.floordiv, ops.rfloordiv]:
+                if op is operator.floordiv and scalar != 0:
+                    pass
+                elif op is ops.rfloordiv and scalar == 0:
+                    pass
+                else:
+                    mark = pytest.mark.xfail(raises=AssertionError, reason="GH#38172")
+                    request.node.add_marker(mark)
+
+        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
+
+        a = self._klass(values, kind=kind, fill_value=fill_value)
+        self._check_numeric_ops(a, scalar, values, scalar, mix, op)
+
+    def test_float_scalar_comparison(self, kind):
+        """Comparisons against scalars 1, 0 and 3 for three fill values."""
+        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
+
+        a = self._klass(values, kind=kind)
+        self._check_comparison_ops(a, 1, values, 1)
+        self._check_comparison_ops(a, 0, values, 0)
+        self._check_comparison_ops(a, 3, values, 3)
+
+        a = self._klass(values, kind=kind, fill_value=0)
+        self._check_comparison_ops(a, 1, values, 1)
+        self._check_comparison_ops(a, 0, values, 0)
+        self._check_comparison_ops(a, 3, values, 3)
+
+        a = self._klass(values, kind=kind, fill_value=2)
+        self._check_comparison_ops(a, 1, values, 1)
+        self._check_comparison_ops(a, 0, values, 0)
+        self._check_comparison_ops(a, 3, values, 3)
+
+    def test_float_same_index_without_nans(self, kind, mix, all_arithmetic_functions):
+        """Arithmetic on two NaN-free float arrays with fill value 0."""
+        # when sp_index are the same
+        op = all_arithmetic_functions
+
+        values = self._base([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0])
+        rvalues = self._base([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0])
+
+        a = self._klass(values, kind=kind, fill_value=0)
+        b = self._klass(rvalues, kind=kind, fill_value=0)
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+
+    def test_float_same_index_with_nans(
+        self, kind, mix, all_arithmetic_functions, request
+    ):
+        """Arithmetic on two float arrays whose NaNs sit at the same positions."""
+        # when sp_index are the same
+        op = all_arithmetic_functions
+
+        # With numpy < 1.20, rfloordiv is marked xfail except for the
+        # mix=True / kind="block" combination (GH#38172).
+        if (
+            np_version_under1p20
+            and op is ops.rfloordiv
+            and not (mix and kind == "block")
+        ):
+            mark = pytest.mark.xfail(raises=AssertionError, reason="GH#38172")
+            request.node.add_marker(mark)
+        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
+        rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])
+
+        a = self._klass(values, kind=kind)
+        b = self._klass(rvalues, kind=kind)
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+
+    def test_float_same_index_comparison(self, kind):
+        """Comparisons for the with-NaN and the NaN-free same-index inputs."""
+        # when sp_index are the same
+        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
+        rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])
+
+        a = self._klass(values, kind=kind)
+        b = self._klass(rvalues, kind=kind)
+        self._check_comparison_ops(a, b, values, rvalues)
+
+        values = self._base([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0])
+        rvalues = self._base([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0])
+
+        a = self._klass(values, kind=kind, fill_value=0)
+        b = self._klass(rvalues, kind=kind, fill_value=0)
+        self._check_comparison_ops(a, b, values, rvalues)
+
+    def test_float_array(self, kind, mix, all_arithmetic_functions):
+        """Arithmetic on float arrays across several fill-value combinations."""
+        op = all_arithmetic_functions
+
+        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
+        rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
+
+        a = self._klass(values, kind=kind)
+        b = self._klass(rvalues, kind=kind)
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)
+
+        a = self._klass(values, kind=kind, fill_value=0)
+        b = self._klass(rvalues, kind=kind)
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+
+        a = self._klass(values, kind=kind, fill_value=0)
+        b = self._klass(rvalues, kind=kind, fill_value=0)
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+
+        a = self._klass(values, kind=kind, fill_value=1)
+        b = self._klass(rvalues, kind=kind, fill_value=2)
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+
+    def test_float_array_different_kind(self, mix, all_arithmetic_functions):
+        """Arithmetic between an "integer"-kind and a "block"-kind array."""
+        op = all_arithmetic_functions
+
+        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
+        rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
+
+        a = self._klass(values, kind="integer")
+        b = self._klass(rvalues, kind="block")
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)
+
+        a = self._klass(values, kind="integer", fill_value=0)
+        b = self._klass(rvalues, kind="block")
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+
+        a = self._klass(values, kind="integer", fill_value=0)
+        b = self._klass(rvalues, kind="block", fill_value=0)
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+
+        a = self._klass(values, kind="integer", fill_value=1)
+        b = self._klass(rvalues, kind="block", fill_value=2)
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+
+    def test_float_array_comparison(self, kind):
+        """Comparisons on float arrays across several fill-value combinations."""
+        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
+        rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
+
+        a = self._klass(values, kind=kind)
+        b = self._klass(rvalues, kind=kind)
+        self._check_comparison_ops(a, b, values, rvalues)
+        self._check_comparison_ops(a, b * 0, values, rvalues * 0)
+
+        a = self._klass(values, kind=kind, fill_value=0)
+        b = self._klass(rvalues, kind=kind)
+        self._check_comparison_ops(a, b, values, rvalues)
+
+        a = self._klass(values, kind=kind, fill_value=0)
+        b = self._klass(rvalues, kind=kind, fill_value=0)
+        self._check_comparison_ops(a, b, values, rvalues)
+
+        a = self._klass(values, kind=kind, fill_value=1)
+        b = self._klass(rvalues, kind=kind, fill_value=2)
+        self._check_comparison_ops(a, b, values, rvalues)
+
+    def test_int_array(self, kind, mix, all_arithmetic_functions):
+        """Arithmetic on int64 arrays, also asserting the resulting SparseDtype."""
+        op = all_arithmetic_functions
+
+        # have to specify dtype explicitly until fixing GH 667
+        dtype = np.int64
+
+        values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
+        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)
+
+        a = self._klass(values, dtype=dtype, kind=kind)
+        assert a.dtype == SparseDtype(dtype)
+        b = self._klass(rvalues, dtype=dtype, kind=kind)
+        assert b.dtype == SparseDtype(dtype)
+
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)
+
+        a = self._klass(values, fill_value=0, dtype=dtype, kind=kind)
+        assert a.dtype == SparseDtype(dtype)
+        b = self._klass(rvalues, dtype=dtype, kind=kind)
+        assert b.dtype == SparseDtype(dtype)
+
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+
+        a = self._klass(values, fill_value=0, dtype=dtype, kind=kind)
+        assert a.dtype == SparseDtype(dtype)
+        b = self._klass(rvalues, fill_value=0, dtype=dtype, kind=kind)
+        assert b.dtype == SparseDtype(dtype)
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+
+        a = self._klass(values, fill_value=1, dtype=dtype, kind=kind)
+        assert a.dtype == SparseDtype(dtype, fill_value=1)
+        b = self._klass(rvalues, fill_value=2, dtype=dtype, kind=kind)
+        assert b.dtype == SparseDtype(dtype, fill_value=2)
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+
+    def test_int_array_comparison(self, kind):
+        """Comparisons on int64 arrays across several fill-value combinations."""
+        dtype = "int64"
+        # int32 NI ATM
+
+        values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
+        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)
+
+        a = self._klass(values, dtype=dtype, kind=kind)
+        b = self._klass(rvalues, dtype=dtype, kind=kind)
+        self._check_comparison_ops(a, b, values, rvalues)
+        self._check_comparison_ops(a, b * 0, values, rvalues * 0)
+
+        a = self._klass(values, dtype=dtype, kind=kind, fill_value=0)
+        b = self._klass(rvalues, dtype=dtype, kind=kind)
+        self._check_comparison_ops(a, b, values, rvalues)
+
+        a = self._klass(values, dtype=dtype, kind=kind, fill_value=0)
+        b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=0)
+        self._check_comparison_ops(a, b, values, rvalues)
+
+        a = self._klass(values, dtype=dtype, kind=kind, fill_value=1)
+        b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=2)
+        self._check_comparison_ops(a, b, values, rvalues)
+
+    @pytest.mark.parametrize("fill_value", [True, False, np.nan])
+    def test_bool_same_index(self, kind, fill_value):
+        """Logical ops on two bool arrays holding identical values."""
+        # GH 14000
+        # when sp_index are the same
+        values = self._base([True, False, True, True], dtype=np.bool_)
+        rvalues = self._base([True, False, True, True], dtype=np.bool_)
+
+        a = self._klass(values, kind=kind, dtype=np.bool_, fill_value=fill_value)
+        b = self._klass(rvalues, kind=kind, dtype=np.bool_, fill_value=fill_value)
+        self._check_logical_ops(a, b, values, rvalues)
+
+    @pytest.mark.parametrize("fill_value", [True, False, np.nan])
+    def test_bool_array_logical(self, kind, fill_value):
+        """Logical ops on two bool arrays holding different values."""
+        # GH 14000
+        # NOTE(review): the original remark here read "when sp_index are the
+        # same", but the two operands differ -- it looks copied from
+        # test_bool_same_index; confirm.
+        values = self._base([True, False, True, False, True, True], dtype=np.bool_)
+        rvalues = self._base([True, False, False, True, False, True], dtype=np.bool_)
+
+        a = self._klass(values, kind=kind, dtype=np.bool_, fill_value=fill_value)
+        b = self._klass(rvalues, kind=kind, dtype=np.bool_, fill_value=fill_value)
+        self._check_logical_ops(a, b, values, rvalues)
+
+    def test_mixed_array_float_int(self, kind, mix, all_arithmetic_functions, request):
+        """Arithmetic between a float array and an int64 array."""
+        op = all_arithmetic_functions
+
+        # With numpy < 1.20 both floor-division variants are marked xfail when
+        # the right operand is dense (GH#38172).
+        if np_version_under1p20 and op in [operator.floordiv, ops.rfloordiv] and mix:
+            mark = pytest.mark.xfail(raises=AssertionError, reason="GH#38172")
+            request.node.add_marker(mark)
+
+        rdtype = "int64"
+
+        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
+        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)
+
+        a = self._klass(values, kind=kind)
+        b = self._klass(rvalues, kind=kind)
+        assert b.dtype == SparseDtype(rdtype)
+
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)
+
+        a = self._klass(values, kind=kind, fill_value=0)
+        b = self._klass(rvalues, kind=kind)
+        assert b.dtype == SparseDtype(rdtype)
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+
+        a = self._klass(values, kind=kind, fill_value=0)
+        b = self._klass(rvalues, kind=kind, fill_value=0)
+        assert b.dtype == SparseDtype(rdtype)
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+
+        a = self._klass(values, kind=kind, fill_value=1)
+        b = self._klass(rvalues, kind=kind, fill_value=2)
+        assert b.dtype == SparseDtype(rdtype, fill_value=2)
+        self._check_numeric_ops(a, b, values, rvalues, mix, op)
+
+    def test_mixed_array_comparison(self, kind):
+        """Comparisons between a float array and an int64 array."""
+        rdtype = "int64"
+        # int32 NI ATM
+
+        values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
+        rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)
+
+        a = self._klass(values, kind=kind)
+        b = self._klass(rvalues, kind=kind)
+        assert b.dtype == SparseDtype(rdtype)
+
+        self._check_comparison_ops(a, b, values, rvalues)
+        self._check_comparison_ops(a, b * 0, values, rvalues * 0)
+
+        a = self._klass(values, kind=kind, fill_value=0)
+        b = self._klass(rvalues, kind=kind)
+        assert b.dtype == SparseDtype(rdtype)
+        self._check_comparison_ops(a, b, values, rvalues)
+
+        a = self._klass(values, kind=kind, fill_value=0)
+        b = self._klass(rvalues, kind=kind, fill_value=0)
+        assert b.dtype == SparseDtype(rdtype)
+        self._check_comparison_ops(a, b, values, rvalues)
+
+        a = self._klass(values, kind=kind, fill_value=1)
+        b = self._klass(rvalues, kind=kind, fill_value=2)
+        assert b.dtype == SparseDtype(rdtype, fill_value=2)
+        self._check_comparison_ops(a, b, values, rvalues)
+
+    def test_xor(self):
+        """``^`` on two bool SparseArrays, compared as a sparse array."""
+        s = SparseArray([True, True, False, False])
+        t = SparseArray([True, False, True, False])
+        result = s ^ t
+        # The expected array is built from an explicit IntIndex of length 4
+        # with stored positions 0, 1 and 2.
+        sp_index = pd.core.arrays.sparse.IntIndex(4, np.array([0, 1, 2], dtype="int32"))
+        expected = SparseArray([False, True, True], sparse_index=sp_index)
+        tm.assert_sp_array_equal(result, expected)
+
+
+@pytest.mark.parametrize("op", [operator.eq, operator.add])
+def test_with_list(op):
+    arr = SparseArray([0, 1], fill_value=0)
+    result = op(arr, [0, 1])
+    expected = op(arr, SparseArray([0, 1]))
+    tm.assert_sp_array_equal(result, expected)
+
+
+def test_with_dataframe():
+    # GH#27910
+    arr = SparseArray([0, 1], fill_value=0)
+    df = pd.DataFrame([[1, 2], [3, 4]])
+    result = arr.__add__(df)
+    assert result is NotImplemented
+
+
+def test_with_zerodim_ndarray():
+    # GH#27910
+    arr = SparseArray([0, 1], fill_value=0)
+
+    result = arr * np.array(2)
+    expected = arr * 2
+    tm.assert_sp_array_equal(result, expected)
+
+
+@pytest.mark.parametrize("ufunc", [np.abs, np.exp])
+@pytest.mark.parametrize(
+    "arr", [SparseArray([0, 0, -1, 1]), SparseArray([None, None, -1, 1])]
+)
+def test_ufuncs(ufunc, arr):
+    result = ufunc(arr)
+    fill_value = ufunc(arr.fill_value)
+    expected = SparseArray(ufunc(np.asarray(arr)), fill_value=fill_value)
+    tm.assert_sp_array_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "a, b",
+    [
+        (SparseArray([0, 0, 0]), np.array([0, 1, 2])),
+        (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])),
+        (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])),
+        (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])),
+        (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])),
+    ],
+)
+@pytest.mark.parametrize("ufunc", [np.add, np.greater])
+def test_binary_ufuncs(ufunc, a, b):
+    # can't say anything about fill value here.
+    result = ufunc(a, b)
+    expected = ufunc(np.asarray(a), np.asarray(b))
+    assert isinstance(result, SparseArray)
+    tm.assert_numpy_array_equal(np.asarray(result), expected)
+
+
+def test_ndarray_inplace():
+    sparray = SparseArray([0, 2, 0, 0])
+    ndarray = np.array([0, 1, 2, 3])
+    ndarray += sparray
+    expected = np.array([0, 3, 2, 3])
+    tm.assert_numpy_array_equal(ndarray, expected)
+
+
+def test_sparray_inplace():
+    sparray = SparseArray([0, 2, 0, 0])
+    ndarray = np.array([0, 1, 2, 3])
+    sparray += ndarray
+    expected = SparseArray([0, 3, 2, 3], fill_value=0)
+    tm.assert_sp_array_equal(sparray, expected)
+
+
+@pytest.mark.parametrize("fill_value", [True, False])
+def test_invert(fill_value):
+    arr = np.array([True, False, False, True])
+    sparray = SparseArray(arr, fill_value=fill_value)
+    result = ~sparray
+    expected = SparseArray(~arr, fill_value=not fill_value)
+    tm.assert_sp_array_equal(result, expected)
+
+    result = ~pd.Series(sparray)
+    expected = pd.Series(expected)
+    tm.assert_series_equal(result, expected)
+
+    result = ~pd.DataFrame({"A": sparray})
+    expected = pd.DataFrame({"A": expected})
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("fill_value", [0, np.nan])
+@pytest.mark.parametrize("op", [operator.pos, operator.neg])
+def test_unary_op(op, fill_value):
+    arr = np.array([0, 1, np.nan, 2])
+    sparray = SparseArray(arr, fill_value=fill_value)
+    result = op(sparray)
+    expected = SparseArray(op(arr), fill_value=op(fill_value))
+    tm.assert_sp_array_equal(result, expected)
+
+
+@pytest.mark.parametrize("cons", [list, np.array, SparseArray])
+def test_mismatched_length_cmp_op(cons):
+    left = SparseArray([True, True])
+    right = cons([True, True, True])
+    with pytest.raises(ValueError, match="operands have mismatched length"):
+        left & right
--- a/.venv/Lib/site-packages/pandas/tests/arrays/sparse/test_array.py
+++ b/.venv/Lib/site-packages/pandas/tests/arrays/sparse/test_array.py
--- a/.venv/Lib/site-packages/pandas/tests/arrays/sparse/test_combine_concat.py
+++ b/.venv/Lib/site-packages/pandas/tests/arrays/sparse/test_combine_concat.py
@ -0,0 +1,62 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+from pandas.core.arrays.sparse import SparseArray
+
+
+class TestSparseArrayConcat:
+    @pytest.mark.parametrize("kind", ["integer", "block"])
+    def test_basic(self, kind):
+        a = SparseArray([1, 0, 0, 2], kind=kind)
+        b = SparseArray([1, 0, 2, 2], kind=kind)
+
+        result = SparseArray._concat_same_type([a, b])
+        # Can't make any assertions about the sparse index itself
+        # since we aren't don't merge sparse blocs across arrays
+        # in to_concat
+        expected = np.array([1, 2, 1, 2, 2], dtype="int64")
+        tm.assert_numpy_array_equal(result.sp_values, expected)
+        assert result.kind == kind
+
+    @pytest.mark.parametrize("kind", ["integer", "block"])
+    def test_uses_first_kind(self, kind):
+        other = "integer" if kind == "block" else "block"
+        a = SparseArray([1, 0, 0, 2], kind=kind)
+        b = SparseArray([1, 0, 2, 2], kind=other)
+
+        result = SparseArray._concat_same_type([a, b])
+        expected = np.array([1, 2, 1, 2, 2], dtype="int64")
+        tm.assert_numpy_array_equal(result.sp_values, expected)
+        assert result.kind == kind
+
+
+@pytest.mark.parametrize(
+    "other, expected_dtype",
+    [
+        # compatible dtype -> preserve sparse
+        (pd.Series([3, 4, 5], dtype="int64"), pd.SparseDtype("int64", 0)),
+        # (pd.Series([3, 4, 5], dtype="Int64"), pd.SparseDtype("int64", 0)),
+        # incompatible dtype -> Sparse[common dtype]
+        (pd.Series([1.5, 2.5, 3.5], dtype="float64"), pd.SparseDtype("float64", 0)),
+        # incompatible dtype -> Sparse[object] dtype
+        (pd.Series(["a", "b", "c"], dtype=object), pd.SparseDtype(object, 0)),
+        # categorical with compatible categories -> dtype of the categories
+        (pd.Series([3, 4, 5], dtype="category"), np.dtype("int64")),
+        (pd.Series([1.5, 2.5, 3.5], dtype="category"), np.dtype("float64")),
+        # categorical with incompatible categories -> object dtype
+        (pd.Series(["a", "b", "c"], dtype="category"), np.dtype(object)),
+    ],
+)
+def test_concat_with_non_sparse(other, expected_dtype):
+    # https://github.com/pandas-dev/pandas/issues/34336
+    s_sparse = pd.Series([1, 0, 2], dtype=pd.SparseDtype("int64", 0))
+
+    result = pd.concat([s_sparse, other], ignore_index=True)
+    expected = pd.Series(list(s_sparse) + list(other)).astype(expected_dtype)
+    tm.assert_series_equal(result, expected)
+
+    result = pd.concat([other, s_sparse], ignore_index=True)
+    expected = pd.Series(list(other) + list(s_sparse)).astype(expected_dtype)
+    tm.assert_series_equal(result, expected)
--- a/.venv/Lib/site-packages/pandas/tests/arrays/sparse/test_dtype.py
+++ b/.venv/Lib/site-packages/pandas/tests/arrays/sparse/test_dtype.py
@ -0,0 +1,209 @@
+import re
+
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas.core.arrays.sparse import SparseDtype
+
+
+@pytest.mark.parametrize(
+    "dtype, fill_value",
+    [
+        ("int", 0),
+        ("float", np.nan),
+        ("bool", False),
+        ("object", np.nan),
+        ("datetime64[ns]", np.datetime64("NaT", "ns")),
+        ("timedelta64[ns]", np.timedelta64("NaT", "ns")),
+    ],
+)
+def test_inferred_dtype(dtype, fill_value):
+    """The fill value defaults to the one expected for the given subtype."""
+    result = SparseDtype(dtype).fill_value
+    if pd.isna(fill_value):
+        # NaN/NaT do not compare equal to themselves, so check for missingness
+        # and for the exact type instead. The two checks are separate asserts
+        # so a failure reports which one did not hold.
+        assert pd.isna(result)
+        assert type(result) is type(fill_value)
+    else:
+        assert result == fill_value
+
+
+def test_from_sparse_dtype():
+    """Wrapping an existing SparseDtype keeps its fill value."""
+    original = SparseDtype("float", 0)
+    assert SparseDtype(original).fill_value == 0
+
+
+def test_from_sparse_dtype_fill_value():
+    """An explicit ``fill_value`` overrides the one of the wrapped dtype."""
+    original = SparseDtype("int", 1)
+    overridden = SparseDtype(original, fill_value=2)
+    assert overridden == SparseDtype("int", 2)
+
+
+@pytest.mark.parametrize(
+    ["dtype", "fill_value"],
+    [
+        ("int", None),
+        ("float", None),
+        ("bool", None),
+        ("object", None),
+        ("datetime64[ns]", None),
+        ("timedelta64[ns]", None),
+        ("int", np.nan),
+        ("float", 0),
+    ],
+)
+def test_equal(dtype, fill_value):
+    """Two dtypes built from identical arguments compare equal both ways."""
+    first = SparseDtype(dtype, fill_value)
+    second = SparseDtype(dtype, fill_value)
+    assert first == second
+    assert second == first
+
+
+def test_nans_equal():
+    """``float("nan")`` and ``np.nan`` fill values give equal dtypes."""
+    from_builtin = SparseDtype(float, float("nan"))
+    from_numpy = SparseDtype(float, np.nan)
+    assert from_builtin == from_numpy
+    assert from_numpy == from_builtin
+
+
+@pytest.mark.parametrize(
+    ["a", "b"],
+    [
+        (SparseDtype("float64"), SparseDtype("float32")),
+        (SparseDtype("float64"), SparseDtype("float64", 0)),
+        (SparseDtype("float64"), SparseDtype("datetime64[ns]", np.nan)),
+        (SparseDtype(int, pd.NaT), SparseDtype(float, pd.NaT)),
+        (SparseDtype("float64"), np.dtype("float64")),
+    ],
+)
+def test_not_equal(a, b):
+    """Each pair differs in subtype or fill value, or ``b`` is not sparse."""
+    assert a != b
+
+
+def test_construct_from_string_raises():
+    """A string that is not a sparse dtype spec raises ``TypeError``."""
+    msg = "Cannot construct a 'SparseDtype' from 'not a dtype'"
+    with pytest.raises(TypeError, match=msg):
+        SparseDtype.construct_from_string("not a dtype")
+
+
+@pytest.mark.parametrize(
+    ["dtype", "expected"],
+    [
+        (SparseDtype(int), True),
+        (SparseDtype(float), True),
+        (SparseDtype(bool), True),
+        (SparseDtype(object), False),
+        (SparseDtype(str), False),
+    ],
+)
+def test_is_numeric(dtype, expected):
+    """``_is_numeric`` is exactly ``True`` or ``False`` as listed per subtype."""
+    is_numeric = dtype._is_numeric
+    assert is_numeric is expected
+
+
+def test_str_uses_object():
+    """A ``str`` subtype is stored as the numpy object dtype."""
+    assert SparseDtype(str).subtype == np.dtype("object")
+
+
+@pytest.mark.parametrize(
+    ["string", "expected"],
+    [
+        ("Sparse[float64]", SparseDtype(np.dtype("float64"))),
+        ("Sparse[float32]", SparseDtype(np.dtype("float32"))),
+        ("Sparse[int]", SparseDtype(np.dtype("int"))),
+        ("Sparse[str]", SparseDtype(np.dtype("str"))),
+        ("Sparse[datetime64[ns]]", SparseDtype(np.dtype("datetime64[ns]"))),
+        ("Sparse", SparseDtype(np.dtype("float"), np.nan)),
+    ],
+)
+def test_construct_from_string(string, expected):
+    """``construct_from_string`` parses each spec into the listed dtype."""
+    assert SparseDtype.construct_from_string(string) == expected
+
+
+@pytest.mark.parametrize(
+    ["a", "b", "expected"],
+    [
+        (SparseDtype(float, 0.0), SparseDtype(np.dtype("float"), 0.0), True),
+        (SparseDtype(int, 0), SparseDtype(int, 0), True),
+        (SparseDtype(float, float("nan")), SparseDtype(float, np.nan), True),
+        (SparseDtype(float, 0), SparseDtype(float, np.nan), False),
+        (SparseDtype(int, 0.0), SparseDtype(float, 0.0), False),
+    ],
+)
+def test_hash_equal(a, b, expected):
+    """Equality and hash equality agree with ``expected`` for each pair."""
+    dtypes_equal = a == b
+    hashes_equal = hash(a) == hash(b)
+
+    assert dtypes_equal is expected
+    assert hashes_equal is expected
+
+
+@pytest.mark.parametrize(
+    ["string", "expected"],
+    [
+        ("Sparse[int]", "int"),
+        ("Sparse[int, 0]", "int"),
+        ("Sparse[int64]", "int64"),
+        ("Sparse[int64, 0]", "int64"),
+        ("Sparse[datetime64[ns], 0]", "datetime64[ns]"),
+    ],
+)
+def test_parse_subtype(string, expected):
+    """The first item returned by ``_parse_subtype`` is the subtype string."""
+    # the helper returns a pair; only its first element is checked here
+    parsed_subtype, _ = SparseDtype._parse_subtype(string)
+    assert parsed_subtype == expected
+
+
+@pytest.mark.parametrize(
+    "string",
+    [
+        "Sparse[int, 1]",
+        "Sparse[float, 0.0]",
+        "Sparse[bool, True]",
+    ],
+)
+def test_construct_from_string_fill_value_raises(string):
+    """The listed specs carrying a fill value are rejected with ``TypeError``."""
+    msg = "fill_value in the string is not"
+    with pytest.raises(TypeError, match=msg):
+        SparseDtype.construct_from_string(string)
+
+
+@pytest.mark.parametrize(
+    ["original", "dtype", "expected"],
+    [
+        (SparseDtype(int, 0), float, SparseDtype(float, 0.0)),
+        (SparseDtype(int, 1), float, SparseDtype(float, 1.0)),
+        (SparseDtype(int, 1), str, SparseDtype(object, "1")),
+        (SparseDtype(float, 1.5), int, SparseDtype(int, 1)),
+    ],
+)
+def test_update_dtype(original, dtype, expected):
+    """``update_dtype`` returns the listed dtype for each new subtype."""
+    assert original.update_dtype(dtype) == expected
+
+
+@pytest.mark.parametrize(
+    ["original", "dtype", "expected_error_msg"],
+    [
+        (
+            SparseDtype(float, np.nan),
+            int,
+            re.escape("Cannot convert non-finite values (NA or inf) to integer"),
+        ),
+        (
+            SparseDtype(str, "abc"),
+            int,
+            re.escape("invalid literal for int() with base 10: 'abc'"),
+        ),
+    ],
+)
+def test_update_dtype_raises(original, dtype, expected_error_msg):
+    """A fill value that cannot be cast to the new subtype raises ValueError."""
+    # the messages are escaped above because they contain regex metacharacters
+    with pytest.raises(ValueError, match=expected_error_msg):
+        original.update_dtype(dtype)
+
+
+def test_repr():
+    """``str`` of a SparseDtype shows the subtype and the repr of the fill value."""
+    # GH-34352
+    cases = [
+        (SparseDtype("int64", fill_value=0), "Sparse[int64, 0]"),
+        (SparseDtype(object, fill_value="0"), "Sparse[object, '0']"),
+    ]
+    for dtype, expected in cases:
+        assert str(dtype) == expected
--- a/.venv/Lib/site-packages/pandas/tests/arrays/sparse/test_libsparse.py
+++ b/.venv/Lib/site-packages/pandas/tests/arrays/sparse/test_libsparse.py
@ -0,0 +1,618 @@
+import operator
+
+import numpy as np
+import pytest
+
+import pandas._libs.sparse as splib
+import pandas.util._test_decorators as td
+
+from pandas import Series
+import pandas._testing as tm
+from pandas.core.arrays.sparse import (
+    BlockIndex,
+    IntIndex,
+    make_sparse_index,
+)
+
+# Length passed to the BlockIndex objects that the tests below build from
+# these fixtures.
+TEST_LENGTH = 20
+
+# Each fixture gives the block start positions ("*loc") and block lengths
+# ("*len") of two indexes x and y, plus the blocks expected from intersecting
+# them.
+
+# Three blocks each; every x block partially overlaps one y block.
+plain_case = {
+    "xloc": [0, 7, 15],
+    "xlen": [3, 5, 5],
+    "yloc": [2, 9, 14],
+    "ylen": [2, 3, 5],
+    "intersect_loc": [2, 9, 15],
+    "intersect_len": [1, 3, 4],
+}
+# y overlaps only the first x block; the second x block drops out entirely.
+delete_blocks = {
+    "xloc": [0, 5],
+    "xlen": [4, 4],
+    "yloc": [1],
+    "ylen": [4],
+    "intersect_loc": [1],
+    "intersect_len": [3],
+}
+# The single x block is split in two by the gap between the y blocks.
+split_blocks = {
+    "xloc": [0],
+    "xlen": [10],
+    "yloc": [0, 5],
+    "ylen": [3, 7],
+    "intersect_loc": [0, 5],
+    "intersect_len": [3, 5],
+}
+# The first y block lies entirely before x and contributes nothing.
+skip_block = {
+    "xloc": [10],
+    "xlen": [5],
+    "yloc": [0, 12],
+    "ylen": [5, 3],
+    "intersect_loc": [12],
+    "intersect_len": [3],
+}
+
+# x and y blocks interleave without sharing any position.
+no_intersect = {
+    "xloc": [0, 10],
+    "xlen": [4, 6],
+    "yloc": [5, 17],
+    "ylen": [4, 2],
+    "intersect_loc": [],
+    "intersect_len": [],
+}
+
+
+def check_cases(_check_case):
+    """Call ``_check_case`` once per shared fixture, then for empty indexes.
+
+    ``_check_case`` receives six positional lists:
+    ``xloc, xlen, yloc, ylen, intersect_loc, intersect_len``.
+    """
+    keys = ("xloc", "xlen", "yloc", "ylen", "intersect_loc", "intersect_len")
+    fixtures = (plain_case, delete_blocks, split_blocks, skip_block, no_intersect)
+
+    for case in fixtures:
+        _check_case(*(case[key] for key in keys))
+
+    # one or both is empty
+    _check_case([0], [5], [], [], [], [])
+    _check_case([], [], [], [], [], [])
+
+
+class TestSparseIndexUnion:
+    """``make_union`` for block-kind and integer-kind sparse indexes."""
+
+    def test_index_make_union(self):
+        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
+            x_block = BlockIndex(TEST_LENGTH, xloc, xlen)
+            y_block = BlockIndex(TEST_LENGTH, yloc, ylen)
+
+            block_union = x_block.make_union(y_block)
+            assert isinstance(block_union, BlockIndex)
+            tm.assert_numpy_array_equal(
+                block_union.blocs, np.array(eloc, dtype=np.int32)
+            )
+            tm.assert_numpy_array_equal(
+                block_union.blengths, np.array(elen, dtype=np.int32)
+            )
+
+            # the integer-kind union must select the same positions
+            int_union = x_block.to_int_index().make_union(y_block.to_int_index())
+            assert isinstance(int_union, IntIndex)
+            tm.assert_numpy_array_equal(
+                int_union.indices, block_union.to_int_index().indices
+            )
+
+        # each entry: (xloc, xlen, yloc, ylen, expected loc, expected len)
+        cases = [
+            # x: ----
+            # y:     ----
+            # r: --------
+            ([0], [5], [5], [4], [0], [9]),
+            # x: -----     -----
+            # y:   -----          --
+            ([0, 10], [5, 5], [2, 17], [5, 2], [0, 10, 17], [7, 5, 2]),
+            # x: ------
+            # y:    -------
+            # r: ----------
+            ([1], [5], [3], [5], [1], [7]),
+            # x: ------  -----
+            # y:    -------
+            # r: -------------
+            ([2, 10], [4, 4], [4], [8], [2], [12]),
+            # x: ---  -----
+            # y: -------
+            # r: -------------
+            ([0, 5], [3, 5], [0], [7], [0], [10]),
+            # x: ------  -----
+            # y:    -------  ---
+            # r: -------------
+            ([2, 10], [4, 4], [4, 13], [8, 4], [2], [15]),
+            # x: ----------------------
+            # y:   ----  ----   ---
+            # r: ----------------------
+            ([2], [15], [4, 9, 14], [3, 2, 2], [2], [15]),
+            # x: ----       ---
+            # y:       ---       ---
+            ([0, 10], [3, 3], [5, 15], [2, 2], [0, 5, 10, 15], [3, 2, 3, 2]),
+        ]
+        for case in cases:
+            _check_case(*case)
+
+    def test_int_index_make_union(self):
+        # each entry: (indices of a, indices of b, indices of the expected
+        # union); all three indexes have length 5
+        same_length_cases = [
+            ([0, 3, 4], [0, 2], [0, 2, 3, 4]),
+            ([], [0, 2], [0, 2]),
+            ([], [], []),
+            ([0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]),
+        ]
+        for left, right, union in same_length_cases:
+            a = IntIndex(5, np.array(left, dtype=np.int32))
+            b = IntIndex(5, np.array(right, dtype=np.int32))
+            exp = IntIndex(5, np.array(union, np.int32))
+            assert a.make_union(b).equals(exp)
+
+        # indexes over different underlying lengths cannot be combined
+        a = IntIndex(5, np.array([0, 1], dtype=np.int32))
+        b = IntIndex(4, np.array([0, 1], dtype=np.int32))
+
+        msg = "Indices must reference same underlying length"
+        with pytest.raises(ValueError, match=msg):
+            a.make_union(b)
+
+
+class TestSparseIndexIntersect:
+    """``intersect`` for block-kind and integer-kind sparse indexes."""
+
+    @td.skip_if_windows
+    def test_intersect(self):
+        msg = "Indices must reference same underlying length"
+
+        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
+            x_block = BlockIndex(TEST_LENGTH, xloc, xlen)
+            y_block = BlockIndex(TEST_LENGTH, yloc, ylen)
+            expected_block = BlockIndex(TEST_LENGTH, eloc, elen)
+            longer_block = BlockIndex(TEST_LENGTH + 1, yloc, ylen)
+
+            # same result for the block kind and for the integer kind
+            assert x_block.intersect(y_block).equals(expected_block)
+            assert (
+                x_block.to_int_index()
+                .intersect(y_block.to_int_index())
+                .equals(expected_block.to_int_index())
+            )
+
+            # intersecting with an index of a different length must fail
+            mismatched = [
+                (x_block, longer_block),
+                (x_block.to_int_index(), longer_block.to_int_index()),
+            ]
+            for left, longer in mismatched:
+                with pytest.raises(Exception, match=msg):
+                    left.intersect(longer)
+
+        check_cases(_check_case)
+
+    def test_intersect_empty(self):
+        int_empty = IntIndex(4, np.array([], dtype=np.int32))
+        int_other = IntIndex(4, np.array([2, 3], dtype=np.int32))
+
+        pairs = [
+            (int_empty, int_other),
+            (int_empty.to_block_index(), int_other.to_block_index()),
+        ]
+        # intersecting with an empty index gives the empty index, either way
+        for empty, other in pairs:
+            assert empty.intersect(other).equals(empty)
+            assert other.intersect(empty).equals(empty)
+
+    def test_intersect_identical(self):
+        int_indexes = [
+            IntIndex(5, np.array([1, 2], dtype=np.int32)),
+            IntIndex(5, np.array([0, 2, 4], dtype=np.int32)),
+            IntIndex(0, np.array([], dtype=np.int32)),
+            IntIndex(5, np.array([], dtype=np.int32)),
+        ]
+
+        # an index intersected with itself is unchanged, for both kinds
+        for int_index in int_indexes:
+            for index in (int_index, int_index.to_block_index()):
+                assert index.intersect(index).equals(index)
+
+
+class TestSparseIndexCommon:
+    """Construction and lookup checks run against both sparse index kinds."""
+
+    def test_int_internal(self):
+        """``kind="integer"`` builds an IntIndex holding the given indices."""
+        idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind="integer")
+        assert isinstance(idx, IntIndex)
+        assert idx.npoints == 2
+        tm.assert_numpy_array_equal(idx.indices, np.array([2, 3], dtype=np.int32))
+
+        idx = make_sparse_index(4, np.array([], dtype=np.int32), kind="integer")
+        assert isinstance(idx, IntIndex)
+        assert idx.npoints == 0
+        tm.assert_numpy_array_equal(idx.indices, np.array([], dtype=np.int32))
+
+        idx = make_sparse_index(
+            4, np.array([0, 1, 2, 3], dtype=np.int32), kind="integer"
+        )
+        assert isinstance(idx, IntIndex)
+        assert idx.npoints == 4
+        tm.assert_numpy_array_equal(idx.indices, np.array([0, 1, 2, 3], dtype=np.int32))
+
+    def test_block_internal(self):
+        """``kind="block"`` groups consecutive indices into blocks."""
+        idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind="block")
+        assert isinstance(idx, BlockIndex)
+        assert idx.npoints == 2
+        tm.assert_numpy_array_equal(idx.blocs, np.array([2], dtype=np.int32))
+        tm.assert_numpy_array_equal(idx.blengths, np.array([2], dtype=np.int32))
+
+        idx = make_sparse_index(4, np.array([], dtype=np.int32), kind="block")
+        assert isinstance(idx, BlockIndex)
+        assert idx.npoints == 0
+        tm.assert_numpy_array_equal(idx.blocs, np.array([], dtype=np.int32))
+        tm.assert_numpy_array_equal(idx.blengths, np.array([], dtype=np.int32))
+
+        idx = make_sparse_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind="block")
+        assert isinstance(idx, BlockIndex)
+        assert idx.npoints == 4
+        tm.assert_numpy_array_equal(idx.blocs, np.array([0], dtype=np.int32))
+        tm.assert_numpy_array_equal(idx.blengths, np.array([4], dtype=np.int32))
+
+        idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind="block")
+        assert isinstance(idx, BlockIndex)
+        assert idx.npoints == 3
+        tm.assert_numpy_array_equal(idx.blocs, np.array([0, 2], dtype=np.int32))
+        tm.assert_numpy_array_equal(idx.blengths, np.array([1, 2], dtype=np.int32))
+
+    def test_lookup(self):
+        """``lookup`` gives the position among stored points, or -1 if absent."""
+        for kind in ["integer", "block"]:
+            idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind=kind)
+            assert idx.lookup(-1) == -1
+            assert idx.lookup(0) == -1
+            assert idx.lookup(1) == -1
+            assert idx.lookup(2) == 0
+            assert idx.lookup(3) == 1
+            assert idx.lookup(4) == -1
+
+            # an empty index never finds anything
+            idx = make_sparse_index(4, np.array([], dtype=np.int32), kind=kind)
+
+            for i in range(-1, 5):
+                assert idx.lookup(i) == -1
+
+            idx = make_sparse_index(
+                4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind
+            )
+            assert idx.lookup(-1) == -1
+            assert idx.lookup(0) == 0
+            assert idx.lookup(1) == 1
+            assert idx.lookup(2) == 2
+            assert idx.lookup(3) == 3
+            assert idx.lookup(4) == -1
+
+            idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind)
+            assert idx.lookup(-1) == -1
+            assert idx.lookup(0) == 0
+            assert idx.lookup(1) == -1
+            assert idx.lookup(2) == 1
+            assert idx.lookup(3) == 2
+            assert idx.lookup(4) == -1
+
+    def test_lookup_array(self):
+        """``lookup_array`` is the vectorised counterpart of ``lookup``."""
+        for kind in ["integer", "block"]:
+            idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind=kind)
+
+            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
+            exp = np.array([-1, -1, 0], dtype=np.int32)
+            tm.assert_numpy_array_equal(res, exp)
+
+            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
+            exp = np.array([-1, 0, -1, 1], dtype=np.int32)
+            tm.assert_numpy_array_equal(res, exp)
+
+            idx = make_sparse_index(4, np.array([], dtype=np.int32), kind=kind)
+            res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32))
+            exp = np.array([-1, -1, -1, -1], dtype=np.int32)
+            # without this comparison the empty-index case is never checked
+            tm.assert_numpy_array_equal(res, exp)
+
+            idx = make_sparse_index(
+                4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind
+            )
+            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
+            exp = np.array([-1, 0, 2], dtype=np.int32)
+            tm.assert_numpy_array_equal(res, exp)
+
+            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
+            exp = np.array([-1, 2, 1, 3], dtype=np.int32)
+            tm.assert_numpy_array_equal(res, exp)
+
+            idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind)
+            res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32))
+            exp = np.array([1, -1, 2, 0], dtype=np.int32)
+            tm.assert_numpy_array_equal(res, exp)
+
+            res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32))
+            exp = np.array([-1, -1, 1, -1], dtype=np.int32)
+            tm.assert_numpy_array_equal(res, exp)
+
+    @pytest.mark.parametrize(
+        "idx, expected",
+        [
+            [0, -1],
+            [5, 0],
+            [7, 2],
+            [8, -1],
+            [9, -1],
+            [10, -1],
+            [11, -1],
+            [12, 3],
+            [17, 8],
+            [18, -1],
+        ],
+    )
+    def test_lookup_basics(self, idx, expected):
+        """Block and integer forms of one index agree on every lookup."""
+        # stored positions are 5-7 and 12-17
+        bindex = BlockIndex(20, [5, 12], [3, 6])
+        assert bindex.lookup(idx) == expected
+
+        iindex = bindex.to_int_index()
+        assert iindex.lookup(idx) == expected
+
+
+class TestBlockIndex:
+    """Tests specific to the block-kind sparse index."""
+
+    def test_block_internal(self):
+        # each entry: (indices, expected npoints, expected blocs,
+        # expected blengths)
+        cases = [
+            ([2, 3], 2, [2], [2]),
+            ([], 0, [], []),
+            ([0, 1, 2, 3], 4, [0], [4]),
+            ([0, 2, 3], 3, [0, 2], [1, 2]),
+        ]
+        for indices, npoints, blocs, blengths in cases:
+            idx = make_sparse_index(
+                4, np.array(indices, dtype=np.int32), kind="block"
+            )
+            assert isinstance(idx, BlockIndex)
+            assert idx.npoints == npoints
+            tm.assert_numpy_array_equal(idx.blocs, np.array(blocs, dtype=np.int32))
+            tm.assert_numpy_array_equal(
+                idx.blengths, np.array(blengths, dtype=np.int32)
+            )
+
+    def test_make_block_boundary(self):
+        # every second position is set, so each block has length one
+        for length in (5, 10, 100, 101):
+            every_other = np.arange(0, length, 2, dtype=np.int32)
+            idx = make_sparse_index(length, every_other, kind="block")
+
+            expected_blocs = np.arange(0, length, 2, dtype=np.int32)
+            expected_blengths = np.ones(len(expected_blocs), dtype=np.int32)
+            tm.assert_numpy_array_equal(idx.blocs, expected_blocs)
+            tm.assert_numpy_array_equal(idx.blengths, expected_blengths)
+
+    def test_equals(self):
+        base = BlockIndex(10, [0, 4], [2, 5])
+        longer_last_block = BlockIndex(10, [0, 4], [2, 6])
+
+        assert base.equals(base)
+        assert not base.equals(longer_last_block)
+
+    def test_check_integrity(self):
+        no_locs = []
+        no_lengths = []
+
+        # an index without blocks is accepted for length 0 and for length 1
+        for length in (0, 1):
+            BlockIndex(length, no_locs, no_lengths)
+
+        with pytest.raises(ValueError, match="Block 0 extends beyond end"):
+            BlockIndex(10, [5], [10])
+
+        with pytest.raises(ValueError, match="Block 0 overlaps"):
+            BlockIndex(10, [2, 5], [5, 3])
+
+    def test_to_int_index(self):
+        block = BlockIndex(20, [0, 10], [4, 6])
+        expected = np.array([0, 1, 2, 3, 10, 11, 12, 13, 14, 15], dtype=np.int32)
+
+        tm.assert_numpy_array_equal(block.to_int_index().indices, expected)
+
+    def test_to_block_index(self):
+        # converting a BlockIndex to a block index returns the same object
+        block = BlockIndex(10, [0, 5], [4, 5])
+        assert block.to_block_index() is block
+
+
+class TestIntIndex:
+    """Tests specific to the integer-kind sparse index."""
+
+    def test_check_integrity(self):
+        """Invalid ``indices`` are rejected with ``ValueError`` on construction."""
+        # More indices than the declared length allows.
+        msg = "Too many indices"
+
+        with pytest.raises(ValueError, match=msg):
+            IntIndex(length=1, indices=[1, 2, 3])
+
+        # No index can be negative.
+        msg = "No index can be less than zero"
+
+        with pytest.raises(ValueError, match=msg):
+            IntIndex(length=5, indices=[1, -2, 3])
+
+        # All indices must be less than the length.
+        msg = "All indices must be less than the length"
+
+        with pytest.raises(ValueError, match=msg):
+            IntIndex(length=5, indices=[1, 2, 5])
+
+        with pytest.raises(ValueError, match=msg):
+            IntIndex(length=5, indices=[1, 2, 6])
+
+        # Indices must be strictly ascending (no reordering, no repeats).
+        msg = "Indices must be strictly increasing"
+
+        with pytest.raises(ValueError, match=msg):
+            IntIndex(length=5, indices=[1, 3, 2])
+
+        with pytest.raises(ValueError, match=msg):
+            IntIndex(length=5, indices=[1, 3, 3])
+
+    def test_int_internal(self):
+        """``kind="integer"`` builds an IntIndex holding the given indices."""
+        idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind="integer")
+        assert isinstance(idx, IntIndex)
+        assert idx.npoints == 2
+        tm.assert_numpy_array_equal(idx.indices, np.array([2, 3], dtype=np.int32))
+
+        idx = make_sparse_index(4, np.array([], dtype=np.int32), kind="integer")
+        assert isinstance(idx, IntIndex)
+        assert idx.npoints == 0
+        tm.assert_numpy_array_equal(idx.indices, np.array([], dtype=np.int32))
+
+        idx = make_sparse_index(
+            4, np.array([0, 1, 2, 3], dtype=np.int32), kind="integer"
+        )
+        assert isinstance(idx, IntIndex)
+        assert idx.npoints == 4
+        tm.assert_numpy_array_equal(idx.indices, np.array([0, 1, 2, 3], dtype=np.int32))
+
+    def test_equals(self):
+        """An index equals itself but not one with fewer indices."""
+        index = IntIndex(10, [0, 1, 2, 3, 4])
+        assert index.equals(index)
+        assert not index.equals(IntIndex(10, [0, 1, 2, 3]))
+
+    def test_to_block_index(self):
+        """Block -> int -> block conversion reproduces the original index."""
+
+        # eloc/elen are unused here; they are accepted only because
+        # check_cases always passes six arguments
+        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
+            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
+            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)
+
+            # see if survive the round trip
+            xbindex = xindex.to_int_index().to_block_index()
+            ybindex = yindex.to_int_index().to_block_index()
+            assert isinstance(xbindex, BlockIndex)
+            assert isinstance(ybindex, BlockIndex)
+            assert xbindex.equals(xindex)
+            assert ybindex.equals(yindex)
+
+        check_cases(_check_case)
+
+    def test_to_int_index(self):
+        """Converting an IntIndex to an int index returns the same object."""
+        index = IntIndex(10, [2, 3, 4, 5, 6])
+        assert index.to_int_index() is index
+
+
+class TestSparseOperators:
+    """Compare the low-level sparse float64 operators with Series arithmetic."""
+
+    def _op_tests(self, sparse_op, python_op):
+        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
+            x_block = BlockIndex(TEST_LENGTH, xloc, xlen)
+            y_block = BlockIndex(TEST_LENGTH, yloc, ylen)
+
+            x_int = x_block.to_int_index()
+            y_int = y_block.to_int_index()
+
+            x_vals = np.arange(x_block.npoints) * 10.0 + 1
+            y_vals = np.arange(y_block.npoints) * 100.0 + 1
+
+            x_fill = 0
+            y_fill = 2
+
+            # run the operator once per index kind; both must agree
+            block_vals, block_index, block_fill = sparse_op(
+                x_vals, x_block, x_fill, y_vals, y_block, y_fill
+            )
+            int_vals, int_index, int_fill = sparse_op(
+                x_vals, x_int, x_fill, y_vals, y_int, y_fill
+            )
+
+            assert block_index.to_int_index().equals(int_index)
+            tm.assert_numpy_array_equal(block_vals, int_vals)
+            assert block_fill == int_fill
+
+            # check versus Series: densify both operands, apply the plain
+            # Python operator, then keep only the positions in the result index
+            positions = np.arange(TEST_LENGTH)
+            x_dense = Series(x_vals, x_int.indices).reindex(positions).fillna(x_fill)
+            y_dense = Series(y_vals, y_int.indices).reindex(positions).fillna(y_fill)
+
+            dense_result = python_op(x_dense, y_dense).reindex(int_index.indices)
+
+            tm.assert_numpy_array_equal(block_vals, dense_result.values)
+            tm.assert_numpy_array_equal(int_vals, dense_result.values)
+
+        check_cases(_check_case)
+
+    @pytest.mark.parametrize("opname", ["add", "sub", "mul", "truediv", "floordiv"])
+    def test_op(self, opname):
+        # pair the sparse kernel with the same-named function from ``operator``
+        self._op_tests(
+            getattr(splib, f"sparse_{opname}_float64"),
+            getattr(operator, opname),
+        )