first commit

commit d660f2a4ca
Author: Ayxan
Date:   2022-05-23 00:16:32 +04:00

24786 changed files with 4428337 additions and 0 deletions


@@ -0,0 +1,529 @@
from contextlib import contextmanager
import tracemalloc
import numpy as np
import pytest
from pandas._libs import hashtable as ht
import pandas as pd
import pandas._testing as tm
from pandas.core.algorithms import isin
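# The helpers below use tracemalloc together with the khash trace domain
# exposed by pandas._libs.hashtable to measure how much memory the hash
# tables themselves have allocated at any point.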
@contextmanager
def activated_tracemalloc():
tracemalloc.start()
try:
yield
finally:
tracemalloc.stop()
def get_allocated_khash_memory():
snapshot = tracemalloc.take_snapshot()
snapshot = snapshot.filter_traces(
(tracemalloc.DomainFilter(True, ht.get_hashtable_trace_domain()),)
)
return sum(map(lambda x: x.size, snapshot.traces))
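# Run the same basic checks against every typed hash table and its
# corresponding numpy dtype.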
@pytest.mark.parametrize(
"table_type, dtype",
[
(ht.PyObjectHashTable, np.object_),
(ht.Complex128HashTable, np.complex128),
(ht.Int64HashTable, np.int64),
(ht.UInt64HashTable, np.uint64),
(ht.Float64HashTable, np.float64),
(ht.Complex64HashTable, np.complex64),
(ht.Int32HashTable, np.int32),
(ht.UInt32HashTable, np.uint32),
(ht.Float32HashTable, np.float32),
(ht.Int16HashTable, np.int16),
(ht.UInt16HashTable, np.uint16),
(ht.Int8HashTable, np.int8),
(ht.UInt8HashTable, np.uint8),
(ht.IntpHashTable, np.intp),
],
)
class TestHashTable:
def test_get_set_contains_len(self, table_type, dtype):
index = 5
table = table_type(55)
assert len(table) == 0
assert index not in table
table.set_item(index, 42)
assert len(table) == 1
assert index in table
assert table.get_item(index) == 42
table.set_item(index + 1, 41)
assert index in table
assert index + 1 in table
assert len(table) == 2
assert table.get_item(index) == 42
assert table.get_item(index + 1) == 41
table.set_item(index, 21)
assert index in table
assert index + 1 in table
assert len(table) == 2
assert table.get_item(index) == 21
assert table.get_item(index + 1) == 41
assert index + 2 not in table
with pytest.raises(KeyError, match=str(index + 2)):
table.get_item(index + 2)
def test_map(self, table_type, dtype, writable):
# PyObjectHashTable has no map-method
if table_type != ht.PyObjectHashTable:
N = 77
table = table_type()
keys = np.arange(N).astype(dtype)
vals = np.arange(N).astype(np.int64) + N
keys.flags.writeable = writable
vals.flags.writeable = writable
table.map(keys, vals)
for i in range(N):
assert table.get_item(keys[i]) == i + N
def test_map_locations(self, table_type, dtype, writable):
N = 8
table = table_type()
keys = (np.arange(N) + N).astype(dtype)
keys.flags.writeable = writable
table.map_locations(keys)
for i in range(N):
assert table.get_item(keys[i]) == i
def test_lookup(self, table_type, dtype, writable):
N = 3
table = table_type()
keys = (np.arange(N) + N).astype(dtype)
keys.flags.writeable = writable
table.map_locations(keys)
result = table.lookup(keys)
expected = np.arange(N)
tm.assert_numpy_array_equal(result.astype(np.int64), expected.astype(np.int64))
def test_lookup_wrong(self, table_type, dtype):
if dtype in (np.int8, np.uint8):
N = 100
else:
N = 512
table = table_type()
keys = (np.arange(N) + N).astype(dtype)
table.map_locations(keys)
wrong_keys = np.arange(N).astype(dtype)
result = table.lookup(wrong_keys)
assert np.all(result == -1)
def test_unique(self, table_type, dtype, writable):
if dtype in (np.int8, np.uint8):
N = 88
else:
N = 1000
table = table_type()
expected = (np.arange(N) + N).astype(dtype)
keys = np.repeat(expected, 5)
keys.flags.writeable = writable
unique = table.unique(keys)
tm.assert_numpy_array_equal(unique, expected)
def test_tracemalloc_works(self, table_type, dtype):
if dtype in (np.int8, np.uint8):
N = 256
else:
N = 30000
keys = np.arange(N).astype(dtype)
with activated_tracemalloc():
table = table_type()
table.map_locations(keys)
used = get_allocated_khash_memory()
my_size = table.sizeof()
assert used == my_size
del table
assert get_allocated_khash_memory() == 0
def test_tracemalloc_for_empty(self, table_type, dtype):
with activated_tracemalloc():
table = table_type()
used = get_allocated_khash_memory()
my_size = table.sizeof()
assert used == my_size
del table
assert get_allocated_khash_memory() == 0
def test_get_state(self, table_type, dtype):
table = table_type(1000)
state = table.get_state()
assert state["size"] == 0
assert state["n_occupied"] == 0
assert "n_buckets" in state
assert "upper_bound" in state
def test_no_reallocation(self, table_type, dtype):
for N in range(1, 110):
keys = np.arange(N).astype(dtype)
preallocated_table = table_type(N)
n_buckets_start = preallocated_table.get_state()["n_buckets"]
preallocated_table.map_locations(keys)
n_buckets_end = preallocated_table.get_state()["n_buckets"]
# original number of buckets was enough:
assert n_buckets_start == n_buckets_end
# check with clean table (not too much preallocated)
clean_table = table_type()
clean_table.map_locations(keys)
assert n_buckets_start == clean_table.get_state()["n_buckets"]
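# Distinct NaN objects (nan1 is not nan2) must still be treated as the same
# key by PyObjectHashTable, including NaNs nested inside complex numbers
# and tuples.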
class TestPyObjectHashTableWithNans:
def test_nan_float(self):
nan1 = float("nan")
nan2 = float("nan")
assert nan1 is not nan2
table = ht.PyObjectHashTable()
table.set_item(nan1, 42)
assert table.get_item(nan2) == 42
def test_nan_complex_both(self):
nan1 = complex(float("nan"), float("nan"))
nan2 = complex(float("nan"), float("nan"))
assert nan1 is not nan2
table = ht.PyObjectHashTable()
table.set_item(nan1, 42)
assert table.get_item(nan2) == 42
def test_nan_complex_real(self):
nan1 = complex(float("nan"), 1)
nan2 = complex(float("nan"), 1)
other = complex(float("nan"), 2)
assert nan1 is not nan2
table = ht.PyObjectHashTable()
table.set_item(nan1, 42)
assert table.get_item(nan2) == 42
with pytest.raises(KeyError, match=None) as error:
table.get_item(other)
assert str(error.value) == str(other)
def test_nan_complex_imag(self):
nan1 = complex(1, float("nan"))
nan2 = complex(1, float("nan"))
other = complex(2, float("nan"))
assert nan1 is not nan2
table = ht.PyObjectHashTable()
table.set_item(nan1, 42)
assert table.get_item(nan2) == 42
with pytest.raises(KeyError, match=None) as error:
table.get_item(other)
assert str(error.value) == str(other)
def test_nan_in_tuple(self):
nan1 = (float("nan"),)
nan2 = (float("nan"),)
assert nan1[0] is not nan2[0]
table = ht.PyObjectHashTable()
table.set_item(nan1, 42)
assert table.get_item(nan2) == 42
def test_nan_in_nested_tuple(self):
nan1 = (1, (2, (float("nan"),)))
nan2 = (1, (2, (float("nan"),)))
other = (1, 2)
table = ht.PyObjectHashTable()
table.set_item(nan1, 42)
assert table.get_item(nan2) == 42
with pytest.raises(KeyError, match=None) as error:
table.get_item(other)
assert str(error.value) == str(other)
def test_hash_equal_tuple_with_nans():
a = (float("nan"), (float("nan"), float("nan")))
b = (float("nan"), (float("nan"), float("nan")))
assert ht.object_hash(a) == ht.object_hash(b)
assert ht.objects_are_equal(a, b)
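# get_labels_groupby returns a label per value plus the unique keys; the
# expected arrays below show that negative entries act as a sentinel and
# map to label -1 rather than appearing in the uniques.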
def test_get_labels_groupby_for_Int64(writable):
table = ht.Int64HashTable()
vals = np.array([1, 2, -1, 2, 1, -1], dtype=np.int64)
vals.flags.writeable = writable
arr, unique = table.get_labels_groupby(vals)
expected_arr = np.array([0, 1, -1, 1, 0, -1], dtype=np.intp)
expected_unique = np.array([1, 2], dtype=np.int64)
tm.assert_numpy_array_equal(arr, expected_arr)
tm.assert_numpy_array_equal(unique, expected_unique)
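# StringHashTable takes object-dtype string keys, so it is exercised
# separately from the parametrized numeric tables above.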
def test_tracemalloc_works_for_StringHashTable():
N = 1000
keys = np.arange(N).astype(np.compat.unicode).astype(np.object_)
with activated_tracemalloc():
table = ht.StringHashTable()
table.map_locations(keys)
used = get_allocated_khash_memory()
my_size = table.sizeof()
assert used == my_size
del table
assert get_allocated_khash_memory() == 0
def test_tracemalloc_for_empty_StringHashTable():
with activated_tracemalloc():
table = ht.StringHashTable()
used = get_allocated_khash_memory()
my_size = table.sizeof()
assert used == my_size
del table
assert get_allocated_khash_memory() == 0
def test_no_reallocation_StringHashTable():
for N in range(1, 110):
keys = np.arange(N).astype(np.compat.unicode).astype(np.object_)
preallocated_table = ht.StringHashTable(N)
n_buckets_start = preallocated_table.get_state()["n_buckets"]
preallocated_table.map_locations(keys)
n_buckets_end = preallocated_table.get_state()["n_buckets"]
# original number of buckets was enough:
assert n_buckets_start == n_buckets_end
# check with clean table (not too much preallocated)
clean_table = ht.StringHashTable()
clean_table.map_locations(keys)
assert n_buckets_start == clean_table.get_state()["n_buckets"]
@pytest.mark.parametrize(
"table_type, dtype",
[
(ht.Float64HashTable, np.float64),
(ht.Float32HashTable, np.float32),
(ht.Complex128HashTable, np.complex128),
(ht.Complex64HashTable, np.complex64),
],
)
class TestHashTableWithNans:
def test_get_set_contains_len(self, table_type, dtype):
index = float("nan")
table = table_type()
assert index not in table
table.set_item(index, 42)
assert len(table) == 1
assert index in table
assert table.get_item(index) == 42
table.set_item(index, 41)
assert len(table) == 1
assert index in table
assert table.get_item(index) == 41
def test_map(self, table_type, dtype):
N = 332
table = table_type()
keys = np.full(N, np.nan, dtype=dtype)
vals = (np.arange(N) + N).astype(np.int64)
table.map(keys, vals)
assert len(table) == 1
assert table.get_item(np.nan) == 2 * N - 1
def test_map_locations(self, table_type, dtype):
N = 10
table = table_type()
keys = np.full(N, np.nan, dtype=dtype)
table.map_locations(keys)
assert len(table) == 1
assert table.get_item(np.nan) == N - 1
def test_unique(self, table_type, dtype):
N = 1020
table = table_type()
keys = np.full(N, np.nan, dtype=dtype)
unique = table.unique(keys)
assert np.all(np.isnan(unique)) and len(unique) == 1
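# For object-dtype arrays, repeated NaN floats, NaN complex numbers and
# tuples containing NaN should each collapse to a single unique entry.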
def test_unique_for_nan_objects_floats():
table = ht.PyObjectHashTable()
keys = np.array([float("nan") for i in range(50)], dtype=np.object_)
unique = table.unique(keys)
assert len(unique) == 1
def test_unique_for_nan_objects_complex():
table = ht.PyObjectHashTable()
keys = np.array([complex(float("nan"), 1.0) for i in range(50)], dtype=np.object_)
unique = table.unique(keys)
assert len(unique) == 1
def test_unique_for_nan_objects_tuple():
table = ht.PyObjectHashTable()
keys = np.array(
[1] + [(1.0, (float("nan"), 1.0)) for i in range(50)], dtype=np.object_
)
unique = table.unique(keys)
assert len(unique) == 2
@pytest.mark.parametrize(
"dtype",
[
np.object_,
np.complex128,
np.int64,
np.uint64,
np.float64,
np.complex64,
np.int32,
np.uint32,
np.float32,
np.int16,
np.uint16,
np.int8,
np.uint8,
np.intp,
],
)
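# Module-level helpers (value_count, duplicated, ismember, mode) are
# checked across all supported dtypes.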
class TestHelpFunctions:
def test_value_count(self, dtype, writable):
N = 43
expected = (np.arange(N) + N).astype(dtype)
values = np.repeat(expected, 5)
values.flags.writeable = writable
keys, counts = ht.value_count(values, False)
tm.assert_numpy_array_equal(np.sort(keys), expected)
assert np.all(counts == 5)
def test_value_count_stable(self, dtype, writable):
# GH12679
values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype)
values.flags.writeable = writable
keys, counts = ht.value_count(values, False)
tm.assert_numpy_array_equal(keys, values)
assert np.all(counts == 1)
def test_duplicated_first(self, dtype, writable):
N = 100
values = np.repeat(np.arange(N).astype(dtype), 5)
values.flags.writeable = writable
result = ht.duplicated(values)
expected = np.ones_like(values, dtype=np.bool_)
expected[::5] = False
tm.assert_numpy_array_equal(result, expected)
def test_ismember_yes(self, dtype, writable):
N = 127
arr = np.arange(N).astype(dtype)
values = np.arange(N).astype(dtype)
arr.flags.writeable = writable
values.flags.writeable = writable
result = ht.ismember(arr, values)
expected = np.ones_like(values, dtype=np.bool_)
tm.assert_numpy_array_equal(result, expected)
def test_ismember_no(self, dtype):
N = 17
arr = np.arange(N).astype(dtype)
values = (np.arange(N) + N).astype(dtype)
result = ht.ismember(arr, values)
expected = np.zeros_like(values, dtype=np.bool_)
tm.assert_numpy_array_equal(result, expected)
def test_mode(self, dtype, writable):
if dtype in (np.int8, np.uint8):
N = 53
else:
N = 11111
values = np.repeat(np.arange(N).astype(dtype), 5)
values[0] = 42
values.flags.writeable = writable
result = ht.mode(values, False)
assert result == 42
def test_mode_stable(self, dtype, writable):
values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype)
values.flags.writeable = writable
keys = ht.mode(values, False)
tm.assert_numpy_array_equal(keys, values)
def test_modes_with_nans():
# GH42688, nans aren't mangled
nulls = [pd.NA, np.nan, pd.NaT, None]
values = np.array([True] + nulls * 2, dtype=np.object_)
modes = ht.mode(values, False)
assert modes.size == len(nulls)
def test_unique_label_indices_intp(writable):
keys = np.array([1, 2, 2, 2, 1, 3], dtype=np.intp)
keys.flags.writeable = writable
result = ht.unique_label_indices(keys)
expected = np.array([0, 1, 5], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize(
"dtype",
[
np.float64,
np.float32,
np.complex128,
np.complex64,
],
)
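# The second positional argument to value_count and mode appears to be the
# dropna flag: passing True drops NaN keys, passing False keeps them, as
# the assertions below rely on.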
class TestHelpFunctionsWithNans:
def test_value_count(self, dtype):
values = np.array([np.nan, np.nan, np.nan], dtype=dtype)
keys, counts = ht.value_count(values, True)
assert len(keys) == 0
keys, counts = ht.value_count(values, False)
assert len(keys) == 1 and np.all(np.isnan(keys))
assert counts[0] == 3
def test_duplicated_first(self, dtype):
values = np.array([np.nan, np.nan, np.nan], dtype=dtype)
result = ht.duplicated(values)
expected = np.array([False, True, True])
tm.assert_numpy_array_equal(result, expected)
def test_ismember_yes(self, dtype):
arr = np.array([np.nan, np.nan, np.nan], dtype=dtype)
values = np.array([np.nan, np.nan], dtype=dtype)
result = ht.ismember(arr, values)
expected = np.array([True, True, True], dtype=np.bool_)
tm.assert_numpy_array_equal(result, expected)
def test_ismember_no(self, dtype):
arr = np.array([np.nan, np.nan, np.nan], dtype=dtype)
values = np.array([1], dtype=dtype)
result = ht.ismember(arr, values)
expected = np.array([False, False, False], dtype=np.bool_)
tm.assert_numpy_array_equal(result, expected)
def test_mode(self, dtype):
values = np.array([42, np.nan, np.nan, np.nan], dtype=dtype)
assert ht.mode(values, True) == 42
assert np.isnan(ht.mode(values, False))
def test_ismember_tuple_with_nans():
# GH-41836
values = [("a", float("nan")), ("b", 1)]
comps = [("a", float("nan"))]
result = isin(values, comps)
expected = np.array([True, False], dtype=np.bool_)
tm.assert_numpy_array_equal(result, expected)
def test_float_complex_int_are_equal_as_objects():
values = ["a", 5, 5.0, 5.0 + 0j]
comps = list(range(129))
result = isin(values, comps)
expected = np.array([False, True, True, True], dtype=np.bool_)
tm.assert_numpy_array_equal(result, expected)


@@ -0,0 +1,390 @@
import numpy as np
import pytest
from pandas._libs import join as libjoin
from pandas._libs.join import (
inner_join,
left_outer_join,
)
import pandas._testing as tm
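# The join indexer routines return the joined keys plus a left and a right
# indexer; -1 marks positions with no match on that side.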
class TestIndexer:
@pytest.mark.parametrize(
"dtype", ["int32", "int64", "float32", "float64", "object"]
)
def test_outer_join_indexer(self, dtype):
indexer = libjoin.outer_join_indexer
left = np.arange(3, dtype=dtype)
right = np.arange(2, 5, dtype=dtype)
empty = np.array([], dtype=dtype)
result, lindexer, rindexer = indexer(left, right)
assert isinstance(result, np.ndarray)
assert isinstance(lindexer, np.ndarray)
assert isinstance(rindexer, np.ndarray)
tm.assert_numpy_array_equal(result, np.arange(5, dtype=dtype))
exp = np.array([0, 1, 2, -1, -1], dtype=np.intp)
tm.assert_numpy_array_equal(lindexer, exp)
exp = np.array([-1, -1, 0, 1, 2], dtype=np.intp)
tm.assert_numpy_array_equal(rindexer, exp)
result, lindexer, rindexer = indexer(empty, right)
tm.assert_numpy_array_equal(result, right)
exp = np.array([-1, -1, -1], dtype=np.intp)
tm.assert_numpy_array_equal(lindexer, exp)
exp = np.array([0, 1, 2], dtype=np.intp)
tm.assert_numpy_array_equal(rindexer, exp)
result, lindexer, rindexer = indexer(left, empty)
tm.assert_numpy_array_equal(result, left)
exp = np.array([0, 1, 2], dtype=np.intp)
tm.assert_numpy_array_equal(lindexer, exp)
exp = np.array([-1, -1, -1], dtype=np.intp)
tm.assert_numpy_array_equal(rindexer, exp)
def test_cython_left_outer_join(self):
left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.intp)
right = np.array([1, 1, 0, 4, 2, 2, 1], dtype=np.intp)
max_group = 5
ls, rs = left_outer_join(left, right, max_group)
exp_ls = left.argsort(kind="mergesort")
exp_rs = right.argsort(kind="mergesort")
exp_li = np.array([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10])
exp_ri = np.array(
[0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 4, 5, 4, 5, -1, -1]
)
exp_ls = exp_ls.take(exp_li)
exp_ls[exp_li == -1] = -1
exp_rs = exp_rs.take(exp_ri)
exp_rs[exp_ri == -1] = -1
tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False)
tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)
def test_cython_right_outer_join(self):
left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.intp)
right = np.array([1, 1, 0, 4, 2, 2, 1], dtype=np.intp)
max_group = 5
rs, ls = left_outer_join(right, left, max_group)
exp_ls = left.argsort(kind="mergesort")
exp_rs = right.argsort(kind="mergesort")
# 0 1 1 1
exp_li = np.array(
[
0,
1,
2,
3,
4,
5,
3,
4,
5,
3,
4,
5,
# 2 2 4
6,
7,
8,
6,
7,
8,
-1,
]
)
exp_ri = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6])
exp_ls = exp_ls.take(exp_li)
exp_ls[exp_li == -1] = -1
exp_rs = exp_rs.take(exp_ri)
exp_rs[exp_ri == -1] = -1
tm.assert_numpy_array_equal(ls, exp_ls)
tm.assert_numpy_array_equal(rs, exp_rs)
def test_cython_inner_join(self):
left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.intp)
right = np.array([1, 1, 0, 4, 2, 2, 1, 4], dtype=np.intp)
max_group = 5
ls, rs = inner_join(left, right, max_group)
exp_ls = left.argsort(kind="mergesort")
exp_rs = right.argsort(kind="mergesort")
exp_li = np.array([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8])
exp_ri = np.array([0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 4, 5, 4, 5])
exp_ls = exp_ls.take(exp_li)
exp_ls[exp_li == -1] = -1
exp_rs = exp_rs.take(exp_ri)
exp_rs[exp_ri == -1] = -1
tm.assert_numpy_array_equal(ls, exp_ls)
tm.assert_numpy_array_equal(rs, exp_rs)
@pytest.mark.parametrize("readonly", [True, False])
def test_left_join_indexer_unique(readonly):
a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
b = np.array([2, 2, 3, 4, 4], dtype=np.int64)
if readonly:
# GH#37312, GH#37264
a.setflags(write=False)
b.setflags(write=False)
result = libjoin.left_join_indexer_unique(b, a)
expected = np.array([1, 1, 2, 3, 3], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)
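# With sort=False the left ordering is preserved: every left row keeps its
# position, and the right indexer only fills in matches for the keys 1 and 3.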
def test_left_outer_join_bug():
left = np.array(
[
0,
1,
0,
1,
1,
2,
3,
1,
0,
2,
1,
2,
0,
1,
1,
2,
3,
2,
3,
2,
1,
1,
3,
0,
3,
2,
3,
0,
0,
2,
3,
2,
0,
3,
1,
3,
0,
1,
3,
0,
0,
1,
0,
3,
1,
0,
1,
0,
1,
1,
0,
2,
2,
2,
2,
2,
0,
3,
1,
2,
0,
0,
3,
1,
3,
2,
2,
0,
1,
3,
0,
2,
3,
2,
3,
3,
2,
3,
3,
1,
3,
2,
0,
0,
3,
1,
1,
1,
0,
2,
3,
3,
1,
2,
0,
3,
1,
2,
0,
2,
],
dtype=np.intp,
)
right = np.array([3, 1], dtype=np.intp)
max_groups = 4
lidx, ridx = libjoin.left_outer_join(left, right, max_groups, sort=False)
exp_lidx = np.arange(len(left), dtype=np.intp)
exp_ridx = -np.ones(len(left), dtype=np.intp)
exp_ridx[left == 1] = 1
exp_ridx[left == 3] = 0
tm.assert_numpy_array_equal(lidx, exp_lidx)
tm.assert_numpy_array_equal(ridx, exp_ridx)
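# The *_join_indexer variants below operate on already-sorted keys (all
# inputs here are sorted); the "*2" versions additionally cover duplicated
# keys on one side.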
def test_inner_join_indexer():
a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
b = np.array([0, 3, 5, 7, 9], dtype=np.int64)
index, ares, bres = libjoin.inner_join_indexer(a, b)
index_exp = np.array([3, 5], dtype=np.int64)
tm.assert_almost_equal(index, index_exp)
aexp = np.array([2, 4], dtype=np.intp)
bexp = np.array([1, 2], dtype=np.intp)
tm.assert_almost_equal(ares, aexp)
tm.assert_almost_equal(bres, bexp)
a = np.array([5], dtype=np.int64)
b = np.array([5], dtype=np.int64)
index, ares, bres = libjoin.inner_join_indexer(a, b)
tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64))
tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.intp))
tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.intp))
def test_outer_join_indexer():
a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
b = np.array([0, 3, 5, 7, 9], dtype=np.int64)
index, ares, bres = libjoin.outer_join_indexer(a, b)
index_exp = np.array([0, 1, 2, 3, 4, 5, 7, 9], dtype=np.int64)
tm.assert_almost_equal(index, index_exp)
aexp = np.array([-1, 0, 1, 2, 3, 4, -1, -1], dtype=np.intp)
bexp = np.array([0, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp)
tm.assert_almost_equal(ares, aexp)
tm.assert_almost_equal(bres, bexp)
a = np.array([5], dtype=np.int64)
b = np.array([5], dtype=np.int64)
index, ares, bres = libjoin.outer_join_indexer(a, b)
tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64))
tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.intp))
tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.intp))
def test_left_join_indexer():
a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
b = np.array([0, 3, 5, 7, 9], dtype=np.int64)
index, ares, bres = libjoin.left_join_indexer(a, b)
tm.assert_almost_equal(index, a)
aexp = np.array([0, 1, 2, 3, 4], dtype=np.intp)
bexp = np.array([-1, -1, 1, -1, 2], dtype=np.intp)
tm.assert_almost_equal(ares, aexp)
tm.assert_almost_equal(bres, bexp)
a = np.array([5], dtype=np.int64)
b = np.array([5], dtype=np.int64)
index, ares, bres = libjoin.left_join_indexer(a, b)
tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64))
tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.intp))
tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.intp))
def test_left_join_indexer2():
idx = np.array([1, 1, 2, 5], dtype=np.int64)
idx2 = np.array([1, 2, 5, 7, 9], dtype=np.int64)
res, lidx, ridx = libjoin.left_join_indexer(idx2, idx)
exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64)
tm.assert_almost_equal(res, exp_res)
exp_lidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp)
tm.assert_almost_equal(lidx, exp_lidx)
exp_ridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp)
tm.assert_almost_equal(ridx, exp_ridx)
def test_outer_join_indexer2():
idx = np.array([1, 1, 2, 5], dtype=np.int64)
idx2 = np.array([1, 2, 5, 7, 9], dtype=np.int64)
res, lidx, ridx = libjoin.outer_join_indexer(idx2, idx)
exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64)
tm.assert_almost_equal(res, exp_res)
exp_lidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp)
tm.assert_almost_equal(lidx, exp_lidx)
exp_ridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp)
tm.assert_almost_equal(ridx, exp_ridx)
def test_inner_join_indexer2():
idx = np.array([1, 1, 2, 5], dtype=np.int64)
idx2 = np.array([1, 2, 5, 7, 9], dtype=np.int64)
res, lidx, ridx = libjoin.inner_join_indexer(idx2, idx)
exp_res = np.array([1, 1, 2, 5], dtype=np.int64)
tm.assert_almost_equal(res, exp_res)
exp_lidx = np.array([0, 0, 1, 2], dtype=np.intp)
tm.assert_almost_equal(lidx, exp_lidx)
exp_ridx = np.array([0, 1, 2, 3], dtype=np.intp)
tm.assert_almost_equal(ridx, exp_ridx)


@@ -0,0 +1,208 @@
import numpy as np
import pytest
from pandas._libs import (
lib,
writers as libwriters,
)
from pandas import Index
import pandas._testing as tm
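# Tests for assorted helpers from pandas._libs.lib and pandas._libs.writers.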
class TestMisc:
def test_max_len_string_array(self):
arr = a = np.array(["foo", "b", np.nan], dtype="object")
assert libwriters.max_len_string_array(arr) == 3
# unicode
arr = a.astype("U").astype(object)
assert libwriters.max_len_string_array(arr) == 3
# bytes for python3
arr = a.astype("S").astype(object)
assert libwriters.max_len_string_array(arr) == 3
# raises
msg = "No matching signature found"
with pytest.raises(TypeError, match=msg):
libwriters.max_len_string_array(arr.astype("U"))
def test_fast_unique_multiple_list_gen_sort(self):
keys = [["p", "a"], ["n", "d"], ["a", "s"]]
gen = (key for key in keys)
expected = np.array(["a", "d", "n", "p", "s"])
out = lib.fast_unique_multiple_list_gen(gen, sort=True)
tm.assert_numpy_array_equal(np.array(out), expected)
gen = (key for key in keys)
expected = np.array(["p", "a", "n", "d", "s"])
out = lib.fast_unique_multiple_list_gen(gen, sort=False)
tm.assert_numpy_array_equal(np.array(out), expected)
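# maybe_indices_to_slice should convert a monotonic, evenly spaced indexer
# into an equivalent slice and fall back to returning the indices unchanged
# otherwise; both paths must index `target` identically.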
class TestIndexing:
def test_maybe_indices_to_slice_left_edge(self):
target = np.arange(100)
# slice
indices = np.array([], dtype=np.intp)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
assert isinstance(maybe_slice, slice)
tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
for end in [1, 2, 5, 20, 99]:
for step in [1, 2, 4]:
indices = np.arange(0, end, step, dtype=np.intp)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
assert isinstance(maybe_slice, slice)
tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
# reverse
indices = indices[::-1]
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
assert isinstance(maybe_slice, slice)
tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
# not slice
for case in [[2, 1, 2, 0], [2, 2, 1, 0], [0, 1, 2, 1], [-2, 0, 2], [2, 0, -2]]:
indices = np.array(case, dtype=np.intp)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
assert not isinstance(maybe_slice, slice)
tm.assert_numpy_array_equal(maybe_slice, indices)
tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
def test_maybe_indices_to_slice_right_edge(self):
target = np.arange(100)
# slice
for start in [0, 2, 5, 20, 97, 98]:
for step in [1, 2, 4]:
indices = np.arange(start, 99, step, dtype=np.intp)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
assert isinstance(maybe_slice, slice)
tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
# reverse
indices = indices[::-1]
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
assert isinstance(maybe_slice, slice)
tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
# not slice
indices = np.array([97, 98, 99, 100], dtype=np.intp)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
assert not isinstance(maybe_slice, slice)
tm.assert_numpy_array_equal(maybe_slice, indices)
msg = "index 100 is out of bounds for axis (0|1) with size 100"
with pytest.raises(IndexError, match=msg):
target[indices]
with pytest.raises(IndexError, match=msg):
target[maybe_slice]
indices = np.array([100, 99, 98, 97], dtype=np.intp)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
assert not isinstance(maybe_slice, slice)
tm.assert_numpy_array_equal(maybe_slice, indices)
with pytest.raises(IndexError, match=msg):
target[indices]
with pytest.raises(IndexError, match=msg):
target[maybe_slice]
for case in [[99, 97, 99, 96], [99, 99, 98, 97], [98, 98, 97, 96]]:
indices = np.array(case, dtype=np.intp)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
assert not isinstance(maybe_slice, slice)
tm.assert_numpy_array_equal(maybe_slice, indices)
tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
def test_maybe_indices_to_slice_both_edges(self):
target = np.arange(10)
# slice
for step in [1, 2, 4, 5, 8, 9]:
indices = np.arange(0, 9, step, dtype=np.intp)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
assert isinstance(maybe_slice, slice)
tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
# reverse
indices = indices[::-1]
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
assert isinstance(maybe_slice, slice)
tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
# not slice
for case in [[4, 2, 0, -2], [2, 2, 1, 0], [0, 1, 2, 1]]:
indices = np.array(case, dtype=np.intp)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
assert not isinstance(maybe_slice, slice)
tm.assert_numpy_array_equal(maybe_slice, indices)
tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
def test_maybe_indices_to_slice_middle(self):
target = np.arange(100)
# slice
for start, end in [(2, 10), (5, 25), (65, 97)]:
for step in [1, 2, 4, 20]:
indices = np.arange(start, end, step, dtype=np.intp)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
assert isinstance(maybe_slice, slice)
tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
# reverse
indices = indices[::-1]
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
assert isinstance(maybe_slice, slice)
tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
# not slice
for case in [[14, 12, 10, 12], [12, 12, 11, 10], [10, 11, 12, 11]]:
indices = np.array(case, dtype=np.intp)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
assert not isinstance(maybe_slice, slice)
tm.assert_numpy_array_equal(maybe_slice, indices)
tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
def test_maybe_booleans_to_slice(self):
arr = np.array([0, 0, 1, 1, 1, 0, 1], dtype=np.uint8)
result = lib.maybe_booleans_to_slice(arr)
assert result.dtype == np.bool_
result = lib.maybe_booleans_to_slice(arr[:0])
assert result == slice(0, 0)
def test_get_reverse_indexer(self):
indexer = np.array([-1, -1, 1, 2, 0, -1, 3, 4], dtype=np.intp)
result = lib.get_reverse_indexer(indexer, 5)
expected = np.array([4, 2, 3, 6, 7], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)
def test_cache_readonly_preserve_docstrings():
# GH18197
assert Index.hasnans.__doc__ is not None
def test_no_default_pickle():
# GH#40397
obj = tm.round_trip_pickle(lib.no_default)
assert obj is lib.no_default