first commit

2025-10-24 15:30:57 +00:00 · 2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions
--- a/.venv/Lib/site-packages/pandas/tests/reshape/merge/__init__.py
+++ b/.venv/Lib/site-packages/pandas/tests/reshape/merge/__init__.py
--- a/.venv/Lib/site-packages/pandas/tests/reshape/merge/test_join.py
+++ b/.venv/Lib/site-packages/pandas/tests/reshape/merge/test_join.py
@@ -0,0 +1,883 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    Categorical,
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    Timestamp,
+    concat,
+    merge,
+)
+import pandas._testing as tm
+from pandas.tests.reshape.merge.test_merge import (
+    NGROUPS,
+    N,
+    get_test_data,
+)
+
+a_ = np.array
+
+
+class TestJoin:
+    def setup_method(self, method):
+        """Build the frames that the tests in this class read from ``self``.
+
+        Sets ``self.df`` and ``self.df2`` (keyed frames used by the ``merge``
+        tests) and ``self.target`` / ``self.source`` (frames used by the
+        ``DataFrame.join(..., on=...)`` tests).  ``method`` is not used here.
+        """
+        # aggregate multiple columns
+        self.df = DataFrame(
+            {
+                "key1": get_test_data(),
+                "key2": get_test_data(),
+                "data1": np.random.randn(N),
+                "data2": np.random.randn(N),
+            }
+        )
+
+        # exclude a couple keys for fun
+        self.df = self.df[self.df["key2"] > 1]
+
+        # Second frame: requested with N // 5 rows, and key2 with NGROUPS // 2 groups.
+        self.df2 = DataFrame(
+            {
+                "key1": get_test_data(n=N // 5),
+                "key2": get_test_data(ngroups=NGROUPS // 2, n=N // 5),
+                "value": np.random.randn(N // 5),
+            }
+        )
+
+        index, data = tm.getMixedTypeDict()
+        self.target = DataFrame(data, index=index)
+
+        # Join on string value
+        # ``source`` is indexed by the same "C" values that ``target`` holds as a
+        # column, and carries the "A" and "D" data under the Merged* names.
+        self.source = DataFrame(
+            {"MergedA": data["A"], "MergedD": data["D"]}, index=data["C"]
+        )
+
+    def test_left_outer_join(self):
+        """Left merges keep every left-hand key, on one key and on both keys.
+
+        ``how="left"`` is passed explicitly: ``merge`` defaults to an inner
+        join, which would silently drop the left-only keys this test is
+        meant to cover.
+        """
+        joined_key2 = merge(self.df, self.df2, on="key2", how="left")
+        _check_join(self.df, self.df2, joined_key2, ["key2"], how="left")
+
+        # With no ``on`` the frames are merged on their common columns.
+        joined_both = merge(self.df, self.df2, how="left")
+        _check_join(self.df, self.df2, joined_both, ["key1", "key2"], how="left")
+
+    def test_right_outer_join(self):
+        """Right merges are validated group by group, on one key and on both."""
+        lhs, rhs = self.df, self.df2
+
+        # Explicit single join key.
+        by_key2 = merge(lhs, rhs, how="right", on="key2")
+        _check_join(lhs, rhs, by_key2, ["key2"], how="right")
+
+        # No ``on``: the frames are merged on their common columns.
+        by_both = merge(lhs, rhs, how="right")
+        _check_join(lhs, rhs, by_both, ["key1", "key2"], how="right")
+
+    def test_full_outer_join(self):
+        """Outer merges are validated group by group, on one key and on both."""
+        lhs, rhs = self.df, self.df2
+
+        # Explicit single join key.
+        by_key2 = merge(lhs, rhs, how="outer", on="key2")
+        _check_join(lhs, rhs, by_key2, ["key2"], how="outer")
+
+        # No ``on``: the frames are merged on their common columns.
+        by_both = merge(lhs, rhs, how="outer")
+        _check_join(lhs, rhs, by_both, ["key1", "key2"], how="outer")
+
+    def test_inner_join(self):
+        """Inner merges are validated group by group, on one key and on both."""
+        lhs, rhs = self.df, self.df2
+
+        # Explicit single join key.
+        by_key2 = merge(lhs, rhs, how="inner", on="key2")
+        _check_join(lhs, rhs, by_key2, ["key2"], how="inner")
+
+        # No ``on``: the frames are merged on their common columns.
+        by_both = merge(lhs, rhs, how="inner")
+        _check_join(lhs, rhs, by_both, ["key1", "key2"], how="inner")
+
+    def test_handle_overlap(self):
+        joined = merge(self.df, self.df2, on="key2", suffixes=(".foo", ".bar"))
+
+        assert "key1.foo" in joined
+        assert "key1.bar" in joined
+
+    def test_handle_overlap_arbitrary_key(self):
+        joined = merge(
+            self.df,
+            self.df2,
+            left_on="key2",
+            right_on="key1",
+            suffixes=(".foo", ".bar"),
+        )
+        assert "key1.foo" in joined
+        assert "key2.bar" in joined
+
+    def test_join_on(self):
+        """Exercise ``DataFrame.join(other, on=<column>)``.
+
+        Covers a plain column-to-index join, duplicated key values, keys that
+        are missing from the other frame, an unknown ``on`` column, and a key
+        column whose dtype does not match the other frame's index.
+        """
+        target = self.target
+        source = self.source
+
+        # ``source`` is indexed by the values in target["C"], so the joined
+        # Merged* columns must line up with the target's own A and D columns.
+        merged = target.join(source, on="C")
+        tm.assert_series_equal(merged["MergedA"], target["A"], check_names=False)
+        tm.assert_series_equal(merged["MergedD"], target["D"], check_names=False)
+
+        # join with duplicates (fix regression from DataFrame/Matrix merge)
+        df = DataFrame({"key": ["a", "a", "b", "b", "c"]})
+        df2 = DataFrame({"value": [0, 1, 2]}, index=["a", "b", "c"])
+        joined = df.join(df2, on="key")
+        expected = DataFrame(
+            {"key": ["a", "a", "b", "b", "c"], "value": [0, 0, 1, 1, 2]}
+        )
+        tm.assert_frame_equal(joined, expected)
+
+        # Test when some are missing
+        # df_a["one"] holds 1, 2, 3 but df_b / df_c are only indexed by 1 and 2,
+        # so row "c" (one == 3) has no match and must come back as NaN.
+        df_a = DataFrame([[1], [2], [3]], index=["a", "b", "c"], columns=["one"])
+        df_b = DataFrame([["foo"], ["bar"]], index=[1, 2], columns=["two"])
+        df_c = DataFrame([[1], [2]], index=[1, 2], columns=["three"])
+        joined = df_a.join(df_b, on="one")
+        joined = joined.join(df_c, on="one")
+        assert np.isnan(joined["two"]["c"])
+        assert np.isnan(joined["three"]["c"])
+
+        # merge column not present
+        with pytest.raises(KeyError, match="^'E'$"):
+            target.join(source, on="E")
+
+        # overlap
+        # Joining on "A" must raise the dtype-mismatch error quoted below.
+        source_copy = source.copy()
+        source_copy["A"] = 0
+        msg = (
+            "You are trying to merge on float64 and object columns. If "
+            "you wish to proceed you should use pd.concat"
+        )
+        with pytest.raises(ValueError, match=msg):
+            target.join(source_copy, on="A")
+
+    def test_join_on_fails_with_different_right_index(self):
+        df = DataFrame(
+            {"a": np.random.choice(["m", "f"], size=3), "b": np.random.randn(3)}
+        )
+        df2 = DataFrame(
+            {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)},
+            index=tm.makeCustomIndex(10, 2),
+        )
+        msg = r'len\(left_on\) must equal the number of levels in the index of "right"'
+        with pytest.raises(ValueError, match=msg):
+            merge(df, df2, left_on="a", right_index=True)
+
+    def test_join_on_fails_with_different_left_index(self):
+        df = DataFrame(
+            {"a": np.random.choice(["m", "f"], size=3), "b": np.random.randn(3)},
+            index=tm.makeCustomIndex(3, 2),
+        )
+        df2 = DataFrame(
+            {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)}
+        )
+        msg = r'len\(right_on\) must equal the number of levels in the index of "left"'
+        with pytest.raises(ValueError, match=msg):
+            merge(df, df2, right_on="b", left_index=True)
+
+    def test_join_on_fails_with_different_column_counts(self):
+        df = DataFrame(
+            {"a": np.random.choice(["m", "f"], size=3), "b": np.random.randn(3)}
+        )
+        df2 = DataFrame(
+            {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)},
+            index=tm.makeCustomIndex(10, 2),
+        )
+        msg = r"len\(right_on\) must equal len\(left_on\)"
+        with pytest.raises(ValueError, match=msg):
+            merge(df, df2, right_on="a", left_on=["a", "b"])
+
+    @pytest.mark.parametrize("wrong_type", [2, "str", None, np.array([0, 1])])
+    def test_join_on_fails_with_wrong_object_type(self, wrong_type):
+        # GH12081 - original issue
+
+        # GH21220 - merging of Series and DataFrame is now allowed
+        # Edited test to remove the Series object from test parameters
+
+        df = DataFrame({"a": [1, 1]})
+        msg = (
+            "Can only merge Series or DataFrame objects, "
+            f"a {type(wrong_type)} was passed"
+        )
+        with pytest.raises(TypeError, match=msg):
+            merge(wrong_type, df, left_on="a", right_on="a")
+        with pytest.raises(TypeError, match=msg):
+            merge(df, wrong_type, left_on="a", right_on="a")
+
+    def test_join_on_pass_vector(self):
+        expected = self.target.join(self.source, on="C")
+        del expected["C"]
+
+        join_col = self.target.pop("C")
+        result = self.target.join(self.source, on=join_col)
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_with_len0(self):
+        # nothing to merge
+        merged = self.target.join(self.source.reindex([]), on="C")
+        for col in self.source:
+            assert col in merged
+            assert merged[col].isna().all()
+
+        merged2 = self.target.join(self.source.reindex([]), on="C", how="inner")
+        tm.assert_index_equal(merged2.columns, merged.columns)
+        assert len(merged2) == 0
+
+    def test_join_on_inner(self):
+        df = DataFrame({"key": ["a", "a", "d", "b", "b", "c"]})
+        df2 = DataFrame({"value": [0, 1]}, index=["a", "b"])
+
+        joined = df.join(df2, on="key", how="inner")
+
+        expected = df.join(df2, on="key")
+        expected = expected[expected["value"].notna()]
+        tm.assert_series_equal(joined["key"], expected["key"])
+        tm.assert_series_equal(joined["value"], expected["value"], check_dtype=False)
+        tm.assert_index_equal(joined.index, expected.index)
+
+    def test_join_on_singlekey_list(self):
+        df = DataFrame({"key": ["a", "a", "b", "b", "c"]})
+        df2 = DataFrame({"value": [0, 1, 2]}, index=["a", "b", "c"])
+
+        # corner cases
+        joined = df.join(df2, on=["key"])
+        expected = df.join(df2, on="key")
+
+        tm.assert_frame_equal(joined, expected)
+
+    def test_join_on_series(self):
+        result = self.target.join(self.source["MergedA"], on="C")
+        expected = self.target.join(self.source[["MergedA"]], on="C")
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_on_series_buglet(self):
+        # GH #638
+        df = DataFrame({"a": [1, 1]})
+        ds = Series([2], index=[1], name="b")
+        result = df.join(ds, on="a")
+        expected = DataFrame({"a": [1, 1], "b": [2, 2]}, index=df.index)
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_index_mixed(self, join_type):
+        # no overlapping blocks
+        df1 = DataFrame(index=np.arange(10))
+        df1["bool"] = True
+        df1["string"] = "foo"
+
+        df2 = DataFrame(index=np.arange(5, 15))
+        df2["int"] = 1
+        df2["float"] = 1.0
+
+        joined = df1.join(df2, how=join_type)
+        expected = _join_by_hand(df1, df2, how=join_type)
+        tm.assert_frame_equal(joined, expected)
+
+        joined = df2.join(df1, how=join_type)
+        expected = _join_by_hand(df2, df1, how=join_type)
+        tm.assert_frame_equal(joined, expected)
+
+    def test_join_index_mixed_overlap(self):
+        df1 = DataFrame(
+            {"A": 1.0, "B": 2, "C": "foo", "D": True},
+            index=np.arange(10),
+            columns=["A", "B", "C", "D"],
+        )
+        assert df1["B"].dtype == np.int64
+        assert df1["D"].dtype == np.bool_
+
+        df2 = DataFrame(
+            {"A": 1.0, "B": 2, "C": "foo", "D": True},
+            index=np.arange(0, 10, 2),
+            columns=["A", "B", "C", "D"],
+        )
+
+        # overlap
+        joined = df1.join(df2, lsuffix="_one", rsuffix="_two")
+        expected_columns = [
+            "A_one",
+            "B_one",
+            "C_one",
+            "D_one",
+            "A_two",
+            "B_two",
+            "C_two",
+            "D_two",
+        ]
+        df1.columns = expected_columns[:4]
+        df2.columns = expected_columns[4:]
+        expected = _join_by_hand(df1, df2)
+        tm.assert_frame_equal(joined, expected)
+
+    def test_join_empty_bug(self):
+        # generated an exception in 0.4.3
+        x = DataFrame()
+        x.join(DataFrame([3], index=[0], columns=["A"]), how="outer")
+
+    def test_join_unconsolidated(self):
+        # GH #331
+        a = DataFrame(np.random.randn(30, 2), columns=["a", "b"])
+        c = Series(np.random.randn(30))
+        a["c"] = c
+        d = DataFrame(np.random.randn(30, 1), columns=["q"])
+
+        # it works!
+        a.join(d)
+        d.join(a)
+
+    def test_join_multiindex(self):
+        index1 = MultiIndex.from_arrays(
+            [["a", "a", "a", "b", "b", "b"], [1, 2, 3, 1, 2, 3]],
+            names=["first", "second"],
+        )
+
+        index2 = MultiIndex.from_arrays(
+            [["b", "b", "b", "c", "c", "c"], [1, 2, 3, 1, 2, 3]],
+            names=["first", "second"],
+        )
+
+        df1 = DataFrame(data=np.random.randn(6), index=index1, columns=["var X"])
+        df2 = DataFrame(data=np.random.randn(6), index=index2, columns=["var Y"])
+
+        df1 = df1.sort_index(level=0)
+        df2 = df2.sort_index(level=0)
+
+        joined = df1.join(df2, how="outer")
+        ex_index = Index(index1.values).union(Index(index2.values))
+        expected = df1.reindex(ex_index).join(df2.reindex(ex_index))
+        expected.index.names = index1.names
+        tm.assert_frame_equal(joined, expected)
+        assert joined.index.names == index1.names
+
+        df1 = df1.sort_index(level=1)
+        df2 = df2.sort_index(level=1)
+
+        joined = df1.join(df2, how="outer").sort_index(level=0)
+        ex_index = Index(index1.values).union(Index(index2.values))
+        expected = df1.reindex(ex_index).join(df2.reindex(ex_index))
+        expected.index.names = index1.names
+
+        tm.assert_frame_equal(joined, expected)
+        assert joined.index.names == index1.names
+
+    def test_join_inner_multiindex(self, lexsorted_two_level_string_multiindex):
+        """Inner ``join`` of two key columns against a two-level MultiIndex.
+
+        The joined frame must agree with the equivalent ``merge`` calls in
+        both directions and keep a monotonically increasing index.
+        """
+        key1 = ["bar", "bar", "bar", "foo", "foo", "baz", "baz", "qux", "qux", "snap"]
+        key2 = [
+            "two",
+            "one",
+            "three",
+            "one",
+            "two",
+            "one",
+            "two",
+            "two",
+            "three",
+            "one",
+        ]
+
+        data = np.random.randn(len(key1))
+        data = DataFrame({"key1": key1, "key2": key2, "data": data})
+
+        index = lexsorted_two_level_string_multiindex
+        to_join = DataFrame(
+            np.random.randn(10, 3), index=index, columns=["j_one", "j_two", "j_three"]
+        )
+
+        joined = data.join(to_join, on=["key1", "key2"], how="inner")
+
+        # Same join with the operands swapped: index levels on the left,
+        # key columns on the right.
+        expected2 = merge(
+            to_join,
+            data,
+            right_on=["key1", "key2"],
+            left_index=True,
+            how="inner",
+            sort=False,
+        )
+        tm.assert_frame_equal(joined, expected2.reindex_like(joined))
+
+        # Column-on-column merge against the flattened index; the level
+        # columns are dropped and the index aligned before comparing.
+        expected = merge(
+            data,
+            to_join.reset_index(),
+            left_on=["key1", "key2"],
+            right_on=["first", "second"],
+            how="inner",
+            sort=False,
+        )
+        expected = expected.drop(["first", "second"], axis=1)
+        expected.index = joined.index
+
+        # ``is_monotonic_increasing`` is the non-deprecated spelling of the
+        # former ``is_monotonic`` alias.
+        assert joined.index.is_monotonic_increasing
+        tm.assert_frame_equal(joined, expected)
+
+    def test_join_hierarchical_mixed(self):
+        # GH 2024
+        df = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "c"])
+        new_df = df.groupby(["a"]).agg({"b": [np.mean, np.sum]})
+        other_df = DataFrame([(1, 2, 3), (7, 10, 6)], columns=["a", "b", "d"])
+        other_df.set_index("a", inplace=True)
+        # GH 9455, 12219
+        msg = "merging between different levels is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = merge(new_df, other_df, left_index=True, right_index=True)
+        assert ("b", "mean") in result
+        assert "b" in result
+
+    def test_join_float64_float32(self):
+
+        a = DataFrame(np.random.randn(10, 2), columns=["a", "b"], dtype=np.float64)
+        b = DataFrame(np.random.randn(10, 1), columns=["c"], dtype=np.float32)
+        joined = a.join(b)
+        assert joined.dtypes["a"] == "float64"
+        assert joined.dtypes["b"] == "float64"
+        assert joined.dtypes["c"] == "float32"
+
+        a = np.random.randint(0, 5, 100).astype("int64")
+        b = np.random.random(100).astype("float64")
+        c = np.random.random(100).astype("float32")
+        df = DataFrame({"a": a, "b": b, "c": c})
+        xpdf = DataFrame({"a": a, "b": b, "c": c})
+        s = DataFrame(np.random.random(5).astype("float32"), columns=["md"])
+        rs = df.merge(s, left_on="a", right_index=True)
+        assert rs.dtypes["a"] == "int64"
+        assert rs.dtypes["b"] == "float64"
+        assert rs.dtypes["c"] == "float32"
+        assert rs.dtypes["md"] == "float32"
+
+        xp = xpdf.merge(s, left_on="a", right_index=True)
+        tm.assert_frame_equal(rs, xp)
+
+    def test_join_many_non_unique_index(self):
+        df1 = DataFrame({"a": [1, 1], "b": [1, 1], "c": [10, 20]})
+        df2 = DataFrame({"a": [1, 1], "b": [1, 2], "d": [100, 200]})
+        df3 = DataFrame({"a": [1, 1], "b": [1, 2], "e": [1000, 2000]})
+        idf1 = df1.set_index(["a", "b"])
+        idf2 = df2.set_index(["a", "b"])
+        idf3 = df3.set_index(["a", "b"])
+
+        result = idf1.join([idf2, idf3], how="outer")
+
+        df_partially_merged = merge(df1, df2, on=["a", "b"], how="outer")
+        expected = merge(df_partially_merged, df3, on=["a", "b"], how="outer")
+
+        result = result.reset_index()
+        expected = expected[result.columns]
+        expected["a"] = expected.a.astype("int64")
+        expected["b"] = expected.b.astype("int64")
+        tm.assert_frame_equal(result, expected)
+
+        df1 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 1], "c": [10, 20, 30]})
+        df2 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 2], "d": [100, 200, 300]})
+        df3 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 2], "e": [1000, 2000, 3000]})
+        idf1 = df1.set_index(["a", "b"])
+        idf2 = df2.set_index(["a", "b"])
+        idf3 = df3.set_index(["a", "b"])
+        result = idf1.join([idf2, idf3], how="inner")
+
+        df_partially_merged = merge(df1, df2, on=["a", "b"], how="inner")
+        expected = merge(df_partially_merged, df3, on=["a", "b"], how="inner")
+
+        result = result.reset_index()
+
+        tm.assert_frame_equal(result, expected.loc[:, result.columns])
+
+        # GH 11519
+        df = DataFrame(
+            {
+                "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
+                "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
+                "C": np.random.randn(8),
+                "D": np.random.randn(8),
+            }
+        )
+        s = Series(
+            np.repeat(np.arange(8), 2), index=np.repeat(np.arange(8), 2), name="TEST"
+        )
+        inner = df.join(s, how="inner")
+        outer = df.join(s, how="outer")
+        left = df.join(s, how="left")
+        right = df.join(s, how="right")
+        tm.assert_frame_equal(inner, outer)
+        tm.assert_frame_equal(inner, left)
+        tm.assert_frame_equal(inner, right)
+
+    def test_join_sort(self):
+        left = DataFrame({"key": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 4]})
+        right = DataFrame({"value2": ["a", "b", "c"]}, index=["bar", "baz", "foo"])
+
+        joined = left.join(right, on="key", sort=True)
+        expected = DataFrame(
+            {
+                "key": ["bar", "baz", "foo", "foo"],
+                "value": [2, 3, 1, 4],
+                "value2": ["a", "b", "c", "c"],
+            },
+            index=[1, 2, 0, 3],
+        )
+        tm.assert_frame_equal(joined, expected)
+
+        # smoke test
+        joined = left.join(right, on="key", sort=False)
+        tm.assert_index_equal(joined.index, Index(range(4)), exact=True)
+
+    def test_join_mixed_non_unique_index(self):
+        # GH 12814, unorderable types in py3 with a non-unique index
+        df1 = DataFrame({"a": [1, 2, 3, 4]}, index=[1, 2, 3, "a"])
+        df2 = DataFrame({"b": [5, 6, 7, 8]}, index=[1, 3, 3, 4])
+        result = df1.join(df2)
+        expected = DataFrame(
+            {"a": [1, 2, 3, 3, 4], "b": [5, np.nan, 6, 7, np.nan]},
+            index=[1, 2, 3, 3, "a"],
+        )
+        tm.assert_frame_equal(result, expected)
+
+        df3 = DataFrame({"a": [1, 2, 3, 4]}, index=[1, 2, 2, "a"])
+        df4 = DataFrame({"b": [5, 6, 7, 8]}, index=[1, 2, 3, 4])
+        result = df3.join(df4)
+        expected = DataFrame(
+            {"a": [1, 2, 3, 4], "b": [5, 6, 6, np.nan]}, index=[1, 2, 2, "a"]
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_non_unique_period_index(self):
+        # GH #16871
+        index = pd.period_range("2016-01-01", periods=16, freq="M")
+        df = DataFrame(list(range(len(index))), index=index, columns=["pnum"])
+        df2 = concat([df, df])
+        result = df.join(df2, how="inner", rsuffix="_df2")
+        expected = DataFrame(
+            np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2),
+            columns=["pnum", "pnum_df2"],
+            index=df2.sort_index().index,
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_mixed_type_join_with_suffix(self):
+        # GH #916
+        df = DataFrame(np.random.randn(20, 6), columns=["a", "b", "c", "d", "e", "f"])
+        df.insert(0, "id", 0)
+        df.insert(5, "dt", "foo")
+
+        grouped = df.groupby("id")
+        mn = grouped.mean()
+        cn = grouped.count()
+
+        # it works!
+        mn.join(cn, rsuffix="_right")
+
+    def test_join_many(self):
+        df = DataFrame(np.random.randn(10, 6), columns=list("abcdef"))
+        df_list = [df[["a", "b"]], df[["c", "d"]], df[["e", "f"]]]
+
+        joined = df_list[0].join(df_list[1:])
+        tm.assert_frame_equal(joined, df)
+
+        df_list = [df[["a", "b"]][:-2], df[["c", "d"]][2:], df[["e", "f"]][1:9]]
+
+        def _check_diff_index(df_list, result, exp_index):
+            reindexed = [x.reindex(exp_index) for x in df_list]
+            expected = reindexed[0].join(reindexed[1:])
+            tm.assert_frame_equal(result, expected)
+
+        # different join types
+        joined = df_list[0].join(df_list[1:], how="outer")
+        _check_diff_index(df_list, joined, df.index)
+
+        joined = df_list[0].join(df_list[1:])
+        _check_diff_index(df_list, joined, df_list[0].index)
+
+        joined = df_list[0].join(df_list[1:], how="inner")
+        _check_diff_index(df_list, joined, df.index[2:8])
+
+        msg = "Joining multiple DataFrames only supported for joining on index"
+        with pytest.raises(ValueError, match=msg):
+            df_list[0].join(df_list[1:], on="a")
+
+    def test_join_many_mixed(self):
+        df = DataFrame(np.random.randn(8, 4), columns=["A", "B", "C", "D"])
+        df["key"] = ["foo", "bar"] * 4
+        df1 = df.loc[:, ["A", "B"]]
+        df2 = df.loc[:, ["C", "D"]]
+        df3 = df.loc[:, ["key"]]
+
+        result = df1.join([df2, df3])
+        tm.assert_frame_equal(result, df)
+
+    def test_join_dups(self):
+
+        # joining dups
+        df = concat(
+            [
+                DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]),
+                DataFrame(
+                    np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"]
+                ),
+            ],
+            axis=1,
+        )
+
+        expected = concat([df, df], axis=1)
+        result = df.join(df, rsuffix="_2")
+        result.columns = expected.columns
+        tm.assert_frame_equal(result, expected)
+
+        # GH 4975, invalid join on dups
+        w = DataFrame(np.random.randn(4, 2), columns=["x", "y"])
+        x = DataFrame(np.random.randn(4, 2), columns=["x", "y"])
+        y = DataFrame(np.random.randn(4, 2), columns=["x", "y"])
+        z = DataFrame(np.random.randn(4, 2), columns=["x", "y"])
+
+        dta = x.merge(y, left_index=True, right_index=True).merge(
+            z, left_index=True, right_index=True, how="outer"
+        )
+        with tm.assert_produces_warning(FutureWarning):
+            dta = dta.merge(w, left_index=True, right_index=True)
+        expected = concat([x, y, z, w], axis=1)
+        expected.columns = ["x_x", "y_x", "x_y", "y_y", "x_x", "y_x", "x_y", "y_y"]
+        tm.assert_frame_equal(dta, expected)
+
+    def test_join_multi_to_multi(self, join_type):
+        # GH 20475
+        leftindex = MultiIndex.from_product(
+            [list("abc"), list("xy"), [1, 2]], names=["abc", "xy", "num"]
+        )
+        left = DataFrame({"v1": range(12)}, index=leftindex)
+
+        rightindex = MultiIndex.from_product(
+            [list("abc"), list("xy")], names=["abc", "xy"]
+        )
+        right = DataFrame({"v2": [100 * i for i in range(1, 7)]}, index=rightindex)
+
+        result = left.join(right, on=["abc", "xy"], how=join_type)
+        expected = (
+            left.reset_index()
+            .merge(right.reset_index(), on=["abc", "xy"], how=join_type)
+            .set_index(["abc", "xy", "num"])
+        )
+        tm.assert_frame_equal(expected, result)
+
+        msg = r'len\(left_on\) must equal the number of levels in the index of "right"'
+        with pytest.raises(ValueError, match=msg):
+            left.join(right, on="xy", how=join_type)
+
+        with pytest.raises(ValueError, match=msg):
+            right.join(left, on=["abc", "xy"], how=join_type)
+
+    def test_join_on_tz_aware_datetimeindex(self):
+        # GH 23931, 26335
+        df1 = DataFrame(
+            {
+                "date": pd.date_range(
+                    start="2018-01-01", periods=5, tz="America/Chicago"
+                ),
+                "vals": list("abcde"),
+            }
+        )
+
+        df2 = DataFrame(
+            {
+                "date": pd.date_range(
+                    start="2018-01-03", periods=5, tz="America/Chicago"
+                ),
+                "vals_2": list("tuvwx"),
+            }
+        )
+        result = df1.join(df2.set_index("date"), on="date")
+        expected = df1.copy()
+        expected["vals_2"] = Series([np.nan] * 2 + list("tuv"), dtype=object)
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_datetime_string(self):
+        # GH 5647
+        dfa = DataFrame(
+            [
+                ["2012-08-02", "L", 10],
+                ["2012-08-02", "J", 15],
+                ["2013-04-06", "L", 20],
+                ["2013-04-06", "J", 25],
+            ],
+            columns=["x", "y", "a"],
+        )
+        dfa["x"] = pd.to_datetime(dfa["x"])
+        dfb = DataFrame(
+            [["2012-08-02", "J", 1], ["2013-04-06", "L", 2]],
+            columns=["x", "y", "z"],
+            index=[2, 4],
+        )
+        dfb["x"] = pd.to_datetime(dfb["x"])
+        result = dfb.join(dfa.set_index(["x", "y"]), on=["x", "y"])
+        expected = DataFrame(
+            [
+                [Timestamp("2012-08-02 00:00:00"), "J", 1, 15],
+                [Timestamp("2013-04-06 00:00:00"), "L", 2, 20],
+            ],
+            index=[2, 4],
+            columns=["x", "y", "z", "a"],
+        )
+        tm.assert_frame_equal(result, expected)
+
+
+def _check_join(left, right, result, join_col, how="left", lsuffix="_x", rsuffix="_y"):
+    """Assert that ``result`` is consistent with a ``how`` join of the inputs.
+
+    ``result`` is walked one join-key group at a time.  For each group the
+    columns belonging to each side are extracted and compared with that
+    side's rows for the same key, or required to be all-NA when the side
+    has no such key.
+
+    Parameters
+    ----------
+    left, right : DataFrame
+        The frames that were joined.
+    result : DataFrame
+        The joined frame under test.
+    join_col : list
+        Names of the key columns; iterated and passed to ``groupby``.
+    how : str, default "left"
+        Join type, used to decide whether a key missing from one side is
+        an error.
+    lsuffix, rsuffix : str
+        Suffixes stripped from ``result``'s columns to recover the left and
+        right column names.
+
+    Raises
+    ------
+    AssertionError
+        If a key column contains NA, a group's contents do not match, or a
+        key appears that the join type should have excluded.
+    """
+
+    # some smoke tests
+    for c in join_col:
+        assert result[c].notna().all()
+
+    left_grouped = left.groupby(join_col)
+    right_grouped = right.groupby(join_col)
+
+    for group_key, group in result.groupby(join_col):
+        # Per-side views of this result group, under the original column names.
+        l_joined = _restrict_to_columns(group, left.columns, lsuffix)
+        r_joined = _restrict_to_columns(group, right.columns, rsuffix)
+
+        try:
+            lgroup = left_grouped.get_group(group_key)
+        except KeyError as err:
+            # Key absent from ``left``: not allowed for left/inner joins.
+            if how in ("left", "inner"):
+                raise AssertionError(
+                    f"key {group_key} should not have been in the join"
+                ) from err
+
+            # Otherwise the left-hand columns must be all-NA for this key.
+            _assert_all_na(l_joined, left.columns, join_col)
+        else:
+            _assert_same_contents(l_joined, lgroup)
+
+        try:
+            rgroup = right_grouped.get_group(group_key)
+        except KeyError as err:
+            # Key absent from ``right``: not allowed for right/inner joins.
+            if how in ("right", "inner"):
+                raise AssertionError(
+                    f"key {group_key} should not have been in the join"
+                ) from err
+
+            # Otherwise the right-hand columns must be all-NA for this key.
+            _assert_all_na(r_joined, right.columns, join_col)
+        else:
+            _assert_same_contents(r_joined, rgroup)
+
+
+def _restrict_to_columns(group, columns, suffix):
+    """Return the part of ``group`` that corresponds to ``columns``.
+
+    A column of ``group`` is kept when its label is in ``columns``, or when
+    removing a *trailing* ``suffix`` from it gives a label in ``columns``;
+    in the latter case it is renamed to the unsuffixed label.  Labels that
+    only contain ``suffix`` in the middle, and non-string labels, are never
+    rewritten.
+
+    Parameters
+    ----------
+    group : DataFrame
+        Frame whose columns may carry ``suffix``.
+    columns : Index or list
+        Original column labels, which also fix the output column order.
+    suffix : str
+        Suffix that may have been appended to overlapping column names.
+
+    Returns
+    -------
+    DataFrame
+        ``group`` restricted to ``columns``, in that order.
+
+    Raises
+    ------
+    KeyError
+        If a label in ``columns`` has no counterpart in ``group``.
+    """
+
+    def _source_label(label):
+        # Labels already present in ``columns`` are kept verbatim.
+        if label in columns:
+            return label
+        # Strip the suffix only from the end, and only if that yields a
+        # known label; an empty suffix never matches.
+        if suffix and isinstance(label, str) and label.endswith(suffix):
+            stripped = label[: -len(suffix)]
+            if stripped in columns:
+                return stripped
+        return None
+
+    found = [c for c in group.columns if _source_label(c) is not None]
+
+    # filter
+    group = group.loc[:, found]
+
+    # get rid of suffixes, if any
+    group = group.rename(columns={c: _source_label(c) for c in found})
+
+    # put in the right order...
+    return group.loc[:, columns]
+
+
+def _assert_same_contents(join_chunk, source):
+    """Assert that ``join_chunk`` holds the rows of ``source``, ignoring repeats.
+
+    NA values are replaced by a sentinel before de-duplicating.  The number
+    of distinct rows in ``join_chunk`` must equal ``len(source)``, and every
+    distinct row of ``source`` must occur in ``join_chunk``.
+    """
+    sentinel = -1234567  # drop_duplicates not so NA-friendly...
+
+    def _distinct_rows(frame):
+        deduped = frame.fillna(sentinel).drop_duplicates()
+        return [tuple(record) for record in deduped.values]
+
+    joined_rows = set(_distinct_rows(join_chunk))
+    source_rows = _distinct_rows(source)
+
+    assert len(joined_rows) == len(source)
+    for record in source_rows:
+        assert record in joined_rows
+
+
+def _assert_all_na(join_chunk, source_columns, join_col):
+    """Assert every non-key column of ``join_chunk`` is entirely NA.
+
+    Columns named in ``join_col`` are skipped; all other names in
+    ``source_columns`` are looked up in ``join_chunk`` and checked.
+    """
+    non_key_columns = [name for name in source_columns if name not in join_col]
+    for name in non_key_columns:
+        assert join_chunk[name].isna().all()
+
+
+def _join_by_hand(a, b, how="left"):
+    """Build the expected index join of ``a`` and ``b`` without ``DataFrame.join``.
+
+    Both frames are reindexed onto the ``how`` join of their indexes, the
+    columns of ``b`` are assigned onto the reindexed ``a`` one at a time,
+    and the result is ordered as ``a``'s columns followed by ``b``'s.
+    """
+    target_index = a.index.join(b.index, how=how)
+
+    combined = a.reindex(target_index)
+    right_aligned = b.reindex(target_index)
+
+    for name, values in right_aligned.items():
+        combined[name] = values
+
+    ordered_columns = a.columns.append(b.columns)
+    return combined.reindex(columns=ordered_columns)
+
+
+def test_join_inner_multiindex_deterministic_order():
+    # GH: 36910
+    left = DataFrame(
+        data={"e": 5},
+        index=MultiIndex.from_tuples([(1, 2, 4)], names=("a", "b", "d")),
+    )
+    right = DataFrame(
+        data={"f": 6}, index=MultiIndex.from_tuples([(2, 3)], names=("b", "c"))
+    )
+    result = left.join(right, how="inner")
+    expected = DataFrame(
+        {"e": [5], "f": [6]},
+        index=MultiIndex.from_tuples([(2, 1, 4, 3)], names=("b", "a", "d", "c")),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    ("input_col", "output_cols"), [("b", ["a", "b"]), ("a", ["a_x", "a_y"])]
+)
+def test_join_cross(input_col, output_cols):
+    # GH#5401
+    left = DataFrame({"a": [1, 3]})
+    right = DataFrame({input_col: [3, 4]})
+    result = left.join(right, how="cross", lsuffix="_x", rsuffix="_y")
+    expected = DataFrame({output_cols[0]: [1, 1, 3, 3], output_cols[1]: [3, 4, 3, 4]})
+    tm.assert_frame_equal(result, expected)
+
+
+def test_join_multiindex_one_level(join_type):
+    # GH#36909
+    left = DataFrame(
+        data={"c": 3}, index=MultiIndex.from_tuples([(1, 2)], names=("a", "b"))
+    )
+    right = DataFrame(data={"d": 4}, index=MultiIndex.from_tuples([(2,)], names=("b",)))
+    result = left.join(right, how=join_type)
+    expected = DataFrame(
+        {"c": [3], "d": [4]},
+        index=MultiIndex.from_tuples([(2, 1)], names=["b", "a"]),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "categories, values",
+    [
+        (["Y", "X"], ["Y", "X", "X"]),
+        ([2, 1], [2, 1, 1]),
+        ([2.5, 1.5], [2.5, 1.5, 1.5]),
+        (
+            [Timestamp("2020-12-31"), Timestamp("2019-12-31")],
+            [Timestamp("2020-12-31"), Timestamp("2019-12-31"), Timestamp("2019-12-31")],
+        ),
+    ],
+)
+def test_join_multiindex_not_alphabetical_categorical(categories, values):
+    # GH#38502
+    left = DataFrame(
+        {
+            "first": ["A", "A"],
+            "second": Categorical(categories, categories=categories),
+            "value": [1, 2],
+        }
+    ).set_index(["first", "second"])
+    right = DataFrame(
+        {
+            "first": ["A", "A", "B"],
+            "second": Categorical(values, categories=categories),
+            "value": [3, 4, 5],
+        }
+    ).set_index(["first", "second"])
+    result = left.join(right, lsuffix="_left", rsuffix="_right")
+
+    expected = DataFrame(
+        {
+            "first": ["A", "A"],
+            "second": Categorical(categories, categories=categories),
+            "value_left": [1, 2],
+            "value_right": [3, 4],
+        }
+    ).set_index(["first", "second"])
+    tm.assert_frame_equal(result, expected)
--- a/.venv/Lib/site-packages/pandas/tests/reshape/merge/test_merge.py
+++ b/.venv/Lib/site-packages/pandas/tests/reshape/merge/test_merge.py
--- a/.venv/Lib/site-packages/pandas/tests/reshape/merge/test_merge_asof.py
+++ b/.venv/Lib/site-packages/pandas/tests/reshape/merge/test_merge_asof.py
--- a/.venv/Lib/site-packages/pandas/tests/reshape/merge/test_merge_cross.py
+++ b/.venv/Lib/site-packages/pandas/tests/reshape/merge/test_merge_cross.py
@@ -0,0 +1,98 @@
+import pytest
+
+from pandas import DataFrame
+import pandas._testing as tm
+from pandas.core.reshape.merge import (
+    MergeError,
+    merge,
+)
+
+
+@pytest.mark.parametrize(
+    ("input_col", "output_cols"), [("b", ["a", "b"]), ("a", ["a_x", "a_y"])]
+)
+def test_merge_cross(input_col, output_cols):
+    """Cross merge pairs every left row with every right row and leaves
+    both inputs unmodified (GH#5401).
+    """
+    left = DataFrame({"a": [1, 3]})
+    right = DataFrame({input_col: [3, 4]})
+    # Snapshots taken before merging, to detect in-place mutation afterwards.
+    left_before, right_before = left.copy(), right.copy()
+
+    first_col, second_col = output_cols[0], output_cols[1]
+    expected = DataFrame({first_col: [1, 1, 3, 3], second_col: [3, 4, 3, 4]})
+
+    result = merge(left, right, how="cross")
+
+    tm.assert_frame_equal(result, expected)
+    tm.assert_frame_equal(left, left_before)
+    tm.assert_frame_equal(right, right_before)
+
+
+@pytest.mark.parametrize(
+    "kwargs",
+    [
+        {"left_index": True},
+        {"right_index": True},
+        {"on": "a"},
+        {"left_on": "a"},
+        {"right_on": "b"},
+    ],
+)
+def test_merge_cross_error_reporting(kwargs):
+    """Any key or index argument combined with ``how="cross"`` raises
+    ``MergeError`` (GH#5401).
+    """
+    msg = (
+        "Can not pass on, right_on, left_on or set right_index=True or "
+        "left_index=True"
+    )
+    left_frame = DataFrame({"a": [1, 3]})
+    right_frame = DataFrame({"b": [3, 4]})
+
+    with pytest.raises(MergeError, match=msg):
+        merge(left_frame, right_frame, how="cross", **kwargs)
+
+
+def test_merge_cross_mixed_dtypes():
+    """Cross merge of a string column with an integer column (GH#5401)."""
+    letters = DataFrame(["a", "b", "c"], columns=["A"])
+    numbers = DataFrame(range(2), columns=["B"])
+
+    expected = DataFrame(
+        {
+            "A": ["a", "a", "b", "b", "c", "c"],
+            "B": [0, 1, 0, 1, 0, 1],
+        }
+    )
+
+    result = merge(letters, numbers, how="cross")
+    tm.assert_frame_equal(result, expected)
+
+
+def test_merge_cross_more_than_one_column():
+    """Cross merge where each side contributes two columns (GH#5401)."""
+    left = DataFrame({"A": list("ab"), "B": [2, 1]})
+    right = DataFrame({"C": range(2), "D": range(4, 6)})
+
+    # Each left row is repeated once per right row, in left-major order.
+    expected_columns = {
+        "A": ["a", "a", "b", "b"],
+        "B": [2, 2, 1, 1],
+        "C": [0, 1, 0, 1],
+        "D": [4, 5, 4, 5],
+    }
+
+    result = merge(left, right, how="cross")
+    tm.assert_frame_equal(result, DataFrame(expected_columns))
+
+
+def test_merge_cross_null_values(nulls_fixture):
+    """A null value in the left frame is carried through a cross merge
+    (GH#5401).
+    """
+    left = DataFrame({"a": [1, nulls_fixture]})
+    right = DataFrame({"b": ["a", "b"], "c": [1.0, 2.0]})
+
+    expected = DataFrame(
+        {
+            "a": [1] * 2 + [nulls_fixture] * 2,
+            "b": ["a", "b"] * 2,
+            "c": [1.0, 2.0] * 2,
+        }
+    )
+
+    result = merge(left, right, how="cross")
+    tm.assert_frame_equal(result, expected)
+
+
+def test_join_cross_error_reporting():
+    """``DataFrame.join`` with ``how="cross"`` and ``on`` raises
+    ``MergeError`` (GH#5401).
+    """
+    msg = (
+        "Can not pass on, right_on, left_on or set right_index=True or "
+        "left_index=True"
+    )
+    caller = DataFrame({"a": [1, 3]})
+    other = DataFrame({"a": [3, 4]})
+
+    with pytest.raises(MergeError, match=msg):
+        caller.join(other, how="cross", on="a")
--- a/.venv/Lib/site-packages/pandas/tests/reshape/merge/test_merge_index_as_string.py
+++ b/.venv/Lib/site-packages/pandas/tests/reshape/merge/test_merge_index_as_string.py
@@ -0,0 +1,189 @@
+import numpy as np
+import pytest
+
+from pandas import DataFrame
+import pandas._testing as tm
+
+
+@pytest.fixture
+def df1():
+    """Frame with ``outer``/``inner`` key columns and an evenly spaced
+    ``v1`` value column (11 rows).
+    """
+    outer = [1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4]
+    inner = [1, 2, 3, 1, 2, 3, 4, 1, 2, 1, 2]
+    v1 = np.linspace(0, 1, 11)
+    return DataFrame({"outer": outer, "inner": inner, "v1": v1})
+
+
+@pytest.fixture
+def df2():
+    """Frame with ``outer``/``inner`` key columns and an evenly spaced
+    ``v2`` value column (12 rows).
+    """
+    outer = [1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3]
+    inner = [1, 2, 2, 3, 3, 4, 2, 3, 1, 1, 2, 3]
+    v2 = np.linspace(10, 11, 12)
+    return DataFrame({"outer": outer, "inner": inner, "v2": v2})
+
+
+@pytest.fixture(params=[[], ["outer"], ["outer", "inner"]])
+def left_df(request, df1):
+    """Left test DataFrame, parametrized over which columns become index levels.
+
+    An empty parameter leaves ``df1`` as is; otherwise the listed columns
+    are moved into the index.
+    """
+    index_levels = request.param
+    return df1.set_index(index_levels) if index_levels else df1
+
+
+@pytest.fixture(params=[[], ["outer"], ["outer", "inner"]])
+def right_df(request, df2):
+    """Right test DataFrame, parametrized over which columns become index levels.
+
+    An empty parameter leaves ``df2`` as is; otherwise the listed columns
+    are moved into the index.
+    """
+    index_levels = request.param
+    return df2.set_index(index_levels) if index_levels else df2
+
+
+def compute_expected(df_left, df_right, on=None, left_on=None, right_on=None, how=None):
+    """
+    Build the reference result for a merge on mixed columns/index levels.
+
+    Named index levels that take no part in the merge are discarded, the
+    ones that do take part are turned into ordinary columns, a plain
+    column merge is performed, and finally the keys that were index levels
+    on *both* inputs are set as the index of the result.
+
+    Parameters
+    ----------
+    df_left : DataFrame
+        The left DataFrame (may have zero or more named index levels)
+    df_right : DataFrame
+        The right DataFrame (may have zero or more named index levels)
+    on : list of str
+        The on parameter to the merge operation; when given it is used
+        for both ``left_on`` and ``right_on``
+    left_on : list of str
+        The left_on parameter to the merge operation
+    right_on : list of str
+        The right_on parameter to the merge operation
+    how : str
+        The how parameter to the merge operation
+
+    Returns
+    -------
+    DataFrame
+        The expected merge result
+    """
+
+    def _named_levels(frame):
+        # Names of the index levels that actually carry a name.
+        return [name for name in frame.index.names if name is not None]
+
+    def _levels_to_columns(frame, levels, keys):
+        # Discard levels not used as merge keys, then expose the rest as columns.
+        unused = [name for name in levels if name not in keys]
+        if unused:
+            frame = frame.reset_index(unused, drop=True)
+
+        used = [name for name in levels if name in keys]
+        if used:
+            frame = frame.reset_index(level=used)
+
+        return frame
+
+    # ``on`` is shorthand for identical keys on both sides
+    if on is not None:
+        left_on = right_on = on
+
+    left_levels = _named_levels(df_left)
+    right_levels = _named_levels(df_right)
+
+    # Keys that are index levels on both sides end up as the output index
+    output_levels = [
+        key for key in left_on if key in right_levels and key in left_levels
+    ]
+
+    df_left = _levels_to_columns(df_left, left_levels, left_on)
+    df_right = _levels_to_columns(df_right, right_levels, right_on)
+
+    expected = df_left.merge(df_right, left_on=left_on, right_on=right_on, how=how)
+
+    if output_levels:
+        expected = expected.set_index(output_levels)
+
+    return expected
+
+
+@pytest.mark.parametrize(
+    "on,how",
+    [
+        (["outer"], "inner"),
+        (["inner"], "left"),
+        (["outer", "inner"], "right"),
+        (["inner", "outer"], "outer"),
+    ],
+)
+def test_merge_indexes_and_columns_on(left_df, right_df, on, how):
+    """``merge(on=...)`` accepts names that refer to columns or index levels."""
+    # Merge under test
+    result = left_df.merge(right_df, on=on, how=how)
+
+    # Reference result built through plain column merges
+    expected = compute_expected(left_df, right_df, on=on, how=how)
+
+    tm.assert_frame_equal(result, expected, check_like=True)
+
+
+@pytest.mark.parametrize(
+    "left_on,right_on,how",
+    [
+        (["outer"], ["outer"], "inner"),
+        (["inner"], ["inner"], "right"),
+        (["outer", "inner"], ["outer", "inner"], "left"),
+        (["inner", "outer"], ["inner", "outer"], "outer"),
+    ],
+)
+def test_merge_indexes_and_columns_lefton_righton(
+    left_df, right_df, left_on, right_on, how
+):
+    """``merge(left_on=..., right_on=...)`` accepts names that refer to
+    columns or index levels.
+    """
+    # Merge under test
+    result = left_df.merge(right_df, left_on=left_on, right_on=right_on, how=how)
+
+    # Reference result built through plain column merges
+    expected = compute_expected(
+        left_df, right_df, left_on=left_on, right_on=right_on, how=how
+    )
+
+    tm.assert_frame_equal(result, expected, check_like=True)
+
+
+@pytest.mark.parametrize("left_index", ["inner", ["inner", "outer"]])
+def test_join_indexes_and_columns_on(df1, df2, left_index, join_type):
+    """``join(on=...)`` with names that mix left index levels and columns."""
+    left_df = df1.set_index(left_index)
+    right_df = df2.set_index(["outer", "inner"])
+
+    def join_right(frame):
+        # Same join for the reference frame and the frame under test.
+        return frame.join(
+            right_df, on=["outer", "inner"], how=join_type, lsuffix="_x", rsuffix="_y"
+        )
+
+    # Reference: move the index back to columns, join, then restore the index.
+    expected = join_right(left_df.reset_index()).set_index(left_index)
+
+    # Join under test, performed directly on the indexed frame.
+    result = join_right(left_df)
+
+    tm.assert_frame_equal(result, expected, check_like=True)
--- a/.venv/Lib/site-packages/pandas/tests/reshape/merge/test_merge_ordered.py
+++ b/.venv/Lib/site-packages/pandas/tests/reshape/merge/test_merge_ordered.py
@@ -0,0 +1,201 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    merge_ordered,
+)
+import pandas._testing as tm
+
+
+class TestMergeOrdered:
+    # Tests built around two small frames that share a "key" column; most of
+    # them exercise ``merge_ordered``.
+    def setup_method(self, method):
+        # Fresh frames for every test: left has keys a/c/e, right has keys
+        # b/c/d/f, so "c" is the only key present on both sides.
+        self.left = DataFrame({"key": ["a", "c", "e"], "lvalue": [1, 2.0, 3]})
+
+        self.right = DataFrame({"key": ["b", "c", "d", "f"], "rvalue": [1, 2, 3.0, 4]})
+
+    def test_basic(self):
+        # Without a fill method, keys missing on one side get NaN in that
+        # side's value column; the expected keys are in sorted order.
+        result = merge_ordered(self.left, self.right, on="key")
+        expected = DataFrame(
+            {
+                "key": ["a", "b", "c", "d", "e", "f"],
+                "lvalue": [1, np.nan, 2, np.nan, 3, np.nan],
+                "rvalue": [np.nan, 1, 2, 3, np.nan, 4],
+            }
+        )
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_ffill(self):
+        # With fill_method="ffill" gaps take the previous row's value; the
+        # first rvalue stays NaN because nothing precedes it.
+        result = merge_ordered(self.left, self.right, on="key", fill_method="ffill")
+        expected = DataFrame(
+            {
+                "key": ["a", "b", "c", "d", "e", "f"],
+                "lvalue": [1.0, 1, 2, 2, 3, 3.0],
+                "rvalue": [np.nan, 1, 2, 3, 3, 4],
+            }
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_multigroup(self):
+        # Stack the left frame twice and label the halves as groups "a" and "b".
+        left = pd.concat([self.left, self.left], ignore_index=True)
+
+        left["group"] = ["a"] * 3 + ["b"] * 3
+
+        # left_by="group": the expected frame is the ffill result repeated
+        # once per group.
+        result = merge_ordered(
+            left, self.right, on="key", left_by="group", fill_method="ffill"
+        )
+        expected = DataFrame(
+            {
+                "key": ["a", "b", "c", "d", "e", "f"] * 2,
+                "lvalue": [1.0, 1, 2, 2, 3, 3.0] * 2,
+                "rvalue": [np.nan, 1, 2, 3, 3, 4] * 2,
+            }
+        )
+        expected["group"] = ["a"] * 6 + ["b"] * 6
+
+        # Column order is not under test here, so align it to the result's.
+        tm.assert_frame_equal(result, expected.loc[:, result.columns])
+
+        # Swapping the operands and using right_by must give the same data.
+        result2 = merge_ordered(
+            self.right, left, on="key", right_by="group", fill_method="ffill"
+        )
+        tm.assert_frame_equal(result, result2.loc[:, result.columns])
+
+        # Even without a fill method the group column must have no missing values.
+        result = merge_ordered(left, self.right, on="key", left_by="group")
+        assert result["group"].notna().all()
+
+    def test_merge_type(self):
+        # A DataFrame subclass whose _constructor returns the subclass itself.
+        class NotADataFrame(DataFrame):
+            @property
+            def _constructor(self):
+                return NotADataFrame
+
+        # Note: this calls DataFrame.merge on the subclass, not merge_ordered.
+        nad = NotADataFrame(self.left)
+        result = nad.merge(self.right, on="key")
+
+        assert isinstance(result, NotADataFrame)
+
+    def test_empty_sequence_concat(self):
+        # GH 9157
+        # Note: this exercises pd.concat, not merge_ordered.
+        empty_pat = "[Nn]o objects"
+        none_pat = "objects.*None"
+        # Each case pairs an input sequence with the regex its ValueError
+        # message must match.
+        test_cases = [
+            ((), empty_pat),
+            ([], empty_pat),
+            ({}, empty_pat),
+            ([None], none_pat),
+            ([None, None], none_pat),
+        ]
+        for df_seq, pattern in test_cases:
+            with pytest.raises(ValueError, match=pattern):
+                pd.concat(df_seq)
+
+        # These must not raise: at least one real DataFrame is present.
+        pd.concat([DataFrame()])
+        pd.concat([None, DataFrame()])
+        pd.concat([DataFrame(), None])
+
+    def test_doc_example(self):
+        # Grouped, forward-filled ordered merge; no ``on`` is passed here.
+        left = DataFrame(
+            {
+                "group": list("aaabbb"),
+                "key": ["a", "c", "e", "a", "c", "e"],
+                "lvalue": [1, 2, 3] * 2,
+            }
+        )
+
+        right = DataFrame({"key": ["b", "c", "d"], "rvalue": [1, 2, 3]})
+
+        result = merge_ordered(left, right, fill_method="ffill", left_by="group")
+
+        # Five keys (a-e) per group; rvalue for key "a" has nothing to fill from.
+        expected = DataFrame(
+            {
+                "group": list("aaaaabbbbb"),
+                "key": ["a", "b", "c", "d", "e"] * 2,
+                "lvalue": [1, 1, 2, 2, 3] * 2,
+                "rvalue": [np.nan, 1, 2, 3, 3] * 2,
+            }
+        )
+
+        tm.assert_frame_equal(result, expected)
+
+    # Cases: list ``on`` with list ``left_by``; string ``on`` with list
+    # ``left_by``; list ``on`` with list ``right_by`` (operands swapped).
+    @pytest.mark.parametrize(
+        "left, right, on, left_by, right_by, expected",
+        [
+            (
+                DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}),
+                DataFrame({"T": [2], "E": [1]}),
+                ["T"],
+                ["G", "H"],
+                None,
+                DataFrame(
+                    {
+                        "G": ["g"] * 3,
+                        "H": ["h"] * 3,
+                        "T": [1, 2, 3],
+                        "E": [np.nan, 1.0, np.nan],
+                    }
+                ),
+            ),
+            (
+                DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}),
+                DataFrame({"T": [2], "E": [1]}),
+                "T",
+                ["G", "H"],
+                None,
+                DataFrame(
+                    {
+                        "G": ["g"] * 3,
+                        "H": ["h"] * 3,
+                        "T": [1, 2, 3],
+                        "E": [np.nan, 1.0, np.nan],
+                    }
+                ),
+            ),
+            (
+                DataFrame({"T": [2], "E": [1]}),
+                DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}),
+                ["T"],
+                None,
+                ["G", "H"],
+                DataFrame(
+                    {
+                        "T": [1, 2, 3],
+                        "E": [np.nan, 1.0, np.nan],
+                        "G": ["g"] * 3,
+                        "H": ["h"] * 3,
+                    }
+                ),
+            ),
+        ],
+    )
+    def test_list_type_by(self, left, right, on, left_by, right_by, expected):
+        # GH 35269
+        # left_by / right_by given as lists of column names.
+        result = merge_ordered(
+            left=left,
+            right=right,
+            on=on,
+            left_by=left_by,
+            right_by=right_by,
+        )
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_left_by_length_equals_to_right_shape0(self):
+        # GH 38166
+        # left_by has two entries, and the right frame has two columns.
+        left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=list("GHE"))
+        right = DataFrame([[2, 1]], columns=list("ET"))
+        result = merge_ordered(left, right, on="E", left_by=["G", "H"])
+        expected = DataFrame(
+            {"G": ["g"] * 3, "H": ["h"] * 3, "E": [1, 2, 3], "T": [np.nan, 1.0, np.nan]}
+        )
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_elements_not_in_by_but_in_df(self):
+        # GH 38167
+        # "h" (lower case) is not a column of left, so a KeyError naming it
+        # is expected.
+        left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=list("GHE"))
+        right = DataFrame([[2, 1]], columns=list("ET"))
+        msg = r"\{'h'\} not found in left columns"
+        with pytest.raises(KeyError, match=msg):
+            merge_ordered(left, right, on="E", left_by=["G", "h"])
--- a/.venv/Lib/site-packages/pandas/tests/reshape/merge/test_multi.py
+++ b/.venv/Lib/site-packages/pandas/tests/reshape/merge/test_multi.py
@@ -0,0 +1,909 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    Timestamp,
+)
+import pandas._testing as tm
+from pandas.core.reshape.concat import concat
+from pandas.core.reshape.merge import merge
+
+
+@pytest.fixture
+def left():
+    """left dataframe (not multi-indexed) for multi-index join tests"""
+    # (key1, key2) pairs, one per row
+    key_pairs = [
+        ("bar", "two"),
+        ("bar", "one"),
+        ("bar", "three"),
+        ("foo", "one"),
+        ("foo", "two"),
+        ("baz", "one"),
+        ("baz", "two"),
+        ("qux", "two"),
+        ("qux", "three"),
+        ("snap", "one"),
+    ]
+    key1 = [first for first, _ in key_pairs]
+    key2 = [second for _, second in key_pairs]
+
+    values = np.random.randn(len(key1))
+    return DataFrame({"key1": key1, "key2": key2, "data": values})
+
+
+@pytest.fixture
+def right(multiindex_dataframe_random_data):
+    """right dataframe (multi-indexed) for multi-index join tests"""
+    frame = multiindex_dataframe_random_data
+    # Relabel in place so the index level names line up with the key1/key2
+    # columns of the ``left`` fixture.
+    frame.index.names = ["key1", "key2"]
+    frame.columns = ["j_one", "j_two", "j_three"]
+    return frame
+
+
+@pytest.fixture
+def left_multi():
+    """Trips frame indexed by Origin/Destination/Period/TripPurp."""
+    index_cols = ["Origin", "Destination", "Period", "TripPurp"]
+    data = {
+        "Origin": ["A", "A", "B", "B", "C"],
+        "Destination": ["A", "B", "A", "C", "A"],
+        "Period": ["AM", "AM", "IP", "AM", "OP"],
+        "TripPurp": ["hbw", "nhb", "hbo", "nhb", "hbw"],
+        "Trips": [1987, 3647, 2470, 4296, 4444],
+    }
+    frame = DataFrame(data, columns=index_cols + ["Trips"])
+    return frame.set_index(index_cols)
+
+
+@pytest.fixture
+def right_multi():
+    """Distance frame indexed by Origin/Destination/Period/LinkType."""
+    index_cols = ["Origin", "Destination", "Period", "LinkType"]
+    data = {
+        "Origin": ["A", "A", "B", "B", "C", "C", "E"],
+        "Destination": ["A", "B", "A", "B", "A", "B", "F"],
+        "Period": ["AM", "AM", "IP", "AM", "OP", "IP", "AM"],
+        "LinkType": ["a", "b", "c", "b", "a", "b", "a"],
+        "Distance": [100, 80, 90, 80, 75, 35, 55],
+    }
+    frame = DataFrame(data, columns=index_cols + ["Distance"])
+    return frame.set_index(index_cols)
+
+
+@pytest.fixture
+def on_cols_multi():
+    """Index level names shared by ``left_multi`` and ``right_multi``."""
+    shared_levels = ["Origin", "Destination", "Period"]
+    return shared_levels
+
+
+@pytest.fixture
+def idx_cols_multi():
+    """Union of the index level names of ``left_multi`` and ``right_multi``."""
+    all_levels = ["Origin", "Destination", "Period", "TripPurp", "LinkType"]
+    return all_levels
+
+
+class TestMergeMulti:
+    def test_merge_on_multikey(self, left, right, join_type):
+        """``join`` on two key columns matches ``merge`` against the
+        right frame with its index reset, with and without ``sort=True``.
+        """
+        on_cols = ["key1", "key2"]
+
+        # First pass relies on the default ``sort``; second pass sorts explicitly.
+        for extra in ({}, {"sort": True}):
+            joined = left.join(right, on=on_cols, how=join_type, **extra)
+            result = joined.reset_index(drop=True)
+
+            expected = merge(
+                left, right.reset_index(), on=on_cols, how=join_type, **extra
+            )
+
+            tm.assert_frame_equal(result, expected)
+
+    # Left join on three key columns against a frame indexed by the same keys,
+    # first on clean data and then with nulls injected into the keys.
+    # Note: the ``left``/``right`` fixture arguments are rebound to locally
+    # built frames before use.
+    @pytest.mark.parametrize("sort", [False, True])
+    def test_left_join_multi_index(self, left, right, sort):
+        icols = ["1st", "2nd", "3rd"]
+
+        def bind_cols(df):
+            # Fold the three key columns into a single number per row.
+            # ``a != a`` is only true for NaN, which maps to 0 instead of ord().
+            iord = lambda a: 0 if a != a else ord(a)
+            f = lambda ts: ts.map(iord) - ord("a")
+            return f(df["1st"]) + f(df["3rd"]) * 1e2 + df["2nd"].fillna(0) * 1e4
+
+        def run_asserts(left, right, sort):
+            res = left.join(right, on=icols, how="left", sort=sort)
+
+            # i.e. len(res) >= len(left): the join never loses left rows
+            assert len(left) < len(res) + 1
+            assert not res["4th"].isna().any()
+            assert not res["5th"].isna().any()
+
+            # "5th" was built as the negation of the same encoding as "4th"
+            tm.assert_series_equal(res["4th"], -res["5th"], check_names=False)
+            # Recompute the encoding from the key columns only (last two
+            # columns, "4th" and "5th", are excluded).
+            result = bind_cols(res.iloc[:, :-2])
+            tm.assert_series_equal(res["4th"], result, check_names=False)
+            assert result.name is None
+
+            if sort:
+                tm.assert_frame_equal(res, res.sort_values(icols, kind="mergesort"))
+
+            # Cross-check against merge on the right frame with its index reset
+            out = merge(left, right.reset_index(), on=icols, sort=sort, how="left")
+
+            res.index = np.arange(len(res))
+            tm.assert_frame_equal(out, res)
+
+        # Lower-case letters "a".."z" used for the two string key columns
+        lc = list(map(chr, np.arange(ord("a"), ord("z") + 1)))
+        left = DataFrame(np.random.choice(lc, (5000, 2)), columns=["1st", "3rd"])
+        left.insert(1, "2nd", np.random.randint(0, 1000, len(left)))
+
+        # right holds the same rows as left in a random order
+        i = np.random.permutation(len(left))
+        right = left.iloc[i].copy()
+
+        left["4th"] = bind_cols(left)
+        right["5th"] = -bind_cols(right)
+        right.set_index(icols, inplace=True)
+
+        run_asserts(left, right, sort)
+
+        # inject some nulls
+        left.loc[1::23, "1st"] = np.nan
+        left.loc[2::37, "2nd"] = np.nan
+        left.loc[3::43, "3rd"] = np.nan
+        left["4th"] = bind_cols(left)
+
+        # Rebuild right from the null-injected keys, dropping the "4th" column
+        i = np.random.permutation(len(left))
+        right = left.iloc[i, :-1]
+        right["5th"] = -bind_cols(right)
+        right.set_index(icols, inplace=True)
+
+        run_asserts(left, right, sort)
+
+    @pytest.mark.parametrize("sort", [False, True])
+    def test_merge_right_vs_left(self, left, right, sort):
+        """A left merge and the mirrored right merge agree on a multi-key."""
+        keys = ["key1", "key2"]
+
+        left_then_right = left.merge(
+            right, left_on=keys, right_index=True, how="left", sort=sort
+        )
+        right_then_left = right.merge(
+            left, right_on=keys, left_index=True, how="right", sort=sort
+        )
+
+        # The two results order their columns differently; align before comparing.
+        right_then_left = right_then_left[left_then_right.columns]
+
+        tm.assert_frame_equal(left_then_right, right_then_left)
+
+    def test_merge_multiple_cols_with_mixed_cols_index(self):
+        """Merge a frame with a Series whose MultiIndex was reset to
+        columns, on both level names (GH29522).
+        """
+        lev1_values = list("AAABBB")
+        lev2_values = [1, 2, 3, 1, 2, 3]
+
+        amounts = Series(
+            range(6),
+            MultiIndex.from_product([["A", "B"], [1, 2, 3]], names=["lev1", "lev2"]),
+            name="Amount",
+        )
+        df = DataFrame({"lev1": lev1_values, "lev2": lev2_values, "col": 0})
+
+        expected = DataFrame(
+            {
+                "lev1": list("AAABBB"),
+                "lev2": [1, 2, 3, 1, 2, 3],
+                "col": [0] * 6,
+                "Amount": range(6),
+            }
+        )
+
+        result = merge(df, amounts.reset_index(), on=["lev1", "lev2"])
+        tm.assert_frame_equal(result, expected)
+
+    def test_compress_group_combinations(self):
+        """Smoke test: an outer merge over many distinct key combinations
+        runs without error (nothing is asserted on the result).
+        """
+        # ~ 40000000 possible unique groups
+        base_keys = tm.rands_array(10, 10000)
+        key1 = np.tile(base_keys, 2)
+        key2 = key1[::-1]
+
+        full = DataFrame({"key1": key1, "key2": key2, "value1": np.random.randn(20000)})
+
+        # Every other row of the same key arrays
+        half = DataFrame(
+            {"key1": key1[::2], "key2": key2[::2], "value2": np.random.randn(10000)}
+        )
+
+        # just to hit the label compression code path
+        merge(full, half, how="outer")
+
+    def test_left_join_index_preserve_order(self):
+        """Left join on a MultiIndex keeps the left row order; sorting the
+        result afterwards matches ``sort=True``.
+        """
+        on_cols = ["k1", "k2"]
+
+        def make_right():
+            index = MultiIndex.from_tuples([(2, "bar"), (1, "foo")])
+            return DataFrame({"v2": [5, 7]}, index=index)
+
+        def check(left):
+            right = make_right()
+            result = left.join(right, on=on_cols)
+
+            # Reference: left unchanged, with v2 filled only on matching keys
+            expected = left.copy()
+            expected["v2"] = np.nan
+            expected.loc[(expected.k1 == 2) & (expected.k2 == "bar"), "v2"] = 5
+            expected.loc[(expected.k1 == 1) & (expected.k2 == "foo"), "v2"] = 7
+            tm.assert_frame_equal(result, expected)
+
+            # A stable sort of the unsorted join must equal the sorted join
+            result = result.sort_values(on_cols, kind="mergesort")
+            expected = left.join(right, on=on_cols, sort=True)
+            tm.assert_frame_equal(result, expected)
+
+        check(
+            DataFrame(
+                {
+                    "k1": [0, 1, 2] * 8,
+                    "k2": ["foo", "bar"] * 12,
+                    "v": np.array(np.arange(24), dtype=np.int64),
+                }
+            )
+        )
+
+        # test join with multi dtypes blocks
+        check(
+            DataFrame(
+                {
+                    "k1": [0, 1, 2] * 8,
+                    "k2": ["foo", "bar"] * 12,
+                    "k3": np.array([0, 1, 2] * 8, dtype=np.float32),
+                    "v": np.array(np.arange(24), dtype=np.int32),
+                }
+            )
+        )
+
+    def test_left_join_index_multi_match_multiindex(self):
+        """Left join on three key columns where a key can match several
+        rows of the right MultiIndex.
+        """
+        join_keys = ["cola", "colb", "colc"]
+
+        left_rows = [
+            ["X", "Y", "C", "a"],
+            ["W", "Y", "C", "e"],
+            ["V", "Q", "A", "h"],
+            ["V", "R", "D", "i"],
+            ["X", "Y", "D", "b"],
+            ["X", "Y", "A", "c"],
+            ["W", "Q", "B", "f"],
+            ["W", "R", "C", "g"],
+            ["V", "Y", "C", "j"],
+            ["X", "Y", "B", "d"],
+        ]
+        left = DataFrame(
+            left_rows,
+            columns=["cola", "colb", "colc", "tag"],
+            index=[3, 2, 0, 1, 7, 6, 4, 5, 9, 8],
+        )
+
+        right_rows = [
+            ["W", "R", "C", 0],
+            ["W", "Q", "B", 3],
+            ["W", "Q", "B", 8],
+            ["X", "Y", "A", 1],
+            ["X", "Y", "A", 4],
+            ["X", "Y", "B", 5],
+            ["X", "Y", "C", 6],
+            ["X", "Y", "C", 9],
+            ["X", "Q", "C", -6],
+            ["X", "R", "C", -9],
+            ["V", "Y", "C", 7],
+            ["V", "R", "D", 2],
+            ["V", "R", "D", -1],
+            ["V", "Q", "A", -3],
+        ]
+        right = DataFrame(right_rows, columns=["col1", "col2", "col3", "val"])
+        right = right.set_index(["col1", "col2", "col3"])
+
+        # Left rows with several matches are repeated (same index label);
+        # rows without a match get NaN.
+        expected_rows = [
+            ["X", "Y", "C", "a", 6],
+            ["X", "Y", "C", "a", 9],
+            ["W", "Y", "C", "e", np.nan],
+            ["V", "Q", "A", "h", -3],
+            ["V", "R", "D", "i", 2],
+            ["V", "R", "D", "i", -1],
+            ["X", "Y", "D", "b", np.nan],
+            ["X", "Y", "A", "c", 1],
+            ["X", "Y", "A", "c", 4],
+            ["W", "Q", "B", "f", 3],
+            ["W", "Q", "B", "f", 8],
+            ["W", "R", "C", "g", 0],
+            ["V", "Y", "C", "j", 7],
+            ["X", "Y", "B", "d", 5],
+        ]
+        expected = DataFrame(
+            expected_rows,
+            columns=["cola", "colb", "colc", "tag", "val"],
+            index=[3, 3, 2, 0, 1, 1, 7, 6, 6, 4, 4, 5, 9, 8],
+        )
+
+        result = left.join(right, on=join_keys, how="left")
+        tm.assert_frame_equal(result, expected)
+
+        # The sorted join equals a stable sort of the unsorted expectation
+        sorted_result = left.join(right, on=join_keys, how="left", sort=True)
+        sorted_expected = expected.sort_values(join_keys, kind="mergesort")
+        tm.assert_frame_equal(sorted_result, sorted_expected)
+
+    def test_left_join_index_multi_match(self):
+        """Left join on one key column where a key can match several rows
+        of the right index.
+        """
+        left = DataFrame(
+            [["c", 0], ["b", 1], ["a", 2], ["b", 3]],
+            columns=["tag", "val"],
+            index=[2, 0, 1, 3],
+        )
+
+        right_rows = [
+            ["a", "v"],
+            ["c", "w"],
+            ["c", "x"],
+            ["d", "y"],
+            ["a", "z"],
+            ["c", "r"],
+            ["e", "q"],
+            ["c", "s"],
+        ]
+        right = DataFrame(right_rows, columns=["tag", "char"]).set_index("tag")
+
+        # Left order is kept; each left row repeats once per matching right row.
+        expected_rows = [
+            ["c", 0, "w"],
+            ["c", 0, "x"],
+            ["c", 0, "r"],
+            ["c", 0, "s"],
+            ["b", 1, np.nan],
+            ["a", 2, "v"],
+            ["a", 2, "z"],
+            ["b", 3, np.nan],
+        ]
+        expected = DataFrame(
+            expected_rows,
+            columns=["tag", "val", "char"],
+            index=[2, 2, 2, 2, 0, 1, 1, 3],
+        )
+
+        unsorted_join = left.join(right, on="tag", how="left")
+        tm.assert_frame_equal(unsorted_join, expected)
+
+        sorted_join = left.join(right, on="tag", how="left", sort=True)
+        sorted_expected = expected.sort_values("tag", kind="mergesort")
+        tm.assert_frame_equal(sorted_join, sorted_expected)
+
+        # GH7331 - maintain left frame order in left merge
+        merged = merge(left, right.reset_index(), how="left", on="tag")
+        expected.index = np.arange(len(expected))
+        tm.assert_frame_equal(merged, expected)
+
+    def test_left_merge_na_buglet(self):
+        """Left merge on ``id`` when the right key column contains NaN."""
+        left = DataFrame(
+            {
+                "id": list("abcde"),
+                "v1": np.random.randn(5),
+                "v2": np.random.randn(5),
+                "dummy": list("abcde"),
+                "v3": np.random.randn(5),
+            },
+            columns=["id", "v1", "v2", "dummy", "v3"],
+        )
+        right = DataFrame(
+            {
+                "id": ["a", "b", np.nan, np.nan, np.nan],
+                "sv3": [1.234, 5.678, np.nan, np.nan, np.nan],
+            }
+        )
+
+        # Reference: index-aligned join with right's non-key column
+        right_values = right.drop(["id"], axis=1)
+        expected = left.join(right_values)
+
+        result = merge(left, right, on="id", how="left")
+        tm.assert_frame_equal(result, expected)
+
+    def test_merge_na_keys(self):
+        """Outer merge on all shared columns when one of them holds NaN."""
+        frame = DataFrame(
+            [
+                [1950, "A", 1.5],
+                [1950, "B", 1.5],
+                [1955, "B", 1.5],
+                [1960, "B", np.nan],
+                [1970, "B", 4.0],
+                [1950, "C", 4.0],
+                [1960, "C", np.nan],
+                [1965, "C", 3.0],
+                [1970, "C", 4.0],
+            ],
+            columns=["year", "panel", "data"],
+        )
+        other = DataFrame(
+            [
+                [1960, "A", np.nan],
+                [1970, "A", np.nan],
+                [1955, "A", np.nan],
+                [1965, "A", np.nan],
+                [1965, "B", np.nan],
+                [1955, "C", np.nan],
+            ],
+            columns=["year", "panel", "data"],
+        )
+
+        result = frame.merge(other, how="outer")
+
+        # Reference: swap NaN for a sentinel, merge, then swap it back
+        filled_frame = frame.fillna(-999)
+        filled_other = other.fillna(-999)
+        expected = filled_frame.merge(filled_other, how="outer").replace(-999, np.nan)
+
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("klass", [None, np.asarray, Series, Index])
+    def test_merge_datetime_index(self, klass):
+        """Merge ``on`` a vector derived from a DatetimeIndex (gh-19038)."""
+        df = DataFrame(
+            [1, 2, 3], ["2016-01-01", "2017-01-01", "2018-01-01"], columns=["a"]
+        )
+        df.index = pd.to_datetime(df.index)
+
+        # The key vector is the index's years, optionally wrapped in ``klass``
+        years = df.index.year
+        on_vector = years if klass is None else klass(years)
+
+        # Column name plus vector: the vector shows up as "key_1"
+        result = df.merge(df, on=["a", on_vector], how="inner")
+        expected = DataFrame({"a": [1, 2, 3], "key_1": [2016, 2017, 2018]})
+        tm.assert_frame_equal(result, expected)
+
+        # Vector only: it shows up as "key_0" and "a" gets the _x/_y suffixes
+        result = df.merge(df, on=[df.index.year], how="inner")
+        expected = DataFrame(
+            {"key_0": [2016, 2017, 2018], "a_x": [1, 2, 3], "a_y": [1, 2, 3]}
+        )
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("merge_type", ["left", "right"])
+    def test_merge_datetime_multi_index_empty_df(self, merge_type):
+        """Merge/join a populated frame with an empty one sharing the same
+        (date, panel) MultiIndex names (gh-36895).
+        """
+
+        def populated_index():
+            return MultiIndex.from_tuples(
+                [[Timestamp("1950-01-01"), "A"], [Timestamp("1950-01-02"), "B"]],
+                names=["date", "panel"],
+            )
+
+        left = DataFrame(
+            data={
+                "data": [1.5, 1.5],
+            },
+            index=populated_index(),
+        )
+        right = DataFrame(
+            index=MultiIndex.from_tuples([], names=["date", "panel"]), columns=["state"]
+        )
+
+        expected_index = populated_index()
+
+        # The populated frame's rows always survive; only the column order
+        # depends on which side it is on.
+        if merge_type == "left":
+            expected = DataFrame(
+                data={"data": [1.5, 1.5], "state": [None, None]},
+                index=expected_index,
+            )
+            results_merge = left.merge(right, how="left", on=["date", "panel"])
+            results_join = left.join(right, how="left")
+        else:
+            expected = DataFrame(
+                data={"state": [None, None], "data": [1.5, 1.5]},
+                index=expected_index,
+            )
+            results_merge = right.merge(left, how="right", on=["date", "panel"])
+            results_join = right.join(left, how="right")
+
+        tm.assert_frame_equal(results_merge, expected)
+        tm.assert_frame_equal(results_join, expected)
+
+    @pytest.fixture
+    def household(self):
+        household = DataFrame(
+            {
+                "household_id": [1, 2, 3],
+                "male": [0, 1, 0],
+                "wealth": [196087.3, 316478.7, 294750],
+            },
+            columns=["household_id", "male", "wealth"],
+        ).set_index("household_id")
+        return household
+
+    @pytest.fixture
+    def portfolio(self):
+        portfolio = DataFrame(
+            {
+                "household_id": [1, 2, 2, 3, 3, 3, 4],
+                "asset_id": [
+                    "nl0000301109",
+                    "nl0000289783",
+                    "gb00b03mlx29",
+                    "gb00b03mlx29",
+                    "lu0197800237",
+                    "nl0000289965",
+                    np.nan,
+                ],
+                "name": [
+                    "ABN Amro",
+                    "Robeco",
+                    "Royal Dutch Shell",
+                    "Royal Dutch Shell",
+                    "AAB Eastern Europe Equity Fund",
+                    "Postbank BioTech Fonds",
+                    np.nan,
+                ],
+                "share": [1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0],
+            },
+            columns=["household_id", "asset_id", "name", "share"],
+        ).set_index(["household_id", "asset_id"])
+        return portfolio
+
+    @pytest.fixture
+    def expected(self):
+        expected = (
+            DataFrame(
+                {
+                    "male": [0, 1, 1, 0, 0, 0],
+                    "wealth": [
+                        196087.3,
+                        316478.7,
+                        316478.7,
+                        294750.0,
+                        294750.0,
+                        294750.0,
+                    ],
+                    "name": [
+                        "ABN Amro",
+                        "Robeco",
+                        "Royal Dutch Shell",
+                        "Royal Dutch Shell",
+                        "AAB Eastern Europe Equity Fund",
+                        "Postbank BioTech Fonds",
+                    ],
+                    "share": [1.00, 0.40, 0.60, 0.15, 0.60, 0.25],
+                    "household_id": [1, 2, 2, 3, 3, 3],
+                    "asset_id": [
+                        "nl0000301109",
+                        "nl0000289783",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "lu0197800237",
+                        "nl0000289965",
+                    ],
+                }
+            )
+            .set_index(["household_id", "asset_id"])
+            .reindex(columns=["male", "wealth", "name", "share"])
+        )
+        return expected
+
+    def test_join_multi_levels(self, portfolio, household, expected):
+        portfolio = portfolio.copy()
+        household = household.copy()
+
+        # GH 3662
+        # merge multi-levels
+        result = household.join(portfolio, how="inner")
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_multi_levels_merge_equivalence(self, portfolio, household, expected):
+        portfolio = portfolio.copy()
+        household = household.copy()
+
+        # equivalency
+        result = merge(
+            household.reset_index(),
+            portfolio.reset_index(),
+            on=["household_id"],
+            how="inner",
+        ).set_index(["household_id", "asset_id"])
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_multi_levels_outer(self, portfolio, household, expected):
+        portfolio = portfolio.copy()
+        household = household.copy()
+
+        result = household.join(portfolio, how="outer")
+        expected = concat(
+            [
+                expected,
+                (
+                    DataFrame(
+                        {"share": [1.00]},
+                        index=MultiIndex.from_tuples(
+                            [(4, np.nan)], names=["household_id", "asset_id"]
+                        ),
+                    )
+                ),
+            ],
+            axis=0,
+            sort=True,
+        ).reindex(columns=expected.columns)
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_multi_levels_invalid(self, portfolio, household):
+        portfolio = portfolio.copy()
+        household = household.copy()
+
+        # invalid cases
+        household.index.name = "foo"
+
+        with pytest.raises(
+            ValueError, match="cannot join with no overlapping index names"
+        ):
+            household.join(portfolio, how="inner")
+
+        portfolio2 = portfolio.copy()
+        portfolio2.index.set_names(["household_id", "foo"])
+
+        with pytest.raises(ValueError, match="columns overlap but no suffix specified"):
+            portfolio2.join(portfolio, how="inner")
+
+    def test_join_multi_levels2(self):
+
+        # some more advanced merges
+        # GH6360
+        household = DataFrame(
+            {
+                "household_id": [1, 2, 2, 3, 3, 3, 4],
+                "asset_id": [
+                    "nl0000301109",
+                    "nl0000301109",
+                    "gb00b03mlx29",
+                    "gb00b03mlx29",
+                    "lu0197800237",
+                    "nl0000289965",
+                    np.nan,
+                ],
+                "share": [1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0],
+            },
+            columns=["household_id", "asset_id", "share"],
+        ).set_index(["household_id", "asset_id"])
+
+        log_return = DataFrame(
+            {
+                "asset_id": [
+                    "gb00b03mlx29",
+                    "gb00b03mlx29",
+                    "gb00b03mlx29",
+                    "lu0197800237",
+                    "lu0197800237",
+                ],
+                "t": [233, 234, 235, 180, 181],
+                "log_return": [
+                    0.09604978,
+                    -0.06524096,
+                    0.03532373,
+                    0.03025441,
+                    0.036997,
+                ],
+            }
+        ).set_index(["asset_id", "t"])
+
+        expected = (
+            DataFrame(
+                {
+                    "household_id": [2, 2, 2, 3, 3, 3, 3, 3],
+                    "asset_id": [
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "lu0197800237",
+                        "lu0197800237",
+                    ],
+                    "t": [233, 234, 235, 233, 234, 235, 180, 181],
+                    "share": [0.6, 0.6, 0.6, 0.15, 0.15, 0.15, 0.6, 0.6],
+                    "log_return": [
+                        0.09604978,
+                        -0.06524096,
+                        0.03532373,
+                        0.09604978,
+                        -0.06524096,
+                        0.03532373,
+                        0.03025441,
+                        0.036997,
+                    ],
+                }
+            )
+            .set_index(["household_id", "asset_id", "t"])
+            .reindex(columns=["share", "log_return"])
+        )
+
+        # this is the equivalency
+        result = merge(
+            household.reset_index(),
+            log_return.reset_index(),
+            on=["asset_id"],
+            how="inner",
+        ).set_index(["household_id", "asset_id", "t"])
+        tm.assert_frame_equal(result, expected)
+
+        expected = (
+            DataFrame(
+                {
+                    "household_id": [1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4],
+                    "asset_id": [
+                        "nl0000301109",
+                        "nl0000301109",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "lu0197800237",
+                        "lu0197800237",
+                        "nl0000289965",
+                        None,
+                    ],
+                    "t": [
+                        None,
+                        None,
+                        233,
+                        234,
+                        235,
+                        233,
+                        234,
+                        235,
+                        180,
+                        181,
+                        None,
+                        None,
+                    ],
+                    "share": [
+                        1.0,
+                        0.4,
+                        0.6,
+                        0.6,
+                        0.6,
+                        0.15,
+                        0.15,
+                        0.15,
+                        0.6,
+                        0.6,
+                        0.25,
+                        1.0,
+                    ],
+                    "log_return": [
+                        None,
+                        None,
+                        0.09604978,
+                        -0.06524096,
+                        0.03532373,
+                        0.09604978,
+                        -0.06524096,
+                        0.03532373,
+                        0.03025441,
+                        0.036997,
+                        None,
+                        None,
+                    ],
+                }
+            )
+            .set_index(["household_id", "asset_id", "t"])
+            .reindex(columns=["share", "log_return"])
+        )
+
+        result = merge(
+            household.reset_index(),
+            log_return.reset_index(),
+            on=["asset_id"],
+            how="outer",
+        ).set_index(["household_id", "asset_id", "t"])
+
+        tm.assert_frame_equal(result, expected)
+
+
+class TestJoinMultiMulti:
+    def test_join_multi_multi(
+        self, left_multi, right_multi, join_type, on_cols_multi, idx_cols_multi
+    ):
+        # Multi-index join tests
+        expected = (
+            merge(
+                left_multi.reset_index(),
+                right_multi.reset_index(),
+                how=join_type,
+                on=on_cols_multi,
+            )
+            .set_index(idx_cols_multi)
+            .sort_index()
+        )
+
+        result = left_multi.join(right_multi, how=join_type).sort_index()
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_multi_empty_frames(
+        self, left_multi, right_multi, join_type, on_cols_multi, idx_cols_multi
+    ):
+
+        left_multi = left_multi.drop(columns=left_multi.columns)
+        right_multi = right_multi.drop(columns=right_multi.columns)
+
+        expected = (
+            merge(
+                left_multi.reset_index(),
+                right_multi.reset_index(),
+                how=join_type,
+                on=on_cols_multi,
+            )
+            .set_index(idx_cols_multi)
+            .sort_index()
+        )
+
+        result = left_multi.join(right_multi, how=join_type).sort_index()
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("box", [None, np.asarray, Series, Index])
+    def test_merge_datetime_index(self, box):
+        # see gh-19038
+        df = DataFrame(
+            [1, 2, 3], ["2016-01-01", "2017-01-01", "2018-01-01"], columns=["a"]
+        )
+        df.index = pd.to_datetime(df.index)
+        on_vector = df.index.year
+
+        if box is not None:
+            on_vector = box(on_vector)
+
+        expected = DataFrame({"a": [1, 2, 3], "key_1": [2016, 2017, 2018]})
+
+        result = df.merge(df, on=["a", on_vector], how="inner")
+        tm.assert_frame_equal(result, expected)
+
+        expected = DataFrame(
+            {"key_0": [2016, 2017, 2018], "a_x": [1, 2, 3], "a_y": [1, 2, 3]}
+        )
+
+        result = df.merge(df, on=[df.index.year], how="inner")
+        tm.assert_frame_equal(result, expected)
+
+    def test_single_common_level(self):
+        index_left = MultiIndex.from_tuples(
+            [("K0", "X0"), ("K0", "X1"), ("K1", "X2")], names=["key", "X"]
+        )
+
+        left = DataFrame(
+            {"A": ["A0", "A1", "A2"], "B": ["B0", "B1", "B2"]}, index=index_left
+        )
+
+        index_right = MultiIndex.from_tuples(
+            [("K0", "Y0"), ("K1", "Y1"), ("K2", "Y2"), ("K2", "Y3")], names=["key", "Y"]
+        )
+
+        right = DataFrame(
+            {"C": ["C0", "C1", "C2", "C3"], "D": ["D0", "D1", "D2", "D3"]},
+            index=index_right,
+        )
+
+        result = left.join(right)
+        expected = merge(
+            left.reset_index(), right.reset_index(), on=["key"], how="inner"
+        ).set_index(["key", "X", "Y"])
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_multi_wrong_order(self):
+        # GH 25760
+        # GH 28956
+
+        midx1 = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"])
+        midx3 = MultiIndex.from_tuples([(4, 1), (3, 2), (3, 1)], names=["b", "a"])
+
+        left = DataFrame(index=midx1, data={"x": [10, 20, 30, 40]})
+        right = DataFrame(index=midx3, data={"y": ["foo", "bar", "fing"]})
+
+        result = left.join(right)
+
+        expected = DataFrame(
+            index=midx1,
+            data={"x": [10, 20, 30, 40], "y": ["fing", "foo", "bar", np.nan]},
+        )
+
+        tm.assert_frame_equal(result, expected)