first commit

2026-05-05 07:35:38 +00:00 · 2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions
@@ -0,0 +1,60 @@
+import re
+
+import numpy as np
+import pytest
+
+from pandas import (
+    DataFrame,
+    MultiIndex,
+)
+
+
+class TestDataFrameDelItem:
+    def test_delitem(self, float_frame):
+        del float_frame["A"]
+        assert "A" not in float_frame
+
+    def test_delitem_multiindex(self):
+        midx = MultiIndex.from_product([["A", "B"], [1, 2]])
+        df = DataFrame(np.random.randn(4, 4), columns=midx)
+        assert len(df.columns) == 4
+        assert ("A",) in df.columns
+        assert "A" in df.columns
+
+        result = df["A"]
+        assert isinstance(result, DataFrame)
+        del df["A"]
+
+        assert len(df.columns) == 2
+
+        # A still in the levels, BUT get a KeyError if trying
+        # to delete
+        assert ("A",) not in df.columns
+        with pytest.raises(KeyError, match=re.escape("('A',)")):
+            del df[("A",)]
+
+        # behavior of dropped/deleted MultiIndex levels changed from
+        # GH 2770 to GH 19027: MultiIndex no longer '.__contains__'
+        # levels which are dropped/deleted
+        assert "A" not in df.columns
+        with pytest.raises(KeyError, match=re.escape("('A',)")):
+            del df["A"]
+
+    def test_delitem_corner(self, float_frame):
+        f = float_frame.copy()
+        del f["D"]
+        assert len(f.columns) == 3
+        with pytest.raises(KeyError, match=r"^'D'$"):
+            del f["D"]
+        del f["B"]
+        assert len(f.columns) == 2
+
+    def test_delitem_col_still_multiindex(self):
+        arrays = [["a", "b", "c", "top"], ["", "", "", "OD"], ["", "", "", "wx"]]
+
+        tuples = sorted(zip(*arrays))
+        index = MultiIndex.from_tuples(tuples)
+
+        df = DataFrame(np.random.randn(3, 4), columns=index)
+        del df[("a", "", "")]
+        assert isinstance(df.columns, MultiIndex)
@@ -0,0 +1,27 @@
+import pytest
+
+from pandas import DataFrame
+import pandas._testing as tm
+
+
+class TestGet:
+    def test_get(self, float_frame):
+        b = float_frame.get("B")
+        tm.assert_series_equal(b, float_frame["B"])
+
+        assert float_frame.get("foo") is None
+        tm.assert_series_equal(
+            float_frame.get("foo", float_frame["B"]), float_frame["B"]
+        )
+
+    @pytest.mark.parametrize(
+        "df",
+        [
+            DataFrame(),
+            DataFrame(columns=list("AB")),
+            DataFrame(columns=list("AB"), index=range(3)),
+        ],
+    )
+    def test_get_none(self, df):
+        # see gh-5652
+        assert df.get(None) is None
@@ -0,0 +1,22 @@
+import pytest
+
+from pandas import (
+    DataFrame,
+    MultiIndex,
+)
+
+
+class TestGetValue:
+    def test_get_set_value_no_partial_indexing(self):
+        # partial w/ MultiIndex raise exception
+        index = MultiIndex.from_tuples([(0, 1), (0, 2), (1, 1), (1, 2)])
+        df = DataFrame(index=index, columns=range(4))
+        with pytest.raises(KeyError, match=r"^0$"):
+            df._get_value(0, 1)
+
+    def test_get_value(self, float_frame):
+        for idx in float_frame.index:
+            for col in float_frame.columns:
+                result = float_frame._get_value(idx, col)
+                expected = float_frame[col][idx]
+                assert result == expected
@@ -0,0 +1,406 @@
+import re
+
+import numpy as np
+import pytest
+
+from pandas import (
+    Categorical,
+    CategoricalDtype,
+    CategoricalIndex,
+    DataFrame,
+    DatetimeIndex,
+    Index,
+    MultiIndex,
+    Series,
+    Timestamp,
+    concat,
+    get_dummies,
+    period_range,
+)
+import pandas._testing as tm
+from pandas.core.arrays import SparseArray
+
+
+class TestGetitem:
+    def test_getitem_unused_level_raises(self):
+        # GH#20410
+        mi = MultiIndex(
+            levels=[["a_lot", "onlyone", "notevenone"], [1970, ""]],
+            codes=[[1, 0], [1, 0]],
+        )
+        df = DataFrame(-1, index=range(3), columns=mi)
+
+        with pytest.raises(KeyError, match="notevenone"):
+            df["notevenone"]
+
+    def test_getitem_periodindex(self):
+        rng = period_range("1/1/2000", periods=5)
+        df = DataFrame(np.random.randn(10, 5), columns=rng)
+
+        ts = df[rng[0]]
+        tm.assert_series_equal(ts, df.iloc[:, 0])
+
+        # GH#1211; smoketest unrelated to the rest of this test
+        repr(df)
+
+        ts = df["1/1/2000"]
+        tm.assert_series_equal(ts, df.iloc[:, 0])
+
+    def test_getitem_list_of_labels_categoricalindex_cols(self):
+        # GH#16115
+        cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])
+
+        expected = DataFrame(
+            [[1, 0], [0, 1]], dtype="uint8", index=[0, 1], columns=cats
+        )
+        dummies = get_dummies(cats)
+        result = dummies[list(dummies.columns)]
+        tm.assert_frame_equal(result, expected)
+
+    def test_getitem_sparse_column_return_type_and_dtype(self):
+        # https://github.com/pandas-dev/pandas/issues/23559
+        data = SparseArray([0, 1])
+        df = DataFrame({"A": data})
+        expected = Series(data, name="A")
+        result = df["A"]
+        tm.assert_series_equal(result, expected)
+
+        # Also check iloc and loc while we're here
+        result = df.iloc[:, 0]
+        tm.assert_series_equal(result, expected)
+
+        result = df.loc[:, "A"]
+        tm.assert_series_equal(result, expected)
+
+
+class TestGetitemListLike:
+    """Tests for list-like keys passed to ``__getitem__`` and ``iloc``."""
+
+    def test_getitem_list_missing_key(self):
+        """The KeyError for a missing label names that label ('y')."""
+        # GH#13822, incorrect error string with non-unique columns when missing
+        # column is accessed
+        df = DataFrame({"x": [1.0], "y": [2.0], "z": [3.0]})
+        df.columns = ["x", "x", "z"]
+
+        # Check that we get the correct value in the KeyError
+        with pytest.raises(KeyError, match=r"\['y'\] not in index"):
+            df[["x", "y", "z"]]
+
+    def test_getitem_list_duplicates(self):
+        """Selecting unique labels from duplicated columns keeps columns.name."""
+        # GH#1943
+        df = DataFrame(np.random.randn(4, 4), columns=list("AABC"))
+        df.columns.name = "foo"
+
+        result = df[["B", "C"]]
+        assert result.columns.name == "foo"
+
+        # "B" and "C" are the last two of the four columns
+        expected = df.iloc[:, 2:]
+        tm.assert_frame_equal(result, expected)
+
+    def test_getitem_dupe_cols(self):
+        """A list key with no matching label raises the 'None of ...' KeyError."""
+        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
+        msg = "\"None of [Index(['baf'], dtype='object')] are in the [columns]\""
+        with pytest.raises(KeyError, match=re.escape(msg)):
+            df[["baf"]]
+
+    @pytest.mark.parametrize(
+        "idx_type",
+        [
+            list,
+            iter,
+            Index,
+            set,
+            lambda l: dict(zip(l, range(len(l)))),
+            lambda l: dict(zip(l, range(len(l)))).keys(),
+        ],
+        ids=["list", "iter", "Index", "set", "dict", "dict_keys"],
+    )
+    @pytest.mark.parametrize("levels", [1, 2])
+    def test_getitem_listlike(self, idx_type, levels, float_frame):
+        """Each list-like container type selects the same columns as ``.loc``.
+
+        ``idx_type`` wraps a list of keys in the container under test;
+        ``levels`` picks flat (1) or two-level tuple (2) column labels.
+        """
+        # GH#21294
+
+        if levels == 1:
+            frame, missing = float_frame, "food"
+        else:
+            # MultiIndex columns
+            frame = DataFrame(
+                np.random.randn(8, 3),
+                columns=Index(
+                    [("foo", "bar"), ("baz", "qux"), ("peek", "aboo")],
+                    name=("sth", "sth2"),
+                ),
+            )
+            missing = ("good", "food")
+
+        keys = [frame.columns[1], frame.columns[0]]
+        idx = idx_type(keys)
+        # built separately from ``idx`` (which may be a one-shot iterator)
+        idx_check = list(idx_type(keys))
+
+        # set and dict keys are expected to emit a FutureWarning
+        if isinstance(idx, (set, dict)):
+            with tm.assert_produces_warning(FutureWarning):
+                result = frame[idx]
+        else:
+            result = frame[idx]
+
+        expected = frame.loc[:, idx_check]
+        expected.columns.names = frame.columns.names
+
+        tm.assert_frame_equal(result, expected)
+
+        # adding a label that is not a column must raise
+        idx = idx_type(keys + [missing])
+        with pytest.raises(KeyError, match="not in index"):
+            with tm.assert_produces_warning(FutureWarning):
+                frame[idx]
+
+    def test_getitem_iloc_generator(self):
+        """``iloc`` accepts a generator of row positions."""
+        # GH#39614
+        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+        indexer = (x for x in [1, 2])
+        result = df.iloc[indexer]
+        expected = DataFrame({"a": [2, 3], "b": [5, 6]}, index=[1, 2])
+        tm.assert_frame_equal(result, expected)
+
+    def test_getitem_iloc_two_dimensional_generator(self):
+        """``iloc`` accepts a row generator combined with a column position."""
+        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+        indexer = (x for x in [1, 2])
+        result = df.iloc[indexer, 1]
+        expected = Series([5, 6], name="b", index=[1, 2])
+        tm.assert_series_equal(result, expected)
+
+
+class TestGetitemCallable:
+    def test_getitem_callable(self, float_frame):
+        # GH#12533
+        result = float_frame[lambda x: "A"]
+        expected = float_frame.loc[:, "A"]
+        tm.assert_series_equal(result, expected)
+
+        result = float_frame[lambda x: ["A", "B"]]
+        expected = float_frame.loc[:, ["A", "B"]]
+        tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]])
+
+        df = float_frame[:3]
+        result = df[lambda x: [True, False, True]]
+        expected = float_frame.iloc[[0, 2], :]
+        tm.assert_frame_equal(result, expected)
+
+    def test_loc_multiindex_columns_one_level(self):
+        # GH#29749
+        df = DataFrame([[1, 2]], columns=[["a", "b"]])
+        expected = DataFrame([1], columns=[["a"]])
+
+        result = df["a"]
+        tm.assert_frame_equal(result, expected)
+
+        result = df.loc[:, "a"]
+        tm.assert_frame_equal(result, expected)
+
+
+class TestGetitemBooleanMask:
+    """Tests for boolean array, Series and DataFrame keys to ``__getitem__``."""
+
+    def test_getitem_bool_mask_categorical_index(self):
+        """Masks built by comparing a CategoricalIndex select the right rows.
+
+        ``df3`` has an ordered categorical index, ``df4`` an unordered one;
+        both share the same values and categories.
+        """
+
+        df3 = DataFrame(
+            {
+                "A": np.arange(6, dtype="int64"),
+            },
+            index=CategoricalIndex(
+                [1, 1, 2, 1, 3, 2],
+                dtype=CategoricalDtype([3, 2, 1], ordered=True),
+                name="B",
+            ),
+        )
+        df4 = DataFrame(
+            {
+                "A": np.arange(6, dtype="int64"),
+            },
+            index=CategoricalIndex(
+                [1, 1, 2, 1, 3, 2],
+                dtype=CategoricalDtype([3, 2, 1], ordered=False),
+                name="B",
+            ),
+        )
+
+        # equality with a value that is not a category matches no rows
+        result = df3[df3.index == "a"]
+        expected = df3.iloc[[]]
+        tm.assert_frame_equal(result, expected)
+
+        result = df4[df4.index == "a"]
+        expected = df4.iloc[[]]
+        tm.assert_frame_equal(result, expected)
+
+        # equality with an existing category matches positions 0, 1 and 3
+        result = df3[df3.index == 1]
+        expected = df3.iloc[[0, 1, 3]]
+        tm.assert_frame_equal(result, expected)
+
+        result = df4[df4.index == 1]
+        expected = df4.iloc[[0, 1, 3]]
+        tm.assert_frame_equal(result, expected)
+
+        # since we have an ordered categorical
+
+        # CategoricalIndex([1, 1, 2, 1, 3, 2],
+        #         categories=[3, 2, 1],
+        #         ordered=True,
+        #         name='B')
+        # with categories ordered [3, 2, 1], only the label 3 sorts below 2
+        result = df3[df3.index < 2]
+        expected = df3.iloc[[4]]
+        tm.assert_frame_equal(result, expected)
+
+        # 1 is the last category, so nothing sorts above it
+        result = df3[df3.index > 1]
+        expected = df3.iloc[[]]
+        tm.assert_frame_equal(result, expected)
+
+        # unordered
+        # cannot be compared
+
+        # CategoricalIndex([1, 1, 2, 1, 3, 2],
+        #         categories=[3, 2, 1],
+        #         ordered=False,
+        #         name='B')
+        msg = "Unordered Categoricals can only compare equality or not"
+        with pytest.raises(TypeError, match=msg):
+            df4[df4.index < 2]
+        with pytest.raises(TypeError, match=msg):
+            df4[df4.index > 1]
+
+    @pytest.mark.parametrize(
+        "data1,data2,expected_data",
+        (
+            (
+                [[1, 2], [3, 4]],
+                [[0.5, 6], [7, 8]],
+                [[np.nan, 3.0], [np.nan, 4.0], [np.nan, 7.0], [6.0, 8.0]],
+            ),
+            (
+                [[1, 2], [3, 4]],
+                [[5, 6], [7, 8]],
+                [[np.nan, 3.0], [np.nan, 4.0], [5, 7], [6, 8]],
+            ),
+        ),
+    )
+    def test_getitem_bool_mask_duplicate_columns_mixed_dtypes(
+        self,
+        data1,
+        data2,
+        expected_data,
+    ):
+        """A boolean-frame mask works on a frame with repeated column labels.
+
+        ``data1`` and ``data2`` are concatenated column-wise, which repeats
+        the labels 0 and 1; ``expected_data`` holds one list per column.
+        """
+        # GH#31954
+
+        df1 = DataFrame(np.array(data1))
+        df2 = DataFrame(np.array(data2))
+        df = concat([df1, df2], axis=1)
+
+        result = df[df > 2]
+
+        # build with unique labels 0..3, then rename to the duplicated labels
+        exdict = {i: np.array(col) for i, col in enumerate(expected_data)}
+        expected = DataFrame(exdict).rename(columns={2: 0, 3: 1})
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.fixture
+    def df_dup_cols(self):
+        """Return a 3x4 float64 frame whose first two columns are both "A"."""
+        dups = ["A", "A", "C", "D"]
+        df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64")
+        return df
+
+    def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self, df_dup_cols):
+        """A mask built from the duplicated label raises ``ValueError``."""
+        # `df.A > 6` is a DataFrame with a different shape from df
+
+        # boolean with the duplicate raises
+        df = df_dup_cols
+        msg = "cannot reindex on an axis with duplicate labels"
+        with pytest.raises(ValueError, match=msg):
+            with tm.assert_produces_warning(FutureWarning, match="non-unique"):
+                df[df.A > 6]
+
+    def test_getitem_boolean_series_with_duplicate_columns(self, df_dup_cols):
+        """A boolean Series mask filters rows despite duplicated labels."""
+        # boolean indexing
+        # GH#4879
+        # the expected result comes from an equivalent frame with unique labels
+        df = DataFrame(
+            np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
+        )
+        expected = df[df.C > 6]
+        expected.columns = df_dup_cols.columns
+
+        df = df_dup_cols
+        result = df[df.C > 6]
+
+        tm.assert_frame_equal(result, expected)
+        # smoke checks: neither call should raise on the result
+        result.dtypes
+        str(result)
+
+    def test_getitem_boolean_frame_with_duplicate_columns(self, df_dup_cols):
+        """A same-shaped boolean frame mask works despite duplicated labels."""
+
+        # where
+        # the expected result comes from an equivalent frame with unique labels
+        df = DataFrame(
+            np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
+        )
+        # `df > 6` is a DataFrame with the same shape+alignment as df
+        expected = df[df > 6]
+        expected.columns = df_dup_cols.columns
+
+        df = df_dup_cols
+        result = df[df > 6]
+
+        tm.assert_frame_equal(result, expected)
+        # smoke checks: neither call should raise on the result
+        result.dtypes
+        str(result)
+
+    def test_getitem_empty_frame_with_boolean(self):
+        """Masking an empty frame with its own comparison returns an equal frame."""
+        # Test for issue GH#11859
+
+        df = DataFrame()
+        df2 = df[df > 0]
+        tm.assert_frame_equal(df, df2)
+
+
+class TestGetitemSlice:
+    def test_getitem_slice_float64(self, frame_or_series):
+        values = np.arange(10.0, 50.0, 2)
+        index = Index(values)
+
+        start, end = values[[5, 15]]
+
+        data = np.random.randn(20, 3)
+        if frame_or_series is not DataFrame:
+            data = data[:, 0]
+
+        obj = frame_or_series(data, index=index)
+
+        result = obj[start:end]
+        expected = obj.iloc[5:16]
+        tm.assert_equal(result, expected)
+
+        result = obj.loc[start:end]
+        tm.assert_equal(result, expected)
+
+    def test_getitem_datetime_slice(self):
+        # GH#43223
+        df = DataFrame(
+            {"a": 0},
+            index=DatetimeIndex(
+                [
+                    "11.01.2011 22:00",
+                    "11.01.2011 23:00",
+                    "12.01.2011 00:00",
+                    "2011-01-13 00:00",
+                ]
+            ),
+        )
+        with tm.assert_produces_warning(FutureWarning):
+            result = df["2011-01-01":"2011-11-01"]
+        expected = DataFrame(
+            {"a": 0},
+            index=DatetimeIndex(
+                ["11.01.2011 22:00", "11.01.2011 23:00", "2011-01-13 00:00"]
+            ),
+        )
+        tm.assert_frame_equal(result, expected)
+
+
+class TestGetitemDeprecatedIndexers:
+    @pytest.mark.parametrize("key", [{"a", "b"}, {"a": "a"}])
+    def test_getitem_dict_and_set_deprecated(self, key):
+        # GH#42825
+        df = DataFrame(
+            [[1, 2], [3, 4]], columns=MultiIndex.from_tuples([("a", 1), ("b", 2)])
+        )
+        with tm.assert_produces_warning(FutureWarning):
+            df[key]
@@ -0,0 +1,106 @@
+"""
+test_insert is specifically for the DataFrame.insert method; not to be
+confused with tests with "insert" in their names that are really testing
+__setitem__.
+"""
+import numpy as np
+import pytest
+
+from pandas.errors import PerformanceWarning
+
+from pandas import (
+    DataFrame,
+    Index,
+)
+import pandas._testing as tm
+
+
+class TestDataFrameInsert:
+    def test_insert(self):
+        df = DataFrame(
+            np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"]
+        )
+
+        df.insert(0, "foo", df["a"])
+        tm.assert_index_equal(df.columns, Index(["foo", "c", "b", "a"]))
+        tm.assert_series_equal(df["a"], df["foo"], check_names=False)
+
+        df.insert(2, "bar", df["c"])
+        tm.assert_index_equal(df.columns, Index(["foo", "c", "bar", "b", "a"]))
+        tm.assert_almost_equal(df["c"], df["bar"], check_names=False)
+
+        with pytest.raises(ValueError, match="already exists"):
+            df.insert(1, "a", df["b"])
+
+        msg = "cannot insert c, already exists"
+        with pytest.raises(ValueError, match=msg):
+            df.insert(1, "c", df["b"])
+
+        df.columns.name = "some_name"
+        # preserve columns name field
+        df.insert(0, "baz", df["c"])
+        assert df.columns.name == "some_name"
+
+    def test_insert_column_bug_4032(self):
+
+        # GH#4032, inserting a column and renaming causing errors
+        df = DataFrame({"b": [1.1, 2.2]})
+
+        df = df.rename(columns={})
+        df.insert(0, "a", [1, 2])
+        result = df.rename(columns={})
+
+        str(result)
+        expected = DataFrame([[1, 1.1], [2, 2.2]], columns=["a", "b"])
+        tm.assert_frame_equal(result, expected)
+
+        df.insert(0, "c", [1.3, 2.3])
+        result = df.rename(columns={})
+
+        str(result)
+        expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"])
+        tm.assert_frame_equal(result, expected)
+
+    def test_insert_with_columns_dups(self):
+        # GH#14291
+        df = DataFrame()
+        df.insert(0, "A", ["g", "h", "i"], allow_duplicates=True)
+        df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True)
+        df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True)
+        exp = DataFrame(
+            [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"]
+        )
+        tm.assert_frame_equal(df, exp)
+
+    def test_insert_item_cache(self, using_array_manager):
+        df = DataFrame(np.random.randn(4, 3))
+        ser = df[0]
+
+        if using_array_manager:
+            expected_warning = None
+        else:
+            # with BlockManager warn about high fragmentation of single dtype
+            expected_warning = PerformanceWarning
+
+        with tm.assert_produces_warning(expected_warning):
+            for n in range(100):
+                df[n + 3] = df[1] * n
+
+        ser.values[0] = 99
+
+        assert df.iloc[0, 0] == df[0][0]
+
+    def test_insert_EA_no_warning(self):
+        # PerformanceWarning about fragmented frame should not be raised when
+        # using EAs (https://github.com/pandas-dev/pandas/issues/44098)
+        df = DataFrame(np.random.randint(0, 100, size=(3, 100)), dtype="Int64")
+        with tm.assert_produces_warning(None):
+            df["a"] = np.array([1, 2, 3])
+
+    def test_insert_frame(self):
+        # GH#42403
+        df = DataFrame({"col1": [1, 2], "col2": [3, 4]})
+
+        msg = r"Expected a 1D array, got an array with shape \(2, 2\)"
+        with pytest.raises(ValueError, match=msg):
+            df.insert(1, "newcol", df)
@@ -0,0 +1,94 @@
+import numpy as np
+import pytest
+
+from pandas import (
+    DataFrame,
+    Series,
+)
+import pandas._testing as tm
+
+
+class TestLookup:
+    """Tests for ``DataFrame.lookup``; every call must emit FutureWarning."""
+
+    def test_lookup_float(self, float_frame):
+        """``lookup`` matches per-cell ``.loc`` access on the float fixture."""
+        df = float_frame
+        # repeat both label lists to the same length so they pair up in zip
+        rows = list(df.index) * len(df.columns)
+        cols = list(df.columns) * len(df.index)
+        with tm.assert_produces_warning(FutureWarning):
+            result = df.lookup(rows, cols)
+
+        expected = np.array([df.loc[r, c] for r, c in zip(rows, cols)])
+        tm.assert_numpy_array_equal(result, expected)
+
+    def test_lookup_mixed(self, float_string_frame):
+        """``lookup`` matches per-cell ``.loc`` access, compared as object."""
+        df = float_string_frame
+        rows = list(df.index) * len(df.columns)
+        cols = list(df.columns) * len(df.index)
+        with tm.assert_produces_warning(FutureWarning):
+            result = df.lookup(rows, cols)
+
+        expected = np.array(
+            [df.loc[r, c] for r, c in zip(rows, cols)], dtype=np.object_
+        )
+        tm.assert_almost_equal(result, expected)
+
+    def test_lookup_bool(self):
+        """Looking up boolean columns by computed name keeps a bool dtype."""
+        df = DataFrame(
+            {
+                "label": ["a", "b", "a", "c"],
+                "mask_a": [True, True, False, True],
+                "mask_b": [True, False, False, False],
+                "mask_c": [False, True, False, True],
+            }
+        )
+        # the column to read for each row is "mask_" + that row's label
+        with tm.assert_produces_warning(FutureWarning):
+            df["mask"] = df.lookup(df.index, "mask_" + df["label"])
+
+        exp_mask = np.array(
+            [df.loc[r, c] for r, c in zip(df.index, "mask_" + df["label"])]
+        )
+
+        tm.assert_series_equal(df["mask"], Series(exp_mask, name="mask"))
+        assert df["mask"].dtype == np.bool_
+
+    def test_lookup_raises(self, float_frame):
+        """Unknown labels raise KeyError; mismatched lengths raise ValueError."""
+        with pytest.raises(KeyError, match="'One or more row labels was not found'"):
+            with tm.assert_produces_warning(FutureWarning):
+                float_frame.lookup(["xyz"], ["A"])
+
+        with pytest.raises(KeyError, match="'One or more column labels was not found'"):
+            with tm.assert_produces_warning(FutureWarning):
+                float_frame.lookup([float_frame.index[0]], ["xyz"])
+
+        with pytest.raises(ValueError, match="same size"):
+            with tm.assert_produces_warning(FutureWarning):
+                float_frame.lookup(["a", "b", "c"], ["a"])
+
+    def test_lookup_requires_unique_axes(self):
+        """Duplicate labels on either axis raise a descriptive ValueError."""
+        # GH#33041 raise with a helpful error message
+        df = DataFrame(np.random.randn(6).reshape(3, 2), columns=["A", "A"])
+
+        rows = [0, 1]
+        cols = ["A", "A"]
+
+        # homogeneous-dtype case
+        with pytest.raises(ValueError, match="requires unique index and columns"):
+            with tm.assert_produces_warning(FutureWarning):
+                df.lookup(rows, cols)
+        # transposed: the duplicated labels are now on the index
+        with pytest.raises(ValueError, match="requires unique index and columns"):
+            with tm.assert_produces_warning(FutureWarning):
+                df.T.lookup(cols, rows)
+
+        # heterogeneous dtype
+        df["B"] = 0
+        with pytest.raises(ValueError, match="requires unique index and columns"):
+            with tm.assert_produces_warning(FutureWarning):
+                df.lookup(rows, cols)
+
+
+def test_lookup_deprecated():
+    # GH#18262
+    df = DataFrame(
+        {"col": ["A", "A", "B", "B"], "A": [80, 23, np.nan, 22], "B": [80, 55, 76, 67]}
+    )
+    with tm.assert_produces_warning(FutureWarning):
+        df.lookup(df.index, df["col"])
@@ -0,0 +1,162 @@
+"""
+Tests for DataFrame.mask; tests DataFrame.where as a side-effect.
+"""
+
+import numpy as np
+
+from pandas import (
+    NA,
+    DataFrame,
+    Series,
+    StringDtype,
+    Timedelta,
+    isna,
+)
+import pandas._testing as tm
+
+
+class TestDataFrameMask:
+    def test_mask(self):
+        df = DataFrame(np.random.randn(5, 3))
+        cond = df > 0
+
+        rs = df.where(cond, np.nan)
+        tm.assert_frame_equal(rs, df.mask(df <= 0))
+        tm.assert_frame_equal(rs, df.mask(~cond))
+
+        other = DataFrame(np.random.randn(5, 3))
+        rs = df.where(cond, other)
+        tm.assert_frame_equal(rs, df.mask(df <= 0, other))
+        tm.assert_frame_equal(rs, df.mask(~cond, other))
+
+    def test_mask2(self):
+        # see GH#21891
+        df = DataFrame([1, 2])
+        res = df.mask([[True], [False]])
+
+        exp = DataFrame([np.nan, 2])
+        tm.assert_frame_equal(res, exp)
+
+    def test_mask_inplace(self):
+        # GH#8801
+        df = DataFrame(np.random.randn(5, 3))
+        cond = df > 0
+
+        rdf = df.copy()
+
+        return_value = rdf.where(cond, inplace=True)
+        assert return_value is None
+        tm.assert_frame_equal(rdf, df.where(cond))
+        tm.assert_frame_equal(rdf, df.mask(~cond))
+
+        rdf = df.copy()
+        return_value = rdf.where(cond, -df, inplace=True)
+        assert return_value is None
+        tm.assert_frame_equal(rdf, df.where(cond, -df))
+        tm.assert_frame_equal(rdf, df.mask(~cond, -df))
+
+    def test_mask_edge_case_1xN_frame(self):
+        # GH#4071
+        df = DataFrame([[1, 2]])
+        res = df.mask(DataFrame([[True, False]]))
+        expec = DataFrame([[np.nan, 2]])
+        tm.assert_frame_equal(res, expec)
+
+    def test_mask_callable(self):
+        # GH#12533
+        df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+        result = df.mask(lambda x: x > 4, lambda x: x + 1)
+        exp = DataFrame([[1, 2, 3], [4, 6, 7], [8, 9, 10]])
+        tm.assert_frame_equal(result, exp)
+        tm.assert_frame_equal(result, df.mask(df > 4, df + 1))
+
+        # return ndarray and scalar
+        result = df.mask(lambda x: (x % 2 == 0).values, lambda x: 99)
+        exp = DataFrame([[1, 99, 3], [99, 5, 99], [7, 99, 9]])
+        tm.assert_frame_equal(result, exp)
+        tm.assert_frame_equal(result, df.mask(df % 2 == 0, 99))
+
+        # chain
+        result = (df + 2).mask(lambda x: x > 8, lambda x: x + 10)
+        exp = DataFrame([[3, 4, 5], [6, 7, 8], [19, 20, 21]])
+        tm.assert_frame_equal(result, exp)
+        tm.assert_frame_equal(result, (df + 2).mask((df + 2) > 8, (df + 2) + 10))
+
+    def test_mask_dtype_bool_conversion(self):
+        # GH#3733
+        df = DataFrame(data=np.random.randn(100, 50))
+        df = df.where(df > 0)  # create nans
+        bools = df > 0
+        mask = isna(df)
+        expected = bools.astype(object).mask(mask)
+        result = bools.mask(mask)
+        tm.assert_frame_equal(result, expected)
+
+    def test_mask_pos_args_deprecation(self, frame_or_series):
+        # https://github.com/pandas-dev/pandas/issues/41485
+        obj = DataFrame({"a": range(5)})
+        expected = DataFrame({"a": [-1, 1, -1, 3, -1]})
+        obj = tm.get_obj(obj, frame_or_series)
+        expected = tm.get_obj(expected, frame_or_series)
+
+        cond = obj % 2 == 0
+        msg = (
+            r"In a future version of pandas all arguments of "
+            f"{frame_or_series.__name__}.mask except for "
+            r"the arguments 'cond' and 'other' will be keyword-only"
+        )
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = obj.mask(cond, -1, False)
+        tm.assert_equal(result, expected)
+
+
+def test_mask_try_cast_deprecated(frame_or_series):
+
+    obj = DataFrame(np.random.randn(4, 3))
+    if frame_or_series is not DataFrame:
+        obj = obj[0]
+
+    mask = obj > 0
+
+    with tm.assert_produces_warning(FutureWarning):
+        # try_cast keyword deprecated
+        obj.mask(mask, -1, try_cast=True)
+
+
+def test_mask_stringdtype(frame_or_series):
+    # GH 40824
+    obj = DataFrame(
+        {"A": ["foo", "bar", "baz", NA]},
+        index=["id1", "id2", "id3", "id4"],
+        dtype=StringDtype(),
+    )
+    filtered_obj = DataFrame(
+        {"A": ["this", "that"]}, index=["id2", "id3"], dtype=StringDtype()
+    )
+    expected = DataFrame(
+        {"A": [NA, "this", "that", NA]},
+        index=["id1", "id2", "id3", "id4"],
+        dtype=StringDtype(),
+    )
+    if frame_or_series is Series:
+        obj = obj["A"]
+        filtered_obj = filtered_obj["A"]
+        expected = expected["A"]
+
+    filter_ser = Series([False, True, True, False])
+    result = obj.mask(filter_ser, filtered_obj)
+
+    tm.assert_equal(result, expected)
+
+
+def test_mask_where_dtype_timedelta():
+    # https://github.com/pandas-dev/pandas/issues/39548
+    df = DataFrame([Timedelta(i, unit="d") for i in range(5)])
+
+    expected = DataFrame(np.full(5, np.nan, dtype="timedelta64[ns]"))
+    tm.assert_frame_equal(df.mask(df.notna()), expected)
+
+    expected = DataFrame(
+        [np.nan, np.nan, np.nan, Timedelta("3 day"), Timedelta("4 day")]
+    )
+    tm.assert_frame_equal(df.where(df > Timedelta(2, unit="d")), expected)
@@ -0,0 +1,68 @@
+import numpy as np
+
+from pandas.core.dtypes.common import is_float_dtype
+
+from pandas import (
+    DataFrame,
+    isna,
+)
+
+
+class TestSetValue:
+    def test_set_value(self, float_frame):
+        for idx in float_frame.index:
+            for col in float_frame.columns:
+                float_frame._set_value(idx, col, 1)
+                assert float_frame[col][idx] == 1
+
+    def test_set_value_resize(self, float_frame):
+
+        res = float_frame._set_value("foobar", "B", 0)
+        assert res is None
+        assert float_frame.index[-1] == "foobar"
+        assert float_frame._get_value("foobar", "B") == 0
+
+        float_frame.loc["foobar", "qux"] = 0
+        assert float_frame._get_value("foobar", "qux") == 0
+
+        res = float_frame.copy()
+        res._set_value("foobar", "baz", "sam")
+        assert res["baz"].dtype == np.object_
+
+        res = float_frame.copy()
+        res._set_value("foobar", "baz", True)
+        assert res["baz"].dtype == np.object_
+
+        res = float_frame.copy()
+        res._set_value("foobar", "baz", 5)
+        assert is_float_dtype(res["baz"])
+        assert isna(res["baz"].drop(["foobar"])).all()
+
+        res._set_value("foobar", "baz", "sam")
+        assert res.loc["foobar", "baz"] == "sam"
+
+    def test_set_value_with_index_dtype_change(self):
+        df_orig = DataFrame(np.random.randn(3, 3), index=range(3), columns=list("ABC"))
+
+        # this is actually ambiguous as the 2 is interpreted as a positional
+        # so column is not created
+        df = df_orig.copy()
+        df._set_value("C", 2, 1.0)
+        assert list(df.index) == list(df_orig.index) + ["C"]
+        # assert list(df.columns) == list(df_orig.columns) + [2]
+
+        df = df_orig.copy()
+        df.loc["C", 2] = 1.0
+        assert list(df.index) == list(df_orig.index) + ["C"]
+        # assert list(df.columns) == list(df_orig.columns) + [2]
+
+        # create both new
+        df = df_orig.copy()
+        df._set_value("C", "D", 1.0)
+        assert list(df.index) == list(df_orig.index) + ["C"]
+        assert list(df.columns) == list(df_orig.columns) + ["D"]
+
+        df = df_orig.copy()
+        df.loc["C", "D"] = 1.0
+        assert list(df.index) == list(df_orig.index) + ["C"]
+        assert list(df.columns) == list(df_orig.columns) + ["D"]
@@ -0,0 +1,88 @@
+import pytest
+
+import pandas._testing as tm
+
+
+class TestDataFrameTake:
+    # Tests for DataFrame.take along both axes; results are compared with the
+    # equivalent reindex (rows) or .loc column selection on the same frame.
+    def test_take(self, float_frame):
+        # Positive, negative and out-of-bounds positions on a float frame.
+        # The expected column lists imply the fixture's columns are A, B, C, D.
+        # homogeneous
+        order = [3, 1, 2, 0]
+        for df in [float_frame]:
+
+            result = df.take(order, axis=0)
+            expected = df.reindex(df.index.take(order))
+            tm.assert_frame_equal(result, expected)
+
+            # axis = 1
+            result = df.take(order, axis=1)
+            expected = df.loc[:, ["D", "B", "C", "A"]]
+            tm.assert_frame_equal(result, expected, check_names=False)
+
+        # negative indices
+        order = [2, 1, -1]
+        for df in [float_frame]:
+
+            result = df.take(order, axis=0)
+            expected = df.reindex(df.index.take(order))
+            tm.assert_frame_equal(result, expected)
+
+            # NOTE(review): repeats the axis=0 take/compare directly above
+            result = df.take(order, axis=0)
+            tm.assert_frame_equal(result, expected)
+
+            # axis = 1
+            result = df.take(order, axis=1)
+            expected = df.loc[:, ["C", "B", "D"]]
+            tm.assert_frame_equal(result, expected, check_names=False)
+
+        # illegal indices
+        # NOTE: df is the loop variable left bound by the loop above
+        # (i.e. float_frame); 30 / -31 presumably exceed its row count.
+        msg = "indices are out-of-bounds"
+        with pytest.raises(IndexError, match=msg):
+            df.take([3, 1, 2, 30], axis=0)
+        with pytest.raises(IndexError, match=msg):
+            df.take([3, 1, 2, -31], axis=0)
+        with pytest.raises(IndexError, match=msg):
+            df.take([3, 1, 2, 5], axis=1)
+        with pytest.raises(IndexError, match=msg):
+            df.take([3, 1, 2, -5], axis=1)
+
+    def test_take_mixed_type(self, float_string_frame):
+        # Same checks on a frame mixing float and string columns; the expected
+        # lists imply the column order A, B, C, D, foo.
+
+        # mixed-dtype
+        order = [4, 1, 2, 0, 3]
+        for df in [float_string_frame]:
+
+            result = df.take(order, axis=0)
+            expected = df.reindex(df.index.take(order))
+            tm.assert_frame_equal(result, expected)
+
+            # axis = 1
+            result = df.take(order, axis=1)
+            expected = df.loc[:, ["foo", "B", "C", "A", "D"]]
+            tm.assert_frame_equal(result, expected)
+
+        # negative indices
+        order = [4, 1, -2]
+        for df in [float_string_frame]:
+
+            result = df.take(order, axis=0)
+            expected = df.reindex(df.index.take(order))
+            tm.assert_frame_equal(result, expected)
+
+            # axis = 1
+            result = df.take(order, axis=1)
+            expected = df.loc[:, ["foo", "B", "D"]]
+            tm.assert_frame_equal(result, expected)
+
+    def test_take_mixed_numeric(self, mixed_float_frame, mixed_int_frame):
+        # take must give the same result as reindex / .loc on frames whose
+        # columns have differing numeric dtypes (dtypes are compared too,
+        # since assert_frame_equal is called with its defaults).
+        # by dtype
+        order = [1, 2, 0, 3]
+        for df in [mixed_float_frame, mixed_int_frame]:
+
+            result = df.take(order, axis=0)
+            expected = df.reindex(df.index.take(order))
+            tm.assert_frame_equal(result, expected)
+
+            # axis = 1
+            result = df.take(order, axis=1)
+            expected = df.loc[:, ["B", "C", "A", "D"]]
+            tm.assert_frame_equal(result, expected)
@@ -0,0 +1,929 @@
+from datetime import datetime
+
+from hypothesis import given
+import numpy as np
+import pytest
+
+from pandas.compat import np_version_under1p19
+
+from pandas.core.dtypes.common import is_scalar
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    DatetimeIndex,
+    Series,
+    StringDtype,
+    Timestamp,
+    date_range,
+    isna,
+)
+import pandas._testing as tm
+from pandas._testing._hypothesis import OPTIONAL_ONE_OF_ALL
+
+
+@pytest.fixture(params=["default", "float_string", "mixed_float", "mixed_int"])
+def where_frame(request, float_string_frame, mixed_float_frame, mixed_int_frame):
+    # Parametrized fixture: returns one frame per param -- a fresh random 5x3
+    # float frame with columns A, B, C for "default", otherwise the frame
+    # fixture with the matching name.
+    if request.param == "default":
+        return DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"])
+    if request.param == "float_string":
+        return float_string_frame
+    if request.param == "mixed_float":
+        return mixed_float_frame
+    if request.param == "mixed_int":
+        return mixed_int_frame
+
+
+def _safe_add(df):
+    # Return a new DataFrame built column by column from df: integer and
+    # floating columns (uint8 excepted) are incremented by 1, every other
+    # column is passed through as-is.
+    # only add to the numeric items
+    def is_ok(s):
+        # True for np.integer / np.floating columns whose dtype is not uint8
+        return (
+            issubclass(s.dtype.type, (np.integer, np.floating)) and s.dtype != "uint8"
+        )
+
+    return DataFrame(dict((c, s + 1) if is_ok(s) else (c, s) for c, s in df.items()))
+
+
+class TestDataFrameIndexingWhere:
+    def test_where_get(self, where_frame, float_string_frame):
+        # df.where(cond, other) must match np.where column by column, and a
+        # DataFrame cond must give the same result as its ndarray (.values).
+        def _check_get(df, cond, check_dtypes=True):
+            # other1 is df with its numeric columns shifted by one
+            other1 = _safe_add(df)
+            rs = df.where(cond, other1)
+            rs2 = df.where(cond.values, other1)
+            for k, v in rs.items():
+                exp = Series(np.where(cond[k], df[k], other1[k]), index=v.index)
+                tm.assert_series_equal(v, exp, check_names=False)
+            tm.assert_frame_equal(rs, rs2)
+
+            # dtypes
+            if check_dtypes:
+                assert (rs.dtypes == df.dtypes).all()
+
+        # check getting
+        df = where_frame
+        if df is float_string_frame:
+            # the frame holding strings cannot be compared with 0; only the
+            # TypeError is checked for that parametrization
+            msg = "'>' not supported between instances of 'str' and 'int'"
+            with pytest.raises(TypeError, match=msg):
+                df > 0
+            return
+        cond = df > 0
+        _check_get(df, cond)
+
+    def test_where_upcasting(self):
+        # Each column is named after its own dtype; after writing 0 into one
+        # row through iloc, every column must still report that dtype.
+        # upcasting case (GH # 2794)
+        df = DataFrame(
+            {
+                c: Series([1] * 3, dtype=c)
+                for c in ["float32", "float64", "int32", "int64"]
+            }
+        )
+        df.iloc[1, :] = 0
+        result = df.dtypes
+        expected = Series(
+            [
+                np.dtype("float32"),
+                np.dtype("float64"),
+                np.dtype("int32"),
+                np.dtype("int64"),
+            ],
+            index=["float32", "float64", "int32", "int64"],
+        )
+
+        # when we don't preserve boolean casts
+        #
+        # expected = Series({ 'float32' : 1, 'float64' : 3 })
+
+        tm.assert_series_equal(result, expected)
+
+    def test_where_alignment(self, where_frame, float_string_frame, mixed_int_frame):
+        # df.where with a cond that covers only part of the index, and with
+        # `other` given as a frame, an ndarray and a scalar.
+        # aligning
+        def _check_align(df, cond, other, check_dtypes=True):
+            # Rebuild the expected result one column at a time with np.where.
+            rs = df.where(cond, other)
+            for i, k in enumerate(rs.columns):
+                result = rs[k]
+                d = df[k].values
+                # rows absent from cond are treated as False here
+                c = cond[k].reindex(df[k].index).fillna(False).values
+
+                if is_scalar(other):
+                    o = other
+                else:
+                    if isinstance(other, np.ndarray):
+                        # an ndarray has no labels: take column i positionally
+                        o = Series(other[:, i], index=result.index).values
+                    else:
+                        o = other[k].values
+
+                new_values = d if c.all() else np.where(c, d, o)
+                expected = Series(new_values, index=result.index, name=k)
+
+                # since we can't always have the correct numpy dtype
+                # as numpy doesn't know how to downcast, don't check
+                tm.assert_series_equal(result, expected, check_dtype=False)
+
+            # dtypes
+            # can't check dtype when other is an ndarray
+
+            if check_dtypes and not isinstance(other, np.ndarray):
+                assert (rs.dtypes == df.dtypes).all()
+
+        df = where_frame
+        if df is float_string_frame:
+            # the frame holding strings cannot be compared with 0
+            msg = "'>' not supported between instances of 'str' and 'int'"
+            with pytest.raises(TypeError, match=msg):
+                df > 0
+            return
+
+        # other is a frame
+        cond = (df > 0)[1:]
+        _check_align(df, cond, _safe_add(df))
+
+        # check other is ndarray
+        cond = df > 0
+        # a FutureWarning is expected only for the mixed-int parametrization
+        warn = None
+        if df is mixed_int_frame:
+            warn = FutureWarning
+        with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"):
+            _check_align(df, cond, (_safe_add(df).values))
+
+        # integers are upcast, so don't check the dtypes
+        cond = df > 0
+        check_dtypes = all(not issubclass(s.type, np.integer) for s in df.dtypes)
+        _check_align(df, cond, np.nan, check_dtypes=check_dtypes)
+
+    def test_where_invalid(self):
+        # Mis-shaped ndarray `other` or `cond`, and a scalar cond passed to
+        # mask, must each raise ValueError with the expected message.
+        # invalid conditions
+        df = DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"])
+        cond = df > 0
+
+        # other: only 2 of the 5 rows
+        err1 = (df + 1).values[0:2, :]
+        msg = "other must be the same shape as self when an ndarray"
+        with pytest.raises(ValueError, match=msg):
+            df.where(cond, err1)
+
+        # cond: only 2 of the 5 rows
+        err2 = cond.iloc[:2, :].values
+        other1 = _safe_add(df)
+        msg = "Array conditional must be same shape as self"
+        with pytest.raises(ValueError, match=msg):
+            df.where(err2, other1)
+
+        # scalar conditions are rejected with the same message
+        with pytest.raises(ValueError, match=msg):
+            df.mask(True)
+        with pytest.raises(ValueError, match=msg):
+            df.mask(0)
+
+    def test_where_set(self, where_frame, float_string_frame):
+        # where(..., inplace=True) must return None and leave the frame equal
+        # to the corresponding mask() result.
+        # where inplace
+
+        def _check_set(df, cond, check_dtypes=True):
+            dfi = df.copy()
+            # rows absent from cond are treated as True when building expected
+            econd = cond.reindex_like(df).fillna(True)
+            expected = dfi.mask(~econd)
+
+            return_value = dfi.where(cond, np.nan, inplace=True)
+            assert return_value is None
+            tm.assert_frame_equal(dfi, expected)
+
+            # dtypes (and confirm upcasts)
+            if check_dtypes:
+                for k, v in df.dtypes.items():
+                    # an integer column that had any value replaced is
+                    # expected to come back as float64
+                    if issubclass(v.type, np.integer) and not cond[k].all():
+                        v = np.dtype("float64")
+                    assert dfi[k].dtype == v
+
+        df = where_frame
+        if df is float_string_frame:
+            # the frame holding strings cannot be compared with 0
+            msg = "'>' not supported between instances of 'str' and 'int'"
+            with pytest.raises(TypeError, match=msg):
+                df > 0
+            return
+
+        cond = df > 0
+        _check_set(df, cond)
+
+        cond = df >= 0
+        _check_set(df, cond)
+
+        # aligning
+        cond = (df >= 0)[1:]
+        _check_set(df, cond)
+
+    def test_where_series_slicing(self):
+        # A boolean Series cond must behave like boolean row selection
+        # followed by a reindex back onto the original index.
+        # GH 10218
+        # test DataFrame.where with Series slicing
+        df = DataFrame({"a": range(3), "b": range(4, 7)})
+        result = df.where(df["a"] == 1)
+        expected = df[df["a"] == 1].reindex(df.index)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("klass", [list, tuple, np.array])
+    def test_where_array_like(self, klass):
+        # cond supplied as a list, tuple or ndarray of booleans must be
+        # accepted, for a one-column and then a two-column frame.
+        # see gh-15414
+        df = DataFrame({"a": [1, 2, 3]})
+        cond = [[False], [True], [True]]
+        expected = DataFrame({"a": [np.nan, 2, 3]})
+
+        result = df.where(klass(cond))
+        tm.assert_frame_equal(result, expected)
+
+        # add a second column and repeat with a 3x2 condition
+        df["b"] = 2
+        expected["b"] = [2, np.nan, 2]
+        cond = [[False, True], [True, False], [True, True]]
+
+        result = df.where(klass(cond))
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "cond",
+        [
+            [[1], [0], [1]],
+            Series([[2], [5], [7]]),
+            DataFrame({"a": [2, 5, 7]}),
+            [["True"], ["False"], ["True"]],
+            [[Timestamp("2017-01-01")], [pd.NaT], [Timestamp("2017-01-02")]],
+        ],
+    )
+    def test_where_invalid_input_single(self, cond):
+        # Integer, string and datetime conditions (as lists, a Series or a
+        # DataFrame) must be rejected for a single-column frame.
+        # see gh-15414: only boolean arrays accepted
+        df = DataFrame({"a": [1, 2, 3]})
+        msg = "Boolean array expected for the condition"
+
+        with pytest.raises(ValueError, match=msg):
+            df.where(cond)
+
+    @pytest.mark.parametrize(
+        "cond",
+        [
+            [[0, 1], [1, 0], [1, 1]],
+            Series([[0, 2], [5, 0], [4, 7]]),
+            [["False", "True"], ["True", "False"], ["True", "True"]],
+            DataFrame({"a": [2, 5, 7], "b": [4, 8, 9]}),
+            [
+                [pd.NaT, Timestamp("2017-01-01")],
+                [Timestamp("2017-01-02"), pd.NaT],
+                [Timestamp("2017-01-03"), Timestamp("2017-01-03")],
+            ],
+        ],
+    )
+    def test_where_invalid_input_multiple(self, cond):
+        # Two-column counterpart of the single-column check: non-boolean
+        # conditions must raise ValueError.
+        # see gh-15414: only boolean arrays accepted
+        df = DataFrame({"a": [1, 2, 3], "b": [2, 2, 2]})
+        msg = "Boolean array expected for the condition"
+
+        with pytest.raises(ValueError, match=msg):
+            df.where(cond)
+
+    def test_where_dataframe_col_match(self):
+        # A DataFrame cond is aligned on column labels: with matching labels
+        # it masks element-wise, with disjoint labels everything becomes NaN.
+        df = DataFrame([[1, 2, 3], [4, 5, 6]])
+        cond = DataFrame([[True, False, True], [False, False, True]])
+
+        result = df.where(cond)
+        expected = DataFrame([[1.0, np.nan, 3], [np.nan, np.nan, 6]])
+        tm.assert_frame_equal(result, expected)
+
+        # this *does* align, though has no matching columns
+        cond.columns = ["a", "b", "c"]
+        result = df.where(cond)
+        expected = DataFrame(np.nan, index=df.index, columns=df.columns)
+        tm.assert_frame_equal(result, expected)
+
+    def test_where_ndframe_align(self):
+        # A list/ndarray cond of the wrong shape raises, while the same values
+        # wrapped in a Series are accepted and applied per row label.
+        msg = "Array conditional must be same shape as self"
+        df = DataFrame([[1, 2, 3], [4, 5, 6]])
+
+        cond = [True]
+        with pytest.raises(ValueError, match=msg):
+            df.where(cond)
+
+        # Series([True]) only covers row 0; row 1 comes back as NaN
+        expected = DataFrame([[1, 2, 3], [np.nan, np.nan, np.nan]])
+
+        out = df.where(Series(cond))
+        tm.assert_frame_equal(out, expected)
+
+        cond = np.array([False, True, False, True])
+        with pytest.raises(ValueError, match=msg):
+            df.where(cond)
+
+        # a longer Series: only the entries for labels 0 and 1 affect df
+        expected = DataFrame([[np.nan, np.nan, np.nan], [4, 5, 6]])
+
+        out = df.where(Series(cond))
+        tm.assert_frame_equal(out, expected)
+
+    def test_where_bug(self):
+        # where(df > 2, np.nan) on an all-float64 frame, checked both as a
+        # returned copy and with inplace=True (which must return None).
+        # see gh-2793
+        df = DataFrame(
+            {"a": [1.0, 2.0, 3.0, 4.0], "b": [4.0, 3.0, 2.0, 1.0]}, dtype="float64"
+        )
+        expected = DataFrame(
+            {"a": [np.nan, np.nan, 3.0, 4.0], "b": [4.0, 3.0, np.nan, np.nan]},
+            dtype="float64",
+        )
+        result = df.where(df > 2, np.nan)
+        tm.assert_frame_equal(result, expected)
+
+        result = df.copy()
+        return_value = result.where(result > 2, np.nan, inplace=True)
+        assert return_value is None
+        tm.assert_frame_equal(result, expected)
+
+    def test_where_bug_mixed(self, any_signed_int_numpy_dtype):
+        # Same check with an integer column next to a float64 one: the
+        # expected frame is float64 throughout, i.e. the integer column is
+        # upcast once NaN is written into it.
+        # see gh-2793
+        df = DataFrame(
+            {
+                "a": np.array([1, 2, 3, 4], dtype=any_signed_int_numpy_dtype),
+                "b": np.array([4.0, 3.0, 2.0, 1.0], dtype="float64"),
+            }
+        )
+
+        expected = DataFrame(
+            {"a": [np.nan, np.nan, 3.0, 4.0], "b": [4.0, 3.0, np.nan, np.nan]},
+            dtype="float64",
+        )
+
+        result = df.where(df > 2, np.nan)
+        tm.assert_frame_equal(result, expected)
+
+        result = df.copy()
+        return_value = result.where(result > 2, np.nan, inplace=True)
+        assert return_value is None
+        tm.assert_frame_equal(result, expected)
+
+    def test_where_bug_transposition(self):
+        # a.where(cond, b) with a frame-valued `other` must equal boolean-mask
+        # assignment a[~cond] = b, for a 2x3 and then a 2x2 frame.
+        # see gh-7506
+        a = DataFrame({0: [1, 2], 1: [3, 4], 2: [5, 6]})
+        b = DataFrame({0: [np.nan, 8], 1: [9, np.nan], 2: [np.nan, np.nan]})
+        # keep a's value wherever b is missing or a is already larger
+        do_not_replace = b.isna() | (a > b)
+
+        expected = a.copy()
+        expected[~do_not_replace] = b
+
+        result = a.where(do_not_replace, b)
+        tm.assert_frame_equal(result, expected)
+
+        a = DataFrame({0: [4, 6], 1: [1, 0]})
+        b = DataFrame({0: [np.nan, 3], 1: [3, np.nan]})
+        do_not_replace = b.isna() | (a > b)
+
+        expected = a.copy()
+        expected[~do_not_replace] = b
+
+        result = a.where(do_not_replace, b)
+        tm.assert_frame_equal(result, expected)
+
+    def test_where_datetime(self):
+        # Comparing a frame with a float column against a datetime raises;
+        # indexing with a condition built from the datetime columns only
+        # leaves the uncovered float column entirely NaN.
+
+        # GH 3311
+        df = DataFrame(
+            {
+                "A": date_range("20130102", periods=5),
+                "B": date_range("20130104", periods=5),
+                "C": np.random.randn(5),
+            }
+        )
+
+        stamp = datetime(2013, 1, 3)
+        msg = "'>' not supported between instances of 'float' and 'datetime.datetime'"
+        with pytest.raises(TypeError, match=msg):
+            df > stamp
+
+        # condition covers columns A and B only (last column sliced off)
+        result = df[df.iloc[:, :-1] > stamp]
+
+        expected = df.copy()
+        expected.loc[[0, 1], "A"] = np.nan
+        expected.loc[:, "C"] = np.nan
+        tm.assert_frame_equal(result, expected)
+
+    def test_where_none(self):
+        # Assigning None through a boolean mask on a float frame stores NaN;
+        # an inplace where with other=None on a mixed-type frame raises.
+        # GH 4667
+        # setting with None changes dtype
+        df = DataFrame({"series": Series(range(10))}).astype(float)
+        df[df > 7] = None
+        expected = DataFrame(
+            {"series": Series([0, 1, 2, 3, 4, 5, 6, 7, np.nan, np.nan])}
+        )
+        tm.assert_frame_equal(df, expected)
+
+        # GH 7656
+        df = DataFrame(
+            [
+                {"A": 1, "B": np.nan, "C": "Test"},
+                {"A": np.nan, "B": "Test", "C": np.nan},
+            ]
+        )
+        msg = "boolean setting on mixed-type"
+
+        with pytest.raises(TypeError, match=msg):
+            df.where(~isna(df), None, inplace=True)
+
+    def test_where_empty_df_and_empty_cond_having_non_bool_dtypes(self):
+        # An empty object-dtype frame used as its own condition must be
+        # accepted and returned unchanged, although cond is not boolean.
+        # see gh-21947
+        df = DataFrame(columns=["a"])
+        cond = df
+        assert (cond.dtypes == object).all()
+
+        result = df.where(cond)
+        tm.assert_frame_equal(result, df)
+
+    def test_where_align(self):
+        # where() with `other` given as a Series aligned along either axis,
+        # and as a full DataFrame.
+        def create():
+            # random 10x3 frame with a band of NaNs in each column
+            df = DataFrame(np.random.randn(10, 3))
+            df.iloc[3:5, 0] = np.nan
+            df.iloc[4:6, 1] = np.nan
+            df.iloc[5:8, 2] = np.nan
+            return df
+
+        # series
+        # filling from the column means along axis="columns" must match fillna
+        df = create()
+        expected = df.fillna(df.mean())
+        result = df.where(pd.notna(df), df.mean(), axis="columns")
+        tm.assert_frame_equal(result, expected)
+
+        return_value = df.where(pd.notna(df), df.mean(), inplace=True, axis="columns")
+        assert return_value is None
+        tm.assert_frame_equal(df, expected)
+
+        # "index" and "rows" are both accepted and must give the same result
+        df = create().fillna(0)
+        expected = df.apply(lambda x, y: x.where(x > 0, y), y=df[0])
+        result = df.where(df > 0, df[0], axis="index")
+        tm.assert_frame_equal(result, expected)
+        result = df.where(df > 0, df[0], axis="rows")
+        tm.assert_frame_equal(result, expected)
+
+        # frame
+        df = create()
+        expected = df.fillna(1)
+        result = df.where(
+            pd.notna(df), DataFrame(1, index=df.index, columns=df.columns)
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_where_complex(self):
+        # Boolean-mask assignment of NaN on a frame of complex values: only
+        # the entry whose absolute value is >= 5 is replaced.
+        # GH 6345
+        expected = DataFrame([[1 + 1j, 2], [np.nan, 4 + 1j]], columns=["a", "b"])
+        df = DataFrame([[1 + 1j, 2], [5 + 1j, 4 + 1j]], columns=["a", "b"])
+        df[df.abs() >= 5] = np.nan
+        tm.assert_frame_equal(df, expected)
+
+    def test_where_axis(self, using_array_manager):
+        # With an all-False mask the result is `other` broadcast along the
+        # requested axis; checked as a copy and inplace, without and with a
+        # required upcast.
+        # GH 9736
+        df = DataFrame(np.random.randn(2, 2))
+        mask = DataFrame([[False, False], [False, False]])
+        s = Series([0, 1])
+
+        # axis="index": s[i] fills row i
+        expected = DataFrame([[0, 0], [1, 1]], dtype="float64")
+        result = df.where(mask, s, axis="index")
+        tm.assert_frame_equal(result, expected)
+
+        result = df.copy()
+        return_value = result.where(mask, s, axis="index", inplace=True)
+        assert return_value is None
+        tm.assert_frame_equal(result, expected)
+
+        # axis="columns": s[j] fills column j
+        expected = DataFrame([[0, 1], [0, 1]], dtype="float64")
+        result = df.where(mask, s, axis="columns")
+        tm.assert_frame_equal(result, expected)
+
+        result = df.copy()
+        return_value = result.where(mask, s, axis="columns", inplace=True)
+        assert return_value is None
+        tm.assert_frame_equal(result, expected)
+
+        # Upcast needed
+        df = DataFrame([[1, 2], [3, 4]], dtype="int64")
+        mask = DataFrame([[False, False], [False, False]])
+        s = Series([0, np.nan])
+
+        expected = DataFrame([[0, 0], [np.nan, np.nan]], dtype="float64")
+        result = df.where(mask, s, axis="index")
+        tm.assert_frame_equal(result, expected)
+
+        result = df.copy()
+        return_value = result.where(mask, s, axis="index", inplace=True)
+        assert return_value is None
+        tm.assert_frame_equal(result, expected)
+
+        # the downcasting FutureWarning is expected only with the array manager
+        warn = FutureWarning if using_array_manager else None
+        expected = DataFrame([[0, np.nan], [0, np.nan]])
+        with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"):
+            result = df.where(mask, s, axis="columns")
+        tm.assert_frame_equal(result, expected)
+
+        # inplace along columns: column 0 is expected to stay int64 while
+        # column 1 becomes float64
+        expected = DataFrame(
+            {
+                0: np.array([0, 0], dtype="int64"),
+                1: np.array([np.nan, np.nan], dtype="float64"),
+            }
+        )
+        result = df.copy()
+        return_value = result.where(mask, s, axis="columns", inplace=True)
+        assert return_value is None
+        tm.assert_frame_equal(result, expected)
+
+    def test_where_axis_multiple_dtypes(self):
+        # where() with an all-False mask on a frame holding two float and two
+        # int64 columns; `other` is a Series along each axis, then a frame
+        # missing one row / one column.
+        # Multiple dtypes (=> multiple Blocks)
+        df = pd.concat(
+            [
+                DataFrame(np.random.randn(10, 2)),
+                DataFrame(np.random.randint(0, 10, size=(10, 2)), dtype="int64"),
+            ],
+            ignore_index=True,
+            axis=1,
+        )
+        mask = DataFrame(False, columns=df.columns, index=df.index)
+        s1 = Series(1, index=df.columns)
+        s2 = Series(2, index=df.index)
+
+        # columns 2 and 3 are the int64 ones and are expected to stay int64
+        result = df.where(mask, s1, axis="columns")
+        expected = DataFrame(1.0, columns=df.columns, index=df.index)
+        expected[2] = expected[2].astype("int64")
+        expected[3] = expected[3].astype("int64")
+        tm.assert_frame_equal(result, expected)
+
+        result = df.copy()
+        return_value = result.where(mask, s1, axis="columns", inplace=True)
+        assert return_value is None
+        tm.assert_frame_equal(result, expected)
+
+        result = df.where(mask, s2, axis="index")
+        expected = DataFrame(2.0, columns=df.columns, index=df.index)
+        expected[2] = expected[2].astype("int64")
+        expected[3] = expected[3].astype("int64")
+        tm.assert_frame_equal(result, expected)
+
+        result = df.copy()
+        return_value = result.where(mask, s2, axis="index", inplace=True)
+        assert return_value is None
+        tm.assert_frame_equal(result, expected)
+
+        # DataFrame vs DataFrame
+        # other lacks row 1, so that row is expected to become NaN
+        d1 = df.copy().drop(1, axis=0)
+        expected = df.copy()
+        expected.loc[1, :] = np.nan
+
+        result = df.where(mask, d1)
+        tm.assert_frame_equal(result, expected)
+        result = df.where(mask, d1, axis="index")
+        tm.assert_frame_equal(result, expected)
+        result = df.copy()
+        return_value = result.where(mask, d1, inplace=True)
+        assert return_value is None
+        tm.assert_frame_equal(result, expected)
+        result = df.copy()
+        return_value = result.where(mask, d1, inplace=True, axis="index")
+        assert return_value is None
+        tm.assert_frame_equal(result, expected)
+
+        # other lacks column 1, so that column is expected to become NaN
+        d2 = df.copy().drop(1, axis=1)
+        expected = df.copy()
+        expected.loc[:, 1] = np.nan
+
+        result = df.where(mask, d2)
+        tm.assert_frame_equal(result, expected)
+        result = df.where(mask, d2, axis="columns")
+        tm.assert_frame_equal(result, expected)
+        result = df.copy()
+        return_value = result.where(mask, d2, inplace=True)
+        assert return_value is None
+        tm.assert_frame_equal(result, expected)
+        result = df.copy()
+        return_value = result.where(mask, d2, inplace=True, axis="columns")
+        assert return_value is None
+        tm.assert_frame_equal(result, expected)
+
+    def test_where_callable(self):
+        # cond and other may be callables taking the frame; each result is
+        # compared with a literal frame and with the non-callable spelling.
+        # GH 12533
+        df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+        result = df.where(lambda x: x > 4, lambda x: x + 1)
+        exp = DataFrame([[2, 3, 4], [5, 5, 6], [7, 8, 9]])
+        tm.assert_frame_equal(result, exp)
+        tm.assert_frame_equal(result, df.where(df > 4, df + 1))
+
+        # return ndarray and scalar
+        result = df.where(lambda x: (x % 2 == 0).values, lambda x: 99)
+        exp = DataFrame([[99, 2, 99], [4, 99, 6], [99, 8, 99]])
+        tm.assert_frame_equal(result, exp)
+        tm.assert_frame_equal(result, df.where(df % 2 == 0, 99))
+
+        # chain
+        # the callables receive the frame where() is called on, i.e. df + 2
+        result = (df + 2).where(lambda x: x > 8, lambda x: x + 10)
+        exp = DataFrame([[13, 14, 15], [16, 17, 18], [9, 10, 11]])
+        tm.assert_frame_equal(result, exp)
+        tm.assert_frame_equal(result, (df + 2).where((df + 2) > 8, (df + 2) + 10))
+
+    def test_where_tz_values(self, tz_naive_fixture, frame_or_series):
+        # where() between two datetime objects built with the same tz must
+        # pick values element-wise; run for DataFrame and for Series.
+        obj1 = DataFrame(
+            DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture),
+            columns=["date"],
+        )
+        obj2 = DataFrame(
+            DatetimeIndex(["20150103", "20150104", "20150105"], tz=tz_naive_fixture),
+            columns=["date"],
+        )
+        mask = DataFrame([True, True, False], columns=["date"])
+        exp = DataFrame(
+            DatetimeIndex(["20150101", "20150102", "20150105"], tz=tz_naive_fixture),
+            columns=["date"],
+        )
+        if frame_or_series is Series:
+            # reduce every operand to its single "date" column
+            obj1 = obj1["date"]
+            obj2 = obj2["date"]
+            mask = mask["date"]
+            exp = exp["date"]
+
+        result = obj1.where(mask, obj2)
+        tm.assert_equal(exp, result)
+
+    def test_df_where_change_dtype(self):
+        # where() with an ndarray mask on an integer frame: masked cells
+        # become NaN, as spelled out in the expected frame.
+        # GH#16979
+        df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC"))
+        mask = np.array([[True, False, False], [False, False, True]])
+
+        result = df.where(mask)
+        expected = DataFrame(
+            [[0, np.nan, np.nan], [np.nan, np.nan, 5]], columns=list("ABC")
+        )
+
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("kwargs", [{}, {"other": None}])
+    def test_df_where_with_category(self, kwargs):
+        # where() on categorical columns keeps each column categorical with
+        # its original categories; run with `other` omitted and with None.
+        # GH#16979
+        df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC"))
+        mask = np.array([[True, False, False], [False, False, True]])
+
+        # change type to category
+        df.A = df.A.astype("category")
+        df.B = df.B.astype("category")
+        df.C = df.C.astype("category")
+
+        result = df.where(mask, **kwargs)
+        A = pd.Categorical([0, np.nan], categories=[0, 3])
+        B = pd.Categorical([np.nan, np.nan], categories=[1, 4])
+        C = pd.Categorical([np.nan, 5], categories=[2, 5])
+        expected = DataFrame({"A": A, "B": B, "C": C})
+
+        tm.assert_frame_equal(result, expected)
+
+        # Check Series.where while we're here
+        result = df.A.where(mask[:, 0], **kwargs)
+        expected = Series(A, name="A")
+
+        tm.assert_series_equal(result, expected)
+
+    def test_where_categorical_filtering(self):
+        # Row filtering through a boolean Series must match assigning NaN to
+        # the filtered-out row via .loc when one column is categorical.
+        # GH#22609 Verify filtering operations on DataFrames with categorical Series
+        df = DataFrame(data=[[0, 0], [1, 1]], columns=["a", "b"])
+        df["b"] = df["b"].astype("category")
+
+        result = df.where(df["a"] > 0)
+        expected = df.copy()
+        expected.loc[0, :] = np.nan
+
+        tm.assert_equal(result, expected)
+
+    def test_where_ea_other(self):
+        # `other` is a Series backed by pd.array([7, pd.NA, 9]); the result
+        # dtypes asserted are object wherever pd.NA ends up in a column.
+        # GH#38729/GH#38742
+        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+        arr = pd.array([7, pd.NA, 9])
+        ser = Series(arr)
+        # replace only the middle row
+        mask = np.ones(df.shape, dtype=bool)
+        mask[1, :] = False
+
+        # TODO: ideally we would get Int64 instead of object
+        result = df.where(mask, ser, axis=0)
+        expected = DataFrame({"A": [1, pd.NA, 3], "B": [4, pd.NA, 6]}).astype(object)
+        tm.assert_frame_equal(result, expected)
+
+        # along columns: "A" receives 7, "B" receives pd.NA
+        ser2 = Series(arr[:2], index=["A", "B"])
+        expected = DataFrame({"A": [1, 7, 3], "B": [4, pd.NA, 6]})
+        expected["B"] = expected["B"].astype(object)
+        result = df.where(mask, ser2, axis=1)
+        tm.assert_frame_equal(result, expected)
+
+    def test_where_interval_noop(self):
+        # where() with a condition that is True everywhere must return data
+        # equal to the input for Interval values (frame and series).
+        # GH#44181
+        df = DataFrame([pd.Interval(0, 0)])
+        res = df.where(df.notna())
+        tm.assert_frame_equal(res, df)
+
+        ser = df[0]
+        res = ser.where(ser.notna())
+        tm.assert_series_equal(res, ser)
+
+    @pytest.mark.parametrize(
+        "dtype",
+        [
+            "timedelta64[ns]",
+            "datetime64[ns]",
+            "datetime64[ns, Asia/Tokyo]",
+            "Period[D]",
+        ],
+    )
+    def test_where_datetimelike_noop(self, dtype):
+        # where/mask that replace nothing must return data equal to the input
+        # even though `other` ("foo") is not valid for the dtype.
+        # GH#45135, analogue to GH#44181 for Period don't raise on no-op
+        # For td64/dt64/dt64tz we already don't raise, but also are
+        #  checking that we don't unnecessarily upcast to object.
+        ser = Series(np.arange(3) * 10**9, dtype=np.int64).view(dtype)
+        df = ser.to_frame()
+        # all-False mask: ~mask keeps everything in where(), mask() masks nothing
+        mask = np.array([False, False, False])
+
+        res = ser.where(~mask, "foo")
+        tm.assert_series_equal(res, ser)
+
+        # 2-D version of the mask for the one-column frame
+        mask2 = mask.reshape(-1, 1)
+        res2 = df.where(~mask2, "foo")
+        tm.assert_frame_equal(res2, df)
+
+        res3 = ser.mask(mask, "foo")
+        tm.assert_series_equal(res3, ser)
+
+        res4 = df.mask(mask2, "foo")
+        tm.assert_frame_equal(res4, df)
+
+
+def test_where_try_cast_deprecated(frame_or_series):
+    # Passing the try_cast keyword to where() must emit a FutureWarning, for
+    # both DataFrame and Series.
+    obj = DataFrame(np.random.randn(4, 3))
+    obj = tm.get_obj(obj, frame_or_series)
+
+    mask = obj > 0
+
+    with tm.assert_produces_warning(FutureWarning):
+        # try_cast keyword deprecated
+        obj.where(mask, -1, try_cast=False)
+
+
+def test_where_int_downcasting_deprecated(using_array_manager):
+    # Filling an int16 frame with 2**17 (too large for int16): column 0 is
+    # untouched and column 1 is expected as int32. The downcasting
+    # FutureWarning is expected only when the array manager is not in use.
+    # GH#44597
+    arr = np.arange(6).astype(np.int16).reshape(3, 2)
+    df = DataFrame(arr)
+
+    # keep column 0, replace all of column 1
+    mask = np.zeros(arr.shape, dtype=bool)
+    mask[:, 0] = True
+
+    msg = "Downcasting integer-dtype"
+    warn = FutureWarning if not using_array_manager else None
+    with tm.assert_produces_warning(warn, match=msg):
+        res = df.where(mask, 2**17)
+
+    expected = DataFrame({0: arr[:, 0], 1: np.array([2**17] * 3, dtype=np.int32)})
+    tm.assert_frame_equal(res, expected)
+
+
+def test_where_copies_with_noop(frame_or_series):
+    # Mutating the object returned by where() must not change the original,
+    # whether the condition keeps every value or replaces every value.
+    # GH-39595
+    result = frame_or_series([1, 2, 3, 4])
+    expected = result.copy()
+    col = result[0] if frame_or_series is DataFrame else result
+
+    # condition True everywhere: nothing is replaced
+    where_res = result.where(col < 5)
+    where_res *= 2
+
+    tm.assert_equal(result, expected)
+
+    # condition False everywhere: everything is replaced by identical values
+    where_res = result.where(col > 5, [1, 2, 3, 4])
+    where_res *= 2
+
+    tm.assert_equal(result, expected)
+
+
+def test_where_string_dtype(frame_or_series):
+    # where() on StringDtype data with an `other` that covers only part of
+    # the index: uncovered, replaced positions are expected to be pd.NA and
+    # the dtype to stay StringDtype.
+    # GH40824
+    obj = frame_or_series(
+        ["a", "b", "c", "d"], index=["id1", "id2", "id3", "id4"], dtype=StringDtype()
+    )
+    filtered_obj = frame_or_series(
+        ["b", "c"], index=["id2", "id3"], dtype=StringDtype()
+    )
+    filter_ser = Series([False, True, True, False])
+
+    result = obj.where(filter_ser, filtered_obj)
+    expected = frame_or_series(
+        [pd.NA, "b", "c", pd.NA],
+        index=["id1", "id2", "id3", "id4"],
+        dtype=StringDtype(),
+    )
+    tm.assert_equal(result, expected)
+
+
+def test_where_bool_comparison():
+    # Keep only the False entries of a boolean frame; True entries are
+    # expected to become NaN (the all-NaN column is spelled as object dtype).
+    # GH 10336
+    df_mask = DataFrame(
+        {"AAA": [True] * 4, "BBB": [False] * 4, "CCC": [True, False, True, False]}
+    )
+    result = df_mask.where(df_mask == False)  # noqa:E712
+    expected = DataFrame(
+        {
+            "AAA": np.array([np.nan] * 4, dtype=object),
+            "BBB": [False] * 4,
+            "CCC": [np.nan, False, np.nan, False],
+        }
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_where_none_nan_coerce():
+    # Replacing the missing entries with None must give back a frame equal
+    # to the input (datetime column with NaT, numeric column with NaN).
+    # GH 15613
+    expected = DataFrame(
+        {
+            "A": [Timestamp("20130101"), pd.NaT, Timestamp("20130103")],
+            "B": [1, 2, np.nan],
+        }
+    )
+    result = expected.where(expected.notnull(), None)
+    tm.assert_frame_equal(result, expected)
+
+
+def test_where_non_keyword_deprecation(frame_or_series):
+    # Passing a third positional argument to where() must emit the
+    # keyword-only FutureWarning while still producing the usual result.
+    # GH 41485
+    obj = frame_or_series(range(5))
+    msg = (
+        "In a future version of pandas all arguments of "
+        f"{frame_or_series.__name__}.where except for the arguments 'cond' "
+        "and 'other' will be keyword-only"
+    )
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = obj.where(obj > 1, 10, False)
+    expected = frame_or_series([10, 10, 2, 3, 4])
+    tm.assert_equal(expected, result)
+
+
+def test_where_columns_casting():
+    # where(notnull, None) on a frame with NaN in one column must return a
+    # frame equal to the input, dtypes included.
+    # GH 42295
+
+    df = DataFrame({"a": [1.0, 2.0], "b": [3, np.nan]})
+    expected = df.copy()
+    result = df.where(pd.notnull(df), None)
+    # make sure dtypes don't change
+    tm.assert_frame_equal(expected, result)
+
+
+@pytest.mark.parametrize("as_cat", [True, False])
+def test_where_period_invalid_na(frame_or_series, as_cat, request):
+    # A timedelta64 NaT used as `other` on Period data (optionally wrapped
+    # as categorical) must raise TypeError from where, mask and inplace mask.
+    # GH#44697
+    idx = pd.period_range("2016-01-01", periods=3, freq="D")
+    if as_cat:
+        idx = idx.astype("category")
+    obj = frame_or_series(idx)
+
+    # NA value that we should *not* cast to Period dtype
+    tdnat = pd.NaT.to_numpy("m8[ns]")
+
+    # ndmin + transpose shapes the mask to match a Series or 1-column frame
+    mask = np.array([True, True, False], ndmin=obj.ndim).T
+
+    if as_cat:
+        msg = (
+            r"Cannot setitem on a Categorical with a new category \(NaT\), "
+            "set the categories first"
+        )
+        if np_version_under1p19:
+            # on older numpy the test is marked as an expected failure
+            mark = pytest.mark.xfail(
+                reason="When evaluating the f-string to generate the exception "
+                "message, numpy somehow ends up trying to cast None to int, so "
+                "ends up raising TypeError but with an unrelated message."
+            )
+            request.node.add_marker(mark)
+    else:
+        msg = "value should be a 'Period'"
+
+    with pytest.raises(TypeError, match=msg):
+        obj.where(mask, tdnat)
+
+    with pytest.raises(TypeError, match=msg):
+        obj.mask(mask, tdnat)
+
+    with pytest.raises(TypeError, match=msg):
+        obj.mask(mask, tdnat, inplace=True)
+
+
+def test_where_nullable_invalid_na(frame_or_series, any_numeric_ea_dtype):
+    # NaT-like values used as `other` on nullable numeric data must raise
+    # TypeError from both where and mask.
+    # GH#44697
+    arr = pd.array([1, 2, 3], dtype=any_numeric_ea_dtype)
+    obj = frame_or_series(arr)
+
+    # ndmin + transpose shapes the mask to match a Series or 1-column frame
+    mask = np.array([True, True, False], ndmin=obj.ndim).T
+
+    # any one of these messages is accepted (joined into a regex alternation)
+    msg = "|".join(
+        [
+            r"datetime64\[.{1,2}\] cannot be converted to an? (Integer|Floating)Dtype",
+            r"timedelta64\[.{1,2}\] cannot be converted to an? (Integer|Floating)Dtype",
+            r"int\(\) argument must be a string, a bytes-like object or a number, "
+            "not 'NaTType'",
+            "object cannot be converted to a FloatingDtype",
+            "'values' contains non-numeric NA",
+        ]
+    )
+
+    for null in tm.NP_NAT_OBJECTS + [pd.NaT]:
+        # NaT is an NA value that we should *not* cast to pd.NA dtype
+        with pytest.raises(TypeError, match=msg):
+            obj.where(mask, null)
+
+        with pytest.raises(TypeError, match=msg):
+            obj.mask(mask, null)
+
+
+@given(data=OPTIONAL_ONE_OF_ALL)
+def test_where_inplace_casting(data):
+    # Property test (hypothesis): for generated column data, where(...) with
+    # inplace=True must leave the frame equal to the non-inplace result.
+    # GH 22051
+    df = DataFrame({"a": data})
+    df_copy = df.where(pd.notnull(df), None).copy()
+    df.where(pd.notnull(df), None, inplace=True)
+    tm.assert_equal(df, df_copy)
@@ -0,0 +1,392 @@
+import re
+
+import numpy as np
+import pytest
+
+from pandas import (
+    DataFrame,
+    Index,
+    IndexSlice,
+    MultiIndex,
+    Series,
+    concat,
+)
+import pandas._testing as tm
+import pandas.core.common as com
+
+from pandas.tseries.offsets import BDay
+
+
+@pytest.fixture
+def four_level_index_dataframe():
+    arr = np.array(
+        [
+            [-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
+            [0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
+            [-0.6662, -0.5243, -0.358, 0.89145, 2.5838],
+        ]
+    )
+    index = MultiIndex(
+        levels=[["a", "x"], ["b", "q"], [10.0032, 20.0, 30.0], [3, 4, 5]],
+        codes=[[0, 0, 1], [0, 1, 1], [0, 1, 2], [2, 1, 0]],
+        names=["one", "two", "three", "four"],
+    )
+    return DataFrame(arr, index=index, columns=list("ABCDE"))
+
+
+class TestXS:
+    def test_xs(self, float_frame, datetime_frame):
+        idx = float_frame.index[5]
+        xs = float_frame.xs(idx)
+        for item, value in xs.items():
+            if np.isnan(value):
+                assert np.isnan(float_frame[item][idx])
+            else:
+                assert value == float_frame[item][idx]
+
+        # mixed-type xs
+        test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}}
+        frame = DataFrame(test_data)
+        xs = frame.xs("1")
+        assert xs.dtype == np.object_
+        assert xs["A"] == 1
+        assert xs["B"] == "1"
+
+        with pytest.raises(
+            KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00', freq='B')")
+        ):
+            datetime_frame.xs(datetime_frame.index[0] - BDay())
+
+        # xs get column
+        series = float_frame.xs("A", axis=1)
+        expected = float_frame["A"]
+        tm.assert_series_equal(series, expected)
+
+        # view is returned if possible
+        series = float_frame.xs("A", axis=1)
+        series[:] = 5
+        assert (expected == 5).all()
+
+    def test_xs_corner(self):
+        # pathological mixed-type reordering case
+        df = DataFrame(index=[0])
+        df["A"] = 1.0
+        df["B"] = "foo"
+        df["C"] = 2.0
+        df["D"] = "bar"
+        df["E"] = 3.0
+
+        xs = df.xs(0)
+        exp = Series([1.0, "foo", 2.0, "bar", 3.0], index=list("ABCDE"), name=0)
+        tm.assert_series_equal(xs, exp)
+
+        # no columns but Index(dtype=object)
+        df = DataFrame(index=["a", "b", "c"])
+        result = df.xs("a")
+        expected = Series([], name="a", index=Index([]), dtype=np.float64)
+        tm.assert_series_equal(result, expected)
+
+    def test_xs_duplicates(self):
+        df = DataFrame(np.random.randn(5, 2), index=["b", "b", "c", "b", "a"])
+
+        cross = df.xs("c")
+        exp = df.iloc[2]
+        tm.assert_series_equal(cross, exp)
+
+    def test_xs_keep_level(self):
+        df = DataFrame(
+            {
+                "day": {0: "sat", 1: "sun"},
+                "flavour": {0: "strawberry", 1: "strawberry"},
+                "sales": {0: 10, 1: 12},
+                "year": {0: 2008, 1: 2008},
+            }
+        ).set_index(["year", "flavour", "day"])
+        result = df.xs("sat", level="day", drop_level=False)
+        expected = df[:1]
+        tm.assert_frame_equal(result, expected)
+
+        with tm.assert_produces_warning(FutureWarning):
+            result = df.xs([2008, "sat"], level=["year", "day"], drop_level=False)
+        tm.assert_frame_equal(result, expected)
+
+    def test_xs_view(self, using_array_manager):
+        # in 0.14 this will return a view if possible a copy otherwise, but
+        # this is numpy dependent
+
+        dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5))
+
+        if using_array_manager:
+            # INFO(ArrayManager) with ArrayManager getting a row as a view is
+            # not possible
+            msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                dm.xs(2)[:] = 20
+            assert not (dm.xs(2) == 20).any()
+        else:
+            dm.xs(2)[:] = 20
+            assert (dm.xs(2) == 20).all()
+
+
+class TestXSWithMultiIndex:
+    def test_xs_doc_example(self):
+        # TODO: more descriptive name
+        # based on example in advanced.rst
+        arrays = [
+            ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
+            ["one", "two", "one", "two", "one", "two", "one", "two"],
+        ]
+        tuples = list(zip(*arrays))
+
+        index = MultiIndex.from_tuples(tuples, names=["first", "second"])
+        df = DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index)
+
+        result = df.xs(("one", "bar"), level=("second", "first"), axis=1)
+
+        expected = df.iloc[:, [0]]
+        tm.assert_frame_equal(result, expected)
+
+    def test_xs_integer_key(self):
+        # see GH#2107
+        dates = range(20111201, 20111205)
+        ids = list("abcde")
+        index = MultiIndex.from_product([dates, ids], names=["date", "secid"])
+        df = DataFrame(np.random.randn(len(index), 3), index, ["X", "Y", "Z"])
+
+        result = df.xs(20111201, level="date")
+        expected = df.loc[20111201, :]
+        tm.assert_frame_equal(result, expected)
+
+    def test_xs_level(self, multiindex_dataframe_random_data):
+        df = multiindex_dataframe_random_data
+        result = df.xs("two", level="second")
+        expected = df[df.index.get_level_values(1) == "two"]
+        expected.index = Index(["foo", "bar", "baz", "qux"], name="first")
+        tm.assert_frame_equal(result, expected)
+
+    def test_xs_level_eq_2(self):
+        arr = np.random.randn(3, 5)
+        index = MultiIndex(
+            levels=[["a", "p", "x"], ["b", "q", "y"], ["c", "r", "z"]],
+            codes=[[2, 0, 1], [2, 0, 1], [2, 0, 1]],
+        )
+        df = DataFrame(arr, index=index)
+        expected = DataFrame(arr[1:2], index=[["a"], ["b"]])
+        result = df.xs("c", level=2)
+        tm.assert_frame_equal(result, expected)
+
+    def test_xs_setting_with_copy_error(self, multiindex_dataframe_random_data):
+        # this is a copy in 0.14
+        df = multiindex_dataframe_random_data
+        result = df.xs("two", level="second")
+
+        # setting this will give a SettingWithCopyError
+        # as we are trying to write a view
+        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
+        with pytest.raises(com.SettingWithCopyError, match=msg):
+            result[:] = 10
+
+    def test_xs_setting_with_copy_error_multiple(self, four_level_index_dataframe):
+        # this is a copy in 0.14
+        df = four_level_index_dataframe
+        result = df.xs(("a", 4), level=["one", "four"])
+
+        # setting this will give a SettingWithCopyError
+        # as we are trying to write a view
+        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
+        with pytest.raises(com.SettingWithCopyError, match=msg):
+            result[:] = 10
+
+    @pytest.mark.parametrize("key, level", [("one", "second"), (["one"], ["second"])])
+    def test_xs_with_duplicates(self, key, level, multiindex_dataframe_random_data):
+        # see GH#13719
+        frame = multiindex_dataframe_random_data
+        df = concat([frame] * 2)
+        assert df.index.is_unique is False
+        expected = concat([frame.xs("one", level="second")] * 2)
+
+        if isinstance(key, list):
+            with tm.assert_produces_warning(FutureWarning):
+                result = df.xs(key, level=level)
+        else:
+            result = df.xs(key, level=level)
+        tm.assert_frame_equal(result, expected)
+
+    def test_xs_missing_values_in_index(self):
+        # see GH#6574
+        # missing values in returned index should be preserved
+        acc = [
+            ("a", "abcde", 1),
+            ("b", "bbcde", 2),
+            ("y", "yzcde", 25),
+            ("z", "xbcde", 24),
+            ("z", None, 26),
+            ("z", "zbcde", 25),
+            ("z", "ybcde", 26),
+        ]
+        df = DataFrame(acc, columns=["a1", "a2", "cnt"]).set_index(["a1", "a2"])
+        expected = DataFrame(
+            {"cnt": [24, 26, 25, 26]},
+            index=Index(["xbcde", np.nan, "zbcde", "ybcde"], name="a2"),
+        )
+
+        result = df.xs("z", level="a1")
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "key, level, exp_arr, exp_index",
+        [
+            ("a", "lvl0", lambda x: x[:, 0:2], Index(["bar", "foo"], name="lvl1")),
+            ("foo", "lvl1", lambda x: x[:, 1:2], Index(["a"], name="lvl0")),
+        ],
+    )
+    def test_xs_named_levels_axis_eq_1(self, key, level, exp_arr, exp_index):
+        # see GH#2903
+        arr = np.random.randn(4, 4)
+        index = MultiIndex(
+            levels=[["a", "b"], ["bar", "foo", "hello", "world"]],
+            codes=[[0, 0, 1, 1], [0, 1, 2, 3]],
+            names=["lvl0", "lvl1"],
+        )
+        df = DataFrame(arr, columns=index)
+        result = df.xs(key, level=level, axis=1)
+        expected = DataFrame(exp_arr(arr), columns=exp_index)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "indexer",
+        [
+            lambda df: df.xs(("a", 4), level=["one", "four"]),
+            lambda df: df.xs("a").xs(4, level="four"),
+        ],
+    )
+    def test_xs_level_multiple(self, indexer, four_level_index_dataframe):
+        df = four_level_index_dataframe
+        expected_values = [[0.4473, 1.4152, 0.2834, 1.00661, 0.1744]]
+        expected_index = MultiIndex(
+            levels=[["q"], [20.0]], codes=[[0], [0]], names=["two", "three"]
+        )
+        expected = DataFrame(
+            expected_values, index=expected_index, columns=list("ABCDE")
+        )
+        result = indexer(df)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "indexer", [lambda df: df.xs("a", level=0), lambda df: df.xs("a")]
+    )
+    def test_xs_level0(self, indexer, four_level_index_dataframe):
+        df = four_level_index_dataframe
+        expected_values = [
+            [-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
+            [0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
+        ]
+        expected_index = MultiIndex(
+            levels=[["b", "q"], [10.0032, 20.0], [4, 5]],
+            codes=[[0, 1], [0, 1], [1, 0]],
+            names=["two", "three", "four"],
+        )
+        expected = DataFrame(
+            expected_values, index=expected_index, columns=list("ABCDE")
+        )
+
+        result = indexer(df)
+        tm.assert_frame_equal(result, expected)
+
+    def test_xs_values(self, multiindex_dataframe_random_data):
+        df = multiindex_dataframe_random_data
+        result = df.xs(("bar", "two")).values
+        expected = df.values[4]
+        tm.assert_almost_equal(result, expected)
+
+    def test_xs_loc_equality(self, multiindex_dataframe_random_data):
+        df = multiindex_dataframe_random_data
+        result = df.xs(("bar", "two"))
+        expected = df.loc[("bar", "two")]
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize("klass", [DataFrame, Series])
+    def test_xs_IndexSlice_argument_not_implemented(self, klass):
+        # GH#35301
+
+        index = MultiIndex(
+            levels=[[("foo", "bar", 0), ("foo", "baz", 0), ("foo", "qux", 0)], [0, 1]],
+            codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
+        )
+
+        obj = DataFrame(np.random.randn(6, 4), index=index)
+        if klass is Series:
+            obj = obj[0]
+
+        expected = obj.iloc[-2:].droplevel(0)
+
+        result = obj.xs(IndexSlice[("foo", "qux", 0), :])
+        tm.assert_equal(result, expected)
+
+        result = obj.loc[IndexSlice[("foo", "qux", 0), :]]
+        tm.assert_equal(result, expected)
+
+    @pytest.mark.parametrize("klass", [DataFrame, Series])
+    def test_xs_levels_raises(self, klass):
+        obj = DataFrame({"A": [1, 2, 3]})
+        if klass is Series:
+            obj = obj["A"]
+
+        msg = "Index must be a MultiIndex"
+        with pytest.raises(TypeError, match=msg):
+            obj.xs(0, level="as")
+
+    def test_xs_multiindex_droplevel_false(self):
+        # GH#19056
+        mi = MultiIndex.from_tuples(
+            [("a", "x"), ("a", "y"), ("b", "x")], names=["level1", "level2"]
+        )
+        df = DataFrame([[1, 2, 3]], columns=mi)
+        result = df.xs("a", axis=1, drop_level=False)
+        expected = DataFrame(
+            [[1, 2]],
+            columns=MultiIndex.from_tuples(
+                [("a", "x"), ("a", "y")], names=["level1", "level2"]
+            ),
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_xs_droplevel_false(self):
+        # GH#19056
+        df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"]))
+        result = df.xs("a", axis=1, drop_level=False)
+        expected = DataFrame({"a": [1]})
+        tm.assert_frame_equal(result, expected)
+
+    def test_xs_droplevel_false_view(self, using_array_manager):
+        # GH#37832
+        df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"]))
+        result = df.xs("a", axis=1, drop_level=False)
+        # check that result still views the same data as df
+        assert np.shares_memory(result.iloc[:, 0]._values, df.iloc[:, 0]._values)
+        # modifying original df also modifies result when having a single block
+        df.iloc[0, 0] = 2
+        expected = DataFrame({"a": [2]})
+        tm.assert_frame_equal(result, expected)
+
+        # with mixed dataframe, modifying the parent doesn't modify result
+        # TODO the "split" path behaves differently here as with single block
+        df = DataFrame([[1, 2.5, "a"]], columns=Index(["a", "b", "c"]))
+        result = df.xs("a", axis=1, drop_level=False)
+        df.iloc[0, 0] = 2
+        if using_array_manager:
+            # Here the behavior is consistent
+            expected = DataFrame({"a": [2]})
+        else:
+            # FIXME: iloc does not update the array inplace using
+            # "split" path
+            expected = DataFrame({"a": [1]})
+        tm.assert_frame_equal(result, expected)
+
+    def test_xs_list_indexer_droplevel_false(self):
+        # GH#41760
+        mi = MultiIndex.from_tuples([("x", "m", "a"), ("x", "n", "b"), ("y", "o", "c")])
+        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=mi)
+        with tm.assert_produces_warning(FutureWarning):
+            with pytest.raises(KeyError, match="y"):
+                df.xs(["x", "y"], drop_level=False, axis=1)