first commit

This commit is contained in:
Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

View File

@ -0,0 +1,77 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Index,
MultiIndex,
)
# Note: identical to the "multi" entry in the top-level "index" fixture
@pytest.fixture
def idx():
    """
    Return the two-level MultiIndex used to test general MultiIndex behaviour.

    The levels are ("foo", "bar", "baz", "qux") and ("one", "two"), named
    "first" and "second"; the codes select six (major, minor) pairs.
    """
    levels = [Index(["foo", "bar", "baz", "qux"]), Index(["one", "two"])]
    codes = [np.array([0, 0, 1, 2, 3, 3]), np.array([0, 1, 0, 1, 0, 1])]
    return MultiIndex(
        levels=levels,
        codes=codes,
        names=["first", "second"],
        verify_integrity=False,
    )
@pytest.fixture
def idx_dup():
    """
    Return a two-level MultiIndex whose entries are not all unique.

    Same levels and names as the ``idx`` fixture (compare
    tests/indexes/multi/conftest.py), but the major codes repeat so that
    some (major, minor) pairs occur more than once.
    """
    first_level = Index(["foo", "bar", "baz", "qux"])
    second_level = Index(["one", "two"])
    return MultiIndex(
        levels=[first_level, second_level],
        codes=[np.array([0, 0, 1, 0, 1, 1]), np.array([0, 1, 0, 1, 0, 1])],
        names=["first", "second"],
        verify_integrity=False,
    )
@pytest.fixture
def index_names():
    """
    Return the level names carried by the ``idx`` fixture.

    Used to test equality of the names assigned to that index.
    """
    names = ["first", "second"]
    return names
@pytest.fixture
def narrow_multi_index():
    """
    Return a MultiIndex that is narrower than the display (<80 characters).
    """
    half = 1000
    # first half of the rows is labelled "a", second half "abc"
    cat_level = pd.CategoricalIndex(["a"] * half + ["abc"] * half)
    time_level = pd.date_range("2000-01-01", freq="s", periods=2 * half)
    arrays = [cat_level, cat_level.codes + 9, time_level]
    return MultiIndex.from_arrays(arrays, names=["a", "b", "dti"])
@pytest.fixture
def wide_multi_index():
    """
    Return a MultiIndex that is wider than the display (>80 characters).
    """
    half = 1000
    cat_level = pd.CategoricalIndex(["a"] * half + ["abc"] * half)
    time_level = pd.date_range("2000-01-01", freq="s", periods=2 * half)
    # the datetime level is repeated three times to widen the repr
    return MultiIndex.from_arrays(
        [cat_level, cat_level.codes + 9, time_level, time_level, time_level],
        names=["a", "b", "dti_1", "dti_2", "dti_3"],
    )

View File

@ -0,0 +1,260 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Index,
MultiIndex,
date_range,
period_range,
)
import pandas._testing as tm
from pandas.core.api import UInt64Index
def test_shift(idx):
    """
    Check that ``shift`` on a MultiIndex raises ``NotImplementedError``.

    Parameters
    ----------
    idx : MultiIndex
        Index under test, supplied by the ``idx`` fixture.
    """
    # GH8083 test the base class for shift
    # The parentheses are required: without them the second literal is a
    # stand-alone no-op statement and only the first half would be matched.
    msg = (
        "This method is only implemented for DatetimeIndex, PeriodIndex and "
        "TimedeltaIndex; Got type MultiIndex"
    )
    with pytest.raises(NotImplementedError, match=msg):
        idx.shift(1)
    with pytest.raises(NotImplementedError, match=msg):
        idx.shift(1, 2)
def test_groupby(idx):
    """Check ``groupby`` with an array of keys and with the index itself."""
    entries = idx.tolist()

    # first three entries are keyed 1, last three are keyed 2
    by_array = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
    tm.assert_dict_equal(by_array, {1: entries[:3], 2: entries[3:]})

    # GH5620
    by_self = idx.groupby(idx)
    tm.assert_dict_equal(by_self, {entry: [entry] for entry in idx})
def test_truncate_multiindex():
    """
    Check ``MultiIndex.truncate`` trims the first level and keeps the names.

    Covers ``before`` only, ``after`` only, both together, and the
    ``ValueError`` raised when ``after`` is smaller than ``before``.
    """
    # GH 34564 for MultiIndex level names check
    index = MultiIndex(
        levels=[Index(list(range(4))), Index(list(range(2)))],
        codes=[np.array([0, 0, 1, 2, 3, 3]), np.array([0, 1, 0, 1, 0, 1])],
        names=["L1", "L2"],
    )

    result = index.truncate(before=1)
    # The first level holds the integers 0..3, so the label that must have
    # been dropped is 0 (a string label could never be present).
    assert 0 not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(after=1)
    assert 2 not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(before=1, after=2)
    assert len(result.levels[0]) == 2
    assert index.names == result.names

    msg = "after < before"
    with pytest.raises(ValueError, match=msg):
        index.truncate(3, 1)
# TODO: reshape
def test_reorder_levels(idx):
    """Check that ``reorder_levels`` rejects an order naming three levels."""
    # this blows up
    too_many = [2, 1, 0]
    with pytest.raises(IndexError, match="^Too many levels"):
        idx.reorder_levels(too_many)
def test_numpy_repeat():
    """Check ``np.repeat`` on a MultiIndex and that ``axis`` is rejected."""
    count = 2
    first = [1, 2, 3]
    # the same array serves as the second level's values and as the names
    second = np.array(["foo", "bar"])
    mi = MultiIndex.from_product([first, second], names=second)

    repeated = np.repeat(mi, count)
    expected = MultiIndex.from_product([first, second.repeat(count)], names=second)
    tm.assert_index_equal(repeated, expected)

    with pytest.raises(ValueError, match="the 'axis' parameter is not supported"):
        np.repeat(mi, count, axis=1)
def test_append_mixed_dtypes():
    """Check ``MultiIndex.append`` when the levels hold different dtypes."""
    # GH 13660
    naive = date_range("2011-01-01", freq="M", periods=3)
    aware = date_range("2011-01-01", freq="M", periods=3, tz="US/Eastern")
    periods = period_range("2011-01", freq="M", periods=3)
    ints = [1, 2, 3]
    floats = [1.1, np.nan, 3.3]
    letters = ["a", "b", "c"]

    mi = MultiIndex.from_arrays([ints, floats, letters, naive, aware, periods])
    assert mi.nlevels == 6

    # appending the index to itself repeats every level once
    doubled = MultiIndex.from_arrays(
        [
            ints * 2,
            floats * 2,
            letters * 2,
            naive.append(naive),
            aware.append(aware),
            periods.append(periods),
        ]
    )
    tm.assert_index_equal(mi.append(mi), doubled)

    # appending an index whose six levels are all strings
    xyz = ["x", "y", "z"]
    other = MultiIndex.from_arrays([xyz] * 6)
    mixed = MultiIndex.from_arrays(
        [
            ints + xyz,
            floats + xyz,
            letters + xyz,
            naive.append(Index(xyz)),
            aware.append(Index(xyz)),
            periods.append(Index(xyz)),
        ]
    )
    tm.assert_index_equal(mi.append(other), mixed)
def test_iter(idx):
    """Check that iterating the index yields its (major, minor) tuples in order."""
    majors = ["foo", "foo", "bar", "baz", "qux", "qux"]
    minors = ["one", "two", "one", "two", "one", "two"]
    assert list(idx) == list(zip(majors, minors))
def test_sub(idx):
    """Check that subtraction involving a MultiIndex raises ``TypeError``."""
    tail = idx[-3:]

    # - now raises (previously was set op difference)
    sub_msg = "cannot perform __sub__ with this index type: MultiIndex"
    for left, right in ((idx, tail), (tail, idx), (tail, idx.tolist())):
        with pytest.raises(TypeError, match=sub_msg):
            left - right

    # a plain list on the left goes through the reflected operator
    rsub_msg = "cannot perform __rsub__ with this index type: MultiIndex"
    with pytest.raises(TypeError, match=rsub_msg):
        idx.tolist() - tail
def test_map(idx):
    """Check that mapping with an identity callable returns an equal index."""

    # callable
    def identity(value):
        return value

    tm.assert_index_equal(idx.map(identity), idx)
@pytest.mark.parametrize(
    "mapper",
    [
        lambda values, idx: {i: e for e, i in zip(values, idx)},
        lambda values, idx: pd.Series(values, idx),
    ],
)
def test_map_dictlike(idx, mapper):
    """
    Check ``map`` with dict-like mappers (a dict and a Series).

    Parameters
    ----------
    idx : MultiIndex
        Index under test, supplied by the ``idx`` fixture.
    mapper : callable
        Builds the dict-like mapping from ``(values, idx)``.
    """
    # The index under test is a MultiIndex, so an identity mapping must give
    # back an equal index; the unsigned-integer special case of the generic
    # Index tests can never apply here and has been dropped.
    identity = mapper(idx.values, idx)
    result = idx.map(identity)
    tm.assert_index_equal(result, idx)

    # empty mappable
    expected = Index([np.nan] * len(idx))
    result = idx.map(mapper(expected, idx))
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
    "func",
    [
        getattr(np, ufunc_name)
        for ufunc_name in (
            "exp",
            "exp2",
            "expm1",
            "log",
            "log2",
            "log10",
            "log1p",
            "sqrt",
            "sin",
            "cos",
            "tan",
            "arcsin",
            "arccos",
            "arctan",
            "sinh",
            "cosh",
            "tanh",
            "arcsinh",
            "arccosh",
            "arctanh",
            "deg2rad",
            "rad2deg",
        )
    ],
    ids=lambda func: func.__name__,
)
def test_numpy_ufuncs(idx, func):
    """Check that numpy math ufuncs raise ``TypeError`` on a MultiIndex."""
    # test ufuncs of numpy. see:
    # https://numpy.org/doc/stable/reference/ufuncs.html
    pattern = (
        "loop of ufunc does not support argument 0 of type tuple which "
        f"has no callable {func.__name__} method"
    )
    with pytest.raises(TypeError, match=pattern):
        func(idx)
@pytest.mark.parametrize(
    "func",
    [getattr(np, ufunc_name) for ufunc_name in ("isfinite", "isinf", "isnan", "signbit")],
    ids=lambda func: func.__name__,
)
def test_numpy_type_funcs(idx, func):
    """Check that numpy type-testing ufuncs raise ``TypeError`` on a MultiIndex."""
    pattern = (
        f"ufunc '{func.__name__}' not supported for the input types, and the inputs "
        "could not be safely coerced to any supported types according to "
        "the casting rule ''safe''"
    )
    with pytest.raises(TypeError, match=pattern):
        func(idx)

View File

@ -0,0 +1,30 @@
import numpy as np
import pytest
from pandas.core.dtypes.dtypes import CategoricalDtype
import pandas._testing as tm
def test_astype(idx):
    """Check ``astype("O")`` against a copy and that an int dtype is rejected."""
    reference = idx.copy()
    as_object = idx.astype("O")

    # levels and codes are compared with tm.assert_copy, names by equality
    for attr in ("levels", "codes"):
        tm.assert_copy(getattr(as_object, attr), getattr(reference, attr))
    assert as_object.names == list(reference.names)

    with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
        idx.astype(np.dtype(int))
@pytest.mark.parametrize("ordered", [True, False])
def test_astype_category(idx, ordered):
    """Check that casting a MultiIndex to a categorical dtype raises."""
    # GH 18630
    msg = "> 1 ndim Categorical are not supported at this time"
    with pytest.raises(NotImplementedError, match=msg):
        idx.astype(CategoricalDtype(ordered=ordered))

    if ordered is not False:
        return

    # dtype='category' defaults to ordered=False, so only test once
    with pytest.raises(NotImplementedError, match=msg):
        idx.astype("category")

View File

@ -0,0 +1,98 @@
import numpy as np
import pytest
from pandas import MultiIndex
import pandas._testing as tm
def test_numeric_compat(idx):
    """Check that arithmetic operators and their reflections raise ``TypeError``."""
    div_err = "cannot perform __truediv__"
    cases = [
        ("cannot perform __mul__", lambda: idx * 1),
        ("cannot perform __rmul__", lambda: 1 * idx),
        (div_err, lambda: idx / 1),
        # the reflected message differs only by the "r" prefix on the dunder
        (div_err.replace(" __", " __r"), lambda: 1 / idx),
        ("cannot perform __floordiv__", lambda: idx // 1),
        ("cannot perform __rfloordiv__", lambda: 1 // idx),
    ]
    for pattern, operation in cases:
        with pytest.raises(TypeError, match=pattern):
            operation()
@pytest.mark.parametrize("method", ["all", "any", "__invert__"])
def test_logical_compat(idx, method):
    """Check that the named logical method raises ``TypeError`` when called."""
    with pytest.raises(TypeError, match=f"cannot perform {method}"):
        bound = getattr(idx, method)
        bound()
def test_inplace_mutation_resets_values():
    # Checks when the "_values" entry of a MultiIndex's ``_cache`` appears and
    # disappears around set_levels / set_codes calls. The assertions depend
    # on the exact order of the statements below.
    levels = [["a", "b", "c"], [4]]
    levels2 = [[1, 2, 3], ["a"]]
    codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]

    mi1 = MultiIndex(levels=levels, codes=codes)
    mi2 = MultiIndex(levels=levels2, codes=codes)

    # instantiating MultiIndex should not access/cache _.values
    assert "_values" not in mi1._cache
    assert "_values" not in mi2._cache

    # copies, so later mutation of the indexes cannot change the references
    vals = mi1.values.copy()
    vals2 = mi2.values.copy()

    # accessing .values should cache ._values
    assert mi1._values is mi1._cache["_values"]
    assert mi1.values is mi1._cache["_values"]
    assert isinstance(mi1._cache["_values"], np.ndarray)

    # Make sure level setting works
    new_vals = mi1.set_levels(levels2).values
    tm.assert_almost_equal(vals2, new_vals)

    # Non-inplace doesn't drop _values from _cache [implementation detail]
    tm.assert_almost_equal(mi1._cache["_values"], vals)

    # ...and values is still same too
    tm.assert_almost_equal(mi1.values, vals)

    # Inplace should drop _values from _cache
    # (the inplace keyword is expected to emit a FutureWarning)
    with tm.assert_produces_warning(FutureWarning):
        mi1.set_levels(levels2, inplace=True)
    assert "_values" not in mi1._cache
    tm.assert_almost_equal(mi1.values, vals2)

    # Make sure code setting works too
    codes2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
    exp_values = np.empty((6,), dtype=object)
    exp_values[:] = [(1, "a")] * 6

    # Must be 1d array of tuples
    assert exp_values.shape == (6,)

    new_mi = mi2.set_codes(codes2)
    assert "_values" not in new_mi._cache
    new_values = new_mi.values
    assert "_values" in new_mi._cache

    # Not inplace shouldn't change
    tm.assert_almost_equal(mi2._cache["_values"], vals2)

    # Should have correct values
    tm.assert_almost_equal(exp_values, new_values)

    # ...and again setting inplace should drop _values from _cache, etc
    with tm.assert_produces_warning(FutureWarning):
        mi2.set_codes(codes2, inplace=True)
    assert "_values" not in mi2._cache
    tm.assert_almost_equal(mi2.values, new_values)
    assert "_values" in mi2._cache

View File

@ -0,0 +1,829 @@
from datetime import (
date,
datetime,
)
import itertools
import numpy as np
import pytest
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
import pandas as pd
from pandas import (
Index,
MultiIndex,
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
def test_constructor_single_level():
    """Check that one levels/codes pair still builds a MultiIndex."""
    labels = ["foo", "bar", "baz", "qux"]
    mi = MultiIndex(levels=[labels], codes=[[0, 1, 2, 3]], names=["first"])

    assert isinstance(mi, MultiIndex)
    tm.assert_index_equal(mi.levels[0], Index(labels, name="first"))
    assert mi.names == ["first"]
def test_constructor_no_levels():
    """Check the errors for empty or missing ``levels`` / ``codes``."""
    with pytest.raises(ValueError, match="non-zero number of levels/codes"):
        MultiIndex(levels=[], codes=[])

    # passing only one of the two arguments is a TypeError
    both_required = "Must pass both levels and codes"
    for kwargs in ({"levels": []}, {"codes": []}):
        with pytest.raises(TypeError, match=both_required):
            MultiIndex(**kwargs)
def test_constructor_nonhashable_names():
    """Check that unhashable level names are rejected everywhere they can be set."""
    # GH 20527
    levels = [[1, 2], ["one", "two"]]
    codes = [[0, 0, 1, 1], [0, 1, 0, 1]]
    msg = r"MultiIndex\.name must be a hashable type"

    with pytest.raises(TypeError, match=msg):
        MultiIndex(levels=levels, codes=codes, names=(["foo"], ["bar"]))

    # With .rename() and with .set_names()
    mi = MultiIndex(levels=levels, codes=codes, names=("foo", "bar"))
    unhashable = [["foor"], ["barr"]]
    for setter in (mi.rename, mi.set_names):
        with pytest.raises(TypeError, match=msg):
            setter(names=unhashable)
def test_constructor_mismatched_codes_levels(idx):
    """Check the ``ValueError`` raised for inconsistent levels and codes."""
    with pytest.raises(
        ValueError, match="Length of levels and codes must be the same"
    ):
        MultiIndex(levels=["a"], codes=[np.array([1]), np.array([2]), np.array([3])])

    length_error = (
        r"On level 0, code max \(3\) >= length of level \(1\)\. "
        "NOTE: this index is in an inconsistent state"
    )
    label_error = r"Unequal code lengths: \[4, 2\]"
    code_value_error = r"On level 0, code value \(-2\) < -1"
    short_levels = [["a"], ["b"]]
    ragged_codes = [[0, 0, 0, 0], [0, 0]]

    # important to check that it's looking at the right thing.
    with pytest.raises(ValueError, match=length_error):
        MultiIndex(levels=short_levels, codes=[[0, 1, 2, 3], [0, 3, 4, 1]])
    with pytest.raises(ValueError, match=label_error):
        MultiIndex(levels=short_levels, codes=ragged_codes)

    # external API
    with pytest.raises(ValueError, match=length_error):
        idx.copy().set_levels(short_levels)
    with pytest.raises(ValueError, match=label_error):
        idx.copy().set_codes(ragged_codes)

    # test set_codes with verify_integrity=False
    # the setting should not raise any value error
    idx.copy().set_codes(codes=ragged_codes, verify_integrity=False)

    # code value smaller than -1
    with pytest.raises(ValueError, match=code_value_error):
        MultiIndex(levels=short_levels, codes=[[0, -2], [0, 0]])
def test_na_levels():
    """Check that codes pointing at missing-value levels are re-assigned to -1."""
    # GH26408
    # test if codes are re-assigned value -1 for levels
    # with missing values (NaN, NaT, None)
    codes = [[0, -1, 1, 2, 3, 4]]

    na_leading = [[np.nan, None, pd.NaT, 128, 2]]
    result = MultiIndex(levels=na_leading, codes=codes)
    expected = MultiIndex(levels=na_leading, codes=[[-1, -1, -1, -1, 3, 4]])
    tm.assert_index_equal(result, expected)

    na_mixed = [[np.nan, "s", pd.NaT, 128, None]]
    result = MultiIndex(levels=na_mixed, codes=codes)
    expected = MultiIndex(levels=na_mixed, codes=[[-1, -1, 1, -1, 3, -1]])
    tm.assert_index_equal(result, expected)

    # verify set_levels and set_codes
    result = MultiIndex(levels=[[1, 2, 3, 4, 5]], codes=codes).set_levels(na_mixed)
    tm.assert_index_equal(result, expected)

    result = MultiIndex(levels=na_mixed, codes=[[1, 2, 2, 2, 2, 2]]).set_codes(codes)
    tm.assert_index_equal(result, expected)
def test_copy_in_constructor():
    """Check that ``copy=True`` detaches the index from the arrays passed in."""
    level_values = np.array(["a", "b", "c"])
    code_values = np.array([1, 1, 2, 0, 0, 1, 1])

    first_code = code_values[0]
    mi = MultiIndex(
        levels=[level_values, level_values],
        codes=[code_values, code_values],
        copy=True,
    )
    assert mi.codes[0][0] == first_code

    # mutating the source arrays afterwards must not show up in the index
    code_values[0] = 15
    assert mi.codes[0][0] == first_code

    first_label = level_values[0]
    level_values[0] = "PANDA"
    assert mi.levels[0][0] == first_label
# ----------------------------------------------------------------------------
# from_arrays
# ----------------------------------------------------------------------------
def test_from_arrays(idx):
    """Check ``from_arrays`` round-trips ``idx`` and infers level values."""
    level_values = []
    for lev, level_codes in zip(idx.levels, idx.codes):
        level_values.append(np.asarray(lev).take(level_codes))

    # list of arrays as input
    rebuilt = MultiIndex.from_arrays(level_values, names=idx.names)
    tm.assert_index_equal(rebuilt, idx)

    # infer correctly
    inferred = MultiIndex.from_arrays([[pd.NaT, Timestamp("20130101")], ["a", "b"]])
    first_level, second_level = inferred.levels
    assert first_level.equals(Index([Timestamp("20130101")]))
    assert second_level.equals(Index(["a", "b"]))
def test_from_arrays_iterator(idx):
    """Check ``from_arrays`` accepts an iterator and rejects a non-iterable."""
    # GH 18434
    level_values = []
    for lev, level_codes in zip(idx.levels, idx.codes):
        level_values.append(np.asarray(lev).take(level_codes))

    # iterator as input
    rebuilt = MultiIndex.from_arrays(iter(level_values), names=idx.names)
    tm.assert_index_equal(rebuilt, idx)

    # invalid iterator input
    with pytest.raises(
        TypeError, match="Input must be a list / sequence of array-likes."
    ):
        MultiIndex.from_arrays(0)
def test_from_arrays_tuples(idx):
    """Check ``from_arrays`` accepts a tuple of tuples."""
    per_level = []
    for lev, level_codes in zip(idx.levels, idx.codes):
        per_level.append(tuple(np.asarray(lev).take(level_codes)))

    # tuple of tuples as input
    rebuilt = MultiIndex.from_arrays(tuple(per_level), names=idx.names)
    tm.assert_index_equal(rebuilt, idx)
@pytest.mark.parametrize(
    ("idx1", "idx2"),
    [
        (
            pd.period_range("2011-01-01", freq="D", periods=3),
            pd.period_range("2015-01-01", freq="H", periods=3),
        ),
        (
            date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
            date_range("2015-01-01 10:00", freq="H", periods=3, tz="Asia/Tokyo"),
        ),
        (
            pd.timedelta_range("1 days", freq="D", periods=3),
            pd.timedelta_range("2 hours", freq="H", periods=3),
        ),
    ],
)
def test_from_arrays_index_series_period_datetimetz_and_timedelta(idx1, idx2):
    """Check ``from_arrays`` gives the same result for Index and Series inputs."""
    from_indexes = MultiIndex.from_arrays([idx1, idx2])
    from_series = MultiIndex.from_arrays([Series(idx1), Series(idx2)])

    # both constructions must reproduce the inputs level by level
    for built in (from_indexes, from_series):
        tm.assert_index_equal(built.get_level_values(0), idx1)
        tm.assert_index_equal(built.get_level_values(1), idx2)

    tm.assert_index_equal(from_indexes, from_series)
def test_from_arrays_index_datetimelike_mixed():
    """Check ``from_arrays`` with four different datetime-like level types."""
    expected_levels = [
        date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
        date_range("2015-01-01 10:00", freq="H", periods=3),
        pd.timedelta_range("1 days", freq="D", periods=3),
        pd.period_range("2011-01-01", freq="D", periods=3),
    ]

    from_indexes = MultiIndex.from_arrays(expected_levels)
    from_series = MultiIndex.from_arrays([Series(level) for level in expected_levels])

    # Index inputs and Series inputs must both reproduce every level
    for built in (from_indexes, from_series):
        for position, level in enumerate(expected_levels):
            tm.assert_index_equal(built.get_level_values(position), level)

    tm.assert_index_equal(from_indexes, from_series)
def test_from_arrays_index_series_categorical():
    """Check ``from_arrays`` keeps categorical levels for three input kinds."""
    # GH13743
    letters = list("abcaab")
    categories = list("bac")
    unordered = pd.CategoricalIndex(letters, categories=categories, ordered=False)
    ordered = pd.CategoricalIndex(letters, categories=categories, ordered=True)

    # CategoricalIndex objects, Series wrapping them, and their .values
    sources = (
        [unordered, ordered],
        [Series(unordered), Series(ordered)],
        [unordered.values, ordered.values],
    )
    for arrays in sources:
        built = MultiIndex.from_arrays(arrays)
        tm.assert_index_equal(built.get_level_values(0), unordered)
        tm.assert_index_equal(built.get_level_values(1), ordered)
def test_from_arrays_empty():
    """Check ``from_arrays`` with zero, one and several empty arrays."""
    # 0 levels
    no_levels = "Must pass non-zero number of levels/codes"
    with pytest.raises(ValueError, match=no_levels):
        MultiIndex.from_arrays(arrays=[])

    # 1 level
    one_level = MultiIndex.from_arrays(arrays=[[]], names=["A"])
    assert isinstance(one_level, MultiIndex)
    tm.assert_index_equal(one_level.levels[0], Index([], name="A"))
    assert one_level.names == ["A"]

    # N levels
    all_names = list("ABC")
    for nlevels in (2, 3):
        names = all_names[:nlevels]
        empties = [[]] * nlevels
        result = MultiIndex.from_arrays(arrays=empties, names=names)
        expected = MultiIndex(levels=empties, codes=empties, names=names)
        tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
    "invalid_sequence_of_arrays",
    [
        # lists of scalars / mixed scalars and lists
        1,
        [1],
        [1, 2],
        [[1], 2],
        [1, [2]],
        "a",
        ["a"],
        ["a", "b"],
        [["a"], "b"],
        # the same shapes as tuples
        (1,),
        (1, 2),
        ([1], 2),
        (1, [2]),
        "a",
        ("a",),
        ("a", "b"),
        (["a"], "b"),
        # lists and tuples mixed
        [(1,), 2],
        [1, (2,)],
        [("a",), "b"],
        ((1,), 2),
        (1, (2,)),
        (("a",), "b"),
    ],
)
def test_from_arrays_invalid_input(invalid_sequence_of_arrays):
    """Check ``from_arrays`` raises ``TypeError`` unless every item is array-like."""
    with pytest.raises(
        TypeError, match="Input must be a list / sequence of array-likes"
    ):
        MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays)
@pytest.mark.parametrize(
    "idx1, idx2",
    [
        ([1, 2, 3], ["a", "b"]),
        ([], ["a", "b"]),
        ([1, 2, 3], []),
    ],
)
def test_from_arrays_different_lengths(idx1, idx2):
    """Check ``from_arrays`` rejects arrays of unequal length."""
    # see gh-13599
    with pytest.raises(ValueError, match="^all arrays must be same length$"):
        MultiIndex.from_arrays([idx1, idx2])
def test_from_arrays_respects_none_names():
    """Check that ``names=None`` is honoured even when the Series are named."""
    # GH27292
    numbers = Series([1, 2, 3], name="foo")
    letters = Series(["a", "b", "c"], name="bar")

    result = MultiIndex.from_arrays([numbers, letters], names=None)
    expected = MultiIndex(
        levels=[[1, 2, 3], ["a", "b", "c"]],
        codes=[[0, 1, 2], [0, 1, 2]],
        names=None,
    )
    tm.assert_index_equal(result, expected)
# ----------------------------------------------------------------------------
# from_tuples
# ----------------------------------------------------------------------------
def test_from_tuples():
    """Check ``from_tuples`` on an empty list and on a tuple of tuples."""
    empty_msg = "Cannot infer number of levels from empty list"
    with pytest.raises(TypeError, match=empty_msg):
        MultiIndex.from_tuples([])

    names = ["a", "b"]
    # input tuples
    result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=names)
    expected = MultiIndex(levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=names)
    tm.assert_index_equal(result, expected)
def test_from_tuples_iterator():
    """Check ``from_tuples`` accepts an iterator and rejects a non-iterable."""
    # GH 18434
    names = ["a", "b"]

    # input iterator for tuples
    pairs = zip([1, 3], [2, 4])
    result = MultiIndex.from_tuples(pairs, names=names)
    expected = MultiIndex(levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=names)
    tm.assert_index_equal(result, expected)

    # input non-iterables
    with pytest.raises(
        TypeError, match="Input must be a list / sequence of tuple-likes."
    ):
        MultiIndex.from_tuples(0)
def test_from_tuples_empty():
    """Check an empty tuple list with names equals ``from_arrays`` on empty arrays."""
    # GH 16777
    names = ["a", "b"]
    from_tuples = MultiIndex.from_tuples([], names=names)
    from_arrays = MultiIndex.from_arrays(arrays=[[], []], names=names)
    tm.assert_index_equal(from_tuples, from_arrays)
def test_from_tuples_index_values(idx):
    """Check that ``from_tuples`` on a MultiIndex keeps the same values."""
    rebuilt = MultiIndex.from_tuples(idx)
    elementwise_equal = rebuilt.values == idx.values
    assert elementwise_equal.all()
def test_tuples_with_name_string():
    """Check that a string ``name`` is rejected when ``Index`` is given tuples."""
    # GH 15110 and GH 14848
    tuples = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
    for bad_name in ("abc", "a"):
        with pytest.raises(
            ValueError, match="Names should be list-like for a MultiIndex"
        ):
            Index(tuples, name=bad_name)
def test_from_tuples_with_tuple_label():
    """Check ``from_tuples`` when one of the labels is itself a tuple."""
    # GH 15457
    rows = [[2, 1, 2], [4, (1, 2), 3]]
    expected = pd.DataFrame(rows, columns=["a", "b", "c"]).set_index(["a", "b"])

    index = MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=("a", "b"))
    result = pd.DataFrame([2, 3], columns=["c"], index=index)
    tm.assert_frame_equal(expected, result)
# ----------------------------------------------------------------------------
# from_product
# ----------------------------------------------------------------------------
def test_from_product_empty_zero_levels():
    """Check that ``from_product`` rejects an empty list of iterables."""
    # 0 levels
    no_iterables = []
    with pytest.raises(ValueError, match="Must pass non-zero number of levels/codes"):
        MultiIndex.from_product(no_iterables)
def test_from_product_empty_one_level():
    """Check ``from_product`` with a single empty iterable."""
    names = ["A"]
    mi = MultiIndex.from_product([[]], names=names)
    tm.assert_index_equal(mi.levels[0], Index([], name="A"))
    assert mi.names == names
@pytest.mark.parametrize(
    "first, second",
    [
        ([], []),
        (["foo", "bar", "baz"], []),
        ([], ["a", "b", "c"]),
    ],
)
def test_from_product_empty_two_levels(first, second):
    """Check ``from_product`` of two iterables when at least one is empty."""
    level_names = ["A", "B"]
    result = MultiIndex.from_product([first, second], names=level_names)
    # the levels are kept, but there are no codes
    expected = MultiIndex(levels=[first, second], codes=[[], []], names=level_names)
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("N", list(range(4)))
def test_from_product_empty_three_levels(N):
    """Check ``from_product`` with empty outer levels and a middle level of size N."""
    # GH12258
    level_names = ["A", "B", "C"]
    levels = [[], list(range(N)), []]
    result = MultiIndex.from_product(levels, names=level_names)
    expected = MultiIndex(levels=levels, codes=[[], [], []], names=level_names)
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
    "invalid_input",
    [
        1,
        [1],
        [1, 2],
        [[1], 2],
        "a",
        ["a"],
        ["a", "b"],
        [["a"], "b"],
    ],
)
def test_from_product_invalid_input(invalid_input):
    """Check ``from_product`` raises ``TypeError`` for non-iterable items."""
    # either of the two messages is accepted
    pattern = r"Input must be a list / sequence of iterables|Input must be list-like"
    with pytest.raises(TypeError, match=pattern):
        MultiIndex.from_product(iterables=invalid_input)
def test_from_product_datetimeindex():
    """Check the values of a product of integers with a DatetimeIndex."""
    dt_index = date_range("2000-01-01", periods=2)
    mi = MultiIndex.from_product([[1, 2], dt_index])

    # every integer paired with both days, integer varying slowest
    pairs = [
        (number, Timestamp(day))
        for number in (1, 2)
        for day in ("2000-01-01", "2000-01-02")
    ]
    etalon = construct_1d_object_array_from_listlike(pairs)
    tm.assert_numpy_array_equal(mi.values, etalon)
def test_from_product_rangeindex():
    """Check that a range-backed Index is kept unchanged as the first level."""
    # RangeIndex is preserved by factorize, so preserved in levels
    rng = Index(range(5))
    mi = MultiIndex.from_product([rng, ["a", "b"]])
    first_level = mi._levels[0]
    tm.assert_index_equal(first_level, rng, exact=True)
@pytest.mark.parametrize("ordered", [False, True])
@pytest.mark.parametrize("f", [lambda x: x, lambda x: Series(x), lambda x: x.values])
def test_from_product_index_series_categorical(ordered, f):
    """Check ``from_product`` keeps a categorical second level for each input kind."""
    # GH13743
    letters = list("abcaab")
    categories = list("bac")
    second = pd.CategoricalIndex(letters, categories=categories, ordered=ordered)

    # the first level has two entries, so the second level appears twice
    expected = pd.CategoricalIndex(
        letters + letters, categories=categories, ordered=ordered
    )
    result = MultiIndex.from_product([["foo", "bar"], f(second)])
    tm.assert_index_equal(result.get_level_values(1), expected)
def test_from_product():
    """Check ``from_product`` against the equivalent explicit list of tuples."""
    first = ["foo", "bar", "buz"]
    second = ["a", "b", "c"]
    names = ["first", "second"]

    result = MultiIndex.from_product([first, second], names=names)

    # every label of ``first`` paired with every label of ``second``
    tuples = [(outer, inner) for outer in first for inner in second]
    expected = MultiIndex.from_tuples(tuples, names=names)
    tm.assert_index_equal(result, expected)
def test_from_product_iterator():
    """Check ``from_product`` accepts an iterator and rejects a non-iterable."""
    # GH 18434
    first = ["foo", "bar", "buz"]
    second = ["a", "b", "c"]
    names = ["first", "second"]
    tuples = [(outer, inner) for outer in first for inner in second]
    expected = MultiIndex.from_tuples(tuples, names=names)

    # iterator as input
    result = MultiIndex.from_product(iter([first, second]), names=names)
    tm.assert_index_equal(result, expected)

    # Invalid non-iterable input
    with pytest.raises(
        TypeError, match="Input must be a list / sequence of iterables."
    ):
        MultiIndex.from_product(0)
@pytest.mark.parametrize(
    "a, b, expected_names",
    [
        (
            Series([1, 2, 3], name="foo"),
            Series(["a", "b"], name="bar"),
            ["foo", "bar"],
        ),
        (Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]),
        ([1, 2, 3], ["a", "b"], None),
    ],
)
def test_from_product_infer_names(a, b, expected_names):
    """Check that ``from_product`` takes level names from named inputs."""
    # GH27292
    inferred = MultiIndex.from_product([a, b])
    reference = MultiIndex(
        levels=[[1, 2, 3], ["a", "b"]],
        codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
        names=expected_names,
    )
    tm.assert_index_equal(inferred, reference)
def test_from_product_respects_none_names():
    """Check that ``names=None`` is honoured even when the Series are named."""
    # GH27292
    numbers = Series([1, 2, 3], name="foo")
    letters = Series(["a", "b"], name="bar")

    result = MultiIndex.from_product([numbers, letters], names=None)
    expected = MultiIndex(
        levels=[[1, 2, 3], ["a", "b"]],
        codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
        names=None,
    )
    tm.assert_index_equal(result, expected)
def test_from_product_readonly():
    """Check ``from_product`` gives the same result for a read-only array."""
    # GH#15286 passing read-only array to from_product
    numbers = np.array(range(3))
    letters = ["a", "b"]

    # build the reference while the array is still writeable
    expected = MultiIndex.from_product([numbers, letters])

    numbers.setflags(write=False)
    result = MultiIndex.from_product([numbers, letters])
    tm.assert_index_equal(result, expected)
def test_create_index_existing_name(idx):
    """Check ``Index(...)`` built from an existing MultiIndex, with and without ``name``."""
    # GH11193, when an existing index is passed, and a new name is not
    # specified, the new index should inherit the previous object name
    index = idx
    index.names = ["foo", "bar"]
    tuples = [
        ("foo", "one"),
        ("foo", "two"),
        ("bar", "one"),
        ("baz", "two"),
        ("qux", "one"),
        ("qux", "two"),
    ]

    result = Index(index)
    expected = Index(Index(tuples, dtype="object"))
    tm.assert_index_equal(result, expected)

    result = Index(index, name="A")
    expected = Index(Index(tuples, dtype="object"), name="A")
    tm.assert_index_equal(result, expected)
# ----------------------------------------------------------------------------
# from_frame
# ----------------------------------------------------------------------------
def test_from_frame():
    """Check ``from_frame`` takes rows as tuples and column labels as names."""
    # GH 22420
    rows = [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]]
    names = ["L1", "L2"]
    frame = pd.DataFrame(rows, columns=names)

    expected = MultiIndex.from_tuples(
        [("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")], names=names
    )
    tm.assert_index_equal(expected, MultiIndex.from_frame(frame))
@pytest.mark.parametrize(
    "non_frame",
    [
        Series([1, 2, 3, 4]),
        [1, 2, 3, 4],
        [[1, 2], [3, 4], [5, 6]],
        Index([1, 2, 3, 4]),
        np.array([[1, 2], [3, 4], [5, 6]]),
        27,
    ],
)
def test_from_frame_error(non_frame):
    """Check ``from_frame`` raises ``TypeError`` for anything but a DataFrame."""
    # GH 22420
    expected_msg = "Input must be a DataFrame"
    with pytest.raises(TypeError, match=expected_msg):
        MultiIndex.from_frame(non_frame)
def test_from_frame_dtype_fidelity():
    """Check that ``from_frame`` levels keep the dtypes of the frame's columns."""

    # GH 22420
    def make_columns():
        # fresh objects on every call, so the frame and the expected index
        # never share data
        return {
            "dates": date_range("19910905", periods=6, tz="US/Eastern"),
            "a": [1, 1, 1, 2, 2, 2],
            "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            "c": ["x", "x", "y", "z", "x", "y"],
        }

    df = pd.DataFrame(make_columns())
    original_dtypes = df.dtypes.to_dict()

    expected_columns = make_columns()
    expected_mi = MultiIndex.from_arrays(
        list(expected_columns.values()), names=list(expected_columns)
    )

    mi = MultiIndex.from_frame(df)
    mi_dtypes = dict(zip(mi.names, (level.dtype for level in mi.levels)))

    tm.assert_index_equal(expected_mi, mi)
    assert original_dtypes == mi_dtypes
@pytest.mark.parametrize(
    "names_in,names_out",
    [
        (None, [("L1", "x"), ("L2", "y")]),
        (["x", "y"], ["x", "y"]),
    ],
)
def test_from_frame_valid_names(names_in, names_out):
    """Check the level names ``from_frame`` uses with and without ``names``."""
    # GH 22420
    column_labels = MultiIndex.from_tuples([("L1", "x"), ("L2", "y")])
    frame = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
        columns=column_labels,
    )
    built = MultiIndex.from_frame(frame, names=names_in)
    assert built.names == names_out
@pytest.mark.parametrize(
    "names,expected_error_msg",
    [
        ("bad_input", "Names should be list-like for a MultiIndex"),
        (["a", "b", "c"], "Length of names must match number of levels in MultiIndex"),
    ],
)
def test_from_frame_invalid_names(names, expected_error_msg):
    """Check ``from_frame`` raises ``ValueError`` for unusable ``names``."""
    # GH 22420
    column_labels = MultiIndex.from_tuples([("L1", "x"), ("L2", "y")])
    frame = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
        columns=column_labels,
    )
    with pytest.raises(ValueError, match=expected_error_msg):
        MultiIndex.from_frame(frame, names=names)
def test_index_equal_empty_iterable():
    """Check two empty two-level indexes built in different ways compare equal."""
    # #16844
    names = ["a", "b"]
    from_constructor = MultiIndex(levels=[[], []], codes=[[], []], names=names)
    from_arrays = MultiIndex.from_arrays(arrays=[[], []], names=names)
    tm.assert_index_equal(from_constructor, from_arrays)
def test_raise_invalid_sortorder():
    """Check the constructor raises when ``sortorder`` overstates the sorting."""
    # Test that the MultiIndex constructor raises when an incorrect sortorder
    # is given
    # GH#28518
    levels = [[0, 1], [0, 1, 2]]

    # Correct sortorder
    MultiIndex(
        levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
    )

    # (expected message, codes, claimed sortorder)
    bad_cases = [
        (
            r".* sortorder 2 with lexsort_depth 1.*",
            [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]],
            2,
        ),
        (
            r".* sortorder 1 with lexsort_depth 0.*",
            [[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]],
            1,
        ),
    ]
    for pattern, codes, sortorder in bad_cases:
        with pytest.raises(ValueError, match=pattern):
            MultiIndex(levels=levels, codes=codes, sortorder=sortorder)
def test_datetimeindex():
    """Check which inputs produce ``DatetimeIndex`` levels."""
    days = ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"]
    tokyo = pd.DatetimeIndex(days * 2, tz="Asia/Tokyo")
    eastern = date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern")
    mi = MultiIndex.from_arrays([tokyo, eastern])

    # the repeated Tokyo timestamps collapse to three level values
    tm.assert_index_equal(mi.levels[0], pd.DatetimeIndex(days, tz="Asia/Tokyo"))
    tm.assert_index_equal(mi.levels[1], eastern)

    # from datetime combos
    # GH 7888
    date1 = np.datetime64("today")
    date2 = datetime.today()
    date3 = Timestamp.today()

    for d1, d2 in itertools.product([date1, date2, date3], repeat=2):
        combo = MultiIndex.from_product([[d1], [d2]])
        for level in combo.levels:
            assert isinstance(level, pd.DatetimeIndex)

    # but NOT date objects, matching Index behavior
    date4 = date.today()
    combo = MultiIndex.from_product([[date4], [date2]])
    assert not isinstance(combo.levels[0], pd.DatetimeIndex)
    assert isinstance(combo.levels[1], pd.DatetimeIndex)
def test_constructor_with_tz():
    """Check that from_arrays keeps names and tz-aware levels for Index and Series inputs."""
    pacific = pd.DatetimeIndex(
        ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"
    )
    tokyo = pd.DatetimeIndex(
        ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo"
    )

    # the same expectations apply whether the inputs are Index or Series objects
    for arrays in ([pacific, tokyo], [Series(pacific), Series(tokyo)]):
        result = MultiIndex.from_arrays(arrays)
        assert result.names == ["dt1", "dt2"]
        tm.assert_index_equal(result.levels[0], pacific)
        tm.assert_index_equal(result.levels[1], tokyo)
def test_multiindex_inference_consistency():
    """Check that date objects stay object-dtype in MultiIndex levels, as they do in Index."""
    # check that inference behavior matches the base class
    today = date.today()
    arr = [today, today]

    assert Index(arr).dtype == object

    # every constructor should infer the same level dtype
    builders = (
        lambda: MultiIndex.from_arrays([arr]),
        lambda: MultiIndex.from_product([arr]),
        lambda: MultiIndex.from_tuples([(x,) for x in arr]),
    )
    for build in builders:
        level = build().levels[0]
        assert level.dtype == object

View File

@ -0,0 +1,142 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
MultiIndex,
)
import pandas._testing as tm
def test_to_numpy(idx):
result = idx.to_numpy()
exp = idx.values
tm.assert_numpy_array_equal(result, exp)
def test_to_frame():
    """Check MultiIndex.to_frame for unnamed, named and renamed levels, including datetimes."""
    tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
    col_names = ["first", "second"]

    # unnamed levels
    mi = MultiIndex.from_tuples(tuples)
    expected = DataFrame(tuples)
    tm.assert_frame_equal(mi.to_frame(index=False), expected)
    expected.index = mi
    tm.assert_frame_equal(mi.to_frame(), expected)

    # named levels become the column labels
    mi = MultiIndex.from_tuples(tuples, names=col_names)
    expected = DataFrame(tuples, columns=col_names)
    tm.assert_frame_equal(mi.to_frame(index=False), expected)
    expected.index = mi
    tm.assert_frame_equal(mi.to_frame(), expected)

    # See GH-22580
    mi = MultiIndex.from_tuples(tuples)
    expected = DataFrame(tuples, columns=col_names)
    tm.assert_frame_equal(mi.to_frame(index=False, name=col_names), expected)
    expected.index = mi
    tm.assert_frame_equal(mi.to_frame(name=col_names), expected)

    msg = "'name' must be a list / sequence of column names."
    with pytest.raises(TypeError, match=msg):
        mi.to_frame(name="first")

    msg = "'name' should have same length as number of levels on index."
    with pytest.raises(ValueError, match=msg):
        mi.to_frame(name=["first"])

    # Tests for datetime index
    mi = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)])

    def expected_frame(first_label, second_label):
        # build a fresh frame each time so the two scenarios share no state
        return DataFrame(
            {
                first_label: np.repeat(np.arange(5, dtype="int64"), 3),
                second_label: np.tile(pd.date_range("20130101", periods=3), 5),
            }
        )

    expected = expected_frame(0, 1)
    tm.assert_frame_equal(mi.to_frame(index=False), expected)
    expected.index = mi
    tm.assert_frame_equal(mi.to_frame(), expected)

    # See GH-22580
    expected = expected_frame("first", "second")
    tm.assert_frame_equal(mi.to_frame(index=False, name=col_names), expected)
    expected.index = mi
    tm.assert_frame_equal(mi.to_frame(name=col_names), expected)
def test_to_frame_dtype_fidelity():
    """Check that to_frame(index=False) keeps each level's dtype in the resulting columns."""
    # GH 22420

    def make_columns():
        # fresh objects on every call so the index and the expected frame share nothing
        return {
            "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"),
            "a": [1, 1, 1, 2, 2, 2],
            "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            "c": ["x", "x", "y", "z", "x", "y"],
        }

    source = make_columns()
    mi = MultiIndex.from_arrays(list(source.values()), names=list(source))
    original_dtypes = dict(zip(mi.names, (level.dtype for level in mi.levels)))

    expected_df = DataFrame(make_columns())
    df = mi.to_frame(index=False)
    df_dtypes = df.dtypes.to_dict()

    tm.assert_frame_equal(df, expected_df)
    assert original_dtypes == df_dtypes
def test_to_frame_resulting_column_order():
    """Check that to_frame() emits columns in level order, not sorted by name."""
    # GH 22420
    expected = ["z", 0, "a"]
    arrays = [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]]
    mi = MultiIndex.from_arrays(arrays, names=expected)
    assert mi.to_frame().columns.tolist() == expected
def test_to_flat_index(idx):
expected = pd.Index(
(
("foo", "one"),
("foo", "two"),
("bar", "one"),
("baz", "two"),
("qux", "one"),
("qux", "two"),
),
tupleize_cols=False,
)
result = idx.to_flat_index()
tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,106 @@
from copy import (
copy,
deepcopy,
)
import pytest
from pandas import MultiIndex
import pandas._testing as tm
def assert_multiindex_copied(copy, original):
    """
    Assert that ``copy`` is a proper copy of the MultiIndex ``original``.

    Parameters
    ----------
    copy : MultiIndex
        The object produced by the copy operation under test.
    original : MultiIndex
        The index it was copied from.

    Raises
    ------
    AssertionError
        If levels, codes, names or sortorder do not match, or if ``codes``
        / ``names`` are the very same objects as on ``original``.
    """
    # Levels should be (at least) shallow copied
    tm.assert_copy(copy.levels, original.levels)

    # Codes: it doesn't matter which way they were copied, as long as the
    # values agree and the container is not the original object.
    # (The equality check used to be executed twice; once is sufficient.)
    tm.assert_almost_equal(copy.codes, original.codes)
    assert copy.codes is not original.codes

    # Names: it doesn't matter which way they were copied
    assert copy.names == original.names
    assert copy.names is not original.names

    # Sort order should be copied
    assert copy.sortorder == original.sortorder
def test_copy(idx):
    """Check that MultiIndex.copy() passes the shared copy assertions."""
    assert_multiindex_copied(idx.copy(), idx)
def test_shallow_copy(idx):
    """Check that the private _view() result passes the shared copy assertions."""
    assert_multiindex_copied(idx._view(), idx)
def test_view(idx):
    """Check that MultiIndex.view() passes the shared copy assertions."""
    assert_multiindex_copied(idx.view(), idx)
@pytest.mark.parametrize("func", [copy, deepcopy])
def test_copy_and_deepcopy(func):
    """Check that copy.copy / copy.deepcopy yield a distinct but equal MultiIndex."""
    original = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    duplicate = func(original)

    assert duplicate is not original
    assert duplicate.equals(original)
@pytest.mark.parametrize("deep", [True, False])
def test_copy_method(deep):
    """Check that MultiIndex.copy(deep=...) returns an equal index for both settings."""
    original = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    assert original.copy(deep=deep).equals(original)
@pytest.mark.parametrize("deep", [True, False])
@pytest.mark.parametrize(
    "kwarg, value",
    [
        ("names", ["third", "fourth"]),
    ],
)
def test_copy_method_kwargs(deep, kwarg, value):
    """Check that keyword overrides passed to MultiIndex.copy() show up on the copy."""
    # gh-12309: Check that the "name" argument as well as other kwargs are honored
    original = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    copy_kwargs = {kwarg: value, "deep": deep}
    duplicate = original.copy(**copy_kwargs)
    assert getattr(duplicate, kwarg) == value
@pytest.mark.parametrize("deep", [True, False])
@pytest.mark.parametrize(
"param_name, param_value",
[
("levels", [["foo2", "bar2"], ["fizz2", "buzz2"]]),
("codes", [[1, 0, 0, 0], [1, 1, 0, 0]]),
],
)
def test_copy_deprecated_parameters(deep, param_name, param_value):
# gh-36685
idx = MultiIndex(
levels=[["foo", "bar"], ["fizz", "buzz"]],
codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
names=["first", "second"],
)
with tm.assert_produces_warning(FutureWarning):
idx_copy = idx.copy(deep=deep, **{param_name: param_value})
assert [list(i) for i in getattr(idx_copy, param_name)] == param_value

View File

@ -0,0 +1,193 @@
import warnings
import numpy as np
import pytest
from pandas.errors import PerformanceWarning
import pandas as pd
from pandas import (
Index,
MultiIndex,
)
import pandas._testing as tm
def test_drop(idx):
dropped = idx.drop([("foo", "two"), ("qux", "one")])
index = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")])
dropped2 = idx.drop(index)
expected = idx[[0, 2, 3, 5]]
tm.assert_index_equal(dropped, expected)
tm.assert_index_equal(dropped2, expected)
dropped = idx.drop(["bar"])
expected = idx[[0, 1, 3, 4, 5]]
tm.assert_index_equal(dropped, expected)
dropped = idx.drop("foo")
expected = idx[[2, 3, 4, 5]]
tm.assert_index_equal(dropped, expected)
index = MultiIndex.from_tuples([("bar", "two")])
with pytest.raises(KeyError, match=r"^10$"):
idx.drop([("bar", "two")])
with pytest.raises(KeyError, match=r"^10$"):
idx.drop(index)
with pytest.raises(KeyError, match=r"^'two'$"):
idx.drop(["foo", "two"])
# partially correct argument
mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")])
with pytest.raises(KeyError, match=r"^10$"):
idx.drop(mixed_index)
# error='ignore'
dropped = idx.drop(index, errors="ignore")
expected = idx[[0, 1, 2, 3, 4, 5]]
tm.assert_index_equal(dropped, expected)
dropped = idx.drop(mixed_index, errors="ignore")
expected = idx[[0, 1, 2, 3, 5]]
tm.assert_index_equal(dropped, expected)
dropped = idx.drop(["foo", "two"], errors="ignore")
expected = idx[[2, 3, 4, 5]]
tm.assert_index_equal(dropped, expected)
# mixed partial / full drop
dropped = idx.drop(["foo", ("qux", "one")])
expected = idx[[2, 3, 5]]
tm.assert_index_equal(dropped, expected)
# mixed partial / full drop / error='ignore'
mixed_index = ["foo", ("qux", "one"), "two"]
with pytest.raises(KeyError, match=r"^'two'$"):
idx.drop(mixed_index)
dropped = idx.drop(mixed_index, errors="ignore")
expected = idx[[2, 3, 5]]
tm.assert_index_equal(dropped, expected)
def test_droplevel_with_names(idx):
index = idx[idx.get_loc("foo")]
dropped = index.droplevel(0)
assert dropped.name == "second"
index = MultiIndex(
levels=[Index(range(4)), Index(range(4)), Index(range(4))],
codes=[
np.array([0, 0, 1, 2, 2, 2, 3, 3]),
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
np.array([1, 0, 1, 1, 0, 0, 1, 0]),
],
names=["one", "two", "three"],
)
dropped = index.droplevel(0)
assert dropped.names == ("two", "three")
dropped = index.droplevel("two")
expected = index.droplevel(1)
assert dropped.equals(expected)
def test_droplevel_list():
    """Check droplevel with a list of levels, including the empty and invalid cases."""
    three_level = MultiIndex(
        levels=[Index(range(4)) for _ in range(3)],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
        names=["one", "two", "three"],
    )
    head = three_level[:2]

    # a list of names equals dropping the same levels one at a time
    assert head.droplevel(["three", "one"]).equals(head.droplevel(2).droplevel(0))

    # an empty list drops nothing
    assert head.droplevel([]).equals(three_level[:2])

    msg = (
        "Cannot remove 3 levels from an index with 3 levels: "
        "at least one level must be left"
    )
    with pytest.raises(ValueError, match=msg):
        head.droplevel(["one", "two", "three"])

    with pytest.raises(KeyError, match="'Level four not found'"):
        head.droplevel(["one", "four"])
def test_drop_not_lexsorted():
    """Check that drop on a non-lexsorted MultiIndex warns but matches the lexsorted result."""
    # GH 12078

    # define the lexsorted version of the multi-index
    lexsorted_mi = MultiIndex.from_tuples(
        [("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"]
    )
    assert lexsorted_mi._is_lexsorted()

    # and the not-lexsorted version
    frame = pd.DataFrame(
        columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]]
    )
    pivoted = frame.pivot_table(index="a", columns=["b", "c"], values="d")
    not_lexsorted_mi = pivoted.reset_index().columns
    assert not not_lexsorted_mi._is_lexsorted()

    # compare the results
    tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
    with tm.assert_produces_warning(PerformanceWarning):
        tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a"))
def test_drop_with_nan_in_index(nulls_fixture):
# GH#18853
mi = MultiIndex.from_tuples([("blah", nulls_fixture)], names=["name", "date"])
msg = r"labels \[Timestamp\('2001-01-01 00:00:00'\)\] not found in level"
with pytest.raises(KeyError, match=msg):
mi.drop(pd.Timestamp("2001"), level="date")
def test_drop_with_non_monotonic_duplicates():
    """Check that drop removes every occurrence of a duplicated, non-monotonic key."""
    # GH#33494
    mi = MultiIndex.from_tuples([(1, 2), (2, 3), (1, 2)])

    with warnings.catch_warnings():
        # silence PerformanceWarning for this call only
        warnings.simplefilter("ignore", PerformanceWarning)
        remaining = mi.drop((1, 2))

    tm.assert_index_equal(remaining, MultiIndex.from_tuples([(2, 3)]))
def test_single_level_drop_partially_missing_elements():
    """Check that drop(level=0) raises KeyError naming the labels that are missing."""
    # GH 37820
    mi = MultiIndex.from_tuples([(1, 2), (2, 2), (3, 2)])

    # (labels to drop, expected error pattern)
    cases = [
        (4, r"labels \[4\] not found in level"),
        ([1, 4], r"labels \[4\] not found in level"),
        ([np.nan], r"labels \[nan\] not found in level"),
        ([np.nan, 1, 2, 3], r"labels \[nan\] not found in level"),
    ]
    for labels, msg in cases:
        with pytest.raises(KeyError, match=msg):
            mi.drop(labels, level=0)

    # with NaN present in the level, only the absent label is reported
    mi = MultiIndex.from_tuples([(np.nan, 1), (1, 2)])
    msg = r"labels \['a'\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop([np.nan, 1, "a"], level=0)
def test_droplevel_multiindex_one_level():
    """Check that droplevel([]) on a one-level MultiIndex returns a flat, named Index."""
    # GH#37208
    single_level = MultiIndex.from_tuples([(2,)], names=("b",))
    tm.assert_index_equal(single_level.droplevel([]), Index([2], name="b"))

View File

@ -0,0 +1,339 @@
from itertools import product
import numpy as np
import pytest
from pandas._libs import hashtable
from pandas import (
DatetimeIndex,
MultiIndex,
Series,
)
import pandas._testing as tm
@pytest.mark.parametrize("names", [None, ["first", "second"]])
def test_unique(names):
    """Check that MultiIndex.unique() drops repeated rows and keeps the names."""
    # (input arrays, arrays expected after de-duplication)
    cases = [
        ([[1, 2, 1, 2], [1, 1, 1, 2]], [[1, 2, 2], [1, 1, 2]]),
        ([list("aaaa"), list("abab")], [list("aa"), list("ab")]),
        ([list("aaaa"), list("aaaa")], [["a"], ["a"]]),
    ]
    for arrays, unique_arrays in cases:
        mi = MultiIndex.from_arrays(arrays, names=names)
        res = mi.unique()
        exp = MultiIndex.from_arrays(unique_arrays, names=mi.names)
        tm.assert_index_equal(res, exp)

    # GH #20568 - empty MI
    empty = MultiIndex.from_arrays([[], []], names=names)
    tm.assert_index_equal(empty, empty.unique())
def test_unique_datetimelike():
    """Check unique() on a MultiIndex of naive and tz-aware datetimes containing NaT."""
    naive = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-01", "2015-01-01", "NaT", "NaT"]
    )
    aware = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-02", "2015-01-02", "NaT", "2015-01-01"],
        tz="Asia/Tokyo",
    )
    result = MultiIndex.from_arrays([naive, aware]).unique()

    expected_naive = DatetimeIndex(["2015-01-01", "2015-01-01", "NaT", "NaT"])
    expected_aware = DatetimeIndex(
        ["2015-01-01", "2015-01-02", "NaT", "2015-01-01"], tz="Asia/Tokyo"
    )
    tm.assert_index_equal(
        result, MultiIndex.from_arrays([expected_naive, expected_aware])
    )
@pytest.mark.parametrize("level", [0, "first", 1, "second"])
def test_unique_level(idx, level):
# GH #17896 - with level= argument
result = idx.unique(level=level)
expected = idx.get_level_values(level).unique()
tm.assert_index_equal(result, expected)
# With already unique level
mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], names=["first", "second"])
result = mi.unique(level=level)
expected = mi.get_level_values(level)
tm.assert_index_equal(result, expected)
# With empty MI
mi = MultiIndex.from_arrays([[], []], names=["first", "second"])
result = mi.unique(level=level)
expected = mi.get_level_values(level)
tm.assert_index_equal(result, expected)
def test_duplicate_multiindex_codes():
# GH 17464
# Make sure that a MultiIndex with duplicate levels throws a ValueError
msg = r"Level values must be unique: \[[A', ]+\] on level 0"
with pytest.raises(ValueError, match=msg):
mi = MultiIndex([["A"] * 10, range(10)], [[0] * 10, range(10)])
# And that using set_levels with duplicate levels fails
mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
msg = r"Level values must be unique: \[[AB', ]+\] on level 0"
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(FutureWarning):
mi.set_levels([["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]], inplace=True)
@pytest.mark.parametrize("names", [["a", "b", "a"], [1, 1, 2], [1, "a", 1]])
def test_duplicate_level_names(names):
    """Check that duplicate level names are accepted by from_product and rename."""
    # GH18872, GH19029
    product_levels = [[0, 1]] * 3
    named = MultiIndex.from_product(product_levels, names=names)
    assert named.names == names

    # With .rename()
    renamed = MultiIndex.from_product(product_levels).rename(names)
    assert renamed.names == names

    # With .rename(., level=)
    renamed.rename(names[1], level=1, inplace=True)
    renamed = renamed.rename([names[0], names[2]], level=[0, 2])
    assert renamed.names == names
def test_duplicate_meta_data():
    """Check that drop_duplicates() preserves the level names, whatever they are."""
    # GH 10115
    base = MultiIndex(
        levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
    )
    variants = [
        base,
        base.set_names([None, None]),
        base.set_names([None, "Num"]),
        base.set_names(["Upper", "Num"]),
    ]
    for variant in variants:
        assert variant.has_duplicates
        assert variant.drop_duplicates().names == variant.names
def test_has_duplicates(idx, idx_dup):
# see fixtures
assert idx.is_unique is True
assert idx.has_duplicates is False
assert idx_dup.is_unique is False
assert idx_dup.has_duplicates is True
mi = MultiIndex(
levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
)
assert mi.is_unique is False
assert mi.has_duplicates is True
# single instance of NaN
mi_nan = MultiIndex(
levels=[["a", "b"], [0, 1]], codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]]
)
assert mi_nan.is_unique is True
assert mi_nan.has_duplicates is False
# multiple instances of NaN
mi_nan_dup = MultiIndex(
levels=[["a", "b"], [0, 1]], codes=[[-1, -1, 0, 0, 1, 1], [-1, -1, 0, 1, 0, 1]]
)
assert mi_nan_dup.is_unique is False
assert mi_nan_dup.has_duplicates is True
def test_has_duplicates_from_tuples():
    """Check that an eight-level MultiIndex of distinct tuples reports no duplicates."""
    # GH 9075
    # Only the 4th and 8th positions vary between rows; the rest is constant.
    varying = [
        (5, 169),
        (7, 119),
        (9, 135),
        (13, 145),
        (14, 158),
        (16, 122),
        (17, 160),
        (18, 180),
        (20, 143),
        (21, 128),
        (22, 129),
        (25, 111),
        (28, 114),
        (29, 121),
        (31, 126),
        (32, 155),
        (33, 123),
        (12, 144),
    ]
    rows = [("x", "out", "z", a, "y", "in", "z", b) for a, b in varying]

    mi = MultiIndex.from_tuples(rows)
    assert not mi.has_duplicates
@pytest.mark.parametrize("nlevels", [4, 8])
@pytest.mark.parametrize("with_nulls", [True, False])
def test_has_duplicates_overflow(nlevels, with_nulls):
    """Check has_duplicates on many-level indexes, with and without missing codes."""
    # handle int64 overflow if possible
    # no overflow with 4
    # overflow possible with 8
    base_codes = np.tile(np.arange(500), 2)
    level_values = np.arange(500)

    if with_nulls:  # inject some null values
        base_codes[500] = -1  # common nan value
        codes = [base_codes.copy() for _ in range(nlevels)]
        for pos, level_codes in enumerate(codes):
            level_codes[500 + pos - nlevels // 2] = -1
        codes.append(np.array([-1, 1]).repeat(500))
    else:
        codes = [base_codes] * nlevels + [np.arange(2).repeat(500)]

    levels = [level_values] * nlevels + [[0, 1]]

    # no dups
    mi = MultiIndex(levels=levels, codes=codes)
    assert not mi.has_duplicates

    # with a dup
    if with_nulls:
        # append a copy of the first row's code to every level
        codes = [np.insert(level_codes, 1000, level_codes[0]) for level_codes in codes]
        mi = MultiIndex(levels=levels, codes=codes)
    else:
        rows = mi.values.tolist()
        mi = MultiIndex.from_tuples(rows + [rows[0]])

    assert mi.has_duplicates
@pytest.mark.parametrize(
"keep, expected",
[
("first", np.array([False, False, False, True, True, False])),
("last", np.array([False, True, True, False, False, False])),
(False, np.array([False, True, True, True, True, False])),
],
)
def test_duplicated(idx_dup, keep, expected):
result = idx_dup.duplicated(keep=keep)
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.arm_slow
def test_duplicated_large(keep):
    """
    Compare MultiIndex.duplicated against hashtable.duplicated on a large random index.

    ``keep`` is not defined in this file — presumably a shared fixture that
    supplies the ``keep`` argument; confirm against the conftest.
    """
    # GH 9125
    # three levels of n values each, and k * n rows
    n, k = 200, 5000
    levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)]
    # NOTE(review): no seed is set in this block, so the codes come from the
    # current state of the global NumPy RNG and differ between runs
    codes = [np.random.choice(n, k * n) for lev in levels]
    mi = MultiIndex(levels=levels, codes=codes)
    result = mi.duplicated(keep=keep)
    # reference answer computed on the materialised tuple values
    expected = hashtable.duplicated(mi.values, keep=keep)
    tm.assert_numpy_array_equal(result, expected)
def test_duplicated2():
    """Check that indexes made of unique rows, NaN included, report no duplicates."""
    # TODO: more informative test name
    # GH5873
    for second_value in [101, 102]:
        mi = MultiIndex.from_arrays([[101, second_value], [3.5, np.nan]])
        assert not mi.has_duplicates
        tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool"))

    first_labels = list("abcde")
    second_labels = list("WXYZ")
    # n: 1st level shape, m: 2nd level shape
    for n, m in product(range(1, 6), range(1, 5)):
        # all possible unique combinations, including nan
        all_codes = list(product(range(-1, n), range(-1, m)))
        mi = MultiIndex(
            levels=[first_labels[:n], second_labels[:m]],
            codes=np.random.permutation(all_codes).T,
        )
        assert len(mi) == (n + 1) * (m + 1)
        assert not mi.has_duplicates
        tm.assert_numpy_array_equal(
            mi.duplicated(), np.zeros(len(mi), dtype="bool")
        )
def test_duplicated_drop_duplicates():
    """Check duplicated() and drop_duplicates() for the default and each ``keep`` option."""
    # GH#4060
    idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2]))

    # (keyword arguments, expected duplicated mask, arrays left after dropping)
    cases = [
        (
            {},
            [False, False, False, True, False, False],
            ([1, 2, 3, 2, 3], [1, 1, 1, 2, 2]),
        ),
        (
            {"keep": "last"},
            [True, False, False, False, False, False],
            ([2, 3, 1, 2, 3], [1, 1, 1, 2, 2]),
        ),
        (
            {"keep": False},
            [True, False, False, True, False, False],
            ([2, 3, 2, 3], [1, 1, 2, 2]),
        ),
    ]
    for kwargs, mask, remaining in cases:
        duplicated = idx.duplicated(**kwargs)
        tm.assert_numpy_array_equal(duplicated, np.array(mask, dtype=bool))
        assert duplicated.dtype == bool

        expected = MultiIndex.from_arrays(remaining)
        tm.assert_index_equal(idx.drop_duplicates(**kwargs), expected)
@pytest.mark.parametrize(
    "dtype",
    [
        np.complex64,
        np.complex128,
    ],
)
def test_duplicated_series_complex_numbers(dtype):
    """Check Series.duplicated() on complex values, including NaN components."""
    # GH 17927
    values = [
        np.nan + np.nan * 1j,
        0,
        1j,
        1j,
        1,
        1 + 1j,
        1 + 2j,
        1 + 1j,
        np.nan,
        np.nan + np.nan * 1j,
    ]
    expected = Series(
        [False, False, False, True, False, False, False, True, False, True],
        dtype=bool,
    )
    result = Series(values, dtype=dtype).duplicated()
    tm.assert_series_equal(result, expected)
def test_multi_drop_duplicates_pos_args_deprecation():
# GH#41485
idx = MultiIndex.from_arrays([[1, 2, 3, 1], [1, 2, 3, 1]])
msg = (
"In a future version of pandas all arguments of "
"MultiIndex.drop_duplicates will be keyword-only"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = idx.drop_duplicates("last")
expected = MultiIndex.from_arrays([[2, 3, 1], [2, 3, 1]])
tm.assert_index_equal(expected, result)

View File

@ -0,0 +1,290 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
def test_equals(idx):
assert idx.equals(idx)
assert idx.equals(idx.copy())
assert idx.equals(idx.astype(object))
assert idx.equals(idx.to_flat_index())
assert idx.equals(idx.to_flat_index().astype("category"))
assert not idx.equals(list(idx))
assert not idx.equals(np.array(idx))
same_values = Index(idx, dtype=object)
assert idx.equals(same_values)
assert same_values.equals(idx)
if idx.nlevels == 1:
# do not test MultiIndex
assert not idx.equals(Series(idx))
def test_equals_op(idx):
# GH9947, GH10637
index_a = idx
n = len(index_a)
index_b = index_a[0:-1]
index_c = index_a[0:-1].append(index_a[-2:-1])
index_d = index_a[0:1]
with pytest.raises(ValueError, match="Lengths must match"):
index_a == index_b
expected1 = np.array([True] * n)
expected2 = np.array([True] * (n - 1) + [False])
tm.assert_numpy_array_equal(index_a == index_a, expected1)
tm.assert_numpy_array_equal(index_a == index_c, expected2)
# test comparisons with numpy arrays
array_a = np.array(index_a)
array_b = np.array(index_a[0:-1])
array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
array_d = np.array(index_a[0:1])
with pytest.raises(ValueError, match="Lengths must match"):
index_a == array_b
tm.assert_numpy_array_equal(index_a == array_a, expected1)
tm.assert_numpy_array_equal(index_a == array_c, expected2)
# test comparisons with Series
series_a = Series(array_a)
series_b = Series(array_b)
series_c = Series(array_c)
series_d = Series(array_d)
with pytest.raises(ValueError, match="Lengths must match"):
index_a == series_b
tm.assert_numpy_array_equal(index_a == series_a, expected1)
tm.assert_numpy_array_equal(index_a == series_c, expected2)
# cases where length is 1 for one of them
with pytest.raises(ValueError, match="Lengths must match"):
index_a == index_d
with pytest.raises(ValueError, match="Lengths must match"):
index_a == series_d
with pytest.raises(ValueError, match="Lengths must match"):
index_a == array_d
msg = "Can only compare identically-labeled Series objects"
with pytest.raises(ValueError, match=msg):
series_a == series_d
with pytest.raises(ValueError, match="Lengths must match"):
series_a == array_d
# comparing with a scalar should broadcast; note that we are excluding
# MultiIndex because in this case each item in the index is a tuple of
# length 2, and therefore is considered an array of length 2 in the
# comparison instead of a scalar
if not isinstance(index_a, MultiIndex):
expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
# assuming the 2nd to last item is unique in the data
item = index_a[-2]
tm.assert_numpy_array_equal(index_a == item, expected3)
tm.assert_series_equal(series_a == item, Series(expected3))
def test_compare_tuple():
    """Check all six comparison operators between a MultiIndex and one of its tuples."""
    # GH#21517
    mi = MultiIndex.from_product([[1, 2]] * 2)
    first_row = mi[0]

    nowhere = np.array([False, False, False, False])
    first_only = np.array([True, False, False, False])

    tm.assert_numpy_array_equal(mi == first_row, first_only)
    tm.assert_numpy_array_equal(mi != first_row, ~first_only)
    tm.assert_numpy_array_equal(mi < first_row, nowhere)
    tm.assert_numpy_array_equal(mi <= first_row, first_only)
    tm.assert_numpy_array_equal(mi > first_row, ~first_only)
    tm.assert_numpy_array_equal(mi >= first_row, ~nowhere)
def test_compare_tuple_strs():
    """Check ``==`` between a string-valued MultiIndex and tuples of matching/short length."""
    # GH#34180
    mi = MultiIndex.from_tuples([("a", "b"), ("b", "c"), ("c", "a")])

    # a full-length tuple matches its own row only
    tm.assert_numpy_array_equal(mi == ("c", "a"), np.array([False, False, True]))

    # a shorter tuple matches nothing
    tm.assert_numpy_array_equal(mi == ("c",), np.array([False, False, False]))
def test_equals_multi(idx):
assert idx.equals(idx)
assert not idx.equals(idx.values)
assert idx.equals(Index(idx.values))
assert idx.equal_levels(idx)
assert not idx.equals(idx[:-1])
assert not idx.equals(idx[-1])
# different number of levels
index = MultiIndex(
levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))],
codes=[
np.array([0, 0, 1, 2, 2, 2, 3, 3]),
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
np.array([1, 0, 1, 1, 0, 0, 1, 0]),
],
)
index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1])
assert not index.equals(index2)
assert not index.equal_levels(index2)
# levels are different
major_axis = Index(list(range(4)))
minor_axis = Index(list(range(2)))
major_codes = np.array([0, 0, 1, 2, 2, 3])
minor_codes = np.array([0, 1, 0, 0, 1, 0])
index = MultiIndex(
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
)
assert not idx.equals(index)
assert not idx.equal_levels(index)
# some of the labels are different
major_axis = Index(["foo", "bar", "baz", "qux"])
minor_axis = Index(["one", "two"])
major_codes = np.array([0, 0, 2, 2, 3, 3])
minor_codes = np.array([0, 1, 0, 1, 0, 1])
index = MultiIndex(
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
)
assert not idx.equals(index)
def test_identical(idx):
mi = idx.copy()
mi2 = idx.copy()
assert mi.identical(mi2)
mi = mi.set_names(["new1", "new2"])
assert mi.equals(mi2)
assert not mi.identical(mi2)
mi2 = mi2.set_names(["new1", "new2"])
assert mi.identical(mi2)
with tm.assert_produces_warning(FutureWarning):
# subclass-specific keywords to pd.Index
mi3 = Index(mi.tolist(), names=mi.names)
msg = r"Unexpected keyword arguments {'names'}"
with pytest.raises(TypeError, match=msg):
with tm.assert_produces_warning(FutureWarning):
# subclass-specific keywords to pd.Index
Index(mi.tolist(), names=mi.names, tupleize_cols=False)
mi4 = Index(mi.tolist(), tupleize_cols=False)
assert mi.identical(mi3)
assert not mi.identical(mi4)
assert mi.equals(mi4)
def test_equals_operator(idx):
# GH9785
assert (idx == idx).all()
def test_equals_missing_values():
    """Check that a one-row slice never equals the bare tuple at that position."""
    # make sure take is not using -1
    mi = MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))])

    assert not mi[0:1].equals(mi[0])
    assert not mi[1:2].equals(mi[1])
def test_equals_missing_values_differently_sorted():
    """Check that equals() is order-sensitive when the rows contain NaN."""
    # GH#38439
    reference = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])

    reordered = MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)])
    assert not reference.equals(reordered)

    same_order = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
    assert reference.equals(same_order)
def test_is_():
mi = MultiIndex.from_tuples(zip(range(10), range(10)))
assert mi.is_(mi)
assert mi.is_(mi.view())
assert mi.is_(mi.view().view().view().view())
mi2 = mi.view()
# names are metadata, they don't change id
mi2.names = ["A", "B"]
assert mi2.is_(mi)
assert mi.is_(mi2)
assert not mi.is_(mi.set_names(["C", "D"]))
mi2 = mi.view()
mi2.set_names(["E", "F"], inplace=True)
assert mi.is_(mi2)
# levels are inherent properties, they change identity
mi3 = mi2.set_levels([list(range(10)), list(range(10))])
assert not mi3.is_(mi2)
# shouldn't change
assert mi2.is_(mi)
mi4 = mi3.view()
# GH 17464 - Remove duplicate MultiIndex levels
with tm.assert_produces_warning(FutureWarning):
mi4.set_levels([list(range(10)), list(range(10))], inplace=True)
assert not mi4.is_(mi3)
mi5 = mi.view()
with tm.assert_produces_warning(FutureWarning):
mi5.set_levels(mi5.levels, inplace=True)
assert not mi5.is_(mi)
def test_is_all_dates(idx):
assert not idx._is_all_dates
def test_is_numeric(idx):
# MultiIndex is never numeric
assert not idx.is_numeric()
def test_multiindex_compare():
    """Check ``==`` and ``>`` on a single-level MultiIndex compared with itself."""
    # GH 21149
    # Ensure comparison operations for MultiIndex with nlevels == 1
    # behave consistently with those for MultiIndex with nlevels > 1
    midx = MultiIndex.from_product([[0, 1]])

    # Equality self-test: MultiIndex object vs self
    tm.assert_series_equal(Series(midx == midx), Series([True, True]))

    # Greater than comparison: MultiIndex object vs self
    tm.assert_series_equal(Series(midx > midx), Series([False, False]))

View File

@ -0,0 +1,233 @@
import warnings
import numpy as np
import pytest
import pandas as pd
from pandas import (
Index,
MultiIndex,
)
import pandas._testing as tm
def test_format(idx):
idx.format()
idx[:0].format()
def test_format_integer_names():
    """Smoke test: format(names=True) must not raise when the level names are integers."""
    int_named = MultiIndex(
        levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]
    )
    int_named.format(names=True)
def test_format_sparse_config(idx):
warn_filters = warnings.filters
warnings.filterwarnings("ignore", category=FutureWarning, module=".*format")
# GH1538
pd.set_option("display.multi_sparse", False)
result = idx.format()
assert result[1] == "foo two"
tm.reset_display_options()
warnings.filters = warn_filters
def test_format_sparse_display():
    """
    Check the sparsified format() output of a four-level MultiIndex.

    Row 3 is the first row whose outermost code changes, so it is compared
    against a line showing a value for every level.
    """
    index = MultiIndex(
        levels=[[0, 1], [0, 1], [0, 1], [0]],
        codes=[
            [0, 0, 0, 1, 1, 1],
            [0, 0, 1, 0, 0, 1],
            [0, 1, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 0],
        ],
    )

    result = index.format()
    # format() separates the level columns with two spaces
    assert result[3] == "1  0  0  0"
def test_repr_with_unicode_data():
    """Check that repr() of an index holding non-ASCII text contains no backslash escapes."""
    with pd.option_context("display.encoding", "UTF-8"):
        data = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
        frame = pd.DataFrame(data)
        mi = frame.set_index(["a", "b"]).index
        assert "\\" not in repr(mi)  # we don't want unicode-escaped
def test_repr_roundtrip_raises():
    """Check that evaluating a MultiIndex repr raises instead of rebuilding the index."""
    mi = MultiIndex.from_product([list("ab"), range(3)], names=["first", "second"])
    # eval is applied only to the repr generated on the line below
    text = repr(mi)
    msg = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=msg):
        eval(text)
def test_unicode_string_with_unicode():
    """Smoke test: str() of an index holding non-ASCII text must not raise."""
    data = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    frame = pd.DataFrame(data)
    mi = frame.set_index(["a", "b"]).index
    str(mi)
def test_repr_max_seq_item_setting(idx):
# GH10182
idx = idx.repeat(50)
with pd.option_context("display.max_seq_items", None):
repr(idx)
assert "..." not in str(idx)
class TestRepr:
    """Exact-text checks of ``MultiIndex.__repr__`` under several display settings.

    NOTE(review): the multi-line ``expected`` literals below carry no leading
    alignment whitespace and only single spaces inside the tuples; that looks
    like whitespace lost in extraction -- confirm against the real file before
    relying on these comparisons.
    """

    def test_unicode_repr_issues(self):
        # smoke test: repr of levels / level values with non-ASCII labels
        levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])]
        codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)]
        index = MultiIndex(levels=levels, codes=codes)

        repr(index.levels)
        repr(index.get_level_values(1))

    def test_repr_max_seq_items_equal_to_n(self, idx):
        # display.max_seq_items == n
        # the expected text lists all six entries, i.e. no truncation
        with pd.option_context("display.max_seq_items", 6):
            result = idx.__repr__()
            expected = """\
MultiIndex([('foo', 'one'),
('foo', 'two'),
('bar', 'one'),
('baz', 'two'),
('qux', 'one'),
('qux', 'two')],
names=['first', 'second'])"""
            assert result == expected

    def test_repr(self, idx):
        # single-entry slice
        result = idx[:1].__repr__()
        expected = """\
MultiIndex([('foo', 'one')],
names=['first', 'second'])"""
        assert result == expected

        # full index under the default options
        result = idx.__repr__()
        expected = """\
MultiIndex([('foo', 'one'),
('foo', 'two'),
('bar', 'one'),
('baz', 'two'),
('qux', 'one'),
('qux', 'two')],
names=['first', 'second'])"""
        assert result == expected

        # limit below the six entries: middle elided and ``length=`` appended
        with pd.option_context("display.max_seq_items", 5):
            result = idx.__repr__()
            expected = """\
MultiIndex([('foo', 'one'),
('foo', 'two'),
...
('qux', 'one'),
('qux', 'two')],
names=['first', 'second'], length=6)"""
            assert result == expected

        # display.max_seq_items == 1
        with pd.option_context("display.max_seq_items", 1):
            result = idx.__repr__()
            expected = """\
MultiIndex([...
('qux', 'two')],
names=['first', ...], length=6)"""
            assert result == expected

    def test_rjust(self, narrow_multi_index):
        # ``narrow_multi_index`` is a fixture: 2000 rows, levels a / b / dti
        mi = narrow_multi_index
        result = mi[:1].__repr__()
        expected = """\
MultiIndex([('a', 9, '2000-01-01 00:00:00')],
names=['a', 'b', 'dti'])"""
        assert result == expected

        # every 500th row: entries of differing width appear together
        result = mi[::500].__repr__()
        expected = """\
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
( 'a', 9, '2000-01-01 00:08:20'),
('abc', 10, '2000-01-01 00:16:40'),
('abc', 10, '2000-01-01 00:25:00')],
names=['a', 'b', 'dti'])"""
        assert result == expected

        # full index: first and last ten rows around an ellipsis
        result = mi.__repr__()
        expected = """\
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
( 'a', 9, '2000-01-01 00:00:01'),
( 'a', 9, '2000-01-01 00:00:02'),
( 'a', 9, '2000-01-01 00:00:03'),
( 'a', 9, '2000-01-01 00:00:04'),
( 'a', 9, '2000-01-01 00:00:05'),
( 'a', 9, '2000-01-01 00:00:06'),
( 'a', 9, '2000-01-01 00:00:07'),
( 'a', 9, '2000-01-01 00:00:08'),
( 'a', 9, '2000-01-01 00:00:09'),
...
('abc', 10, '2000-01-01 00:33:10'),
('abc', 10, '2000-01-01 00:33:11'),
('abc', 10, '2000-01-01 00:33:12'),
('abc', 10, '2000-01-01 00:33:13'),
('abc', 10, '2000-01-01 00:33:14'),
('abc', 10, '2000-01-01 00:33:15'),
('abc', 10, '2000-01-01 00:33:16'),
('abc', 10, '2000-01-01 00:33:17'),
('abc', 10, '2000-01-01 00:33:18'),
('abc', 10, '2000-01-01 00:33:19')],
names=['a', 'b', 'dti'], length=2000)"""
        assert result == expected

    def test_tuple_width(self, wide_multi_index):
        # ``wide_multi_index`` is a fixture with five levels; the expected text
        # shows only the first four values of each tuple followed by ``...``
        mi = wide_multi_index
        result = mi[:1].__repr__()
        expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)],
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
        assert result == expected

        result = mi[:10].__repr__()
        expected = """\
MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)],
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
        assert result == expected

        # full index: rows elided in the middle, tuples still cut to four values
        result = mi.__repr__()
        expected = """\
MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
( 'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
( 'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
( 'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
( 'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
( 'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
( 'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
( 'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
( 'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...),
...
('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...),
('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...),
('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...),
('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...),
('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...),
('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...),
('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...),
('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...),
('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...),
('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)],
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)"""
        assert result == expected

View File

@ -0,0 +1,114 @@
import numpy as np
import pandas as pd
from pandas import (
CategoricalIndex,
Index,
MultiIndex,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestGetLevelValues:
    """Checks on the element type returned by ``get_level_values``."""

    def test_get_level_values_box_datetime64(self):
        """Elements taken from a datetime level come back as ``Timestamp``."""
        day_level = date_range("1/1/2000", periods=4)
        mi = MultiIndex(
            levels=[day_level, [0, 1]],
            codes=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
        )
        first_value = mi.get_level_values(0)[0]
        assert isinstance(first_value, Timestamp)
def test_get_level_values(idx):
    """``get_level_values`` by position, by name, and on categorical levels."""
    by_position = idx.get_level_values(0)
    tm.assert_index_equal(
        by_position,
        Index(["foo", "foo", "bar", "baz", "qux", "qux"], name="first"),
    )
    assert by_position.name == "first"

    # a lookup by name must agree with the lookup by position
    by_name = idx.get_level_values("first")
    tm.assert_index_equal(by_name, idx.get_level_values(0))

    # GH 10460
    cat_levels = [CategoricalIndex(["A", "B"]), CategoricalIndex([1, 2, 3])]
    cat_codes = [np.array([0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])]
    cat_mi = MultiIndex(levels=cat_levels, codes=cat_codes)

    wanted_outer = CategoricalIndex(["A", "A", "A", "B", "B", "B"])
    tm.assert_index_equal(cat_mi.get_level_values(0), wanted_outer)
    wanted_inner = CategoricalIndex([1, 2, 3, 1, 2, 3])
    tm.assert_index_equal(cat_mi.get_level_values(1), wanted_inner)
def test_get_level_values_all_na():
    """Level values when one level holds nothing but NaN."""
    # GH#17924 when level entirely consists of nan
    mi = MultiIndex.from_arrays([[np.nan, np.nan, np.nan], ["a", np.nan, 1]])

    all_nan = mi.get_level_values(0)
    tm.assert_index_equal(all_nan, Index([np.nan, np.nan, np.nan], dtype=np.float64))

    mixed = mi.get_level_values(1)
    tm.assert_index_equal(mixed, Index(["a", np.nan, 1], dtype=object))
def test_get_level_values_int_with_na():
    """Level values of a numeric level that contains one or two NaN entries."""
    # GH#17924
    mi = MultiIndex.from_arrays([["a", "b", "b"], [1, np.nan, 2]])
    tm.assert_index_equal(mi.get_level_values(1), Index([1, np.nan, 2]))

    mi = MultiIndex.from_arrays([["a", "b", "b"], [np.nan, np.nan, 2]])
    tm.assert_index_equal(mi.get_level_values(1), Index([np.nan, np.nan, 2]))
def test_get_level_values_na():
    """Level values with NaN, with NaT, and for an empty MultiIndex."""
    # NaN-only level next to a mixed object level
    nan_mi = MultiIndex.from_arrays([[np.nan, np.nan, np.nan], ["a", np.nan, 1]])
    tm.assert_index_equal(
        nan_mi.get_level_values(0), Index([np.nan, np.nan, np.nan])
    )
    tm.assert_index_equal(nan_mi.get_level_values(1), Index(["a", np.nan, 1]))

    # datetime level containing NaT
    nat_mi = MultiIndex.from_arrays(
        [["a", "b", "b"], pd.DatetimeIndex([0, 1, pd.NaT])]
    )
    tm.assert_index_equal(
        nat_mi.get_level_values(1), pd.DatetimeIndex([0, 1, pd.NaT])
    )

    # no rows at all
    empty_mi = MultiIndex.from_arrays([[], []])
    tm.assert_index_equal(empty_mi.get_level_values(0), Index([], dtype=object))
def test_get_level_values_when_periods():
    """Levels rebuilt from a Period level's values must be monotonic increasing.

    A one-level MultiIndex is built over a ``PeriodIndex``; a second
    MultiIndex is then built from the values of each of its levels, and every
    level of that second index is checked.
    """
    # GH33131. See also discussion in GH32669.
    # This test can probably be removed when PeriodIndex._engine is removed.
    from pandas import (
        Period,
        PeriodIndex,
    )

    idx = MultiIndex.from_arrays(
        [PeriodIndex([Period("2019Q1"), Period("2019Q2")], name="b")]
    )
    idx2 = MultiIndex.from_arrays(
        [idx._get_level_values(level) for level in range(idx.nlevels)]
    )
    # ``is_monotonic`` is only an alias of ``is_monotonic_increasing`` and is
    # deprecated/removed in later pandas; the explicit name works everywhere.
    assert all(x.is_monotonic_increasing for x in idx2.levels)

View File

@ -0,0 +1,490 @@
import numpy as np
import pytest
from pandas.core.dtypes.dtypes import DatetimeTZDtype
import pandas as pd
from pandas import (
CategoricalIndex,
MultiIndex,
)
import pandas._testing as tm
def assert_matching(actual, expected, check_dtype=False):
    """Assert two sequences of array-likes are pairwise equal as numpy arrays.

    Both sides are converted with ``np.asarray`` so that callers need not
    spell out the internal representation; ``check_dtype`` is forwarded to
    ``tm.assert_numpy_array_equal``.
    """
    # avoid specifying internal representation
    # as much as possible
    assert len(actual) == len(expected)
    for got, want in zip(actual, expected):
        tm.assert_numpy_array_equal(
            np.asarray(got), np.asarray(want), check_dtype=check_dtype
        )
def test_get_level_number_integer(idx):
    """With integer level names, ``_get_level_number`` resolves names first."""
    idx.names = [1, 0]
    # the level *named* 1 sits at position 0 and vice versa
    for level_name, position in ((1, 0), (0, 1)):
        assert idx._get_level_number(level_name) == position

    with pytest.raises(
        IndexError, match="Too many levels: Index has only 2 levels, not 3"
    ):
        idx._get_level_number(2)
    with pytest.raises(KeyError, match="Level fourth not found"):
        idx._get_level_number("fourth")
def test_get_dtypes():
    """``MultiIndex.dtypes`` is a Series of level dtypes keyed by level name."""
    # Test MultiIndex.dtypes (# Gh37062)
    level_values = [
        [1, 2, 3],
        ["a", "b", "c"],
        pd.date_range("20200101", periods=2, tz="UTC"),
    ]
    mi = MultiIndex.from_product(level_values, names=["int", "string", "dt"])
    wanted = pd.Series(
        {
            "int": np.dtype("int64"),
            "string": np.dtype("O"),
            "dt": DatetimeTZDtype(tz="utc"),
        }
    )
    tm.assert_series_equal(wanted, mi.dtypes)
def test_get_dtypes_no_level_name():
    """Unnamed levels are keyed ``level_0``, ``level_1``, ... in ``dtypes``."""
    # Test MultiIndex.dtypes (# GH38580 )
    level_values = [
        [1, 2, 3],
        ["a", "b", "c"],
        pd.date_range("20200101", periods=2, tz="UTC"),
    ]
    mi = MultiIndex.from_product(level_values)
    wanted = pd.Series(
        {
            "level_0": np.dtype("int64"),
            "level_1": np.dtype("O"),
            "level_2": DatetimeTZDtype(tz="utc"),
        }
    )
    tm.assert_series_equal(wanted, mi.dtypes)
def test_get_dtypes_duplicate_level_names():
    """``dtypes`` keeps one entry per level even when level names repeat."""
    # Test MultiIndex.dtypes with non-unique level names (# GH45174)
    repeated_names = ["A", "A", "A"]
    mi = MultiIndex.from_product(
        [
            [1, 2, 3],
            ["a", "b", "c"],
            pd.date_range("20200101", periods=2, tz="UTC"),
        ],
        names=repeated_names,
    )
    actual = mi.dtypes
    wanted = pd.Series(
        [np.dtype("int64"), np.dtype("O"), DatetimeTZDtype(tz="utc")],
        index=["A", "A", "A"],
    )
    tm.assert_series_equal(actual, wanted)
def test_get_level_number_out_of_bounds(multiindex_dataframe_random_data):
    """Level numbers 2 and -3 are rejected with ``IndexError`` on the fixture."""
    frame = multiindex_dataframe_random_data
    for bad_level, pattern in (
        (2, "Too many levels"),
        (-3, "not a valid level number"),
    ):
        with pytest.raises(IndexError, match=pattern):
            frame.index._get_level_number(bad_level)
def test_set_name_methods(idx, index_names):
    """``set_names`` with and without ``level``/``inplace`` never alters ``idx``.

    Each non-inplace call must leave ``idx.names`` unchanged; each in-place
    call on the derived index must return None and update that index only.
    """
    # so long as these are synonyms, we don't need to test set_names
    assert idx.rename == idx.set_names

    suffixed = [name + "SUFFIX" for name in index_names]
    renamed = idx.set_names(suffixed)
    assert idx.names == index_names
    assert renamed.names == suffixed

    with pytest.raises(
        ValueError, match="Length of names must match number of levels in MultiIndex"
    ):
        renamed.set_names(suffixed + suffixed)

    resuffixed = [name + "SUFFIX2" for name in suffixed]
    assert renamed.set_names(resuffixed, inplace=True) is None
    assert renamed.names == resuffixed

    # set names for specific level (# GH7792)
    renamed = idx.set_names(suffixed[0], level=0)
    assert idx.names == index_names
    assert renamed.names == [suffixed[0], index_names[1]]

    assert renamed.set_names(resuffixed[0], level=0, inplace=True) is None
    assert renamed.names == [resuffixed[0], index_names[1]]

    # set names for multiple levels
    renamed = idx.set_names(suffixed, level=[0, 1])
    assert idx.names == index_names
    assert renamed.names == suffixed

    assert renamed.set_names(resuffixed, level=[0, 1], inplace=True) is None
    assert renamed.names == resuffixed
def test_set_levels_codes_directly(idx):
    """Assigning to ``.levels`` or ``.codes`` raises ``AttributeError``."""
    # setting levels/codes directly raises AttributeError
    suffixed_levels = [[label + "a" for label in level] for level in idx.levels]

    outer_codes, inner_codes = idx.codes
    shifted_codes = [
        [(code + 1) % 3 for code in outer_codes],
        [(code + 1) % 1 for code in inner_codes],
    ]

    pattern = "[Cc]an't set attribute"
    with pytest.raises(AttributeError, match=pattern):
        idx.levels = suffixed_levels
    with pytest.raises(AttributeError, match=pattern):
        idx.codes = shifted_codes
def test_set_levels(idx):
    """Exercise ``set_levels`` with and without ``level`` and ``inplace``.

    Non-inplace calls must leave ``idx`` untouched.  In-place calls run on a
    copy, must return None, and are wrapped in
    ``tm.assert_produces_warning(FutureWarning)``.  The final loop checks that
    rejected calls leave the levels and codes of ``idx`` unchanged.
    """
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    levels = idx.levels
    # every label gets an "a" appended, so each new level differs from the old
    new_levels = [[lev + "a" for lev in level] for level in levels]

    # level changing [w/o mutation]
    ind2 = idx.set_levels(new_levels)
    assert_matching(ind2.levels, new_levels)
    assert_matching(idx.levels, levels)

    # level changing [w/ mutation]
    ind2 = idx.copy()
    with tm.assert_produces_warning(FutureWarning):
        inplace_return = ind2.set_levels(new_levels, inplace=True)
    assert inplace_return is None
    assert_matching(ind2.levels, new_levels)

    # level changing specific level [w/o mutation]
    ind2 = idx.set_levels(new_levels[0], level=0)
    assert_matching(ind2.levels, [new_levels[0], levels[1]])
    assert_matching(idx.levels, levels)

    ind2 = idx.set_levels(new_levels[1], level=1)
    assert_matching(ind2.levels, [levels[0], new_levels[1]])
    assert_matching(idx.levels, levels)

    # level changing multiple levels [w/o mutation]
    ind2 = idx.set_levels(new_levels, level=[0, 1])
    assert_matching(ind2.levels, new_levels)
    assert_matching(idx.levels, levels)

    # level changing specific level [w/ mutation]
    ind2 = idx.copy()
    with tm.assert_produces_warning(FutureWarning):
        inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True)
    assert inplace_return is None
    assert_matching(ind2.levels, [new_levels[0], levels[1]])
    assert_matching(idx.levels, levels)

    ind2 = idx.copy()
    with tm.assert_produces_warning(FutureWarning):
        inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True)
    assert inplace_return is None
    assert_matching(ind2.levels, [levels[0], new_levels[1]])
    assert_matching(idx.levels, levels)

    # level changing multiple levels [w/ mutation]
    ind2 = idx.copy()
    with tm.assert_produces_warning(FutureWarning):
        inplace_return = ind2.set_levels(new_levels, level=[0, 1], inplace=True)
    assert inplace_return is None
    assert_matching(ind2.levels, new_levels)
    assert_matching(idx.levels, levels)

    # illegal level changing should not change levels
    # GH 13754
    original_index = idx.copy()
    for inplace in [True, False]:
        # each call below is expected both to warn and to raise; afterwards
        # ``idx`` is compared (dtype included) against the untouched copy
        with pytest.raises(ValueError, match="^On"):
            with tm.assert_produces_warning(FutureWarning):
                idx.set_levels(["c"], level=0, inplace=inplace)
        assert_matching(idx.levels, original_index.levels, check_dtype=True)

        with pytest.raises(ValueError, match="^On"):
            with tm.assert_produces_warning(FutureWarning):
                idx.set_codes([0, 1, 2, 3, 4, 5], level=0, inplace=inplace)
        assert_matching(idx.codes, original_index.codes, check_dtype=True)

        with pytest.raises(TypeError, match="^Levels"):
            with tm.assert_produces_warning(FutureWarning):
                idx.set_levels("c", level=0, inplace=inplace)
        assert_matching(idx.levels, original_index.levels, check_dtype=True)

        with pytest.raises(TypeError, match="^Codes"):
            with tm.assert_produces_warning(FutureWarning):
                idx.set_codes(1, level=0, inplace=inplace)
        assert_matching(idx.codes, original_index.codes, check_dtype=True)
def test_set_codes(idx):
    """Exercise ``set_codes`` with and without ``level`` and ``inplace``.

    Non-inplace calls must leave ``idx`` untouched.  In-place calls run on a
    copy, must return None, and are wrapped in
    ``tm.assert_produces_warning(FutureWarning)``.  The last section uses a
    130-value level and reverses its codes.
    """
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    codes = idx.codes
    major_codes, minor_codes = codes
    # derive replacement codes from the existing ones; ``% 1`` makes every
    # minor code 0
    major_codes = [(x + 1) % 3 for x in major_codes]
    minor_codes = [(x + 1) % 1 for x in minor_codes]
    new_codes = [major_codes, minor_codes]

    # changing codes w/o mutation
    ind2 = idx.set_codes(new_codes)
    assert_matching(ind2.codes, new_codes)
    assert_matching(idx.codes, codes)

    # changing label w/ mutation
    ind2 = idx.copy()
    with tm.assert_produces_warning(FutureWarning):
        inplace_return = ind2.set_codes(new_codes, inplace=True)
    assert inplace_return is None
    assert_matching(ind2.codes, new_codes)

    # codes changing specific level w/o mutation
    ind2 = idx.set_codes(new_codes[0], level=0)
    assert_matching(ind2.codes, [new_codes[0], codes[1]])
    assert_matching(idx.codes, codes)

    ind2 = idx.set_codes(new_codes[1], level=1)
    assert_matching(ind2.codes, [codes[0], new_codes[1]])
    assert_matching(idx.codes, codes)

    # codes changing multiple levels w/o mutation
    ind2 = idx.set_codes(new_codes, level=[0, 1])
    assert_matching(ind2.codes, new_codes)
    assert_matching(idx.codes, codes)

    # label changing specific level w/ mutation
    ind2 = idx.copy()
    with tm.assert_produces_warning(FutureWarning):
        inplace_return = ind2.set_codes(new_codes[0], level=0, inplace=True)
    assert inplace_return is None
    assert_matching(ind2.codes, [new_codes[0], codes[1]])
    assert_matching(idx.codes, codes)

    ind2 = idx.copy()
    with tm.assert_produces_warning(FutureWarning):
        inplace_return = ind2.set_codes(new_codes[1], level=1, inplace=True)
    assert inplace_return is None
    assert_matching(ind2.codes, [codes[0], new_codes[1]])
    assert_matching(idx.codes, codes)

    # codes changing multiple levels [w/ mutation]
    ind2 = idx.copy()
    with tm.assert_produces_warning(FutureWarning):
        inplace_return = ind2.set_codes(new_codes, level=[0, 1], inplace=True)
    assert inplace_return is None
    assert_matching(ind2.codes, new_codes)
    assert_matching(idx.codes, codes)

    # label changing for levels of different magnitude of categories
    ind = MultiIndex.from_tuples([(0, i) for i in range(130)])
    # reversed codes 129..0 for the second level
    new_codes = range(129, -1, -1)
    expected = MultiIndex.from_tuples([(0, i) for i in new_codes])

    # [w/o mutation]
    result = ind.set_codes(codes=new_codes, level=1)
    assert result.equals(expected)

    # [w/ mutation]
    result = ind.copy()
    with tm.assert_produces_warning(FutureWarning):
        result.set_codes(codes=new_codes, level=1, inplace=True)
    assert result.equals(expected)
def test_set_levels_codes_names_bad_input(idx):
    """Malformed arguments to ``set_levels``/``set_codes``/``set_names`` raise.

    Covers wrong outer length (``ValueError``), a single level passed where a
    list of list-likes is required (``TypeError``), and mismatches between the
    value passed and the ``level`` argument.
    """
    levels, codes = idx.levels, idx.codes
    names = idx.names

    # one entry supplied for a two-level index
    with pytest.raises(ValueError, match="Length of levels"):
        idx.set_levels([levels[0]])

    with pytest.raises(ValueError, match="Length of codes"):
        idx.set_codes([codes[0]])

    with pytest.raises(ValueError, match="Length of names"):
        idx.set_names([names[0]])

    # shouldn't scalar data error, instead should demand list-like
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_levels(levels[0])

    # shouldn't scalar data error, instead should demand list-like
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_codes(codes[0])

    # shouldn't scalar data error, instead should demand list-like
    with pytest.raises(TypeError, match="list-like"):
        idx.set_names(names[0])

    # should have equal lengths
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_levels(levels[0], level=[0, 1])

    with pytest.raises(TypeError, match="list-like"):
        idx.set_levels(levels, level=0)

    # should have equal lengths
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_codes(codes[0], level=[0, 1])

    with pytest.raises(TypeError, match="list-like"):
        idx.set_codes(codes, level=0)

    # should have equal lengths
    with pytest.raises(ValueError, match="Length of names"):
        idx.set_names(names[0], level=[0, 1])

    with pytest.raises(TypeError, match="Names must be a"):
        idx.set_names(names, level=0)
@pytest.mark.parametrize("inplace", [True, False])
def test_set_names_with_nlevel_1(inplace):
    """``set_names`` works on a one-level MultiIndex, in place or not."""
    # GH 21149
    # Ensure that .set_names for MultiIndex with
    # nlevels == 1 does not raise any errors
    wanted = MultiIndex(levels=[[0, 1]], codes=[[0, 1]], names=["first"])
    single_level = MultiIndex.from_product([[0, 1]])
    returned = single_level.set_names("first", level=0, inplace=inplace)

    # for the in-place case the mutated index itself is what gets compared
    actual = single_level if inplace else returned
    tm.assert_index_equal(actual, wanted)
def test_multi_set_names_pos_args_deprecation():
# GH#41485
idx = MultiIndex.from_product([["python", "cobra"], [2018, 2019]])
msg = (
"In a future version of pandas all arguments of MultiIndex.set_names "
"except for the argument 'names' will be keyword-only"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = idx.set_names(["kind", "year"], None)
expected = MultiIndex(
levels=[["python", "cobra"], [2018, 2019]],
codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=["kind", "year"],
)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("ordered", [True, False])
def test_set_levels_categorical(ordered):
    """A ``CategoricalIndex`` can replace a level, keeping categories and order."""
    # GH13854
    base = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])
    cat_level = CategoricalIndex(list("bac"), ordered=ordered)

    swapped = base.set_levels(cat_level, level=0)
    tm.assert_index_equal(
        swapped, MultiIndex(levels=[cat_level, [0, 1, 2, 3]], codes=base.codes)
    )

    # the level values must carry the categories/ordered flag of ``cat_level``
    level_values = swapped.get_level_values(0)
    wanted_values = CategoricalIndex(
        list("bacb"), categories=cat_level.categories, ordered=cat_level.ordered
    )
    tm.assert_index_equal(level_values, wanted_values)
def test_set_value_keeps_names():
    """Setting a value at a new row key via ``.at`` keeps the index names."""
    # motivating example from #3742
    name_labels = ["hans", "hans", "hans", "grethe", "grethe", "grethe"]
    number_labels = ["1", "2", "3"] * 2
    mi = MultiIndex.from_arrays([name_labels, number_labels], names=["Name", "Number"])
    frame = pd.DataFrame(
        np.random.randn(6, 4), columns=["one", "two", "three", "four"], index=mi
    ).sort_index()
    assert frame._is_copy is None
    assert frame.index.names == ("Name", "Number")

    # ("grethe", "4") is not among the keys built above
    frame.at[("grethe", "4"), "one"] = 99.34
    assert frame._is_copy is None
    assert frame.index.names == ("Name", "Number")
def test_set_levels_with_iterable():
    """``set_levels`` accepts a one-shot iterable (a ``map`` object) as levels."""
    # GH23273
    level_names = ["size", "color"]
    colors = ["black"] * 3
    mi = MultiIndex.from_arrays([[1, 2, 3], colors], names=level_names)

    relabelled = mi.set_levels(map(int, ["3", "2", "1"]), level="size")

    wanted = MultiIndex.from_arrays([[3, 2, 1], colors], names=["size", "color"])
    tm.assert_index_equal(relabelled, wanted)
@pytest.mark.parametrize("inplace", [True, False])
def test_set_codes_inplace_deprecated(idx, inplace):
    """Passing ``inplace`` (True or False) to ``set_codes`` warns."""
    reversed_codes = idx.codes[1][::-1]
    with tm.assert_produces_warning(FutureWarning):
        idx.set_codes(codes=reversed_codes, level=1, inplace=inplace)
@pytest.mark.parametrize("inplace", [True, False])
def test_set_levels_inplace_deprecated(idx, inplace):
    """Passing ``inplace`` (True or False) to ``set_levels`` warns."""
    copied_level = idx.levels[1].copy()
    with tm.assert_produces_warning(FutureWarning):
        idx.set_levels(levels=copied_level, level=1, inplace=inplace)
def test_set_levels_pos_args_deprecation():
# https://github.com/pandas-dev/pandas/issues/41485
idx = MultiIndex.from_tuples(
[
(1, "one"),
(2, "one"),
(3, "one"),
],
names=["foo", "bar"],
)
msg = (
r"In a future version of pandas all arguments of MultiIndex.set_levels except "
r"for the argument 'levels' will be keyword-only"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = idx.set_levels(["a", "b", "c"], 0)
expected = MultiIndex.from_tuples(
[
("a", "one"),
("b", "one"),
("c", "one"),
],
names=["foo", "bar"],
)
tm.assert_index_equal(result, expected)
def test_set_codes_pos_args_depreciation(idx):
    """Passing ``level`` positionally to ``set_codes`` emits a FutureWarning."""
    # https://github.com/pandas-dev/pandas/issues/41485
    warning_text = (
        r"In a future version of pandas all arguments of MultiIndex.set_codes except "
        r"for the argument 'codes' will be keyword-only"
    )
    outer_codes = [0, 0, 1, 2, 3, 3]
    inner_codes = [0, 1, 0, 1, 0, 1]
    with tm.assert_produces_warning(FutureWarning, match=warning_text):
        recoded = idx.set_codes([outer_codes, inner_codes], [0, 1])

    wanted = MultiIndex.from_tuples(
        [
            ("foo", "one"),
            ("foo", "two"),
            ("bar", "one"),
            ("baz", "two"),
            ("qux", "one"),
            ("qux", "two"),
        ],
        names=["first", "second"],
    )
    tm.assert_index_equal(recoded, wanted)

View File

@ -0,0 +1,891 @@
from datetime import timedelta
import re
import numpy as np
import pytest
from pandas.errors import (
InvalidIndexError,
PerformanceWarning,
)
import pandas as pd
from pandas import (
Categorical,
Index,
MultiIndex,
date_range,
)
import pandas._testing as tm
class TestSliceLocs:
    """``MultiIndex.slice_locs`` with full, partial, absent and NaN keys."""

    def test_slice_locs_partial(self, idx):
        """Bounds may be full tuples, outer labels only, or None (open)."""
        ordered, _ = idx.sortlevel(0)

        assert ordered.slice_locs(("foo", "two"), ("qux", "one")) == (1, 5)
        assert ordered.slice_locs(None, ("qux", "one")) == (0, 5)
        assert ordered.slice_locs(("foo", "two"), None) == (1, len(ordered))
        assert ordered.slice_locs("bar", "baz") == (2, 4)

    def test_slice_locs(self):
        """Positions from ``slice_locs`` select the same rows as frame slicing."""
        frame = tm.makeTimeDataFrame()
        long_form = frame.stack()
        mi = long_form.index

        # bounds that are present in the outer level
        bounds = slice(*mi.slice_locs(frame.index[5], frame.index[15]))
        picked = long_form[bounds]
        wanted = frame[5:16].stack()
        tm.assert_almost_equal(picked.values, wanted.values)

        # bounds shifted 30 seconds inwards from those labels
        half_minute = timedelta(seconds=30)
        bounds = slice(
            *mi.slice_locs(
                frame.index[5] + half_minute,
                frame.index[15] - half_minute,
            )
        )
        picked = long_form[bounds]
        wanted = frame[6:15].stack()
        tm.assert_almost_equal(picked.values, wanted.values)

    def test_slice_locs_with_type_mismatch(self):
        """Bounds of the wrong type for a level raise ``TypeError``."""
        pattern = "^Level type mismatch"

        frame = tm.makeTimeDataFrame()
        mi = frame.stack().index
        with pytest.raises(TypeError, match=pattern):
            mi.slice_locs((1, 3))
        with pytest.raises(TypeError, match=pattern):
            mi.slice_locs(frame.index[5] + timedelta(seconds=30), (5, 2))

        frame = tm.makeCustomDataframe(5, 5)
        mi = frame.stack().index
        with pytest.raises(TypeError, match=pattern):
            mi.slice_locs(timedelta(seconds=30))
        # TODO: Try creating a UnicodeDecodeError in exception message
        with pytest.raises(TypeError, match=pattern):
            mi.slice_locs(frame.index[1], (16, "a"))

    def test_slice_locs_not_sorted(self):
        """Full-depth keys raise ``KeyError`` on this index until it is sorted."""
        shared_level = np.arange(4)
        unsorted = MultiIndex(
            levels=[Index(shared_level), Index(shared_level), Index(shared_level)],
            codes=[
                np.array([0, 0, 1, 2, 2, 2, 3, 3]),
                np.array([0, 1, 0, 0, 0, 1, 0, 1]),
                np.array([1, 0, 1, 1, 0, 0, 1, 0]),
            ],
        )
        with pytest.raises(
            KeyError, match="[Kk]ey length.*greater than MultiIndex lexsort depth"
        ):
            unsorted.slice_locs((1, 0, 1), (2, 1, 0))

        # works
        ordered, _ = unsorted.sortlevel(0)
        # should there be a test case here???
        ordered.slice_locs((1, 0, 1), (2, 1, 0))

    def test_slice_locs_not_contained(self):
        """Bounds that are absent from the index still give slice positions."""
        # some searchsorted action
        mi = MultiIndex(
            levels=[[0, 2, 4, 6], [0, 2, 4]],
            codes=[[0, 0, 0, 1, 1, 2, 3, 3, 3], [0, 1, 2, 1, 2, 2, 0, 1, 2]],
        )

        # (start, end, expected positions)
        cases = [
            ((1, 0), (5, 2), (3, 6)),
            (1, 5, (3, 6)),
            ((2, 2), (5, 2), (3, 6)),
            (2, 5, (3, 6)),
            ((1, 0), (6, 3), (3, 8)),
            (-1, 10, (0, len(mi))),
        ]
        for start, end, wanted in cases:
            assert mi.slice_locs(start, end) == wanted

    @pytest.mark.parametrize(
        "index_arr,expected,start_idx,end_idx",
        [
            ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, None),
            ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, "b"),
            ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, ("b", "e")),
            ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), None),
            ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), "c"),
            ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), ("c", "e")),
        ],
    )
    def test_slice_locs_with_missing_value(
        self, index_arr, expected, start_idx, end_idx
    ):
        """NaN is accepted as a start bound, alone or inside a tuple."""
        # issue 19132
        mi = MultiIndex.from_arrays(index_arr)
        assert mi.slice_locs(start=start_idx, end=end_idx) == expected
class TestPutmask:
    """``MultiIndex.putmask`` argument validation and MultiIndex replacements."""

    def test_putmask_with_wrong_mask(self, idx):
        """A mask one longer, one shorter, or not an array at all is rejected."""
        # GH18368
        pattern = "putmask: mask and data must be the same size"
        size = len(idx)

        with pytest.raises(ValueError, match=pattern):
            idx.putmask(np.ones(size + 1, np.bool_), 1)
        with pytest.raises(ValueError, match=pattern):
            idx.putmask(np.ones(size - 1, np.bool_), 1)
        with pytest.raises(ValueError, match=pattern):
            idx.putmask("foo", 1)

    def test_putmask_multiindex_other(self):
        """Masked positions are taken from the other MultiIndex."""
        # GH#43212 `value` is also a MultiIndex
        target = MultiIndex.from_tuples([(np.nan, 6), (np.nan, 6), ("a", 4)])
        replacement = MultiIndex.from_tuples([("a", 1), ("a", 1), ("d", 1)])
        replace_where = np.array([True, True, False])

        merged = target.putmask(replace_where, replacement)

        wanted = MultiIndex.from_tuples([replacement[0], replacement[1], target[2]])
        tm.assert_index_equal(merged, wanted)
class TestGetIndexer:
def test_get_indexer(self):
major_axis = Index(np.arange(4))
minor_axis = Index(np.arange(2))
major_codes = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp)
minor_codes = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp)
index = MultiIndex(
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
)
idx1 = index[:5]
idx2 = index[[1, 3, 5]]
r1 = idx1.get_indexer(idx2)
tm.assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp))
r1 = idx2.get_indexer(idx1, method="pad")
e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp)
tm.assert_almost_equal(r1, e1)
r2 = idx2.get_indexer(idx1[::-1], method="pad")
tm.assert_almost_equal(r2, e1[::-1])
rffill1 = idx2.get_indexer(idx1, method="ffill")
tm.assert_almost_equal(r1, rffill1)
r1 = idx2.get_indexer(idx1, method="backfill")
e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp)
tm.assert_almost_equal(r1, e1)
r2 = idx2.get_indexer(idx1[::-1], method="backfill")
tm.assert_almost_equal(r2, e1[::-1])
rbfill1 = idx2.get_indexer(idx1, method="bfill")
tm.assert_almost_equal(r1, rbfill1)
# pass non-MultiIndex
r1 = idx1.get_indexer(idx2.values)
rexp1 = idx1.get_indexer(idx2)
tm.assert_almost_equal(r1, rexp1)
r1 = idx1.get_indexer([1, 2, 3])
assert (r1 == [-1, -1, -1]).all()
# create index with duplicates
idx1 = Index(list(range(10)) + list(range(10)))
idx2 = Index(list(range(20)))
msg = "Reindexing only valid with uniquely valued Index objects"
with pytest.raises(InvalidIndexError, match=msg):
idx1.get_indexer(idx2)
def test_get_indexer_nearest(self):
midx = MultiIndex.from_tuples([("a", 1), ("b", 2)])
msg = (
"method='nearest' not implemented yet for MultiIndex; "
"see GitHub issue 9365"
)
with pytest.raises(NotImplementedError, match=msg):
midx.get_indexer(["a"], method="nearest")
msg = "tolerance not implemented yet for MultiIndex"
with pytest.raises(NotImplementedError, match=msg):
midx.get_indexer(["a"], method="pad", tolerance=2)
def test_get_indexer_categorical_time(self):
# https://github.com/pandas-dev/pandas/issues/21390
midx = MultiIndex.from_product(
[
Categorical(["a", "b", "c"]),
Categorical(date_range("2012-01-01", periods=3, freq="H")),
]
)
result = midx.get_indexer(midx)
tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp))
@pytest.mark.parametrize(
"index_arr,labels,expected",
[
(
[[1, np.nan, 2], [3, 4, 5]],
[1, np.nan, 2],
np.array([-1, -1, -1], dtype=np.intp),
),
([[1, np.nan, 2], [3, 4, 5]], [(np.nan, 4)], np.array([1], dtype=np.intp)),
([[1, 2, 3], [np.nan, 4, 5]], [(1, np.nan)], np.array([0], dtype=np.intp)),
(
[[1, 2, 3], [np.nan, 4, 5]],
[np.nan, 4, 5],
np.array([-1, -1, -1], dtype=np.intp),
),
],
)
def test_get_indexer_with_missing_value(self, index_arr, labels, expected):
# issue 19132
idx = MultiIndex.from_arrays(index_arr)
result = idx.get_indexer(labels)
tm.assert_numpy_array_equal(result, expected)
def test_get_indexer_methods(self):
# https://github.com/pandas-dev/pandas/issues/29896
# test getting an indexer for another index with different methods
# confirms that getting an indexer without a filling method, getting an
# indexer and backfilling, and getting an indexer and padding all behave
# correctly in the case where all of the target values fall in between
# several levels in the MultiIndex into which they are getting an indexer
#
# visually, the MultiIndexes used in this test are:
# mult_idx_1:
# 0: -1 0
# 1: 2
# 2: 3
# 3: 4
# 4: 0 0
# 5: 2
# 6: 3
# 7: 4
# 8: 1 0
# 9: 2
# 10: 3
# 11: 4
#
# mult_idx_2:
# 0: 0 1
# 1: 3
# 2: 4
mult_idx_1 = MultiIndex.from_product([[-1, 0, 1], [0, 2, 3, 4]])
mult_idx_2 = MultiIndex.from_product([[0], [1, 3, 4]])
indexer = mult_idx_1.get_indexer(mult_idx_2)
expected = np.array([-1, 6, 7], dtype=indexer.dtype)
tm.assert_almost_equal(expected, indexer)
backfill_indexer = mult_idx_1.get_indexer(mult_idx_2, method="backfill")
expected = np.array([5, 6, 7], dtype=backfill_indexer.dtype)
tm.assert_almost_equal(expected, backfill_indexer)
# ensure the legacy "bfill" option functions identically to "backfill"
backfill_indexer = mult_idx_1.get_indexer(mult_idx_2, method="bfill")
expected = np.array([5, 6, 7], dtype=backfill_indexer.dtype)
tm.assert_almost_equal(expected, backfill_indexer)
pad_indexer = mult_idx_1.get_indexer(mult_idx_2, method="pad")
expected = np.array([4, 6, 7], dtype=pad_indexer.dtype)
tm.assert_almost_equal(expected, pad_indexer)
# ensure the legacy "ffill" option functions identically to "pad"
pad_indexer = mult_idx_1.get_indexer(mult_idx_2, method="ffill")
expected = np.array([4, 6, 7], dtype=pad_indexer.dtype)
tm.assert_almost_equal(expected, pad_indexer)
def test_get_indexer_three_or_more_levels(self):
# https://github.com/pandas-dev/pandas/issues/29896
# tests get_indexer() on MultiIndexes with 3+ levels
# visually, these are
# mult_idx_1:
# 0: 1 2 5
# 1: 7
# 2: 4 5
# 3: 7
# 4: 6 5
# 5: 7
# 6: 3 2 5
# 7: 7
# 8: 4 5
# 9: 7
# 10: 6 5
# 11: 7
#
# mult_idx_2:
# 0: 1 1 8
# 1: 1 5 9
# 2: 1 6 7
# 3: 2 1 6
# 4: 2 7 6
# 5: 2 7 8
# 6: 3 6 8
mult_idx_1 = MultiIndex.from_product([[1, 3], [2, 4, 6], [5, 7]])
mult_idx_2 = MultiIndex.from_tuples(
[
(1, 1, 8),
(1, 5, 9),
(1, 6, 7),
(2, 1, 6),
(2, 7, 7),
(2, 7, 8),
(3, 6, 8),
]
)
# sanity check
assert mult_idx_1.is_monotonic
assert mult_idx_1.is_unique
assert mult_idx_2.is_monotonic
assert mult_idx_2.is_unique
# show the relationships between the two
assert mult_idx_2[0] < mult_idx_1[0]
assert mult_idx_1[3] < mult_idx_2[1] < mult_idx_1[4]
assert mult_idx_1[5] == mult_idx_2[2]
assert mult_idx_1[5] < mult_idx_2[3] < mult_idx_1[6]
assert mult_idx_1[5] < mult_idx_2[4] < mult_idx_1[6]
assert mult_idx_1[5] < mult_idx_2[5] < mult_idx_1[6]
assert mult_idx_1[-1] < mult_idx_2[6]
indexer_no_fill = mult_idx_1.get_indexer(mult_idx_2)
expected = np.array([-1, -1, 5, -1, -1, -1, -1], dtype=indexer_no_fill.dtype)
tm.assert_almost_equal(expected, indexer_no_fill)
# test with backfilling
indexer_backfilled = mult_idx_1.get_indexer(mult_idx_2, method="backfill")
expected = np.array([0, 4, 5, 6, 6, 6, -1], dtype=indexer_backfilled.dtype)
tm.assert_almost_equal(expected, indexer_backfilled)
# now, the same thing, but forward-filled (aka "padded")
indexer_padded = mult_idx_1.get_indexer(mult_idx_2, method="pad")
expected = np.array([-1, 3, 5, 5, 5, 5, 11], dtype=indexer_padded.dtype)
tm.assert_almost_equal(expected, indexer_padded)
# now, do the indexing in the other direction
assert mult_idx_2[0] < mult_idx_1[0] < mult_idx_2[1]
assert mult_idx_2[0] < mult_idx_1[1] < mult_idx_2[1]
assert mult_idx_2[0] < mult_idx_1[2] < mult_idx_2[1]
assert mult_idx_2[0] < mult_idx_1[3] < mult_idx_2[1]
assert mult_idx_2[1] < mult_idx_1[4] < mult_idx_2[2]
assert mult_idx_2[2] == mult_idx_1[5]
assert mult_idx_2[5] < mult_idx_1[6] < mult_idx_2[6]
assert mult_idx_2[5] < mult_idx_1[7] < mult_idx_2[6]
assert mult_idx_2[5] < mult_idx_1[8] < mult_idx_2[6]
assert mult_idx_2[5] < mult_idx_1[9] < mult_idx_2[6]
assert mult_idx_2[5] < mult_idx_1[10] < mult_idx_2[6]
assert mult_idx_2[5] < mult_idx_1[11] < mult_idx_2[6]
indexer = mult_idx_2.get_indexer(mult_idx_1)
expected = np.array(
[-1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1], dtype=indexer.dtype
)
tm.assert_almost_equal(expected, indexer)
backfill_indexer = mult_idx_2.get_indexer(mult_idx_1, method="bfill")
expected = np.array(
[1, 1, 1, 1, 2, 2, 6, 6, 6, 6, 6, 6], dtype=backfill_indexer.dtype
)
tm.assert_almost_equal(expected, backfill_indexer)
pad_indexer = mult_idx_2.get_indexer(mult_idx_1, method="pad")
expected = np.array(
[0, 0, 0, 0, 1, 2, 5, 5, 5, 5, 5, 5], dtype=pad_indexer.dtype
)
tm.assert_almost_equal(expected, pad_indexer)
def test_get_indexer_crossing_levels(self):
# https://github.com/pandas-dev/pandas/issues/29896
# tests a corner case with get_indexer() with MultiIndexes where, when we
# need to "carry" across levels, proper tuple ordering is respected
#
# the MultiIndexes used in this test, visually, are:
# mult_idx_1:
# 0: 1 1 1 1
# 1: 2
# 2: 2 1
# 3: 2
# 4: 1 2 1 1
# 5: 2
# 6: 2 1
# 7: 2
# 8: 2 1 1 1
# 9: 2
# 10: 2 1
# 11: 2
# 12: 2 2 1 1
# 13: 2
# 14: 2 1
# 15: 2
#
# mult_idx_2:
# 0: 1 3 2 2
# 1: 2 3 2 2
mult_idx_1 = MultiIndex.from_product([[1, 2]] * 4)
mult_idx_2 = MultiIndex.from_tuples([(1, 3, 2, 2), (2, 3, 2, 2)])
# show the tuple orderings, which get_indexer() should respect
assert mult_idx_1[7] < mult_idx_2[0] < mult_idx_1[8]
assert mult_idx_1[-1] < mult_idx_2[1]
indexer = mult_idx_1.get_indexer(mult_idx_2)
expected = np.array([-1, -1], dtype=indexer.dtype)
tm.assert_almost_equal(expected, indexer)
backfill_indexer = mult_idx_1.get_indexer(mult_idx_2, method="bfill")
expected = np.array([8, -1], dtype=backfill_indexer.dtype)
tm.assert_almost_equal(expected, backfill_indexer)
pad_indexer = mult_idx_1.get_indexer(mult_idx_2, method="ffill")
expected = np.array([7, 15], dtype=pad_indexer.dtype)
tm.assert_almost_equal(expected, pad_indexer)
def test_get_indexer_kwarg_validation(self):
    """limit/tolerance without a fill method must raise ValueError (GH#41918)."""
    mi = MultiIndex.from_product([range(3), ["A", "B"]])
    target = mi[:-1]

    limit_msg = "limit argument only valid if doing pad, backfill or nearest"
    with pytest.raises(ValueError, match=limit_msg):
        mi.get_indexer(target, limit=4)

    tolerance_msg = "tolerance argument only valid if doing pad, backfill or nearest"
    with pytest.raises(ValueError, match=tolerance_msg):
        mi.get_indexer(target, tolerance="piano")
def test_getitem(idx):
    """Scalar, slice and boolean-mask __getitem__ on the fixture MultiIndex."""
    # scalar position gives back the tuple
    assert idx[2] == ("bar", "one")

    # a slice is the same as listing its positions
    window = idx[2:5]
    assert window.equals(idx[[2, 3, 4]])

    # a boolean mask works as a plain list and as an ndarray
    mask = [True, False, True, False, True, True]
    from_list = idx[mask]
    from_array = idx[np.array(mask)]
    kept = idx[[0, 2, 4, 5]]
    assert from_list.equals(kept)
    assert from_array.equals(kept)
def test_getitem_group_select(idx):
    """A first-level label on the level-0-sorted index resolves to a slice."""
    by_first_level = idx.sortlevel(0)[0]
    baz_loc = by_first_level.get_loc("baz")
    foo_loc = by_first_level.get_loc("foo")
    assert baz_loc == slice(3, 4)
    assert foo_loc == slice(0, 2)
@pytest.mark.parametrize("ind1", [[True] * 5, Index([True] * 5)])
@pytest.mark.parametrize(
    "ind2",
    [[True, False, True, False, False], Index([True, False, True, False, False])],
)
def test_getitem_bool_index_all(ind1, ind2):
    """Boolean masks given as a list or as an Index select the same rows (GH#22533)."""
    pairs = [(10, 1), (20, 2), (30, 3), (40, 4), (50, 5)]
    idx = MultiIndex.from_tuples(pairs)

    # an all-True mask returns everything
    tm.assert_index_equal(idx[ind1], idx)

    # a partial mask keeps positions 0 and 2 only
    subset = MultiIndex.from_tuples([(10, 1), (30, 3)])
    tm.assert_index_equal(idx[ind2], subset)
@pytest.mark.parametrize("ind1", [[True], Index([True])])
@pytest.mark.parametrize("ind2", [[False], Index([False])])
def test_getitem_bool_index_single(ind1, ind2):
    """Boolean masks on a one-row MultiIndex keep or drop that row (GH#22533)."""
    idx = MultiIndex.from_tuples([(10, 1)])
    tm.assert_index_equal(idx[ind1], idx)

    # masking the only row away leaves an empty two-level index
    empty_level = np.array([], dtype=np.int64)
    empty_level_2 = np.array([], dtype=np.int64)
    nothing = MultiIndex(levels=[empty_level, empty_level_2], codes=[[], []])
    tm.assert_index_equal(idx[ind2], nothing)
class TestGetLoc:
    """Tests for get_loc / get_loc_level across key shapes, dtypes and NaNs."""

    def test_get_loc(self, idx):
        """Full keys give positions; missing keys and fill methods raise."""
        assert idx.get_loc(("foo", "two")) == 1
        assert idx.get_loc(("baz", "two")) == 3
        with pytest.raises(KeyError, match=r"^10$"):
            idx.get_loc(("bar", "two"))
        with pytest.raises(KeyError, match=r"^'quux'$"):
            idx.get_loc("quux")

        msg = "only the default get_loc method is currently supported for MultiIndex"
        with pytest.raises(NotImplementedError, match=msg):
            idx.get_loc("foo", method="nearest")

        # 3 levels
        three_level = MultiIndex(
            levels=[Index(np.arange(4)) for _ in range(3)],
            codes=[
                np.array([0, 0, 1, 2, 2, 2, 3, 3]),
                np.array([0, 1, 0, 0, 0, 1, 0, 1]),
                np.array([1, 0, 1, 1, 0, 0, 1, 0]),
            ],
        )
        with pytest.raises(KeyError, match=r"^\(1, 1\)$"):
            three_level.get_loc((1, 1))
        assert three_level.get_loc((2, 0)) == slice(3, 5)

    def test_get_loc_duplicates(self):
        """Flat Index lookups with repeated labels."""
        all_same = Index([2, 2, 2, 2])
        assert all_same.get_loc(2) == slice(0, 4)

        letters = Index(["c", "a", "a", "b", "b"])
        assert letters.get_loc("c") == 0

        with pytest.raises(KeyError, match="2"):
            letters.get_loc(2)

    def test_get_loc_level(self):
        """get_loc_level returns the location plus the index of the remaining levels."""
        index = MultiIndex(
            levels=[Index(np.arange(4)) for _ in range(3)],
            codes=[
                np.array([0, 0, 1, 2, 2, 2, 3, 3]),
                np.array([0, 1, 0, 0, 0, 1, 0, 1]),
                np.array([1, 0, 1, 1, 0, 0, 1, 0]),
            ],
        )

        # a partial key leaves the last level behind
        loc, remainder = index.get_loc_level((0, 1))
        partial_loc = slice(1, 2)
        leftover = index[partial_loc].droplevel(0).droplevel(0)
        assert loc == partial_loc
        assert remainder.equals(leftover)

        # a full key leaves nothing behind
        loc, remainder = index.get_loc_level((0, 1, 0))
        assert loc == 1
        assert remainder is None

        with pytest.raises(KeyError, match=r"^\(2, 2\)$"):
            index.get_loc_level((2, 2))
        # GH 22221: unused label
        with pytest.raises(KeyError, match=r"^2$"):
            index.drop(2).get_loc_level(2)
        # Unused label on unsorted level:
        with pytest.raises(KeyError, match=r"^2$"):
            index.drop(1, level=2).get_loc_level(2, level=2)

        single_outer = MultiIndex(
            levels=[[2000], list(range(4))],
            codes=[np.array([0, 0, 0, 0]), np.array([0, 1, 2, 3])],
        )
        loc, remainder = single_outer.get_loc_level((2000, slice(None, None)))
        assert loc == slice(None, None)
        assert remainder.equals(single_outer.droplevel(0))

    @pytest.mark.parametrize("dtype1", [int, float, bool, str])
    @pytest.mark.parametrize("dtype2", [int, float, bool, str])
    def test_get_loc_multiple_dtypes(self, dtype1, dtype2):
        """An entry of the index is found at its own position (GH 18520)."""
        first = np.array([0, 1]).astype(dtype1)
        second = np.array([0, 1]).astype(dtype2)
        idx = MultiIndex.from_product([first, second])
        assert idx.get_loc(idx[2]) == 2

    @pytest.mark.parametrize("level", [0, 1])
    @pytest.mark.parametrize("dtypes", [[int, float], [float, int]])
    def test_get_loc_implicit_cast(self, level, dtypes):
        """Int keys match float levels and vice versa (GH 18818, GH 15994)."""
        lev_dtype, key_dtype = dtypes

        levels = [["a", "b"], ["c", "d"]]
        levels[level] = np.array([0, 1], dtype=lev_dtype)

        key = ["b", "d"]
        key[level] = key_dtype(1)

        idx = MultiIndex.from_product(levels)
        assert idx.get_loc(tuple(key)) == 3

    def test_get_loc_cast_bool(self):
        """Int keys are cast to bool, but bool keys are not cast to int (GH 19086)."""
        idx = MultiIndex.from_product([[False, True], np.arange(2, dtype="int64")])

        assert idx.get_loc((0, 1)) == 1
        assert idx.get_loc((1, 0)) == 2

        with pytest.raises(KeyError, match=r"^\(False, True\)$"):
            idx.get_loc((False, True))
        with pytest.raises(KeyError, match=r"^\(True, False\)$"):
            idx.get_loc((True, False))

    @pytest.mark.parametrize("level", [0, 1])
    def test_get_loc_nan(self, level, nulls_fixture):
        """A null value inside a key is located like any other label (GH 18485)."""
        levels = [["a", "b"], ["c", "d"]]
        levels[level] = np.array([0, nulls_fixture], dtype=type(nulls_fixture))

        key = ["b", "d"]
        key[level] = nulls_fixture

        idx = MultiIndex.from_product(levels)
        assert idx.get_loc(tuple(key)) == 3

    def test_get_loc_missing_nan(self):
        """Absent scalar keys raise KeyError; a list key raises InvalidIndexError (GH 8569)."""
        idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]])
        assert isinstance(idx.get_loc(1), slice)

        with pytest.raises(KeyError, match=r"^3$"):
            idx.get_loc(3)
        with pytest.raises(KeyError, match=r"^nan$"):
            idx.get_loc(np.nan)
        with pytest.raises(InvalidIndexError, match=r"\[nan\]"):
            # a list-like key is rejected with InvalidIndexError
            idx.get_loc([np.nan])

    def test_get_loc_with_values_including_missing_values(self):
        """NaN can be used as a key, alone or as part of a tuple (issue 19132)."""
        two_level = MultiIndex.from_product([[np.nan, 1]] * 2)
        assert two_level.get_loc(np.nan) == slice(0, 2, None)

        one_level = MultiIndex.from_arrays([[np.nan, 1, 2, np.nan]])
        nan_mask = np.array([True, False, False, True])
        tm.assert_numpy_array_equal(one_level.get_loc(np.nan), nan_mask)

        three_level = MultiIndex.from_product([[np.nan, 1]] * 3)
        assert three_level.get_loc((np.nan, 1)) == slice(2, 4, None)

    def test_get_loc_duplicates2(self):
        # TODO: de-duplicate with test_get_loc_duplicates above?
        tagged = MultiIndex(
            levels=[["D", "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]],
            codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
            names=["tag", "day"],
        )
        assert tagged.get_loc("D") == slice(0, 3)

    def test_get_loc_past_lexsort_depth(self):
        """Looking up beyond the declared sort depth warns but still works (GH#30053)."""
        idx = MultiIndex(
            levels=[["a"], [0, 7], [1]],
            codes=[[0, 0], [1, 0], [0, 0]],
            names=["x", "y", "z"],
            sortorder=0,
        )
        with tm.assert_produces_warning(PerformanceWarning):
            # PerformanceWarning: indexing past lexsort depth may impact performance
            found = idx.get_loc(("a", 7))
        assert found == slice(0, 1, None)

    def test_multiindex_get_loc_list_raises(self):
        """An empty list is not a valid key (GH#35878)."""
        idx = MultiIndex.from_tuples([("a", 1), ("b", 2)])
        with pytest.raises(InvalidIndexError, match=r"\[\]"):
            idx.get_loc([])

    def test_get_loc_nested_tuple_raises_keyerror(self):
        """A tuple nested inside the key raises KeyError, not TypeError."""
        mi = MultiIndex.from_product([range(3), range(4), range(5), range(6)])
        key = ((2, 3, 4), "foo")
        pattern = re.escape(str(key))
        with pytest.raises(KeyError, match=pattern):
            mi.get_loc(key)
class TestWhere:
    """MultiIndex.where is expected to raise NotImplementedError."""

    def test_where(self):
        """A scalar condition is rejected."""
        mi = MultiIndex.from_tuples([("A", 1), ("A", 2)])
        with pytest.raises(
            NotImplementedError,
            match=r"\.where is not supported for MultiIndex operations",
        ):
            mi.where(True)

    def test_where_array_like(self, listlike_box):
        """A list-like condition, boxed by the fixture, is rejected as well."""
        mi = MultiIndex.from_tuples([("A", 1), ("A", 2)])
        boxed_cond = listlike_box([False, True])
        with pytest.raises(
            NotImplementedError,
            match=r"\.where is not supported for MultiIndex operations",
        ):
            mi.where(boxed_cond)
class TestContains:
    """Tests for the ``in`` operator (``__contains__``) on MultiIndex."""

    def test_contains_top_level(self):
        """A first-level label is in the index but not in its engine."""
        midx = MultiIndex.from_product([["A", "B"], [1, 2]])
        assert "A" in midx
        assert "A" not in midx._engine

    def test_contains_with_nat(self):
        """Every value of an index holding a missing datetime is contained in it."""
        # MI with a NaT (code -1 in the datetime level)
        mi = MultiIndex(
            levels=[["C"], date_range("2012-01-01", periods=5)],
            codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]],
            names=[None, "B"],
        )
        assert ("C", pd.Timestamp("2012-01-01")) in mi
        for val in mi.values:
            assert val in mi

    def test_contains(self, idx):
        """Present tuples are found; absent tuples and None are not."""
        assert ("foo", "two") in idx
        assert ("bar", "two") not in idx
        assert None not in idx

    def test_contains_with_missing_value(self):
        """NaN membership on single- and two-level indexes (GH#19132)."""
        idx = MultiIndex.from_arrays([[1, np.nan, 2]])
        assert np.nan in idx

        idx = MultiIndex.from_arrays([[1, 2], [np.nan, 3]])
        assert np.nan not in idx
        assert (1, np.nan) in idx

    def test_multiindex_contains_dropped(self):
        """Dropped labels stay in .levels but are no longer in the index (GH#19027)."""
        idx = MultiIndex.from_product([[1, 2], [3, 4]])
        assert 2 in idx
        idx = idx.drop(2)

        # drop implementation keeps 2 in the levels
        assert 2 in idx.levels[0]
        # but it should no longer be in the index itself
        assert 2 not in idx

        # also applies to strings
        idx = MultiIndex.from_product([["a", "b"], ["c", "d"]])
        assert "a" in idx
        idx = idx.drop("a")
        assert "a" in idx.levels[0]
        assert "a" not in idx

    def test_contains_td64_level(self):
        """Timedelta values and their string form are found in a td64 level (GH#24570)."""
        tx = pd.timedelta_range("09:30:00", "16:00:00", freq="30 min")
        idx = MultiIndex.from_arrays([tx, np.arange(len(tx))])
        assert tx[0] in idx
        assert "element_not_exit" not in idx
        assert "0 day 09:30:00" in idx

    @pytest.mark.slow
    def test_large_mi_contains(self):
        """An out-of-range tuple is not in a million-row index (GH#10645)."""
        result = MultiIndex.from_arrays([range(10**6), range(10**6)])
        # use the dedicated "not in" operator rather than negating "in"
        assert (10**6, 0) not in result
def test_timestamp_multiindex_indexer():
    """Partial-string date slicing through .loc on a MultiIndex.

    https://github.com/pandas-dev/pandas/issues/26944
    """
    all_hours = date_range("2019-01-01T00:15:33", periods=100, freq="H", name="date")
    idx = MultiIndex.from_product([all_hours, ["x"], [3]])
    df = pd.DataFrame({"foo": np.arange(len(idx))}, idx)

    result = df.loc[pd.IndexSlice["2019-1-2":, "x", :], "foo"]

    selected_hours = date_range(
        start="2019-01-02T00:15:33",
        end="2019-01-05T03:15:33",
        freq="H",
        name="date",
    )
    qidx = MultiIndex.from_product([selected_hours, ["x"], [3]])
    # the selection is expected to start at value 24
    first_value = 24
    should_be = pd.Series(
        data=np.arange(first_value, len(qidx) + first_value), index=qidx, name="foo"
    )
    tm.assert_series_equal(result, should_be)
@pytest.mark.parametrize(
    "index_arr,expected,target,algo",
    [
        ([[np.nan, "a", "b"], ["c", "d", "e"]], 0, np.nan, "left"),
        ([[np.nan, "a", "b"], ["c", "d", "e"]], 1, (np.nan, "c"), "right"),
        ([["a", "b", "c"], ["d", np.nan, "d"]], 1, ("b", np.nan), "left"),
    ],
)
def test_get_slice_bound_with_missing_value(index_arr, expected, target, algo):
    """get_slice_bound accepts NaN labels; its 'kind' argument warns (issue 19132)."""
    midx = MultiIndex.from_arrays(index_arr)
    with tm.assert_produces_warning(FutureWarning, match="'kind' argument"):
        bound = midx.get_slice_bound(target, side=algo, kind="loc")
    assert bound == expected
@pytest.mark.parametrize(
    "index_arr,expected,start_idx,end_idx",
    [
        ([[np.nan, 1, 2], [3, 4, 5]], slice(0, 2, None), np.nan, 1),
        ([[np.nan, 1, 2], [3, 4, 5]], slice(0, 3, None), np.nan, (2, 5)),
        ([[1, 2, 3], [4, np.nan, 5]], slice(1, 3, None), (2, np.nan), 3),
        ([[1, 2, 3], [4, np.nan, 5]], slice(1, 3, None), (2, np.nan), (3, 5)),
    ],
)
def test_slice_indexer_with_missing_value(index_arr, expected, start_idx, end_idx):
    """slice_indexer accepts NaN in either bound (issue 19132)."""
    midx = MultiIndex.from_arrays(index_arr)
    computed = midx.slice_indexer(start=start_idx, end=end_idx)
    assert computed == expected
def test_pyint_engine():
    """Lookups stay collision-free when the codes do not fit in 64 bits.

    GH#18519 : when combinations of codes cannot be represented in 64
    bits, the index underlying the MultiIndex engine works with Python
    integers, rather than uint64.
    """
    N = 5
    keys = [
        tuple(arr)
        for arr in [
            [0] * 10 * N,
            [1] * 10 * N,
            [2] * 10 * N,
            [np.nan] * N + [2] * 9 * N,
            [0] * N + [2] * 9 * N,
            [np.nan] * N + [2] * 8 * N + [0] * N,
        ]
    ]
    # Each level contains 4 elements (including NaN), so it is represented
    # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a
    # 64 bit engine and truncating the first levels, the fourth and fifth
    # keys would collide; if truncating the last levels, the fifth and
    # sixth; if rotating bits rather than shifting, the third and fifth.

    # the index does not depend on the loop variable, so build it once
    index = MultiIndex.from_tuples(keys)

    for position, key in enumerate(keys):
        assert index.get_loc(key) == position

        expected = np.arange(position + 1, dtype=np.intp)
        result = index.get_indexer([keys[i] for i in expected])
        tm.assert_numpy_array_equal(result, expected)

    # With missing key:
    idces = range(len(keys))
    expected = np.array([-1] + list(idces), dtype=np.intp)
    missing = tuple([0, 1] * 5 * N)
    result = index.get_indexer([missing] + [keys[i] for i in idces])
    tm.assert_numpy_array_equal(result, expected)

View File

@ -0,0 +1,280 @@
import re
import numpy as np
import pytest
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
import pandas as pd
from pandas import (
IntervalIndex,
MultiIndex,
RangeIndex,
)
import pandas._testing as tm
from pandas.core.api import Int64Index
def test_labels_dtypes():
    """Codes use the smallest signed integer dtype that fits (GH 8456)."""
    pair_index = MultiIndex.from_tuples([("A", 1), ("A", 2)])
    assert pair_index.codes[0].dtype == "int8"
    assert pair_index.codes[1].dtype == "int8"

    # the dtype of the second level's codes widens with the label count
    for n_labels, code_dtype in ((40, "int8"), (400, "int16"), (40000, "int32")):
        widened = MultiIndex.from_product([["a"], range(n_labels)])
        assert widened.codes[1].dtype == code_dtype

    # none of the codes may be negative
    thousand = MultiIndex.from_product([["a"], range(1000)])
    assert (thousand.codes[0] >= 0).all()
    assert (thousand.codes[1] >= 0).all()
def test_values_boxed():
    """.values returns the original tuples, including a NaT entry."""
    stamp = pd.Timestamp
    tuples = [
        (1, stamp("2000-01-01")),
        (2, pd.NaT),
        (3, stamp("2000-01-03")),
        (1, stamp("2000-01-04")),
        (2, stamp("2000-01-02")),
        (3, stamp("2000-01-03")),
    ]
    mi = MultiIndex.from_tuples(tuples)
    as_objects = construct_1d_object_array_from_listlike(tuples)
    tm.assert_numpy_array_equal(mi.values, as_objects)

    # Check that code branches for boxed values produce identical results
    tm.assert_numpy_array_equal(mi.values[:4], mi[:4].values)
def test_values_multiindex_datetimeindex():
    """MI.values round-trips naive and tz-aware datetime levels."""
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(10**18, 10**18 + 5)
    naive = pd.DatetimeIndex(ints)
    aware = pd.DatetimeIndex(ints, tz="US/Central")
    idx = MultiIndex.from_arrays([naive, aware])

    def check_round_trip(mi, exp_naive, exp_aware):
        # rebuild each level from the tuples and compare with its source
        tuples = mi.values
        rebuilt_naive = pd.DatetimeIndex([pair[0] for pair in tuples])
        tm.assert_index_equal(rebuilt_naive, exp_naive)
        rebuilt_aware = pd.DatetimeIndex([pair[1] for pair in tuples])
        tm.assert_index_equal(rebuilt_aware, exp_aware)

    check_round_trip(idx, naive, aware)
    # n_lev > n_lab
    check_round_trip(idx[:2], naive[:2], aware[:2])
def test_values_multiindex_periodindex():
    """MI.values round-trips an integer level and a PeriodIndex level.

    The integer level is compared via ``pd.Index(..., dtype=np.int64)``
    rather than ``Int64Index``, so the test does not depend on a class that
    newer pandas releases no longer provide.
    """
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(2007, 2012)
    pidx = pd.PeriodIndex(ints, freq="D")

    idx = MultiIndex.from_arrays([ints, pidx])
    result = idx.values

    outer = pd.Index([x[0] for x in result], dtype=np.int64)
    tm.assert_index_equal(outer, pd.Index(ints, dtype=np.int64))

    inner = pd.PeriodIndex([x[1] for x in result])
    tm.assert_index_equal(inner, pidx)

    # n_lev > n_lab
    result = idx[:2].values

    outer = pd.Index([x[0] for x in result], dtype=np.int64)
    tm.assert_index_equal(outer, pd.Index(ints[:2], dtype=np.int64))

    inner = pd.PeriodIndex([x[1] for x in result])
    tm.assert_index_equal(inner, pidx[:2])
def test_consistency():
    """A large consistent index builds; repeated code pairs are not unique."""
    # need to construct an overflow
    major_axis = list(range(70000))
    minor_axis = list(range(10))
    level_values = [major_axis, minor_axis]

    # the fact that this works means it's consistent
    index = MultiIndex(
        levels=level_values,
        codes=[np.arange(70000), np.repeat(range(10), 7000)],
    )

    # inconsistent: the code pair (1, 1) appears twice
    index = MultiIndex(
        levels=level_values,
        codes=[
            np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]),
            np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]),
        ],
    )
    assert index.is_unique is False
@pytest.mark.slow
def test_hash_collisions():
    """Every entry of a 1000 x 1000 product index maps back to its own position."""
    # non-smoke test that we don't get hash collisions
    axis = np.arange(1000)
    index = MultiIndex.from_product([axis, np.arange(1000)], names=["one", "two"])
    size = len(index)

    positions = index.get_indexer(index.values)
    tm.assert_numpy_array_equal(positions, np.arange(size, dtype="intp"))

    # spot-check both ends with scalar lookups
    for pos in (0, 1, size - 2, size - 1):
        assert index.get_loc(index[pos]) == pos
def test_dims():
    """Placeholder test with no assertions."""
def test_take_invalid_kwargs():
    """take() rejects unknown keywords and the numpy-only 'out'/'mode' arguments."""
    dates = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
    idx = MultiIndex.from_product([["A", "B"], dates], names=["str", "dt"])
    indices = [1, 2]

    with pytest.raises(
        TypeError, match=r"take\(\) got an unexpected keyword argument 'foo'"
    ):
        idx.take(indices, foo=2)

    with pytest.raises(ValueError, match="the 'out' parameter is not supported"):
        idx.take(indices, out=indices)

    with pytest.raises(ValueError, match="the 'mode' parameter is not supported"):
        idx.take(indices, mode="clip")
def test_isna_behavior(idx):
    """pd.isna on a MultiIndex raises NotImplementedError; it should not segfault (GH5123)."""
    # NOTE: if MI representation changes, may make sense to allow
    # isna(MI)
    with pytest.raises(
        NotImplementedError, match="isna is not defined for MultiIndex"
    ):
        pd.isna(idx)
def test_large_multiindex_error():
    """Missing keys raise KeyError just below and just above 1,000,000 rows (GH12527)."""
    # 2 * 499999 rows is just below one million, 2 * 500001 just above
    for inner_len in (499999, 500001):
        frame = pd.DataFrame(
            1,
            index=MultiIndex.from_product([[1, 2], range(inner_len)]),
            columns=["dest"],
        )
        with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
            frame.loc[(-1, 0), "dest"]
        with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
            frame.loc[(3, 0), "dest"]
def test_million_record_attribute_error():
    """A bad attribute on a million-row column reports the usual AttributeError (GH 18165)."""
    numbers = list(range(1000000))
    million_index = MultiIndex.from_tuples([(x, x) for x in numbers])
    df = pd.DataFrame({"a": numbers, "b": numbers}, index=million_index)

    with pytest.raises(
        AttributeError, match="'Series' object has no attribute 'foo'"
    ):
        df["a"].foo()
def test_can_hold_identifiers(idx):
    """The index reports that it can hold, and does hold, its own first entry."""
    first_entry = idx[0]
    holds = idx._can_hold_identifiers_and_holds_name(first_entry)
    assert holds is True
def test_metadata_immutable(idx):
    """levels, codes and names cannot be assigned to in place."""
    immutable = re.compile("does not support mutable operations")
    levels = idx.levels
    codes = idx.codes

    # shouldn't be able to set at either the top level or base level
    with pytest.raises(TypeError, match=immutable):
        levels[0] = levels[0]
    with pytest.raises(TypeError, match=immutable):
        levels[0][0] = levels[0][0]

    # ditto for codes; the inner arrays are read-only ndarrays
    with pytest.raises(TypeError, match=immutable):
        codes[0] = codes[0]
    with pytest.raises(ValueError, match="assignment destination is read-only"):
        codes[0][0] = codes[0][0]

    # and for names
    names = idx.names
    with pytest.raises(TypeError, match=immutable):
        names[0] = names[0]
def test_level_setting_resets_attributes():
ind = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
assert ind.is_monotonic
with tm.assert_produces_warning(FutureWarning):
ind.set_levels([["A", "B"], [1, 3, 2]], inplace=True)
# if this fails, probably didn't reset the cache correctly.
assert not ind.is_monotonic
def test_rangeindex_fallback_coercion_bug():
    """Stacked frames concatenated column-wise keep integer index levels (GH 12893).

    Expected level values are built with ``pd.Index(..., dtype=np.int64)``
    rather than ``Int64Index``, so the test does not depend on a class that
    newer pandas releases no longer provide.
    """
    foo = pd.DataFrame(np.arange(100).reshape((10, 10)))
    bar = pd.DataFrame(np.arange(100).reshape((10, 10)))
    df = pd.concat({"foo": foo.stack(), "bar": bar.stack()}, axis=1)
    df.index.names = ["fizz", "buzz"]

    # rendering the frame must not raise
    str(df)
    expected = pd.DataFrame(
        {"bar": np.arange(100), "foo": np.arange(100)},
        index=MultiIndex.from_product([range(10), range(10)], names=["fizz", "buzz"]),
    )
    tm.assert_frame_equal(df, expected, check_like=True)

    result = df.index.get_level_values("fizz")
    expected = pd.Index(np.arange(10), dtype=np.int64, name="fizz").repeat(10)
    tm.assert_index_equal(result, expected)

    result = df.index.get_level_values("buzz")
    expected = pd.Index(np.tile(np.arange(10), 10), dtype=np.int64, name="buzz")
    tm.assert_index_equal(result, expected)
def test_memory_usage(idx):
    """memory_usage grows after a lookup on a non-empty index and is 0 when empty."""
    baseline = idx.memory_usage()

    if not len(idx):
        # we report 0 for no-length
        assert baseline == 0
        return

    # perform a lookup, then measure again
    idx.get_loc(idx[0])
    after_lookup = idx.memory_usage()
    deep = idx.memory_usage(deep=True)

    # RangeIndex, IntervalIndex
    # don't have engines
    has_engine = not isinstance(idx, (RangeIndex, IntervalIndex))
    if has_engine:
        assert after_lookup > baseline

    if idx.inferred_type == "object":
        assert deep > after_lookup
def test_nlevels(idx):
    """The fixture index reports two levels."""
    level_count = idx.nlevels
    assert level_count == 2

View File

@ -0,0 +1,78 @@
import numpy as np
import pytest
from pandas import MultiIndex
import pandas._testing as tm
def test_isin_nan():
    """A tuple containing NaN matches whether NaN is np.nan or float('nan')."""
    idx = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]])

    with_np_nan = idx.isin([("bar", np.nan)])
    tm.assert_numpy_array_equal(with_np_nan, np.array([False, True]))

    with_float_nan = idx.isin([("bar", float("nan"))])
    tm.assert_numpy_array_equal(with_float_nan, np.array([False, True]))
def test_isin():
    """isin flags matching tuples and returns an empty bool array for an empty index."""
    values = [("foo", 2), ("bar", 3), ("quux", 4)]

    populated = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])
    membership = populated.isin(values)
    tm.assert_numpy_array_equal(membership, np.array([False, False, True, True]))

    # empty, return dtype bool
    empty = MultiIndex.from_arrays([[], []])
    membership = empty.isin(values)
    assert len(membership) == 0
    assert membership.dtype == np.bool_
def test_isin_level_kwarg():
    """isin(level=...) accepts positive/negative positions and names; bad levels raise."""
    idx = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])

    vals_0 = ["foo", "bar", "quux"]
    vals_1 = [2, 3, 10]
    expected = np.array([False, False, True, True])

    # valid positions, counted from either end
    for values, level in ((vals_0, 0), (vals_0, -2), (vals_1, 1), (vals_1, -1)):
        tm.assert_numpy_array_equal(expected, idx.isin(values, level=level))

    # positions out of range
    with pytest.raises(
        IndexError, match="Too many levels: Index has only 2 levels, not 6"
    ):
        idx.isin(vals_0, level=5)
    with pytest.raises(
        IndexError,
        match="Too many levels: Index has only 2 levels, -5 is not a valid level number",
    ):
        idx.isin(vals_0, level=-5)

    # non-integer levels are looked up as names
    with pytest.raises(KeyError, match=r"'Level 1\.0 not found'"):
        idx.isin(vals_0, level=1.0)
    with pytest.raises(KeyError, match=r"'Level -1\.0 not found'"):
        idx.isin(vals_1, level=-1.0)
    with pytest.raises(KeyError, match="'Level A not found'"):
        idx.isin(vals_1, level="A")

    # once the levels are named, names work and unknown names still fail
    idx.names = ["A", "B"]
    tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level="A"))
    tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level="B"))
    with pytest.raises(KeyError, match="'Level C not found'"):
        idx.isin(vals_1, level="C")
@pytest.mark.parametrize(
    "labels,expected,level",
    [
        ([("b", np.nan)], np.array([False, False, True]), None),
        ([np.nan, "a"], np.array([True, True, False]), 0),
        (["d", np.nan], np.array([False, True, True]), 1),
    ],
)
def test_isin_multi_index_with_missing_value(labels, expected, level):
    """isin matches NaN labels across the whole index or within one level (GH 19132)."""
    arrays = [[np.nan, "a", "b"], ["c", "d", np.nan]]
    midx = MultiIndex.from_arrays(arrays)
    membership = midx.isin(labels, level=level)
    tm.assert_numpy_array_equal(membership, expected)

View File

@ -0,0 +1,160 @@
import numpy as np
import pytest
from pandas import (
Index,
Interval,
MultiIndex,
)
import pandas._testing as tm
@pytest.mark.parametrize(
    "other", [Index(["three", "one", "two"]), Index(["one"]), Index(["one", "three"])]
)
def test_join_level(idx, other, join_type):
    """Join a flat Index against the "second" level of the fixture MultiIndex."""
    joined, left_pos, right_pos = other.join(
        idx, how=join_type, level="second", return_indexers=True
    )

    second_level = other.join(idx.levels[1], how=join_type)
    assert joined.levels[0].equals(idx.levels[0])
    assert joined.levels[1].equals(second_level)

    # pare down levels
    keep = np.array([entry[1] in second_level for entry in idx], dtype=bool)
    kept_values = idx.values[keep]
    tm.assert_numpy_array_equal(joined.values, kept_values)

    if join_type not in ("outer", "inner"):
        return

    # joining from the MultiIndex side returns the two indexers swapped
    mirrored, right_pos2, left_pos2 = idx.join(
        other, how=join_type, level="second", return_indexers=True
    )
    assert joined.equals(mirrored)
    tm.assert_numpy_array_equal(left_pos, left_pos2)
    tm.assert_numpy_array_equal(right_pos, right_pos2)
    tm.assert_numpy_array_equal(mirrored.values, kept_values)
def test_join_level_corner_case(idx):
    """Corner cases of level joins: result type and MultiIndex/MultiIndex ambiguity."""
    flat = Index(["three", "one", "two"])
    joined = flat.join(idx, level="second")
    assert isinstance(joined, MultiIndex)

    # joining two MultiIndexes on a level is rejected
    with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"):
        idx.join(idx, level=1)
def test_join_self(idx, join_type):
    """Joining the index with itself gives back an equal index."""
    self_joined = idx.join(idx, how=join_type)
    tm.assert_index_equal(self_joined, idx)
def test_join_multi():
    """Join a MultiIndex with a flat Index that shares one level name (GH 10665)."""
    midx = MultiIndex.from_product([np.arange(4), np.arange(4)], names=["a", "b"])
    idx = Index([1, 2, 5], name="b")

    # inner: only b in {1, 2} survives
    inner_idx = MultiIndex.from_product([np.arange(4), [1, 2]], names=["a", "b"])
    inner_left = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp)
    inner_right = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp)

    jidx, lidx, ridx = midx.join(idx, how="inner", return_indexers=True)
    tm.assert_index_equal(jidx, inner_idx)
    tm.assert_numpy_array_equal(lidx, inner_left)
    tm.assert_numpy_array_equal(ridx, inner_right)

    # flip: the indexers come back in the opposite order
    jidx, ridx, lidx = idx.join(midx, how="inner", return_indexers=True)
    tm.assert_index_equal(jidx, inner_idx)
    tm.assert_numpy_array_equal(lidx, inner_left)
    tm.assert_numpy_array_equal(ridx, inner_right)

    # keep MultiIndex
    outer_right = np.array(
        [-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1], dtype=np.intp
    )
    jidx, lidx, ridx = midx.join(idx, how="left", return_indexers=True)
    tm.assert_index_equal(jidx, midx)
    assert lidx is None
    tm.assert_numpy_array_equal(ridx, outer_right)

    # flip
    jidx, ridx, lidx = idx.join(midx, how="right", return_indexers=True)
    tm.assert_index_equal(jidx, midx)
    assert lidx is None
    tm.assert_numpy_array_equal(ridx, outer_right)
def test_join_self_unique(idx, join_type):
    """For a unique index, a self-join matches the index element-wise."""
    if not idx.is_unique:
        return
    self_joined = idx.join(idx, how=join_type)
    assert (idx == self_joined).all()
def test_join_multi_wrong_order():
    """Join two MultiIndexes whose level names are in swapped order.

    GH 25760, GH 28956
    """
    left = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"])
    right = MultiIndex.from_product([[1, 2], [3, 4]], names=["b", "a"])

    joined, left_pos, right_pos = left.join(right, return_indexers=True)

    # the left index comes back untouched and nothing on the right matches
    tm.assert_index_equal(left, joined)
    assert left_pos is None
    no_match = np.array([-1, -1, -1, -1], dtype=np.intp)
    tm.assert_numpy_array_equal(right_pos, no_match)
def test_join_multi_return_indexers():
    """With return_indexers=False the join returns just the index (GH 34074)."""
    three_level = MultiIndex.from_product(
        [[1, 2], [3, 4], [5, 6]], names=["a", "b", "c"]
    )
    two_level = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"])

    joined = three_level.join(two_level, return_indexers=False)
    tm.assert_index_equal(joined, three_level)
def test_join_overlapping_interval_level():
    """Outer-join two MultiIndexes that hold the same entries in different order (GH 44096)."""
    level_names = ["num", "interval"]

    sorted_entries = [
        (1, Interval(0.0, 1.0)),
        (1, Interval(1.0, 2.0)),
        (1, Interval(2.0, 5.0)),
        (2, Interval(0.0, 1.0)),
        (2, Interval(1.0, 3.0)),  # interval limit is here at 3.0, not at 2.0
        (2, Interval(3.0, 5.0)),
    ]
    shuffled_entries = [
        (1, Interval(2.0, 5.0)),
        (1, Interval(0.0, 1.0)),
        (1, Interval(1.0, 2.0)),
        (2, Interval(3.0, 5.0)),
        (2, Interval(0.0, 1.0)),
        (2, Interval(1.0, 3.0)),
    ]

    idx_1 = MultiIndex.from_tuples(sorted_entries, names=level_names)
    idx_2 = MultiIndex.from_tuples(shuffled_entries, names=level_names)

    # the outer join is expected to equal the sorted entries
    expected = MultiIndex.from_tuples(
        [
            (1, Interval(0.0, 1.0)),
            (1, Interval(1.0, 2.0)),
            (1, Interval(2.0, 5.0)),
            (2, Interval(0.0, 1.0)),
            (2, Interval(1.0, 3.0)),
            (2, Interval(3.0, 5.0)),
        ],
        names=level_names,
    )
    joined = idx_1.join(idx_2, how="outer")
    tm.assert_index_equal(joined, expected)

View File

@ -0,0 +1,57 @@
from pandas import MultiIndex
import pandas._testing as tm
class TestIsLexsorted:
def test_is_lexsorted(self):
levels = [[0, 1], [0, 1, 2]]
index = MultiIndex(
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
)
assert index._is_lexsorted()
index = MultiIndex(
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]
)
assert not index._is_lexsorted()
index = MultiIndex(
levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]
)
assert not index._is_lexsorted()
assert index._lexsort_depth == 0
def test_is_lexsorted_deprecation(self):
# GH 32259
with tm.assert_produces_warning():
MultiIndex.from_arrays([["a", "b", "c"], ["d", "f", "e"]]).is_lexsorted()
class TestLexsortDepth:
def test_lexsort_depth(self):
# Test that lexsort_depth return the correct sortorder
# when it was given to the MultiIndex const.
# GH#28518
levels = [[0, 1], [0, 1, 2]]
index = MultiIndex(
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
)
assert index._lexsort_depth == 2
index = MultiIndex(
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1
)
assert index._lexsort_depth == 1
index = MultiIndex(
levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0
)
assert index._lexsort_depth == 0
def test_lexsort_depth_deprecation(self):
# GH 32259
with tm.assert_produces_warning():
MultiIndex.from_arrays([["a", "b", "c"], ["d", "f", "e"]]).lexsort_depth

View File

@ -0,0 +1,112 @@
import numpy as np
import pytest
import pandas as pd
from pandas import MultiIndex
import pandas._testing as tm
def test_fillna(idx):
    """``fillna`` on the fixture index raises ``NotImplementedError``."""
    # GH 11343
    expected_message = "isna is not defined for MultiIndex"
    with pytest.raises(NotImplementedError, match=expected_message):
        idx.fillna(idx[0])
def test_dropna():
    """Exercise ``dropna`` with ``how="any"``, ``how="all"`` and a bad ``how``."""
    # GH 6194
    mi = MultiIndex.from_arrays(
        [
            [1, np.nan, 3, np.nan, 5],
            [1, 2, np.nan, np.nan, 5],
            ["a", "b", "c", np.nan, "e"],
        ]
    )

    # only the rows without any missing entry remain (also the default)
    complete_rows = MultiIndex.from_arrays([[1, 5], [1, 5], ["a", "e"]])
    tm.assert_index_equal(mi.dropna(), complete_rows)
    tm.assert_index_equal(mi.dropna(how="any"), complete_rows)

    # only the row that is missing in every level is removed
    not_all_missing = MultiIndex.from_arrays(
        [[1, np.nan, 3, 5], [1, 2, np.nan, 5], ["a", "b", "c", "e"]]
    )
    tm.assert_index_equal(mi.dropna(how="all"), not_all_missing)

    with pytest.raises(ValueError, match="invalid how option: xxx"):
        mi.dropna(how="xxx")

    # GH26408
    # test if missing values are dropped for multiindex constructed
    # from codes and values
    level_values = [np.nan, None, pd.NaT, "128", 2]
    coded = MultiIndex(
        levels=[level_values, [np.nan, None, pd.NaT, "128", 2]],
        codes=[[0, -1, 1, 2, 3, 4], [0, -1, 3, 3, 3, 4]],
    )

    complete_rows = MultiIndex.from_arrays([["128", 2], ["128", 2]])
    tm.assert_index_equal(coded.dropna(), complete_rows)
    tm.assert_index_equal(coded.dropna(how="any"), complete_rows)

    not_all_missing = MultiIndex.from_arrays(
        [[np.nan, np.nan, "128", 2], ["128", "128", "128", 2]]
    )
    tm.assert_index_equal(coded.dropna(how="all"), not_all_missing)
def test_nulls(idx):
    """``isna`` on the fixture index raises ``NotImplementedError``."""
    # this is really a smoke test for the methods
    # as these are adequately tested for function elsewhere
    expected_message = "isna is not defined for MultiIndex"
    with pytest.raises(NotImplementedError, match=expected_message):
        idx.isna()
@pytest.mark.xfail(reason="isna is not defined for MultiIndex")
def test_hasnans_isnans(idx):
    """Check ``_isnan`` / ``hasnans`` without and with a NaN entry (xfail)."""
    # GH 11343, added tests for hasnans / isnans
    without_nan = idx.copy()

    # the index does not include NaN in this case
    all_false = np.array([False] * len(without_nan), dtype=bool)
    tm.assert_numpy_array_equal(without_nan._isnan, all_false)
    assert without_nan.hasnans is False

    # overwrite the second entry with NaN and rebuild an index of the same type
    raw_values = idx.copy().values
    raw_values[1] = np.nan
    with_nan = type(idx)(raw_values)

    second_true = np.array([False] * len(with_nan), dtype=bool)
    second_true[1] = True
    tm.assert_numpy_array_equal(with_nan._isnan, second_true)
    assert with_nan.hasnans is True
def test_nan_stays_float():
    """Missing second-level values stay missing after a join and after ``df0 - df1``."""
    # GH 7031
    all_missing = MultiIndex(
        levels=[["A", "B"], []], codes=[[1, 0], [-1, -1]], names=[0, 1]
    )
    complete = MultiIndex(levels=[["C"], ["D"]], codes=[[0], [0]], names=[0, 1])

    joined = all_missing.join(complete, how="outer")
    assert pd.isna(all_missing.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(joined.get_level_values(1)[:-1]).all()

    frame_missing = pd.DataFrame([[1, 2]], index=all_missing)
    frame_complete = pd.DataFrame([[3, 4]], index=complete)
    difference = frame_missing - frame_complete
    assert pd.isna(frame_missing.index.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(difference.index.get_level_values(1)[:-1]).all()
def test_tuples_have_na():
    """A ``-1`` code shows up as a missing value in the corresponding tuple."""
    first_level_codes = [1, 1, 1, 1, -1, 0, 0, 0]
    second_level_codes = [0, 1, 2, 3, 0, 1, 2, 3]
    mi = MultiIndex(
        levels=[[1, 0], [0, 1, 2, 3]],
        codes=[first_level_codes, second_level_codes],
    )

    # position 4 carries the -1 code in the first level
    assert pd.isna(mi[4][0])
    assert pd.isna(mi.values[4][0])

View File

@ -0,0 +1,188 @@
import numpy as np
import pytest
from pandas import (
Index,
MultiIndex,
)
def test_is_monotonic_increasing_lexsorted(lexsorted_two_level_string_multiindex):
    """The fixture index is not monotonic increasing, directly or via its tuples.

    ``is_monotonic_increasing`` is spelled out rather than the ``is_monotonic``
    alias, matching the strict-monotonic tests in this module.
    """
    # string ordering
    mi = lexsorted_two_level_string_multiindex
    from_values = Index(mi.values)

    assert mi.is_monotonic_increasing is False
    assert from_values.is_monotonic_increasing is False
    assert mi._is_strictly_monotonic_increasing is False
    assert from_values._is_strictly_monotonic_increasing is False
def test_is_monotonic_increasing():
    """Check ``is_monotonic_increasing`` and its strict variant on several indexes.

    Every case is checked on the MultiIndex itself and, unless noted, also on
    ``Index(mi.values)`` built from its tuples.
    """

    def check(mi, expected, via_values=True):
        # ``is`` comparisons: the properties must return real booleans
        assert mi.is_monotonic_increasing is expected
        assert mi._is_strictly_monotonic_increasing is expected
        if via_values:
            from_values = Index(mi.values)
            assert from_values.is_monotonic_increasing is expected
            # the original repeated the MultiIndex assertion here instead of
            # checking the index rebuilt from the values
            assert from_values._is_strictly_monotonic_increasing is expected

    check(
        MultiIndex.from_product(
            [np.arange(10), np.arange(10)], names=["one", "two"]
        ),
        True,
    )

    # first level descending
    check(
        MultiIndex.from_product(
            [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
        ),
        False,
    )

    # second level descending
    check(
        MultiIndex.from_product(
            [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
        ),
        False,
    )

    # NaN inside the first level
    check(MultiIndex.from_product([[1.0, np.nan, 2.0], ["a", "b", "c"]]), False)

    check(
        MultiIndex(
            levels=[["bar", "baz", "foo", "qux"], ["mom", "next", "zenith"]],
            codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
            names=["first", "second"],
        ),
        True,
    )

    # mixed levels, hits the TypeError
    # (only the MultiIndex itself is checked in this case)
    check(
        MultiIndex(
            levels=[
                [1, 2, 3, 4],
                [
                    "gb00b03mlx29",
                    "lu0197800237",
                    "nl0000289783",
                    "nl0000289965",
                    "nl0000301109",
                ],
            ],
            codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
            names=["household_id", "asset_id"],
        ),
        False,
        via_values=False,
    )

    # empty
    check(MultiIndex.from_arrays([[], []]), True)
def test_is_monotonic_decreasing():
    """Check ``is_monotonic_decreasing`` and its strict variant on several indexes.

    Every case is checked on the MultiIndex itself and, unless noted, also on
    ``Index(mi.values)`` built from its tuples.
    """

    def check(mi, expected, via_values=True):
        # ``is`` comparisons: the properties must return real booleans
        assert mi.is_monotonic_decreasing is expected
        assert mi._is_strictly_monotonic_decreasing is expected
        if via_values:
            from_values = Index(mi.values)
            assert from_values.is_monotonic_decreasing is expected
            # the original repeated the MultiIndex assertion here instead of
            # checking the index rebuilt from the values
            assert from_values._is_strictly_monotonic_decreasing is expected

    check(
        MultiIndex.from_product(
            [np.arange(9, -1, -1), np.arange(9, -1, -1)], names=["one", "two"]
        ),
        True,
    )

    # first level ascending
    check(
        MultiIndex.from_product(
            [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
        ),
        False,
    )

    # second level ascending
    check(
        MultiIndex.from_product(
            [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
        ),
        False,
    )

    # NaN inside the first level
    check(MultiIndex.from_product([[2.0, np.nan, 1.0], ["c", "b", "a"]]), False)

    # string ordering
    check(
        MultiIndex(
            levels=[["qux", "foo", "baz", "bar"], ["three", "two", "one"]],
            codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
            names=["first", "second"],
        ),
        False,
    )

    check(
        MultiIndex(
            levels=[["qux", "foo", "baz", "bar"], ["zenith", "next", "mom"]],
            codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
            names=["first", "second"],
        ),
        True,
    )

    # mixed levels, hits the TypeError
    # (only the MultiIndex itself is checked in this case)
    check(
        MultiIndex(
            levels=[
                [4, 3, 2, 1],
                [
                    "nl0000301109",
                    "nl0000289965",
                    "nl0000289783",
                    "lu0197800237",
                    "gb00b03mlx29",
                ],
            ],
            codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
            names=["household_id", "asset_id"],
        ),
        False,
        via_values=False,
    )

    # empty
    check(MultiIndex.from_arrays([[], []]), True)
def test_is_strictly_monotonic_increasing():
    """A repeated entry keeps the index monotonic increasing, but not strictly."""
    first_codes = [0, 0, 1, 1]
    second_codes = [0, 0, 0, 1]  # rows 0 and 1 are the same tuple
    mi = MultiIndex(
        levels=[["bar", "baz"], ["mom", "next"]],
        codes=[first_codes, second_codes],
    )

    assert mi.is_monotonic_increasing is True
    assert mi._is_strictly_monotonic_increasing is False
def test_is_strictly_monotonic_decreasing():
    """A repeated entry keeps the index monotonic decreasing, but not strictly."""
    first_codes = [0, 0, 1, 1]
    second_codes = [0, 0, 0, 1]  # rows 0 and 1 are the same tuple
    mi = MultiIndex(
        levels=[["baz", "bar"], ["next", "mom"]],
        codes=[first_codes, second_codes],
    )

    assert mi.is_monotonic_decreasing is True
    assert mi._is_strictly_monotonic_decreasing is False
@pytest.mark.parametrize("attr", ["is_monotonic_increasing", "is_monotonic_decreasing"])
@pytest.mark.parametrize(
    "values",
    [[(np.nan,), (1,), (2,)], [(1,), (np.nan,), (2,)], [(1,), (2,), (np.nan,)]],
)
def test_is_monotonic_with_nans(values, attr):
    """With a NaN at any position, neither monotonic property is ``True``."""
    # GH: 37220
    mi = MultiIndex.from_tuples(values, names=["test"])
    monotonic = getattr(mi, attr)
    assert monotonic is False

View File

@ -0,0 +1,205 @@
import pytest
import pandas as pd
from pandas import MultiIndex
import pandas._testing as tm
def check_level_names(index, names):
    """Assert that the ``name`` of each level of *index* equals *names*, in order."""
    actual_names = [lvl.name for lvl in index.levels]
    assert actual_names == list(names)
def test_slice_keep_name():
    """Slicing a MultiIndex keeps its names."""
    mi = MultiIndex.from_tuples(
        [("a", "b"), (1, 2), ("c", "d")], names=["x", "y"]
    )
    tail = mi[1:]
    assert tail.names == mi.names
def test_index_name_retained():
    """A frame enlarged through ``.loc`` equals one built directly with index ``z``."""
    # GH9857
    frame = pd.DataFrame({"x": [1, 2, 6], "y": [2, 2, 8], "z": [-5, 0, 5]})
    frame = frame.set_index("z")
    frame.loc[10] = [9, 10]  # adds a row under a new index label

    expected = pd.DataFrame(
        {"x": [1, 2, 6, 9], "y": [2, 2, 8, 10], "z": [-5, 0, 5, 10]}
    ).set_index("z")

    tm.assert_frame_equal(frame, expected)
def test_changing_names(idx):
    """Renaming an index leaves its views and copies untouched, and vice versa."""
    original_names = ["first", "second"]
    assert [lvl.name for lvl in idx.levels] == original_names

    as_view = idx.view()
    as_copy = idx.copy()
    as_shallow_copy = idx._view()

    # renaming the index is reflected in the names of its own levels
    idx.names = [name + "a" for name in idx.names]
    check_level_names(idx, ["firsta", "seconda"])

    # and not on copies
    for derived in (as_view, as_copy, as_shallow_copy):
        check_level_names(derived, ["first", "second"])

    # and copies shouldn't change original
    as_shallow_copy.names = [name + "c" for name in as_shallow_copy.names]
    check_level_names(idx, ["firsta", "seconda"])
def test_take_preserve_name(idx):
    """``take`` keeps the names of the index."""
    positions = [3, 0, 1]
    assert idx.take(positions).names == idx.names
def test_copy_names():
    """``copy`` honors ``names=`` / ``name=`` and rejects malformed names."""
    # Check that adding a "names" parameter to the copy is honored
    # GH14302
    with tm.assert_produces_warning(FutureWarning):
        # subclass-specific kwargs to pd.Index
        source = pd.Index([(1, 2), (3, 4)], names=["MyName1", "MyName2"])

    plain_copy = source.copy()
    assert source.equals(plain_copy)
    assert source.names == ["MyName1", "MyName2"]
    assert plain_copy.names == ["MyName1", "MyName2"]

    # both keyword spellings rename the copy and leave the source alone
    for keyword in ("names", "name"):
        renamed = source.copy(**{keyword: ["NewName1", "NewName2"]})
        assert source.equals(renamed)
        assert source.names == ["MyName1", "MyName2"]
        assert renamed.names == ["NewName1", "NewName2"]

    # gh-35592
    with pytest.raises(ValueError, match="Length of new names must be 2, got 1"):
        source.copy(names=["mario"])

    with pytest.raises(TypeError, match="MultiIndex.name must be a hashable type"):
        source.copy(names=[["mario"], ["luigi"]])
def test_names(idx, index_names):
    """Check level names, rejection of wrong-length names, and renaming."""
    # names are assigned in setup
    assert index_names == ["first", "second"]
    assert [lvl.name for lvl in idx.levels] == index_names

    # setting bad names on existing
    index = idx
    for wrong_length in (list(index.names) + ["third"], []):
        with pytest.raises(ValueError, match="^Length of names"):
            index.names = wrong_length

    # initializing with bad names (should always be equivalent)
    major_axis, minor_axis = idx.levels
    major_codes, minor_codes = idx.codes
    for wrong_length in (["first"], ["first", "second", "third"]):
        with pytest.raises(ValueError, match="^Length of names"):
            MultiIndex(
                levels=[major_axis, minor_axis],
                codes=[major_codes, minor_codes],
                names=wrong_length,
            )

    # names assigned on the index are reflected in the names of its levels
    index.names = ["a", "b"]
    assert [lvl.name for lvl in index.levels] == ["a", "b"]
def test_duplicate_level_names_access_raises(idx):
    """Looking up a level by a name shared by two levels raises ``ValueError``."""
    # GH19029
    idx.names = ["foo", "foo"]
    expected_message = "name foo occurs multiple times"
    with pytest.raises(ValueError, match=expected_message):
        idx._get_level_number("foo")
def test_get_names_from_levels():
    """Each level carries the name given to the MultiIndex for that level."""
    mi = MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"])
    first_level, second_level = mi.levels[0], mi.levels[1]

    assert first_level.name == "a"
    assert second_level.name == "b"
def test_setting_names_from_levels_raises():
    """Assigning ``name`` on a level of a MultiIndex raises ``RuntimeError``."""
    mi = MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"])

    for position in (0, 1):
        with pytest.raises(RuntimeError, match="set_names"):
            mi.levels[position].name = "foo"

    # the same holds when the level is reused as the index of a Series
    series = pd.Series(1, index=mi.levels[0])
    with pytest.raises(RuntimeError, match="set_names"):
        series.index.name = "bar"

    # the class-level flag itself is expected to stay False
    assert pd.Index._no_setting_name is False
    assert pd.core.api.NumericIndex._no_setting_name is False
    assert pd.RangeIndex._no_setting_name is False
@pytest.mark.parametrize("func", ["rename", "set_names"])
@pytest.mark.parametrize(
    "rename_dict, exp_names",
    [
        ({"x": "z"}, ["z", "y", "z"]),
        ({"x": "z", "y": "x"}, ["z", "x", "z"]),
        ({"y": "z"}, ["x", "z", "x"]),
        ({}, ["x", "y", "x"]),
        ({"z": "a"}, ["x", "y", "x"]),
        ({"y": "z", "a": "b"}, ["x", "z", "x"]),
    ],
)
def test_name_mi_with_dict_like_duplicate_names(func, rename_dict, exp_names):
    """Rename levels through a dict when two levels share the name ``"x"``."""
    # GH#20421
    arrays = [[1, 2], [3, 4], [5, 6]]
    mi = MultiIndex.from_arrays(arrays, names=["x", "y", "x"])

    renamer = getattr(mi, func)
    result = renamer(rename_dict)

    expected = MultiIndex.from_arrays([[1, 2], [3, 4], [5, 6]], names=exp_names)
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("func", ["rename", "set_names"])
@pytest.mark.parametrize(
    "rename_dict, exp_names",
    [
        ({"x": "z"}, ["z", "y"]),
        ({"x": "z", "y": "x"}, ["z", "x"]),
        ({"a": "z"}, ["x", "y"]),
        ({}, ["x", "y"]),
    ],
)
def test_name_mi_with_dict_like(func, rename_dict, exp_names):
    """Rename the levels of a two-level index through a dict."""
    # GH#20421
    arrays = [[1, 2], [3, 4]]
    mi = MultiIndex.from_arrays(arrays, names=["x", "y"])

    renamer = getattr(mi, func)
    result = renamer(rename_dict)

    expected = MultiIndex.from_arrays([[1, 2], [3, 4]], names=exp_names)
    tm.assert_index_equal(result, expected)
def test_index_name_with_dict_like_raising():
    """``set_names`` with a dict on a flat ``Index`` raises ``TypeError``."""
    # GH#20421
    flat = pd.Index([1, 2])
    expected_message = "Can only pass dict-like as `names` for MultiIndex."
    with pytest.raises(TypeError, match=expected_message):
        flat.set_names({"x": "z"})
def test_multiindex_name_and_level_raising():
    """Passing dict-like ``names`` together with ``level`` raises ``TypeError``."""
    # GH#20421
    mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["x", "y"])
    expected_message = "Can not pass level for dictlike `names`."
    with pytest.raises(TypeError, match=expected_message):
        mi.set_names(names={"x": "z"}, level={"x": "z"})

View File

@ -0,0 +1,148 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
IndexSlice,
MultiIndex,
date_range,
)
import pandas._testing as tm
@pytest.fixture
def df():
    """Return a one-column frame indexed by (12-hourly timestamp, letter)."""
    #                        c1
    # 2016-01-01 00:00:00 a   0
    #                     b   1
    #                     c   2
    # 2016-01-01 12:00:00 a   3
    #                     b   4
    #                     c   5
    # 2016-01-02 00:00:00 a   6
    #                     b   7
    #                     c   8
    # 2016-01-02 12:00:00 a   9
    #                     b  10
    #                     c  11
    # 2016-01-03 00:00:00 a  12
    #                     b  13
    #                     c  14
    timestamps = date_range("2016-01-01", "2016-01-03", freq="12H")
    letters = ["a", "b", "c"]
    index = MultiIndex.from_product([timestamps, letters])
    return DataFrame({"c1": range(0, 15)}, index=index)
def test_partial_string_matching_single_index(df):
    """Select ``"a"``, then match a date string on the remaining datetime index."""
    # partial string matching on a single index
    swapped_variants = [df.swaplevel(), df.swaplevel(0), df.swaplevel(0, 1)]
    for swapped in swapped_variants:
        only_a = swapped.sort_index().loc["a"]
        result = only_a.loc["2016-01-01"]

        # first two "a" rows of the original frame, letter level dropped
        expected = df.loc[IndexSlice[:, "a"], :].iloc[0:2]
        expected.index = expected.index.droplevel(1)
        tm.assert_frame_equal(result, expected)
def test_get_loc_partial_timestamp_multiindex(df):
    """``get_loc`` with a partial date string in the key returns a boolean mask."""

    def mask_with(length, selector):
        # all-False mask of the given length with ``selector`` set to True
        mask = np.zeros(length, dtype=bool)
        mask[selector] = True
        return mask

    mi = df.index

    day_one_key = ("2016-01-01", "a")
    tm.assert_numpy_array_equal(
        mi.get_loc(day_one_key), mask_with(len(mi), [0, 3])
    )

    day_two_key = ("2016-01-02", "a")
    tm.assert_numpy_array_equal(
        mi.get_loc(day_two_key), mask_with(len(mi), [6, 9])
    )

    # month resolution: every "a" entry is selected
    month_key = ("2016-01", "a")
    every_a = mask_with(len(mi), mi.get_level_values(1).get_loc("a"))
    tm.assert_numpy_array_equal(mi.get_loc(month_key), every_a)

    # year resolution: same expectation as for the month
    year_key = ("2016", "a")
    tm.assert_numpy_array_equal(mi.get_loc(year_key), every_a)

    # non-monotonic
    taker = np.arange(len(mi), dtype=np.intp)
    taker[::2] = taker[::-2]
    shuffled = mi.take(taker)
    tm.assert_numpy_array_equal(
        shuffled.get_loc(day_one_key), mask_with(len(shuffled), [3, 14])
    )
def test_partial_string_timestamp_multiindex(df):
    """Partial date-string selection on a frame with a (datetime, letter) index.

    Covers slicing through ``IndexSlice``, plain string keys at several
    resolutions, a tuple key, and the failing lookup when the datetime level
    is not the first level.
    """
    # GH10331
    df_swap = df.swaplevel(0, 1).sort_index()
    SLC = IndexSlice

    # indexing with IndexSlice
    result = df.loc[SLC["2016-01-01":"2016-02-01", :], :]
    expected = df
    tm.assert_frame_equal(result, expected)

    # match on secondary index
    result = df_swap.loc[SLC[:, "2016-01-01":"2016-01-01"], :]
    expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]]
    tm.assert_frame_equal(result, expected)

    # partial string match on year only
    result = df.loc["2016"]
    expected = df
    tm.assert_frame_equal(result, expected)

    # partial string match on date
    result = df.loc["2016-01-01"]
    expected = df.iloc[0:6]
    tm.assert_frame_equal(result, expected)

    # partial string match on date and hour, from middle
    result = df.loc["2016-01-02 12"]
    # hourly resolution, same as index.levels[0], so we are _not_ slicing on
    # that level, so that level gets dropped
    expected = df.iloc[9:12].droplevel(0)
    tm.assert_frame_equal(result, expected)

    # partial string match on secondary index
    result = df_swap.loc[SLC[:, "2016-01-02"], :]
    expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]]
    tm.assert_frame_equal(result, expected)

    # tuple selector with partial string match on date
    # "2016-01-01" has daily resolution, so _is_ a slice on the first level.
    result = df.loc[("2016-01-01", "a"), :]
    expected = df.iloc[[0, 3]].droplevel(1)
    tm.assert_frame_equal(result, expected)

    # Slicing date on first level should break (of course) bc the DTI is the
    # second level on df_swap
    with pytest.raises(KeyError, match="'2016-01-01'"):
        df_swap.loc["2016-01-01"]
def test_partial_string_timestamp_multiindex_str_key_raises(df):
    """``df[<date string>]`` on the MultiIndexed frame raises ``KeyError``."""
    # Even though this syntax works on a single index, this is somewhat
    # ambiguous and we don't want to extend this behavior forward to work
    # in multi-indexes. This would amount to selecting a scalar from a
    # column.
    date_key = "2016-01-01"
    with pytest.raises(KeyError, match="'2016-01-01'"):
        df[date_key]
def test_partial_string_timestamp_multiindex_daily_resolution(df):
    """Slice the first level with a month string on both ends of the slice."""
    # GH12685 (partial string with daily resolution or below)
    # NOTE(review): the ``df`` fixture in this module holds 15 rows, all in
    # January 2016, so both sides of this comparison look empty -- confirm
    # the test still exercises what GH12685 intended.
    month_slice = IndexSlice["2013-03":"2013-03", :]
    result = df.loc[month_slice, :]
    expected = df.iloc[118:180]
    tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,10 @@
import pytest
from pandas import MultiIndex
def test_pickle_compat_construction():
    """Constructing a ``MultiIndex`` without arguments raises ``TypeError``."""
    # this is testing for pickle compat
    # need an object to create with
    expected_message = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=expected_message):
        MultiIndex()

View File

@ -0,0 +1,164 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Index,
MultiIndex,
)
import pandas._testing as tm
def test_reindex(idx):
    """``reindex`` against a list of tuples returns a named MultiIndex."""
    expected_names = ["first", "second"]

    # target: the first four entries
    new_index, indexer = idx.reindex(list(idx[:4]))
    assert isinstance(new_index, MultiIndex)
    assert new_index.names == ["first", "second"]
    assert [lvl.name for lvl in new_index.levels] == expected_names

    # target: every entry -- no indexer is returned
    new_index, indexer = idx.reindex(list(idx))
    assert isinstance(new_index, MultiIndex)
    assert indexer is None
    assert new_index.names == ["first", "second"]
    assert [lvl.name for lvl in new_index.levels] == expected_names
def test_reindex_level(idx):
    """``reindex(level=...)`` agrees with the corresponding level-wise join."""
    flat = Index(["one"])

    mi_target, mi_indexer = idx.reindex(flat, level="second")
    flat_target, flat_indexer = flat.reindex(idx, level="second")

    joined_right = idx.join(flat, level="second", how="right")
    joined_left = idx.join(flat, level="second", how="left")

    assert mi_target.equals(joined_right)
    tm.assert_numpy_array_equal(
        mi_indexer, np.array([0, 2, 4]), check_dtype=False
    )

    assert flat_target.equals(joined_left)
    tm.assert_numpy_array_equal(
        flat_indexer, np.array([0, -1, 0, -1, 0, -1]), check_dtype=False
    )

    # a fill method cannot be combined with ``level``
    with pytest.raises(TypeError, match="Fill method not supported"):
        idx.reindex(idx, method="pad", level="second")

    with pytest.raises(TypeError, match="Fill method not supported"):
        flat.reindex(flat, method="bfill", level="first")
def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx):
    """Reindexing against a list or ndarray keeps the names of the index."""
    # GH6552
    idx = idx.copy()
    target = idx.copy()
    target.names = [None, None]

    other_dtype = MultiIndex.from_product([[1, 2], [3, 4]])

    for names in ([None, None], ["foo", "bar"]):
        idx.names = names

        # list & ndarray cases
        candidates = [
            [],
            np.array([]),
            target.tolist(),
            target.values,
            other_dtype.tolist(),
            other_dtype.values,
        ]
        for candidate in candidates:
            assert idx.reindex(candidate)[0].names == names
def test_reindex_lvl_preserves_names_when_target_is_list_or_array():
    """Level-wise reindexing against an empty list keeps the names."""
    # GH7774
    mi = MultiIndex.from_product([[0, 1], ["a", "b"]], names=["foo", "bar"])
    for level in (0, 1):
        reindexed = mi.reindex([], level=level)[0]
        assert reindexed.names == ["foo", "bar"]
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array():
    """Level-wise reindexing against an empty list keeps each level's dtype."""
    # GH7774
    plain = MultiIndex.from_product([[0, 1], ["a", "b"]])
    int_level = plain.reindex([], level=0)[0].levels[0]
    obj_level = plain.reindex([], level=1)[0].levels[1]
    assert int_level.dtype.type == np.int64
    assert obj_level.dtype.type == np.object_

    # case with EA levels
    cat = pd.Categorical(["foo", "bar"])
    dti = pd.date_range("2016-01-01", periods=2, tz="US/Pacific")
    extension = MultiIndex.from_product([cat, dti])
    cat_level = extension.reindex([], level=0)[0].levels[0]
    dti_level = extension.reindex([], level=1)[0].levels[1]
    assert cat_level.dtype == cat.dtype
    assert dti_level.dtype == dti.dtype
def test_reindex_base(idx):
    """``get_indexer`` against itself is the identity; a bad ``method`` raises."""
    expected = np.arange(idx.size, dtype=np.intp)

    actual = idx.get_indexer(idx)
    tm.assert_numpy_array_equal(expected, actual)

    with pytest.raises(ValueError, match="Invalid fill method"):
        idx.get_indexer(idx, method="invalid")
def test_reindex_non_unique():
    """Reindexing a Series with a duplicated MultiIndex entry raises ``ValueError``."""
    duplicated = MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)])
    series = pd.Series(np.arange(4), index=duplicated)
    unique_target = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])

    expected_message = "cannot handle a non-unique multi-index!"
    with pytest.raises(ValueError, match=expected_message):
        # a FutureWarning mentioning "non-unique" is expected as well
        with tm.assert_produces_warning(FutureWarning, match="non-unique"):
            series.reindex(unique_target)
@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
def test_reindex_empty_with_level(values):
    """Level-wise reindexing to a label that is absent gives an empty index."""
    # GH41170
    mi = MultiIndex.from_arrays(values)

    new_index, new_indexer = mi.reindex(np.array(["b"]), level=0)

    empty_expected = MultiIndex(levels=[["b"], values[1]], codes=[[], []])
    empty_indexer = np.array([], dtype=new_indexer.dtype)
    tm.assert_index_equal(new_index, empty_expected)
    tm.assert_numpy_array_equal(new_indexer, empty_indexer)
def test_reindex_not_all_tuples():
    """Reindex against a target whose last element is a plain string."""
    tuple_keys = [("i", "i"), ("i", "j"), ("j", "i")]
    mixed_keys = tuple_keys + ["j"]

    mi = MultiIndex.from_tuples(tuple_keys)
    target = Index(mixed_keys)

    new_index, indexer = mi.reindex(target)

    tm.assert_index_equal(new_index, target)
    # the non-tuple entry gets -1
    tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2, -1], dtype=np.intp))
def test_reindex_limit_arg_with_multiindex():
    """``limit`` without a fill method raises ``ValueError`` on ``reindex``."""
    # GH21247
    source_index = MultiIndex.from_tuples([(3, "A"), (4, "A"), (4, "B")])
    series = pd.Series([0.02, 0.01, 0.012], index=source_index)

    target_tuples = [
        (3, "A"),
        (3, "B"),
        (4, "A"),
        (4, "B"),
        (4, "C"),
        (5, "B"),
        (5, "C"),
        (6, "B"),
        (6, "C"),
    ]
    target_index = MultiIndex.from_tuples(target_tuples)

    expected_message = (
        "limit argument only valid if doing pad, backfill or nearest reindexing"
    )
    with pytest.raises(ValueError, match=expected_message):
        series.reindex(target_index, fill_value=0, limit=1)

View File

@ -0,0 +1,185 @@
from datetime import datetime
import numpy as np
import pytest
import pytz
import pandas as pd
from pandas import (
Index,
MultiIndex,
)
import pandas._testing as tm
def test_insert(idx):
    """Exercise ``insert`` directly and through ``.loc`` enlargement."""
    # key contained in all levels
    inserted = idx.insert(0, ("bar", "two"))
    assert inserted.equal_levels(idx)
    assert inserted[0] == ("bar", "two")

    # key not contained in all levels
    inserted = idx.insert(0, ("abc", "three"))

    first_level = Index(list(idx.levels[0]) + ["abc"], name="first")
    tm.assert_index_equal(inserted.levels[0], first_level)
    assert inserted.names == ["first", "second"]

    second_level = Index(list(idx.levels[1]) + ["three"], name="second")
    tm.assert_index_equal(inserted.levels[1], second_level)
    assert inserted[0] == ("abc", "three")

    # key wrong length
    expected_message = "Item must have length equal to number of levels"
    with pytest.raises(ValueError, match=expected_message):
        idx.insert(0, ("foo2",))

    columns = ["1st", "2nd", "3rd"]
    initial_rows = [["a", "b", 0], ["b", "d", 1]]
    # (key, value) pairs added through ``.loc``, in this order
    additions = [
        (("b", "x"), 2),
        (("b", "a"), -1),
        (("b", "b"), 3),
        (("a", "x"), 4),
        (("a", "w"), 5),
        (("a", "a"), 6),
    ]

    left = pd.DataFrame(initial_rows, columns=columns)
    left.set_index(["1st", "2nd"], inplace=True)
    ts = left["3rd"].copy(deep=True)

    for key, value in additions:
        left.loc[key, "3rd"] = value

    for key, value in additions:
        ts.loc[key] = value

    right = pd.DataFrame(
        [
            ["a", "b", 0],
            ["b", "d", 1],
            ["b", "x", 2],
            ["b", "a", -1],
            ["b", "b", 3],
            ["a", "x", 4],
            ["a", "w", 5],
            ["a", "a", 6],
        ],
        columns=columns,
    )
    right.set_index(["1st", "2nd"], inplace=True)

    # FIXME data types changes to float because
    #       of intermediate nan insertion;
    tm.assert_frame_equal(left, right, check_dtype=False)
    tm.assert_series_equal(ts, right["3rd"])
def test_insert2():
    """Enlarging a Series through ``.loc`` with new MultiIndex keys."""
    # GH9250
    existing_keys = [("test1", i) for i in range(5)] + [
        ("test2", i) for i in range(6)
    ]
    new_keys = [("test", 17), ("test", 18)]
    all_keys = existing_keys + new_keys

    left = pd.Series(np.linspace(0, 10, 11), MultiIndex.from_tuples(all_keys[:-2]))

    left.loc[("test", 17)] = 11
    left.loc[("test", 18)] = 12

    right = pd.Series(np.linspace(0, 12, 13), MultiIndex.from_tuples(all_keys))

    tm.assert_series_equal(left, right)
def test_append(idx):
    """Appending the pieces of an index restores the original index."""
    # two pieces
    rebuilt = idx[:3].append(idx[3:])
    assert rebuilt.equals(idx)

    # three pieces, the tail passed as a list
    pieces = [idx[:1], idx[1:3], idx[3:]]
    head, rest = pieces[0], pieces[1:]
    rebuilt = head.append(rest)
    assert rebuilt.equals(idx)

    # empty
    rebuilt = idx.append([])
    assert rebuilt.equals(idx)
def test_append_index():
    """Append flat and multi-level indexes to each other in several combinations."""
    floats = Index([1.1, 1.2, 1.3])
    dates = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo")
    letters = Index(["A", "B", "C"])

    two_level = MultiIndex.from_arrays([floats, dates])
    three_level = MultiIndex.from_arrays([floats, dates, letters])

    # see gh-7112
    tz = pytz.timezone("Asia/Tokyo")
    pair_tuples = [
        (1.1, tz.localize(datetime(2011, 1, 1))),
        (1.2, tz.localize(datetime(2011, 1, 2))),
        (1.3, tz.localize(datetime(2011, 1, 3))),
    ]

    # flat + two-level
    result = floats.append(two_level)
    expected = Index([1.1, 1.2, 1.3] + pair_tuples)
    tm.assert_index_equal(result, expected)

    # two-level + flat
    result = two_level.append(floats)
    expected = Index(pair_tuples + [1.1, 1.2, 1.3])
    tm.assert_index_equal(result, expected)

    # two-level + two-level
    result = two_level.append(two_level)
    doubled = MultiIndex.from_arrays([floats.append(floats), dates.append(dates)])
    tm.assert_index_equal(result, doubled)

    # two-level + three-level: compared against the same expectation
    result = two_level.append(three_level)
    tm.assert_index_equal(result, doubled)

    # three-level + two-level
    result = three_level.append(two_level)
    triple_tuples = [
        (1.1, tz.localize(datetime(2011, 1, 1)), "A"),
        (1.2, tz.localize(datetime(2011, 1, 2)), "B"),
        (1.3, tz.localize(datetime(2011, 1, 3)), "C"),
    ]
    expected = Index._simple_new(
        np.array(triple_tuples + pair_tuples, dtype=object),
        None,
    )
    tm.assert_index_equal(result, expected)
def test_repeat():
    """``repeat`` on a product index matches a product over the repeated level."""
    repeats = 2
    first_level = [1, 2, 3]
    second_level = np.array(["foo", "bar"])  # doubles as the level names

    mi = MultiIndex.from_product([first_level, second_level], names=second_level)
    expected = MultiIndex.from_product(
        [first_level, second_level.repeat(repeats)], names=second_level
    )

    tm.assert_index_equal(mi.repeat(repeats), expected)
def test_insert_base(idx):
    """Inserting the first element back at position 0 restores the slice."""
    without_first = idx[1:4]

    # test 0th element
    restored = without_first.insert(0, idx[0])
    assert idx[0:4].equals(restored)
def test_delete_base(idx):
    """``delete`` removes the first / last entry; an out-of-bounds position raises."""
    for position, expected in ((0, idx[1:]), (-1, idx[:-1])):
        trimmed = idx.delete(position)
        assert trimmed.equals(expected)
        assert trimmed.name == expected.name

    expected_message = "index 6 is out of bounds for axis 0 with size 6"
    with pytest.raises(IndexError, match=expected_message):
        idx.delete(len(idx))

View File

@ -0,0 +1,540 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
CategoricalIndex,
Index,
IntervalIndex,
MultiIndex,
Series,
)
import pandas._testing as tm
@pytest.mark.parametrize("case", [0.5, "xxx"])
@pytest.mark.parametrize(
    "method", ["intersection", "union", "difference", "symmetric_difference"]
)
def test_set_ops_error_cases(idx, case, sort, method):
    """Every set operation rejects a float or a plain string with ``TypeError``."""
    # non-iterable input
    set_op = getattr(idx, method)
    expected_message = "Input must be Index or array-like"
    with pytest.raises(TypeError, match=expected_message):
        set_op(case, sort=sort)
@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list])
def test_intersection_base(idx, sort, klass):
    """Intersect a reversed slice with a superset passed as several container types."""
    first = idx[2::-1]  # first 3 elements reversed
    second = idx[:5]
    if klass is not MultiIndex:
        # hand the tuples over in another container
        second = klass(second.values)

    # with sort=None the result is expected in sorted order
    expected = first.sort_values() if sort is None else first

    intersect = first.intersection(second, sort=sort)
    tm.assert_index_equal(intersect, expected)

    expected_message = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=expected_message):
        first.intersection([1, 2, 3], sort=sort)
@pytest.mark.arm_slow
@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list])
def test_union_base(idx, sort, klass):
    """Union a reversed index with a subset passed as several container types."""
    first = idx[::-1]
    second = idx[:5]
    if klass is not MultiIndex:
        # hand the tuples over in another container
        second = klass(second.values)

    # with sort=None the result is expected in sorted order
    expected = first.sort_values() if sort is None else first

    union = first.union(second, sort=sort)
    tm.assert_index_equal(union, expected)

    expected_message = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=expected_message):
        first.union([1, 2, 3], sort=sort)
def test_difference_base(idx, sort):
    """Difference against the tail of the index, passed as several container types."""
    tail = idx[4:]
    head = idx[:4]
    # with sort=None the result is expected in sorted order
    answer = head.sort_values() if sort is None else head

    result = idx.difference(tail, sort=sort)
    assert result.equals(answer)
    tm.assert_index_equal(result, answer)

    # GH 10149
    for klass in (np.array, Series, list):
        case = klass(tail.values)
        result = idx.difference(case, sort=sort)
        tm.assert_index_equal(result, answer)

    expected_message = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=expected_message):
        idx.difference([1, 2, 3], sort=sort)
def test_symmetric_difference(idx, sort):
    """Symmetric difference of two overlapping slices, via several container types."""
    first = idx[1:]
    second = idx[:-1]
    # the slices differ only in the last and the first entry
    ends = idx[[-1, 0]]
    answer = ends.sort_values() if sort is None else ends

    result = first.symmetric_difference(second, sort=sort)
    tm.assert_index_equal(result, answer)

    # GH 10149
    for klass in (np.array, Series, list):
        case = klass(second.values)
        result = first.symmetric_difference(case, sort=sort)
        tm.assert_index_equal(result, answer)

    expected_message = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=expected_message):
        first.symmetric_difference([1, 2, 3], sort=sort)
def test_multiindex_symmetric_difference():
# GH 13490
idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"])
with tm.assert_produces_warning(FutureWarning):
result = idx ^ idx
assert result.names == idx.names
idx2 = idx.copy().rename(["A", "B"])
with tm.assert_produces_warning(FutureWarning):
result = idx ^ idx2
assert result.names == [None, None]
def test_empty(idx):
# GH 15270
assert not idx.empty
assert idx[:0].empty
def test_difference(idx, sort):
    """
    Exercise ``difference`` on the fixture: removal of the last three entries,
    several empty results, name handling, and rejection of non-tuple input.
    """
    first = idx

    result = first.difference(idx[-3:], sort=sort)
    remaining = idx[:-3].values
    if sort is None:
        remaining = sorted(remaining)
    expected = MultiIndex.from_tuples(remaining, sortorder=0, names=idx.names)

    assert isinstance(result, MultiIndex)
    assert result.equals(expected)
    assert result.names == idx.names
    tm.assert_index_equal(result, expected)

    # empty difference, in order: reflexive, superset, degenerate
    for left in (idx, idx[-3:], idx[:0]):
        result = left.difference(idx, sort=sort)
        assert result.equals(idx[:0])
        assert result.names == idx.names

    # names not the same
    renamed_tail = idx[-3:]
    renamed_tail.names = ["foo", "baz"]
    result = first.difference(renamed_tail, sort=sort)
    assert result.names == (None, None)

    # empty, but non-equal
    resorted = idx.sortlevel(1)[0]
    result = idx.difference(resorted, sort=sort)
    assert len(result) == 0

    # the index's own values (not a MultiIndex) are accepted and leave nothing
    result = first.difference(first.values, sort=sort)
    assert result.equals(first[:0])

    # name from empty array
    result = first.difference([], sort=sort)
    assert first.equals(result)
    assert first.names == result.names

    # name from non-empty array
    result = first.difference([("foo", "one")], sort=sort)
    # Built as in the original test; only the names are compared below.
    expected = MultiIndex.from_tuples(
        [("bar", "one"), ("baz", "two"), ("foo", "two"), ("qux", "one"), ("qux", "two")]
    )
    expected.names = first.names
    assert first.names == result.names

    # A flat list of scalars is rejected.
    with pytest.raises(
        TypeError, match="other must be a MultiIndex or a list of tuples"
    ):
        first.difference([1, 2, 3, 4, 5], sort=sort)
def test_difference_sort_special():
    """
    With the default ``sort=None``, the difference with an empty list is
    expected to equal the (unsorted) index itself.
    """
    # GH-24959
    unsorted = MultiIndex.from_product([[1, 0], ["a", "b"]])
    # sort=None, the default
    tm.assert_index_equal(unsorted.difference([]), unsorted)
@pytest.mark.xfail(reason="Not implemented.")
def test_difference_sort_special_true():
    """
    Intended ``sort=True`` behaviour: the difference with an empty list comes
    back sorted. Marked xfail.
    """
    # TODO(GH#25151): decide on True behaviour
    unsorted = MultiIndex.from_product([[1, 0], ["a", "b"]])
    wanted = MultiIndex.from_product([[0, 1], ["a", "b"]])
    tm.assert_index_equal(unsorted.difference([], sort=True), wanted)
def test_difference_sort_incomparable():
    """
    Difference between indexes whose first level mixes ints and a Timestamp.

    The default ``sort=None`` must emit a RuntimeWarning; for both
    ``sort=None`` and ``sort=False`` the result is expected to equal the left
    index.
    """
    # GH-24959
    left = MultiIndex.from_product([[1, pd.Timestamp("2000"), 2], ["a", "b"]])
    right = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]])

    # sort=None, the default
    with tm.assert_produces_warning(
        RuntimeWarning, match="sort order is undefined for incomparable objects"
    ):
        default_result = left.difference(right)
    tm.assert_index_equal(default_result, left)

    # sort=False
    tm.assert_index_equal(left.difference(right, sort=False), left)
def test_difference_sort_incomparable_true():
    """``sort=True`` is rejected with a ValueError by ``difference``."""
    left = MultiIndex.from_product([[1, pd.Timestamp("2000"), 2], ["a", "b"]])
    right = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]])

    with pytest.raises(
        ValueError,
        match="The 'sort' keyword only takes the values of None or False; True was passed.",
    ):
        left.difference(right, sort=True)
def test_union(idx, sort):
piece1 = idx[:5][::-1]
piece2 = idx[3:]
the_union = piece1.union(piece2, sort=sort)
if sort is None:
tm.assert_index_equal(the_union, idx.sort_values())
assert tm.equalContents(the_union, idx)
# corner case, pass self or empty thing:
the_union = idx.union(idx, sort=sort)
tm.assert_index_equal(the_union, idx)
the_union = idx.union(idx[:0], sort=sort)
tm.assert_index_equal(the_union, idx)
tuples = idx.values
result = idx[:4].union(tuples[4:], sort=sort)
if sort is None:
tm.equalContents(result, idx)
else:
assert result.equals(idx)
@pytest.mark.xfail(
    # This test was commented out from Oct 2011 to Dec 2021, may no longer
    # be relevant.
    reason="Length of names must match number of levels in MultiIndex",
    raises=ValueError,
)
def test_union_with_regular_index(idx):
    """
    Union between a flat string Index and the MultiIndex fixture, in both
    directions. Marked xfail (ValueError).
    """
    flat = Index(["A", "B", "C"])

    flat_first = flat.union(idx)
    assert ("foo", "one") in flat_first
    assert "B" in flat_first

    with tm.assert_produces_warning(
        RuntimeWarning, match="The values in the array are unorderable"
    ):
        multi_first = idx.union(flat)
    assert flat_first.equals(multi_first)
def test_intersection(idx, sort):
piece1 = idx[:5][::-1]
piece2 = idx[3:]
the_int = piece1.intersection(piece2, sort=sort)
if sort is None:
tm.assert_index_equal(the_int, idx[3:5])
assert tm.equalContents(the_int, idx[3:5])
# corner case, pass self
the_int = idx.intersection(idx, sort=sort)
tm.assert_index_equal(the_int, idx)
# empty intersection: disjoint
empty = idx[:2].intersection(idx[2:], sort=sort)
expected = idx[:0]
assert empty.equals(expected)
tuples = idx.values
result = idx.intersection(tuples)
assert result.equals(idx)
@pytest.mark.parametrize(
    "method", ["intersection", "union", "difference", "symmetric_difference"]
)
def test_setop_with_categorical(idx, sort, method):
    """
    A set operation against the flattened, category-typed copy of the fixture
    is expected to match the same operation against the fixture itself, with
    all result names set to ``None``.
    """
    setop = getattr(idx, method)
    categorical = idx.to_flat_index().astype("category")
    no_names = [None] * idx.nlevels

    # full-length right operand
    expected = setop(idx, sort=sort).rename(no_names)
    tm.assert_index_equal(setop(categorical, sort=sort), expected)

    # first five entries only
    expected = setop(idx[:5], sort=sort).rename(no_names)
    tm.assert_index_equal(setop(categorical[:5], sort=sort), expected)
def test_intersection_non_object(idx, sort):
    """
    Intersection with a named integer Index gives an empty, unnamed result;
    with a length-0 ndarray the names are kept; a non-empty ndarray of
    scalars raises TypeError.
    """

    def empty_with(names):
        # Zero-length MultiIndex sharing the fixture's levels.
        return MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=names)

    other = Index(range(3), name="foo")

    tm.assert_index_equal(
        idx.intersection(other, sort=sort), empty_with(None), exact=True
    )

    # if we pass a length-0 ndarray (i.e. no name, we retain our idx.name)
    tm.assert_index_equal(
        idx.intersection(np.asarray(other)[:0], sort=sort),
        empty_with(idx.names),
        exact=True,
    )

    with pytest.raises(
        TypeError, match="other must be a MultiIndex or a list of tuples"
    ):
        # With non-zero length non-index, we try and fail to convert to tuples
        idx.intersection(np.asarray(other), sort=sort)
def test_intersect_equal_sort():
    """
    Intersecting an unsorted index with itself is expected to return it
    unchanged for both ``sort=False`` and ``sort=None``.
    """
    # GH-24959
    unsorted = MultiIndex.from_product([[1, 0], ["a", "b"]])
    for sort in (False, None):
        tm.assert_index_equal(unsorted.intersection(unsorted, sort=sort), unsorted)
@pytest.mark.xfail(reason="Not implemented.")
def test_intersect_equal_sort_true():
    """
    Intended ``sort=True`` behaviour: self-intersection of an unsorted index
    comes back sorted. Marked xfail.
    """
    # TODO(GH#25151): decide on True behaviour
    unsorted = MultiIndex.from_product([[1, 0], ["a", "b"]])
    wanted = MultiIndex.from_product([[0, 1], ["a", "b"]])
    tm.assert_index_equal(unsorted.intersection(unsorted, sort=True), wanted)
@pytest.mark.parametrize("slice_", [slice(None), slice(0)])
def test_union_sort_other_empty(slice_):
    """
    Union of an unsorted index with a slice of itself (the whole index or an
    empty slice) is expected to equal the index, in either operand order.
    """
    # https://github.com/pandas-dev/pandas/issues/24959
    unsorted = MultiIndex.from_product([[1, 0], ["a", "b"]])
    piece = unsorted[slice_]

    # default, sort=None
    tm.assert_index_equal(unsorted.union(piece), unsorted)
    tm.assert_index_equal(piece.union(unsorted), unsorted)

    # sort=False
    tm.assert_index_equal(unsorted.union(piece, sort=False), unsorted)
@pytest.mark.xfail(reason="Not implemented.")
def test_union_sort_other_empty_sort(slice_):
    """
    Intended ``sort=True`` behaviour: union with an empty slice comes back
    sorted. Marked xfail.
    """
    # NOTE(review): ``slice_`` is never used in the body and this function has
    # no parametrize mark of its own -- confirm something actually provides it.
    # TODO(GH#25151): decide on True behaviour
    # # sort=True
    unsorted = MultiIndex.from_product([[1, 0], ["a", "b"]])
    nothing = unsorted[:0]
    wanted = MultiIndex.from_product([[0, 1], ["a", "b"]])
    tm.assert_index_equal(unsorted.union(nothing, sort=True), wanted)
def test_union_sort_other_incomparable():
    """
    Union of an index mixing an int and a Timestamp with its first entry.

    The default ``sort=None`` must emit a RuntimeWarning; for both
    ``sort=None`` and ``sort=False`` the result is expected to equal the index.
    """
    # https://github.com/pandas-dev/pandas/issues/24959
    mixed = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])
    first_entry = mixed[:1]

    # default, sort=None
    with tm.assert_produces_warning(RuntimeWarning):
        default_result = mixed.union(first_entry)
    tm.assert_index_equal(default_result, mixed)

    # sort=False
    tm.assert_index_equal(mixed.union(first_entry, sort=False), mixed)
@pytest.mark.xfail(reason="Not implemented.")
def test_union_sort_other_incomparable_sort():
    """
    Intended ``sort=True`` behaviour with incomparable values: a TypeError
    matching "Cannot compare". Marked xfail.
    """
    # TODO(GH#25151): decide on True behaviour
    # # sort=True
    mixed = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])
    with pytest.raises(TypeError, match="Cannot compare"):
        mixed.union(mixed[:1], sort=True)
def test_union_non_object_dtype_raises():
    """Union of a MultiIndex with one of its own levels raises NotImplementedError."""
    # GH#32646 raise NotImplementedError instead of less-informative error
    mi = MultiIndex.from_product([["a", "b"], [1, 2]])
    second_level = mi.levels[1]

    with pytest.raises(
        NotImplementedError,
        match="Can only union MultiIndex with MultiIndex or Index of tuples",
    ):
        mi.union(second_level)
def test_union_empty_self_different_names():
    """
    Union of an empty, unnamed MultiIndex with a named one is expected to
    carry the other's values without its names.
    """
    # GH#38423
    empty = MultiIndex.from_arrays([[]])
    named = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
    unnamed = MultiIndex.from_arrays([[1, 2], [3, 4]])

    tm.assert_index_equal(empty.union(named), unnamed)
def test_union_multiindex_empty_rangeindex():
    """
    Union between a MultiIndex and an empty RangeIndex, in either order, is
    expected to equal the MultiIndex (names are not compared).
    """
    # GH#41234
    mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
    empty_range = pd.RangeIndex(0)

    tm.assert_index_equal(mi, mi.union(empty_range), check_names=False)
    tm.assert_index_equal(mi, empty_range.union(mi), check_names=False)
@pytest.mark.parametrize(
    "method", ["union", "intersection", "difference", "symmetric_difference"]
)
def test_setops_disallow_true(method):
    """Each set operation rejects ``sort=True`` with a ValueError."""
    left = MultiIndex.from_product([["a", "b"], [1, 2]])
    right = MultiIndex.from_product([["b", "c"], [1, 2]])
    setop = getattr(left, method)

    with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
        setop(right, sort=True)
@pytest.mark.parametrize(
    ("tuples", "exp_tuples"),
    [
        ([("val1", "test1")], [("val1", "test1")]),
        ([("val1", "test1"), ("val1", "test1")], [("val1", "test1")]),
        (
            [("val2", "test2"), ("val1", "test1")],
            [("val2", "test2"), ("val1", "test1")],
        ),
    ],
)
def test_intersect_with_duplicates(tuples, exp_tuples):
    """
    Intersection with a right operand that contains a duplicated entry is
    expected to return each shared entry once, in the order of the left side.
    """
    # GH#36915
    names = ["first", "second"]
    left = MultiIndex.from_tuples(tuples, names=names)
    right = MultiIndex.from_tuples(
        [("val1", "test1"), ("val1", "test1"), ("val2", "test2")],
        names=names,
    )
    expected = MultiIndex.from_tuples(exp_tuples, names=names)

    tm.assert_index_equal(left.intersection(right), expected)
@pytest.mark.parametrize(
    "data, names, expected",
    [
        ((1,), None, [None, None]),
        ((1,), ["a"], [None, None]),
        ((1,), ["b"], [None, None]),
        ((1, 2), ["c", "d"], [None, None]),
        ((1, 2), ["b", "a"], [None, None]),
        ((1, 2, 3), ["a", "b", "c"], [None, None]),
        ((1, 2), ["a", "c"], ["a", None]),
        ((1, 2), ["c", "b"], [None, "b"]),
        ((1, 2), ["a", "b"], ["a", "b"]),
        ((1, 2), [None, "b"], [None, "b"]),
    ],
)
def test_maybe_match_names(data, names, expected):
    """
    ``_maybe_match_names`` of an index named ``["a", "b"]`` against an index
    with the parametrized names returns the expected per-level names.
    """
    # GH#38323
    reference = MultiIndex.from_tuples([], names=["a", "b"])
    candidate = MultiIndex.from_tuples([data], names=names)

    assert reference._maybe_match_names(candidate) == expected
def test_intersection_equal_different_names():
    """
    Intersecting equal-valued indexes whose first level names differ is
    expected to keep only the matching level name.
    """
    # GH#30302
    arrays = [[1, 2], [3, 4]]
    left = MultiIndex.from_arrays(arrays, names=["c", "b"])
    right = MultiIndex.from_arrays(arrays, names=["a", "b"])
    expected = MultiIndex.from_arrays(arrays, names=[None, "b"])

    tm.assert_index_equal(left.intersection(right), expected)
def test_intersection_different_names():
    """
    Intersecting a named index with an equal-valued unnamed one is expected
    to equal the unnamed index.
    """
    # GH#38323
    named = MultiIndex.from_arrays([[1], [3]], names=["c", "b"])
    unnamed = MultiIndex.from_arrays([[1], [3]])

    tm.assert_index_equal(named.intersection(unnamed), unnamed)
def test_intersection_with_missing_values_on_both_sides(nulls_fixture):
# GH#38623
mi1 = MultiIndex.from_arrays([[3, nulls_fixture, 4, nulls_fixture], [1, 2, 4, 2]])
mi2 = MultiIndex.from_arrays([[3, nulls_fixture, 3], [1, 2, 4]])
result = mi1.intersection(mi2)
expected = MultiIndex.from_arrays([[3.0, nulls_fixture], [1, 2]])
tm.assert_index_equal(result, expected)
def test_union_nan_got_duplicated():
# GH#38977
mi1 = MultiIndex.from_arrays([[1.0, np.nan], [2, 3]])
mi2 = MultiIndex.from_arrays([[1.0, np.nan, 3.0], [2, 3, 4]])
result = mi1.union(mi2)
tm.assert_index_equal(result, mi2)
def test_union_duplicates(index):
# GH#38977
if index.empty or isinstance(index, (IntervalIndex, CategoricalIndex)):
# No duplicates in empty indexes
return
values = index.unique().values.tolist()
mi1 = MultiIndex.from_arrays([values, [1] * len(values)])
mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * (len(values) + 1)])
result = mi1.union(mi2)
tm.assert_index_equal(result, mi2.sort_values())
result = mi2.union(mi1)
tm.assert_index_equal(result, mi2.sort_values())

View File

@ -0,0 +1,287 @@
import random
import numpy as np
import pytest
from pandas.errors import (
PerformanceWarning,
UnsortedIndexError,
)
from pandas import (
CategoricalIndex,
DataFrame,
Index,
MultiIndex,
RangeIndex,
)
import pandas._testing as tm
from pandas.core.indexes.frozen import FrozenList
def test_sortlevel(idx):
tuples = list(idx)
random.shuffle(tuples)
index = MultiIndex.from_tuples(tuples)
sorted_idx, _ = index.sortlevel(0)
expected = MultiIndex.from_tuples(sorted(tuples))
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(0, ascending=False)
assert sorted_idx.equals(expected[::-1])
sorted_idx, _ = index.sortlevel(1)
by1 = sorted(tuples, key=lambda x: (x[1], x[0]))
expected = MultiIndex.from_tuples(by1)
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(1, ascending=False)
assert sorted_idx.equals(expected[::-1])
def test_sortlevel_not_sort_remaining():
    """
    Sorting on level "A" with ``sort_remaining=False`` is expected to leave
    entries that tie on "A" in their original order.
    """
    mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
    by_a = mi.sortlevel("A", sort_remaining=False)[0]
    assert by_a.equals(mi)
def test_sortlevel_deterministic():
    """
    ``sortlevel`` on a fixed, unsorted list of tuples matches a plain Python
    sort of the tuples, for both levels and both directions.
    """
    tuples = [
        ("bar", "one"),
        ("foo", "two"),
        ("qux", "two"),
        ("foo", "one"),
        ("baz", "two"),
        ("qux", "one"),
    ]
    index = MultiIndex.from_tuples(tuples)

    # Expected ascending order per level; level 1 sorts on the second
    # element first.
    ordered_by_level = {
        0: sorted(tuples),
        1: sorted(tuples, key=lambda pair: (pair[1], pair[0])),
    }
    for level, ordered in ordered_by_level.items():
        expected = MultiIndex.from_tuples(ordered)
        assert index.sortlevel(level)[0].equals(expected)
        assert index.sortlevel(level, ascending=False)[0].equals(expected[::-1])
def test_numpy_argsort(idx):
result = np.argsort(idx)
expected = idx.argsort()
tm.assert_numpy_array_equal(result, expected)
# these are the only two types that perform
# pandas compatibility input validation - the
# rest already perform separate (or no) such
# validation via their 'values' attribute as
# defined in pandas.core.indexes/base.py - they
# cannot be changed at the moment due to
# backwards compatibility concerns
if isinstance(type(idx), (CategoricalIndex, RangeIndex)):
msg = "the 'axis' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.argsort(idx, axis=1)
msg = "the 'kind' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.argsort(idx, kind="mergesort")
msg = "the 'order' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.argsort(idx, order=("a", "b"))
def test_unsortedindex():
    """
    On an unsorted MultiIndex an exact-key lookup works, while slicing on the
    second level raises UnsortedIndexError until the frame is sorted.
    """
    # GH 11897
    mi = MultiIndex.from_tuples(
        [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")],
        names=["one", "two"],
    )
    rows = [[i, 10 * i] for i in range(6)]
    df = DataFrame(rows, index=mi, columns=["one", "two"])

    # GH 16734: not sorted, but no real slicing
    tm.assert_series_equal(df.loc(axis=0)["z", "a"], df.iloc[0])

    msg = (
        "MultiIndex slicing requires the index to be lexsorted: "
        r"slicing on levels \[1\], lexsort depth 0"
    )
    with pytest.raises(UnsortedIndexError, match=msg):
        df.loc(axis=0)["z", slice("a")]

    # once sorted, slicing on the first level is allowed
    df.sort_index(inplace=True)
    z_rows = df.loc(axis=0)["z", :]
    assert len(z_rows) == 2

    with pytest.raises(KeyError, match="'q'"):
        df.loc(axis=0)["q", :]
def test_unsortedindex_doc_examples():
    """
    Follow the documentation example on sorting a MultiIndex: lookups on the
    unsorted frame warn or raise, and work after ``sort_index``.
    """
    # https://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex
    frame = DataFrame(
        {"jim": [0, 0, 1, 1], "joe": ["x", "x", "z", "y"], "jolie": np.random.rand(4)}
    ).set_index(["jim", "joe"])

    # unsorted: exact key warns, tuple slice raises
    with tm.assert_produces_warning(PerformanceWarning):
        frame.loc[(1, "z")]

    with pytest.raises(
        UnsortedIndexError,
        match=r"Key length \(2\) was greater than MultiIndex lexsort depth \(1\)",
    ):
        frame.loc[(0, "y"):(1, "z")]

    assert not frame.index._is_lexsorted()
    assert frame.index._lexsort_depth == 1

    # sort it
    frame = frame.sort_index()
    frame.loc[(1, "z")]
    frame.loc[(0, "y"):(1, "z")]

    assert frame.index._is_lexsorted()
    assert frame.index._lexsort_depth == 2
def test_reconstruct_sort():
# starts off lexsorted & monotonic
mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
assert mi.is_monotonic
recons = mi._sort_levels_monotonic()
assert recons.is_monotonic
assert mi is recons
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
# cannot convert to lexsorted
mi = MultiIndex.from_tuples(
[("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")],
names=["one", "two"],
)
assert not mi.is_monotonic
recons = mi._sort_levels_monotonic()
assert not recons.is_monotonic
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
# cannot convert to lexsorted
mi = MultiIndex(
levels=[["b", "d", "a"], [1, 2, 3]],
codes=[[0, 1, 0, 2], [2, 0, 0, 1]],
names=["col1", "col2"],
)
assert not mi.is_monotonic
recons = mi._sort_levels_monotonic()
assert not recons.is_monotonic
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
def test_reconstruct_remove_unused():
    """
    Filtering rows keeps the unused level values in the index;
    ``remove_unused_levels`` drops them, and a second call returns an index
    that ``is_`` the first result.
    """
    # xref to GH 2770
    names = ["first", "second"]
    frame = DataFrame(
        [["deleteMe", 1, 9], ["keepMe", 2, 9], ["keepMeToo", 3, 9]],
        columns=["first", "second", "third"],
    )
    indexed = frame.set_index(names, drop=False)
    kept = indexed[indexed["first"] != "deleteMe"]

    # removed levels are there
    with_unused = MultiIndex(
        levels=[["deleteMe", "keepMe", "keepMeToo"], [1, 2, 3]],
        codes=[[1, 2], [1, 2]],
        names=names,
    )
    tm.assert_index_equal(kept.index, with_unused)

    trimmed = MultiIndex(
        levels=[["keepMe", "keepMeToo"], [2, 3]],
        codes=[[0, 1], [0, 1]],
        names=names,
    )
    once = kept.index.remove_unused_levels()
    tm.assert_index_equal(once, trimmed)

    # idempotent
    twice = once.remove_unused_levels()
    tm.assert_index_equal(twice, trimmed)
    assert twice.is_(once)
@pytest.mark.parametrize(
    "first_type,second_type", [("int64", "int64"), ("datetime64[D]", "str")]
)
def test_remove_unused_levels_large(first_type, second_type):
    """
    On a large grouped-and-filtered frame, ``remove_unused_levels`` shrinks
    both levels while the index still equals the original.
    """
    # GH16556

    # because tests should be deterministic (and this test in particular
    # checks that levels are removed, which is not the case for every
    # random input):
    rng = np.random.RandomState(4)  # seed is arbitrary value that works

    size = 1 << 16
    frame = DataFrame(
        {
            "first": rng.randint(0, 1 << 13, size).astype(first_type),
            "second": rng.randint(0, 1 << 10, size).astype(second_type),
            "third": rng.rand(size),
        }
    )
    grouped = frame.groupby(["first", "second"]).sum()
    filtered = grouped[grouped.third < 0.1]
    original = filtered.index

    trimmed = original.remove_unused_levels()
    for level in (0, 1):
        assert len(trimmed.levels[level]) < len(original.levels[level])
    assert trimmed.equals(original)

    rebuilt = filtered.reset_index().set_index(["first", "second"]).index
    tm.assert_index_equal(trimmed, rebuilt)
@pytest.mark.parametrize("level0", [["a", "d", "b"], ["a", "d", "b", "unused"]])
@pytest.mark.parametrize(
    "level1", [["w", "x", "y", "z"], ["w", "x", "y", "z", "unused"]]
)
def test_remove_unused_nan(level0, level1):
    """
    With ``-1`` codes present, ``remove_unused_levels`` is expected to return
    an equal index whose first two levels do not contain "unused".
    """
    # GH 18417
    codes = [[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]
    mi = MultiIndex(levels=[level0, level1], codes=codes)

    trimmed = mi.remove_unused_levels()
    tm.assert_index_equal(trimmed, mi)
    for position in (0, 1):
        assert "unused" not in trimmed.levels[position]
def test_argsort(idx):
result = idx.argsort()
expected = idx.values.argsort()
tm.assert_numpy_array_equal(result, expected)
def test_remove_unused_levels_with_nan():
    """
    After replacing the first level's values with ``["a", nan]``,
    ``remove_unused_levels`` is expected to leave levels that print the same
    as ``[["a", nan], [4]]``.
    """
    # GH 37510
    levels = (
        Index([(1, np.nan), (3, 4)])
        .rename(["id1", "id2"])
        .set_levels(["a", np.nan], level="id1")
        .remove_unused_levels()
        .levels
    )
    expected = FrozenList([["a", np.nan], [4]])
    # compared through their string form
    assert str(levels) == str(expected)

View File

@ -0,0 +1,79 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
def test_take(idx):
    """
    ``take`` with a list of positions matches indexing with that list, and
    the fixture exposes no ``freq`` attribute.
    """
    positions = [4, 3, 0, 2]
    assert idx.take(positions).equals(idx[positions])

    # GH 10791
    with pytest.raises(
        AttributeError, match="'MultiIndex' object has no attribute 'freq'"
    ):
        idx.freq
def test_take_invalid_kwargs(idx):
    """
    ``take`` rejects an unknown keyword with TypeError and the ``out`` and
    ``mode`` keywords with ValueError.
    """
    # The former ``idx = idx`` self-assignment was a no-op and is dropped.
    indices = [1, 2]

    msg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=msg):
        idx.take(indices, foo=2)

    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, out=indices)

    msg = "the 'mode' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, mode="clip")
def test_take_fill_value():
    """
    ``take`` with ``-1`` in the indexer: it picks the last entry by default
    and with ``allow_fill=False``, and a missing entry when ``fill_value`` is
    given; indices below ``-1`` or out of bounds raise.
    """
    # GH 12631
    names = ["str", "dt"]
    jan1 = pd.Timestamp("2011-01-01")
    jan2 = pd.Timestamp("2011-01-02")
    idx = pd.MultiIndex.from_product([["A", "B"], [jan1, jan2]], names=names)
    indexer = np.array([1, 0, -1])

    # -1 resolved to the last entry
    wrapped = pd.MultiIndex.from_tuples(
        [("A", jan2), ("A", jan1), ("B", jan2)], names=names
    )
    # -1 resolved to a missing entry
    filled = pd.MultiIndex.from_tuples(
        [("A", jan2), ("A", jan1), (np.nan, pd.NaT)], names=names
    )

    tm.assert_index_equal(idx.take(indexer), wrapped)

    # fill_value
    tm.assert_index_equal(idx.take(indexer, fill_value=True), filled)

    # allow_fill=False
    tm.assert_index_equal(
        idx.take(indexer, allow_fill=False, fill_value=True), wrapped
    )

    msg = "When allow_fill=True and fill_value is not None, all indices must be >= -1"
    for too_negative in (-2, -5):
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, too_negative]), fill_value=True)

    msg = "index -5 is out of bounds for( axis 0 with)? size 4"
    with pytest.raises(IndexError, match=msg):
        idx.take(np.array([1, -5]))