mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-07-04 07:08:05 +00:00
first commit
This commit is contained in:
@ -0,0 +1,77 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
|
||||
|
||||
# Note: identical to the "multi" entry in the top-level "index" fixture
|
||||
@pytest.fixture
def idx():
    """
    Return a six-row, two-level MultiIndex named ["first", "second"].

    The index is assembled directly from levels and codes, and integrity
    verification is switched off.
    """
    first_level = Index(["foo", "bar", "baz", "qux"])
    second_level = Index(["one", "two"])
    first_codes = np.array([0, 0, 1, 2, 3, 3])
    second_codes = np.array([0, 1, 0, 1, 0, 1])
    return MultiIndex(
        levels=[first_level, second_level],
        codes=[first_codes, second_codes],
        names=["first", "second"],
        verify_integrity=False,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def idx_dup():
    """
    Return a two-level MultiIndex whose codes repeat some (first, second) pairs.

    Same levels and names as the ``idx`` fixture; only the first-level codes
    differ.
    """
    # compare tests/indexes/multi/conftest.py
    first_level = Index(["foo", "bar", "baz", "qux"])
    second_level = Index(["one", "two"])
    first_codes = np.array([0, 0, 1, 0, 1, 1])
    second_codes = np.array([0, 1, 0, 1, 0, 1])
    return MultiIndex(
        levels=[first_level, second_level],
        codes=[first_codes, second_codes],
        names=["first", "second"],
        verify_integrity=False,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def index_names():
    """
    Return the level names used by the ``idx`` fixture.

    Lets tests compare the names assigned to ``idx`` against a known list.
    """
    level_names = ["first", "second"]
    return level_names
|
||||
|
||||
|
||||
@pytest.fixture
def narrow_multi_index():
    """
    Return a MultiIndex that is narrower than the display (<80 characters).
    """
    half = 1000
    cat_idx = pd.CategoricalIndex(["a"] * half + ["abc"] * half)
    seconds = pd.date_range("2000-01-01", freq="s", periods=half * 2)
    # second level is the categorical codes shifted by 9
    arrays = [cat_idx, cat_idx.codes + 9, seconds]
    return MultiIndex.from_arrays(arrays, names=["a", "b", "dti"])
|
||||
|
||||
|
||||
@pytest.fixture
def wide_multi_index():
    """
    Return a MultiIndex that is wider than the display (>80 characters).
    """
    half = 1000
    cat_idx = pd.CategoricalIndex(["a"] * half + ["abc"] * half)
    seconds = pd.date_range("2000-01-01", freq="s", periods=half * 2)
    # the datetime level is repeated three times
    return MultiIndex.from_arrays(
        [cat_idx, cat_idx.codes + 9, seconds, seconds, seconds],
        names=["a", "b", "dti_1", "dti_2", "dti_3"],
    )
|
@ -0,0 +1,260 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
date_range,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.api import UInt64Index
|
||||
|
||||
|
||||
def test_shift(idx):
|
||||
|
||||
# GH8083 test the base class for shift
|
||||
msg = "This method is only implemented for DatetimeIndex, PeriodIndex and "
|
||||
"TimedeltaIndex; Got type MultiIndex"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx.shift(1)
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx.shift(1, 2)
|
||||
|
||||
|
||||
def test_groupby(idx):
|
||||
groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
|
||||
labels = idx.tolist()
|
||||
exp = {1: labels[:3], 2: labels[3:]}
|
||||
tm.assert_dict_equal(groups, exp)
|
||||
|
||||
# GH5620
|
||||
groups = idx.groupby(idx)
|
||||
exp = {key: [key] for key in idx}
|
||||
tm.assert_dict_equal(groups, exp)
|
||||
|
||||
|
||||
def test_truncate_multiindex():
    """Check that MultiIndex.truncate trims the first level and keeps names."""
    # GH 34564 for MultiIndex level names check
    major_axis = Index(list(range(4)))
    minor_axis = Index(list(range(2)))

    major_codes = np.array([0, 0, 1, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])

    index = MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[major_codes, minor_codes],
        names=["L1", "L2"],
    )

    result = index.truncate(before=1)
    # The levels hold integers, so check for the dropped label 0; a string
    # such as "foo" can never be present and would make the check vacuous.
    assert 0 not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(after=1)
    assert 2 not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(before=1, after=2)
    assert len(result.levels[0]) == 2
    assert index.names == result.names

    msg = "after < before"
    with pytest.raises(ValueError, match=msg):
        index.truncate(3, 1)
|
||||
|
||||
|
||||
# TODO: reshape
|
||||
|
||||
|
||||
def test_reorder_levels(idx):
|
||||
# this blows up
|
||||
with pytest.raises(IndexError, match="^Too many levels"):
|
||||
idx.reorder_levels([2, 1, 0])
|
||||
|
||||
|
||||
def test_numpy_repeat():
    """Check np.repeat on a MultiIndex, and that ``axis`` is rejected."""
    count = 2
    ints = [1, 2, 3]
    labels = np.array(["foo", "bar"])

    mi = MultiIndex.from_product([ints, labels], names=labels)
    repeated = MultiIndex.from_product([ints, labels.repeat(count)], names=labels)
    tm.assert_index_equal(np.repeat(mi, count), repeated)

    with pytest.raises(ValueError, match="the 'axis' parameter is not supported"):
        np.repeat(mi, count, axis=1)
|
||||
|
||||
|
||||
def test_append_mixed_dtypes():
    """Append to a six-level MultiIndex with mixed numeric, string and datetime-like levels."""
    # GH 13660
    monthly = {"freq": "M", "periods": 3}
    dti = date_range("2011-01-01", **monthly)
    dti_tz = date_range("2011-01-01", tz="US/Eastern", **monthly)
    pi = period_range("2011-01", **monthly)

    ints = [1, 2, 3]
    floats = [1.1, np.nan, 3.3]
    letters = ["a", "b", "c"]

    mi = MultiIndex.from_arrays([ints, floats, letters, dti, dti_tz, pi])
    assert mi.nlevels == 6

    # appending the index to itself
    doubled = MultiIndex.from_arrays(
        [
            ints * 2,
            floats * 2,
            letters * 2,
            dti.append(dti),
            dti_tz.append(dti_tz),
            pi.append(pi),
        ]
    )
    tm.assert_index_equal(mi.append(mi), doubled)

    # appending an index whose six levels are all strings
    xyz = ["x", "y", "z"]
    other = MultiIndex.from_arrays([xyz] * 6)
    mixed = MultiIndex.from_arrays(
        [
            ints + xyz,
            floats + xyz,
            letters + xyz,
            dti.append(Index(xyz)),
            dti_tz.append(Index(xyz)),
            pi.append(Index(xyz)),
        ]
    )
    tm.assert_index_equal(mi.append(other), mixed)
|
||||
|
||||
|
||||
def test_iter(idx):
|
||||
result = list(idx)
|
||||
expected = [
|
||||
("foo", "one"),
|
||||
("foo", "two"),
|
||||
("bar", "one"),
|
||||
("baz", "two"),
|
||||
("qux", "one"),
|
||||
("qux", "two"),
|
||||
]
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_sub(idx):
|
||||
|
||||
first = idx
|
||||
|
||||
# - now raises (previously was set op difference)
|
||||
msg = "cannot perform __sub__ with this index type: MultiIndex"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
first - idx[-3:]
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
idx[-3:] - first
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
idx[-3:] - first.tolist()
|
||||
msg = "cannot perform __rsub__ with this index type: MultiIndex"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
first.tolist() - idx[-3:]
|
||||
|
||||
|
||||
def test_map(idx):
|
||||
# callable
|
||||
index = idx
|
||||
|
||||
result = index.map(lambda x: x)
|
||||
tm.assert_index_equal(result, index)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"mapper",
|
||||
[
|
||||
lambda values, idx: {i: e for e, i in zip(values, idx)},
|
||||
lambda values, idx: pd.Series(values, idx),
|
||||
],
|
||||
)
|
||||
def test_map_dictlike(idx, mapper):
|
||||
|
||||
identity = mapper(idx.values, idx)
|
||||
|
||||
# we don't infer to UInt64 for a dict
|
||||
if isinstance(idx, UInt64Index) and isinstance(identity, dict):
|
||||
expected = idx.astype("int64")
|
||||
else:
|
||||
expected = idx
|
||||
|
||||
result = idx.map(identity)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# empty mappable
|
||||
expected = Index([np.nan] * len(idx))
|
||||
result = idx.map(mapper(expected, idx))
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func",
|
||||
[
|
||||
np.exp,
|
||||
np.exp2,
|
||||
np.expm1,
|
||||
np.log,
|
||||
np.log2,
|
||||
np.log10,
|
||||
np.log1p,
|
||||
np.sqrt,
|
||||
np.sin,
|
||||
np.cos,
|
||||
np.tan,
|
||||
np.arcsin,
|
||||
np.arccos,
|
||||
np.arctan,
|
||||
np.sinh,
|
||||
np.cosh,
|
||||
np.tanh,
|
||||
np.arcsinh,
|
||||
np.arccosh,
|
||||
np.arctanh,
|
||||
np.deg2rad,
|
||||
np.rad2deg,
|
||||
],
|
||||
ids=lambda func: func.__name__,
|
||||
)
|
||||
def test_numpy_ufuncs(idx, func):
|
||||
# test ufuncs of numpy. see:
|
||||
# https://numpy.org/doc/stable/reference/ufuncs.html
|
||||
|
||||
expected_exception = TypeError
|
||||
msg = (
|
||||
"loop of ufunc does not support argument 0 of type tuple which "
|
||||
f"has no callable {func.__name__} method"
|
||||
)
|
||||
with pytest.raises(expected_exception, match=msg):
|
||||
func(idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func",
|
||||
[np.isfinite, np.isinf, np.isnan, np.signbit],
|
||||
ids=lambda func: func.__name__,
|
||||
)
|
||||
def test_numpy_type_funcs(idx, func):
|
||||
msg = (
|
||||
f"ufunc '{func.__name__}' not supported for the input types, and the inputs "
|
||||
"could not be safely coerced to any supported types according to "
|
||||
"the casting rule ''safe''"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
func(idx)
|
@ -0,0 +1,30 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_astype(idx):
|
||||
expected = idx.copy()
|
||||
actual = idx.astype("O")
|
||||
tm.assert_copy(actual.levels, expected.levels)
|
||||
tm.assert_copy(actual.codes, expected.codes)
|
||||
assert actual.names == list(expected.names)
|
||||
|
||||
with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
|
||||
idx.astype(np.dtype(int))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ordered", [True, False])
|
||||
def test_astype_category(idx, ordered):
|
||||
# GH 18630
|
||||
msg = "> 1 ndim Categorical are not supported at this time"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx.astype(CategoricalDtype(ordered=ordered))
|
||||
|
||||
if ordered is False:
|
||||
# dtype='category' defaults to ordered=False, so only test once
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx.astype("category")
|
@ -0,0 +1,98 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import MultiIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_numeric_compat(idx):
|
||||
with pytest.raises(TypeError, match="cannot perform __mul__"):
|
||||
idx * 1
|
||||
|
||||
with pytest.raises(TypeError, match="cannot perform __rmul__"):
|
||||
1 * idx
|
||||
|
||||
div_err = "cannot perform __truediv__"
|
||||
with pytest.raises(TypeError, match=div_err):
|
||||
idx / 1
|
||||
|
||||
div_err = div_err.replace(" __", " __r")
|
||||
with pytest.raises(TypeError, match=div_err):
|
||||
1 / idx
|
||||
|
||||
with pytest.raises(TypeError, match="cannot perform __floordiv__"):
|
||||
idx // 1
|
||||
|
||||
with pytest.raises(TypeError, match="cannot perform __rfloordiv__"):
|
||||
1 // idx
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["all", "any", "__invert__"])
|
||||
def test_logical_compat(idx, method):
|
||||
msg = f"cannot perform {method}"
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
getattr(idx, method)()
|
||||
|
||||
|
||||
def test_inplace_mutation_resets_values():
    """
    Check how ``set_levels`` / ``set_codes`` interact with the ``_values`` cache.

    The non-inplace setters are expected to leave the original index's cached
    ``_values`` untouched, while the ``inplace=True`` variants (which must emit
    a FutureWarning) are expected to drop the cache entry. The assertions
    depend on the exact order in which ``.values`` is accessed.
    """
    levels = [["a", "b", "c"], [4]]
    levels2 = [[1, 2, 3], ["a"]]
    codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]

    mi1 = MultiIndex(levels=levels, codes=codes)
    mi2 = MultiIndex(levels=levels2, codes=codes)

    # instantiating MultiIndex should not access/cache _.values
    assert "_values" not in mi1._cache
    assert "_values" not in mi2._cache

    # copies, so later comparisons do not alias the cached arrays
    vals = mi1.values.copy()
    vals2 = mi2.values.copy()

    # accessing .values should cache ._values
    assert mi1._values is mi1._cache["_values"]
    assert mi1.values is mi1._cache["_values"]
    assert isinstance(mi1._cache["_values"], np.ndarray)

    # Make sure level setting works
    new_vals = mi1.set_levels(levels2).values
    tm.assert_almost_equal(vals2, new_vals)

    # Non-inplace doesn't drop _values from _cache [implementation detail]
    tm.assert_almost_equal(mi1._cache["_values"], vals)

    # ...and values is still same too
    tm.assert_almost_equal(mi1.values, vals)

    # Inplace should drop _values from _cache
    with tm.assert_produces_warning(FutureWarning):
        mi1.set_levels(levels2, inplace=True)
    assert "_values" not in mi1._cache
    tm.assert_almost_equal(mi1.values, vals2)

    # Make sure label setting works too
    codes2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
    # fill an object array element-wise so each entry is a tuple
    exp_values = np.empty((6,), dtype=object)
    exp_values[:] = [(1, "a")] * 6

    # Must be 1d array of tuples
    assert exp_values.shape == (6,)

    new_mi = mi2.set_codes(codes2)
    assert "_values" not in new_mi._cache
    new_values = new_mi.values
    assert "_values" in new_mi._cache

    # Not inplace shouldn't change
    tm.assert_almost_equal(mi2._cache["_values"], vals2)

    # Should have correct values
    tm.assert_almost_equal(exp_values, new_values)

    # ...and again setting inplace should drop _values from _cache, etc
    with tm.assert_produces_warning(FutureWarning):
        mi2.set_codes(codes2, inplace=True)
    assert "_values" not in mi2._cache
    tm.assert_almost_equal(mi2.values, new_values)
    assert "_values" in mi2._cache
|
@ -0,0 +1,829 @@
|
||||
from datetime import (
|
||||
date,
|
||||
datetime,
|
||||
)
|
||||
import itertools
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_constructor_single_level():
    """Check that one level and one codes array still build a MultiIndex."""
    labels = ["foo", "bar", "baz", "qux"]
    single = MultiIndex(levels=[labels], codes=[[0, 1, 2, 3]], names=["first"])
    assert isinstance(single, MultiIndex)
    tm.assert_index_equal(single.levels[0], Index(labels, name="first"))
    assert single.names == ["first"]
|
||||
|
||||
|
||||
def test_constructor_no_levels():
    """Check that empty or missing levels/codes are rejected by the constructor."""
    with pytest.raises(ValueError, match="non-zero number of levels/codes"):
        MultiIndex(levels=[], codes=[])

    # supplying only one of the two arguments
    for only_one in ({"levels": []}, {"codes": []}):
        with pytest.raises(TypeError, match="Must pass both levels and codes"):
            MultiIndex(**only_one)
|
||||
|
||||
|
||||
def test_constructor_nonhashable_names():
    """Check that list-valued level names raise TypeError in the constructor, rename and set_names."""
    # GH 20527
    pattern = r"MultiIndex\.name must be a hashable type"
    level_values = [[1, 2], ["one", "two"]]
    level_codes = [[0, 0, 1, 1], [0, 1, 0, 1]]

    with pytest.raises(TypeError, match=pattern):
        MultiIndex(levels=level_values, codes=level_codes, names=(["foo"], ["bar"]))

    mi = MultiIndex(
        levels=[[1, 2], ["one", "two"]],
        codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
        names=("foo", "bar"),
    )
    bad_names = [["foor"], ["barr"]]

    # With .rename()
    with pytest.raises(TypeError, match=pattern):
        mi.rename(names=bad_names)

    # With .set_names()
    with pytest.raises(TypeError, match=pattern):
        mi.set_names(names=bad_names)
|
||||
|
||||
|
||||
def test_constructor_mismatched_codes_levels(idx):
|
||||
codes = [np.array([1]), np.array([2]), np.array([3])]
|
||||
levels = ["a"]
|
||||
|
||||
msg = "Length of levels and codes must be the same"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
MultiIndex(levels=levels, codes=codes)
|
||||
|
||||
length_error = (
|
||||
r"On level 0, code max \(3\) >= length of level \(1\)\. "
|
||||
"NOTE: this index is in an inconsistent state"
|
||||
)
|
||||
label_error = r"Unequal code lengths: \[4, 2\]"
|
||||
code_value_error = r"On level 0, code value \(-2\) < -1"
|
||||
|
||||
# important to check that it's looking at the right thing.
|
||||
with pytest.raises(ValueError, match=length_error):
|
||||
MultiIndex(levels=[["a"], ["b"]], codes=[[0, 1, 2, 3], [0, 3, 4, 1]])
|
||||
|
||||
with pytest.raises(ValueError, match=label_error):
|
||||
MultiIndex(levels=[["a"], ["b"]], codes=[[0, 0, 0, 0], [0, 0]])
|
||||
|
||||
# external API
|
||||
with pytest.raises(ValueError, match=length_error):
|
||||
idx.copy().set_levels([["a"], ["b"]])
|
||||
|
||||
with pytest.raises(ValueError, match=label_error):
|
||||
idx.copy().set_codes([[0, 0, 0, 0], [0, 0]])
|
||||
|
||||
# test set_codes with verify_integrity=False
|
||||
# the setting should not raise any value error
|
||||
idx.copy().set_codes(codes=[[0, 0, 0, 0], [0, 0]], verify_integrity=False)
|
||||
|
||||
# code value smaller than -1
|
||||
with pytest.raises(ValueError, match=code_value_error):
|
||||
MultiIndex(levels=[["a"], ["b"]], codes=[[0, -2], [0, 0]])
|
||||
|
||||
|
||||
def test_na_levels():
    """Check that codes pointing at missing level values are re-assigned to -1."""
    # GH26408
    # test if codes are re-assigned value -1 for levels
    # with missing values (NaN, NaT, None)
    raw_codes = [[0, -1, 1, 2, 3, 4]]

    leading_na = [[np.nan, None, pd.NaT, 128, 2]]
    tm.assert_index_equal(
        MultiIndex(levels=leading_na, codes=raw_codes),
        MultiIndex(levels=leading_na, codes=[[-1, -1, -1, -1, 3, 4]]),
    )

    scattered_na = [[np.nan, "s", pd.NaT, 128, None]]
    expected = MultiIndex(levels=scattered_na, codes=[[-1, -1, 1, -1, 3, -1]])
    tm.assert_index_equal(MultiIndex(levels=scattered_na, codes=raw_codes), expected)

    # verify set_levels and set_codes
    via_set_levels = MultiIndex(
        levels=[[1, 2, 3, 4, 5]], codes=raw_codes
    ).set_levels(scattered_na)
    tm.assert_index_equal(via_set_levels, expected)

    via_set_codes = MultiIndex(
        levels=scattered_na, codes=[[1, 2, 2, 2, 2, 2]]
    ).set_codes(raw_codes)
    tm.assert_index_equal(via_set_codes, expected)
|
||||
|
||||
|
||||
def test_copy_in_constructor():
    """Check that ``copy=True`` shields the index from later edits to its input arrays."""
    level_values = np.array(["a", "b", "c"])
    level_codes = np.array([1, 1, 2, 0, 0, 1, 1])
    first_code = level_codes[0]
    mi = MultiIndex(
        levels=[level_values, level_values],
        codes=[level_codes, level_codes],
        copy=True,
    )
    assert mi.codes[0][0] == first_code

    # mutate the source codes; the index must still hold the old value
    level_codes[0] = 15
    assert mi.codes[0][0] == first_code

    # same for the source level values
    first_label = level_values[0]
    level_values[0] = "PANDA"
    assert mi.levels[0][0] == first_label
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_arrays
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_arrays(idx):
|
||||
arrays = [
|
||||
np.asarray(lev).take(level_codes)
|
||||
for lev, level_codes in zip(idx.levels, idx.codes)
|
||||
]
|
||||
|
||||
# list of arrays as input
|
||||
result = MultiIndex.from_arrays(arrays, names=idx.names)
|
||||
tm.assert_index_equal(result, idx)
|
||||
|
||||
# infer correctly
|
||||
result = MultiIndex.from_arrays([[pd.NaT, Timestamp("20130101")], ["a", "b"]])
|
||||
assert result.levels[0].equals(Index([Timestamp("20130101")]))
|
||||
assert result.levels[1].equals(Index(["a", "b"]))
|
||||
|
||||
|
||||
def test_from_arrays_iterator(idx):
|
||||
# GH 18434
|
||||
arrays = [
|
||||
np.asarray(lev).take(level_codes)
|
||||
for lev, level_codes in zip(idx.levels, idx.codes)
|
||||
]
|
||||
|
||||
# iterator as input
|
||||
result = MultiIndex.from_arrays(iter(arrays), names=idx.names)
|
||||
tm.assert_index_equal(result, idx)
|
||||
|
||||
# invalid iterator input
|
||||
msg = "Input must be a list / sequence of array-likes."
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
MultiIndex.from_arrays(0)
|
||||
|
||||
|
||||
def test_from_arrays_tuples(idx):
|
||||
arrays = tuple(
|
||||
tuple(np.asarray(lev).take(level_codes))
|
||||
for lev, level_codes in zip(idx.levels, idx.codes)
|
||||
)
|
||||
|
||||
# tuple of tuples as input
|
||||
result = MultiIndex.from_arrays(arrays, names=idx.names)
|
||||
tm.assert_index_equal(result, idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("idx1", "idx2"),
    [
        (
            pd.period_range("2011-01-01", freq="D", periods=3),
            pd.period_range("2015-01-01", freq="H", periods=3),
        ),
        (
            date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
            date_range("2015-01-01 10:00", freq="H", periods=3, tz="Asia/Tokyo"),
        ),
        (
            pd.timedelta_range("1 days", freq="D", periods=3),
            pd.timedelta_range("2 hours", freq="H", periods=3),
        ),
    ],
)
def test_from_arrays_index_series_period_datetimetz_and_timedelta(idx1, idx2):
    """Check that from_arrays gives the same levels from Index or Series input."""
    pair = (idx1, idx2)

    from_indexes = MultiIndex.from_arrays([idx1, idx2])
    for pos, level in enumerate(pair):
        tm.assert_index_equal(from_indexes.get_level_values(pos), level)

    from_series = MultiIndex.from_arrays([Series(idx1), Series(idx2)])
    for pos, level in enumerate(pair):
        tm.assert_index_equal(from_series.get_level_values(pos), level)

    tm.assert_index_equal(from_indexes, from_series)
|
||||
|
||||
|
||||
def test_from_arrays_index_datetimelike_mixed():
    """Check from_arrays with tz-aware, naive, timedelta and period levels together."""
    levels = [
        date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
        date_range("2015-01-01 10:00", freq="H", periods=3),
        pd.timedelta_range("1 days", freq="D", periods=3),
        pd.period_range("2011-01-01", freq="D", periods=3),
    ]

    from_indexes = MultiIndex.from_arrays(levels)
    for pos, level in enumerate(levels):
        tm.assert_index_equal(from_indexes.get_level_values(pos), level)

    from_series = MultiIndex.from_arrays([Series(level) for level in levels])
    for pos, level in enumerate(levels):
        tm.assert_index_equal(from_series.get_level_values(pos), level)

    tm.assert_index_equal(from_indexes, from_series)
|
||||
|
||||
|
||||
def test_from_arrays_index_series_categorical():
    """Check from_arrays with categorical levels given as Index, Series or values."""
    # GH13743
    letters = list("abcaab")
    cats = list("bac")
    idx1 = pd.CategoricalIndex(letters, categories=cats, ordered=False)
    idx2 = pd.CategoricalIndex(letters, categories=cats, ordered=True)

    candidates = [
        MultiIndex.from_arrays([idx1, idx2]),
        MultiIndex.from_arrays([Series(idx1), Series(idx2)]),
        MultiIndex.from_arrays([idx1.values, idx2.values]),
    ]
    for built in candidates:
        tm.assert_index_equal(built.get_level_values(0), idx1)
        tm.assert_index_equal(built.get_level_values(1), idx2)
|
||||
|
||||
|
||||
def test_from_arrays_empty():
    """Check from_arrays with zero, one, and several empty arrays."""
    # 0 levels
    with pytest.raises(ValueError, match="Must pass non-zero number of levels/codes"):
        MultiIndex.from_arrays(arrays=[])

    # 1 level
    one_level = MultiIndex.from_arrays(arrays=[[]], names=["A"])
    assert isinstance(one_level, MultiIndex)
    tm.assert_index_equal(one_level.levels[0], Index([], name="A"))
    assert one_level.names == ["A"]

    # N levels
    for n_levels in (2, 3):
        names = list("ABC")[:n_levels]
        built = MultiIndex.from_arrays(arrays=[[]] * n_levels, names=names)
        expected = MultiIndex(
            levels=[[]] * n_levels, codes=[[]] * n_levels, names=names
        )
        tm.assert_index_equal(built, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "invalid_sequence_of_arrays",
    [
        # scalars and lists
        1,
        [1],
        [1, 2],
        [[1], 2],
        [1, [2]],
        "a",
        ["a"],
        ["a", "b"],
        [["a"], "b"],
        # tuples (the bare string "a" is already covered above, so it is
        # not repeated here)
        (1,),
        (1, 2),
        ([1], 2),
        (1, [2]),
        ("a",),
        ("a", "b"),
        (["a"], "b"),
        # lists and tuples mixing in tuple elements
        [(1,), 2],
        [1, (2,)],
        [("a",), "b"],
        ((1,), 2),
        (1, (2,)),
        (("a",), "b"),
    ],
)
def test_from_arrays_invalid_input(invalid_sequence_of_arrays):
    """Check that from_arrays raises TypeError for input that is not a sequence of array-likes."""
    msg = "Input must be a list / sequence of array-likes"
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "idx1, idx2", [([1, 2, 3], ["a", "b"]), ([], ["a", "b"]), ([1, 2, 3], [])]
)
def test_from_arrays_different_lengths(idx1, idx2):
    """Check that from_arrays rejects arrays of unequal length."""
    # see gh-13599
    unequal = [idx1, idx2]
    with pytest.raises(ValueError, match="^all arrays must be same length$"):
        MultiIndex.from_arrays(unequal)
|
||||
|
||||
|
||||
def test_from_arrays_respects_none_names():
    """Check from_arrays with ``names=None`` against an index built with ``names=None``."""
    # GH27292
    numbers = Series([1, 2, 3], name="foo")
    letters = Series(["a", "b", "c"], name="bar")

    unnamed = MultiIndex(
        levels=[[1, 2, 3], ["a", "b", "c"]],
        codes=[[0, 1, 2], [0, 1, 2]],
        names=None,
    )
    built = MultiIndex.from_arrays([numbers, letters], names=None)

    tm.assert_index_equal(built, unnamed)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_tuples
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_tuples():
    """Check from_tuples with an empty list (raises) and with a tuple of tuples."""
    with pytest.raises(
        TypeError, match="Cannot infer number of levels from empty list"
    ):
        MultiIndex.from_tuples([])

    level_names = ["a", "b"]
    expected = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=level_names
    )

    # input tuples
    pairs = ((1, 2), (3, 4))
    tm.assert_index_equal(MultiIndex.from_tuples(pairs, names=["a", "b"]), expected)
|
||||
|
||||
|
||||
def test_from_tuples_iterator():
    """Check that from_tuples accepts an iterator of tuples and rejects a scalar."""
    # GH 18434
    # input iterator for tuples
    pairs = zip([1, 3], [2, 4])
    built = MultiIndex.from_tuples(pairs, names=["a", "b"])
    expected = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
    )
    tm.assert_index_equal(built, expected)

    # input non-iterables
    with pytest.raises(
        TypeError, match="Input must be a list / sequence of tuple-likes."
    ):
        MultiIndex.from_tuples(0)
|
||||
|
||||
|
||||
def test_from_tuples_empty():
    """Check that empty tuples plus names equal from_arrays on two empty arrays."""
    # GH 16777
    level_names = ["a", "b"]
    from_tuples = MultiIndex.from_tuples([], names=level_names)
    from_arrays = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
    tm.assert_index_equal(from_tuples, from_arrays)
|
||||
|
||||
|
||||
def test_from_tuples_index_values(idx):
|
||||
result = MultiIndex.from_tuples(idx)
|
||||
assert (result.values == idx.values).all()
|
||||
|
||||
|
||||
def test_tuples_with_name_string():
    """Check that ``Index`` on a list of tuples rejects a plain string ``name``."""
    # GH 15110 and GH 14848
    triples = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
    for bad_name in ("abc", "a"):
        with pytest.raises(
            ValueError, match="Names should be list-like for a MultiIndex"
        ):
            Index(triples, name=bad_name)
|
||||
|
||||
|
||||
def test_from_tuples_with_tuple_label():
    """Check from_tuples when one of the labels is itself a tuple."""
    # GH 15457
    rows = [[2, 1, 2], [4, (1, 2), 3]]
    expected = pd.DataFrame(rows, columns=["a", "b", "c"]).set_index(["a", "b"])

    idx = MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=("a", "b"))
    result = pd.DataFrame([2, 3], columns=["c"], index=idx)

    tm.assert_frame_equal(expected, result)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_product
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_product_empty_zero_levels():
    """Check that from_product with no iterables raises ValueError."""
    # 0 levels
    with pytest.raises(ValueError, match="Must pass non-zero number of levels/codes"):
        MultiIndex.from_product([])
|
||||
|
||||
|
||||
def test_from_product_empty_one_level():
    """Check from_product with a single empty iterable."""
    product = MultiIndex.from_product([[]], names=["A"])
    tm.assert_index_equal(product.levels[0], Index([], name="A"))
    assert product.names == ["A"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "first, second", [([], []), (["foo", "bar", "baz"], []), ([], ["a", "b", "c"])]
)
def test_from_product_empty_two_levels(first, second):
    """Check from_product with two iterables, at least one of them empty."""
    level_names = ["A", "B"]
    expected = MultiIndex(levels=[first, second], codes=[[], []], names=level_names)
    product = MultiIndex.from_product([first, second], names=level_names)
    tm.assert_index_equal(product, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("N", list(range(4)))
def test_from_product_empty_three_levels(N):
    """Check from_product with an N-element iterable between two empty ones."""
    # GH12258
    level_names = ["A", "B", "C"]
    middle = list(range(N))
    expected = MultiIndex(
        levels=[[], middle, []], codes=[[], [], []], names=level_names
    )
    product = MultiIndex.from_product([[], middle, []], names=level_names)
    tm.assert_index_equal(product, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "invalid_input", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]]
)
def test_from_product_invalid_input(invalid_input):
    """Check that from_product raises TypeError unless given a sequence of iterables."""
    pattern = r"Input must be a list / sequence of iterables|Input must be list-like"
    with pytest.raises(TypeError, match=pattern):
        MultiIndex.from_product(iterables=invalid_input)
|
||||
|
||||
|
||||
def test_from_product_datetimeindex():
    """Check the values of from_product over integers and a DatetimeIndex."""
    days = date_range("2000-01-01", periods=2)
    product = MultiIndex.from_product([[1, 2], days])

    pairs = [
        (1, Timestamp("2000-01-01")),
        (1, Timestamp("2000-01-02")),
        (2, Timestamp("2000-01-01")),
        (2, Timestamp("2000-01-02")),
    ]
    expected_values = construct_1d_object_array_from_listlike(pairs)
    tm.assert_numpy_array_equal(product.values, expected_values)
|
||||
|
||||
|
||||
def test_from_product_rangeindex():
    """Check that the first level of the product equals the input index exactly."""
    # RangeIndex is preserved by factorize, so preserved in levels
    ints = Index(range(5))
    product = MultiIndex.from_product([ints, ["a", "b"]])
    first_level = product._levels[0]
    tm.assert_index_equal(first_level, ints, exact=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ordered", [False, True])
@pytest.mark.parametrize("f", [lambda x: x, lambda x: Series(x), lambda x: x.values])
def test_from_product_index_series_categorical(ordered, f):
    """Check that a categorical second level survives from_product; ``f`` wraps the input."""
    # GH13743
    letters = list("abcaab")
    cats = list("bac")

    idx = pd.CategoricalIndex(letters, categories=cats, ordered=ordered)
    # two entries in the first iterable, so the second level repeats twice
    expected = pd.CategoricalIndex(letters * 2, categories=cats, ordered=ordered)

    product = MultiIndex.from_product([["foo", "bar"], f(idx)])
    tm.assert_index_equal(product.get_level_values(1), expected)
|
||||
|
||||
|
||||
def test_from_product():
    """from_product must match from_tuples over the explicit cartesian product."""
    first = ["foo", "bar", "buz"]
    second = ["a", "b", "c"]
    names = ["first", "second"]

    result = MultiIndex.from_product([first, second], names=names)

    # Every element of ``first`` paired with every element of ``second``,
    # with ``second`` varying fastest.
    tuples = [(outer, inner) for outer in first for inner in second]
    expected = MultiIndex.from_tuples(tuples, names=names)

    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_from_product_iterator():
    """from_product accepts an iterator of iterables and rejects a bare int."""
    # GH 18434
    first = ["foo", "bar", "buz"]
    second = ["a", "b", "c"]
    names = ["first", "second"]

    tuples = [(outer, inner) for outer in first for inner in second]
    expected = MultiIndex.from_tuples(tuples, names=names)

    # iterator as input
    from_iterator = MultiIndex.from_product(iter([first, second]), names=names)
    tm.assert_index_equal(from_iterator, expected)

    # Invalid non-iterable input
    msg = "Input must be a list / sequence of iterables."
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_product(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b, expected_names",
    [
        (
            Series([1, 2, 3], name="foo"),
            Series(["a", "b"], name="bar"),
            ["foo", "bar"],
        ),
        (Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]),
        ([1, 2, 3], ["a", "b"], None),
    ],
)
def test_from_product_infer_names(a, b, expected_names):
    """Level names are taken from the inputs when ``names`` is not passed."""
    # GH27292
    inferred = MultiIndex.from_product([a, b])

    level_values = [[1, 2, 3], ["a", "b"]]
    level_codes = [[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]
    expected = MultiIndex(
        levels=level_values, codes=level_codes, names=expected_names
    )

    tm.assert_index_equal(inferred, expected)
|
||||
|
||||
|
||||
def test_from_product_respects_none_names():
    """An explicit ``names=None`` wins over the names carried by the inputs."""
    # GH27292
    numbers = Series([1, 2, 3], name="foo")
    letters = Series(["a", "b"], name="bar")

    result = MultiIndex.from_product([numbers, letters], names=None)

    level_values = [[1, 2, 3], ["a", "b"]]
    level_codes = [[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]
    expected = MultiIndex(levels=level_values, codes=level_codes, names=None)

    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_from_product_readonly():
    """A read-only array gives the same product as its writable counterpart."""
    # GH#15286 passing read-only array to from_product
    numbers = np.array(range(3))
    letters = ["a", "b"]

    # Build the reference while the array is still writable.
    expected = MultiIndex.from_product([numbers, letters])

    numbers.setflags(write=False)
    result = MultiIndex.from_product([numbers, letters])

    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_create_index_existing_name(idx):
|
||||
|
||||
# GH11193, when an existing index is passed, and a new name is not
|
||||
# specified, the new index should inherit the previous object name
|
||||
index = idx
|
||||
index.names = ["foo", "bar"]
|
||||
result = Index(index)
|
||||
expected = Index(
|
||||
Index(
|
||||
[
|
||||
("foo", "one"),
|
||||
("foo", "two"),
|
||||
("bar", "one"),
|
||||
("baz", "two"),
|
||||
("qux", "one"),
|
||||
("qux", "two"),
|
||||
],
|
||||
dtype="object",
|
||||
)
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = Index(index, name="A")
|
||||
expected = Index(
|
||||
Index(
|
||||
[
|
||||
("foo", "one"),
|
||||
("foo", "two"),
|
||||
("bar", "one"),
|
||||
("baz", "two"),
|
||||
("qux", "one"),
|
||||
("qux", "two"),
|
||||
],
|
||||
dtype="object",
|
||||
),
|
||||
name="A",
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_frame
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_frame():
    """from_frame turns rows into tuples and column labels into level names."""
    # GH 22420
    rows = [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]]
    df = pd.DataFrame(rows, columns=["L1", "L2"])

    expected = MultiIndex.from_tuples(
        [("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")], names=["L1", "L2"]
    )

    tm.assert_index_equal(expected, MultiIndex.from_frame(df))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "non_frame",
    [
        Series([1, 2, 3, 4]),
        [1, 2, 3, 4],
        [[1, 2], [3, 4], [5, 6]],
        Index([1, 2, 3, 4]),
        np.array([[1, 2], [3, 4], [5, 6]]),
        27,
    ],
)
def test_from_frame_error(non_frame):
    """from_frame must raise TypeError for every non-DataFrame input listed."""
    # GH 22420
    expected_error = "Input must be a DataFrame"
    with pytest.raises(TypeError, match=expected_error):
        MultiIndex.from_frame(non_frame)
|
||||
|
||||
|
||||
def test_from_frame_dtype_fidelity():
    """Level dtypes of the resulting MultiIndex must equal the frame's dtypes."""
    # GH 22420
    frame = pd.DataFrame(
        {
            "dates": date_range("19910905", periods=6, tz="US/Eastern"),
            "a": [1, 1, 1, 2, 2, 2],
            "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            "c": ["x", "x", "y", "z", "x", "y"],
        }
    )
    original_dtypes = frame.dtypes.to_dict()

    # The reference index is built independently from equivalent arrays.
    expected_mi = MultiIndex.from_arrays(
        [
            date_range("19910905", periods=6, tz="US/Eastern"),
            [1, 1, 1, 2, 2, 2],
            pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            ["x", "x", "y", "z", "x", "y"],
        ],
        names=["dates", "a", "b", "c"],
    )

    mi = MultiIndex.from_frame(frame)
    mi_dtypes = {
        name: mi.levels[position].dtype for position, name in enumerate(mi.names)
    }

    tm.assert_index_equal(expected_mi, mi)
    assert original_dtypes == mi_dtypes
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], ["x", "y"])]
)
def test_from_frame_valid_names(names_in, names_out):
    """Check the resulting names for a frame whose columns are a MultiIndex."""
    # GH 22420
    rows = [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]]
    frame_columns = MultiIndex.from_tuples([("L1", "x"), ("L2", "y")])
    df = pd.DataFrame(rows, columns=frame_columns)

    mi = MultiIndex.from_frame(df, names=names_in)
    assert mi.names == names_out
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "names,expected_error_msg",
    [
        ("bad_input", "Names should be list-like for a MultiIndex"),
        (["a", "b", "c"], "Length of names must match number of levels in MultiIndex"),
    ],
)
def test_from_frame_invalid_names(names, expected_error_msg):
    """from_frame must raise ValueError for non-list-like or mis-sized names."""
    # GH 22420
    rows = [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]]
    frame_columns = MultiIndex.from_tuples([("L1", "x"), ("L2", "y")])
    df = pd.DataFrame(rows, columns=frame_columns)

    with pytest.raises(ValueError, match=expected_error_msg):
        MultiIndex.from_frame(df, names=names)
|
||||
|
||||
|
||||
def test_index_equal_empty_iterable():
    """An empty MultiIndex from the constructor equals one from from_arrays."""
    # #16844
    names = ["a", "b"]
    from_constructor = MultiIndex(levels=[[], []], codes=[[], []], names=names)
    from_arrays = MultiIndex.from_arrays(arrays=[[], []], names=names)
    tm.assert_index_equal(from_constructor, from_arrays)
|
||||
|
||||
|
||||
def test_raise_invalid_sortorder():
    """The constructor raises when ``sortorder`` exceeds the lexsort depth."""
    # Test that the MultiIndex constructor raises when an incorrect sortorder
    # is given
    # GH#28518
    levels = [[0, 1], [0, 1, 2]]

    # Correct sortorder
    MultiIndex(
        levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
    )

    # Each case: (codes, claimed sortorder, expected error pattern).
    failing_cases = [
        (
            [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]],
            2,
            r".* sortorder 2 with lexsort_depth 1.*",
        ),
        (
            [[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]],
            1,
            r".* sortorder 1 with lexsort_depth 0.*",
        ),
    ]
    for codes, sortorder, pattern in failing_cases:
        with pytest.raises(ValueError, match=pattern):
            MultiIndex(levels=levels, codes=codes, sortorder=sortorder)
|
||||
|
||||
|
||||
def test_datetimeindex():
    """Check level types when a MultiIndex is built from datetime-like inputs."""
    stamps = ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"]
    idx1 = pd.DatetimeIndex(stamps * 2, tz="Asia/Tokyo")
    idx2 = date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern")
    idx = MultiIndex.from_arrays([idx1, idx2])

    # The repeated stamps collapse to the three distinct values in level 0.
    expected1 = pd.DatetimeIndex(stamps, tz="Asia/Tokyo")

    tm.assert_index_equal(idx.levels[0], expected1)
    tm.assert_index_equal(idx.levels[1], idx2)

    # from datetime combos
    # GH 7888
    date1 = np.datetime64("today")
    date2 = datetime.today()
    date3 = Timestamp.today()
    datetime_likes = [date1, date2, date3]

    for left, right in itertools.product(datetime_likes, repeat=2):
        index = MultiIndex.from_product([[left], [right]])
        assert isinstance(index.levels[0], pd.DatetimeIndex)
        assert isinstance(index.levels[1], pd.DatetimeIndex)

    # but NOT date objects, matching Index behavior
    date4 = date.today()
    index = MultiIndex.from_product([[date4], [date2]])
    assert not isinstance(index.levels[0], pd.DatetimeIndex)
    assert isinstance(index.levels[1], pd.DatetimeIndex)
|
||||
|
||||
|
||||
def test_constructor_with_tz():
    """from_arrays keeps names and tz-aware levels for indexes and Series."""
    index = pd.DatetimeIndex(
        ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"
    )
    columns = pd.DatetimeIndex(
        ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo"
    )

    # The same expectations apply to the raw indexes and to Series wrappers.
    for arrays in ([index, columns], [Series(index), Series(columns)]):
        result = MultiIndex.from_arrays(arrays)

        assert result.names == ["dt1", "dt2"]
        tm.assert_index_equal(result.levels[0], index)
        tm.assert_index_equal(result.levels[1], columns)
|
||||
|
||||
|
||||
def test_multiindex_inference_consistency():
    """date objects give an object-dtype level in each constructor, like Index."""
    # check that inference behavior matches the base class
    today = date.today()
    arr = [today, today]

    idx = Index(arr)
    assert idx.dtype == object

    built = [
        MultiIndex.from_arrays([arr]),
        MultiIndex.from_product([arr]),
        MultiIndex.from_tuples([(x,) for x in arr]),
    ]
    for mi in built:
        lev = mi.levels[0]
        assert lev.dtype == object
|
@ -0,0 +1,142 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_to_numpy(idx):
|
||||
result = idx.to_numpy()
|
||||
exp = idx.values
|
||||
tm.assert_numpy_array_equal(result, exp)
|
||||
|
||||
|
||||
def test_to_frame():
    """Exercise to_frame with unnamed, named and datetime-level indexes."""
    tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
    names = ["first", "second"]

    # Unnamed levels.
    index = MultiIndex.from_tuples(tuples)
    expected = DataFrame(tuples)
    tm.assert_frame_equal(index.to_frame(index=False), expected)

    expected.index = index
    tm.assert_frame_equal(index.to_frame(), expected)

    # Named levels become the column labels.
    index = MultiIndex.from_tuples(tuples, names=["first", "second"])
    expected = DataFrame(tuples)
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(index.to_frame(index=False), expected)

    expected.index = index
    tm.assert_frame_equal(index.to_frame(), expected)

    # See GH-22580
    index = MultiIndex.from_tuples(tuples)
    expected = DataFrame(tuples)
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(index.to_frame(index=False, name=names), expected)

    expected.index = index
    tm.assert_frame_equal(index.to_frame(name=names), expected)

    msg = "'name' must be a list / sequence of column names."
    with pytest.raises(TypeError, match=msg):
        index.to_frame(name="first")

    msg = "'name' should have same length as number of levels on index."
    with pytest.raises(ValueError, match=msg):
        index.to_frame(name=["first"])

    # Tests for datetime index
    dates = pd.date_range("20130101", periods=3)
    index = MultiIndex.from_product([range(5), dates])
    expected = DataFrame(
        {
            0: np.repeat(np.arange(5, dtype="int64"), 3),
            1: np.tile(dates, 5),
        }
    )
    tm.assert_frame_equal(index.to_frame(index=False), expected)

    expected.index = index
    tm.assert_frame_equal(index.to_frame(), expected)

    # See GH-22580
    expected = DataFrame(
        {
            "first": np.repeat(np.arange(5, dtype="int64"), 3),
            "second": np.tile(dates, 5),
        }
    )
    tm.assert_frame_equal(index.to_frame(index=False, name=names), expected)

    expected.index = index
    tm.assert_frame_equal(index.to_frame(name=names), expected)
|
||||
|
||||
|
||||
def test_to_frame_dtype_fidelity():
    """Column dtypes of to_frame must equal the dtypes of the index levels."""
    # GH 22420
    mi = MultiIndex.from_arrays(
        [
            pd.date_range("19910905", periods=6, tz="US/Eastern"),
            [1, 1, 1, 2, 2, 2],
            pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            ["x", "x", "y", "z", "x", "y"],
        ],
        names=["dates", "a", "b", "c"],
    )
    original_dtypes = {
        name: mi.levels[position].dtype for position, name in enumerate(mi.names)
    }

    # The reference frame is built independently from equivalent columns.
    expected_df = DataFrame(
        {
            "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"),
            "a": [1, 1, 1, 2, 2, 2],
            "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            "c": ["x", "x", "y", "z", "x", "y"],
        }
    )

    frame = mi.to_frame(index=False)
    frame_dtypes = frame.dtypes.to_dict()

    tm.assert_frame_equal(frame, expected_df)
    assert original_dtypes == frame_dtypes
|
||||
|
||||
|
||||
def test_to_frame_resulting_column_order():
    """to_frame columns must appear in level order, not in sorted order."""
    # GH 22420
    expected = ["z", 0, "a"]
    arrays = [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]]
    mi = MultiIndex.from_arrays(arrays, names=expected)

    column_labels = mi.to_frame().columns.tolist()
    assert column_labels == expected
|
||||
|
||||
|
||||
def test_to_flat_index(idx):
|
||||
expected = pd.Index(
|
||||
(
|
||||
("foo", "one"),
|
||||
("foo", "two"),
|
||||
("bar", "one"),
|
||||
("baz", "two"),
|
||||
("qux", "one"),
|
||||
("qux", "two"),
|
||||
),
|
||||
tupleize_cols=False,
|
||||
)
|
||||
result = idx.to_flat_index()
|
||||
tm.assert_index_equal(result, expected)
|
106
.venv/Lib/site-packages/pandas/tests/indexes/multi/test_copy.py
Normal file
106
.venv/Lib/site-packages/pandas/tests/indexes/multi/test_copy.py
Normal file
@ -0,0 +1,106 @@
|
||||
from copy import (
|
||||
copy,
|
||||
deepcopy,
|
||||
)
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas import MultiIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def assert_multiindex_copied(copy, original):
    """
    Assert that ``copy`` carries the same data as ``original`` in new containers.

    Parameters
    ----------
    copy : object exposing ``levels``, ``codes``, ``names`` and ``sortorder``
        The candidate copy (the tests in this module pass a MultiIndex).
    original : object exposing the same attributes
        The index the copy was made from.

    Raises
    ------
    AssertionError
        If any of the checks below fails.
    """
    # Levels should be (at least, shallow copied)
    tm.assert_copy(copy.levels, original.levels)

    # Codes must compare equal and be held in a distinct container; how they
    # were copied does not matter.  (The equality check used to run twice.)
    tm.assert_almost_equal(copy.codes, original.codes)
    assert copy.codes is not original.codes

    # Names must compare equal and be held in a distinct container
    assert copy.names == original.names
    assert copy.names is not original.names

    # Sort order should be copied
    assert copy.sortorder == original.sortorder
|
||||
|
||||
|
||||
def test_copy(idx):
    """The result of ``copy()`` must pass the shared copied-index checks."""
    duplicate = idx.copy()
    assert_multiindex_copied(duplicate, idx)
|
||||
|
||||
|
||||
def test_shallow_copy(idx):
    """The result of ``_view()`` must pass the shared copied-index checks."""
    shallow = idx._view()
    assert_multiindex_copied(shallow, idx)
|
||||
|
||||
|
||||
def test_view(idx):
    """The result of ``view()`` must pass the shared copied-index checks."""
    viewed = idx.view()
    assert_multiindex_copied(viewed, idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", [copy, deepcopy])
def test_copy_and_deepcopy(func):
    """copy/deepcopy must return a distinct object that equals the source."""
    source = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )

    duplicate = func(source)

    assert duplicate is not source
    assert duplicate.equals(source)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("deep", [True, False])
def test_copy_method(deep):
    """``copy(deep=...)`` must return an index that equals the source."""
    source = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )

    duplicate = source.copy(deep=deep)

    assert duplicate.equals(source)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("deep", [True, False])
@pytest.mark.parametrize(
    "kwarg, value",
    [
        ("names", ["third", "fourth"]),
    ],
)
def test_copy_method_kwargs(deep, kwarg, value):
    """A keyword passed to ``copy`` must show up as that attribute on the copy."""
    # gh-12309: Check that the "name" argument as well other kwargs are honored
    source = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )

    copy_kwargs = {kwarg: value, "deep": deep}
    duplicate = source.copy(**copy_kwargs)

    assert getattr(duplicate, kwarg) == value
|
||||
|
||||
|
||||
@pytest.mark.parametrize("deep", [True, False])
|
||||
@pytest.mark.parametrize(
|
||||
"param_name, param_value",
|
||||
[
|
||||
("levels", [["foo2", "bar2"], ["fizz2", "buzz2"]]),
|
||||
("codes", [[1, 0, 0, 0], [1, 1, 0, 0]]),
|
||||
],
|
||||
)
|
||||
def test_copy_deprecated_parameters(deep, param_name, param_value):
|
||||
# gh-36685
|
||||
idx = MultiIndex(
|
||||
levels=[["foo", "bar"], ["fizz", "buzz"]],
|
||||
codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
|
||||
names=["first", "second"],
|
||||
)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
idx_copy = idx.copy(deep=deep, **{param_name: param_value})
|
||||
|
||||
assert [list(i) for i in getattr(idx_copy, param_name)] == param_value
|
193
.venv/Lib/site-packages/pandas/tests/indexes/multi/test_drop.py
Normal file
193
.venv/Lib/site-packages/pandas/tests/indexes/multi/test_drop.py
Normal file
@ -0,0 +1,193 @@
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_drop(idx):
|
||||
dropped = idx.drop([("foo", "two"), ("qux", "one")])
|
||||
|
||||
index = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")])
|
||||
dropped2 = idx.drop(index)
|
||||
|
||||
expected = idx[[0, 2, 3, 5]]
|
||||
tm.assert_index_equal(dropped, expected)
|
||||
tm.assert_index_equal(dropped2, expected)
|
||||
|
||||
dropped = idx.drop(["bar"])
|
||||
expected = idx[[0, 1, 3, 4, 5]]
|
||||
tm.assert_index_equal(dropped, expected)
|
||||
|
||||
dropped = idx.drop("foo")
|
||||
expected = idx[[2, 3, 4, 5]]
|
||||
tm.assert_index_equal(dropped, expected)
|
||||
|
||||
index = MultiIndex.from_tuples([("bar", "two")])
|
||||
with pytest.raises(KeyError, match=r"^10$"):
|
||||
idx.drop([("bar", "two")])
|
||||
with pytest.raises(KeyError, match=r"^10$"):
|
||||
idx.drop(index)
|
||||
with pytest.raises(KeyError, match=r"^'two'$"):
|
||||
idx.drop(["foo", "two"])
|
||||
|
||||
# partially correct argument
|
||||
mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")])
|
||||
with pytest.raises(KeyError, match=r"^10$"):
|
||||
idx.drop(mixed_index)
|
||||
|
||||
# error='ignore'
|
||||
dropped = idx.drop(index, errors="ignore")
|
||||
expected = idx[[0, 1, 2, 3, 4, 5]]
|
||||
tm.assert_index_equal(dropped, expected)
|
||||
|
||||
dropped = idx.drop(mixed_index, errors="ignore")
|
||||
expected = idx[[0, 1, 2, 3, 5]]
|
||||
tm.assert_index_equal(dropped, expected)
|
||||
|
||||
dropped = idx.drop(["foo", "two"], errors="ignore")
|
||||
expected = idx[[2, 3, 4, 5]]
|
||||
tm.assert_index_equal(dropped, expected)
|
||||
|
||||
# mixed partial / full drop
|
||||
dropped = idx.drop(["foo", ("qux", "one")])
|
||||
expected = idx[[2, 3, 5]]
|
||||
tm.assert_index_equal(dropped, expected)
|
||||
|
||||
# mixed partial / full drop / error='ignore'
|
||||
mixed_index = ["foo", ("qux", "one"), "two"]
|
||||
with pytest.raises(KeyError, match=r"^'two'$"):
|
||||
idx.drop(mixed_index)
|
||||
dropped = idx.drop(mixed_index, errors="ignore")
|
||||
expected = idx[[2, 3, 5]]
|
||||
tm.assert_index_equal(dropped, expected)
|
||||
|
||||
|
||||
def test_droplevel_with_names(idx):
|
||||
index = idx[idx.get_loc("foo")]
|
||||
dropped = index.droplevel(0)
|
||||
assert dropped.name == "second"
|
||||
|
||||
index = MultiIndex(
|
||||
levels=[Index(range(4)), Index(range(4)), Index(range(4))],
|
||||
codes=[
|
||||
np.array([0, 0, 1, 2, 2, 2, 3, 3]),
|
||||
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
|
||||
np.array([1, 0, 1, 1, 0, 0, 1, 0]),
|
||||
],
|
||||
names=["one", "two", "three"],
|
||||
)
|
||||
dropped = index.droplevel(0)
|
||||
assert dropped.names == ("two", "three")
|
||||
|
||||
dropped = index.droplevel("two")
|
||||
expected = index.droplevel(1)
|
||||
assert dropped.equals(expected)
|
||||
|
||||
|
||||
def test_droplevel_list():
    """droplevel with a list of names, including empty and invalid lists."""
    three_level_codes = [
        np.array([0, 0, 1, 2, 2, 2, 3, 3]),
        np.array([0, 1, 0, 0, 0, 1, 0, 1]),
        np.array([1, 0, 1, 1, 0, 0, 1, 0]),
    ]
    index = MultiIndex(
        levels=[Index(range(4)), Index(range(4)), Index(range(4))],
        codes=three_level_codes,
        names=["one", "two", "three"],
    )

    # A list of names must match dropping the same levels one at a time.
    dropped = index[:2].droplevel(["three", "one"])
    expected = index[:2].droplevel(2).droplevel(0)
    assert dropped.equals(expected)

    # An empty list leaves the index unchanged.
    dropped = index[:2].droplevel([])
    expected = index[:2]
    assert dropped.equals(expected)

    msg = (
        "Cannot remove 3 levels from an index with 3 levels: "
        "at least one level must be left"
    )
    with pytest.raises(ValueError, match=msg):
        index[:2].droplevel(["one", "two", "three"])

    with pytest.raises(KeyError, match="'Level four not found'"):
        index[:2].droplevel(["one", "four"])
|
||||
|
||||
|
||||
def test_drop_not_lexsorted():
    """drop on a non-lexsorted index warns and matches the lexsorted result."""
    # GH 12078

    # define the lexsorted version of the multi-index
    tuples = [("a", ""), ("b1", "c1"), ("b2", "c2")]
    lexsorted_mi = MultiIndex.from_tuples(tuples, names=["b", "c"])
    assert lexsorted_mi._is_lexsorted()

    # and the not-lexsorted version
    frame = pd.DataFrame(
        columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]]
    )
    pivoted = frame.pivot_table(index="a", columns=["b", "c"], values="d")
    not_lexsorted_mi = pivoted.reset_index().columns
    assert not not_lexsorted_mi._is_lexsorted()

    # compare the results
    tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
    with tm.assert_produces_warning(PerformanceWarning):
        tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a"))
|
||||
|
||||
|
||||
def test_drop_with_nan_in_index(nulls_fixture):
|
||||
# GH#18853
|
||||
mi = MultiIndex.from_tuples([("blah", nulls_fixture)], names=["name", "date"])
|
||||
msg = r"labels \[Timestamp\('2001-01-01 00:00:00'\)\] not found in level"
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
mi.drop(pd.Timestamp("2001"), level="date")
|
||||
|
||||
|
||||
def test_drop_with_non_monotonic_duplicates():
    """drop removes every occurrence of a duplicated, non-adjacent key."""
    # GH#33494
    mi = MultiIndex.from_tuples([(1, 2), (2, 3), (1, 2)])
    expected = MultiIndex.from_tuples([(2, 3)])

    with warnings.catch_warnings():
        # Only the result is under test here, not the performance warning.
        warnings.simplefilter("ignore", PerformanceWarning)
        result = mi.drop((1, 2))

    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_single_level_drop_partially_missing_elements():
    """drop(level=0) raises KeyError naming the labels that are not present."""
    # GH 37820
    mi = MultiIndex.from_tuples([(1, 2), (2, 2), (3, 2)])

    msg = r"labels \[4\] not found in level"
    for labels in (4, [1, 4]):
        with pytest.raises(KeyError, match=msg):
            mi.drop(labels, level=0)

    msg = r"labels \[nan\] not found in level"
    for labels in ([np.nan], [np.nan, 1, 2, 3]):
        with pytest.raises(KeyError, match=msg):
            mi.drop(labels, level=0)

    # With NaN present in the level, only the truly missing label is reported.
    mi = MultiIndex.from_tuples([(np.nan, 1), (1, 2)])
    msg = r"labels \['a'\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop([np.nan, 1, "a"], level=0)
|
||||
|
||||
|
||||
def test_droplevel_multiindex_one_level():
    """droplevel([]) on a one-level MultiIndex yields a flat, named Index."""
    # GH#37208
    single_level = MultiIndex.from_tuples([(2,)], names=("b",))
    expected = Index([2], name="b")
    tm.assert_index_equal(single_level.droplevel([]), expected)
|
@ -0,0 +1,339 @@
|
||||
from itertools import product
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import hashtable
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("names", [None, ["first", "second"]])
def test_unique(names):
    """unique() keeps first occurrences and the names of the source index."""
    # Each case: (input arrays, arrays of the expected unique index).
    cases = [
        ([[1, 2, 1, 2], [1, 1, 1, 2]], [[1, 2, 2], [1, 1, 2]]),
        ([list("aaaa"), list("abab")], [list("aa"), list("ab")]),
        ([list("aaaa"), list("aaaa")], [["a"], ["a"]]),
    ]
    for arrays, unique_arrays in cases:
        mi = MultiIndex.from_arrays(arrays, names=names)
        exp = MultiIndex.from_arrays(unique_arrays, names=mi.names)
        tm.assert_index_equal(mi.unique(), exp)

    # GH #20568 - empty MI
    mi = MultiIndex.from_arrays([[], []], names=names)
    res = mi.unique()
    tm.assert_index_equal(mi, res)
|
||||
|
||||
|
||||
def test_unique_datetimelike():
    """unique() on two datetime levels containing NaT entries."""
    naive = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-01", "2015-01-01", "NaT", "NaT"]
    )
    aware = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-02", "2015-01-02", "NaT", "2015-01-01"],
        tz="Asia/Tokyo",
    )
    result = MultiIndex.from_arrays([naive, aware]).unique()

    expected_naive = DatetimeIndex(["2015-01-01", "2015-01-01", "NaT", "NaT"])
    expected_aware = DatetimeIndex(
        ["2015-01-01", "2015-01-02", "NaT", "2015-01-01"], tz="Asia/Tokyo"
    )
    exp = MultiIndex.from_arrays([expected_naive, expected_aware])

    tm.assert_index_equal(result, exp)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("level", [0, "first", 1, "second"])
|
||||
def test_unique_level(idx, level):
|
||||
# GH #17896 - with level= argument
|
||||
result = idx.unique(level=level)
|
||||
expected = idx.get_level_values(level).unique()
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# With already unique level
|
||||
mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], names=["first", "second"])
|
||||
result = mi.unique(level=level)
|
||||
expected = mi.get_level_values(level)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# With empty MI
|
||||
mi = MultiIndex.from_arrays([[], []], names=["first", "second"])
|
||||
result = mi.unique(level=level)
|
||||
expected = mi.get_level_values(level)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_duplicate_multiindex_codes():
|
||||
# GH 17464
|
||||
# Make sure that a MultiIndex with duplicate levels throws a ValueError
|
||||
msg = r"Level values must be unique: \[[A', ]+\] on level 0"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
mi = MultiIndex([["A"] * 10, range(10)], [[0] * 10, range(10)])
|
||||
|
||||
# And that using set_levels with duplicate levels fails
|
||||
mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
|
||||
msg = r"Level values must be unique: \[[AB', ]+\] on level 0"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
mi.set_levels([["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]], inplace=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("names", [["a", "b", "a"], [1, 1, 2], [1, "a", 1]])
def test_duplicate_level_names(names):
    """Duplicate level names are accepted by from_product and by rename."""
    # GH18872, GH19029
    level_values = [[0, 1]] * 3

    mi = MultiIndex.from_product(level_values, names=names)
    assert mi.names == names

    # With .rename()
    mi = MultiIndex.from_product(level_values).rename(names)
    assert mi.names == names

    # With .rename(., level=)
    mi.rename(names[1], level=1, inplace=True)
    mi = mi.rename([names[0], names[2]], level=[0, 2])
    assert mi.names == names
|
||||
|
||||
|
||||
def test_duplicate_meta_data():
    """drop_duplicates must keep the names, whatever combination is set."""
    # GH 10115
    mi = MultiIndex(
        levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
    )

    name_variants = [[None, None], [None, "Num"], ["Upper", "Num"]]
    candidates = [mi] + [mi.set_names(variant) for variant in name_variants]

    for candidate in candidates:
        assert candidate.has_duplicates
        assert candidate.drop_duplicates().names == candidate.names
|
||||
|
||||
|
||||
def test_has_duplicates(idx, idx_dup):
|
||||
# see fixtures
|
||||
assert idx.is_unique is True
|
||||
assert idx.has_duplicates is False
|
||||
assert idx_dup.is_unique is False
|
||||
assert idx_dup.has_duplicates is True
|
||||
|
||||
mi = MultiIndex(
|
||||
levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
|
||||
)
|
||||
assert mi.is_unique is False
|
||||
assert mi.has_duplicates is True
|
||||
|
||||
# single instance of NaN
|
||||
mi_nan = MultiIndex(
|
||||
levels=[["a", "b"], [0, 1]], codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]]
|
||||
)
|
||||
assert mi_nan.is_unique is True
|
||||
assert mi_nan.has_duplicates is False
|
||||
|
||||
# multiple instances of NaN
|
||||
mi_nan_dup = MultiIndex(
|
||||
levels=[["a", "b"], [0, 1]], codes=[[-1, -1, 0, 0, 1, 1], [-1, -1, 0, 1, 0, 1]]
|
||||
)
|
||||
assert mi_nan_dup.is_unique is False
|
||||
assert mi_nan_dup.has_duplicates is True
|
||||
|
||||
|
||||
def test_has_duplicates_from_tuples():
    """Eight-level tuples differing only in two positions are not duplicates."""
    # GH 9075
    # Only the 4th and 8th positions vary between the tuples.
    varying = [
        (5, 169),
        (7, 119),
        (9, 135),
        (13, 145),
        (14, 158),
        (16, 122),
        (17, 160),
        (18, 180),
        (20, 143),
        (21, 128),
        (22, 129),
        (25, 111),
        (28, 114),
        (29, 121),
        (31, 126),
        (32, 155),
        (33, 123),
        (12, 144),
    ]
    t = [("x", "out", "z", fourth, "y", "in", "z", eighth) for fourth, eighth in varying]

    mi = MultiIndex.from_tuples(t)
    assert not mi.has_duplicates
|
||||
|
||||
|
||||
@pytest.mark.parametrize("nlevels", [4, 8])
@pytest.mark.parametrize("with_nulls", [True, False])
def test_has_duplicates_overflow(nlevels, with_nulls):
    """has_duplicates on indexes with many levels, with and without nulls."""
    # handle int64 overflow if possible
    # no overflow with 4
    # overflow possible with 8
    base_codes = np.tile(np.arange(500), 2)
    level = np.arange(500)

    if with_nulls:  # inject some null values
        base_codes[500] = -1  # common nan value
        codes = [base_codes.copy() for _ in range(nlevels)]
        # A second null per level, at a position that shifts with the level.
        for position, level_codes in enumerate(codes):
            level_codes[500 + position - nlevels // 2] = -1

        codes += [np.array([-1, 1]).repeat(500)]
    else:
        codes = [base_codes] * nlevels + [np.arange(2).repeat(500)]

    levels = [level] * nlevels + [[0, 1]]

    # no dups
    mi = MultiIndex(levels=levels, codes=codes)
    assert not mi.has_duplicates

    # with a dup
    if with_nulls:
        # Append a copy of each level's first code at position 1000.
        codes = [np.insert(level_codes, 1000, level_codes[0]) for level_codes in codes]
        mi = MultiIndex(levels=levels, codes=codes)
    else:
        values = mi.values.tolist()
        mi = MultiIndex.from_tuples(values + [values[0]])

    assert mi.has_duplicates
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"keep, expected",
|
||||
[
|
||||
("first", np.array([False, False, False, True, True, False])),
|
||||
("last", np.array([False, True, True, False, False, False])),
|
||||
(False, np.array([False, True, True, True, True, False])),
|
||||
],
|
||||
)
|
||||
def test_duplicated(idx_dup, keep, expected):
|
||||
result = idx_dup.duplicated(keep=keep)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.arm_slow
def test_duplicated_large(keep):
    """Compare ``MultiIndex.duplicated`` with ``hashtable.duplicated`` on random codes."""
    # GH 9125
    n, k = 200, 5000
    levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)]
    # one random code array of length k * n per level
    codes = [np.random.choice(n, k * n) for _ in levels]
    mi = MultiIndex(levels=levels, codes=codes)

    result = mi.duplicated(keep=keep)
    expected = hashtable.duplicated(mi.values, keep=keep)
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_duplicated2():
    """Indexes built from unique value/NaN combinations must report no duplicates."""
    # TODO: more informative test name
    # GH5873
    for a in [101, 102]:
        mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
        assert not mi.has_duplicates

        tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool"))

    for n in range(1, 6):  # 1st level shape
        for m in range(1, 5):  # 2nd level shape
            # all possible unique combinations, including nan
            codes = product(range(-1, n), range(-1, m))
            mi = MultiIndex(
                levels=[list("abcde")[:n], list("WXYZ")[:m]],
                codes=np.random.permutation(list(codes)).T,
            )
            assert len(mi) == (n + 1) * (m + 1)
            assert not mi.has_duplicates

            tm.assert_numpy_array_equal(
                mi.duplicated(), np.zeros(len(mi), dtype="bool")
            )
|
||||
|
||||
|
||||
def test_duplicated_drop_duplicates():
    """Check ``duplicated`` and ``drop_duplicates`` for each ``keep`` option."""
    # GH#4060
    idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2]))

    # default keep="first": the second (1, 1) is flagged
    expected = np.array([False, False, False, True, False, False], dtype=bool)
    duplicated = idx.duplicated()
    tm.assert_numpy_array_equal(duplicated, expected)
    assert duplicated.dtype == bool
    expected = MultiIndex.from_arrays(([1, 2, 3, 2, 3], [1, 1, 1, 2, 2]))
    tm.assert_index_equal(idx.drop_duplicates(), expected)

    # keep="last": the first (1, 1) is flagged
    expected = np.array([True, False, False, False, False, False])
    duplicated = idx.duplicated(keep="last")
    tm.assert_numpy_array_equal(duplicated, expected)
    assert duplicated.dtype == bool
    expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2]))
    tm.assert_index_equal(idx.drop_duplicates(keep="last"), expected)

    # keep=False: both (1, 1) entries are flagged
    expected = np.array([True, False, False, True, False, False])
    duplicated = idx.duplicated(keep=False)
    tm.assert_numpy_array_equal(duplicated, expected)
    assert duplicated.dtype == bool
    expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2]))
    tm.assert_index_equal(idx.drop_duplicates(keep=False), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype",
    [
        np.complex64,
        np.complex128,
    ],
)
def test_duplicated_series_complex_numbers(dtype):
    """``Series.duplicated`` on complex data, including NaN-valued entries."""
    # GH 17927
    expected = Series(
        [False, False, False, True, False, False, False, True, False, True],
        dtype=bool,
    )
    result = Series(
        [
            np.nan + np.nan * 1j,
            0,
            1j,
            1j,
            1,
            1 + 1j,
            1 + 2j,
            1 + 1j,
            np.nan,
            np.nan + np.nan * 1j,
        ],
        dtype=dtype,
    ).duplicated()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_multi_drop_duplicates_pos_args_deprecation():
|
||||
# GH#41485
|
||||
idx = MultiIndex.from_arrays([[1, 2, 3, 1], [1, 2, 3, 1]])
|
||||
msg = (
|
||||
"In a future version of pandas all arguments of "
|
||||
"MultiIndex.drop_duplicates will be keyword-only"
|
||||
)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = idx.drop_duplicates("last")
|
||||
expected = MultiIndex.from_arrays([[2, 3, 1], [2, 3, 1]])
|
||||
tm.assert_index_equal(expected, result)
|
@ -0,0 +1,290 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_equals(idx):
|
||||
assert idx.equals(idx)
|
||||
assert idx.equals(idx.copy())
|
||||
assert idx.equals(idx.astype(object))
|
||||
assert idx.equals(idx.to_flat_index())
|
||||
assert idx.equals(idx.to_flat_index().astype("category"))
|
||||
|
||||
assert not idx.equals(list(idx))
|
||||
assert not idx.equals(np.array(idx))
|
||||
|
||||
same_values = Index(idx, dtype=object)
|
||||
assert idx.equals(same_values)
|
||||
assert same_values.equals(idx)
|
||||
|
||||
if idx.nlevels == 1:
|
||||
# do not test MultiIndex
|
||||
assert not idx.equals(Series(idx))
|
||||
|
||||
|
||||
def test_equals_op(idx):
|
||||
# GH9947, GH10637
|
||||
index_a = idx
|
||||
|
||||
n = len(index_a)
|
||||
index_b = index_a[0:-1]
|
||||
index_c = index_a[0:-1].append(index_a[-2:-1])
|
||||
index_d = index_a[0:1]
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
index_a == index_b
|
||||
expected1 = np.array([True] * n)
|
||||
expected2 = np.array([True] * (n - 1) + [False])
|
||||
tm.assert_numpy_array_equal(index_a == index_a, expected1)
|
||||
tm.assert_numpy_array_equal(index_a == index_c, expected2)
|
||||
|
||||
# test comparisons with numpy arrays
|
||||
array_a = np.array(index_a)
|
||||
array_b = np.array(index_a[0:-1])
|
||||
array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
|
||||
array_d = np.array(index_a[0:1])
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
index_a == array_b
|
||||
tm.assert_numpy_array_equal(index_a == array_a, expected1)
|
||||
tm.assert_numpy_array_equal(index_a == array_c, expected2)
|
||||
|
||||
# test comparisons with Series
|
||||
series_a = Series(array_a)
|
||||
series_b = Series(array_b)
|
||||
series_c = Series(array_c)
|
||||
series_d = Series(array_d)
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
index_a == series_b
|
||||
|
||||
tm.assert_numpy_array_equal(index_a == series_a, expected1)
|
||||
tm.assert_numpy_array_equal(index_a == series_c, expected2)
|
||||
|
||||
# cases where length is 1 for one of them
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
index_a == index_d
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
index_a == series_d
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
index_a == array_d
|
||||
msg = "Can only compare identically-labeled Series objects"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
series_a == series_d
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
series_a == array_d
|
||||
|
||||
# comparing with a scalar should broadcast; note that we are excluding
|
||||
# MultiIndex because in this case each item in the index is a tuple of
|
||||
# length 2, and therefore is considered an array of length 2 in the
|
||||
# comparison instead of a scalar
|
||||
if not isinstance(index_a, MultiIndex):
|
||||
expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
|
||||
# assuming the 2nd to last item is unique in the data
|
||||
item = index_a[-2]
|
||||
tm.assert_numpy_array_equal(index_a == item, expected3)
|
||||
tm.assert_series_equal(series_a == item, Series(expected3))
|
||||
|
||||
|
||||
def test_compare_tuple():
    """Every comparison operator against a single tuple broadcasts over the index."""
    # GH#21517
    mi = MultiIndex.from_product([[1, 2]] * 2)

    all_false = np.array([False, False, False, False])

    # mi[0] is the smallest entry, so only position 0 compares equal
    result = mi == mi[0]
    expected = np.array([True, False, False, False])
    tm.assert_numpy_array_equal(result, expected)

    result = mi != mi[0]
    tm.assert_numpy_array_equal(result, ~expected)

    result = mi < mi[0]
    tm.assert_numpy_array_equal(result, all_false)

    result = mi <= mi[0]
    tm.assert_numpy_array_equal(result, expected)

    result = mi > mi[0]
    tm.assert_numpy_array_equal(result, ~expected)

    result = mi >= mi[0]
    tm.assert_numpy_array_equal(result, ~all_false)
|
||||
|
||||
|
||||
def test_compare_tuple_strs():
    """``==`` against a tuple of strings; a too-short tuple matches nothing."""
    # GH#34180

    mi = MultiIndex.from_tuples([("a", "b"), ("b", "c"), ("c", "a")])

    result = mi == ("c", "a")
    expected = np.array([False, False, True])
    tm.assert_numpy_array_equal(result, expected)

    result = mi == ("c",)
    expected = np.array([False, False, False])
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_equals_multi(idx):
|
||||
assert idx.equals(idx)
|
||||
assert not idx.equals(idx.values)
|
||||
assert idx.equals(Index(idx.values))
|
||||
|
||||
assert idx.equal_levels(idx)
|
||||
assert not idx.equals(idx[:-1])
|
||||
assert not idx.equals(idx[-1])
|
||||
|
||||
# different number of levels
|
||||
index = MultiIndex(
|
||||
levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))],
|
||||
codes=[
|
||||
np.array([0, 0, 1, 2, 2, 2, 3, 3]),
|
||||
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
|
||||
np.array([1, 0, 1, 1, 0, 0, 1, 0]),
|
||||
],
|
||||
)
|
||||
|
||||
index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1])
|
||||
assert not index.equals(index2)
|
||||
assert not index.equal_levels(index2)
|
||||
|
||||
# levels are different
|
||||
major_axis = Index(list(range(4)))
|
||||
minor_axis = Index(list(range(2)))
|
||||
|
||||
major_codes = np.array([0, 0, 1, 2, 2, 3])
|
||||
minor_codes = np.array([0, 1, 0, 0, 1, 0])
|
||||
|
||||
index = MultiIndex(
|
||||
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
|
||||
)
|
||||
assert not idx.equals(index)
|
||||
assert not idx.equal_levels(index)
|
||||
|
||||
# some of the labels are different
|
||||
major_axis = Index(["foo", "bar", "baz", "qux"])
|
||||
minor_axis = Index(["one", "two"])
|
||||
|
||||
major_codes = np.array([0, 0, 2, 2, 3, 3])
|
||||
minor_codes = np.array([0, 1, 0, 1, 0, 1])
|
||||
|
||||
index = MultiIndex(
|
||||
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
|
||||
)
|
||||
assert not idx.equals(index)
|
||||
|
||||
|
||||
def test_identical(idx):
|
||||
mi = idx.copy()
|
||||
mi2 = idx.copy()
|
||||
assert mi.identical(mi2)
|
||||
|
||||
mi = mi.set_names(["new1", "new2"])
|
||||
assert mi.equals(mi2)
|
||||
assert not mi.identical(mi2)
|
||||
|
||||
mi2 = mi2.set_names(["new1", "new2"])
|
||||
assert mi.identical(mi2)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
# subclass-specific keywords to pd.Index
|
||||
mi3 = Index(mi.tolist(), names=mi.names)
|
||||
|
||||
msg = r"Unexpected keyword arguments {'names'}"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
# subclass-specific keywords to pd.Index
|
||||
Index(mi.tolist(), names=mi.names, tupleize_cols=False)
|
||||
|
||||
mi4 = Index(mi.tolist(), tupleize_cols=False)
|
||||
assert mi.identical(mi3)
|
||||
assert not mi.identical(mi4)
|
||||
assert mi.equals(mi4)
|
||||
|
||||
|
||||
def test_equals_operator(idx):
|
||||
# GH9785
|
||||
assert (idx == idx).all()
|
||||
|
||||
|
||||
def test_equals_missing_values():
    """A one-row slice must not ``equals`` the bare element it contains."""
    # make sure take is not using -1
    i = MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))])
    result = i[0:1].equals(i[0])
    assert not result
    result = i[1:2].equals(i[1])
    assert not result
|
||||
|
||||
|
||||
def test_equals_missing_values_differently_sorted():
    """Row order matters for ``equals`` even when the entries contain NaN."""
    # GH#38439
    mi1 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
    mi2 = MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)])
    assert not mi1.equals(mi2)

    mi2 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
    assert mi1.equals(mi2)
|
||||
|
||||
|
||||
def test_is_():
|
||||
mi = MultiIndex.from_tuples(zip(range(10), range(10)))
|
||||
assert mi.is_(mi)
|
||||
assert mi.is_(mi.view())
|
||||
assert mi.is_(mi.view().view().view().view())
|
||||
mi2 = mi.view()
|
||||
# names are metadata, they don't change id
|
||||
mi2.names = ["A", "B"]
|
||||
assert mi2.is_(mi)
|
||||
assert mi.is_(mi2)
|
||||
|
||||
assert not mi.is_(mi.set_names(["C", "D"]))
|
||||
mi2 = mi.view()
|
||||
mi2.set_names(["E", "F"], inplace=True)
|
||||
assert mi.is_(mi2)
|
||||
# levels are inherent properties, they change identity
|
||||
mi3 = mi2.set_levels([list(range(10)), list(range(10))])
|
||||
assert not mi3.is_(mi2)
|
||||
# shouldn't change
|
||||
assert mi2.is_(mi)
|
||||
mi4 = mi3.view()
|
||||
|
||||
# GH 17464 - Remove duplicate MultiIndex levels
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
mi4.set_levels([list(range(10)), list(range(10))], inplace=True)
|
||||
assert not mi4.is_(mi3)
|
||||
mi5 = mi.view()
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
mi5.set_levels(mi5.levels, inplace=True)
|
||||
assert not mi5.is_(mi)
|
||||
|
||||
|
||||
def test_is_all_dates(idx):
|
||||
assert not idx._is_all_dates
|
||||
|
||||
|
||||
def test_is_numeric(idx):
|
||||
# MultiIndex is never numeric
|
||||
assert not idx.is_numeric()
|
||||
|
||||
|
||||
def test_multiindex_compare():
    """Comparison operators work on a single-level MultiIndex (GH 21149)."""
    # GH 21149
    # Ensure comparison operations for MultiIndex with nlevels == 1
    # behave consistently with those for MultiIndex with nlevels > 1

    midx = MultiIndex.from_product([[0, 1]])

    # Equality self-test: MultiIndex object vs self
    expected = Series([True, True])
    result = Series(midx == midx)
    tm.assert_series_equal(result, expected)

    # Greater than comparison: MultiIndex object vs self
    expected = Series([False, False])
    result = Series(midx > midx)
    tm.assert_series_equal(result, expected)
|
@ -0,0 +1,233 @@
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_format(idx):
|
||||
idx.format()
|
||||
idx[:0].format()
|
||||
|
||||
|
||||
def test_format_integer_names():
    """Smoke test: ``format(names=True)`` accepts integer level names."""
    index = MultiIndex(
        levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]
    )
    index.format(names=True)
|
||||
|
||||
|
||||
def test_format_sparse_config(idx):
|
||||
warn_filters = warnings.filters
|
||||
warnings.filterwarnings("ignore", category=FutureWarning, module=".*format")
|
||||
# GH1538
|
||||
pd.set_option("display.multi_sparse", False)
|
||||
|
||||
result = idx.format()
|
||||
assert result[1] == "foo two"
|
||||
|
||||
tm.reset_display_options()
|
||||
|
||||
warnings.filters = warn_filters
|
||||
|
||||
|
||||
def test_format_sparse_display():
    """Check the formatted text of the first row whose outermost label changes."""
    index = MultiIndex(
        levels=[[0, 1], [0, 1], [0, 1], [0]],
        codes=[
            [0, 0, 0, 1, 1, 1],
            [0, 0, 1, 0, 0, 1],
            [0, 1, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 0],
        ],
    )

    result = index.format()
    # columns are joined with two spaces
    assert result[3] == "1  0  0  0"
|
||||
|
||||
|
||||
def test_repr_with_unicode_data():
    """The repr of an index holding a non-ASCII label must contain no backslash."""
    with pd.option_context("display.encoding", "UTF-8"):
        d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
        index = pd.DataFrame(d).set_index(["a", "b"]).index
        assert "\\" not in repr(index)  # we don't want unicode-escaped
|
||||
|
||||
|
||||
def test_repr_roundtrip_raises():
    """Evaluating a MultiIndex repr must raise, since it passes no levels/codes."""
    mi = MultiIndex.from_product([list("ab"), range(3)], names=["first", "second"])
    msg = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=msg):
        # eval is applied only to a repr string built in this test
        eval(repr(mi))
|
||||
|
||||
|
||||
def test_unicode_string_with_unicode():
    """Smoke test: ``str()`` works on an index holding a non-ASCII label."""
    d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    idx = pd.DataFrame(d).set_index(["a", "b"]).index
    str(idx)
|
||||
|
||||
|
||||
def test_repr_max_seq_item_setting(idx):
|
||||
# GH10182
|
||||
idx = idx.repeat(50)
|
||||
with pd.option_context("display.max_seq_items", None):
|
||||
repr(idx)
|
||||
assert "..." not in str(idx)
|
||||
|
||||
|
||||
class TestRepr:
|
||||
def test_unicode_repr_issues(self):
|
||||
levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])]
|
||||
codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)]
|
||||
index = MultiIndex(levels=levels, codes=codes)
|
||||
|
||||
repr(index.levels)
|
||||
repr(index.get_level_values(1))
|
||||
|
||||
def test_repr_max_seq_items_equal_to_n(self, idx):
|
||||
# display.max_seq_items == n
|
||||
with pd.option_context("display.max_seq_items", 6):
|
||||
result = idx.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('foo', 'one'),
|
||||
('foo', 'two'),
|
||||
('bar', 'one'),
|
||||
('baz', 'two'),
|
||||
('qux', 'one'),
|
||||
('qux', 'two')],
|
||||
names=['first', 'second'])"""
|
||||
assert result == expected
|
||||
|
||||
def test_repr(self, idx):
|
||||
result = idx[:1].__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('foo', 'one')],
|
||||
names=['first', 'second'])"""
|
||||
assert result == expected
|
||||
|
||||
result = idx.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('foo', 'one'),
|
||||
('foo', 'two'),
|
||||
('bar', 'one'),
|
||||
('baz', 'two'),
|
||||
('qux', 'one'),
|
||||
('qux', 'two')],
|
||||
names=['first', 'second'])"""
|
||||
assert result == expected
|
||||
|
||||
with pd.option_context("display.max_seq_items", 5):
|
||||
result = idx.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('foo', 'one'),
|
||||
('foo', 'two'),
|
||||
...
|
||||
('qux', 'one'),
|
||||
('qux', 'two')],
|
||||
names=['first', 'second'], length=6)"""
|
||||
assert result == expected
|
||||
|
||||
# display.max_seq_items == 1
|
||||
with pd.option_context("display.max_seq_items", 1):
|
||||
result = idx.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([...
|
||||
('qux', 'two')],
|
||||
names=['first', ...], length=6)"""
|
||||
assert result == expected
|
||||
|
||||
def test_rjust(self, narrow_multi_index):
|
||||
mi = narrow_multi_index
|
||||
result = mi[:1].__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('a', 9, '2000-01-01 00:00:00')],
|
||||
names=['a', 'b', 'dti'])"""
|
||||
assert result == expected
|
||||
|
||||
result = mi[::500].__repr__()
|
||||
expected = """\
|
||||
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
|
||||
( 'a', 9, '2000-01-01 00:08:20'),
|
||||
('abc', 10, '2000-01-01 00:16:40'),
|
||||
('abc', 10, '2000-01-01 00:25:00')],
|
||||
names=['a', 'b', 'dti'])"""
|
||||
assert result == expected
|
||||
|
||||
result = mi.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
|
||||
( 'a', 9, '2000-01-01 00:00:01'),
|
||||
( 'a', 9, '2000-01-01 00:00:02'),
|
||||
( 'a', 9, '2000-01-01 00:00:03'),
|
||||
( 'a', 9, '2000-01-01 00:00:04'),
|
||||
( 'a', 9, '2000-01-01 00:00:05'),
|
||||
( 'a', 9, '2000-01-01 00:00:06'),
|
||||
( 'a', 9, '2000-01-01 00:00:07'),
|
||||
( 'a', 9, '2000-01-01 00:00:08'),
|
||||
( 'a', 9, '2000-01-01 00:00:09'),
|
||||
...
|
||||
('abc', 10, '2000-01-01 00:33:10'),
|
||||
('abc', 10, '2000-01-01 00:33:11'),
|
||||
('abc', 10, '2000-01-01 00:33:12'),
|
||||
('abc', 10, '2000-01-01 00:33:13'),
|
||||
('abc', 10, '2000-01-01 00:33:14'),
|
||||
('abc', 10, '2000-01-01 00:33:15'),
|
||||
('abc', 10, '2000-01-01 00:33:16'),
|
||||
('abc', 10, '2000-01-01 00:33:17'),
|
||||
('abc', 10, '2000-01-01 00:33:18'),
|
||||
('abc', 10, '2000-01-01 00:33:19')],
|
||||
names=['a', 'b', 'dti'], length=2000)"""
|
||||
assert result == expected
|
||||
|
||||
def test_tuple_width(self, wide_multi_index):
|
||||
mi = wide_multi_index
|
||||
result = mi[:1].__repr__()
|
||||
expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)],
|
||||
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
|
||||
assert result == expected
|
||||
|
||||
result = mi[:10].__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
|
||||
('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
|
||||
('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
|
||||
('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
|
||||
('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
|
||||
('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
|
||||
('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
|
||||
('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
|
||||
('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
|
||||
('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)],
|
||||
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
|
||||
assert result == expected
|
||||
|
||||
result = mi.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...),
|
||||
...
|
||||
('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...),
|
||||
('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...),
|
||||
('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...),
|
||||
('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...),
|
||||
('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...),
|
||||
('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...),
|
||||
('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...),
|
||||
('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...),
|
||||
('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...),
|
||||
('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)],
|
||||
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)"""
|
||||
assert result == expected
|
@ -0,0 +1,114 @@
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestGetLevelValues:
    """Tests for ``MultiIndex.get_level_values`` grouped in a class."""

    def test_get_level_values_box_datetime64(self):
        """Elements taken from a datetime level come back as ``Timestamp``."""
        dates = date_range("1/1/2000", periods=4)
        levels = [dates, [0, 1]]
        codes = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]]

        index = MultiIndex(levels=levels, codes=codes)

        assert isinstance(index.get_level_values(0)[0], Timestamp)
|
||||
|
||||
|
||||
def test_get_level_values(idx):
|
||||
result = idx.get_level_values(0)
|
||||
expected = Index(["foo", "foo", "bar", "baz", "qux", "qux"], name="first")
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.name == "first"
|
||||
|
||||
result = idx.get_level_values("first")
|
||||
expected = idx.get_level_values(0)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# GH 10460
|
||||
index = MultiIndex(
|
||||
levels=[CategoricalIndex(["A", "B"]), CategoricalIndex([1, 2, 3])],
|
||||
codes=[np.array([0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])],
|
||||
)
|
||||
|
||||
exp = CategoricalIndex(["A", "A", "A", "B", "B", "B"])
|
||||
tm.assert_index_equal(index.get_level_values(0), exp)
|
||||
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
|
||||
tm.assert_index_equal(index.get_level_values(1), exp)
|
||||
|
||||
|
||||
def test_get_level_values_all_na():
    """An all-NaN level comes back as float64; a mixed level as object."""
    # GH#17924 when level entirely consists of nan
    arrays = [[np.nan, np.nan, np.nan], ["a", np.nan, 1]]
    index = MultiIndex.from_arrays(arrays)
    result = index.get_level_values(0)
    expected = Index([np.nan, np.nan, np.nan], dtype=np.float64)
    tm.assert_index_equal(result, expected)

    result = index.get_level_values(1)
    expected = Index(["a", np.nan, 1], dtype=object)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_get_level_values_int_with_na():
    """Integer level values containing NaN match an Index of the same values."""
    # GH#17924
    arrays = [["a", "b", "b"], [1, np.nan, 2]]
    index = MultiIndex.from_arrays(arrays)
    result = index.get_level_values(1)
    expected = Index([1, np.nan, 2])
    tm.assert_index_equal(result, expected)

    arrays = [["a", "b", "b"], [np.nan, np.nan, 2]]
    index = MultiIndex.from_arrays(arrays)
    result = index.get_level_values(1)
    expected = Index([np.nan, np.nan, 2])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_get_level_values_na():
    """Level values for NaN, mixed-object, datetime-with-NaT and empty levels."""
    arrays = [[np.nan, np.nan, np.nan], ["a", np.nan, 1]]
    index = MultiIndex.from_arrays(arrays)
    result = index.get_level_values(0)
    expected = Index([np.nan, np.nan, np.nan])
    tm.assert_index_equal(result, expected)

    result = index.get_level_values(1)
    expected = Index(["a", np.nan, 1])
    tm.assert_index_equal(result, expected)

    arrays = [["a", "b", "b"], pd.DatetimeIndex([0, 1, pd.NaT])]
    index = MultiIndex.from_arrays(arrays)
    result = index.get_level_values(1)
    expected = pd.DatetimeIndex([0, 1, pd.NaT])
    tm.assert_index_equal(result, expected)

    arrays = [[], []]
    index = MultiIndex.from_arrays(arrays)
    result = index.get_level_values(0)
    expected = Index([], dtype=object)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_get_level_values_when_periods():
    """Levels rebuilt from ``_get_level_values`` of a Period index stay monotonic."""
    # GH33131. See also discussion in GH32669.
    # This test can probably be removed when PeriodIndex._engine is removed.
    from pandas import (
        Period,
        PeriodIndex,
    )

    idx = MultiIndex.from_arrays(
        [PeriodIndex([Period("2019Q1"), Period("2019Q2")], name="b")]
    )
    idx2 = MultiIndex.from_arrays(
        [idx._get_level_values(level) for level in range(idx.nlevels)]
    )
    # ``is_monotonic`` is a deprecated alias of ``is_monotonic_increasing``
    assert all(x.is_monotonic_increasing for x in idx2.levels)
|
@ -0,0 +1,490 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def assert_matching(actual, expected, check_dtype=False):
    """Assert two sequences of array-likes are pairwise equal as numpy arrays.

    Parameters
    ----------
    actual, expected : sequence of array-like
        Must have the same length; items are compared position by position.
    check_dtype : bool, default False
        Forwarded to ``tm.assert_numpy_array_equal``.
    """
    # avoid specifying internal representation
    # as much as possible
    assert len(actual) == len(expected)
    for act, exp in zip(actual, expected):
        act = np.asarray(act)
        exp = np.asarray(exp)
        tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype)
|
||||
|
||||
|
||||
def test_get_level_number_integer(idx):
|
||||
idx.names = [1, 0]
|
||||
assert idx._get_level_number(1) == 0
|
||||
assert idx._get_level_number(0) == 1
|
||||
msg = "Too many levels: Index has only 2 levels, not 3"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
idx._get_level_number(2)
|
||||
with pytest.raises(KeyError, match="Level fourth not found"):
|
||||
idx._get_level_number("fourth")
|
||||
|
||||
|
||||
def test_get_dtypes():
    """``MultiIndex.dtypes`` is a Series of level dtypes keyed by level name."""
    # Test MultiIndex.dtypes (# Gh37062)
    idx_multitype = MultiIndex.from_product(
        [[1, 2, 3], ["a", "b", "c"], pd.date_range("20200101", periods=2, tz="UTC")],
        names=["int", "string", "dt"],
    )
    expected = pd.Series(
        {
            "int": np.dtype("int64"),
            "string": np.dtype("O"),
            "dt": DatetimeTZDtype(tz="utc"),
        }
    )
    tm.assert_series_equal(expected, idx_multitype.dtypes)
|
||||
|
||||
|
||||
def test_get_dtypes_no_level_name():
    """Unnamed levels are keyed ``level_0``, ``level_1``, ... in ``dtypes``."""
    # Test MultiIndex.dtypes (# GH38580 )
    idx_multitype = MultiIndex.from_product(
        [
            [1, 2, 3],
            ["a", "b", "c"],
            pd.date_range("20200101", periods=2, tz="UTC"),
        ],
    )
    expected = pd.Series(
        {
            "level_0": np.dtype("int64"),
            "level_1": np.dtype("O"),
            "level_2": DatetimeTZDtype(tz="utc"),
        }
    )
    tm.assert_series_equal(expected, idx_multitype.dtypes)
|
||||
|
||||
|
||||
def test_get_dtypes_duplicate_level_names():
    """``dtypes`` keeps one entry per level even when level names repeat."""
    # Test MultiIndex.dtypes with non-unique level names (# GH45174)
    result = MultiIndex.from_product(
        [
            [1, 2, 3],
            ["a", "b", "c"],
            pd.date_range("20200101", periods=2, tz="UTC"),
        ],
        names=["A", "A", "A"],
    ).dtypes
    expected = pd.Series(
        [np.dtype("int64"), np.dtype("O"), DatetimeTZDtype(tz="utc")],
        index=["A", "A", "A"],
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_get_level_number_out_of_bounds(multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
|
||||
with pytest.raises(IndexError, match="Too many levels"):
|
||||
frame.index._get_level_number(2)
|
||||
with pytest.raises(IndexError, match="not a valid level number"):
|
||||
frame.index._get_level_number(-3)
|
||||
|
||||
|
||||
def test_set_name_methods(idx, index_names):
|
||||
# so long as these are synonyms, we don't need to test set_names
|
||||
assert idx.rename == idx.set_names
|
||||
new_names = [name + "SUFFIX" for name in index_names]
|
||||
ind = idx.set_names(new_names)
|
||||
assert idx.names == index_names
|
||||
assert ind.names == new_names
|
||||
msg = "Length of names must match number of levels in MultiIndex"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ind.set_names(new_names + new_names)
|
||||
new_names2 = [name + "SUFFIX2" for name in new_names]
|
||||
res = ind.set_names(new_names2, inplace=True)
|
||||
assert res is None
|
||||
assert ind.names == new_names2
|
||||
|
||||
# set names for specific level (# GH7792)
|
||||
ind = idx.set_names(new_names[0], level=0)
|
||||
assert idx.names == index_names
|
||||
assert ind.names == [new_names[0], index_names[1]]
|
||||
|
||||
res = ind.set_names(new_names2[0], level=0, inplace=True)
|
||||
assert res is None
|
||||
assert ind.names == [new_names2[0], index_names[1]]
|
||||
|
||||
# set names for multiple levels
|
||||
ind = idx.set_names(new_names, level=[0, 1])
|
||||
assert idx.names == index_names
|
||||
assert ind.names == new_names
|
||||
|
||||
res = ind.set_names(new_names2, level=[0, 1], inplace=True)
|
||||
assert res is None
|
||||
assert ind.names == new_names2
|
||||
|
||||
|
||||
def test_set_levels_codes_directly(idx):
    """Assigning to ``.levels`` or ``.codes`` must raise ``AttributeError``."""
    replacement_levels = [[label + "a" for label in level] for level in idx.levels]

    first_codes, second_codes = idx.codes
    replacement_codes = [
        [(code + 1) % 3 for code in first_codes],
        [(code + 1) % 1 for code in second_codes],
    ]

    msg = "[Cc]an't set attribute"

    with pytest.raises(AttributeError, match=msg):
        idx.levels = replacement_levels

    with pytest.raises(AttributeError, match=msg):
        idx.codes = replacement_codes
|
||||
|
||||
|
||||
def test_set_levels(idx):
    """Exercise ``set_levels`` with and without ``inplace=True``.

    Covers replacing all levels, a single level and a list of levels, then
    checks that rejected ``set_levels`` / ``set_codes`` calls leave the
    fixture's levels and codes equal to a copy taken beforehand (GH 13754).
    """
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    orig_levels = idx.levels
    suffixed = [[label + "a" for label in level] for level in orig_levels]

    # all levels, returning a new index
    changed = idx.set_levels(suffixed)
    assert_matching(changed.levels, suffixed)
    assert_matching(idx.levels, orig_levels)

    # all levels, mutating a copy
    changed = idx.copy()
    with tm.assert_produces_warning(FutureWarning):
        ret = changed.set_levels(suffixed, inplace=True)
    assert ret is None
    assert_matching(changed.levels, suffixed)

    # one level at a time, returning a new index
    changed = idx.set_levels(suffixed[0], level=0)
    assert_matching(changed.levels, [suffixed[0], orig_levels[1]])
    assert_matching(idx.levels, orig_levels)

    changed = idx.set_levels(suffixed[1], level=1)
    assert_matching(changed.levels, [orig_levels[0], suffixed[1]])
    assert_matching(idx.levels, orig_levels)

    # a list of levels, returning a new index
    changed = idx.set_levels(suffixed, level=[0, 1])
    assert_matching(changed.levels, suffixed)
    assert_matching(idx.levels, orig_levels)

    # one level at a time, mutating a copy
    changed = idx.copy()
    with tm.assert_produces_warning(FutureWarning):
        ret = changed.set_levels(suffixed[0], level=0, inplace=True)
    assert ret is None
    assert_matching(changed.levels, [suffixed[0], orig_levels[1]])
    assert_matching(idx.levels, orig_levels)

    changed = idx.copy()
    with tm.assert_produces_warning(FutureWarning):
        ret = changed.set_levels(suffixed[1], level=1, inplace=True)
    assert ret is None
    assert_matching(changed.levels, [orig_levels[0], suffixed[1]])
    assert_matching(idx.levels, orig_levels)

    # a list of levels, mutating a copy
    changed = idx.copy()
    with tm.assert_produces_warning(FutureWarning):
        ret = changed.set_levels(suffixed, level=[0, 1], inplace=True)
    assert ret is None
    assert_matching(changed.levels, suffixed)
    assert_matching(idx.levels, orig_levels)

    # GH 13754: a rejected change must leave levels and codes untouched
    snapshot = idx.copy()
    for inplace in [True, False]:
        with pytest.raises(ValueError, match="^On"), tm.assert_produces_warning(
            FutureWarning
        ):
            idx.set_levels(["c"], level=0, inplace=inplace)
        assert_matching(idx.levels, snapshot.levels, check_dtype=True)

        with pytest.raises(ValueError, match="^On"), tm.assert_produces_warning(
            FutureWarning
        ):
            idx.set_codes([0, 1, 2, 3, 4, 5], level=0, inplace=inplace)
        assert_matching(idx.codes, snapshot.codes, check_dtype=True)

        with pytest.raises(TypeError, match="^Levels"), tm.assert_produces_warning(
            FutureWarning
        ):
            idx.set_levels("c", level=0, inplace=inplace)
        assert_matching(idx.levels, snapshot.levels, check_dtype=True)

        with pytest.raises(TypeError, match="^Codes"), tm.assert_produces_warning(
            FutureWarning
        ):
            idx.set_codes(1, level=0, inplace=inplace)
        assert_matching(idx.codes, snapshot.codes, check_dtype=True)
|
||||
|
||||
|
||||
def test_set_codes(idx):
    """Exercise ``set_codes`` with and without ``inplace=True``.

    Covers replacing all codes, the codes of a single level and the codes of
    a list of levels, and finishes with a 130-row index whose second-level
    codes are reversed.
    """
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    orig_codes = idx.codes
    first_codes, second_codes = orig_codes
    shifted = [
        [(code + 1) % 3 for code in first_codes],
        [(code + 1) % 1 for code in second_codes],
    ]

    # all codes, returning a new index
    changed = idx.set_codes(shifted)
    assert_matching(changed.codes, shifted)
    assert_matching(idx.codes, orig_codes)

    # all codes, mutating a copy
    changed = idx.copy()
    with tm.assert_produces_warning(FutureWarning):
        ret = changed.set_codes(shifted, inplace=True)
    assert ret is None
    assert_matching(changed.codes, shifted)

    # one level at a time, returning a new index
    changed = idx.set_codes(shifted[0], level=0)
    assert_matching(changed.codes, [shifted[0], orig_codes[1]])
    assert_matching(idx.codes, orig_codes)

    changed = idx.set_codes(shifted[1], level=1)
    assert_matching(changed.codes, [orig_codes[0], shifted[1]])
    assert_matching(idx.codes, orig_codes)

    # a list of levels, returning a new index
    changed = idx.set_codes(shifted, level=[0, 1])
    assert_matching(changed.codes, shifted)
    assert_matching(idx.codes, orig_codes)

    # one level at a time, mutating a copy
    changed = idx.copy()
    with tm.assert_produces_warning(FutureWarning):
        ret = changed.set_codes(shifted[0], level=0, inplace=True)
    assert ret is None
    assert_matching(changed.codes, [shifted[0], orig_codes[1]])
    assert_matching(idx.codes, orig_codes)

    changed = idx.copy()
    with tm.assert_produces_warning(FutureWarning):
        ret = changed.set_codes(shifted[1], level=1, inplace=True)
    assert ret is None
    assert_matching(changed.codes, [orig_codes[0], shifted[1]])
    assert_matching(idx.codes, orig_codes)

    # a list of levels, mutating a copy
    changed = idx.copy()
    with tm.assert_produces_warning(FutureWarning):
        ret = changed.set_codes(shifted, level=[0, 1], inplace=True)
    assert ret is None
    assert_matching(changed.codes, shifted)
    assert_matching(idx.codes, orig_codes)

    # label changing for levels of different magnitude of categories
    wide = MultiIndex.from_tuples([(0, i) for i in range(130)])
    reversed_codes = range(129, -1, -1)
    wanted = MultiIndex.from_tuples([(0, i) for i in reversed_codes])

    # returning a new index
    got = wide.set_codes(codes=reversed_codes, level=1)
    assert got.equals(wanted)

    # mutating a copy
    got = wide.copy()
    with tm.assert_produces_warning(FutureWarning):
        got.set_codes(codes=reversed_codes, level=1, inplace=True)
    assert got.equals(wanted)
|
||||
|
||||
|
||||
def test_set_levels_codes_names_bad_input(idx):
    """Check the errors raised by the setters for malformed arguments.

    Each of ``set_levels``, ``set_codes`` and ``set_names`` is called with a
    too-short list, with a single (non nested) entry, and with a value whose
    shape disagrees with the ``level`` argument.
    """
    lvls = idx.levels
    cds = idx.codes
    nms = idx.names

    # too few entries
    with pytest.raises(ValueError, match="Length of levels"):
        idx.set_levels([lvls[0]])

    with pytest.raises(ValueError, match="Length of codes"):
        idx.set_codes([cds[0]])

    with pytest.raises(ValueError, match="Length of names"):
        idx.set_names([nms[0]])

    # a single entry where a list of entries is required
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_levels(lvls[0])

    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_codes(cds[0])

    with pytest.raises(TypeError, match="list-like"):
        idx.set_names(nms[0])

    # value and `level` must agree: levels
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_levels(lvls[0], level=[0, 1])

    with pytest.raises(TypeError, match="list-like"):
        idx.set_levels(lvls, level=0)

    # value and `level` must agree: codes
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_codes(cds[0], level=[0, 1])

    with pytest.raises(TypeError, match="list-like"):
        idx.set_codes(cds, level=0)

    # value and `level` must agree: names
    with pytest.raises(ValueError, match="Length of names"):
        idx.set_names(nms[0], level=[0, 1])

    with pytest.raises(TypeError, match="Names must be a"):
        idx.set_names(nms, level=0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("inplace", [True, False])
def test_set_names_with_nlevel_1(inplace):
    """GH 21149: ``set_names`` works on a MultiIndex that has one level."""
    single = MultiIndex.from_product([[0, 1]])
    returned = single.set_names("first", level=0, inplace=inplace)

    # with inplace=True the receiver itself carries the new name
    actual = single if inplace else returned

    wanted = MultiIndex(levels=[[0, 1]], codes=[[0, 1]], names=["first"])
    tm.assert_index_equal(actual, wanted)
|
||||
|
||||
|
||||
def test_multi_set_names_pos_args_deprecation():
    """GH#41485: a positional ``level`` in ``set_names`` warns but still works."""
    mi = MultiIndex.from_product([["python", "cobra"], [2018, 2019]])

    msg = (
        "In a future version of pandas all arguments of MultiIndex.set_names "
        "except for the argument 'names' will be keyword-only"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        renamed = mi.set_names(["kind", "year"], None)

    wanted = MultiIndex(
        levels=[["python", "cobra"], [2018, 2019]],
        codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
        names=["kind", "year"],
    )
    tm.assert_index_equal(renamed, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ordered", [True, False])
def test_set_levels_categorical(ordered):
    """GH13854: a ``CategoricalIndex`` can be installed as a level."""
    base = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])
    categories = CategoricalIndex(list("bac"), ordered=ordered)

    swapped = base.set_levels(categories, level=0)
    wanted = MultiIndex(levels=[categories, [0, 1, 2, 3]], codes=base.codes)
    tm.assert_index_equal(swapped, wanted)

    # the level values must come back as a categorical with the same
    # categories and ordering as the installed level
    wanted_values = CategoricalIndex(
        list("bacb"), categories=categories.categories, ordered=categories.ordered
    )
    tm.assert_index_equal(swapped.get_level_values(0), wanted_values)
|
||||
|
||||
|
||||
def test_set_value_keeps_names():
    """Setting a value for a new key via ``.at`` keeps the index names (#3742)."""
    people = ["hans"] * 3 + ["grethe"] * 3
    numbers = ["1", "2", "3"] * 2
    mi = MultiIndex.from_arrays([people, numbers], names=["Name", "Number"])

    frame = pd.DataFrame(
        np.random.randn(6, 4), columns=["one", "two", "three", "four"], index=mi
    ).sort_index()
    assert frame._is_copy is None
    assert frame.index.names == ("Name", "Number")

    # ("grethe", "4") is not among the keys the frame was built with
    frame.at[("grethe", "4"), "one"] = 99.34
    assert frame._is_copy is None
    assert frame.index.names == ("Name", "Number")
|
||||
|
||||
|
||||
def test_set_levels_with_iterable():
    """GH23273: ``set_levels`` accepts a one-shot iterable such as ``map``."""
    names = ["size", "color"]
    colors = ["black"] * 3
    base = MultiIndex.from_arrays([[1, 2, 3], colors], names=names)

    relabelled = base.set_levels(map(int, ["3", "2", "1"]), level="size")

    wanted = MultiIndex.from_arrays([[3, 2, 1], colors], names=names)
    tm.assert_index_equal(relabelled, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("inplace", [True, False])
def test_set_codes_inplace_deprecated(idx, inplace):
    """Passing ``inplace`` (either value) to ``set_codes`` emits a FutureWarning."""
    reversed_codes = idx.codes[1][::-1]

    with tm.assert_produces_warning(FutureWarning):
        idx.set_codes(codes=reversed_codes, level=1, inplace=inplace)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("inplace", [True, False])
def test_set_levels_inplace_deprecated(idx, inplace):
    """Passing ``inplace`` (either value) to ``set_levels`` emits a FutureWarning."""
    level_copy = idx.levels[1].copy()

    with tm.assert_produces_warning(FutureWarning):
        idx.set_levels(levels=level_copy, level=1, inplace=inplace)
|
||||
|
||||
|
||||
def test_set_levels_pos_args_deprecation():
    """A positional ``level`` in ``set_levels`` warns but still works.

    See https://github.com/pandas-dev/pandas/issues/41485
    """
    names = ["foo", "bar"]
    mi = MultiIndex.from_tuples([(1, "one"), (2, "one"), (3, "one")], names=names)

    msg = (
        r"In a future version of pandas all arguments of MultiIndex.set_levels except "
        r"for the argument 'levels' will be keyword-only"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        relabelled = mi.set_levels(["a", "b", "c"], 0)

    wanted = MultiIndex.from_tuples(
        [("a", "one"), ("b", "one"), ("c", "one")], names=names
    )
    tm.assert_index_equal(relabelled, wanted)
|
||||
|
||||
|
||||
def test_set_codes_pos_args_depreciation(idx):
    """A positional ``level`` in ``set_codes`` warns but still works.

    See https://github.com/pandas-dev/pandas/issues/41485
    """
    msg = (
        r"In a future version of pandas all arguments of MultiIndex.set_codes except "
        r"for the argument 'codes' will be keyword-only"
    )
    codes_by_level = [[0, 0, 1, 2, 3, 3], [0, 1, 0, 1, 0, 1]]
    with tm.assert_produces_warning(FutureWarning, match=msg):
        recoded = idx.set_codes(codes_by_level, [0, 1])

    wanted_rows = [
        ("foo", "one"),
        ("foo", "two"),
        ("bar", "one"),
        ("baz", "two"),
        ("qux", "one"),
        ("qux", "two"),
    ]
    wanted = MultiIndex.from_tuples(wanted_rows, names=["first", "second"])
    tm.assert_index_equal(recoded, wanted)
|
@ -0,0 +1,891 @@
|
||||
from datetime import timedelta
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import (
|
||||
InvalidIndexError,
|
||||
PerformanceWarning,
|
||||
)
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
Index,
|
||||
MultiIndex,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSliceLocs:
    """Tests for ``MultiIndex.slice_locs``."""

    def test_slice_locs_partial(self, idx):
        """Bounds may be full tuples, ``None`` or a bare first-level label."""
        sorted_idx, _ = idx.sortlevel(0)

        # (bounds passed to slice_locs, expected (start, stop))
        cases = [
            ((("foo", "two"), ("qux", "one")), (1, 5)),
            ((None, ("qux", "one")), (0, 5)),
            ((("foo", "two"), None), (1, len(sorted_idx))),
            (("bar", "baz"), (2, 4)),
        ]
        for bounds, wanted in cases:
            assert sorted_idx.slice_locs(*bounds) == wanted

    def test_slice_locs(self):
        """Slicing a stacked time frame by first-level timestamps."""
        df = tm.makeTimeDataFrame()
        stacked = df.stack()
        mi = stacked.index
        half_minute = timedelta(seconds=30)

        # bounds that are labels of the frame's own index
        start, stop = mi.slice_locs(df.index[5], df.index[15])
        wanted = df[5:16].stack()
        tm.assert_almost_equal(stacked[slice(start, stop)].values, wanted.values)

        # bounds shifted by 30 seconds off those labels
        start, stop = mi.slice_locs(
            df.index[5] + half_minute,
            df.index[15] - half_minute,
        )
        wanted = df[6:15].stack()
        tm.assert_almost_equal(stacked[slice(start, stop)].values, wanted.values)

    def test_slice_locs_with_type_mismatch(self):
        """Bounds that do not fit the level types raise ``TypeError``."""
        pattern = "^Level type mismatch"

        df = tm.makeTimeDataFrame()
        mi = df.stack().index
        with pytest.raises(TypeError, match=pattern):
            mi.slice_locs((1, 3))
        with pytest.raises(TypeError, match=pattern):
            mi.slice_locs(df.index[5] + timedelta(seconds=30), (5, 2))

        df = tm.makeCustomDataframe(5, 5)
        mi = df.stack().index
        with pytest.raises(TypeError, match=pattern):
            mi.slice_locs(timedelta(seconds=30))
        # TODO: Try creating a UnicodeDecodeError in exception message
        with pytest.raises(TypeError, match=pattern):
            mi.slice_locs(df.index[1], (16, "a"))

    def test_slice_locs_not_sorted(self):
        """Full-depth keys raise ``KeyError`` until the index is sorted."""
        unsorted = MultiIndex(
            levels=[Index(np.arange(4)) for _ in range(3)],
            codes=[
                np.array([0, 0, 1, 2, 2, 2, 3, 3]),
                np.array([0, 1, 0, 0, 0, 1, 0, 1]),
                np.array([1, 0, 1, 1, 0, 0, 1, 0]),
            ],
        )
        msg = "[Kk]ey length.*greater than MultiIndex lexsort depth"
        with pytest.raises(KeyError, match=msg):
            unsorted.slice_locs((1, 0, 1), (2, 1, 0))

        # after sorting the same call goes through
        ordered, _ = unsorted.sortlevel(0)
        # should there be a test case here???
        ordered.slice_locs((1, 0, 1), (2, 1, 0))

    def test_slice_locs_not_contained(self):
        """Bounds that are not keys of the index still resolve to positions."""
        # some searchsorted action
        mi = MultiIndex(
            levels=[[0, 2, 4, 6], [0, 2, 4]],
            codes=[[0, 0, 0, 1, 1, 2, 3, 3, 3], [0, 1, 2, 1, 2, 2, 0, 1, 2]],
        )

        # (start bound, end bound, expected (start, stop))
        cases = [
            ((1, 0), (5, 2), (3, 6)),
            (1, 5, (3, 6)),
            ((2, 2), (5, 2), (3, 6)),
            (2, 5, (3, 6)),
            ((1, 0), (6, 3), (3, 8)),
            (-1, 10, (0, len(mi))),
        ]
        for lower, upper, wanted in cases:
            assert mi.slice_locs(lower, upper) == wanted

    @pytest.mark.parametrize(
        "index_arr,expected,start_idx,end_idx",
        [
            ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, None),
            ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, "b"),
            ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, ("b", "e")),
            ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), None),
            ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), "c"),
            ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), ("c", "e")),
        ],
    )
    def test_slice_locs_with_missing_value(
        self, index_arr, expected, start_idx, end_idx
    ):
        """Issue 19132: NaN is usable as (part of) the start bound."""
        mi = MultiIndex.from_arrays(index_arr)
        assert mi.slice_locs(start=start_idx, end=end_idx) == expected
|
||||
|
||||
|
||||
class TestPutmask:
    """Tests for ``MultiIndex.putmask``."""

    def test_putmask_with_wrong_mask(self, idx):
        """GH18368: a mask of the wrong size raises ``ValueError``."""
        msg = "putmask: mask and data must be the same size"
        size = len(idx)

        bad_masks = (
            np.ones(size + 1, np.bool_),  # one element too many
            np.ones(size - 1, np.bool_),  # one element too few
            "foo",
        )
        for mask in bad_masks:
            with pytest.raises(ValueError, match=msg):
                idx.putmask(mask, 1)

    def test_putmask_multiindex_other(self):
        """GH#43212: the replacement value may itself be a MultiIndex."""
        left = MultiIndex.from_tuples([(np.nan, 6), (np.nan, 6), ("a", 4)])
        right = MultiIndex.from_tuples([("a", 1), ("a", 1), ("d", 1)])
        mask = np.array([True, True, False])

        merged = left.putmask(mask, right)

        # masked positions come from `right`, the unmasked one from `left`
        wanted = MultiIndex.from_tuples([right[0], right[1], left[2]])
        tm.assert_index_equal(merged, wanted)
|
||||
|
||||
|
||||
class TestGetIndexer:
    """Tests for ``MultiIndex.get_indexer``."""

    def test_get_indexer(self):
        """Exact, padded and backfilled lookups between two sub-indexes."""
        mi = MultiIndex(
            levels=[Index(np.arange(4)), Index(np.arange(2))],
            codes=[
                np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp),
                np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp),
            ],
        )
        head = mi[:5]
        picked = mi[[1, 3, 5]]

        exact = head.get_indexer(picked)
        tm.assert_almost_equal(exact, np.array([1, 3, -1], dtype=np.intp))

        # pad, and its alias ffill
        padded = picked.get_indexer(head, method="pad")
        wanted_pad = np.array([-1, 0, 0, 1, 1], dtype=np.intp)
        tm.assert_almost_equal(padded, wanted_pad)

        padded_rev = picked.get_indexer(head[::-1], method="pad")
        tm.assert_almost_equal(padded_rev, wanted_pad[::-1])

        ffilled = picked.get_indexer(head, method="ffill")
        tm.assert_almost_equal(padded, ffilled)

        # backfill, and its alias bfill
        backfilled = picked.get_indexer(head, method="backfill")
        wanted_back = np.array([0, 0, 1, 1, 2], dtype=np.intp)
        tm.assert_almost_equal(backfilled, wanted_back)

        backfilled_rev = picked.get_indexer(head[::-1], method="backfill")
        tm.assert_almost_equal(backfilled_rev, wanted_back[::-1])

        bfilled = picked.get_indexer(head, method="bfill")
        tm.assert_almost_equal(backfilled, bfilled)

        # pass non-MultiIndex
        from_values = head.get_indexer(picked.values)
        from_index = head.get_indexer(picked)
        tm.assert_almost_equal(from_values, from_index)

        scalars = head.get_indexer([1, 2, 3])
        assert (scalars == [-1, -1, -1]).all()

        # create index with duplicates
        duplicated = Index(list(range(10)) + list(range(10)))
        target = Index(list(range(20)))

        msg = "Reindexing only valid with uniquely valued Index objects"
        with pytest.raises(InvalidIndexError, match=msg):
            duplicated.get_indexer(target)

    def test_get_indexer_nearest(self):
        """``method='nearest'`` and ``tolerance`` raise NotImplementedError."""
        midx = MultiIndex.from_tuples([("a", 1), ("b", 2)])

        msg = (
            "method='nearest' not implemented yet for MultiIndex; "
            "see GitHub issue 9365"
        )
        with pytest.raises(NotImplementedError, match=msg):
            midx.get_indexer(["a"], method="nearest")

        msg = "tolerance not implemented yet for MultiIndex"
        with pytest.raises(NotImplementedError, match=msg):
            midx.get_indexer(["a"], method="pad", tolerance=2)

    def test_get_indexer_categorical_time(self):
        """Self-lookup on a product of two Categoricals is the identity.

        See https://github.com/pandas-dev/pandas/issues/21390
        """
        letters = Categorical(["a", "b", "c"])
        hours = Categorical(date_range("2012-01-01", periods=3, freq="H"))
        midx = MultiIndex.from_product([letters, hours])

        positions = midx.get_indexer(midx)
        tm.assert_numpy_array_equal(positions, np.arange(9, dtype=np.intp))

    @pytest.mark.parametrize(
        "index_arr,labels,expected",
        [
            (
                [[1, np.nan, 2], [3, 4, 5]],
                [1, np.nan, 2],
                np.array([-1, -1, -1], dtype=np.intp),
            ),
            ([[1, np.nan, 2], [3, 4, 5]], [(np.nan, 4)], np.array([1], dtype=np.intp)),
            ([[1, 2, 3], [np.nan, 4, 5]], [(1, np.nan)], np.array([0], dtype=np.intp)),
            (
                [[1, 2, 3], [np.nan, 4, 5]],
                [np.nan, 4, 5],
                np.array([-1, -1, -1], dtype=np.intp),
            ),
        ],
    )
    def test_get_indexer_with_missing_value(self, index_arr, labels, expected):
        """Issue 19132: lookups on an index that contains NaN."""
        mi = MultiIndex.from_arrays(index_arr)
        tm.assert_numpy_array_equal(mi.get_indexer(labels), expected)

    def test_get_indexer_methods(self):
        """No-fill, backfill and pad lookups for targets between source rows.

        See https://github.com/pandas-dev/pandas/issues/29896
        """
        # mult_idx_1 (position: tuple)
        #    0: (-1, 0)    1: (-1, 2)    2: (-1, 3)    3: (-1, 4)
        #    4: ( 0, 0)    5: ( 0, 2)    6: ( 0, 3)    7: ( 0, 4)
        #    8: ( 1, 0)    9: ( 1, 2)   10: ( 1, 3)   11: ( 1, 4)
        #
        # mult_idx_2 (position: tuple)
        #    0: (0, 1)     1: (0, 3)     2: (0, 4)
        mult_idx_1 = MultiIndex.from_product([[-1, 0, 1], [0, 2, 3, 4]])
        mult_idx_2 = MultiIndex.from_product([[0], [1, 3, 4]])

        indexer = mult_idx_1.get_indexer(mult_idx_2)
        expected = np.array([-1, 6, 7], dtype=indexer.dtype)
        tm.assert_almost_equal(expected, indexer)

        # "bfill" is the legacy spelling and must behave like "backfill"
        for method in ("backfill", "bfill"):
            backfill_indexer = mult_idx_1.get_indexer(mult_idx_2, method=method)
            expected = np.array([5, 6, 7], dtype=backfill_indexer.dtype)
            tm.assert_almost_equal(expected, backfill_indexer)

        # "ffill" is the legacy spelling and must behave like "pad"
        for method in ("pad", "ffill"):
            pad_indexer = mult_idx_1.get_indexer(mult_idx_2, method=method)
            expected = np.array([4, 6, 7], dtype=pad_indexer.dtype)
            tm.assert_almost_equal(expected, pad_indexer)

    def test_get_indexer_three_or_more_levels(self):
        """``get_indexer`` with fill methods on three-level indexes.

        See https://github.com/pandas-dev/pandas/issues/29896
        """
        # mult_idx_1 (position: tuple)
        #    0: (1, 2, 5)    1: (1, 2, 7)    2: (1, 4, 5)    3: (1, 4, 7)
        #    4: (1, 6, 5)    5: (1, 6, 7)    6: (3, 2, 5)    7: (3, 2, 7)
        #    8: (3, 4, 5)    9: (3, 4, 7)   10: (3, 6, 5)   11: (3, 6, 7)
        #
        # mult_idx_2 (position: tuple)
        #    0: (1, 1, 8)    1: (1, 5, 9)    2: (1, 6, 7)    3: (2, 1, 6)
        #    4: (2, 7, 7)    5: (2, 7, 8)    6: (3, 6, 8)
        mult_idx_1 = MultiIndex.from_product([[1, 3], [2, 4, 6], [5, 7]])
        mult_idx_2 = MultiIndex.from_tuples(
            [
                (1, 1, 8),
                (1, 5, 9),
                (1, 6, 7),
                (2, 1, 6),
                (2, 7, 7),
                (2, 7, 8),
                (3, 6, 8),
            ]
        )

        # sanity check
        assert mult_idx_1.is_monotonic
        assert mult_idx_1.is_unique
        assert mult_idx_2.is_monotonic
        assert mult_idx_2.is_unique

        # where each row of mult_idx_2 sits relative to mult_idx_1
        assert mult_idx_2[0] < mult_idx_1[0]
        assert mult_idx_1[3] < mult_idx_2[1] < mult_idx_1[4]
        assert mult_idx_1[5] == mult_idx_2[2]
        for pos in (3, 4, 5):
            assert mult_idx_1[5] < mult_idx_2[pos] < mult_idx_1[6]
        assert mult_idx_1[-1] < mult_idx_2[6]

        indexer_no_fill = mult_idx_1.get_indexer(mult_idx_2)
        expected = np.array([-1, -1, 5, -1, -1, -1, -1], dtype=indexer_no_fill.dtype)
        tm.assert_almost_equal(expected, indexer_no_fill)

        # test with backfilling
        indexer_backfilled = mult_idx_1.get_indexer(mult_idx_2, method="backfill")
        expected = np.array([0, 4, 5, 6, 6, 6, -1], dtype=indexer_backfilled.dtype)
        tm.assert_almost_equal(expected, indexer_backfilled)

        # now, the same thing, but forward-filled (aka "padded")
        indexer_padded = mult_idx_1.get_indexer(mult_idx_2, method="pad")
        expected = np.array([-1, 3, 5, 5, 5, 5, 11], dtype=indexer_padded.dtype)
        tm.assert_almost_equal(expected, indexer_padded)

        # now, do the indexing in the other direction
        for pos in range(4):
            assert mult_idx_2[0] < mult_idx_1[pos] < mult_idx_2[1]
        assert mult_idx_2[1] < mult_idx_1[4] < mult_idx_2[2]
        assert mult_idx_2[2] == mult_idx_1[5]
        for pos in range(6, 12):
            assert mult_idx_2[5] < mult_idx_1[pos] < mult_idx_2[6]

        indexer = mult_idx_2.get_indexer(mult_idx_1)
        expected = np.array(
            [-1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1], dtype=indexer.dtype
        )
        tm.assert_almost_equal(expected, indexer)

        backfill_indexer = mult_idx_2.get_indexer(mult_idx_1, method="bfill")
        expected = np.array(
            [1, 1, 1, 1, 2, 2, 6, 6, 6, 6, 6, 6], dtype=backfill_indexer.dtype
        )
        tm.assert_almost_equal(expected, backfill_indexer)

        pad_indexer = mult_idx_2.get_indexer(mult_idx_1, method="pad")
        expected = np.array(
            [0, 0, 0, 0, 1, 2, 5, 5, 5, 5, 5, 5], dtype=pad_indexer.dtype
        )
        tm.assert_almost_equal(expected, pad_indexer)

    def test_get_indexer_crossing_levels(self):
        """Fill methods must respect tuple ordering across levels.

        See https://github.com/pandas-dev/pandas/issues/29896
        """
        # mult_idx_1 is the 16-row product of [1, 2] taken four times, so
        # position 7 is (1, 2, 2, 2), position 8 is (2, 1, 1, 1) and the
        # last position, 15, is (2, 2, 2, 2).
        #
        # mult_idx_2 (position: tuple)
        #    0: (1, 3, 2, 2)    1: (2, 3, 2, 2)
        mult_idx_1 = MultiIndex.from_product([[1, 2]] * 4)
        mult_idx_2 = MultiIndex.from_tuples([(1, 3, 2, 2), (2, 3, 2, 2)])

        # show the tuple orderings, which get_indexer() should respect
        assert mult_idx_1[7] < mult_idx_2[0] < mult_idx_1[8]
        assert mult_idx_1[-1] < mult_idx_2[1]

        indexer = mult_idx_1.get_indexer(mult_idx_2)
        expected = np.array([-1, -1], dtype=indexer.dtype)
        tm.assert_almost_equal(expected, indexer)

        backfill_indexer = mult_idx_1.get_indexer(mult_idx_2, method="bfill")
        expected = np.array([8, -1], dtype=backfill_indexer.dtype)
        tm.assert_almost_equal(expected, backfill_indexer)

        pad_indexer = mult_idx_1.get_indexer(mult_idx_2, method="ffill")
        expected = np.array([7, 15], dtype=pad_indexer.dtype)
        tm.assert_almost_equal(expected, pad_indexer)

    def test_get_indexer_kwarg_validation(self):
        """GH#41918: ``limit`` / ``tolerance`` without a method raise ValueError."""
        mi = MultiIndex.from_product([range(3), ["A", "B"]])
        target = mi[:-1]

        msg = "limit argument only valid if doing pad, backfill or nearest"
        with pytest.raises(ValueError, match=msg):
            mi.get_indexer(target, limit=4)

        msg = "tolerance argument only valid if doing pad, backfill or nearest"
        with pytest.raises(ValueError, match=msg):
            mi.get_indexer(target, tolerance="piano")
|
||||
|
||||
|
||||
def test_getitem(idx):
    """``__getitem__`` with an integer, a slice and boolean masks."""
    # integer position
    assert idx[2] == ("bar", "one")

    # slice against the equivalent list of positions
    assert idx[2:5].equals(idx[[2, 3, 4]])

    # boolean mask, given as a list and as an ndarray
    flags = [True, False, True, False, True, True]
    from_list = idx[flags]
    from_array = idx[np.array(flags)]
    wanted = idx[[0, 2, 4, 5]]
    assert from_list.equals(wanted)
    assert from_array.equals(wanted)
|
||||
|
||||
|
||||
def test_getitem_group_select(idx):
    """``get_loc`` with a first-level label returns a slice once sorted."""
    ordered, _ = idx.sortlevel(0)

    baz_rows = ordered.get_loc("baz")
    assert baz_rows == slice(3, 4)

    foo_rows = ordered.get_loc("foo")
    assert foo_rows == slice(0, 2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ind1", [[True] * 5, Index([True] * 5)])
@pytest.mark.parametrize(
    "ind2",
    [[True, False, True, False, False], Index([True, False, True, False, False])],
)
def test_getitem_bool_index_all(ind1, ind2):
    """GH#22533: boolean lists and boolean ``Index`` objects both work as masks."""
    mi = MultiIndex.from_tuples([(10, 1), (20, 2), (30, 3), (40, 4), (50, 5)])

    # an all-True mask selects everything
    tm.assert_index_equal(mi[ind1], mi)

    # a partial mask keeps the rows at positions 0 and 2
    kept = MultiIndex.from_tuples([(10, 1), (30, 3)])
    tm.assert_index_equal(mi[ind2], kept)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ind1", [[True], Index([True])])
@pytest.mark.parametrize("ind2", [[False], Index([False])])
def test_getitem_bool_index_single(ind1, ind2):
    """Single-element boolean masks: ``True`` keeps the row, ``False`` yields an empty index."""
    # GH#22533
    idx = MultiIndex.from_tuples([(10, 1)])
    tm.assert_index_equal(idx[ind1], idx)

    expected = MultiIndex(
        levels=[np.array([], dtype=np.int64), np.array([], dtype=np.int64)],
        codes=[[], []],
    )
    tm.assert_index_equal(idx[ind2], expected)
|
||||
|
||||
|
||||
class TestGetLoc:
    """Tests for ``get_loc`` / ``get_loc_level`` lookups."""

    def test_get_loc(self, idx):
        """Full-tuple and missing-key lookups on a 2-level and a 3-level index."""
        assert idx.get_loc(("foo", "two")) == 1
        assert idx.get_loc(("baz", "two")) == 3
        with pytest.raises(KeyError, match=r"^10$"):
            idx.get_loc(("bar", "two"))
        with pytest.raises(KeyError, match=r"^'quux'$"):
            idx.get_loc("quux")

        msg = "only the default get_loc method is currently supported for MultiIndex"
        with pytest.raises(NotImplementedError, match=msg):
            idx.get_loc("foo", method="nearest")

        # 3 levels
        index = MultiIndex(
            levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))],
            codes=[
                np.array([0, 0, 1, 2, 2, 2, 3, 3]),
                np.array([0, 1, 0, 0, 0, 1, 0, 1]),
                np.array([1, 0, 1, 1, 0, 0, 1, 0]),
            ],
        )
        with pytest.raises(KeyError, match=r"^\(1, 1\)$"):
            index.get_loc((1, 1))
        assert index.get_loc((2, 0)) == slice(3, 5)

    def test_get_loc_duplicates(self):
        """``get_loc`` on flat indexes holding duplicate labels."""
        index = Index([2, 2, 2, 2])
        result = index.get_loc(2)
        expected = slice(0, 4)
        assert result == expected

        index = Index(["c", "a", "a", "b", "b"])
        rs = index.get_loc("c")
        xp = 0
        assert rs == xp

        with pytest.raises(KeyError, match="2"):
            index.get_loc(2)

    def test_get_loc_level(self):
        """``get_loc_level`` returns the location plus the index with matched levels dropped."""
        index = MultiIndex(
            levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))],
            codes=[
                np.array([0, 0, 1, 2, 2, 2, 3, 3]),
                np.array([0, 1, 0, 0, 0, 1, 0, 1]),
                np.array([1, 0, 1, 1, 0, 0, 1, 0]),
            ],
        )
        loc, new_index = index.get_loc_level((0, 1))
        expected = slice(1, 2)
        exp_index = index[expected].droplevel(0).droplevel(0)
        assert loc == expected
        assert new_index.equals(exp_index)

        loc, new_index = index.get_loc_level((0, 1, 0))
        expected = 1
        assert loc == expected
        assert new_index is None

        with pytest.raises(KeyError, match=r"^\(2, 2\)$"):
            index.get_loc_level((2, 2))
        # GH 22221: unused label
        with pytest.raises(KeyError, match=r"^2$"):
            index.drop(2).get_loc_level(2)
        # Unused label on unsorted level:
        with pytest.raises(KeyError, match=r"^2$"):
            index.drop(1, level=2).get_loc_level(2, level=2)

        index = MultiIndex(
            levels=[[2000], list(range(4))],
            codes=[np.array([0, 0, 0, 0]), np.array([0, 1, 2, 3])],
        )
        result, new_index = index.get_loc_level((2000, slice(None, None)))
        expected = slice(None, None)
        assert result == expected
        assert new_index.equals(index.droplevel(0))

    @pytest.mark.parametrize("dtype1", [int, float, bool, str])
    @pytest.mark.parametrize("dtype2", [int, float, bool, str])
    def test_get_loc_multiple_dtypes(self, dtype1, dtype2):
        """Lookup works for every pairing of level dtypes."""
        # GH 18520
        levels = [np.array([0, 1]).astype(dtype1), np.array([0, 1]).astype(dtype2)]
        idx = MultiIndex.from_product(levels)
        assert idx.get_loc(idx[2]) == 2

    @pytest.mark.parametrize("level", [0, 1])
    @pytest.mark.parametrize("dtypes", [[int, float], [float, int]])
    def test_get_loc_implicit_cast(self, level, dtypes):
        """An int key finds a float level value and vice versa."""
        # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa
        levels = [["a", "b"], ["c", "d"]]
        key = ["b", "d"]
        lev_dtype, key_dtype = dtypes
        levels[level] = np.array([0, 1], dtype=lev_dtype)
        key[level] = key_dtype(1)
        idx = MultiIndex.from_product(levels)
        assert idx.get_loc(tuple(key)) == 3

    def test_get_loc_cast_bool(self):
        """Int keys match a bool level, but bool keys do not match an int level."""
        # GH 19086 : int is cast to bool, but not vice-versa
        levels = [[False, True], np.arange(2, dtype="int64")]
        idx = MultiIndex.from_product(levels)

        assert idx.get_loc((0, 1)) == 1
        assert idx.get_loc((1, 0)) == 2

        with pytest.raises(KeyError, match=r"^\(False, True\)$"):
            idx.get_loc((False, True))
        with pytest.raises(KeyError, match=r"^\(True, False\)$"):
            idx.get_loc((True, False))

    @pytest.mark.parametrize("level", [0, 1])
    def test_get_loc_nan(self, level, nulls_fixture):
        """A null value stored in a level can be looked up."""
        # GH 18485 : NaN in MultiIndex
        levels = [["a", "b"], ["c", "d"]]
        key = ["b", "d"]
        levels[level] = np.array([0, nulls_fixture], dtype=type(nulls_fixture))
        key[level] = nulls_fixture
        idx = MultiIndex.from_product(levels)
        assert idx.get_loc(tuple(key)) == 3

    def test_get_loc_missing_nan(self):
        """Missing scalars raise KeyError; a list key raises InvalidIndexError."""
        # GH 8569
        idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]])
        assert isinstance(idx.get_loc(1), slice)
        with pytest.raises(KeyError, match=r"^3$"):
            idx.get_loc(3)
        with pytest.raises(KeyError, match=r"^nan$"):
            idx.get_loc(np.nan)
        with pytest.raises(InvalidIndexError, match=r"\[nan\]"):
            # listlike/non-hashable raises InvalidIndexError
            idx.get_loc([np.nan])

    def test_get_loc_with_values_including_missing_values(self):
        """NaN keys resolve to a slice or a boolean mask depending on the index."""
        # issue 19132
        idx = MultiIndex.from_product([[np.nan, 1]] * 2)
        expected = slice(0, 2, None)
        assert idx.get_loc(np.nan) == expected

        idx = MultiIndex.from_arrays([[np.nan, 1, 2, np.nan]])
        expected = np.array([True, False, False, True])
        tm.assert_numpy_array_equal(idx.get_loc(np.nan), expected)

        idx = MultiIndex.from_product([[np.nan, 1]] * 3)
        expected = slice(2, 4, None)
        assert idx.get_loc((np.nan, 1)) == expected

    def test_get_loc_duplicates2(self):
        """A first-level label with several rows resolves to a slice."""
        # TODO: de-duplicate with test_get_loc_duplicates above?
        index = MultiIndex(
            levels=[["D", "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]],
            codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
            names=["tag", "day"],
        )

        assert index.get_loc("D") == slice(0, 3)

    def test_get_loc_past_lexsort_depth(self):
        """A key longer than the declared sort depth warns but still resolves."""
        # GH#30053
        idx = MultiIndex(
            levels=[["a"], [0, 7], [1]],
            codes=[[0, 0], [1, 0], [0, 0]],
            names=["x", "y", "z"],
            sortorder=0,
        )
        key = ("a", 7)

        with tm.assert_produces_warning(PerformanceWarning):
            # PerformanceWarning: indexing past lexsort depth may impact performance
            result = idx.get_loc(key)

        assert result == slice(0, 1, None)

    def test_multiindex_get_loc_list_raises(self):
        """An empty list key raises InvalidIndexError."""
        # GH#35878
        idx = MultiIndex.from_tuples([("a", 1), ("b", 2)])
        msg = r"\[\]"
        with pytest.raises(InvalidIndexError, match=msg):
            idx.get_loc([])

    def test_get_loc_nested_tuple_raises_keyerror(self):
        """A nested-tuple key raises KeyError whose message contains the key."""
        # raise KeyError, not TypeError
        mi = MultiIndex.from_product([range(3), range(4), range(5), range(6)])
        key = ((2, 3, 4), "foo")

        with pytest.raises(KeyError, match=re.escape(str(key))):
            mi.get_loc(key)
|
||||
|
||||
|
||||
class TestWhere:
    """``MultiIndex.where`` is expected to raise NotImplementedError."""

    def test_where(self):
        """A scalar condition raises."""
        i = MultiIndex.from_tuples([("A", 1), ("A", 2)])

        msg = r"\.where is not supported for MultiIndex operations"
        with pytest.raises(NotImplementedError, match=msg):
            i.where(True)

    def test_where_array_like(self, listlike_box):
        """A list-like condition, wrapped by ``listlike_box``, raises as well."""
        mi = MultiIndex.from_tuples([("A", 1), ("A", 2)])
        cond = [False, True]
        msg = r"\.where is not supported for MultiIndex operations"
        with pytest.raises(NotImplementedError, match=msg):
            mi.where(listlike_box(cond))
|
||||
|
||||
|
||||
class TestContains:
    """Tests for the ``in`` operator on a MultiIndex."""

    def test_contains_top_level(self):
        """A first-level label is ``in`` the index but not ``in`` its engine."""
        midx = MultiIndex.from_product([["A", "B"], [1, 2]])
        assert "A" in midx
        assert "A" not in midx._engine

    def test_contains_with_nat(self):
        """Every value of an index containing a missing datetime (code -1) is found."""
        # MI with a NaT
        mi = MultiIndex(
            levels=[["C"], date_range("2012-01-01", periods=5)],
            codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]],
            names=[None, "B"],
        )
        assert ("C", pd.Timestamp("2012-01-01")) in mi
        for val in mi.values:
            assert val in mi

    def test_contains(self, idx):
        """Present tuple is found; absent tuple and ``None`` are not."""
        assert ("foo", "two") in idx
        assert ("bar", "two") not in idx
        assert None not in idx

    def test_contains_with_missing_value(self):
        """NaN membership for first-level NaN versus second-level NaN."""
        # GH#19132
        idx = MultiIndex.from_arrays([[1, np.nan, 2]])
        assert np.nan in idx

        idx = MultiIndex.from_arrays([[1, 2], [np.nan, 3]])
        assert np.nan not in idx
        assert (1, np.nan) in idx

    def test_multiindex_contains_dropped(self):
        """Dropped labels stay in ``levels`` but are no longer ``in`` the index."""
        # GH#19027
        # test that dropped MultiIndex levels are not in the MultiIndex
        # despite continuing to be in the MultiIndex's levels
        idx = MultiIndex.from_product([[1, 2], [3, 4]])
        assert 2 in idx
        idx = idx.drop(2)

        # drop implementation keeps 2 in the levels
        assert 2 in idx.levels[0]
        # but it should no longer be in the index itself
        assert 2 not in idx

        # also applies to strings
        idx = MultiIndex.from_product([["a", "b"], ["c", "d"]])
        assert "a" in idx
        idx = idx.drop("a")
        assert "a" in idx.levels[0]
        assert "a" not in idx

    def test_contains_td64_level(self):
        """Membership against a timedelta64 first level, by value and by string."""
        # GH#24570
        tx = pd.timedelta_range("09:30:00", "16:00:00", freq="30 min")
        idx = MultiIndex.from_arrays([tx, np.arange(len(tx))])
        assert tx[0] in idx
        assert "element_not_exit" not in idx
        assert "0 day 09:30:00" in idx

    @pytest.mark.slow
    def test_large_mi_contains(self):
        """An absent tuple is reported absent in a million-row index."""
        # GH#10645
        result = MultiIndex.from_arrays([range(10**6), range(10**6)])
        assert (10**6, 0) not in result
|
||||
|
||||
|
||||
def test_timestamp_multiindex_indexer():
    """Partial-string slicing on a datetime level of a 3-level index via ``.loc``."""
    # https://github.com/pandas-dev/pandas/issues/26944
    idx = MultiIndex.from_product(
        [
            date_range("2019-01-01T00:15:33", periods=100, freq="H", name="date"),
            ["x"],
            [3],
        ]
    )
    df = pd.DataFrame({"foo": np.arange(len(idx))}, idx)
    result = df.loc[pd.IndexSlice["2019-1-2":, "x", :], "foo"]
    qidx = MultiIndex.from_product(
        [
            date_range(
                start="2019-01-02T00:15:33",
                end="2019-01-05T03:15:33",
                freq="H",
                name="date",
            ),
            ["x"],
            [3],
        ]
    )
    # the first 24 hourly rows fall on 2019-01-01 and are sliced away
    should_be = pd.Series(data=np.arange(24, len(qidx) + 24), index=qidx, name="foo")
    tm.assert_series_equal(result, should_be)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index_arr,expected,target,algo",
    [
        ([[np.nan, "a", "b"], ["c", "d", "e"]], 0, np.nan, "left"),
        ([[np.nan, "a", "b"], ["c", "d", "e"]], 1, (np.nan, "c"), "right"),
        ([["a", "b", "c"], ["d", np.nan, "d"]], 1, ("b", np.nan), "left"),
    ],
)
def test_get_slice_bound_with_missing_value(index_arr, expected, target, algo):
    """``get_slice_bound`` with NaN-containing targets; passing ``kind`` must warn."""
    # issue 19132
    idx = MultiIndex.from_arrays(index_arr)
    with tm.assert_produces_warning(FutureWarning, match="'kind' argument"):
        result = idx.get_slice_bound(target, side=algo, kind="loc")
    assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index_arr,expected,start_idx,end_idx",
    [
        ([[np.nan, 1, 2], [3, 4, 5]], slice(0, 2, None), np.nan, 1),
        ([[np.nan, 1, 2], [3, 4, 5]], slice(0, 3, None), np.nan, (2, 5)),
        ([[1, 2, 3], [4, np.nan, 5]], slice(1, 3, None), (2, np.nan), 3),
        ([[1, 2, 3], [4, np.nan, 5]], slice(1, 3, None), (2, np.nan), (3, 5)),
    ],
)
def test_slice_indexer_with_missing_value(index_arr, expected, start_idx, end_idx):
    """``slice_indexer`` with NaN in the start bound returns the expected slice."""
    # issue 19132
    idx = MultiIndex.from_arrays(index_arr)
    result = idx.slice_indexer(start=start_idx, end=end_idx)
    assert result == expected
|
||||
|
||||
|
||||
def test_pyint_engine():
    """Lookups stay collision-free when the combined codes need more than 64 bits."""
    # GH#18519 : when combinations of codes cannot be represented in 64
    # bits, the index underlying the MultiIndex engine works with Python
    # integers, rather than uint64.
    N = 5
    keys = [
        tuple(arr)
        for arr in [
            [0] * 10 * N,
            [1] * 10 * N,
            [2] * 10 * N,
            [np.nan] * N + [2] * 9 * N,
            [0] * N + [2] * 9 * N,
            [np.nan] * N + [2] * 8 * N + [0] * N,
        ]
    ]
    # Each level contains 4 elements (including NaN), so it is represented
    # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a
    # 64 bit engine and truncating the first levels, the fourth and fifth
    # keys would collide; if truncating the last levels, the fifth and
    # sixth; if rotating bits rather than shifting, the third and fifth.

    for pos, key in enumerate(keys):
        # a fresh index is built on every iteration, as in the original test
        index = MultiIndex.from_tuples(keys)
        assert index.get_loc(key) == pos

        expected = np.arange(pos + 1, dtype=np.intp)
        result = index.get_indexer(keys[: pos + 1])
        tm.assert_numpy_array_equal(result, expected)

    # With missing key:
    expected = np.array([-1] + list(range(len(keys))), dtype=np.intp)
    missing = tuple([0, 1] * 5 * N)
    result = index.get_indexer([missing] + keys)
    tm.assert_numpy_array_equal(result, expected)
|
@ -0,0 +1,280 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
IntervalIndex,
|
||||
MultiIndex,
|
||||
RangeIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.api import Int64Index
|
||||
|
||||
|
||||
def test_labels_dtypes():
    """Codes use the smallest signed integer dtype that fits the level size."""

    # GH 8456
    i = MultiIndex.from_tuples([("A", 1), ("A", 2)])
    assert i.codes[0].dtype == "int8"
    assert i.codes[1].dtype == "int8"

    i = MultiIndex.from_product([["a"], range(40)])
    assert i.codes[1].dtype == "int8"
    i = MultiIndex.from_product([["a"], range(400)])
    assert i.codes[1].dtype == "int16"
    i = MultiIndex.from_product([["a"], range(40000)])
    assert i.codes[1].dtype == "int32"

    i = MultiIndex.from_product([["a"], range(1000)])
    assert (i.codes[0] >= 0).all()
    assert (i.codes[1] >= 0).all()
|
||||
|
||||
|
||||
def test_values_boxed():
    """``.values`` yields the original tuples (Timestamp/NaT included) as an object array."""
    tuples = [
        (1, pd.Timestamp("2000-01-01")),
        (2, pd.NaT),
        (3, pd.Timestamp("2000-01-03")),
        (1, pd.Timestamp("2000-01-04")),
        (2, pd.Timestamp("2000-01-02")),
        (3, pd.Timestamp("2000-01-03")),
    ]
    result = MultiIndex.from_tuples(tuples)
    expected = construct_1d_object_array_from_listlike(tuples)
    tm.assert_numpy_array_equal(result.values, expected)
    # Check that code branches for boxed values produce identical results
    tm.assert_numpy_array_equal(result.values[:4], result[:4].values)
|
||||
|
||||
|
||||
def test_values_multiindex_datetimeindex():
    """``.values`` round-trips naive and tz-aware datetime levels."""
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(10**18, 10**18 + 5)
    naive = pd.DatetimeIndex(ints)

    aware = pd.DatetimeIndex(ints, tz="US/Central")

    idx = MultiIndex.from_arrays([naive, aware])
    result = idx.values

    outer = pd.DatetimeIndex([x[0] for x in result])
    tm.assert_index_equal(outer, naive)

    inner = pd.DatetimeIndex([x[1] for x in result])
    tm.assert_index_equal(inner, aware)

    # n_lev > n_lab
    result = idx[:2].values

    outer = pd.DatetimeIndex([x[0] for x in result])
    tm.assert_index_equal(outer, naive[:2])

    inner = pd.DatetimeIndex([x[1] for x in result])
    tm.assert_index_equal(inner, aware[:2])
|
||||
|
||||
|
||||
def test_values_multiindex_periodindex():
    """``.values`` round-trips an integer level paired with a Period level."""
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(2007, 2012)
    pidx = pd.PeriodIndex(ints, freq="D")

    idx = MultiIndex.from_arrays([ints, pidx])
    result = idx.values

    # pd.Index(..., dtype=np.int64) builds the same int64 index without
    # going through the deprecated Int64Index class
    outer = pd.Index([x[0] for x in result], dtype=np.int64)
    tm.assert_index_equal(outer, pd.Index(ints, dtype=np.int64))

    inner = pd.PeriodIndex([x[1] for x in result])
    tm.assert_index_equal(inner, pidx)

    # n_lev > n_lab
    result = idx[:2].values

    outer = pd.Index([x[0] for x in result], dtype=np.int64)
    tm.assert_index_equal(outer, pd.Index(ints[:2], dtype=np.int64))

    inner = pd.PeriodIndex([x[1] for x in result])
    tm.assert_index_equal(inner, pidx[:2])
|
||||
|
||||
|
||||
def test_consistency():
    """A 70000-label level constructs cleanly; duplicated code pairs are not unique."""
    # need to construct an overflow
    major_axis = list(range(70000))
    minor_axis = list(range(10))

    major_codes = np.arange(70000)
    minor_codes = np.repeat(range(10), 7000)

    # the fact that it works means it's consistent
    index = MultiIndex(
        levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
    )

    # inconsistent
    major_codes = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1])
    index = MultiIndex(
        levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
    )

    assert index.is_unique is False
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_hash_collisions():
    """Every tuple of a 1000x1000 product index maps back to its own position."""
    # non-smoke test that we don't get hash collisions

    index = MultiIndex.from_product(
        [np.arange(1000), np.arange(1000)], names=["one", "two"]
    )
    result = index.get_indexer(index.values)
    tm.assert_numpy_array_equal(result, np.arange(len(index), dtype="intp"))

    for i in [0, 1, len(index) - 2, len(index) - 1]:
        result = index.get_loc(index[i])
        assert result == i
|
||||
|
||||
|
||||
def test_dims():
    """Placeholder test: intentionally has no body and makes no assertions."""
    pass
|
||||
|
||||
|
||||
def test_take_invalid_kwargs():
    """``take`` rejects unknown keywords and the ``out`` / ``mode`` parameters."""
    vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]]
    idx = MultiIndex.from_product(vals, names=["str", "dt"])
    indices = [1, 2]

    msg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=msg):
        idx.take(indices, foo=2)

    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, out=indices)

    msg = "the 'mode' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, mode="clip")
|
||||
|
||||
|
||||
def test_isna_behavior(idx):
    """``pd.isna`` on a MultiIndex raises NotImplementedError."""
    # should not segfault GH5123
    # NOTE: if MI representation changes, may make sense to allow
    # isna(MI)
    msg = "isna is not defined for MultiIndex"
    with pytest.raises(NotImplementedError, match=msg):
        pd.isna(idx)
|
||||
|
||||
|
||||
def test_large_multiindex_error():
    """Missing keys raise KeyError for indexes just below and just above 1,000,000 rows."""
    # GH12527
    df_below_1000000 = pd.DataFrame(
        1, index=MultiIndex.from_product([[1, 2], range(499999)]), columns=["dest"]
    )
    with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
        df_below_1000000.loc[(-1, 0), "dest"]
    with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
        df_below_1000000.loc[(3, 0), "dest"]
    df_above_1000000 = pd.DataFrame(
        1, index=MultiIndex.from_product([[1, 2], range(500001)]), columns=["dest"]
    )
    with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
        df_above_1000000.loc[(-1, 0), "dest"]
    with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
        df_above_1000000.loc[(3, 0), "dest"]
|
||||
|
||||
|
||||
def test_million_record_attribute_error():
    """Unknown attribute on a column of a million-row MultiIndex frame raises AttributeError."""
    # GH 18165
    r = list(range(1000000))
    df = pd.DataFrame(
        {"a": r, "b": r}, index=MultiIndex.from_tuples([(x, x) for x in r])
    )

    msg = "'Series' object has no attribute 'foo'"
    with pytest.raises(AttributeError, match=msg):
        df["a"].foo()
|
||||
|
||||
|
||||
def test_can_hold_identifiers(idx):
|
||||
key = idx[0]
|
||||
assert idx._can_hold_identifiers_and_holds_name(key) is True
|
||||
|
||||
|
||||
def test_metadata_immutable(idx):
    """In-place assignment into ``levels``, ``codes`` and ``names`` must raise."""
    levels, codes = idx.levels, idx.codes
    # shouldn't be able to set at either the top level or base level
    mutable_regex = re.compile("does not support mutable operations")
    with pytest.raises(TypeError, match=mutable_regex):
        levels[0] = levels[0]
    with pytest.raises(TypeError, match=mutable_regex):
        levels[0][0] = levels[0][0]
    # ditto for labels
    with pytest.raises(TypeError, match=mutable_regex):
        codes[0] = codes[0]
    with pytest.raises(ValueError, match="assignment destination is read-only"):
        codes[0][0] = codes[0][0]
    # and for names
    names = idx.names
    with pytest.raises(TypeError, match=mutable_regex):
        names[0] = names[0]
|
||||
|
||||
|
||||
def test_level_setting_resets_attributes():
    """In-place ``set_levels`` warns and must invalidate the cached monotonic flag."""
    ind = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
    assert ind.is_monotonic
    with tm.assert_produces_warning(FutureWarning):
        ind.set_levels([["A", "B"], [1, 3, 2]], inplace=True)
    # if this fails, probably didn't reset the cache correctly.
    assert not ind.is_monotonic
|
||||
|
||||
|
||||
def test_rangeindex_fallback_coercion_bug():
    """Stacked frames concatenated on axis 1 keep integer level values."""
    # GH 12893
    foo = pd.DataFrame(np.arange(100).reshape((10, 10)))
    bar = pd.DataFrame(np.arange(100).reshape((10, 10)))
    df = pd.concat({"foo": foo.stack(), "bar": bar.stack()}, axis=1)
    df.index.names = ["fizz", "buzz"]

    # rendering the frame is part of the test
    str(df)
    expected = pd.DataFrame(
        {"bar": np.arange(100), "foo": np.arange(100)},
        index=MultiIndex.from_product([range(10), range(10)], names=["fizz", "buzz"]),
    )
    tm.assert_frame_equal(df, expected, check_like=True)

    # pd.Index(..., dtype=np.int64) builds the same int64 index without
    # going through the deprecated Int64Index class
    result = df.index.get_level_values("fizz")
    expected = pd.Index(np.arange(10), dtype=np.int64, name="fizz").repeat(10)
    tm.assert_index_equal(result, expected)

    result = df.index.get_level_values("buzz")
    expected = pd.Index(np.tile(np.arange(10), 10), dtype=np.int64, name="buzz")
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_memory_usage(idx):
|
||||
result = idx.memory_usage()
|
||||
if len(idx):
|
||||
idx.get_loc(idx[0])
|
||||
result2 = idx.memory_usage()
|
||||
result3 = idx.memory_usage(deep=True)
|
||||
|
||||
# RangeIndex, IntervalIndex
|
||||
# don't have engines
|
||||
if not isinstance(idx, (RangeIndex, IntervalIndex)):
|
||||
assert result2 > result
|
||||
|
||||
if idx.inferred_type == "object":
|
||||
assert result3 > result2
|
||||
|
||||
else:
|
||||
|
||||
# we report 0 for no-length
|
||||
assert result == 0
|
||||
|
||||
|
||||
def test_nlevels(idx):
|
||||
assert idx.nlevels == 2
|
@ -0,0 +1,78 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import MultiIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_isin_nan():
    """A tuple containing NaN matches whether spelled ``np.nan`` or ``float("nan")``."""
    idx = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]])
    tm.assert_numpy_array_equal(idx.isin([("bar", np.nan)]), np.array([False, True]))
    tm.assert_numpy_array_equal(
        idx.isin([("bar", float("nan"))]), np.array([False, True])
    )
|
||||
|
||||
|
||||
def test_isin():
    """``isin`` with full tuples; an empty index returns an empty bool array."""
    values = [("foo", 2), ("bar", 3), ("quux", 4)]

    idx = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])
    result = idx.isin(values)
    expected = np.array([False, False, True, True])
    tm.assert_numpy_array_equal(result, expected)

    # empty, return dtype bool
    idx = MultiIndex.from_arrays([[], []])
    result = idx.isin(values)
    assert len(result) == 0
    assert result.dtype == np.bool_
|
||||
|
||||
|
||||
def test_isin_level_kwarg():
    """``isin(level=...)`` by position, negative position and name, plus invalid levels."""
    idx = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])

    vals_0 = ["foo", "bar", "quux"]
    vals_1 = [2, 3, 10]

    expected = np.array([False, False, True, True])
    tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0))
    tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2))

    tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1))
    tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1))

    msg = "Too many levels: Index has only 2 levels, not 6"
    with pytest.raises(IndexError, match=msg):
        idx.isin(vals_0, level=5)
    msg = "Too many levels: Index has only 2 levels, -5 is not a valid level number"
    with pytest.raises(IndexError, match=msg):
        idx.isin(vals_0, level=-5)

    with pytest.raises(KeyError, match=r"'Level 1\.0 not found'"):
        idx.isin(vals_0, level=1.0)
    with pytest.raises(KeyError, match=r"'Level -1\.0 not found'"):
        idx.isin(vals_1, level=-1.0)
    with pytest.raises(KeyError, match="'Level A not found'"):
        idx.isin(vals_1, level="A")

    # once the levels are named, lookup by name works
    idx.names = ["A", "B"]
    tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level="A"))
    tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level="B"))

    with pytest.raises(KeyError, match="'Level C not found'"):
        idx.isin(vals_1, level="C")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "labels,expected,level",
    [
        ([("b", np.nan)], np.array([False, False, True]), None),
        ([np.nan, "a"], np.array([True, True, False]), 0),
        (["d", np.nan], np.array([False, True, True]), 1),
    ],
)
def test_isin_multi_index_with_missing_value(labels, expected, level):
    """``isin`` matches NaN labels, both as full tuples and per level."""
    # GH 19132
    midx = MultiIndex.from_arrays([[np.nan, "a", "b"], ["c", "d", np.nan]])
    result = midx.isin(labels, level=level)
    tm.assert_numpy_array_equal(result, expected)
|
160
.venv/Lib/site-packages/pandas/tests/indexes/multi/test_join.py
Normal file
160
.venv/Lib/site-packages/pandas/tests/indexes/multi/test_join.py
Normal file
@ -0,0 +1,160 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
Interval,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "other", [Index(["three", "one", "two"]), Index(["one"]), Index(["one", "three"])]
)
def test_join_level(idx, other, join_type):
    """Joining a flat index with a MultiIndex on level ``"second"``."""
    join_index, lidx, ridx = other.join(
        idx, how=join_type, level="second", return_indexers=True
    )

    exp_level = other.join(idx.levels[1], how=join_type)
    assert join_index.levels[0].equals(idx.levels[0])
    assert join_index.levels[1].equals(exp_level)

    # pare down levels
    mask = np.array([x[1] in exp_level for x in idx], dtype=bool)
    exp_values = idx.values[mask]
    tm.assert_numpy_array_equal(join_index.values, exp_values)

    if join_type in ("outer", "inner"):
        # same join with the operands swapped; the indexers come back swapped too
        join_index2, ridx2, lidx2 = idx.join(
            other, how=join_type, level="second", return_indexers=True
        )

        assert join_index.equals(join_index2)
        tm.assert_numpy_array_equal(lidx, lidx2)
        tm.assert_numpy_array_equal(ridx, ridx2)
        tm.assert_numpy_array_equal(join_index2.values, exp_values)
|
||||
|
||||
|
||||
def test_join_level_corner_case(idx):
    """Level join returns a MultiIndex; a level join between two MultiIndexes raises."""
    # some corner cases
    index = Index(["three", "one", "two"])
    result = index.join(idx, level="second")
    assert isinstance(result, MultiIndex)

    with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"):
        idx.join(idx, level=1)
|
||||
|
||||
|
||||
def test_join_self(idx, join_type):
|
||||
joined = idx.join(idx, how=join_type)
|
||||
tm.assert_index_equal(joined, idx)
|
||||
|
||||
|
||||
def test_join_multi():
    """Join a MultiIndex with a flat index sharing the level name ``"b"``, in both directions."""
    # GH 10665
    midx = MultiIndex.from_product([np.arange(4), np.arange(4)], names=["a", "b"])
    idx = Index([1, 2, 5], name="b")

    # inner
    jidx, lidx, ridx = midx.join(idx, how="inner", return_indexers=True)
    exp_idx = MultiIndex.from_product([np.arange(4), [1, 2]], names=["a", "b"])
    exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp)
    exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp)
    tm.assert_index_equal(jidx, exp_idx)
    tm.assert_numpy_array_equal(lidx, exp_lidx)
    tm.assert_numpy_array_equal(ridx, exp_ridx)
    # flip
    jidx, ridx, lidx = idx.join(midx, how="inner", return_indexers=True)
    tm.assert_index_equal(jidx, exp_idx)
    tm.assert_numpy_array_equal(lidx, exp_lidx)
    tm.assert_numpy_array_equal(ridx, exp_ridx)

    # keep MultiIndex
    jidx, lidx, ridx = midx.join(idx, how="left", return_indexers=True)
    exp_ridx = np.array(
        [-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1], dtype=np.intp
    )
    tm.assert_index_equal(jidx, midx)
    assert lidx is None
    tm.assert_numpy_array_equal(ridx, exp_ridx)
    # flip
    jidx, ridx, lidx = idx.join(midx, how="right", return_indexers=True)
    tm.assert_index_equal(jidx, midx)
    assert lidx is None
    tm.assert_numpy_array_equal(ridx, exp_ridx)
|
||||
|
||||
|
||||
def test_join_self_unique(idx, join_type):
|
||||
if idx.is_unique:
|
||||
joined = idx.join(idx, how=join_type)
|
||||
assert (idx == joined).all()
|
||||
|
||||
|
||||
def test_join_multi_wrong_order():
    """Joining indexes whose level names are in swapped order matches nothing on the right."""
    # GH 25760
    # GH 28956

    midx1 = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"])
    midx2 = MultiIndex.from_product([[1, 2], [3, 4]], names=["b", "a"])

    join_idx, lidx, ridx = midx1.join(midx2, return_indexers=True)

    exp_ridx = np.array([-1, -1, -1, -1], dtype=np.intp)

    tm.assert_index_equal(midx1, join_idx)
    assert lidx is None
    tm.assert_numpy_array_equal(ridx, exp_ridx)
|
||||
|
||||
|
||||
def test_join_multi_return_indexers():
    """With ``return_indexers=False`` the join returns only the index."""
    # GH 34074

    midx1 = MultiIndex.from_product([[1, 2], [3, 4], [5, 6]], names=["a", "b", "c"])
    midx2 = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"])

    result = midx1.join(midx2, return_indexers=False)
    tm.assert_index_equal(result, midx1)
|
||||
|
||||
|
||||
def test_join_overlapping_interval_level():
    """Outer join of two indexes holding the same (num, interval) pairs in different order."""
    # GH 44096
    idx_1 = MultiIndex.from_tuples(
        [
            (1, Interval(0.0, 1.0)),
            (1, Interval(1.0, 2.0)),
            (1, Interval(2.0, 5.0)),
            (2, Interval(0.0, 1.0)),
            (2, Interval(1.0, 3.0)),  # interval limit is here at 3.0, not at 2.0
            (2, Interval(3.0, 5.0)),
        ],
        names=["num", "interval"],
    )

    idx_2 = MultiIndex.from_tuples(
        [
            (1, Interval(2.0, 5.0)),
            (1, Interval(0.0, 1.0)),
            (1, Interval(1.0, 2.0)),
            (2, Interval(3.0, 5.0)),
            (2, Interval(0.0, 1.0)),
            (2, Interval(1.0, 3.0)),
        ],
        names=["num", "interval"],
    )

    expected = MultiIndex.from_tuples(
        [
            (1, Interval(0.0, 1.0)),
            (1, Interval(1.0, 2.0)),
            (1, Interval(2.0, 5.0)),
            (2, Interval(0.0, 1.0)),
            (2, Interval(1.0, 3.0)),
            (2, Interval(3.0, 5.0)),
        ],
        names=["num", "interval"],
    )
    result = idx_1.join(idx_2, how="outer")

    tm.assert_index_equal(result, expected)
|
@ -0,0 +1,57 @@
|
||||
from pandas import MultiIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestIsLexsorted:
|
||||
def test_is_lexsorted(self):
|
||||
levels = [[0, 1], [0, 1, 2]]
|
||||
|
||||
index = MultiIndex(
|
||||
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
|
||||
)
|
||||
assert index._is_lexsorted()
|
||||
|
||||
index = MultiIndex(
|
||||
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]
|
||||
)
|
||||
assert not index._is_lexsorted()
|
||||
|
||||
index = MultiIndex(
|
||||
levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]
|
||||
)
|
||||
assert not index._is_lexsorted()
|
||||
assert index._lexsort_depth == 0
|
||||
|
||||
def test_is_lexsorted_deprecation(self):
|
||||
# GH 32259
|
||||
with tm.assert_produces_warning():
|
||||
MultiIndex.from_arrays([["a", "b", "c"], ["d", "f", "e"]]).is_lexsorted()
|
||||
|
||||
|
||||
class TestLexsortDepth:
|
||||
def test_lexsort_depth(self):
|
||||
# Test that lexsort_depth return the correct sortorder
|
||||
# when it was given to the MultiIndex const.
|
||||
# GH#28518
|
||||
|
||||
levels = [[0, 1], [0, 1, 2]]
|
||||
|
||||
index = MultiIndex(
|
||||
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
|
||||
)
|
||||
assert index._lexsort_depth == 2
|
||||
|
||||
index = MultiIndex(
|
||||
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1
|
||||
)
|
||||
assert index._lexsort_depth == 1
|
||||
|
||||
index = MultiIndex(
|
||||
levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0
|
||||
)
|
||||
assert index._lexsort_depth == 0
|
||||
|
||||
def test_lexsort_depth_deprecation(self):
|
||||
# GH 32259
|
||||
with tm.assert_produces_warning():
|
||||
MultiIndex.from_arrays([["a", "b", "c"], ["d", "f", "e"]]).lexsort_depth
|
@ -0,0 +1,112 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import MultiIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_fillna(idx):
    """``MultiIndex.fillna`` must raise ``NotImplementedError`` (GH 11343)."""
    fill_value = idx[0]
    with pytest.raises(
        NotImplementedError, match="isna is not defined for MultiIndex"
    ):
        idx.fillna(fill_value)
|
||||
|
||||
|
||||
def test_dropna():
    """Exercise ``MultiIndex.dropna`` with ``how="any"``, ``how="all"`` and a bad ``how``."""
    # GH 6194
    idx = MultiIndex.from_arrays(
        [
            [1, np.nan, 3, np.nan, 5],
            [1, 2, np.nan, np.nan, 5],
            ["a", "b", "c", np.nan, "e"],
        ]
    )

    # Default and explicit "any" must agree: only fully populated rows remain.
    expected_any = MultiIndex.from_arrays([[1, 5], [1, 5], ["a", "e"]])
    tm.assert_index_equal(idx.dropna(), expected_any)
    tm.assert_index_equal(idx.dropna(how="any"), expected_any)

    # "all" removes only the row that is missing in every level.
    expected_all = MultiIndex.from_arrays(
        [[1, np.nan, 3, 5], [1, 2, np.nan, 5], ["a", "b", "c", "e"]]
    )
    tm.assert_index_equal(idx.dropna(how="all"), expected_all)

    with pytest.raises(ValueError, match="invalid how option: xxx"):
        idx.dropna(how="xxx")

    # GH26408
    # Same checks for a MultiIndex built directly from levels and codes, where
    # the levels themselves contain NaN / None / NaT entries.
    idx = MultiIndex(
        levels=[[np.nan, None, pd.NaT, "128", 2], [np.nan, None, pd.NaT, "128", 2]],
        codes=[[0, -1, 1, 2, 3, 4], [0, -1, 3, 3, 3, 4]],
    )

    expected_any = MultiIndex.from_arrays([["128", 2], ["128", 2]])
    tm.assert_index_equal(idx.dropna(), expected_any)
    tm.assert_index_equal(idx.dropna(how="any"), expected_any)

    expected_all = MultiIndex.from_arrays(
        [[np.nan, np.nan, "128", 2], ["128", "128", "128", 2]]
    )
    tm.assert_index_equal(idx.dropna(how="all"), expected_all)
|
||||
|
||||
|
||||
def test_nulls(idx):
    """Smoke test: ``MultiIndex.isna`` raises ``NotImplementedError``.

    The null-handling methods are tested for function elsewhere.
    """
    expected_message = "isna is not defined for MultiIndex"
    with pytest.raises(NotImplementedError, match=expected_message):
        idx.isna()
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="isna is not defined for MultiIndex")
def test_hasnans_isnans(idx):
    """Check ``_isnan`` / ``hasnans`` without and with a NaN entry (GH 11343).

    Marked xfail: see the reason on the decorator.
    """
    # Case 1: the index contains no NaN.
    clean = idx.copy()
    expected = np.zeros(len(clean), dtype=bool)
    tm.assert_numpy_array_equal(clean._isnan, expected)
    assert clean.hasnans is False

    # Case 2: rebuild the index from its values after blanking position 1.
    raw_values = idx.copy().values
    raw_values[1] = np.nan
    with_nan = type(idx)(raw_values)

    expected = np.zeros(len(with_nan), dtype=bool)
    expected[1] = True
    tm.assert_numpy_array_equal(with_nan._isnan, expected)
    assert with_nan.hasnans is True
|
||||
|
||||
|
||||
def test_nan_stays_float():
    """Second-level values built from -1 codes must stay missing (GH 7031).

    Checked after an outer join of the indexes and after subtracting two
    frames that carry them.
    """
    left_mi = MultiIndex(
        levels=[["A", "B"], []], codes=[[1, 0], [-1, -1]], names=[0, 1]
    )
    right_mi = MultiIndex(levels=[["C"], ["D"]], codes=[[0], [0]], names=[0, 1])

    joined = left_mi.join(right_mi, how="outer")
    assert pd.isna(left_mi.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(joined.get_level_values(1)[:-1]).all()

    left_frame = pd.DataFrame([[1, 2]], index=left_mi)
    right_frame = pd.DataFrame([[3, 4]], index=right_mi)
    difference = left_frame - right_frame
    assert pd.isna(left_frame.index.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(difference.index.get_level_values(1)[:-1]).all()
|
||||
|
||||
|
||||
def test_tuples_have_na():
    """A -1 code must show up as a missing value in the materialised tuples."""
    first_level_codes = [1, 1, 1, 1, -1, 0, 0, 0]
    second_level_codes = [0, 1, 2, 3, 0, 1, 2, 3]
    index = MultiIndex(
        levels=[[1, 0], [0, 1, 2, 3]],
        codes=[first_level_codes, second_level_codes],
    )

    # Position 4 holds the -1 code in the first level.
    via_getitem = index[4]
    via_values = index.values[4]
    assert pd.isna(via_getitem[0])
    assert pd.isna(via_values[0])
|
@ -0,0 +1,188 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
|
||||
|
||||
def test_is_monotonic_increasing_lexsorted(lexsorted_two_level_string_multiindex):
    """The fixture index must not be reported as monotonic increasing.

    The property is checked on the MultiIndex itself and on an ``Index``
    rebuilt from its values.  ``is_monotonic_increasing`` is used instead of
    the ``is_monotonic`` alias, matching the other tests in this module.
    """
    # string ordering
    mi = lexsorted_two_level_string_multiindex
    assert mi.is_monotonic_increasing is False
    assert Index(mi.values).is_monotonic_increasing is False
    assert mi._is_strictly_monotonic_increasing is False
    assert Index(mi.values)._is_strictly_monotonic_increasing is False
|
||||
|
||||
|
||||
def test_is_monotonic_increasing():
    """Check ``is_monotonic_increasing`` and its strict variant on several inputs.

    Unless noted, each case is checked both on the MultiIndex and on an
    ``Index`` rebuilt from its values.
    """
    i = MultiIndex.from_product([np.arange(10), np.arange(10)], names=["one", "two"])
    assert i.is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    # Previously this line repeated the check on ``i`` instead of the rebuilt Index.
    assert Index(i.values)._is_strictly_monotonic_increasing is True

    # first level descending
    i = MultiIndex.from_product(
        [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
    )
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # second level descending
    i = MultiIndex.from_product(
        [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
    )
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # NaN inside the first level
    i = MultiIndex.from_product([[1.0, np.nan, 2.0], ["a", "b", "c"]])
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # string levels, sorted
    i = MultiIndex(
        levels=[["bar", "baz", "foo", "qux"], ["mom", "next", "zenith"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values)._is_strictly_monotonic_increasing is True

    # mixed levels, hits the TypeError
    i = MultiIndex(
        levels=[
            [1, 2, 3, 4],
            [
                "gb00b03mlx29",
                "lu0197800237",
                "nl0000289783",
                "nl0000289965",
                "nl0000301109",
            ],
        ],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=["household_id", "asset_id"],
    )

    # Only the MultiIndex itself is checked for this case.
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False

    # empty
    i = MultiIndex.from_arrays([[], []])
    assert i.is_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values)._is_strictly_monotonic_increasing is True
|
||||
|
||||
|
||||
def test_is_monotonic_decreasing():
    """Check ``is_monotonic_decreasing`` and its strict variant on several inputs.

    Unless noted, each case is checked both on the MultiIndex and on an
    ``Index`` rebuilt from its values.
    """
    i = MultiIndex.from_product(
        [np.arange(9, -1, -1), np.arange(9, -1, -1)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    # Previously this line repeated the check on ``i`` instead of the rebuilt Index.
    assert Index(i.values)._is_strictly_monotonic_decreasing is True

    # first level ascending
    i = MultiIndex.from_product(
        [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # second level ascending
    i = MultiIndex.from_product(
        [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # NaN inside the first level
    i = MultiIndex.from_product([[2.0, np.nan, 1.0], ["c", "b", "a"]])
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # string ordering
    i = MultiIndex(
        levels=[["qux", "foo", "baz", "bar"], ["three", "two", "one"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    i = MultiIndex(
        levels=[["qux", "foo", "baz", "bar"], ["zenith", "next", "mom"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values)._is_strictly_monotonic_decreasing is True

    # mixed levels, hits the TypeError
    i = MultiIndex(
        levels=[
            [4, 3, 2, 1],
            [
                "nl0000301109",
                "nl0000289965",
                "nl0000289783",
                "lu0197800237",
                "gb00b03mlx29",
            ],
        ],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=["household_id", "asset_id"],
    )

    # Only the MultiIndex itself is checked for this case.
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False

    # empty
    i = MultiIndex.from_arrays([[], []])
    assert i.is_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values)._is_strictly_monotonic_decreasing is True
|
||||
|
||||
|
||||
def test_is_strictly_monotonic_increasing():
    """A repeated entry keeps the index monotonic but not strictly monotonic."""
    # The first two rows share the codes (0, 0).
    level_values = [["bar", "baz"], ["mom", "next"]]
    row_codes = [[0, 0, 1, 1], [0, 0, 0, 1]]
    idx = MultiIndex(levels=level_values, codes=row_codes)

    assert idx.is_monotonic_increasing is True
    assert idx._is_strictly_monotonic_increasing is False
|
||||
|
||||
|
||||
def test_is_strictly_monotonic_decreasing():
    """A repeated entry keeps the index monotonic but not strictly monotonic."""
    # The first two rows share the codes (0, 0).
    level_values = [["baz", "bar"], ["next", "mom"]]
    row_codes = [[0, 0, 1, 1], [0, 0, 0, 1]]
    idx = MultiIndex(levels=level_values, codes=row_codes)

    assert idx.is_monotonic_decreasing is True
    assert idx._is_strictly_monotonic_decreasing is False
|
||||
|
||||
|
||||
@pytest.mark.parametrize("attr", ["is_monotonic_increasing", "is_monotonic_decreasing"])
@pytest.mark.parametrize(
    "values",
    [[(np.nan,), (1,), (2,)], [(1,), (np.nan,), (2,)], [(1,), (2,), (np.nan,)]],
)
def test_is_monotonic_with_nans(values, attr):
    """Neither monotonic property may be True when a NaN is present (GH: 37220).

    ``values`` places the NaN first, in the middle and last in turn.
    """
    single_level = MultiIndex.from_tuples(values, names=["test"])
    reported = getattr(single_level, attr)
    assert reported is False
|
205
.venv/Lib/site-packages/pandas/tests/indexes/multi/test_names.py
Normal file
205
.venv/Lib/site-packages/pandas/tests/indexes/multi/test_names.py
Normal file
@ -0,0 +1,205 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import MultiIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def check_level_names(index, names):
    """Assert that the ``name`` of each entry in ``index.levels`` equals ``names``.

    ``names`` may be any iterable; it is converted to a list before comparing.
    """
    actual_names = [lvl.name for lvl in index.levels]
    assert actual_names == list(names)
|
||||
|
||||
|
||||
def test_slice_keep_name():
    """Slicing a MultiIndex must keep its level names."""
    full = MultiIndex.from_tuples(
        [("a", "b"), (1, 2), ("c", "d")], names=["x", "y"]
    )
    tail = full[1:]
    assert tail.names == full.names
|
||||
|
||||
|
||||
def test_index_name_retained():
    """Adding a row through ``.loc`` must match a frame built with that row (GH9857).

    Both frames use the "z" column as their index.
    """
    frame = pd.DataFrame({"x": [1, 2, 6], "y": [2, 2, 8], "z": [-5, 0, 5]})
    frame = frame.set_index("z")
    # Setting a label that does not exist yet adds a new row.
    frame.loc[10] = [9, 10]

    expected = pd.DataFrame(
        {"x": [1, 2, 6, 9], "y": [2, 2, 8, 10], "z": [-5, 0, 5, 10]}
    ).set_index("z")

    tm.assert_frame_equal(frame, expected)
|
||||
|
||||
|
||||
def test_changing_names(idx):
    """Renaming an index must not affect its earlier copies, nor the reverse."""
    assert [lvl.name for lvl in idx.levels] == ["first", "second"]

    # Take a view, a copy and a shallow copy before any renaming happens.
    viewed = idx.view()
    copied = idx.copy()
    shallow = idx._view()

    # Renaming the original shows up in the names of its own levels ...
    idx.names = [name + "a" for name in idx.names]
    check_level_names(idx, ["firsta", "seconda"])

    # ... but in none of the objects derived from it earlier.
    for derived in (viewed, copied, shallow):
        check_level_names(derived, ["first", "second"])

    # Renaming a derived object must leave the original untouched.
    shallow.names = [name + "c" for name in shallow.names]
    check_level_names(idx, ["firsta", "seconda"])
|
||||
|
||||
|
||||
def test_take_preserve_name(idx):
    """``take`` must return an index carrying the same level names."""
    positions = [3, 0, 1]
    subset = idx.take(positions)
    assert subset.names == idx.names
|
||||
|
||||
|
||||
def test_copy_names():
    """``copy`` must honour ``names=`` / ``name=`` and leave the source unchanged.

    GH14302
    """
    with tm.assert_produces_warning(FutureWarning):
        # subclass-specific kwargs to pd.Index
        multi_idx = pd.Index([(1, 2), (3, 4)], names=["MyName1", "MyName2"])

    # Plain copy: equal values, same names on both objects.
    plain_copy = multi_idx.copy()
    assert multi_idx.equals(plain_copy)
    assert multi_idx.names == ["MyName1", "MyName2"]
    assert plain_copy.names == ["MyName1", "MyName2"]

    # Copy with ``names=``: only the copy is renamed.
    renamed_via_names = multi_idx.copy(names=["NewName1", "NewName2"])
    assert multi_idx.equals(renamed_via_names)
    assert multi_idx.names == ["MyName1", "MyName2"]
    assert renamed_via_names.names == ["NewName1", "NewName2"]

    # Copy with ``name=``: same outcome as ``names=``.
    renamed_via_name = multi_idx.copy(name=["NewName1", "NewName2"])
    assert multi_idx.equals(renamed_via_name)
    assert multi_idx.names == ["MyName1", "MyName2"]
    assert renamed_via_name.names == ["NewName1", "NewName2"]

    # gh-35592
    with pytest.raises(ValueError, match="Length of new names must be 2, got 1"):
        multi_idx.copy(names=["mario"])

    with pytest.raises(TypeError, match="MultiIndex.name must be a hashable type"):
        multi_idx.copy(names=[["mario"], ["luigi"]])
|
||||
|
||||
|
||||
def test_names(idx, index_names):
    """Validate name handling on an existing index and in the constructor."""
    # The fixtures agree with each other and with the names of the levels.
    assert index_names == ["first", "second"]
    assert [lvl.name for lvl in idx.levels] == index_names

    # Assigning a names list of the wrong length to an existing index fails.
    index = idx
    too_many = list(index.names) + ["third"]
    with pytest.raises(ValueError, match="^Length of names"):
        index.names = too_many
    with pytest.raises(ValueError, match="^Length of names"):
        index.names = []

    # The constructor rejects wrong-length names in the same way.
    major_axis, minor_axis = idx.levels
    major_codes, minor_codes = idx.codes
    for bad_names in (["first"], ["first", "second", "third"]):
        with pytest.raises(ValueError, match="^Length of names"):
            MultiIndex(
                levels=[major_axis, minor_axis],
                codes=[major_codes, minor_codes],
                names=bad_names,
            )

    # Valid names assigned on the index are reported by its levels as well.
    index.names = ["a", "b"]
    assert [lvl.name for lvl in index.levels] == ["a", "b"]
|
||||
|
||||
|
||||
def test_duplicate_level_names_access_raises(idx):
    """Looking up a level by a name shared by two levels must raise (GH19029)."""
    duplicated = "foo"
    idx.names = [duplicated, duplicated]
    with pytest.raises(ValueError, match="name foo occurs multiple times"):
        idx._get_level_number(duplicated)
|
||||
|
||||
|
||||
def test_get_names_from_levels():
    """Each level must report the name given to ``from_product``."""
    idx = MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"])

    first_level, second_level = idx.levels[0], idx.levels[1]
    assert first_level.name == "a"
    assert second_level.name == "b"
|
||||
|
||||
|
||||
def test_setting_names_from_levels_raises():
    """Setting ``.name`` on a level of a MultiIndex must raise ``RuntimeError``."""
    idx = MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"])

    # Direct assignment on either level is rejected.
    for position in (0, 1):
        with pytest.raises(RuntimeError, match="set_names"):
            idx.levels[position].name = "foo"

    # So is renaming the index of a Series built on a level.
    on_level = pd.Series(1, index=idx.levels[0])
    with pytest.raises(RuntimeError, match="set_names"):
        on_level.index.name = "bar"

    # The class-level flag must still be False on these index classes.
    assert pd.Index._no_setting_name is False
    assert pd.core.api.NumericIndex._no_setting_name is False
    assert pd.RangeIndex._no_setting_name is False
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["rename", "set_names"])
@pytest.mark.parametrize(
    "rename_dict, exp_names",
    [
        ({"x": "z"}, ["z", "y", "z"]),
        ({"x": "z", "y": "x"}, ["z", "x", "z"]),
        ({"y": "z"}, ["x", "z", "x"]),
        ({}, ["x", "y", "x"]),
        ({"z": "a"}, ["x", "y", "x"]),
        ({"y": "z", "a": "b"}, ["x", "z", "x"]),
    ],
)
def test_name_mi_with_dict_like_duplicate_names(func, rename_dict, exp_names):
    """Dict-like renaming on a MultiIndex whose names contain a duplicate.

    GH#20421
    """
    arrays = [[1, 2], [3, 4], [5, 6]]
    mi = MultiIndex.from_arrays(arrays, names=["x", "y", "x"])

    renamer = getattr(mi, func)
    result = renamer(rename_dict)

    expected = MultiIndex.from_arrays(arrays, names=exp_names)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["rename", "set_names"])
@pytest.mark.parametrize(
    "rename_dict, exp_names",
    [
        ({"x": "z"}, ["z", "y"]),
        ({"x": "z", "y": "x"}, ["z", "x"]),
        ({"a": "z"}, ["x", "y"]),
        ({}, ["x", "y"]),
    ],
)
def test_name_mi_with_dict_like(func, rename_dict, exp_names):
    """Dict-like renaming on a MultiIndex with distinct names.

    GH#20421
    """
    arrays = [[1, 2], [3, 4]]
    mi = MultiIndex.from_arrays(arrays, names=["x", "y"])

    renamer = getattr(mi, func)
    result = renamer(rename_dict)

    expected = MultiIndex.from_arrays(arrays, names=exp_names)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_index_name_with_dict_like_raising():
    """A flat ``Index`` must reject a dict passed to ``set_names`` (GH#20421)."""
    flat_index = pd.Index([1, 2])
    with pytest.raises(
        TypeError, match="Can only pass dict-like as `names` for MultiIndex."
    ):
        flat_index.set_names({"x": "z"})
|
||||
|
||||
|
||||
def test_multiindex_name_and_level_raising():
    """Passing ``level`` together with dict-like ``names`` must raise (GH#20421)."""
    mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["x", "y"])
    mapping = {"x": "z"}
    with pytest.raises(TypeError, match="Can not pass level for dictlike `names`."):
        mi.set_names(names=mapping, level={"x": "z"})
|
@ -0,0 +1,148 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
IndexSlice,
|
||||
MultiIndex,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def df():
    """Return a one-column frame indexed by (12-hourly timestamp, letter).

    Layout of the returned frame::

                                 c1
        2016-01-01 00:00:00 a     0
                            b     1
                            c     2
        2016-01-01 12:00:00 a     3
                            b     4
                            c     5
        2016-01-02 00:00:00 a     6
                            b     7
                            c     8
        2016-01-02 12:00:00 a     9
                            b    10
                            c    11
        2016-01-03 00:00:00 a    12
                            b    13
                            c    14
    """
    stamps = date_range("2016-01-01", "2016-01-03", freq="12H")
    letters = ["a", "b", "c"]
    index = MultiIndex.from_product([stamps, letters])
    return DataFrame({"c1": range(0, 15)}, index=index)
|
||||
|
||||
|
||||
def test_partial_string_matching_single_index(df):
    """Partial date-string lookup on the index left after selecting "a".

    The three ``swaplevel`` spellings are exercised in turn.
    """
    swapped_frames = [df.swaplevel(), df.swaplevel(0), df.swaplevel(0, 1)]
    for swapped in swapped_frames:
        only_a = swapped.sort_index().loc["a"]
        result = only_a.loc["2016-01-01"]

        # Build the expectation from the unswapped frame: the first two "a" rows
        # with the letter level dropped.
        expected = df.loc[IndexSlice[:, "a"], :].iloc[0:2]
        expected.index = expected.index.droplevel(1)
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_get_loc_partial_timestamp_multiindex(df):
    """``get_loc`` with a partial date string as the first element of a tuple key.

    Every lookup is compared against a boolean mask over the whole index.
    """
    mi = df.index

    def mask_with(selector):
        # Boolean mask of the index length, True at ``selector``.
        mask = np.zeros(len(mi), dtype=bool)
        mask[selector] = True
        return mask

    # Day resolution.
    day_one_key = ("2016-01-01", "a")
    tm.assert_numpy_array_equal(mi.get_loc(day_one_key), mask_with([0, 3]))
    tm.assert_numpy_array_equal(mi.get_loc(("2016-01-02", "a")), mask_with([6, 9]))

    # Month and year resolution: expected to match every "a" row.
    all_a = mask_with(mi.get_level_values(1).get_loc("a"))
    tm.assert_numpy_array_equal(mi.get_loc(("2016-01", "a")), all_a)
    tm.assert_numpy_array_equal(mi.get_loc(("2016", "a")), all_a)

    # non-monotonic
    taker = np.arange(len(mi), dtype=np.intp)
    taker[::2] = taker[::-2]
    shuffled = mi.take(taker)
    expected = np.zeros(len(shuffled), dtype=bool)
    expected[[3, 14]] = True
    tm.assert_numpy_array_equal(shuffled.get_loc(day_one_key), expected)
|
||||
|
||||
|
||||
def test_partial_string_timestamp_multiindex(df):
    """Partial date-string indexing through ``.loc`` on a MultiIndexed frame.

    Covers ``IndexSlice`` selections, bare string keys and tuple keys on ``df``
    and on a copy with the two levels swapped.  GH10331
    """
    df_swap = df.swaplevel(0, 1).sort_index()
    SLC = IndexSlice

    # indexing with IndexSlice
    result = df.loc[SLC["2016-01-01":"2016-02-01", :], :]
    expected = df
    tm.assert_frame_equal(result, expected)

    # match on secondary index
    result = df_swap.loc[SLC[:, "2016-01-01":"2016-01-01"], :]
    expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]]
    tm.assert_frame_equal(result, expected)

    # partial string match on year only
    result = df.loc["2016"]
    expected = df
    tm.assert_frame_equal(result, expected)

    # partial string match on date
    result = df.loc["2016-01-01"]
    expected = df.iloc[0:6]
    tm.assert_frame_equal(result, expected)

    # partial string match on date and hour, from middle
    result = df.loc["2016-01-02 12"]
    # hourly resolution, same as index.levels[0], so we are _not_ slicing on
    # that level, so that level gets dropped
    expected = df.iloc[9:12].droplevel(0)
    tm.assert_frame_equal(result, expected)

    # partial string match on secondary index
    result = df_swap.loc[SLC[:, "2016-01-02"], :]
    expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]]
    tm.assert_frame_equal(result, expected)

    # tuple selector with partial string match on date
    # "2016-01-01" has daily resolution, so _is_ a slice on the first level.
    result = df.loc[("2016-01-01", "a"), :]
    # (a redundant first assignment to ``expected`` was removed here)
    expected = df.iloc[[0, 3]].droplevel(1)
    tm.assert_frame_equal(result, expected)

    # Slicing date on first level should break (of course) bc the DTI is the
    # second level on df_swap
    with pytest.raises(KeyError, match="'2016-01-01'"):
        df_swap.loc["2016-01-01"]
|
||||
|
||||
|
||||
def test_partial_string_timestamp_multiindex_str_key_raises(df):
    """``df[<date string>]`` must raise ``KeyError`` on a MultiIndexed frame.

    The syntax works on a single index, but it is somewhat ambiguous and is
    deliberately not carried over to MultiIndexes: it would amount to
    selecting a scalar from a column.
    """
    date_key = "2016-01-01"
    with pytest.raises(KeyError, match="'2016-01-01'"):
        df[date_key]
|
||||
|
||||
|
||||
def test_partial_string_timestamp_multiindex_daily_resolution(df):
    """Slice the first level with a month-resolution partial string (GH12685).

    GH12685 concerns partial strings with daily resolution or below.
    """
    # NOTE(review): the ``df`` fixture in this module has 15 rows, all dated
    # 2016, so both selections below look empty - confirm this is intended.
    march_2013 = IndexSlice["2013-03":"2013-03", :]
    result = df.loc[march_2013, :]
    expected = df.iloc[118:180]
    tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,10 @@
|
||||
import pytest
|
||||
|
||||
from pandas import MultiIndex
|
||||
|
||||
|
||||
def test_pickle_compat_construction():
    """Calling ``MultiIndex()`` with no arguments must raise ``TypeError``.

    This is a pickle-compatibility check: an object is needed to create with.
    """
    expected_message = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=expected_message):
        MultiIndex()
|
@ -0,0 +1,164 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_reindex(idx):
    """``reindex`` with a list of tuples must return a named MultiIndex."""
    expected_names = ["first", "second"]

    # Reindex onto the first four entries.
    partial, _ = idx.reindex(list(idx[:4]))
    assert isinstance(partial, MultiIndex)
    assert partial.names == expected_names
    assert [lvl.name for lvl in partial.levels] == expected_names

    # Reindex onto all entries: no indexer is returned in this case.
    full, full_indexer = idx.reindex(list(idx))
    assert isinstance(full, MultiIndex)
    assert full_indexer is None
    assert full.names == expected_names
    assert [lvl.name for lvl in full.levels] == expected_names
|
||||
|
||||
|
||||
def test_reindex_level(idx):
    """``reindex(..., level=...)`` must agree with the corresponding level join.

    Also checks that combining ``method`` with ``level`` raises ``TypeError``.
    """
    flat = Index(["one"])

    # MultiIndex reindexed onto a flat index, and the other way around.
    mi_target, mi_indexer = idx.reindex(flat, level="second")
    flat_target, flat_indexer = flat.reindex(idx, level="second")

    expected_right = idx.join(flat, level="second", how="right")
    expected_left = idx.join(flat, level="second", how="left")

    assert mi_target.equals(expected_right)
    tm.assert_numpy_array_equal(mi_indexer, np.array([0, 2, 4]), check_dtype=False)

    assert flat_target.equals(expected_left)
    tm.assert_numpy_array_equal(
        flat_indexer, np.array([0, -1, 0, -1, 0, -1]), check_dtype=False
    )

    with pytest.raises(TypeError, match="Fill method not supported"):
        idx.reindex(idx, method="pad", level="second")

    with pytest.raises(TypeError, match="Fill method not supported"):
        flat.reindex(flat, method="bfill", level="first")
|
||||
|
||||
|
||||
def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx):
    """Reindexing onto a list or ndarray must keep the caller's names (GH6552)."""
    idx = idx.copy()
    target = idx.copy()
    idx.names = target.names = [None, None]

    other_dtype = MultiIndex.from_product([[1, 2], [3, 4]])

    def reindexed_names():
        # Names of the reindexed result for each list / ndarray target, in order.
        candidates = (
            [],
            np.array([]),
            target.tolist(),
            target.values,
            other_dtype.tolist(),
            other_dtype.values,
        )
        return [idx.reindex(candidate)[0].names for candidate in candidates]

    # list & ndarray cases, first unnamed ...
    for names in reindexed_names():
        assert names == [None, None]

    # ... then with explicit names on the source index.
    idx.names = ["foo", "bar"]
    for names in reindexed_names():
        assert names == ["foo", "bar"]
|
||||
|
||||
|
||||
def test_reindex_lvl_preserves_names_when_target_is_list_or_array():
    """Level-wise reindex onto an empty list must keep the names (GH7774)."""
    expected_names = ["foo", "bar"]
    idx = MultiIndex.from_product([[0, 1], ["a", "b"]], names=expected_names)

    on_first_level = idx.reindex([], level=0)[0]
    on_second_level = idx.reindex([], level=1)[0]
    assert on_first_level.names == ["foo", "bar"]
    assert on_second_level.names == ["foo", "bar"]
|
||||
|
||||
|
||||
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array():
    """Level-wise reindex onto an empty list must keep each level's dtype (GH7774)."""
    idx = MultiIndex.from_product([[0, 1], ["a", "b"]])
    first_level = idx.reindex([], level=0)[0].levels[0]
    second_level = idx.reindex([], level=1)[0].levels[1]
    assert first_level.dtype.type == np.int64
    assert second_level.dtype.type == np.object_

    # case with EA levels
    cat = pd.Categorical(["foo", "bar"])
    dti = pd.date_range("2016-01-01", periods=2, tz="US/Pacific")
    mi = MultiIndex.from_product([cat, dti])
    first_level = mi.reindex([], level=0)[0].levels[0]
    second_level = mi.reindex([], level=1)[0].levels[1]
    assert first_level.dtype == cat.dtype
    assert second_level.dtype == dti.dtype
|
||||
|
||||
|
||||
def test_reindex_base(idx):
    """``get_indexer`` of an index against itself, plus an invalid ``method``.

    The self-indexer is expected to be ``0 .. size - 1`` as ``np.intp``; an
    unknown ``method`` must raise ``ValueError``.
    """
    # (the former no-op ``idx = idx`` self-assignment was dropped)
    expected = np.arange(idx.size, dtype=np.intp)

    actual = idx.get_indexer(idx)
    tm.assert_numpy_array_equal(expected, actual)

    with pytest.raises(ValueError, match="Invalid fill method"):
        idx.get_indexer(idx, method="invalid")
|
||||
|
||||
|
||||
def test_reindex_non_unique():
    """Reindexing a Series with a duplicated MultiIndex entry must raise.

    A ``FutureWarning`` mentioning "non-unique" is expected as well.
    """
    duplicated = MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)])
    ser = pd.Series(np.arange(4), index=duplicated)
    unique_target = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])

    expected_message = "cannot handle a non-unique multi-index!"
    # Same nesting as two stacked ``with`` blocks: the warning check is innermost.
    with pytest.raises(
        ValueError, match=expected_message
    ), tm.assert_produces_warning(FutureWarning, match="non-unique"):
        ser.reindex(unique_target)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
def test_reindex_empty_with_level(values):
    """Level-wise reindex onto a label absent from the index (GH41170).

    The result is expected to be empty, with "b" as the only entry of level 0.
    """
    idx = MultiIndex.from_arrays(values)
    new_labels = np.array(["b"])

    result, result_indexer = idx.reindex(new_labels, level=0)

    empty_expected = MultiIndex(levels=[["b"], values[1]], codes=[[], []])
    empty_indexer = np.array([], dtype=result_indexer.dtype)
    tm.assert_index_equal(result, empty_expected)
    tm.assert_numpy_array_equal(result_indexer, empty_indexer)
|
||||
|
||||
|
||||
def test_reindex_not_all_tuples():
    """Reindex a MultiIndex onto a target whose last entry is not a tuple."""
    keys = [("i", "i"), ("i", "j"), ("j", "i"), "j"]
    tuples_only = MultiIndex.from_tuples(keys[:-1])
    mixed_target = Index(keys)

    result, indexer = tuples_only.reindex(mixed_target)

    # The target comes back unchanged; the non-tuple entry maps to -1.
    tm.assert_index_equal(result, mixed_target)
    tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2, -1], dtype=np.intp))
|
||||
|
||||
|
||||
def test_reindex_limit_arg_with_multiindex():
    """``limit`` without a fill ``method`` must raise ``ValueError`` (GH21247)."""
    source_index = MultiIndex.from_tuples([(3, "A"), (4, "A"), (4, "B")])
    ser = pd.Series([0.02, 0.01, 0.012], index=source_index)

    target_tuples = [
        (3, "A"),
        (3, "B"),
        (4, "A"),
        (4, "B"),
        (4, "C"),
        (5, "B"),
        (5, "C"),
        (6, "B"),
        (6, "C"),
    ]
    target_index = MultiIndex.from_tuples(target_tuples)

    expected_message = (
        "limit argument only valid if doing pad, backfill or nearest reindexing"
    )
    with pytest.raises(ValueError, match=expected_message):
        ser.reindex(target_index, fill_value=0, limit=1)
|
@ -0,0 +1,185 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_insert(idx):
|
||||
# key contained in all levels
|
||||
new_index = idx.insert(0, ("bar", "two"))
|
||||
assert new_index.equal_levels(idx)
|
||||
assert new_index[0] == ("bar", "two")
|
||||
|
||||
# key not contained in all levels
|
||||
new_index = idx.insert(0, ("abc", "three"))
|
||||
|
||||
exp0 = Index(list(idx.levels[0]) + ["abc"], name="first")
|
||||
tm.assert_index_equal(new_index.levels[0], exp0)
|
||||
assert new_index.names == ["first", "second"]
|
||||
|
||||
exp1 = Index(list(idx.levels[1]) + ["three"], name="second")
|
||||
tm.assert_index_equal(new_index.levels[1], exp1)
|
||||
assert new_index[0] == ("abc", "three")
|
||||
|
||||
# key wrong length
|
||||
msg = "Item must have length equal to number of levels"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
idx.insert(0, ("foo2",))
|
||||
|
||||
left = pd.DataFrame([["a", "b", 0], ["b", "d", 1]], columns=["1st", "2nd", "3rd"])
|
||||
left.set_index(["1st", "2nd"], inplace=True)
|
||||
ts = left["3rd"].copy(deep=True)
|
||||
|
||||
left.loc[("b", "x"), "3rd"] = 2
|
||||
left.loc[("b", "a"), "3rd"] = -1
|
||||
left.loc[("b", "b"), "3rd"] = 3
|
||||
left.loc[("a", "x"), "3rd"] = 4
|
||||
left.loc[("a", "w"), "3rd"] = 5
|
||||
left.loc[("a", "a"), "3rd"] = 6
|
||||
|
||||
ts.loc[("b", "x")] = 2
|
||||
ts.loc["b", "a"] = -1
|
||||
ts.loc[("b", "b")] = 3
|
||||
ts.loc["a", "x"] = 4
|
||||
ts.loc[("a", "w")] = 5
|
||||
ts.loc["a", "a"] = 6
|
||||
|
||||
right = pd.DataFrame(
|
||||
[
|
||||
["a", "b", 0],
|
||||
["b", "d", 1],
|
||||
["b", "x", 2],
|
||||
["b", "a", -1],
|
||||
["b", "b", 3],
|
||||
["a", "x", 4],
|
||||
["a", "w", 5],
|
||||
["a", "a", 6],
|
||||
],
|
||||
columns=["1st", "2nd", "3rd"],
|
||||
)
|
||||
right.set_index(["1st", "2nd"], inplace=True)
|
||||
# FIXME data types changes to float because
|
||||
# of intermediate nan insertion;
|
||||
tm.assert_frame_equal(left, right, check_dtype=False)
|
||||
tm.assert_series_equal(ts, right["3rd"])
|
||||
|
||||
|
||||
def test_insert2():
    """Assigning new keys through ``.loc`` appends them to the MultiIndex."""
    # GH9250
    tuples = [("test1", i) for i in range(5)]
    tuples.extend(("test2", i) for i in range(6))
    new_keys = [("test", 17), ("test", 18)]

    left = pd.Series(np.linspace(0, 10, 11), MultiIndex.from_tuples(tuples))

    # Values 11 and 12 continue the 0..10 ramp already stored in ``left``.
    for value, key in enumerate(new_keys, start=11):
        left.loc[key] = value

    right = pd.Series(
        np.linspace(0, 12, 13), MultiIndex.from_tuples(tuples + new_keys)
    )

    tm.assert_series_equal(left, right)
|
||||
|
||||
|
||||
def test_append(idx):
|
||||
result = idx[:3].append(idx[3:])
|
||||
assert result.equals(idx)
|
||||
|
||||
foos = [idx[:1], idx[1:3], idx[3:]]
|
||||
result = foos[0].append(foos[1:])
|
||||
assert result.equals(idx)
|
||||
|
||||
# empty
|
||||
result = idx.append([])
|
||||
assert result.equals(idx)
|
||||
|
||||
|
||||
def test_append_index():
    """Append flat and multi-level indexes to each other in every combination."""
    floats = [1.1, 1.2, 1.3]
    idx1 = Index(floats)
    idx2 = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo")
    idx3 = Index(["A", "B", "C"])

    midx_lv2 = MultiIndex.from_arrays([idx1, idx2])
    midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3])

    result = idx1.append(midx_lv2)

    # see gh-7112
    tz = pytz.timezone("Asia/Tokyo")
    stamps = [tz.localize(datetime(2011, 1, day)) for day in (1, 2, 3)]
    expected_tuples = list(zip(floats, stamps))
    expected = Index(floats + expected_tuples)
    tm.assert_index_equal(result, expected)

    result = midx_lv2.append(idx1)
    expected = Index(expected_tuples + floats)
    tm.assert_index_equal(result, expected)

    result = midx_lv2.append(midx_lv2)
    expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)])
    tm.assert_index_equal(result, expected)

    # The same ``expected`` is reused for the two-level + three-level append.
    result = midx_lv2.append(midx_lv3)
    tm.assert_index_equal(result, expected)

    result = midx_lv3.append(midx_lv2)
    three_level_tuples = list(zip(floats, stamps, "ABC"))
    expected = Index._simple_new(
        np.array(three_level_tuples + expected_tuples, dtype=object),
        None,
    )
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_repeat():
    """``MultiIndex.repeat`` matches a product built from repeated labels."""
    repeats = 2
    outer = [1, 2, 3]
    # The same array serves as the inner level values and as the level names.
    labels = np.array(["foo", "bar"])

    original = MultiIndex.from_product([outer, labels], names=labels)
    repeated = original.repeat(repeats)

    expected = MultiIndex.from_product(
        [outer, labels.repeat(repeats)], names=labels
    )
    tm.assert_index_equal(repeated, expected)
|
||||
|
||||
|
||||
def test_insert_base(idx):
|
||||
|
||||
result = idx[1:4]
|
||||
|
||||
# test 0th element
|
||||
assert idx[0:4].equals(result.insert(0, idx[0]))
|
||||
|
||||
|
||||
def test_delete_base(idx):
|
||||
|
||||
expected = idx[1:]
|
||||
result = idx.delete(0)
|
||||
assert result.equals(expected)
|
||||
assert result.name == expected.name
|
||||
|
||||
expected = idx[:-1]
|
||||
result = idx.delete(-1)
|
||||
assert result.equals(expected)
|
||||
assert result.name == expected.name
|
||||
|
||||
msg = "index 6 is out of bounds for axis 0 with size 6"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
idx.delete(len(idx))
|
@ -0,0 +1,540 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
IntervalIndex,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("case", [0.5, "xxx"])
|
||||
@pytest.mark.parametrize(
|
||||
"method", ["intersection", "union", "difference", "symmetric_difference"]
|
||||
)
|
||||
def test_set_ops_error_cases(idx, case, sort, method):
|
||||
# non-iterable input
|
||||
msg = "Input must be Index or array-like"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
getattr(idx, method)(case, sort=sort)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list])
|
||||
def test_intersection_base(idx, sort, klass):
|
||||
first = idx[2::-1] # first 3 elements reversed
|
||||
second = idx[:5]
|
||||
|
||||
if klass is not MultiIndex:
|
||||
second = klass(second.values)
|
||||
|
||||
intersect = first.intersection(second, sort=sort)
|
||||
if sort is None:
|
||||
expected = first.sort_values()
|
||||
else:
|
||||
expected = first
|
||||
tm.assert_index_equal(intersect, expected)
|
||||
|
||||
msg = "other must be a MultiIndex or a list of tuples"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
first.intersection([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list])
|
||||
def test_union_base(idx, sort, klass):
|
||||
first = idx[::-1]
|
||||
second = idx[:5]
|
||||
|
||||
if klass is not MultiIndex:
|
||||
second = klass(second.values)
|
||||
|
||||
union = first.union(second, sort=sort)
|
||||
if sort is None:
|
||||
expected = first.sort_values()
|
||||
else:
|
||||
expected = first
|
||||
tm.assert_index_equal(union, expected)
|
||||
|
||||
msg = "other must be a MultiIndex or a list of tuples"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
first.union([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
def test_difference_base(idx, sort):
|
||||
second = idx[4:]
|
||||
answer = idx[:4]
|
||||
result = idx.difference(second, sort=sort)
|
||||
|
||||
if sort is None:
|
||||
answer = answer.sort_values()
|
||||
|
||||
assert result.equals(answer)
|
||||
tm.assert_index_equal(result, answer)
|
||||
|
||||
# GH 10149
|
||||
cases = [klass(second.values) for klass in [np.array, Series, list]]
|
||||
for case in cases:
|
||||
result = idx.difference(case, sort=sort)
|
||||
tm.assert_index_equal(result, answer)
|
||||
|
||||
msg = "other must be a MultiIndex or a list of tuples"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
idx.difference([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
def test_symmetric_difference(idx, sort):
|
||||
first = idx[1:]
|
||||
second = idx[:-1]
|
||||
answer = idx[[-1, 0]]
|
||||
result = first.symmetric_difference(second, sort=sort)
|
||||
|
||||
if sort is None:
|
||||
answer = answer.sort_values()
|
||||
|
||||
tm.assert_index_equal(result, answer)
|
||||
|
||||
# GH 10149
|
||||
cases = [klass(second.values) for klass in [np.array, Series, list]]
|
||||
for case in cases:
|
||||
result = first.symmetric_difference(case, sort=sort)
|
||||
tm.assert_index_equal(result, answer)
|
||||
|
||||
msg = "other must be a MultiIndex or a list of tuples"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
first.symmetric_difference([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
def test_multiindex_symmetric_difference():
|
||||
# GH 13490
|
||||
idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"])
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = idx ^ idx
|
||||
assert result.names == idx.names
|
||||
|
||||
idx2 = idx.copy().rename(["A", "B"])
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = idx ^ idx2
|
||||
assert result.names == [None, None]
|
||||
|
||||
|
||||
def test_empty(idx):
|
||||
# GH 15270
|
||||
assert not idx.empty
|
||||
assert idx[:0].empty
|
||||
|
||||
|
||||
def test_difference(idx, sort):
|
||||
|
||||
first = idx
|
||||
result = first.difference(idx[-3:], sort=sort)
|
||||
vals = idx[:-3].values
|
||||
|
||||
if sort is None:
|
||||
vals = sorted(vals)
|
||||
|
||||
expected = MultiIndex.from_tuples(vals, sortorder=0, names=idx.names)
|
||||
|
||||
assert isinstance(result, MultiIndex)
|
||||
assert result.equals(expected)
|
||||
assert result.names == idx.names
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# empty difference: reflexive
|
||||
result = idx.difference(idx, sort=sort)
|
||||
expected = idx[:0]
|
||||
assert result.equals(expected)
|
||||
assert result.names == idx.names
|
||||
|
||||
# empty difference: superset
|
||||
result = idx[-3:].difference(idx, sort=sort)
|
||||
expected = idx[:0]
|
||||
assert result.equals(expected)
|
||||
assert result.names == idx.names
|
||||
|
||||
# empty difference: degenerate
|
||||
result = idx[:0].difference(idx, sort=sort)
|
||||
expected = idx[:0]
|
||||
assert result.equals(expected)
|
||||
assert result.names == idx.names
|
||||
|
||||
# names not the same
|
||||
chunklet = idx[-3:]
|
||||
chunklet.names = ["foo", "baz"]
|
||||
result = first.difference(chunklet, sort=sort)
|
||||
assert result.names == (None, None)
|
||||
|
||||
# empty, but non-equal
|
||||
result = idx.difference(idx.sortlevel(1)[0], sort=sort)
|
||||
assert len(result) == 0
|
||||
|
||||
# raise Exception called with non-MultiIndex
|
||||
result = first.difference(first.values, sort=sort)
|
||||
assert result.equals(first[:0])
|
||||
|
||||
# name from empty array
|
||||
result = first.difference([], sort=sort)
|
||||
assert first.equals(result)
|
||||
assert first.names == result.names
|
||||
|
||||
# name from non-empty array
|
||||
result = first.difference([("foo", "one")], sort=sort)
|
||||
expected = MultiIndex.from_tuples(
|
||||
[("bar", "one"), ("baz", "two"), ("foo", "two"), ("qux", "one"), ("qux", "two")]
|
||||
)
|
||||
expected.names = first.names
|
||||
assert first.names == result.names
|
||||
|
||||
msg = "other must be a MultiIndex or a list of tuples"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
first.difference([1, 2, 3, 4, 5], sort=sort)
|
||||
|
||||
|
||||
def test_difference_sort_special():
    """Subtracting nothing leaves an unsorted index in its original order."""
    # GH-24959
    unsorted = MultiIndex.from_product([[1, 0], ["a", "b"]])

    # sort=None, the default
    tm.assert_index_equal(unsorted.difference([]), unsorted)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="Not implemented.")
def test_difference_sort_special_true():
    """With ``sort=True`` an empty subtraction should still sort the result."""
    # TODO(GH#25151): decide on True behaviour
    unsorted = MultiIndex.from_product([[1, 0], ["a", "b"]])
    expected = MultiIndex.from_product([[0, 1], ["a", "b"]])

    result = unsorted.difference([], sort=True)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_difference_sort_incomparable():
    """Unsortable levels warn under ``sort=None`` and are silent for ``sort=False``."""
    # GH-24959
    stamp = pd.Timestamp("2000")
    idx = MultiIndex.from_product([[1, stamp, 2], ["a", "b"]])
    other = MultiIndex.from_product([[3, stamp, 4], ["c", "d"]])

    # sort=None, the default
    warning_msg = "sort order is undefined for incomparable objects"
    with tm.assert_produces_warning(RuntimeWarning, match=warning_msg):
        result = idx.difference(other)
    tm.assert_index_equal(result, idx)

    # sort=False
    result = idx.difference(other, sort=False)
    tm.assert_index_equal(result, idx)
|
||||
|
||||
|
||||
def test_difference_sort_incomparable_true():
|
||||
idx = MultiIndex.from_product([[1, pd.Timestamp("2000"), 2], ["a", "b"]])
|
||||
other = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]])
|
||||
|
||||
msg = "The 'sort' keyword only takes the values of None or False; True was passed."
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
idx.difference(other, sort=True)
|
||||
|
||||
|
||||
def test_union(idx, sort):
|
||||
piece1 = idx[:5][::-1]
|
||||
piece2 = idx[3:]
|
||||
|
||||
the_union = piece1.union(piece2, sort=sort)
|
||||
|
||||
if sort is None:
|
||||
tm.assert_index_equal(the_union, idx.sort_values())
|
||||
|
||||
assert tm.equalContents(the_union, idx)
|
||||
|
||||
# corner case, pass self or empty thing:
|
||||
the_union = idx.union(idx, sort=sort)
|
||||
tm.assert_index_equal(the_union, idx)
|
||||
|
||||
the_union = idx.union(idx[:0], sort=sort)
|
||||
tm.assert_index_equal(the_union, idx)
|
||||
|
||||
tuples = idx.values
|
||||
result = idx[:4].union(tuples[4:], sort=sort)
|
||||
if sort is None:
|
||||
tm.equalContents(result, idx)
|
||||
else:
|
||||
assert result.equals(idx)
|
||||
|
||||
|
||||
@pytest.mark.xfail(
|
||||
# This test was commented out from Oct 2011 to Dec 2021, may no longer
|
||||
# be relevant.
|
||||
reason="Length of names must match number of levels in MultiIndex",
|
||||
raises=ValueError,
|
||||
)
|
||||
def test_union_with_regular_index(idx):
|
||||
other = Index(["A", "B", "C"])
|
||||
|
||||
result = other.union(idx)
|
||||
assert ("foo", "one") in result
|
||||
assert "B" in result
|
||||
|
||||
msg = "The values in the array are unorderable"
|
||||
with tm.assert_produces_warning(RuntimeWarning, match=msg):
|
||||
result2 = idx.union(other)
|
||||
assert result.equals(result2)
|
||||
|
||||
|
||||
def test_intersection(idx, sort):
|
||||
piece1 = idx[:5][::-1]
|
||||
piece2 = idx[3:]
|
||||
|
||||
the_int = piece1.intersection(piece2, sort=sort)
|
||||
|
||||
if sort is None:
|
||||
tm.assert_index_equal(the_int, idx[3:5])
|
||||
assert tm.equalContents(the_int, idx[3:5])
|
||||
|
||||
# corner case, pass self
|
||||
the_int = idx.intersection(idx, sort=sort)
|
||||
tm.assert_index_equal(the_int, idx)
|
||||
|
||||
# empty intersection: disjoint
|
||||
empty = idx[:2].intersection(idx[2:], sort=sort)
|
||||
expected = idx[:0]
|
||||
assert empty.equals(expected)
|
||||
|
||||
tuples = idx.values
|
||||
result = idx.intersection(tuples)
|
||||
assert result.equals(idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method", ["intersection", "union", "difference", "symmetric_difference"]
|
||||
)
|
||||
def test_setop_with_categorical(idx, sort, method):
|
||||
other = idx.to_flat_index().astype("category")
|
||||
res_names = [None] * idx.nlevels
|
||||
|
||||
result = getattr(idx, method)(other, sort=sort)
|
||||
expected = getattr(idx, method)(idx, sort=sort).rename(res_names)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = getattr(idx, method)(other[:5], sort=sort)
|
||||
expected = getattr(idx, method)(idx[:5], sort=sort).rename(res_names)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_intersection_non_object(idx, sort):
|
||||
other = Index(range(3), name="foo")
|
||||
|
||||
result = idx.intersection(other, sort=sort)
|
||||
expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=None)
|
||||
tm.assert_index_equal(result, expected, exact=True)
|
||||
|
||||
# if we pass a length-0 ndarray (i.e. no name, we retain our idx.name)
|
||||
result = idx.intersection(np.asarray(other)[:0], sort=sort)
|
||||
expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=idx.names)
|
||||
tm.assert_index_equal(result, expected, exact=True)
|
||||
|
||||
msg = "other must be a MultiIndex or a list of tuples"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
# With non-zero length non-index, we try and fail to convert to tuples
|
||||
idx.intersection(np.asarray(other), sort=sort)
|
||||
|
||||
|
||||
def test_intersect_equal_sort():
    """Self-intersection returns the index unchanged for ``sort`` False and None."""
    # GH-24959
    unsorted = MultiIndex.from_product([[1, 0], ["a", "b"]])
    for sort in (False, None):
        tm.assert_index_equal(unsorted.intersection(unsorted, sort=sort), unsorted)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="Not implemented.")
def test_intersect_equal_sort_true():
    """With ``sort=True`` a self-intersection should come back sorted."""
    # TODO(GH#25151): decide on True behaviour
    unsorted = MultiIndex.from_product([[1, 0], ["a", "b"]])
    expected = MultiIndex.from_product([[0, 1], ["a", "b"]])

    result = unsorted.intersection(unsorted, sort=True)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("slice_", [slice(None), slice(0)])
def test_union_sort_other_empty(slice_):
    """Union with itself or with an empty slice leaves the order untouched."""
    # https://github.com/pandas-dev/pandas/issues/24959
    unsorted = MultiIndex.from_product([[1, 0], ["a", "b"]])
    other = unsorted[slice_]

    # default, sort=None
    for left, right in ((unsorted, other), (other, unsorted)):
        tm.assert_index_equal(left.union(right), unsorted)

    # sort=False
    tm.assert_index_equal(unsorted.union(other, sort=False), unsorted)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="Not implemented.")
def test_union_sort_other_empty_sort():
    """
    With ``sort=True`` a union with an empty index should sort the result.

    The function used to request a ``slice_`` argument that was neither
    parametrized on this test nor used in the body. pytest would then fail
    while resolving the argument, and the xfail marker would hide that
    failure instead of reporting on the behaviour under test.
    """
    # TODO(GH#25151): decide on True behaviour
    # # sort=True
    idx = MultiIndex.from_product([[1, 0], ["a", "b"]])
    other = idx[:0]
    result = idx.union(other, sort=True)
    expected = MultiIndex.from_product([[0, 1], ["a", "b"]])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_union_sort_other_incomparable():
    """Unsortable levels warn under ``sort=None`` and are silent for ``sort=False``."""
    # https://github.com/pandas-dev/pandas/issues/24959
    mixed = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])
    first_only = mixed[:1]

    # default, sort=None
    with tm.assert_produces_warning(RuntimeWarning):
        result = mixed.union(first_only)
    tm.assert_index_equal(result, mixed)

    # sort=False
    result = mixed.union(first_only, sort=False)
    tm.assert_index_equal(result, mixed)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="Not implemented.")
def test_union_sort_other_incomparable_sort():
    """With ``sort=True`` a union over unsortable levels should raise TypeError."""
    # TODO(GH#25151): decide on True behaviour
    # # sort=True
    mixed = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])
    first_only = mixed[:1]
    with pytest.raises(TypeError, match="Cannot compare"):
        mixed.union(first_only, sort=True)
|
||||
|
||||
|
||||
def test_union_non_object_dtype_raises():
    """Union of a MultiIndex with one of its own flat levels is not supported."""
    # GH#32646 raise NotImplementedError instead of less-informative error
    mi = MultiIndex.from_product([["a", "b"], [1, 2]])
    flat_level = mi.levels[1]

    msg = "Can only union MultiIndex with MultiIndex or Index of tuples"
    with pytest.raises(NotImplementedError, match=msg):
        mi.union(flat_level)
|
||||
|
||||
|
||||
def test_union_empty_self_different_names():
    """An empty, unnamed left operand yields the right values without names."""
    # GH#38423
    empty = MultiIndex.from_arrays([[]])
    named = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])

    result = empty.union(named)

    unnamed = MultiIndex.from_arrays([[1, 2], [3, 4]])
    tm.assert_index_equal(result, unnamed)
|
||||
|
||||
|
||||
def test_union_multiindex_empty_rangeindex():
    """Union with an empty RangeIndex returns the MultiIndex from either side."""
    # GH#41234
    mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
    empty_range = pd.RangeIndex(0)

    # Names are deliberately excluded from both comparisons.
    from_left = mi.union(empty_range)
    tm.assert_index_equal(mi, from_left, check_names=False)

    from_right = empty_range.union(mi)
    tm.assert_index_equal(mi, from_right, check_names=False)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method", ["union", "intersection", "difference", "symmetric_difference"]
|
||||
)
|
||||
def test_setops_disallow_true(method):
|
||||
idx1 = MultiIndex.from_product([["a", "b"], [1, 2]])
|
||||
idx2 = MultiIndex.from_product([["b", "c"], [1, 2]])
|
||||
|
||||
with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
|
||||
getattr(idx1, method)(idx2, sort=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("tuples", "exp_tuples"),
    [
        ([("val1", "test1")], [("val1", "test1")]),
        ([("val1", "test1"), ("val1", "test1")], [("val1", "test1")]),
        (
            [("val2", "test2"), ("val1", "test1")],
            [("val2", "test2"), ("val1", "test1")],
        ),
    ],
)
def test_intersect_with_duplicates(tuples, exp_tuples):
    """Intersection drops duplicate entries present on either side."""
    # GH#36915
    level_names = ["first", "second"]
    left = MultiIndex.from_tuples(tuples, names=level_names)
    right = MultiIndex.from_tuples(
        [("val1", "test1"), ("val1", "test1"), ("val2", "test2")],
        names=level_names,
    )

    result = left.intersection(right)

    expected = MultiIndex.from_tuples(exp_tuples, names=level_names)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, names, expected",
    [
        ((1,), None, [None, None]),
        ((1,), ["a"], [None, None]),
        ((1,), ["b"], [None, None]),
        ((1, 2), ["c", "d"], [None, None]),
        ((1, 2), ["b", "a"], [None, None]),
        ((1, 2, 3), ["a", "b", "c"], [None, None]),
        ((1, 2), ["a", "c"], ["a", None]),
        ((1, 2), ["c", "b"], [None, "b"]),
        ((1, 2), ["a", "b"], ["a", "b"]),
        ((1, 2), [None, "b"], [None, "b"]),
    ],
)
def test_maybe_match_names(data, names, expected):
    """``_maybe_match_names`` returns the parametrized names for each pairing."""
    # GH#38323
    reference = MultiIndex.from_tuples([], names=["a", "b"])
    candidate = MultiIndex.from_tuples([data], names=names)

    matched = reference._maybe_match_names(candidate)

    assert matched == expected
|
||||
|
||||
|
||||
def test_intersection_equal_different_names():
    """Equal values with partly different names keep only the shared name."""
    # GH#30302
    arrays = [[1, 2], [3, 4]]
    left = MultiIndex.from_arrays(arrays, names=["c", "b"])
    right = MultiIndex.from_arrays(arrays, names=["a", "b"])

    result = left.intersection(right)

    expected = MultiIndex.from_arrays(arrays, names=[None, "b"])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_intersection_different_names():
    """Intersecting a named index with an unnamed one gives an unnamed result."""
    # GH#38323
    named = MultiIndex.from_arrays([[1], [3]], names=["c", "b"])
    unnamed = MultiIndex.from_arrays([[1], [3]])

    tm.assert_index_equal(named.intersection(unnamed), unnamed)
|
||||
|
||||
|
||||
def test_intersection_with_missing_values_on_both_sides(nulls_fixture):
|
||||
# GH#38623
|
||||
mi1 = MultiIndex.from_arrays([[3, nulls_fixture, 4, nulls_fixture], [1, 2, 4, 2]])
|
||||
mi2 = MultiIndex.from_arrays([[3, nulls_fixture, 3], [1, 2, 4]])
|
||||
result = mi1.intersection(mi2)
|
||||
expected = MultiIndex.from_arrays([[3.0, nulls_fixture], [1, 2]])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_union_nan_got_duplicated():
|
||||
# GH#38977
|
||||
mi1 = MultiIndex.from_arrays([[1.0, np.nan], [2, 3]])
|
||||
mi2 = MultiIndex.from_arrays([[1.0, np.nan, 3.0], [2, 3, 4]])
|
||||
result = mi1.union(mi2)
|
||||
tm.assert_index_equal(result, mi2)
|
||||
|
||||
|
||||
def test_union_duplicates(index):
|
||||
# GH#38977
|
||||
if index.empty or isinstance(index, (IntervalIndex, CategoricalIndex)):
|
||||
# No duplicates in empty indexes
|
||||
return
|
||||
values = index.unique().values.tolist()
|
||||
mi1 = MultiIndex.from_arrays([values, [1] * len(values)])
|
||||
mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * (len(values) + 1)])
|
||||
result = mi1.union(mi2)
|
||||
tm.assert_index_equal(result, mi2.sort_values())
|
||||
|
||||
result = mi2.union(mi1)
|
||||
tm.assert_index_equal(result, mi2.sort_values())
|
@ -0,0 +1,287 @@
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import (
|
||||
PerformanceWarning,
|
||||
UnsortedIndexError,
|
||||
)
|
||||
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
RangeIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexes.frozen import FrozenList
|
||||
|
||||
|
||||
def test_sortlevel(idx):
|
||||
tuples = list(idx)
|
||||
random.shuffle(tuples)
|
||||
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
|
||||
sorted_idx, _ = index.sortlevel(0)
|
||||
expected = MultiIndex.from_tuples(sorted(tuples))
|
||||
assert sorted_idx.equals(expected)
|
||||
|
||||
sorted_idx, _ = index.sortlevel(0, ascending=False)
|
||||
assert sorted_idx.equals(expected[::-1])
|
||||
|
||||
sorted_idx, _ = index.sortlevel(1)
|
||||
by1 = sorted(tuples, key=lambda x: (x[1], x[0]))
|
||||
expected = MultiIndex.from_tuples(by1)
|
||||
assert sorted_idx.equals(expected)
|
||||
|
||||
sorted_idx, _ = index.sortlevel(1, ascending=False)
|
||||
assert sorted_idx.equals(expected[::-1])
|
||||
|
||||
|
||||
def test_sortlevel_not_sort_remaining():
    """With ``sort_remaining=False`` ties on the sorted level keep their order."""
    mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
    by_level_a, _ = mi.sortlevel("A", sort_remaining=False)
    assert by_level_a.equals(mi)
|
||||
|
||||
|
||||
def test_sortlevel_deterministic():
    """``sortlevel`` on a fixed, unsorted input matches plain Python sorting."""
    tuples = [
        ("bar", "one"),
        ("foo", "two"),
        ("qux", "two"),
        ("foo", "one"),
        ("baz", "two"),
        ("qux", "one"),
    ]

    index = MultiIndex.from_tuples(tuples)

    # Level 0 uses natural tuple order; level 1 compares the second item first.
    sort_keys = {0: None, 1: lambda pair: (pair[1], pair[0])}
    for level, key in sort_keys.items():
        ascending_idx, _ = index.sortlevel(level)
        expected = MultiIndex.from_tuples(sorted(tuples, key=key))
        assert ascending_idx.equals(expected)

        descending_idx, _ = index.sortlevel(level, ascending=False)
        assert descending_idx.equals(expected[::-1])
|
||||
|
||||
|
||||
def test_numpy_argsort(idx):
|
||||
result = np.argsort(idx)
|
||||
expected = idx.argsort()
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# these are the only two types that perform
|
||||
# pandas compatibility input validation - the
|
||||
# rest already perform separate (or no) such
|
||||
# validation via their 'values' attribute as
|
||||
# defined in pandas.core.indexes/base.py - they
|
||||
# cannot be changed at the moment due to
|
||||
# backwards compatibility concerns
|
||||
if isinstance(type(idx), (CategoricalIndex, RangeIndex)):
|
||||
msg = "the 'axis' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.argsort(idx, axis=1)
|
||||
|
||||
msg = "the 'kind' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.argsort(idx, kind="mergesort")
|
||||
|
||||
msg = "the 'order' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.argsort(idx, order=("a", "b"))
|
||||
|
||||
|
||||
def test_unsortedindex():
    """Slicing an unsorted MultiIndex raises until the frame is sorted."""
    # GH 11897
    unsorted_tuples = [
        ("z", "a"),
        ("x", "a"),
        ("y", "b"),
        ("x", "b"),
        ("y", "a"),
        ("z", "b"),
    ]
    mi = MultiIndex.from_tuples(unsorted_tuples, names=["one", "two"])
    rows = [[i, 10 * i] for i in range(6)]
    df = DataFrame(rows, index=mi, columns=["one", "two"])

    # GH 16734: not sorted, but no real slicing
    exact_row = df.loc(axis=0)["z", "a"]
    tm.assert_series_equal(exact_row, df.iloc[0])

    msg = (
        "MultiIndex slicing requires the index to be lexsorted: "
        r"slicing on levels \[1\], lexsort depth 0"
    )
    with pytest.raises(UnsortedIndexError, match=msg):
        df.loc(axis=0)["z", slice("a")]

    df.sort_index(inplace=True)
    z_rows = df.loc(axis=0)["z", :]
    assert len(z_rows) == 2

    with pytest.raises(KeyError, match="'q'"):
        df.loc(axis=0)["q", :]
|
||||
|
||||
|
||||
def test_unsortedindex_doc_examples():
    """Replay the documentation example on sorting a MultiIndex."""
    # https://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex
    raw = DataFrame(
        {"jim": [0, 0, 1, 1], "joe": ["x", "x", "z", "y"], "jolie": np.random.rand(4)}
    )
    unsorted = raw.set_index(["jim", "joe"])

    with tm.assert_produces_warning(PerformanceWarning):
        unsorted.loc[(1, "z")]

    msg = r"Key length \(2\) was greater than MultiIndex lexsort depth \(1\)"
    with pytest.raises(UnsortedIndexError, match=msg):
        unsorted.loc[(0, "y"):(1, "z")]

    assert not unsorted.index._is_lexsorted()
    assert unsorted.index._lexsort_depth == 1

    # sort it
    ordered = unsorted.sort_index()
    ordered.loc[(1, "z")]
    ordered.loc[(0, "y"):(1, "z")]

    assert ordered.index._is_lexsorted()
    assert ordered.index._lexsort_depth == 2
|
||||
|
||||
|
||||
def test_reconstruct_sort():
|
||||
|
||||
# starts off lexsorted & monotonic
|
||||
mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
|
||||
assert mi.is_monotonic
|
||||
|
||||
recons = mi._sort_levels_monotonic()
|
||||
assert recons.is_monotonic
|
||||
assert mi is recons
|
||||
|
||||
assert mi.equals(recons)
|
||||
assert Index(mi.values).equals(Index(recons.values))
|
||||
|
||||
# cannot convert to lexsorted
|
||||
mi = MultiIndex.from_tuples(
|
||||
[("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")],
|
||||
names=["one", "two"],
|
||||
)
|
||||
assert not mi.is_monotonic
|
||||
|
||||
recons = mi._sort_levels_monotonic()
|
||||
assert not recons.is_monotonic
|
||||
|
||||
assert mi.equals(recons)
|
||||
assert Index(mi.values).equals(Index(recons.values))
|
||||
|
||||
# cannot convert to lexsorted
|
||||
mi = MultiIndex(
|
||||
levels=[["b", "d", "a"], [1, 2, 3]],
|
||||
codes=[[0, 1, 0, 2], [2, 0, 0, 1]],
|
||||
names=["col1", "col2"],
|
||||
)
|
||||
assert not mi.is_monotonic
|
||||
|
||||
recons = mi._sort_levels_monotonic()
|
||||
assert not recons.is_monotonic
|
||||
|
||||
assert mi.equals(recons)
|
||||
assert Index(mi.values).equals(Index(recons.values))
|
||||
|
||||
|
||||
def test_reconstruct_remove_unused():
    """Filtered-out labels stay in the levels until ``remove_unused_levels``."""
    # xref to GH 2770
    level_names = ["first", "second"]
    frame = DataFrame(
        [["deleteMe", 1, 9], ["keepMe", 2, 9], ["keepMeToo", 3, 9]],
        columns=["first", "second", "third"],
    )
    indexed = frame.set_index(level_names, drop=False)
    filtered = indexed[indexed["first"] != "deleteMe"]

    # removed levels are there
    with_unused = MultiIndex(
        levels=[["deleteMe", "keepMe", "keepMeToo"], [1, 2, 3]],
        codes=[[1, 2], [1, 2]],
        names=level_names,
    )
    tm.assert_index_equal(filtered.index, with_unused)

    pruned = MultiIndex(
        levels=[["keepMe", "keepMeToo"], [2, 3]],
        codes=[[0, 1], [0, 1]],
        names=level_names,
    )
    result = filtered.index.remove_unused_levels()
    tm.assert_index_equal(result, pruned)

    # idempotent
    result2 = result.remove_unused_levels()
    tm.assert_index_equal(result2, pruned)
    assert result2.is_(result)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "first_type,second_type", [("int64", "int64"), ("datetime64[D]", "str")]
)
def test_remove_unused_levels_large(first_type, second_type):
    """Check remove_unused_levels on a large, randomly generated MultiIndex.

    Parameters
    ----------
    first_type, second_type : str
        dtypes the random integer columns are cast to before they become
        the two index levels.
    """
    # GH16556

    # because tests should be deterministic (and this test in particular
    # checks that levels are removed, which is not the case for every
    # random input):
    rng = np.random.RandomState(4)  # seed is arbitrary value that works

    size = 1 << 16
    df = DataFrame(
        {
            "first": rng.randint(0, 1 << 13, size).astype(first_type),
            "second": rng.randint(0, 1 << 10, size).astype(second_type),
            "third": rng.rand(size),
        }
    )
    df = df.groupby(["first", "second"]).sum()
    # filtering rows leaves the grouped index's levels untouched, so some
    # level values become unused
    df = df[df.third < 0.1]

    result = df.index.remove_unused_levels()
    assert len(result.levels[0]) < len(df.index.levels[0])
    assert len(result.levels[1]) < len(df.index.levels[1])
    assert result.equals(df.index)

    # rebuilding the index from the columns must give the same pruned index
    expected = df.reset_index().set_index(["first", "second"]).index
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("level0", [["a", "d", "b"], ["a", "d", "b", "unused"]])
@pytest.mark.parametrize(
    "level1", [["w", "x", "y", "z"], ["w", "x", "y", "z", "unused"]]
)
def test_remove_unused_nan(level0, level1):
    """Check remove_unused_levels when some codes are -1.

    Parameters
    ----------
    level0, level1 : list of str
        Level values for the two levels; some variants carry an extra
        ``"unused"`` entry that no code points at.
    """
    # GH 18417
    mi = MultiIndex(levels=[level0, level1], codes=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]])

    result = mi.remove_unused_levels()
    tm.assert_index_equal(result, mi)
    for level in (0, 1):
        assert "unused" not in result.levels[level]
|
||||
|
||||
|
||||
def test_argsort(idx):
    """Check that ``idx.argsort()`` matches ``argsort`` on ``idx.values``."""
    result = idx.argsort()
    expected = idx.values.argsort()
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_remove_unused_levels_with_nan():
    """Check the levels left by remove_unused_levels when NaN is involved.

    The comparison is made on the string form of the levels, exactly as in
    the expected ``FrozenList`` built below.
    """
    # GH 37510
    mi = Index([(1, np.nan), (3, 4)]).rename(["id1", "id2"])
    mi = mi.set_levels(["a", np.nan], level="id1")
    mi = mi.remove_unused_levels()
    result = mi.levels
    expected = FrozenList([["a", np.nan], [4]])
    assert str(result) == str(expected)
|
@ -0,0 +1,79 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_take(idx):
    """Check that ``take`` matches positional indexing with the same list.

    Also checks that accessing ``freq`` on the index raises AttributeError.
    """
    indexer = [4, 3, 0, 2]
    result = idx.take(indexer)
    expected = idx[indexer]
    assert result.equals(expected)

    # GH 10791
    msg = "'MultiIndex' object has no attribute 'freq'"
    with pytest.raises(AttributeError, match=msg):
        idx.freq
|
||||
|
||||
|
||||
def test_take_invalid_kwargs(idx):
    """Check that ``take`` rejects unknown and unsupported keyword arguments.

    An unknown keyword raises TypeError; the ``out`` and ``mode`` keywords
    raise ValueError.
    """
    indices = [1, 2]

    msg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=msg):
        idx.take(indices, foo=2)

    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, out=indices)

    msg = "the 'mode' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, mode="clip")
|
||||
|
||||
|
||||
def test_take_fill_value():
    """Check how ``take`` treats negative positions with and without filling.

    Cases covered, in order:

    * no ``fill_value``: ``-1`` selects the last entry;
    * ``fill_value=True``: ``-1`` produces a missing ``(nan, NaT)`` entry;
    * ``allow_fill=False``: ``fill_value`` is ignored and ``-1`` again
      selects the last entry;
    * ``fill_value=True`` with a position below ``-1`` raises ValueError;
    * an out-of-bounds position without filling raises IndexError.
    """
    # GH 12631
    vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]]
    idx = pd.MultiIndex.from_product(vals, names=["str", "dt"])

    result = idx.take(np.array([1, 0, -1]))
    exp_vals = [
        ("A", pd.Timestamp("2011-01-02")),
        ("A", pd.Timestamp("2011-01-01")),
        ("B", pd.Timestamp("2011-01-02")),
    ]
    expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"])
    tm.assert_index_equal(result, expected)

    # fill_value
    result = idx.take(np.array([1, 0, -1]), fill_value=True)
    exp_vals = [
        ("A", pd.Timestamp("2011-01-02")),
        ("A", pd.Timestamp("2011-01-01")),
        (np.nan, pd.NaT),
    ]
    expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"])
    tm.assert_index_equal(result, expected)

    # allow_fill=False
    result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
    exp_vals = [
        ("A", pd.Timestamp("2011-01-02")),
        ("A", pd.Timestamp("2011-01-01")),
        ("B", pd.Timestamp("2011-01-02")),
    ]
    expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"])
    tm.assert_index_equal(result, expected)

    msg = "When allow_fill=True and fill_value is not None, all indices must be >= -1"
    with pytest.raises(ValueError, match=msg):
        idx.take(np.array([1, 0, -2]), fill_value=True)
    with pytest.raises(ValueError, match=msg):
        idx.take(np.array([1, 0, -5]), fill_value=True)

    msg = "index -5 is out of bounds for( axis 0 with)? size 4"
    with pytest.raises(IndexError, match=msg):
        idx.take(np.array([1, -5]))
|
Reference in New Issue
Block a user