first commit

This commit is contained in:
Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

View File

@ -0,0 +1,240 @@
import re
import numpy as np
import pytest
from pandas.compat import is_platform_arm
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
IntervalDtype,
)
from pandas import (
CategoricalIndex,
Index,
IntervalIndex,
NaT,
Timedelta,
Timestamp,
interval_range,
)
import pandas._testing as tm
class AstypeTests:
    """Tests common to IntervalIndex with any subtype.

    Every test receives an ``index`` argument; the concrete subclasses in this
    module provide it through a parametrized ``index`` fixture.
    """

    def test_astype_idempotent(self, index):
        """Casting to "interval" or to the index's own dtype gives an equal index."""
        result = index.astype("interval")
        tm.assert_index_equal(result, index)

        result = index.astype(index.dtype)
        tm.assert_index_equal(result, index)

    def test_astype_object(self, index):
        """Casting to object gives an object Index that no longer equals the input."""
        result = index.astype(object)
        expected = Index(index.values, dtype="object")
        tm.assert_index_equal(result, expected)
        assert not result.equals(index)

    def test_astype_category(self, index):
        """Casting to category matches building a CategoricalIndex from the values."""
        result = index.astype("category")
        expected = CategoricalIndex(index.values)
        tm.assert_index_equal(result, expected)

        result = index.astype(CategoricalDtype())
        tm.assert_index_equal(result, expected)

        # non-default params
        # drop the last unique interval so one value is not among the categories
        categories = index.dropna().unique().values[:-1]
        dtype = CategoricalDtype(categories=categories, ordered=True)
        result = index.astype(dtype)
        expected = CategoricalIndex(index.values, categories=categories, ordered=True)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "dtype",
        [
            "int64",
            "uint64",
            "float64",
            "complex128",
            "period[M]",
            "timedelta64",
            "timedelta64[ns]",
            "datetime64",
            "datetime64[ns]",
            "datetime64[ns, US/Eastern]",
        ],
    )
    def test_astype_cannot_cast(self, index, dtype):
        """Casting to a non-interval, non-object, non-category dtype raises TypeError."""
        msg = "Cannot cast IntervalIndex to dtype"
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

    def test_astype_invalid_dtype(self, index):
        """An unknown dtype string raises TypeError."""
        # the pattern accepts either quote style around the dtype name
        msg = "data type [\"']fake_dtype[\"'] not understood"
        with pytest.raises(TypeError, match=msg):
            index.astype("fake_dtype")
class TestIntSubtype(AstypeTests):
    """Tests specific to IntervalIndex with integer-like subtype"""

    # parameters for the ``index`` fixture: one int64 and one uint64 index
    indexes = [
        IntervalIndex.from_breaks(np.arange(-10, 11, dtype="int64")),
        IntervalIndex.from_breaks(np.arange(100, dtype="uint64"), closed="left"),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        """Yield each entry of ``indexes`` in turn."""
        return request.param

    @pytest.mark.parametrize(
        "subtype", ["float64", "datetime64[ns]", "timedelta64[ns]"]
    )
    def test_subtype_conversion(self, index, subtype):
        """Integer intervals cast to ``subtype`` by casting both endpoints."""
        dtype = IntervalDtype(subtype, index.closed)
        result = index.astype(dtype)
        expected = IntervalIndex.from_arrays(
            index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "subtype_start, subtype_end", [("int64", "uint64"), ("uint64", "int64")]
    )
    def test_subtype_integer(self, subtype_start, subtype_end):
        """Non-negative integer intervals convert between int64 and uint64."""
        index = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start))
        dtype = IntervalDtype(subtype_end, index.closed)
        result = index.astype(dtype)
        expected = IntervalIndex.from_arrays(
            index.left.astype(subtype_end),
            index.right.astype(subtype_end),
            closed=index.closed,
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.xfail(reason="GH#15832")
    def test_subtype_integer_errors(self):
        # int64 -> uint64 fails with negative values
        index = interval_range(-10, 10)
        dtype = IntervalDtype("uint64", "right")

        # Until we decide what the exception message _should_ be, we
        # assert something that it should _not_ be.
        # We should _not_ be getting a message suggesting that the -10
        # has been wrapped around to a large-positive integer
        msg = "^(?!(left side of interval must be <= right side))"
        with pytest.raises(ValueError, match=msg):
            index.astype(dtype)
class TestFloatSubtype(AstypeTests):
    """Tests specific to IntervalIndex with float subtype"""

    # parameters for the ``index`` fixture; the second one contains a NaN interval
    indexes = [
        interval_range(-10.0, 10.0, closed="neither"),
        IntervalIndex.from_arrays(
            [-1.5, np.nan, 0.0, 0.0, 1.5], [-0.5, np.nan, 1.0, 1.0, 3.0], closed="both"
        ),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        """Yield each entry of ``indexes`` in turn."""
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, subtype):
        """Whole-number float intervals cast to integer subtypes; NaN raises."""
        index = interval_range(0.0, 10.0)
        dtype = IntervalDtype(subtype, "right")
        result = index.astype(dtype)
        expected = IntervalIndex.from_arrays(
            index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
        )
        tm.assert_index_equal(result, expected)

        # raises with NA
        msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
        with pytest.raises(ValueError, match=msg):
            index.insert(0, np.nan).astype(dtype)

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer_with_non_integer_borders(self, subtype):
        """Fractional endpoints cast the same way as casting left/right directly."""
        index = interval_range(0.0, 3.0, freq=0.25)
        dtype = IntervalDtype(subtype, "right")
        result = index.astype(dtype)
        expected = IntervalIndex.from_arrays(
            index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.xfail(is_platform_arm(), reason="GH 41740")
    def test_subtype_integer_errors(self):
        # float64 -> uint64 fails with negative values
        index = interval_range(-10.0, 10.0)
        dtype = IntervalDtype("uint64", "right")
        # re.escape: the dtype names contain regex metacharacters ("[" and "]")
        msg = re.escape(
            "Cannot convert interval[float64, right] to interval[uint64, right]; "
            "subtypes are incompatible"
        )
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

    @pytest.mark.parametrize("subtype", ["datetime64[ns]", "timedelta64[ns]"])
    def test_subtype_datetimelike(self, index, subtype):
        """Float intervals cannot be cast to datetime-like subtypes."""
        dtype = IntervalDtype(subtype, "right")
        msg = "Cannot convert .* to .*; subtypes are incompatible"
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)
class TestDatetimelikeSubtype(AstypeTests):
    """Tests specific to IntervalIndex with datetime-like subtype"""

    # parameters for the ``index`` fixture: naive/tz-aware Timestamp and
    # Timedelta indexes, two of them with a NaT entry inserted
    indexes = [
        interval_range(Timestamp("2018-01-01"), periods=10, closed="neither"),
        interval_range(Timestamp("2018-01-01"), periods=10).insert(2, NaT),
        interval_range(Timestamp("2018-01-01", tz="US/Eastern"), periods=10),
        interval_range(Timedelta("0 days"), periods=10, closed="both"),
        interval_range(Timedelta("0 days"), periods=10).insert(2, NaT),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        """Yield each entry of ``indexes`` in turn."""
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, index, subtype):
        """Datetime-like intervals cast to integer subtypes via their endpoints."""
        dtype = IntervalDtype(subtype, "right")
        result = index.astype(dtype)
        expected = IntervalIndex.from_arrays(
            index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
        )
        tm.assert_index_equal(result, expected)

    def test_subtype_float(self, index):
        """Datetime-like intervals cannot be cast to a float subtype."""
        dtype = IntervalDtype("float64", "right")
        msg = "Cannot convert .* to .*; subtypes are incompatible"
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

    def test_subtype_datetimelike(self):
        """Datetime and timedelta subtypes cannot be cast into one another."""
        # datetime -> timedelta raises
        dtype = IntervalDtype("timedelta64[ns]", "right")
        msg = "Cannot convert .* to .*; subtypes are incompatible"

        index = interval_range(Timestamp("2018-01-01"), periods=10)
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

        index = interval_range(Timestamp("2018-01-01", tz="CET"), periods=10)
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

        # timedelta -> datetime raises
        dtype = IntervalDtype("datetime64[ns]", "right")
        index = interval_range(Timedelta("0 days"), periods=10)
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

View File

@ -0,0 +1,71 @@
import numpy as np
import pytest
from pandas import IntervalIndex
import pandas._testing as tm
from pandas.tests.indexes.common import Base
class TestBase(Base):
    """
    Tests specific to the shared common index tests; unrelated tests should be placed
    in test_interval.py or the specific test file (e.g. test_astype.py)
    """

    # index class under test, read by ``simple_index`` below
    _index_cls = IntervalIndex

    @pytest.fixture
    def simple_index(self) -> IntervalIndex:
        """Right-closed index of ten unit intervals built from breaks 0..10."""
        return self._index_cls.from_breaks(range(11), closed="right")

    @pytest.fixture
    def index(self):
        """IntervalIndex produced by the ``tm.makeIntervalIndex(10)`` helper."""
        return tm.makeIntervalIndex(10)

    def create_index(self, *, closed="right"):
        """Build ten unit intervals from breaks 0..10 with the given ``closed`` side."""
        return IntervalIndex.from_breaks(range(11), closed=closed)

    def test_repr_max_seq_item_setting(self):
        # override base test: not a valid repr as we use interval notation
        pass

    def test_repr_roundtrip(self):
        # override base test: not a valid repr as we use interval notation
        pass

    def test_take(self, closed):
        """``take`` supports identity selection and repeated positions."""
        index = self.create_index(closed=closed)

        result = index.take(range(10))
        tm.assert_index_equal(result, index)

        result = index.take([0, 0, 1])
        expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2], closed=closed)
        tm.assert_index_equal(result, expected)

    def test_where(self, simple_index, listlike_box):
        """``where`` keeps entries where cond is True and fills NaN elsewhere."""
        klass = listlike_box

        idx = simple_index
        cond = [True] * len(idx)
        expected = idx
        result = expected.where(klass(cond))
        tm.assert_index_equal(result, expected)

        # masking only the first position replaces it with NaN
        cond = [False] + [True] * len(idx[1:])
        expected = IntervalIndex([np.nan] + idx[1:].tolist())
        result = idx.where(klass(cond))
        tm.assert_index_equal(result, expected)

    def test_getitem_2d_deprecated(self, simple_index):
        # GH#30588 multi-dim indexing is deprecated, but raising is also acceptable
        idx = simple_index
        with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"):
            with tm.assert_produces_warning(FutureWarning):
                idx[:, None]
        with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"):
            # GH#44051
            idx[True]
        with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"):
            # GH#44051
            idx[False]

View File

@ -0,0 +1,473 @@
from functools import partial
import numpy as np
import pytest
from pandas.core.dtypes.common import is_categorical_dtype
from pandas.core.dtypes.dtypes import IntervalDtype
from pandas import (
Categorical,
CategoricalIndex,
Index,
Interval,
IntervalIndex,
date_range,
notna,
period_range,
timedelta_range,
)
import pandas._testing as tm
from pandas.core.api import (
Float64Index,
Int64Index,
)
from pandas.core.arrays import IntervalArray
import pandas.core.common as com
@pytest.fixture(params=[None, "foo"])
def name(request):
    """Index ``name`` values used by the constructor tests: unnamed and "foo"."""
    return request.param
class ConstructorTests:
    """
    Common tests for all variations of IntervalIndex construction. Input data
    to be supplied in breaks format, then converted by the subclass method
    get_kwargs_from_breaks to the expected format.

    Each subclass also defines the ``constructor`` fixture requested below.
    """

    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
    @pytest.mark.parametrize(
        "breaks",
        [
            [3, 14, 15, 92, 653],
            np.arange(10, dtype="int64"),
            Int64Index(range(-10, 11)),
            Float64Index(np.arange(20, 30, 0.5)),
            date_range("20180101", periods=10),
            date_range("20180101", periods=10, tz="US/Eastern"),
            timedelta_range("1 day", periods=10),
        ],
    )
    def test_constructor(self, constructor, breaks, closed, name):
        """Constructed index reports the closed side, name, subtype and endpoints."""
        result_kwargs = self.get_kwargs_from_breaks(breaks, closed)
        result = constructor(closed=closed, name=name, **result_kwargs)

        assert result.closed == closed
        assert result.name == name
        # a plain list has no ``dtype`` attribute; int64 is expected in that case
        assert result.dtype.subtype == getattr(breaks, "dtype", "int64")
        tm.assert_index_equal(result.left, Index(breaks[:-1]))
        tm.assert_index_equal(result.right, Index(breaks[1:]))

    @pytest.mark.parametrize(
        "breaks, subtype",
        [
            (Int64Index([0, 1, 2, 3, 4]), "float64"),
            (Int64Index([0, 1, 2, 3, 4]), "datetime64[ns]"),
            (Int64Index([0, 1, 2, 3, 4]), "timedelta64[ns]"),
            (Float64Index([0, 1, 2, 3, 4]), "int64"),
            (date_range("2017-01-01", periods=5), "int64"),
            (timedelta_range("1 day", periods=5), "int64"),
        ],
    )
    def test_constructor_dtype(self, constructor, breaks, subtype):
        # GH 19262: conversion via dtype parameter
        expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype))
        expected = constructor(**expected_kwargs)

        result_kwargs = self.get_kwargs_from_breaks(breaks)
        iv_dtype = IntervalDtype(subtype, "right")
        # the dtype may be given as an IntervalDtype instance or as its string form
        for dtype in (iv_dtype, str(iv_dtype)):
            result = constructor(dtype=dtype, **result_kwargs)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            # one entry per distinct breaks dtype
            Int64Index([0, 1, 2, 3, 4]),
            Float64Index([0, 1, 2, 3, 4]),
            date_range("2017-01-01", periods=5),
            timedelta_range("1 day", periods=5),
        ],
    )
    def test_constructor_pass_closed(self, constructor, breaks):
        # not passing closed to IntervalDtype, but to IntervalArray constructor
        warn = None
        if isinstance(constructor, partial) and constructor.func is Index:
            # passing kwargs to Index is deprecated
            warn = FutureWarning

        iv_dtype = IntervalDtype(breaks.dtype)

        result_kwargs = self.get_kwargs_from_breaks(breaks)

        for dtype in (iv_dtype, str(iv_dtype)):
            with tm.assert_produces_warning(warn):
                result = constructor(dtype=dtype, closed="left", **result_kwargs)
            assert result.dtype.closed == "left"

    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
    @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
    def test_constructor_nan(self, constructor, breaks, closed):
        # GH 18421
        result_kwargs = self.get_kwargs_from_breaks(breaks)
        result = constructor(closed=closed, **result_kwargs)

        expected_subtype = np.float64
        # n breaks describe n - 1 intervals, all of them NaN here
        expected_values = np.array(breaks[:-1], dtype=object)

        assert result.closed == closed
        assert result.dtype.subtype == expected_subtype
        tm.assert_numpy_array_equal(np.array(result), expected_values)

    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
    @pytest.mark.parametrize(
        "breaks",
        [
            [],
            np.array([], dtype="int64"),
            np.array([], dtype="float64"),
            np.array([], dtype="datetime64[ns]"),
            np.array([], dtype="timedelta64[ns]"),
        ],
    )
    def test_constructor_empty(self, constructor, breaks, closed):
        # GH 18421
        result_kwargs = self.get_kwargs_from_breaks(breaks)
        result = constructor(closed=closed, **result_kwargs)

        expected_values = np.array([], dtype=object)
        # an empty list has no ``dtype`` attribute; int64 is expected in that case
        expected_subtype = getattr(breaks, "dtype", np.int64)

        assert result.empty
        assert result.closed == closed
        assert result.dtype.subtype == expected_subtype
        tm.assert_numpy_array_equal(np.array(result), expected_values)

    @pytest.mark.parametrize(
        "breaks",
        [
            tuple("0123456789"),
            list("abcdefghij"),
            np.array(list("abcdefghij"), dtype=object),
            np.array(list("abcdefghij"), dtype="<U1"),
        ],
    )
    def test_constructor_string(self, constructor, breaks):
        # GH 19016
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=msg):
            constructor(**self.get_kwargs_from_breaks(breaks))

    @pytest.mark.parametrize("cat_constructor", [Categorical, CategoricalIndex])
    def test_constructor_categorical_valid(self, constructor, cat_constructor):
        # GH 21243/21253

        breaks = np.arange(10, dtype="int64")
        expected = IntervalIndex.from_breaks(breaks)

        cat_breaks = cat_constructor(breaks)
        result_kwargs = self.get_kwargs_from_breaks(cat_breaks)
        result = constructor(**result_kwargs)
        tm.assert_index_equal(result, expected)

    def test_generic_errors(self, constructor):
        """Invalid ``closed``, dtype, period and decreasing inputs raise."""
        # filler input data to be used when supplying invalid kwargs
        filler = self.get_kwargs_from_breaks(range(10))

        # invalid closed
        msg = "closed must be one of 'right', 'left', 'both', 'neither'"
        with pytest.raises(ValueError, match=msg):
            constructor(closed="invalid", **filler)

        # unsupported dtype
        msg = "dtype must be an IntervalDtype, got int64"
        with pytest.raises(TypeError, match=msg):
            constructor(dtype="int64", **filler)

        # invalid dtype
        msg = "data type [\"']invalid[\"'] not understood"
        with pytest.raises(TypeError, match=msg):
            constructor(dtype="invalid", **filler)

        # no point in nesting periods in an IntervalIndex
        periods = period_range("2000-01-01", periods=10)
        periods_kwargs = self.get_kwargs_from_breaks(periods)
        msg = "Period dtypes are not supported, use a PeriodIndex instead"
        with pytest.raises(ValueError, match=msg):
            constructor(**periods_kwargs)

        # decreasing values
        decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1))
        msg = "left side of interval must be <= right side"
        with pytest.raises(ValueError, match=msg):
            constructor(**decreasing_kwargs)
class TestFromArrays(ConstructorTests):
    """Tests specific to IntervalIndex.from_arrays"""

    @pytest.fixture
    def constructor(self):
        """Constructor under test for the shared ConstructorTests."""
        return IntervalIndex.from_arrays

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Convert intervals in breaks format to a dictionary of kwargs
        specific to the format expected by IntervalIndex.from_arrays.

        ``closed`` is accepted for signature parity but is not used here.
        """
        return {"left": breaks[:-1], "right": breaks[1:]}

    def test_constructor_errors(self):
        """Categorical string data and unequal-length inputs are rejected."""
        # GH 19016: categorical data
        data = Categorical(list("01234abcde"), ordered=True)
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=msg):
            IntervalIndex.from_arrays(data[:-1], data[1:])

        # unequal length
        left = [0, 1, 2]
        right = [2, 3]
        msg = "left and right must have the same length"
        with pytest.raises(ValueError, match=msg):
            IntervalIndex.from_arrays(left, right)

    @pytest.mark.parametrize(
        "left_subtype, right_subtype", [(np.int64, np.float64), (np.float64, np.int64)]
    )
    def test_mixed_float_int(self, left_subtype, right_subtype):
        """mixed int/float left/right results in float for both sides"""
        left = np.arange(9, dtype=left_subtype)
        right = np.arange(1, 10, dtype=right_subtype)
        result = IntervalIndex.from_arrays(left, right)

        expected_left = Float64Index(left)
        expected_right = Float64Index(right)
        expected_subtype = np.float64

        tm.assert_index_equal(result.left, expected_left)
        tm.assert_index_equal(result.right, expected_right)
        assert result.dtype.subtype == expected_subtype
class TestFromBreaks(ConstructorTests):
    """Tests specific to IntervalIndex.from_breaks"""

    @pytest.fixture
    def constructor(self):
        """Constructor under test for the shared ConstructorTests."""
        return IntervalIndex.from_breaks

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Convert intervals in breaks format to a dictionary of kwargs
        specific to the format expected by IntervalIndex.from_breaks.

        ``closed`` is accepted for signature parity but is not used here.
        """
        return {"breaks": breaks}

    def test_constructor_errors(self):
        """Categorical string data is rejected."""
        # GH 19016: categorical data
        data = Categorical(list("01234abcde"), ordered=True)
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=msg):
            IntervalIndex.from_breaks(data)

    def test_length_one(self):
        """breaks of length one produce an empty IntervalIndex"""
        breaks = [0]
        result = IntervalIndex.from_breaks(breaks)
        expected = IntervalIndex.from_breaks([])
        tm.assert_index_equal(result, expected)

    def test_left_right_dont_share_data(self):
        # GH#36310
        breaks = np.arange(5)
        result = IntervalIndex.from_breaks(breaks)._data
        # either left owns its buffer, or left and right view different buffers
        assert result._left.base is None or result._left.base is not result._right.base
class TestFromTuples(ConstructorTests):
    """Tests specific to IntervalIndex.from_tuples"""

    @pytest.fixture
    def constructor(self):
        """Constructor under test for the shared ConstructorTests."""
        return IntervalIndex.from_tuples

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Convert intervals in breaks format to a dictionary of kwargs
        specific to the format expected by IntervalIndex.from_tuples.

        ``closed`` is accepted for signature parity but is not used here.
        """
        if len(breaks) == 0:
            return {"data": breaks}

        tuples = list(zip(breaks[:-1], breaks[1:]))
        # keep the container kind of the input: list/tuple -> list of tuples,
        # categorical -> same categorical type, anything else -> tuple-safe array
        if isinstance(breaks, (list, tuple)):
            return {"data": tuples}
        elif is_categorical_dtype(breaks):
            return {"data": breaks._constructor(tuples)}
        return {"data": com.asarray_tuplesafe(tuples)}

    def test_constructor_errors(self):
        """Non-tuple items and tuples of the wrong length are rejected."""
        # non-tuple
        tuples = [(0, 1), 2, (3, 4)]
        # this message has no placeholder, so it is used as the pattern directly
        msg = "IntervalIndex.from_tuples received an invalid item, 2"
        with pytest.raises(TypeError, match=msg):
            IntervalIndex.from_tuples(tuples)

        # too few/many items
        # NOTE(review): the list is interpolated unescaped, so its brackets and
        # parentheses act as regex syntax rather than literal text - confirm the
        # intended pattern before tightening this match.
        tuples = [(0, 1), (2,), (3, 4)]
        msg = "IntervalIndex.from_tuples requires tuples of length 2, got {t}"
        with pytest.raises(ValueError, match=msg.format(t=tuples)):
            IntervalIndex.from_tuples(tuples)

        tuples = [(0, 1), (2, 3, 4), (5, 6)]
        with pytest.raises(ValueError, match=msg.format(t=tuples)):
            IntervalIndex.from_tuples(tuples)

    def test_na_tuples(self):
        # tuple (NA, NA) evaluates the same as NA as an element
        na_tuple = [(0, 1), (np.nan, np.nan), (2, 3)]
        idx_na_tuple = IntervalIndex.from_tuples(na_tuple)
        idx_na_element = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
        tm.assert_index_equal(idx_na_tuple, idx_na_element)
class TestClassConstructors(ConstructorTests):
    """Tests specific to the IntervalIndex/Index constructors"""

    @pytest.fixture(
        params=[IntervalIndex, partial(Index, dtype="interval")],
        ids=["IntervalIndex", "Index"],
    )
    def constructor(self, request):
        """Either ``IntervalIndex`` or ``Index`` pre-bound with dtype="interval"."""
        return request.param

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Convert intervals in breaks format to a dictionary of kwargs
        specific to the format expected by the IntervalIndex/Index constructors.
        """
        if len(breaks) == 0:
            return {"data": breaks}

        # missing left endpoints are passed through as-is instead of being
        # wrapped in an Interval
        ivs = [
            Interval(left, right, closed) if notna(left) else left
            for left, right in zip(breaks[:-1], breaks[1:])
        ]

        if isinstance(breaks, list):
            return {"data": ivs}
        elif is_categorical_dtype(breaks):
            return {"data": breaks._constructor(ivs)}
        return {"data": np.array(ivs, dtype=object)}

    def test_generic_errors(self, constructor):
        """
        override the base class implementation since errors are handled
        differently; checks unnecessary since caught at the Interval level
        """
        pass

    def test_constructor_string(self):
        # GH23013
        # When forming the interval from breaks,
        # the interval of strings is already forbidden.
        pass

    def test_constructor_errors(self, constructor):
        """Mixed closed sides, scalars and non-interval items are rejected."""
        # mismatched closed within intervals with no constructor override
        ivs = [Interval(0, 1, closed="right"), Interval(2, 3, closed="left")]
        msg = "intervals must all be closed on the same side"
        with pytest.raises(ValueError, match=msg):
            constructor(ivs)

        # scalar
        msg = (
            r"IntervalIndex\(...\) must be called with a collection of "
            "some kind, 5 was passed"
        )
        with pytest.raises(TypeError, match=msg):
            constructor(5)

        # not an interval; dtype depends on 32bit/windows builds
        msg = "type <class 'numpy.int(32|64)'> with value 0 is not an interval"
        with pytest.raises(TypeError, match=msg):
            constructor([0, 1])

    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
    @pytest.mark.parametrize(
        "data, closed",
        [
            ([], "both"),
            ([np.nan, np.nan], "neither"),
            (
                [Interval(0, 3, closed="neither"), Interval(2, 5, closed="neither")],
                "left",
            ),
            (
                [Interval(0, 3, closed="left"), Interval(2, 5, closed="right")],
                "neither",
            ),
            (IntervalIndex.from_breaks(range(5), closed="both"), "right"),
        ],
    )
    def test_override_inferred_closed(self, constructor, data, closed):
        # GH 19370
        if isinstance(data, IntervalIndex):
            tuples = data.to_tuples()
        else:
            tuples = [(iv.left, iv.right) if notna(iv) else iv for iv in data]
        expected = IntervalIndex.from_tuples(tuples, closed=closed)
        result = constructor(data, closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "values_constructor", [list, np.array, IntervalIndex, IntervalArray]
    )
    def test_index_object_dtype(self, values_constructor):
        # Index(intervals, dtype=object) is an Index (not an IntervalIndex)
        intervals = [Interval(0, 1), Interval(1, 2), Interval(2, 3)]
        values = values_constructor(intervals)
        result = Index(values, dtype=object)

        assert type(result) is Index
        tm.assert_numpy_array_equal(result.values, np.array(values))

    def test_index_mixed_closed(self):
        # GH27172
        intervals = [
            Interval(0, 1, closed="left"),
            Interval(1, 2, closed="right"),
            Interval(2, 3, closed="neither"),
            Interval(3, 4, closed="both"),
        ]
        result = Index(intervals)
        expected = Index(intervals, dtype=object)
        tm.assert_index_equal(result, expected)
def test_dtype_closed_mismatch():
    """A ``closed`` keyword that contradicts ``dtype.closed`` raises ValueError."""
    # GH#38394 closed specified in both dtype and IntervalIndex constructor

    dtype = IntervalDtype(np.int64, "left")

    msg = "closed keyword does not match dtype.closed"
    with pytest.raises(ValueError, match=msg):
        IntervalIndex([], dtype=dtype, closed="neither")

    with pytest.raises(ValueError, match=msg):
        IntervalArray([], dtype=dtype, closed="neither")

View File

@ -0,0 +1,36 @@
import numpy as np
from pandas import (
IntervalIndex,
date_range,
)
class TestEquals:
    """Tests for ``IntervalIndex.equals``."""

    def test_equals(self, closed):
        """``equals`` ignores names but not type, values or closed side."""
        expected = IntervalIndex.from_breaks(np.arange(5), closed=closed)
        assert expected.equals(expected)
        assert expected.equals(expected.copy())

        # same intervals held in a different container are not equal
        assert not expected.equals(expected.astype(object))
        assert not expected.equals(np.array(expected))
        assert not expected.equals(list(expected))

        assert not expected.equals([1, 2])
        assert not expected.equals(np.array([1, 2]))
        assert not expected.equals(date_range("20130101", periods=2))

        # the index name does not take part in the comparison
        expected_name1 = IntervalIndex.from_breaks(
            np.arange(5), closed=closed, name="foo"
        )
        expected_name2 = IntervalIndex.from_breaks(
            np.arange(5), closed=closed, name="bar"
        )
        assert expected.equals(expected_name1)
        assert expected_name1.equals(expected_name2)

        # identical breaks with any other closed side are not equal
        for other_closed in {"left", "right", "both", "neither"} - {closed}:
            expected_other_closed = IntervalIndex.from_breaks(
                np.arange(5), closed=other_closed
            )
            assert not expected.equals(expected_other_closed)

View File

@ -0,0 +1,105 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
Interval,
IntervalIndex,
Series,
Timedelta,
Timestamp,
)
import pandas._testing as tm
from pandas.core.api import Float64Index
class TestIntervalIndexRendering:
    """Tests for the text rendering of objects indexed by an IntervalIndex.

    The expected strings are column-aligned, so the runs of spaces inside them
    are significant and must match pandas' padding exactly.
    """

    def test_frame_repr(self):
        # https://github.com/pandas-dev/pandas/pull/24134/files
        df = DataFrame(
            {"A": [1, 2, 3, 4]}, index=IntervalIndex.from_breaks([0, 1, 2, 3, 4])
        )
        result = repr(df)
        expected = "        A\n(0, 1]  1\n(1, 2]  2\n(2, 3]  3\n(3, 4]  4"
        assert result == expected

    @pytest.mark.parametrize(
        "constructor,expected",
        [
            (
                Series,
                (
                    "(0.0, 1.0]    a\n"
                    "NaN           b\n"
                    "(2.0, 3.0]    c\n"
                    "dtype: object"
                ),
            ),
            (DataFrame, ("            0\n(0.0, 1.0]  a\nNaN         b\n(2.0, 3.0]  c")),
        ],
    )
    def test_repr_missing(self, constructor, expected):
        # GH 25984
        index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
        obj = constructor(list("abc"), index=index)
        result = repr(obj)
        assert result == expected

    def test_repr_floats(self):
        # GH 32553

        markers = Series(
            ["foo", "bar"],
            index=IntervalIndex(
                [
                    Interval(left, right)
                    for left, right in zip(
                        Float64Index([329.973, 345.137], dtype="float64"),
                        Float64Index([345.137, 360.191], dtype="float64"),
                    )
                ]
            ),
        )
        result = str(markers)
        expected = "(329.973, 345.137]    foo\n(345.137, 360.191]    bar\ndtype: object"
        assert result == expected

    @pytest.mark.parametrize(
        "tuples, closed, expected_data",
        [
            ([(0, 1), (1, 2), (2, 3)], "left", ["[0, 1)", "[1, 2)", "[2, 3)"]),
            (
                [(0.5, 1.0), np.nan, (2.0, 3.0)],
                "right",
                ["(0.5, 1.0]", "NaN", "(2.0, 3.0]"],
            ),
            (
                [
                    (Timestamp("20180101"), Timestamp("20180102")),
                    np.nan,
                    ((Timestamp("20180102"), Timestamp("20180103"))),
                ],
                "both",
                ["[2018-01-01, 2018-01-02]", "NaN", "[2018-01-02, 2018-01-03]"],
            ),
            (
                [
                    (Timedelta("0 days"), Timedelta("1 days")),
                    (Timedelta("1 days"), Timedelta("2 days")),
                    np.nan,
                ],
                "neither",
                [
                    "(0 days 00:00:00, 1 days 00:00:00)",
                    "(1 days 00:00:00, 2 days 00:00:00)",
                    "NaN",
                ],
            ),
        ],
    )
    def test_to_native_types(self, tuples, closed, expected_data):
        # GH 28210
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        result = index._format_native_types()
        expected = np.array(expected_data)
        tm.assert_numpy_array_equal(result, expected)

View File

@ -0,0 +1,587 @@
import re
import numpy as np
import pytest
from pandas.errors import InvalidIndexError
from pandas import (
NA,
CategoricalIndex,
Index,
Interval,
IntervalIndex,
MultiIndex,
NaT,
Series,
Timedelta,
date_range,
timedelta_range,
)
import pandas._testing as tm
class TestGetLoc:
    """Tests for ``IntervalIndex.get_loc`` with interval, scalar and NA keys."""

    @pytest.mark.parametrize("side", ["right", "left", "both", "neither"])
    def test_get_loc_interval(self, closed, side):
        """An Interval key matches only an identical interval, else KeyError."""
        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3], [-1, 4]]:
            # if get_loc is supplied an interval, it should only search
            # for exact matches, not overlaps or covers, else KeyError.
            msg = re.escape(f"Interval({bound[0]}, {bound[1]}, closed='{side}')")
            if closed == side:
                if bound == [0, 1]:
                    assert idx.get_loc(Interval(0, 1, closed=side)) == 0
                elif bound == [2, 3]:
                    assert idx.get_loc(Interval(2, 3, closed=side)) == 1
                else:
                    with pytest.raises(KeyError, match=msg):
                        idx.get_loc(Interval(*bound, closed=side))
            else:
                # a different closed side never matches, whatever the bounds
                with pytest.raises(KeyError, match=msg):
                    idx.get_loc(Interval(*bound, closed=side))

    @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5])
    def test_get_loc_scalar(self, closed, scalar):
        """A scalar key resolves to the interval containing it, else KeyError."""
        # correct = {side: {query: answer}}.
        # If query is not in the dict, that query should raise a KeyError
        correct = {
            "right": {0.5: 0, 1: 0, 2.5: 1, 3: 1},
            "left": {0: 0, 0.5: 0, 2: 1, 2.5: 1},
            "both": {0: 0, 0.5: 0, 1: 0, 2: 1, 2.5: 1, 3: 1},
            "neither": {0.5: 0, 2.5: 1},
        }

        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        # if get_loc is supplied a scalar, it should return the index of
        # the interval which contains the scalar, or KeyError.
        if scalar in correct[closed]:
            assert idx.get_loc(scalar) == correct[closed][scalar]
        else:
            with pytest.raises(KeyError, match=str(scalar)):
                idx.get_loc(scalar)

    @pytest.mark.parametrize("scalar", [-1, 0, 0.5, 3, 4.5, 5, 6])
    def test_get_loc_length_one_scalar(self, scalar, closed):
        # GH 20921
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        # Interval containment decides which branch is expected
        if scalar in index[0]:
            result = index.get_loc(scalar)
            assert result == 0
        else:
            with pytest.raises(KeyError, match=str(scalar)):
                index.get_loc(scalar)

    @pytest.mark.parametrize("other_closed", ["left", "right", "both", "neither"])
    @pytest.mark.parametrize("left, right", [(0, 5), (-1, 4), (-1, 6), (6, 7)])
    def test_get_loc_length_one_interval(self, left, right, closed, other_closed):
        # GH 20921
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        interval = Interval(left, right, closed=other_closed)
        if interval == index[0]:
            result = index.get_loc(interval)
            assert result == 0
        else:
            with pytest.raises(
                KeyError,
                match=re.escape(f"Interval({left}, {right}, closed='{other_closed}')"),
            ):
                index.get_loc(interval)

    # Make consistent with test_interval_new.py (see #16316, #16386)
    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_datetimelike_nonoverlapping(self, breaks):
        # GH 20636
        # nonoverlapping = IntervalIndex method and no i8 conversion
        index = IntervalIndex.from_breaks(breaks)

        value = index[0].mid
        result = index.get_loc(value)
        expected = 0
        assert result == expected

        interval = Interval(index[0].left, index[0].right)
        result = index.get_loc(interval)
        expected = 0
        assert result == expected

    @pytest.mark.parametrize(
        "arrays",
        [
            (date_range("20180101", periods=4), date_range("20180103", periods=4)),
            (
                date_range("20180101", periods=4, tz="US/Eastern"),
                date_range("20180103", periods=4, tz="US/Eastern"),
            ),
            (
                timedelta_range("0 days", periods=4),
                timedelta_range("2 days", periods=4),
            ),
        ],
        ids=lambda x: str(x[0].dtype),
    )
    def test_get_loc_datetimelike_overlapping(self, arrays):
        # GH 20636
        index = IntervalIndex.from_arrays(*arrays)

        # this point falls inside the first two (overlapping) intervals
        value = index[0].mid + Timedelta("12 hours")
        result = index.get_loc(value)
        expected = slice(0, 2, None)
        assert result == expected

        interval = Interval(index[0].left, index[0].right)
        result = index.get_loc(interval)
        expected = 0
        assert result == expected

    @pytest.mark.parametrize(
        "values",
        [
            date_range("2018-01-04", periods=4, freq="-1D"),
            date_range("2018-01-04", periods=4, freq="-1D", tz="US/Eastern"),
            timedelta_range("3 days", periods=4, freq="-1D"),
            np.arange(3.0, -1.0, -1.0),
            np.arange(3, -1, -1),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_decreasing(self, values):
        # GH 25860
        # values decrease, so values[1:] are the left and values[:-1] the right edges
        index = IntervalIndex.from_arrays(values[1:], values[:-1])
        result = index.get_loc(index[0])
        expected = 0
        assert result == expected

    @pytest.mark.parametrize("key", [[5], (2, 3)])
    def test_get_loc_non_scalar_errors(self, key):
        # GH 31117
        idx = IntervalIndex.from_tuples([(1, 3), (2, 4), (3, 5), (7, 10), (3, 10)])

        msg = str(key)
        with pytest.raises(InvalidIndexError, match=msg):
            idx.get_loc(key)

    def test_get_indexer_with_nans(self):
        # GH#41831
        # NOTE(review): despite its name this test exercises ``get_loc``.
        index = IntervalIndex([np.nan, Interval(1, 2), np.nan])

        # NA-like keys yield a boolean mask marking the missing positions
        expected = np.array([True, False, True])
        for key in [None, np.nan, NA]:
            assert key in index
            result = index.get_loc(key)
            tm.assert_numpy_array_equal(result, expected)

        # datetime-like NA values are not accepted as keys here
        for key in [NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns")]:
            with pytest.raises(KeyError, match=str(key)):
                index.get_loc(key)
class TestGetIndexer:
    """Tests for get_indexer, get_indexer_non_unique and get_indexer_for."""

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([Interval(2, 4, closed="right")], [1]),
            ([Interval(2, 4, closed="left")], [-1]),
            ([Interval(2, 4, closed="both")], [-1]),
            ([Interval(2, 4, closed="neither")], [-1]),
            ([Interval(1, 4, closed="right")], [-1]),
            ([Interval(0, 4, closed="right")], [-1]),
            ([Interval(0.5, 1.5, closed="right")], [-1]),
            ([Interval(2, 4, closed="right"), Interval(0, 1, closed="right")], [1, -1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="right")], [1, 1]),
            ([Interval(5, 7, closed="right"), Interval(2, 4, closed="right")], [2, 1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="left")], [1, -1]),
        ],
    )
    def test_get_indexer_with_interval(self, query, expected):
        """Interval queries against a right-closed, non-overlapping index."""
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (5, 7)], closed="right")

        actual = index.get_indexer(query)
        wanted = np.array(expected, dtype="intp")
        tm.assert_numpy_array_equal(actual, wanted)

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], [-1]),
            ([0], [-1]),
            ([0.5], [0]),
            ([1], [0]),
            ([1.5], [1]),
            ([2], [1]),
            ([2.5], [-1]),
            ([3], [-1]),
            ([3.5], [2]),
            ([4], [2]),
            ([4.5], [-1]),
            ([1, 2], [0, 1]),
            ([1, 2, 3], [0, 1, -1]),
            ([1, 2, 3, 4], [0, 1, -1, 2]),
            ([1, 2, 3, 4, 2], [0, 1, -1, 2, 1]),
        ],
    )
    def test_get_indexer_with_int_and_float(self, query, expected):
        """Scalar (int/float) queries against a right-closed index."""
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed="right")

        actual = index.get_indexer(query)
        wanted = np.array(expected, dtype="intp")
        tm.assert_numpy_array_equal(actual, wanted)

    @pytest.mark.parametrize("item", [[3], np.arange(0.5, 5, 0.5)])
    def test_get_indexer_length_one(self, item, closed):
        """Every queried point maps to position 0 of a one-element index."""
        # GH 17284
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)

        actual = index.get_indexer(item)
        wanted = np.array([0] * len(item), dtype="intp")
        tm.assert_numpy_array_equal(actual, wanted)

    @pytest.mark.parametrize("size", [1, 5])
    def test_get_indexer_length_one_interval(self, size, closed):
        """Repeated copies of the single interval all map to position 0."""
        # GH 17284
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        queries = [Interval(0, 5, closed)] * size

        actual = index.get_indexer(queries)
        wanted = np.array([0] * size, dtype="intp")
        tm.assert_numpy_array_equal(actual, wanted)

    @pytest.mark.parametrize(
        "target",
        [
            IntervalIndex.from_tuples([(7, 8), (1, 2), (3, 4), (0, 1)]),
            IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4), np.nan]),
            IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed="both"),
            [-1, 0, 0.5, 1, 2, 2.5, np.nan],
            ["foo", "foo", "bar", "baz"],
        ],
    )
    def test_get_indexer_categorical(self, target, ordered):
        """A categorical target must index the same as the plain target."""
        # GH 30063: categorical and non-categorical results should be consistent
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])
        as_categorical = CategoricalIndex(target, ordered=ordered)

        from_categorical = index.get_indexer(as_categorical)
        from_plain = index.get_indexer(target)
        tm.assert_numpy_array_equal(from_categorical, from_plain)

    def test_get_indexer_categorical_with_nans(self):
        """Categorical targets when both index and target contain NaN."""
        # GH#41934 nans in both index and in target
        base = IntervalIndex.from_breaks(range(5))
        with_nan = base.append(IntervalIndex([np.nan]))
        cat_with_nan = CategoricalIndex(with_nan)

        actual = with_nan.get_indexer(cat_with_nan)
        tm.assert_numpy_array_equal(actual, np.arange(5, dtype=np.intp))

        # not-all-matches
        actual = with_nan[1:].get_indexer(cat_with_nan[::-1])
        wanted = np.array([3, 2, 1, 0, -1], dtype=np.intp)
        tm.assert_numpy_array_equal(actual, wanted)

        # non-unique target, non-unique nans
        actual = with_nan.get_indexer(cat_with_nan.append(cat_with_nan))
        wanted = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(actual, wanted)

    @pytest.mark.parametrize(
        "tuples, closed",
        [
            ([(0, 2), (1, 3), (3, 4)], "neither"),
            ([(0, 5), (1, 4), (6, 7)], "left"),
            ([(0, 1), (0, 1), (1, 2)], "right"),
            ([(0, 1), (2, 3), (3, 4)], "both"),
        ],
    )
    def test_get_indexer_errors(self, tuples, closed):
        """get_indexer raises InvalidIndexError for each of these indexes."""
        # IntervalIndex needs non-overlapping for uniqueness when querying
        index = IntervalIndex.from_tuples(tuples, closed=closed)

        msg = (
            "cannot handle overlapping indices; use "
            "IntervalIndex.get_indexer_non_unique"
        )
        with pytest.raises(InvalidIndexError, match=msg):
            index.get_indexer([0, 2])

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], ([-1], [0])),
            ([0], ([0], [])),
            ([0.5], ([0], [])),
            ([1], ([0, 1], [])),
            ([1.5], ([0, 1], [])),
            ([2], ([0, 1, 2], [])),
            ([2.5], ([1, 2], [])),
            ([3], ([2], [])),
            ([3.5], ([2], [])),
            ([4], ([-1], [0])),
            ([4.5], ([-1], [0])),
            ([1, 2], ([0, 1, 0, 1, 2], [])),
            ([1, 2, 3], ([0, 1, 0, 1, 2, 2], [])),
            ([1, 2, 3, 4], ([0, 1, 0, 1, 2, 2, -1], [3])),
            ([1, 2, 3, 4, 2], ([0, 1, 0, 1, 2, 2, -1, 0, 1, 2], [3])),
        ],
    )
    def test_get_indexer_non_unique_with_int_and_float(self, query, expected):
        """Scalar queries on an overlapping, left-closed index."""
        index = IntervalIndex.from_tuples([(0, 2.5), (1, 3), (2, 4)], closed="left")

        indexer, missing = index.get_indexer_non_unique(query)
        wanted_indexer, wanted_missing = (
            np.array(part, dtype="intp") for part in expected
        )

        tm.assert_numpy_array_equal(indexer, wanted_indexer)
        tm.assert_numpy_array_equal(missing, wanted_missing)

        # TODO we may also want to test get_indexer for the case when
        # the intervals are duplicated, decreasing, non-monotonic, etc..

    def test_get_indexer_non_monotonic(self):
        """get_indexer on a non-monotonic index, with another index as target."""
        # GH 16410
        shuffled = IntervalIndex.from_tuples([(2, 3), (4, 5), (0, 1)])
        target = IntervalIndex.from_tuples([(0, 1), (2, 3), (6, 7), (8, 9)])

        actual = shuffled.get_indexer(target)
        tm.assert_numpy_array_equal(actual, np.array([2, 0, -1, -1], dtype=np.intp))

        actual = shuffled.get_indexer(shuffled[1:])
        tm.assert_numpy_array_equal(actual, np.array([1, 2], dtype=np.intp))

    def test_get_indexer_with_nans(self):
        """get_indexer_for with a NaN target on an all-NaN, non-unique index."""
        # GH#41831
        index = IntervalIndex([np.nan, np.nan])
        target = IntervalIndex([np.nan])

        assert not index._index_as_unique

        actual = index.get_indexer_for(target)
        tm.assert_numpy_array_equal(actual, np.array([0, 1], dtype=np.intp))

    def test_get_index_non_unique_non_monotonic(self):
        """get_indexer_non_unique returns every position of a repeated interval."""
        # GH#44084 (root cause)
        index = IntervalIndex.from_tuples(
            [(0.0, 1.0), (1.0, 2.0), (0.0, 1.0), (1.0, 2.0)]
        )

        indexer, _ = index.get_indexer_non_unique([Interval(1.0, 2.0)])
        tm.assert_numpy_array_equal(indexer, np.array([1, 3], dtype=np.intp))

    def test_get_indexer_multiindex_with_intervals(self):
        """get_indexer_for on the interval level values of a MultiIndex product."""
        # GH#44084 (MultiIndex case as reported)
        interval_index = IntervalIndex.from_tuples(
            [(2.0, 3.0), (0.0, 1.0), (1.0, 2.0)], name="interval"
        )
        foo_index = Index([1, 2, 3], name="foo")
        multi_index = MultiIndex.from_product([foo_index, interval_index])

        level_values = multi_index.get_level_values("interval")
        actual = level_values.get_indexer_for([Interval(0.0, 1.0)])

        tm.assert_numpy_array_equal(actual, np.array([1, 4, 7], dtype=np.intp))
class TestSliceLocs:
    """Tests for IntervalIndex.slice_locs with Interval and scalar bounds."""

    def test_slice_locs_with_interval(self):
        """slice_locs with Interval bounds over several orderings of the index."""
        low = Interval(0, 2)
        high = Interval(2, 4)

        # increasing monotonically
        index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)])
        assert index.slice_locs(start=low, end=high) == (0, 3)
        assert index.slice_locs(start=low) == (0, 3)
        assert index.slice_locs(end=high) == (0, 3)
        assert index.slice_locs(end=low) == (0, 1)
        assert index.slice_locs(start=high, end=low) == (2, 1)

        # decreasing monotonically
        index = IntervalIndex.from_tuples([(2, 4), (1, 3), (0, 2)])
        assert index.slice_locs(start=low, end=high) == (2, 1)
        assert index.slice_locs(start=low) == (2, 3)
        assert index.slice_locs(end=high) == (0, 1)
        assert index.slice_locs(end=low) == (0, 3)
        assert index.slice_locs(start=high, end=low) == (0, 3)

        # sorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)])
        assert index.slice_locs(start=low, end=high) == (0, 3)
        assert index.slice_locs(start=low) == (0, 3)
        assert index.slice_locs(end=high) == (0, 3)
        assert index.slice_locs(end=low) == (0, 2)
        assert index.slice_locs(start=high, end=low) == (2, 2)

        # unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])
        left_bound_msg = re.escape(
            '"Cannot get left slice bound for non-unique label: '
            "Interval(0, 2, closed='right')\""
        )
        right_bound_msg = re.escape(
            '"Cannot get right slice bound for non-unique label: '
            "Interval(0, 2, closed='right')\""
        )

        with pytest.raises(KeyError, match=left_bound_msg):
            index.slice_locs(start=low, end=high)

        with pytest.raises(KeyError, match=left_bound_msg):
            index.slice_locs(start=low)

        assert index.slice_locs(end=high) == (0, 2)

        with pytest.raises(KeyError, match=right_bound_msg):
            index.slice_locs(end=low)

        with pytest.raises(KeyError, match=right_bound_msg):
            index.slice_locs(start=high, end=low)

        # another unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4), (1, 3)])
        assert index.slice_locs(start=low, end=high) == (0, 3)
        assert index.slice_locs(start=low) == (0, 4)
        assert index.slice_locs(end=high) == (0, 3)
        assert index.slice_locs(end=low) == (0, 2)
        assert index.slice_locs(start=high, end=low) == (2, 2)

    def test_slice_locs_with_ints_and_floats_succeeds(self):
        """slice_locs with scalar bounds on non-overlapping monotonic indexes."""
        queries = [(0, 1), (0, 2), (0, 3), (3, 1), (3, 4), (0, 4)]

        # increasing non-overlapping
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])
        increasing_expected = [(0, 1), (0, 2), (0, 2), (2, 1), (2, 3), (0, 3)]
        for (start, stop), expected in zip(queries, increasing_expected):
            assert index.slice_locs(start, stop) == expected

        # decreasing non-overlapping
        index = IntervalIndex.from_tuples([(3, 4), (1, 2), (0, 1)])
        decreasing_expected = [(3, 3), (3, 2), (3, 1), (1, 3), (1, 1), (3, 1)]
        for (start, stop), expected in zip(queries, decreasing_expected):
            assert index.slice_locs(start, stop) == expected

    @pytest.mark.parametrize("query", [[0, 1], [0, 2], [0, 3], [0, 4]])
    @pytest.mark.parametrize(
        "tuples",
        [
            [(0, 2), (1, 3), (2, 4)],
            [(2, 4), (1, 3), (0, 2)],
            [(0, 2), (0, 2), (2, 4)],
            [(0, 2), (2, 4), (0, 2)],
            [(0, 2), (0, 2), (2, 4), (1, 3)],
        ],
    )
    def test_slice_locs_with_ints_and_floats_errors(self, tuples, query):
        """Scalar slice bounds raise KeyError for each of these indexes."""
        start, stop = query
        index = IntervalIndex.from_tuples(tuples)

        msg = (
            "'can only get slices from an IntervalIndex if bounds are "
            "non-overlapping and all monotonic increasing or decreasing'"
        )
        with pytest.raises(KeyError, match=msg):
            index.slice_locs(start, stop)
class TestPutmask:
    """Tests for IntervalIndex.putmask with datetime-like subtypes."""

    @pytest.mark.parametrize("tz", ["US/Pacific", None])
    def test_putmask_dt64(self, tz):
        """Overwrite the first three datetime intervals with the last one."""
        # GH#37968
        breaks = date_range("2016-01-01", periods=9, tz=tz)
        idx = IntervalIndex.from_breaks(breaks)

        mask = np.zeros(idx.shape, dtype=bool)
        mask[:3] = True

        result = idx.putmask(mask, idx[-1])

        expected = IntervalIndex([idx[-1]] * 3 + list(idx[3:]))
        tm.assert_index_equal(result, expected)

    def test_putmask_td64(self):
        """Overwrite the first three timedelta intervals with the last one."""
        # GH#37968
        dti = date_range("2016-01-01", periods=9)
        breaks = dti - dti[0]
        idx = IntervalIndex.from_breaks(breaks)

        mask = np.zeros(idx.shape, dtype=bool)
        mask[:3] = True

        result = idx.putmask(mask, idx[-1])

        expected = IntervalIndex([idx[-1]] * 3 + list(idx[3:]))
        tm.assert_index_equal(result, expected)
class TestGetValue:
    """Tests for the deprecated IntervalIndex.get_value."""

    @pytest.mark.parametrize("key", [[5], (2, 3)])
    def test_get_value_non_scalar_errors(self, key):
        """get_value must reject list/tuple keys with InvalidIndexError."""
        # GH#31117
        idx = IntervalIndex.from_tuples([(1, 3), (2, 4), (3, 5), (7, 10), (3, 10)])
        ser = Series(range(len(idx)), index=idx)

        # str(key) is "[5]" or "(2, 3)"; unescaped these are a regex character
        # class / group, so escape them to require the literal key text.
        msg = re.escape(str(key))
        with pytest.raises(InvalidIndexError, match=msg):
            with tm.assert_produces_warning(FutureWarning):
                idx.get_value(ser, key)
class TestContains:
    # .__contains__, not .contains

    def test_contains_dunder(self):
        """`in` matches only an Interval equal to an element, never a point."""
        index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right")

        # __contains__ requires perfect matches to intervals.
        for point in (0, 1, 2):
            assert point not in index

        assert Interval(0, 1, closed="right") in index

        non_members = (
            Interval(0, 2, closed="right"),
            Interval(0, 0.5, closed="right"),
            Interval(3, 5, closed="right"),
            Interval(-1, 0, closed="left"),
            Interval(0, 1, closed="left"),
            Interval(0, 1, closed="both"),
        )
        for interval in non_members:
            assert interval not in index

View File

@ -0,0 +1,918 @@
from itertools import permutations
import re
import numpy as np
import pytest
import pandas as pd
from pandas import (
Index,
Interval,
IntervalIndex,
Timedelta,
Timestamp,
date_range,
interval_range,
isna,
notna,
timedelta_range,
)
import pandas._testing as tm
from pandas.core.api import Float64Index
import pandas.core.common as com
@pytest.fixture(scope="class", params=[None, "foo"])
def name(request):
    """Return the current fixture parameter (None or "foo")."""
    current = request.param
    return current
class TestIntervalIndex:
index = IntervalIndex.from_arrays([0, 1], [1, 2])
def create_index(self, closed="right"):
    """Build an IntervalIndex from the breaks 0..10 with the given closed side."""
    breaks = range(11)
    return IntervalIndex.from_breaks(breaks, closed=closed)
def create_index_with_nan(self, closed="right"):
    """Build a ten-element IntervalIndex whose second element is missing."""
    # False marks the position whose bounds are replaced by NaN
    keep = [True, False] + [True] * 8
    left = np.where(keep, np.arange(10), np.nan)
    right = np.where(keep, np.arange(1, 11), np.nan)
    return IntervalIndex.from_arrays(left, right, closed=closed)
def test_properties(self, closed):
index = self.create_index(closed=closed)
assert len(index) == 10
assert index.size == 10
assert index.shape == (10,)
tm.assert_index_equal(index.left, Index(np.arange(10)))
tm.assert_index_equal(index.right, Index(np.arange(1, 11)))
tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5)))
assert index.closed == closed
ivs = [
Interval(left, right, closed)
for left, right in zip(range(10), range(1, 11))
]
expected = np.array(ivs, dtype=object)
tm.assert_numpy_array_equal(np.asarray(index), expected)
# with nans
index = self.create_index_with_nan(closed=closed)
assert len(index) == 10
assert index.size == 10
assert index.shape == (10,)
expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
expected_right = expected_left + 1
expected_mid = expected_left + 0.5
tm.assert_index_equal(index.left, expected_left)
tm.assert_index_equal(index.right, expected_right)
tm.assert_index_equal(index.mid, expected_mid)
assert index.closed == closed
ivs = [
Interval(left, right, closed) if notna(left) else np.nan
for left, right in zip(expected_left, expected_right)
]
expected = np.array(ivs, dtype=object)
tm.assert_numpy_array_equal(np.asarray(index), expected)
@pytest.mark.parametrize(
    "breaks",
    [
        [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
        [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
        pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]),
        pd.to_timedelta(["1ns", "2ms", "3s", "4min", "5H", "6D"]),
    ],
)
def test_length(self, closed, breaks):
    """IntervalIndex.length must agree with the length of each element."""
    # GH 18789
    index = IntervalIndex.from_breaks(breaks, closed=closed)
    lengths = index.length
    per_element = Index(iv.length for iv in index)
    tm.assert_index_equal(lengths, per_element)

    # with NA
    index = index.insert(1, np.nan)
    lengths = index.length
    per_element = Index(iv.length if notna(iv) else iv for iv in index)
    tm.assert_index_equal(lengths, per_element)
def test_with_nans(self, closed):
index = self.create_index(closed=closed)
assert index.hasnans is False
result = index.isna()
expected = np.zeros(len(index), dtype=bool)
tm.assert_numpy_array_equal(result, expected)
result = index.notna()
expected = np.ones(len(index), dtype=bool)
tm.assert_numpy_array_equal(result, expected)
index = self.create_index_with_nan(closed=closed)
assert index.hasnans is True
result = index.isna()
expected = np.array([False, True] + [False] * (len(index) - 2))
tm.assert_numpy_array_equal(result, expected)
result = index.notna()
expected = np.array([True, False] + [True] * (len(index) - 2))
tm.assert_numpy_array_equal(result, expected)
def test_copy(self, closed):
expected = self.create_index(closed=closed)
result = expected.copy()
assert result.equals(expected)
result = expected.copy(deep=True)
assert result.equals(expected)
assert result.left is not expected.left
def test_ensure_copied_data(self, closed):
# exercise the copy flag in the constructor
# not copying
index = self.create_index(closed=closed)
result = IntervalIndex(index, copy=False)
tm.assert_numpy_array_equal(
index.left.values, result.left.values, check_same="same"
)
tm.assert_numpy_array_equal(
index.right.values, result.right.values, check_same="same"
)
# by-definition make a copy
result = IntervalIndex(np.array(index), copy=False)
tm.assert_numpy_array_equal(
index.left.values, result.left.values, check_same="copy"
)
tm.assert_numpy_array_equal(
index.right.values, result.right.values, check_same="copy"
)
def test_delete(self, closed):
expected = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed)
result = self.create_index(closed=closed).delete(0)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
    "data",
    [
        interval_range(0, periods=10, closed="neither"),
        interval_range(1.7, periods=8, freq=2.5, closed="both"),
        interval_range(Timestamp("20170101"), periods=12, closed="left"),
        interval_range(Timedelta("1 day"), periods=6, closed="right"),
    ],
)
def test_insert(self, data):
    """Insert valid, mismatched and missing values at several positions.

    Index-level ``insert`` is compared against inserting into the
    object-dtype version for incompatible values, while the underlying
    array's ``insert`` is expected to raise for them.
    """
    item = data[0]
    idx_item = IntervalIndex([item])

    # start
    expected = idx_item.append(data)
    result = data.insert(0, item)
    tm.assert_index_equal(result, expected)

    # end
    expected = data.append(idx_item)
    result = data.insert(len(data), item)
    tm.assert_index_equal(result, expected)

    # mid
    expected = data[:3].append(idx_item).append(data[3:])
    result = data.insert(3, item)
    tm.assert_index_equal(result, expected)

    # invalid type
    res = data.insert(1, "foo")
    expected = data.astype(object).insert(1, "foo")
    tm.assert_index_equal(res, expected)

    msg = "can only insert Interval objects and NA into an IntervalArray"
    with pytest.raises(TypeError, match=msg):
        data._data.insert(1, "foo")

    # invalid closed
    # (the message is rebuilt per iteration from the actual closed values)
    for closed in {"left", "right", "both", "neither"} - {item.closed}:
        msg = f"'value.closed' is '{closed}', expected '{item.closed}'."
        bad_item = Interval(item.left, item.right, closed=closed)
        res = data.insert(1, bad_item)
        expected = data.astype(object).insert(1, bad_item)
        tm.assert_index_equal(res, expected)
        with pytest.raises(ValueError, match=msg):
            data._data.insert(1, bad_item)

    # GH 18295 (test missing)
    na_idx = IntervalIndex([np.nan], closed=data.closed)
    for na in [np.nan, None, pd.NA]:
        expected = data[:1].append(na_idx).append(data[1:])
        result = data.insert(1, na)
        tm.assert_index_equal(result, expected)

    if data.left.dtype.kind not in ["m", "M"]:
        # trying to insert pd.NaT into a numeric-dtyped Index should cast
        expected = data.astype(object).insert(1, pd.NaT)

        msg = "can only insert Interval objects and NA into an IntervalArray"
        with pytest.raises(TypeError, match=msg):
            data._data.insert(1, pd.NaT)

    result = data.insert(1, pd.NaT)
    tm.assert_index_equal(result, expected)
def test_is_unique_interval(self, closed):
"""
Interval specific tests for is_unique in addition to base class tests
"""
# unique overlapping - distinct endpoints
idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
assert idx.is_unique is True
# unique overlapping - shared endpoints
idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
assert idx.is_unique is True
# unique nested
idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
assert idx.is_unique is True
# unique NaN
idx = IntervalIndex.from_tuples([(np.NaN, np.NaN)], closed=closed)
assert idx.is_unique is True
# non-unique NaN
idx = IntervalIndex.from_tuples(
[(np.NaN, np.NaN), (np.NaN, np.NaN)], closed=closed
)
assert idx.is_unique is False
def test_monotonic(self, closed):
    """Check the four monotonicity flags for a range of interval layouts."""

    def check(idx, increasing, strictly_increasing, decreasing, strictly_decreasing):
        # same four assertions, in the same order, for every layout
        assert idx.is_monotonic is increasing
        assert idx._is_strictly_monotonic_increasing is strictly_increasing
        assert idx.is_monotonic_decreasing is decreasing
        assert idx._is_strictly_monotonic_decreasing is strictly_decreasing

    layouts = [
        # increasing non-overlapping
        ([(0, 1), (2, 3), (4, 5)], True, True, False, False),
        # decreasing non-overlapping
        ([(4, 5), (2, 3), (1, 2)], False, False, True, True),
        # unordered non-overlapping
        ([(0, 1), (4, 5), (2, 3)], False, False, False, False),
        # increasing overlapping
        ([(0, 2), (0.5, 2.5), (1, 3)], True, True, False, False),
        # decreasing overlapping
        ([(1, 3), (0.5, 2.5), (0, 2)], False, False, True, True),
        # unordered overlapping
        ([(0.5, 2.5), (0, 2), (1, 3)], False, False, False, False),
        # increasing overlapping shared endpoints
        ([(1, 2), (1, 3), (2, 3)], True, True, False, False),
        # decreasing overlapping shared endpoints
        ([(2, 3), (1, 3), (1, 2)], False, False, True, True),
        # stationary
        ([(0, 1), (0, 1)], True, False, True, False),
    ]
    for tuples, *flags in layouts:
        check(IntervalIndex.from_tuples(tuples, closed=closed), *flags)

    # empty
    check(IntervalIndex([], closed=closed), True, True, True, True)
def test_is_monotonic_with_nans(self):
    """An all-NaN index reports False for every monotonicity flag."""
    # GH#41831
    index = IntervalIndex([np.nan, np.nan])

    flags = (
        "is_monotonic",
        "_is_strictly_monotonic_increasing",
        "is_monotonic_increasing",
        "_is_strictly_monotonic_decreasing",
        "is_monotonic_decreasing",
    )
    for flag in flags:
        assert not getattr(index, flag)
def test_get_item(self, closed):
i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed)
assert i[0] == Interval(0.0, 1.0, closed=closed)
assert i[1] == Interval(1.0, 2.0, closed=closed)
assert isna(i[2])
result = i[0:1]
expected = IntervalIndex.from_arrays((0.0,), (1.0,), closed=closed)
tm.assert_index_equal(result, expected)
result = i[0:2]
expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0), closed=closed)
tm.assert_index_equal(result, expected)
result = i[1:3]
expected = IntervalIndex.from_arrays(
(1.0, np.nan), (2.0, np.nan), closed=closed
)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
    "breaks",
    [
        date_range("20180101", periods=4),
        date_range("20180101", periods=4, tz="US/Eastern"),
        timedelta_range("0 days", periods=4),
    ],
    ids=lambda x: str(x.dtype),
)
def test_maybe_convert_i8(self, breaks):
    """_maybe_convert_i8 maps datetime-like keys onto their i8 values."""
    # GH 20636
    index = IntervalIndex.from_breaks(breaks)
    first, second = breaks[0], breaks[1]

    # intervalindex
    converted = index._maybe_convert_i8(index)
    tm.assert_index_equal(converted, IntervalIndex.from_breaks(breaks.asi8))

    # interval
    converted = index._maybe_convert_i8(Interval(first, second))
    assert converted == Interval(first.value, second.value)

    # datetimelike index
    converted = index._maybe_convert_i8(breaks)
    tm.assert_index_equal(converted, Index(breaks.asi8))

    # datetimelike scalar
    converted = index._maybe_convert_i8(first)
    assert converted == first.value

    # list-like of datetimelike scalars
    converted = index._maybe_convert_i8(list(breaks))
    tm.assert_index_equal(converted, Index(breaks.asi8))
@pytest.mark.parametrize(
    "breaks",
    [date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5)],
)
def test_maybe_convert_i8_nat(self, breaks):
    """NaT entries in the key are expected to come back as NaN in a float index."""
    # GH 20636
    index = IntervalIndex.from_breaks(breaks)

    # all-NaT key
    key = breaks._constructor([pd.NaT] * 3)
    wanted = Float64Index([np.nan] * 3)
    tm.assert_index_equal(index._maybe_convert_i8(key), wanted)

    # NaT mixed with one valid value at the front
    key = key.insert(0, breaks[0])
    wanted = wanted.insert(0, float(breaks[0].value))
    tm.assert_index_equal(index._maybe_convert_i8(key), wanted)
@pytest.mark.parametrize(
    "breaks",
    [np.arange(5, dtype="int64"), np.arange(5, dtype="float64")],
    ids=lambda x: str(x.dtype),
)
@pytest.mark.parametrize(
    "make_key",
    [
        IntervalIndex.from_breaks,
        lambda breaks: Interval(breaks[0], breaks[1]),
        lambda breaks: breaks,
        lambda breaks: breaks[0],
        list,
    ],
    ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
)
def test_maybe_convert_i8_numeric(self, breaks, make_key):
    """Numeric keys are handed back as the very same object."""
    # GH 20636
    key = make_key(breaks)
    index = IntervalIndex.from_breaks(breaks)

    # no conversion occurs for numeric
    assert index._maybe_convert_i8(key) is key
@pytest.mark.parametrize(
    "breaks1, breaks2",
    permutations(
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        2,
    ),
    ids=lambda x: str(x.dtype),
)
@pytest.mark.parametrize(
    "make_key",
    [
        IntervalIndex.from_breaks,
        lambda breaks: Interval(breaks[0], breaks[1]),
        lambda breaks: breaks,
        lambda breaks: breaks[0],
        list,
    ],
    ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
)
def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key):
    """Keys of a different datetime-like dtype raise ValueError."""
    # GH 20636
    index = IntervalIndex.from_breaks(breaks1)
    mismatched_key = make_key(breaks2)

    # dtype reprs can contain regex metacharacters, hence the escape
    pattern = re.escape(
        f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with "
        f"values of dtype {breaks2.dtype}"
    )
    with pytest.raises(ValueError, match=pattern):
        index._maybe_convert_i8(mismatched_key)
def test_contains_method(self):
    """.contains checks points element-wise and rejects Interval arguments."""
    # can select values that are IN the range of a value
    idx = IntervalIndex.from_arrays([0, 1], [1, 2])

    in_none = np.array([False, False], dtype="bool")
    for point in (0, 3):
        tm.assert_numpy_array_equal(idx.contains(point), in_none)

    in_first_only = np.array([True, False], dtype="bool")
    for point in (0.5, 1):
        tm.assert_numpy_array_equal(idx.contains(point), in_first_only)

    # __contains__ not implemented for "interval in interval", follow
    # that for the contains method for now
    msg = "contains not implemented for two"
    with pytest.raises(NotImplementedError, match=msg):
        idx.contains(Interval(0, 1))
def test_dropna(self, closed):
expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed)
ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed)
result = ii.dropna()
tm.assert_index_equal(result, expected)
ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed)
result = ii.dropna()
tm.assert_index_equal(result, expected)
def test_non_contiguous(self, closed):
index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
target = [0.5, 1.5, 2.5]
actual = index.get_indexer(target)
expected = np.array([0, -1, 1], dtype="intp")
tm.assert_numpy_array_equal(actual, expected)
assert 1.5 not in index
def test_isin(self, closed):
index = self.create_index(closed=closed)
expected = np.array([True] + [False] * (len(index) - 1))
result = index.isin(index[:1])
tm.assert_numpy_array_equal(result, expected)
result = index.isin([index[0]])
tm.assert_numpy_array_equal(result, expected)
other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
expected = np.array([True] * (len(index) - 1) + [False])
result = index.isin(other)
tm.assert_numpy_array_equal(result, expected)
result = index.isin(other.tolist())
tm.assert_numpy_array_equal(result, expected)
for other_closed in {"right", "left", "both", "neither"}:
other = self.create_index(closed=other_closed)
expected = np.repeat(closed == other_closed, len(index))
result = index.isin(other)
tm.assert_numpy_array_equal(result, expected)
result = index.isin(other.tolist())
tm.assert_numpy_array_equal(result, expected)
def test_comparison(self):
    """Element-wise comparisons of the class-level index with various operands."""
    index = self.index
    values = index.values

    def both(flag):
        # fresh two-element boolean array for each expectation
        return np.array([flag, flag])

    second_only = np.array([False, True])

    tm.assert_numpy_array_equal(Interval(0, 1) < index, second_only)
    tm.assert_numpy_array_equal(Interval(0.5, 1.5) < index, second_only)
    tm.assert_numpy_array_equal(index > Interval(0.5, 1.5), second_only)

    # index against itself
    tm.assert_numpy_array_equal(index == index, both(True))
    tm.assert_numpy_array_equal(index <= index, both(True))
    tm.assert_numpy_array_equal(index >= index, both(True))
    tm.assert_numpy_array_equal(index < index, both(False))
    tm.assert_numpy_array_equal(index > index, both(False))

    # same breaks but closed on the other side
    left_closed = IntervalIndex.from_breaks([0, 1, 2], "left")
    tm.assert_numpy_array_equal(index == left_closed, both(False))

    # index against its own values
    tm.assert_numpy_array_equal(index == values, both(True))
    tm.assert_numpy_array_equal(values == index, both(True))
    tm.assert_numpy_array_equal(index <= values, both(True))
    tm.assert_numpy_array_equal(index != values, both(False))
    tm.assert_numpy_array_equal(index > values, both(False))
    tm.assert_numpy_array_equal(values > index, both(False))

    # invalid comparisons
    tm.assert_numpy_array_equal(index == 0, both(False))
    tm.assert_numpy_array_equal(index == index.left, both(False))

    msg = "|".join(
        [
            "not supported between instances of 'int' and '.*.Interval'",
            r"Invalid comparison between dtype=interval\[int64, right\] and ",
        ]
    )
    with pytest.raises(TypeError, match=msg):
        index > 0
    with pytest.raises(TypeError, match=msg):
        index <= 0
    with pytest.raises(TypeError, match=msg):
        index > np.arange(2)

    msg = "Lengths must match to compare"
    with pytest.raises(ValueError, match=msg):
        index > np.arange(3)
def test_missing_values(self, closed):
    """NaN handling across the Index constructor, from_arrays and isna."""
    first = Interval(0, 1, closed=closed)
    second = Interval(1, 2, closed=closed)
    generic = Index([np.nan, first, second])
    from_arrays = IntervalIndex.from_arrays(
        [np.nan, 0, 1], [np.nan, 1, 2], closed=closed
    )
    assert generic.equals(from_arrays)

    # a NaN on one side only is rejected
    msg = (
        "missing values must be missing in the same location both left "
        "and right sides"
    )
    with pytest.raises(ValueError, match=msg):
        IntervalIndex.from_arrays(
            [np.nan, 0, 1], np.array([0, 1, 2]), closed=closed
        )

    tm.assert_numpy_array_equal(isna(generic), np.array([True, False, False]))
def test_sort_values(self, closed):
index = self.create_index(closed=closed)
result = index.sort_values()
tm.assert_index_equal(result, index)
result = index.sort_values(ascending=False)
tm.assert_index_equal(result, index[::-1])
# with nan
index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)])
result = index.sort_values()
expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan])
tm.assert_index_equal(result, expected)
result = index.sort_values(ascending=False, na_position="first")
expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)])
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
def test_datetime(self, tz):
    """mid, membership, .contains and get_indexer on daily datetime intervals."""

    def ts(text):
        # Timestamp in the parametrized timezone
        return Timestamp(text, tz=tz)

    breaks = date_range(start=ts("2000-01-01"), periods=10)
    index = IntervalIndex.from_breaks(breaks)

    # test mid
    midpoints = date_range(start=ts("2000-01-01T12:00"), periods=9)
    tm.assert_index_equal(index.mid, midpoints)

    # __contains__ doesn't check individual points
    assert ts("2000-01-01") not in index
    assert ts("2000-01-01T12") not in index
    assert ts("2000-01-02") not in index
    iv_true = Interval(ts("2000-01-02"), ts("2000-01-03"))
    iv_false = Interval(ts("1999-12-31"), ts("2000-01-01"))
    assert iv_true in index
    assert iv_false not in index

    # .contains does check individual points
    assert not index.contains(ts("2000-01-01")).any()
    assert index.contains(ts("2000-01-01T12")).any()
    assert index.contains(ts("2000-01-02")).any()

    # test get_indexer
    target = date_range(start=ts("1999-12-31T12:00"), periods=7, freq="12H")
    positions = index.get_indexer(target)
    wanted = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp")
    tm.assert_numpy_array_equal(positions, wanted)

    target = date_range(start=ts("2000-01-08T18:00"), periods=7, freq="6H")
    positions = index.get_indexer(target)
    wanted = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp")
    tm.assert_numpy_array_equal(positions, wanted)
def test_append(self, closed):
index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed)
index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed)
result = index1.append(index2)
expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed)
tm.assert_index_equal(result, expected)
result = index1.append([index1, index2])
expected = IntervalIndex.from_arrays(
[0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed
)
tm.assert_index_equal(result, expected)
for other_closed in {"left", "right", "both", "neither"} - {closed}:
index_other_closed = IntervalIndex.from_arrays(
[0, 1], [1, 2], closed=other_closed
)
result = index1.append(index_other_closed)
expected = index1.astype(object).append(index_other_closed.astype(object))
tm.assert_index_equal(result, expected)
def test_is_non_overlapping_monotonic(self, closed):
# Should be True in all cases
tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
idx = IntervalIndex.from_tuples(tpls, closed=closed)
assert idx.is_non_overlapping_monotonic is True
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
assert idx.is_non_overlapping_monotonic is True
# Should be False in all cases (overlapping)
tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
idx = IntervalIndex.from_tuples(tpls, closed=closed)
assert idx.is_non_overlapping_monotonic is False
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
assert idx.is_non_overlapping_monotonic is False
# Should be False in all cases (non-monotonic)
tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
idx = IntervalIndex.from_tuples(tpls, closed=closed)
assert idx.is_non_overlapping_monotonic is False
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
assert idx.is_non_overlapping_monotonic is False
# Should be False for closed='both', otherwise True (GH16560)
if closed == "both":
idx = IntervalIndex.from_breaks(range(4), closed=closed)
assert idx.is_non_overlapping_monotonic is False
else:
idx = IntervalIndex.from_breaks(range(4), closed=closed)
assert idx.is_non_overlapping_monotonic is True
@pytest.mark.parametrize(
    "start, shift, na_value",
    [
        (0, 1, np.nan),
        (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT),
        (Timedelta("0 days"), Timedelta("1 day"), pd.NaT),
    ],
)
def test_is_overlapping(self, start, shift, na_value, closed):
    """Check ``is_overlapping`` with and without NA entries at both ends."""
    # GH 23309
    # see test_interval_tree.py for extensive tests; interface tests here

    def spans(offsets, width):
        # Intervals starting ``n`` shifts after ``start``, ``width`` shifts long.
        return [(start + n * shift, start + (n + width) * shift) for n in offsets]

    na_entry = [(na_value, na_value)]
    scenarios = [
        # non-overlapping
        (spans((0, 2, 4), 1), False),
        # overlapping
        (spans(range(3), 2), True),
        # common endpoints
        (spans(range(3), 1), closed == "both"),
    ]
    for tuples, expected in scenarios:
        # Each scenario is checked plain, then with an NA entry at both ends.
        for candidate in (tuples, na_entry + tuples + na_entry):
            index = IntervalIndex.from_tuples(candidate, closed=closed)
            assert index.is_overlapping is expected
@pytest.mark.parametrize(
    "tuples",
    [
        list(zip(range(10), range(1, 11))),
        # NOTE(review): both endpoints use the same date range, so these
        # intervals are zero-width -- confirm this is intended.
        list(
            zip(
                date_range("20170101", periods=10),
                date_range("20170101", periods=10),
            )
        ),
        list(
            zip(
                timedelta_range("0 days", periods=10),
                timedelta_range("1 day", periods=10),
            )
        ),
    ],
)
def test_to_tuples(self, tuples):
    """GH 18756: ``to_tuples`` returns an Index of the source tuples."""
    round_tripped = IntervalIndex.from_tuples(tuples).to_tuples()
    tm.assert_index_equal(round_tripped, Index(com.asarray_tuplesafe(tuples)))
@pytest.mark.parametrize(
    "tuples",
    [
        list(zip(range(10), range(1, 11))) + [np.nan],
        list(
            zip(
                date_range("20170101", periods=10),
                date_range("20170101", periods=10),
            )
        )
        + [np.nan],
        list(
            zip(
                timedelta_range("0 days", periods=10),
                timedelta_range("1 day", periods=10),
            )
        )
        + [np.nan],
    ],
)
@pytest.mark.parametrize("na_tuple", [True, False])
def test_to_tuples_na(self, tuples, na_tuple):
    """GH 18756: check how ``to_tuples`` renders a trailing NA entry."""
    converted = IntervalIndex.from_tuples(tuples).to_tuples(na_tuple=na_tuple)

    # check the non-NA portion
    valid_part = converted[:-1]
    tm.assert_index_equal(valid_part, Index(com.asarray_tuplesafe(tuples[:-1])))

    # check the NA portion
    trailing = converted[-1]
    if not na_tuple:
        assert isna(trailing)
        return
    # With na_tuple=True the NA entry is a 2-tuple of NA values.
    assert isinstance(trailing, tuple)
    assert len(trailing) == 2
    assert all(isna(x) for x in trailing)
def test_nbytes(self):
# GH 19209
left = np.arange(0, 4, dtype="i8")
right = np.arange(1, 5, dtype="i8")
result = IntervalIndex.from_arrays(left, right).nbytes
expected = 64 # 4 * 8 * 2
assert result == expected
@pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"])
def test_set_closed(self, name, closed, new_closed):
    """GH 21670: ``set_closed`` matches an index built with the new side."""

    def build(side):
        # Same breaks and name each time; only ``closed`` varies.
        return interval_range(0, 5, closed=side, name=name)

    converted = build(closed).set_closed(new_closed)
    tm.assert_index_equal(converted, build(new_closed))
@pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False])
def test_set_closed_errors(self, bad_closed):
    """GH 21670: an unrecognised ``closed`` value raises ``ValueError``."""
    expected_error = f"invalid option for 'closed': {bad_closed}"
    index = interval_range(0, 5)
    with pytest.raises(ValueError, match=expected_error):
        index.set_closed(bad_closed)
def test_is_all_dates(self):
# GH 23576
year_2017 = Interval(
Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00")
)
year_2017_index = IntervalIndex([year_2017])
assert not year_2017_index._is_all_dates
def test_dir():
    """GH#27571: ``dir(interval_index)`` should not raise."""
    listing = dir(IntervalIndex.from_arrays([0, 1], [1, 2]))
    assert "str" not in listing
def test_searchsorted_different_argument_classes(listlike_box):
# https://github.com/pandas-dev/pandas/issues/32762
values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
result = values.searchsorted(listlike_box(values))
expected = np.array([0, 1], dtype=result.dtype)
tm.assert_numpy_array_equal(result, expected)
result = values._data.searchsorted(listlike_box(values))
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize(
    "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2]
)
def test_searchsorted_invalid_argument(arg):
    """Searching for non-Interval values raises ``TypeError``."""
    expected_error = (
        "'<' not supported between instances of 'pandas._libs.interval.Interval' and "
    )
    values = IntervalIndex([Interval(left, left + 1) for left in range(2)])
    with pytest.raises(TypeError, match=expected_error):
        values.searchsorted(arg)

View File

@ -0,0 +1,355 @@
from datetime import timedelta
import numpy as np
import pytest
from pandas.core.dtypes.common import is_integer
from pandas import (
DateOffset,
Interval,
IntervalIndex,
Timedelta,
Timestamp,
date_range,
interval_range,
timedelta_range,
)
import pandas._testing as tm
from pandas.tseries.offsets import Day
@pytest.fixture(scope="class", params=[None, "foo"])
def name(request):
    """Class-scoped fixture supplying each index name: ``None`` and ``"foo"``."""
    index_name = request.param
    return index_name
class TestIntervalRange:
    """Tests for ``interval_range`` construction, dtype inference and errors."""

    @pytest.mark.parametrize("freq, periods", [(1, 100), (2.5, 40), (5, 20), (25, 4)])
    def test_constructor_numeric(self, closed, name, freq, periods):
        """Every three-argument spelling builds the same numeric index."""
        start, end = 0, 100
        expected = IntervalIndex.from_breaks(
            np.arange(101, step=freq), name=name, closed=closed
        )

        specs = [
            # defined from start/end/freq
            {"start": start, "end": end, "freq": freq},
            # defined from start/periods/freq
            {"start": start, "periods": periods, "freq": freq},
            # defined from end/periods/freq
            {"end": end, "periods": periods, "freq": freq},
            # GH 20976: linspace behavior defined from start/end/periods
            {"start": start, "end": end, "periods": periods},
        ]
        for spec in specs:
            result = interval_range(name=name, closed=closed, **spec)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    @pytest.mark.parametrize(
        "freq, periods", [("D", 364), ("2D", 182), ("22D18H", 16), ("M", 11)]
    )
    def test_constructor_timestamp(self, closed, name, freq, periods, tz):
        """Every supported spelling builds the same Timestamp index."""
        start, end = Timestamp("20180101", tz=tz), Timestamp("20181231", tz=tz)
        breaks = date_range(start=start, end=end, freq=freq)
        expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

        freq_specs = [
            # defined from start/end/freq
            {"start": start, "end": end, "freq": freq},
            # defined from start/periods/freq
            {"start": start, "periods": periods, "freq": freq},
            # defined from end/periods/freq
            {"end": end, "periods": periods, "freq": freq},
        ]
        for spec in freq_specs:
            result = interval_range(name=name, closed=closed, **spec)
            tm.assert_index_equal(result, expected)

        # GH 20976: linspace behavior defined from start/end/periods
        # matches expected only for non-anchored offsets and tz naive
        # (anchored/DST transitions cause unequal spacing in expected)
        if breaks.freq.is_anchored() or tz is not None:
            return
        result = interval_range(
            start=start, end=end, periods=periods, name=name, closed=closed
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "freq, periods", [("D", 100), ("2D12H", 40), ("5D", 20), ("25D", 4)]
    )
    def test_constructor_timedelta(self, closed, name, freq, periods):
        """Every three-argument spelling builds the same Timedelta index."""
        start, end = Timedelta("0 days"), Timedelta("100 days")
        breaks = timedelta_range(start=start, end=end, freq=freq)
        expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

        specs = [
            # defined from start/end/freq
            {"start": start, "end": end, "freq": freq},
            # defined from start/periods/freq
            {"start": start, "periods": periods, "freq": freq},
            # defined from end/periods/freq
            {"end": end, "periods": periods, "freq": freq},
            # GH 20976: linspace behavior defined from start/end/periods
            {"start": start, "end": end, "periods": periods},
        ]
        for spec in specs:
            result = interval_range(name=name, closed=closed, **spec)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "start, end, freq, expected_endpoint",
        [
            (0, 10, 3, 9),
            (0, 10, 1.5, 9),
            (0.5, 10, 3, 9.5),
            (Timedelta("0D"), Timedelta("10D"), "2D4H", Timedelta("8D16H")),
            (
                Timestamp("2018-01-01"),
                Timestamp("2018-02-09"),
                "MS",
                Timestamp("2018-02-01"),
            ),
            (
                Timestamp("2018-01-01", tz="US/Eastern"),
                Timestamp("2018-01-20", tz="US/Eastern"),
                "5D12H",
                Timestamp("2018-01-17 12:00:00", tz="US/Eastern"),
            ),
        ],
    )
    def test_early_truncation(self, start, end, freq, expected_endpoint):
        """The index truncates early if ``freq`` causes ``end`` to be skipped."""
        index = interval_range(start=start, end=end, freq=freq)
        last_right = index.right[-1]
        assert last_right == expected_endpoint

    @pytest.mark.parametrize(
        "start, end, freq",
        [(0.5, None, None), (None, 4.5, None), (0.5, None, 1.5), (None, 6.5, 1.5)],
    )
    def test_no_invalid_float_truncation(self, start, end, freq):
        """GH 21161: float endpoints are kept as given."""
        unit_breaks = [0.5, 1.5, 2.5, 3.5, 4.5]
        stepped_breaks = [0.5, 2.0, 3.5, 5.0, 6.5]
        breaks = unit_breaks if freq is None else stepped_breaks
        expected = IntervalIndex.from_breaks(breaks)

        result = interval_range(start=start, end=end, periods=4, freq=freq)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "start, mid, end",
        [
            (
                Timestamp("2018-03-10", tz="US/Eastern"),
                Timestamp("2018-03-10 23:30:00", tz="US/Eastern"),
                Timestamp("2018-03-12", tz="US/Eastern"),
            ),
            (
                Timestamp("2018-11-03", tz="US/Eastern"),
                Timestamp("2018-11-04 00:30:00", tz="US/Eastern"),
                Timestamp("2018-11-05", tz="US/Eastern"),
            ),
        ],
    )
    def test_linspace_dst_transition(self, start, mid, end):
        """GH 20976: start/end/periods spacing across a DST transition."""
        # accounts for the hour gained/lost during DST transition
        halves = interval_range(start=start, end=end, periods=2)
        tm.assert_index_equal(halves, IntervalIndex.from_breaks([start, mid, end]))

    @pytest.mark.parametrize("freq", [2, 2.0])
    @pytest.mark.parametrize("end", [10, 10.0])
    @pytest.mark.parametrize("start", [0, 0.0])
    def test_float_subtype(self, start, end, freq):
        """The subtype is float64 if any supplied argument is a float."""
        # Has float subtype if any of start/end/freq are float, even if all
        # resulting endpoints can safely be upcast to integers
        cases = [
            # defined from start/end/freq
            ({"start": start, "end": end, "freq": freq}, start + end + freq),
            # defined from start/periods/freq
            ({"start": start, "periods": 5, "freq": freq}, start + freq),
            # defined from end/periods/freq
            ({"end": end, "periods": 5, "freq": freq}, end + freq),
            # GH 20976: linspace behavior defined from start/end/periods
            ({"start": start, "end": end, "periods": 5}, start + end),
        ]
        for kwargs, total in cases:
            # The sum of the supplied arguments is an integer only when
            # none of them is a float.
            subtype = interval_range(**kwargs).dtype.subtype
            assert subtype == ("int64" if is_integer(total) else "float64")

    def test_constructor_coverage(self):
        """Equivalent argument types all build the same index."""
        # float value for periods
        expected = interval_range(start=0, periods=10)
        tm.assert_index_equal(interval_range(start=0, periods=10.5), expected)

        # equivalent timestamp-like start/end
        start, end = Timestamp("2017-01-01"), Timestamp("2017-01-15")
        expected = interval_range(start=start, end=end)

        result = interval_range(start=start.to_pydatetime(), end=end.to_pydatetime())
        tm.assert_index_equal(result, expected)

        result = interval_range(start=start.asm8, end=end.asm8)
        tm.assert_index_equal(result, expected)

        # equivalent freq with timestamp
        daily_spellings = [
            "D",
            Day(),
            Timedelta(days=1),
            timedelta(days=1),
            DateOffset(days=1),
        ]
        for freq in daily_spellings:
            result = interval_range(start=start, end=end, freq=freq)
            tm.assert_index_equal(result, expected)

        # equivalent timedelta-like start/end
        start, end = Timedelta(days=1), Timedelta(days=10)
        expected = interval_range(start=start, end=end)

        result = interval_range(start=start.to_pytimedelta(), end=end.to_pytimedelta())
        tm.assert_index_equal(result, expected)

        result = interval_range(start=start.asm8, end=end.asm8)
        tm.assert_index_equal(result, expected)

        # equivalent freq with timedelta
        for freq in ["D", Day(), Timedelta(days=1), timedelta(days=1)]:
            result = interval_range(start=start, end=end, freq=freq)
            tm.assert_index_equal(result, expected)

    def test_errors(self):
        """Invalid argument combinations raise the documented error messages."""
        # not enough params, then too many params
        msg = (
            "Of the four parameters: start, end, periods, and freq, "
            "exactly three must be specified"
        )
        wrong_counts = [
            {"start": 0},
            {"end": 5},
            {"periods": 2},
            {},
            {"start": 0, "end": 5, "periods": 6, "freq": 1.5},
        ]
        for kwargs in wrong_counts:
            with pytest.raises(ValueError, match=msg):
                interval_range(**kwargs)

        # mixed units
        msg = "start, end, freq need to be type compatible"
        mixed_units = [
            {"start": 0, "end": Timestamp("20130101"), "freq": 2},
            {"start": 0, "end": Timedelta("1 day"), "freq": 2},
            {"start": 0, "end": 10, "freq": "D"},
            {"start": Timestamp("20130101"), "end": 10, "freq": "D"},
            {"start": Timestamp("20130101"), "end": Timedelta("1 day"), "freq": "D"},
            {"start": Timestamp("20130101"), "end": Timestamp("20130110"), "freq": 2},
            {"start": Timedelta("1 day"), "end": 10, "freq": "D"},
            {"start": Timedelta("1 day"), "end": Timestamp("20130110"), "freq": "D"},
            {"start": Timedelta("1 day"), "end": Timedelta("10 days"), "freq": 2},
        ]
        for kwargs in mixed_units:
            with pytest.raises(TypeError, match=msg):
                interval_range(**kwargs)

        # invalid periods
        msg = "periods must be a number, got foo"
        with pytest.raises(TypeError, match=msg):
            interval_range(start=0, periods="foo")

        # invalid start
        msg = "start must be numeric or datetime-like, got foo"
        with pytest.raises(ValueError, match=msg):
            interval_range(start="foo", periods=10)

        # invalid end
        msg = r"end must be numeric or datetime-like, got \(0, 1\]"
        with pytest.raises(ValueError, match=msg):
            interval_range(end=Interval(0, 1), periods=10)

        # invalid freq for datetime-like
        msg = "freq must be numeric or convertible to DateOffset, got foo"
        bad_freqs = [
            {"start": 0, "end": 10, "freq": "foo"},
            {"start": Timestamp("20130101"), "periods": 10, "freq": "foo"},
            {"end": Timedelta("1 day"), "periods": 10, "freq": "foo"},
        ]
        for kwargs in bad_freqs:
            with pytest.raises(ValueError, match=msg):
                interval_range(**kwargs)

        # mixed tz
        eastern_start = Timestamp("2017-01-01", tz="US/Eastern")
        pacific_end = Timestamp("2017-01-07", tz="US/Pacific")
        msg = "Start and end cannot both be tz-aware with different timezones"
        with pytest.raises(TypeError, match=msg):
            interval_range(start=eastern_start, end=pacific_end)

View File

@ -0,0 +1,191 @@
from itertools import permutations
import numpy as np
import pytest
from pandas._libs.interval import IntervalTree
from pandas.compat import IS64
import pandas._testing as tm
def skipif_32bit(param):
    """
    Wrap ``param`` so a parametrize entry is skipped on 32bit systems.

    Used here for the leaf_size parameters affected by GH 23440.
    """
    skip_mark = pytest.mark.skipif(
        not IS64, reason="GH 23440: int type mismatch on 32bit"
    )
    return pytest.param(param, marks=skip_mark)
@pytest.fixture(scope="class", params=["int64", "float64", "uint64"])
def dtype(request):
    """Class-scoped fixture supplying each numeric dtype name under test."""
    dtype_name = request.param
    return dtype_name
@pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10])
def leaf_size(request):
    """
    Supply the IntervalTree ``leaf_size`` parameter for the ``tree`` fixture.

    Sizes 1 and 2 are wrapped in ``skipif_32bit``; 10 is passed as is.
    """
    size = request.param
    return size
@pytest.fixture(
    params=[
        np.arange(5, dtype="int64"),
        np.arange(5, dtype="uint64"),
        np.arange(5, dtype="float64"),
        np.array([0, 1, 2, 3, 4, np.nan], dtype="float64"),
    ]
)
def tree(request, leaf_size):
    """Build an IntervalTree spanning each left value to that value plus two."""
    left_bounds = request.param
    right_bounds = left_bounds + 2
    return IntervalTree(left_bounds, right_bounds, leaf_size=leaf_size)
class TestIntervalTree:
    """Tests for IntervalTree lookups, overlap detection and construction."""

    def test_get_indexer(self, tree):
        """Unique matches return positions; an ambiguous match raises."""
        targets = np.array([1.0, 5.5, 6.5])
        positions = tree.get_indexer(targets)
        tm.assert_numpy_array_equal(positions, np.array([0, 4, -1], dtype="intp"))

        ambiguous = np.array([3.0])
        with pytest.raises(
            KeyError, match="'indexer does not intersect a unique set of intervals'"
        ):
            tree.get_indexer(ambiguous)

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_overflow(self, dtype, target_value, target_dtype):
        """A target outside the tree dtype's range is reported as not found."""
        left = np.array([0, 1], dtype=dtype)
        right = np.array([1, 2], dtype=dtype)
        target = np.array([target_value], dtype=target_dtype)

        positions = IntervalTree(left, right).get_indexer(target)
        tm.assert_numpy_array_equal(positions, np.array([-1], dtype="intp"))

    def test_get_indexer_non_unique(self, tree):
        """Non-unique lookup returns every match plus the missing targets."""

        def intp(values):
            return np.array(values, dtype="intp")

        indexer, missing = tree.get_indexer_non_unique(np.array([1.0, 2.0, 6.5]))

        # First target has one match, second has two (in either order),
        # third has none.
        tm.assert_numpy_array_equal(indexer[:1], intp([0]))
        tm.assert_numpy_array_equal(np.sort(indexer[1:3]), intp([0, 1]))
        tm.assert_numpy_array_equal(np.sort(indexer[3:]), intp([-1]))
        tm.assert_numpy_array_equal(missing, intp([2]))

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_non_unique_overflow(self, dtype, target_value, target_dtype):
        """An out-of-range target is both unmatched and listed as missing."""
        left = np.array([0, 2], dtype=dtype)
        right = np.array([1, 3], dtype=dtype)
        target = np.array([target_value], dtype=target_dtype)

        found, not_found = IntervalTree(left, right).get_indexer_non_unique(target)
        tm.assert_numpy_array_equal(found, np.array([-1], dtype="intp"))
        tm.assert_numpy_array_equal(not_found, np.array([0], dtype="intp"))

    def test_duplicates(self, dtype):
        """Duplicate intervals fail unique lookup and all match non-unique."""
        left = np.array([0, 0, 0], dtype=dtype)
        tree = IntervalTree(left, left + 1)
        probe = np.array([0.5])

        with pytest.raises(
            KeyError, match="'indexer does not intersect a unique set of intervals'"
        ):
            tree.get_indexer(probe)

        indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))
        tm.assert_numpy_array_equal(
            np.sort(indexer), np.array([0, 1, 2], dtype="intp")
        )
        tm.assert_numpy_array_equal(missing, np.array([], dtype="intp"))

    @pytest.mark.parametrize(
        "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000]
    )
    def test_get_indexer_closed(self, closed, leaf_size):
        """Endpoints match only on the sides on which the tree is closed."""
        x = np.arange(1000, dtype="float64")
        found = x.astype("intp")
        not_found = (-1 * np.ones(1000)).astype("intp")

        tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size)
        # Interior points are always found.
        tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25))

        # Left endpoints are found only when closed on the left.
        if tree.closed_left:
            expected = found
        else:
            expected = not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.0))

        # Right endpoints are found only when closed on the right.
        if tree.closed_right:
            expected = found
        else:
            expected = not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5))

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            (np.array([0, 1, 4], dtype="int64"), np.array([2, 3, 5]), True),
            (np.array([0, 1, 2], dtype="int64"), np.array([5, 4, 3]), True),
            (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
            (np.array([0, 2, 4], dtype="int64"), np.array([1, 3, 5]), False),
            (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False),
        ],
    )
    @pytest.mark.parametrize("order", [list(x) for x in permutations(range(3))])
    def test_is_overlapping(self, closed, order, left, right, expected):
        """GH 23309: overlap detection is independent of interval order."""
        reordered = IntervalTree(left[order], right[order], closed=closed)
        assert reordered.is_overlapping is expected

    @pytest.mark.parametrize("order", [list(x) for x in permutations(range(3))])
    def test_is_overlapping_endpoints(self, closed, order):
        """shared endpoints are marked as overlapping"""
        # GH 23309
        left = np.arange(3, dtype="int64")
        right = np.arange(1, 4)
        tree = IntervalTree(left[order], right[order], closed=closed)
        # The test expects an overlap only when closed == "both".
        assert tree.is_overlapping is (closed == "both")

    @pytest.mark.parametrize(
        "left, right",
        [
            (np.array([], dtype="int64"), np.array([], dtype="int64")),
            (np.array([0], dtype="int64"), np.array([1], dtype="int64")),
            (np.array([np.nan]), np.array([np.nan])),
            (np.array([np.nan] * 3), np.array([np.nan] * 3)),
        ],
    )
    def test_is_overlapping_trivial(self, closed, left, right):
        """GH 23309: empty, single-interval and all-NaN trees never overlap."""
        trivial_tree = IntervalTree(left, right, closed=closed)
        assert trivial_tree.is_overlapping is False

    @pytest.mark.skipif(not IS64, reason="GH 23440")
    def test_construction_overflow(self):
        """GH 25485: the root pivot is computed without int64 overflow."""
        int64_max = np.iinfo(np.int64).max
        left = np.arange(101, dtype="int64")
        right = [int64_max] * 101
        tree = IntervalTree(left, right)

        # pivot should be average of left/right medians
        assert tree.root.pivot == (50 + int64_max) / 2

View File

@ -0,0 +1,44 @@
import pytest
from pandas import (
IntervalIndex,
MultiIndex,
RangeIndex,
)
import pandas._testing as tm
@pytest.fixture
def range_index():
    """Three-element RangeIndex named ``"range_index"``."""
    index = RangeIndex(3, name="range_index")
    return index
@pytest.fixture
def interval_index():
    """IntervalIndex named ``"interval_index"`` whose last two intervals overlap."""
    bounds = [(0.0, 1.0), (1.0, 2.0), (1.5, 2.5)]
    return IntervalIndex.from_tuples(bounds, name="interval_index")
def test_join_overlapping_in_mi_to_same_intervalindex(range_index, interval_index):
# GH-45661
multi_index = MultiIndex.from_product([interval_index, range_index])
result = multi_index.join(interval_index)
tm.assert_index_equal(result, multi_index)
def test_join_overlapping_to_multiindex_with_same_interval(range_index, interval_index):
# GH-45661
multi_index = MultiIndex.from_product([interval_index, range_index])
result = interval_index.join(multi_index)
tm.assert_index_equal(result, multi_index)
def test_join_overlapping_interval_to_another_intervalindex(interval_index):
# GH-45661
flipped_interval_index = interval_index[::-1]
result = interval_index.join(flipped_interval_index)
tm.assert_index_equal(result, interval_index)

View File

@ -0,0 +1,13 @@
import pytest
from pandas import IntervalIndex
import pandas._testing as tm
class TestPickle:
    """Pickle round-trip tests for IntervalIndex."""

    @pytest.mark.parametrize("closed", ["left", "right", "both"])
    def test_pickle_round_trip_closed(self, closed):
        """The index, including its ``closed`` side, survives a pickle round trip."""
        # https://github.com/pandas-dev/pandas/issues/35658
        original = IntervalIndex.from_tuples([(1, 2), (2, 3)], closed=closed)
        restored = tm.round_trip_pickle(original)
        tm.assert_index_equal(restored, original)

View File

@ -0,0 +1,202 @@
import numpy as np
import pytest
from pandas import (
Index,
IntervalIndex,
Timestamp,
interval_range,
)
import pandas._testing as tm
def monotonic_index(start, end, dtype="int64", closed="right"):
    """Return an IntervalIndex whose breaks are ``np.arange(start, end)``."""
    breaks = np.arange(start, end, dtype=dtype)
    return IntervalIndex.from_breaks(breaks, closed=closed)
def empty_index(dtype="int64", closed="right"):
    """Return an empty IntervalIndex built from an empty array of ``dtype``."""
    no_values = np.array([], dtype=dtype)
    return IntervalIndex(no_values, closed=closed)
class TestIntervalIndex:
def test_union(self, closed, sort):
    """Union of two overlapping monotonic indexes, in both directions."""
    index = monotonic_index(0, 11, closed=closed)
    other = monotonic_index(5, 13, closed=closed)
    expected = monotonic_index(0, 13, closed=closed)

    for caller, argument in ((index, other), (other, index)):
        # The caller is reversed, so exact equality is only asserted when
        # sort is None; the contents must match either way.
        result = caller[::-1].union(argument, sort=sort)
        if sort is None:
            tm.assert_index_equal(result, expected)
        assert tm.equalContents(result, expected)

    # Union with itself or with a subset of itself returns the index.
    for subset in (index, index[:1]):
        tm.assert_index_equal(index.union(subset, sort=sort), index)
def test_union_empty_result(self, closed, sort):
    """Union of empty indexes keeps or promotes the interval subtype.

    Same-dtype unions return the original empty index. Mixing int64 with
    float64 or uint64 is expected to give the empty float64 index, and both
    argument orders are checked for each mixed pair.
    """
    # GH 19101: empty result, same dtype
    index = empty_index(dtype="int64", closed=closed)
    result = index.union(index, sort=sort)
    tm.assert_index_equal(result, index)

    # GH 19101: empty result, different numeric dtypes -> common dtype is f8
    other = empty_index(dtype="float64", closed=closed)
    result = index.union(other, sort=sort)
    expected = other
    tm.assert_index_equal(result, expected)

    # Reverse direction. The previous code assigned the union to ``other``
    # and re-asserted the stale ``result``, so this direction was never
    # actually checked.
    result = other.union(index, sort=sort)
    tm.assert_index_equal(result, expected)

    other = empty_index(dtype="uint64", closed=closed)
    result = index.union(other, sort=sort)
    tm.assert_index_equal(result, expected)

    result = other.union(index, sort=sort)
    tm.assert_index_equal(result, expected)
def test_intersection(self, closed, sort):
    """Intersection of monotonic, nested, overlapping and NaN indexes."""
    index = monotonic_index(0, 11, closed=closed)
    other = monotonic_index(5, 13, closed=closed)
    expected = monotonic_index(5, 11, closed=closed)

    for caller, argument in ((index, other), (other, index)):
        # The caller is reversed, so exact equality is only asserted when
        # sort is None; the contents must match either way.
        result = caller[::-1].intersection(argument, sort=sort)
        if sort is None:
            tm.assert_index_equal(result, expected)
        assert tm.equalContents(result, expected)

    tm.assert_index_equal(index.intersection(index, sort=sort), index)

    tuple_cases = [
        # GH 26225: nested intervals
        ([(1, 2), (1, 3), (1, 4), (0, 2)], [(1, 2), (1, 3)], [(1, 2), (1, 3)]),
        # GH 26225
        ([(0, 3), (0, 2)], [(0, 2), (1, 3)], [(0, 2)]),
    ]
    for left_tuples, right_tuples, common_tuples in tuple_cases:
        index = IntervalIndex.from_tuples(left_tuples)
        other = IntervalIndex.from_tuples(right_tuples)
        expected = IntervalIndex.from_tuples(common_tuples)
        tm.assert_index_equal(index.intersection(other), expected)

    # GH 26225: duplicate nan element
    index = IntervalIndex([np.nan, np.nan])
    other = IntervalIndex([np.nan])
    expected = IntervalIndex([np.nan])
    tm.assert_index_equal(index.intersection(other), expected)
def test_intersection_empty_result(self, closed, sort):
    """Disjoint indexes intersect to an empty index of the common dtype."""
    index = monotonic_index(0, 11, closed=closed)

    # GH 19101: empty result, same dtype
    disjoint = monotonic_index(300, 314, closed=closed)
    result = index.intersection(disjoint, sort=sort)
    tm.assert_index_equal(result, empty_index(dtype="int64", closed=closed))

    # GH 19101: empty result, different numeric dtypes -> common dtype is float64
    disjoint_float = monotonic_index(300, 314, dtype="float64", closed=closed)
    result = index.intersection(disjoint_float, sort=sort)
    empty_float = disjoint_float[:0]
    tm.assert_index_equal(result, empty_float)

    # The uint64 case is compared against the same empty float64 index.
    disjoint_uint = monotonic_index(300, 314, dtype="uint64", closed=closed)
    result = index.intersection(disjoint_uint, sort=sort)
    tm.assert_index_equal(result, empty_float)
def test_intersection_duplicates(self):
# GH#38743
index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)])
other = IntervalIndex.from_tuples([(1, 2), (2, 3)])
expected = IntervalIndex.from_tuples([(1, 2), (2, 3)])
result = index.intersection(other)
tm.assert_index_equal(result, expected)
def test_difference(self, closed, sort):
    """Difference drops shared intervals and can yield an empty int64 index."""
    index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed)

    result = index.difference(index[:1], sort=sort)
    remainder = index[1:]
    # The source index is unsorted; expect a sorted result when sort is None.
    expected = remainder.sort_values() if sort is None else remainder
    tm.assert_index_equal(result, expected)

    # GH 19101: empty result, same dtype
    result = index.difference(index, sort=sort)
    empty_int = empty_index(dtype="int64", closed=closed)
    tm.assert_index_equal(result, empty_int)

    # GH 19101: empty result, different dtypes
    float_left = IntervalIndex.from_arrays(
        index.left.astype("float64"), index.right, closed=closed
    )
    result = index.difference(float_left, sort=sort)
    tm.assert_index_equal(result, empty_int)
def test_symmetric_difference(self, closed, sort):
    """Symmetric difference of shifted slices, of itself, and across dtypes."""
    index = monotonic_index(0, 11, closed=closed)

    # Dropping the first vs. the last element leaves exactly those two.
    result = index[1:].symmetric_difference(index[:-1], sort=sort)
    end_intervals = IntervalIndex([index[0], index[-1]])
    if sort is None:
        tm.assert_index_equal(result, end_intervals)
    assert tm.equalContents(result, end_intervals)

    # GH 19101: empty result, same dtype
    result = index.symmetric_difference(index, sort=sort)
    empty_int = empty_index(dtype="int64", closed=closed)
    if sort is None:
        tm.assert_index_equal(result, empty_int)
    assert tm.equalContents(result, empty_int)

    # GH 19101: empty result, different dtypes
    float_left = IntervalIndex.from_arrays(
        index.left.astype("float64"), index.right, closed=closed
    )
    result = index.symmetric_difference(float_left, sort=sort)
    tm.assert_index_equal(result, empty_index(dtype="float64", closed=closed))
@pytest.mark.filterwarnings("ignore:'<' not supported between:RuntimeWarning")
@pytest.mark.parametrize(
    "op_name", ["union", "intersection", "difference", "symmetric_difference"]
)
def test_set_incompatible_types(self, closed, op_name, sort):
    """Set ops against incompatible indexes match the object-cast result."""
    index = monotonic_index(0, 11, closed=closed)
    set_op = getattr(index, op_name)
    is_difference = op_name == "difference"

    def check_against_object_cast(other):
        # Expected value comes from running the op on the object-cast index;
        # for "difference" the original index is expected back instead.
        expected = getattr(index.astype(object), op_name)(other, sort=sort)
        if is_difference:
            expected = index
        result = set_op(other, sort=sort)
        tm.assert_index_equal(result, expected)

    # TODO: standardize return type of non-union setops type(self vs other)
    # non-IntervalIndex
    if is_difference:
        expected = index
    else:
        expected = getattr(index.astype("O"), op_name)(Index([1, 2, 3]))
    result = set_op(Index([1, 2, 3]), sort=sort)
    tm.assert_index_equal(result, expected)

    # mixed closed -> cast to object
    for other_closed in {"right", "left", "both", "neither"} - {closed}:
        check_against_object_cast(monotonic_index(0, 11, closed=other_closed))

    # GH 19016: incompatible dtypes -> cast to object
    check_against_object_cast(
        interval_range(Timestamp("20180101"), periods=9, closed=closed)
    )