mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-07-04 07:08:05 +00:00
first commit
This commit is contained in:
@ -0,0 +1,240 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import is_platform_arm
|
||||
|
||||
from pandas.core.dtypes.dtypes import (
|
||||
CategoricalDtype,
|
||||
IntervalDtype,
|
||||
)
|
||||
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
IntervalIndex,
|
||||
NaT,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
interval_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class AstypeTests:
    """Shared ``astype`` checks, run against IntervalIndex of every subtype.

    The concrete subclasses in this file provide the ``index`` fixture.
    """

    def test_astype_idempotent(self, index):
        # Casting to the generic "interval" alias, or to the index's own
        # dtype, must give back an index equal to the input.
        for target in ("interval", index.dtype):
            roundtripped = index.astype(target)
            tm.assert_index_equal(roundtripped, index)

    def test_astype_object(self, index):
        as_object = index.astype(object)
        tm.assert_index_equal(as_object, Index(index.values, dtype="object"))
        # An object-dtype Index holding the same intervals is still not
        # reported as equal to the IntervalIndex itself.
        assert not as_object.equals(index)

    def test_astype_category(self, index):
        # default CategoricalDtype, spelled as a string and as an instance
        default_expected = CategoricalIndex(index.values)
        for target in ("category", CategoricalDtype()):
            tm.assert_index_equal(index.astype(target), default_expected)

        # non-default params
        cats = index.dropna().unique().values[:-1]
        ordered_dtype = CategoricalDtype(categories=cats, ordered=True)
        converted = index.astype(ordered_dtype)
        ordered_expected = CategoricalIndex(
            index.values, categories=cats, ordered=True
        )
        tm.assert_index_equal(converted, ordered_expected)

    @pytest.mark.parametrize(
        "dtype",
        [
            "int64",
            "uint64",
            "float64",
            "complex128",
            "period[M]",
            "timedelta64",
            "timedelta64[ns]",
            "datetime64",
            "datetime64[ns]",
            "datetime64[ns, US/Eastern]",
        ],
    )
    def test_astype_cannot_cast(self, index, dtype):
        # None of these non-interval dtypes is a valid cast target.
        with pytest.raises(TypeError, match="Cannot cast IntervalIndex to dtype"):
            index.astype(dtype)

    def test_astype_invalid_dtype(self, index):
        # The quote style around the dtype name varies, so accept either.
        pattern = "data type [\"']fake_dtype[\"'] not understood"
        with pytest.raises(TypeError, match=pattern):
            index.astype("fake_dtype")
|
||||
|
||||
|
||||
class TestIntSubtype(AstypeTests):
    """``astype`` checks for IntervalIndex built on integer-like bounds"""

    # One signed and one unsigned index; each becomes a param of ``index``.
    indexes = [
        IntervalIndex.from_breaks(np.arange(-10, 11, dtype="int64")),
        IntervalIndex.from_breaks(np.arange(100, dtype="uint64"), closed="left"),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        return request.param

    @pytest.mark.parametrize(
        "subtype", ["float64", "datetime64[ns]", "timedelta64[ns]"]
    )
    def test_subtype_conversion(self, index, subtype):
        target = IntervalDtype(subtype, index.closed)
        converted = index.astype(target)

        # Casting each side on its own must give the same intervals.
        left = index.left.astype(subtype)
        right = index.right.astype(subtype)
        rebuilt = IntervalIndex.from_arrays(left, right, closed=index.closed)
        tm.assert_index_equal(converted, rebuilt)

    @pytest.mark.parametrize(
        "subtype_start, subtype_end", [("int64", "uint64"), ("uint64", "int64")]
    )
    def test_subtype_integer(self, subtype_start, subtype_end):
        source = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start))
        target = IntervalDtype(subtype_end, source.closed)
        converted = source.astype(target)

        rebuilt = IntervalIndex.from_arrays(
            source.left.astype(subtype_end),
            source.right.astype(subtype_end),
            closed=source.closed,
        )
        tm.assert_index_equal(converted, rebuilt)

    @pytest.mark.xfail(reason="GH#15832")
    def test_subtype_integer_errors(self):
        # int64 -> uint64 fails with negative values
        signed = interval_range(-10, 10)
        unsigned_dtype = IntervalDtype("uint64", "right")

        # Until we decide what the exception message _should_ be, we
        # assert something that it should _not_ be.
        # We should _not_ be getting a message suggesting that the -10
        # has been wrapped around to a large-positive integer
        not_wraparound = "^(?!(left side of interval must be <= right side))"
        with pytest.raises(ValueError, match=not_wraparound):
            signed.astype(unsigned_dtype)
|
||||
|
||||
|
||||
class TestFloatSubtype(AstypeTests):
    """``astype`` checks for IntervalIndex whose bounds are floats"""

    # A gap-free range plus an index with a missing interval and duplicates.
    indexes = [
        interval_range(-10.0, 10.0, closed="neither"),
        IntervalIndex.from_arrays(
            [-1.5, np.nan, 0.0, 0.0, 1.5], [-0.5, np.nan, 1.0, 1.0, 3.0], closed="both"
        ),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, subtype):
        floats = interval_range(0.0, 10.0)
        target = IntervalDtype(subtype, "right")
        converted = floats.astype(target)

        left = floats.left.astype(subtype)
        right = floats.right.astype(subtype)
        rebuilt = IntervalIndex.from_arrays(left, right, closed=floats.closed)
        tm.assert_index_equal(converted, rebuilt)

        # raises with NA
        na_msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
        with pytest.raises(ValueError, match=na_msg):
            floats.insert(0, np.nan).astype(target)

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer_with_non_integer_borders(self, subtype):
        # Bounds step by 0.25, so most of them are not whole numbers.
        quarters = interval_range(0.0, 3.0, freq=0.25)
        target = IntervalDtype(subtype, "right")
        converted = quarters.astype(target)

        rebuilt = IntervalIndex.from_arrays(
            quarters.left.astype(subtype),
            quarters.right.astype(subtype),
            closed=quarters.closed,
        )
        tm.assert_index_equal(converted, rebuilt)

    @pytest.mark.xfail(is_platform_arm(), reason="GH 41740")
    def test_subtype_integer_errors(self):
        # float64 -> uint64 fails with negative values
        with_negatives = interval_range(-10.0, 10.0)
        unsigned_dtype = IntervalDtype("uint64", "right")
        expected_msg = re.escape(
            "Cannot convert interval[float64, right] to interval[uint64, right]; "
            "subtypes are incompatible"
        )
        with pytest.raises(TypeError, match=expected_msg):
            with_negatives.astype(unsigned_dtype)

    @pytest.mark.parametrize("subtype", ["datetime64[ns]", "timedelta64[ns]"])
    def test_subtype_datetimelike(self, index, subtype):
        # float bounds cannot be reinterpreted as datetimes/timedeltas
        target = IntervalDtype(subtype, "right")
        pattern = "Cannot convert .* to .*; subtypes are incompatible"
        with pytest.raises(TypeError, match=pattern):
            index.astype(target)
|
||||
|
||||
|
||||
class TestDatetimelikeSubtype(AstypeTests):
    """``astype`` checks for IntervalIndex over datetime/timedelta bounds"""

    # naive, NaT-containing and tz-aware datetimes, then timedeltas
    indexes = [
        interval_range(Timestamp("2018-01-01"), periods=10, closed="neither"),
        interval_range(Timestamp("2018-01-01"), periods=10).insert(2, NaT),
        interval_range(Timestamp("2018-01-01", tz="US/Eastern"), periods=10),
        interval_range(Timedelta("0 days"), periods=10, closed="both"),
        interval_range(Timedelta("0 days"), periods=10).insert(2, NaT),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, index, subtype):
        target = IntervalDtype(subtype, "right")
        converted = index.astype(target)

        left = index.left.astype(subtype)
        right = index.right.astype(subtype)
        rebuilt = IntervalIndex.from_arrays(left, right, closed=index.closed)
        tm.assert_index_equal(converted, rebuilt)

    def test_subtype_float(self, index):
        float_dtype = IntervalDtype("float64", "right")
        pattern = "Cannot convert .* to .*; subtypes are incompatible"
        with pytest.raises(TypeError, match=pattern):
            index.astype(float_dtype)

    def test_subtype_datetimelike(self):
        pattern = "Cannot convert .* to .*; subtypes are incompatible"

        # datetime -> timedelta raises
        to_timedelta = IntervalDtype("timedelta64[ns]", "right")

        naive = interval_range(Timestamp("2018-01-01"), periods=10)
        with pytest.raises(TypeError, match=pattern):
            naive.astype(to_timedelta)

        aware = interval_range(Timestamp("2018-01-01", tz="CET"), periods=10)
        with pytest.raises(TypeError, match=pattern):
            aware.astype(to_timedelta)

        # timedelta -> datetime raises
        to_datetime = IntervalDtype("datetime64[ns]", "right")
        deltas = interval_range(Timedelta("0 days"), periods=10)
        with pytest.raises(TypeError, match=pattern):
            deltas.astype(to_datetime)
|
@ -0,0 +1,71 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import IntervalIndex
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.indexes.common import Base
|
||||
|
||||
|
||||
class TestBase(Base):
    """
    Hooks IntervalIndex into the shared common index tests. Tests that are not
    part of that shared suite belong in test_interval.py or in the dedicated
    test file (e.g. test_astype.py).
    """

    _index_cls = IntervalIndex

    @pytest.fixture
    def simple_index(self) -> IntervalIndex:
        # ten right-closed unit intervals built from the breaks 0..10
        return self._index_cls.from_breaks(range(11), closed="right")

    @pytest.fixture
    def index(self):
        return tm.makeIntervalIndex(10)

    def create_index(self, *, closed="right"):
        """Build the ten unit intervals over 0..10 with the requested closure."""
        return IntervalIndex.from_breaks(range(11), closed=closed)

    def test_repr_max_seq_item_setting(self):
        """Override base test: not a valid repr as we use interval notation."""

    def test_repr_roundtrip(self):
        """Override base test: not a valid repr as we use interval notation."""

    def test_take(self, closed):
        idx = self.create_index(closed=closed)

        # taking every position in order reproduces the index
        tm.assert_index_equal(idx.take(range(10)), idx)

        # repeated positions are allowed
        picked = idx.take([0, 0, 1])
        wanted = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2], closed=closed)
        tm.assert_index_equal(picked, wanted)

    def test_where(self, simple_index, listlike_box):
        box = listlike_box
        idx = simple_index

        # an all-True condition keeps everything
        keep_all = [True] * len(idx)
        tm.assert_index_equal(idx.where(box(keep_all)), idx)

        # masking the first position turns it into a missing interval
        drop_first = [False] + [True] * len(idx[1:])
        wanted = IntervalIndex([np.nan] + idx[1:].tolist())
        tm.assert_index_equal(idx.where(box(drop_first)), wanted)

    def test_getitem_2d_deprecated(self, simple_index):
        # GH#30588 multi-dim indexing is deprecated, but raising is also acceptable
        idx = simple_index
        err = "multi-dimensional indexing not allowed"

        with pytest.raises(ValueError, match=err):
            with tm.assert_produces_warning(FutureWarning):
                idx[:, None]

        # GH#44051
        for scalar_bool in (True, False):
            with pytest.raises(ValueError, match=err):
                idx[scalar_bool]
|
@ -0,0 +1,473 @@
|
||||
from functools import partial
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_categorical_dtype
|
||||
from pandas.core.dtypes.dtypes import IntervalDtype
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
date_range,
|
||||
notna,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.api import (
|
||||
Float64Index,
|
||||
Int64Index,
|
||||
)
|
||||
from pandas.core.arrays import IntervalArray
|
||||
import pandas.core.common as com
|
||||
|
||||
|
||||
@pytest.fixture(params=[None, "foo"])
def name(request):
    """Parametrized fixture yielding ``None`` and then ``"foo"``."""
    chosen = request.param
    return chosen
|
||||
|
||||
|
||||
class ConstructorTests:
    """
    Common tests for all variations of IntervalIndex construction. Input data
    is supplied in breaks format, then converted by the subclass method
    ``get_kwargs_from_breaks`` to the keyword arguments expected by the
    constructor under test.

    The subclasses in this file provide the ``constructor`` fixture and
    ``get_kwargs_from_breaks``. The ``closed`` fixture is not defined in this
    file (presumably it comes from a shared conftest -- confirm).
    """

    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
    @pytest.mark.parametrize(
        "breaks",
        [
            [3, 14, 15, 92, 653],
            np.arange(10, dtype="int64"),
            Int64Index(range(-10, 11)),
            Float64Index(np.arange(20, 30, 0.5)),
            date_range("20180101", periods=10),
            date_range("20180101", periods=10, tz="US/Eastern"),
            timedelta_range("1 day", periods=10),
        ],
    )
    def test_constructor(self, constructor, breaks, closed, name):
        result_kwargs = self.get_kwargs_from_breaks(breaks, closed)
        result = constructor(closed=closed, name=name, **result_kwargs)

        assert result.closed == closed
        assert result.name == name
        # a plain list has no ``dtype`` attribute; int64 is expected for it
        assert result.dtype.subtype == getattr(breaks, "dtype", "int64")
        tm.assert_index_equal(result.left, Index(breaks[:-1]))
        tm.assert_index_equal(result.right, Index(breaks[1:]))

    @pytest.mark.parametrize(
        "breaks, subtype",
        [
            (Int64Index([0, 1, 2, 3, 4]), "float64"),
            (Int64Index([0, 1, 2, 3, 4]), "datetime64[ns]"),
            (Int64Index([0, 1, 2, 3, 4]), "timedelta64[ns]"),
            (Float64Index([0, 1, 2, 3, 4]), "int64"),
            (date_range("2017-01-01", periods=5), "int64"),
            (timedelta_range("1 day", periods=5), "int64"),
        ],
    )
    def test_constructor_dtype(self, constructor, breaks, subtype):
        # GH 19262: conversion via dtype parameter
        expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype))
        expected = constructor(**expected_kwargs)

        result_kwargs = self.get_kwargs_from_breaks(breaks)
        iv_dtype = IntervalDtype(subtype, "right")
        # the dtype may be given as an object or as its string form
        for dtype in (iv_dtype, str(iv_dtype)):
            result = constructor(dtype=dtype, **result_kwargs)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            # one entry per distinct breaks type; the original listed the
            # same Int64Index three times, re-running an identical case
            Int64Index([0, 1, 2, 3, 4]),
            Float64Index([0, 1, 2, 3, 4]),
            date_range("2017-01-01", periods=5),
            timedelta_range("1 day", periods=5),
        ],
    )
    def test_constructor_pass_closed(self, constructor, breaks):
        # not passing closed to IntervalDtype, but to IntervalArray constructor
        warn = None
        if isinstance(constructor, partial) and constructor.func is Index:
            # passing kwargs to Index is deprecated
            warn = FutureWarning

        iv_dtype = IntervalDtype(breaks.dtype)

        result_kwargs = self.get_kwargs_from_breaks(breaks)

        for dtype in (iv_dtype, str(iv_dtype)):
            with tm.assert_produces_warning(warn):

                result = constructor(dtype=dtype, closed="left", **result_kwargs)
            assert result.dtype.closed == "left"

    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
    @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
    def test_constructor_nan(self, constructor, breaks, closed):
        # GH 18421
        result_kwargs = self.get_kwargs_from_breaks(breaks)
        result = constructor(closed=closed, **result_kwargs)

        expected_subtype = np.float64
        # n breaks describe n - 1 intervals, all of them missing here
        expected_values = np.array(breaks[:-1], dtype=object)

        assert result.closed == closed
        assert result.dtype.subtype == expected_subtype
        tm.assert_numpy_array_equal(np.array(result), expected_values)

    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
    @pytest.mark.parametrize(
        "breaks",
        [
            [],
            np.array([], dtype="int64"),
            np.array([], dtype="float64"),
            np.array([], dtype="datetime64[ns]"),
            np.array([], dtype="timedelta64[ns]"),
        ],
    )
    def test_constructor_empty(self, constructor, breaks, closed):
        # GH 18421
        result_kwargs = self.get_kwargs_from_breaks(breaks)
        result = constructor(closed=closed, **result_kwargs)

        expected_values = np.array([], dtype=object)
        # an empty plain list carries no dtype; int64 is expected for it
        expected_subtype = getattr(breaks, "dtype", np.int64)

        assert result.empty
        assert result.closed == closed
        assert result.dtype.subtype == expected_subtype
        tm.assert_numpy_array_equal(np.array(result), expected_values)

    @pytest.mark.parametrize(
        "breaks",
        [
            tuple("0123456789"),
            list("abcdefghij"),
            np.array(list("abcdefghij"), dtype=object),
            np.array(list("abcdefghij"), dtype="<U1"),
        ],
    )
    def test_constructor_string(self, constructor, breaks):
        # GH 19016
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=msg):
            constructor(**self.get_kwargs_from_breaks(breaks))

    @pytest.mark.parametrize("cat_constructor", [Categorical, CategoricalIndex])
    def test_constructor_categorical_valid(self, constructor, cat_constructor):
        # GH 21243/21253

        breaks = np.arange(10, dtype="int64")
        expected = IntervalIndex.from_breaks(breaks)

        cat_breaks = cat_constructor(breaks)
        result_kwargs = self.get_kwargs_from_breaks(cat_breaks)
        result = constructor(**result_kwargs)
        tm.assert_index_equal(result, expected)

    def test_generic_errors(self, constructor):
        # filler input data to be used when supplying invalid kwargs
        filler = self.get_kwargs_from_breaks(range(10))

        # invalid closed
        msg = "closed must be one of 'right', 'left', 'both', 'neither'"
        with pytest.raises(ValueError, match=msg):
            constructor(closed="invalid", **filler)

        # unsupported dtype
        msg = "dtype must be an IntervalDtype, got int64"
        with pytest.raises(TypeError, match=msg):
            constructor(dtype="int64", **filler)

        # invalid dtype
        msg = "data type [\"']invalid[\"'] not understood"
        with pytest.raises(TypeError, match=msg):
            constructor(dtype="invalid", **filler)

        # no point in nesting periods in an IntervalIndex
        periods = period_range("2000-01-01", periods=10)
        periods_kwargs = self.get_kwargs_from_breaks(periods)
        msg = "Period dtypes are not supported, use a PeriodIndex instead"
        with pytest.raises(ValueError, match=msg):
            constructor(**periods_kwargs)

        # decreasing values
        decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1))
        msg = "left side of interval must be <= right side"
        with pytest.raises(ValueError, match=msg):
            constructor(**decreasing_kwargs)
|
||||
|
||||
|
||||
class TestFromArrays(ConstructorTests):
    """Construction tests that target IntervalIndex.from_arrays"""

    @pytest.fixture
    def constructor(self):
        return IntervalIndex.from_arrays

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Translate breaks into the ``left``/``right`` keyword arguments taken by
        IntervalIndex.from_arrays (``closed`` is accepted but not used here).
        """
        left_side, right_side = breaks[:-1], breaks[1:]
        return {"left": left_side, "right": right_side}

    def test_constructor_errors(self):
        # GH 19016: categorical data
        cat = Categorical(list("01234abcde"), ordered=True)
        unsupported = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=unsupported):
            IntervalIndex.from_arrays(cat[:-1], cat[1:])

        # unequal length
        three_lefts = [0, 1, 2]
        two_rights = [2, 3]
        with pytest.raises(
            ValueError, match="left and right must have the same length"
        ):
            IntervalIndex.from_arrays(three_lefts, two_rights)

    @pytest.mark.parametrize(
        "left_subtype, right_subtype", [(np.int64, np.float64), (np.float64, np.int64)]
    )
    def test_mixed_float_int(self, left_subtype, right_subtype):
        """one int side and one float side: both sides come out as float"""
        left = np.arange(9, dtype=left_subtype)
        right = np.arange(1, 10, dtype=right_subtype)
        mixed = IntervalIndex.from_arrays(left, right)

        tm.assert_index_equal(mixed.left, Float64Index(left))
        tm.assert_index_equal(mixed.right, Float64Index(right))
        assert mixed.dtype.subtype == np.float64
|
||||
|
||||
|
||||
class TestFromBreaks(ConstructorTests):
    """Construction tests that target IntervalIndex.from_breaks"""

    @pytest.fixture
    def constructor(self):
        return IntervalIndex.from_breaks

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Wrap breaks as the ``breaks`` keyword argument taken by
        IntervalIndex.from_breaks (``closed`` is accepted but not used here).
        """
        kwargs = {"breaks": breaks}
        return kwargs

    def test_constructor_errors(self):
        # GH 19016: categorical data
        cat = Categorical(list("01234abcde"), ordered=True)
        unsupported = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=unsupported):
            IntervalIndex.from_breaks(cat)

    def test_length_one(self):
        """a single break gives the same result as no breaks at all"""
        from_single = IntervalIndex.from_breaks([0])
        from_none = IntervalIndex.from_breaks([])
        tm.assert_index_equal(from_single, from_none)

    def test_left_right_dont_share_data(self):
        # GH#36310
        arr = IntervalIndex.from_breaks(np.arange(5))._data
        left_base = arr._left.base
        # either left owns its buffer or it is backed by a different one
        assert left_base is None or left_base is not arr._right.base
|
||||
|
||||
|
||||
class TestFromTuples(ConstructorTests):
    """Tests specific to IntervalIndex.from_tuples"""

    @pytest.fixture
    def constructor(self):
        return IntervalIndex.from_tuples

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Convert intervals in breaks format to the ``data`` keyword argument
        expected by IntervalIndex.from_tuples (``closed`` is accepted but not
        used here).
        """
        if len(breaks) == 0:
            return {"data": breaks}

        tuples = list(zip(breaks[:-1], breaks[1:]))
        if isinstance(breaks, (list, tuple)):
            return {"data": tuples}
        elif is_categorical_dtype(breaks):
            # rebuild with the same categorical container type as the input
            return {"data": breaks._constructor(tuples)}
        return {"data": com.asarray_tuplesafe(tuples)}

    def test_constructor_errors(self):
        # Local import: this module does not import ``re`` at the top.
        import re

        # ``pytest.raises(match=...)`` treats its argument as a regex, so the
        # expected messages are escaped. Left raw, the "." is a wildcard and
        # an interpolated "[...]" becomes a character class, letting nearly
        # any message pass.

        # non-tuple
        tuples = [(0, 1), 2, (3, 4)]
        msg = re.escape("IntervalIndex.from_tuples received an invalid item, 2")
        with pytest.raises(TypeError, match=msg):
            IntervalIndex.from_tuples(tuples)

        # too few/many items: the message is expected to name the offending
        # tuple, not the whole input list
        template = "IntervalIndex.from_tuples requires tuples of length 2, got {t}"
        for bad_item, tuples in [
            ((2,), [(0, 1), (2,), (3, 4)]),
            ((2, 3, 4), [(0, 1), (2, 3, 4), (5, 6)]),
        ]:
            msg = re.escape(template.format(t=bad_item))
            with pytest.raises(ValueError, match=msg):
                IntervalIndex.from_tuples(tuples)

    def test_na_tuples(self):
        # tuple (NA, NA) evaluates the same as NA as an element
        na_tuple = [(0, 1), (np.nan, np.nan), (2, 3)]
        idx_na_tuple = IntervalIndex.from_tuples(na_tuple)
        idx_na_element = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
        tm.assert_index_equal(idx_na_tuple, idx_na_element)
|
||||
|
||||
|
||||
class TestClassConstructors(ConstructorTests):
    """Construction tests going through IntervalIndex(...) and Index(...)"""

    @pytest.fixture(
        params=[IntervalIndex, partial(Index, dtype="interval")],
        ids=["IntervalIndex", "Index"],
    )
    def constructor(self, request):
        return request.param

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Translate breaks into the ``data`` keyword argument taken by the
        IntervalIndex/Index constructors: one Interval per adjacent pair of
        breaks, closed on the ``closed`` side.
        """
        if len(breaks) == 0:
            return {"data": breaks}

        ivs = []
        for left, right in zip(breaks[:-1], breaks[1:]):
            # a missing left bound is passed through as-is
            ivs.append(Interval(left, right, closed) if notna(left) else left)

        # hand the intervals back in a container matching the input
        if isinstance(breaks, list):
            data = ivs
        elif is_categorical_dtype(breaks):
            data = breaks._constructor(ivs)
        else:
            data = np.array(ivs, dtype=object)
        return {"data": data}

    def test_generic_errors(self, constructor):
        """
        Override of the base class implementation: errors are handled
        differently here, and the checks are unnecessary since they are
        caught at the Interval level.
        """

    def test_constructor_string(self):
        # GH23013
        # When forming the interval from breaks,
        # the interval of strings is already forbidden.
        return None

    def test_constructor_errors(self, constructor):
        # mismatched closed within intervals with no constructor override
        mixed = [Interval(0, 1, closed="right"), Interval(2, 3, closed="left")]
        with pytest.raises(
            ValueError, match="intervals must all be closed on the same side"
        ):
            constructor(mixed)

        # scalar
        scalar_msg = (
            r"IntervalIndex\(...\) must be called with a collection of "
            "some kind, 5 was passed"
        )
        with pytest.raises(TypeError, match=scalar_msg):
            constructor(5)

        # not an interval; dtype depends on 32bit/windows builds
        not_interval_msg = (
            "type <class 'numpy.int(32|64)'> with value 0 is not an interval"
        )
        with pytest.raises(TypeError, match=not_interval_msg):
            constructor([0, 1])

    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
    @pytest.mark.parametrize(
        "data, closed",
        [
            ([], "both"),
            ([np.nan, np.nan], "neither"),
            (
                [Interval(0, 3, closed="neither"), Interval(2, 5, closed="neither")],
                "left",
            ),
            (
                [Interval(0, 3, closed="left"), Interval(2, 5, closed="right")],
                "neither",
            ),
            (IntervalIndex.from_breaks(range(5), closed="both"), "right"),
        ],
    )
    def test_override_inferred_closed(self, constructor, data, closed):
        # GH 19370
        # Reduce the input to bare (left, right) pairs so the expected index
        # takes its closure from the ``closed`` argument alone.
        if not isinstance(data, IntervalIndex):
            pairs = [(iv.left, iv.right) if notna(iv) else iv for iv in data]
        else:
            pairs = data.to_tuples()
        wanted = IntervalIndex.from_tuples(pairs, closed=closed)
        built = constructor(data, closed=closed)
        tm.assert_index_equal(built, wanted)

    @pytest.mark.parametrize(
        "values_constructor", [list, np.array, IntervalIndex, IntervalArray]
    )
    def test_index_object_dtype(self, values_constructor):
        # Index(intervals, dtype=object) is an Index (not an IntervalIndex)
        values = values_constructor([Interval(0, 1), Interval(1, 2), Interval(2, 3)])
        as_object = Index(values, dtype=object)

        assert type(as_object) is Index
        tm.assert_numpy_array_equal(as_object.values, np.array(values))

    def test_index_mixed_closed(self):
        # GH27172
        mixed = [
            Interval(0, 1, closed="left"),
            Interval(1, 2, closed="right"),
            Interval(2, 3, closed="neither"),
            Interval(3, 4, closed="both"),
        ]
        inferred = Index(mixed)
        explicit_object = Index(mixed, dtype=object)
        tm.assert_index_equal(inferred, explicit_object)
|
||||
|
||||
|
||||
def test_dtype_closed_mismatch():
    # GH#38394 closed specified in both dtype and IntervalIndex constructor
    left_closed = IntervalDtype(np.int64, "left")
    mismatch = "closed keyword does not match dtype.closed"

    # the index and the array constructor must both reject the conflict
    for klass in (IntervalIndex, IntervalArray):
        with pytest.raises(ValueError, match=mismatch):
            klass([], dtype=left_closed, closed="neither")
|
@ -0,0 +1,36 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
IntervalIndex,
|
||||
date_range,
|
||||
)
|
||||
|
||||
|
||||
class TestEquals:
    def test_equals(self, closed):
        """IntervalIndex.equals against itself, look-alikes and other closures"""
        base = IntervalIndex.from_breaks(np.arange(5), closed=closed)

        # identical object and a copy compare equal
        assert base.equals(base)
        assert base.equals(base.copy())

        # same intervals held in a different container do not
        assert not base.equals(base.astype(object))
        assert not base.equals(np.array(base))
        assert not base.equals(list(base))

        # unrelated data does not
        assert not base.equals([1, 2])
        assert not base.equals(np.array([1, 2]))
        assert not base.equals(date_range("20130101", periods=2))

        # names play no part in equality
        named_foo = IntervalIndex.from_breaks(np.arange(5), closed=closed, name="foo")
        named_bar = IntervalIndex.from_breaks(np.arange(5), closed=closed, name="bar")
        assert base.equals(named_foo)
        assert named_foo.equals(named_bar)

        # every other closure differs
        remaining = {"left", "right", "both", "neither"} - {closed}
        for other_closed in remaining:
            differently_closed = IntervalIndex.from_breaks(
                np.arange(5), closed=other_closed
            )
            assert not base.equals(differently_closed)
|
@ -0,0 +1,105 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.api import Float64Index
|
||||
|
||||
|
||||
class TestIntervalIndexRendering:
    """Checks of the text rendering of IntervalIndex-labelled objects.

    The expected strings are whitespace-sensitive: pandas pads the index
    labels to a common width and separates them from the values with a run of
    spaces, so the padding inside the literals must not be collapsed.
    """

    def test_frame_repr(self):
        # https://github.com/pandas-dev/pandas/pull/24134/files
        df = DataFrame(
            {"A": [1, 2, 3, 4]}, index=IntervalIndex.from_breaks([0, 1, 2, 3, 4])
        )
        result = repr(df)
        # header is indented past the 6-character labels and 2-space gap
        expected = "        A\n(0, 1]  1\n(1, 2]  2\n(2, 3]  3\n(3, 4]  4"
        assert result == expected

    @pytest.mark.parametrize(
        "constructor,expected",
        [
            (
                Series,
                (
                    "(0.0, 1.0]    a\n"
                    "NaN           b\n"
                    "(2.0, 3.0]    c\n"
                    "dtype: object"
                ),
            ),
            (DataFrame, ("            0\n(0.0, 1.0]  a\nNaN         b\n(2.0, 3.0]  c")),
        ],
    )
    def test_repr_missing(self, constructor, expected):
        # GH 25984
        index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
        obj = constructor(list("abc"), index=index)
        result = repr(obj)
        assert result == expected

    def test_repr_floats(self):
        # GH 32553

        markers = Series(
            ["foo", "bar"],
            index=IntervalIndex(
                [
                    Interval(left, right)
                    for left, right in zip(
                        Float64Index([329.973, 345.137], dtype="float64"),
                        Float64Index([345.137, 360.191], dtype="float64"),
                    )
                ]
            ),
        )
        result = str(markers)
        expected = "(329.973, 345.137]    foo\n(345.137, 360.191]    bar\ndtype: object"
        assert result == expected

    @pytest.mark.parametrize(
        "tuples, closed, expected_data",
        [
            ([(0, 1), (1, 2), (2, 3)], "left", ["[0, 1)", "[1, 2)", "[2, 3)"]),
            (
                [(0.5, 1.0), np.nan, (2.0, 3.0)],
                "right",
                ["(0.5, 1.0]", "NaN", "(2.0, 3.0]"],
            ),
            (
                [
                    (Timestamp("20180101"), Timestamp("20180102")),
                    np.nan,
                    (Timestamp("20180102"), Timestamp("20180103")),
                ],
                "both",
                ["[2018-01-01, 2018-01-02]", "NaN", "[2018-01-02, 2018-01-03]"],
            ),
            (
                [
                    (Timedelta("0 days"), Timedelta("1 days")),
                    (Timedelta("1 days"), Timedelta("2 days")),
                    np.nan,
                ],
                "neither",
                [
                    "(0 days 00:00:00, 1 days 00:00:00)",
                    "(1 days 00:00:00, 2 days 00:00:00)",
                    "NaN",
                ],
            ),
        ],
    )
    def test_to_native_types(self, tuples, closed, expected_data):
        # GH 28210
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        result = index._format_native_types()
        expected = np.array(expected_data)
        tm.assert_numpy_array_equal(result, expected)
|
@ -0,0 +1,587 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import InvalidIndexError
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
MultiIndex,
|
||||
NaT,
|
||||
Series,
|
||||
Timedelta,
|
||||
date_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestGetLoc:
    """Tests for ``IntervalIndex.get_loc`` with interval, scalar and NA keys."""

    @pytest.mark.parametrize("side", ["right", "left", "both", "neither"])
    def test_get_loc_interval(self, closed, side):
        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        # Bounds actually stored in idx, mapped to their position.
        stored = {(0, 1): 0, (2, 3): 1}
        candidates = [(0, 1), (1, 2), (2, 3), (3, 4), (0, 2), (2.5, 3), (-1, 4)]

        for left, right in candidates:
            # if get_loc is supplied an interval, it should only search
            # for exact matches, not overlaps or covers, else KeyError.
            key = Interval(left, right, closed=side)
            if closed == side and (left, right) in stored:
                assert idx.get_loc(key) == stored[(left, right)]
                continue

            msg = re.escape(f"Interval({left}, {right}, closed='{side}')")
            with pytest.raises(KeyError, match=msg):
                idx.get_loc(key)

    @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5])
    def test_get_loc_scalar(self, closed, scalar):
        # answers = {side: {query: answer}}.
        # A query absent from the inner dict should raise a KeyError
        answers = {
            "right": {0.5: 0, 1: 0, 2.5: 1, 3: 1},
            "left": {0: 0, 0.5: 0, 2: 1, 2.5: 1},
            "both": {0: 0, 0.5: 0, 1: 0, 2: 1, 2.5: 1, 3: 1},
            "neither": {0.5: 0, 2.5: 1},
        }
        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
        hits = answers[closed]

        # if get_loc is supplied a scalar, it should return the index of
        # the interval which contains the scalar, or KeyError.
        if scalar not in hits:
            with pytest.raises(KeyError, match=str(scalar)):
                idx.get_loc(scalar)
            return
        assert idx.get_loc(scalar) == hits[scalar]

    @pytest.mark.parametrize("scalar", [-1, 0, 0.5, 3, 4.5, 5, 6])
    def test_get_loc_length_one_scalar(self, scalar, closed):
        # GH 20921
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        contained = scalar in index[0]
        if not contained:
            with pytest.raises(KeyError, match=str(scalar)):
                index.get_loc(scalar)
            return
        assert index.get_loc(scalar) == 0

    @pytest.mark.parametrize("other_closed", ["left", "right", "both", "neither"])
    @pytest.mark.parametrize("left, right", [(0, 5), (-1, 4), (-1, 6), (6, 7)])
    def test_get_loc_length_one_interval(self, left, right, closed, other_closed):
        # GH 20921
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        interval = Interval(left, right, closed=other_closed)
        exact_match = interval == index[0]
        if not exact_match:
            msg = re.escape(f"Interval({left}, {right}, closed='{other_closed}')")
            with pytest.raises(KeyError, match=msg):
                index.get_loc(interval)
            return
        assert index.get_loc(interval) == 0

    # Make consistent with test_interval_new.py (see #16316, #16386)
    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_datetimelike_nonoverlapping(self, breaks):
        # GH 20636
        # nonoverlapping = IntervalIndex method and no i8 conversion
        index = IntervalIndex.from_breaks(breaks)
        first = index[0]

        # scalar inside the first interval
        assert index.get_loc(first.mid) == 0

        # interval equal to the first interval
        assert index.get_loc(Interval(first.left, first.right)) == 0

    @pytest.mark.parametrize(
        "arrays",
        [
            (date_range("20180101", periods=4), date_range("20180103", periods=4)),
            (
                date_range("20180101", periods=4, tz="US/Eastern"),
                date_range("20180103", periods=4, tz="US/Eastern"),
            ),
            (
                timedelta_range("0 days", periods=4),
                timedelta_range("2 days", periods=4),
            ),
        ],
        ids=lambda x: str(x[0].dtype),
    )
    def test_get_loc_datetimelike_overlapping(self, arrays):
        # GH 20636
        index = IntervalIndex.from_arrays(*arrays)
        first = index[0]

        # scalar falling inside the first two (overlapping) intervals
        value = first.mid + Timedelta("12 hours")
        assert index.get_loc(value) == slice(0, 2, None)

        # interval equal to the first interval
        assert index.get_loc(Interval(first.left, first.right)) == 0

    @pytest.mark.parametrize(
        "values",
        [
            date_range("2018-01-04", periods=4, freq="-1D"),
            date_range("2018-01-04", periods=4, freq="-1D", tz="US/Eastern"),
            timedelta_range("3 days", periods=4, freq="-1D"),
            np.arange(3.0, -1.0, -1.0),
            np.arange(3, -1, -1),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_decreasing(self, values):
        # GH 25860
        index = IntervalIndex.from_arrays(values[1:], values[:-1])
        assert index.get_loc(index[0]) == 0

    @pytest.mark.parametrize("key", [[5], (2, 3)])
    def test_get_loc_non_scalar_errors(self, key):
        # GH 31117
        tuples = [(1, 3), (2, 4), (3, 5), (7, 10), (3, 10)]
        idx = IntervalIndex.from_tuples(tuples)

        with pytest.raises(InvalidIndexError, match=str(key)):
            idx.get_loc(key)

    def test_get_indexer_with_nans(self):
        # GH#41831
        index = IntervalIndex([np.nan, Interval(1, 2), np.nan])
        na_mask = np.array([True, False, True])

        # NA-like keys are found and get_loc yields the boolean mask
        for na_key in (None, np.nan, NA):
            assert na_key in index
            tm.assert_numpy_array_equal(index.get_loc(na_key), na_mask)

        # datetimelike NaT keys are rejected
        nat_keys = (NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns"))
        for nat_key in nat_keys:
            with pytest.raises(KeyError, match=str(nat_key)):
                index.get_loc(nat_key)
|
||||
|
||||
|
||||
class TestGetIndexer:
    """Tests for ``get_indexer`` / ``get_indexer_non_unique`` on IntervalIndex."""

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([Interval(2, 4, closed="right")], [1]),
            ([Interval(2, 4, closed="left")], [-1]),
            ([Interval(2, 4, closed="both")], [-1]),
            ([Interval(2, 4, closed="neither")], [-1]),
            ([Interval(1, 4, closed="right")], [-1]),
            ([Interval(0, 4, closed="right")], [-1]),
            ([Interval(0.5, 1.5, closed="right")], [-1]),
            ([Interval(2, 4, closed="right"), Interval(0, 1, closed="right")], [1, -1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="right")], [1, 1]),
            ([Interval(5, 7, closed="right"), Interval(2, 4, closed="right")], [2, 1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="left")], [1, -1]),
        ],
    )
    def test_get_indexer_with_interval(self, query, expected):
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (5, 7)], closed="right")

        expected_indexer = np.array(expected, dtype="intp")
        tm.assert_numpy_array_equal(index.get_indexer(query), expected_indexer)

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], [-1]),
            ([0], [-1]),
            ([0.5], [0]),
            ([1], [0]),
            ([1.5], [1]),
            ([2], [1]),
            ([2.5], [-1]),
            ([3], [-1]),
            ([3.5], [2]),
            ([4], [2]),
            ([4.5], [-1]),
            ([1, 2], [0, 1]),
            ([1, 2, 3], [0, 1, -1]),
            ([1, 2, 3, 4], [0, 1, -1, 2]),
            ([1, 2, 3, 4, 2], [0, 1, -1, 2, 1]),
        ],
    )
    def test_get_indexer_with_int_and_float(self, query, expected):
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed="right")

        expected_indexer = np.array(expected, dtype="intp")
        tm.assert_numpy_array_equal(index.get_indexer(query), expected_indexer)

    @pytest.mark.parametrize("item", [[3], np.arange(0.5, 5, 0.5)])
    def test_get_indexer_length_one(self, item, closed):
        # GH 17284
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        # every queried value lies in the single interval at position 0
        all_first = np.array([0] * len(item), dtype="intp")
        tm.assert_numpy_array_equal(index.get_indexer(item), all_first)

    @pytest.mark.parametrize("size", [1, 5])
    def test_get_indexer_length_one_interval(self, size, closed):
        # GH 17284
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        target = [Interval(0, 5, closed)] * size
        all_first = np.array([0] * size, dtype="intp")
        tm.assert_numpy_array_equal(index.get_indexer(target), all_first)

    @pytest.mark.parametrize(
        "target",
        [
            IntervalIndex.from_tuples([(7, 8), (1, 2), (3, 4), (0, 1)]),
            IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4), np.nan]),
            IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed="both"),
            [-1, 0, 0.5, 1, 2, 2.5, np.nan],
            ["foo", "foo", "bar", "baz"],
        ],
    )
    def test_get_indexer_categorical(self, target, ordered):
        # GH 30063: categorical and non-categorical results should be consistent
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])
        as_categorical = CategoricalIndex(target, ordered=ordered)

        from_categorical = index.get_indexer(as_categorical)
        from_plain = index.get_indexer(target)
        tm.assert_numpy_array_equal(from_categorical, from_plain)

    def test_get_indexer_categorical_with_nans(self):
        # GH#41934 nans in both index and in target
        base = IntervalIndex.from_breaks(range(5))
        with_nan = base.append(IntervalIndex([np.nan]))
        cat_target = CategoricalIndex(with_nan)

        tm.assert_numpy_array_equal(
            with_nan.get_indexer(cat_target), np.arange(5, dtype=np.intp)
        )

        # not-all-matches
        tm.assert_numpy_array_equal(
            with_nan[1:].get_indexer(cat_target[::-1]),
            np.array([3, 2, 1, 0, -1], dtype=np.intp),
        )

        # non-unique target, non-unique nans
        tm.assert_numpy_array_equal(
            with_nan.get_indexer(cat_target.append(cat_target)),
            np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=np.intp),
        )

    @pytest.mark.parametrize(
        "tuples, closed",
        [
            ([(0, 2), (1, 3), (3, 4)], "neither"),
            ([(0, 5), (1, 4), (6, 7)], "left"),
            ([(0, 1), (0, 1), (1, 2)], "right"),
            ([(0, 1), (2, 3), (3, 4)], "both"),
        ],
    )
    def test_get_indexer_errors(self, tuples, closed):
        # IntervalIndex needs non-overlapping for uniqueness when querying
        index = IntervalIndex.from_tuples(tuples, closed=closed)

        msg = (
            "cannot handle overlapping indices; use "
            "IntervalIndex.get_indexer_non_unique"
        )
        with pytest.raises(InvalidIndexError, match=msg):
            index.get_indexer([0, 2])

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], ([-1], [0])),
            ([0], ([0], [])),
            ([0.5], ([0], [])),
            ([1], ([0, 1], [])),
            ([1.5], ([0, 1], [])),
            ([2], ([0, 1, 2], [])),
            ([2.5], ([1, 2], [])),
            ([3], ([2], [])),
            ([3.5], ([2], [])),
            ([4], ([-1], [0])),
            ([4.5], ([-1], [0])),
            ([1, 2], ([0, 1, 0, 1, 2], [])),
            ([1, 2, 3], ([0, 1, 0, 1, 2, 2], [])),
            ([1, 2, 3, 4], ([0, 1, 0, 1, 2, 2, -1], [3])),
            ([1, 2, 3, 4, 2], ([0, 1, 0, 1, 2, 2, -1, 0, 1, 2], [3])),
        ],
    )
    def test_get_indexer_non_unique_with_int_and_float(self, query, expected):
        index = IntervalIndex.from_tuples([(0, 2.5), (1, 3), (2, 4)], closed="left")

        # expected is an (indexer, missing) pair of plain lists
        indexer_values, missing_values = expected
        expected_indexer = np.array(indexer_values, dtype="intp")
        expected_missing = np.array(missing_values, dtype="intp")

        result_indexer, result_missing = index.get_indexer_non_unique(query)

        tm.assert_numpy_array_equal(result_indexer, expected_indexer)
        tm.assert_numpy_array_equal(result_missing, expected_missing)

        # TODO we may also want to test get_indexer for the case when
        # the intervals are duplicated, decreasing, non-monotonic, etc..

    def test_get_indexer_non_monotonic(self):
        # GH 16410
        unordered = IntervalIndex.from_tuples([(2, 3), (4, 5), (0, 1)])
        other = IntervalIndex.from_tuples([(0, 1), (2, 3), (6, 7), (8, 9)])

        tm.assert_numpy_array_equal(
            unordered.get_indexer(other), np.array([2, 0, -1, -1], dtype=np.intp)
        )
        tm.assert_numpy_array_equal(
            unordered.get_indexer(unordered[1:]), np.array([1, 2], dtype=np.intp)
        )

    def test_get_indexer_with_nans(self):
        # GH#41831
        index = IntervalIndex([np.nan, np.nan])
        other = IntervalIndex([np.nan])

        assert not index._index_as_unique

        tm.assert_numpy_array_equal(
            index.get_indexer_for(other), np.array([0, 1], dtype=np.intp)
        )

    def test_get_index_non_unique_non_monotonic(self):
        # GH#44084 (root cause)
        tuples = [(0.0, 1.0), (1.0, 2.0), (0.0, 1.0), (1.0, 2.0)]
        index = IntervalIndex.from_tuples(tuples)

        indexer, _ = index.get_indexer_non_unique([Interval(1.0, 2.0)])
        tm.assert_numpy_array_equal(indexer, np.array([1, 3], dtype=np.intp))

    def test_get_indexer_multiindex_with_intervals(self):
        # GH#44084 (MultiIndex case as reported)
        interval_index = IntervalIndex.from_tuples(
            [(2.0, 3.0), (0.0, 1.0), (1.0, 2.0)], name="interval"
        )
        foo_index = Index([1, 2, 3], name="foo")
        multi_index = MultiIndex.from_product([foo_index, interval_index])

        interval_level = multi_index.get_level_values("interval")
        result = interval_level.get_indexer_for([Interval(0.0, 1.0)])
        tm.assert_numpy_array_equal(result, np.array([1, 4, 7], dtype=np.intp))
|
||||
|
||||
|
||||
class TestSliceLocs:
    """Tests for ``IntervalIndex.slice_locs`` with interval and scalar bounds."""

    def test_slice_locs_with_interval(self):
        lo, hi = Interval(0, 2), Interval(2, 4)
        # The same five queries, in this order, are run against each index.
        queries = [
            {"start": lo, "end": hi},
            {"start": lo},
            {"end": hi},
            {"end": lo},
            {"start": hi, "end": lo},
        ]

        def check(tuples, expected_locs):
            index = IntervalIndex.from_tuples(tuples)
            for kwargs, expected in zip(queries, expected_locs):
                assert index.slice_locs(**kwargs) == expected

        # increasing monotonically
        check([(0, 2), (1, 3), (2, 4)], [(0, 3), (0, 3), (0, 3), (0, 1), (2, 1)])

        # decreasing monotonically
        check([(2, 4), (1, 3), (0, 2)], [(2, 1), (2, 3), (0, 1), (0, 3), (0, 3)])

        # sorted duplicates
        check([(0, 2), (0, 2), (2, 4)], [(0, 3), (0, 3), (0, 3), (0, 2), (2, 2)])

        # unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])
        left_err = re.escape(
            '"Cannot get left slice bound for non-unique label: '
            "Interval(0, 2, closed='right')\""
        )
        right_err = re.escape(
            '"Cannot get right slice bound for non-unique label: '
            "Interval(0, 2, closed='right')\""
        )

        with pytest.raises(KeyError, match=left_err):
            index.slice_locs(start=lo, end=hi)

        with pytest.raises(KeyError, match=left_err):
            index.slice_locs(start=lo)

        assert index.slice_locs(end=hi) == (0, 2)

        with pytest.raises(KeyError, match=right_err):
            index.slice_locs(end=lo)

        with pytest.raises(KeyError, match=right_err):
            index.slice_locs(start=hi, end=lo)

        # another unsorted duplicates
        check(
            [(0, 2), (0, 2), (2, 4), (1, 3)],
            [(0, 3), (0, 4), (0, 3), (0, 2), (2, 2)],
        )

    def test_slice_locs_with_ints_and_floats_succeeds(self):
        bounds = [(0, 1), (0, 2), (0, 3), (3, 1), (3, 4), (0, 4)]

        # increasing non-overlapping
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])
        increasing_locs = [(0, 1), (0, 2), (0, 2), (2, 1), (2, 3), (0, 3)]
        for (start, stop), expected in zip(bounds, increasing_locs):
            assert index.slice_locs(start, stop) == expected

        # decreasing non-overlapping
        index = IntervalIndex.from_tuples([(3, 4), (1, 2), (0, 1)])
        decreasing_locs = [(3, 3), (3, 2), (3, 1), (1, 3), (1, 1), (3, 1)]
        for (start, stop), expected in zip(bounds, decreasing_locs):
            assert index.slice_locs(start, stop) == expected

    @pytest.mark.parametrize("query", [[0, 1], [0, 2], [0, 3], [0, 4]])
    @pytest.mark.parametrize(
        "tuples",
        [
            [(0, 2), (1, 3), (2, 4)],
            [(2, 4), (1, 3), (0, 2)],
            [(0, 2), (0, 2), (2, 4)],
            [(0, 2), (2, 4), (0, 2)],
            [(0, 2), (0, 2), (2, 4), (1, 3)],
        ],
    )
    def test_slice_locs_with_ints_and_floats_errors(self, tuples, query):
        index = IntervalIndex.from_tuples(tuples)
        start, stop = query
        msg = (
            "'can only get slices from an IntervalIndex if bounds are "
            "non-overlapping and all monotonic increasing or decreasing'"
        )
        with pytest.raises(KeyError, match=msg):
            index.slice_locs(start, stop)
|
||||
|
||||
|
||||
class TestPutmask:
    """Tests for ``IntervalIndex.putmask`` on datetime64/timedelta64 intervals."""

    @pytest.mark.parametrize("tz", ["US/Pacific", None])
    def test_putmask_dt64(self, tz):
        # GH#37968
        breaks = date_range("2016-01-01", periods=9, tz=tz)
        idx = IntervalIndex.from_breaks(breaks)

        # select the first three positions for replacement
        mask = np.zeros(idx.shape, dtype=bool)
        mask[:3] = True

        filler = idx[-1]
        expected = IntervalIndex([filler] * 3 + list(idx[3:]))
        tm.assert_index_equal(idx.putmask(mask, filler), expected)

    def test_putmask_td64(self):
        # GH#37968
        dti = date_range("2016-01-01", periods=9)
        breaks = dti - dti[0]
        idx = IntervalIndex.from_breaks(breaks)

        # select the first three positions for replacement
        mask = np.zeros(idx.shape, dtype=bool)
        mask[:3] = True

        filler = idx[-1]
        expected = IntervalIndex([filler] * 3 + list(idx[3:]))
        tm.assert_index_equal(idx.putmask(mask, filler), expected)
|
||||
|
||||
|
||||
class TestGetValue:
    """Tests for the ``IntervalIndex.get_value`` call under a FutureWarning check."""

    @pytest.mark.parametrize("key", [[5], (2, 3)])
    def test_get_value_non_scalar_errors(self, key):
        # GH#31117
        tuples = [(1, 3), (2, 4), (3, 5), (7, 10), (3, 10)]
        idx = IntervalIndex.from_tuples(tuples)
        ser = Series(range(len(idx)), index=idx)

        # outer context expects the error, inner one expects the FutureWarning
        with pytest.raises(
            InvalidIndexError, match=str(key)
        ), tm.assert_produces_warning(FutureWarning):
            idx.get_value(ser, key)
|
||||
|
||||
|
||||
class TestContains:
    """Tests for ``in`` membership on an IntervalIndex."""

    # .__contains__, not .contains

    def test_contains_dunder(self):
        index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right")

        # __contains__ requires perfect matches to intervals.
        for scalar in (0, 1, 2):
            assert scalar not in index

        assert Interval(0, 1, closed="right") in index

        not_present = [
            Interval(0, 2, closed="right"),
            Interval(0, 0.5, closed="right"),
            Interval(3, 5, closed="right"),
            Interval(-1, 0, closed="left"),
            Interval(0, 1, closed="left"),
            Interval(0, 1, closed="both"),
        ]
        for interval in not_present:
            assert interval not in index
|
@ -0,0 +1,918 @@
|
||||
from itertools import permutations
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
interval_range,
|
||||
isna,
|
||||
notna,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.api import Float64Index
|
||||
import pandas.core.common as com
|
||||
|
||||
|
||||
@pytest.fixture(scope="class", params=[None, "foo"])
def name(request):
    """Class-scoped fixture yielding each configured param: ``None``, then ``"foo"``."""
    current_param = request.param
    return current_param
|
||||
|
||||
|
||||
class TestIntervalIndex:
|
||||
index = IntervalIndex.from_arrays([0, 1], [1, 2])
|
||||
|
||||
def create_index(self, closed="right"):
    """Build the 10-interval index with breaks 0..10 and the given ``closed`` side."""
    breaks = range(11)
    return IntervalIndex.from_breaks(breaks, closed=closed)
|
||||
|
||||
def create_index_with_nan(self, closed="right"):
    """Build the 10-element index whose second entry (position 1) is missing."""
    # False marks the position whose endpoints are replaced with NaN
    keep = [True, False] + [True] * 8
    left = np.where(keep, np.arange(10), np.nan)
    right = np.where(keep, np.arange(1, 11), np.nan)
    return IntervalIndex.from_arrays(left, right, closed=closed)
|
||||
|
||||
def test_properties(self, closed):
|
||||
index = self.create_index(closed=closed)
|
||||
assert len(index) == 10
|
||||
assert index.size == 10
|
||||
assert index.shape == (10,)
|
||||
|
||||
tm.assert_index_equal(index.left, Index(np.arange(10)))
|
||||
tm.assert_index_equal(index.right, Index(np.arange(1, 11)))
|
||||
tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5)))
|
||||
|
||||
assert index.closed == closed
|
||||
|
||||
ivs = [
|
||||
Interval(left, right, closed)
|
||||
for left, right in zip(range(10), range(1, 11))
|
||||
]
|
||||
expected = np.array(ivs, dtype=object)
|
||||
tm.assert_numpy_array_equal(np.asarray(index), expected)
|
||||
|
||||
# with nans
|
||||
index = self.create_index_with_nan(closed=closed)
|
||||
assert len(index) == 10
|
||||
assert index.size == 10
|
||||
assert index.shape == (10,)
|
||||
|
||||
expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
|
||||
expected_right = expected_left + 1
|
||||
expected_mid = expected_left + 0.5
|
||||
tm.assert_index_equal(index.left, expected_left)
|
||||
tm.assert_index_equal(index.right, expected_right)
|
||||
tm.assert_index_equal(index.mid, expected_mid)
|
||||
|
||||
assert index.closed == closed
|
||||
|
||||
ivs = [
|
||||
Interval(left, right, closed) if notna(left) else np.nan
|
||||
for left, right in zip(expected_left, expected_right)
|
||||
]
|
||||
expected = np.array(ivs, dtype=object)
|
||||
tm.assert_numpy_array_equal(np.asarray(index), expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "breaks",
    [
        [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
        [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
        pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]),
        pd.to_timedelta(["1ns", "2ms", "3s", "4min", "5H", "6D"]),
    ],
)
def test_length(self, closed, breaks):
    """``IntervalIndex.length`` must agree with the length of each element."""
    # GH 18789
    index = IntervalIndex.from_breaks(breaks, closed=closed)
    per_element = [interval.length for interval in index]
    tm.assert_index_equal(index.length, Index(per_element))

    # with NA
    index = index.insert(1, np.nan)
    # missing elements are passed through unchanged as the expected value
    per_element = [
        interval.length if notna(interval) else interval for interval in index
    ]
    tm.assert_index_equal(index.length, Index(per_element))
|
||||
|
||||
def test_with_nans(self, closed):
|
||||
index = self.create_index(closed=closed)
|
||||
assert index.hasnans is False
|
||||
|
||||
result = index.isna()
|
||||
expected = np.zeros(len(index), dtype=bool)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = index.notna()
|
||||
expected = np.ones(len(index), dtype=bool)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
index = self.create_index_with_nan(closed=closed)
|
||||
assert index.hasnans is True
|
||||
|
||||
result = index.isna()
|
||||
expected = np.array([False, True] + [False] * (len(index) - 2))
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = index.notna()
|
||||
expected = np.array([True, False] + [True] * (len(index) - 2))
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_copy(self, closed):
|
||||
expected = self.create_index(closed=closed)
|
||||
|
||||
result = expected.copy()
|
||||
assert result.equals(expected)
|
||||
|
||||
result = expected.copy(deep=True)
|
||||
assert result.equals(expected)
|
||||
assert result.left is not expected.left
|
||||
|
||||
def test_ensure_copied_data(self, closed):
|
||||
# exercise the copy flag in the constructor
|
||||
|
||||
# not copying
|
||||
index = self.create_index(closed=closed)
|
||||
result = IntervalIndex(index, copy=False)
|
||||
tm.assert_numpy_array_equal(
|
||||
index.left.values, result.left.values, check_same="same"
|
||||
)
|
||||
tm.assert_numpy_array_equal(
|
||||
index.right.values, result.right.values, check_same="same"
|
||||
)
|
||||
|
||||
# by-definition make a copy
|
||||
result = IntervalIndex(np.array(index), copy=False)
|
||||
tm.assert_numpy_array_equal(
|
||||
index.left.values, result.left.values, check_same="copy"
|
||||
)
|
||||
tm.assert_numpy_array_equal(
|
||||
index.right.values, result.right.values, check_same="copy"
|
||||
)
|
||||
|
||||
def test_delete(self, closed):
|
||||
expected = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed)
|
||||
result = self.create_index(closed=closed).delete(0)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "data",
    [
        interval_range(0, periods=10, closed="neither"),
        interval_range(1.7, periods=8, freq=2.5, closed="both"),
        interval_range(Timestamp("20170101"), periods=12, closed="left"),
        interval_range(Timedelta("1 day"), periods=6, closed="right"),
    ],
)
def test_insert(self, data):
    """Check ``IntervalIndex.insert`` for valid items, invalid items and NA values.

    Index-level inserts of incompatible values are compared against the
    object-dtype equivalent, while the same inserts on the backing
    ``data._data`` array are expected to raise.
    """
    item = data[0]
    idx_item = IntervalIndex([item])

    # start
    expected = idx_item.append(data)
    result = data.insert(0, item)
    tm.assert_index_equal(result, expected)

    # end
    expected = data.append(idx_item)
    result = data.insert(len(data), item)
    tm.assert_index_equal(result, expected)

    # mid
    expected = data[:3].append(idx_item).append(data[3:])
    result = data.insert(3, item)
    tm.assert_index_equal(result, expected)

    # invalid type
    res = data.insert(1, "foo")
    expected = data.astype(object).insert(1, "foo")
    tm.assert_index_equal(res, expected)

    msg = "can only insert Interval objects and NA into an IntervalArray"
    with pytest.raises(TypeError, match=msg):
        data._data.insert(1, "foo")

    # invalid closed
    for closed in {"left", "right", "both", "neither"} - {item.closed}:
        msg = f"'value.closed' is '{closed}', expected '{item.closed}'."
        bad_item = Interval(item.left, item.right, closed=closed)
        res = data.insert(1, bad_item)
        expected = data.astype(object).insert(1, bad_item)
        tm.assert_index_equal(res, expected)
        with pytest.raises(ValueError, match=msg):
            data._data.insert(1, bad_item)

    # GH 18295 (test missing)
    na_idx = IntervalIndex([np.nan], closed=data.closed)
    na_expected = data[:1].append(na_idx).append(data[1:])
    for na in [np.nan, None, pd.NA]:
        result = data.insert(1, na)
        tm.assert_index_equal(result, na_expected)

    if data.left.dtype.kind not in ["m", "M"]:
        # trying to insert pd.NaT into a numeric-dtyped Index should cast
        expected = data.astype(object).insert(1, pd.NaT)

        msg = "can only insert Interval objects and NA into an IntervalArray"
        with pytest.raises(TypeError, match=msg):
            data._data.insert(1, pd.NaT)
    else:
        # datetimelike subtype: the NaT insert is compared against the same
        # NA-inserted index used for the other missing values above
        expected = na_expected

    result = data.insert(1, pd.NaT)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_is_unique_interval(self, closed):
|
||||
"""
|
||||
Interval specific tests for is_unique in addition to base class tests
|
||||
"""
|
||||
# unique overlapping - distinct endpoints
|
||||
idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
|
||||
assert idx.is_unique is True
|
||||
|
||||
# unique overlapping - shared endpoints
|
||||
idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
|
||||
assert idx.is_unique is True
|
||||
|
||||
# unique nested
|
||||
idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
|
||||
assert idx.is_unique is True
|
||||
|
||||
# unique NaN
|
||||
idx = IntervalIndex.from_tuples([(np.NaN, np.NaN)], closed=closed)
|
||||
assert idx.is_unique is True
|
||||
|
||||
# non-unique NaN
|
||||
idx = IntervalIndex.from_tuples(
|
||||
[(np.NaN, np.NaN), (np.NaN, np.NaN)], closed=closed
|
||||
)
|
||||
assert idx.is_unique is False
|
||||
|
||||
def test_monotonic(self, closed):
|
||||
# increasing non-overlapping
|
||||
idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed)
|
||||
assert idx.is_monotonic is True
|
||||
assert idx._is_strictly_monotonic_increasing is True
|
||||
assert idx.is_monotonic_decreasing is False
|
||||
assert idx._is_strictly_monotonic_decreasing is False
|
||||
|
||||
# decreasing non-overlapping
|
||||
idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed)
|
||||
assert idx.is_monotonic is False
|
||||
assert idx._is_strictly_monotonic_increasing is False
|
||||
assert idx.is_monotonic_decreasing is True
|
||||
assert idx._is_strictly_monotonic_decreasing is True
|
||||
|
||||
# unordered non-overlapping
|
||||
idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed)
|
||||
assert idx.is_monotonic is False
|
||||
assert idx._is_strictly_monotonic_increasing is False
|
||||
assert idx.is_monotonic_decreasing is False
|
||||
assert idx._is_strictly_monotonic_decreasing is False
|
||||
|
||||
# increasing overlapping
|
||||
idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed)
|
||||
assert idx.is_monotonic is True
|
||||
assert idx._is_strictly_monotonic_increasing is True
|
||||
assert idx.is_monotonic_decreasing is False
|
||||
assert idx._is_strictly_monotonic_decreasing is False
|
||||
|
||||
# decreasing overlapping
|
||||
idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed)
|
||||
assert idx.is_monotonic is False
|
||||
assert idx._is_strictly_monotonic_increasing is False
|
||||
assert idx.is_monotonic_decreasing is True
|
||||
assert idx._is_strictly_monotonic_decreasing is True
|
||||
|
||||
# unordered overlapping
|
||||
idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed)
|
||||
assert idx.is_monotonic is False
|
||||
assert idx._is_strictly_monotonic_increasing is False
|
||||
assert idx.is_monotonic_decreasing is False
|
||||
assert idx._is_strictly_monotonic_decreasing is False
|
||||
|
||||
# increasing overlapping shared endpoints
|
||||
idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
|
||||
assert idx.is_monotonic is True
|
||||
assert idx._is_strictly_monotonic_increasing is True
|
||||
assert idx.is_monotonic_decreasing is False
|
||||
assert idx._is_strictly_monotonic_decreasing is False
|
||||
|
||||
# decreasing overlapping shared endpoints
|
||||
idx = IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed)
|
||||
assert idx.is_monotonic is False
|
||||
assert idx._is_strictly_monotonic_increasing is False
|
||||
assert idx.is_monotonic_decreasing is True
|
||||
assert idx._is_strictly_monotonic_decreasing is True
|
||||
|
||||
# stationary
|
||||
idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed)
|
||||
assert idx.is_monotonic is True
|
||||
assert idx._is_strictly_monotonic_increasing is False
|
||||
assert idx.is_monotonic_decreasing is True
|
||||
assert idx._is_strictly_monotonic_decreasing is False
|
||||
|
||||
# empty
|
||||
idx = IntervalIndex([], closed=closed)
|
||||
assert idx.is_monotonic is True
|
||||
assert idx._is_strictly_monotonic_increasing is True
|
||||
assert idx.is_monotonic_decreasing is True
|
||||
assert idx._is_strictly_monotonic_decreasing is True
|
||||
|
||||
def test_is_monotonic_with_nans(self):
|
||||
# GH#41831
|
||||
index = IntervalIndex([np.nan, np.nan])
|
||||
|
||||
assert not index.is_monotonic
|
||||
assert not index._is_strictly_monotonic_increasing
|
||||
assert not index.is_monotonic_increasing
|
||||
assert not index._is_strictly_monotonic_decreasing
|
||||
assert not index.is_monotonic_decreasing
|
||||
|
||||
def test_get_item(self, closed):
|
||||
i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed)
|
||||
assert i[0] == Interval(0.0, 1.0, closed=closed)
|
||||
assert i[1] == Interval(1.0, 2.0, closed=closed)
|
||||
assert isna(i[2])
|
||||
|
||||
result = i[0:1]
|
||||
expected = IntervalIndex.from_arrays((0.0,), (1.0,), closed=closed)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = i[0:2]
|
||||
expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0), closed=closed)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = i[1:3]
|
||||
expected = IntervalIndex.from_arrays(
|
||||
(1.0, np.nan), (2.0, np.nan), closed=closed
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "breaks",
    [
        date_range("20180101", periods=4),
        date_range("20180101", periods=4, tz="US/Eastern"),
        timedelta_range("0 days", periods=4),
    ],
    ids=lambda x: str(x.dtype),
)
def test_maybe_convert_i8(self, breaks):
    """GH 20636: ``_maybe_convert_i8`` on datetime-like keys of several kinds."""
    index = IntervalIndex.from_breaks(breaks)
    first, second = breaks[0], breaks[1]

    # intervalindex
    converted = index._maybe_convert_i8(index)
    tm.assert_index_equal(converted, IntervalIndex.from_breaks(breaks.asi8))

    # interval
    converted = index._maybe_convert_i8(Interval(first, second))
    assert converted == Interval(first.value, second.value)

    # datetimelike index
    converted = index._maybe_convert_i8(breaks)
    tm.assert_index_equal(converted, Index(breaks.asi8))

    # datetimelike scalar
    converted = index._maybe_convert_i8(first)
    assert converted == first.value

    # list-like of datetimelike scalars
    converted = index._maybe_convert_i8(list(breaks))
    tm.assert_index_equal(converted, Index(breaks.asi8))
|
||||
|
||||
@pytest.mark.parametrize(
    "breaks",
    [date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5)],
)
def test_maybe_convert_i8_nat(self, breaks):
    """
    GH 20636: NaT-containing keys are expected to convert to a float64 Index.

    The expected value is built with ``Index(..., dtype="float64")`` rather
    than ``Float64Index``, which pandas deprecated in 1.4 and removed in 2.0.
    """
    index = IntervalIndex.from_breaks(breaks)

    # all-NaT key -> all-NaN float result
    to_convert = breaks._constructor([pd.NaT] * 3)
    expected = Index([np.nan] * 3, dtype="float64")
    result = index._maybe_convert_i8(to_convert)
    tm.assert_index_equal(result, expected)

    # one valid leading value: its integer value is expected as a float
    to_convert = to_convert.insert(0, breaks[0])
    expected = expected.insert(0, float(breaks[0].value))
    result = index._maybe_convert_i8(to_convert)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "breaks",
    [np.arange(5, dtype="int64"), np.arange(5, dtype="float64")],
    ids=lambda x: str(x.dtype),
)
@pytest.mark.parametrize(
    "make_key",
    [
        IntervalIndex.from_breaks,
        lambda arr: Interval(arr[0], arr[1]),
        lambda arr: arr,
        lambda arr: arr[0],
        list,
    ],
    ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
)
def test_maybe_convert_i8_numeric(self, breaks, make_key):
    """GH 20636: numeric keys come back from ``_maybe_convert_i8`` unchanged."""
    key = make_key(breaks)
    index = IntervalIndex.from_breaks(breaks)

    # no conversion occurs for numeric: the very same object is returned
    assert index._maybe_convert_i8(key) is key
|
||||
|
||||
@pytest.mark.parametrize(
    "breaks1, breaks2",
    permutations(
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        2,
    ),
    ids=lambda x: str(x.dtype),
)
@pytest.mark.parametrize(
    "make_key",
    [
        IntervalIndex.from_breaks,
        lambda arr: Interval(arr[0], arr[1]),
        lambda arr: arr,
        lambda arr: arr[0],
        list,
    ],
    ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
)
def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key):
    """GH 20636: a key built from a different dtype than the index raises."""
    key = make_key(breaks2)
    index = IntervalIndex.from_breaks(breaks1)

    # the message contains dtype reprs, so escape it before regex matching
    pattern = re.escape(
        f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with "
        f"values of dtype {breaks2.dtype}"
    )
    with pytest.raises(ValueError, match=pattern):
        index._maybe_convert_i8(key)
|
||||
|
||||
def test_contains_method(self):
    """``IntervalIndex.contains`` with scalar points and with an Interval."""
    # can select values that are IN the range of a value
    index = IntervalIndex.from_arrays([0, 1], [1, 2])

    # (point, expected membership in each of the two intervals)
    point_cases = [
        (0, [False, False]),
        (3, [False, False]),
        (0.5, [True, False]),
        (1, [True, False]),
    ]
    for point, flags in point_cases:
        expected = np.array(flags, dtype="bool")
        tm.assert_numpy_array_equal(index.contains(point), expected)

    # __contains__ not implemented for "interval in interval", follow
    # that for the contains method for now
    with pytest.raises(
        NotImplementedError, match="contains not implemented for two"
    ):
        index.contains(Interval(0, 1))
|
||||
|
||||
def test_dropna(self, closed):
|
||||
|
||||
expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed)
|
||||
|
||||
ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed)
|
||||
result = ii.dropna()
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed)
|
||||
result = ii.dropna()
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_non_contiguous(self, closed):
|
||||
index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
|
||||
target = [0.5, 1.5, 2.5]
|
||||
actual = index.get_indexer(target)
|
||||
expected = np.array([0, -1, 1], dtype="intp")
|
||||
tm.assert_numpy_array_equal(actual, expected)
|
||||
|
||||
assert 1.5 not in index
|
||||
|
||||
def test_isin(self, closed):
    """``isin`` against index and list arguments, including other closures."""
    index = self.create_index(closed=closed)
    size = len(index)

    # only the first element matches, given as a slice or as a list
    expected = np.array([True] + [False] * (size - 1))
    for values in (index[:1], [index[0]]):
        tm.assert_numpy_array_equal(index.isin(values), expected)

    # every element but the last is expected to match
    other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
    expected = np.array([True] * (size - 1) + [False])
    for values in (other, other.tolist()):
        tm.assert_numpy_array_equal(index.isin(values), expected)

    # a match is expected only when the closure is the same
    for other_closed in {"right", "left", "both", "neither"}:
        other = self.create_index(closed=other_closed)
        expected = np.repeat(closed == other_closed, size)
        for values in (other, other.tolist()):
            tm.assert_numpy_array_equal(index.isin(values), expected)
|
||||
|
||||
def test_comparison(self):
    """Elementwise comparisons of ``self.index`` with valid and invalid operands."""
    both_true = np.array([True, True])
    both_false = np.array([False, False])
    second_only = np.array([False, True])

    # scalar Interval on either side
    tm.assert_numpy_array_equal(Interval(0, 1) < self.index, second_only)
    tm.assert_numpy_array_equal(Interval(0.5, 1.5) < self.index, second_only)
    tm.assert_numpy_array_equal(self.index > Interval(0.5, 1.5), second_only)

    # index against itself
    tm.assert_numpy_array_equal(self.index == self.index, both_true)
    tm.assert_numpy_array_equal(self.index <= self.index, both_true)
    tm.assert_numpy_array_equal(self.index >= self.index, both_true)
    tm.assert_numpy_array_equal(self.index < self.index, both_false)
    tm.assert_numpy_array_equal(self.index > self.index, both_false)

    # same breaks but closed on the left
    left_closed = IntervalIndex.from_breaks([0, 1, 2], "left")
    tm.assert_numpy_array_equal(self.index == left_closed, both_false)

    # index against its own .values, in both operand orders
    tm.assert_numpy_array_equal(self.index == self.index.values, both_true)
    tm.assert_numpy_array_equal(self.index.values == self.index, both_true)
    tm.assert_numpy_array_equal(self.index <= self.index.values, both_true)
    tm.assert_numpy_array_equal(self.index != self.index.values, both_false)
    tm.assert_numpy_array_equal(self.index > self.index.values, both_false)
    tm.assert_numpy_array_equal(self.index.values > self.index, both_false)

    # invalid comparisons
    tm.assert_numpy_array_equal(self.index == 0, both_false)
    tm.assert_numpy_array_equal(self.index == self.index.left, both_false)

    type_error_patterns = [
        "not supported between instances of 'int' and '.*.Interval'",
        r"Invalid comparison between dtype=interval\[int64, right\] and ",
    ]
    msg = "|".join(type_error_patterns)
    with pytest.raises(TypeError, match=msg):
        self.index > 0
    with pytest.raises(TypeError, match=msg):
        self.index <= 0
    with pytest.raises(TypeError, match=msg):
        self.index > np.arange(2)

    with pytest.raises(ValueError, match="Lengths must match to compare"):
        self.index > np.arange(3)
|
||||
|
||||
def test_missing_values(self, closed):
    """NA handling when building an interval index from objects and arrays."""
    intervals = [Interval(0, 1, closed=closed), Interval(1, 2, closed=closed)]
    idx = Index([np.nan] + intervals)
    idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed)
    assert idx.equals(idx2)

    # NA on the left side only must be rejected
    msg = (
        "missing values must be missing in the same location both left "
        "and right sides"
    )
    left_with_na = [np.nan, 0, 1]
    right_without_na = np.array([0, 1, 2])
    with pytest.raises(ValueError, match=msg):
        IntervalIndex.from_arrays(left_with_na, right_without_na, closed=closed)

    tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False]))
|
||||
|
||||
def test_sort_values(self, closed):
    """``sort_values`` in both directions, with and without an NA entry."""
    index = self.create_index(closed=closed)

    tm.assert_index_equal(index.sort_values(), index)
    tm.assert_index_equal(index.sort_values(ascending=False), index[::-1])

    # with nan
    low, high = Interval(0, 1), Interval(1, 2)
    index = IntervalIndex([high, np.nan, low])

    ascending = index.sort_values()
    tm.assert_index_equal(ascending, IntervalIndex([low, high, np.nan]))

    descending = index.sort_values(ascending=False, na_position="first")
    tm.assert_index_equal(descending, IntervalIndex([np.nan, high, low]))
|
||||
|
||||
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
def test_datetime(self, tz):
    """mid, ``in``, ``contains`` and ``get_indexer`` on a daily Timestamp index."""

    def stamp(text):
        # every timestamp in this test shares the parametrized timezone
        return Timestamp(text, tz=tz)

    dates = date_range(start=stamp("2000-01-01"), periods=10)
    index = IntervalIndex.from_breaks(dates)

    # test mid
    expected_mid = date_range(start=stamp("2000-01-01T12:00"), periods=9)
    tm.assert_index_equal(index.mid, expected_mid)

    # __contains__ doesn't check individual points
    for point in ("2000-01-01", "2000-01-01T12", "2000-01-02"):
        assert stamp(point) not in index
    iv_true = Interval(stamp("2000-01-02"), stamp("2000-01-03"))
    iv_false = Interval(stamp("1999-12-31"), stamp("2000-01-01"))
    assert iv_true in index
    assert iv_false not in index

    # .contains does check individual points
    assert not index.contains(stamp("2000-01-01")).any()
    assert index.contains(stamp("2000-01-01T12")).any()
    assert index.contains(stamp("2000-01-02")).any()

    # test get_indexer: (target start, target freq, expected positions)
    indexer_cases = [
        ("1999-12-31T12:00", "12H", [-1, -1, 0, 0, 1, 1, 2]),
        ("2000-01-08T18:00", "6H", [7, 7, 8, 8, 8, 8, -1]),
    ]
    for begin, freq, positions in indexer_cases:
        target = date_range(start=stamp(begin), periods=7, freq=freq)
        expected = np.array(positions, dtype="intp")
        tm.assert_numpy_array_equal(index.get_indexer(target), expected)
|
||||
|
||||
def test_append(self, closed):
|
||||
|
||||
index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed)
|
||||
index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed)
|
||||
|
||||
result = index1.append(index2)
|
||||
expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = index1.append([index1, index2])
|
||||
expected = IntervalIndex.from_arrays(
|
||||
[0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
for other_closed in {"left", "right", "both", "neither"} - {closed}:
|
||||
index_other_closed = IntervalIndex.from_arrays(
|
||||
[0, 1], [1, 2], closed=other_closed
|
||||
)
|
||||
result = index1.append(index_other_closed)
|
||||
expected = index1.astype(object).append(index_other_closed.astype(object))
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_is_non_overlapping_monotonic(self, closed):
|
||||
# Should be True in all cases
|
||||
tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
|
||||
idx = IntervalIndex.from_tuples(tpls, closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is True
|
||||
|
||||
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is True
|
||||
|
||||
# Should be False in all cases (overlapping)
|
||||
tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
|
||||
idx = IntervalIndex.from_tuples(tpls, closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is False
|
||||
|
||||
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is False
|
||||
|
||||
# Should be False in all cases (non-monotonic)
|
||||
tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
|
||||
idx = IntervalIndex.from_tuples(tpls, closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is False
|
||||
|
||||
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is False
|
||||
|
||||
# Should be False for closed='both', otherwise True (GH16560)
|
||||
if closed == "both":
|
||||
idx = IntervalIndex.from_breaks(range(4), closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is False
|
||||
else:
|
||||
idx = IntervalIndex.from_breaks(range(4), closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is True
|
||||
|
||||
@pytest.mark.parametrize(
    "start, shift, na_value",
    [
        (0, 1, np.nan),
        (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT),
        (Timedelta("0 days"), Timedelta("1 day"), pd.NaT),
    ],
)
def test_is_overlapping(self, start, shift, na_value, closed):
    """GH 23309: ``is_overlapping`` with and without NA entries at both ends."""
    # see test_interval_tree.py for extensive tests; interface tests here
    na_pair = (na_value, na_value)

    # (left-endpoint offsets, interval width in shifts, expected flag)
    scenarios = [
        ((0, 2, 4), 1, False),  # non-overlapping
        (range(3), 2, True),  # overlapping
        (range(3), 1, closed == "both"),  # common endpoints
    ]
    for offsets, width, expected in scenarios:
        tuples = [(start + n * shift, start + (n + width) * shift) for n in offsets]
        # each scenario is checked as-is and again with NA added at both ends
        for candidate in (tuples, [na_pair] + tuples + [na_pair]):
            index = IntervalIndex.from_tuples(candidate, closed=closed)
            assert index.is_overlapping is expected
|
||||
|
||||
@pytest.mark.parametrize(
    "tuples",
    [
        list(zip(range(10), range(1, 11))),
        list(
            zip(
                date_range("20170101", periods=10),
                date_range("20170101", periods=10),
            )
        ),
        list(
            zip(
                timedelta_range("0 days", periods=10),
                timedelta_range("1 day", periods=10),
            )
        ),
    ],
)
def test_to_tuples(self, tuples):
    """GH 18756: ``to_tuples`` round-trips the tuples the index was built from."""
    expected = Index(com.asarray_tuplesafe(tuples))
    converted = IntervalIndex.from_tuples(tuples).to_tuples()
    tm.assert_index_equal(converted, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "tuples",
    [
        list(zip(range(10), range(1, 11))) + [np.nan],
        list(
            zip(
                date_range("20170101", periods=10),
                date_range("20170101", periods=10),
            )
        )
        + [np.nan],
        list(
            zip(
                timedelta_range("0 days", periods=10),
                timedelta_range("1 day", periods=10),
            )
        )
        + [np.nan],
    ],
)
@pytest.mark.parametrize("na_tuple", [True, False])
def test_to_tuples_na(self, tuples, na_tuple):
    """GH 18756: ``to_tuples(na_tuple=...)`` controls the shape of the NA entry."""
    converted = IntervalIndex.from_tuples(tuples).to_tuples(na_tuple=na_tuple)

    # check the non-NA portion
    expected_notna = Index(com.asarray_tuplesafe(tuples[:-1]))
    tm.assert_index_equal(converted[:-1], expected_notna)

    # check the NA portion
    last = converted[-1]
    if not na_tuple:
        assert isna(last)
        return
    assert isinstance(last, tuple)
    assert len(last) == 2
    assert all(isna(x) for x in last)
|
||||
|
||||
def test_nbytes(self):
|
||||
# GH 19209
|
||||
left = np.arange(0, 4, dtype="i8")
|
||||
right = np.arange(1, 5, dtype="i8")
|
||||
|
||||
result = IntervalIndex.from_arrays(left, right).nbytes
|
||||
expected = 64 # 4 * 8 * 2
|
||||
assert result == expected
|
||||
|
||||
@pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"])
def test_set_closed(self, name, closed, new_closed):
    """GH 21670: ``set_closed`` matches an index built with the new closure."""
    original = interval_range(0, 5, closed=closed, name=name)
    expected = interval_range(0, 5, closed=new_closed, name=name)
    tm.assert_index_equal(original.set_closed(new_closed), expected)
|
||||
|
||||
@pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False])
def test_set_closed_errors(self, bad_closed):
    """GH 21670: an unrecognised ``closed`` value raises ``ValueError``."""
    msg = f"invalid option for 'closed': {bad_closed}"
    index = interval_range(0, 5)
    with pytest.raises(ValueError, match=msg):
        index.set_closed(bad_closed)
|
||||
|
||||
def test_is_all_dates(self):
|
||||
# GH 23576
|
||||
year_2017 = Interval(
|
||||
Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00")
|
||||
)
|
||||
year_2017_index = IntervalIndex([year_2017])
|
||||
assert not year_2017_index._is_all_dates
|
||||
|
||||
|
||||
def test_dir():
    """GH#27571: ``dir(interval_index)`` should not raise, nor list ``str``."""
    index = IntervalIndex.from_arrays([0, 1], [1, 2])
    attribute_names = dir(index)
    assert "str" not in attribute_names
|
||||
|
||||
|
||||
def test_searchsorted_different_argument_classes(listlike_box):
|
||||
# https://github.com/pandas-dev/pandas/issues/32762
|
||||
values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
|
||||
result = values.searchsorted(listlike_box(values))
|
||||
expected = np.array([0, 1], dtype=result.dtype)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = values._data.searchsorted(listlike_box(values))
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2]
)
def test_searchsorted_invalid_argument(arg):
    """``searchsorted`` with non-Interval values raises ``TypeError``."""
    msg = "'<' not supported between instances of 'pandas._libs.interval.Interval' and "
    values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
    with pytest.raises(TypeError, match=msg):
        values.searchsorted(arg)
|
@ -0,0 +1,355 @@
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_integer
|
||||
|
||||
from pandas import (
|
||||
DateOffset,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
interval_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.tseries.offsets import Day
|
||||
|
||||
|
||||
@pytest.fixture(scope="class", params=[None, "foo"])
def name(request):
    """Class-scoped fixture returning the current parameter: ``None`` or ``"foo"``."""
    return request.param
|
||||
|
||||
|
||||
class TestIntervalRange:
|
||||
@pytest.mark.parametrize("freq, periods", [(1, 100), (2.5, 40), (5, 20), (25, 4)])
def test_constructor_numeric(self, closed, name, freq, periods):
    """Numeric ``interval_range`` from each combination of three arguments."""
    start, end = 0, 100
    breaks = np.arange(101, step=freq)
    expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

    specs = [
        # defined from start/end/freq
        {"start": start, "end": end, "freq": freq},
        # defined from start/periods/freq
        {"start": start, "periods": periods, "freq": freq},
        # defined from end/periods/freq
        {"end": end, "periods": periods, "freq": freq},
        # GH 20976: linspace behavior defined from start/end/periods
        {"start": start, "end": end, "periods": periods},
    ]
    for spec in specs:
        result = interval_range(name=name, closed=closed, **spec)
        tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
@pytest.mark.parametrize(
    "freq, periods", [("D", 364), ("2D", 182), ("22D18H", 16), ("M", 11)]
)
def test_constructor_timestamp(self, closed, name, freq, periods, tz):
    """Timestamp ``interval_range`` from each combination of three arguments."""
    start, end = Timestamp("20180101", tz=tz), Timestamp("20181231", tz=tz)
    breaks = date_range(start=start, end=end, freq=freq)
    expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

    specs = [
        # defined from start/end/freq
        {"start": start, "end": end, "freq": freq},
        # defined from start/periods/freq
        {"start": start, "periods": periods, "freq": freq},
        # defined from end/periods/freq
        {"end": end, "periods": periods, "freq": freq},
    ]
    for spec in specs:
        result = interval_range(name=name, closed=closed, **spec)
        tm.assert_index_equal(result, expected)

    # GH 20976: linspace behavior defined from start/end/periods
    # matches expected only for non-anchored offsets and tz naive
    # (anchored/DST transitions cause unequal spacing in expected)
    if breaks.freq.is_anchored() or tz is not None:
        return
    result = interval_range(
        start=start, end=end, periods=periods, name=name, closed=closed
    )
    tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "freq, periods", [("D", 100), ("2D12H", 40), ("5D", 20), ("25D", 4)]
)
def test_constructor_timedelta(self, closed, name, freq, periods):
    """Timedelta ``interval_range`` from each combination of three arguments."""
    start, end = Timedelta("0 days"), Timedelta("100 days")
    breaks = timedelta_range(start=start, end=end, freq=freq)
    expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

    specs = [
        # defined from start/end/freq
        {"start": start, "end": end, "freq": freq},
        # defined from start/periods/freq
        {"start": start, "periods": periods, "freq": freq},
        # defined from end/periods/freq
        {"end": end, "periods": periods, "freq": freq},
        # GH 20976: linspace behavior defined from start/end/periods
        {"start": start, "end": end, "periods": periods},
    ]
    for spec in specs:
        result = interval_range(name=name, closed=closed, **spec)
        tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "start, end, freq, expected_endpoint",
    [
        (0, 10, 3, 9),
        (0, 10, 1.5, 9),
        (0.5, 10, 3, 9.5),
        (Timedelta("0D"), Timedelta("10D"), "2D4H", Timedelta("8D16H")),
        (
            Timestamp("2018-01-01"),
            Timestamp("2018-02-09"),
            "MS",
            Timestamp("2018-02-01"),
        ),
        (
            Timestamp("2018-01-01", tz="US/Eastern"),
            Timestamp("2018-01-20", tz="US/Eastern"),
            "5D12H",
            Timestamp("2018-01-17 12:00:00", tz="US/Eastern"),
        ),
    ],
)
def test_early_truncation(self, start, end, freq, expected_endpoint):
    """The last right endpoint stops short of ``end`` when ``freq`` skips it."""
    # index truncates early if freq causes end to be skipped
    index = interval_range(start=start, end=end, freq=freq)
    assert index.right[-1] == expected_endpoint
|
||||
|
||||
@pytest.mark.parametrize(
    "start, end, freq",
    [(0.5, None, None), (None, 4.5, None), (0.5, None, 1.5), (None, 6.5, 1.5)],
)
def test_no_invalid_float_truncation(self, start, end, freq):
    """GH 21161: float start/end/freq keep their fractional breaks."""
    unit_breaks = [0.5, 1.5, 2.5, 3.5, 4.5]
    stepped_breaks = [0.5, 2.0, 3.5, 5.0, 6.5]
    expected = IntervalIndex.from_breaks(
        unit_breaks if freq is None else stepped_breaks
    )

    result = interval_range(start=start, end=end, periods=4, freq=freq)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "start, mid, end",
    [
        (
            Timestamp("2018-03-10", tz="US/Eastern"),
            Timestamp("2018-03-10 23:30:00", tz="US/Eastern"),
            Timestamp("2018-03-12", tz="US/Eastern"),
        ),
        (
            Timestamp("2018-11-03", tz="US/Eastern"),
            Timestamp("2018-11-04 00:30:00", tz="US/Eastern"),
            Timestamp("2018-11-05", tz="US/Eastern"),
        ),
    ],
)
def test_linspace_dst_transition(self, start, mid, end):
    """Two-period ``interval_range`` across a DST change splits at ``mid``."""
    # GH 20976: linspace behavior defined from start/end/periods
    # accounts for the hour gained/lost during DST transition
    expected = IntervalIndex.from_breaks([start, mid, end])
    tm.assert_index_equal(interval_range(start=start, end=end, periods=2), expected)
|
||||
|
||||
@pytest.mark.parametrize("freq", [2, 2.0])
@pytest.mark.parametrize("end", [10, 10.0])
@pytest.mark.parametrize("start", [0, 0.0])
def test_float_subtype(self, start, end, freq):
    """
    Check the interval subtype for every way of specifying a numeric range.

    Each case pairs the keyword arguments passed to ``interval_range`` with
    the sum of the numeric arguments involved; the subtype is expected to be
    "int64" only when that sum is an integer.
    """
    # Has float subtype if any of start/end/freq are float, even if all
    # resulting endpoints can safely be upcast to integers
    cases = [
        # defined from start/end/freq
        ({"start": start, "end": end, "freq": freq}, start + end + freq),
        # defined from start/periods/freq
        ({"start": start, "periods": 5, "freq": freq}, start + freq),
        # defined from end/periods/freq
        ({"end": end, "periods": 5, "freq": freq}, end + freq),
        # GH 20976: linspace behavior defined from start/end/periods
        ({"start": start, "end": end, "periods": 5}, start + end),
    ]

    for kwargs, total in cases:
        subtype = interval_range(**kwargs).dtype.subtype
        wanted = "int64" if is_integer(total) else "float64"
        assert subtype == wanted
|
||||
|
||||
def test_constructor_coverage(self):
|
||||
# float value for periods
|
||||
expected = interval_range(start=0, periods=10)
|
||||
result = interval_range(start=0, periods=10.5)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# equivalent timestamp-like start/end
|
||||
start, end = Timestamp("2017-01-01"), Timestamp("2017-01-15")
|
||||
expected = interval_range(start=start, end=end)
|
||||
|
||||
result = interval_range(start=start.to_pydatetime(), end=end.to_pydatetime())
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = interval_range(start=start.asm8, end=end.asm8)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# equivalent freq with timestamp
|
||||
equiv_freq = [
|
||||
"D",
|
||||
Day(),
|
||||
Timedelta(days=1),
|
||||
timedelta(days=1),
|
||||
DateOffset(days=1),
|
||||
]
|
||||
for freq in equiv_freq:
|
||||
result = interval_range(start=start, end=end, freq=freq)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# equivalent timedelta-like start/end
|
||||
start, end = Timedelta(days=1), Timedelta(days=10)
|
||||
expected = interval_range(start=start, end=end)
|
||||
|
||||
result = interval_range(start=start.to_pytimedelta(), end=end.to_pytimedelta())
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = interval_range(start=start.asm8, end=end.asm8)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# equivalent freq with timedelta
|
||||
equiv_freq = ["D", Day(), Timedelta(days=1), timedelta(days=1)]
|
||||
for freq in equiv_freq:
|
||||
result = interval_range(start=start, end=end, freq=freq)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_errors(self):
    """
    Check the exception type and message for invalid ``interval_range`` calls.

    The cases are grouped by expected message: wrong number of parameters,
    type-incompatible start/end/freq, invalid periods/start/end/freq values,
    and tz-aware endpoints in different timezones.
    """
    # not enough params, then too many params
    msg = (
        "Of the four parameters: start, end, periods, and freq, "
        "exactly three must be specified"
    )
    wrong_count = [
        {"start": 0},
        {"end": 5},
        {"periods": 2},
        {},
        {"start": 0, "end": 5, "periods": 6, "freq": 1.5},
    ]
    for kwargs in wrong_count:
        with pytest.raises(ValueError, match=msg):
            interval_range(**kwargs)

    # mixed units
    msg = "start, end, freq need to be type compatible"
    mixed_units = [
        (0, Timestamp("20130101"), 2),
        (0, Timedelta("1 day"), 2),
        (0, 10, "D"),
        (Timestamp("20130101"), 10, "D"),
        (Timestamp("20130101"), Timedelta("1 day"), "D"),
        (Timestamp("20130101"), Timestamp("20130110"), 2),
        (Timedelta("1 day"), 10, "D"),
        (Timedelta("1 day"), Timestamp("20130110"), "D"),
        (Timedelta("1 day"), Timedelta("10 days"), 2),
    ]
    for start, end, freq in mixed_units:
        with pytest.raises(TypeError, match=msg):
            interval_range(start=start, end=end, freq=freq)

    # invalid periods
    msg = "periods must be a number, got foo"
    with pytest.raises(TypeError, match=msg):
        interval_range(start=0, periods="foo")

    # invalid start
    msg = "start must be numeric or datetime-like, got foo"
    with pytest.raises(ValueError, match=msg):
        interval_range(start="foo", periods=10)

    # invalid end
    msg = r"end must be numeric or datetime-like, got \(0, 1\]"
    with pytest.raises(ValueError, match=msg):
        interval_range(end=Interval(0, 1), periods=10)

    # invalid freq for datetime-like
    msg = "freq must be numeric or convertible to DateOffset, got foo"
    bad_freq = [
        {"start": 0, "end": 10, "freq": "foo"},
        {"start": Timestamp("20130101"), "periods": 10, "freq": "foo"},
        {"end": Timedelta("1 day"), "periods": 10, "freq": "foo"},
    ]
    for kwargs in bad_freq:
        with pytest.raises(ValueError, match=msg):
            interval_range(**kwargs)

    # mixed tz
    eastern_start = Timestamp("2017-01-01", tz="US/Eastern")
    pacific_end = Timestamp("2017-01-07", tz="US/Pacific")
    msg = "Start and end cannot both be tz-aware with different timezones"
    with pytest.raises(TypeError, match=msg):
        interval_range(start=eastern_start, end=pacific_end)
|
@ -0,0 +1,191 @@
|
||||
from itertools import permutations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.interval import IntervalTree
|
||||
from pandas.compat import IS64
|
||||
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def skipif_32bit(param):
    """
    Wrap ``param`` so that a parametrize entry is skipped on 32bit systems.

    Specifically used here to skip leaf_size parameters related to GH 23440.
    The skip condition is ``not IS64``.
    """
    return pytest.param(
        param,
        marks=pytest.mark.skipif(
            not IS64, reason="GH 23440: int type mismatch on 32bit"
        ),
    )
|
||||
|
||||
|
||||
@pytest.fixture(scope="class", params=["int64", "float64", "uint64"])
def dtype(request):
    """
    Class-scoped fixture yielding one dtype name per parametrization.
    """
    dtype_name = request.param
    return dtype_name
|
||||
|
||||
|
||||
@pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10])
def leaf_size(request):
    """
    Fixture supplying the IntervalTree ``leaf_size`` argument; consumed by
    the ``tree`` fixture. The values 1 and 2 are wrapped in ``skipif_32bit``.
    """
    size = request.param
    return size
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        np.arange(5, dtype="int64"),
        np.arange(5, dtype="uint64"),
        np.arange(5, dtype="float64"),
        np.array([0, 1, 2, 3, 4, np.nan], dtype="float64"),
    ]
)
def tree(request, leaf_size):
    """
    Fixture building an IntervalTree from the parametrized left endpoints.

    Each right endpoint is the matching left endpoint plus 2, and the tree
    is built with the ``leaf_size`` fixture value.
    """
    starts = request.param
    stops = starts + 2
    return IntervalTree(starts, stops, leaf_size=leaf_size)
|
||||
|
||||
|
||||
class TestIntervalTree:
    """Tests for ``IntervalTree`` indexing, overlap detection and construction."""

    def test_get_indexer(self, tree):
        """Look up targets in the fixture tree; an ambiguous target raises."""
        targets = np.array([1.0, 5.5, 6.5])
        tm.assert_numpy_array_equal(
            tree.get_indexer(targets), np.array([0, 4, -1], dtype="intp")
        )

        msg = "'indexer does not intersect a unique set of intervals'"
        with pytest.raises(KeyError, match=msg):
            tree.get_indexer(np.array([3.0]))

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_overflow(self, dtype, target_value, target_dtype):
        """A target of the other integer signedness is reported as -1."""
        left = np.array([0, 1], dtype=dtype)
        right = np.array([1, 2], dtype=dtype)
        target = np.array([target_value], dtype=target_dtype)

        located = IntervalTree(left, right).get_indexer(target)
        tm.assert_numpy_array_equal(located, np.array([-1], dtype="intp"))

    def test_get_indexer_non_unique(self, tree):
        """Check the indexer slices and the missing positions for three targets."""
        targets = np.array([1.0, 2.0, 6.5])
        indexer, missing = tree.get_indexer_non_unique(targets)

        # leading entry, compared as-is
        tm.assert_numpy_array_equal(indexer[:1], np.array([0], dtype="intp"))

        # entries 1 and 2, compared after sorting
        tm.assert_numpy_array_equal(
            np.sort(indexer[1:3]), np.array([0, 1], dtype="intp")
        )

        # everything from position 3 on, compared after sorting
        tm.assert_numpy_array_equal(
            np.sort(indexer[3:]), np.array([-1], dtype="intp")
        )

        tm.assert_numpy_array_equal(missing, np.array([2], dtype="intp"))

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_non_unique_overflow(self, dtype, target_value, target_dtype):
        """Non-unique lookup of an other-signedness target gives -1 and missing 0."""
        left = np.array([0, 2], dtype=dtype)
        right = np.array([1, 3], dtype=dtype)
        target = np.array([target_value], dtype=target_dtype)

        indexer, missing = IntervalTree(left, right).get_indexer_non_unique(target)

        tm.assert_numpy_array_equal(indexer, np.array([-1], dtype="intp"))
        tm.assert_numpy_array_equal(missing, np.array([0], dtype="intp"))

    def test_duplicates(self, dtype):
        """Three identical intervals: unique lookup raises, non-unique finds all."""
        starts = np.array([0, 0, 0], dtype=dtype)
        tree = IntervalTree(starts, starts + 1)
        target = np.array([0.5])

        msg = "'indexer does not intersect a unique set of intervals'"
        with pytest.raises(KeyError, match=msg):
            tree.get_indexer(np.array([0.5]))

        indexer, missing = tree.get_indexer_non_unique(target)
        tm.assert_numpy_array_equal(
            np.sort(indexer), np.array([0, 1, 2], dtype="intp")
        )
        tm.assert_numpy_array_equal(missing, np.array([], dtype="intp"))

    @pytest.mark.parametrize(
        "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000]
    )
    def test_get_indexer_closed(self, closed, leaf_size):
        """Interior points always match; endpoints match only on a closed side."""
        x = np.arange(1000, dtype="float64")
        hits = x.astype("intp")
        misses = (-1 * np.ones(1000)).astype("intp")

        tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size)

        # strictly inside every interval
        tm.assert_numpy_array_equal(hits, tree.get_indexer(x + 0.25))

        # exactly on the left endpoints
        at_left = hits if tree.closed_left else misses
        tm.assert_numpy_array_equal(at_left, tree.get_indexer(x + 0.0))

        # exactly on the right endpoints
        at_right = hits if tree.closed_right else misses
        tm.assert_numpy_array_equal(at_right, tree.get_indexer(x + 0.5))

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            (np.array([0, 1, 4], dtype="int64"), np.array([2, 3, 5]), True),
            (np.array([0, 1, 2], dtype="int64"), np.array([5, 4, 3]), True),
            (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
            (np.array([0, 2, 4], dtype="int64"), np.array([1, 3, 5]), False),
            (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False),
        ],
    )
    @pytest.mark.parametrize("order", [list(p) for p in permutations(range(3))])
    def test_is_overlapping(self, closed, order, left, right, expected):
        """``is_overlapping`` matches ``expected`` for every ordering of the input."""
        # GH 23309
        shuffled_left, shuffled_right = left[order], right[order]
        tree = IntervalTree(shuffled_left, shuffled_right, closed=closed)
        assert tree.is_overlapping is expected

    @pytest.mark.parametrize("order", [list(p) for p in permutations(range(3))])
    def test_is_overlapping_endpoints(self, closed, order):
        """shared endpoints are marked as overlapping"""
        # GH 23309
        left = np.arange(3, dtype="int64")
        right = np.arange(1, 4)
        tree = IntervalTree(left[order], right[order], closed=closed)

        # expected to overlap only when closed == "both"
        assert tree.is_overlapping is (closed == "both")

    @pytest.mark.parametrize(
        "left, right",
        [
            (np.array([], dtype="int64"), np.array([], dtype="int64")),
            (np.array([0], dtype="int64"), np.array([1], dtype="int64")),
            (np.array([np.nan]), np.array([np.nan])),
            (np.array([np.nan] * 3), np.array([np.nan] * 3)),
        ],
    )
    def test_is_overlapping_trivial(self, closed, left, right):
        """Empty, single-interval and all-NaN inputs are not overlapping."""
        # GH 23309
        overlapping = IntervalTree(left, right, closed=closed).is_overlapping
        assert overlapping is False

    @pytest.mark.skipif(not IS64, reason="GH 23440")
    def test_construction_overflow(self):
        """Check the root pivot when every right endpoint is the int64 maximum."""
        # GH 25485
        int64_max = np.iinfo(np.int64).max
        left = np.arange(101, dtype="int64")
        right = [int64_max] * 101
        tree = IntervalTree(left, right)

        # pivot should be average of left/right medians
        assert tree.root.pivot == (50 + int64_max) / 2
|
@ -0,0 +1,44 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
IntervalIndex,
|
||||
MultiIndex,
|
||||
RangeIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def range_index():
    """
    Fixture returning ``RangeIndex(3)`` named "range_index".
    """
    index = RangeIndex(3, name="range_index")
    return index
|
||||
|
||||
|
||||
@pytest.fixture
def interval_index():
    """
    Fixture returning a three-interval IntervalIndex named "interval_index".

    The last two intervals, (1.0, 2.0) and (1.5, 2.5), share part of their
    range.
    """
    bounds = [(0.0, 1.0), (1.0, 2.0), (1.5, 2.5)]
    return IntervalIndex.from_tuples(bounds, name="interval_index")
|
||||
|
||||
|
||||
def test_join_overlapping_in_mi_to_same_intervalindex(range_index, interval_index):
|
||||
# GH-45661
|
||||
multi_index = MultiIndex.from_product([interval_index, range_index])
|
||||
result = multi_index.join(interval_index)
|
||||
|
||||
tm.assert_index_equal(result, multi_index)
|
||||
|
||||
|
||||
def test_join_overlapping_to_multiindex_with_same_interval(range_index, interval_index):
|
||||
# GH-45661
|
||||
multi_index = MultiIndex.from_product([interval_index, range_index])
|
||||
result = interval_index.join(multi_index)
|
||||
|
||||
tm.assert_index_equal(result, multi_index)
|
||||
|
||||
|
||||
def test_join_overlapping_interval_to_another_intervalindex(interval_index):
|
||||
# GH-45661
|
||||
flipped_interval_index = interval_index[::-1]
|
||||
result = interval_index.join(flipped_interval_index)
|
||||
|
||||
tm.assert_index_equal(result, interval_index)
|
@ -0,0 +1,13 @@
|
||||
import pytest
|
||||
|
||||
from pandas import IntervalIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestPickle:
    """Pickle round-trip tests for ``IntervalIndex``."""

    # "neither" is included so that every valid ``closed`` value is covered,
    # not only three of the four.
    @pytest.mark.parametrize("closed", ["left", "right", "both", "neither"])
    def test_pickle_round_trip_closed(self, closed):
        """
        An IntervalIndex built with ``closed`` must compare equal to itself
        after ``tm.round_trip_pickle``.
        """
        # https://github.com/pandas-dev/pandas/issues/35658
        idx = IntervalIndex.from_tuples([(1, 2), (2, 3)], closed=closed)
        result = tm.round_trip_pickle(idx)
        tm.assert_index_equal(result, idx)
|
@ -0,0 +1,202 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
IntervalIndex,
|
||||
Timestamp,
|
||||
interval_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def monotonic_index(start, end, dtype="int64", closed="right"):
    """
    Build an IntervalIndex whose breaks are ``np.arange(start, end)``.

    The breaks are created with the given ``dtype`` and the intervals are
    closed on the given side.
    """
    breaks = np.arange(start, end, dtype=dtype)
    return IntervalIndex.from_breaks(breaks, closed=closed)
|
||||
|
||||
|
||||
def empty_index(dtype="int64", closed="right"):
    """
    Build an IntervalIndex from an empty array of the given ``dtype``,
    closed on the given side.
    """
    no_data = np.array([], dtype=dtype)
    return IntervalIndex(no_data, closed=closed)
|
||||
|
||||
|
||||
class TestIntervalIndex:
    """Set-operation tests (union, intersection, differences) for IntervalIndex."""

    def test_union(self, closed, sort):
        """Union of two overlapping monotonic indexes, in both operand orders."""
        index = monotonic_index(0, 11, closed=closed)
        other = monotonic_index(5, 13, closed=closed)

        expected = monotonic_index(0, 13, closed=closed)
        result = index[::-1].union(other, sort=sort)
        # exact order is only checked when sort is None; contents always are
        if sort is None:
            tm.assert_index_equal(result, expected)
        assert tm.equalContents(result, expected)

        result = other[::-1].union(index, sort=sort)
        if sort is None:
            tm.assert_index_equal(result, expected)
        assert tm.equalContents(result, expected)

        tm.assert_index_equal(index.union(index, sort=sort), index)
        tm.assert_index_equal(index.union(index[:1], sort=sort), index)

    def test_union_empty_result(self, closed, sort):
        """Union of empty indexes keeps or widens the dtype as expected."""
        # GH 19101: empty result, same dtype
        index = empty_index(dtype="int64", closed=closed)
        result = index.union(index, sort=sort)
        tm.assert_index_equal(result, index)

        # GH 19101: empty result, different numeric dtypes -> common dtype is f8
        other = empty_index(dtype="float64", closed=closed)
        result = index.union(other, sort=sort)
        expected = other
        tm.assert_index_equal(result, expected)

        # Same check with the operands swapped. The previous code assigned
        # ``other = index.union(index, ...)`` and re-asserted the stale
        # ``result``, which verified nothing new.
        result = other.union(index, sort=sort)
        tm.assert_index_equal(result, expected)

        other = empty_index(dtype="uint64", closed=closed)
        result = index.union(other, sort=sort)
        tm.assert_index_equal(result, expected)

        result = other.union(index, sort=sort)
        tm.assert_index_equal(result, expected)

    def test_intersection(self, closed, sort):
        """Intersection of overlapping, nested, partially matching and NaN indexes."""
        index = monotonic_index(0, 11, closed=closed)
        other = monotonic_index(5, 13, closed=closed)

        expected = monotonic_index(5, 11, closed=closed)
        result = index[::-1].intersection(other, sort=sort)
        # exact order is only checked when sort is None; contents always are
        if sort is None:
            tm.assert_index_equal(result, expected)
        assert tm.equalContents(result, expected)

        result = other[::-1].intersection(index, sort=sort)
        if sort is None:
            tm.assert_index_equal(result, expected)
        assert tm.equalContents(result, expected)

        tm.assert_index_equal(index.intersection(index, sort=sort), index)

        # GH 26225: nested intervals
        index = IntervalIndex.from_tuples([(1, 2), (1, 3), (1, 4), (0, 2)])
        other = IntervalIndex.from_tuples([(1, 2), (1, 3)])
        expected = IntervalIndex.from_tuples([(1, 2), (1, 3)])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

        # GH 26225
        index = IntervalIndex.from_tuples([(0, 3), (0, 2)])
        other = IntervalIndex.from_tuples([(0, 2), (1, 3)])
        expected = IntervalIndex.from_tuples([(0, 2)])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

        # GH 26225: duplicate nan element
        index = IntervalIndex([np.nan, np.nan])
        other = IntervalIndex([np.nan])
        expected = IntervalIndex([np.nan])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

    def test_intersection_empty_result(self, closed, sort):
        """Intersection of disjoint indexes is empty with the expected dtype."""
        index = monotonic_index(0, 11, closed=closed)

        # GH 19101: empty result, same dtype
        other = monotonic_index(300, 314, closed=closed)
        expected = empty_index(dtype="int64", closed=closed)
        result = index.intersection(other, sort=sort)
        tm.assert_index_equal(result, expected)

        # GH 19101: empty result, different numeric dtypes -> common dtype is float64
        other = monotonic_index(300, 314, dtype="float64", closed=closed)
        result = index.intersection(other, sort=sort)
        expected = other[:0]
        tm.assert_index_equal(result, expected)

        # uint64 operand: still compared against the empty float64 expectation
        other = monotonic_index(300, 314, dtype="uint64", closed=closed)
        result = index.intersection(other, sort=sort)
        tm.assert_index_equal(result, expected)

    def test_intersection_duplicates(self):
        """A duplicated interval on the left appears once in the intersection."""
        # GH#38743
        index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)])
        other = IntervalIndex.from_tuples([(1, 2), (2, 3)])
        expected = IntervalIndex.from_tuples([(1, 2), (2, 3)])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

    def test_difference(self, closed, sort):
        """Difference against a slice, against itself, and against a float copy."""
        index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed)
        result = index.difference(index[:1], sort=sort)
        expected = index[1:]
        # the input is unsorted, so the expectation is sorted when sort is None
        if sort is None:
            expected = expected.sort_values()
        tm.assert_index_equal(result, expected)

        # GH 19101: empty result, same dtype
        result = index.difference(index, sort=sort)
        expected = empty_index(dtype="int64", closed=closed)
        tm.assert_index_equal(result, expected)

        # GH 19101: empty result, different dtypes
        other = IntervalIndex.from_arrays(
            index.left.astype("float64"), index.right, closed=closed
        )
        result = index.difference(other, sort=sort)
        tm.assert_index_equal(result, expected)

    def test_symmetric_difference(self, closed, sort):
        """Symmetric difference of shifted slices, of itself, and of a float copy."""
        index = monotonic_index(0, 11, closed=closed)
        result = index[1:].symmetric_difference(index[:-1], sort=sort)
        expected = IntervalIndex([index[0], index[-1]])
        # exact order is only checked when sort is None; contents always are
        if sort is None:
            tm.assert_index_equal(result, expected)
        assert tm.equalContents(result, expected)

        # GH 19101: empty result, same dtype
        result = index.symmetric_difference(index, sort=sort)
        expected = empty_index(dtype="int64", closed=closed)
        if sort is None:
            tm.assert_index_equal(result, expected)
        assert tm.equalContents(result, expected)

        # GH 19101: empty result, different dtypes
        other = IntervalIndex.from_arrays(
            index.left.astype("float64"), index.right, closed=closed
        )
        result = index.symmetric_difference(other, sort=sort)
        expected = empty_index(dtype="float64", closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.filterwarnings("ignore:'<' not supported between:RuntimeWarning")
    @pytest.mark.parametrize(
        "op_name", ["union", "intersection", "difference", "symmetric_difference"]
    )
    def test_set_incompatible_types(self, closed, op_name, sort):
        """
        Set operations with a non-IntervalIndex, a differently closed index,
        or an index of incompatible dtype.

        Except for ``difference`` (expected to return ``index`` itself), the
        expectation is the same operation applied to the object-dtype index.
        """
        index = monotonic_index(0, 11, closed=closed)
        set_op = getattr(index, op_name)

        # TODO: standardize return type of non-union setops type(self vs other)
        # non-IntervalIndex
        if op_name == "difference":
            expected = index
        else:
            expected = getattr(index.astype("O"), op_name)(Index([1, 2, 3]))
        result = set_op(Index([1, 2, 3]), sort=sort)
        tm.assert_index_equal(result, expected)

        # mixed closed -> cast to object
        for other_closed in {"right", "left", "both", "neither"} - {closed}:
            other = monotonic_index(0, 11, closed=other_closed)
            expected = getattr(index.astype(object), op_name)(other, sort=sort)
            if op_name == "difference":
                expected = index
            result = set_op(other, sort=sort)
            tm.assert_index_equal(result, expected)

        # GH 19016: incompatible dtypes -> cast to object
        other = interval_range(Timestamp("20180101"), periods=9, closed=closed)
        expected = getattr(index.astype(object), op_name)(other, sort=sort)
        if op_name == "difference":
            expected = index
        result = set_op(other, sort=sort)
        tm.assert_index_equal(result, expected)
|
Reference in New Issue
Block a user