first commit
@@ -0,0 +1,80 @@
import numpy as np
import pytest

from pandas import (
    PeriodIndex,
    Series,
    date_range,
    period_range,
    timedelta_range,
)
import pandas._testing as tm


class DropDuplicates:
    def test_drop_duplicates_metadata(self, idx):
        # GH#10115
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        result = idx_dup.drop_duplicates()

        expected = idx
        if not isinstance(idx, PeriodIndex):
            # freq is reset except for PeriodIndex
            assert idx_dup.freq is None
            assert result.freq is None
            expected = idx._with_freq(None)
        else:
            assert result.freq == expected.freq

        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
            ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10),
            ),
        ],
    )
    def test_drop_duplicates(self, keep, expected, index, idx):
        # to check Index/Series compat
        idx = idx.append(idx[:5])

        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
        expected = idx[~expected]

        result = idx.drop_duplicates(keep=keep)
        tm.assert_index_equal(result, expected)

        result = Series(idx).drop_duplicates(keep=keep)
        tm.assert_series_equal(result, Series(expected, index=index))


class TestDropDuplicatesPeriodIndex(DropDuplicates):
    @pytest.fixture(params=["D", "3D", "H", "2H", "T", "2T", "S", "3S"])
    def freq(self, request):
        return request.param

    @pytest.fixture
    def idx(self, freq):
        return period_range("2011-01-01", periods=10, freq=freq, name="idx")


class TestDropDuplicatesDatetimeIndex(DropDuplicates):
    @pytest.fixture
    def idx(self, freq_sample):
        return date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")


class TestDropDuplicatesTimedeltaIndex(DropDuplicates):
    @pytest.fixture
    def idx(self, freq_sample):
        return timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")
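Not part of the diff: a minimal sketch of the freq-resetting behaviour the tests above pin down, assuming a pandas version contemporary with these tests (variable names here are illustrative only).

# Illustrative only: appending a range-like index to itself breaks the regular
# spacing, so freq is dropped; PeriodIndex keeps freq because it lives in the dtype.
import pandas as pd

dti = pd.date_range("2011-01-01", periods=3, freq="D", name="idx")
doubled = dti.append(dti)
print(doubled.freq)                      # None - duplicates break regular spacing
print(doubled.drop_duplicates().freq)    # None - stays reset for DatetimeIndex

pi = pd.period_range("2011-01-01", periods=3, freq="D", name="idx")
print(pi.append(pi).drop_duplicates().freq)  # <Day> - PeriodIndex keeps its freq
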
@@ -0,0 +1,182 @@
"""
Tests shared for DatetimeIndex/TimedeltaIndex/PeriodIndex
"""
from datetime import (
    datetime,
    timedelta,
)

import numpy as np
import pytest

import pandas as pd
from pandas import (
    CategoricalIndex,
    DatetimeIndex,
    Index,
    PeriodIndex,
    TimedeltaIndex,
    date_range,
    period_range,
)
import pandas._testing as tm


class EqualsTests:
    def test_not_equals_numeric(self, index):
        assert not index.equals(Index(index.asi8))
        assert not index.equals(Index(index.asi8.astype("u8")))
        assert not index.equals(Index(index.asi8).astype("f8"))

    def test_equals(self, index):
        assert index.equals(index)
        assert index.equals(index.astype(object))
        assert index.equals(CategoricalIndex(index))
        assert index.equals(CategoricalIndex(index.astype(object)))

    def test_not_equals_non_arraylike(self, index):
        assert not index.equals(list(index))

    def test_not_equals_strings(self, index):
        other = Index([str(x) for x in index], dtype=object)
        assert not index.equals(other)
        assert not index.equals(CategoricalIndex(other))

    def test_not_equals_misc_strs(self, index):
        other = Index(list("abc"))
        assert not index.equals(other)


class TestPeriodIndexEquals(EqualsTests):
    @pytest.fixture
    def index(self):
        return period_range("2013-01-01", periods=5, freq="D")

    # TODO: de-duplicate with other test_equals2 methods
    @pytest.mark.parametrize("freq", ["D", "M"])
    def test_equals2(self, freq):
        # GH#13107
        idx = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq=freq)
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        idx2 = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq="H")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # same internal values, different freq
        idx3 = PeriodIndex._simple_new(
            idx._values._simple_new(idx._values.asi8, freq="H")
        )
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(pd.Series(idx3))


class TestDatetimeIndexEquals(EqualsTests):
    @pytest.fixture
    def index(self):
        return date_range("2013-01-01", periods=5)

    def test_equals2(self):
        # GH#13107
        idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # same internal values, different tz
        idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(pd.Series(idx3))

        # check that we do not raise when comparing with OutOfBounds objects
        oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)
        assert not idx3.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)
        assert not idx3.equals(oob2)

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_not_equals_bday(self, freq):
        rng = date_range("2009-01-01", "2010-01-01", freq=freq)
        assert not rng.equals(list(rng))


class TestTimedeltaIndexEquals(EqualsTests):
    @pytest.fixture
    def index(self):
        return tm.makeTimedeltaIndex(10)

    def test_equals2(self):
        # GH#13107
        idx = TimedeltaIndex(["1 days", "2 days", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        idx2 = TimedeltaIndex(["2 days", "1 days", "NaT"])
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.astype(object).equals(idx2.astype(object))
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # Check that we don't raise OverflowError on comparisons outside the
        # implementation range GH#28532
        oob = Index([timedelta(days=10**6)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)

        oob2 = Index([np.timedelta64(x) for x in oob], dtype=object)
        assert (oob == oob2).all()
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)

        oob3 = oob.map(np.timedelta64)
        assert (oob3 == oob).all()
        assert not idx.equals(oob3)
        assert not idx2.equals(oob3)
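Not part of the diff: a minimal sketch of the `equals` semantics the assertions above rely on, assuming a pandas version contemporary with these tests (example values are illustrative).

# Illustrative only: Index.equals is value-based for object casts, but strict
# about raw integer views and timezone, which is what the tests above assert.
import pandas as pd

dti = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])

print(dti.equals(dti.astype(object)))             # True  - same values, object dtype
print(dti.equals(pd.Index(dti.asi8)))             # False - int64 view is not "equal"
print(dti.equals(dti.tz_localize("US/Pacific")))  # False - different tz
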
@@ -0,0 +1,46 @@
import numpy as np
import pytest

import pandas as pd
from pandas import (
    DatetimeIndex,
    Index,
)
import pandas._testing as tm

dtlike_dtypes = [
    np.dtype("timedelta64[ns]"),
    np.dtype("datetime64[ns]"),
    pd.DatetimeTZDtype("ns", "Asia/Tokyo"),
    pd.PeriodDtype("ns"),
]


@pytest.mark.parametrize("ldtype", dtlike_dtypes)
@pytest.mark.parametrize("rdtype", dtlike_dtypes)
def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype):

    vals = np.tile(3600 * 10**9 * np.arange(3), 2)

    def construct(dtype):
        if dtype is dtlike_dtypes[-1]:
            # PeriodArray will try to cast ints to strings
            return DatetimeIndex(vals).astype(dtype)
        return Index(vals, dtype=dtype)

    left = construct(ldtype)
    right = construct(rdtype)

    result = left.get_indexer_non_unique(right)

    if ldtype is rdtype:
        ex1 = np.array([0, 3, 1, 4, 2, 5] * 2, dtype=np.intp)
        ex2 = np.array([], dtype=np.intp)
        tm.assert_numpy_array_equal(result[0], ex1)
        tm.assert_numpy_array_equal(result[1], ex2)

    else:
        no_matches = np.array([-1] * 6, dtype=np.intp)
        missing = np.arange(6, dtype=np.intp)
        tm.assert_numpy_array_equal(result[0], no_matches)
        tm.assert_numpy_array_equal(result[1], missing)
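Not part of the diff: a minimal sketch of the `get_indexer_non_unique` return shape exercised above, assuming a pandas version contemporary with these tests (example indexes are illustrative).

# Illustrative only: get_indexer_non_unique returns an (indexer, missing) pair;
# with mismatched datetime-like dtypes nothing matches, mirroring the test above.
import pandas as pd

left = pd.DatetimeIndex(["2020-01-01", "2020-01-01", "2020-01-02"])
right = pd.TimedeltaIndex(["1 day", "2 days"])

indexer, missing = left.get_indexer_non_unique(right)
print(indexer)   # [-1 -1] - no positions of `left` match
print(missing)   # [0 1]   - every element of `right` is unmatched
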
@@ -0,0 +1,46 @@
from pandas import (
    Index,
    NaT,
    date_range,
)


def test_is_monotonic_with_nat():
    # GH#31437
    # PeriodIndex.is_monotonic should behave analogously to DatetimeIndex,
    # in particular never be monotonic when we have NaT
    dti = date_range("2016-01-01", periods=3)
    pi = dti.to_period("D")
    tdi = Index(dti.view("timedelta64[ns]"))

    for obj in [pi, pi._engine, dti, dti._engine, tdi, tdi._engine]:
        if isinstance(obj, Index):
            # i.e. not Engines
            assert obj.is_monotonic
            assert obj.is_monotonic_increasing
            assert not obj.is_monotonic_decreasing
            assert obj.is_unique

    dti1 = dti.insert(0, NaT)
    pi1 = dti1.to_period("D")
    tdi1 = Index(dti1.view("timedelta64[ns]"))

    for obj in [pi1, pi1._engine, dti1, dti1._engine, tdi1, tdi1._engine]:
        if isinstance(obj, Index):
            # i.e. not Engines
            assert not obj.is_monotonic
            assert not obj.is_monotonic_increasing
            assert not obj.is_monotonic_decreasing
            assert obj.is_unique

    dti2 = dti.insert(3, NaT)
    pi2 = dti2.to_period("H")
    tdi2 = Index(dti2.view("timedelta64[ns]"))

    for obj in [pi2, pi2._engine, dti2, dti2._engine, tdi2, tdi2._engine]:
        if isinstance(obj, Index):
            # i.e. not Engines
            assert not obj.is_monotonic
            assert not obj.is_monotonic_increasing
            assert not obj.is_monotonic_decreasing
            assert obj.is_unique
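Not part of the diff: a minimal sketch of the NaT/monotonicity rule GH#31437 locks in, assuming a pandas version contemporary with these tests.

# Illustrative only: a single NaT makes a datetime-like index non-monotonic.
import pandas as pd

dti = pd.date_range("2016-01-01", periods=3)
print(dti.is_monotonic_increasing)                    # True
print(dti.insert(0, pd.NaT).is_monotonic_increasing)  # False - NaT breaks ordering
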
@@ -0,0 +1,53 @@
import numpy as np
import pytest

from pandas import (
    DatetimeIndex,
    NaT,
    PeriodIndex,
    TimedeltaIndex,
)
import pandas._testing as tm


class NATests:
    def test_nat(self, index_without_na):
        empty_index = index_without_na[:0]

        index_with_na = index_without_na.copy(deep=True)
        index_with_na._data[1] = NaT

        assert empty_index._na_value is NaT
        assert index_with_na._na_value is NaT
        assert index_without_na._na_value is NaT

        idx = index_without_na
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False

        idx = index_with_na
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True


class TestDatetimeIndexNA(NATests):
    @pytest.fixture
    def index_without_na(self, tz_naive_fixture):
        tz = tz_naive_fixture
        return DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)


class TestTimedeltaIndexNA(NATests):
    @pytest.fixture
    def index_without_na(self):
        return TimedeltaIndex(["1 days", "2 days"])


class TestPeriodIndexNA(NATests):
    @pytest.fixture
    def index_without_na(self):
        return PeriodIndex(["2011-01-01", "2011-01-02"], freq="D")
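Not part of the diff: a minimal sketch of the public counterparts of the internal NA attributes checked above (`hasnans` and `isna`), assuming a pandas version contemporary with these tests.

# Illustrative only: the public view of the NA bookkeeping the tests above probe.
import pandas as pd

idx = pd.DatetimeIndex(["2011-01-01", pd.NaT])
print(idx.hasnans)   # True
print(idx.isna())    # [False  True]
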
@@ -0,0 +1,317 @@
import numpy as np
import pytest

from pandas import (
    DatetimeIndex,
    Index,
    NaT,
    PeriodIndex,
    TimedeltaIndex,
    timedelta_range,
)
import pandas._testing as tm


def check_freq_ascending(ordered, orig, ascending):
    """
    Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex
    when the original index is generated (or generate-able) with
    period_range/date_range/timedelta_range.
    """
    if isinstance(ordered, PeriodIndex):
        assert ordered.freq == orig.freq
    elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
        if ascending:
            assert ordered.freq.n == orig.freq.n
        else:
            assert ordered.freq.n == -1 * orig.freq.n


def check_freq_nonmonotonic(ordered, orig):
    """
    Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex
    when the original index is _not_ generated (or generate-able) with
    period_range/date_range/timedelta_range.
    """
    if isinstance(ordered, PeriodIndex):
        assert ordered.freq == orig.freq
    elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
        assert ordered.freq is None


class TestSortValues:
    @pytest.fixture(params=[DatetimeIndex, TimedeltaIndex, PeriodIndex])
    def non_monotonic_idx(self, request):
        if request.param is DatetimeIndex:
            return DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
        elif request.param is PeriodIndex:
            dti = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
            return dti.to_period("D")
        else:
            return TimedeltaIndex(
                ["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"]
            )

    def test_argmin_argmax(self, non_monotonic_idx):
        assert non_monotonic_idx.argmin() == 1
        assert non_monotonic_idx.argmax() == 0

    def test_sort_values(self, non_monotonic_idx):
        idx = non_monotonic_idx
        ordered = idx.sort_values()
        assert ordered.is_monotonic

        ordered = idx.sort_values(ascending=False)
        assert ordered[::-1].is_monotonic

        ordered, dexer = idx.sort_values(return_indexer=True)
        assert ordered.is_monotonic
        tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp))

        ordered, dexer = idx.sort_values(return_indexer=True, ascending=False)
        assert ordered[::-1].is_monotonic
        tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp))

    def check_sort_values_with_freq(self, idx):
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        check_freq_ascending(ordered, idx, True)

        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        check_freq_ascending(ordered, idx, False)

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2], dtype=np.intp))
        check_freq_ascending(ordered, idx, True)

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0], dtype=np.intp))
        check_freq_ascending(ordered, idx, False)

    @pytest.mark.parametrize("freq", ["D", "H"])
    def test_sort_values_with_freq_timedeltaindex(self, freq):
        # GH#10295
        idx = timedelta_range(start=f"1{freq}", periods=3, freq=freq).rename("idx")

        self.check_sort_values_with_freq(idx)

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(
                ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
            ),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_sort_values_with_freq_datetimeindex(self, idx):
        self.check_sort_values_with_freq(idx)

    @pytest.mark.parametrize("freq", ["D", "2D", "4D"])
    def test_sort_values_with_freq_periodindex(self, freq):
        # here with_freq refers to being period_range-like
        idx = PeriodIndex(
            ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx"
        )
        self.check_sort_values_with_freq(idx)

    @pytest.mark.parametrize(
        "idx",
        [
            PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="A"),
            Index([2011, 2012, 2013], name="idx"),  # for compatibility check
        ],
    )
    def test_sort_values_with_freq_periodindex2(self, idx):
        # here with_freq indicates this is period_range-like
        self.check_sort_values_with_freq(idx)

    def check_sort_values_without_freq(self, idx, expected):

        ordered = idx.sort_values(na_position="first")
        tm.assert_index_equal(ordered, expected)
        check_freq_nonmonotonic(ordered, idx)

        if not idx.isna().any():
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            check_freq_nonmonotonic(ordered, idx)

        ordered = idx.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        check_freq_nonmonotonic(ordered, idx)

        ordered, indexer = idx.sort_values(return_indexer=True, na_position="first")
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, exp)
        check_freq_nonmonotonic(ordered, idx)

        if not idx.isna().any():
            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2], dtype=np.intp)
            tm.assert_numpy_array_equal(indexer, exp)
            check_freq_nonmonotonic(ordered, idx)

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 0, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, exp)
        check_freq_nonmonotonic(ordered, idx)

    def test_sort_values_without_freq_timedeltaindex(self):
        # GH#10295

        idx = TimedeltaIndex(
            ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1"
        )
        expected = TimedeltaIndex(
            ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1"
        )
        self.check_sort_values_without_freq(idx, expected)

    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
                [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_sort_values_without_freq_datetimeindex(
        self, index_dates, expected_dates, tz_naive_fixture
    ):
        tz = tz_naive_fixture

        # without freq
        idx = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")

        self.check_sort_values_without_freq(idx, expected)

    @pytest.mark.parametrize(
        "idx,expected",
        [
            (
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-03",
                        "2011-01-05",
                        "2011-01-02",
                        "2011-01-01",
                    ],
                    freq="D",
                    name="idx1",
                ),
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-01",
                        "2011-01-02",
                        "2011-01-03",
                        "2011-01-05",
                    ],
                    freq="D",
                    name="idx1",
                ),
            ),
            (
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-03",
                        "2011-01-05",
                        "2011-01-02",
                        "2011-01-01",
                    ],
                    freq="D",
                    name="idx2",
                ),
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-01",
                        "2011-01-02",
                        "2011-01-03",
                        "2011-01-05",
                    ],
                    freq="D",
                    name="idx2",
                ),
            ),
            (
                PeriodIndex(
                    [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
                    freq="D",
                    name="idx3",
                ),
                PeriodIndex(
                    [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
                    freq="D",
                    name="idx3",
                ),
            ),
            (
                PeriodIndex(
                    ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A"
                ),
                PeriodIndex(
                    ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="A"
                ),
            ),
            (
                # For compatibility check
                Index([2011, 2013, 2015, 2012, 2011], name="idx"),
                Index([2011, 2011, 2012, 2013, 2015], name="idx"),
            ),
        ],
    )
    def test_sort_values_without_freq_periodindex(self, idx, expected):
        # here without_freq means not generate-able by period_range
        self.check_sort_values_without_freq(idx, expected)

    def test_sort_values_without_freq_periodindex_nat(self):
        # doesn't quite fit into check_sort_values_without_freq
        idx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D")
        expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D")

        ordered = idx.sort_values(na_position="first")
        tm.assert_index_equal(ordered, expected)
        check_freq_nonmonotonic(ordered, idx)

        ordered = idx.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        check_freq_nonmonotonic(ordered, idx)


def test_order_stability_compat():
    # GH#35922. sort_values is stable both for normal and datetime-like Index
    pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A")
    iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx")
    ordered1, indexer1 = pidx.sort_values(return_indexer=True, ascending=False)
    ordered2, indexer2 = iidx.sort_values(return_indexer=True, ascending=False)
    tm.assert_numpy_array_equal(indexer1, indexer2)
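Not part of the diff: a minimal sketch of the return_indexer output and the stable-sort property GH#35922 relies on, assuming a pandas version contemporary with these tests (the index here is illustrative).

# Illustrative only: sort_values(return_indexer=True) also returns the argsort
# positions; ties keep their original relative order (stable sort).
import pandas as pd

pidx = pd.PeriodIndex(["2011", "2013", "2015", "2012", "2011"], freq="A")
ordered, indexer = pidx.sort_values(return_indexer=True, ascending=False)
print(list(ordered.astype(str)))  # ['2015', '2013', '2012', '2011', '2011']
print(indexer)                    # [2 1 3 0 4] - position 0 precedes position 4
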
@@ -0,0 +1,103 @@
import numpy as np

from pandas import (
    DatetimeIndex,
    NaT,
    PeriodIndex,
    Series,
    TimedeltaIndex,
    date_range,
    period_range,
    timedelta_range,
)
import pandas._testing as tm


class TestValueCounts:
    # GH#7735

    def test_value_counts_unique_datetimeindex(self, tz_naive_fixture):
        tz = tz_naive_fixture
        orig = date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz)
        self._check_value_counts_with_repeats(orig)

    def test_value_counts_unique_timedeltaindex(self):
        orig = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        self._check_value_counts_with_repeats(orig)

    def test_value_counts_unique_periodindex(self):
        orig = period_range("2011-01-01 09:00", freq="H", periods=10)
        self._check_value_counts_with_repeats(orig)

    def _check_value_counts_with_repeats(self, orig):
        # create repeated values: the n-th element is repeated n+1 times
        idx = type(orig)(
            np.repeat(orig._values, range(1, len(orig) + 1)), dtype=orig.dtype
        )

        exp_idx = orig[::-1]
        if not isinstance(exp_idx, PeriodIndex):
            exp_idx = exp_idx._with_freq(None)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        tm.assert_index_equal(idx.unique(), orig)

    def test_value_counts_unique_datetimeindex2(self, tz_naive_fixture):
        tz = tz_naive_fixture
        idx = DatetimeIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                NaT,
            ],
            tz=tz,
        )
        self._check_value_counts_dropna(idx)

    def test_value_counts_unique_timedeltaindex2(self):
        idx = TimedeltaIndex(
            [
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 08:00:00",
                "1 days 08:00:00",
                NaT,
            ]
        )
        self._check_value_counts_dropna(idx)

    def test_value_counts_unique_periodindex2(self):
        idx = PeriodIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                NaT,
            ],
            freq="H",
        )
        self._check_value_counts_dropna(idx)

    def _check_value_counts_dropna(self, idx):
        exp_idx = idx[[2, 3]]
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = idx[[2, 3, -1]]
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)
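Not part of the diff: a minimal sketch of the value_counts behaviour exercised above, assuming a pandas version contemporary with these tests (example timestamps are illustrative).

# Illustrative only: value_counts on a datetime-like index sorts by count and
# excludes NaT unless dropna=False, which is what _check_value_counts_dropna asserts.
import pandas as pd

idx = pd.DatetimeIndex(
    ["2013-01-01 09:00"] * 3 + ["2013-01-01 08:00"] * 2 + [pd.NaT]
)
print(idx.value_counts())              # 09:00 -> 3, 08:00 -> 2
print(idx.value_counts(dropna=False))  # additionally NaT -> 1
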