mirror of https://github.com/aykhans/AzSuicideDataVisualization.git (synced 2025-07-04 07:08:05 +00:00)
first commit
This commit is contained in:
489 | .venv/Lib/site-packages/pandas/tests/indexes/test_common.py | Normal file
@@ -0,0 +1,489 @@
"""
Collection of tests asserting things that should be true for
any index subclass except for MultiIndex. Makes use of the `index_flat`
fixture defined in pandas/conftest.py.
"""
import re

import numpy as np
import pytest

from pandas.compat import IS64

from pandas.core.dtypes.common import is_integer_dtype

import pandas as pd
from pandas import (
    CategoricalIndex,
    DatetimeIndex,
    MultiIndex,
    PeriodIndex,
    RangeIndex,
    TimedeltaIndex,
)
import pandas._testing as tm
from pandas.core.api import NumericIndex


class TestCommon:
    @pytest.mark.parametrize("name", [None, "new_name"])
    def test_to_frame(self, name, index_flat):
        # see GH#15230, GH#22580
        idx = index_flat

        if name:
            idx_name = name
        else:
            idx_name = idx.name or 0

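        # to_frame should reuse the index object as the frame's index while
        # copying the data into the single column named after the index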
        df = idx.to_frame(name=idx_name)

        assert df.index is idx
        assert len(df.columns) == 1
        assert df.columns[0] == idx_name
        assert df[idx_name].values is not idx.values

        df = idx.to_frame(index=False, name=idx_name)
        assert df.index is not idx

    def test_droplevel(self, index_flat):
        # GH 21115
        # MultiIndex is tested separately in test_multi.py
        index = index_flat

        assert index.droplevel([]).equals(index)

        for level in [index.name, [index.name]]:
            if isinstance(index.name, tuple) and level is index.name:
                # GH 21121 : droplevel with tuple name
                continue
            msg = (
                "Cannot remove 1 levels from an index with 1 levels: at least one "
                "level must be left."
            )
            with pytest.raises(ValueError, match=msg):
                index.droplevel(level)

        for level in "wrong", ["wrong"]:
            with pytest.raises(
                KeyError,
                match=r"'Requested level \(wrong\) does not match index name \(None\)'",
            ):
                index.droplevel(level)

    def test_constructor_non_hashable_name(self, index_flat):
        # GH 20527
        index = index_flat

        message = "Index.name must be a hashable type"
        renamed = [["1"]]

        # With .rename()
        with pytest.raises(TypeError, match=message):
            index.rename(name=renamed)

        # With .set_names()
        with pytest.raises(TypeError, match=message):
            index.set_names(names=renamed)

    def test_constructor_unwraps_index(self, index_flat):
        a = index_flat
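        # Passing an existing Index to its own constructor should "unwrap" it:
        # the new object is backed by equivalent underlying data.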
        b = type(a)(a)
        tm.assert_equal(a._data, b._data)

    def test_to_flat_index(self, index_flat):
        # GH#22866
        index = index_flat

        result = index.to_flat_index()
        tm.assert_index_equal(result, index)

    def test_set_name_methods(self, index_flat):
        # MultiIndex tested separately
        index = index_flat
        new_name = "This is the new name for this index"

        original_name = index.name
        new_ind = index.set_names([new_name])
        assert new_ind.name == new_name
        assert index.name == original_name
        res = index.rename(new_name, inplace=True)

        # should return None
        assert res is None
        assert index.name == new_name
        assert index.names == [new_name]
        # FIXME: don't leave commented-out
        # with pytest.raises(TypeError, match="list-like"):
        #     # should still fail even if it would be the right length
        #     ind.set_names("a")
        with pytest.raises(ValueError, match="Level must be None"):
            index.set_names("a", level=0)

        # rename in place just leaves tuples and other containers alone
        name = ("A", "B")
        index.rename(name, inplace=True)
        assert index.name == name
        assert index.names == [name]

    def test_copy_and_deepcopy(self, index_flat):
        from copy import (
            copy,
            deepcopy,
        )

        index = index_flat

        for func in (copy, deepcopy):
            idx_copy = func(index)
            assert idx_copy is not index
            assert idx_copy.equals(index)

        new_copy = index.copy(deep=True, name="banana")
        assert new_copy.name == "banana"

    def test_copy_name(self, index_flat):
        # GH#12309: Check that the "name" argument
        # passed at initialization is honored.
        index = index_flat

        first = type(index)(index, copy=True, name="mario")
        second = type(first)(first, copy=False)

        # Even though "copy=False", we want a new object.
        assert first is not second
        tm.assert_index_equal(first, second)

        # Not using tm.assert_index_equal() since names differ.
        assert index.equals(first)

        assert first.name == "mario"
        assert second.name == "mario"

        # TODO: belongs in series arithmetic tests?
        s1 = pd.Series(2, index=first)
        s2 = pd.Series(3, index=second[:-1])
        # See GH#13365
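        # Multiplying the Series aligns their (partially overlapping) indexes;
        # because both indexes carry the same name, the result keeps it.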
        s3 = s1 * s2
        assert s3.index.name == "mario"

    def test_copy_name2(self, index_flat):
        # GH#35592
        index = index_flat

        assert index.copy(name="mario").name == "mario"

        with pytest.raises(ValueError, match="Length of new names must be 1, got 2"):
            index.copy(name=["mario", "luigi"])

        msg = f"{type(index).__name__}.name must be a hashable type"
        with pytest.raises(TypeError, match=msg):
            index.copy(name=[["mario"]])

    def test_unique_level(self, index_flat):
        # don't test a MultiIndex here (as it's tested separately)
        index = index_flat

        # GH 17896
        expected = index.drop_duplicates()
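        # unique() should give the same result no matter how the single level
        # is specified: by position, by name, or not at all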
        for level in [0, index.name, None]:
            result = index.unique(level=level)
            tm.assert_index_equal(result, expected)

        msg = "Too many levels: Index has only 1 level, not 4"
        with pytest.raises(IndexError, match=msg):
            index.unique(level=3)

        msg = (
            rf"Requested level \(wrong\) does not match index name "
            rf"\({re.escape(index.name.__repr__())}\)"
        )
        with pytest.raises(KeyError, match=msg):
            index.unique(level="wrong")

    def test_unique(self, index_flat):
        # MultiIndex tested separately
        index = index_flat
        if not len(index):
            pytest.skip("Skip check for empty Index and MultiIndex")

        idx = index[[0] * 5]
        idx_unique = index[[0]]

        # We test against `idx_unique`, so first we make sure it's unique
        # and doesn't contain nans.
        assert idx_unique.is_unique is True
        try:
            assert idx_unique.hasnans is False
        except NotImplementedError:
            pass

        result = idx.unique()
        tm.assert_index_equal(result, idx_unique)

        # nans:
        if not index._can_hold_na:
            pytest.skip("Skip na-check if index cannot hold na")

        vals = index._values[[0] * 5]
        vals[0] = np.nan

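        # vals is now [nan, v, v, v, v], so its first two entries are exactly
        # the unique values: nan and the original first element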
        vals_unique = vals[:2]
        idx_nan = index._shallow_copy(vals)
        idx_unique_nan = index._shallow_copy(vals_unique)
        assert idx_unique_nan.is_unique is True

        assert idx_nan.dtype == index.dtype
        assert idx_unique_nan.dtype == index.dtype

        expected = idx_unique_nan
        for i in [idx_nan, idx_unique_nan]:
            result = i.unique()
            tm.assert_index_equal(result, expected)

    def test_searchsorted_monotonic(self, index_flat, request):
        # GH17271
        index = index_flat
        # not implemented for tuple searches in MultiIndex
        # or Intervals searches in IntervalIndex
        if isinstance(index, pd.IntervalIndex):
            mark = pytest.mark.xfail(
                reason="IntervalIndex.searchsorted does not support Interval arg",
                raises=NotImplementedError,
            )
            request.node.add_marker(mark)

        # nothing to test if the index is empty
        if index.empty:
            pytest.skip("Skip check for empty Index")
        value = index[0]

        # determine the expected results (handle dupes for 'right')
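        # (index == value).argmin() gives the position of the first element
        # that differs from value; for a monotonic index this is where the
        # leading run of duplicates of index[0] ends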
        expected_left, expected_right = 0, (index == value).argmin()
        if expected_right == 0:
            # all values are the same, expected_right should be length
            expected_right = len(index)

        # test _searchsorted_monotonic in all cases
        # test searchsorted only for increasing
        if index.is_monotonic_increasing:
            ssm_left = index._searchsorted_monotonic(value, side="left")
            assert expected_left == ssm_left

            ssm_right = index._searchsorted_monotonic(value, side="right")
            assert expected_right == ssm_right

            ss_left = index.searchsorted(value, side="left")
            assert expected_left == ss_left

            ss_right = index.searchsorted(value, side="right")
            assert expected_right == ss_right

        elif index.is_monotonic_decreasing:
            ssm_left = index._searchsorted_monotonic(value, side="left")
            assert expected_left == ssm_left

            ssm_right = index._searchsorted_monotonic(value, side="right")
            assert expected_right == ssm_right
        else:
            # non-monotonic should raise.
            msg = "index must be monotonic increasing or decreasing"
            with pytest.raises(ValueError, match=msg):
                index._searchsorted_monotonic(value, side="left")

    def test_drop_duplicates(self, index_flat, keep):
        # MultiIndex is tested separately
        index = index_flat
        if isinstance(index, RangeIndex):
            pytest.skip(
                "RangeIndex is tested in test_drop_duplicates_no_duplicates "
                "as it cannot hold duplicates"
            )
        if len(index) == 0:
            pytest.skip(
                "empty index is tested in test_drop_duplicates_no_duplicates "
                "as it cannot hold duplicates"
            )

        # make unique index
        holder = type(index)
        unique_values = list(set(index))
        dtype = index.dtype if isinstance(index, NumericIndex) else None
        unique_idx = holder(unique_values, dtype=dtype)

        # make duplicated index
        n = len(unique_idx)
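        # sample 1.5x as many positions as there are unique values (with
        # replacement), so the constructed index generally contains repeats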
        duplicated_selection = np.random.choice(n, int(n * 1.5))
        idx = holder(unique_idx.values[duplicated_selection])

        # Series.duplicated is tested separately
        expected_duplicated = (
            pd.Series(duplicated_selection).duplicated(keep=keep).values
        )
        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected_duplicated)

        # Series.drop_duplicates is tested separately
        expected_dropped = holder(pd.Series(idx).drop_duplicates(keep=keep))
        tm.assert_index_equal(idx.drop_duplicates(keep=keep), expected_dropped)

    def test_drop_duplicates_no_duplicates(self, index_flat):
        # MultiIndex is tested separately
        index = index_flat

        # make unique index
        if isinstance(index, RangeIndex):
            # RangeIndex cannot have duplicates
            unique_idx = index
        else:
            holder = type(index)
            unique_values = list(set(index))
            dtype = index.dtype if isinstance(index, NumericIndex) else None
            unique_idx = holder(unique_values, dtype=dtype)

        # check on unique index
        expected_duplicated = np.array([False] * len(unique_idx), dtype="bool")
        tm.assert_numpy_array_equal(unique_idx.duplicated(), expected_duplicated)
        result_dropped = unique_idx.drop_duplicates()
        tm.assert_index_equal(result_dropped, unique_idx)
        # validate shallow copy
        assert result_dropped is not unique_idx

    def test_drop_duplicates_inplace(self, index):
        msg = r"drop_duplicates\(\) got an unexpected keyword argument"
        with pytest.raises(TypeError, match=msg):
            index.drop_duplicates(inplace=True)

    def test_has_duplicates(self, index_flat):
        # MultiIndex tested separately in:
        # tests/indexes/multi/test_unique_and_duplicates.
        index = index_flat
        holder = type(index)
        if not len(index) or isinstance(index, RangeIndex):
            # MultiIndex tested separately in:
            # tests/indexes/multi/test_unique_and_duplicates.
            # RangeIndex is unique by definition.
            pytest.skip("Skip check for empty Index, MultiIndex, and RangeIndex")

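        # five copies of the first element guarantee duplicates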
        idx = holder([index[0]] * 5)
        assert idx.is_unique is False
        assert idx.has_duplicates is True

    @pytest.mark.parametrize(
        "dtype",
        ["int64", "uint64", "float64", "category", "datetime64[ns]", "timedelta64[ns]"],
    )
    def test_astype_preserves_name(self, index, dtype):
        # https://github.com/pandas-dev/pandas/issues/32013
        if isinstance(index, MultiIndex):
            index.names = ["idx" + str(i) for i in range(index.nlevels)]
        else:
            index.name = "idx"

        warn = None
        if (
            isinstance(index, DatetimeIndex)
            and index.tz is not None
            and dtype == "datetime64[ns]"
        ):
            # This astype is deprecated in favor of tz_localize
            warn = FutureWarning
        try:
            # Some of these conversions cannot succeed so we use a try / except
            with tm.assert_produces_warning(warn):
                result = index.astype(dtype)
        except (ValueError, TypeError, NotImplementedError, SystemError):
            return

        if isinstance(index, MultiIndex):
            assert result.names == index.names
        else:
            assert result.name == index.name

    def test_asi8_deprecation(self, index):
        # GH#37877
        if isinstance(index, (DatetimeIndex, TimedeltaIndex, PeriodIndex)):
            warn = None
        else:
            warn = FutureWarning

        with tm.assert_produces_warning(warn):
            index.asi8

    def test_hasnans_isnans(self, index_flat):
        # GH#11343, added tests for hasnans / isnans
        index = index_flat

        # case where the index doesn't include NaN
        idx = index.copy(deep=True)
        expected = np.array([False] * len(idx), dtype=bool)
        tm.assert_numpy_array_equal(idx._isnan, expected)
        assert idx.hasnans is False

        idx = index.copy(deep=True)
        values = idx._values

        if len(index) == 0:
            return
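        # integer-dtype indexes cannot hold NaN, so the NaN insertion below
        # does not apply to them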
        elif isinstance(index, NumericIndex) and is_integer_dtype(index.dtype):
            return

        values[1] = np.nan

        idx = type(index)(values)

        expected = np.array([False] * len(idx), dtype=bool)
        expected[1] = True
        tm.assert_numpy_array_equal(idx._isnan, expected)
        assert idx.hasnans is True


@pytest.mark.parametrize("na_position", [None, "middle"])
def test_sort_values_invalid_na_position(index_with_missing, na_position):

    with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
        index_with_missing.sort_values(na_position=na_position)


@pytest.mark.parametrize("na_position", ["first", "last"])
def test_sort_values_with_missing(index_with_missing, na_position):
    # GH 35584. Test that sort_values works with missing values,
    # sort non-missing and place missing according to na_position

    if isinstance(index_with_missing, CategoricalIndex):
        pytest.skip("missing value sorting order not well-defined")

    missing_count = np.sum(index_with_missing.isna())
    not_na_vals = index_with_missing[index_with_missing.notna()].values
    sorted_values = np.sort(not_na_vals)
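    # re-attach the missing entries as None on the requested side; the Index
    # constructor below (given an explicit dtype) turns them back into the
    # dtype's missing-value marker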
    if na_position == "first":
        sorted_values = np.concatenate([[None] * missing_count, sorted_values])
    else:
        sorted_values = np.concatenate([sorted_values, [None] * missing_count])

    # Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray
    expected = type(index_with_missing)(sorted_values, dtype=index_with_missing.dtype)

    result = index_with_missing.sort_values(na_position=na_position)
    tm.assert_index_equal(result, expected)


def test_ndarray_compat_properties(index):
    if isinstance(index, PeriodIndex) and not IS64:
        pytest.skip("Overflow")
    idx = index
    assert idx.T.equals(idx)
    assert idx.transpose().equals(idx)

    values = idx.values

    assert idx.shape == values.shape
    assert idx.ndim == values.ndim
    assert idx.size == values.size

    if not isinstance(index, (RangeIndex, MultiIndex)):
        # These two are not backed by an ndarray
        assert idx.nbytes == values.nbytes

    # test for validity
    idx.nbytes
    idx.values.nbytes