first commit

Author: Ayxan
Date: 2022-05-23 00:16:32 +04:00
commit d660f2a4ca

24786 changed files with 4428337 additions and 0 deletions

@@ -0,0 +1,509 @@
"""
datetimelike delegation
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import warnings
import numpy as np
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_integer_dtype,
is_list_like,
is_period_dtype,
is_timedelta64_dtype,
)
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.accessor import (
PandasDelegate,
delegate_names,
)
from pandas.core.arrays import (
DatetimeArray,
PeriodArray,
TimedeltaArray,
)
from pandas.core.base import (
NoNewAttributesMixin,
PandasObject,
)
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex
if TYPE_CHECKING:
from pandas import Series
class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin):
_hidden_attrs = PandasObject._hidden_attrs | {
"orig",
"name",
}
def __init__(self, data: Series, orig):
if not isinstance(data, ABCSeries):
raise TypeError(
f"cannot convert an object of type {type(data)} to a datetimelike index"
)
self._parent = data
self.orig = orig
self.name = getattr(data, "name", None)
self._freeze()
def _get_values(self):
data = self._parent
if is_datetime64_dtype(data.dtype):
return DatetimeIndex(data, copy=False, name=self.name)
elif is_datetime64tz_dtype(data.dtype):
return DatetimeIndex(data, copy=False, name=self.name)
elif is_timedelta64_dtype(data.dtype):
return TimedeltaIndex(data, copy=False, name=self.name)
elif is_period_dtype(data.dtype):
return PeriodArray(data, copy=False)
raise TypeError(
f"cannot convert an object of type {type(data)} to a datetimelike index"
)
def _delegate_property_get(self, name):
from pandas import Series
values = self._get_values()
result = getattr(values, name)
# maybe need to upcast (ints)
if isinstance(result, np.ndarray):
if is_integer_dtype(result):
result = result.astype("int64")
elif not is_list_like(result):
return result
result = np.asarray(result)
if self.orig is not None:
index = self.orig.index
else:
index = self._parent.index
# return the result as a Series, which is by definition a copy
result = Series(result, index=index, name=self.name).__finalize__(self._parent)
# setting this object will show a SettingWithCopyWarning/Error
result._is_copy = (
"modifications to a property of a datetimelike "
"object are not supported and are discarded. "
"Change values on the original."
)
return result
def _delegate_property_set(self, name, value, *args, **kwargs):
raise ValueError(
"modifications to a property of a datetimelike object are not supported. "
"Change values on the original."
)
def _delegate_method(self, name, *args, **kwargs):
from pandas import Series
values = self._get_values()
method = getattr(values, name)
result = method(*args, **kwargs)
if not is_list_like(result):
return result
result = Series(result, index=self._parent.index, name=self.name).__finalize__(
self._parent
)
# setting this object will show a SettingWithCopyWarning/Error
result._is_copy = (
"modifications to a method of a datetimelike "
"object are not supported and are discarded. "
"Change values on the original."
)
return result
@delegate_names(
delegate=DatetimeArray, accessors=DatetimeArray._datetimelike_ops, typ="property"
)
@delegate_names(
delegate=DatetimeArray, accessors=DatetimeArray._datetimelike_methods, typ="method"
)
class DatetimeProperties(Properties):
"""
Accessor object for datetimelike properties of the Series values.
Examples
--------
>>> seconds_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="s"))
>>> seconds_series
0 2000-01-01 00:00:00
1 2000-01-01 00:00:01
2 2000-01-01 00:00:02
dtype: datetime64[ns]
>>> seconds_series.dt.second
0 0
1 1
2 2
dtype: int64
>>> hours_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="h"))
>>> hours_series
0 2000-01-01 00:00:00
1 2000-01-01 01:00:00
2 2000-01-01 02:00:00
dtype: datetime64[ns]
>>> hours_series.dt.hour
0 0
1 1
2 2
dtype: int64
>>> quarters_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="q"))
>>> quarters_series
0 2000-03-31
1 2000-06-30
2 2000-09-30
dtype: datetime64[ns]
>>> quarters_series.dt.quarter
0 1
1 2
2 3
dtype: int64
Returns a Series indexed like the original Series.
Raises TypeError if the Series does not contain datetimelike values.
"""
def to_pydatetime(self) -> np.ndarray:
"""
Return the data as an array of :class:`datetime.datetime` objects.
Timezone information is retained if present.
.. warning::
Python's datetime uses microsecond resolution, which is lower than
pandas (nanosecond). The values are truncated.
Returns
-------
numpy.ndarray
Object dtype array containing native Python datetime objects.
See Also
--------
datetime.datetime : Standard library value for a datetime.
Examples
--------
>>> s = pd.Series(pd.date_range('20180310', periods=2))
>>> s
0 2018-03-10
1 2018-03-11
dtype: datetime64[ns]
>>> s.dt.to_pydatetime()
array([datetime.datetime(2018, 3, 10, 0, 0),
datetime.datetime(2018, 3, 11, 0, 0)], dtype=object)
pandas' nanosecond precision is truncated to microseconds.
>>> s = pd.Series(pd.date_range('20180310', periods=2, freq='ns'))
>>> s
0 2018-03-10 00:00:00.000000000
1 2018-03-10 00:00:00.000000001
dtype: datetime64[ns]
>>> s.dt.to_pydatetime()
array([datetime.datetime(2018, 3, 10, 0, 0),
datetime.datetime(2018, 3, 10, 0, 0)], dtype=object)
"""
return self._get_values().to_pydatetime()
@property
def freq(self):
return self._get_values().inferred_freq
def isocalendar(self):
"""
Returns a DataFrame with the year, week, and day calculated according to
the ISO 8601 standard.
.. versionadded:: 1.1.0
Returns
-------
DataFrame
with columns year, week and day
See Also
--------
Timestamp.isocalendar : Function return a 3-tuple containing ISO year,
week number, and weekday for the given Timestamp object.
datetime.date.isocalendar : Return a named tuple object with
three components: year, week and weekday.
Examples
--------
>>> ser = pd.to_datetime(pd.Series(["2010-01-01", pd.NaT]))
>>> ser.dt.isocalendar()
year week day
0 2009 53 5
1 <NA> <NA> <NA>
>>> ser.dt.isocalendar().week
0 53
1 <NA>
Name: week, dtype: UInt32
"""
return self._get_values().isocalendar().set_index(self._parent.index)
@property
def weekofyear(self):
"""
The week ordinal of the year.
.. deprecated:: 1.1.0
Series.dt.weekofyear and Series.dt.week have been deprecated.
Please use Series.dt.isocalendar().week instead.
"""
warnings.warn(
"Series.dt.weekofyear and Series.dt.week have been deprecated. "
"Please use Series.dt.isocalendar().week instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
week_series = self.isocalendar().week
week_series.name = self.name
if week_series.hasnans:
return week_series.astype("float64")
return week_series.astype("int64")
week = weekofyear
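# Editor's sketch of the deprecation path above (FutureWarning output
# elided; assumes a pandas build of this vintage):
#
#   >>> s = pd.to_datetime(pd.Series(["2010-01-01"]))
#   >>> s.dt.isocalendar().week    # replacement
#   0    53
#   Name: week, dtype: UInt32
#   >>> s.dt.weekofyear            # deprecated alias, cast to int64
#   0    53
#   dtype: int64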
@delegate_names(
delegate=TimedeltaArray, accessors=TimedeltaArray._datetimelike_ops, typ="property"
)
@delegate_names(
delegate=TimedeltaArray,
accessors=TimedeltaArray._datetimelike_methods,
typ="method",
)
class TimedeltaProperties(Properties):
"""
Accessor object for datetimelike properties of the Series values.
Returns a Series indexed like the original Series.
Raises TypeError if the Series does not contain datetimelike values.
Examples
--------
>>> seconds_series = pd.Series(
... pd.timedelta_range(start="1 second", periods=3, freq="S")
... )
>>> seconds_series
0 0 days 00:00:01
1 0 days 00:00:02
2 0 days 00:00:03
dtype: timedelta64[ns]
>>> seconds_series.dt.seconds
0 1
1 2
2 3
dtype: int64
"""
def to_pytimedelta(self) -> np.ndarray:
"""
Return an array of native :class:`datetime.timedelta` objects.
Python's standard `datetime` library uses a different representation
for timedeltas. This method converts a Series of pandas Timedeltas
to `datetime.timedelta` objects, with the same length as the original
Series.
Returns
-------
numpy.ndarray
1D array containing data with `datetime.timedelta` type.
See Also
--------
datetime.timedelta : A duration expressing the difference
between two date, time, or datetime instances.
Examples
--------
>>> s = pd.Series(pd.to_timedelta(np.arange(5), unit="d"))
>>> s
0 0 days
1 1 days
2 2 days
3 3 days
4 4 days
dtype: timedelta64[ns]
>>> s.dt.to_pytimedelta()
array([datetime.timedelta(0), datetime.timedelta(days=1),
datetime.timedelta(days=2), datetime.timedelta(days=3),
datetime.timedelta(days=4)], dtype=object)
"""
return self._get_values().to_pytimedelta()
@property
def components(self):
"""
Return a DataFrame of the components of the Timedeltas.
Returns
-------
DataFrame
Examples
--------
>>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='s'))
>>> s
0 0 days 00:00:00
1 0 days 00:00:01
2 0 days 00:00:02
3 0 days 00:00:03
4 0 days 00:00:04
dtype: timedelta64[ns]
>>> s.dt.components
days hours minutes seconds milliseconds microseconds nanoseconds
0 0 0 0 0 0 0 0
1 0 0 0 1 0 0 0
2 0 0 0 2 0 0 0
3 0 0 0 3 0 0 0
4 0 0 0 4 0 0 0
"""
return (
self._get_values()
.components.set_index(self._parent.index)
.__finalize__(self._parent)
)
@property
def freq(self):
return self._get_values().inferred_freq
@delegate_names(
delegate=PeriodArray, accessors=PeriodArray._datetimelike_ops, typ="property"
)
@delegate_names(
delegate=PeriodArray, accessors=PeriodArray._datetimelike_methods, typ="method"
)
class PeriodProperties(Properties):
"""
Accessor object for datetimelike properties of the Series values.
Returns a Series indexed like the original Series.
Raises TypeError if the Series does not contain datetimelike values.
Examples
--------
>>> seconds_series = pd.Series(
... pd.period_range(
... start="2000-01-01 00:00:00", end="2000-01-01 00:00:03", freq="s"
... )
... )
>>> seconds_series
0 2000-01-01 00:00:00
1 2000-01-01 00:00:01
2 2000-01-01 00:00:02
3 2000-01-01 00:00:03
dtype: period[S]
>>> seconds_series.dt.second
0 0
1 1
2 2
3 3
dtype: int64
>>> hours_series = pd.Series(
... pd.period_range(start="2000-01-01 00:00", end="2000-01-01 03:00", freq="h")
... )
>>> hours_series
0 2000-01-01 00:00
1 2000-01-01 01:00
2 2000-01-01 02:00
3 2000-01-01 03:00
dtype: period[H]
>>> hours_series.dt.hour
0 0
1 1
2 2
3 3
dtype: int64
>>> quarters_series = pd.Series(
... pd.period_range(start="2000-01-01", end="2000-12-31", freq="Q-DEC")
... )
>>> quarters_series
0 2000Q1
1 2000Q2
2 2000Q3
3 2000Q4
dtype: period[Q-DEC]
>>> quarters_series.dt.quarter
0 1
1 2
2 3
3 4
dtype: int64
"""
class CombinedDatetimelikeProperties(
DatetimeProperties, TimedeltaProperties, PeriodProperties
):
def __new__(cls, data: Series):
# CombinedDatetimelikeProperties isn't really instantiated. Instead
# we need to choose which parent (datetime or timedelta) is
# appropriate. Since we're checking the dtypes anyway, we'll just
# do all the validation here.
if not isinstance(data, ABCSeries):
raise TypeError(
f"cannot convert an object of type {type(data)} to a datetimelike index"
)
orig = data if is_categorical_dtype(data.dtype) else None
if orig is not None:
data = data._constructor(
orig.array,
name=orig.name,
copy=False,
dtype=orig._values.categories.dtype,
index=orig.index,
)
if is_datetime64_dtype(data.dtype):
return DatetimeProperties(data, orig)
elif is_datetime64tz_dtype(data.dtype):
return DatetimeProperties(data, orig)
elif is_timedelta64_dtype(data.dtype):
return TimedeltaProperties(data, orig)
elif is_period_dtype(data.dtype):
return PeriodProperties(data, orig)
raise AttributeError("Can only use .dt accessor with datetimelike values")
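
The dispatch in CombinedDatetimelikeProperties.__new__ above is what backs the
public Series.dt accessor. A minimal usage sketch, using only public pandas
API (outputs assume a pandas build of this vintage):

import pandas as pd

# datetime64 Series -> DatetimeProperties
s = pd.Series(pd.date_range("2000-01-01", periods=3, freq="D"))
print(s.dt.year.tolist())   # [2000, 2000, 2000]

# timedelta64 Series -> TimedeltaProperties
td = pd.Series(pd.to_timedelta([1, 2], unit="D"))
print(td.dt.days.tolist())  # [1, 2]

# anything non-datetimelike hits the final raise in __new__
try:
    pd.Series([1, 2, 3]).dt
except AttributeError as exc:
    print(exc)  # Can only use .dt accessor with datetimelike values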

@@ -0,0 +1,324 @@
from __future__ import annotations
import textwrap
from pandas._libs import (
NaT,
lib,
)
from pandas.errors import InvalidIndexError
from pandas.core.dtypes.common import is_dtype_equal
from pandas.core.indexes.base import (
Index,
_new_Index,
ensure_index,
ensure_index_from_sequences,
get_unanimous_names,
)
from pandas.core.indexes.category import CategoricalIndex
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.interval import IntervalIndex
from pandas.core.indexes.multi import MultiIndex
from pandas.core.indexes.numeric import (
Float64Index,
Int64Index,
NumericIndex,
UInt64Index,
)
from pandas.core.indexes.period import PeriodIndex
from pandas.core.indexes.range import RangeIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex
_sort_msg = textwrap.dedent(
"""\
Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.
To accept the future behavior, pass 'sort=False'.
To retain the current behavior and silence the warning, pass 'sort=True'.
"""
)
__all__ = [
"Index",
"MultiIndex",
"NumericIndex",
"Float64Index",
"Int64Index",
"CategoricalIndex",
"IntervalIndex",
"RangeIndex",
"UInt64Index",
"InvalidIndexError",
"TimedeltaIndex",
"PeriodIndex",
"DatetimeIndex",
"_new_Index",
"NaT",
"ensure_index",
"ensure_index_from_sequences",
"get_objs_combined_axis",
"union_indexes",
"get_unanimous_names",
"all_indexes_same",
"default_index",
]
def get_objs_combined_axis(
objs, intersect: bool = False, axis=0, sort: bool = True, copy: bool = False
) -> Index:
"""
Extract combined index: return intersection or union (depending on the
value of "intersect") of indexes on given axis, or None if all objects
lack indexes (e.g. they are numpy arrays).
Parameters
----------
objs : list
Series or DataFrame objects, may be mix of the two.
intersect : bool, default False
If True, calculate the intersection between indexes. Otherwise,
calculate the union.
axis : {0 or 'index', 1 or 'columns'}, default 0
The axis to extract indexes from.
sort : bool, default True
Whether the result index should come out sorted or not.
copy : bool, default False
If True, return a copy of the combined index.
Returns
-------
Index
"""
obs_idxes = [obj._get_axis(axis) for obj in objs]
return _get_combined_index(obs_idxes, intersect=intersect, sort=sort, copy=copy)
def _get_distinct_objs(objs: list[Index]) -> list[Index]:
"""
Return a list with distinct elements of "objs" (different ids).
Preserves order.
"""
ids: set[int] = set()
res = []
for obj in objs:
if id(obj) not in ids:
ids.add(id(obj))
res.append(obj)
return res
def _get_combined_index(
indexes: list[Index],
intersect: bool = False,
sort: bool = False,
copy: bool = False,
) -> Index:
"""
Return the union or intersection of indexes.
Parameters
----------
indexes : list of Index or list objects
When intersect=True, do not accept list of lists.
intersect : bool, default False
If True, calculate the intersection between indexes. Otherwise,
calculate the union.
sort : bool, default False
Whether the result index should come out sorted or not.
copy : bool, default False
If True, return a copy of the combined index.
Returns
-------
Index
"""
# TODO: handle index names!
indexes = _get_distinct_objs(indexes)
if len(indexes) == 0:
index = Index([])
elif len(indexes) == 1:
index = indexes[0]
elif intersect:
index = indexes[0]
for other in indexes[1:]:
index = index.intersection(other)
else:
index = union_indexes(indexes, sort=False)
index = ensure_index(index)
if sort:
try:
index = index.sort_values()
except TypeError:
pass
# GH 29879
if copy:
index = index.copy()
return index
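# Editor's sketch of the two modes above (internal helper; exact reprs
# depend on the pandas version):
#
#   >>> from pandas import Index
#   >>> _get_combined_index([Index([1, 2]), Index([2, 3])]).tolist()
#   [1, 2, 3]
#   >>> _get_combined_index([Index([1, 2]), Index([2, 3])], intersect=True).tolist()
#   [2]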
def union_indexes(indexes, sort: bool | None = True) -> Index:
"""
Return the union of indexes.
The behavior of sort and names is not consistent.
Parameters
----------
indexes : list of Index or list objects
sort : bool, default True
Whether the result index should come out sorted or not.
Returns
-------
Index
"""
if len(indexes) == 0:
raise AssertionError("Must have at least 1 Index to union")
if len(indexes) == 1:
result = indexes[0]
if isinstance(result, list):
result = Index(sorted(result))
return result
indexes, kind = _sanitize_and_check(indexes)
def _unique_indices(inds) -> Index:
"""
Convert indexes to lists and concatenate them, removing duplicates.
The final dtype is inferred.
Parameters
----------
inds : list of Index or list objects
Returns
-------
Index
"""
def conv(i):
if isinstance(i, Index):
i = i.tolist()
return i
return Index(lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort))
if kind == "special":
result = indexes[0]
first = result
dtis = [x for x in indexes if isinstance(x, DatetimeIndex)]
dti_tzs = [x for x in dtis if x.tz is not None]
if len(dti_tzs) not in [0, len(dtis)]:
# TODO: this behavior is not tested (so may not be desired),
# but is kept in order to keep behavior the same when
# deprecating union_many
# test_frame_from_dict_with_mixed_indexes
raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")
if len(dtis) == len(indexes):
sort = True
if not all(is_dtype_equal(x.dtype, first.dtype) for x in indexes):
# i.e. timezones mismatch
# TODO(2.0): once deprecation is enforced, this union will
# cast to UTC automatically.
indexes = [x.tz_convert("UTC") for x in indexes]
result = indexes[0]
elif len(dtis) > 1:
# If we have mixed timezones, our casting behavior may depend on
# the order of indexes, which we don't want.
sort = False
# TODO: what about Categorical[dt64]?
# test_frame_from_dict_with_mixed_indexes
indexes = [x.astype(object, copy=False) for x in indexes]
result = indexes[0]
for other in indexes[1:]:
result = result.union(other, sort=None if sort else False)
return result
elif kind == "array":
index = indexes[0]
if not all(index.equals(other) for other in indexes[1:]):
index = _unique_indices(indexes)
name = get_unanimous_names(*indexes)[0]
if name != index.name:
index = index.rename(name)
return index
else: # kind='list'
return _unique_indices(indexes)
def _sanitize_and_check(indexes):
"""
Verify the type of indexes and convert lists to Index.
Cases:
- [list, list, ...]: Return ([list, list, ...], 'list')
- [list, Index, ...]: Return _sanitize_and_check([Index, Index, ...])
Lists are sorted and converted to Index.
- [Index, Index, ...]: Return ([Index, Index, ...], TYPE)
TYPE = 'special' if at least one special type, 'array' otherwise.
Parameters
----------
indexes : list of Index or list objects
Returns
-------
sanitized_indexes : list of Index or list objects
type : {'list', 'array', 'special'}
"""
kinds = list({type(index) for index in indexes})
if list in kinds:
if len(kinds) > 1:
indexes = [
Index(list(x)) if not isinstance(x, Index) else x for x in indexes
]
kinds.remove(list)
else:
return indexes, "list"
if len(kinds) > 1 or Index not in kinds:
return indexes, "special"
else:
return indexes, "array"
def all_indexes_same(indexes) -> bool:
"""
Determine if all indexes contain the same elements.
Parameters
----------
indexes : iterable of Index objects
Returns
-------
bool
True if all indexes contain the same elements, False otherwise.
"""
itr = iter(indexes)
first = next(itr)
return all(first.equals(index) for index in itr)
def default_index(n: int) -> RangeIndex:
rng = range(0, n)
return RangeIndex._simple_new(rng, name=None)
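
A short sketch of the helpers above. They are internal, so importing them from
pandas.core.indexes.api is an assumption that may break across versions:

import pandas as pd
from pandas.core.indexes.api import (
    all_indexes_same,
    default_index,
    union_indexes,
)

a = pd.Index([1, 2, 3])
b = pd.Index([3, 4])

# union_indexes concatenates and de-duplicates; the dtype is inferred
print(union_indexes([a, b]).tolist())   # [1, 2, 3, 4]

# all_indexes_same compares elements, not object identity
print(all_indexes_same([a, a.copy()]))  # True

# default_index builds the RangeIndex used when no index is supplied
print(default_index(3))                 # RangeIndex(start=0, stop=3, step=1)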

File diff suppressed because it is too large

@@ -0,0 +1,589 @@
from __future__ import annotations
from typing import (
Any,
Hashable,
)
import warnings
import numpy as np
from pandas._config import get_option
from pandas._libs import index as libindex
from pandas._typing import (
Dtype,
DtypeObj,
npt,
)
from pandas.util._decorators import (
cache_readonly,
doc,
)
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_scalar,
pandas_dtype,
)
from pandas.core.dtypes.missing import (
is_valid_na_for_dtype,
isna,
notna,
)
from pandas.core.arrays.categorical import (
Categorical,
contains,
)
from pandas.core.construction import extract_array
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import (
Index,
maybe_extract_name,
)
from pandas.core.indexes.extension import (
NDArrayBackedExtensionIndex,
inherit_names,
)
from pandas.io.formats.printing import pprint_thing
_index_doc_kwargs: dict[str, str] = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update({"target_klass": "CategoricalIndex"})
@inherit_names(
[
"argsort",
"tolist",
"codes",
"categories",
"ordered",
"_reverse_indexer",
"searchsorted",
"is_dtype_equal",
"min",
"max",
],
Categorical,
)
@inherit_names(
[
"rename_categories",
"reorder_categories",
"add_categories",
"remove_categories",
"remove_unused_categories",
"set_categories",
"as_ordered",
"as_unordered",
],
Categorical,
wrap=True,
)
class CategoricalIndex(NDArrayBackedExtensionIndex):
"""
Index based on an underlying :class:`Categorical`.
CategoricalIndex, like Categorical, can only take on a limited,
and usually fixed, number of possible values (`categories`). Also,
like Categorical, it might have an order, but numerical operations
(additions, divisions, ...) are not possible.
Parameters
----------
data : array-like (1-dimensional)
The values of the categorical. If `categories` are given, values not in
`categories` will be replaced with NaN.
categories : index-like, optional
The categories for the categorical. Items need to be unique.
If the categories are not given here (and also not in `dtype`), they
will be inferred from the `data`.
ordered : bool, optional
Whether or not this categorical is treated as an ordered
categorical. If not given here or in `dtype`, the resulting
categorical will be unordered.
dtype : CategoricalDtype or "category", optional
If :class:`CategoricalDtype`, cannot be used together with
`categories` or `ordered`.
copy : bool, default False
Make a copy of input ndarray.
name : object, optional
Name to be stored in the index.
Attributes
----------
codes
categories
ordered
Methods
-------
rename_categories
reorder_categories
add_categories
remove_categories
remove_unused_categories
set_categories
as_ordered
as_unordered
map
Raises
------
ValueError
If the categories do not validate.
TypeError
If an explicit ``ordered=True`` is given but no `categories` and the
`values` are not sortable.
See Also
--------
Index : The base pandas Index type.
Categorical : A categorical array.
CategoricalDtype : Type for categorical data.
Notes
-----
See the `user guide
<https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#categoricalindex>`__
for more.
Examples
--------
>>> pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"])
CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'],
categories=['a', 'b', 'c'], ordered=False, dtype='category')
``CategoricalIndex`` can also be instantiated from a ``Categorical``:
>>> c = pd.Categorical(["a", "b", "c", "a", "b", "c"])
>>> pd.CategoricalIndex(c)
CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'],
categories=['a', 'b', 'c'], ordered=False, dtype='category')
Ordered ``CategoricalIndex`` can have a min and max value.
>>> ci = pd.CategoricalIndex(
... ["a", "b", "c", "a", "b", "c"], ordered=True, categories=["c", "b", "a"]
... )
>>> ci
CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'],
categories=['c', 'b', 'a'], ordered=True, dtype='category')
>>> ci.min()
'c'
"""
_typ = "categoricalindex"
_data_cls = Categorical
@property
def _can_hold_strings(self):
return self.categories._can_hold_strings
@cache_readonly
def _should_fallback_to_positional(self) -> bool:
return self.categories._should_fallback_to_positional
codes: np.ndarray
categories: Index
ordered: bool | None
_data: Categorical
_values: Categorical
@property
def _engine_type(self):
# self.codes can have dtype int8, int16, int32 or int64, so we need
# to return the corresponding engine type (libindex.Int8Engine, etc.).
return {
np.int8: libindex.Int8Engine,
np.int16: libindex.Int16Engine,
np.int32: libindex.Int32Engine,
np.int64: libindex.Int64Engine,
}[self.codes.dtype.type]
# --------------------------------------------------------------------
# Constructors
def __new__(
cls,
data=None,
categories=None,
ordered=None,
dtype: Dtype | None = None,
copy: bool = False,
name: Hashable = None,
) -> CategoricalIndex:
name = maybe_extract_name(name, data, cls)
if data is None:
# GH#38944
warnings.warn(
"Constructing a CategoricalIndex without passing data is "
"deprecated and will raise in a future version. "
"Use CategoricalIndex([], ...) instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
data = []
if is_scalar(data):
raise cls._scalar_data_error(data)
data = Categorical(
data, categories=categories, ordered=ordered, dtype=dtype, copy=copy
)
return cls._simple_new(data, name=name)
# --------------------------------------------------------------------
def _is_dtype_compat(self, other) -> Categorical:
"""
*this is an internal non-public method*
provide a comparison between the dtype of self and other (coercing if
needed)
Parameters
----------
other : Index
Returns
-------
Categorical
Raises
------
TypeError if the dtypes are not compatible
"""
if is_categorical_dtype(other):
other = extract_array(other)
if not other._categories_match_up_to_permutation(self):
raise TypeError(
"categories must match existing categories when appending"
)
elif other._is_multi:
# preempt raising NotImplementedError in isna call
raise TypeError("MultiIndex is not dtype-compatible with CategoricalIndex")
else:
values = other
cat = Categorical(other, dtype=self.dtype)
other = CategoricalIndex(cat)
if not other.isin(values).all():
raise TypeError(
"cannot append a non-category item to a CategoricalIndex"
)
other = other._values
if not ((other == values) | (isna(other) & isna(values))).all():
# GH#37667 see test_equals_non_category
raise TypeError(
"categories must match existing categories when appending"
)
return other
@doc(Index.astype)
def astype(self, dtype: Dtype, copy: bool = True) -> Index:
from pandas.core.api import NumericIndex
dtype = pandas_dtype(dtype)
categories = self.categories
# the super method always returns Int64Index, UInt64Index and Float64Index
# but if the categories are a NumericIndex with dtype float32, we want to
# return an index with the same dtype as self.categories.
if categories._is_backward_compat_public_numeric_index:
assert isinstance(categories, NumericIndex) # mypy complaint fix
try:
categories._validate_dtype(dtype)
except ValueError:
pass
else:
new_values = self._data.astype(dtype, copy=copy)
# pass copy=False because any copying has been done in the
# _data.astype call above
return categories._constructor(new_values, name=self.name, copy=False)
return super().astype(dtype, copy=copy)
def equals(self, other: object) -> bool:
"""
Determine if two CategoricalIndex objects contain the same elements.
Returns
-------
bool
If two CategoricalIndex objects have equal elements True,
otherwise False.
"""
if self.is_(other):
return True
if not isinstance(other, Index):
return False
try:
other = self._is_dtype_compat(other)
except (TypeError, ValueError):
return False
return self._data.equals(other)
# --------------------------------------------------------------------
# Rendering Methods
@property
def _formatter_func(self):
return self.categories._formatter_func
def _format_attrs(self):
"""
Return a list of tuples of the (attr,formatted_value)
"""
max_categories = (
10
if get_option("display.max_categories") == 0
else get_option("display.max_categories")
)
attrs: list[tuple[str, str | int | bool | None]]
attrs = [
(
"categories",
ibase.default_pprint(self.categories, max_seq_items=max_categories),
),
("ordered", self.ordered),
]
extra = super()._format_attrs()
return attrs + extra
def _format_with_header(self, header: list[str], na_rep: str) -> list[str]:
result = [
pprint_thing(x, escape_chars=("\t", "\r", "\n")) if notna(x) else na_rep
for x in self._values
]
return header + result
# --------------------------------------------------------------------
@property
def inferred_type(self) -> str:
return "categorical"
@doc(Index.__contains__)
def __contains__(self, key: Any) -> bool:
# if key is a NaN, check if any NaN is in self.
if is_valid_na_for_dtype(key, self.categories.dtype):
return self.hasnans
return contains(self, key, container=self._engine)
# TODO(2.0): remove reindex once non-unique deprecation is enforced
def reindex(
self, target, method=None, level=None, limit=None, tolerance=None
) -> tuple[Index, npt.NDArray[np.intp] | None]:
"""
Create index with target's values (move/add/delete values as necessary)
Returns
-------
new_index : pd.Index
Resulting index
indexer : np.ndarray[np.intp] or None
Indices of output values in original index
"""
if method is not None:
raise NotImplementedError(
"argument method is not implemented for CategoricalIndex.reindex"
)
if level is not None:
raise NotImplementedError(
"argument level is not implemented for CategoricalIndex.reindex"
)
if limit is not None:
raise NotImplementedError(
"argument limit is not implemented for CategoricalIndex.reindex"
)
target = ibase.ensure_index(target)
if self.equals(target):
indexer = None
missing = np.array([], dtype=np.intp)
else:
indexer, missing = self.get_indexer_non_unique(target)
if not self.is_unique:
# GH#42568
warnings.warn(
"reindexing with a non-unique Index is deprecated and will "
"raise in a future version.",
FutureWarning,
stacklevel=find_stack_level(),
)
if len(self) and indexer is not None:
new_target = self.take(indexer)
else:
new_target = target
# filling in missing if needed
if len(missing):
cats = self.categories.get_indexer(target)
if not isinstance(target, CategoricalIndex) or (cats == -1).any():
new_target, indexer, _ = super()._reindex_non_unique(target)
else:
codes = new_target.codes.copy()
codes[indexer == -1] = cats[missing]
cat = self._data._from_backing_data(codes)
new_target = type(self)._simple_new(cat, name=self.name)
# we always want to return an Index type here
# to be consistent with .reindex for other index types (e.g. they don't
# coerce based on the actual values, only on the dtype)
# unless we had an initial Categorical to begin with
# in which case we are going to conform to the passed Categorical
if is_categorical_dtype(target):
cat = Categorical(new_target, dtype=target.dtype)
new_target = type(self)._simple_new(cat, name=self.name)
else:
# e.g. test_reindex_with_categoricalindex, test_reindex_duplicate_target
new_target = np.asarray(new_target)
new_target = Index._with_infer(new_target, name=self.name)
return new_target, indexer
# --------------------------------------------------------------------
# Indexing Methods
def _maybe_cast_indexer(self, key) -> int:
# GH#41933: we have to do this instead of self._data._validate_scalar
# because this will correctly get partial-indexing on Interval categories
try:
return self._data._unbox_scalar(key)
except KeyError:
if is_valid_na_for_dtype(key, self.categories.dtype):
return -1
raise
def _maybe_cast_listlike_indexer(self, values) -> CategoricalIndex:
if isinstance(values, CategoricalIndex):
values = values._data
if isinstance(values, Categorical):
# Indexing on codes is more efficient if categories are the same,
# so we can apply some optimizations based on the degree of
# dtype-matching.
cat = self._data._encode_with_my_categories(values)
codes = cat._codes
else:
codes = self.categories.get_indexer(values)
codes = codes.astype(self.codes.dtype, copy=False)
cat = self._data._from_backing_data(codes)
return type(self)._simple_new(cat)
# --------------------------------------------------------------------
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
return self.categories._is_comparable_dtype(dtype)
def take_nd(self, *args, **kwargs):
"""Alias for `take`"""
warnings.warn(
"CategoricalIndex.take_nd is deprecated, use CategoricalIndex.take "
"instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
return self.take(*args, **kwargs)
def map(self, mapper):
"""
Map values using an input mapping or function.
Maps the values (their categories, not the codes) of the index to new
categories. If the mapping correspondence is one-to-one the result is a
:class:`~pandas.CategoricalIndex` which has the same order property as
the original, otherwise an :class:`~pandas.Index` is returned.
If a `dict` or :class:`~pandas.Series` is used any unmapped category is
mapped to `NaN`. Note that if this happens an :class:`~pandas.Index`
will be returned.
Parameters
----------
mapper : function, dict, or Series
Mapping correspondence.
Returns
-------
pandas.CategoricalIndex or pandas.Index
Mapped index.
See Also
--------
Index.map : Apply a mapping correspondence on an
:class:`~pandas.Index`.
Series.map : Apply a mapping correspondence on a
:class:`~pandas.Series`.
Series.apply : Apply more complex functions on a
:class:`~pandas.Series`.
Examples
--------
>>> idx = pd.CategoricalIndex(['a', 'b', 'c'])
>>> idx
CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'],
ordered=False, dtype='category')
>>> idx.map(lambda x: x.upper())
CategoricalIndex(['A', 'B', 'C'], categories=['A', 'B', 'C'],
ordered=False, dtype='category')
>>> idx.map({'a': 'first', 'b': 'second', 'c': 'third'})
CategoricalIndex(['first', 'second', 'third'], categories=['first',
'second', 'third'], ordered=False, dtype='category')
If the mapping is one-to-one the ordering of the categories is
preserved:
>>> idx = pd.CategoricalIndex(['a', 'b', 'c'], ordered=True)
>>> idx
CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'],
ordered=True, dtype='category')
>>> idx.map({'a': 3, 'b': 2, 'c': 1})
CategoricalIndex([3, 2, 1], categories=[3, 2, 1], ordered=True,
dtype='category')
If the mapping is not one-to-one an :class:`~pandas.Index` is returned:
>>> idx.map({'a': 'first', 'b': 'second', 'c': 'first'})
Index(['first', 'second', 'first'], dtype='object')
If a `dict` is used, all unmapped categories are mapped to `NaN` and
the result is an :class:`~pandas.Index`:
>>> idx.map({'a': 'first', 'b': 'second'})
Index(['first', 'second', nan], dtype='object')
"""
mapped = self._values.map(mapper)
return Index(mapped, name=self.name)
def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
# if calling index is category, don't check dtype of others
try:
codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat])
except TypeError:
# not all to_concat elements are among our categories (or NA)
from pandas.core.dtypes.concat import concat_compat
res = concat_compat(to_concat)
return Index(res, name=name)
else:
cat = self._data._from_backing_data(codes)
return type(self)._simple_new(cat, name=name)
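
A usage sketch for map and _concat above; _concat is reached through the
public append, and its fallback branch is what returns a plain object Index:

import pandas as pd

ci = pd.CategoricalIndex(["a", "b", "a"], categories=["b", "a"], ordered=True)

# a one-to-one mapping keeps the CategoricalIndex and its category order
print(ci.map(str.upper))
# CategoricalIndex(['A', 'B', 'A'], categories=['B', 'A'], ordered=True,
#                  dtype='category')

# values outside the categories make _is_dtype_compat raise, so _concat
# falls back to concat_compat and an object-dtype Index
print(ci.append(pd.Index(["z"])))
# Index(['a', 'b', 'a', 'z'], dtype='object')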

@@ -0,0 +1,709 @@
"""
Base and utility classes for tseries type pandas objects.
"""
from __future__ import annotations
from datetime import datetime
from typing import (
TYPE_CHECKING,
Any,
Callable,
Sequence,
TypeVar,
cast,
final,
)
import warnings
import numpy as np
from pandas._libs import (
NaT,
Timedelta,
lib,
)
from pandas._libs.tslibs import (
BaseOffset,
Resolution,
Tick,
parsing,
to_offset,
)
from pandas.compat.numpy import function as nv
from pandas.util._decorators import (
Appender,
cache_readonly,
doc,
)
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_dtype_equal,
is_integer,
is_list_like,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.arrays import (
DatetimeArray,
ExtensionArray,
PeriodArray,
TimedeltaArray,
)
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
import pandas.core.common as com
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import (
Index,
_index_shared_docs,
)
from pandas.core.indexes.extension import (
NDArrayBackedExtensionIndex,
inherit_names,
)
from pandas.core.indexes.range import RangeIndex
from pandas.core.tools.timedeltas import to_timedelta
if TYPE_CHECKING:
from pandas import CategoricalIndex
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_T = TypeVar("_T", bound="DatetimeIndexOpsMixin")
_TDT = TypeVar("_TDT", bound="DatetimeTimedeltaMixin")
@inherit_names(
["inferred_freq", "_resolution_obj", "resolution"],
DatetimeLikeArrayMixin,
cache=True,
)
@inherit_names(["mean", "asi8", "freq", "freqstr"], DatetimeLikeArrayMixin)
class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):
"""
Common ops mixin to support a unified interface for datetimelike Index subclasses.
"""
_is_numeric_dtype = False
_can_hold_strings = False
_data: DatetimeArray | TimedeltaArray | PeriodArray
freq: BaseOffset | None
freqstr: str | None
_resolution_obj: Resolution
# error: "Callable[[Any], Any]" has no attribute "fget"
hasnans = cast(
bool,
cache_readonly(
DatetimeLikeArrayMixin._hasna.fget # type: ignore[attr-defined]
),
)
@property
def _is_all_dates(self) -> bool:
return True
# ------------------------------------------------------------------------
def equals(self, other: Any) -> bool:
"""
Determines if two Index objects contain the same elements.
"""
if self.is_(other):
return True
if not isinstance(other, Index):
return False
elif other.dtype.kind in ["f", "i", "u", "c"]:
return False
elif not isinstance(other, type(self)):
should_try = False
inferable = self._data._infer_matches
if other.dtype == object:
should_try = other.inferred_type in inferable
elif is_categorical_dtype(other.dtype):
other = cast("CategoricalIndex", other)
should_try = other.categories.inferred_type in inferable
if should_try:
try:
other = type(self)(other)
except (ValueError, TypeError, OverflowError):
# e.g.
# ValueError -> cannot parse str entry, or OutOfBoundsDatetime
# TypeError -> trying to convert IntervalIndex to DatetimeIndex
# OverflowError -> Index([very_large_timedeltas])
return False
if not is_dtype_equal(self.dtype, other.dtype):
# have different timezone
return False
return np.array_equal(self.asi8, other.asi8)
@Appender(Index.__contains__.__doc__)
def __contains__(self, key: Any) -> bool:
hash(key)
try:
self.get_loc(key)
except (KeyError, TypeError, ValueError):
return False
return True
_can_hold_na = True
def _convert_tolerance(self, tolerance, target):
tolerance = np.asarray(to_timedelta(tolerance).to_numpy())
return super()._convert_tolerance(tolerance, target)
# --------------------------------------------------------------------
# Rendering Methods
def format(
self,
name: bool = False,
formatter: Callable | None = None,
na_rep: str = "NaT",
date_format: str | None = None,
) -> list[str]:
"""
Render a string representation of the Index.
"""
header = []
if name:
header.append(
ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
if self.name is not None
else ""
)
if formatter is not None:
return header + list(self.map(formatter))
return self._format_with_header(header, na_rep=na_rep, date_format=date_format)
def _format_with_header(
self, header: list[str], na_rep: str = "NaT", date_format: str | None = None
) -> list[str]:
# matches base class except for whitespace padding and date_format
return header + list(
self._format_native_types(na_rep=na_rep, date_format=date_format)
)
@property
def _formatter_func(self):
return self._data._formatter()
def _format_attrs(self):
"""
Return a list of tuples of the (attr,formatted_value).
"""
attrs = super()._format_attrs()
for attrib in self._attributes:
# iterating over _attributes prevents us from doing this for PeriodIndex
if attrib == "freq":
freq = self.freqstr
if freq is not None:
freq = repr(freq) # e.g. D -> 'D'
attrs.append(("freq", freq))
return attrs
@Appender(Index._summary.__doc__)
def _summary(self, name=None) -> str:
result = super()._summary(name=name)
if self.freq:
result += f"\nFreq: {self.freqstr}"
return result
# --------------------------------------------------------------------
# Indexing Methods
def _can_partial_date_slice(self, reso: Resolution) -> bool:
raise NotImplementedError
def _parsed_string_to_bounds(self, reso: Resolution, parsed):
raise NotImplementedError
def _parse_with_reso(self, label: str):
# overridden by TimedeltaIndex
parsed, reso_str = parsing.parse_time_string(label, self.freq)
reso = Resolution.from_attrname(reso_str)
return parsed, reso
def _get_string_slice(self, key: str):
parsed, reso = self._parse_with_reso(key)
try:
return self._partial_date_slice(reso, parsed)
except KeyError as err:
raise KeyError(key) from err
@final
def _partial_date_slice(
self,
reso: Resolution,
parsed: datetime,
):
"""
Parameters
----------
reso : Resolution
parsed : datetime
Returns
-------
slice or ndarray[intp]
"""
if not self._can_partial_date_slice(reso):
raise ValueError
t1, t2 = self._parsed_string_to_bounds(reso, parsed)
vals = self._data._ndarray
unbox = self._data._unbox
if self.is_monotonic_increasing:
if len(self) and (
(t1 < self[0] and t2 < self[0]) or (t1 > self[-1] and t2 > self[-1])
):
# we are out of range
raise KeyError
# TODO: does this depend on being monotonic _increasing_?
# a monotonic (sorted) series can be sliced
left = vals.searchsorted(unbox(t1), side="left")
right = vals.searchsorted(unbox(t2), side="right")
return slice(left, right)
else:
lhs_mask = vals >= unbox(t1)
rhs_mask = vals <= unbox(t2)
# try to find the dates
return (lhs_mask & rhs_mask).nonzero()[0]
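# Editor's sketch: partial-string slicing is the public face of
# _partial_date_slice (2016 is a leap year, hence 29 February rows):
#
#   >>> dti = pd.date_range("2016-01-01", periods=60, freq="D")
#   >>> ser = pd.Series(range(60), index=dti)
#   >>> ser["2016-02"].shape
#   (29,)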
def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
"""
If label is a string, cast it to scalar type according to resolution.
Parameters
----------
label : object
side : {'left', 'right'}
kind : {'loc', 'getitem'} or None
Returns
-------
label : object
Notes
-----
Value of `side` parameter should be validated in caller.
"""
assert kind in ["loc", "getitem", None, lib.no_default]
self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound")
if isinstance(label, str):
try:
parsed, reso = self._parse_with_reso(label)
except ValueError as err:
# DTI -> parsing.DateParseError
# TDI -> 'unit abbreviation w/o a number'
# PI -> string cannot be parsed as datetime-like
raise self._invalid_indexer("slice", label) from err
lower, upper = self._parsed_string_to_bounds(reso, parsed)
return lower if side == "left" else upper
elif not isinstance(label, self._data._recognized_scalars):
raise self._invalid_indexer("slice", label)
return label
# --------------------------------------------------------------------
# Arithmetic Methods
def shift(self: _T, periods: int = 1, freq=None) -> _T:
"""
Shift index by desired number of time frequency increments.
This method is for shifting the values of datetime-like indexes
by a specified time increment a given number of times.
Parameters
----------
periods : int, default 1
Number of periods (or increments) to shift by,
can be positive or negative.
freq : pandas.DateOffset, pandas.Timedelta or string, optional
Frequency increment to shift by.
If None, the index is shifted by its own `freq` attribute.
Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.
Returns
-------
pandas.DatetimeIndex
Shifted index.
See Also
--------
Index.shift : Shift values of Index.
PeriodIndex.shift : Shift values of PeriodIndex.
"""
arr = self._data.view()
arr._freq = self.freq
result = arr._time_shift(periods, freq=freq)
return type(self)._simple_new(result, name=self.name)
# --------------------------------------------------------------------
@doc(Index._maybe_cast_listlike_indexer)
def _maybe_cast_listlike_indexer(self, keyarr):
try:
res = self._data._validate_listlike(keyarr, allow_object=True)
except (ValueError, TypeError):
if not isinstance(keyarr, ExtensionArray):
# e.g. we don't want to cast DTA to ndarray[object]
res = com.asarray_tuplesafe(keyarr)
# TODO: com.asarray_tuplesafe shouldn't cast e.g. DatetimeArray
else:
res = keyarr
return Index(res, dtype=res.dtype)
class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin):
"""
Mixin class for methods shared by DatetimeIndex and TimedeltaIndex,
but not PeriodIndex
"""
_data: DatetimeArray | TimedeltaArray
_comparables = ["name", "freq"]
_attributes = ["name", "freq"]
# Compat for frequency inference, see GH#23789
_is_monotonic_increasing = Index.is_monotonic_increasing
_is_monotonic_decreasing = Index.is_monotonic_decreasing
_is_unique = Index.is_unique
_join_precedence = 10
def _with_freq(self, freq):
arr = self._data._with_freq(freq)
return type(self)._simple_new(arr, name=self._name)
def is_type_compatible(self, kind: str) -> bool:
warnings.warn(
f"{type(self).__name__}.is_type_compatible is deprecated and will be "
"removed in a future version.",
FutureWarning,
stacklevel=find_stack_level(),
)
return kind in self._data._infer_matches
@property
def values(self) -> np.ndarray:
# NB: For Datetime64TZ this is lossy
return self._data._ndarray
# --------------------------------------------------------------------
# Set Operation Methods
@cache_readonly
def _as_range_index(self) -> RangeIndex:
# Convert our i8 representations to RangeIndex
# Caller is responsible for checking isinstance(self.freq, Tick)
freq = cast(Tick, self.freq)
tick = freq.delta.value
rng = range(self[0].value, self[-1].value + tick, tick)
return RangeIndex(rng)
def _can_range_setop(self, other):
return isinstance(self.freq, Tick) and isinstance(other.freq, Tick)
def _wrap_range_setop(self, other, res_i8):
new_freq = None
if not len(res_i8):
# RangeIndex defaults to step=1, which we don't want.
new_freq = self.freq
elif isinstance(res_i8, RangeIndex):
new_freq = to_offset(Timedelta(res_i8.step))
# TODO: we cannot just do
# type(self._data)(res_i8.values, dtype=self.dtype, freq=new_freq)
# because test_setops_preserve_freq fails with _validate_frequency raising.
# This raising is incorrect, as 'on_freq' is incorrect. This will
# be fixed by GH#41493
res_values = res_i8.values.view(self._data._ndarray.dtype)
result = type(self._data)._simple_new(
res_values, dtype=self.dtype, freq=new_freq
)
return self._wrap_setop_result(other, result)
def _range_intersect(self, other, sort):
# Dispatch to RangeIndex intersection logic.
left = self._as_range_index
right = other._as_range_index
res_i8 = left.intersection(right, sort=sort)
return self._wrap_range_setop(other, res_i8)
def _range_union(self, other, sort):
# Dispatch to RangeIndex union logic.
left = self._as_range_index
right = other._as_range_index
res_i8 = left.union(right, sort=sort)
return self._wrap_range_setop(other, res_i8)
def _intersection(self, other: Index, sort=False) -> Index:
"""
intersection specialized to the case with matching dtypes and both non-empty.
"""
other = cast("DatetimeTimedeltaMixin", other)
if self._can_range_setop(other):
return self._range_intersect(other, sort=sort)
if not self._can_fast_intersect(other):
result = Index._intersection(self, other, sort=sort)
# We need to invalidate the freq because Index._intersection
# uses _shallow_copy on a view of self._data, which will preserve
# self.freq if we're not careful.
# At this point we should have result.dtype == self.dtype
# and type(result) is type(self._data)
result = self._wrap_setop_result(other, result)
return result._with_freq(None)._with_freq("infer")
else:
return self._fast_intersect(other, sort)
def _fast_intersect(self, other, sort):
# to make our life easier, "sort" the two ranges
if self[0] <= other[0]:
left, right = self, other
else:
left, right = other, self
# after sorting, the intersection always starts with the right index
# and ends with the index whose last element is smallest
end = min(left[-1], right[-1])
start = right[0]
if end < start:
result = self[:0]
else:
lslice = slice(*left.slice_locs(start, end))
result = left._values[lslice]
return result
def _can_fast_intersect(self: _T, other: _T) -> bool:
# Note: we only get here with len(self) > 0 and len(other) > 0
if self.freq is None:
return False
elif other.freq != self.freq:
return False
elif not self.is_monotonic_increasing:
# Because freq is not None, we must then be monotonic decreasing
return False
# this along with matching freqs ensure that we "line up",
# so intersection will preserve freq
# Note we are assuming away Ticks, as those go through _range_intersect
# GH#42104
return self.freq.n == 1
def _can_fast_union(self: _T, other: _T) -> bool:
# Assumes that type(self) == type(other), as per the annotation
# The ability to fast_union also implies that `freq` should be
# retained on union.
freq = self.freq
if freq is None or freq != other.freq:
return False
if not self.is_monotonic_increasing:
# Because freq is not None, we must then be monotonic decreasing
# TODO: do union on the reversed indexes?
return False
if len(self) == 0 or len(other) == 0:
# only reached via union_many
return True
# to make our life easier, "sort" the two ranges
if self[0] <= other[0]:
left, right = self, other
else:
left, right = other, self
right_start = right[0]
left_end = left[-1]
# Only need to "adjoin", not overlap
return (right_start == left_end + freq) or right_start in left
def _fast_union(self: _TDT, other: _TDT, sort=None) -> _TDT:
# Caller is responsible for ensuring self and other are non-empty
# to make our life easier, "sort" the two ranges
if self[0] <= other[0]:
left, right = self, other
elif sort is False:
# TDIs are not in the "correct" order and we don't want
# to sort but want to remove overlaps
left, right = self, other
left_start = left[0]
loc = right.searchsorted(left_start, side="left")
right_chunk = right._values[:loc]
dates = concat_compat((left._values, right_chunk))
result = type(self)._simple_new(dates, name=self.name)
return result
else:
left, right = other, self
left_end = left[-1]
right_end = right[-1]
# concatenate
if left_end < right_end:
loc = right.searchsorted(left_end, side="right")
right_chunk = right._values[loc:]
dates = concat_compat([left._values, right_chunk])
# The can_fast_union check ensures that the result.freq
# should match self.freq
dates = type(self._data)(dates, freq=self.freq)
result = type(self)._simple_new(dates)
return result
else:
return left
def _union(self, other, sort):
# We are called by `union`, which is responsible for this validation
assert isinstance(other, type(self))
assert self.dtype == other.dtype
if self._can_range_setop(other):
return self._range_union(other, sort=sort)
if self._can_fast_union(other):
result = self._fast_union(other, sort=sort)
# in the case with sort=None, the _can_fast_union check ensures
# that result.freq == self.freq
return result
else:
return super()._union(other, sort)._with_freq("infer")
# --------------------------------------------------------------------
# Join Methods
def _get_join_freq(self, other):
"""
Get the freq to attach to the result of a join operation.
"""
freq = None
if self._can_fast_union(other):
freq = self.freq
return freq
def _wrap_joined_index(self, joined, other):
assert other.dtype == self.dtype, (other.dtype, self.dtype)
result = super()._wrap_joined_index(joined, other)
result._data._freq = self._get_join_freq(other)
return result
def _get_engine_target(self) -> np.ndarray:
# engine methods and libjoin methods need dt64/td64 values cast to i8
return self._data._ndarray.view("i8")
def _from_join_target(self, result: np.ndarray):
# view e.g. i8 back to M8[ns]
result = result.view(self._data._ndarray.dtype)
return self._data._from_backing_data(result)
# --------------------------------------------------------------------
# List-like Methods
def _get_delete_freq(self, loc: int | slice | Sequence[int]):
"""
Find the `freq` for self.delete(loc).
"""
freq = None
if self.freq is not None:
if is_integer(loc):
if loc in (0, -len(self), -1, len(self) - 1):
freq = self.freq
else:
if is_list_like(loc):
# error: Incompatible types in assignment (expression has
# type "Union[slice, ndarray]", variable has type
# "Union[int, slice, Sequence[int]]")
loc = lib.maybe_indices_to_slice( # type: ignore[assignment]
np.asarray(loc, dtype=np.intp), len(self)
)
if isinstance(loc, slice) and loc.step in (1, None):
if loc.start in (0, None) or loc.stop in (len(self), None):
freq = self.freq
return freq
def _get_insert_freq(self, loc: int, item):
"""
Find the `freq` for self.insert(loc, item).
"""
value = self._data._validate_scalar(item)
item = self._data._box_func(value)
freq = None
if self.freq is not None:
# freq can be preserved on edge cases
if self.size:
if item is NaT:
pass
elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]:
freq = self.freq
elif (loc == len(self)) and item - self.freq == self[-1]:
freq = self.freq
else:
# Adding a single item to an empty index may preserve freq
if isinstance(self.freq, Tick):
# all TimedeltaIndex cases go through here; is_on_offset
# would raise TypeError
freq = self.freq
elif self.freq.is_on_offset(item):
freq = self.freq
return freq
@doc(NDArrayBackedExtensionIndex.delete)
def delete(self, loc):
result = super().delete(loc)
result._data._freq = self._get_delete_freq(loc)
return result
@doc(NDArrayBackedExtensionIndex.insert)
def insert(self, loc: int, item):
result = super().insert(loc, item)
if isinstance(result, type(self)):
# i.e. parent class method did not cast
result._data._freq = self._get_insert_freq(loc, item)
return result
# --------------------------------------------------------------------
# NDArray-Like Methods
@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
nv.validate_take((), kwargs)
indices = np.asarray(indices, dtype=np.intp)
result = NDArrayBackedExtensionIndex.take(
self, indices, axis, allow_fill, fill_value, **kwargs
)
maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
if isinstance(maybe_slice, slice):
freq = self._data._get_getitem_freq(maybe_slice)
result._data._freq = freq
return result
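
The freq bookkeeping above (_get_delete_freq and _get_insert_freq) is
observable through the public delete and insert; a brief sketch:

import pandas as pd

dti = pd.date_range("2016-01-01", periods=4, freq="D")

# deleting an endpoint keeps the freq; deleting an interior element drops it
print(dti.delete(0).freq)   # <Day>
print(dti.delete(1).freq)   # None

# inserting exactly one freq step past the end also preserves the freq
print(dti.insert(4, pd.Timestamp("2016-01-05")).freq)   # <Day>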

File diff suppressed because it is too large

@@ -0,0 +1,189 @@
"""
Shared methods for Index subclasses backed by ExtensionArray.
"""
from __future__ import annotations
from typing import (
Callable,
TypeVar,
)
import numpy as np
from pandas._typing import (
ArrayLike,
npt,
)
from pandas.util._decorators import (
cache_readonly,
doc,
)
from pandas.core.dtypes.generic import ABCDataFrame
from pandas.core.arrays import IntervalArray
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.indexes.base import Index
_T = TypeVar("_T", bound="NDArrayBackedExtensionIndex")
_ExtensionIndexT = TypeVar("_ExtensionIndexT", bound="ExtensionIndex")
def _inherit_from_data(
name: str, delegate: type, cache: bool = False, wrap: bool = False
):
"""
Make an alias for a method of the underlying ExtensionArray.
Parameters
----------
name : str
Name of an attribute the class should inherit from its EA parent.
delegate : class
cache : bool, default False
Whether to convert wrapped properties into cache_readonly
wrap : bool, default False
Whether to wrap the inherited result in an Index.
Returns
-------
attribute, method, property, or cache_readonly
"""
attr = getattr(delegate, name)
if isinstance(attr, property) or type(attr).__name__ == "getset_descriptor":
# getset_descriptor i.e. property defined in cython class
if cache:
def cached(self):
return getattr(self._data, name)
cached.__name__ = name
cached.__doc__ = attr.__doc__
method = cache_readonly(cached)
else:
def fget(self):
result = getattr(self._data, name)
if wrap:
if isinstance(result, type(self._data)):
return type(self)._simple_new(result, name=self.name)
elif isinstance(result, ABCDataFrame):
return result.set_index(self)
return Index(result, name=self.name)
return result
def fset(self, value):
setattr(self._data, name, value)
fget.__name__ = name
fget.__doc__ = attr.__doc__
method = property(fget, fset)
elif not callable(attr):
# just a normal attribute, no wrapping
method = attr
else:
# error: Incompatible redefinition (redefinition with type "Callable[[Any,
# VarArg(Any), KwArg(Any)], Any]", original type "property")
def method(self, *args, **kwargs): # type: ignore[misc]
if "inplace" in kwargs:
raise ValueError(f"cannot use inplace with {type(self).__name__}")
result = attr(self._data, *args, **kwargs)
if wrap:
if isinstance(result, type(self._data)):
return type(self)._simple_new(result, name=self.name)
elif isinstance(result, ABCDataFrame):
return result.set_index(self)
return Index(result, name=self.name)
return result
# error: "property" has no attribute "__name__"
method.__name__ = name # type: ignore[attr-defined]
method.__doc__ = attr.__doc__
return method
def inherit_names(
names: list[str], delegate: type, cache: bool = False, wrap: bool = False
) -> Callable[[type[_ExtensionIndexT]], type[_ExtensionIndexT]]:
"""
Class decorator to pin attributes from an ExtensionArray to a Index subclass.
Parameters
----------
names : List[str]
delegate : class
cache : bool, default False
wrap : bool, default False
Whether to wrap the inherited result in an Index.
"""
def wrapper(cls: type[_ExtensionIndexT]) -> type[_ExtensionIndexT]:
for name in names:
meth = _inherit_from_data(name, delegate, cache=cache, wrap=wrap)
setattr(cls, name, meth)
return cls
return wrapper
class ExtensionIndex(Index):
"""
Index subclass for indexes backed by ExtensionArray.
"""
# The base class already passes through to _data:
# size, __len__, dtype
_data: IntervalArray | NDArrayBackedExtensionArray
# ---------------------------------------------------------------------
def _validate_fill_value(self, value):
"""
Convert value to be insertable to underlying array.
"""
return self._data._validate_setitem_value(value)
@doc(Index.map)
def map(self, mapper, na_action=None):
# Try to run function on index first, and then on elements of index
# Especially important for group-by functionality
try:
result = mapper(self)
# Try to use this result if we can
if isinstance(result, np.ndarray):
result = Index(result)
if not isinstance(result, Index):
raise TypeError("The map function must return an Index object")
return result
except Exception:
return self.astype(object).map(mapper)
@cache_readonly
def _isnan(self) -> npt.NDArray[np.bool_]:
# error: Incompatible return value type (got "ExtensionArray", expected
# "ndarray")
return self._data.isna() # type: ignore[return-value]
class NDArrayBackedExtensionIndex(ExtensionIndex):
"""
Index subclass for indexes backed by NDArrayBackedExtensionArray.
"""
_data: NDArrayBackedExtensionArray
def _get_engine_target(self) -> np.ndarray:
return self._data._ndarray
def _from_join_target(self, result: np.ndarray) -> ArrayLike:
assert result.dtype == self._data._ndarray.dtype
return self._data._from_backing_data(result)



@@ -0,0 +1,110 @@
"""
frozen (immutable) data structures to support MultiIndexing
These are used for:
- .names (FrozenList)
"""
from __future__ import annotations
from typing import Any
from pandas.core.base import PandasObject
from pandas.io.formats.printing import pprint_thing
class FrozenList(PandasObject, list):
"""
    Container that doesn't allow setting items *but* which, because it is
    technically hashable, can be used for lookups and similar operations.
"""
# Side note: This has to be of type list. Otherwise,
# it messes up PyTables type checks.
def union(self, other) -> FrozenList:
"""
Returns a FrozenList with other concatenated to the end of self.
Parameters
----------
other : array-like
The array-like whose elements we are concatenating.
Returns
-------
        FrozenList
            The collection with other concatenated to the end of self.
"""
if isinstance(other, tuple):
other = list(other)
return type(self)(super().__add__(other))
def difference(self, other) -> FrozenList:
"""
Returns a FrozenList with elements from other removed from self.
Parameters
----------
other : array-like
            The array-like whose elements we are removing from self.
Returns
-------
FrozenList
The collection difference between self and other.
"""
other = set(other)
temp = [x for x in self if x not in other]
return type(self)(temp)
# TODO: Consider deprecating these in favor of `union` (xref gh-15506)
__add__ = __iadd__ = union
def __getitem__(self, n):
if isinstance(n, slice):
return type(self)(super().__getitem__(n))
return super().__getitem__(n)
def __radd__(self, other):
if isinstance(other, tuple):
other = list(other)
return type(self)(other + list(self))
def __eq__(self, other: Any) -> bool:
if isinstance(other, (tuple, FrozenList)):
other = list(other)
return super().__eq__(other)
__req__ = __eq__
def __mul__(self, other):
return type(self)(super().__mul__(other))
__imul__ = __mul__
def __reduce__(self):
return type(self), (list(self),)
def __hash__(self):
return hash(tuple(self))
def _disabled(self, *args, **kwargs):
"""
This method will not function because object is immutable.
"""
raise TypeError(f"'{type(self).__name__}' does not support mutable operations.")
def __str__(self) -> str:
return pprint_thing(self, quote_strings=True, escape_chars=("\t", "\r", "\n"))
def __repr__(self) -> str:
return f"{type(self).__name__}({str(self)})"
__setitem__ = __setslice__ = _disabled # type: ignore[assignment]
__delitem__ = __delslice__ = _disabled
pop = append = extend = _disabled
remove = sort = insert = _disabled # type: ignore[assignment]
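# Behavior sketch (doctest-style, values illustrative): set-like ops return
# new FrozenLists, while mutating methods are disabled.
#
#     >>> fl = FrozenList(["a", "b"])
#     >>> fl.union(["c"])
#     FrozenList(['a', 'b', 'c'])
#     >>> fl.difference(["b"])
#     FrozenList(['a'])
#     >>> fl.append("c")
#     Traceback (most recent call last):
#     ...
#     TypeError: 'FrozenList' does not support mutable operations.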

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,488 @@
from __future__ import annotations
from typing import (
Callable,
Hashable,
)
import warnings
import numpy as np
from pandas._libs import (
index as libindex,
lib,
)
from pandas._typing import (
Dtype,
DtypeObj,
npt,
)
from pandas.util._decorators import (
cache_readonly,
doc,
)
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.cast import astype_nansafe
from pandas.core.dtypes.common import (
is_dtype_equal,
is_extension_array_dtype,
is_float,
is_float_dtype,
is_integer_dtype,
is_numeric_dtype,
is_scalar,
is_signed_integer_dtype,
is_unsigned_integer_dtype,
needs_i8_conversion,
pandas_dtype,
)
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.indexes.base import (
Index,
maybe_extract_name,
)
class NumericIndex(Index):
"""
Immutable sequence used for indexing and alignment. The basic object
storing axis labels for all pandas objects. NumericIndex is a special case
of `Index` with purely numpy int/uint/float labels.
.. versionadded:: 1.4.0
Parameters
----------
data : array-like (1-dimensional)
dtype : NumPy dtype (default: None)
copy : bool
Make a copy of input ndarray.
name : object
Name to be stored in the index.
Attributes
----------
None
Methods
    -------
None
See Also
--------
Index : The base pandas Index type.
Int64Index : Index of purely int64 labels (deprecated).
UInt64Index : Index of purely uint64 labels (deprecated).
Float64Index : Index of purely float64 labels (deprecated).
Notes
-----
    A ``NumericIndex`` instance can **only** contain numpy int64/32/16/8,
    uint64/32/16/8 or float64/32/16 dtypes. In particular, ``NumericIndex``
    *cannot* hold pandas numeric dtypes (:class:`Int64Dtype`, :class:`Int32Dtype`
    etc.).
"""
_typ = "numericindex"
_values: np.ndarray
_default_dtype: np.dtype | None = None
_dtype_validation_metadata: tuple[Callable[..., bool], str] = (
is_numeric_dtype,
"numeric type",
)
_is_numeric_dtype = True
_can_hold_strings = False
_is_backward_compat_public_numeric_index: bool = True
# error: Signature of "_can_hold_na" incompatible with supertype "Index"
@cache_readonly
def _can_hold_na(self) -> bool: # type: ignore[override]
if is_float_dtype(self.dtype):
return True
else:
return False
_engine_types: dict[np.dtype, type[libindex.IndexEngine]] = {
np.dtype(np.int8): libindex.Int8Engine,
np.dtype(np.int16): libindex.Int16Engine,
np.dtype(np.int32): libindex.Int32Engine,
np.dtype(np.int64): libindex.Int64Engine,
np.dtype(np.uint8): libindex.UInt8Engine,
np.dtype(np.uint16): libindex.UInt16Engine,
np.dtype(np.uint32): libindex.UInt32Engine,
np.dtype(np.uint64): libindex.UInt64Engine,
np.dtype(np.float32): libindex.Float32Engine,
np.dtype(np.float64): libindex.Float64Engine,
}
@property
def _engine_type(self):
# error: Invalid index type "Union[dtype[Any], ExtensionDtype]" for
# "Dict[dtype[Any], Type[IndexEngine]]"; expected type "dtype[Any]"
return self._engine_types[self.dtype] # type: ignore[index]
@cache_readonly
def inferred_type(self) -> str:
return {
"i": "integer",
"u": "integer",
"f": "floating",
}[self.dtype.kind]
def __new__(cls, data=None, dtype: Dtype | None = None, copy=False, name=None):
name = maybe_extract_name(name, data, cls)
subarr = cls._ensure_array(data, dtype, copy)
return cls._simple_new(subarr, name=name)
@classmethod
def _ensure_array(cls, data, dtype, copy: bool):
"""
Ensure we have a valid array to pass to _simple_new.
"""
cls._validate_dtype(dtype)
if not isinstance(data, (np.ndarray, Index)):
# Coerce to ndarray if not already ndarray or Index
if is_scalar(data):
raise cls._scalar_data_error(data)
# other iterable of some kind
if not isinstance(data, (ABCSeries, list, tuple)):
data = list(data)
orig = data
data = np.asarray(data, dtype=dtype)
if dtype is None and data.dtype.kind == "f":
if cls is UInt64Index and (data >= 0).all():
# https://github.com/numpy/numpy/issues/19146
data = np.asarray(orig, dtype=np.uint64)
if issubclass(data.dtype.type, str):
cls._string_data_error(data)
dtype = cls._ensure_dtype(dtype)
if copy or not is_dtype_equal(data.dtype, dtype):
# TODO: the try/except below is because it's difficult to predict the error
# and/or error message from different combinations of data and dtype.
# Efforts to avoid this try/except welcome.
# See https://github.com/pandas-dev/pandas/pull/41153#discussion_r676206222
try:
subarr = np.array(data, dtype=dtype, copy=copy)
cls._validate_dtype(subarr.dtype)
except (TypeError, ValueError):
raise ValueError(f"data is not compatible with {cls.__name__}")
cls._assert_safe_casting(data, subarr)
else:
subarr = data
if subarr.ndim > 1:
# GH#13601, GH#20285, GH#27125
raise ValueError("Index data must be 1-dimensional")
subarr = np.asarray(subarr)
return subarr
@classmethod
def _validate_dtype(cls, dtype: Dtype | None) -> None:
if dtype is None:
return
validation_func, expected = cls._dtype_validation_metadata
if not validation_func(dtype):
raise ValueError(
f"Incorrect `dtype` passed: expected {expected}, received {dtype}"
)
@classmethod
def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None:
"""
Ensure int64 dtype for Int64Index etc. but allow int32 etc. for NumericIndex.
Assumes dtype has already been validated.
"""
if dtype is None:
return cls._default_dtype
dtype = pandas_dtype(dtype)
assert isinstance(dtype, np.dtype)
if cls._is_backward_compat_public_numeric_index:
# dtype for NumericIndex
return dtype
else:
# dtype for Int64Index, UInt64Index etc. Needed for backwards compat.
return cls._default_dtype
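    # Resolution sketch (illustrative): NumericIndex._ensure_dtype(np.dtype("int32"))
    # returns int32 unchanged, whereas Int64Index._ensure_dtype(np.dtype("int32"))
    # returns the class default int64 for backwards compatibility.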
def __contains__(self, key) -> bool:
"""
Check if key is a float and has a decimal. If it has, return False.
"""
if not is_integer_dtype(self.dtype):
return super().__contains__(key)
hash(key)
try:
if is_float(key) and int(key) != key:
# otherwise the `key in self._engine` check casts e.g. 1.1 -> 1
return False
return key in self._engine
except (OverflowError, TypeError, ValueError):
return False
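    # Behavior sketch for integer-dtype indexes (values illustrative):
    # ``1.0 in idx`` can be True because the integral float survives the
    # integer round-trip, but ``1.1 in idx`` is always False per the check above.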
@doc(Index.astype)
def astype(self, dtype, copy: bool = True):
dtype = pandas_dtype(dtype)
if is_float_dtype(self.dtype):
if needs_i8_conversion(dtype):
raise TypeError(
f"Cannot convert Float64Index to dtype {dtype}; integer "
"values are required for conversion"
)
elif is_integer_dtype(dtype) and not is_extension_array_dtype(dtype):
# TODO(ExtensionIndex); this can change once we have an EA Index type
# GH 13149
arr = astype_nansafe(self._values, dtype=dtype)
if isinstance(self, Float64Index):
return Int64Index(arr, name=self.name)
else:
return NumericIndex(arr, name=self.name, dtype=dtype)
elif self._is_backward_compat_public_numeric_index:
# this block is needed so e.g. NumericIndex[int8].astype("int32") returns
# NumericIndex[int32] and not Int64Index with dtype int64.
            # When Int64Index etc. are removed from the code base, remove this also.
if not is_extension_array_dtype(dtype) and is_numeric_dtype(dtype):
return self._constructor(self, dtype=dtype, copy=copy)
return super().astype(dtype, copy=copy)
# ----------------------------------------------------------------
# Indexing Methods
# error: Decorated property not supported
@cache_readonly # type: ignore[misc]
@doc(Index._should_fallback_to_positional)
def _should_fallback_to_positional(self) -> bool:
return False
@doc(Index._convert_slice_indexer)
def _convert_slice_indexer(self, key: slice, kind: str):
if is_float_dtype(self.dtype):
assert kind in ["loc", "getitem"]
# We always treat __getitem__ slicing as label-based
# translate to locations
return self.slice_indexer(key.start, key.stop, key.step)
return super()._convert_slice_indexer(key, kind=kind)
@doc(Index._maybe_cast_slice_bound)
def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
assert kind in ["loc", "getitem", None, lib.no_default]
self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound")
# we will try to coerce to integers
return self._maybe_cast_indexer(label)
# ----------------------------------------------------------------
@doc(Index._shallow_copy)
def _shallow_copy(self, values, name: Hashable = lib.no_default):
if not self._can_hold_na and values.dtype.kind == "f":
name = self._name if name is lib.no_default else name
# Ensure we are not returning an Int64Index with float data:
return Float64Index._simple_new(values, name=name)
return super()._shallow_copy(values=values, name=name)
def _convert_tolerance(self, tolerance, target):
tolerance = super()._convert_tolerance(tolerance, target)
if not np.issubdtype(tolerance.dtype, np.number):
if tolerance.ndim > 0:
raise ValueError(
f"tolerance argument for {type(self).__name__} must contain "
"numeric elements if it is list type"
)
else:
raise ValueError(
f"tolerance argument for {type(self).__name__} must be numeric "
f"if it is a scalar: {repr(tolerance)}"
)
return tolerance
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
# If we ever have BoolIndex or ComplexIndex, this may need to be tightened
return is_numeric_dtype(dtype)
@classmethod
def _assert_safe_casting(cls, data: np.ndarray, subarr: np.ndarray) -> None:
"""
Ensure incoming data can be represented with matching signed-ness.
Needed if the process of casting data from some accepted dtype to the internal
dtype(s) bears the risk of truncation (e.g. float to int).
"""
if is_integer_dtype(subarr.dtype):
if not np.array_equal(data, subarr):
raise TypeError("Unsafe NumPy casting, you must explicitly cast")
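    # Sketch (values illustrative): casting np.array([1.5]) to an int64 subarr
    # yields np.array([1]), which is not array-equal to the input, so the
    # TypeError above is raised rather than silently truncating.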
@property
def _is_all_dates(self) -> bool:
"""
Checks that all the labels are datetime objects.
"""
return False
def _format_native_types(
self, *, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs
):
from pandas.io.formats.format import FloatArrayFormatter
if is_float_dtype(self.dtype):
formatter = FloatArrayFormatter(
self._values,
na_rep=na_rep,
float_format=float_format,
decimal=decimal,
quoting=quoting,
fixed_width=False,
)
return formatter.get_result_as_array()
return super()._format_native_types(
na_rep=na_rep,
float_format=float_format,
decimal=decimal,
quoting=quoting,
**kwargs,
)
_num_index_shared_docs = {}
_num_index_shared_docs[
"class_descr"
] = """
Immutable sequence used for indexing and alignment. The basic object
storing axis labels for all pandas objects. %(klass)s is a special case
of `Index` with purely %(ltype)s labels. %(extra)s.
.. deprecated:: 1.4.0
In pandas v2.0 %(klass)s will be removed and :class:`NumericIndex` used instead.
%(klass)s will remain fully functional for the duration of pandas 1.x.
Parameters
----------
data : array-like (1-dimensional)
dtype : NumPy dtype (default: %(dtype)s)
copy : bool
Make a copy of input ndarray.
name : object
Name to be stored in the index.
Attributes
----------
None
Methods
    -------
None
See Also
--------
Index : The base pandas Index type.
NumericIndex : Index of numpy int/uint/float data.
Notes
-----
An Index instance can **only** contain hashable objects.
"""
class IntegerIndex(NumericIndex):
"""
This is an abstract class for Int64Index, UInt64Index.
"""
_is_backward_compat_public_numeric_index: bool = False
@property
def asi8(self) -> npt.NDArray[np.int64]:
# do not cache or you'll create a memory leak
warnings.warn(
"Index.asi8 is deprecated and will be removed in a future version.",
FutureWarning,
stacklevel=find_stack_level(),
)
return self._values.view(self._default_dtype)
def _validate_fill_value(self, value):
# e.g. np.array([1.0]) we want np.array([1], dtype=self.dtype)
# see TestSetitemFloatNDarrayIntoIntegerSeries
super()._validate_fill_value(value)
if hasattr(value, "dtype") and is_float_dtype(value.dtype):
converted = value.astype(self.dtype)
if (converted == value).all():
# See also: can_hold_element
return converted
raise TypeError
return value
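# Sketch of IntegerIndex._validate_fill_value (values illustrative):
# np.array([1.0]) round-trips losslessly through the integer dtype and is
# returned as np.array([1]); np.array([1.5]) does not, so TypeError is raised
# to signal that a lossless cast is impossible.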
class Int64Index(IntegerIndex):
_index_descr_args = {
"klass": "Int64Index",
"ltype": "integer",
"dtype": "int64",
"extra": "",
}
__doc__ = _num_index_shared_docs["class_descr"] % _index_descr_args
_typ = "int64index"
_engine_type = libindex.Int64Engine
_default_dtype = np.dtype(np.int64)
_dtype_validation_metadata = (is_signed_integer_dtype, "signed integer")
class UInt64Index(IntegerIndex):
_index_descr_args = {
"klass": "UInt64Index",
"ltype": "unsigned integer",
"dtype": "uint64",
"extra": "",
}
__doc__ = _num_index_shared_docs["class_descr"] % _index_descr_args
_typ = "uint64index"
_engine_type = libindex.UInt64Engine
_default_dtype = np.dtype(np.uint64)
_dtype_validation_metadata = (is_unsigned_integer_dtype, "unsigned integer")
def _validate_fill_value(self, value):
# e.g. np.array([1]) we want np.array([1], dtype=np.uint64)
# see test_where_uin64
super()._validate_fill_value(value)
if hasattr(value, "dtype") and is_signed_integer_dtype(value.dtype):
if (value >= 0).all():
return value.astype(self.dtype)
raise TypeError
return value
class Float64Index(NumericIndex):
_index_descr_args = {
"klass": "Float64Index",
"dtype": "float64",
"ltype": "float",
"extra": "",
}
__doc__ = _num_index_shared_docs["class_descr"] % _index_descr_args
_typ = "float64index"
_engine_type = libindex.Float64Engine
_default_dtype = np.dtype(np.float64)
_dtype_validation_metadata = (is_float_dtype, "float")
_is_backward_compat_public_numeric_index: bool = False


@@ -0,0 +1,564 @@
from __future__ import annotations
from datetime import (
datetime,
timedelta,
)
from typing import Hashable
import warnings
import numpy as np
from pandas._libs import (
index as libindex,
lib,
)
from pandas._libs.tslibs import (
BaseOffset,
NaT,
Period,
Resolution,
Tick,
)
from pandas._typing import (
Dtype,
DtypeObj,
)
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.common import (
is_datetime64_any_dtype,
is_integer,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.missing import is_valid_na_for_dtype
from pandas.core.arrays.period import (
PeriodArray,
period_array,
raise_on_incompatible,
validate_dtype_freq,
)
import pandas.core.common as com
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import maybe_extract_name
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
from pandas.core.indexes.datetimes import (
DatetimeIndex,
Index,
)
from pandas.core.indexes.extension import inherit_names
from pandas.core.indexes.numeric import Int64Index
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update({"target_klass": "PeriodIndex or list of Periods"})
_shared_doc_kwargs = {
"klass": "PeriodArray",
}
# --- Period index sketch
def _new_PeriodIndex(cls, **d):
# GH13277 for unpickling
values = d.pop("data")
if values.dtype == "int64":
freq = d.pop("freq", None)
values = PeriodArray(values, freq=freq)
return cls._simple_new(values, **d)
else:
return cls(values, **d)
@inherit_names(
["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
PeriodArray,
wrap=True,
)
@inherit_names(["is_leap_year", "_format_native_types"], PeriodArray)
class PeriodIndex(DatetimeIndexOpsMixin):
"""
Immutable ndarray holding ordinal values indicating regular periods in time.
Index keys are boxed to Period objects which carries the metadata (eg,
frequency information).
Parameters
----------
data : array-like (1d int np.ndarray or PeriodArray), optional
Optional period-like data to construct index with.
copy : bool
Make a copy of input ndarray.
freq : str or period object, optional
One of pandas period strings or corresponding objects.
year : int, array, or Series, default None
month : int, array, or Series, default None
quarter : int, array, or Series, default None
day : int, array, or Series, default None
hour : int, array, or Series, default None
minute : int, array, or Series, default None
second : int, array, or Series, default None
dtype : str or PeriodDtype, default None
Attributes
----------
day
dayofweek
day_of_week
dayofyear
day_of_year
days_in_month
daysinmonth
end_time
freq
freqstr
hour
is_leap_year
minute
month
quarter
qyear
second
start_time
week
weekday
weekofyear
year
Methods
-------
asfreq
strftime
to_timestamp
See Also
--------
Index : The base pandas Index type.
Period : Represents a period of time.
DatetimeIndex : Index with datetime64 data.
TimedeltaIndex : Index of timedelta64 data.
period_range : Create a fixed-frequency PeriodIndex.
Examples
--------
>>> idx = pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3])
>>> idx
PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
"""
_typ = "periodindex"
_data: PeriodArray
freq: BaseOffset
dtype: PeriodDtype
_data_cls = PeriodArray
_engine_type = libindex.PeriodEngine
_supports_partial_string_indexing = True
# --------------------------------------------------------------------
# methods that dispatch to array and wrap result in Index
# These are defined here instead of via inherit_names for mypy
@doc(
PeriodArray.asfreq,
other="pandas.arrays.PeriodArray",
other_name="PeriodArray",
**_shared_doc_kwargs,
)
def asfreq(self, freq=None, how: str = "E") -> PeriodIndex:
arr = self._data.asfreq(freq, how)
return type(self)._simple_new(arr, name=self.name)
@doc(PeriodArray.to_timestamp)
def to_timestamp(self, freq=None, how="start") -> DatetimeIndex:
arr = self._data.to_timestamp(freq, how)
return DatetimeIndex._simple_new(arr, name=self.name)
# https://github.com/python/mypy/issues/1362
# error: Decorated property not supported
@property # type:ignore[misc]
@doc(PeriodArray.hour.fget)
def hour(self) -> Int64Index:
return Int64Index(self._data.hour, name=self.name)
# https://github.com/python/mypy/issues/1362
# error: Decorated property not supported
@property # type:ignore[misc]
@doc(PeriodArray.minute.fget)
def minute(self) -> Int64Index:
return Int64Index(self._data.minute, name=self.name)
# https://github.com/python/mypy/issues/1362
# error: Decorated property not supported
@property # type:ignore[misc]
@doc(PeriodArray.second.fget)
def second(self) -> Int64Index:
return Int64Index(self._data.second, name=self.name)
# ------------------------------------------------------------------------
# Index Constructors
def __new__(
cls,
data=None,
ordinal=None,
freq=None,
dtype: Dtype | None = None,
copy: bool = False,
name: Hashable = None,
**fields,
) -> PeriodIndex:
valid_field_set = {
"year",
"month",
"day",
"quarter",
"hour",
"minute",
"second",
}
if not set(fields).issubset(valid_field_set):
argument = list(set(fields) - valid_field_set)[0]
raise TypeError(f"__new__() got an unexpected keyword argument {argument}")
name = maybe_extract_name(name, data, cls)
if data is None and ordinal is None:
# range-based.
if not fields:
# test_pickle_compat_construction
raise cls._scalar_data_error(None)
data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields)
            # PeriodArray._generate_range does validation that fields is
            # empty when really using the range-based constructor.
freq = freq2
data = PeriodArray(data, freq=freq)
else:
freq = validate_dtype_freq(dtype, freq)
# PeriodIndex allow PeriodIndex(period_index, freq=different)
# Let's not encourage that kind of behavior in PeriodArray.
if freq and isinstance(data, cls) and data.freq != freq:
# TODO: We can do some of these with no-copy / coercion?
# e.g. D -> 2D seems to be OK
data = data.asfreq(freq)
if data is None and ordinal is not None:
# we strangely ignore `ordinal` if data is passed.
ordinal = np.asarray(ordinal, dtype=np.int64)
data = PeriodArray(ordinal, freq=freq)
else:
# don't pass copy here, since we copy later.
data = period_array(data=data, freq=freq)
if copy:
data = data.copy()
return cls._simple_new(data, name=name)
# ------------------------------------------------------------------------
# Data
@property
def values(self) -> np.ndarray:
return np.asarray(self, dtype=object)
def _maybe_convert_timedelta(self, other):
"""
Convert timedelta-like input to an integer multiple of self.freq
Parameters
----------
other : timedelta, np.timedelta64, DateOffset, int, np.ndarray
Returns
-------
converted : int, np.ndarray[int64]
Raises
------
IncompatibleFrequency : if the input cannot be written as a multiple
of self.freq. Note IncompatibleFrequency subclasses ValueError.
"""
if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)):
if isinstance(self.freq, Tick):
# _check_timedeltalike_freq_compat will raise if incompatible
delta = self._data._check_timedeltalike_freq_compat(other)
return delta
elif isinstance(other, BaseOffset):
if other.base == self.freq.base:
return other.n
raise raise_on_incompatible(self, other)
elif is_integer(other):
# integer is passed to .shift via
# _add_datetimelike_methods basically
# but ufunc may pass integer to _add_delta
return other
# raise when input doesn't have freq
raise raise_on_incompatible(self, None)
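    # Conversion sketch (illustrative): with self.freq == "D",
    # Timedelta("2 days") -> 2 and the plain integer 3 passes through as 3,
    # while Timedelta("2 hours") raises IncompatibleFrequency.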
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
"""
Can we compare values of the given dtype to our own?
"""
if not isinstance(dtype, PeriodDtype):
return False
# For the subset of DateOffsets that can be a dtype.freq, it
# suffices (and is much faster) to compare the dtype_code rather than
# the freq itself.
# See also: PeriodDtype.__eq__
freq = dtype.freq
own_freq = self.freq
return (
freq._period_dtype_code == own_freq._period_dtype_code
and freq.n == own_freq.n
)
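    # Comparability sketch: period[D] matches period[D]; period[2D] does not
    # match period[D], since n differs even though the dtype_code matches.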
# ------------------------------------------------------------------------
# Index Methods
def asof_locs(self, where: Index, mask: np.ndarray) -> np.ndarray:
"""
where : array of timestamps
mask : np.ndarray[bool]
Array of booleans where data is not NA.
"""
if isinstance(where, DatetimeIndex):
where = PeriodIndex(where._values, freq=self.freq)
elif not isinstance(where, PeriodIndex):
raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex")
return super().asof_locs(where, mask)
@doc(Index.astype)
def astype(self, dtype, copy: bool = True, how=lib.no_default):
dtype = pandas_dtype(dtype)
if how is not lib.no_default:
# GH#37982
warnings.warn(
"The 'how' keyword in PeriodIndex.astype is deprecated and "
"will be removed in a future version. "
"Use index.to_timestamp(how=how) instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
else:
how = "start"
if is_datetime64_any_dtype(dtype):
# 'how' is index-specific, isn't part of the EA interface.
# GH#45038 implement this for PeriodArray (but without "how")
# once the "how" deprecation is enforced we can just dispatch
# directly to PeriodArray.
tz = getattr(dtype, "tz", None)
return self.to_timestamp(how=how).tz_localize(tz)
return super().astype(dtype, copy=copy)
@property
def is_full(self) -> bool:
"""
Returns True if this PeriodIndex is range-like in that all Periods
between start and end are present, in order.
"""
if len(self) == 0:
return True
if not self.is_monotonic_increasing:
raise ValueError("Index is not monotonic")
values = self.asi8
return ((values[1:] - values[:-1]) < 2).all()
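    # Sketch (illustrative): period_range("2000-01", periods=3, freq="M").is_full
    # is True; dropping the middle month leaves an ordinal gap of 2, so the
    # ``< 2`` check above fails and is_full is False.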
@property
def inferred_type(self) -> str:
# b/c data is represented as ints make sure we can't have ambiguous
# indexing
return "period"
# ------------------------------------------------------------------------
# Indexing Methods
def _convert_tolerance(self, tolerance, target):
# Returned tolerance must be in dtype/units so that
# `|self._get_engine_target() - target._engine_target()| <= tolerance`
# is meaningful. Since PeriodIndex returns int64 for engine_target,
# we may need to convert timedelta64 tolerance to int64.
tolerance = super()._convert_tolerance(tolerance, target)
if self.dtype == target.dtype:
# convert tolerance to i8
tolerance = self._maybe_convert_timedelta(tolerance)
return tolerance
def get_loc(self, key, method=None, tolerance=None):
"""
Get integer location for requested label.
Parameters
----------
key : Period, NaT, str, or datetime
String or datetime key must be parsable as Period.
Returns
-------
loc : int or ndarray[int64]
Raises
------
KeyError
Key is not present in the index.
TypeError
If key is listlike or otherwise not hashable.
"""
orig_key = key
self._check_indexing_error(key)
if is_valid_na_for_dtype(key, self.dtype):
key = NaT
elif isinstance(key, str):
try:
parsed, reso = self._parse_with_reso(key)
except ValueError as err:
# A string with invalid format
raise KeyError(f"Cannot interpret '{key}' as period") from err
if self._can_partial_date_slice(reso):
try:
return self._partial_date_slice(reso, parsed)
except KeyError as err:
# TODO: pass if method is not None, like DTI does?
raise KeyError(key) from err
if reso == self.dtype.resolution:
# the reso < self.dtype.resolution case goes through _get_string_slice
key = Period(parsed, freq=self.freq)
loc = self.get_loc(key, method=method, tolerance=tolerance)
# Recursing instead of falling through matters for the exception
# message in test_get_loc3 (though not clear if that really matters)
return loc
elif method is None:
raise KeyError(key)
else:
key = Period(parsed, freq=self.freq)
elif isinstance(key, Period):
sfreq = self.freq
kfreq = key.freq
if not (
sfreq.n == kfreq.n
and sfreq._period_dtype_code == kfreq._period_dtype_code
):
# GH#42247 For the subset of DateOffsets that can be Period freqs,
# checking these two attributes is sufficient to check equality,
# and much more performant than `self.freq == key.freq`
raise KeyError(key)
elif isinstance(key, datetime):
try:
key = Period(key, freq=self.freq)
except ValueError as err:
# we cannot construct the Period
raise KeyError(orig_key) from err
else:
# in particular integer, which Period constructor would cast to string
raise KeyError(key)
try:
return Index.get_loc(self, key, method, tolerance)
except KeyError as err:
raise KeyError(orig_key) from err
@doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound)
def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
if isinstance(label, datetime):
label = Period(label, freq=self.freq)
return super()._maybe_cast_slice_bound(label, side, kind=kind)
def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
grp = reso.freq_group
iv = Period(parsed, freq=grp.value)
return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end"))
def _can_partial_date_slice(self, reso: Resolution) -> bool:
assert isinstance(reso, Resolution), (type(reso), reso)
# e.g. test_getitem_setitem_periodindex
return reso > self.dtype.resolution
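    # Sketch: with freq "M" (MONTH resolution), the year-level string "2000"
    # can be partial-date sliced, while "2000-01" resolves to MONTH itself
    # and is instead looked up as an exact Period key.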
def period_range(
start=None, end=None, periods: int | None = None, freq=None, name=None
) -> PeriodIndex:
"""
Return a fixed frequency PeriodIndex.
The day (calendar) is the default frequency.
Parameters
----------
start : str or period-like, default None
Left bound for generating periods.
end : str or period-like, default None
Right bound for generating periods.
periods : int, default None
Number of periods to generate.
freq : str or DateOffset, optional
Frequency alias. By default the freq is taken from `start` or `end`
if those are Period objects. Otherwise, the default is ``"D"`` for
daily frequency.
name : str, default None
Name of the resulting PeriodIndex.
Returns
-------
PeriodIndex
Notes
-----
Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
must be specified.
To learn more about the frequency strings, please see `this link
<https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
Examples
--------
>>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
'2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
'2018-01'],
dtype='period[M]')
If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
endpoints for a ``PeriodIndex`` with frequency matching that of the
``period_range`` constructor.
>>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),
... end=pd.Period('2017Q2', freq='Q'), freq='M')
PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],
dtype='period[M]')
"""
if com.count_not_none(start, end, periods) != 2:
raise ValueError(
"Of the three parameters: start, end, and periods, "
"exactly two must be specified"
)
if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)):
freq = "D"
data, freq = PeriodArray._generate_range(start, end, periods, freq, fields={})
data = PeriodArray(data, freq=freq)
return PeriodIndex(data, name=name)

File diff suppressed because it is too large


@@ -0,0 +1,276 @@
""" implement the TimedeltaIndex """
from __future__ import annotations
from pandas._libs import (
index as libindex,
lib,
)
from pandas._libs.tslibs import (
Timedelta,
to_offset,
)
from pandas._typing import DtypeObj
from pandas.core.dtypes.common import (
TD64NS_DTYPE,
is_scalar,
is_timedelta64_dtype,
)
from pandas.core.arrays import datetimelike as dtl
from pandas.core.arrays.timedeltas import TimedeltaArray
import pandas.core.common as com
from pandas.core.indexes.base import (
Index,
maybe_extract_name,
)
from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin
from pandas.core.indexes.extension import inherit_names
@inherit_names(
["__neg__", "__pos__", "__abs__", "total_seconds", "round", "floor", "ceil"]
+ TimedeltaArray._field_ops,
TimedeltaArray,
wrap=True,
)
@inherit_names(
[
"components",
"to_pytimedelta",
"sum",
"std",
"median",
"_format_native_types",
],
TimedeltaArray,
)
class TimedeltaIndex(DatetimeTimedeltaMixin):
"""
Immutable ndarray of timedelta64 data, represented internally as int64, and
which can be boxed to timedelta objects.
Parameters
----------
data : array-like (1-dimensional), optional
Optional timedelta-like data to construct index with.
    unit : str, optional
        The unit of ``data`` (D, h, m, s, ms, us, ns) when it is an
        integer/float number.
freq : str or pandas offset object, optional
One of pandas date offset strings or corresponding objects. The string
'infer' can be passed in order to set the frequency of the index as the
inferred frequency upon creation.
copy : bool
Make a copy of input ndarray.
name : object
Name to be stored in the index.
Attributes
----------
days
seconds
microseconds
nanoseconds
components
inferred_freq
Methods
-------
to_pytimedelta
to_series
round
floor
ceil
to_frame
mean
See Also
--------
Index : The base pandas Index type.
Timedelta : Represents a duration between two dates or times.
DatetimeIndex : Index of datetime64 data.
PeriodIndex : Index of Period data.
timedelta_range : Create a fixed-frequency TimedeltaIndex.
Notes
-----
To learn more about the frequency strings, please see `this link
<https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
"""
_typ = "timedeltaindex"
_data_cls = TimedeltaArray
_engine_type = libindex.TimedeltaEngine
_data: TimedeltaArray
# Use base class method instead of DatetimeTimedeltaMixin._get_string_slice
_get_string_slice = Index._get_string_slice
# -------------------------------------------------------------------
# Constructors
def __new__(
cls,
data=None,
unit=None,
freq=lib.no_default,
closed=None,
dtype=TD64NS_DTYPE,
copy=False,
name=None,
):
name = maybe_extract_name(name, data, cls)
if is_scalar(data):
raise cls._scalar_data_error(data)
if unit in {"Y", "y", "M"}:
raise ValueError(
"Units 'M', 'Y', and 'y' are no longer supported, as they do not "
"represent unambiguous timedelta values durations."
)
if isinstance(data, TimedeltaArray) and freq is lib.no_default:
if copy:
data = data.copy()
return cls._simple_new(data, name=name)
if isinstance(data, TimedeltaIndex) and freq is lib.no_default and name is None:
if copy:
return data.copy()
else:
return data._view()
# - Cases checked above all return/raise before reaching here - #
tdarr = TimedeltaArray._from_sequence_not_strict(
data, freq=freq, unit=unit, dtype=dtype, copy=copy
)
return cls._simple_new(tdarr, name=name)
# -------------------------------------------------------------------
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
"""
Can we compare values of the given dtype to our own?
"""
return is_timedelta64_dtype(dtype) # aka self._data._is_recognized_dtype
# -------------------------------------------------------------------
# Indexing Methods
def get_loc(self, key, method=None, tolerance=None):
"""
        Get integer location for requested label.
Returns
-------
loc : int, slice, or ndarray[int]
"""
self._check_indexing_error(key)
try:
key = self._data._validate_scalar(key, unbox=False)
except TypeError as err:
raise KeyError(key) from err
return Index.get_loc(self, key, method, tolerance)
def _parse_with_reso(self, label: str):
# the "with_reso" is a no-op for TimedeltaIndex
parsed = Timedelta(label)
return parsed, None
def _parsed_string_to_bounds(self, reso, parsed: Timedelta):
# reso is unused, included to match signature of DTI/PI
lbound = parsed.round(parsed.resolution_string)
rbound = lbound + to_offset(parsed.resolution_string) - Timedelta(1, "ns")
return lbound, rbound
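    # Sketch (illustrative): for the label "1 day" the resolution string is
    # "D", giving bounds
    # [Timedelta("1 days"), Timedelta("1 days 23:59:59.999999999")].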
# -------------------------------------------------------------------
@property
def inferred_type(self) -> str:
return "timedelta64"
def timedelta_range(
start=None,
end=None,
periods: int | None = None,
freq=None,
name=None,
closed=None,
) -> TimedeltaIndex:
"""
Return a fixed frequency TimedeltaIndex, with day as the default
frequency.
Parameters
----------
start : str or timedelta-like, default None
Left bound for generating timedeltas.
end : str or timedelta-like, default None
Right bound for generating timedeltas.
periods : int, default None
Number of periods to generate.
freq : str or DateOffset, default 'D'
Frequency strings can have multiples, e.g. '5H'.
name : str, default None
Name of the resulting TimedeltaIndex.
closed : str, default None
Make the interval closed with respect to the given frequency to
the 'left', 'right', or both sides (None).
Returns
-------
TimedeltaIndex
Notes
-----
Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
exactly three must be specified. If ``freq`` is omitted, the resulting
``TimedeltaIndex`` will have ``periods`` linearly spaced elements between
``start`` and ``end`` (closed on both sides).
To learn more about the frequency strings, please see `this link
<https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
Examples
--------
>>> pd.timedelta_range(start='1 day', periods=4)
TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'],
dtype='timedelta64[ns]', freq='D')
The ``closed`` parameter specifies which endpoint is included. The default
behavior is to include both endpoints.
>>> pd.timedelta_range(start='1 day', periods=4, closed='right')
TimedeltaIndex(['2 days', '3 days', '4 days'],
dtype='timedelta64[ns]', freq='D')
The ``freq`` parameter specifies the frequency of the TimedeltaIndex.
Only fixed frequencies can be passed, non-fixed frequencies such as
'M' (month end) will raise.
>>> pd.timedelta_range(start='1 day', end='2 days', freq='6H')
TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00',
'1 days 18:00:00', '2 days 00:00:00'],
dtype='timedelta64[ns]', freq='6H')
Specify ``start``, ``end``, and ``periods``; the frequency is generated
automatically (linearly spaced).
>>> pd.timedelta_range(start='1 day', end='5 days', periods=4)
TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00',
'5 days 00:00:00'],
dtype='timedelta64[ns]', freq=None)
"""
if freq is None and com.any_none(periods, start, end):
freq = "D"
freq, _ = dtl.maybe_infer_freq(freq)
tdarr = TimedeltaArray._generate_range(start, end, periods, freq, closed=closed)
return TimedeltaIndex._simple_new(tdarr, name=name)