2022-05-23 00:16:32 +04:00

1643 lines
47 KiB
Cython

import collections
import warnings
import cython
from cpython.object cimport (
Py_EQ,
Py_NE,
PyObject_RichCompare,
)
import numpy as np
cimport numpy as cnp
from numpy cimport (
int64_t,
ndarray,
)
cnp.import_array()
from cpython.datetime cimport (
PyDateTime_Check,
PyDateTime_IMPORT,
PyDelta_Check,
timedelta,
)
PyDateTime_IMPORT
cimport pandas._libs.tslibs.util as util
from pandas._libs.tslibs.base cimport ABCTimestamp
from pandas._libs.tslibs.conversion cimport (
cast_from_unit,
precision_from_unit,
)
from pandas._libs.tslibs.nattype cimport (
NPY_NAT,
c_NaT as NaT,
c_nat_strings as nat_strings,
checknull_with_nat,
)
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
cmp_scalar,
get_datetime64_unit,
get_timedelta64_value,
pandas_timedeltastruct,
td64_to_tdstruct,
)
from pandas._libs.tslibs.offsets cimport is_tick_object
from pandas._libs.tslibs.util cimport (
is_array,
is_datetime64_object,
is_float_object,
is_integer_object,
is_timedelta64_object,
)
from pandas._libs.tslibs.fields import (
RoundTo,
round_nsint64,
)
# ----------------------------------------------------------------------
# Constants
# components named tuple
Components = collections.namedtuple(
"Components",
[
"days",
"hours",
"minutes",
"seconds",
"milliseconds",
"microseconds",
"nanoseconds",
],
)
cdef dict timedelta_abbrevs = {
"Y": "Y",
"y": "Y",
"M": "M",
"W": "W",
"w": "W",
"D": "D",
"d": "D",
"days": "D",
"day": "D",
"hours": "h",
"hour": "h",
"hr": "h",
"h": "h",
"m": "m",
"minute": "m",
"min": "m",
"minutes": "m",
"t": "m",
"s": "s",
"seconds": "s",
"sec": "s",
"second": "s",
"ms": "ms",
"milliseconds": "ms",
"millisecond": "ms",
"milli": "ms",
"millis": "ms",
"l": "ms",
"us": "us",
"microseconds": "us",
"microsecond": "us",
"µs": "us",
"micro": "us",
"micros": "us",
"u": "us",
"ns": "ns",
"nanoseconds": "ns",
"nano": "ns",
"nanos": "ns",
"nanosecond": "ns",
"n": "ns",
}
_no_input = object()
# ----------------------------------------------------------------------
# API
@cython.boundscheck(False)
@cython.wraparound(False)
def ints_to_pytimedelta(const int64_t[:] arr, box=False):
"""
convert an i8 repr to an ndarray of timedelta or Timedelta (if box ==
True)
Parameters
----------
arr : ndarray[int64_t]
box : bool, default False
Returns
-------
result : ndarray[object]
array of Timedelta or timedeltas objects
"""
cdef:
Py_ssize_t i, n = len(arr)
int64_t value
object[:] result = np.empty(n, dtype=object)
for i in range(n):
value = arr[i]
if value == NPY_NAT:
result[i] = <object>NaT
else:
if box:
result[i] = Timedelta(value)
else:
result[i] = timedelta(microseconds=int(value) / 1000)
return result.base # .base to access underlying np.ndarray
# ----------------------------------------------------------------------
cpdef int64_t delta_to_nanoseconds(delta) except? -1:
if is_tick_object(delta):
return delta.nanos
if isinstance(delta, _Timedelta):
delta = delta.value
if is_timedelta64_object(delta):
return get_timedelta64_value(ensure_td64ns(delta))
if is_integer_object(delta):
return delta
if PyDelta_Check(delta):
try:
return (
delta.days * 24 * 3600 * 1_000_000
+ delta.seconds * 1_000_000
+ delta.microseconds
) * 1000
except OverflowError as err:
from pandas._libs.tslibs.conversion import OutOfBoundsTimedelta
raise OutOfBoundsTimedelta(*err.args) from err
raise TypeError(type(delta))
cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# generic -> default to nanoseconds
return "ns"
elif unit == NPY_DATETIMEUNIT.NPY_FR_us:
return "us"
elif unit == NPY_DATETIMEUNIT.NPY_FR_ms:
return "ms"
elif unit == NPY_DATETIMEUNIT.NPY_FR_s:
return "s"
elif unit == NPY_DATETIMEUNIT.NPY_FR_m:
return "m"
elif unit == NPY_DATETIMEUNIT.NPY_FR_h:
return "h"
elif unit == NPY_DATETIMEUNIT.NPY_FR_D:
return "D"
elif unit == NPY_DATETIMEUNIT.NPY_FR_W:
return "W"
elif unit == NPY_DATETIMEUNIT.NPY_FR_M:
return "M"
elif unit == NPY_DATETIMEUNIT.NPY_FR_Y:
return "Y"
else:
raise NotImplementedError(unit)
@cython.overflowcheck(True)
cdef object ensure_td64ns(object ts):
"""
Overflow-safe implementation of td64.astype("m8[ns]")
Parameters
----------
ts : np.timedelta64
Returns
-------
np.timedelta64[ns]
"""
cdef:
NPY_DATETIMEUNIT td64_unit
int64_t td64_value, mult
str unitstr
td64_unit = get_datetime64_unit(ts)
if (
td64_unit != NPY_DATETIMEUNIT.NPY_FR_ns
and td64_unit != NPY_DATETIMEUNIT.NPY_FR_GENERIC
):
unitstr = npy_unit_to_abbrev(td64_unit)
td64_value = get_timedelta64_value(ts)
mult = precision_from_unit(unitstr)[0]
try:
# NB: cython#1381 this cannot be *=
td64_value = td64_value * mult
except OverflowError as err:
from pandas._libs.tslibs.conversion import OutOfBoundsTimedelta
raise OutOfBoundsTimedelta(ts) from err
return np.timedelta64(td64_value, "ns")
return ts
cdef convert_to_timedelta64(object ts, str unit):
"""
Convert an incoming object to a timedelta64 if possible.
Before calling, unit must be standardized to avoid repeated unit conversion
Handle these types of objects:
- timedelta/Timedelta
- timedelta64
- an offset
- np.int64 (with unit providing a possible modifier)
- None/NaT
Return an ns based int64
"""
if checknull_with_nat(ts):
return np.timedelta64(NPY_NAT, "ns")
elif isinstance(ts, _Timedelta):
# already in the proper format
ts = np.timedelta64(ts.value, "ns")
elif is_timedelta64_object(ts):
ts = ensure_td64ns(ts)
elif is_integer_object(ts):
if ts == NPY_NAT:
return np.timedelta64(NPY_NAT, "ns")
else:
if unit in ["Y", "M", "W"]:
ts = np.timedelta64(ts, unit)
else:
ts = cast_from_unit(ts, unit)
ts = np.timedelta64(ts, "ns")
elif is_float_object(ts):
if unit in ["Y", "M", "W"]:
ts = np.timedelta64(int(ts), unit)
else:
ts = cast_from_unit(ts, unit)
ts = np.timedelta64(ts, "ns")
elif isinstance(ts, str):
if (len(ts) > 0 and ts[0] == "P") or (len(ts) > 1 and ts[:2] == "-P"):
ts = parse_iso_format_string(ts)
else:
ts = parse_timedelta_string(ts)
ts = np.timedelta64(ts, "ns")
elif is_tick_object(ts):
ts = np.timedelta64(ts.nanos, "ns")
if PyDelta_Check(ts):
ts = np.timedelta64(delta_to_nanoseconds(ts), "ns")
elif not is_timedelta64_object(ts):
raise ValueError(f"Invalid type for timedelta scalar: {type(ts)}")
return ts.astype("timedelta64[ns]")
@cython.boundscheck(False)
@cython.wraparound(False)
def array_to_timedelta64(
ndarray[object] values, str unit=None, str errors="raise"
) -> ndarray:
"""
Convert an ndarray to an array of timedeltas. If errors == 'coerce',
coerce non-convertible objects to NaT. Otherwise, raise.
Returns
-------
np.ndarray[timedelta64ns]
"""
cdef:
Py_ssize_t i, n
int64_t[:] iresult
if errors not in {'ignore', 'raise', 'coerce'}:
raise ValueError("errors must be one of {'ignore', 'raise', or 'coerce'}")
n = values.shape[0]
result = np.empty(n, dtype='m8[ns]')
iresult = result.view('i8')
if unit is not None:
for i in range(n):
if isinstance(values[i], str) and errors != "coerce":
raise ValueError(
"unit must not be specified if the input contains a str"
)
# Usually, we have all strings. If so, we hit the fast path.
# If this path fails, we try conversion a different way, and
# this is where all of the error handling will take place.
try:
for i in range(n):
if values[i] is NaT:
# we allow this check in the fast-path because NaT is a C-object
# so this is an inexpensive check
iresult[i] = NPY_NAT
else:
result[i] = parse_timedelta_string(values[i])
except (TypeError, ValueError):
parsed_unit = parse_timedelta_unit(unit or 'ns')
for i in range(n):
try:
result[i] = convert_to_timedelta64(values[i], parsed_unit)
except ValueError as err:
if errors == 'coerce':
result[i] = NPY_NAT
elif "unit abbreviation w/o a number" in str(err):
# re-raise with more pertinent message
msg = f"Could not convert '{values[i]}' to NumPy timedelta"
raise ValueError(msg) from err
else:
raise
return iresult.base # .base to access underlying np.ndarray
cdef inline int64_t parse_timedelta_string(str ts) except? -1:
"""
Parse a regular format timedelta string. Return an int64_t (in ns)
or raise a ValueError on an invalid parse.
"""
cdef:
unicode c
bint neg = 0, have_dot = 0, have_value = 0, have_hhmmss = 0
object current_unit = None
int64_t result = 0, m = 0, r
list number = [], frac = [], unit = []
# neg : tracks if we have a leading negative for the value
# have_dot : tracks if we are processing a dot (either post hhmmss or
# inside an expression)
# have_value : track if we have at least 1 leading unit
# have_hhmmss : tracks if we have a regular format hh:mm:ss
if len(ts) == 0 or ts in nat_strings:
return NPY_NAT
for c in ts:
# skip whitespace / commas
if c == ' ' or c == ',':
pass
# positive signs are ignored
elif c == '+':
pass
# neg
elif c == '-':
if neg or have_value or have_hhmmss:
raise ValueError("only leading negative signs are allowed")
neg = 1
# number (ascii codes)
elif ord(c) >= 48 and ord(c) <= 57:
if have_dot:
# we found a dot, but now its just a fraction
if len(unit):
number.append(c)
have_dot = 0
else:
frac.append(c)
elif not len(unit):
number.append(c)
else:
r = timedelta_from_spec(number, frac, unit)
unit, number, frac = [], [c], []
result += timedelta_as_neg(r, neg)
# hh:mm:ss.
elif c == ':':
# we flip this off if we have a leading value
if have_value:
neg = 0
# we are in the pattern hh:mm:ss pattern
if len(number):
if current_unit is None:
current_unit = 'h'
m = 1000000000 * 3600
elif current_unit == 'h':
current_unit = 'm'
m = 1000000000 * 60
elif current_unit == 'm':
current_unit = 's'
m = 1000000000
r = <int64_t>int(''.join(number)) * m
result += timedelta_as_neg(r, neg)
have_hhmmss = 1
else:
raise ValueError(f"expecting hh:mm:ss format, received: {ts}")
unit, number = [], []
# after the decimal point
elif c == '.':
if len(number) and current_unit is not None:
# by definition we had something like
# so we need to evaluate the final field from a
# hh:mm:ss (so current_unit is 'm')
if current_unit != 'm':
raise ValueError("expected hh:mm:ss format before .")
m = 1000000000
r = <int64_t>int(''.join(number)) * m
result += timedelta_as_neg(r, neg)
have_value = 1
unit, number, frac = [], [], []
have_dot = 1
# unit
else:
unit.append(c)
have_value = 1
have_dot = 0
# we had a dot, but we have a fractional
# value since we have an unit
if have_dot and len(unit):
r = timedelta_from_spec(number, frac, unit)
result += timedelta_as_neg(r, neg)
# we have a dot as part of a regular format
# e.g. hh:mm:ss.fffffff
elif have_dot:
if ((len(number) or len(frac)) and not len(unit)
and current_unit is None):
raise ValueError("no units specified")
if len(frac) > 0 and len(frac) <= 3:
m = 10**(3 -len(frac)) * 1000 * 1000
elif len(frac) > 3 and len(frac) <= 6:
m = 10**(6 -len(frac)) * 1000
elif len(frac) > 6 and len(frac) <= 9:
m = 10**(9 -len(frac))
else:
m = 1
frac = frac[:9]
r = <int64_t>int(''.join(frac)) * m
result += timedelta_as_neg(r, neg)
# we have a regular format
# we must have seconds at this point (hence the unit is still 'm')
elif current_unit is not None:
if current_unit != 'm':
raise ValueError("expected hh:mm:ss format")
m = 1000000000
r = <int64_t>int(''.join(number)) * m
result += timedelta_as_neg(r, neg)
# we have a last abbreviation
elif len(unit):
if len(number):
r = timedelta_from_spec(number, frac, unit)
result += timedelta_as_neg(r, neg)
else:
raise ValueError("unit abbreviation w/o a number")
# we only have symbols and no numbers
elif len(number) == 0:
raise ValueError("symbols w/o a number")
# treat as nanoseconds
# but only if we don't have anything else
else:
if have_value:
raise ValueError("have leftover units")
if len(number):
r = timedelta_from_spec(number, frac, 'ns')
result += timedelta_as_neg(r, neg)
return result
cdef inline int64_t timedelta_as_neg(int64_t value, bint neg):
"""
Parameters
----------
value : int64_t of the timedelta value
neg : bool if the a negative value
"""
if neg:
return -value
return value
cdef inline timedelta_from_spec(object number, object frac, object unit):
"""
Parameters
----------
number : a list of number digits
frac : a list of frac digits
unit : a list of unit characters
"""
cdef:
str n
try:
unit = ''.join(unit)
if unit in ["M", "Y", "y"]:
warnings.warn(
"Units 'M', 'Y' and 'y' do not represent unambiguous "
"timedelta values and will be removed in a future version.",
FutureWarning,
stacklevel=2,
)
if unit == 'M':
# To parse ISO 8601 string, 'M' should be treated as minute,
# not month
unit = 'm'
unit = parse_timedelta_unit(unit)
except KeyError:
raise ValueError(f"invalid abbreviation: {unit}")
n = ''.join(number) + '.' + ''.join(frac)
return cast_from_unit(float(n), unit)
cpdef inline str parse_timedelta_unit(str unit):
"""
Parameters
----------
unit : str or None
Returns
-------
str
Canonical unit string.
Raises
------
ValueError : on non-parseable input
"""
if unit is None:
return "ns"
elif unit == "M":
return unit
try:
return timedelta_abbrevs[unit.lower()]
except (KeyError, AttributeError):
raise ValueError(f"invalid unit abbreviation: {unit}")
# ----------------------------------------------------------------------
# Timedelta ops utilities
cdef bint _validate_ops_compat(other):
# return True if we are compat with operating
if checknull_with_nat(other):
return True
elif is_any_td_scalar(other):
return True
elif isinstance(other, str):
return True
return False
def _op_unary_method(func, name):
def f(self):
return Timedelta(func(self.value), unit='ns')
f.__name__ = name
return f
def _binary_op_method_timedeltalike(op, name):
# define a binary operation that only works if the other argument is
# timedelta like or an array of timedeltalike
def f(self, other):
if other is NaT:
return NaT
elif is_datetime64_object(other) or (
PyDateTime_Check(other) and not isinstance(other, ABCTimestamp)
):
# this case is for a datetime object that is specifically
# *not* a Timestamp, as the Timestamp case will be
# handled after `_validate_ops_compat` returns False below
from pandas._libs.tslibs.timestamps import Timestamp
return op(self, Timestamp(other))
# We are implicitly requiring the canonical behavior to be
# defined by Timestamp methods.
elif is_array(other):
# nd-array like
if other.dtype.kind in ['m', 'M']:
return op(self.to_timedelta64(), other)
elif other.dtype.kind == 'O':
return np.array([op(self, x) for x in other])
else:
return NotImplemented
elif not _validate_ops_compat(other):
# Includes any of our non-cython classes
return NotImplemented
try:
other = Timedelta(other)
except ValueError:
# failed to parse as timedelta
return NotImplemented
if other is NaT:
# e.g. if original other was timedelta64('NaT')
return NaT
return Timedelta(op(self.value, other.value), unit='ns')
f.__name__ = name
return f
# ----------------------------------------------------------------------
# Timedelta Construction
cdef inline int64_t parse_iso_format_string(str ts) except? -1:
"""
Extracts and cleanses the appropriate values from a match object with
groups for each component of an ISO 8601 duration
Parameters
----------
ts: str
ISO 8601 Duration formatted string
Returns
-------
ns: int64_t
Precision in nanoseconds of matched ISO 8601 duration
Raises
------
ValueError
If ``ts`` cannot be parsed
"""
cdef:
unicode c
int64_t result = 0, r
int p = 0, sign = 1
object dec_unit = 'ms', err_msg
bint have_dot = 0, have_value = 0, neg = 0
list number = [], unit = []
err_msg = f"Invalid ISO 8601 Duration format - {ts}"
if ts[0] == "-":
sign = -1
ts = ts[1:]
for c in ts:
# number (ascii codes)
if 48 <= ord(c) <= 57:
have_value = 1
if have_dot:
if p == 3 and dec_unit != 'ns':
unit.append(dec_unit)
if dec_unit == 'ms':
dec_unit = 'us'
elif dec_unit == 'us':
dec_unit = 'ns'
p = 0
p += 1
if not len(unit):
number.append(c)
else:
r = timedelta_from_spec(number, '0', unit)
result += timedelta_as_neg(r, neg)
neg = 0
unit, number = [], [c]
else:
if c == 'P' or c == 'T':
pass # ignore marking characters P and T
elif c == '-':
if neg or have_value:
raise ValueError(err_msg)
else:
neg = 1
elif c == "+":
pass
elif c in ['W', 'D', 'H', 'M']:
if c in ['H', 'M'] and len(number) > 2:
raise ValueError(err_msg)
if c == 'M':
c = 'min'
unit.append(c)
r = timedelta_from_spec(number, '0', unit)
result += timedelta_as_neg(r, neg)
neg = 0
unit, number = [], []
elif c == '.':
# append any seconds
if len(number):
r = timedelta_from_spec(number, '0', 'S')
result += timedelta_as_neg(r, neg)
unit, number = [], []
have_dot = 1
elif c == 'S':
if have_dot: # ms, us, or ns
if not len(number) or p > 3:
raise ValueError(err_msg)
# pad to 3 digits as required
pad = 3 - p
while pad > 0:
number.append('0')
pad -= 1
r = timedelta_from_spec(number, '0', dec_unit)
result += timedelta_as_neg(r, neg)
else: # seconds
r = timedelta_from_spec(number, '0', 'S')
result += timedelta_as_neg(r, neg)
else:
raise ValueError(err_msg)
if not have_value:
# Received string only - never parsed any values
raise ValueError(err_msg)
return sign*result
cdef _to_py_int_float(v):
# Note: This used to be defined inside Timedelta.__new__
# but cython will not allow `cdef` functions to be defined dynamically.
if is_integer_object(v):
return int(v)
elif is_float_object(v):
return float(v)
raise TypeError(f"Invalid type {type(v)}. Must be int or float.")
# Similar to Timestamp/datetime, this is a construction requirement for
# timedeltas that we need to do object instantiation in python. This will
# serve as a C extension type that shadows the Python class, where we do any
# heavy lifting.
cdef class _Timedelta(timedelta):
# cdef readonly:
# int64_t value # nanoseconds
# object freq # frequency reference
# bint is_populated # are my components populated
# int64_t _d, _h, _m, _s, _ms, _us, _ns
# higher than np.ndarray and np.matrix
__array_priority__ = 100
def __hash__(_Timedelta self):
if self._has_ns():
return hash(self.value)
else:
return timedelta.__hash__(self)
def __richcmp__(_Timedelta self, object other, int op):
cdef:
_Timedelta ots
int ndim
if isinstance(other, _Timedelta):
ots = other
elif is_any_td_scalar(other):
ots = Timedelta(other)
# TODO: watch out for overflows
elif other is NaT:
return op == Py_NE
elif util.is_array(other):
# TODO: watch out for zero-dim
if other.dtype.kind == "m":
return PyObject_RichCompare(self.asm8, other, op)
elif other.dtype.kind == "O":
# operate element-wise
return np.array(
[PyObject_RichCompare(self, x, op) for x in other],
dtype=bool,
)
if op == Py_EQ:
return np.zeros(other.shape, dtype=bool)
elif op == Py_NE:
return np.ones(other.shape, dtype=bool)
return NotImplemented # let other raise TypeError
else:
return NotImplemented
return cmp_scalar(self.value, ots.value, op)
cpdef bint _has_ns(self):
return self.value % 1000 != 0
def _ensure_components(_Timedelta self):
"""
compute the components
"""
if self.is_populated:
return
cdef:
pandas_timedeltastruct tds
td64_to_tdstruct(self.value, &tds)
self._d = tds.days
self._h = tds.hrs
self._m = tds.min
self._s = tds.sec
self._ms = tds.ms
self._us = tds.us
self._ns = tds.ns
self._seconds = tds.seconds
self._microseconds = tds.microseconds
self.is_populated = 1
cpdef timedelta to_pytimedelta(_Timedelta self):
"""
Convert a pandas Timedelta object into a python ``datetime.timedelta`` object.
Timedelta objects are internally saved as numpy datetime64[ns] dtype.
Use to_pytimedelta() to convert to object dtype.
Returns
-------
datetime.timedelta or numpy.array of datetime.timedelta
See Also
--------
to_timedelta : Convert argument to Timedelta type.
Notes
-----
Any nanosecond resolution will be lost.
"""
return timedelta(microseconds=int(self.value) / 1000)
def to_timedelta64(self) -> np.timedelta64:
"""
Return a numpy.timedelta64 object with 'ns' precision.
"""
return np.timedelta64(self.value, 'ns')
def to_numpy(self, dtype=None, copy=False) -> np.timedelta64:
"""
Convert the Timedelta to a NumPy timedelta64.
.. versionadded:: 0.25.0
This is an alias method for `Timedelta.to_timedelta64()`. The dtype and
copy parameters are available here only for compatibility. Their values
will not affect the return value.
Returns
-------
numpy.timedelta64
See Also
--------
Series.to_numpy : Similar method for Series.
"""
if dtype is not None or copy is not False:
raise ValueError(
"Timedelta.to_numpy dtype and copy arguments are ignored"
)
return self.to_timedelta64()
def view(self, dtype):
"""
Array view compatibility.
"""
return np.timedelta64(self.value).view(dtype)
@property
def components(self):
"""
Return a components namedtuple-like.
"""
self._ensure_components()
# return the named tuple
return Components(self._d, self._h, self._m, self._s,
self._ms, self._us, self._ns)
@property
def delta(self):
"""
Return the timedelta in nanoseconds (ns), for internal compatibility.
Returns
-------
int
Timedelta in nanoseconds.
Examples
--------
>>> td = pd.Timedelta('1 days 42 ns')
>>> td.delta
86400000000042
>>> td = pd.Timedelta('3 s')
>>> td.delta
3000000000
>>> td = pd.Timedelta('3 ms 5 us')
>>> td.delta
3005000
>>> td = pd.Timedelta(42, unit='ns')
>>> td.delta
42
"""
return self.value
@property
def asm8(self) -> np.timedelta64:
"""
Return a numpy timedelta64 array scalar view.
Provides access to the array scalar view (i.e. a combination of the
value and the units) associated with the numpy.timedelta64().view(),
including a 64-bit integer representation of the timedelta in
nanoseconds (Python int compatible).
Returns
-------
numpy timedelta64 array scalar view
Array scalar view of the timedelta in nanoseconds.
Examples
--------
>>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
>>> td.asm8
numpy.timedelta64(86520000003042,'ns')
>>> td = pd.Timedelta('2 min 3 s')
>>> td.asm8
numpy.timedelta64(123000000000,'ns')
>>> td = pd.Timedelta('3 ms 5 us')
>>> td.asm8
numpy.timedelta64(3005000,'ns')
>>> td = pd.Timedelta(42, unit='ns')
>>> td.asm8
numpy.timedelta64(42,'ns')
"""
return np.int64(self.value).view('m8[ns]')
@property
def resolution_string(self) -> str:
"""
Return a string representing the lowest timedelta resolution.
Each timedelta has a defined resolution that represents the lowest OR
most granular level of precision. Each level of resolution is
represented by a short string as defined below:
Resolution: Return value
* Days: 'D'
* Hours: 'H'
* Minutes: 'T'
* Seconds: 'S'
* Milliseconds: 'L'
* Microseconds: 'U'
* Nanoseconds: 'N'
Returns
-------
str
Timedelta resolution.
Examples
--------
>>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
>>> td.resolution_string
'N'
>>> td = pd.Timedelta('1 days 2 min 3 us')
>>> td.resolution_string
'U'
>>> td = pd.Timedelta('2 min 3 s')
>>> td.resolution_string
'S'
>>> td = pd.Timedelta(36, unit='us')
>>> td.resolution_string
'U'
"""
self._ensure_components()
if self._ns:
return "N"
elif self._us:
return "U"
elif self._ms:
return "L"
elif self._s:
return "S"
elif self._m:
return "T"
elif self._h:
return "H"
else:
return "D"
@property
def nanoseconds(self):
"""
Return the number of nanoseconds (n), where 0 <= n < 1 microsecond.
Returns
-------
int
Number of nanoseconds.
See Also
--------
Timedelta.components : Return all attributes with assigned values
(i.e. days, hours, minutes, seconds, milliseconds, microseconds,
nanoseconds).
Examples
--------
**Using string input**
>>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
>>> td.nanoseconds
42
**Using integer input**
>>> td = pd.Timedelta(42, unit='ns')
>>> td.nanoseconds
42
"""
self._ensure_components()
return self._ns
def _repr_base(self, format=None) -> str:
"""
Parameters
----------
format : None|all|sub_day|long
Returns
-------
converted : string of a Timedelta
"""
cdef object sign, seconds_pretty, subs, fmt, comp_dict
self._ensure_components()
if self._d < 0:
sign = " +"
else:
sign = " "
if format == 'all':
fmt = ("{days} days{sign}{hours:02}:{minutes:02}:{seconds:02}."
"{milliseconds:03}{microseconds:03}{nanoseconds:03}")
else:
# if we have a partial day
subs = (self._h or self._m or self._s or
self._ms or self._us or self._ns)
if self._ms or self._us or self._ns:
seconds_fmt = "{seconds:02}.{milliseconds:03}{microseconds:03}"
if self._ns:
# GH#9309
seconds_fmt += "{nanoseconds:03}"
else:
seconds_fmt = "{seconds:02}"
if format == 'sub_day' and not self._d:
fmt = "{hours:02}:{minutes:02}:" + seconds_fmt
elif subs or format == 'long':
fmt = "{days} days{sign}{hours:02}:{minutes:02}:" + seconds_fmt
else:
fmt = "{days} days"
comp_dict = self.components._asdict()
comp_dict['sign'] = sign
return fmt.format(**comp_dict)
def __repr__(self) -> str:
repr_based = self._repr_base(format='long')
return f"Timedelta('{repr_based}')"
def __str__(self) -> str:
return self._repr_base(format='long')
def __bool__(self) -> bool:
return self.value != 0
def isoformat(self) -> str:
"""
Format Timedelta as ISO 8601 Duration like
``P[n]Y[n]M[n]DT[n]H[n]M[n]S``, where the ``[n]`` s are replaced by the
values. See https://en.wikipedia.org/wiki/ISO_8601#Durations.
Returns
-------
str
See Also
--------
Timestamp.isoformat : Function is used to convert the given
Timestamp object into the ISO format.
Notes
-----
The longest component is days, whose value may be larger than
365.
Every component is always included, even if its value is 0.
Pandas uses nanosecond precision, so up to 9 decimal places may
be included in the seconds component.
Trailing 0's are removed from the seconds component after the decimal.
We do not 0 pad components, so it's `...T5H...`, not `...T05H...`
Examples
--------
>>> td = pd.Timedelta(days=6, minutes=50, seconds=3,
... milliseconds=10, microseconds=10, nanoseconds=12)
>>> td.isoformat()
'P6DT0H50M3.010010012S'
>>> pd.Timedelta(hours=1, seconds=10).isoformat()
'P0DT1H0M10S'
>>> pd.Timedelta(days=500.5).isoformat()
'P500DT12H0M0S'
"""
components = self.components
seconds = (f'{components.seconds}.'
f'{components.milliseconds:0>3}'
f'{components.microseconds:0>3}'
f'{components.nanoseconds:0>3}')
# Trim unnecessary 0s, 1.000000000 -> 1
seconds = seconds.rstrip('0').rstrip('.')
tpl = (f'P{components.days}DT{components.hours}'
f'H{components.minutes}M{seconds}S')
return tpl
# Python front end to C extension type _Timedelta
# This serves as the box for timedelta64
class Timedelta(_Timedelta):
"""
Represents a duration, the difference between two dates or times.
Timedelta is the pandas equivalent of python's ``datetime.timedelta``
and is interchangeable with it in most cases.
Parameters
----------
value : Timedelta, timedelta, np.timedelta64, str, or int
unit : str, default 'ns'
Denote the unit of the input, if input is an integer.
Possible values:
* 'W', 'D', 'T', 'S', 'L', 'U', or 'N'
* 'days' or 'day'
* 'hours', 'hour', 'hr', or 'h'
* 'minutes', 'minute', 'min', or 'm'
* 'seconds', 'second', or 'sec'
* 'milliseconds', 'millisecond', 'millis', or 'milli'
* 'microseconds', 'microsecond', 'micros', or 'micro'
* 'nanoseconds', 'nanosecond', 'nanos', 'nano', or 'ns'.
**kwargs
Available kwargs: {days, seconds, microseconds,
milliseconds, minutes, hours, weeks}.
Values for construction in compat with datetime.timedelta.
Numpy ints and floats will be coerced to python ints and floats.
Notes
-----
The constructor may take in either both values of value and unit or
kwargs as above. Either one of them must be used during initialization
The ``.value`` attribute is always in ns.
If the precision is higher than nanoseconds, the precision of the duration is
truncated to nanoseconds.
Examples
--------
Here we initialize Timedelta object with both value and unit
>>> td = pd.Timedelta(1, "d")
>>> td
Timedelta('1 days 00:00:00')
Here we initialize the Timedelta object with kwargs
>>> td2 = pd.Timedelta(days=1)
>>> td2
Timedelta('1 days 00:00:00')
We see that either way we get the same result
"""
_req_any_kwargs_new = {"weeks", "days", "hours", "minutes", "seconds",
"milliseconds", "microseconds", "nanoseconds"}
def __new__(cls, object value=_no_input, unit=None, **kwargs):
cdef _Timedelta td_base
if value is _no_input:
if not len(kwargs):
raise ValueError("cannot construct a Timedelta without a "
"value/unit or descriptive keywords "
"(days,seconds....)")
kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs}
unsupported_kwargs = set(kwargs)
unsupported_kwargs.difference_update(cls._req_any_kwargs_new)
if unsupported_kwargs or not cls._req_any_kwargs_new.intersection(kwargs):
raise ValueError(
"cannot construct a Timedelta from the passed arguments, "
"allowed keywords are "
"[weeks, days, hours, minutes, seconds, "
"milliseconds, microseconds, nanoseconds]"
)
# GH43764, convert any input to nanoseconds first and then
# create the timestamp. This ensures that any potential
# nanosecond contributions from kwargs parsed as floats
# are taken into consideration.
seconds = int((
(
(kwargs.get('days', 0) + kwargs.get('weeks', 0) * 7) * 24
+ kwargs.get('hours', 0)
) * 3600
+ kwargs.get('minutes', 0) * 60
+ kwargs.get('seconds', 0)
) * 1_000_000_000
)
value = np.timedelta64(
int(kwargs.get('nanoseconds', 0))
+ int(kwargs.get('microseconds', 0) * 1_000)
+ int(kwargs.get('milliseconds', 0) * 1_000_000)
+ seconds
)
if unit in {'Y', 'y', 'M'}:
raise ValueError(
"Units 'M', 'Y', and 'y' are no longer supported, as they do not "
"represent unambiguous timedelta values durations."
)
# GH 30543 if pd.Timedelta already passed, return it
# check that only value is passed
if isinstance(value, _Timedelta) and unit is None and len(kwargs) == 0:
return value
elif isinstance(value, _Timedelta):
value = value.value
elif isinstance(value, str):
if unit is not None:
raise ValueError("unit must not be specified if the value is a str")
if (len(value) > 0 and value[0] == 'P') or (
len(value) > 1 and value[:2] == '-P'
):
value = parse_iso_format_string(value)
else:
value = parse_timedelta_string(value)
value = np.timedelta64(value)
elif PyDelta_Check(value):
value = convert_to_timedelta64(value, 'ns')
elif is_timedelta64_object(value):
if unit is not None:
value = value.astype(f'timedelta64[{unit}]')
value = ensure_td64ns(value)
elif is_tick_object(value):
value = np.timedelta64(value.nanos, 'ns')
elif is_integer_object(value) or is_float_object(value):
# unit=None is de-facto 'ns'
unit = parse_timedelta_unit(unit)
value = convert_to_timedelta64(value, unit)
elif checknull_with_nat(value):
return NaT
else:
raise ValueError(
"Value must be Timedelta, string, integer, "
f"float, timedelta or convertible, not {type(value).__name__}"
)
if is_timedelta64_object(value):
value = value.view('i8')
# nat
if value == NPY_NAT:
return NaT
# make timedelta happy
td_base = _Timedelta.__new__(cls, microseconds=int(value) // 1000)
td_base.value = value
td_base.is_populated = 0
return td_base
def __setstate__(self, state):
(value) = state
self.value = value
def __reduce__(self):
object_state = self.value,
return (Timedelta, object_state)
@cython.cdivision(True)
def _round(self, freq, mode):
cdef:
int64_t result, unit, remainder
ndarray[int64_t] arr
from pandas._libs.tslibs.offsets import to_offset
unit = to_offset(freq).nanos
arr = np.array([self.value], dtype="i8")
result = round_nsint64(arr, mode, unit)[0]
return Timedelta(result, unit="ns")
def round(self, freq):
"""
Round the Timedelta to the specified resolution.
Parameters
----------
freq : str
Frequency string indicating the rounding resolution.
Returns
-------
a new Timedelta rounded to the given resolution of `freq`
Raises
------
ValueError if the freq cannot be converted
"""
return self._round(freq, RoundTo.NEAREST_HALF_EVEN)
def floor(self, freq):
"""
Return a new Timedelta floored to this resolution.
Parameters
----------
freq : str
Frequency string indicating the flooring resolution.
"""
return self._round(freq, RoundTo.MINUS_INFTY)
def ceil(self, freq):
"""
Return a new Timedelta ceiled to this resolution.
Parameters
----------
freq : str
Frequency string indicating the ceiling resolution.
"""
return self._round(freq, RoundTo.PLUS_INFTY)
# ----------------------------------------------------------------
# Arithmetic Methods
# TODO: Can some of these be defined in the cython class?
__neg__ = _op_unary_method(lambda x: -x, '__neg__')
__pos__ = _op_unary_method(lambda x: x, '__pos__')
__abs__ = _op_unary_method(lambda x: abs(x), '__abs__')
__add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__')
__radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__')
__sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__')
__rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__')
def __mul__(self, other):
if is_integer_object(other) or is_float_object(other):
return Timedelta(other * self.value, unit='ns')
elif is_array(other):
# ndarray-like
return other * self.to_timedelta64()
return NotImplemented
__rmul__ = __mul__
def __truediv__(self, other):
if _should_cast_to_timedelta(other):
# We interpret NaT as timedelta64("NaT")
other = Timedelta(other)
if other is NaT:
return np.nan
return self.value / float(other.value)
elif is_integer_object(other) or is_float_object(other):
# integers or floats
return Timedelta(self.value / other, unit='ns')
elif is_array(other):
return self.to_timedelta64() / other
return NotImplemented
def __rtruediv__(self, other):
if _should_cast_to_timedelta(other):
# We interpret NaT as timedelta64("NaT")
other = Timedelta(other)
if other is NaT:
return np.nan
return float(other.value) / self.value
elif is_array(other):
if other.dtype.kind == "O":
# GH#31869
return np.array([x / self for x in other])
return other / self.to_timedelta64()
return NotImplemented
def __floordiv__(self, other):
# numpy does not implement floordiv for timedelta64 dtype, so we cannot
# just defer
if _should_cast_to_timedelta(other):
# We interpret NaT as timedelta64("NaT")
other = Timedelta(other)
if other is NaT:
return np.nan
return self.value // other.value
elif is_integer_object(other) or is_float_object(other):
return Timedelta(self.value // other, unit='ns')
elif is_array(other):
if other.dtype.kind == 'm':
# also timedelta-like
return _broadcast_floordiv_td64(self.value, other, _floordiv)
elif other.dtype.kind in ['i', 'u', 'f']:
if other.ndim == 0:
return Timedelta(self.value // other)
else:
return self.to_timedelta64() // other
raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__')
return NotImplemented
def __rfloordiv__(self, other):
# numpy does not implement floordiv for timedelta64 dtype, so we cannot
# just defer
if _should_cast_to_timedelta(other):
# We interpret NaT as timedelta64("NaT")
other = Timedelta(other)
if other is NaT:
return np.nan
return other.value // self.value
elif is_array(other):
if other.dtype.kind == 'm':
# also timedelta-like
return _broadcast_floordiv_td64(self.value, other, _rfloordiv)
# Includes integer array // Timedelta, disallowed in GH#19761
raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__')
return NotImplemented
def __mod__(self, other):
# Naive implementation, room for optimization
return self.__divmod__(other)[1]
def __rmod__(self, other):
# Naive implementation, room for optimization
return self.__rdivmod__(other)[1]
def __divmod__(self, other):
# Naive implementation, room for optimization
div = self // other
return div, self - div * other
def __rdivmod__(self, other):
# Naive implementation, room for optimization
div = other // self
return div, other - div * self
cdef bint is_any_td_scalar(object obj):
"""
Cython equivalent for `isinstance(obj, (timedelta, np.timedelta64, Tick))`
Parameters
----------
obj : object
Returns
-------
bool
"""
return (
PyDelta_Check(obj) or is_timedelta64_object(obj) or is_tick_object(obj)
)
cdef bint _should_cast_to_timedelta(object obj):
"""
Should we treat this object as a Timedelta for the purpose of a binary op
"""
return (
is_any_td_scalar(obj) or obj is None or obj is NaT or isinstance(obj, str)
)
cdef _floordiv(int64_t value, right):
return value // right
cdef _rfloordiv(int64_t value, right):
# analogous to referencing operator.div, but there is no operator.rfloordiv
return right // value
cdef _broadcast_floordiv_td64(
int64_t value,
ndarray other,
object (*operation)(int64_t value, object right)
):
"""
Boilerplate code shared by Timedelta.__floordiv__ and
Timedelta.__rfloordiv__ because np.timedelta64 does not implement these.
Parameters
----------
value : int64_t; `self.value` from a Timedelta object
other : object
operation : function, either _floordiv or _rfloordiv
Returns
-------
result : varies based on `other`
"""
# assumes other.dtype.kind == 'm', i.e. other is timedelta-like
# We need to watch out for np.timedelta64('NaT').
mask = other.view('i8') == NPY_NAT
if other.ndim == 0:
if mask:
return np.nan
return operation(value, other.astype('m8[ns]').astype('i8'))
else:
res = operation(value, other.astype('m8[ns]').astype('i8'))
if mask.any():
res = res.astype('f8')
res[mask] = np.nan
return res
# resolution in ns
Timedelta.min = Timedelta(np.iinfo(np.int64).min + 1)
Timedelta.max = Timedelta(np.iinfo(np.int64).max)
Timedelta.resolution = Timedelta(nanoseconds=1)