import collections
import warnings

import cython

from cpython.object cimport (
    Py_EQ,
    Py_NE,
    PyObject_RichCompare,
)

import numpy as np

cimport numpy as cnp
from numpy cimport (
    int64_t,
    ndarray,
)

cnp.import_array()

from cpython.datetime cimport (
    PyDateTime_Check,
    PyDateTime_IMPORT,
    PyDelta_Check,
    timedelta,
)

PyDateTime_IMPORT


cimport pandas._libs.tslibs.util as util
from pandas._libs.tslibs.base cimport ABCTimestamp
from pandas._libs.tslibs.conversion cimport (
    cast_from_unit,
    precision_from_unit,
)
from pandas._libs.tslibs.nattype cimport (
    NPY_NAT,
    c_NaT as NaT,
    c_nat_strings as nat_strings,
    checknull_with_nat,
)
from pandas._libs.tslibs.np_datetime cimport (
    NPY_DATETIMEUNIT,
    cmp_scalar,
    get_datetime64_unit,
    get_timedelta64_value,
    pandas_timedeltastruct,
    td64_to_tdstruct,
)
from pandas._libs.tslibs.offsets cimport is_tick_object
from pandas._libs.tslibs.util cimport (
    is_array,
    is_datetime64_object,
    is_float_object,
    is_integer_object,
    is_timedelta64_object,
)

from pandas._libs.tslibs.fields import (
    RoundTo,
    round_nsint64,
)

# ----------------------------------------------------------------------
# Constants

# Named tuple returned by ``_Timedelta.components``; the field order is the
# positional order used when it is constructed there.
Components = collections.namedtuple(
    "Components",
    [
        "days",
        "hours",
        "minutes",
        "seconds",
        "milliseconds",
        "microseconds",
        "nanoseconds",
    ],
)

# Maps every accepted unit spelling to its canonical abbreviation.
# ``parse_timedelta_unit`` lower-cases its argument before looking it up
# here, so only the lower-case keys can be matched through that path
# ("M" is special-cased there before the lookup).
cdef dict timedelta_abbrevs = {
    "Y": "Y",
    "y": "Y",
    "M": "M",
    "W": "W",
    "w": "W",
    "D": "D",
    "d": "D",
    "days": "D",
    "day": "D",
    "hours": "h",
    "hour": "h",
    "hr": "h",
    "h": "h",
    "m": "m",
    "minute": "m",
    "min": "m",
    "minutes": "m",
    "t": "m",
    "s": "s",
    "seconds": "s",
    "sec": "s",
    "second": "s",
    "ms": "ms",
    "milliseconds": "ms",
    "millisecond": "ms",
    "milli": "ms",
    "millis": "ms",
    "l": "ms",
    "us": "us",
    "microseconds": "us",
    "microsecond": "us",
    "µs": "us",
    "micro": "us",
    "micros": "us",
    "u": "us",
    "ns": "ns",
    "nanoseconds": "ns",
    "nano": "ns",
    "nanos": "ns",
    "nanosecond": "ns",
    "n": "ns",
}

# Sentinel default for ``Timedelta.__new__`` so that an explicitly passed
# ``None`` can be told apart from "no value given".
_no_input = object()


# ----------------------------------------------------------------------
# API

@cython.boundscheck(False)
@cython.wraparound(False)
def ints_to_pytimedelta(const int64_t[:] arr, box=False):
    """
    convert an i8 repr to an ndarray of timedelta or Timedelta (if box ==
    True)

    Parameters
    ----------
    arr : ndarray[int64_t]
        Nanosecond values; entries equal to NPY_NAT become NaT.
    box : bool, default False
        If True each element is boxed as Timedelta, otherwise as
        datetime.timedelta.

    Returns
    -------
    result : ndarray[object]
        array of Timedelta or timedeltas objects
    """
    cdef:
        Py_ssize_t i, n = len(arr)
        int64_t value
        object[:] result = np.empty(n, dtype=object)

    for i in range(n):

        value = arr[i]
        if value == NPY_NAT:
            result[i] = NaT
        else:
            if box:
                result[i] = Timedelta(value)
            else:
                # true division: microseconds is passed as a float, so the
                # sub-microsecond part is handled by timedelta itself
                result[i] = timedelta(microseconds=int(value) / 1000)

    return result.base  # .base to access underlying np.ndarray


# ----------------------------------------------------------------------

cpdef int64_t delta_to_nanoseconds(delta) except? -1:
    """
    Convert a timedelta-like scalar to an integer number of nanoseconds.

    Handles, in order: Tick offsets (via ``.nanos``), _Timedelta (via
    ``.value``), np.timedelta64, integers (returned unchanged) and
    datetime.timedelta.

    Raises
    ------
    OutOfBoundsTimedelta
        If a datetime.timedelta overflows while being scaled to ns.
    TypeError
        If ``delta`` is none of the supported types.
    """
    if is_tick_object(delta):
        return delta.nanos
    if isinstance(delta, _Timedelta):
        # unwrap to the integer value; picked up by the integer branch below
        delta = delta.value

    if is_timedelta64_object(delta):
        return get_timedelta64_value(ensure_td64ns(delta))

    if is_integer_object(delta):
        return delta
    if PyDelta_Check(delta):
        try:
            return (
                delta.days * 24 * 3600 * 1_000_000
                + delta.seconds * 1_000_000
                + delta.microseconds
            ) * 1000
        except OverflowError as err:
            # local import, as elsewhere in this file
            from pandas._libs.tslibs.conversion import OutOfBoundsTimedelta
            raise OutOfBoundsTimedelta(*err.args) from err

    raise TypeError(type(delta))


cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
    # Translate a numpy datetime unit enum member into its string
    # abbreviation; raises NotImplementedError for units not listed below.
    if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
        # generic -> default to nanoseconds
        return "ns"
    elif unit == NPY_DATETIMEUNIT.NPY_FR_us:
        return "us"
    elif unit == NPY_DATETIMEUNIT.NPY_FR_ms:
        return "ms"
    elif unit == NPY_DATETIMEUNIT.NPY_FR_s:
        return "s"
    elif unit == NPY_DATETIMEUNIT.NPY_FR_m:
        return "m"
    elif unit == NPY_DATETIMEUNIT.NPY_FR_h:
        return "h"
    elif unit == NPY_DATETIMEUNIT.NPY_FR_D:
        return "D"
    elif unit == NPY_DATETIMEUNIT.NPY_FR_W:
        return "W"
    elif unit == NPY_DATETIMEUNIT.NPY_FR_M:
        return "M"
    elif unit == NPY_DATETIMEUNIT.NPY_FR_Y:
        return "Y"
    else:
        raise NotImplementedError(unit)


@cython.overflowcheck(True)
cdef object ensure_td64ns(object ts):
    """
    Overflow-safe implementation of td64.astype("m8[ns]")

    Parameters
    ----------
    ts : np.timedelta64

    Returns
    -------
    np.timedelta64[ns]
        ``ts`` itself is returned unchanged when its unit is already ns or
        generic.

    Raises
    ------
    OutOfBoundsTimedelta
        If scaling the value to nanoseconds overflows int64.
    """
    cdef:
        NPY_DATETIMEUNIT td64_unit
        int64_t td64_value, mult
        str unitstr

    td64_unit = get_datetime64_unit(ts)
    if (
        td64_unit != NPY_DATETIMEUNIT.NPY_FR_ns
        and td64_unit != NPY_DATETIMEUNIT.NPY_FR_GENERIC
    ):
        unitstr = npy_unit_to_abbrev(td64_unit)

        td64_value = get_timedelta64_value(ts)

        mult = precision_from_unit(unitstr)[0]
        try:
            # NB: cython#1381 this cannot be *=
            td64_value = td64_value * mult
        except OverflowError as err:
            from pandas._libs.tslibs.conversion import OutOfBoundsTimedelta
            raise OutOfBoundsTimedelta(ts) from err

        return np.timedelta64(td64_value, "ns")

    return ts


cdef convert_to_timedelta64(object ts, str unit):
    """
    Convert an incoming object to a timedelta64 if possible.
    Before calling, unit must be standardized to avoid repeated unit conversion

    Handle these types of objects:
        - timedelta/Timedelta
        - timedelta64
        - an offset
        - np.int64 (with unit providing a possible modifier)
        - None/NaT

    Return an np.timedelta64 with nanosecond unit.

    Raises
    ------
    ValueError
        If ``ts`` is of a type that none of the branches below handles.
    """
    if checknull_with_nat(ts):
        return np.timedelta64(NPY_NAT, "ns")
    elif isinstance(ts, _Timedelta):
        # already in the proper format
        ts = np.timedelta64(ts.value, "ns")
    elif is_timedelta64_object(ts):
        ts = ensure_td64ns(ts)
    elif is_integer_object(ts):
        if ts == NPY_NAT:
            return np.timedelta64(NPY_NAT, "ns")
        else:
            if unit in ["Y", "M", "W"]:
                # let numpy interpret these units; converted to ns by the
                # final astype below
                ts = np.timedelta64(ts, unit)
            else:
                ts = cast_from_unit(ts, unit)
                ts = np.timedelta64(ts, "ns")
    elif is_float_object(ts):
        if unit in ["Y", "M", "W"]:
            ts = np.timedelta64(int(ts), unit)
        else:
            ts = cast_from_unit(ts, unit)
            ts = np.timedelta64(ts, "ns")
    elif isinstance(ts, str):
        # a leading "P" or "-P" selects the ISO 8601 duration parser
        if (len(ts) > 0 and ts[0] == "P") or (len(ts) > 1 and ts[:2] == "-P"):
            ts = parse_iso_format_string(ts)
        else:
            ts = parse_timedelta_string(ts)
        ts = np.timedelta64(ts, "ns")
    elif is_tick_object(ts):
        ts = np.timedelta64(ts.nanos, "ns")

    # Anything not converted above is either a plain datetime.timedelta or
    # an unsupported type.
    if PyDelta_Check(ts):
        ts = np.timedelta64(delta_to_nanoseconds(ts), "ns")
    elif not is_timedelta64_object(ts):
        raise ValueError(f"Invalid type for timedelta scalar: {type(ts)}")
    return ts.astype("timedelta64[ns]")


@cython.boundscheck(False)
@cython.wraparound(False)
def array_to_timedelta64(
    ndarray[object] values, str unit=None, str errors="raise"
) -> ndarray:
    """
    Convert an ndarray to an array of timedeltas. If errors == 'coerce',
    coerce non-convertible objects to NaT. Otherwise, raise.

    Parameters
    ----------
    values : ndarray[object]
    unit : str, optional
        Unit applied to numeric entries; may not be combined with string
        entries unless errors == 'coerce'.
    errors : {'raise', 'coerce', 'ignore'}, default 'raise'

    Returns
    -------
    np.ndarray[timedelta64ns]

    Raises
    ------
    ValueError
        If ``errors`` is not one of the accepted values, if ``unit`` is
        given together with string input (and errors != 'coerce'), or if
        an element cannot be converted (and errors != 'coerce').
    """

    cdef:
        Py_ssize_t i, n
        int64_t[:] iresult

    if errors not in {'ignore', 'raise', 'coerce'}:
        raise ValueError("errors must be one of {'ignore', 'raise', or 'coerce'}")

    n = values.shape[0]
    result = np.empty(n, dtype='m8[ns]')
    # int64 view sharing memory with ``result``
    iresult = result.view('i8')

    if unit is not None:
        for i in range(n):
            if isinstance(values[i], str) and errors != "coerce":
                raise ValueError(
                    "unit must not be specified if the input contains a str"
                )

    # Usually, we have all strings. If so, we hit the fast path.
    # If this path fails, we try conversion a different way, and
    # this is where all of the error handling will take place.
    try:
        for i in range(n):
            if values[i] is NaT:
                # we allow this check in the fast-path because NaT is a C-object
                # so this is an inexpensive check
                iresult[i] = NPY_NAT
            else:
                result[i] = parse_timedelta_string(values[i])
    except (TypeError, ValueError):
        # slow path: restart from the first element with the general
        # per-element converter
        parsed_unit = parse_timedelta_unit(unit or 'ns')
        for i in range(n):
            try:
                result[i] = convert_to_timedelta64(values[i], parsed_unit)
            except ValueError as err:
                if errors == 'coerce':
                    result[i] = NPY_NAT
                elif "unit abbreviation w/o a number" in str(err):
                    # re-raise with more pertinent message
                    msg = f"Could not convert '{values[i]}' to NumPy timedelta"
                    raise ValueError(msg) from err
                else:
                    raise

    return iresult.base  # .base to access underlying np.ndarray


cdef inline int64_t parse_timedelta_string(str ts) except? -1:
    """
    Parse a regular format timedelta string. Return an int64_t (in ns)
    or raise a ValueError on an invalid parse.

    An empty string or a member of ``nat_strings`` yields NPY_NAT.
    """

    cdef:
        unicode c
        bint neg = 0, have_dot = 0, have_value = 0, have_hhmmss = 0
        object current_unit = None
        int64_t result = 0, m = 0, r
        list number = [], frac = [], unit = []

    # neg : tracks if we have a leading negative for the value
    # have_dot : tracks if we are processing a dot (either post hhmmss or
    #            inside an expression)
    # have_value : track if we have at least 1 leading unit
    # have_hhmmss : tracks if we have a regular format hh:mm:ss
    # current_unit : last hh:mm:ss field consumed ('h', 'm' or 's'), or None
    # number/frac/unit : characters collected for the pending token

    if len(ts) == 0 or ts in nat_strings:
        return NPY_NAT

    for c in ts:

        # skip whitespace / commas
        if c == ' ' or c == ',':
            pass

        # positive signs are ignored
        elif c == '+':
            pass

        # neg
        elif c == '-':

            if neg or have_value or have_hhmmss:
                raise ValueError("only leading negative signs are allowed")

            neg = 1

        # number (ascii codes)
        elif ord(c) >= 48 and ord(c) <= 57:

            if have_dot:

                # we found a dot, but now its just a fraction
                if len(unit):
                    number.append(c)
                    have_dot = 0
                else:
                    frac.append(c)

            elif not len(unit):
                number.append(c)

            else:
                # a digit after a unit: flush the completed
                # <number><unit> token and start a new number
                r = timedelta_from_spec(number, frac, unit)
                unit, number, frac = [], [c], []

                result += timedelta_as_neg(r, neg)

        # hh:mm:ss.
        elif c == ':':

            # we flip this off if we have a leading value
            if have_value:
                neg = 0

            # we are in the pattern hh:mm:ss pattern
            if len(number):
                # m is the multiplier (ns per unit) of the field just read
                if current_unit is None:
                    current_unit = 'h'
                    m = 1000000000 * 3600
                elif current_unit == 'h':
                    current_unit = 'm'
                    m = 1000000000 * 60
                elif current_unit == 'm':
                    current_unit = 's'
                    m = 1000000000
                r = int(''.join(number)) * m
                result += timedelta_as_neg(r, neg)
                have_hhmmss = 1
            else:
                raise ValueError(f"expecting hh:mm:ss format, received: {ts}")

            unit, number = [], []

        # after the decimal point
        elif c == '.':

            if len(number) and current_unit is not None:

                # by definition we had something like
                # so we need to evaluate the final field from a
                # hh:mm:ss (so current_unit is 'm')
                if current_unit != 'm':
                    raise ValueError("expected hh:mm:ss format before .")
                m = 1000000000
                r = int(''.join(number)) * m
                result += timedelta_as_neg(r, neg)
                have_value = 1
                unit, number, frac = [], [], []

            have_dot = 1

        # unit
        else:
            unit.append(c)
            have_value = 1
            have_dot = 0

    # End of input: flush whatever token is still pending.

    # we had a dot, but we have a fractional
    # value since we have an unit
    if have_dot and len(unit):
        r = timedelta_from_spec(number, frac, unit)
        result += timedelta_as_neg(r, neg)

    # we have a dot as part of a regular format
    # e.g. hh:mm:ss.fffffff
    elif have_dot:

        if ((len(number) or len(frac)) and not len(unit)
                and current_unit is None):
            raise ValueError("no units specified")

        # scale the fractional digits to nanoseconds; digits beyond the
        # ninth are dropped
        if len(frac) > 0 and len(frac) <= 3:
            m = 10**(3 -len(frac)) * 1000 * 1000
        elif len(frac) > 3 and len(frac) <= 6:
            m = 10**(6 -len(frac)) * 1000
        elif len(frac) > 6 and len(frac) <= 9:
            m = 10**(9 -len(frac))
        else:
            m = 1
            frac = frac[:9]
        r = int(''.join(frac)) * m
        result += timedelta_as_neg(r, neg)

    # we have a regular format
    # we must have seconds at this point (hence the unit is still 'm')
    elif current_unit is not None:
        if current_unit != 'm':
            raise ValueError("expected hh:mm:ss format")
        m = 1000000000
        r = int(''.join(number)) * m
        result += timedelta_as_neg(r, neg)

    # we have a last abbreviation
    elif len(unit):
        if len(number):
            r = timedelta_from_spec(number, frac, unit)
            result += timedelta_as_neg(r, neg)
        else:
            raise ValueError("unit abbreviation w/o a number")

    # we only have symbols and no numbers
    elif len(number) == 0:
        raise ValueError("symbols w/o a number")

    # treat as nanoseconds
    # but only if we don't have anything else
    else:
        if have_value:
            raise ValueError("have leftover units")
        if len(number):
            r = timedelta_from_spec(number, frac, 'ns')
            result += timedelta_as_neg(r, neg)

    return result


cdef inline int64_t timedelta_as_neg(int64_t value, bint neg):
    """
    Return ``-value`` if ``neg`` is set, otherwise ``value``.

    Parameters
    ----------
    value : int64_t of the timedelta value
    neg : bool if the a negative value
    """
    if neg:
        return -value
    return value


cdef inline timedelta_from_spec(object number, object frac, object unit):
    """
    Combine collected digits and unit characters into a value, converted
    via ``cast_from_unit``.

    Parameters
    ----------
    number : a list of number digits
    frac : a list of frac digits
    unit : a list of unit characters

    Notes
    -----
    Emits a FutureWarning for the units 'M', 'Y' and 'y'; 'M' is then
    interpreted as minutes.
    """
    cdef:
        str n

    try:
        unit = ''.join(unit)

        if unit in ["M", "Y", "y"]:
            warnings.warn(
                "Units 'M', 'Y' and 'y' do not represent unambiguous "
                "timedelta values and will be removed in a future version.",
                FutureWarning,
                stacklevel=2,
            )

        if unit == 'M':
            # To parse ISO 8601 string, 'M' should be treated as minute,
            # not month
            unit = 'm'
        unit = parse_timedelta_unit(unit)
    except KeyError:
        raise ValueError(f"invalid abbreviation: {unit}")

    # rebuild "<number>.<frac>" and let float() parse it
    n = ''.join(number) + '.' + ''.join(frac)
    return cast_from_unit(float(n), unit)


cpdef inline str parse_timedelta_unit(str unit):
    """
    Normalize a unit spelling to its canonical abbreviation.

    Parameters
    ----------
    unit : str or None
        None maps to "ns"; "M" is returned unchanged; anything else is
        lower-cased and looked up in ``timedelta_abbrevs``.

    Returns
    -------
    str
        Canonical unit string.

    Raises
    ------
    ValueError : on non-parseable input
    """
    if unit is None:
        return "ns"
    elif unit == "M":
        return unit
    try:
        return timedelta_abbrevs[unit.lower()]
    except (KeyError, AttributeError):
        raise ValueError(f"invalid unit abbreviation: {unit}")

# ----------------------------------------------------------------------
# Timedelta ops utilities

cdef bint _validate_ops_compat(other):
    # return True if we are compat with operating
    # (null/NaT, any timedelta-like scalar, or a string)
    if checknull_with_nat(other):
        return True
    elif is_any_td_scalar(other):
        return True
    elif isinstance(other, str):
        return True
    return False


def _op_unary_method(func, name):
    # Build a unary dunder method that applies ``func`` to ``self.value``
    # and wraps the result in a new Timedelta.
    def f(self):
        return Timedelta(func(self.value), unit='ns')
    f.__name__ = name
    return f


def _binary_op_method_timedeltalike(op, name):
    # define a binary operation that only works if the other argument is
    # timedelta like or an array of timedeltalike
    def f(self, other):
        if other is NaT:
            return NaT

        elif is_datetime64_object(other) or (
            PyDateTime_Check(other) and not isinstance(other, ABCTimestamp)
        ):
            # this case is for a datetime object that is specifically
            # *not* a Timestamp, as the Timestamp case will be
            # handled after `_validate_ops_compat` returns False below
            from pandas._libs.tslibs.timestamps import Timestamp
            return op(self, Timestamp(other))
            # We are implicitly requiring the canonical behavior to be
            # defined by Timestamp methods.

        elif is_array(other):
            # nd-array like
            if other.dtype.kind in ['m', 'M']:
                return op(self.to_timedelta64(), other)
            elif other.dtype.kind == 'O':
                # object dtype: apply the op element by element
                return np.array([op(self, x) for x in other])
            else:
                return NotImplemented

        elif not _validate_ops_compat(other):
            # Includes any of our non-cython classes
            return NotImplemented

        try:
            other = Timedelta(other)
        except ValueError:
            # failed to parse as timedelta
            return NotImplemented

        if other is NaT:
            # e.g. if original other was timedelta64('NaT')
            return NaT
        return Timedelta(op(self.value, other.value), unit='ns')

    f.__name__ = name
    return f


# ----------------------------------------------------------------------
# Timedelta Construction

cdef inline int64_t parse_iso_format_string(str ts) except? -1:
    """
    Parse an ISO 8601 duration string character by character and return
    its total length in nanoseconds.

    Parameters
    ----------
    ts: str
        ISO 8601 Duration formatted string

    Returns
    -------
    ns: int64_t
        Precision in nanoseconds of matched ISO 8601 duration

    Raises
    ------
    ValueError
        If ``ts`` cannot be parsed
    """

    cdef:
        unicode c
        int64_t result = 0, r
        int p = 0, sign = 1
        object dec_unit = 'ms', err_msg
        bint have_dot = 0, have_value = 0, neg = 0
        list number = [], unit = []

    # p : number of digits collected for the current fractional group
    # dec_unit : unit of the current fractional group; advances
    #            ms -> us -> ns after every third digit
    # sign : overall sign taken from a leading "-"
    # neg : sign of the component currently being parsed

    err_msg = f"Invalid ISO 8601 Duration format - {ts}"

    if ts[0] == "-":
        sign = -1
        ts = ts[1:]

    for c in ts:
        # number (ascii codes)
        if 48 <= ord(c) <= 57:

            have_value = 1
            if have_dot:
                if p == 3 and dec_unit != 'ns':
                    # a full 3-digit group is complete: mark its unit so
                    # the group is flushed below, then move to the next
                    # finer unit
                    unit.append(dec_unit)
                    if dec_unit == 'ms':
                        dec_unit = 'us'
                    elif dec_unit == 'us':
                        dec_unit = 'ns'
                    p = 0
                p += 1

            if not len(unit):
                number.append(c)
            else:
                r = timedelta_from_spec(number, '0', unit)
                result += timedelta_as_neg(r, neg)

                neg = 0
                unit, number = [], [c]
        else:
            if c == 'P' or c == 'T':
                pass  # ignore marking characters P and T
            elif c == '-':
                if neg or have_value:
                    raise ValueError(err_msg)
                else:
                    neg = 1
            elif c == "+":
                pass
            elif c in ['W', 'D', 'H', 'M']:
                if c in ['H', 'M'] and len(number) > 2:
                    raise ValueError(err_msg)
                if c == 'M':
                    # 'M' is read as minutes here
                    c = 'min'
                unit.append(c)
                r = timedelta_from_spec(number, '0', unit)
                result += timedelta_as_neg(r, neg)

                neg = 0
                unit, number = [], []
            elif c == '.':
                # append any seconds
                if len(number):
                    r = timedelta_from_spec(number, '0', 'S')
                    result += timedelta_as_neg(r, neg)
                    unit, number = [], []
                have_dot = 1
            elif c == 'S':
                if have_dot:  # ms, us, or ns
                    if not len(number) or p > 3:
                        raise ValueError(err_msg)
                    # pad to 3 digits as required
                    pad = 3 - p
                    while pad > 0:
                        number.append('0')
                        pad -= 1

                    r = timedelta_from_spec(number, '0', dec_unit)
                    result += timedelta_as_neg(r, neg)
                else:  # seconds
                    r = timedelta_from_spec(number, '0', 'S')
                    result += timedelta_as_neg(r, neg)
            else:
                raise ValueError(err_msg)

    if not have_value:
        # Received string only - never parsed any values
        raise ValueError(err_msg)

    return sign*result


cdef _to_py_int_float(v):
    # Coerce an integer-like or float-like object to a builtin int/float;
    # raise TypeError for anything else.
    # Note: This used to be defined inside Timedelta.__new__
    # but cython will not allow `cdef` functions to be defined dynamically.
    if is_integer_object(v):
        return int(v)
    elif is_float_object(v):
        return float(v)
    raise TypeError(f"Invalid type {type(v)}. Must be int or float.")


# Similar to Timestamp/datetime, this is a construction requirement for
# timedeltas that we need to do object instantiation in python. This will
# serve as a C extension type that shadows the Python class, where we do any
# heavy lifting.
cdef class _Timedelta(timedelta):
    # cdef readonly:
    #    int64_t value      # nanoseconds
    #    object freq        # frequency reference
    #    bint is_populated  # are my components populated
    #    int64_t _d, _h, _m, _s, _ms, _us, _ns

    # higher than np.ndarray and np.matrix
    __array_priority__ = 100

    def __hash__(_Timedelta self):
        # With a sub-microsecond component, hash the integer ns value;
        # otherwise defer to datetime.timedelta.__hash__.
        if self._has_ns():
            return hash(self.value)
        else:
            return timedelta.__hash__(self)

    def __richcmp__(_Timedelta self, object other, int op):
        """
        Rich comparison against timedelta-like scalars, NaT and ndarrays.

        Returns NotImplemented for unsupported operands so Python can try
        the reflected operation.
        """
        cdef:
            _Timedelta ots

        if isinstance(other, _Timedelta):
            ots = other
        elif is_any_td_scalar(other):
            ots = Timedelta(other)
            # TODO: watch out for overflows

        elif other is NaT:
            # every comparison with NaT is False except !=
            return op == Py_NE

        elif util.is_array(other):
            # TODO: watch out for zero-dim
            if other.dtype.kind == "m":
                return PyObject_RichCompare(self.asm8, other, op)
            elif other.dtype.kind == "O":
                # operate element-wise
                return np.array(
                    [PyObject_RichCompare(self, x, op) for x in other],
                    dtype=bool,
                )
            # any other dtype: never equal, ordering is undefined
            if op == Py_EQ:
                return np.zeros(other.shape, dtype=bool)
            elif op == Py_NE:
                return np.ones(other.shape, dtype=bool)
            return NotImplemented  # let other raise TypeError

        else:
            return NotImplemented

        return cmp_scalar(self.value, ots.value, op)

    cpdef bint _has_ns(self):
        # True if the value is not a whole number of microseconds
        return self.value % 1000 != 0

    def _ensure_components(_Timedelta self):
        """
        compute the components

        Populates the cached component fields from ``self.value`` once;
        later calls return immediately while ``is_populated`` is set.
        """
        if self.is_populated:
            return

        cdef:
            pandas_timedeltastruct tds

        td64_to_tdstruct(self.value, &tds)
        self._d = tds.days
        self._h = tds.hrs
        self._m = tds.min
        self._s = tds.sec
        self._ms = tds.ms
        self._us = tds.us
        self._ns = tds.ns
        self._seconds = tds.seconds
        self._microseconds = tds.microseconds

        self.is_populated = 1

    cpdef timedelta to_pytimedelta(_Timedelta self):
        """
        Convert a pandas Timedelta object into a python ``datetime.timedelta`` object.

        Timedelta objects are internally saved as a nanosecond-resolution
        integer (the equivalent of numpy timedelta64[ns]).
        Use to_pytimedelta() to convert to a standard-library object.

        Returns
        -------
        datetime.timedelta

        See Also
        --------
        to_timedelta : Convert argument to Timedelta type.

        Notes
        -----
        Any nanosecond resolution will be lost.
        """
        return timedelta(microseconds=int(self.value) / 1000)

    def to_timedelta64(self) -> np.timedelta64:
        """
        Return a numpy.timedelta64 object with 'ns' precision.
        """
        return np.timedelta64(self.value, 'ns')

    def to_numpy(self, dtype=None, copy=False) -> np.timedelta64:
        """
        Convert the Timedelta to a NumPy timedelta64.

        .. versionadded:: 0.25.0

        This is an alias method for `Timedelta.to_timedelta64()`. The dtype and
        copy parameters are available here only for compatibility. They must
        be left at their defaults; any other value raises.

        Returns
        -------
        numpy.timedelta64

        Raises
        ------
        ValueError
            If ``dtype`` is not None or ``copy`` is not False.

        See Also
        --------
        Series.to_numpy : Similar method for Series.
        """
        if dtype is not None or copy is not False:
            raise ValueError(
                "Timedelta.to_numpy dtype and copy arguments are ignored"
            )
        return self.to_timedelta64()

    def view(self, dtype):
        """
        Array view compatibility.
        """
        return np.timedelta64(self.value).view(dtype)

    @property
    def components(self):
        """
        Return a components namedtuple-like.
        """
        self._ensure_components()
        # return the named tuple
        return Components(self._d, self._h, self._m, self._s,
                          self._ms, self._us, self._ns)

    @property
    def delta(self):
        """
        Return the timedelta in nanoseconds (ns), for internal compatibility.

        Returns
        -------
        int
            Timedelta in nanoseconds.

        Examples
        --------
        >>> td = pd.Timedelta('1 days 42 ns')
        >>> td.delta
        86400000000042

        >>> td = pd.Timedelta('3 s')
        >>> td.delta
        3000000000

        >>> td = pd.Timedelta('3 ms 5 us')
        >>> td.delta
        3005000

        >>> td = pd.Timedelta(42, unit='ns')
        >>> td.delta
        42
        """
        return self.value

    @property
    def asm8(self) -> np.timedelta64:
        """
        Return a numpy timedelta64 array scalar view.

        Provides access to the array scalar view (i.e. a combination of the
        value and the units) associated with the numpy.timedelta64().view(),
        including a 64-bit integer representation of the timedelta in
        nanoseconds (Python int compatible).

        Returns
        -------
        numpy timedelta64 array scalar view
            Array scalar view of the timedelta in nanoseconds.

        Examples
        --------
        >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
        >>> td.asm8
        numpy.timedelta64(86520000003042,'ns')

        >>> td = pd.Timedelta('2 min 3 s')
        >>> td.asm8
        numpy.timedelta64(123000000000,'ns')

        >>> td = pd.Timedelta('3 ms 5 us')
        >>> td.asm8
        numpy.timedelta64(3005000,'ns')

        >>> td = pd.Timedelta(42, unit='ns')
        >>> td.asm8
        numpy.timedelta64(42,'ns')
        """
        return np.int64(self.value).view('m8[ns]')

    @property
    def resolution_string(self) -> str:
        """
        Return a string representing the lowest timedelta resolution.

        Each timedelta has a defined resolution that represents the lowest OR
        most granular level of precision. Each level of resolution is
        represented by a short string as defined below:

        Resolution:     Return value

        * Days:         'D'
        * Hours:        'H'
        * Minutes:      'T'
        * Seconds:      'S'
        * Milliseconds: 'L'
        * Microseconds: 'U'
        * Nanoseconds:  'N'

        Returns
        -------
        str
            Timedelta resolution.

        Examples
        --------
        >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
        >>> td.resolution_string
        'N'

        >>> td = pd.Timedelta('1 days 2 min 3 us')
        >>> td.resolution_string
        'U'

        >>> td = pd.Timedelta('2 min 3 s')
        >>> td.resolution_string
        'S'

        >>> td = pd.Timedelta(36, unit='us')
        >>> td.resolution_string
        'U'
        """
        self._ensure_components()
        # the finest non-zero component determines the resolution
        if self._ns:
            return "N"
        elif self._us:
            return "U"
        elif self._ms:
            return "L"
        elif self._s:
            return "S"
        elif self._m:
            return "T"
        elif self._h:
            return "H"
        else:
            return "D"

    @property
    def nanoseconds(self):
        """
        Return the number of nanoseconds (n), where 0 <= n < 1 microsecond.

        Returns
        -------
        int
            Number of nanoseconds.

        See Also
        --------
        Timedelta.components : Return all attributes with assigned values
            (i.e. days, hours, minutes, seconds, milliseconds, microseconds,
            nanoseconds).

        Examples
        --------
        **Using string input**

        >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')

        >>> td.nanoseconds
        42

        **Using integer input**

        >>> td = pd.Timedelta(42, unit='ns')
        >>> td.nanoseconds
        42
        """
        self._ensure_components()
        return self._ns

    def _repr_base(self, format=None) -> str:
        """
        Build the textual form shared by ``__repr__`` and ``__str__``.

        Parameters
        ----------
        format : None|all|sub_day|long

        Returns
        -------
        converted : string of a Timedelta
        """
        cdef object sign, subs, fmt, comp_dict

        self._ensure_components()

        # a negative day count is followed by an explicit "+" before the
        # time-of-day part
        if self._d < 0:
            sign = " +"
        else:
            sign = " "

        if format == 'all':
            fmt = ("{days} days{sign}{hours:02}:{minutes:02}:{seconds:02}."
                   "{milliseconds:03}{microseconds:03}{nanoseconds:03}")
        else:
            # if we have a partial day
            subs = (self._h or self._m or self._s or
                    self._ms or self._us or self._ns)

            if self._ms or self._us or self._ns:
                seconds_fmt = "{seconds:02}.{milliseconds:03}{microseconds:03}"
                if self._ns:
                    # GH#9309
                    seconds_fmt += "{nanoseconds:03}"
            else:
                seconds_fmt = "{seconds:02}"

            if format == 'sub_day' and not self._d:
                fmt = "{hours:02}:{minutes:02}:" + seconds_fmt
            elif subs or format == 'long':
                fmt = "{days} days{sign}{hours:02}:{minutes:02}:" + seconds_fmt
            else:
                fmt = "{days} days"

        comp_dict = self.components._asdict()
        comp_dict['sign'] = sign

        return fmt.format(**comp_dict)

    def __repr__(self) -> str:
        repr_based = self._repr_base(format='long')
        return f"Timedelta('{repr_based}')"

    def __str__(self) -> str:
        return self._repr_base(format='long')

    def __bool__(self) -> bool:
        return self.value != 0

    def isoformat(self) -> str:
        """
        Format Timedelta as ISO 8601 Duration like
        ``P[n]Y[n]M[n]DT[n]H[n]M[n]S``, where the ``[n]`` s are replaced by the
        values. See https://en.wikipedia.org/wiki/ISO_8601#Durations.

        Returns
        -------
        str

        See Also
        --------
        Timestamp.isoformat : Function is used to convert the given
            Timestamp object into the ISO format.

        Notes
        -----
        The longest component is days, whose value may be larger than
        365.
        Every component is always included, even if its value is 0.
        Pandas uses nanosecond precision, so up to 9 decimal places may
        be included in the seconds component.
        Trailing 0's are removed from the seconds component after the decimal.
        We do not 0 pad components, so it's `...T5H...`, not `...T05H...`

        Examples
        --------
        >>> td = pd.Timedelta(days=6, minutes=50, seconds=3,
        ...                   milliseconds=10, microseconds=10, nanoseconds=12)

        >>> td.isoformat()
        'P6DT0H50M3.010010012S'
        >>> pd.Timedelta(hours=1, seconds=10).isoformat()
        'P0DT1H0M10S'
        >>> pd.Timedelta(days=500.5).isoformat()
        'P500DT12H0M0S'
        """
        components = self.components
        seconds = (f'{components.seconds}.'
                   f'{components.milliseconds:0>3}'
                   f'{components.microseconds:0>3}'
                   f'{components.nanoseconds:0>3}')
        # Trim unnecessary 0s, 1.000000000 -> 1
        seconds = seconds.rstrip('0').rstrip('.')
        tpl = (f'P{components.days}DT{components.hours}'
               f'H{components.minutes}M{seconds}S')
        return tpl


# Python front end to C extension type _Timedelta
# This serves as the box for timedelta64

class Timedelta(_Timedelta):
    """
    Represents a duration, the difference between two dates or times.

    Timedelta is the pandas equivalent of python's ``datetime.timedelta``
    and is interchangeable with it in most cases.

    Parameters
    ----------
    value : Timedelta, timedelta, np.timedelta64, str, or int
    unit : str, default 'ns'
        Denote the unit of the input, if input is an integer.

        Possible values:

        * 'W', 'D', 'T', 'S', 'L', 'U', or 'N'
        * 'days' or 'day'
        * 'hours', 'hour', 'hr', or 'h'
        * 'minutes', 'minute', 'min', or 'm'
        * 'seconds', 'second', or 'sec'
        * 'milliseconds', 'millisecond', 'millis', or 'milli'
        * 'microseconds', 'microsecond', 'micros', or 'micro'
        * 'nanoseconds', 'nanosecond', 'nanos', 'nano', or 'ns'.

    **kwargs
        Available kwargs: {days, seconds, microseconds,
        milliseconds, minutes, hours, weeks}.
        Values for construction in compat with datetime.timedelta.
        Numpy ints and floats will be coerced to python ints and floats.

    Notes
    -----
    The constructor may take in either both values of value and unit or
    kwargs as above. Either one of them must be used during initialization

    The ``.value`` attribute is always in ns.

    If the precision is higher than nanoseconds, the precision of the duration is
    truncated to nanoseconds.

    Examples
    --------
    Here we initialize Timedelta object with both value and unit

    >>> td = pd.Timedelta(1, "d")
    >>> td
    Timedelta('1 days 00:00:00')

    Here we initialize the Timedelta object with kwargs

    >>> td2 = pd.Timedelta(days=1)
    >>> td2
    Timedelta('1 days 00:00:00')

    We see that either way we get the same result
    """

    # keyword names accepted by __new__ when no positional value is given
    _req_any_kwargs_new = {"weeks", "days", "hours", "minutes", "seconds",
                           "milliseconds", "microseconds", "nanoseconds"}

    def __new__(cls, object value=_no_input, unit=None, **kwargs):
        cdef _Timedelta td_base

        if value is _no_input:
            if not len(kwargs):
                raise ValueError("cannot construct a Timedelta without a "
                                 "value/unit or descriptive keywords "
                                 "(days,seconds....)")

            # coerce numpy scalars to builtin int/float (TypeError otherwise)
            kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs}

            unsupported_kwargs = set(kwargs)
            unsupported_kwargs.difference_update(cls._req_any_kwargs_new)
            if unsupported_kwargs or not cls._req_any_kwargs_new.intersection(kwargs):
                raise ValueError(
                    "cannot construct a Timedelta from the passed arguments, "
                    "allowed keywords are "
                    "[weeks, days, hours, minutes, seconds, "
                    "milliseconds, microseconds, nanoseconds]"
                )

            # GH43764, convert any input to nanoseconds first and then
            # create the timestamp. This ensures that any potential
            # nanosecond contributions from kwargs parsed as floats
            # are taken into consideration.
            seconds = int((
                (
                    (kwargs.get('days', 0) + kwargs.get('weeks', 0) * 7) * 24
                    + kwargs.get('hours', 0)
                ) * 3600
                + kwargs.get('minutes', 0) * 60
                + kwargs.get('seconds', 0)
                ) * 1_000_000_000
            )

            # no unit is passed here: the generic-unit scalar is treated as
            # nanoseconds by ensure_td64ns in the timedelta64 branch below
            value = np.timedelta64(
                int(kwargs.get('nanoseconds', 0))
                + int(kwargs.get('microseconds', 0) * 1_000)
                + int(kwargs.get('milliseconds', 0) * 1_000_000)
                + seconds
            )

        if unit in {'Y', 'y', 'M'}:
            raise ValueError(
                "Units 'M', 'Y', and 'y' are no longer supported, as they do not "
                "represent unambiguous timedelta values durations."
            )

        # GH 30543 if pd.Timedelta already passed, return it
        # check that only value is passed
        if isinstance(value, _Timedelta) and unit is None and len(kwargs) == 0:
            return value
        elif isinstance(value, _Timedelta):
            value = value.value
        elif isinstance(value, str):
            if unit is not None:
                raise ValueError("unit must not be specified if the value is a str")
            # a leading "P" or "-P" selects the ISO 8601 duration parser
            if (len(value) > 0 and value[0] == 'P') or (
                len(value) > 1 and value[:2] == '-P'
            ):
                value = parse_iso_format_string(value)
            else:
                value = parse_timedelta_string(value)
            value = np.timedelta64(value)
        elif PyDelta_Check(value):
            value = convert_to_timedelta64(value, 'ns')
        elif is_timedelta64_object(value):
            if unit is not None:
                value = value.astype(f'timedelta64[{unit}]')
            value = ensure_td64ns(value)
        elif is_tick_object(value):
            value = np.timedelta64(value.nanos, 'ns')
        elif is_integer_object(value) or is_float_object(value):
            # unit=None is de-facto 'ns'
            unit = parse_timedelta_unit(unit)
            value = convert_to_timedelta64(value, unit)
        elif checknull_with_nat(value):
            return NaT
        else:
            raise ValueError(
                "Value must be Timedelta, string, integer, "
                f"float, timedelta or convertible, not {type(value).__name__}"
            )

        if is_timedelta64_object(value):
            # reinterpret as the underlying int64
            value = value.view('i8')

        # nat
        if value == NPY_NAT:
            return NaT

        # make timedelta happy
        td_base = _Timedelta.__new__(cls, microseconds=int(value) // 1000)
        td_base.value = value
        td_base.is_populated = 0
        return td_base

    def __setstate__(self, state):
        # NOTE(review): ``(value) = state`` is a plain assignment (the
        # parentheses do not unpack), so ``state`` is stored as-is.
        # ``__reduce__`` below supplies no state element, so pickle does not
        # appear to reach this method -- confirm before changing.
        (value) = state
        self.value = value

    def __reduce__(self):
        # reconstruct via Timedelta(self.value)
        object_state = self.value,
        return (Timedelta, object_state)

    @cython.cdivision(True)
    def _round(self, freq, mode):
        # Shared implementation of round/floor/ceil: round ``self.value`` to
        # a multiple of the frequency's nanosecond length using ``mode``.
        cdef:
            int64_t result, unit
            ndarray[int64_t] arr

        from pandas._libs.tslibs.offsets import to_offset
        unit = to_offset(freq).nanos

        # round_nsint64 is called with an array, so wrap the scalar
        arr = np.array([self.value], dtype="i8")
        result = round_nsint64(arr, mode, unit)[0]
        return Timedelta(result, unit="ns")

    def round(self, freq):
        """
        Round the Timedelta to the specified resolution.

        Parameters
        ----------
        freq : str
            Frequency string indicating the rounding resolution.

        Returns
        -------
        a new Timedelta rounded to the given resolution of `freq`

        Raises
        ------
        ValueError if the freq cannot be converted
        """
        return self._round(freq, RoundTo.NEAREST_HALF_EVEN)

    def floor(self, freq):
        """
        Return a new Timedelta floored to this resolution.

        Parameters
        ----------
        freq : str
            Frequency string indicating the flooring resolution.
        """
        return self._round(freq, RoundTo.MINUS_INFTY)

    def ceil(self, freq):
        """
        Return a new Timedelta ceiled to this resolution.

        Parameters
        ----------
        freq : str
            Frequency string indicating the ceiling resolution.
        """
        return self._round(freq, RoundTo.PLUS_INFTY)

    # ----------------------------------------------------------------
    # Arithmetic Methods
    # TODO: Can some of these be defined in the cython class?

    __neg__ = _op_unary_method(lambda x: -x, '__neg__')
    __pos__ = _op_unary_method(lambda x: x, '__pos__')
    __abs__ = _op_unary_method(lambda x: abs(x), '__abs__')

    __add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__')
    __radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__')
    __sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__')
    __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__')

    def __mul__(self, other):
        if is_integer_object(other) or is_float_object(other):
            return Timedelta(other * self.value, unit='ns')

        elif is_array(other):
            # ndarray-like
            return other * self.to_timedelta64()

        return NotImplemented

    __rmul__ = __mul__

    def __truediv__(self, other):
        if _should_cast_to_timedelta(other):
            # We interpret NaT as timedelta64("NaT")
            other = Timedelta(other)
            if other is NaT:
                return np.nan
            # timedelta / timedelta -> plain float ratio
            return self.value / float(other.value)

        elif is_integer_object(other) or is_float_object(other):
            # integers or floats
            return Timedelta(self.value / other, unit='ns')

        elif is_array(other):
            return self.to_timedelta64() / other

        return NotImplemented

    def __rtruediv__(self, other):
        if _should_cast_to_timedelta(other):
            # We interpret NaT as timedelta64("NaT")
            other = Timedelta(other)
            if other is NaT:
                return np.nan
            return float(other.value) / self.value

        elif is_array(other):
            if other.dtype.kind == "O":
                # GH#31869
                return np.array([x / self for x in other])
            return other / self.to_timedelta64()

        return NotImplemented

    def __floordiv__(self, other):
        # numpy does not implement floordiv for timedelta64 dtype, so we cannot
        # just defer
        if _should_cast_to_timedelta(other):
            # We interpret NaT as timedelta64("NaT")
            other = Timedelta(other)
            if other is NaT:
                return np.nan
            return self.value // other.value

        elif is_integer_object(other) or is_float_object(other):
            return Timedelta(self.value // other, unit='ns')

        elif is_array(other):
            if other.dtype.kind == 'm':
                # also timedelta-like
                return _broadcast_floordiv_td64(self.value, other, _floordiv)
            elif other.dtype.kind in ['i', 'u', 'f']:
                if other.ndim == 0:
                    return Timedelta(self.value // other)
                else:
                    return self.to_timedelta64() // other

            raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__')

        return NotImplemented

    def __rfloordiv__(self, other):
        # numpy does not implement floordiv for timedelta64 dtype, so we cannot
        # just defer
        if _should_cast_to_timedelta(other):
            # We interpret NaT as timedelta64("NaT")
            other = Timedelta(other)
            if other is NaT:
                return np.nan
            return other.value // self.value

        elif is_array(other):
            if other.dtype.kind == 'm':
                # also timedelta-like
                return _broadcast_floordiv_td64(self.value, other, _rfloordiv)

            # Includes integer array // Timedelta, disallowed in GH#19761
            raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__')

        return NotImplemented

    def __mod__(self, other):
        # Naive implementation, room for optimization
        return self.__divmod__(other)[1]

    def __rmod__(self, other):
        # Naive implementation, room for optimization
        return self.__rdivmod__(other)[1]

    def __divmod__(self, other):
        # Naive implementation, room for optimization
        div = self // other
        return div, self - div * other

    def __rdivmod__(self, other):
        # Naive implementation, room for optimization
        div = other // self
        return div, other - div * self


cdef bint is_any_td_scalar(object obj):
    """
    Cython equivalent for `isinstance(obj, (timedelta, np.timedelta64, Tick))`

    Parameters
    ----------
    obj : object

    Returns
    -------
    bool
    """
    return (
        PyDelta_Check(obj) or is_timedelta64_object(obj) or is_tick_object(obj)
    )


cdef bint _should_cast_to_timedelta(object obj):
    """
    Should we treat this object as a Timedelta for the purpose of a binary op
    """
    return (
        is_any_td_scalar(obj) or obj is None or obj is NaT or isinstance(obj, str)
    )


cdef _floordiv(int64_t value, right):
    return value // right


cdef _rfloordiv(int64_t value, right):
    # analogous to referencing operator.div, but there is no operator.rfloordiv
    return right // value


cdef _broadcast_floordiv_td64(
    int64_t value,
    ndarray other,
    object (*operation)(int64_t value, object right)
):
    """
    Boilerplate code shared by Timedelta.__floordiv__ and
    Timedelta.__rfloordiv__ because np.timedelta64 does not implement these.

    Parameters
    ----------
    value : int64_t; `self.value` from a Timedelta object
    other : object
    operation : function, either _floordiv or _rfloordiv

    Returns
    -------
    result : varies based on `other`
        np.nan or the scalar result for a zero-dimensional ``other``;
        otherwise an array, cast to float64 with NaN at NaT positions when
        any NaT is present.
    """
    # assumes other.dtype.kind == 'm', i.e. other is timedelta-like

    # We need to watch out for np.timedelta64('NaT').
    mask = other.view('i8') == NPY_NAT

    if other.ndim == 0:
        if mask:
            return np.nan

        return operation(value, other.astype('m8[ns]').astype('i8'))

    else:
        res = operation(value, other.astype('m8[ns]').astype('i8'))

        if mask.any():
            # integer results cannot hold NaN, so upcast first
            res = res.astype('f8')
            res[mask] = np.nan
        return res


# resolution in ns
# (int64 min itself is not used for Timedelta.min, hence the +1)
Timedelta.min = Timedelta(np.iinfo(np.int64).min + 1)
Timedelta.max = Timedelta(np.iinfo(np.int64).max)
Timedelta.resolution = Timedelta(nanoseconds=1)