"""
timezone conversion
"""
import cython
from cython import Py_ssize_t

from cpython.datetime cimport (
    PyDateTime_IMPORT,
    PyDelta_Check,
    datetime,
    timedelta,
    tzinfo,
)

PyDateTime_IMPORT

from dateutil.tz import tzutc
import numpy as np
import pytz

cimport numpy as cnp
from numpy cimport (
    int64_t,
    intp_t,
    ndarray,
    uint8_t,
)

cnp.import_array()

from pandas._libs.tslibs.ccalendar cimport (
    DAY_NANOS,
    HOUR_NANOS,
)
from pandas._libs.tslibs.nattype cimport NPY_NAT
from pandas._libs.tslibs.np_datetime cimport (
    dt64_to_dtstruct,
    npy_datetimestruct,
)
from pandas._libs.tslibs.timezones cimport (
    get_dst_info,
    get_utcoffset,
    is_fixed_offset,
    is_tzlocal,
    is_utc,
)


cdef int64_t tz_localize_to_utc_single(
    int64_t val, tzinfo tz, object ambiguous=None, object nonexistent=None,
) except? -1:
    """
    Scalar counterpart of tz_localize_to_utc.

    See tz_localize_to_utc.__doc__ for the meaning of ``ambiguous`` and
    ``nonexistent``; they are only consulted when ``tz`` is neither UTC/None,
    tzlocal, nor a fixed offset.
    """
    cdef:
        int64_t delta
        int64_t[:] deltas

    if val == NPY_NAT:
        return val

    elif is_utc(tz) or tz is None:
        return val

    elif is_tzlocal(tz):
        return _tz_convert_tzlocal_utc(val, tz, to_utc=True)

    elif is_fixed_offset(tz):
        # TODO: in this case we should be able to use get_utcoffset,
        #  that returns None for e.g. 'dateutil//usr/share/zoneinfo/Etc/GMT-9'
        _, deltas, _ = get_dst_info(tz)
        delta = deltas[0]
        return val - delta

    else:
        # General case: defer to the vectorized implementation on a
        #  length-1 array and unwrap the single result.
        return tz_localize_to_utc(
            np.array([val], dtype="i8"),
            tz,
            ambiguous=ambiguous,
            nonexistent=nonexistent,
        )[0]


@cython.boundscheck(False)
@cython.wraparound(False)
def tz_localize_to_utc(ndarray[int64_t] vals, tzinfo tz, object ambiguous=None,
                       object nonexistent=None):
    """
    Localize tzinfo-naive i8 to given time zone (using pytz). If
    there are ambiguities in the values, raise AmbiguousTimeError.

    Parameters
    ----------
    vals : ndarray[int64_t]
    tz : tzinfo or None
    ambiguous : str, bool, or arraylike
        When clocks moved backward due to DST, ambiguous times may arise.
        For example in Central European Time (UTC+01), when going from 03:00
        DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC
        and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter
        dictates how ambiguous times should be handled.

        - 'infer' will attempt to infer fall dst-transition hours based on
          order
        - bool-ndarray where True signifies a DST time, False signifies a
          non-DST time (note that this flag is only applicable for ambiguous
          times, but the array must have the same length as vals)
        - bool if True, treat all vals as DST. If False, treat them as non-DST
        - 'NaT' will return NaT where there are ambiguous times

    nonexistent : {None, "NaT", "shift_forward", "shift_backward", "raise", \
timedelta-like}
        How to handle non-existent times when converting wall times to UTC

    Returns
    -------
    localized : ndarray[int64_t]
        When ``tz`` is UTC or None, ``vals`` itself is returned (no copy).

    Raises
    ------
    pytz.AmbiguousTimeError
        If an ambiguous wall time cannot be resolved with ``ambiguous``.
    pytz.NonExistentTimeError
        If a nonexistent wall time is found and ``nonexistent`` is
        'raise' or None.
    ValueError
        If ``nonexistent`` is not a recognized option, if an array-like
        ``ambiguous`` has a different length than ``vals``, or if a
        timedelta ``nonexistent`` would land on another nonexistent time.
    """
    cdef:
        int64_t[:] deltas, idx_shifted_left, idx_shifted_right
        ndarray[uint8_t, cast=True] ambiguous_array, both_nat, both_eq
        Py_ssize_t i, ntrans, n = len(vals)
        Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right
        int64_t *tdata
        int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins
        int64_t first_delta
        int64_t shift_delta = 0
        ndarray[int64_t] trans, result, result_a, result_b, dst_hours, delta
        ndarray trans_idx, grp, a_idx, b_idx, one_diff
        bint infer_dst = False, is_dst = False, fill = False
        bint shift_forward = False, shift_backward = False
        bint fill_nonexist = False
        list trans_grp
        str stamp

    # Vectorized version of DstTzInfo.localize
    if is_utc(tz) or tz is None:
        return vals

    result = np.empty(n, dtype=np.int64)

    if is_tzlocal(tz):
        for i in range(n):
            v = vals[i]
            if v == NPY_NAT:
                result[i] = NPY_NAT
            else:
                result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=True)
        return result

    # silence false-positive compiler warning
    ambiguous_array = np.empty(0, dtype=bool)
    if isinstance(ambiguous, str):
        if ambiguous == 'infer':
            infer_dst = True
        elif ambiguous == 'NaT':
            fill = True
    elif isinstance(ambiguous, bool):
        is_dst = True
        if ambiguous:
            ambiguous_array = np.ones(len(vals), dtype=bool)
        else:
            ambiguous_array = np.zeros(len(vals), dtype=bool)
    elif hasattr(ambiguous, '__iter__'):
        is_dst = True
        if len(ambiguous) != len(vals):
            raise ValueError("Length of ambiguous bool-array must be "
                             "the same size as vals")
        ambiguous_array = np.asarray(ambiguous, dtype=bool)

    if nonexistent == 'NaT':
        fill_nonexist = True
    elif nonexistent == 'shift_forward':
        shift_forward = True
    elif nonexistent == 'shift_backward':
        shift_backward = True
    elif PyDelta_Check(nonexistent):
        from .timedeltas import delta_to_nanoseconds
        shift_delta = delta_to_nanoseconds(nonexistent)
    elif nonexistent not in ('raise', None):
        # The option names listed here must match the strings tested above.
        msg = ("nonexistent must be one of {'NaT', 'raise', 'shift_forward', "
               "'shift_backward'} or a timedelta object")
        raise ValueError(msg)

    trans, deltas, _ = get_dst_info(tz)

    # PyArray_DATA yields an untyped pointer; the explicit cast is required
    #  to treat the transition array as raw int64 for bisect_right_i8.
    tdata = <int64_t*>cnp.PyArray_DATA(trans)
    ntrans = len(trans)

    # Determine whether each date lies left of the DST transition (store in
    # result_a) or right of the DST transition (store in result_b)
    result_a = np.empty(n, dtype=np.int64)
    result_b = np.empty(n, dtype=np.int64)
    result_a[:] = NPY_NAT
    result_b[:] = NPY_NAT

    # Candidate offsets are looked up one day before / after each value;
    #  indices are clipped at 0 so values preceding the first transition
    #  use the first delta.
    idx_shifted_left = (np.maximum(0, trans.searchsorted(
        vals - DAY_NANOS, side='right') - 1)).astype(np.int64)

    idx_shifted_right = (np.maximum(0, trans.searchsorted(
        vals + DAY_NANOS, side='right') - 1)).astype(np.int64)

    for i in range(n):
        val = vals[i]
        v_left = val - deltas[idx_shifted_left[i]]
        pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1
        # timestamp falls to the left side of the DST transition
        if v_left + deltas[pos_left] == val:
            result_a[i] = v_left

        v_right = val - deltas[idx_shifted_right[i]]
        pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1
        # timestamp falls to the right side of the DST transition
        if v_right + deltas[pos_right] == val:
            result_b[i] = v_right

    # silence false-positive compiler warning
    dst_hours = np.empty(0, dtype=np.int64)
    if infer_dst:
        dst_hours = np.empty(n, dtype=np.int64)
        dst_hours[:] = NPY_NAT

        # Get the ambiguous hours (given the above, these are the hours
        # where result_a != result_b and neither of them are NAT).
        # NB: despite its name, ``both_nat`` is True where *neither*
        # candidate is NaT.
        both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT)
        both_eq = result_a == result_b
        trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq)))
        if trans_idx.size == 1:
            stamp = _render_tstamp(vals[trans_idx])
            raise pytz.AmbiguousTimeError(
                f"Cannot infer dst time from {stamp} as there "
                f"are no repeated times")
        # Split the array into contiguous chunks (where the difference between
        # indices is 1).  These are effectively dst transitions in different
        # years which is useful for checking that there is not an ambiguous
        # transition in an individual year.
        if trans_idx.size > 0:
            one_diff = np.where(np.diff(trans_idx) != 1)[0] + 1
            trans_grp = np.array_split(trans_idx, one_diff)

            # Iterate through each day, if there are no hours where the
            # delta is negative (indicates a repeat of hour) the switch
            # cannot be inferred
            for grp in trans_grp:

                delta = np.diff(result_a[grp])
                if grp.size == 1 or np.all(delta > 0):
                    stamp = _render_tstamp(vals[grp[0]])
                    raise pytz.AmbiguousTimeError(stamp)

                # Find the index for the switch and pull from a for dst and b
                # for standard
                switch_idx = (delta <= 0).nonzero()[0]
                if switch_idx.size > 1:
                    raise pytz.AmbiguousTimeError(
                        f"There are {switch_idx.size} dst switches when "
                        f"there should only be 1.")
                switch_idx = switch_idx[0] + 1
                # Pull the only index and adjust
                a_idx = grp[:switch_idx]
                b_idx = grp[switch_idx:]
                dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx]))

    for i in range(n):
        val = vals[i]
        left = result_a[i]
        right = result_b[i]
        if val == NPY_NAT:
            result[i] = val
        elif left != NPY_NAT and right != NPY_NAT:
            if left == right:
                result[i] = left
            else:
                # Two distinct valid candidates: the wall time is ambiguous
                if infer_dst and dst_hours[i] != NPY_NAT:
                    result[i] = dst_hours[i]
                elif is_dst:
                    if ambiguous_array[i]:
                        result[i] = left
                    else:
                        result[i] = right
                elif fill:
                    result[i] = NPY_NAT
                else:
                    stamp = _render_tstamp(val)
                    raise pytz.AmbiguousTimeError(
                        f"Cannot infer dst time from {stamp}, try using the "
                        f"'ambiguous' argument")
        elif left != NPY_NAT:
            result[i] = left
        elif right != NPY_NAT:
            result[i] = right
        else:
            # Handle nonexistent times
            if shift_forward or shift_backward or shift_delta != 0:
                # Shift the nonexistent time to the closest existing time
                remaining_mins = val % HOUR_NANOS
                if shift_delta != 0:
                    # Validate that we don't relocalize on another nonexistent
                    # time
                    if -1 < shift_delta + remaining_mins < HOUR_NANOS:
                        raise ValueError(
                            f"The provided timedelta will relocalize on a "
                            f"nonexistent time: {nonexistent}"
                        )
                    new_local = val + shift_delta
                elif shift_forward:
                    new_local = val + (HOUR_NANOS - remaining_mins)
                else:
                    # Subtract 1 since the beginning hour is _inclusive_ of
                    # nonexistent times
                    new_local = val - remaining_mins - 1
                delta_idx = trans.searchsorted(new_local, side='right')
                # Shift the delta_idx back by 1 if the UTC offset of
                # the target tz is greater than 0 and we're moving forward
                # or vice versa
                first_delta = deltas[0]
                if (shift_forward or shift_delta > 0) and first_delta > 0:
                    delta_idx_offset = 1
                elif (shift_backward or shift_delta < 0) and first_delta < 0:
                    delta_idx_offset = 1
                else:
                    delta_idx_offset = 0
                delta_idx = delta_idx - delta_idx_offset
                result[i] = new_local - deltas[delta_idx]
            elif fill_nonexist:
                result[i] = NPY_NAT
            else:
                stamp = _render_tstamp(val)
                raise pytz.NonExistentTimeError(stamp)

    return result


cdef inline Py_ssize_t bisect_right_i8(int64_t *data,
                                       int64_t val, Py_ssize_t n):
    """
    Binary search over the first ``n`` entries of ``data``.

    Returns the first index whose entry is strictly greater than ``val``
    (``n`` if there is none). The search assumes ``data`` is sorted in
    ascending order and requires ``n >= 1``.
    """
    cdef:
        Py_ssize_t pivot, left = 0, right = n

    assert n >= 1

    # edge cases
    if val > data[n - 1]:
        return n

    if val < data[0]:
        return 0

    while left < right:
        pivot = left + (right - left) // 2

        if data[pivot] <= val:
            left = pivot + 1
        else:
            right = pivot

    return left


cdef inline str _render_tstamp(int64_t val):
    """ Helper function to render exception messages"""
    # Imported lazily, at call time, only when an error message is built.
    from pandas._libs.tslibs.timestamps import Timestamp
    return str(Timestamp(val))


# ----------------------------------------------------------------------
# Timezone Conversion

cdef int64_t tz_convert_utc_to_tzlocal(
    int64_t utc_val, tzinfo tz, bint* fold=NULL
) except? -1:
    """
    Parameters
    ----------
    utc_val : int64_t
    tz : tzinfo
    fold : bint*
        pointer to fold: whether datetime ends up in a fold or not
        after adjustment

    Returns
    -------
    local_val : int64_t
    """
    return _tz_convert_tzlocal_utc(utc_val, tz, to_utc=False, fold=fold)


cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz):
    """
    Convert the val (in i8) from UTC to tz

    This is a single value version of tz_convert_from_utc.

    Parameters
    ----------
    val : int64
    tz : tzinfo

    Returns
    -------
    converted: int64
    """
    cdef:
        int64_t delta
        int64_t[:] deltas
        ndarray[int64_t, ndim=1] trans
        intp_t pos

    if val == NPY_NAT:
        return val

    if is_utc(tz):
        return val
    elif is_tzlocal(tz):
        return _tz_convert_tzlocal_utc(val, tz, to_utc=False)
    elif is_fixed_offset(tz):
        _, deltas, _ = get_dst_info(tz)
        delta = deltas[0]
        return val + delta
    else:
        trans, deltas, _ = get_dst_info(tz)
        # Index of the last transition at or before ``val``
        pos = trans.searchsorted(val, side="right") - 1
        return val + deltas[pos]


def tz_convert_from_utc(const int64_t[:] vals, tzinfo tz):
    """
    Convert the values (in i8) from UTC to tz

    Parameters
    ----------
    vals : int64 ndarray
    tz : tzinfo

    Returns
    -------
    int64 ndarray of converted
    """
    cdef:
        const int64_t[:] converted

    if len(vals) == 0:
        return np.array([], dtype=np.int64)

    converted = _tz_convert_from_utc(vals, tz)
    return np.array(converted, dtype=np.int64)


@cython.boundscheck(False)
@cython.wraparound(False)
cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] vals, tzinfo tz):
    """
    Convert the given values (in i8) from UTC to tz.

    Parameters
    ----------
    vals : int64 ndarray
    tz : tzinfo

    Returns
    -------
    converted : const int64_t[:]
        When ``tz`` is UTC, ``vals`` itself is returned (no copy).

    Raises
    ------
    ValueError
        If a value lies before the first entry of the transition info.
    """
    cdef:
        int64_t[:] converted, deltas
        Py_ssize_t i, n = len(vals)
        int64_t val, delta
        intp_t[:] pos
        ndarray[int64_t] trans
        str typ

    if is_utc(tz):
        return vals
    elif is_tzlocal(tz):
        converted = np.empty(n, dtype=np.int64)
        for i in range(n):
            val = vals[i]
            if val == NPY_NAT:
                converted[i] = NPY_NAT
            else:
                converted[i] = _tz_convert_tzlocal_utc(val, tz, to_utc=False)
    else:
        converted = np.empty(n, dtype=np.int64)

        trans, deltas, typ = get_dst_info(tz)

        if typ not in ["pytz", "dateutil"]:
            # FixedOffset, we know len(deltas) == 1
            delta = deltas[0]

            for i in range(n):
                val = vals[i]
                if val == NPY_NAT:
                    converted[i] = val
                else:
                    converted[i] = val + delta

        else:
            pos = trans.searchsorted(vals, side="right") - 1

            for i in range(n):
                val = vals[i]
                if val == NPY_NAT:
                    converted[i] = val
                else:
                    if pos[i] < 0:
                        # TODO: How is this reached?  Should we be checking for
                        #  it elsewhere?
                        raise ValueError("First time before start of DST info")

                    converted[i] = val + deltas[pos[i]]

    return converted


# OSError may be thrown by tzlocal on windows at or close to 1970-01-01
#  see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241
cdef inline int64_t _tzlocal_get_offset_components(int64_t val, tzinfo tz,
                                                   bint to_utc,
                                                   bint *fold=NULL) except? -1:
    """
    Calculate offset in nanoseconds needed to convert the i8 representation of
    a datetime from a tzlocal timezone to UTC, or vice-versa.

    Parameters
    ----------
    val : int64_t
    tz : tzinfo
    to_utc : bint
        True if converting tzlocal _to_ UTC, False if going the other direction
    fold : bint*, default NULL
        pointer to fold: whether datetime ends up in a fold or not
        after adjustment

    Returns
    -------
    delta : int64_t

    Notes
    -----
    Sets fold by pointer; fold is only written when ``to_utc`` is False.
    """
    cdef:
        npy_datetimestruct dts
        datetime dt
        timedelta td

    dt64_to_dtstruct(val, &dts)
    # Only fields down to microseconds are passed to the datetime constructor
    dt = datetime(dts.year, dts.month, dts.day, dts.hour,
                  dts.min, dts.sec, dts.us)
    # tz.utcoffset only makes sense if datetime
    # is _wall time_, so if val is a UTC timestamp convert to wall time
    if not to_utc:
        dt = dt.replace(tzinfo=tzutc())
        dt = dt.astimezone(tz)

        if fold is not NULL:
            fold[0] = dt.fold

    td = tz.utcoffset(dt)
    return int(td.total_seconds() * 1_000_000_000)


# OSError may be thrown by tzlocal on windows at or close to 1970-01-01
#  see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241
cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True,
                                     bint* fold=NULL) except? -1:
    """
    Convert the i8 representation of a datetime from a tzlocal timezone to
    UTC, or vice-versa.

    Private, not intended for use outside of tslibs.conversion

    Parameters
    ----------
    val : int64_t
    tz : tzinfo
    to_utc : bint
        True if converting tzlocal _to_ UTC, False if going the other direction
    fold : bint*
        pointer to fold: whether datetime ends up in a fold or not
        after adjustment

    Returns
    -------
    result : int64_t

    Notes
    -----
    Sets fold by pointer
    """
    cdef:
        int64_t delta

    delta = _tzlocal_get_offset_components(val, tz, to_utc, fold)

    # Wall time = UTC + offset, so subtract when going to UTC and add
    #  when going from UTC.
    if to_utc:
        return val - delta
    else:
        return val + delta