mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-04-22 10:28:02 +00:00
600 lines
18 KiB
Cython
600 lines
18 KiB
Cython
"""
|
|
timezone conversion
|
|
"""
|
|
import cython
|
|
from cython import Py_ssize_t
|
|
|
|
from cpython.datetime cimport (
|
|
PyDateTime_IMPORT,
|
|
PyDelta_Check,
|
|
datetime,
|
|
timedelta,
|
|
tzinfo,
|
|
)
|
|
|
|
PyDateTime_IMPORT
|
|
|
|
from dateutil.tz import tzutc
|
|
import numpy as np
|
|
import pytz
|
|
|
|
cimport numpy as cnp
|
|
from numpy cimport (
|
|
int64_t,
|
|
intp_t,
|
|
ndarray,
|
|
uint8_t,
|
|
)
|
|
|
|
cnp.import_array()
|
|
|
|
from pandas._libs.tslibs.ccalendar cimport (
|
|
DAY_NANOS,
|
|
HOUR_NANOS,
|
|
)
|
|
from pandas._libs.tslibs.nattype cimport NPY_NAT
|
|
from pandas._libs.tslibs.np_datetime cimport (
|
|
dt64_to_dtstruct,
|
|
npy_datetimestruct,
|
|
)
|
|
from pandas._libs.tslibs.timezones cimport (
|
|
get_dst_info,
|
|
get_utcoffset,
|
|
is_fixed_offset,
|
|
is_tzlocal,
|
|
is_utc,
|
|
)
|
|
|
|
|
|
cdef int64_t tz_localize_to_utc_single(
    int64_t val, tzinfo tz, object ambiguous=None, object nonexistent=None,
) except? -1:
    """
    Scalar counterpart of tz_localize_to_utc.

    Localizes a single wall-time i8 value in ``tz`` and returns the
    corresponding UTC i8 value.  ``ambiguous`` and ``nonexistent`` are only
    forwarded on the general path that delegates to tz_localize_to_utc;
    see tz_localize_to_utc.__doc__ for their meaning.
    """
    cdef:
        int64_t offset
        int64_t[:] offsets

    # NaT, UTC and "no timezone" all pass through untouched.
    if val == NPY_NAT or is_utc(tz) or tz is None:
        return val

    if is_tzlocal(tz):
        return _tz_convert_tzlocal_utc(val, tz, to_utc=True)

    if is_fixed_offset(tz):
        # TODO: in this case we should be able to use get_utcoffset,
        #  that returns None for e.g. 'dateutil//usr/share/zoneinfo/Etc/GMT-9'
        _, offsets, _ = get_dst_info(tz)
        offset = offsets[0]
        return val - offset

    # General case: reuse the vectorized implementation on a 1-element array.
    localized = tz_localize_to_utc(
        np.array([val], dtype="i8"),
        tz,
        ambiguous=ambiguous,
        nonexistent=nonexistent,
    )
    return localized[0]
|
|
|
|
|
|
@cython.boundscheck(False)
@cython.wraparound(False)
def tz_localize_to_utc(ndarray[int64_t] vals, tzinfo tz, object ambiguous=None,
                       object nonexistent=None):
    """
    Localize tzinfo-naive i8 to given time zone (using pytz). If
    there are ambiguities in the values, raise AmbiguousTimeError.

    Parameters
    ----------
    vals : ndarray[int64_t]
    tz : tzinfo or None
    ambiguous : str, bool, or arraylike
        When clocks moved backward due to DST, ambiguous times may arise.
        For example in Central European Time (UTC+01), when going from 03:00
        DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC
        and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter
        dictates how ambiguous times should be handled.

        - 'infer' will attempt to infer fall dst-transition hours based on
          order
        - bool-ndarray where True signifies a DST time, False signifies a
          non-DST time (note that this flag is only applicable for ambiguous
          times, but the array must have the same length as vals)
        - bool if True, treat all vals as DST. If False, treat them as non-DST
        - 'NaT' will return NaT where there are ambiguous times

    nonexistent : {None, "NaT", "shift_forward", "shift_backward", "raise", \
timedelta-like}
        How to handle non-existent times when converting wall times to UTC

    Returns
    -------
    localized : ndarray[int64_t]
        For UTC or ``tz is None`` the input array itself is returned.

    Raises
    ------
    pytz.AmbiguousTimeError
        If an ambiguous wall time cannot be resolved with ``ambiguous``.
    pytz.NonExistentTimeError
        If a wall time does not exist and ``nonexistent`` is None or 'raise'.
    ValueError
        If ``ambiguous`` is array-like with a length different from ``vals``,
        if ``nonexistent`` is not a recognised option, or if a timedelta
        ``nonexistent`` would land on another nonexistent time.
    """
    cdef:
        int64_t[:] deltas, idx_shifted_left, idx_shifted_right
        ndarray[uint8_t, cast=True] ambiguous_array, neither_nat, both_eq
        Py_ssize_t i, ntrans, n = len(vals)
        Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right
        int64_t *tdata
        int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins
        int64_t first_delta
        int64_t shift_delta = 0
        ndarray[int64_t] trans, result, result_a, result_b, dst_hours, delta
        ndarray trans_idx, grp, a_idx, b_idx, one_diff
        bint infer_dst = False, is_dst = False, fill = False
        bint shift_forward = False, shift_backward = False
        bint fill_nonexist = False
        list trans_grp
        str stamp

    # Vectorized version of DstTzInfo.localize
    if is_utc(tz) or tz is None:
        return vals

    result = np.empty(n, dtype=np.int64)

    if is_tzlocal(tz):
        for i in range(n):
            v = vals[i]
            if v == NPY_NAT:
                result[i] = NPY_NAT
            else:
                result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=True)
        return result

    # silence false-positive compiler warning
    ambiguous_array = np.empty(0, dtype=bool)
    if isinstance(ambiguous, str):
        if ambiguous == 'infer':
            infer_dst = True
        elif ambiguous == 'NaT':
            fill = True
    elif isinstance(ambiguous, bool):
        is_dst = True
        if ambiguous:
            ambiguous_array = np.ones(n, dtype=bool)
        else:
            ambiguous_array = np.zeros(n, dtype=bool)
    elif hasattr(ambiguous, '__iter__'):
        is_dst = True
        if len(ambiguous) != len(vals):
            raise ValueError("Length of ambiguous bool-array must be "
                             "the same size as vals")
        ambiguous_array = np.asarray(ambiguous, dtype=bool)

    if nonexistent == 'NaT':
        fill_nonexist = True
    elif nonexistent == 'shift_forward':
        shift_forward = True
    elif nonexistent == 'shift_backward':
        shift_backward = True
    elif PyDelta_Check(nonexistent):
        from .timedeltas import delta_to_nanoseconds
        shift_delta = delta_to_nanoseconds(nonexistent)
    elif nonexistent not in ('raise', None):
        msg = ("nonexistent must be one of {'NaT', 'raise', 'shift_forward', "
               "'shift_backward'} or a timedelta object")
        raise ValueError(msg)

    trans, deltas, _ = get_dst_info(tz)

    tdata = <int64_t*>cnp.PyArray_DATA(trans)
    ntrans = len(trans)

    # Determine whether each date lies left of the DST transition (store in
    # result_a) or right of the DST transition (store in result_b)
    result_a = np.empty(n, dtype=np.int64)
    result_b = np.empty(n, dtype=np.int64)
    result_a[:] = NPY_NAT
    result_b[:] = NPY_NAT

    idx_shifted_left = (np.maximum(0, trans.searchsorted(
        vals - DAY_NANOS, side='right') - 1)).astype(np.int64)

    idx_shifted_right = (np.maximum(0, trans.searchsorted(
        vals + DAY_NANOS, side='right') - 1)).astype(np.int64)

    for i in range(n):
        val = vals[i]
        if val == NPY_NAT:
            # NaT has no wall time.  Skipping it keeps result_a/result_b at
            # NaT, avoids int64 overflow in ``val - delta`` and prevents
            # ``bisect_right_i8(...) - 1`` from becoming -1, which would be an
            # unchecked out-of-bounds read of ``deltas`` (boundscheck and
            # wraparound are disabled here).  It also stops NaT rows from
            # being mistaken for ambiguous times when ambiguous='infer'.
            continue

        v_left = val - deltas[idx_shifted_left[i]]
        pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1
        # timestamp falls to the left side of the DST transition
        if v_left + deltas[pos_left] == val:
            result_a[i] = v_left

        v_right = val - deltas[idx_shifted_right[i]]
        pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1
        # timestamp falls to the right side of the DST transition
        if v_right + deltas[pos_right] == val:
            result_b[i] = v_right

    # silence false-positive compiler warning
    dst_hours = np.empty(0, dtype=np.int64)
    if infer_dst:
        dst_hours = np.empty(n, dtype=np.int64)
        dst_hours[:] = NPY_NAT

        # Get the ambiguous hours (given the above, these are the hours
        # where result_a != result_b and neither of them are NAT)
        neither_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT)
        both_eq = result_a == result_b
        trans_idx = np.squeeze(np.nonzero(np.logical_and(neither_nat, ~both_eq)))
        if trans_idx.size == 1:
            stamp = _render_tstamp(vals[trans_idx])
            raise pytz.AmbiguousTimeError(
                f"Cannot infer dst time from {stamp} as there "
                f"are no repeated times")
        # Split the array into contiguous chunks (where the difference between
        # indices is 1).  These are effectively dst transitions in different
        # years which is useful for checking that there is not an ambiguous
        # transition in an individual year.
        if trans_idx.size > 0:
            one_diff = np.where(np.diff(trans_idx) != 1)[0] + 1
            trans_grp = np.array_split(trans_idx, one_diff)

            # Iterate through each day, if there are no hours where the
            # delta is negative (indicates a repeat of hour) the switch
            # cannot be inferred
            for grp in trans_grp:

                delta = np.diff(result_a[grp])
                if grp.size == 1 or np.all(delta > 0):
                    stamp = _render_tstamp(vals[grp[0]])
                    raise pytz.AmbiguousTimeError(stamp)

                # Find the index for the switch and pull from a for dst and b
                # for standard
                switch_idx = (delta <= 0).nonzero()[0]
                if switch_idx.size > 1:
                    raise pytz.AmbiguousTimeError(
                        f"There are {switch_idx.size} dst switches when "
                        f"there should only be 1.")
                switch_idx = switch_idx[0] + 1
                # Pull the only index and adjust
                a_idx = grp[:switch_idx]
                b_idx = grp[switch_idx:]
                dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx]))

    for i in range(n):
        val = vals[i]
        left = result_a[i]
        right = result_b[i]
        if val == NPY_NAT:
            result[i] = val
        elif left != NPY_NAT and right != NPY_NAT:
            if left == right:
                result[i] = left
            else:
                # Ambiguous wall time: two distinct UTC candidates.
                if infer_dst and dst_hours[i] != NPY_NAT:
                    result[i] = dst_hours[i]
                elif is_dst:
                    if ambiguous_array[i]:
                        result[i] = left
                    else:
                        result[i] = right
                elif fill:
                    result[i] = NPY_NAT
                else:
                    stamp = _render_tstamp(val)
                    raise pytz.AmbiguousTimeError(
                        f"Cannot infer dst time from {stamp}, try using the "
                        f"'ambiguous' argument")
        elif left != NPY_NAT:
            result[i] = left
        elif right != NPY_NAT:
            result[i] = right
        else:
            # Handle nonexistent times
            if shift_forward or shift_backward or shift_delta != 0:
                # Shift the nonexistent time to the closest existing time
                remaining_mins = val % HOUR_NANOS
                if shift_delta != 0:
                    # Validate that we don't relocalize on another nonexistent
                    # time
                    if -1 < shift_delta + remaining_mins < HOUR_NANOS:
                        raise ValueError(
                            f"The provided timedelta will relocalize on a "
                            f"nonexistent time: {nonexistent}"
                        )
                    new_local = val + shift_delta
                elif shift_forward:
                    new_local = val + (HOUR_NANOS - remaining_mins)
                else:
                    # Subtract 1 since the beginning hour is _inclusive_ of
                    # nonexistent times
                    new_local = val - remaining_mins - 1
                delta_idx = trans.searchsorted(new_local, side='right')
                # Shift the delta_idx by if the UTC offset of
                # the target tz is greater than 0 and we're moving forward
                # or vice versa
                first_delta = deltas[0]
                if (shift_forward or shift_delta > 0) and first_delta > 0:
                    delta_idx_offset = 1
                elif (shift_backward or shift_delta < 0) and first_delta < 0:
                    delta_idx_offset = 1
                else:
                    delta_idx_offset = 0
                delta_idx = delta_idx - delta_idx_offset
                result[i] = new_local - deltas[delta_idx]
            elif fill_nonexist:
                result[i] = NPY_NAT
            else:
                stamp = _render_tstamp(val)
                raise pytz.NonExistentTimeError(stamp)

    return result
|
|
|
|
|
|
cdef inline Py_ssize_t bisect_right_i8(int64_t *data,
                                       int64_t val, Py_ssize_t n):
    """
    Binary search over the first ``n`` entries of ``data``.

    Returns the index just past the last entry that is ``<= val`` (0 when
    ``val`` is below the first entry, ``n`` when it is above the last one).
    NOTE(review): the search presumes ``data`` is sorted ascending -- this is
    not checked here.
    """
    cdef:
        Py_ssize_t mid, lo = 0, hi = n

    assert n >= 1

    # Fast paths: val lies entirely outside the stored range.
    if data[n - 1] < val:
        return n

    if data[0] > val:
        return 0

    # Invariant: the answer lies in [lo, hi].
    while lo < hi:
        mid = lo + (hi - lo) // 2

        if val < data[mid]:
            hi = mid
        else:
            lo = mid + 1

    return lo
|
|
|
|
|
|
cdef inline str _render_tstamp(int64_t val):
    """
    Format an i8 value as the string form of a Timestamp, for use in
    exception messages.
    """
    # Imported at call time rather than at module level.
    from pandas._libs.tslibs.timestamps import Timestamp

    ts = Timestamp(val)
    return str(ts)
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Timezone Conversion
|
|
|
|
cdef int64_t tz_convert_utc_to_tzlocal(
    int64_t utc_val, tzinfo tz, bint* fold=NULL
) except? -1:
    """
    Convert a UTC i8 value to the tzlocal wall-time i8 value.

    Thin wrapper around _tz_convert_tzlocal_utc with ``to_utc=False``.

    Parameters
    ----------
    utc_val : int64_t
    tz : tzinfo
    fold : bint*
        pointer to fold: whether datetime ends up in a fold or not
        after adjustment

    Returns
    -------
    local_val : int64_t
    """
    cdef:
        int64_t local_val

    local_val = _tz_convert_tzlocal_utc(utc_val, tz, to_utc=False, fold=fold)
    return local_val
|
|
|
|
|
|
cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz):
    """
    Convert the val (in i8) from UTC to tz

    This is a single value version of tz_convert_from_utc.

    Parameters
    ----------
    val : int64
    tz : tzinfo

    Returns
    -------
    converted: int64
        ``val`` unchanged for NaT or a UTC ``tz``; otherwise ``val`` plus the
        UTC offset that applies to it in ``tz``.
    """
    cdef:
        int64_t offset
        int64_t[:] offsets
        ndarray[int64_t, ndim=1] transitions
        intp_t idx

    # Nothing to shift for NaT or when the target is UTC itself.
    if val == NPY_NAT or is_utc(tz):
        return val

    if is_tzlocal(tz):
        return _tz_convert_tzlocal_utc(val, tz, to_utc=False)

    if is_fixed_offset(tz):
        # A fixed offset has a single delta that applies everywhere.
        _, offsets, _ = get_dst_info(tz)
        offset = offsets[0]
        return val + offset

    # DST-aware zone: pick the offset of the last transition at or before val.
    transitions, offsets, _ = get_dst_info(tz)
    idx = transitions.searchsorted(val, side="right") - 1
    return val + offsets[idx]
|
|
|
|
|
|
def tz_convert_from_utc(const int64_t[:] vals, tzinfo tz):
    """
    Convert the values (in i8) from UTC to tz

    Python-visible wrapper around _tz_convert_from_utc that always hands
    back a new int64 ndarray.

    Parameters
    ----------
    vals : int64 ndarray
    tz : tzinfo

    Returns
    -------
    int64 ndarray of converted
    """
    cdef:
        const int64_t[:] local_vals

    # Empty input short-circuits without consulting the timezone.
    if not len(vals):
        return np.array([], dtype=np.int64)

    local_vals = _tz_convert_from_utc(vals, tz)
    return np.array(local_vals, dtype=np.int64)
|
|
|
|
|
|
@cython.boundscheck(False)
@cython.wraparound(False)
cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] vals, tzinfo tz):
    """
    Convert the given UTC values (in i8) to wall times in tz.

    NaT entries are passed through unchanged.  For a UTC ``tz`` the input
    memoryview itself is returned; otherwise a newly allocated buffer is.

    Parameters
    ----------
    vals : int64 ndarray
    tz : tzinfo

    Returns
    -------
    converted : ndarray[int64_t]

    Raises
    ------
    ValueError
        For a pytz/dateutil zone, if a non-NaT value sorts before the first
        transition reported by get_dst_info.
    """
    cdef:
        int64_t[:] out, offsets
        Py_ssize_t k, count = len(vals)
        int64_t utc_val, fixed_offset
        intp_t[:] trans_pos
        ndarray[int64_t] transitions
        str tz_kind

    if is_utc(tz):
        return vals

    out = np.empty(count, dtype=np.int64)

    if is_tzlocal(tz):
        # tzlocal offsets are looked up one value at a time.
        for k in range(count):
            utc_val = vals[k]
            if utc_val == NPY_NAT:
                out[k] = NPY_NAT
            else:
                out[k] = _tz_convert_tzlocal_utc(utc_val, tz, to_utc=False)
        return out

    transitions, offsets, tz_kind = get_dst_info(tz)

    if tz_kind in ("pytz", "dateutil"):
        # Index of the last transition at or before each value.
        trans_pos = transitions.searchsorted(vals, side="right") - 1

        for k in range(count):
            utc_val = vals[k]
            if utc_val == NPY_NAT:
                out[k] = utc_val
                continue

            if trans_pos[k] < 0:
                # TODO: How is this reached?  Should we be checking for
                #  it elsewhere?
                raise ValueError("First time before start of DST info")

            out[k] = utc_val + offsets[trans_pos[k]]
    else:
        # FixedOffset, we know len(deltas) == 1
        fixed_offset = offsets[0]

        for k in range(count):
            utc_val = vals[k]
            if utc_val == NPY_NAT:
                out[k] = utc_val
            else:
                out[k] = utc_val + fixed_offset

    return out
|
|
|
|
|
|
# OSError may be thrown by tzlocal on windows at or close to 1970-01-01
|
|
# see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241
|
|
cdef inline int64_t _tzlocal_get_offset_components(int64_t val, tzinfo tz,
                                                   bint to_utc,
                                                   bint *fold=NULL) except? -1:
    """
    Calculate offset in nanoseconds needed to convert the i8 representation of
    a datetime from a tzlocal timezone to UTC, or vice-versa.

    Parameters
    ----------
    val : int64_t
    tz : tzinfo
    to_utc : bint
        True if converting tzlocal _to_ UTC, False if going the other direction
    fold : bint*, default NULL
        pointer to fold: whether datetime ends up in a fold or not
        after adjustment

    Returns
    -------
    delta : int64_t

    Notes
    -----
    Sets fold by pointer

    The intermediate datetime is built from the fields down to microseconds
    only, so any finer component of ``val`` does not take part in the lookup.
    """
    cdef:
        npy_datetimestruct fields
        datetime wall
        timedelta offset

    dt64_to_dtstruct(val, &fields)
    wall = datetime(fields.year, fields.month, fields.day, fields.hour,
                    fields.min, fields.sec, fields.us)

    # tz.utcoffset only makes sense if datetime
    # is _wall time_, so if val is a UTC timestamp convert to wall time
    if not to_utc:
        wall = wall.replace(tzinfo=tzutc()).astimezone(tz)

    if fold is not NULL:
        fold[0] = wall.fold

    offset = tz.utcoffset(wall)
    return int(offset.total_seconds() * 1_000_000_000)
|
|
|
|
|
|
# OSError may be thrown by tzlocal on windows at or close to 1970-01-01
|
|
# see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241
|
|
cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True,
                                     bint* fold=NULL) except? -1:
    """
    Convert the i8 representation of a datetime from a tzlocal timezone to
    UTC, or vice-versa.

    Private, not intended for use outside of tslibs.conversion

    Parameters
    ----------
    val : int64_t
    tz : tzinfo
    to_utc : bint
        True if converting tzlocal _to_ UTC, False if going the other direction
    fold : bint*
        pointer to fold: whether datetime ends up in a fold or not
        after adjustment

    Returns
    -------
    result : int64_t
        ``val`` minus the offset when ``to_utc`` is true, ``val`` plus the
        offset otherwise.

    Notes
    -----
    Sets fold by pointer
    """
    cdef:
        int64_t offset

    offset = _tzlocal_get_offset_components(val, tz, to_utc, fold)

    # Wall time -> UTC subtracts the offset; UTC -> wall time adds it.
    return val - offset if to_utc else val + offset
|