mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-04-22 10:28:02 +00:00
859 lines
24 KiB
Cython
859 lines
24 KiB
Cython
import cython
|
|
import numpy as np
|
|
|
|
cimport numpy as cnp
|
|
from numpy cimport (
|
|
int32_t,
|
|
int64_t,
|
|
intp_t,
|
|
ndarray,
|
|
)
|
|
|
|
cnp.import_array()
|
|
|
|
import pytz
|
|
|
|
# stdlib datetime imports
|
|
|
|
from cpython.datetime cimport (
|
|
PyDate_Check,
|
|
PyDateTime_Check,
|
|
PyDateTime_IMPORT,
|
|
datetime,
|
|
time,
|
|
tzinfo,
|
|
)
|
|
|
|
PyDateTime_IMPORT
|
|
|
|
from pandas._libs.tslibs.base cimport ABCTimestamp
|
|
from pandas._libs.tslibs.np_datetime cimport (
|
|
NPY_DATETIMEUNIT,
|
|
NPY_FR_ns,
|
|
_string_to_dts,
|
|
check_dts_bounds,
|
|
dt64_to_dtstruct,
|
|
dtstruct_to_dt64,
|
|
get_datetime64_unit,
|
|
get_datetime64_value,
|
|
npy_datetime,
|
|
npy_datetimestruct,
|
|
pandas_datetime_to_datetimestruct,
|
|
pydatetime_to_dt64,
|
|
)
|
|
|
|
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
|
|
|
|
from pandas._libs.tslibs.timezones cimport (
|
|
get_dst_info,
|
|
get_utcoffset,
|
|
is_fixed_offset,
|
|
is_tzlocal,
|
|
is_utc,
|
|
maybe_get_tz,
|
|
tz_compare,
|
|
utc_pytz as UTC,
|
|
)
|
|
from pandas._libs.tslibs.util cimport (
|
|
is_datetime64_object,
|
|
is_float_object,
|
|
is_integer_object,
|
|
)
|
|
|
|
from pandas._libs.tslibs.parsing import parse_datetime_string
|
|
|
|
from pandas._libs.tslibs.nattype cimport (
|
|
NPY_NAT,
|
|
c_NaT as NaT,
|
|
c_nat_strings as nat_strings,
|
|
checknull_with_nat,
|
|
)
|
|
from pandas._libs.tslibs.tzconversion cimport (
|
|
tz_convert_utc_to_tzlocal,
|
|
tz_localize_to_utc_single,
|
|
)
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Constants
|
|
|
|
DT64NS_DTYPE = np.dtype('M8[ns]')
|
|
TD64NS_DTYPE = np.dtype('m8[ns]')
|
|
|
|
|
|
class OutOfBoundsTimedelta(ValueError):
    """
    Raised when encountering a timedelta value that cannot be represented
    as a timedelta64[ns].

    This is the timedelta analogue of OutOfBoundsDatetime.
    """
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Unit Conversion Helpers
|
|
|
|
cdef inline int64_t cast_from_unit(object ts, str unit) except? -1:
    """
    Convert a scalar expressed in `unit` into an integer count of nanoseconds.

    The fractional part of a float input is rounded to the precision that
    `precision_from_unit` reports for `unit` before it is scaled.

    Parameters
    ----------
    ts : int, float, or None
        Value to convert. When None, the nanoseconds-per-unit multiplier
        itself is returned.
    unit : str

    Returns
    -------
    int64_t
    """
    cdef:
        int64_t multiplier
        int precision

    multiplier, precision = precision_from_unit(unit)

    if ts is None:
        # Nothing to scale: the caller only wants the conversion factor
        return multiplier

    # Scale the integral and fractional parts separately to avoid
    # precision issues from the float -> int conversion
    whole = <int64_t>ts
    fraction = ts - whole
    if precision:
        fraction = round(fraction, precision)
    return <int64_t>(whole * multiplier) + <int64_t>(fraction * multiplier)
|
|
|
|
|
|
cpdef inline (int64_t, int) precision_from_unit(str unit):
    """
    Map a unit string to its nanosecond multiplier and rounding precision.

    Returns
    -------
    (int64_t, int)
        The number of nanoseconds in one `unit`, and the number of decimal
        places to which a fractional value in that unit should be rounded.

    Raises
    ------
    ValueError
        If `unit` is not one of the recognized unit strings.

    Notes
    -----
    The caller is responsible for ensuring that the default value of "ns"
    takes the place of None.
    """
    cdef:
        int64_t ns_per_second = 1_000_000_000
        int64_t factor
        # Every unit of one second or coarser rounds to 9 decimal places
        int digits = 9

    if unit == "Y":
        factor = ns_per_second * 31556952
    elif unit == "M":
        factor = ns_per_second * 2629746
    elif unit == "W":
        factor = ns_per_second * 3600 * 24 * 7
    elif unit == "D" or unit == "d":
        factor = ns_per_second * 3600 * 24
    elif unit == "h":
        factor = ns_per_second * 3600
    elif unit == "m":
        factor = ns_per_second * 60
    elif unit == "s":
        factor = ns_per_second
    elif unit == "ms":
        factor = 1_000_000
        digits = 6
    elif unit == "us":
        factor = 1000
        digits = 3
    elif unit == "ns" or unit is None:
        factor = 1
        digits = 0
    else:
        raise ValueError(f"cannot cast unit {unit}")
    return factor, digits
|
|
|
|
|
|
cdef inline int64_t get_datetime64_nanos(object val) except? -1:
    """
    Return the int64 nanosecond value of a np.datetime64 scalar.

    NaT is passed through as NPY_NAT. A value whose unit is already
    nanoseconds is returned as-is; any other unit is expanded to a
    datetimestruct, bounds-checked, and re-encoded via dtstruct_to_dt64.
    """
    cdef:
        npy_datetimestruct dts
        NPY_DATETIMEUNIT unit
        npy_datetime raw

    raw = get_datetime64_value(val)
    if raw == NPY_NAT:
        return NPY_NAT

    unit = get_datetime64_unit(val)
    if unit == NPY_FR_ns:
        # Already at nanosecond resolution, nothing to convert
        return raw

    pandas_datetime_to_datetimestruct(raw, unit, &dts)
    check_dts_bounds(&dts)
    return dtstruct_to_dt64(&dts)
|
|
|
|
|
|
@cython.boundscheck(False)
@cython.wraparound(False)
def ensure_datetime64ns(arr: ndarray, copy: bool = True):
    """
    Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]'

    Parameters
    ----------
    arr : ndarray
    copy : bool, default True
        Only consulted when no conversion is needed (empty input, or input
        that is already nanosecond resolution); a converted result is
        always a newly allocated array.

    Returns
    -------
    ndarray with dtype datetime64[ns]

    Raises
    ------
    ValueError
        If the array's datetime64 dtype carries no unit.
    OutOfBoundsDatetime
        Presumably raised by check_dts_bounds when a value does not fit in
        datetime64[ns]; ensure_timedelta64ns relies on this.
    """
    cdef:
        Py_ssize_t i, n = arr.size
        const int64_t[:] ivalues
        int64_t[:] iresult
        NPY_DATETIMEUNIT unit
        npy_datetimestruct dts

    if (<object>arr).dtype.byteorder == ">":
        # GH#29684 we incorrectly get OutOfBoundsDatetime if we dont swap
        dtype = arr.dtype
        arr = arr.astype(dtype.newbyteorder("<"))

    if arr.size == 0:
        result = arr.view(DT64NS_DTYPE)
        if copy:
            result = result.copy()
        return result

    unit = get_datetime64_unit(arr.flat[0])
    if unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
        # without raising explicitly here, we end up with a SystemError
        # built-in function ensure_datetime64ns returned a result with an error
        raise ValueError("datetime64/timedelta64 must have a unit specified")

    if unit == NPY_FR_ns:
        # Check this before allocating result for perf, might save some memory
        if copy:
            return arr.copy()
        return arr

    ivalues = arr.view(np.int64).ravel("K")

    result = np.empty_like(arr, dtype=DT64NS_DTYPE)
    iresult = result.ravel("K").view(np.int64)

    for i in range(n):
        if ivalues[i] != NPY_NAT:
            pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts)
            # Validate before re-encoding so that dtstruct_to_dt64 is never
            # asked to encode a struct that is out of range for nanoseconds
            check_dts_bounds(&dts)
            iresult[i] = dtstruct_to_dt64(&dts)
        else:
            iresult[i] = NPY_NAT

    return result
|
|
|
|
|
|
def ensure_timedelta64ns(arr: ndarray, copy: bool = True):
    """
    Ensure a np.timedelta64 array has dtype specifically 'timedelta64[ns]'

    Parameters
    ----------
    arr : ndarray
    copy : bool, default True

    Returns
    -------
    ndarray[timedelta64[ns]]

    Raises
    ------
    OutOfBoundsTimedelta
        If a value cannot be represented at nanosecond resolution. The
        underlying OutOfBoundsDatetime is attached as ``__cause__``.
    """
    assert arr.dtype.kind == "m", arr.dtype

    if arr.dtype == TD64NS_DTYPE:
        return arr.copy() if copy else arr

    # Re-use the datetime64 machinery to do an overflow-safe `astype`
    dtype = arr.dtype.str.replace("m8", "M8")
    dummy = arr.view(dtype)
    try:
        dt64_result = ensure_datetime64ns(dummy, copy)
    except OutOfBoundsDatetime as err:
        # Re-write the exception in terms of timedelta64 instead of dt64

        # Find the value that we are going to report as causing an overflow:
        # whichever extreme has the larger magnitude
        tdmin = arr.min()
        tdmax = arr.max()
        if np.abs(tdmin) >= np.abs(tdmax):
            bad_val = tdmin
        else:
            bad_val = tdmax

        msg = f"Out of bounds for nanosecond {arr.dtype.name} {str(bad_val)}"
        # Chain explicitly so the original failure stays visible as the cause
        raise OutOfBoundsTimedelta(msg) from err

    return dt64_result.view(TD64NS_DTYPE)
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
|
|
|
|
@cython.boundscheck(False)
@cython.wraparound(False)
def datetime_to_datetime64(ndarray[object] values):
    """
    Convert an object ndarray of datetimes (and nulls) to datetime64[ns].

    Parameters
    ----------
    values : ndarray[object]

    Returns
    -------
    result : ndarray[datetime64ns]
    inferred_tz : tzinfo or None
        The tzinfo of the first tz-aware element seen, or None when every
        datetime is naive.

    Raises
    ------
    ValueError
        If tz-aware and tz-naive datetimes are mixed, or if the tz-aware
        datetimes do not all compare equal under tz_compare.
    TypeError
        If an element is neither null nor a datetime.
    """
    cdef:
        Py_ssize_t i, n = len(values)
        object val
        int64_t[:] iresult
        npy_datetimestruct dts
        _TSObject _ts
        bint found_naive = False
        tzinfo inferred_tz = None

    result = np.empty(n, dtype='M8[ns]')
    iresult = result.view('i8')
    for i in range(n):
        val = values[i]

        if checknull_with_nat(val):
            iresult[i] = NPY_NAT
            continue

        if not PyDateTime_Check(val):
            raise TypeError(f'Unrecognized value type: {type(val)}')

        if val.tzinfo is None:
            # Naive datetime: not allowed once a tz-aware value has been seen
            found_naive = True
            if inferred_tz is not None:
                raise ValueError('Cannot mix tz-aware with '
                                 'tz-naive values')
            iresult[i] = pydatetime_to_dt64(val, &dts)
            check_dts_bounds(&dts)
            continue

        # tz-aware datetime: not allowed once a naive value has been seen
        if found_naive:
            raise ValueError('Cannot mix tz-aware with '
                             'tz-naive values')
        if inferred_tz is None:
            inferred_tz = val.tzinfo
        elif not tz_compare(val.tzinfo, inferred_tz):
            raise ValueError('Array must be all same time zone')

        _ts = convert_datetime_to_tsobject(val, None)
        iresult[i] = _ts.value
        check_dts_bounds(&_ts.dts)

    return result, inferred_tz
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# _TSObject Conversion
|
|
|
|
# lightweight C object to hold datetime & int64 pair
|
|
cdef class _TSObject:
    # Lightweight container pairing a broken-down datetime with its int64
    # value. The attributes are not declared in this block (presumably they
    # live in the matching .pxd -- confirm); for reference:
    # cdef:
    #    npy_datetimestruct dts      # npy_datetimestruct
    #    int64_t value               # numpy dt64
    #    object tzinfo
    #    bint fold

    def __cinit__(self):
        # GH 25057. Per PEP 495 the default fold is 0
        self.fold = 0

    @property
    def value(self):
        # Python-level accessor; needed so that `value` can be read
        # from lib.pyx
        return self.value
|
|
|
|
|
|
cdef convert_to_tsobject(object ts, tzinfo tz, str unit,
                         bint dayfirst, bint yearfirst, int32_t nanos=0):
    """
    Build a _TSObject (datetimestruct + int64 value) from any of:
        - None or NaT
        - np.datetime64
        - np.int64 / python int (scaled by `unit`)
        - a float (scaled by `unit`)
        - a string (delegated to _convert_str_to_tsobject)
        - python datetime or date object
        - another timestamp object

    Raises
    ------
    OutOfBoundsDatetime : ts cannot be converted within implementation bounds
    ValueError : ts is a Period
    TypeError : ts is of an unsupported type
    """
    cdef:
        _TSObject obj

    # Strings take their own parsing path
    if isinstance(ts, str):
        return _convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst)

    obj = _TSObject()

    # NB: the None/NaT test deliberately comes first in this chain
    if ts is None or ts is NaT:
        obj.value = NPY_NAT
    elif is_datetime64_object(ts):
        obj.value = get_datetime64_nanos(ts)
        if obj.value != NPY_NAT:
            dt64_to_dtstruct(obj.value, &obj.dts)
    elif is_integer_object(ts):
        try:
            ts = <int64_t>ts
        except OverflowError:
            # GH#26651 re-raise as OutOfBoundsDatetime
            raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp {ts}")
        if ts != NPY_NAT:
            # cast_from_unit(None, unit) yields the nanoseconds-per-unit factor
            ts = ts * cast_from_unit(None, unit)
            obj.value = ts
            dt64_to_dtstruct(ts, &obj.dts)
        else:
            obj.value = NPY_NAT
    elif is_float_object(ts):
        if ts == ts and ts != NPY_NAT:
            ts = cast_from_unit(ts, unit)
            obj.value = ts
            dt64_to_dtstruct(ts, &obj.dts)
        else:
            # NaN (which never equals itself) or the NaT sentinel value
            obj.value = NPY_NAT
    elif PyDateTime_Check(ts):
        return convert_datetime_to_tsobject(ts, tz, nanos)
    elif PyDate_Check(ts):
        # Promote the date to a midnight datetime so that it goes through
        # the same converter as datetimes
        ts = datetime.combine(ts, time())
        return convert_datetime_to_tsobject(ts, tz)
    else:
        from .period import Period
        if isinstance(ts, Period):
            raise ValueError("Cannot convert Period to Timestamp "
                             "unambiguously. Use to_timestamp")
        raise TypeError(f'Cannot convert input [{ts}] of type {type(ts)} to '
                        f'Timestamp')

    if tz is not None:
        _localize_tso(obj, tz)

    if obj.value != NPY_NAT:
        # check_overflows needs to run after _localize_tso
        check_dts_bounds(&obj.dts)
        check_overflows(obj)
    return obj
|
|
|
|
|
|
cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz,
                                            int32_t nanos=0):
    """
    Build a _TSObject from a datetime (or Timestamp) `ts` and an optional
    timezone `tz`.

    `nanos` lets the caller add sub-microsecond precision that a plain
    datetime cannot carry.

    Parameters
    ----------
    ts : datetime or Timestamp
        Value to be converted to _TSObject
    tz : tzinfo or None
        timezone for the timezone-aware output
    nanos : int32_t, default is 0
        nanoseconds supplement the precision of the datetime input ts

    Returns
    -------
    obj : _TSObject
    """
    cdef:
        _TSObject obj = _TSObject()
        # True only for a naive `ts` combined with a UTC `tz`, in which case
        # `tz` itself (not ts.tzinfo) is recorded on the result
        bint use_requested_tz = False

    obj.fold = ts.fold

    if tz is not None:
        tz = maybe_get_tz(tz)

        if ts.tzinfo is not None:
            # Convert the current timezone to the passed timezone
            ts = ts.astimezone(tz)
        elif not is_utc(tz):
            ts = _localize_pydatetime(ts, tz)
        else:
            use_requested_tz = True

    obj.value = pydatetime_to_dt64(ts, &obj.dts)
    if use_requested_tz:
        obj.tzinfo = tz
    else:
        obj.tzinfo = ts.tzinfo

    if obj.tzinfo is not None and not is_utc(obj.tzinfo):
        # Subtract the UTC offset, expressed in nanoseconds, from the value
        offset = get_utcoffset(obj.tzinfo, ts)
        obj.value -= int(offset.total_seconds() * 1e9)

    if isinstance(ts, ABCTimestamp):
        obj.value += <int64_t>ts.nanosecond
        obj.dts.ps = ts.nanosecond * 1000

    if nanos:
        obj.value += nanos
        obj.dts.ps = nanos * 1000

    check_dts_bounds(&obj.dts)
    check_overflows(obj)
    return obj
|
|
|
|
|
|
cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
                                                int tzoffset, tzinfo tz=None):
    """
    Build a _TSObject from a datetimestruct `dts` and the timezone offset
    `tzoffset` that accompanied it, optionally converting to `tz`.

    Parameters
    ----------
    dts: npy_datetimestruct
    tzoffset: int
        Passed directly to pytz.FixedOffset.
    tz : tzinfo or None
        timezone for the timezone-aware output.

    Returns
    -------
    obj : _TSObject
    """
    cdef:
        _TSObject obj = _TSObject()
        int64_t wall_value  # numpy dt64
        datetime aware_dt
        ndarray[int64_t] trans
        int64_t[:] deltas

    wall_value = dtstruct_to_dt64(&dts)
    obj.dts = dts
    obj.tzinfo = pytz.FixedOffset(tzoffset)
    obj.value = tz_localize_to_utc_single(wall_value, obj.tzinfo)
    if tz is None:
        check_overflows(obj)
        return obj

    # Infer fold from offset-adjusted obj.value
    # see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute
    if not is_utc(tz):
        if is_tzlocal(tz):
            tz_convert_utc_to_tzlocal(obj.value, tz, &obj.fold)
        else:
            trans, deltas, typ = get_dst_info(tz)

            if typ == 'dateutil':
                pos = trans.searchsorted(obj.value, side='right') - 1
                obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos)

    # Go through a python datetime so that the conversion is the same one
    # used for PyDateTime inputs
    aware_dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day,
                        obj.dts.hour, obj.dts.min, obj.dts.sec,
                        obj.dts.us, obj.tzinfo, fold=obj.fold)
    return convert_datetime_to_tsobject(
        aware_dt, tz, nanos=obj.dts.ps // 1000)
|
|
|
|
|
|
cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
                                        bint dayfirst=False,
                                        bint yearfirst=False):
    """
    Build a _TSObject from a string `ts` and an optional timezone `tz`.

    `_string_to_dts` is tried first; when it fails (or the follow-up
    localization raises ValueError) the string is handed to
    `parse_datetime_string` together with `dayfirst` and `yearfirst`.

    Parameters
    ----------
    ts : str
        Value to be converted to _TSObject
    tz : tzinfo or None
        timezone for the timezone-aware output
    unit : str or None
    dayfirst : bool, default False
        When parsing an ambiguous date string, interpret e.g. "3/4/1975" as
        April 3, as opposed to the standard US interpretation March 4.
    yearfirst : bool, default False
        When parsing an ambiguous date string, interpret e.g. "01/05/09"
        as "May 9, 2001", as opposed to the default "Jan 5, 2009"

    Returns
    -------
    obj : _TSObject

    Raises
    ------
    OutOfBoundsDatetime
        Propagated unchanged from the first parsing attempt.
    ValueError
        If the fallback parser cannot handle the string either.
    """
    cdef:
        npy_datetimestruct dts
        int out_local = 0, out_tzoffset = 0
        bint needs_fallback_parse = False

    if len(ts) == 0 or ts in nat_strings:
        ts = NaT
    elif ts == 'now' or ts == 'today':
        # Issue 9000, we short-circuit rather than going into
        # np_datetime_strings, which returns utc for 'now' and a normalized
        # datetime for 'today'.
        # For 'today' this is equiv to datetime.today().replace(tzinfo=tz)
        ts = datetime.now(tz)
    else:
        string_to_dts_failed = _string_to_dts(
            ts, &dts, &out_local,
            &out_tzoffset, False
        )
        try:
            if not string_to_dts_failed:
                check_dts_bounds(&dts)
                if out_local == 1:
                    return _create_tsobject_tz_using_offset(dts,
                                                            out_tzoffset, tz)

                ts = dtstruct_to_dt64(&dts)
                if tz is not None:
                    # shift for _localize_tso
                    ts = tz_localize_to_utc_single(ts, tz,
                                                   ambiguous="raise")

        except OutOfBoundsDatetime:
            # GH#19382 for just-barely-OutOfBounds falling back to dateutil
            # parser will return incorrect result because it will ignore
            # nanoseconds
            raise

        except ValueError:
            needs_fallback_parse = True

        if string_to_dts_failed or needs_fallback_parse:
            try:
                ts = parse_datetime_string(ts, dayfirst=dayfirst,
                                           yearfirst=yearfirst)
            except (ValueError, OverflowError):
                raise ValueError("could not convert string to Timestamp")

    return convert_to_tsobject(ts, tz, unit, dayfirst, yearfirst)
|
|
|
|
|
|
cdef inline check_overflows(_TSObject obj):
    """
    Check that we haven't silently overflowed in timezone conversion

    The check compares the sign of obj.value with the year in obj.dts: a
    year of 1677 requires a negative value and a year of 2262 requires a
    positive one.

    Parameters
    ----------
    obj : _TSObject

    Returns
    -------
    None

    Raises
    ------
    OutOfBoundsDatetime
    """
    # GH#12677
    cdef:
        bint underflowed = obj.dts.year == 1677 and obj.value >= 0
        bint overflowed = obj.dts.year == 2262 and obj.value <= 0

    if not (underflowed or overflowed):
        return

    # Imported lazily: only needed to build the error message
    from pandas._libs.tslibs.timestamps import Timestamp
    fmt = (f"{obj.dts.year}-{obj.dts.month:02d}-{obj.dts.day:02d} "
           f"{obj.dts.hour:02d}:{obj.dts.min:02d}:{obj.dts.sec:02d}")
    if underflowed:
        raise OutOfBoundsDatetime(
            f"Converting {fmt} underflows past {Timestamp.min}"
        )
    raise OutOfBoundsDatetime(
        f"Converting {fmt} overflows past {Timestamp.max}"
    )
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Localization
|
|
|
|
cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
    """
    Given the UTC nanosecond timestamp in obj.value, find the wall-clock
    representation of that timestamp in the given timezone.

    Parameters
    ----------
    obj : _TSObject
    tz : tzinfo

    Returns
    -------
    None

    Notes
    -----
    Sets obj.tzinfo inplace, alters obj.dts inplace. For dateutil and
    tzlocal timezones obj.fold is updated as well. For pytz timezones the
    tzinfo stored on obj is the entry of tz._tzinfos selected for the
    applicable transition, not `tz` itself.
    """
    cdef:
        ndarray[int64_t] trans
        int64_t[:] deltas
        int64_t local_val
        Py_ssize_t pos
        str typ

    assert obj.tzinfo is None

    # UTC and NaT need no adjustment of obj.dts
    if not is_utc(tz) and obj.value != NPY_NAT:
        if is_tzlocal(tz):
            local_val = tz_convert_utc_to_tzlocal(obj.value, tz, &obj.fold)
            dt64_to_dtstruct(local_val, &obj.dts)
        else:
            # Adjust datetime64 timestamp, recompute datetimestruct
            trans, deltas, typ = get_dst_info(tz)

            if is_fixed_offset(tz):
                # static/fixed tzinfo; in this case we know len(deltas) == 1
                # This can come back with `typ` of either "fixed" or None
                dt64_to_dtstruct(obj.value + deltas[0], &obj.dts)
            elif typ == 'pytz':
                # i.e. treat_tz_as_pytz(tz)
                pos = trans.searchsorted(obj.value, side='right') - 1
                tz = tz._tzinfos[tz._transition_info[pos]]
                dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
            elif typ == 'dateutil':
                # i.e. treat_tz_as_dateutil(tz)
                pos = trans.searchsorted(obj.value, side='right') - 1
                dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
                # dateutil supports fold, so we infer fold from value
                obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos)
            # Note: as of 2018-07-17 all tzinfo objects that are _not_
            # either pytz or dateutil have is_fixed_offset(tz) == True,
            # so no further branch is needed here.

    obj.tzinfo = tz
|
|
|
|
|
|
cdef inline bint _infer_tsobject_fold(
    _TSObject obj,
    const int64_t[:] trans,
    const int64_t[:] deltas,
    int32_t pos,
):
    """
    Infer the fold of a _TSObject from its value: fold is 0 unless the value
    falls within the repeated interval that follows transition `pos`.

    Parameters
    ----------
    obj : _TSObject
    trans : ndarray[int64_t]
        ndarray of offset transition points in nanoseconds since epoch.
    deltas : int64_t[:]
        array of offsets corresponding to transition points in trans.
    pos : int32_t
        Position of the last transition point before taking fold into account.

    Returns
    -------
    bint
        Due to daylight saving time, one wall clock time can occur twice
        when shifting from summer to winter time; fold describes whether the
        datetime-like corresponds to the first (0) or the second time (1)
        the wall clock hits the ambiguous time

    References
    ----------
    .. [1] "PEP 495 - Local Time Disambiguation"
           https://www.python.org/dev/peps/pep-0495/#the-fold-attribute
    """
    if pos <= 0:
        # There is no earlier offset to compare against
        return 0

    # Difference between the offset before the transition and the one after
    offset_drop = deltas[pos - 1] - deltas[pos]
    if obj.value - offset_drop < trans[pos]:
        return 1
    return 0
|
|
|
|
cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz):
    """
    Attach timezone `tz` to the datetime/Timestamp `dt`.

    `tz.localize(dt)` is used when the tzinfo provides a `localize` method;
    otherwise `dt.replace(tzinfo=tz)` is used.

    NB: Unlike the public version, this treats datetime and Timestamp objects
    identically, i.e. discards nanos from Timestamps.
    It also assumes that the `tz` input is not None.
    """
    try:
        # Preferred over datetime.replace, which may give an incorrect
        # result with pytz
        localized = tz.localize(dt)
    except AttributeError:
        localized = dt.replace(tzinfo=tz)
    return localized
|
|
|
|
|
|
cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz):
    """
    Attach timezone `tz` to the datetime/Timestamp `dt`.

    Parameters
    ----------
    dt : datetime or Timestamp
    tz : tzinfo or None
        When None, `dt` is returned unchanged.

    Returns
    -------
    localized : datetime or Timestamp
        Timestamps are localized with `Timestamp.tz_localize`; plain
        datetimes go through `_localize_pydatetime`.
    """
    if tz is None:
        return dt
    if isinstance(dt, ABCTimestamp):
        return dt.tz_localize(tz)
    # UTC and non-UTC timezones previously took two separate branches that
    # ran the same try-`tz.localize`/fall-back-to-`replace` logic; delegate
    # to the private helper that already implements it.
    return _localize_pydatetime(dt, tz)
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Normalization
|
|
|
|
@cython.cdivision(False)
cdef inline int64_t normalize_i8_stamp(int64_t local_val) nogil:
    """
    Round the localized nanosecond timestamp down to the previous midnight.

    Parameters
    ----------
    local_val : int64_t

    Returns
    -------
    int64_t
    """
    cdef:
        int64_t ns_per_day = 24 * 3600 * 1_000_000_000
        int64_t ns_since_midnight

    # cdivision is disabled above, so `%` follows Python semantics here
    ns_since_midnight = local_val % ns_per_day
    return local_val - ns_since_midnight
|