first commit

This commit is contained in:
Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

View File

@@ -0,0 +1,65 @@
# Explicit public API of pandas._libs.tslibs: ``from pandas._libs.tslibs
# import *`` (and api-consistency tests) rely on exactly these names.
__all__ = [
    "dtypes",
    "localize_pydatetime",
    "NaT",
    "NaTType",
    "iNaT",
    "nat_strings",
    "OutOfBoundsDatetime",
    "OutOfBoundsTimedelta",
    "IncompatibleFrequency",
    "Period",
    "Resolution",
    "Timedelta",
    "normalize_i8_timestamps",
    "is_date_array_normalized",
    "dt64arr_to_periodarr",
    "delta_to_nanoseconds",
    "ints_to_pydatetime",
    "ints_to_pytimedelta",
    "get_resolution",
    "Timestamp",
    "tz_convert_from_utc_single",
    "to_offset",
    "Tick",
    "BaseOffset",
    "tz_compare",
]
from pandas._libs.tslibs import dtypes
from pandas._libs.tslibs.conversion import (
OutOfBoundsTimedelta,
localize_pydatetime,
)
from pandas._libs.tslibs.dtypes import Resolution
from pandas._libs.tslibs.nattype import (
NaT,
NaTType,
iNaT,
nat_strings,
)
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
from pandas._libs.tslibs.offsets import (
BaseOffset,
Tick,
to_offset,
)
from pandas._libs.tslibs.period import (
IncompatibleFrequency,
Period,
)
from pandas._libs.tslibs.timedeltas import (
Timedelta,
delta_to_nanoseconds,
ints_to_pytimedelta,
)
from pandas._libs.tslibs.timestamps import Timestamp
from pandas._libs.tslibs.timezones import tz_compare
from pandas._libs.tslibs.tzconversion import tz_convert_from_utc_single
from pandas._libs.tslibs.vectorized import (
dt64arr_to_periodarr,
get_resolution,
ints_to_pydatetime,
is_date_array_normalized,
normalize_i8_timestamps,
)

View File

@@ -0,0 +1,5 @@
from cpython.datetime cimport datetime
cdef class ABCTimestamp(datetime):
    # Empty abstract base: lets other extension modules do fast
    # ``isinstance(x, ABCTimestamp)`` checks without importing the
    # timestamps module (avoids circular cimports).
    pass

View File

@@ -0,0 +1,12 @@
"""
We define base classes that will be inherited by Timestamp, Timedelta, etc
in order to allow for fast isinstance checks without circular dependency issues.
This is analogous to core.dtypes.generic.
"""
from cpython.datetime cimport datetime
cdef class ABCTimestamp(datetime):
    # Empty abstract base inherited by Timestamp; enables fast isinstance
    # checks without a circular dependency (see module docstring above).
    pass

View File

@@ -0,0 +1,22 @@
from cython cimport Py_ssize_t
from numpy cimport (
int32_t,
int64_t,
)
# (iso_year, iso_week, iso_weekday) triple as returned by get_iso_calendar.
ctypedef (int32_t, int32_t, int32_t) iso_calendar_t

# Day of week (0 = Monday) for a valid y/m/d date.
cdef int dayofweek(int y, int m, int d) nogil
# 1 if ``year`` is a leap year, 0 otherwise.
cdef bint is_leapyear(int64_t year) nogil
# Number of days in the given month (month must be in 1..12).
cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil
# ISO week number of the given date.
cpdef int32_t get_week_of_year(int year, int month, int day) nogil
# Full ISO 8601 (year, week, weekday) calendar triple.
cpdef iso_calendar_t get_iso_calendar(int year, int month, int day) nogil
# Ordinal day of year (Jan 1 -> 1).
cpdef int32_t get_day_of_year(int year, int month, int day) nogil
# Last / first day of the month that is a business (week)day.
cpdef int get_lastbday(int year, int month) nogil
cpdef int get_firstbday(int year, int month) nogil

# Shared constants defined in ccalendar.pyx.
cdef int64_t DAY_NANOS
cdef int64_t HOUR_NANOS
cdef dict c_MONTH_NUMBERS
cdef int32_t* month_offset

View File

@@ -0,0 +1,12 @@
# Type stubs for the ccalendar Cython extension.

# Abbreviated weekday names, "MON".."SUN".
DAYS: list[str]
# 1 -> "JAN", ..., 12 -> "DEC"
MONTH_ALIASES: dict[int, str]
# "JAN" -> 0, ..., "DEC" -> 11
MONTH_NUMBERS: dict[str, int]
# Abbreviated month names, "JAN".."DEC".
MONTHS: list[str]
# 0 (Monday) -> "MON", ..., 6 -> "SUN"
int_to_weekday: dict[int, str]

def get_firstbday(year: int, month: int) -> int: ...
def get_lastbday(year: int, month: int) -> int: ...
def get_day_of_year(year: int, month: int, day: int) -> int: ...
def get_iso_calendar(year: int, month: int, day: int) -> tuple[int, int, int]: ...
def get_week_of_year(year: int, month: int, day: int) -> int: ...
def get_days_in_month(year: int, month: int) -> int: ...

View File

@@ -0,0 +1,295 @@
# cython: boundscheck=False
"""
Cython implementations of functions resembling the stdlib calendar module
"""
import cython
from numpy cimport (
int32_t,
int64_t,
)
# ----------------------------------------------------------------------
# Constants

# Slightly more performant cython lookups than a 2D table
# The first 12 entries correspond to month lengths for non-leap years.
# The remaining 12 entries give month lengths for leap years
cdef int32_t* days_per_month_array = [
    31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
    31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

# Month-dependent terms of Sakamoto's day-of-week formula (see dayofweek).
cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4]

# The first 13 entries give the month days elapsed as of the first of month N
# (or the total number of days in the year for N=13) in non-leap years.
# The remaining 13 entries give the days elapsed in leap years.
cdef int32_t* month_offset = [
    0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365,
    0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366]

# Canonical location for other modules to find name constants
MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL',
          'AUG', 'SEP', 'OCT', 'NOV', 'DEC']
# The first blank line is consistent with calendar.month_name in the calendar
# standard library
MONTHS_FULL = ['', 'January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November',
               'December']
# "JAN" -> 0, ..., "DEC" -> 11
MONTH_NUMBERS = {name: num for num, name in enumerate(MONTHS)}
cdef dict c_MONTH_NUMBERS = MONTH_NUMBERS
# 1 -> "JAN", ..., 12 -> "DEC"
MONTH_ALIASES = {(num + 1): name for num, name in enumerate(MONTHS)}
MONTH_TO_CAL_NUM = {name: num + 1 for num, name in enumerate(MONTHS)}

DAYS = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']
DAYS_FULL = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
             'Saturday', 'Sunday']
# 0 (Monday) -> "MON", ..., 6 -> "SUN"
int_to_weekday = {num: name for num, name in enumerate(DAYS)}
weekday_to_int = {int_to_weekday[key]: key for key in int_to_weekday}

DAY_SECONDS = 86400
HOUR_SECONDS = 3600

cdef int64_t DAY_NANOS = DAY_SECONDS * 1_000_000_000
cdef int64_t HOUR_NANOS = HOUR_SECONDS * 1_000_000_000
# ----------------------------------------------------------------------
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil:
    """
    Return the number of days in the given month of the given year.

    Parameters
    ----------
    year : int
    month : int

    Returns
    -------
    days_in_month : int

    Notes
    -----
    Assumes that the arguments are valid. Passing a month not between 1 and 12
    risks a segfault.
    """
    # Leap years use the second half (offset 12) of the flat lookup table.
    return days_per_month_array[12 * is_leapyear(year) + month - 1]
@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision
cdef int dayofweek(int y, int m, int d) nogil:
    """
    Find the day of week for the date described by the Y/M/D triple y, m, d
    using Sakamoto's method, from wikipedia.

    0 represents Monday.  See [1]_.

    Parameters
    ----------
    y : int
    m : int
    d : int

    Returns
    -------
    weekday : int

    Notes
    -----
    Assumes that y, m, d, represents a valid date.

    See Also
    --------
    [1] https://docs.python.org/3/library/calendar.html#calendar.weekday

    [2] https://en.wikipedia.org/wiki/\
    Determination_of_the_day_of_the_week#Sakamoto.27s_methods
    """
    cdef:
        int day

    # Sakamoto treats Jan/Feb as months of the previous year.
    y -= m < 3
    # @cython.cdivision makes these C integer divisions (no Python
    # floor-division overhead); fine for the positive years of valid dates.
    day = (y + y / 4 - y / 100 + y / 400 + sakamoto_arr[m - 1] + d) % 7
    # Sakamoto's result has 0 = Sunday; shift so 0 = Monday (python convention)
    return (day + 6) % 7
cdef bint is_leapyear(int64_t year) nogil:
    """
    Returns 1 if the given year is a leap year, 0 otherwise.

    Parameters
    ----------
    year : int

    Returns
    -------
    is_leap : bool
    """
    # Gregorian rule: every 4th year, except centuries not divisible by 400.
    if year % 4 != 0:
        return False
    if year % 100 != 0:
        return True
    return year % 400 == 0
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef int32_t get_week_of_year(int year, int month, int day) nogil:
    """
    Return the ordinal week-of-year for the given day.

    Parameters
    ----------
    year : int
    month : int
    day : int

    Returns
    -------
    week_of_year : int32_t

    Notes
    -----
    Assumes the inputs describe a valid date.
    """
    # ISO week is the middle element of the (year, week, weekday) triple.
    return get_iso_calendar(year, month, day)[1]
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef iso_calendar_t get_iso_calendar(int year, int month, int day) nogil:
    """
    Return the year, week, and day of year corresponding to ISO 8601

    Parameters
    ----------
    year : int
    month : int
    day : int

    Returns
    -------
    year : int32_t
    week : int32_t
    day : int32_t

    Notes
    -----
    Assumes the inputs describe a valid date.
    """
    cdef:
        int32_t doy, dow
        int32_t iso_year, iso_week

    doy = get_day_of_year(year, month, day)
    dow = dayofweek(year, month, day)

    # estimate: ISO week 1 is the week containing the first Thursday,
    # hence the +3 shift relative to the Monday of the current week
    iso_week = (doy - 1) - dow + 3
    if iso_week >= 0:
        iso_week = iso_week // 7 + 1

    # verify
    if iso_week < 0:
        # Early-January date that belongs to the previous ISO year;
        # that year's last week is 53 only if it has 53 ISO weeks.
        if (iso_week > -2) or (iso_week == -2 and is_leapyear(year - 1)):
            iso_week = 53
        else:
            iso_week = 52
    elif iso_week == 53:
        # A late-December "week 53" that spills into January is week 1.
        if 31 - day + dow < 3:
            iso_week = 1

    iso_year = year
    if iso_week == 1 and month == 12:
        iso_year += 1
    elif iso_week >= 52 and month == 1:
        iso_year -= 1

    return iso_year, iso_week, dow + 1
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef int32_t get_day_of_year(int year, int month, int day) nogil:
    """
    Return the ordinal day-of-year for the given day.

    Parameters
    ----------
    year : int
    month : int
    day : int

    Returns
    -------
    day_of_year : int32_t

    Notes
    -----
    Assumes the inputs describe a valid date.
    """
    cdef:
        int32_t days_before_month

    # month_offset holds cumulative day counts per month; the second half
    # of the table (offset 13) applies in leap years.
    days_before_month = month_offset[13 * is_leapyear(year) + month - 1]
    return days_before_month + day
# ---------------------------------------------------------------------
# Business Helpers

cpdef int get_lastbday(int year, int month) nogil:
    """
    Find the last day of the month that is a business day.

    Parameters
    ----------
    year : int
    month : int

    Returns
    -------
    last_bday : int
    """
    cdef:
        int wkday, days_in_month

    wkday = dayofweek(year, month, 1)
    days_in_month = get_days_in_month(year, month)
    # (wkday + days_in_month - 1) % 7 is the weekday (0 = Monday) of the
    # month's last day; back up past Saturday (5) / Sunday (6) if needed.
    return days_in_month - max(((wkday + days_in_month - 1) % 7) - 4, 0)
cpdef int get_firstbday(int year, int month) nogil:
    """
    Find the first day of the month that is a business day.

    Parameters
    ----------
    year : int
    month : int

    Returns
    -------
    first_bday : int
    """
    cdef:
        int weekday_of_first

    weekday_of_first = dayofweek(year, month, 1)
    if weekday_of_first == 5:
        # The 1st is a Saturday -> first business day is Monday the 3rd.
        return 3
    if weekday_of_first == 6:
        # The 1st is a Sunday -> first business day is Monday the 2nd.
        return 2
    # The 1st already falls on a weekday.
    return 1

View File

@@ -0,0 +1,35 @@
from cpython.datetime cimport (
datetime,
tzinfo,
)
from numpy cimport (
int32_t,
int64_t,
ndarray,
)
from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct
cdef class _TSObject:
    # Lightweight container pairing a parsed datetime struct with its
    # nanosecond epoch value and timezone metadata.
    cdef:
        npy_datetimestruct dts      # npy_datetimestruct
        int64_t value               # numpy dt64
        object tzinfo
        bint fold


# Convert an arbitrary scalar (str, int, float, datetime64, datetime, ...)
# to a _TSObject.
cdef convert_to_tsobject(object ts, tzinfo tz, str unit,
                         bint dayfirst, bint yearfirst,
                         int32_t nanos=*)

# Convert a datetime/Timestamp (plus optional extra nanoseconds) to _TSObject.
cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz,
                                            int32_t nanos=*)

# Nanosecond value of a np.datetime64, rescaling the unit if necessary.
cdef int64_t get_datetime64_nanos(object val) except? -1

# Localize a datetime/Timestamp to tz.
cpdef datetime localize_pydatetime(datetime dt, tzinfo tz)

# Nanoseconds represented by ``ts`` counts of ``unit``.
cdef int64_t cast_from_unit(object ts, str unit) except? -1

# (nanoseconds per unit, decimal precision for rounding fractional parts).
cpdef (int64_t, int) precision_from_unit(str unit)

# Round a localized i8 timestamp down to midnight.
cdef int64_t normalize_i8_stamp(int64_t local_val) nogil

View File

@@ -0,0 +1,29 @@
from datetime import (
datetime,
tzinfo,
)
import numpy as np
from pandas._typing import npt
# np.dtype("M8[ns]") / np.dtype("m8[ns]") singletons defined in conversion.pyx.
DT64NS_DTYPE: np.dtype
TD64NS_DTYPE: np.dtype

class OutOfBoundsTimedelta(ValueError): ...

def precision_from_unit(
    unit: str,
) -> tuple[int, int,]: ...  # (int64_t, _)
def ensure_datetime64ns(
    arr: np.ndarray,  # np.ndarray[datetime64[ANY]]
    copy: bool = ...,
) -> np.ndarray: ...  # np.ndarray[datetime64ns]
def ensure_timedelta64ns(
    arr: np.ndarray,  # np.ndarray[timedelta64[ANY]]
    copy: bool = ...,
) -> np.ndarray: ...  # np.ndarray[timedelta64ns]
def datetime_to_datetime64(
    values: npt.NDArray[np.object_],
) -> tuple[np.ndarray, tzinfo | None,]: ...  # (np.ndarray[dt64ns], _)
def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ...

View File

@@ -0,0 +1,858 @@
import cython
import numpy as np
cimport numpy as cnp
from numpy cimport (
int32_t,
int64_t,
intp_t,
ndarray,
)
cnp.import_array()
import pytz
# stdlib datetime imports
from cpython.datetime cimport (
PyDate_Check,
PyDateTime_Check,
PyDateTime_IMPORT,
datetime,
time,
tzinfo,
)
PyDateTime_IMPORT
from pandas._libs.tslibs.base cimport ABCTimestamp
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
NPY_FR_ns,
_string_to_dts,
check_dts_bounds,
dt64_to_dtstruct,
dtstruct_to_dt64,
get_datetime64_unit,
get_datetime64_value,
npy_datetime,
npy_datetimestruct,
pandas_datetime_to_datetimestruct,
pydatetime_to_dt64,
)
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
from pandas._libs.tslibs.timezones cimport (
get_dst_info,
get_utcoffset,
is_fixed_offset,
is_tzlocal,
is_utc,
maybe_get_tz,
tz_compare,
utc_pytz as UTC,
)
from pandas._libs.tslibs.util cimport (
is_datetime64_object,
is_float_object,
is_integer_object,
)
from pandas._libs.tslibs.parsing import parse_datetime_string
from pandas._libs.tslibs.nattype cimport (
NPY_NAT,
c_NaT as NaT,
c_nat_strings as nat_strings,
checknull_with_nat,
)
from pandas._libs.tslibs.tzconversion cimport (
tz_convert_utc_to_tzlocal,
tz_localize_to_utc_single,
)
# ----------------------------------------------------------------------
# Constants

DT64NS_DTYPE = np.dtype('M8[ns]')
TD64NS_DTYPE = np.dtype('m8[ns]')


class OutOfBoundsTimedelta(ValueError):
    """
    Raised when encountering a timedelta value that cannot be represented
    as a timedelta64[ns].
    """
    # Timedelta analogue to OutOfBoundsDatetime
    pass
# ----------------------------------------------------------------------
# Unit Conversion Helpers

cdef inline int64_t cast_from_unit(object ts, str unit) except? -1:
    """
    Return a casting of the unit represented to nanoseconds
    round the fractional part of a float to our precision, p.

    Parameters
    ----------
    ts : int, float, or None
    unit : str

    Returns
    -------
    int64_t
    """
    cdef:
        int64_t m
        int p

    # m = nanoseconds per one ``unit``; p = decimal digits of sub-unit
    # precision worth keeping when rounding float fractions.
    m, p = precision_from_unit(unit)

    # just give me the unit back
    if ts is None:
        return m

    # cast the unit, multiply base/frac separately
    # to avoid precision issues from float -> int
    base = <int64_t>ts
    frac = ts - base
    if p:
        frac = round(frac, p)
    return <int64_t>(base * m) + <int64_t>(frac * m)
cpdef inline (int64_t, int) precision_from_unit(str unit):
    """
    Return a casting of the unit represented to nanoseconds + the precision
    to round the fractional part.

    Parameters
    ----------
    unit : str
        One of "Y", "M", "W", "D"/"d", "h", "m", "s", "ms", "us", "ns".

    Returns
    -------
    m : int64_t
        Nanoseconds per one ``unit``.
    p : int
        Decimal digits of sub-unit precision to round fractions to.

    Raises
    ------
    ValueError
        If ``unit`` is not one of the recognized abbreviations.

    Notes
    -----
    The caller is responsible for ensuring that the default value of "ns"
    takes the place of None.
    """
    cdef:
        int64_t m
        int p

    if unit == "Y":
        # 31556952 seconds = 365.2425 days, the mean Gregorian year
        m = 1_000_000_000 * 31556952
        p = 9
    elif unit == "M":
        # 2629746 seconds = mean Gregorian year / 12
        m = 1_000_000_000 * 2629746
        p = 9
    elif unit == "W":
        m = 1_000_000_000 * 3600 * 24 * 7
        p = 9
    elif unit == "D" or unit == "d":
        m = 1_000_000_000 * 3600 * 24
        p = 9
    elif unit == "h":
        m = 1_000_000_000 * 3600
        p = 9
    elif unit == "m":
        m = 1_000_000_000 * 60
        p = 9
    elif unit == "s":
        m = 1_000_000_000
        p = 9
    elif unit == "ms":
        m = 1_000_000
        p = 6
    elif unit == "us":
        m = 1000
        p = 3
    elif unit == "ns" or unit is None:
        m = 1
        p = 0
    else:
        raise ValueError(f"cannot cast unit {unit}")
    return m, p
cdef inline int64_t get_datetime64_nanos(object val) except? -1:
    """
    Extract the value and unit from a np.datetime64 object, then convert the
    value to nanoseconds if necessary.
    """
    cdef:
        npy_datetimestruct dts
        NPY_DATETIMEUNIT unit
        npy_datetime ival

    ival = get_datetime64_value(val)
    if ival == NPY_NAT:
        # NaT is unit-independent; pass it through unchanged.
        return NPY_NAT

    unit = get_datetime64_unit(val)
    if unit != NPY_FR_ns:
        # Round-trip through the datetimestruct to rescale; check_dts_bounds
        # raises OutOfBoundsDatetime if the value does not fit in dt64[ns].
        pandas_datetime_to_datetimestruct(ival, unit, &dts)
        check_dts_bounds(&dts)
        ival = dtstruct_to_dt64(&dts)

    return ival
@cython.boundscheck(False)
@cython.wraparound(False)
def ensure_datetime64ns(arr: ndarray, copy: bool = True):
    """
    Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]'

    Parameters
    ----------
    arr : ndarray
        Must have a datetime64 dtype (any unit, any byte order).
    copy : bool, default True
        Whether to copy even when ``arr`` already has the target dtype.

    Returns
    -------
    ndarray with dtype datetime64[ns]

    Raises
    ------
    ValueError
        If ``arr`` has a generic (unit-less) datetime64 dtype.
    OutOfBoundsDatetime
        If a value does not fit in datetime64[ns].
    """
    cdef:
        Py_ssize_t i, n = arr.size
        const int64_t[:] ivalues
        int64_t[:] iresult
        NPY_DATETIMEUNIT unit
        npy_datetimestruct dts

    if (<object>arr).dtype.byteorder == ">":
        # GH#29684 we incorrectly get OutOfBoundsDatetime if we dont swap
        dtype = arr.dtype
        arr = arr.astype(dtype.newbyteorder("<"))

    if arr.size == 0:
        result = arr.view(DT64NS_DTYPE)
        if copy:
            result = result.copy()
        return result

    unit = get_datetime64_unit(arr.flat[0])
    if unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
        # without raising explicitly here, we end up with a SystemError
        # built-in function ensure_datetime64ns returned a result with an error
        raise ValueError("datetime64/timedelta64 must have a unit specified")

    if unit == NPY_FR_ns:
        # Check this before allocating result for perf, might save some memory
        if copy:
            return arr.copy()
        return arr

    # Rescale element-wise via the datetimestruct, bounds-checking as we go.
    # NOTE: removed an unused local (``shape``) that was computed but never
    # read anywhere in this function.
    ivalues = arr.view(np.int64).ravel("K")
    result = np.empty_like(arr, dtype=DT64NS_DTYPE)
    iresult = result.ravel("K").view(np.int64)

    for i in range(n):
        if ivalues[i] != NPY_NAT:
            pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts)
            iresult[i] = dtstruct_to_dt64(&dts)
            check_dts_bounds(&dts)
        else:
            iresult[i] = NPY_NAT

    return result
def ensure_timedelta64ns(arr: ndarray, copy: bool = True):
    """
    Ensure a np.timedelta64 array has dtype specifically 'timedelta64[ns]'

    Parameters
    ----------
    arr : ndarray
        Must have a timedelta64 dtype (any unit).
    copy : bool, default True
        Whether to copy even when ``arr`` already has the target dtype.

    Returns
    -------
    ndarray[timedelta64[ns]]

    Raises
    ------
    OutOfBoundsTimedelta
        If a value cannot be represented as timedelta64[ns].
    """
    assert arr.dtype.kind == "m", arr.dtype

    if arr.dtype == TD64NS_DTYPE:
        return arr.copy() if copy else arr

    # Re-use the datetime64 machinery to do an overflow-safe `astype`
    dtype = arr.dtype.str.replace("m8", "M8")
    dummy = arr.view(dtype)
    try:
        dt64_result = ensure_datetime64ns(dummy, copy)
    except OutOfBoundsDatetime as err:
        # Re-write the exception in terms of timedelta64 instead of dt64

        # Find the value that we are going to report as causing an overflow
        tdmin = arr.min()
        tdmax = arr.max()
        if np.abs(tdmin) >= np.abs(tdmax):
            bad_val = tdmin
        else:
            bad_val = tdmax

        msg = f"Out of bounds for nanosecond {arr.dtype.name} {str(bad_val)}"
        # Chain the original exception (B904) so the dt64 context isn't lost.
        raise OutOfBoundsTimedelta(msg) from err

    return dt64_result.view(TD64NS_DTYPE)
# ----------------------------------------------------------------------


@cython.boundscheck(False)
@cython.wraparound(False)
def datetime_to_datetime64(ndarray[object] values):
    """
    Convert ndarray of datetime-like objects to int64 array representing
    nanosecond timestamps.

    Parameters
    ----------
    values : ndarray[object]

    Returns
    -------
    result : ndarray[datetime64ns]
    inferred_tz : tzinfo or None
        Shared timezone of the tz-aware entries, if any.

    Raises
    ------
    ValueError
        If tz-aware and tz-naive values are mixed, or tz-aware values carry
        differing timezones.
    TypeError
        If an element is not a datetime or NaT-like value.
    """
    cdef:
        Py_ssize_t i, n = len(values)
        object val
        int64_t[:] iresult
        npy_datetimestruct dts
        _TSObject _ts
        bint found_naive = False
        tzinfo inferred_tz = None

    result = np.empty(n, dtype='M8[ns]')
    iresult = result.view('i8')

    for i in range(n):
        val = values[i]
        if checknull_with_nat(val):
            iresult[i] = NPY_NAT
        elif PyDateTime_Check(val):
            if val.tzinfo is not None:
                # tz-aware entry: must not follow a naive one, and all
                # aware entries must agree on the timezone.
                if found_naive:
                    raise ValueError('Cannot mix tz-aware with '
                                     'tz-naive values')
                if inferred_tz is not None:
                    if not tz_compare(val.tzinfo, inferred_tz):
                        raise ValueError('Array must be all same time zone')
                else:
                    inferred_tz = val.tzinfo

                _ts = convert_datetime_to_tsobject(val, None)
                iresult[i] = _ts.value
                check_dts_bounds(&_ts.dts)
            else:
                found_naive = True
                if inferred_tz is not None:
                    raise ValueError('Cannot mix tz-aware with '
                                     'tz-naive values')
                iresult[i] = pydatetime_to_dt64(val, &dts)
                check_dts_bounds(&dts)
        else:
            raise TypeError(f'Unrecognized value type: {type(val)}')

    return result, inferred_tz
# ----------------------------------------------------------------------
# _TSObject Conversion

# lightweight C object to hold datetime & int64 pair
cdef class _TSObject:
    # Attributes are declared in conversion.pxd:
    # cdef:
    #    npy_datetimestruct dts      # npy_datetimestruct
    #    int64_t value               # numpy dt64
    #    object tzinfo
    #    bint fold

    def __cinit__(self):
        # GH 25057. As per PEP 495, set fold to 0 by default
        self.fold = 0

    @property
    def value(self):
        # This is needed in order for `value` to be accessible in lib.pyx
        # (inside cdef code ``self.value`` resolves to the C attribute
        # declared in the .pxd, so this does not recurse)
        return self.value
cdef convert_to_tsobject(object ts, tzinfo tz, str unit,
                         bint dayfirst, bint yearfirst, int32_t nanos=0):
    """
    Extract datetime and int64 from any of:
        - np.int64 (with unit providing a possible modifier)
        - np.datetime64
        - a float (with unit providing a possible modifier)
        - python int or long object (with unit providing a possible modifier)
        - iso8601 string object
        - python datetime object
        - another timestamp object

    Raises
    ------
    OutOfBoundsDatetime : ts cannot be converted within implementation bounds
    """
    cdef:
        _TSObject obj

    obj = _TSObject()

    if isinstance(ts, str):
        # String parsing has its own dispatch (ISO fast path + dateutil).
        return _convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst)

    if ts is None or ts is NaT:
        obj.value = NPY_NAT
    elif is_datetime64_object(ts):
        obj.value = get_datetime64_nanos(ts)
        if obj.value != NPY_NAT:
            dt64_to_dtstruct(obj.value, &obj.dts)
    elif is_integer_object(ts):
        try:
            ts = <int64_t>ts
        except OverflowError:
            # GH#26651 re-raise as OutOfBoundsDatetime
            raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp {ts}")
        if ts == NPY_NAT:
            obj.value = NPY_NAT
        else:
            # Scale the integer from ``unit`` to nanoseconds.
            ts = ts * cast_from_unit(None, unit)
            obj.value = ts
            dt64_to_dtstruct(ts, &obj.dts)
    elif is_float_object(ts):
        # NaN (ts != ts) is treated as NaT.
        if ts != ts or ts == NPY_NAT:
            obj.value = NPY_NAT
        else:
            ts = cast_from_unit(ts, unit)
            obj.value = ts
            dt64_to_dtstruct(ts, &obj.dts)
    elif PyDateTime_Check(ts):
        return convert_datetime_to_tsobject(ts, tz, nanos)
    elif PyDate_Check(ts):
        # Keep the converter same as PyDateTime's
        ts = datetime.combine(ts, time())
        return convert_datetime_to_tsobject(ts, tz)
    else:
        from .period import Period
        if isinstance(ts, Period):
            raise ValueError("Cannot convert Period to Timestamp "
                             "unambiguously. Use to_timestamp")
        raise TypeError(f'Cannot convert input [{ts}] of type {type(ts)} to '
                        f'Timestamp')

    if tz is not None:
        _localize_tso(obj, tz)

    if obj.value != NPY_NAT:
        # check_overflows needs to run after _localize_tso
        check_dts_bounds(&obj.dts)
        check_overflows(obj)
    return obj
cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz,
                                            int32_t nanos=0):
    """
    Convert a datetime (or Timestamp) input `ts`, along with optional timezone
    object `tz` to a _TSObject.

    The optional argument `nanos` allows for cases where datetime input
    needs to be supplemented with higher-precision information.

    Parameters
    ----------
    ts : datetime or Timestamp
        Value to be converted to _TSObject
    tz : tzinfo or None
        timezone for the timezone-aware output
    nanos : int32_t, default is 0
        nanoseconds supplement the precision of the datetime input ts

    Returns
    -------
    obj : _TSObject
    """
    cdef:
        _TSObject obj = _TSObject()

    obj.fold = ts.fold
    if tz is not None:
        tz = maybe_get_tz(tz)

        if ts.tzinfo is not None:
            # Convert the current timezone to the passed timezone
            ts = ts.astimezone(tz)
            obj.value = pydatetime_to_dt64(ts, &obj.dts)
            obj.tzinfo = ts.tzinfo
        elif not is_utc(tz):
            # tz-naive input localized to a non-UTC timezone.
            ts = _localize_pydatetime(ts, tz)
            obj.value = pydatetime_to_dt64(ts, &obj.dts)
            obj.tzinfo = ts.tzinfo
        else:
            # UTC
            obj.value = pydatetime_to_dt64(ts, &obj.dts)
            obj.tzinfo = tz
    else:
        obj.value = pydatetime_to_dt64(ts, &obj.dts)
        obj.tzinfo = ts.tzinfo

    if obj.tzinfo is not None and not is_utc(obj.tzinfo):
        # Shift the wall-clock value back to UTC nanoseconds.
        offset = get_utcoffset(obj.tzinfo, ts)
        obj.value -= int(offset.total_seconds() * 1e9)

    if isinstance(ts, ABCTimestamp):
        # Carry over the Timestamp's sub-microsecond component.
        obj.value += <int64_t>ts.nanosecond
        obj.dts.ps = ts.nanosecond * 1000

    if nanos:
        obj.value += nanos
        obj.dts.ps = nanos * 1000

    check_dts_bounds(&obj.dts)
    check_overflows(obj)
    return obj
cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
                                                int tzoffset, tzinfo tz=None):
    """
    Convert a datetimestruct `dts`, along with initial timezone offset
    `tzoffset` to a _TSObject (with timezone object `tz` - optional).

    Parameters
    ----------
    dts: npy_datetimestruct
    tzoffset: int
        UTC offset in minutes, as parsed from an ISO-8601 string.
    tz : tzinfo or None
        timezone for the timezone-aware output.

    Returns
    -------
    obj : _TSObject
    """
    cdef:
        _TSObject obj = _TSObject()
        int64_t value  # numpy dt64
        datetime dt
        ndarray[int64_t] trans
        int64_t[:] deltas

    value = dtstruct_to_dt64(&dts)
    obj.dts = dts
    # The parsed offset becomes a fixed-offset tzinfo on the result.
    obj.tzinfo = pytz.FixedOffset(tzoffset)
    obj.value = tz_localize_to_utc_single(value, obj.tzinfo)
    if tz is None:
        check_overflows(obj)
        return obj

    # Infer fold from offset-adjusted obj.value
    # see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute
    if is_utc(tz):
        pass
    elif is_tzlocal(tz):
        tz_convert_utc_to_tzlocal(obj.value, tz, &obj.fold)
    else:
        trans, deltas, typ = get_dst_info(tz)

        if typ == 'dateutil':
            # Only dateutil zones support fold; pytz encodes DST in the
            # tzinfo instance instead.
            pos = trans.searchsorted(obj.value, side='right') - 1
            obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos)

    # Keep the converter same as PyDateTime's
    dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day,
                  obj.dts.hour, obj.dts.min, obj.dts.sec,
                  obj.dts.us, obj.tzinfo, fold=obj.fold)
    # Re-convert through the requested tz, carrying sub-us precision along.
    obj = convert_datetime_to_tsobject(
        dt, tz, nanos=obj.dts.ps // 1000)
    return obj
cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
                                        bint dayfirst=False,
                                        bint yearfirst=False):
    """
    Convert a string input `ts`, along with optional timezone object`tz`
    to a _TSObject.

    The optional arguments `dayfirst` and `yearfirst` are passed to the
    dateutil parser.

    Parameters
    ----------
    ts : str
        Value to be converted to _TSObject
    tz : tzinfo or None
        timezone for the timezone-aware output
    unit : str or None
    dayfirst : bool, default False
        When parsing an ambiguous date string, interpret e.g. "3/4/1975" as
        April 3, as opposed to the standard US interpretation March 4.
    yearfirst : bool, default False
        When parsing an ambiguous date string, interpret e.g. "01/05/09"
        as "May 9, 2001", as opposed to the default "Jan 5, 2009"

    Returns
    -------
    obj : _TSObject
    """
    cdef:
        npy_datetimestruct dts
        int out_local = 0, out_tzoffset = 0
        bint do_parse_datetime_string = False

    if len(ts) == 0 or ts in nat_strings:
        ts = NaT
    elif ts == 'now':
        # Issue 9000, we short-circuit rather than going
        # into np_datetime_strings which returns utc
        ts = datetime.now(tz)
    elif ts == 'today':
        # Issue 9000, we short-circuit rather than going
        # into np_datetime_strings which returns a normalized datetime
        ts = datetime.now(tz)
        # equiv: datetime.today().replace(tzinfo=tz)
    else:
        # Fast path: try the C ISO-8601 parser before falling back to
        # the (much slower) dateutil string parser.
        string_to_dts_failed = _string_to_dts(
            ts, &dts, &out_local,
            &out_tzoffset, False
        )
        try:
            if not string_to_dts_failed:
                check_dts_bounds(&dts)
                if out_local == 1:
                    # String carried its own UTC offset.
                    return _create_tsobject_tz_using_offset(dts,
                                                            out_tzoffset, tz)
                else:
                    ts = dtstruct_to_dt64(&dts)
                    if tz is not None:
                        # shift for _localize_tso
                        ts = tz_localize_to_utc_single(ts, tz,
                                                       ambiguous="raise")
        except OutOfBoundsDatetime:
            # GH#19382 for just-barely-OutOfBounds falling back to dateutil
            # parser will return incorrect result because it will ignore
            # nanoseconds
            raise
        except ValueError:
            do_parse_datetime_string = True

        if string_to_dts_failed or do_parse_datetime_string:
            try:
                ts = parse_datetime_string(ts, dayfirst=dayfirst,
                                           yearfirst=yearfirst)
            except (ValueError, OverflowError):
                raise ValueError("could not convert string to Timestamp")

    # Re-dispatch on whatever the string parsed into (datetime, NaT, i8).
    return convert_to_tsobject(ts, tz, unit, dayfirst, yearfirst)
cdef inline check_overflows(_TSObject obj):
    """
    Check that we haven't silently overflowed in timezone conversion

    Parameters
    ----------
    obj : _TSObject

    Returns
    -------
    None

    Raises
    ------
    OutOfBoundsDatetime
    """
    # GH#12677: near the int64 boundaries, timezone adjustment of obj.value
    # can wrap around silently.  Detect the wrap by checking that the sign
    # of obj.value agrees with the boundary year recorded in obj.dts.
    cdef:
        bint underflow = obj.dts.year == 1677 and not (obj.value < 0)
        bint overflow = obj.dts.year == 2262 and not (obj.value > 0)

    if not (underflow or overflow):
        return

    # Import here to avoid a circular import at module load time.
    from pandas._libs.tslibs.timestamps import Timestamp
    fmt = (f"{obj.dts.year}-{obj.dts.month:02d}-{obj.dts.day:02d} "
           f"{obj.dts.hour:02d}:{obj.dts.min:02d}:{obj.dts.sec:02d}")
    if underflow:
        raise OutOfBoundsDatetime(
            f"Converting {fmt} underflows past {Timestamp.min}"
        )
    raise OutOfBoundsDatetime(
        f"Converting {fmt} overflows past {Timestamp.max}"
    )
# ----------------------------------------------------------------------
# Localization

cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
    """
    Given the UTC nanosecond timestamp in obj.value, find the wall-clock
    representation of that timestamp in the given timezone.

    Parameters
    ----------
    obj : _TSObject
    tz : tzinfo

    Returns
    -------
    None

    Notes
    -----
    Sets obj.tzinfo inplace, alters obj.dts inplace.
    """
    cdef:
        ndarray[int64_t] trans
        int64_t[:] deltas
        int64_t local_val
        Py_ssize_t pos
        str typ

    assert obj.tzinfo is None

    if is_utc(tz):
        # UTC wall clock equals the stored value; nothing to adjust.
        pass
    elif obj.value == NPY_NAT:
        pass
    elif is_tzlocal(tz):
        local_val = tz_convert_utc_to_tzlocal(obj.value, tz, &obj.fold)
        dt64_to_dtstruct(local_val, &obj.dts)
    else:
        # Adjust datetime64 timestamp, recompute datetimestruct
        trans, deltas, typ = get_dst_info(tz)

        if is_fixed_offset(tz):
            # static/fixed tzinfo; in this case we know len(deltas) == 1
            # This can come back with `typ` of either "fixed" or None
            dt64_to_dtstruct(obj.value + deltas[0], &obj.dts)
        elif typ == 'pytz':
            # i.e. treat_tz_as_pytz(tz)
            pos = trans.searchsorted(obj.value, side='right') - 1
            # pytz represents each DST regime as a distinct tzinfo instance.
            tz = tz._tzinfos[tz._transition_info[pos]]
            dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
        elif typ == 'dateutil':
            # i.e. treat_tz_as_dateutil(tz)
            pos = trans.searchsorted(obj.value, side='right') - 1
            dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
            # dateutil supports fold, so we infer fold from value
            obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos)
        else:
            # Note: as of 2018-07-17 all tzinfo objects that are _not_
            # either pytz or dateutil have is_fixed_offset(tz) == True,
            # so this branch will never be reached.
            pass

    obj.tzinfo = tz
cdef inline bint _infer_tsobject_fold(
    _TSObject obj,
    const int64_t[:] trans,
    const int64_t[:] deltas,
    int32_t pos,
):
    """
    Infer _TSObject fold property from value by assuming 0 and then setting
    to 1 if necessary.

    Parameters
    ----------
    obj : _TSObject
    trans : ndarray[int64_t]
        ndarray of offset transition points in nanoseconds since epoch.
    deltas : int64_t[:]
        array of offsets corresponding to transition points in trans.
    pos : int32_t
        Position of the last transition point before taking fold into account.

    Returns
    -------
    bint
        Due to daylight saving time, one wall clock time can occur twice
        when shifting from summer to winter time; fold describes whether the
        datetime-like corresponds to the first (0) or the second time (1)
        the wall clock hits the ambiguous time

    References
    ----------
    .. [1] "PEP 495 - Local Time Disambiguation"
           https://www.python.org/dev/peps/pep-0495/#the-fold-attribute
    """
    cdef:
        bint fold = 0

    if pos > 0:
        # If re-interpreting the value under the previous regime's offset
        # still lands before the transition, the wall time is the repeated
        # (second) occurrence -> fold = 1.
        fold_delta = deltas[pos - 1] - deltas[pos]
        if obj.value - fold_delta < trans[pos]:
            fold = 1

    return fold
cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz):
    """
    Take a datetime/Timestamp in UTC and localizes to timezone tz.

    NB: Unlike the public version, this treats datetime and Timestamp objects
        identically, i.e. discards nanos from Timestamps.
        It also assumes that the `tz` input is not None.
    """
    try:
        # datetime.replace with pytz may be incorrect result
        # (pytz zones need .localize() to pick the right DST regime)
        return tz.localize(dt)
    except AttributeError:
        # tz has no .localize (stdlib/dateutil zones): plain replace is fine.
        return dt.replace(tzinfo=tz)
cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz):
    """
    Take a datetime/Timestamp in UTC and localizes to timezone tz.

    Parameters
    ----------
    dt : datetime or Timestamp
    tz : tzinfo or None

    Returns
    -------
    localized : datetime or Timestamp
    """
    if tz is None:
        return dt
    elif isinstance(dt, ABCTimestamp):
        # Delegate to Timestamp.tz_localize, which preserves nanoseconds.
        return dt.tz_localize(tz)
    elif is_utc(tz):
        return _localize_pydatetime(dt, tz)
    try:
        # datetime.replace with pytz may be incorrect result
        return tz.localize(dt)
    except AttributeError:
        return dt.replace(tzinfo=tz)
# ----------------------------------------------------------------------
# Normalization
@cython.cdivision(False)
cdef inline int64_t normalize_i8_stamp(int64_t local_val) nogil:
    """
    Round the localized nanosecond timestamp down to the previous midnight.

    Parameters
    ----------
    local_val : int64_t

    Returns
    -------
    int64_t
    """
    cdef:
        int64_t day_nanos = 24 * 3600 * 1_000_000_000
    # Floor-divide to the start of the day; cdivision(False) guarantees
    # Python floor semantics for negative (pre-epoch) timestamps.
    return (local_val // day_nanos) * day_nanos

View File

@@ -0,0 +1,76 @@
# Maps resolution attribute names (e.g. "hour") to freq abbreviations.
cdef dict attrname_to_abbrevs

cdef enum c_FreqGroup:
    # Mirrors FreqGroup in the .pyx file
    FR_ANN = 1000
    FR_QTR = 2000
    FR_MTH = 3000
    FR_WK = 4000
    FR_BUS = 5000
    FR_DAY = 6000
    FR_HR = 7000
    FR_MIN = 8000
    FR_SEC = 9000
    FR_MS = 10000
    FR_US = 11000
    FR_NS = 12000
    FR_UND = -10000  # undefined


# Integer codes: (freq group) + (variant within the group), so
# code // 1000 * 1000 recovers the group.
cdef enum PeriodDtypeCode:
    # Annual freqs with various fiscal year ends.
    # eg, 2005 for A_FEB runs Mar 1, 2004 to Feb 28, 2005
    A = 1000      # Default alias
    A_DEC = 1000  # Annual - December year end
    A_JAN = 1001  # Annual - January year end
    A_FEB = 1002  # Annual - February year end
    A_MAR = 1003  # Annual - March year end
    A_APR = 1004  # Annual - April year end
    A_MAY = 1005  # Annual - May year end
    A_JUN = 1006  # Annual - June year end
    A_JUL = 1007  # Annual - July year end
    A_AUG = 1008  # Annual - August year end
    A_SEP = 1009  # Annual - September year end
    A_OCT = 1010  # Annual - October year end
    A_NOV = 1011  # Annual - November year end

    # Quarterly frequencies with various fiscal year ends.
    # eg, Q42005 for Q_OCT runs Aug 1, 2005 to Oct 31, 2005
    Q_DEC = 2000    # Quarterly - December year end
    Q_JAN = 2001    # Quarterly - January year end
    Q_FEB = 2002    # Quarterly - February year end
    Q_MAR = 2003    # Quarterly - March year end
    Q_APR = 2004    # Quarterly - April year end
    Q_MAY = 2005    # Quarterly - May year end
    Q_JUN = 2006    # Quarterly - June year end
    Q_JUL = 2007    # Quarterly - July year end
    Q_AUG = 2008    # Quarterly - August year end
    Q_SEP = 2009    # Quarterly - September year end
    Q_OCT = 2010    # Quarterly - October year end
    Q_NOV = 2011    # Quarterly - November year end

    M = 3000        # Monthly

    W_SUN = 4000    # Weekly - Sunday end of week
    W_MON = 4001    # Weekly - Monday end of week
    W_TUE = 4002    # Weekly - Tuesday end of week
    W_WED = 4003    # Weekly - Wednesday end of week
    W_THU = 4004    # Weekly - Thursday end of week
    W_FRI = 4005    # Weekly - Friday end of week
    W_SAT = 4006    # Weekly - Saturday end of week

    B = 5000        # Business days
    D = 6000        # Daily
    H = 7000        # Hourly
    T = 8000        # Minutely
    S = 9000        # Secondly
    L = 10000       # Millisecondly
    U = 11000       # Microsecondly
    N = 12000       # Nanosecondly

    UNDEFINED = -10_000


cdef class PeriodDtypeBase:
    cdef readonly:
        # Encodes both the freq group and its variant; see PeriodDtypeCode.
        PeriodDtypeCode _dtype_code

View File

@@ -0,0 +1,57 @@
from enum import Enum
from pandas._libs.tslibs.offsets import BaseOffset
_attrname_to_abbrevs: dict[str, str]
_period_code_map: dict[str, int]
class PeriodDtypeBase:
    """Stub for the Cython extension type wrapping a PeriodDtypeCode int."""

    _dtype_code: int  # PeriodDtypeCode

    # actually __cinit__
    def __new__(cls, code: int): ...
    # freq_group_code and date_offset are @property in the implementation
    # (dtypes.pyx); declaring them as plain methods would make type checkers
    # demand call syntax and reject attribute access.
    @property
    def freq_group_code(self) -> int: ...
    @property
    def date_offset(self) -> BaseOffset: ...
    @classmethod
    def from_date_offset(cls, offset: BaseOffset) -> PeriodDtypeBase: ...
    @property
    def resolution(self) -> Resolution: ...
class FreqGroup(Enum):
    # Stub for the Enum in dtypes.pyx; values are the thousands-aligned
    # frequency-group codes (FR_ANN=1000 ... FR_NS=12000, FR_UND=-10000).
    FR_ANN: int
    FR_QTR: int
    FR_MTH: int
    FR_WK: int
    FR_BUS: int
    FR_DAY: int
    FR_HR: int
    FR_MIN: int
    FR_SEC: int
    FR_MS: int
    FR_US: int
    FR_NS: int
    FR_UND: int
    # Maps any PeriodDtypeCode int to its containing group.
    @staticmethod
    def get_freq_group(code: int) -> FreqGroup: ...
class Resolution(Enum):
    # Stub for the Enum in dtypes.pyx; ordered finest (RESO_NS=0) to
    # coarsest (RESO_YR=9), so __lt__/__ge__ compare fineness.
    RESO_NS: int
    RESO_US: int
    RESO_MS: int
    RESO_SEC: int
    RESO_MIN: int
    RESO_HR: int
    RESO_DAY: int
    RESO_MTH: int
    RESO_QTR: int
    RESO_YR: int
    def __lt__(self, other: Resolution) -> bool: ...
    def __ge__(self, other: Resolution) -> bool: ...
    @property
    def freq_group(self) -> FreqGroup: ...
    @property
    def attrname(self) -> str: ...
    @classmethod
    def from_attrname(cls, attrname: str) -> Resolution: ...
    @classmethod
    def get_reso_from_freq(cls, freq: str) -> Resolution: ...

View File

@@ -0,0 +1,289 @@
# period frequency constants corresponding to scikits timeseries
# originals
from enum import Enum
cdef class PeriodDtypeBase:
    """
    Similar to an actual dtype, this contains all of the information
    describing a PeriodDtype in an integer code.
    """
    # cdef readonly:
    #     PeriodDtypeCode _dtype_code

    def __cinit__(self, PeriodDtypeCode code):
        # Stored once at construction; readonly thereafter (see .pxd).
        self._dtype_code = code

    def __eq__(self, other):
        if not isinstance(other, PeriodDtypeBase):
            return False
        if not isinstance(self, PeriodDtypeBase):
            # cython semantics, this is a reversed op
            return False
        return self._dtype_code == other._dtype_code

    @property
    def freq_group_code(self) -> int:
        # Strip the within-group offset, keeping the thousands-aligned group.
        # See also: libperiod.get_freq_group
        return (self._dtype_code // 1000) * 1000

    @property
    def resolution(self) -> "Resolution":
        fgc = self.freq_group_code
        return Resolution.from_freq_group(FreqGroup(fgc))

    @property
    def date_offset(self):
        """
        Corresponding DateOffset object.

        This mapping is mainly for backward-compatibility.
        """
        # Local import to avoid a circular import with .offsets.
        from .offsets import to_offset

        freqstr = _reverse_period_code_map.get(self._dtype_code)
        return to_offset(freqstr)

    @classmethod
    def from_date_offset(cls, offset):
        # NOTE(review): presumably raises AttributeError for offsets without
        # a _period_dtype_code (non-period-compatible offsets) — confirm.
        code = offset._period_dtype_code
        return cls(code)
# Canonical freqstr -> PeriodDtypeCode mapping.  Keys here drive parsing of
# period frequency strings; values mirror the PeriodDtypeCode enum.
_period_code_map = {
    # Annual freqs with various fiscal year ends.
    # eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005
    "A-DEC": 1000,  # Annual - December year end
    "A-JAN": 1001,  # Annual - January year end
    "A-FEB": 1002,  # Annual - February year end
    "A-MAR": 1003,  # Annual - March year end
    "A-APR": 1004,  # Annual - April year end
    "A-MAY": 1005,  # Annual - May year end
    "A-JUN": 1006,  # Annual - June year end
    "A-JUL": 1007,  # Annual - July year end
    "A-AUG": 1008,  # Annual - August year end
    "A-SEP": 1009,  # Annual - September year end
    "A-OCT": 1010,  # Annual - October year end
    "A-NOV": 1011,  # Annual - November year end

    # Quarterly frequencies with various fiscal year ends.
    # eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005
    "Q-DEC": 2000,  # Quarterly - December year end
    "Q-JAN": 2001,  # Quarterly - January year end
    "Q-FEB": 2002,  # Quarterly - February year end
    "Q-MAR": 2003,  # Quarterly - March year end
    "Q-APR": 2004,  # Quarterly - April year end
    "Q-MAY": 2005,  # Quarterly - May year end
    "Q-JUN": 2006,  # Quarterly - June year end
    "Q-JUL": 2007,  # Quarterly - July year end
    "Q-AUG": 2008,  # Quarterly - August year end
    "Q-SEP": 2009,  # Quarterly - September year end
    "Q-OCT": 2010,  # Quarterly - October year end
    "Q-NOV": 2011,  # Quarterly - November year end

    "M": 3000,      # Monthly

    "W-SUN": 4000,  # Weekly - Sunday end of week
    "W-MON": 4001,  # Weekly - Monday end of week
    "W-TUE": 4002,  # Weekly - Tuesday end of week
    "W-WED": 4003,  # Weekly - Wednesday end of week
    "W-THU": 4004,  # Weekly - Thursday end of week
    "W-FRI": 4005,  # Weekly - Friday end of week
    "W-SAT": 4006,  # Weekly - Saturday end of week

    "B": 5000,      # Business days
    "D": 6000,      # Daily
    "H": 7000,      # Hourly
    "T": 8000,      # Minutely
    "S": 9000,      # Secondly
    "L": 10000,     # Millisecondly
    "U": 11000,     # Microsecondly
    "N": 12000,     # Nanosecondly
}

# Built BEFORE aliases are added below, so each code maps back to its
# one canonical freqstr.
_reverse_period_code_map = {
    _period_code_map[key]: key for key in _period_code_map}

# Yearly aliases; careful not to put these in _reverse_period_code_map
_period_code_map.update({"Y" + key[1:]: _period_code_map[key]
                         for key in _period_code_map
                         if key.startswith("A-")})

_period_code_map.update({
    "Q": 2000,  # Quarterly - December year end (default quarterly)
    "A": 1000,  # Annual
    "W": 4000,  # Weekly
    "C": 5000,  # Custom Business Day
})

# Month-name suffixes ("DEC", "JAN", ...) valid as anchors in e.g. "Q-DEC".
cdef set _month_names = {
    x.split("-")[-1] for x in _period_code_map.keys() if x.startswith("A-")
}

# Map attribute-name resolutions to resolution abbreviations
_attrname_to_abbrevs = {
    "year": "A",
    "quarter": "Q",
    "month": "M",
    "day": "D",
    "hour": "H",
    "minute": "T",
    "second": "S",
    "millisecond": "L",
    "microsecond": "U",
    "nanosecond": "N",
}
cdef dict attrname_to_abbrevs = _attrname_to_abbrevs
cdef dict _abbrev_to_attrnames = {v: k for k, v in attrname_to_abbrevs.items()}
class FreqGroup(Enum):
    """
    Thousands-aligned frequency-group codes.

    Mirrors c_FreqGroup in the .pxd file; every PeriodDtypeCode in the
    range ``group .. group + 999`` belongs to the group ``group``.
    """

    FR_ANN = 1000
    FR_QTR = 2000
    FR_MTH = 3000
    FR_WK = 4000
    FR_BUS = 5000
    FR_DAY = 6000
    FR_HR = 7000
    FR_MIN = 8000
    FR_SEC = 9000
    FR_MS = 10000
    FR_US = 11000
    FR_NS = 12000
    FR_UND = -10000  # undefined

    @staticmethod
    def get_freq_group(code: int) -> "FreqGroup":
        """Return the FreqGroup containing the given PeriodDtypeCode int."""
        # Drop the within-group offset; equivalent to (code // 1000) * 1000.
        # See also: PeriodDtypeBase.freq_group_code
        return FreqGroup(code - code % 1000)
class Resolution(Enum):
    """
    Datetime resolutions ordered from finest (RESO_NS) to coarsest (RESO_YR).

    Note: cython won't allow us to reference the cdef versions at the
    module level.
    """

    RESO_NS = 0
    RESO_US = 1
    RESO_MS = 2
    RESO_SEC = 3
    RESO_MIN = 4
    RESO_HR = 5
    RESO_DAY = 6
    RESO_MTH = 7
    RESO_QTR = 8
    RESO_YR = 9

    def __lt__(self, other):
        # A finer resolution compares as "less than" a coarser one.
        return self.value < other.value

    def __ge__(self, other):
        return self.value >= other.value

    @property
    def freq_group(self) -> FreqGroup:
        """FreqGroup corresponding to this Resolution."""
        try:
            return {
                Resolution.RESO_NS: FreqGroup.FR_NS,
                Resolution.RESO_US: FreqGroup.FR_US,
                Resolution.RESO_MS: FreqGroup.FR_MS,
                Resolution.RESO_SEC: FreqGroup.FR_SEC,
                Resolution.RESO_MIN: FreqGroup.FR_MIN,
                Resolution.RESO_HR: FreqGroup.FR_HR,
                Resolution.RESO_DAY: FreqGroup.FR_DAY,
                Resolution.RESO_MTH: FreqGroup.FR_MTH,
                Resolution.RESO_QTR: FreqGroup.FR_QTR,
                Resolution.RESO_YR: FreqGroup.FR_ANN,
            }[self]
        except KeyError:
            raise ValueError(self)  # pragma: no cover

    @property
    def attrname(self) -> str:
        """
        Return datetime attribute name corresponding to this Resolution.

        Examples
        --------
        >>> Resolution.RESO_SEC.attrname
        'second'
        """
        return _reso_str_map[self.value]

    @classmethod
    def from_attrname(cls, attrname: str) -> "Resolution":
        """
        Return resolution str against resolution code.

        Examples
        --------
        >>> Resolution.from_attrname('second')
        <Resolution.RESO_SEC: 3>

        >>> Resolution.from_attrname('second') == Resolution.RESO_SEC
        True
        """
        return cls(_str_reso_map[attrname])

    @classmethod
    def get_reso_from_freq(cls, freq: str) -> "Resolution":
        """
        Return resolution code against frequency str.

        `freq` is given by the `offset.freqstr` for some DateOffset object.

        Examples
        --------
        >>> Resolution.get_reso_from_freq('H')
        <Resolution.RESO_HR: 5>

        >>> Resolution.get_reso_from_freq('H') == Resolution.RESO_HR
        True
        """
        try:
            name = _abbrev_to_attrnames[freq]
        except KeyError:
            # For quarterly and yearly resolutions, we need to chop off
            # a month string.
            parts = freq.split("-")
            if len(parts) != 2:
                raise
            if parts[1] not in _month_names:
                # i.e. we want e.g. "Q-DEC", not "Q-INVALID"
                raise
            name = _abbrev_to_attrnames[parts[0]]
        return cls.from_attrname(name)

    @classmethod
    def from_freq_group(cls, freq_group: FreqGroup) -> "Resolution":
        """Resolution for a FreqGroup; business days map to RESO_DAY."""
        abbrev = _reverse_period_code_map[freq_group.value].split("-")[0]
        if abbrev == "B":
            return cls.RESO_DAY
        return cls.from_attrname(_abbrev_to_attrnames[abbrev])
# Resolution enum value -> datetime attribute name (used by Resolution.attrname).
cdef dict _reso_str_map = {
    Resolution.RESO_NS.value: "nanosecond",
    Resolution.RESO_US.value: "microsecond",
    Resolution.RESO_MS.value: "millisecond",
    Resolution.RESO_SEC.value: "second",
    Resolution.RESO_MIN.value: "minute",
    Resolution.RESO_HR.value: "hour",
    Resolution.RESO_DAY.value: "day",
    Resolution.RESO_MTH.value: "month",
    Resolution.RESO_QTR.value: "quarter",
    Resolution.RESO_YR.value: "year",
}

# Inverse: attribute name -> Resolution enum value (used by Resolution.from_attrname).
cdef dict _str_reso_map = {v: k for k, v in _reso_str_map.items()}

View File

@@ -0,0 +1,52 @@
import numpy as np
from pandas._typing import npt
# Stubs for the Cython implementations in fields.pyx.
def build_field_sarray(
    dtindex: npt.NDArray[np.int64],  # const int64_t[:]
) -> np.ndarray: ...
def month_position_check(fields, weekdays) -> str | None: ...
def get_date_name_field(
    dtindex: npt.NDArray[np.int64],  # const int64_t[:]
    field: str,
    locale: str | None = ...,
) -> npt.NDArray[np.object_]: ...
def get_start_end_field(
    dtindex: npt.NDArray[np.int64],  # const int64_t[:]
    field: str,
    freqstr: str | None = ...,
    month_kw: int = ...,
) -> npt.NDArray[np.bool_]: ...
def get_date_field(
    dtindex: npt.NDArray[np.int64],  # const int64_t[:]
    field: str,
) -> npt.NDArray[np.int32]: ...
def get_timedelta_field(
    tdindex: np.ndarray,  # const int64_t[:]
    field: str,
) -> npt.NDArray[np.int32]: ...
def isleapyear_arr(
    years: np.ndarray,
) -> npt.NDArray[np.bool_]: ...
def build_isocalendar_sarray(
    dtindex: npt.NDArray[np.int64],  # const int64_t[:]
) -> np.ndarray: ...
def get_locale_names(name_type: str, locale: str | None = ...): ...

class RoundTo:
    # Members are implemented as read-only properties in fields.pyx.
    @property
    def MINUS_INFTY(self) -> int: ...
    @property
    def PLUS_INFTY(self) -> int: ...
    @property
    def NEAREST_HALF_EVEN(self) -> int: ...
    @property
    def NEAREST_HALF_PLUS_INFTY(self) -> int: ...
    @property
    def NEAREST_HALF_MINUS_INFTY(self) -> int: ...

def round_nsint64(
    values: npt.NDArray[np.int64],
    mode: RoundTo,
    nanos: int,
) -> npt.NDArray[np.int64]: ...

View File

@@ -0,0 +1,743 @@
"""
Functions for accessing attributes of Timestamp/datetime64/datetime-like
objects and arrays
"""
from locale import LC_TIME
import cython
from cython import Py_ssize_t
import numpy as np
cimport numpy as cnp
from numpy cimport (
int8_t,
int32_t,
int64_t,
ndarray,
uint32_t,
)
cnp.import_array()
from pandas._config.localization import set_locale
from pandas._libs.tslibs.ccalendar import (
DAYS_FULL,
MONTHS_FULL,
)
from pandas._libs.tslibs.ccalendar cimport (
dayofweek,
get_day_of_year,
get_days_in_month,
get_firstbday,
get_iso_calendar,
get_lastbday,
get_week_of_year,
is_leapyear,
iso_calendar_t,
month_offset,
)
from pandas._libs.tslibs.nattype cimport NPY_NAT
from pandas._libs.tslibs.np_datetime cimport (
dt64_to_dtstruct,
npy_datetimestruct,
pandas_timedeltastruct,
td64_to_tdstruct,
)
from pandas._libs.tslibs.strptime import LocaleTime
@cython.wraparound(False)
@cython.boundscheck(False)
def build_field_sarray(const int64_t[:] dtindex):
    """
    Datetime as int64 representation to a structured array of fields
    (year/month/day/hour/minute/second/microsecond), one record per input.
    """
    cdef:
        Py_ssize_t i, count = len(dtindex)
        npy_datetimestruct dts
        ndarray[int32_t] years, months, days, hours, minutes, seconds, mus

    sa_dtype = [
        ("Y", "i4"),  # year
        ("M", "i4"),  # month
        ("D", "i4"),  # day
        ("h", "i4"),  # hour
        ("m", "i4"),  # min
        ("s", "i4"),  # second
        ("u", "i4"),  # microsecond
    ]

    out = np.empty(count, dtype=sa_dtype)

    # Field views share memory with `out`, so writing into them fills `out`.
    years = out['Y']
    months = out['M']
    days = out['D']
    hours = out['h']
    minutes = out['m']
    seconds = out['s']
    mus = out['u']

    for i in range(count):
        dt64_to_dtstruct(dtindex[i], &dts)
        years[i] = dts.year
        months[i] = dts.month
        days[i] = dts.day
        hours[i] = dts.hour
        minutes[i] = dts.min
        seconds[i] = dts.sec
        mus[i] = dts.us

    return out
def month_position_check(fields, weekdays) -> str | None:
    """
    Classify whether all dates sit at a month boundary.

    Returns "ce" (calendar end), "be" (business end), "cs" (calendar start),
    "bs" (business start), or None if no single classification holds for
    every date.  `fields` is a structured array with "Y"/"M"/"D" int32
    fields (see build_field_sarray); `weekdays` is parallel, Monday=0.
    """
    cdef:
        int32_t daysinmonth, y, m, d
        bint calendar_end = True
        bint business_end = True
        bint calendar_start = True
        bint business_start = True
        bint cal
        int32_t[:] years = fields["Y"]
        int32_t[:] months = fields["M"]
        int32_t[:] days = fields["D"]

    for y, m, d, wd in zip(years, months, days, weekdays):
        if calendar_start:
            calendar_start &= d == 1
        if business_start:
            # First bday is day 1, or Mon within the first 3 calendar days.
            business_start &= d == 1 or (d <= 3 and wd == 0)

        if calendar_end or business_end:
            daysinmonth = get_days_in_month(y, m)
            cal = d == daysinmonth
            if calendar_end:
                calendar_end &= cal
            if business_end:
                # Last bday is the last day, or Fri within the last 3 days.
                business_end &= cal or (daysinmonth - d < 3 and wd == 4)
        elif not calendar_start and not business_start:
            # All four flags are already False; no need to keep scanning.
            break

    if calendar_end:
        return "ce"
    elif business_end:
        return "be"
    elif calendar_start:
        return "cs"
    elif business_start:
        return "bs"
    else:
        return None
@cython.wraparound(False)
@cython.boundscheck(False)
def get_date_name_field(const int64_t[:] dtindex, str field, object locale=None):
    """
    Given a int64-based datetime index, return array of strings of date
    name based on requested field (e.g. day_name).

    NaT entries yield np.nan; names are capitalized.  Only 'day_name' and
    'month_name' are supported; anything else raises ValueError.
    """
    cdef:
        Py_ssize_t i, count = len(dtindex)
        ndarray[object] out, names
        npy_datetimestruct dts
        int dow

    out = np.empty(count, dtype=object)

    if field == 'day_name':
        if locale is None:
            names = np.array(DAYS_FULL, dtype=np.object_)
        else:
            names = np.array(get_locale_names('f_weekday', locale),
                             dtype=np.object_)
        for i in range(count):
            if dtindex[i] == NPY_NAT:
                out[i] = np.nan
                continue

            dt64_to_dtstruct(dtindex[i], &dts)
            dow = dayofweek(dts.year, dts.month, dts.day)
            out[i] = names[dow].capitalize()

    elif field == 'month_name':
        if locale is None:
            names = np.array(MONTHS_FULL, dtype=np.object_)
        else:
            names = np.array(get_locale_names('f_month', locale),
                             dtype=np.object_)
        for i in range(count):
            if dtindex[i] == NPY_NAT:
                out[i] = np.nan
                continue

            dt64_to_dtstruct(dtindex[i], &dts)
            # MONTHS_FULL is 1-indexed by month number (index 0 unused).
            out[i] = names[dts.month].capitalize()

    else:
        raise ValueError(f"Field {field} not supported")

    return out
cdef inline bint _is_on_month(int month, int compare_month, int modby) nogil:
    """
    Analogous to DateOffset.is_on_offset checking for the month part of a date.

    modby: 1 = monthly (any month), 3 = quarterly (every 3rd month from the
    anchor), otherwise annual (exact month match).
    """
    if modby == 1:
        return True
    elif modby == 3:
        return (month - compare_month) % 3 == 0
    else:
        return month == compare_month
@cython.wraparound(False)
@cython.boundscheck(False)
def get_start_end_field(const int64_t[:] dtindex, str field,
                        str freqstr=None, int month_kw=12):
    """
    Given an int64-based datetime index return array of indicators
    of whether timestamps are at the start/end of the month/quarter/year
    (defined by frequency).

    NaT entries are False.  For business frequencies (freqstr starting
    with 'B') the first/last business day of the month is used instead of
    day 1 / the last calendar day.
    """
    cdef:
        Py_ssize_t i
        int count = len(dtindex)
        bint is_business = 0
        int end_month = 12
        int start_month = 1
        ndarray[int8_t] out
        npy_datetimestruct dts
        int compare_month, modby

    out = np.zeros(count, dtype='int8')

    if freqstr:
        if freqstr == 'C':
            raise ValueError(f"Custom business days is not supported by {field}")
        is_business = freqstr[0] == 'B'

        # YearBegin(), BYearBegin() use month = starting month of year.
        # QuarterBegin(), BQuarterBegin() use startingMonth = starting
        # month of year. Other offsets use month, startingMonth as ending
        # month of year.

        if (freqstr[0:2] in ['MS', 'QS', 'AS']) or (
                freqstr[1:3] in ['MS', 'QS', 'AS']):
            end_month = 12 if month_kw == 1 else month_kw - 1
            start_month = month_kw
        else:
            end_month = month_kw
            start_month = (end_month % 12) + 1
    else:
        end_month = 12
        start_month = 1

    # Which anchor month to compare against, and the month-spacing rule.
    compare_month = start_month if "start" in field else end_month
    if "month" in field:
        modby = 1
    elif "quarter" in field:
        modby = 3
    else:
        modby = 12

    if field in ["is_month_start", "is_quarter_start", "is_year_start"]:
        if is_business:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = 0
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)

                if _is_on_month(dts.month, compare_month, modby) and (
                        dts.day == get_firstbday(dts.year, dts.month)):
                    out[i] = 1

        else:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = 0
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)

                if _is_on_month(dts.month, compare_month, modby) and dts.day == 1:
                    out[i] = 1

    elif field in ["is_month_end", "is_quarter_end", "is_year_end"]:
        if is_business:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = 0
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)

                if _is_on_month(dts.month, compare_month, modby) and (
                        dts.day == get_lastbday(dts.year, dts.month)):
                    out[i] = 1

        else:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = 0
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)

                if _is_on_month(dts.month, compare_month, modby) and (
                        dts.day == get_days_in_month(dts.year, dts.month)):
                    out[i] = 1

    else:
        raise ValueError(f"Field {field} not supported")

    return out.view(bool)
@cython.wraparound(False)
@cython.boundscheck(False)
def get_date_field(const int64_t[:] dtindex, str field):
    """
    Given a int64-based datetime index, extract the year, month, etc.,
    field and return an array of these values.

    NaT entries are returned as -1 (except 'is_leap_year', which returns
    a bool array via isleapyear_arr).
    """
    cdef:
        Py_ssize_t i, count = len(dtindex)
        ndarray[int32_t] out
        npy_datetimestruct dts

    out = np.empty(count, dtype='i4')

    if field == 'Y':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                out[i] = dts.year
        return out

    elif field == 'M':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                out[i] = dts.month
        return out

    elif field == 'D':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                out[i] = dts.day
        return out

    elif field == 'h':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                out[i] = dts.hour
        return out

    elif field == 'm':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                out[i] = dts.min
        return out

    elif field == 's':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                out[i] = dts.sec
        return out

    elif field == 'us':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                out[i] = dts.us
        return out

    elif field == 'ns':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                # dts.ps holds picoseconds; convert to nanoseconds.
                out[i] = dts.ps // 1000
        return out

    elif field == 'doy':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                out[i] = get_day_of_year(dts.year, dts.month, dts.day)
        return out

    elif field == 'dow':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                out[i] = dayofweek(dts.year, dts.month, dts.day)
        return out

    elif field == 'woy':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                out[i] = get_week_of_year(dts.year, dts.month, dts.day)
        return out

    elif field == 'q':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                out[i] = dts.month
                # Quarter = month mapped into 1..4.
                out[i] = ((out[i] - 1) // 3) + 1
        return out

    elif field == 'dim':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                out[i] = get_days_in_month(dts.year, dts.month)
        return out
    elif field == 'is_leap_year':
        return isleapyear_arr(get_date_field(dtindex, 'Y'))

    raise ValueError(f"Field {field} not supported")
@cython.wraparound(False)
@cython.boundscheck(False)
def get_timedelta_field(const int64_t[:] tdindex, str field):
    """
    Given a int64-based timedelta index, extract the days, hrs, sec.,
    field and return an array of these values.

    NaT entries are returned as -1.  Supported fields: 'days', 'seconds',
    'microseconds', 'nanoseconds'.
    """
    cdef:
        Py_ssize_t i, count = len(tdindex)
        ndarray[int32_t] out
        pandas_timedeltastruct tds

    out = np.empty(count, dtype='i4')

    if field == 'days':
        with nogil:
            for i in range(count):
                if tdindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                td64_to_tdstruct(tdindex[i], &tds)
                out[i] = tds.days
        return out

    elif field == 'seconds':
        with nogil:
            for i in range(count):
                if tdindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                td64_to_tdstruct(tdindex[i], &tds)
                out[i] = tds.seconds
        return out

    elif field == 'microseconds':
        with nogil:
            for i in range(count):
                if tdindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                td64_to_tdstruct(tdindex[i], &tds)
                out[i] = tds.microseconds
        return out

    elif field == 'nanoseconds':
        with nogil:
            for i in range(count):
                if tdindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                td64_to_tdstruct(tdindex[i], &tds)
                out[i] = tds.nanoseconds
        return out

    raise ValueError(f"Field {field} not supported")
cpdef isleapyear_arr(ndarray years):
    """vectorized version of isleapyear; NaT evaluates as False"""
    cdef:
        ndarray[int8_t] out

    # Gregorian rule: divisible by 400, or divisible by 4 but not by 100.
    # NaT years arrive as -1 (see get_date_field) and fail both tests.
    out = np.zeros(len(years), dtype='int8')
    out[np.logical_or(years % 400 == 0,
                      np.logical_and(years % 4 == 0,
                                     years % 100 > 0))] = 1
    return out.view(bool)
@cython.wraparound(False)
@cython.boundscheck(False)
def build_isocalendar_sarray(const int64_t[:] dtindex):
    """
    Given a int64-based datetime array, return the ISO 8601 year, week, and day
    as a structured array.  NaT entries yield (0, 0, 0).
    """
    cdef:
        Py_ssize_t i, count = len(dtindex)
        npy_datetimestruct dts
        ndarray[uint32_t] iso_years, iso_weeks, days
        iso_calendar_t ret_val

    sa_dtype = [
        ("year", "u4"),
        ("week", "u4"),
        ("day", "u4"),
    ]

    out = np.empty(count, dtype=sa_dtype)

    # Views share memory with `out`; writes below fill the result in place.
    iso_years = out["year"]
    iso_weeks = out["week"]
    days = out["day"]

    with nogil:
        for i in range(count):
            if dtindex[i] == NPY_NAT:
                ret_val = 0, 0, 0
            else:
                dt64_to_dtstruct(dtindex[i], &dts)
                ret_val = get_iso_calendar(dts.year, dts.month, dts.day)

            iso_years[i] = ret_val[0]
            iso_weeks[i] = ret_val[1]
            days[i] = ret_val[2]
    return out
def get_locale_names(name_type: str, locale: object = None):
    """
    Returns an array of localized day or month names.

    Parameters
    ----------
    name_type : str
        Attribute of LocaleTime() in which to return localized names.
    locale : str

    Returns
    -------
    list of locale names
    """
    # Temporarily switch LC_TIME so LocaleTime() picks up the requested locale.
    with set_locale(locale, LC_TIME):
        return getattr(LocaleTime(), name_type)
# ---------------------------------------------------------------------
# Rounding
class RoundTo:
    """
    Enumeration of the available rounding modes for round_nsint64.

    Each member is exposed as a read-only property returning an int code:

    MINUS_INFTY (0)
        round towards -inf, or floor [2]_
    PLUS_INFTY (1)
        round towards +inf, or ceil [3]_
    NEAREST_HALF_EVEN (2)
        round to nearest, tie-break half to even [6]_
    NEAREST_HALF_PLUS_INFTY (3)
        round to nearest, tie-break half to +inf [4]_
    NEAREST_HALF_MINUS_INFTY (4)
        round to nearest, tie-break half to -inf [5]_

    References
    ----------
    .. [1] "Rounding - Wikipedia"
           https://en.wikipedia.org/wiki/Rounding
    .. [2] "Rounding down"
           https://en.wikipedia.org/wiki/Rounding#Rounding_down
    .. [3] "Rounding up"
           https://en.wikipedia.org/wiki/Rounding#Rounding_up
    .. [4] "Round half up"
           https://en.wikipedia.org/wiki/Rounding#Round_half_up
    .. [5] "Round half down"
           https://en.wikipedia.org/wiki/Rounding#Round_half_down
    .. [6] "Round half to even"
           https://en.wikipedia.org/wiki/Rounding#Round_half_to_even
    """

    @property
    def MINUS_INFTY(self) -> int:
        """Floor: round towards negative infinity."""
        return 0

    @property
    def PLUS_INFTY(self) -> int:
        """Ceil: round towards positive infinity."""
        return 1

    @property
    def NEAREST_HALF_EVEN(self) -> int:
        """Round to nearest; ties go to the even multiple."""
        return 2

    @property
    def NEAREST_HALF_PLUS_INFTY(self) -> int:
        """Round to nearest; ties go towards positive infinity."""
        return 3

    @property
    def NEAREST_HALF_MINUS_INFTY(self) -> int:
        """Round to nearest; ties go towards negative infinity."""
        return 4
cdef inline ndarray[int64_t] _floor_int64(int64_t[:] values, int64_t unit):
    # Round each value down to the nearest multiple of `unit`; NaT passes
    # through unchanged.  overflowcheck guards the subtraction near INT64_MIN.
    cdef:
        Py_ssize_t i, n = len(values)
        ndarray[int64_t] result = np.empty(n, dtype="i8")
        int64_t res, value

    with cython.overflowcheck(True):
        for i in range(n):
            value = values[i]

            if value == NPY_NAT:
                res = NPY_NAT
            else:
                res = value - value % unit

            result[i] = res

    return result
cdef inline ndarray[int64_t] _ceil_int64(int64_t[:] values, int64_t unit):
    # Round each value up to the nearest multiple of `unit`; NaT passes
    # through unchanged.  overflowcheck guards the addition near INT64_MAX.
    cdef:
        Py_ssize_t i, n = len(values)
        ndarray[int64_t] result = np.empty(n, dtype="i8")
        int64_t res, value

    with cython.overflowcheck(True):
        for i in range(n):
            value = values[i]

            if value == NPY_NAT:
                res = NPY_NAT
            else:
                remainder = value % unit
                if remainder == 0:
                    # Already on a multiple; avoid overshooting by a unit.
                    res = value
                else:
                    res = value + (unit - remainder)

            result[i] = res

    return result
cdef inline ndarray[int64_t] _rounddown_int64(values, int64_t unit):
    # Round-half-down == ceil after shifting down by half a unit.
    return _ceil_int64(values - unit // 2, unit)


cdef inline ndarray[int64_t] _roundup_int64(values, int64_t unit):
    # Round-half-up == floor after shifting up by half a unit.
    return _floor_int64(values + unit // 2, unit)
def round_nsint64(values: np.ndarray, mode: RoundTo, nanos: int) -> np.ndarray:
    """
    Applies rounding mode at given frequency

    Parameters
    ----------
    values : np.ndarray[int64_t]`
    mode : instance of `RoundTo` enumeration
    nanos : np.int64
        Freq to round to, expressed in nanoseconds

    Returns
    -------
    np.ndarray[int64_t]
    """
    # NOTE(review): `RoundTo.MINUS_INFTY` accessed on the class is the
    # property object itself, so these comparisons hold when callers pass
    # the class-level attributes as `mode` — confirm call sites.
    cdef:
        int64_t unit = nanos

    if mode == RoundTo.MINUS_INFTY:
        return _floor_int64(values, unit)
    elif mode == RoundTo.PLUS_INFTY:
        return _ceil_int64(values, unit)
    elif mode == RoundTo.NEAREST_HALF_MINUS_INFTY:
        return _rounddown_int64(values, unit)
    elif mode == RoundTo.NEAREST_HALF_PLUS_INFTY:
        return _roundup_int64(values, unit)
    elif mode == RoundTo.NEAREST_HALF_EVEN:
        # for odd unit there is no need of a tie break
        if unit % 2:
            return _rounddown_int64(values, unit)
        quotient, remainder = np.divmod(values, unit)
        mask = np.logical_or(
            remainder > (unit // 2),
            np.logical_and(remainder == (unit // 2), quotient % 2)
        )
        quotient[mask] += 1
        return quotient * unit

    # if/elif above should catch all rounding modes defined in enum 'RoundTo':
    # if flow of control arrives here, it is a bug
    raise ValueError("round_nsint64 called with an unrecognized rounding mode")

View File

@@ -0,0 +1,19 @@
from cpython.datetime cimport datetime
from numpy cimport int64_t
# Shared declarations for the NaT singleton machinery (nattype.pyx).
cdef int64_t NPY_NAT                 # the int64 sentinel for NaT
cdef bint _nat_scalar_rules[6]       # comparison results vs NaT, indexed by Py_EQ..Py_GE
cdef set c_nat_strings               # strings recognized as NaT on parsing


cdef class _NaT(datetime):
    cdef readonly:
        int64_t value  # always NPY_NAT


cdef _NaT c_NaT  # the singleton instance


cdef bint checknull_with_nat(object val)
cdef bint is_dt64nat(object val)
cdef bint is_td64nat(object val)

View File

@@ -0,0 +1,155 @@
from datetime import (
datetime,
timedelta,
tzinfo as _tzinfo,
)
from typing import Any
import numpy as np
from pandas._libs.tslibs.period import Period
NaT: NaTType
iNaT: int
nat_strings: set[str]
def is_null_datetimelike(val: object, inat_is_null: bool = ...) -> bool: ...
class NaTType(datetime):
    """
    Stub for the type of the NaT singleton.

    Datetime-like attributes return float (NaN) instead of int, hence the
    many ``type: ignore[override]`` annotations against the datetime/date
    supertypes.
    """

    value: np.int64
    def asm8(self) -> np.datetime64: ...
    def to_datetime64(self) -> np.datetime64: ...
    def to_numpy(
        self, dtype: np.dtype | str | None = ..., copy: bool = ...
    ) -> np.datetime64 | np.timedelta64: ...
    @property
    def is_leap_year(self) -> bool: ...
    @property
    def is_month_start(self) -> bool: ...
    @property
    def is_quarter_start(self) -> bool: ...
    @property
    def is_year_start(self) -> bool: ...
    @property
    def is_month_end(self) -> bool: ...
    @property
    def is_quarter_end(self) -> bool: ...
    @property
    def is_year_end(self) -> bool: ...
    @property
    def day_of_year(self) -> float: ...
    @property
    def dayofyear(self) -> float: ...
    @property
    def days_in_month(self) -> float: ...
    @property
    def daysinmonth(self) -> float: ...
    @property
    def day_of_week(self) -> float: ...
    @property
    def dayofweek(self) -> float: ...
    @property
    def week(self) -> float: ...
    @property
    def weekofyear(self) -> float: ...
    def day_name(self) -> float: ...
    def month_name(self) -> float: ...
    # error: Return type "float" of "weekday" incompatible with return
    # type "int" in supertype "date"
    def weekday(self) -> float: ...  # type: ignore[override]
    # error: Return type "float" of "isoweekday" incompatible with return
    # type "int" in supertype "date"
    def isoweekday(self) -> float: ...  # type: ignore[override]
    def total_seconds(self) -> float: ...
    # error: Signature of "today" incompatible with supertype "datetime"
    def today(self, *args, **kwargs) -> NaTType: ...  # type: ignore[override]
    # error: Signature of "today" incompatible with supertype "datetime"
    def now(self, *args, **kwargs) -> NaTType: ...  # type: ignore[override]
    def to_pydatetime(self) -> NaTType: ...
    def date(self) -> NaTType: ...
    def round(self) -> NaTType: ...
    def floor(self) -> NaTType: ...
    def ceil(self) -> NaTType: ...
    def tz_convert(self) -> NaTType: ...
    def tz_localize(self) -> NaTType: ...
    # error: Signature of "replace" incompatible with supertype "datetime"
    def replace(  # type: ignore[override]
        self,
        year: int | None = ...,
        month: int | None = ...,
        day: int | None = ...,
        hour: int | None = ...,
        minute: int | None = ...,
        second: int | None = ...,
        microsecond: int | None = ...,
        nanosecond: int | None = ...,
        tzinfo: _tzinfo | None = ...,
        fold: int | None = ...,
    ) -> NaTType: ...
    # error: Return type "float" of "year" incompatible with return
    # type "int" in supertype "date"
    @property
    def year(self) -> float: ...  # type: ignore[override]
    @property
    def quarter(self) -> float: ...
    # error: Return type "float" of "month" incompatible with return
    # type "int" in supertype "date"
    @property
    def month(self) -> float: ...  # type: ignore[override]
    # error: Return type "float" of "day" incompatible with return
    # type "int" in supertype "date"
    @property
    def day(self) -> float: ...  # type: ignore[override]
    # error: Return type "float" of "hour" incompatible with return
    # type "int" in supertype "date"
    @property
    def hour(self) -> float: ...  # type: ignore[override]
    # error: Return type "float" of "minute" incompatible with return
    # type "int" in supertype "date"
    @property
    def minute(self) -> float: ...  # type: ignore[override]
    # error: Return type "float" of "second" incompatible with return
    # type "int" in supertype "date"
    @property
    def second(self) -> float: ...  # type: ignore[override]
    @property
    def millisecond(self) -> float: ...
    # error: Return type "float" of "microsecond" incompatible with return
    # type "int" in supertype "date"
    @property
    def microsecond(self) -> float: ...  # type: ignore[override]
    @property
    def nanosecond(self) -> float: ...
    # inject Timedelta properties
    @property
    def days(self) -> float: ...
    @property
    def microseconds(self) -> float: ...
    @property
    def nanoseconds(self) -> float: ...
    # inject Period properties
    @property
    def qyear(self) -> float: ...
    def __eq__(self, other: Any) -> bool: ...
    def __ne__(self, other: Any) -> bool: ...
    # https://github.com/python/mypy/issues/9015
    # error: Argument 1 of "__lt__" is incompatible with supertype "date";
    # supertype defines the argument type as "date"
    def __lt__(  # type: ignore[override]
        self, other: datetime | timedelta | Period | np.datetime64 | np.timedelta64
    ) -> bool: ...
    # error: Argument 1 of "__le__" is incompatible with supertype "date";
    # supertype defines the argument type as "date"
    def __le__(  # type: ignore[override]
        self, other: datetime | timedelta | Period | np.datetime64 | np.timedelta64
    ) -> bool: ...
    # error: Argument 1 of "__gt__" is incompatible with supertype "date";
    # supertype defines the argument type as "date"
    def __gt__(  # type: ignore[override]
        self, other: datetime | timedelta | Period | np.datetime64 | np.timedelta64
    ) -> bool: ...
    # error: Argument 1 of "__ge__" is incompatible with supertype "date";
    # supertype defines the argument type as "date"
    def __ge__(  # type: ignore[override]
        self, other: datetime | timedelta | Period | np.datetime64 | np.timedelta64
    ) -> bool: ...

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,81 @@
from cpython.datetime cimport (
date,
datetime,
)
from numpy cimport (
int32_t,
int64_t,
)
# Declarations of numpy's C-level datetime machinery and the helpers
# implemented in np_datetime.pyx.

cdef extern from "numpy/ndarrayobject.h":
    ctypedef int64_t npy_timedelta
    ctypedef int64_t npy_datetime

cdef extern from "numpy/ndarraytypes.h":
    ctypedef struct PyArray_DatetimeMetaData:
        NPY_DATETIMEUNIT base
        int64_t num

cdef extern from "numpy/arrayscalars.h":
    ctypedef struct PyDatetimeScalarObject:
        # PyObject_HEAD
        npy_datetime obval
        PyArray_DatetimeMetaData obmeta

    ctypedef struct PyTimedeltaScalarObject:
        # PyObject_HEAD
        npy_timedelta obval
        PyArray_DatetimeMetaData obmeta

cdef extern from "numpy/ndarraytypes.h":
    ctypedef struct npy_datetimestruct:
        int64_t year
        int32_t month, day, hour, min, sec, us, ps, as

    ctypedef enum NPY_DATETIMEUNIT:
        NPY_FR_Y
        NPY_FR_M
        NPY_FR_W
        NPY_FR_D
        NPY_FR_B
        NPY_FR_h
        NPY_FR_m
        NPY_FR_s
        NPY_FR_ms
        NPY_FR_us
        NPY_FR_ns
        NPY_FR_ps
        NPY_FR_fs
        NPY_FR_as
        NPY_FR_GENERIC

cdef extern from "src/datetime/np_datetime.h":
    ctypedef struct pandas_timedeltastruct:
        int64_t days
        int32_t hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds

    void pandas_datetime_to_datetimestruct(npy_datetime val,
                                           NPY_DATETIMEUNIT fr,
                                           npy_datetimestruct *result) nogil

cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1

cdef check_dts_bounds(npy_datetimestruct *dts)

cdef int64_t dtstruct_to_dt64(npy_datetimestruct* dts) nogil
cdef void dt64_to_dtstruct(int64_t dt64, npy_datetimestruct* out) nogil
cdef void td64_to_tdstruct(int64_t td64, pandas_timedeltastruct* out) nogil

cdef int64_t pydatetime_to_dt64(datetime val, npy_datetimestruct *dts)
cdef int64_t pydate_to_dt64(date val, npy_datetimestruct *dts)
cdef void pydate_to_dtstruct(date val, npy_datetimestruct *dts)

cdef npy_datetime get_datetime64_value(object obj) nogil
cdef npy_timedelta get_timedelta64_value(object obj) nogil
cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil

cdef int _string_to_dts(str val, npy_datetimestruct* dts,
                        int* out_local, int* out_tzoffset,
                        bint want_exc) except? -1

View File

@@ -0,0 +1 @@
class OutOfBoundsDatetime(ValueError): ...

View File

@@ -0,0 +1,186 @@
from cpython.datetime cimport (
    PyDateTime_DATE_GET_HOUR,
    PyDateTime_DATE_GET_MICROSECOND,
    PyDateTime_DATE_GET_MINUTE,
    PyDateTime_DATE_GET_SECOND,
    PyDateTime_GET_DAY,
    PyDateTime_GET_MONTH,
    PyDateTime_GET_YEAR,
    PyDateTime_IMPORT,
)
from cpython.object cimport (
    Py_EQ,
    Py_GE,
    Py_GT,
    Py_LE,
    Py_LT,
    Py_NE,
)

# Initialize the CPython datetime C-API before any of the macros above are used.
PyDateTime_IMPORT

from numpy cimport int64_t

from pandas._libs.tslibs.util cimport get_c_string_buf_and_size


cdef extern from "src/datetime/np_datetime.h":
    int cmp_npy_datetimestruct(npy_datetimestruct *a,
                               npy_datetimestruct *b)

    npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr,
                                                npy_datetimestruct *d) nogil

    void pandas_datetime_to_datetimestruct(npy_datetime val,
                                           NPY_DATETIMEUNIT fr,
                                           npy_datetimestruct *result) nogil

    void pandas_timedelta_to_timedeltastruct(npy_timedelta val,
                                             NPY_DATETIMEUNIT fr,
                                             pandas_timedeltastruct *result
                                             ) nogil

    # Bounds of the datetime range representable as int64 nanoseconds,
    # used by check_dts_bounds below.
    npy_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS

cdef extern from "src/datetime/np_datetime_strings.h":
    int parse_iso_8601_datetime(const char *str, int len, int want_exc,
                                npy_datetimestruct *out,
                                int *out_local, int *out_tzoffset)

# ----------------------------------------------------------------------
# numpy object inspection

cdef inline npy_datetime get_datetime64_value(object obj) nogil:
    """
    returns the int64 value underlying scalar numpy datetime64 object

    Note that to interpret this as a datetime, the corresponding unit is
    also needed.  That can be found using `get_datetime64_unit`.
    """
    # Reinterpret the scalar object's C layout; no Python API calls (nogil).
    return (<PyDatetimeScalarObject*>obj).obval

cdef inline npy_timedelta get_timedelta64_value(object obj) nogil:
    """
    returns the int64 value underlying scalar numpy timedelta64 object
    """
    return (<PyTimedeltaScalarObject*>obj).obval

cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil:
    """
    returns the unit part of the dtype for a numpy datetime64 object.
    """
    return <NPY_DATETIMEUNIT>(<PyDatetimeScalarObject*>obj).obmeta.base

# ----------------------------------------------------------------------
# Comparison

cdef inline bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1:
    """
    cmp_scalar is a more performant version of PyObject_RichCompare
    typed for int64_t arguments.
    """
    # NOTE(review): falls through (implicitly returning 0) for any op other
    # than the six Py_* comparison constants; callers only pass those six.
    if op == Py_EQ:
        return lhs == rhs
    elif op == Py_NE:
        return lhs != rhs
    elif op == Py_LT:
        return lhs < rhs
    elif op == Py_LE:
        return lhs <= rhs
    elif op == Py_GT:
        return lhs > rhs
    elif op == Py_GE:
        return lhs >= rhs


class OutOfBoundsDatetime(ValueError):
    pass


cdef inline check_dts_bounds(npy_datetimestruct *dts):
    """Raises OutOfBoundsDatetime if the given date is outside the range that
    can be represented by nanosecond-resolution 64-bit integers."""
    cdef:
        bint error = False

    # Cheap year check first; only do the full struct comparison near the edges.
    if (dts.year <= 1677 and
            cmp_npy_datetimestruct(dts, &_NS_MIN_DTS) == -1):
        error = True
    elif (dts.year >= 2262 and
            cmp_npy_datetimestruct(dts, &_NS_MAX_DTS) == 1):
        error = True

    if error:
        fmt = (f'{dts.year}-{dts.month:02d}-{dts.day:02d} '
               f'{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}')
        raise OutOfBoundsDatetime(f'Out of bounds nanosecond timestamp: {fmt}')

# ----------------------------------------------------------------------
# Conversion

cdef inline int64_t dtstruct_to_dt64(npy_datetimestruct* dts) nogil:
    """Convenience function to call npy_datetimestruct_to_datetime
    with the by-far-most-common frequency NPY_FR_ns"""
    return npy_datetimestruct_to_datetime(NPY_FR_ns, dts)

cdef inline void dt64_to_dtstruct(int64_t dt64,
                                  npy_datetimestruct* out) nogil:
    """Convenience function to call pandas_datetime_to_datetimestruct
    with the by-far-most-common frequency NPY_FR_ns"""
    pandas_datetime_to_datetimestruct(dt64, NPY_FR_ns, out)
    return

cdef inline void td64_to_tdstruct(int64_t td64,
                                  pandas_timedeltastruct* out) nogil:
    """Convenience function to call pandas_timedelta_to_timedeltastruct
    with the by-far-most-common frequency NPY_FR_ns"""
    pandas_timedelta_to_timedeltastruct(td64, NPY_FR_ns, out)
    return

cdef inline int64_t pydatetime_to_dt64(datetime val,
                                       npy_datetimestruct *dts):
    """
    Note we are assuming that the datetime object is timezone-naive.
    """
    dts.year = PyDateTime_GET_YEAR(val)
    dts.month = PyDateTime_GET_MONTH(val)
    dts.day = PyDateTime_GET_DAY(val)
    dts.hour = PyDateTime_DATE_GET_HOUR(val)
    dts.min = PyDateTime_DATE_GET_MINUTE(val)
    dts.sec = PyDateTime_DATE_GET_SECOND(val)
    dts.us = PyDateTime_DATE_GET_MICROSECOND(val)
    # datetime has microsecond resolution; sub-microsecond fields are zero.
    dts.ps = dts.as = 0
    return dtstruct_to_dt64(dts)

cdef inline void pydate_to_dtstruct(date val, npy_datetimestruct *dts):
    # Fill *dts from a datetime.date; all time-of-day fields are zeroed.
    dts.year = PyDateTime_GET_YEAR(val)
    dts.month = PyDateTime_GET_MONTH(val)
    dts.day = PyDateTime_GET_DAY(val)
    dts.hour = dts.min = dts.sec = dts.us = 0
    dts.ps = dts.as = 0
    return

cdef inline int64_t pydate_to_dt64(date val, npy_datetimestruct *dts):
    # datetime.date -> int64 nanoseconds since the epoch (midnight that day).
    pydate_to_dtstruct(val, dts)
    return dtstruct_to_dt64(dts)

cdef inline int _string_to_dts(str val, npy_datetimestruct* dts,
                               int* out_local, int* out_tzoffset,
                               bint want_exc) except? -1:
    # Thin wrapper handing the str's UTF-8 buffer to the C ISO-8601 parser.
    cdef:
        Py_ssize_t length
        const char* buf

    buf = get_c_string_buf_and_size(val, &length)
    return parse_iso_8601_datetime(buf, length, want_exc,
                                   dts, out_local, out_tzoffset)

View File

@@ -0,0 +1,12 @@
from numpy cimport int64_t

# Declarations for the offsets extension module (bodies live in offsets.pyx).
cpdef to_offset(object obj)
cdef bint is_offset_object(object obj)
cdef bint is_tick_object(object obj)


cdef class BaseOffset:
    cdef readonly:
        int64_t n
        bint normalize
        dict _cache

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,3 @@
# Frequency-rule helpers; cpdef so they are callable from both C and Python.
cpdef str get_rule_month(str source)
cpdef quarter_to_myear(int year, int quarter, str freq)

View File

@@ -0,0 +1,62 @@
# Type stubs for pandas._libs.tslibs.parsing (implemented in Cython).
from datetime import datetime

import numpy as np

from pandas._libs.tslibs.offsets import BaseOffset
from pandas._typing import npt

class DateParseError(ValueError): ...

def parse_datetime_string(
    date_string: str,
    dayfirst: bool = ...,
    yearfirst: bool = ...,
    **kwargs,
) -> datetime: ...

# Returns the parsed datetime together with a resolution/frequency string.
def parse_time_string(
    arg: str,
    freq: BaseOffset | str | None = ...,
    dayfirst: bool | None = ...,
    yearfirst: bool | None = ...,
) -> tuple[datetime, str]: ...
def _does_string_look_like_datetime(py_string: str) -> bool: ...
def quarter_to_myear(year: int, quarter: int, freq: str) -> tuple[int, int]: ...
def try_parse_dates(
    values: npt.NDArray[np.object_],  # object[:]
    parser=...,
    dayfirst: bool = ...,
    default: datetime | None = ...,
) -> npt.NDArray[np.object_]: ...
def try_parse_date_and_time(
    dates: npt.NDArray[np.object_],  # object[:]
    times: npt.NDArray[np.object_],  # object[:]
    date_parser=...,
    time_parser=...,
    dayfirst: bool = ...,
    default: datetime | None = ...,
) -> npt.NDArray[np.object_]: ...
def try_parse_year_month_day(
    years: npt.NDArray[np.object_],  # object[:]
    months: npt.NDArray[np.object_],  # object[:]
    days: npt.NDArray[np.object_],  # object[:]
) -> npt.NDArray[np.object_]: ...
def try_parse_datetime_components(
    years: npt.NDArray[np.object_],  # object[:]
    months: npt.NDArray[np.object_],  # object[:]
    days: npt.NDArray[np.object_],  # object[:]
    hours: npt.NDArray[np.object_],  # object[:]
    minutes: npt.NDArray[np.object_],  # object[:]
    seconds: npt.NDArray[np.object_],  # object[:]
) -> npt.NDArray[np.object_]: ...
def format_is_iso(f: str) -> bool: ...
def guess_datetime_format(
    dt_str,
    dayfirst: bool = ...,
    dt_str_parse=...,
    dt_str_split=...,
) -> str | None: ...
def concat_date_cols(
    date_cols: tuple,
    keep_trivial_numbers: bool = ...,
) -> npt.NDArray[np.object_]: ...
def get_rule_month(source: str) -> str: ...

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,7 @@
from numpy cimport int64_t

from .np_datetime cimport npy_datetimestruct

# C-level declarations shared with other tslibs modules (bodies in period.pyx).
cdef bint is_period_object(object obj)

cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) nogil

View File

@@ -0,0 +1,125 @@
# Type stubs for pandas._libs.tslibs.period module-level API.
from typing import Literal

import numpy as np

from pandas._libs.tslibs.nattype import NaTType
from pandas._libs.tslibs.offsets import BaseOffset
from pandas._libs.tslibs.timestamps import Timestamp
from pandas._typing import (
    Frequency,
    Timezone,
    npt,
)

INVALID_FREQ_ERR_MSG: str
DIFFERENT_FREQ: str

class IncompatibleFrequency(ValueError): ...

def periodarr_to_dt64arr(
    periodarr: npt.NDArray[np.int64],  # const int64_t[:]
    freq: int,
) -> npt.NDArray[np.int64]: ...
def period_asfreq_arr(
    arr: npt.NDArray[np.int64],
    freq1: int,
    freq2: int,
    end: bool,
) -> npt.NDArray[np.int64]: ...
def get_period_field_arr(
    field: str,
    arr: npt.NDArray[np.int64],  # const int64_t[:]
    freq: int,
) -> npt.NDArray[np.int64]: ...
def from_ordinals(
    values: npt.NDArray[np.int64],  # const int64_t[:]
    freq: Frequency,
) -> npt.NDArray[np.int64]: ...
def extract_ordinals(
    values: npt.NDArray[np.object_],
    freq: Frequency | int,
) -> npt.NDArray[np.int64]: ...
def extract_freq(
    values: npt.NDArray[np.object_],
) -> BaseOffset: ...

# exposed for tests
def period_asfreq(ordinal: int, freq1: int, freq2: int, end: bool) -> int: ...
def period_ordinal(
    y: int, m: int, d: int, h: int, min: int, s: int, us: int, ps: int, freq: int
) -> int: ...
def freq_to_dtype_code(freq: BaseOffset) -> int: ...
def validate_end_alias(how: str) -> Literal["E", "S"]: ...
class Period:
    """Stub signatures for the pandas Period scalar."""

    ordinal: int  # int64_t
    freq: BaseOffset
    # error: "__new__" must return a class instance (got "Union[Period, NaTType]")
    def __new__(  # type: ignore[misc]
        cls,
        value=...,
        freq: int | str | None = ...,
        ordinal: int | None = ...,
        year: int | None = ...,
        month: int | None = ...,
        quarter: int | None = ...,
        day: int | None = ...,
        hour: int | None = ...,
        minute: int | None = ...,
        second: int | None = ...,
    ) -> Period | NaTType: ...
    @classmethod
    def _maybe_convert_freq(cls, freq) -> BaseOffset: ...
    # Alternate constructor from an (ordinal, freq) pair.
    @classmethod
    def _from_ordinal(cls, ordinal: int, freq) -> Period: ...
    @classmethod
    def now(cls, freq: BaseOffset = ...) -> Period: ...
    def strftime(self, fmt: str) -> str: ...
    def to_timestamp(
        self,
        freq: str | BaseOffset | None = ...,
        how: str = ...,
        tz: Timezone | None = ...,
    ) -> Timestamp: ...
    def asfreq(self, freq: str, how: str = ...) -> Period: ...
    @property
    def freqstr(self) -> str: ...
    @property
    def is_leap_year(self) -> bool: ...
    @property
    def daysinmonth(self) -> int: ...
    @property
    def days_in_month(self) -> int: ...
    @property
    def qyear(self) -> int: ...
    @property
    def quarter(self) -> int: ...
    @property
    def day_of_year(self) -> int: ...
    @property
    def weekday(self) -> int: ...
    @property
    def day_of_week(self) -> int: ...
    @property
    def week(self) -> int: ...
    @property
    def weekofyear(self) -> int: ...
    @property
    def second(self) -> int: ...
    @property
    def minute(self) -> int: ...
    @property
    def hour(self) -> int: ...
    @property
    def day(self) -> int: ...
    @property
    def month(self) -> int: ...
    @property
    def year(self) -> int: ...
    @property
    def end_time(self) -> Timestamp: ...
    @property
    def start_time(self) -> Timestamp: ...
    def __sub__(self, other) -> Period | BaseOffset: ...
    def __add__(self, other) -> Period: ...

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,774 @@
/*
Copyright (c) 2016, PyData Development Team
All rights reserved.
Distributed under the terms of the BSD Simplified License.
The full license is in the LICENSE file, distributed with this software.
Copyright (c) 2005-2011, NumPy Developers
All rights reserved.
This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
*/
#define NO_IMPORT

#ifndef NPY_NO_DEPRECATED_API
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#endif  // NPY_NO_DEPRECATED_API

#include <Python.h>

#include <numpy/arrayobject.h>
#include <numpy/arrayscalars.h>
#include <numpy/ndarraytypes.h>
#include "np_datetime.h"

/* Keep the Py2-era spelling used throughout this file. */
#if PY_MAJOR_VERSION >= 3
#define PyInt_AsLong PyLong_AsLong
#endif  // PyInt_AsLong

/*
 * Bounds of the range representable as int64 nanoseconds since the 1970
 * epoch: 1677-09-21T00:12:43.145224193 through 2262-04-11T23:47:16.854775807.
 */
const npy_datetimestruct _NS_MIN_DTS = {
    1677, 9, 21, 0, 12, 43, 145224, 193000, 0};
const npy_datetimestruct _NS_MAX_DTS = {
    2262, 4, 11, 23, 47, 16, 854775, 807000, 0};

/* Month lengths indexed by [is_leapyear][month - 1]. */
const int days_per_month_table[2][12] = {
    {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
    {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}};
/*
 * Returns 1 if the given year is a leap year, 0 otherwise.
 *
 * Gregorian rule: divisible by 4, except century years that are not
 * divisible by 400.
 */
int is_leapyear(npy_int64 year) {
    if (year % 4 != 0) {
        return 0;
    }
    if (year % 100 != 0) {
        return 1;
    }
    return (year % 400) == 0;
}
/*
 * Adjusts a datetimestruct based on a minutes offset. Assumes
 * the current values are valid.
 */
void add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) {
    int isleap;

    /* MINUTES: borrow from / carry into hours until 0 <= min < 60. */
    dts->min += minutes;
    while (dts->min < 0) {
        dts->min += 60;
        dts->hour--;
    }
    while (dts->min >= 60) {
        dts->min -= 60;
        dts->hour++;
    }

    /* HOURS: borrow from / carry into days until 0 <= hour < 24. */
    while (dts->hour < 0) {
        dts->hour += 24;
        dts->day--;
    }
    while (dts->hour >= 24) {
        dts->hour -= 24;
        dts->day++;
    }

    /* DAYS: at most one month of adjustment is needed, because the offset
     * above changes the day by at most one in either direction. */
    if (dts->day < 1) {
        dts->month--;
        if (dts->month < 1) {
            dts->year--;
            dts->month = 12;
        }
        isleap = is_leapyear(dts->year);
        dts->day += days_per_month_table[isleap][dts->month - 1];
    } else if (dts->day > 28) {
        isleap = is_leapyear(dts->year);
        if (dts->day > days_per_month_table[isleap][dts->month - 1]) {
            dts->day -= days_per_month_table[isleap][dts->month - 1];
            dts->month++;
            if (dts->month > 12) {
                dts->year++;
                dts->month = 1;
            }
        }
    }
}
/*
 * Calculates the days offset from the 1970 epoch.
 *
 * Leap-day counting works on a shifted copy of `year` so that each of the
 * /4, /100, /400 corrections divides relative to the nearest anchor year
 * (1968/1972, 1900/2000, 1600/2000); the order of the += / -= adjustments
 * below is therefore significant.
 */
npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) {
    int i, month;
    npy_int64 year, days = 0;
    const int *month_lengths;

    year = dts->year - 1970;
    days = year * 365;

    /* Adjust for leap years */
    if (days >= 0) {
        /*
         * 1968 is the closest leap year before 1970.
         * Exclude the current year, so add 1.
         */
        year += 1;
        /* Add one day for each 4 years */
        days += year / 4;
        /* 1900 is the closest previous year divisible by 100 */
        year += 68;
        /* Subtract one day for each 100 years */
        days -= year / 100;
        /* 1600 is the closest previous year divisible by 400 */
        year += 300;
        /* Add one day for each 400 years */
        days += year / 400;
    } else {
        /*
         * 1972 is the closest later year after 1970.
         * Include the current year, so subtract 2.
         */
        year -= 2;
        /* Subtract one day for each 4 years */
        days += year / 4;
        /* 2000 is the closest later year divisible by 100 */
        year -= 28;
        /* Add one day for each 100 years */
        days -= year / 100;
        /* 2000 is also the closest later year divisible by 400 */
        /* Subtract one day for each 400 years */
        days += year / 400;
    }

    month_lengths = days_per_month_table[is_leapyear(dts->year)];
    month = dts->month - 1;

    /* Add the months */
    for (i = 0; i < month; ++i) {
        days += month_lengths[i];
    }

    /* Add the days */
    days += dts->day - 1;

    return days;
}
/*
 * Modifies '*days_' to be the day offset within the year,
 * and returns the year.
 *
 * Works relative to the year 2000 (a multiple of 400) so the 400/100/4-year
 * Gregorian cycle can be peeled off layer by layer.
 */
static npy_int64 days_to_yearsdays(npy_int64 *days_) {
    const npy_int64 days_per_400years = (400 * 365 + 100 - 4 + 1);
    /* Adjust so it's relative to the year 2000 (divisible by 400) */
    npy_int64 days = (*days_) - (365 * 30 + 7);
    npy_int64 year;

    /* Break down the 400 year cycle to get the year and day within the year */
    if (days >= 0) {
        year = 400 * (days / days_per_400years);
        days = days % days_per_400years;
    } else {
        /* Round toward negative infinity so `days` ends up non-negative. */
        year = 400 * ((days - (days_per_400years - 1)) / days_per_400years);
        days = days % days_per_400years;
        if (days < 0) {
            days += days_per_400years;
        }
    }

    /* Work out the year/day within the 400 year cycle */
    if (days >= 366) {
        year += 100 * ((days - 1) / (100 * 365 + 25 - 1));
        days = (days - 1) % (100 * 365 + 25 - 1);
        if (days >= 365) {
            year += 4 * ((days + 1) / (4 * 365 + 1));
            days = (days + 1) % (4 * 365 + 1);
            if (days >= 366) {
                year += (days - 1) / 365;
                days = (days - 1) % 365;
            }
        }
    }

    *days_ = days;
    return year + 2000;
}
/*
 * Adjusts a datetimestruct based on a seconds offset. Assumes
 * the current values are valid.
 *
 * Normalizes seconds into [0, 60) and delegates any overflow/underflow of
 * whole minutes to add_minutes_to_datetimestruct.
 */
NPY_NO_EXPORT void add_seconds_to_datetimestruct(npy_datetimestruct *dts,
                                                 int seconds) {
    int minutes;

    dts->sec += seconds;
    if (dts->sec < 0) {
        minutes = dts->sec / 60;
        dts->sec = dts->sec % 60;
        /* C division truncates toward zero; shift into floored form. */
        if (dts->sec < 0) {
            --minutes;
            dts->sec += 60;
        }
        add_minutes_to_datetimestruct(dts, minutes);
    } else if (dts->sec >= 60) {
        minutes = dts->sec / 60;
        dts->sec = dts->sec % 60;
        add_minutes_to_datetimestruct(dts, minutes);
    }
}
/*
 * Fills in the year, month, day in 'dts' based on the days
 * offset from 1970.
 */
static void set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) {
    const int *month_lengths;
    int i;

    dts->year = days_to_yearsdays(&days);
    month_lengths = days_per_month_table[is_leapyear(dts->year)];

    /* Walk the months, consuming days until the remainder fits in one. */
    for (i = 0; i < 12; ++i) {
        if (days < month_lengths[i]) {
            dts->month = i + 1;
            dts->day = days + 1;
            return;
        } else {
            days -= month_lengths[i];
        }
    }
}
/*
 * Compares two npy_datetimestruct objects chronologically.
 *
 * Returns 1 if *a is later than *b, -1 if earlier, and 0 if equal,
 * comparing fields from coarsest (year) to finest (attoseconds).
 */
int cmp_npy_datetimestruct(const npy_datetimestruct *a,
                           const npy_datetimestruct *b) {
#define PANDAS_CMP_DTS_FIELD(field)                 \
    do {                                            \
        if (a->field != b->field) {                 \
            return (a->field > b->field) ? 1 : -1;  \
        }                                           \
    } while (0)

    PANDAS_CMP_DTS_FIELD(year);
    PANDAS_CMP_DTS_FIELD(month);
    PANDAS_CMP_DTS_FIELD(day);
    PANDAS_CMP_DTS_FIELD(hour);
    PANDAS_CMP_DTS_FIELD(min);
    PANDAS_CMP_DTS_FIELD(sec);
    PANDAS_CMP_DTS_FIELD(us);
    PANDAS_CMP_DTS_FIELD(ps);
    PANDAS_CMP_DTS_FIELD(as);

#undef PANDAS_CMP_DTS_FIELD

    return 0;
}
/*
 *
 * Converts a Python datetime.datetime or datetime.date
 * object into a NumPy npy_datetimestruct.  Uses tzinfo (if present)
 * to convert to UTC time.
 *
 * The following implementation just asks for attributes, and thus
 * supports datetime duck typing. The tzinfo time zone conversion
 * requires this style of access as well.
 *
 * Returns -1 on error, 0 on success, and 1 (with no error set)
 * if obj doesn't have the needed date or datetime attributes.
 */
int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
                                         npy_datetimestruct *out) {
    // Assumes that obj is a valid datetime object
    PyObject *tmp;
    PyObject *obj = (PyObject*)dtobj;

    /* Initialize the output to all zeros */
    memset(out, 0, sizeof(npy_datetimestruct));
    out->month = 1;
    out->day = 1;

    /* NOTE(review): the new references returned by PyObject_GetAttrString
     * below are never DECREF'd, and the PyInt_AsLong results are not checked
     * for errors on these three fields.  Pre-existing behavior, left as-is. */
    out->year = PyInt_AsLong(PyObject_GetAttrString(obj, "year"));
    out->month = PyInt_AsLong(PyObject_GetAttrString(obj, "month"));
    out->day = PyInt_AsLong(PyObject_GetAttrString(obj, "day"));

    // TODO(anyone): If we can get PyDateTime_IMPORT to work, we could use
    // PyDateTime_Check here, and less verbose attribute lookups.

    /* Check for time attributes (if not there, return success as a date) */
    if (!PyObject_HasAttrString(obj, "hour") ||
        !PyObject_HasAttrString(obj, "minute") ||
        !PyObject_HasAttrString(obj, "second") ||
        !PyObject_HasAttrString(obj, "microsecond")) {
        return 0;
    }

    out->hour = PyInt_AsLong(PyObject_GetAttrString(obj, "hour"));
    out->min = PyInt_AsLong(PyObject_GetAttrString(obj, "minute"));
    out->sec = PyInt_AsLong(PyObject_GetAttrString(obj, "second"));
    out->us = PyInt_AsLong(PyObject_GetAttrString(obj, "microsecond"));

    /* Apply the time zone offset if datetime obj is tz-aware */
    if (PyObject_HasAttrString((PyObject*)obj, "tzinfo")) {
        tmp = PyObject_GetAttrString(obj, "tzinfo");
        if (tmp == NULL) {
            return -1;
        }
        if (tmp == Py_None) {
            Py_DECREF(tmp);
        } else {
            PyObject *offset;
            PyObject *tmp_int;
            int seconds_offset, minutes_offset;

            /* The utcoffset function should return a timedelta */
            offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj);
            if (offset == NULL) {
                Py_DECREF(tmp);
                return -1;
            }
            Py_DECREF(tmp);

            /*
             * The timedelta should have a function "total_seconds"
             * which contains the value we want.
             */
            tmp = PyObject_CallMethod(offset, "total_seconds", "");
            if (tmp == NULL) {
                return -1;
            }
            tmp_int = PyNumber_Long(tmp);
            if (tmp_int == NULL) {
                Py_DECREF(tmp);
                return -1;
            }
            seconds_offset = PyInt_AsLong(tmp_int);
            if (seconds_offset == -1 && PyErr_Occurred()) {
                Py_DECREF(tmp_int);
                Py_DECREF(tmp);
                return -1;
            }
            Py_DECREF(tmp_int);
            Py_DECREF(tmp);

            /* Convert to a minutes offset and apply it */
            minutes_offset = seconds_offset / 60;

            /* Subtract the offset to convert local time to UTC. */
            add_minutes_to_datetimestruct(out, -minutes_offset);
        }
    }

    return 0;
}
/*
 * Converts a datetime from a datetimestruct to a datetime based
 * on a metadata unit. The date is assumed to be valid.
 *
 * Returns the integer count of `base` units since the 1970 epoch, or -1
 * (with a Python exception set) if `base` is not a recognized unit.
 */
npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
                                            const npy_datetimestruct *dts) {
    npy_datetime ret;

    if (base == NPY_FR_Y) {
        /* Truncate to the year */
        ret = dts->year - 1970;
    } else if (base == NPY_FR_M) {
        /* Truncate to the month */
        ret = 12 * (dts->year - 1970) + (dts->month - 1);
    } else {
        /* Otherwise calculate the number of days to start */
        npy_int64 days = get_datetimestruct_days(dts);

        switch (base) {
            case NPY_FR_W:
                /* Truncate to weeks; floor division for pre-epoch dates. */
                if (days >= 0) {
                    ret = days / 7;
                } else {
                    ret = (days - 6) / 7;
                }
                break;
            case NPY_FR_D:
                ret = days;
                break;
            case NPY_FR_h:
                ret = days * 24 + dts->hour;
                break;
            case NPY_FR_m:
                ret = (days * 24 + dts->hour) * 60 + dts->min;
                break;
            case NPY_FR_s:
                ret = ((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec;
                break;
            case NPY_FR_ms:
                ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 +
                       dts->sec) *
                          1000 +
                      dts->us / 1000;
                break;
            case NPY_FR_us:
                ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 +
                       dts->sec) *
                          1000000 +
                      dts->us;
                break;
            case NPY_FR_ns:
                ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 +
                        dts->sec) *
                           1000000 +
                       dts->us) *
                          1000 +
                      dts->ps / 1000;
                break;
            case NPY_FR_ps:
                ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 +
                        dts->sec) *
                           1000000 +
                       dts->us) *
                          1000000 +
                      dts->ps;
                break;
            case NPY_FR_fs:
                /* only 2.6 hours */
                ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 +
                         dts->sec) *
                            1000000 +
                        dts->us) *
                           1000000 +
                       dts->ps) *
                          1000 +
                      dts->as / 1000;
                break;
            case NPY_FR_as:
                /* only 9.2 secs */
                ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 +
                         dts->sec) *
                            1000000 +
                        dts->us) *
                           1000000 +
                       dts->ps) *
                          1000000 +
                      dts->as;
                break;
            default:
                /* Something got corrupted */
                PyErr_SetString(
                        PyExc_ValueError,
                        "NumPy datetime metadata with corrupt unit value");
                return -1;
        }
    }
    return ret;
}
/*
 * Port numpy#13188 https://github.com/numpy/numpy/pull/13188/
 *
 * Computes the python `ret, d = divmod(d, unit)`: stores the (always
 * non-negative) remainder back into *d and returns the floored quotient.
 *
 * Note that GCC is smart enough at -O2 to eliminate the `if(*d < 0)` branch
 * for subsequent calls to this command - it is able to deduce that `*d >= 0`.
 */
npy_int64 extract_unit(npy_datetime *d, npy_datetime unit) {
    npy_int64 quotient, remainder;

    assert(unit > 0);
    quotient = *d / unit;
    remainder = *d % unit;
    /* C division truncates toward zero; adjust to floored semantics. */
    if (remainder < 0) {
        remainder += unit;
        quotient -= 1;
    }
    assert(remainder >= 0);
    *d = remainder;
    return quotient;
}
/*
 * Converts a datetime based on the given metadata into a datetimestruct.
 *
 * `dt` is a count of `base` units since the 1970 epoch; the broken-down
 * calendar fields are written into *out.  Each branch peels off whole days
 * first, then splits the intra-day remainder with extract_unit (floored
 * divmod), so negative (pre-epoch) values are handled correctly.
 */
void pandas_datetime_to_datetimestruct(npy_datetime dt,
                                       NPY_DATETIMEUNIT base,
                                       npy_datetimestruct *out) {
    npy_int64 perday;

    /* Initialize the output to all zeros */
    memset(out, 0, sizeof(npy_datetimestruct));
    out->year = 1970;
    out->month = 1;
    out->day = 1;

    /*
     * Note that care must be taken with the / and % operators
     * for negative values.
     */
    switch (base) {
        case NPY_FR_Y:
            out->year = 1970 + dt;
            break;

        case NPY_FR_M:
            out->year  = 1970 + extract_unit(&dt, 12);
            out->month = dt + 1;
            break;

        case NPY_FR_W:
            /* A week is 7 days */
            set_datetimestruct_days(dt * 7, out);
            break;

        case NPY_FR_D:
            set_datetimestruct_days(dt, out);
            break;

        case NPY_FR_h:
            perday = 24LL;

            set_datetimestruct_days(extract_unit(&dt, perday), out);
            out->hour = dt;
            break;

        case NPY_FR_m:
            perday = 24LL * 60;

            set_datetimestruct_days(extract_unit(&dt, perday), out);
            out->hour = (int)extract_unit(&dt, 60);
            out->min = (int)dt;
            break;

        case NPY_FR_s:
            perday = 24LL * 60 * 60;

            set_datetimestruct_days(extract_unit(&dt, perday), out);
            out->hour = (int)extract_unit(&dt, 60 * 60);
            out->min = (int)extract_unit(&dt, 60);
            out->sec = (int)dt;
            break;

        case NPY_FR_ms:
            perday = 24LL * 60 * 60 * 1000;

            set_datetimestruct_days(extract_unit(&dt, perday), out);
            out->hour = (int)extract_unit(&dt, 1000LL * 60 * 60);
            out->min = (int)extract_unit(&dt, 1000LL * 60);
            out->sec = (int)extract_unit(&dt, 1000LL);
            out->us = (int)(dt * 1000);
            break;

        case NPY_FR_us:
            perday = 24LL * 60LL * 60LL * 1000LL * 1000LL;

            set_datetimestruct_days(extract_unit(&dt, perday), out);
            out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 60 * 60);
            out->min = (int)extract_unit(&dt, 1000LL * 1000 * 60);
            out->sec = (int)extract_unit(&dt, 1000LL * 1000);
            out->us = (int)dt;
            break;

        case NPY_FR_ns:
            perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL;

            set_datetimestruct_days(extract_unit(&dt, perday), out);
            out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60);
            out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60);
            out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000);
            out->us = (int)extract_unit(&dt, 1000LL);
            /* remaining nanoseconds, expressed as picoseconds */
            out->ps = (int)(dt * 1000);
            break;

        case NPY_FR_ps:
            perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000;

            set_datetimestruct_days(extract_unit(&dt, perday), out);
            /* BUGFIX: this branch previously reused the nanosecond-scale
             * divisors from the NPY_FR_ns case on a picosecond value, which
             * mis-assigned hour/min/sec/us/ps.  Use picosecond-scale
             * divisors, matching numpy's convert_datetime_to_datetimestruct. */
            out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 *
                                          60 * 60);
            out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 60);
            out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000);
            out->us = (int)extract_unit(&dt, 1000LL * 1000);
            out->ps = (int)dt;
            break;

        case NPY_FR_fs:
            /* entire range is only +- 2.6 hours */
            out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 *
                                          1000 * 60 * 60);
            if (out->hour < 0) {
                out->year = 1969;
                out->month = 12;
                out->day = 31;
                out->hour += 24;
                assert(out->hour >= 0);
            }
            out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 *
                                         1000 * 60);
            out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 *
                                         1000);
            out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000);
            out->ps = (int)extract_unit(&dt, 1000LL);
            out->as = (int)(dt * 1000);
            break;

        case NPY_FR_as:
            /* entire range is only +- 9.2 seconds */
            out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 *
                                         1000 * 1000);
            if (out->sec < 0) {
                out->year = 1969;
                out->month = 12;
                out->day = 31;
                out->hour = 23;
                out->min = 59;
                out->sec += 60;
                assert(out->sec >= 0);
            }
            out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000);
            out->ps = (int)extract_unit(&dt, 1000LL * 1000);
            out->as = (int)dt;
            break;

        default:
            PyErr_SetString(PyExc_RuntimeError,
                            "NumPy datetime metadata is corrupted with invalid "
                            "base unit");
    }
}
/*
 * Converts a timedelta from a timedeltastruct to a timedelta based
 * on a metadata unit. The timedelta is assumed to be valid.
 *
 * Writes the broken-down components into *out; on an unrecognized base
 * unit a RuntimeError is set and *out is left zeroed.
 * (Only NPY_FR_ns is implemented here.)
 */
void pandas_timedelta_to_timedeltastruct(npy_timedelta td,
                                         NPY_DATETIMEUNIT base,
                                         pandas_timedeltastruct *out) {
    npy_int64 frac;
    npy_int64 sfrac;
    npy_int64 ifrac;
    int sign;
    npy_int64 DAY_NS = 86400000000000LL;

    /* Initialize the output to all zeros */
    memset(out, 0, sizeof(pandas_timedeltastruct));

    switch (base) {
        case NPY_FR_ns:
            // put frac in seconds (floored division of the ns value)
            if (td < 0 && td % (1000LL * 1000LL * 1000LL) != 0)
                frac = td / (1000LL * 1000LL * 1000LL) - 1;
            else
                frac = td / (1000LL * 1000LL * 1000LL);

            if (frac < 0) {
                sign = -1;

                // even fraction
                if ((-frac % 86400LL) != 0) {
                    out->days = -frac / 86400LL + 1;
                    frac += 86400LL * out->days;
                } else {
                    frac = -frac;
                }
            } else {
                sign = 1;
                out->days = 0;
            }

            /* frac is now a non-negative number of whole seconds; split out
             * days / hours / minutes / seconds. */
            if (frac >= 86400) {
                out->days += frac / 86400LL;
                frac -= out->days * 86400LL;
            }

            if (frac >= 3600) {
                out->hrs = frac / 3600LL;
                frac -= out->hrs * 3600LL;
            } else {
                out->hrs = 0;
            }

            if (frac >= 60) {
                out->min = frac / 60LL;
                frac -= out->min * 60LL;
            } else {
                out->min = 0;
            }

            if (frac >= 0) {
                out->sec = frac;
                frac -= out->sec;
            } else {
                out->sec = 0;
            }

            sfrac = (out->hrs * 3600LL + out->min * 60LL
                     + out->sec) * (1000LL * 1000LL * 1000LL);

            if (sign < 0)
                out->days = -out->days;

            /* sub-second remainder in nanoseconds (carries the sign of td) */
            ifrac = td - (out->days * DAY_NS + sfrac);

            if (ifrac != 0) {
                out->ms = ifrac / (1000LL * 1000LL);
                ifrac -= out->ms * 1000LL * 1000LL;
                out->us = ifrac / 1000LL;
                ifrac -= out->us * 1000LL;
                out->ns = ifrac;
            } else {
                out->ms = 0;
                out->us = 0;
                out->ns = 0;
            }

            /* Aggregate fields mirroring datetime.timedelta's attributes. */
            out->seconds = out->hrs * 3600 + out->min * 60 + out->sec;
            out->microseconds = out->ms * 1000 + out->us;
            out->nanoseconds = out->ns;
            break;

        default:
            PyErr_SetString(PyExc_RuntimeError,
                            "NumPy timedelta metadata is corrupted with "
                            "invalid base unit");
    }
}

View File

@@ -0,0 +1,79 @@
/*
Copyright (c) 2016, PyData Development Team
All rights reserved.
Distributed under the terms of the BSD Simplified License.
The full license is in the LICENSE file, distributed with this software.
Copyright (c) 2005-2011, NumPy Developers
All rights reserved.
This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
*/
#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_
#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_

#ifndef NPY_NO_DEPRECATED_API
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#endif  // NPY_NO_DEPRECATED_API

#include <numpy/ndarraytypes.h>

/* Broken-down timedelta, including the aggregate seconds/microseconds/
 * nanoseconds fields mirroring datetime.timedelta's attributes. */
typedef struct {
    npy_int64 days;
    npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds;
} pandas_timedeltastruct;

/* Min/max datetimestructs representable as int64 nanoseconds since epoch. */
extern const npy_datetimestruct _NS_MIN_DTS;
extern const npy_datetimestruct _NS_MAX_DTS;

// stuff pandas needs
// ----------------------------------------------------------------------------

int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
                                         npy_datetimestruct *out);

npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
                                            const npy_datetimestruct *dts);

void pandas_datetime_to_datetimestruct(npy_datetime val, NPY_DATETIMEUNIT fr,
                                       npy_datetimestruct *result);

void pandas_timedelta_to_timedeltastruct(npy_timedelta val,
                                         NPY_DATETIMEUNIT fr,
                                         pandas_timedeltastruct *result);

/* Month lengths indexed by [is_leapyear][month - 1]. */
extern const int days_per_month_table[2][12];

// stuff numpy-derived code needs in header
// ----------------------------------------------------------------------------

int is_leapyear(npy_int64 year);

/*
 * Calculates the days offset from the 1970 epoch.
 */
npy_int64
get_datetimestruct_days(const npy_datetimestruct *dts);

/*
 * Compares two npy_datetimestruct objects chronologically
 */
int cmp_npy_datetimestruct(const npy_datetimestruct *a,
                           const npy_datetimestruct *b);

/*
 * Adjusts a datetimestruct based on a minutes offset. Assumes
 * the current values are valid.
 */
void
add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes);

#endif  // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_

View File

@@ -0,0 +1,941 @@
/*
Copyright (c) 2016, PyData Development Team
All rights reserved.
Distributed under the terms of the BSD Simplified License.
The full license is in the LICENSE file, distributed with this software.
Written by Mark Wiebe (mwwiebe@gmail.com)
Copyright (c) 2011 by Enthought, Inc.
Copyright (c) 2005-2011, NumPy Developers
All rights reserved.
See NUMPY_LICENSE.txt for the license.
This file implements string parsing and creation for NumPy datetime.
*/
#define PY_SSIZE_T_CLEAN
#define NO_IMPORT
#ifndef NPY_NO_DEPRECATED_API
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#endif // NPY_NO_DEPRECATED_API
#include <Python.h>
#include <time.h>
#include <numpy/arrayobject.h>
#include <numpy/arrayscalars.h>
#include <numpy/ndarraytypes.h>
#include "np_datetime.h"
#include "np_datetime_strings.h"
/*
* Parses (almost) standard ISO 8601 date strings. The differences are:
*
* + Only seconds may have a decimal point, with up to 18 digits after it
* (maximum attoseconds precision).
* + Either a 'T' as in ISO 8601 or a ' ' may be used to separate
* the date and the time. Both are treated equivalently.
* + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats.
* + Doesn't handle leap seconds (seconds value has 60 in these cases).
* + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow
* + Accepts special values "NaT" (not a time), "Today", (current
* day according to local time) and "Now" (current time in UTC).
* + ':' separator between hours, minutes, and seconds is optional. When
* omitted, each component must be 2 digits if it appears. (GH-10041)
*
* 'str' must be a NULL-terminated string, and 'len' must be its length.
*
* 'out' gets filled with the parsed date-time.
* 'out_local' gets set to 1 if the parsed time contains timezone,
* to 0 otherwise.
* 'out_tzoffset' gets set to timezone offset by minutes
* if the parsed time was in local time,
* to 0 otherwise. The values 'now' and 'today' don't get counted
* as local, and neither do UTC +/-#### timezone offsets, because
* they aren't using the computer's local timezone offset.
*
* Returns 0 on success, -1 on failure.
*/
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
                            npy_datetimestruct *out,
                            int *out_local, int *out_tzoffset) {
    int year_leap = 0;
    int i, numdigits;
    const char *substr;
    int sublen;

    /* If year-month-day are separated by a valid separator,
     * months/days without leading zeroes will be parsed
     * (though not iso8601). If the components aren't separated,
     * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are
     * forbidden here (but parsed as YYMMDD elsewhere).
     */
    int has_ymd_sep = 0;
    char ymd_sep = '\0';
    char valid_ymd_sep[] = {'-', '.', '/', '\\', ' '};
    int valid_ymd_sep_len = sizeof(valid_ymd_sep);

    /* hour-minute-second may or may not separated by ':'. If not, then
     * each component must be 2 digits. */
    int has_hms_sep = 0;
    int hour_was_2_digits = 0;

    /* Initialize the output to all zeros */
    memset(out, 0, sizeof(npy_datetimestruct));
    out->month = 1;
    out->day = 1;

    substr = str;
    sublen = len;

    /* Skip leading whitespace */
    while (sublen > 0 && isspace(*substr)) {
        ++substr;
        --sublen;
    }

    /* Leading '-' sign for negative year */
    /* (safe even when sublen == 0 because 'str' is NUL-terminated) */
    if (*substr == '-') {
        ++substr;
        --sublen;
    }

    if (sublen == 0) {
        goto parse_error;
    }

    /* PARSE THE YEAR (4 digits) */
    out->year = 0;
    if (sublen >= 4 && isdigit(substr[0]) && isdigit(substr[1]) &&
        isdigit(substr[2]) && isdigit(substr[3])) {
        out->year = 1000 * (substr[0] - '0') + 100 * (substr[1] - '0') +
                    10 * (substr[2] - '0') + (substr[3] - '0');

        substr += 4;
        sublen -= 4;
    }

    /* Negate the year if necessary */
    /* NOTE(review): this tests str[0], not the first non-space character,
     * so a negative year preceded by whitespace (" -0001") is not negated. */
    if (str[0] == '-') {
        out->year = -out->year;
    }
    /* Check whether it's a leap-year */
    year_leap = is_leapyear(out->year);

    /* Next character must be a separator, start of month, or end of string */
    if (sublen == 0) {
        if (out_local != NULL) {
            *out_local = 0;
        }
        goto finish;
    }

    if (!isdigit(*substr)) {
        for (i = 0; i < valid_ymd_sep_len; ++i) {
            if (*substr == valid_ymd_sep[i]) {
                break;
            }
        }
        if (i == valid_ymd_sep_len) {
            goto parse_error;
        }
        has_ymd_sep = 1;
        ymd_sep = valid_ymd_sep[i];
        ++substr;
        --sublen;
        /* Cannot have trailing separator */
        if (sublen == 0 || !isdigit(*substr)) {
            goto parse_error;
        }
    }

    /* PARSE THE MONTH */
    /* First digit required */
    out->month = (*substr - '0');
    ++substr;
    --sublen;
    /* Second digit optional if there was a separator */
    if (isdigit(*substr)) {
        out->month = 10 * out->month + (*substr - '0');
        ++substr;
        --sublen;
    } else if (!has_ymd_sep) {
        goto parse_error;
    }
    if (out->month < 1 || out->month > 12) {
        if (want_exc) {
            PyErr_Format(PyExc_ValueError,
                         "Month out of range in datetime string \"%s\"", str);
        }
        goto error;
    }

    /* Next character must be the separator, start of day, or end of string */
    if (sublen == 0) {
        /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */
        if (!has_ymd_sep) {
            goto parse_error;
        }
        if (out_local != NULL) {
            *out_local = 0;
        }
        goto finish;
    }

    if (has_ymd_sep) {
        /* Must have separator, but cannot be trailing */
        if (*substr != ymd_sep || sublen == 1) {
            goto parse_error;
        }
        ++substr;
        --sublen;
    }

    /* PARSE THE DAY */
    /* First digit required */
    if (!isdigit(*substr)) {
        goto parse_error;
    }
    out->day = (*substr - '0');
    ++substr;
    --sublen;
    /* Second digit optional if there was a separator */
    if (isdigit(*substr)) {
        out->day = 10 * out->day + (*substr - '0');
        ++substr;
        --sublen;
    } else if (!has_ymd_sep) {
        goto parse_error;
    }
    if (out->day < 1 ||
        out->day > days_per_month_table[year_leap][out->month - 1]) {
        if (want_exc) {
            PyErr_Format(PyExc_ValueError,
                         "Day out of range in datetime string \"%s\"", str);
        }
        goto error;
    }

    /* Next character must be a 'T', ' ', or end of string */
    if (sublen == 0) {
        if (out_local != NULL) {
            *out_local = 0;
        }
        goto finish;
    }

    if ((*substr != 'T' && *substr != ' ') || sublen == 1) {
        goto parse_error;
    }
    ++substr;
    --sublen;

    /* PARSE THE HOURS */
    /* First digit required */
    if (!isdigit(*substr)) {
        goto parse_error;
    }
    out->hour = (*substr - '0');
    ++substr;
    --sublen;
    /* Second digit optional */
    if (isdigit(*substr)) {
        hour_was_2_digits = 1;
        out->hour = 10 * out->hour + (*substr - '0');
        ++substr;
        --sublen;
        if (out->hour >= 24) {
            if (want_exc) {
                PyErr_Format(PyExc_ValueError,
                             "Hours out of range in datetime string \"%s\"",
                             str);
            }
            goto error;
        }
    }

    /* Next character must be a ':' or the end of the string */
    if (sublen == 0) {
        if (!hour_was_2_digits) {
            goto parse_error;
        }
        goto finish;
    }

    if (*substr == ':') {
        has_hms_sep = 1;
        ++substr;
        --sublen;
        /* Cannot have a trailing separator */
        if (sublen == 0 || !isdigit(*substr)) {
            goto parse_error;
        }
    } else if (!isdigit(*substr)) {
        if (!hour_was_2_digits) {
            goto parse_error;
        }
        goto parse_timezone;
    }

    /* PARSE THE MINUTES */
    /* First digit required */
    out->min = (*substr - '0');
    ++substr;
    --sublen;
    /* Second digit optional if there was a separator */
    if (isdigit(*substr)) {
        out->min = 10 * out->min + (*substr - '0');
        ++substr;
        --sublen;
        if (out->min >= 60) {
            if (want_exc) {
                PyErr_Format(PyExc_ValueError,
                             "Minutes out of range in datetime string \"%s\"",
                             str);
            }
            goto error;
        }
    } else if (!has_hms_sep) {
        goto parse_error;
    }

    if (sublen == 0) {
        goto finish;
    }

    /* If we make it through this condition block, then the next
     * character is a digit. */
    if (has_hms_sep && *substr == ':') {
        ++substr;
        --sublen;
        /* Cannot have a trailing ':' */
        if (sublen == 0 || !isdigit(*substr)) {
            goto parse_error;
        }
    } else if (!has_hms_sep && isdigit(*substr)) {
    } else {
        goto parse_timezone;
    }

    /* PARSE THE SECONDS */
    /* First digit required */
    out->sec = (*substr - '0');
    ++substr;
    --sublen;
    /* Second digit optional if there was a separator */
    if (isdigit(*substr)) {
        out->sec = 10 * out->sec + (*substr - '0');
        ++substr;
        --sublen;
        if (out->sec >= 60) {
            if (want_exc) {
                PyErr_Format(PyExc_ValueError,
                             "Seconds out of range in datetime string \"%s\"",
                             str);
            }
            goto error;
        }
    } else if (!has_hms_sep) {
        goto parse_error;
    }

    /* Next character may be a '.' indicating fractional seconds */
    if (sublen > 0 && *substr == '.') {
        ++substr;
        --sublen;
    } else {
        goto parse_timezone;
    }

    /* PARSE THE MICROSECONDS (0 to 6 digits) */
    /* NOTE(review): 'numdigits' is tracked in these three loops but never
     * read afterwards. */
    numdigits = 0;
    for (i = 0; i < 6; ++i) {
        out->us *= 10;
        if (sublen > 0 && isdigit(*substr)) {
            out->us += (*substr - '0');
            ++substr;
            --sublen;
            ++numdigits;
        }
    }

    if (sublen == 0 || !isdigit(*substr)) {
        goto parse_timezone;
    }

    /* PARSE THE PICOSECONDS (0 to 6 digits) */
    numdigits = 0;
    for (i = 0; i < 6; ++i) {
        out->ps *= 10;
        if (sublen > 0 && isdigit(*substr)) {
            out->ps += (*substr - '0');
            ++substr;
            --sublen;
            ++numdigits;
        }
    }

    if (sublen == 0 || !isdigit(*substr)) {
        goto parse_timezone;
    }

    /* PARSE THE ATTOSECONDS (0 to 6 digits) */
    numdigits = 0;
    for (i = 0; i < 6; ++i) {
        out->as *= 10;
        if (sublen > 0 && isdigit(*substr)) {
            out->as += (*substr - '0');
            ++substr;
            --sublen;
            ++numdigits;
        }
    }

parse_timezone:
    /* trim any whitespace between time/timeezone */
    while (sublen > 0 && isspace(*substr)) {
        ++substr;
        --sublen;
    }

    if (sublen == 0) {
        // Unlike NumPy, treating no time zone as naive
        goto finish;
    }

    /* UTC specifier */
    if (*substr == 'Z') {
        /* "Z" should be equivalent to tz offset "+00:00" */
        if (out_local != NULL) {
            *out_local = 1;
        }

        if (out_tzoffset != NULL) {
            *out_tzoffset = 0;
        }

        if (sublen == 1) {
            goto finish;
        } else {
            ++substr;
            --sublen;
        }
    } else if (*substr == '-' || *substr == '+') {
        /* Time zone offset */
        int offset_neg = 0, offset_hour = 0, offset_minute = 0;

        /*
         * Since "local" means local with respect to the current
         * machine, we say this is non-local.
         */
        if (*substr == '-') {
            offset_neg = 1;
        }
        ++substr;
        --sublen;

        /* The hours offset */
        if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
            offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0');
            substr += 2;
            sublen -= 2;
            if (offset_hour >= 24) {
                if (want_exc) {
                    PyErr_Format(PyExc_ValueError,
                                 "Timezone hours offset out of range "
                                 "in datetime string \"%s\"",
                                 str);
                }
                goto error;
            }
        } else if (sublen >= 1 && isdigit(substr[0])) {
            offset_hour = substr[0] - '0';
            ++substr;
            --sublen;
        } else {
            goto parse_error;
        }

        /* The minutes offset is optional */
        if (sublen > 0) {
            /* Optional ':' */
            if (*substr == ':') {
                ++substr;
                --sublen;
            }

            /* The minutes offset (at the end of the string) */
            if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
                offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0');
                substr += 2;
                sublen -= 2;
                if (offset_minute >= 60) {
                    if (want_exc) {
                        PyErr_Format(PyExc_ValueError,
                                     "Timezone minutes offset out of range "
                                     "in datetime string \"%s\"",
                                     str);
                    }
                    goto error;
                }
            } else if (sublen >= 1 && isdigit(substr[0])) {
                offset_minute = substr[0] - '0';
                ++substr;
                --sublen;
            } else {
                goto parse_error;
            }
        }

        /* Apply the time zone offset */
        if (offset_neg) {
            offset_hour = -offset_hour;
            offset_minute = -offset_minute;
        }
        if (out_local != NULL) {
            *out_local = 1;
            // Unlike NumPy, do not change internal value to local time
            *out_tzoffset = 60 * offset_hour + offset_minute;
        }
    }

    /* Skip trailing whitespace */
    while (sublen > 0 && isspace(*substr)) {
        ++substr;
        --sublen;
    }

    if (sublen != 0) {
        goto parse_error;
    }

finish:
    return 0;

parse_error:
    if (want_exc) {
        PyErr_Format(PyExc_ValueError,
                     "Error parsing datetime string \"%s\" at position %d", str,
                     (int)(substr - str));
    }
    return -1;

error:
    return -1;
}
/*
* Provides a string length to use for converting datetime
* objects with the given local and unit settings.
*/
/* Each case adds the width of its own component and deliberately falls
 * through to the coarser units, accumulating the full "YYYY-MM-DDThh:..."
 * length for the requested precision. */
int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) {
    int len = 0;

    switch (base) {
        /* Generic units can only be used to represent NaT */
        /* return 4;*/
        case NPY_FR_as:
            len += 3; /* "###" */
            /* fall through */
        case NPY_FR_fs:
            len += 3; /* "###" */
            /* fall through */
        case NPY_FR_ps:
            len += 3; /* "###" */
            /* fall through */
        case NPY_FR_ns:
            len += 3; /* "###" */
            /* fall through */
        case NPY_FR_us:
            len += 3; /* "###" */
            /* fall through */
        case NPY_FR_ms:
            len += 4; /* ".###" */
            /* fall through */
        case NPY_FR_s:
            len += 3; /* ":##" */
            /* fall through */
        case NPY_FR_m:
            len += 3; /* ":##" */
            /* fall through */
        case NPY_FR_h:
            len += 3; /* "T##" */
            /* fall through */
        case NPY_FR_D:
        case NPY_FR_W:
            len += 3; /* "-##" */
            /* fall through */
        case NPY_FR_M:
            len += 3; /* "-##" */
            /* fall through */
        case NPY_FR_Y:
            len += 21; /* 64-bit year */
            break;
        default:
            len += 3; /* handle the now defunct NPY_FR_B */
            break;
    }

    if (base >= NPY_FR_h) {
        if (local) {
            len += 5; /* "+####" or "-####" */
        } else {
            len += 1; /* "Z" */
        }
    }

    len += 1; /* NULL terminator */

    return len;
}
/*
* Converts an npy_datetimestruct to an (almost) ISO 8601
* NULL-terminated string using timezone Z (UTC). If the string fits in
* the space exactly, it leaves out the NULL terminator and returns success.
*
* The differences from ISO 8601 are the 'NaT' string, and
* the number of year digits is >= 4 instead of strictly 4.
*
* 'base' restricts the output to that unit. Set 'base' to
* -1 to auto-detect a base after which all the values are zero.
*
* Returns 0 on success, -1 on failure (for example if the output
* string was too short).
*/
/* Each component below follows the same pattern: check remaining space
 * before every single character written, then advance substr/sublen past
 * the component.  The order of checks and writes is load-bearing. */
int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
                           NPY_DATETIMEUNIT base) {
    char *substr = outstr;
    int sublen = outlen;
    int tmplen;

    /*
     * Print weeks with the same precision as days.
     *
     * TODO: Could print weeks with YYYY-Www format if the week
     * epoch is a Monday.
     */
    if (base == NPY_FR_W) {
        base = NPY_FR_D;
    }

    /* YEAR */
    /*
     * Can't use PyOS_snprintf, because it always produces a '\0'
     * character at the end, and NumPy string types are permitted
     * to have data all the way to the end of the buffer.
     */
#ifdef _WIN32
    tmplen = _snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year);
#else
    tmplen = snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year);
#endif  // _WIN32
    /* If it ran out of space or there isn't space for the NULL terminator */
    if (tmplen < 0 || tmplen > sublen) {
        goto string_too_short;
    }
    substr += tmplen;
    sublen -= tmplen;

    /* Stop if the unit is years */
    if (base == NPY_FR_Y) {
        if (sublen > 0) {
            *substr = '\0';
        }
        return 0;
    }

    /* MONTH */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = '-';
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->month / 10) + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)((dts->month % 10) + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is months */
    if (base == NPY_FR_M) {
        if (sublen > 0) {
            *substr = '\0';
        }
        return 0;
    }

    /* DAY */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = '-';
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->day / 10) + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)((dts->day % 10) + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is days */
    if (base == NPY_FR_D) {
        if (sublen > 0) {
            *substr = '\0';
        }
        return 0;
    }

    /* HOUR */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = 'T';
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->hour / 10) + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)((dts->hour % 10) + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is hours */
    if (base == NPY_FR_h) {
        goto add_time_zone;
    }

    /* MINUTE */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = ':';
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->min / 10) + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)((dts->min % 10) + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is minutes */
    if (base == NPY_FR_m) {
        goto add_time_zone;
    }

    /* SECOND */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = ':';
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->sec / 10) + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)((dts->sec % 10) + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is seconds */
    if (base == NPY_FR_s) {
        goto add_time_zone;
    }

    /* MILLISECOND */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = '.';
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->us / 100000) % 10 + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)((dts->us / 10000) % 10 + '0');
    if (sublen < 4) {
        goto string_too_short;
    }
    substr[3] = (char)((dts->us / 1000) % 10 + '0');
    substr += 4;
    sublen -= 4;

    /* Stop if the unit is milliseconds */
    if (base == NPY_FR_ms) {
        goto add_time_zone;
    }

    /* MICROSECOND */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = (char)((dts->us / 100) % 10 + '0');
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->us / 10) % 10 + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)(dts->us % 10 + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is microseconds */
    if (base == NPY_FR_us) {
        goto add_time_zone;
    }

    /* NANOSECOND */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = (char)((dts->ps / 100000) % 10 + '0');
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->ps / 10000) % 10 + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)((dts->ps / 1000) % 10 + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is nanoseconds */
    if (base == NPY_FR_ns) {
        goto add_time_zone;
    }

    /* PICOSECOND */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = (char)((dts->ps / 100) % 10 + '0');
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->ps / 10) % 10 + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)(dts->ps % 10 + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is picoseconds */
    if (base == NPY_FR_ps) {
        goto add_time_zone;
    }

    /* FEMTOSECOND */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = (char)((dts->as / 100000) % 10 + '0');
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->as / 10000) % 10 + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)((dts->as / 1000) % 10 + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is femtoseconds */
    if (base == NPY_FR_fs) {
        goto add_time_zone;
    }

    /* ATTOSECOND */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = (char)((dts->as / 100) % 10 + '0');
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->as / 10) % 10 + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)(dts->as % 10 + '0');
    substr += 3;
    sublen -= 3;

add_time_zone:
    /* UTC "Zulu" time */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = 'Z';
    substr += 1;
    sublen -= 1;

    /* Add a NULL terminator, and return */
    if (sublen > 0) {
        substr[0] = '\0';
    }

    return 0;

string_too_short:
    PyErr_Format(PyExc_RuntimeError,
                 "The string provided for NumPy ISO datetime formatting "
                 "was too short, with length %d",
                 outlen);
    return -1;
}
/* Render a pandas_timedeltastruct as an ISO 8601 duration,
 * "P<days>DT<hrs>H<min>M<sec>[.<frac>]S", writing the character count
 * (excluding the NUL) into *outlen.  The fractional part includes only
 * as many 3-digit groups (ms/us/ns) as are non-zero at the finest level.
 * NOTE(review): the snprintf size arguments (60, 12, 9, 6, 2) assume the
 * caller's buffer is at least that large past 'outstr' -- confirm callers. */
int make_iso_8601_timedelta(pandas_timedeltastruct *tds,
                            char *outstr, size_t *outlen) {
    *outlen = 0;
    *outlen += snprintf(outstr, 60,  // NOLINT
                        "P%" NPY_INT64_FMT
                        "DT%" NPY_INT32_FMT
                        "H%" NPY_INT32_FMT
                        "M%" NPY_INT32_FMT,
                        tds->days, tds->hrs, tds->min, tds->sec);
    outstr += *outlen;

    if (tds->ns != 0) {
        *outlen += snprintf(outstr, 12,  // NOLINT
                            ".%03" NPY_INT32_FMT
                            "%03" NPY_INT32_FMT
                            "%03" NPY_INT32_FMT
                            "S", tds->ms, tds->us, tds->ns);
    } else if (tds->us != 0) {
        *outlen += snprintf(outstr, 9,  // NOLINT
                            ".%03" NPY_INT32_FMT
                            "%03" NPY_INT32_FMT
                            "S", tds->ms, tds->us);
    } else if (tds->ms != 0) {
        *outlen += snprintf(outstr, 6,  // NOLINT
                            ".%03" NPY_INT32_FMT "S", tds->ms);
    } else {
        *outlen += snprintf(outstr, 2,  // NOLINT
                            "%s", "S");
    }

    return 0;
}

View File

@@ -0,0 +1,92 @@
/*
Copyright (c) 2016, PyData Development Team
All rights reserved.
Distributed under the terms of the BSD Simplified License.
The full license is in the LICENSE file, distributed with this software.
Written by Mark Wiebe (mwwiebe@gmail.com)
Copyright (c) 2011 by Enthought, Inc.
Copyright (c) 2005-2011, NumPy Developers
All rights reserved.
See NUMPY_LICENSE.txt for the license.
This file implements string parsing and creation for NumPy datetime.
*/
#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_
#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_

/* Opt out of the deprecated NumPy C API unless the includer already chose. */
#ifndef NPY_NO_DEPRECATED_API
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#endif  // NPY_NO_DEPRECATED_API

/*
 * Parses (almost) standard ISO 8601 date strings. The differences are:
 *
 * + The date "20100312" is parsed as the year 20100312, not as
 *   equivalent to "2010-03-12". The '-' in the dates are not optional.
 * + Only seconds may have a decimal point, with up to 18 digits after it
 *   (maximum attoseconds precision).
 * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate
 *   the date and the time. Both are treated equivalently.
 * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats.
 * + Doesn't handle leap seconds (seconds value has 60 in these cases).
 * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow
 * + Accepts special values "NaT" (not a time), "Today", (current
 *   day according to local time) and "Now" (current time in UTC).
 *
 * 'str' must be a NULL-terminated string, and 'len' must be its length.
 *
 * 'out' gets filled with the parsed date-time.
 * 'out_local' gets whether returned value contains timezone. 0 for UTC, 1 for local time.
 * 'out_tzoffset' gets set to timezone offset by minutes
 *      if the parsed time was in local time,
 *      to 0 otherwise. The values 'now' and 'today' don't get counted
 *      as local, and neither do UTC +/-#### timezone offsets, because
 *      they aren't using the computer's local timezone offset.
 *
 * Returns 0 on success, -1 on failure.
 */
int
parse_iso_8601_datetime(const char *str, int len, int want_exc,
                        npy_datetimestruct *out,
                        int *out_local,
                        int *out_tzoffset);

/*
 * Provides a string length to use for converting datetime
 * objects with the given local and unit settings.
 */
int
get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base);

/*
 * Converts an npy_datetimestruct to an (almost) ISO 8601
 * NULL-terminated string using timezone Z (UTC).
 *
 * 'base' restricts the output to that unit. Set 'base' to
 * -1 to auto-detect a base after which all the values are zero.
 *
 * Returns 0 on success, -1 on failure (for example if the output
 * string was too short).
 */
int
make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
                       NPY_DATETIMEUNIT base);

/*
 * Converts an pandas_timedeltastruct to an ISO 8601 string.
 *
 * Mutates outlen to provide size of (non-NULL terminated) string.
 *
 * Currently has no error handling
 */
int make_iso_8601_timedelta(pandas_timedeltastruct *tds, char *outstr,
                            size_t *outlen);

#endif  // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_

View File

@@ -0,0 +1,12 @@
import numpy as np
from pandas._typing import npt
def array_strptime(
    values: npt.NDArray[np.object_],
    fmt: str | None,  # strftime/strptime-style format string
    exact: bool = ...,  # True -> regex must match from the start; False -> search
    errors: str = ...,  # one of "raise", "ignore", "coerce"
) -> tuple[np.ndarray, np.ndarray]: ...

# first ndarray is M8[ns], second is object ndarray of tzinfo | None

View File

@@ -0,0 +1,762 @@
"""Strptime-related classes and functions.
"""
import calendar
import locale
import re
import time
from cpython.datetime cimport (
date,
tzinfo,
)
from _thread import allocate_lock as _thread_allocate_lock
import numpy as np
import pytz
from numpy cimport (
int64_t,
ndarray,
)
from pandas._libs.missing cimport checknull_with_nat_and_na
from pandas._libs.tslibs.nattype cimport (
NPY_NAT,
c_nat_strings as nat_strings,
)
from pandas._libs.tslibs.np_datetime cimport (
check_dts_bounds,
dtstruct_to_dt64,
npy_datetimestruct,
)
# Map each strptime directive character to the integer parse_code consumed
# by the dispatch chain inside array_strptime below.
cdef dict _parse_code_table = {'y': 0,
                               'Y': 1,
                               'm': 2,
                               'B': 3,
                               'b': 4,
                               'd': 5,
                               'H': 6,
                               'I': 7,
                               'M': 8,
                               'S': 9,
                               'f': 10,
                               'A': 11,
                               'a': 12,
                               'w': 13,
                               'j': 14,
                               'U': 15,
                               'W': 16,
                               'Z': 17,
                               'p': 18,  # an additional key, only with I
                               'z': 19,
                               'G': 20,
                               'V': 21,
                               'u': 22}
def array_strptime(ndarray[object] values, object fmt, bint exact=True, errors='raise'):
    """
    Calculates the datetime structs represented by the passed array of strings

    Parameters
    ----------
    values : ndarray of string-like objects
    fmt : string-like regex
    exact : matches must be exact if True, search if False
    errors : string specifying error handling, {'raise', 'ignore', 'coerce'}

    Returns
    -------
    tuple of (M8[ns] ndarray, object ndarray of tzinfo or None)
    """
    cdef:
        Py_ssize_t i, n = len(values)
        npy_datetimestruct dts
        int64_t[:] iresult
        object[:] result_timezone
        int year, month, day, minute, hour, second, weekday, julian
        int week_of_year, week_of_year_start, parse_code, ordinal
        int iso_week, iso_year
        int64_t us, ns
        object val, group_key, ampm, found, timezone
        bint is_raise = errors=='raise'
        bint is_ignore = errors=='ignore'
        bint is_coerce = errors=='coerce'

    assert is_raise or is_ignore or is_coerce

    if fmt is not None:
        if '%W' in fmt or '%U' in fmt:
            if '%Y' not in fmt and '%y' not in fmt:
                raise ValueError("Cannot use '%W' or '%U' without day and year")
            if '%A' not in fmt and '%a' not in fmt and '%w' not in fmt:
                raise ValueError("Cannot use '%W' or '%U' without day and year")
        elif '%Z' in fmt and '%z' in fmt:
            raise ValueError("Cannot parse both %Z and %z")

    # Refresh the cached TimeRE/compiled-regex state under the lock: a locale
    # change invalidates the locale-dependent regexes.
    global _TimeRE_cache, _regex_cache
    with _cache_lock:
        if _getlang() != _TimeRE_cache.locale_time.lang:
            _TimeRE_cache = TimeRE()
            _regex_cache.clear()
        if len(_regex_cache) > _CACHE_MAX_SIZE:
            _regex_cache.clear()
        locale_time = _TimeRE_cache.locale_time
        format_regex = _regex_cache.get(fmt)
        if not format_regex:
            try:
                format_regex = _TimeRE_cache.compile(fmt)
            # KeyError raised when a bad format is found; can be specified as
            # \\, in which case it was a stray % but with a space after it
            except KeyError as err:  # BUG FIX: was Python-2-only "except KeyError, err:"
                bad_directive = err.args[0]
                if bad_directive == "\\":
                    bad_directive = "%"
                del err
                raise ValueError(f"'{bad_directive}' is a bad directive "
                                 f"in format '{fmt}'")
            # IndexError only occurs when the format string is "%"
            except IndexError:
                raise ValueError(f"stray % in format '{fmt}'")
            _regex_cache[fmt] = format_regex

    result = np.empty(n, dtype='M8[ns]')
    iresult = result.view('i8')
    result_timezone = np.empty(n, dtype='object')

    dts.us = dts.ps = dts.as = 0

    for i in range(n):
        val = values[i]
        if isinstance(val, str):
            if val in nat_strings:
                iresult[i] = NPY_NAT
                continue
        else:
            if checknull_with_nat_and_na(val):
                iresult[i] = NPY_NAT
                continue
            else:
                val = str(val)

        # exact matching
        if exact:
            found = format_regex.match(val)
            if not found:
                if is_coerce:
                    iresult[i] = NPY_NAT
                    continue
                raise ValueError(f"time data '{val}' does not match "
                                 f"format '{fmt}' (match)")
            if len(val) != found.end():
                if is_coerce:
                    iresult[i] = NPY_NAT
                    continue
                raise ValueError(f"unconverted data remains: {val[found.end():]}")

        # search
        else:
            found = format_regex.search(val)
            if not found:
                if is_coerce:
                    iresult[i] = NPY_NAT
                    continue
                raise ValueError(f"time data {repr(val)} does not match format "
                                 f"{repr(fmt)} (search)")

        iso_year = -1
        year = 1900
        month = day = 1
        hour = minute = second = ns = us = 0
        timezone = None
        # Default to -1 to signify that values not known; not critical to have,
        # though
        iso_week = week_of_year = -1
        week_of_year_start = -1
        # weekday and julian defaulted to -1 so as to signal need to calculate
        # values
        weekday = julian = -1
        found_dict = found.groupdict()
        # BUG FIX: found_dict.iterkeys() is Python-2-only; dicts have no
        # iterkeys() under Python 3 -- iterate the dict directly.
        for group_key in found_dict:
            # Directives not explicitly handled below:
            #        c, x, X
            #           handled by making out of other directives
            #        U, W
            #           worthless without day of the week
            parse_code = _parse_code_table[group_key]

            if parse_code == 0:
                year = int(found_dict['y'])
                # Open Group specification for strptime() states that a %y
                # value in the range of [00, 68] is in the century 2000, while
                # [69,99] is in the century 1900
                if year <= 68:
                    year += 2000
                else:
                    year += 1900
            elif parse_code == 1:
                year = int(found_dict['Y'])
            elif parse_code == 2:
                month = int(found_dict['m'])
            # elif group_key == 'B':
            elif parse_code == 3:
                month = locale_time.f_month.index(found_dict['B'].lower())
            # elif group_key == 'b':
            elif parse_code == 4:
                month = locale_time.a_month.index(found_dict['b'].lower())
            # elif group_key == 'd':
            elif parse_code == 5:
                day = int(found_dict['d'])
            # elif group_key == 'H':
            elif parse_code == 6:
                hour = int(found_dict['H'])
            elif parse_code == 7:
                hour = int(found_dict['I'])
                ampm = found_dict.get('p', '').lower()
                # If there was no AM/PM indicator, we'll treat this like AM
                if ampm in ('', locale_time.am_pm[0]):
                    # We're in AM so the hour is correct unless we're
                    # looking at 12 midnight.
                    # 12 midnight == 12 AM == hour 0
                    if hour == 12:
                        hour = 0
                elif ampm == locale_time.am_pm[1]:
                    # We're in PM so we need to add 12 to the hour unless
                    # we're looking at 12 noon.
                    # 12 noon == 12 PM == hour 12
                    if hour != 12:
                        hour += 12
            elif parse_code == 8:
                minute = int(found_dict['M'])
            elif parse_code == 9:
                second = int(found_dict['S'])
            elif parse_code == 10:
                s = found_dict['f']
                # Pad to always return nanoseconds
                s += "0" * (9 - len(s))
                # BUG FIX: long() does not exist in Python 3; int() is
                # arbitrary-precision and covers the same range.
                us = int(s)
                ns = us % 1000
                us = us // 1000
            elif parse_code == 11:
                weekday = locale_time.f_weekday.index(found_dict['A'].lower())
            elif parse_code == 12:
                weekday = locale_time.a_weekday.index(found_dict['a'].lower())
            elif parse_code == 13:
                weekday = int(found_dict['w'])
                if weekday == 0:
                    weekday = 6
                else:
                    weekday -= 1
            elif parse_code == 14:
                julian = int(found_dict['j'])
            elif parse_code == 15 or parse_code == 16:
                week_of_year = int(found_dict[group_key])
                if group_key == 'U':
                    # U starts week on Sunday.
                    week_of_year_start = 6
                else:
                    # W starts week on Monday.
                    week_of_year_start = 0
            elif parse_code == 17:
                timezone = pytz.timezone(found_dict['Z'])
            elif parse_code == 19:
                timezone = parse_timezone_directive(found_dict['z'])
            elif parse_code == 20:
                iso_year = int(found_dict['G'])
            elif parse_code == 21:
                iso_week = int(found_dict['V'])
            elif parse_code == 22:
                weekday = int(found_dict['u'])
                weekday -= 1

        # don't assume default values for ISO week/year
        if iso_year != -1:
            if iso_week == -1 or weekday == -1:
                raise ValueError("ISO year directive '%G' must be used with "
                                 "the ISO week directive '%V' and a weekday "
                                 "directive '%A', '%a', '%w', or '%u'.")
            if julian != -1:
                raise ValueError("Day of the year directive '%j' is not "
                                 "compatible with ISO year directive '%G'. "
                                 "Use '%Y' instead.")
        elif year != -1 and week_of_year == -1 and iso_week != -1:
            if weekday == -1:
                raise ValueError("ISO week directive '%V' must be used with "
                                 "the ISO year directive '%G' and a weekday "
                                 "directive '%A', '%a', '%w', or '%u'.")
            else:
                raise ValueError("ISO week directive '%V' is incompatible with "
                                 "the year directive '%Y'. Use the ISO year "
                                 "'%G' instead.")

        # If we know the wk of the year and what day of that wk, we can figure
        # out the Julian day of the year.
        if julian == -1 and weekday != -1:
            if week_of_year != -1:
                week_starts_Mon = week_of_year_start == 0
                julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
                                                  week_starts_Mon)
            elif iso_year != -1 and iso_week != -1:
                year, julian = _calc_julian_from_V(iso_year, iso_week,
                                                   weekday + 1)
        # Cannot pre-calculate date() since can change in Julian
        # calculation and thus could have different value for the day of the wk
        # calculation.
        try:
            if julian == -1:
                # Need to add 1 to result since first day of the year is 1, not
                # 0.
                ordinal = date(year, month, day).toordinal()
                julian = ordinal - date(year, 1, 1).toordinal() + 1
            else:
                # Assume that if they bothered to include Julian day it will
                # be accurate.
                datetime_result = date.fromordinal(
                    (julian - 1) + date(year, 1, 1).toordinal())
                year = datetime_result.year
                month = datetime_result.month
                day = datetime_result.day
        except ValueError:
            if is_coerce:
                iresult[i] = NPY_NAT
                continue
            raise
        if weekday == -1:
            weekday = date(year, month, day).weekday()

        dts.year = year
        dts.month = month
        dts.day = day
        dts.hour = hour
        dts.min = minute
        dts.sec = second
        dts.us = us
        dts.ps = ns * 1000

        iresult[i] = dtstruct_to_dt64(&dts)
        try:
            check_dts_bounds(&dts)
        except ValueError:
            if is_coerce:
                iresult[i] = NPY_NAT
                continue
            raise

        result_timezone[i] = timezone

    return result, result_timezone.base
"""
_getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored
from the standard library, see
https://github.com/python/cpython/blob/master/Lib/_strptime.py
The original module-level docstring follows.
Strptime-related classes and functions.
CLASSES:
LocaleTime -- Discovers and stores locale-specific time information
TimeRE -- Creates regexes for pattern matching a string of text containing
time information
FUNCTIONS:
_getlang -- Figure out what language is being used for the locale
strptime -- Calculates the time struct represented by the passed-in string
"""
def _getlang():
    """Return the (language code, encoding) pair of the current LC_TIME locale."""
    current = locale.getlocale(locale.LC_TIME)
    return current
class LocaleTime:
"""
Stores and handles locale-specific information related to time.
ATTRIBUTES:
f_weekday -- full weekday names (7-item list)
a_weekday -- abbreviated weekday names (7-item list)
f_month -- full month names (13-item list; dummy value in [0], which
is added by code)
a_month -- abbreviated month names (13-item list, dummy value in
[0], which is added by code)
am_pm -- AM/PM representation (2-item list)
LC_date_time -- format string for date/time representation (string)
LC_date -- format string for date representation (string)
LC_time -- format string for time representation (string)
timezone -- daylight- and non-daylight-savings timezone representation
(2-item list of sets)
lang -- Language used by instance (2-item tuple)
"""
def __init__(self):
    """
    Set all attributes.

    Order of methods called matters for dependency reasons.

    The locale language is set at the offset and then checked again before
    exiting. This is to make sure that the attributes were not set with a
    mix of information from more than one locale. This would most likely
    happen when using threads where one thread calls a locale-dependent
    function while another thread changes the locale while the function in
    the other thread is still running. Proper coding would call for
    locks to prevent changing the locale while locale-dependent code is
    running. The check here is done in case someone does not think about
    doing this.

    Only other possible issue is if someone changed the timezone and did
    not call tz.tzset . That is an issue for the programmer, though,
    since changing the timezone is worthless without that call.
    """
    self.lang = _getlang()
    # Weekday/month names must exist before __calc_date_time derives the
    # LC_date_time format strings from them.
    self.__calc_weekday()
    self.__calc_month()
    self.__calc_am_pm()
    self.__calc_timezone()
    self.__calc_date_time()
    # Fail loudly if the locale changed mid-initialization, since the
    # attributes could then mix information from two locales.
    if _getlang() != self.lang:
        raise ValueError("locale changed during initialization")
def __pad(self, seq, front):
# Add '' to seq to either the front (is True), else the back.
seq = list(seq)
if front:
seq.insert(0, '')
else:
seq.append('')
return seq
def __calc_weekday(self):
# Set self.a_weekday and self.f_weekday using the calendar
# module.
a_weekday = [calendar.day_abbr[i].lower() for i in range(7)]
f_weekday = [calendar.day_name[i].lower() for i in range(7)]
self.a_weekday = a_weekday
self.f_weekday = f_weekday
def __calc_month(self):
# Set self.f_month and self.a_month using the calendar module.
a_month = [calendar.month_abbr[i].lower() for i in range(13)]
f_month = [calendar.month_name[i].lower() for i in range(13)]
self.a_month = a_month
self.f_month = f_month
def __calc_am_pm(self):
# Set self.am_pm by using time.strftime().
# The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
# magical; just happened to have used it everywhere else where a
# static date was needed.
am_pm = []
for hour in (01, 22):
time_tuple = time.struct_time(
(1999, 3, 17, hour, 44, 55, 2, 76, 0))
am_pm.append(time.strftime("%p", time_tuple).lower())
self.am_pm = am_pm
def __calc_date_time(self):
# Set self.date_time, self.date, & self.time by using
# time.strftime().
# Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
# overloaded numbers is minimized. The order in which searches for
# values within the format string is very important; it eliminates
# possible ambiguity for what something represents.
time_tuple = time.struct_time((1999, 3, 17, 22, 44, 55, 2, 76, 0))
date_time = [None, None, None]
date_time[0] = time.strftime("%c", time_tuple).lower()
date_time[1] = time.strftime("%x", time_tuple).lower()
date_time[2] = time.strftime("%X", time_tuple).lower()
replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'),
(self.f_month[3], '%B'),
(self.a_weekday[2], '%a'),
(self.a_month[3], '%b'), (self.am_pm[1], '%p'),
('1999', '%Y'), ('99', '%y'), ('22', '%H'),
('44', '%M'), ('55', '%S'), ('76', '%j'),
('17', '%d'), ('03', '%m'), ('3', '%m'),
# '3' needed for when no leading zero.
('2', '%w'), ('10', '%I')]
replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone
for tz in tz_values])
for offset, directive in ((0, '%c'), (1, '%x'), (2, '%X')):
current_format = date_time[offset]
for old, new in replacement_pairs:
# Must deal with possible lack of locale info
# manifesting itself as the empty string (e.g., Swedish's
# lack of AM/PM info) or a platform returning a tuple of empty
# strings (e.g., MacOS 9 having timezone as ('','')).
if old:
current_format = current_format.replace(old, new)
# If %W is used, then Sunday, 2005-01-03 will fall on week 0 since
# 2005-01-03 occurs before the first Monday of the year. Otherwise
# %U is used.
time_tuple = time.struct_time((1999, 1, 3, 1, 1, 1, 6, 3, 0))
if '00' in time.strftime(directive, time_tuple):
U_W = '%W'
else:
U_W = '%U'
date_time[offset] = current_format.replace('11', U_W)
self.LC_date_time = date_time[0]
self.LC_date = date_time[1]
self.LC_time = date_time[2]
def __calc_timezone(self):
# Set self.timezone by using time.tzname.
# Do not worry about possibility of time.tzname[0] == timetzname[1]
# and time.daylight; handle that in strptime .
try:
time.tzset()
except AttributeError:
pass
no_saving = frozenset(["utc", "gmt", time.tzname[0].lower()])
if time.daylight:
has_saving = frozenset([time.tzname[1].lower()])
else:
has_saving = frozenset()
self.timezone = (no_saving, has_saving)
class TimeRE(dict):
    """
    Handle conversion from format directives to regexes.

    Creates regexes for pattern matching a string of text containing
    time information

    Maps each strftime directive character (e.g. 'd', 'm', 'Y') to a named
    regex fragment; `pattern`/`compile` assemble whole-format regexes.
    """

    def __init__(self, locale_time=None):
        """
        Create keys/values.

        Order of execution is important for dependency reasons.

        Parameters
        ----------
        locale_time : LocaleTime, optional
            Pre-built locale info; a fresh LocaleTime() is created when omitted.
        """
        if locale_time:
            self.locale_time = locale_time
        else:
            self.locale_time = LocaleTime()
        # Lazily-built regex for the %Z directive (see __getitem__); building
        # it eagerly would enumerate every pytz timezone name up front.
        self._Z = None
        base = super()
        base.__init__({
            # The " \d" part of the regex is to make %c from ANSI C work
            'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
            'f': r"(?P<f>[0-9]{1,9})",
            'G': r"(?P<G>\d\d\d\d)",
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
            'j': (r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|"
                  r"[1-9]\d|0[1-9]|[1-9])"),
            'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'u': r"(?P<u>[1-7])",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'V': r"(?P<V>5[0-3]|0[1-9]|[1-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            # W is set below by using 'U'
            'y': r"(?P<y>\d\d)",
            # TODO: Does 'Y' need to worry about having less or more than
            #       4 digits?
            'Y': r"(?P<Y>\d\d\d\d)",
            'z': r"(?P<z>[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|Z)",
            'A': self.__seqToRE(self.locale_time.f_weekday, 'A'),
            'a': self.__seqToRE(self.locale_time.a_weekday, 'a'),
            'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'),
            'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'),
            'p': self.__seqToRE(self.locale_time.am_pm, 'p'),
            # 'Z' key is generated lazily via __getitem__
            '%': '%'})
        # %W shares %U's numeric range; %c/%x/%X expand the locale's own
        # date/time format strings recursively through pattern().
        base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
        base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
        base.__setitem__('x', self.pattern(self.locale_time.LC_date))
        base.__setitem__('X', self.pattern(self.locale_time.LC_time))

    def __getitem__(self, key):
        if key == "Z":
            # lazy computation
            if self._Z is None:
                self._Z = self.__seqToRE(pytz.all_timezones, 'Z')
            return self._Z
        return super().__getitem__(key)

    def __seqToRE(self, to_convert, directive):
        """
        Convert a list to a regex string for matching a directive.

        Want possible matching values to be from longest to shortest.  This
        prevents the possibility of a match occurring for a value that also
        a substring of a larger value that should have matched (e.g., 'abc'
        matching when 'abcdef' should have been the match).
        """
        to_convert = sorted(to_convert, key=len, reverse=True)
        # If every candidate is empty (missing locale info), emit no group.
        for value in to_convert:
            if value != '':
                break
        else:
            return ''
        regex = '|'.join(re.escape(stuff) for stuff in to_convert)
        regex = f"(?P<{directive}>{regex})"
        return regex

    def pattern(self, format):
        """
        Return regex pattern for the format string.

        Need to make sure that any characters that might be interpreted as
        regex syntax are escaped.
        """
        processed_format = ''
        # The sub() call escapes all characters that might be misconstrued
        # as regex syntax.  Cannot use re.escape since we have to deal with
        # format directives (%m, etc.).
        regex_chars = re.compile(r"([\\.^$*+?\(\){}\[\]|])")
        format = regex_chars.sub(r"\\\1", format)
        # Collapse any run of whitespace in the format into a tolerant \s+.
        whitespace_replacement = re.compile(r'\s+')
        format = whitespace_replacement.sub(r'\\s+', format)
        # Splice in the regex fragment for each %<directive> occurrence.
        while '%' in format:
            directive_index = format.index('%') +1
            processed_format = (f"{processed_format}"
                                f"{format[:directive_index -1]}"
                                f"{self[format[directive_index]]}")
            format = format[directive_index +1:]
        return f"{processed_format}{format}"

    def compile(self, format):
        """Return a compiled re object for the format string."""
        return re.compile(self.pattern(format), re.IGNORECASE)
# Module-level caches shared by the strptime machinery above.
_cache_lock = _thread_allocate_lock()
# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock
# first!
_TimeRE_cache = TimeRE()
_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache
_regex_cache = {}
cdef int _calc_julian_from_U_or_W(int year, int week_of_year,
                                  int day_of_week, int week_starts_Mon):
    """
    Calculate the Julian day based on the year, week of the year, and day of
    the week, with week_start_day representing whether the week of the year
    assumes the week starts on Sunday or Monday (6 or 0).

    Parameters
    ----------
    year : int
        the year
    week_of_year : int
        week taken from format U or W
    day_of_week : int
        day of the week, where Monday == 0
    week_starts_Mon : int
        represents whether the week of the year
        assumes the week starts on Sunday or Monday (6 or 0)

    Returns
    -------
    int
        converted julian day
    """
    cdef:
        int first_weekday, week_0_length, days_to_week

    # Weekday (Monday == 0) of January 1st of `year`.
    first_weekday = date(year, 1, 1).weekday()

    # If we are dealing with the %U directive (week starts on Sunday), it's
    # easier to just shift the view to Sunday being the first day of the
    # week.
    if not week_starts_Mon:
        first_weekday = (first_weekday + 1) % 7
        day_of_week = (day_of_week + 1) % 7

    # Need to watch out for a week 0 (when the first day of the year is not
    # the same as that specified by %U or %W).
    week_0_length = (7 - first_weekday) % 7
    if week_of_year == 0:
        return 1 + day_of_week - first_weekday
    else:
        days_to_week = week_0_length + (7 * (week_of_year - 1))
        return 1 + days_to_week + day_of_week
cdef (int, int) _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday):
    """
    Calculate the Julian day based on the ISO 8601 year, week, and weekday.

    ISO weeks start on Mondays, with week 01 being the week containing 4 Jan.
    ISO week days range from 1 (Monday) to 7 (Sunday).

    Parameters
    ----------
    iso_year : int
        the year taken from format %G
    iso_week : int
        the week taken from format %V
    iso_weekday : int
        weekday taken from format %u

    Returns
    -------
    (int, int)
        the iso year and the Gregorian ordinal date / julian date
    """
    cdef:
        int correction, ordinal

    # January 4th is always in ISO week 1; its weekday anchors the shift
    # between the ISO week grid and day-of-year ordinals.
    correction = date(iso_year, 1, 4).isoweekday() + 3
    ordinal = (iso_week * 7) + iso_weekday - correction
    # ordinal may be negative or 0 now, which means the date is in the previous
    # calendar year
    if ordinal < 1:
        ordinal += date(iso_year, 1, 1).toordinal()
        iso_year -= 1
        ordinal -= date(iso_year, 1, 1).toordinal()
    return iso_year, ordinal
cdef tzinfo parse_timezone_directive(str z):
    """
    Parse the '%z' directive and return a pytz.FixedOffset

    Parameters
    ----------
    z : string of the UTC offset

    Returns
    -------
    pytz.FixedOffset

    Notes
    -----
    This is essentially similar to the cpython implementation
    https://github.com/python/cpython/blob/master/Lib/_strptime.py#L457-L479
    """
    cdef:
        int gmtoff_fraction, hours, minutes, seconds, pad_number, microseconds
        int total_minutes
        object gmtoff_remainder, gmtoff_remainder_padding

    # 'Z' designates UTC itself.
    if z == 'Z':
        return pytz.FixedOffset(0)
    # Normalize "+HH:MM[:SS[.ffffff]]" to "+HHMM[SS[.ffffff]]" so the fixed
    # slicing below works regardless of colon usage.
    if z[3] == ':':
        z = z[:3] + z[4:]
        if len(z) > 5:
            if z[5] != ':':
                raise ValueError(f"Inconsistent use of : in {z}")
            z = z[:5] + z[6:]
    hours = int(z[1:3])
    minutes = int(z[3:5])
    seconds = int(z[5:7] or 0)

    # Pad to always return microseconds.
    # z[7] (if present) is the '.' separator, so the fraction starts at z[8].
    gmtoff_remainder = z[8:]
    pad_number = 6 - len(gmtoff_remainder)
    gmtoff_remainder_padding = "0" * pad_number
    microseconds = int(gmtoff_remainder + gmtoff_remainder_padding)

    # FixedOffset takes whole minutes; sub-minute parts are floor-divided in.
    total_minutes = ((hours * 60) + minutes + (seconds // 60) +
                     (microseconds // 60_000_000))
    total_minutes = -total_minutes if z.startswith("-") else total_minutes
    return pytz.FixedOffset(total_minutes)

View File

@@ -0,0 +1,19 @@
from cpython.datetime cimport timedelta
from numpy cimport int64_t
# Exposed for tslib, not intended for outside use.
cpdef int64_t delta_to_nanoseconds(delta) except? -1
cdef convert_to_timedelta64(object ts, str unit)
cdef bint is_any_td_scalar(object obj)


# C-level declaration of the Timedelta base class; implementation lives in
# the corresponding .pyx file.
cdef class _Timedelta(timedelta):
    cdef readonly:
        int64_t value      # nanoseconds
        object freq        # frequency reference
        bint is_populated  # are my components populated
        int64_t _d, _h, _m, _s, _ms, _us, _ns

    cpdef timedelta to_pytimedelta(_Timedelta self)
    cpdef bint _has_ns(self)

View File

@@ -0,0 +1,84 @@
from datetime import timedelta
from typing import (
ClassVar,
Type,
TypeVar,
overload,
)
import numpy as np
from pandas._libs.tslibs import (
NaTType,
Tick,
)
from pandas._typing import npt
# Type stubs for pandas._libs.tslibs.timedeltas (implementation is Cython).
_S = TypeVar("_S", bound=timedelta)

def ints_to_pytimedelta(
    arr: npt.NDArray[np.int64],  # const int64_t[:]
    box: bool = ...,
) -> npt.NDArray[np.object_]: ...
def array_to_timedelta64(
    values: npt.NDArray[np.object_],
    unit: str | None = ...,
    errors: str = ...,
) -> np.ndarray: ...  # np.ndarray[m8ns]
def parse_timedelta_unit(unit: str | None) -> str: ...
def delta_to_nanoseconds(delta: Tick | np.timedelta64 | timedelta | int) -> int: ...

class Timedelta(timedelta):
    min: ClassVar[Timedelta]
    max: ClassVar[Timedelta]
    resolution: ClassVar[Timedelta]
    value: int  # np.int64
    # error: "__new__" must return a class instance (got "Union[Timedelta, NaTType]")
    def __new__(  # type: ignore[misc]
        cls: Type[_S],
        value=...,
        unit: str = ...,
        **kwargs: int | float | np.integer | np.floating,
    ) -> _S | NaTType: ...
    @property
    def days(self) -> int: ...
    @property
    def seconds(self) -> int: ...
    @property
    def microseconds(self) -> int: ...
    def total_seconds(self) -> float: ...
    def to_pytimedelta(self) -> timedelta: ...
    def to_timedelta64(self) -> np.timedelta64: ...
    @property
    def asm8(self) -> np.timedelta64: ...
    # TODO: round/floor/ceil could return NaT?
    def round(self: _S, freq: str) -> _S: ...
    def floor(self: _S, freq: str) -> _S: ...
    def ceil(self: _S, freq: str) -> _S: ...
    @property
    def resolution_string(self) -> str: ...
    # Arithmetic/comparison operators mirror datetime.timedelta semantics.
    def __add__(self, other: timedelta) -> timedelta: ...
    def __radd__(self, other: timedelta) -> timedelta: ...
    def __sub__(self, other: timedelta) -> timedelta: ...
    def __rsub__(self, other: timedelta) -> timedelta: ...
    def __neg__(self) -> timedelta: ...
    def __pos__(self) -> timedelta: ...
    def __abs__(self) -> timedelta: ...
    def __mul__(self, other: float) -> timedelta: ...
    def __rmul__(self, other: float) -> timedelta: ...
    @overload
    def __floordiv__(self, other: timedelta) -> int: ...
    @overload
    def __floordiv__(self, other: int) -> timedelta: ...
    @overload
    def __truediv__(self, other: timedelta) -> float: ...
    @overload
    def __truediv__(self, other: float) -> timedelta: ...
    def __mod__(self, other: timedelta) -> timedelta: ...
    def __divmod__(self, other: timedelta) -> tuple[int, timedelta]: ...
    def __le__(self, other: timedelta) -> bool: ...
    def __lt__(self, other: timedelta) -> bool: ...
    def __ge__(self, other: timedelta) -> bool: ...
    def __gt__(self, other: timedelta) -> bool: ...
    def __hash__(self) -> int: ...

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,30 @@
from cpython.datetime cimport (
datetime,
tzinfo,
)
from numpy cimport int64_t
from pandas._libs.tslibs.base cimport ABCTimestamp
from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct
# Factory used internally to build Timestamps from an i8 value + dtstruct.
cdef object create_timestamp_from_ts(int64_t value,
                                     npy_datetimestruct dts,
                                     tzinfo tz, object freq, bint fold)


# C-level declaration of the Timestamp base class; implementation lives in
# the corresponding .pyx file.
cdef class _Timestamp(ABCTimestamp):
    cdef readonly:
        int64_t value, nanosecond
        object _freq

    cdef bint _get_start_end_field(self, str field, freq)
    cdef _get_date_name_field(self, str field, object locale)
    cdef int64_t _maybe_convert_value_to_local(self)
    cdef bint _can_compare(self, datetime other)
    cpdef to_datetime64(self)
    cpdef datetime to_pydatetime(_Timestamp self, bint warn=*)
    cdef bint _compare_outside_nanorange(_Timestamp self, datetime other,
                                         int op) except -1
    cpdef void _set_freq(self, freq)
    cdef _warn_on_field_deprecation(_Timestamp self, freq, str field)

View File

@@ -0,0 +1,207 @@
from datetime import (
date as _date,
datetime,
time as _time,
timedelta,
tzinfo as _tzinfo,
)
from time import struct_time
from typing import (
ClassVar,
TypeVar,
overload,
)
import numpy as np
from pandas._libs.tslibs import (
BaseOffset,
NaTType,
Period,
Timedelta,
)
# Type stubs for pandas._libs.tslibs.timestamps (implementation is Cython).
_DatetimeT = TypeVar("_DatetimeT", bound=datetime)

def integer_op_not_supported(obj: object) -> TypeError: ...

class Timestamp(datetime):
    min: ClassVar[Timestamp]
    max: ClassVar[Timestamp]

    resolution: ClassVar[Timedelta]
    value: int  # np.int64
    # error: "__new__" must return a class instance (got "Union[Timestamp, NaTType]")
    def __new__(  # type: ignore[misc]
        cls: type[_DatetimeT],
        ts_input: int
        | np.integer
        | float
        | str
        | _date
        | datetime
        | np.datetime64 = ...,
        freq: int | None | str | BaseOffset = ...,
        tz: str | _tzinfo | None | int = ...,
        unit: str | int | None = ...,
        year: int | None = ...,
        month: int | None = ...,
        day: int | None = ...,
        hour: int | None = ...,
        minute: int | None = ...,
        second: int | None = ...,
        microsecond: int | None = ...,
        nanosecond: int | None = ...,
        tzinfo: _tzinfo | None = ...,
        *,
        fold: int | None = ...,
    ) -> _DatetimeT | NaTType: ...
    def _set_freq(self, freq: BaseOffset | None) -> None: ...
    # Read-only datetime field accessors.
    @property
    def year(self) -> int: ...
    @property
    def month(self) -> int: ...
    @property
    def day(self) -> int: ...
    @property
    def hour(self) -> int: ...
    @property
    def minute(self) -> int: ...
    @property
    def second(self) -> int: ...
    @property
    def microsecond(self) -> int: ...
    @property
    def tzinfo(self) -> _tzinfo | None: ...
    @property
    def tz(self) -> _tzinfo | None: ...
    @property
    def fold(self) -> int: ...
    # Alternate constructors mirroring datetime's classmethods.
    @classmethod
    def fromtimestamp(
        cls: type[_DatetimeT], t: float, tz: _tzinfo | None = ...
    ) -> _DatetimeT: ...
    @classmethod
    def utcfromtimestamp(cls: type[_DatetimeT], t: float) -> _DatetimeT: ...
    @classmethod
    def today(cls: type[_DatetimeT], tz: _tzinfo | str | None = ...) -> _DatetimeT: ...
    @classmethod
    def fromordinal(
        cls: type[_DatetimeT],
        ordinal: int,
        freq: str | BaseOffset | None = ...,
        tz: _tzinfo | str | None = ...,
    ) -> _DatetimeT: ...
    @classmethod
    def now(cls: type[_DatetimeT], tz: _tzinfo | str | None = ...) -> _DatetimeT: ...
    @classmethod
    def utcnow(cls: type[_DatetimeT]) -> _DatetimeT: ...
    # error: Signature of "combine" incompatible with supertype "datetime"
    @classmethod
    def combine(cls, date: _date, time: _time) -> datetime: ...  # type: ignore[override]
    @classmethod
    def fromisoformat(cls: type[_DatetimeT], date_string: str) -> _DatetimeT: ...
    def strftime(self, format: str) -> str: ...
    def __format__(self, fmt: str) -> str: ...
    def toordinal(self) -> int: ...
    def timetuple(self) -> struct_time: ...
    def timestamp(self) -> float: ...
    def utctimetuple(self) -> struct_time: ...
    def date(self) -> _date: ...
    def time(self) -> _time: ...
    def timetz(self) -> _time: ...
    def replace(
        self,
        year: int = ...,
        month: int = ...,
        day: int = ...,
        hour: int = ...,
        minute: int = ...,
        second: int = ...,
        microsecond: int = ...,
        tzinfo: _tzinfo | None = ...,
        fold: int = ...,
    ) -> datetime: ...
    def astimezone(self: _DatetimeT, tz: _tzinfo | None = ...) -> _DatetimeT: ...
    def ctime(self) -> str: ...
    def isoformat(self, sep: str = ..., timespec: str = ...) -> str: ...
    @classmethod
    def strptime(cls, date_string: str, format: str) -> datetime: ...
    def utcoffset(self) -> timedelta | None: ...
    def tzname(self) -> str | None: ...
    def dst(self) -> timedelta | None: ...
    def __le__(self, other: datetime) -> bool: ...  # type: ignore
    def __lt__(self, other: datetime) -> bool: ...  # type: ignore
    def __ge__(self, other: datetime) -> bool: ...  # type: ignore
    def __gt__(self, other: datetime) -> bool: ...  # type: ignore
    # error: Signature of "__add__" incompatible with supertype "date"/"datetime"
    @overload  # type: ignore[override]
    def __add__(self, other: np.ndarray) -> np.ndarray: ...
    @overload
    # TODO: other can also be Tick (but it cannot be resolved)
    def __add__(self: _DatetimeT, other: timedelta | np.timedelta64) -> _DatetimeT: ...
    def __radd__(self: _DatetimeT, other: timedelta) -> _DatetimeT: ...
    @overload  # type: ignore
    def __sub__(self, other: datetime) -> timedelta: ...
    @overload
    # TODO: other can also be Tick (but it cannot be resolved)
    def __sub__(self, other: timedelta | np.timedelta64) -> datetime: ...
    def __hash__(self) -> int: ...
    def weekday(self) -> int: ...
    def isoweekday(self) -> int: ...
    def isocalendar(self) -> tuple[int, int, int]: ...
    # pandas-specific calendar predicates.
    @property
    def is_leap_year(self) -> bool: ...
    @property
    def is_month_start(self) -> bool: ...
    @property
    def is_quarter_start(self) -> bool: ...
    @property
    def is_year_start(self) -> bool: ...
    @property
    def is_month_end(self) -> bool: ...
    @property
    def is_quarter_end(self) -> bool: ...
    @property
    def is_year_end(self) -> bool: ...
    def to_pydatetime(self, warn: bool = ...) -> datetime: ...
    def to_datetime64(self) -> np.datetime64: ...
    def to_period(self, freq: BaseOffset | str | None = ...) -> Period: ...
    def to_julian_date(self) -> np.float64: ...
    @property
    def asm8(self) -> np.datetime64: ...
    def tz_convert(self: _DatetimeT, tz: _tzinfo | str | None) -> _DatetimeT: ...
    # TODO: could return NaT?
    def tz_localize(
        self: _DatetimeT,
        tz: _tzinfo | str | None,
        ambiguous: str = ...,
        nonexistent: str = ...,
    ) -> _DatetimeT: ...
    def normalize(self: _DatetimeT) -> _DatetimeT: ...
    # TODO: round/floor/ceil could return NaT?
    def round(
        self: _DatetimeT, freq: str, ambiguous: bool | str = ..., nonexistent: str = ...
    ) -> _DatetimeT: ...
    def floor(
        self: _DatetimeT, freq: str, ambiguous: bool | str = ..., nonexistent: str = ...
    ) -> _DatetimeT: ...
    def ceil(
        self: _DatetimeT, freq: str, ambiguous: bool | str = ..., nonexistent: str = ...
    ) -> _DatetimeT: ...
    def day_name(self, locale: str | None = ...) -> str: ...
    def month_name(self, locale: str | None = ...) -> str: ...
    @property
    def day_of_week(self) -> int: ...
    @property
    def day_of_month(self) -> int: ...
    @property
    def day_of_year(self) -> int: ...
    @property
    def quarter(self) -> int: ...
    @property
    def week(self) -> int: ...
    def to_numpy(
        self, dtype: np.dtype | None = ..., copy: bool = ...
    ) -> np.datetime64: ...

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,22 @@
from cpython.datetime cimport (
datetime,
timedelta,
tzinfo,
)
# Shared pytz UTC singleton.
cdef tzinfo utc_pytz

# C-level API of the timezones module; implementations in timezones.pyx.
cpdef bint is_utc(tzinfo tz)
cdef bint is_tzlocal(tzinfo tz)
cdef bint treat_tz_as_pytz(tzinfo tz)
cpdef bint tz_compare(tzinfo start, tzinfo end)
cpdef object get_timezone(tzinfo tz)
cpdef tzinfo maybe_get_tz(object tz)
cdef timedelta get_utcoffset(tzinfo tz, datetime obj)
cdef bint is_fixed_offset(tzinfo tz)
cdef object get_dst_info(tzinfo tz)

View File

@@ -0,0 +1,25 @@
from datetime import (
datetime,
tzinfo,
)
from typing import Callable
import numpy as np
from pandas._typing import npt
# imported from dateutil.tz
# Type stubs for pandas._libs.tslibs.timezones (implementation is Cython).
# imported from dateutil.tz
dateutil_gettz: Callable[[str], tzinfo]

def tz_standardize(tz: tzinfo) -> tzinfo: ...
def tz_compare(start: tzinfo | None, end: tzinfo | None) -> bool: ...
def infer_tzinfo(
    start: datetime | None,
    end: datetime | None,
) -> tzinfo | None: ...
def get_dst_info(
    tz: tzinfo,
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64], str]: ...
def maybe_get_tz(tz: str | int | np.int64 | tzinfo | None) -> tzinfo | None: ...
def get_timezone(tz: tzinfo) -> tzinfo | str: ...
def is_utc(tz: tzinfo | None) -> bool: ...

View File

@@ -0,0 +1,408 @@
from datetime import (
timedelta,
timezone,
)
from cpython.datetime cimport (
datetime,
timedelta,
tzinfo,
)
# dateutil compat
from dateutil.tz import (
gettz as dateutil_gettz,
tzfile as _dateutil_tzfile,
tzlocal as _dateutil_tzlocal,
tzutc as _dateutil_tzutc,
)
import pytz
from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo
UTC = pytz.utc
import numpy as np
cimport numpy as cnp
from numpy cimport int64_t
cnp.import_array()
# ----------------------------------------------------------------------
from pandas._libs.tslibs.util cimport (
get_nat,
is_integer_object,
)
# Module-level singletons used by the UTC identity checks below.
cdef int64_t NPY_NAT = get_nat()
cdef tzinfo utc_stdlib = timezone.utc
cdef tzinfo utc_pytz = UTC
cdef tzinfo utc_dateutil_str = dateutil_gettz("UTC")  # NB: *not* the same as tzutc()
# ----------------------------------------------------------------------
cpdef inline bint is_utc(tzinfo tz):
    # True for any recognized UTC implementation: pytz.UTC, stdlib
    # timezone.utc, dateutil's tzutc, or dateutil's gettz("UTC") instance.
    return (
        tz is utc_pytz
        or tz is utc_stdlib
        or isinstance(tz, _dateutil_tzutc)
        or tz is utc_dateutil_str
    )
cdef inline bint is_tzlocal(tzinfo tz):
    # True only for dateutil's tzlocal (system local time) wrapper.
    return isinstance(tz, _dateutil_tzlocal)
cdef inline bint treat_tz_as_pytz(tzinfo tz):
    # Duck-typed check: pytz DstTzInfo instances carry these two attributes.
    return (hasattr(tz, '_utc_transition_times') and
            hasattr(tz, '_transition_info'))
cdef inline bint treat_tz_as_dateutil(tzinfo tz):
    # Duck-typed check: dateutil tzfile instances carry these two attributes.
    return hasattr(tz, '_trans_list') and hasattr(tz, '_trans_idx')
# Returns str or tzinfo object
cpdef inline object get_timezone(tzinfo tz):
    """
    We need to do several things here:
    1) Distinguish between pytz and dateutil timezones
    2) Not be over-specific (e.g. US/Eastern with/without DST is same *zone*
       but a different tz object)
    3) Provide something to serialize when we're storing a datetime object
       in pytables.

    We return a string prefaced with dateutil if it's a dateutil tz, else just
    the tz name.  It needs to be a string so that we can serialize it with
    UJSON/pytables.  maybe_get_tz (below) is the inverse of this process.

    Raises
    ------
    TypeError : if tz is None
    ValueError : for dateutil tzfiles with an unusable _filename (Windows bug)
    """
    if tz is None:
        raise TypeError("tz argument cannot be None")
    if is_utc(tz):
        return tz
    else:
        if treat_tz_as_dateutil(tz):
            if '.tar.gz' in tz._filename:
                raise ValueError(
                    'Bad tz filename. Dateutil on python 3 on windows has a '
                    'bug which causes tzfile._filename to be the same for all '
                    'timezone files. Please construct dateutil timezones '
                    'implicitly by passing a string like "dateutil/Europe'
                    '/London" when you construct your pandas objects instead '
                    'of passing a timezone object. See '
                    'https://github.com/pandas-dev/pandas/pull/7362')
            return 'dateutil/' + tz._filename
        else:
            # tz is a pytz timezone or unknown.
            try:
                zone = tz.zone
                if zone is None:
                    return tz
                return zone
            except AttributeError:
                return tz
cpdef inline tzinfo maybe_get_tz(object tz):
    """
    (Maybe) Construct a timezone object from a string.  If tz is a string, use
    it to construct a timezone object.  Otherwise, just return tz.

    Accepted strings: 'tzlocal()', 'dateutil/<zone>', fixed offsets like
    '+01:00' / 'UTC+01:00', or any pytz zone name.  Integers are treated as
    a UTC offset in seconds.
    """
    if isinstance(tz, str):
        if tz == 'tzlocal()':
            tz = _dateutil_tzlocal()
        elif tz.startswith('dateutil/'):
            zone = tz[9:]
            tz = dateutil_gettz(zone)
            # On Python 3 on Windows, the filename is not always set correctly.
            if isinstance(tz, _dateutil_tzfile) and '.tar.gz' in tz._filename:
                tz._filename = zone
        elif tz[0] in {'-', '+'}:
            # Fixed offset like "+01:30": sign+HH then sign applied to MM too.
            hours = int(tz[0:3])
            minutes = int(tz[0] + tz[4:6])
            tz = timezone(timedelta(hours=hours, minutes=minutes))
        elif tz[0:4] in {'UTC-', 'UTC+'}:
            # Fixed offset like "UTC+01:30"; slice past the "UTC" prefix.
            hours = int(tz[3:6])
            minutes = int(tz[3] + tz[7:9])
            tz = timezone(timedelta(hours=hours, minutes=minutes))
        else:
            tz = pytz.timezone(tz)
    elif is_integer_object(tz):
        # Integer offset is in seconds; pytz.FixedOffset takes minutes.
        tz = pytz.FixedOffset(tz / 60)
    elif isinstance(tz, tzinfo):
        pass
    elif tz is None:
        pass
    else:
        raise TypeError(type(tz))
    return tz
def _p_tz_cache_key(tz: tzinfo):
    """
    Python interface for cache function to facilitate testing.
    """
    # Thin wrapper: tz_cache_key itself is cdef and not importable from Python.
    return tz_cache_key(tz)
# Timezone data caches, key is the pytz string or dateutil file name.
# Populated by get_dst_info below; shared process-wide.
dst_cache = {}
cdef inline object tz_cache_key(tzinfo tz):
    """
    Return the key in the cache for the timezone info object or None
    if unknown.

    The key is currently the tz string for pytz timezones, the filename for
    dateutil timezones.

    Notes
    -----
    This cannot just be the hash of a timezone object.  Unfortunately, the
    hashes of two dateutil tz objects which represent the same timezone are
    not equal (even though the tz objects will compare equal and represent
    the same tz file).  Also, pytz objects are not always hashable so we use
    str(tz) instead.
    """
    if isinstance(tz, _pytz_BaseTzInfo):
        return tz.zone
    elif isinstance(tz, _dateutil_tzfile):
        if '.tar.gz' in tz._filename:
            raise ValueError('Bad tz filename. Dateutil on python 3 on '
                             'windows has a bug which causes tzfile._filename '
                             'to be the same for all timezone files. Please '
                             'construct dateutil timezones implicitly by '
                             'passing a string like "dateutil/Europe/London" '
                             'when you construct your pandas objects instead '
                             'of passing a timezone object. See '
                             'https://github.com/pandas-dev/pandas/pull/7362')
        # NOTE: no '/' separator here — tzfile._filename starts with one.
        return 'dateutil' + tz._filename
    else:
        # e.g. stdlib timezone, pytz.FixedOffset: not cacheable by name.
        return None
# ----------------------------------------------------------------------
# UTC Offsets

cdef timedelta get_utcoffset(tzinfo tz, datetime obj):
    # Fast path: pytz static zones expose the offset as _utcoffset;
    # otherwise fall back to the standard tzinfo.utcoffset protocol.
    try:
        return tz._utcoffset
    except AttributeError:
        return tz.utcoffset(obj)
cdef inline bint is_fixed_offset(tzinfo tz):
    # A zone is "fixed" when it records no DST transitions at all.
    if treat_tz_as_dateutil(tz):
        if len(tz._trans_idx) == 0 and len(tz._trans_list) == 0:
            return 1
        else:
            return 0
    elif treat_tz_as_pytz(tz):
        if (len(tz._transition_info) == 0
                and len(tz._utc_transition_times) == 0):
            return 1
        else:
            return 0
    # This also implicitly accepts datetime.timezone objects which are
    # considered fixed
    return 1
cdef object _get_utc_trans_times_from_dateutil_tz(tzinfo tz):
    """
    Transition times in dateutil timezones are stored in local non-dst
    time.  This code converts them to UTC.  It's the reverse of the code
    in dateutil.tz.tzfile.__init__.

    Returns a list of transition instants expressed in UTC seconds.
    """
    new_trans = list(tz._trans_list)
    last_std_offset = 0
    for i, (trans, tti) in enumerate(zip(tz._trans_list, tz._trans_idx)):
        # Track the most recent standard (non-DST) offset and subtract it to
        # shift each local transition instant back to UTC.
        if not tti.isdst:
            last_std_offset = tti.offset
        new_trans[i] = trans - last_std_offset
    return new_trans
cdef int64_t[:] unbox_utcoffsets(object transinfo):
    # Convert a pytz _transition_info sequence (whose first element per entry
    # is a timedelta UTC offset) into an int64 array of nanosecond offsets.
    cdef:
        Py_ssize_t i, sz
        int64_t[:] arr

    sz = len(transinfo)
    arr = np.empty(sz, dtype='i8')

    for i in range(sz):
        arr[i] = int(transinfo[i][0].total_seconds()) * 1_000_000_000

    return arr
# ----------------------------------------------------------------------
# Daylight Savings

cdef object get_dst_info(tzinfo tz):
    """
    Returns
    -------
    ndarray[int64_t]
        Nanosecond UTC times of DST transitions.
    ndarray[int64_t]
        Nanosecond UTC offsets corresponding to DST transitions.
    str
        Describing the type of tzinfo object.

    Results are memoized in the module-level `dst_cache` keyed by
    tz_cache_key(tz); zones with no cache key bypass the cache entirely.
    """
    cache_key = tz_cache_key(tz)
    if cache_key is None:
        # e.g. pytz.FixedOffset, matplotlib.dates._UTC,
        # psycopg2.tz.FixedOffsetTimezone
        num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000
        return (np.array([NPY_NAT + 1], dtype=np.int64),
                np.array([num], dtype=np.int64),
                "unknown")

    if cache_key not in dst_cache:
        if treat_tz_as_pytz(tz):
            trans = np.array(tz._utc_transition_times, dtype='M8[ns]')
            trans = trans.view('i8')
            # Year-1 sentinel marks "before any recorded transition".
            if tz._utc_transition_times[0].year == 1:
                trans[0] = NPY_NAT + 1
            deltas = unbox_utcoffsets(tz._transition_info)
            typ = 'pytz'

        elif treat_tz_as_dateutil(tz):
            if len(tz._trans_list):
                # get utc trans times
                trans_list = _get_utc_trans_times_from_dateutil_tz(tz)
                trans = np.hstack([
                    np.array([0], dtype='M8[s]'),  # place holder for 1st item
                    np.array(trans_list, dtype='M8[s]')]).astype(
                    'M8[ns]')  # all trans listed
                trans = trans.view('i8')
                trans[0] = NPY_NAT + 1

                # deltas
                deltas = np.array([v.offset for v in (
                    tz._ttinfo_before,) + tz._trans_idx], dtype='i8')
                deltas *= 1000000000
                typ = 'dateutil'

            elif is_fixed_offset(tz):
                trans = np.array([NPY_NAT + 1], dtype=np.int64)
                deltas = np.array([tz._ttinfo_std.offset],
                                  dtype='i8') * 1000000000
                typ = 'fixed'
            else:
                # 2018-07-12 this is not reached in the tests, and this case
                # is not handled in any of the functions that call
                # get_dst_info.  If this case _were_ hit the calling
                # functions would then hit an IndexError because they assume
                # `deltas` is non-empty.
                # (under the just-deleted code that returned empty arrays)
                raise AssertionError("dateutil tzinfo is not a FixedOffset "
                                     "and has an empty `_trans_list`.", tz)
        else:
            # static tzinfo, we can get here with pytz.StaticTZInfo
            # which are not caught by treat_tz_as_pytz
            trans = np.array([NPY_NAT + 1], dtype=np.int64)
            num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000
            deltas = np.array([num], dtype=np.int64)
            typ = "static"

        dst_cache[cache_key] = (trans, deltas, typ)

    return dst_cache[cache_key]
def infer_tzinfo(datetime start, datetime end):
    """
    Infer the common tzinfo of two optional datetimes.

    Returns the tzinfo shared by `start` and `end` (either may be None, in
    which case the other's tzinfo — or None — is returned).

    Raises
    ------
    AssertionError
        If both inputs are present but carry different timezones.
    """
    if start is None and end is None:
        return None
    if start is None:
        return end.tzinfo
    tz = start.tzinfo
    if end is not None and not tz_compare(tz, end.tzinfo):
        raise AssertionError(f'Inputs must both have the same timezone, '
                             f'{tz} != {end.tzinfo}')
    return tz
cpdef bint tz_compare(tzinfo start, tzinfo end):
    """
    Compare string representations of timezones

    The same timezone can be represented as different instances of
    timezones.  For example
    `<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>` and
    `<DstTzInfo 'Europe/Paris' CET+1:00:00 STD>` are essentially same
    timezones but aren't evaluated such, but the string representation
    for both of these is `'Europe/Paris'`.

    This exists only to add a notion of equality to pytz-style zones
    that is compatible with the notion of equality expected of tzinfo
    subclasses.

    Parameters
    ----------
    start : tzinfo
    end : tzinfo

    Returns
    -------
    bool
    """
    # GH 18523
    if is_utc(start):
        # GH#38851 consider pytz/dateutil/stdlib UTCs as equivalent
        return is_utc(end)
    elif is_utc(end):
        # Ensure we don't treat tzlocal as equal to UTC when running in UTC
        return False
    elif start is None or end is None:
        # Equal only when both are missing.
        return start is None and end is None
    return get_timezone(start) == get_timezone(end)
def tz_standardize(tz: tzinfo) -> tzinfo:
    """
    If the passed tz is a pytz timezone object, "normalize" it to a
    consistent version by re-fetching the canonical instance by zone name.

    Parameters
    ----------
    tz : tzinfo

    Returns
    -------
    tzinfo
        The canonical pytz instance for pytz zones, otherwise ``tz``
        unchanged.
    """
    if not treat_tz_as_pytz(tz):
        # non-pytz zones pass through untouched
        return tz
    return pytz.timezone(str(tz))

View File

@@ -0,0 +1,11 @@
from cpython.datetime cimport tzinfo
from numpy cimport int64_t
cdef int64_t tz_convert_utc_to_tzlocal(
int64_t utc_val, tzinfo tz, bint* fold=*
) except? -1
cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz)
cdef int64_t tz_localize_to_utc_single(
int64_t val, tzinfo tz, object ambiguous=*, object nonexistent=*
) except? -1

View File

@@ -0,0 +1,21 @@
from datetime import (
timedelta,
tzinfo,
)
from typing import Iterable
import numpy as np
from pandas._typing import npt
def tz_convert_from_utc(
vals: npt.NDArray[np.int64], # const int64_t[:]
tz: tzinfo,
) -> npt.NDArray[np.int64]: ...
def tz_convert_from_utc_single(val: np.int64, tz: tzinfo) -> np.int64: ...
def tz_localize_to_utc(
vals: npt.NDArray[np.int64],
tz: tzinfo | None,
ambiguous: str | bool | Iterable[bool] | None = ...,
nonexistent: str | timedelta | np.timedelta64 | None = ...,
) -> npt.NDArray[np.int64]: ...

View File

@@ -0,0 +1,599 @@
"""
timezone conversion
"""
import cython
from cython import Py_ssize_t
from cpython.datetime cimport (
PyDateTime_IMPORT,
PyDelta_Check,
datetime,
timedelta,
tzinfo,
)
PyDateTime_IMPORT
from dateutil.tz import tzutc
import numpy as np
import pytz
cimport numpy as cnp
from numpy cimport (
int64_t,
intp_t,
ndarray,
uint8_t,
)
cnp.import_array()
from pandas._libs.tslibs.ccalendar cimport (
DAY_NANOS,
HOUR_NANOS,
)
from pandas._libs.tslibs.nattype cimport NPY_NAT
from pandas._libs.tslibs.np_datetime cimport (
dt64_to_dtstruct,
npy_datetimestruct,
)
from pandas._libs.tslibs.timezones cimport (
get_dst_info,
get_utcoffset,
is_fixed_offset,
is_tzlocal,
is_utc,
)
cdef int64_t tz_localize_to_utc_single(
    int64_t val, tzinfo tz, object ambiguous=None, object nonexistent=None,
) except? -1:
    """See tz_localize_to_utc.__doc__"""
    cdef:
        int64_t delta
        int64_t[:] deltas

    if val == NPY_NAT:
        # NaT is timezone-independent; pass through unchanged
        return val
    elif is_utc(tz) or tz is None:
        # already UTC / naive-as-UTC: nothing to do
        return val
    elif is_tzlocal(tz):
        return _tz_convert_tzlocal_utc(val, tz, to_utc=True)
    elif is_fixed_offset(tz):
        # TODO: in this case we should be able to use get_utcoffset,
        #  that returns None for e.g. 'dateutil//usr/share/zoneinfo/Etc/GMT-9'
        # fixed offset: a single delta applies to every value
        _, deltas, _ = get_dst_info(tz)
        delta = deltas[0]
        return val - delta
    else:
        # general (DST-aware) case: delegate to the vectorized implementation
        # with a length-1 array
        return tz_localize_to_utc(
            np.array([val], dtype="i8"),
            tz,
            ambiguous=ambiguous,
            nonexistent=nonexistent,
        )[0]
@cython.boundscheck(False)
@cython.wraparound(False)
def tz_localize_to_utc(ndarray[int64_t] vals, tzinfo tz, object ambiguous=None,
                       object nonexistent=None):
    """
    Localize tzinfo-naive i8 to given time zone (using pytz). If
    there are ambiguities in the values, raise AmbiguousTimeError.

    Parameters
    ----------
    vals : ndarray[int64_t]
    tz : tzinfo or None
    ambiguous : str, bool, or arraylike
        When clocks moved backward due to DST, ambiguous times may arise.
        For example in Central European Time (UTC+01), when going from 03:00
        DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC
        and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter
        dictates how ambiguous times should be handled.

        - 'infer' will attempt to infer fall dst-transition hours based on
          order
        - bool-ndarray where True signifies a DST time, False signifies a
          non-DST time (note that this flag is only applicable for ambiguous
          times, but the array must have the same length as vals)
        - bool if True, treat all vals as DST. If False, treat them as non-DST
        - 'NaT' will return NaT where there are ambiguous times

    nonexistent : {None, "NaT", "shift_forward", "shift_backward", "raise", \
timedelta-like}
        How to handle non-existent times when converting wall times to UTC

    Returns
    -------
    localized : ndarray[int64_t]
    """
    cdef:
        int64_t[:] deltas, idx_shifted, idx_shifted_left, idx_shifted_right
        ndarray[uint8_t, cast=True] ambiguous_array, both_nat, both_eq
        Py_ssize_t i, idx, pos, ntrans, n = len(vals)
        Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right
        int64_t *tdata
        int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins
        int64_t first_delta
        int64_t shift_delta = 0
        ndarray[int64_t] trans, result, result_a, result_b, dst_hours, delta
        ndarray trans_idx, grp, a_idx, b_idx, one_diff
        npy_datetimestruct dts
        bint infer_dst = False, is_dst = False, fill = False
        bint shift_forward = False, shift_backward = False
        bint fill_nonexist = False
        list trans_grp
        str stamp

    # Vectorized version of DstTzInfo.localize
    if is_utc(tz) or tz is None:
        # naive/UTC wall time is already UTC
        return vals

    result = np.empty(n, dtype=np.int64)

    if is_tzlocal(tz):
        # tzlocal has no transition table; convert element-wise
        for i in range(n):
            v = vals[i]
            if v == NPY_NAT:
                result[i] = NPY_NAT
            else:
                result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=True)
        return result

    # silence false-positive compiler warning
    ambiguous_array = np.empty(0, dtype=bool)
    # Decode the `ambiguous` policy into the infer_dst/fill/is_dst flags.
    if isinstance(ambiguous, str):
        if ambiguous == 'infer':
            infer_dst = True
        elif ambiguous == 'NaT':
            fill = True
    elif isinstance(ambiguous, bool):
        is_dst = True
        if ambiguous:
            ambiguous_array = np.ones(len(vals), dtype=bool)
        else:
            ambiguous_array = np.zeros(len(vals), dtype=bool)
    elif hasattr(ambiguous, '__iter__'):
        is_dst = True
        if len(ambiguous) != len(vals):
            raise ValueError("Length of ambiguous bool-array must be "
                             "the same size as vals")
        ambiguous_array = np.asarray(ambiguous, dtype=bool)

    # Decode the `nonexistent` policy.
    if nonexistent == 'NaT':
        fill_nonexist = True
    elif nonexistent == 'shift_forward':
        shift_forward = True
    elif nonexistent == 'shift_backward':
        shift_backward = True
    elif PyDelta_Check(nonexistent):
        from .timedeltas import delta_to_nanoseconds
        shift_delta = delta_to_nanoseconds(nonexistent)
    elif nonexistent not in ('raise', None):
        msg = ("nonexistent must be one of {'NaT', 'raise', 'shift_forward', "
               "shift_backwards} or a timedelta object")
        raise ValueError(msg)

    trans, deltas, _ = get_dst_info(tz)

    tdata = <int64_t*>cnp.PyArray_DATA(trans)
    ntrans = len(trans)

    # Determine whether each date lies left of the DST transition (store in
    # result_a) or right of the DST transition (store in result_b)
    result_a = np.empty(n, dtype=np.int64)
    result_b = np.empty(n, dtype=np.int64)
    result_a[:] = NPY_NAT
    result_b[:] = NPY_NAT

    # Candidate transition indices one day before/after each value; the
    # np.maximum(0, ...) clamps values that precede the first transition.
    idx_shifted_left = (np.maximum(0, trans.searchsorted(
        vals - DAY_NANOS, side='right') - 1)).astype(np.int64)
    idx_shifted_right = (np.maximum(0, trans.searchsorted(
        vals + DAY_NANOS, side='right') - 1)).astype(np.int64)

    for i in range(n):
        val = vals[i]
        v_left = val - deltas[idx_shifted_left[i]]
        pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1
        # timestamp falls to the left side of the DST transition
        if v_left + deltas[pos_left] == val:
            result_a[i] = v_left

        v_right = val - deltas[idx_shifted_right[i]]
        pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1
        # timestamp falls to the right side of the DST transition
        if v_right + deltas[pos_right] == val:
            result_b[i] = v_right

    # silence false-positive compiler warning
    dst_hours = np.empty(0, dtype=np.int64)
    if infer_dst:
        dst_hours = np.empty(n, dtype=np.int64)
        dst_hours[:] = NPY_NAT

        # Get the ambiguous hours (given the above, these are the hours
        # where result_a != result_b and neither of them are NAT)
        both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT)
        both_eq = result_a == result_b
        trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq)))
        if trans_idx.size == 1:
            # a single ambiguous value gives no ordering to infer from
            stamp = _render_tstamp(vals[trans_idx])
            raise pytz.AmbiguousTimeError(
                f"Cannot infer dst time from {stamp} as there "
                f"are no repeated times")
        # Split the array into contiguous chunks (where the difference between
        # indices is 1). These are effectively dst transitions in different
        # years which is useful for checking that there is not an ambiguous
        # transition in an individual year.
        if trans_idx.size > 0:
            one_diff = np.where(np.diff(trans_idx) != 1)[0] + 1
            trans_grp = np.array_split(trans_idx, one_diff)

            # Iterate through each day, if there are no hours where the
            # delta is negative (indicates a repeat of hour) the switch
            # cannot be inferred
            for grp in trans_grp:

                delta = np.diff(result_a[grp])
                if grp.size == 1 or np.all(delta > 0):
                    stamp = _render_tstamp(vals[grp[0]])
                    raise pytz.AmbiguousTimeError(stamp)

                # Find the index for the switch and pull from a for dst and b
                # for standard
                switch_idx = (delta <= 0).nonzero()[0]
                if switch_idx.size > 1:
                    raise pytz.AmbiguousTimeError(
                        f"There are {switch_idx.size} dst switches when "
                        f"there should only be 1.")
                switch_idx = switch_idx[0] + 1
                # Pull the only index and adjust
                a_idx = grp[:switch_idx]
                b_idx = grp[switch_idx:]
                dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx]))

    # Resolve each value using result_a/result_b plus the decoded policies.
    for i in range(n):
        val = vals[i]
        left = result_a[i]
        right = result_b[i]
        if val == NPY_NAT:
            result[i] = val
        elif left != NPY_NAT and right != NPY_NAT:
            if left == right:
                # unambiguous
                result[i] = left
            else:
                # ambiguous wall time: apply the `ambiguous` policy
                if infer_dst and dst_hours[i] != NPY_NAT:
                    result[i] = dst_hours[i]
                elif is_dst:
                    if ambiguous_array[i]:
                        result[i] = left
                    else:
                        result[i] = right
                elif fill:
                    result[i] = NPY_NAT
                else:
                    stamp = _render_tstamp(val)
                    raise pytz.AmbiguousTimeError(
                        f"Cannot infer dst time from {stamp}, try using the "
                        f"'ambiguous' argument")
        elif left != NPY_NAT:
            result[i] = left
        elif right != NPY_NAT:
            result[i] = right
        else:
            # Handle nonexistent times
            if shift_forward or shift_backward or shift_delta != 0:
                # Shift the nonexistent time to the closest existing time
                remaining_mins = val % HOUR_NANOS
                if shift_delta != 0:
                    # Validate that we don't relocalize on another nonexistent
                    # time
                    if -1 < shift_delta + remaining_mins < HOUR_NANOS:
                        raise ValueError(
                            f"The provided timedelta will relocalize on a "
                            f"nonexistent time: {nonexistent}"
                        )
                    new_local = val + shift_delta
                elif shift_forward:
                    new_local = val + (HOUR_NANOS - remaining_mins)
                else:
                    # Subtract 1 since the beginning hour is _inclusive_ of
                    # nonexistent times
                    new_local = val - remaining_mins - 1

                delta_idx = trans.searchsorted(new_local, side='right')
                # Shift the delta_idx by if the UTC offset of
                # the target tz is greater than 0 and we're moving forward
                # or vice versa
                first_delta = deltas[0]
                if (shift_forward or shift_delta > 0) and first_delta > 0:
                    delta_idx_offset = 1
                elif (shift_backward or shift_delta < 0) and first_delta < 0:
                    delta_idx_offset = 1
                else:
                    delta_idx_offset = 0
                delta_idx = delta_idx - delta_idx_offset
                result[i] = new_local - deltas[delta_idx]
            elif fill_nonexist:
                result[i] = NPY_NAT
            else:
                stamp = _render_tstamp(val)
                raise pytz.NonExistentTimeError(stamp)

    return result
cdef inline Py_ssize_t bisect_right_i8(int64_t *data,
                                       int64_t val, Py_ssize_t n):
    """
    Index of the first element of `data` strictly greater than `val`.

    `data` must be sorted ascending with n >= 1 (asserted below).
    """
    cdef:
        Py_ssize_t mid, lo = 0, hi = n

    assert n >= 1

    # fast paths: val outside the array entirely
    if val < data[0]:
        return 0
    if val > data[n - 1]:
        return n

    while lo < hi:
        mid = lo + (hi - lo) // 2
        if data[mid] <= val:
            lo = mid + 1
        else:
            hi = mid

    return lo
cdef inline str _render_tstamp(int64_t val):
    """Render an i8 value as a Timestamp string for exception messages."""
    # local import to avoid a circular dependency at module load
    from pandas._libs.tslibs.timestamps import Timestamp
    ts = Timestamp(val)
    return str(ts)
# ----------------------------------------------------------------------
# Timezone Conversion
cdef int64_t tz_convert_utc_to_tzlocal(
    int64_t utc_val, tzinfo tz, bint* fold=NULL
) except? -1:
    """
    Convert a UTC i8 value to wall time for a tzlocal-style tzinfo.

    Thin wrapper around _tz_convert_tzlocal_utc with to_utc=False.

    Parameters
    ----------
    utc_val : int64_t
    tz : tzinfo
    fold : bint*
        pointer to fold: whether datetime ends up in a fold or not
        after adjustment

    Returns
    -------
    local_val : int64_t
    """
    return _tz_convert_tzlocal_utc(utc_val, tz, to_utc=False, fold=fold)
cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz):
    """
    Convert the val (in i8) from UTC to tz

    This is a single value version of tz_convert_from_utc.

    Parameters
    ----------
    val : int64
    tz : tzinfo

    Returns
    -------
    converted: int64
    """
    cdef:
        int64_t delta
        int64_t[:] deltas
        ndarray[int64_t, ndim=1] trans
        intp_t pos

    if val == NPY_NAT:
        # NaT is timezone-independent
        return val

    if is_utc(tz):
        return val
    elif is_tzlocal(tz):
        return _tz_convert_tzlocal_utc(val, tz, to_utc=False)
    elif is_fixed_offset(tz):
        # fixed offset: the first delta applies to every value
        _, deltas, _ = get_dst_info(tz)
        delta = deltas[0]
        return val + delta
    else:
        # DST zone: find the transition in effect at `val` and add its offset
        trans, deltas, _ = get_dst_info(tz)
        pos = trans.searchsorted(val, side="right") - 1
        return val + deltas[pos]
def tz_convert_from_utc(const int64_t[:] vals, tzinfo tz):
    """
    Convert the values (in i8) from UTC to tz

    Parameters
    ----------
    vals : int64 ndarray
    tz : tzinfo

    Returns
    -------
    int64 ndarray of converted
    """
    cdef:
        const int64_t[:] converted

    if len(vals):
        converted = _tz_convert_from_utc(vals, tz)
        return np.array(converted, dtype=np.int64)

    # empty input: return a fresh empty i8 array
    return np.array([], dtype=np.int64)
@cython.boundscheck(False)
@cython.wraparound(False)
cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] vals, tzinfo tz):
    """
    Convert the given values (in i8) either to UTC or from UTC.

    Parameters
    ----------
    vals : int64 ndarray
    tz : tzinfo

    Returns
    -------
    converted : ndarray[int64_t]
    """
    cdef:
        int64_t[:] converted, deltas
        Py_ssize_t i, n = len(vals)
        int64_t val, delta
        intp_t[:] pos
        ndarray[int64_t] trans
        str typ

    if is_utc(tz):
        # no-op; the input buffer is returned unchanged (not copied)
        return vals
    elif is_tzlocal(tz):
        converted = np.empty(n, dtype=np.int64)
        for i in range(n):
            val = vals[i]
            if val == NPY_NAT:
                converted[i] = NPY_NAT
            else:
                converted[i] = _tz_convert_tzlocal_utc(val, tz, to_utc=False)
    else:
        converted = np.empty(n, dtype=np.int64)

        trans, deltas, typ = get_dst_info(tz)

        if typ not in ["pytz", "dateutil"]:
            # FixedOffset, we know len(deltas) == 1
            delta = deltas[0]

            for i in range(n):
                val = vals[i]
                if val == NPY_NAT:
                    converted[i] = val
                else:
                    converted[i] = val + delta

        else:
            # find the offset in effect at each timestamp from the
            # transition table
            pos = trans.searchsorted(vals, side="right") - 1

            for i in range(n):
                val = vals[i]
                if val == NPY_NAT:
                    converted[i] = val
                else:
                    if pos[i] < 0:
                        # TODO: How is this reached? Should we be checking for
                        #  it elsewhere?
                        raise ValueError("First time before start of DST info")

                    converted[i] = val + deltas[pos[i]]

    return converted
# OSError may be thrown by tzlocal on windows at or close to 1970-01-01
# see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241
cdef inline int64_t _tzlocal_get_offset_components(int64_t val, tzinfo tz,
                                                   bint to_utc,
                                                   bint *fold=NULL) except? -1:
    """
    Calculate offset in nanoseconds needed to convert the i8 representation of
    a datetime from a tzlocal timezone to UTC, or vice-versa.

    Parameters
    ----------
    val : int64_t
    tz : tzinfo
    to_utc : bint
        True if converting tzlocal _to_ UTC, False if going the other direction
    fold : bint*, default NULL
        pointer to fold: whether datetime ends up in a fold or not
        after adjustment

    Returns
    -------
    delta : int64_t

    Notes
    -----
    Sets fold by pointer (only on the UTC -> wall-time path).
    """
    cdef:
        npy_datetimestruct dts
        datetime dt
        int64_t delta
        timedelta td

    dt64_to_dtstruct(val, &dts)
    dt = datetime(dts.year, dts.month, dts.day, dts.hour,
                  dts.min, dts.sec, dts.us)

    # tz.utcoffset only makes sense if datetime
    # is _wall time_, so if val is a UTC timestamp convert to wall time
    if not to_utc:
        # interpret val as UTC, then shift into tz to obtain wall time
        dt = dt.replace(tzinfo=tzutc())
        dt = dt.astimezone(tz)

        if fold is not NULL:
            fold[0] = dt.fold

    td = tz.utcoffset(dt)
    # nanoseconds offset; int() guards against float total_seconds
    return int(td.total_seconds() * 1_000_000_000)
# OSError may be thrown by tzlocal on windows at or close to 1970-01-01
# see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241
cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True,
                                     bint* fold=NULL) except? -1:
    """
    Convert the i8 representation of a datetime from a tzlocal timezone to
    UTC, or vice-versa.

    Private, not intended for use outside of tslibs.conversion

    Parameters
    ----------
    val : int64_t
    tz : tzinfo
    to_utc : bint
        True if converting tzlocal _to_ UTC, False if going the other direction
    fold : bint*
        pointer to fold: whether datetime ends up in a fold or not
        after adjustment

    Returns
    -------
    result : int64_t

    Notes
    -----
    Sets fold by pointer
    """
    cdef:
        int64_t offset

    offset = _tzlocal_get_offset_components(val, tz, to_utc, fold)

    # wall -> UTC subtracts the offset; UTC -> wall adds it
    return val - offset if to_utc else val + offset

View File

@@ -0,0 +1,222 @@
from cpython.object cimport PyTypeObject
cdef extern from *:
"""
PyObject* char_to_string(const char* data) {
return PyUnicode_FromString(data);
}
"""
object char_to_string(const char* data)
cdef extern from "Python.h":
# Note: importing extern-style allows us to declare these as nogil
# functions, whereas `from cpython cimport` does not.
bint PyUnicode_Check(object obj) nogil
bint PyBool_Check(object obj) nogil
bint PyFloat_Check(object obj) nogil
bint PyComplex_Check(object obj) nogil
bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil
# Note that following functions can potentially raise an exception,
# thus they cannot be declared 'nogil'. Also PyUnicode_AsUTF8AndSize() can
# potentially allocate memory inside in unlikely case of when underlying
# unicode object was stored as non-utf8 and utf8 wasn't requested before.
const char* PyUnicode_AsUTF8AndSize(object obj,
Py_ssize_t* length) except NULL
from numpy cimport (
float64_t,
int64_t,
)
cdef extern from "numpy/arrayobject.h":
PyTypeObject PyFloatingArrType_Type
cdef extern from "numpy/ndarrayobject.h":
PyTypeObject PyTimedeltaArrType_Type
PyTypeObject PyDatetimeArrType_Type
PyTypeObject PyComplexFloatingArrType_Type
PyTypeObject PyBoolArrType_Type
bint PyArray_IsIntegerScalar(obj) nogil
bint PyArray_Check(obj) nogil
cdef extern from "numpy/npy_common.h":
int64_t NPY_MIN_INT64
cdef inline int64_t get_nat():
    # i8 sentinel used for NaT: the minimum representable int64
    return NPY_MIN_INT64
# --------------------------------------------------------------------
# Type Checking
cdef inline bint is_integer_object(object obj) nogil:
    """
    Cython equivalent of
    `isinstance(val, (int, long, np.integer)) and not isinstance(val, bool)`

    Parameters
    ----------
    val : object

    Returns
    -------
    is_integer : bool

    Notes
    -----
    This counts np.timedelta64 objects as integers.
    """
    if PyBool_Check(obj):
        return False
    return PyArray_IsIntegerScalar(obj) and not is_timedelta64_object(obj)
cdef inline bint is_float_object(object obj) nogil:
    """
    Cython equivalent of `isinstance(val, (float, np.floating))`

    Parameters
    ----------
    val : object

    Returns
    -------
    is_float : bool
    """
    if PyFloat_Check(obj):
        return True
    return PyObject_TypeCheck(obj, &PyFloatingArrType_Type)
cdef inline bint is_complex_object(object obj) nogil:
    """
    Cython equivalent of `isinstance(val, (complex, np.complexfloating))`

    Parameters
    ----------
    val : object

    Returns
    -------
    is_complex : bool
    """
    if PyComplex_Check(obj):
        return True
    return PyObject_TypeCheck(obj, &PyComplexFloatingArrType_Type)
cdef inline bint is_bool_object(object obj) nogil:
    """
    Cython equivalent of `isinstance(val, (bool, np.bool_))`

    Parameters
    ----------
    val : object

    Returns
    -------
    is_bool : bool
    """
    if PyBool_Check(obj):
        return True
    return PyObject_TypeCheck(obj, &PyBoolArrType_Type)
cdef inline bint is_real_number_object(object obj) nogil:
    """True for bool, integer, or float scalars (python or numpy)."""
    if is_bool_object(obj):
        return True
    return is_integer_object(obj) or is_float_object(obj)
cdef inline bint is_timedelta64_object(object obj) nogil:
    """
    Cython equivalent of `isinstance(val, np.timedelta64)`.

    Checks the exact numpy scalar type via the C API rather than a
    Python-level isinstance call.
    """
    return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type)
cdef inline bint is_datetime64_object(object obj) nogil:
    """
    Cython equivalent of `isinstance(val, np.datetime64)`.

    Checks the exact numpy scalar type via the C API rather than a
    Python-level isinstance call.
    """
    return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type)
cdef inline bint is_array(object val):
    """
    Cython equivalent of `isinstance(val, np.ndarray)`, using the
    numpy C-API check.
    """
    return PyArray_Check(val)
cdef inline bint is_nan(object val):
    """
    Check if val is a Not-A-Number float or complex, including
    float('NaN') and np.nan.

    Parameters
    ----------
    val : object

    Returns
    -------
    is_nan : bool
    """
    cdef float64_t fval

    if is_float_object(val):
        # compare through a C double: NaN is the only value != itself
        fval = val
        return fval != fval

    if not is_complex_object(val):
        return False
    return val != val
cdef inline const char* get_c_string_buf_and_size(str py_string,
                                                  Py_ssize_t *length) except NULL:
    """
    Return the internal UTF-8 char* buffer of `py_string`, storing the
    buffer length in `length`.

    Parameters
    ----------
    py_string : str
    length : Py_ssize_t*
        May be NULL when the buffer length is not needed.

    Returns
    -------
    buf : const char*

    Notes
    -----
    The Python object owns the memory; the returned char* must not be freed.
    """
    return PyUnicode_AsUTF8AndSize(py_string, length)
cdef inline const char* get_c_string(str py_string) except NULL:
    """UTF-8 buffer of `py_string`; the length is not reported."""
    return get_c_string_buf_and_size(py_string, NULL)

View File

@@ -0,0 +1,36 @@
"""
For cython types that cannot be represented precisely, closest-available
python equivalents are used, and the precise types kept as adjacent comments.
"""
from datetime import tzinfo
import numpy as np
from pandas._libs.tslibs.dtypes import Resolution
from pandas._libs.tslibs.offsets import BaseOffset
from pandas._typing import npt
def dt64arr_to_periodarr(
    stamps: npt.NDArray[np.int64],  # const int64_t[:]
    freq: int,
    tz: tzinfo | None,
) -> npt.NDArray[np.int64]: ...  # np.ndarray[np.int64, ndim=1]
def is_date_array_normalized(
    stamps: npt.NDArray[np.int64],  # const int64_t[:]
    tz: tzinfo | None = ...,
) -> bool: ...
def normalize_i8_timestamps(
    stamps: npt.NDArray[np.int64],  # const int64_t[:]
    tz: tzinfo | None,
) -> npt.NDArray[np.int64]: ...
def get_resolution(
    stamps: npt.NDArray[np.int64],  # const int64_t[:]
    tz: tzinfo | None = ...,
) -> Resolution: ...
def ints_to_pydatetime(
    arr: npt.NDArray[np.int64],  # const int64_t[:]
    tz: tzinfo | None = ...,
    freq: str | BaseOffset | None = ...,
    fold: bool = ...,
    box: str = ...,
) -> npt.NDArray[np.object_]: ...

View File

@@ -0,0 +1,436 @@
import cython
from cpython.datetime cimport (
date,
datetime,
time,
tzinfo,
)
import numpy as np
from numpy cimport (
int64_t,
intp_t,
ndarray,
)
from .conversion cimport normalize_i8_stamp
from .dtypes import Resolution
from .nattype cimport (
NPY_NAT,
c_NaT as NaT,
)
from .np_datetime cimport (
dt64_to_dtstruct,
npy_datetimestruct,
)
from .offsets cimport to_offset
from .period cimport get_period_ordinal
from .timestamps cimport create_timestamp_from_ts
from .timezones cimport (
get_dst_info,
is_tzlocal,
is_utc,
)
from .tzconversion cimport tz_convert_utc_to_tzlocal
# -------------------------------------------------------------------------
cdef inline object create_datetime_from_ts(
    int64_t value,
    npy_datetimestruct dts,
    tzinfo tz,
    object freq,
    bint fold,
):
    """
    Convenience routine to construct a datetime.datetime from its parts.

    Notes
    -----
    `value` and `freq` are unused here; the parameters exist so that all
    func_create callbacks in ints_to_pydatetime share one signature.
    """
    return datetime(
        dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us,
        tz, fold=fold,
    )
cdef inline object create_date_from_ts(
    int64_t value,
    npy_datetimestruct dts,
    tzinfo tz,
    object freq,
    bint fold
):
    """
    Convenience routine to construct a datetime.date from its parts.

    Only the year/month/day fields of `dts` are used; the remaining
    parameters keep the shared func_create signature.
    """
    # GH#25057 add fold argument to match other func_create signatures
    return date(dts.year, dts.month, dts.day)
cdef inline object create_time_from_ts(
    int64_t value,
    npy_datetimestruct dts,
    tzinfo tz,
    object freq,
    bint fold
):
    """
    Convenience routine to construct a datetime.time from its parts.

    `value` and `freq` are unused; kept for the shared func_create signature.
    """
    return time(dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold)
@cython.wraparound(False)
@cython.boundscheck(False)
def ints_to_pydatetime(
    const int64_t[:] arr,
    tzinfo tz=None,
    object freq=None,
    bint fold=False,
    str box="datetime"
) -> np.ndarray:
    """
    Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp.

    Parameters
    ----------
    arr : array of i8
    tz : str, optional
        convert to this timezone
    freq : str/Offset, optional
        freq to convert
    fold : bint, default is 0
        Due to daylight saving time, one wall clock time can occur twice
        when shifting from summer to winter time; fold describes whether the
        datetime-like corresponds to the first (0) or the second time (1)
        the wall clock hits the ambiguous time

        .. versionadded:: 1.1.0
    box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime'
        * If datetime, convert to datetime.datetime
        * If date, convert to datetime.date
        * If time, convert to datetime.time
        * If Timestamp, convert to pandas.Timestamp

    Returns
    -------
    ndarray[object] of type specified by box
    """
    cdef:
        Py_ssize_t i, n = len(arr)
        ndarray[int64_t] trans
        int64_t[:] deltas
        intp_t[:] pos
        npy_datetimestruct dts
        object dt, new_tz
        str typ
        int64_t value, local_value, delta = NPY_NAT  # dummy for delta
        ndarray[object] result = np.empty(n, dtype=object)
        object (*func_create)(int64_t, npy_datetimestruct, tzinfo, object, bint)
        bint use_utc = False, use_tzlocal = False, use_fixed = False
        bint use_pytz = False

    # select the constructor callback matching the requested box
    if box == "date":
        assert (tz is None), "tz should be None when converting to date"
        func_create = create_date_from_ts
    elif box == "timestamp":
        func_create = create_timestamp_from_ts
        if isinstance(freq, str):
            freq = to_offset(freq)
    elif box == "time":
        func_create = create_time_from_ts
    elif box == "datetime":
        func_create = create_datetime_from_ts
    else:
        raise ValueError(
            "box must be one of 'datetime', 'date', 'time' or 'timestamp'"
        )

    # decide once, outside the loop, how UTC values map to wall time
    if is_utc(tz) or tz is None:
        use_utc = True
    elif is_tzlocal(tz):
        use_tzlocal = True
    else:
        trans, deltas, typ = get_dst_info(tz)
        if typ not in ["pytz", "dateutil"]:
            # static/fixed; in this case we know that len(delta) == 1
            use_fixed = True
            delta = deltas[0]
        else:
            pos = trans.searchsorted(arr, side="right") - 1
            use_pytz = typ == "pytz"

    for i in range(n):
        new_tz = tz
        value = arr[i]

        if value == NPY_NAT:
            result[i] = <object>NaT
        else:
            if use_utc:
                local_value = value
            elif use_tzlocal:
                local_value = tz_convert_utc_to_tzlocal(value, tz)
            elif use_fixed:
                local_value = value + delta
            elif not use_pytz:
                # i.e. dateutil
                # no zone-name change for dateutil tzs - dst etc
                # represented in single object.
                local_value = value + deltas[pos[i]]
            else:
                # pytz
                # find right representation of dst etc in pytz timezone
                new_tz = tz._tzinfos[tz._transition_info[pos[i]]]
                local_value = value + deltas[pos[i]]

            dt64_to_dtstruct(local_value, &dts)
            result[i] = func_create(value, dts, new_tz, freq, fold)

    return result
# -------------------------------------------------------------------------
cdef:
    # Integer resolution codes, ordered finest (0) to coarsest (9); the
    # smaller the code, the finer the resolution. Fed to Resolution(...)
    # in get_resolution below — presumably these mirror the Resolution
    # enum values in tslibs.dtypes (TODO confirm).
    int RESO_NS = 0
    int RESO_US = 1
    int RESO_MS = 2
    int RESO_SEC = 3
    int RESO_MIN = 4
    int RESO_HR = 5
    int RESO_DAY = 6
    int RESO_MTH = 7
    int RESO_QTR = 8
    int RESO_YR = 9
cdef inline int _reso_stamp(npy_datetimestruct *dts):
    # Coarsest RESO_* code consistent with this timestamp's sub-day fields.
    # NOTE(review): sub-microsecond fields are not inspected, so RESO_NS is
    # never produced here — confirm intended.
    if dts.us != 0:
        if dts.us % 1000 == 0:
            # whole milliseconds
            return RESO_MS
        return RESO_US
    elif dts.sec != 0:
        return RESO_SEC
    elif dts.min != 0:
        return RESO_MIN
    elif dts.hour != 0:
        return RESO_HR
    return RESO_DAY
def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution:
    # Finest resolution (smallest RESO code) observed among the given i8
    # stamps, evaluated in wall time for `tz`; NaT entries are skipped.
    cdef:
        Py_ssize_t i, n = len(stamps)
        npy_datetimestruct dts
        int reso = RESO_DAY, curr_reso
        ndarray[int64_t] trans
        int64_t[:] deltas
        intp_t[:] pos
        int64_t local_val, delta = NPY_NAT
        bint use_utc = False, use_tzlocal = False, use_fixed = False

    # decide once how UTC i8 values map to wall time for `tz`
    if is_utc(tz) or tz is None:
        use_utc = True
    elif is_tzlocal(tz):
        use_tzlocal = True
    else:
        trans, deltas, typ = get_dst_info(tz)
        if typ not in ["pytz", "dateutil"]:
            # static/fixed; in this case we know that len(delta) == 1
            use_fixed = True
            delta = deltas[0]
        else:
            pos = trans.searchsorted(stamps, side="right") - 1

    for i in range(n):
        if stamps[i] == NPY_NAT:
            continue
        if use_utc:
            local_val = stamps[i]
        elif use_tzlocal:
            local_val = tz_convert_utc_to_tzlocal(stamps[i], tz)
        elif use_fixed:
            local_val = stamps[i] + delta
        else:
            local_val = stamps[i] + deltas[pos[i]]

        dt64_to_dtstruct(local_val, &dts)
        curr_reso = _reso_stamp(&dts)
        if curr_reso < reso:
            reso = curr_reso

    return Resolution(reso)
# -------------------------------------------------------------------------
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo tz):
    """
    Normalize each of the (nanosecond) timezone aware timestamps in the given
    array by rounding down to the beginning of the day (i.e. midnight).
    This is midnight for timezone, `tz`.

    Parameters
    ----------
    stamps : int64 ndarray
    tz : tzinfo or None

    Returns
    -------
    result : int64 ndarray of converted of normalized nanosecond timestamps
    """
    cdef:
        Py_ssize_t i, n = len(stamps)
        int64_t[:] result = np.empty(n, dtype=np.int64)
        ndarray[int64_t] trans
        int64_t[:] deltas
        str typ
        Py_ssize_t[:] pos
        int64_t local_val, delta = NPY_NAT
        bint use_utc = False, use_tzlocal = False, use_fixed = False

    # decide once how UTC i8 values map to wall time for `tz`
    if is_utc(tz) or tz is None:
        use_utc = True
    elif is_tzlocal(tz):
        use_tzlocal = True
    else:
        trans, deltas, typ = get_dst_info(tz)
        if typ not in ["pytz", "dateutil"]:
            # static/fixed; in this case we know that len(delta) == 1
            use_fixed = True
            delta = deltas[0]
        else:
            pos = trans.searchsorted(stamps, side="right") - 1

    for i in range(n):
        # TODO: reinstate nogil for use_utc case?
        if stamps[i] == NPY_NAT:
            result[i] = NPY_NAT
            continue

        if use_utc:
            local_val = stamps[i]
        elif use_tzlocal:
            local_val = tz_convert_utc_to_tzlocal(stamps[i], tz)
        elif use_fixed:
            local_val = stamps[i] + delta
        else:
            local_val = stamps[i] + deltas[pos[i]]

        # round the wall-time value down to midnight
        result[i] = normalize_i8_stamp(local_val)

    return result.base  # `.base` to access underlying ndarray
@cython.wraparound(False)
@cython.boundscheck(False)
def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool:
    """
    Check if all of the given (nanosecond) timestamps are normalized to
    midnight, i.e. hour == minute == second == 0.  If the optional timezone
    `tz` is not None, then this is midnight for this timezone.

    Parameters
    ----------
    stamps : int64 ndarray
    tz : tzinfo or None

    Returns
    -------
    is_normalized : bool True if all stamps are normalized

    Notes
    -----
    NOTE(review): NPY_NAT entries are not skipped; NPY_NAT % day_nanos is
    nonzero, so any NaT in `stamps` makes this return False — confirm that
    is intended.
    """
    cdef:
        Py_ssize_t i, n = len(stamps)
        ndarray[int64_t] trans
        int64_t[:] deltas
        intp_t[:] pos
        int64_t local_val, delta = NPY_NAT
        str typ
        int64_t day_nanos = 24 * 3600 * 1_000_000_000
        bint use_utc = False, use_tzlocal = False, use_fixed = False

    # decide once how UTC i8 values map to wall time for `tz`
    if is_utc(tz) or tz is None:
        use_utc = True
    elif is_tzlocal(tz):
        use_tzlocal = True
    else:
        trans, deltas, typ = get_dst_info(tz)
        if typ not in ["pytz", "dateutil"]:
            # static/fixed; in this case we know that len(delta) == 1
            use_fixed = True
            delta = deltas[0]
        else:
            pos = trans.searchsorted(stamps, side="right") - 1

    for i in range(n):
        if use_utc:
            local_val = stamps[i]
        elif use_tzlocal:
            local_val = tz_convert_utc_to_tzlocal(stamps[i], tz)
        elif use_fixed:
            local_val = stamps[i] + delta
        else:
            local_val = stamps[i] + deltas[pos[i]]

        # normalized wall times are an exact multiple of one day in ns
        if local_val % day_nanos != 0:
            return False

    return True
# -------------------------------------------------------------------------
@cython.wraparound(False)
@cython.boundscheck(False)
def dt64arr_to_periodarr(const int64_t[:] stamps, int freq, tzinfo tz):
    # Convert nanosecond i8 timestamps to period ordinals for `freq`,
    # evaluating each stamp in wall time for `tz` first; NaT passes through.
    cdef:
        Py_ssize_t n = len(stamps)
        int64_t[:] result = np.empty(n, dtype=np.int64)
        ndarray[int64_t] trans
        int64_t[:] deltas
        Py_ssize_t[:] pos
        npy_datetimestruct dts
        int64_t local_val, delta = NPY_NAT
        bint use_utc = False, use_tzlocal = False, use_fixed = False

    # decide once how UTC i8 values map to wall time for `tz`
    if is_utc(tz) or tz is None:
        use_utc = True
    elif is_tzlocal(tz):
        use_tzlocal = True
    else:
        trans, deltas, typ = get_dst_info(tz)
        if typ not in ["pytz", "dateutil"]:
            # static/fixed; in this case we know that len(delta) == 1
            use_fixed = True
            delta = deltas[0]
        else:
            pos = trans.searchsorted(stamps, side="right") - 1

    for i in range(n):
        # TODO: reinstate nogil for use_utc case?
        if stamps[i] == NPY_NAT:
            result[i] = NPY_NAT
            continue

        if use_utc:
            local_val = stamps[i]
        elif use_tzlocal:
            local_val = tz_convert_utc_to_tzlocal(stamps[i], tz)
        elif use_fixed:
            local_val = stamps[i] + delta
        else:
            local_val = stamps[i] + deltas[pos[i]]

        dt64_to_dtstruct(local_val, &dts)
        result[i] = get_period_ordinal(&dts, freq)

    return result.base  # .base to get underlying ndarray