mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-07-03 22:57:06 +00:00
first commit
This commit is contained in:
1268
.venv/Lib/site-packages/pandas/core/tools/datetimes.py
Normal file
1268
.venv/Lib/site-packages/pandas/core/tools/datetimes.py
Normal file
File diff suppressed because it is too large
Load Diff
243
.venv/Lib/site-packages/pandas/core/tools/numeric.py
Normal file
243
.venv/Lib/site-packages/pandas/core/tools/numeric.py
Normal file
@ -0,0 +1,243 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs import lib
|
||||
|
||||
from pandas.core.dtypes.cast import maybe_downcast_numeric
|
||||
from pandas.core.dtypes.common import (
|
||||
ensure_object,
|
||||
is_datetime_or_timedelta_dtype,
|
||||
is_decimal,
|
||||
is_integer_dtype,
|
||||
is_number,
|
||||
is_numeric_dtype,
|
||||
is_scalar,
|
||||
needs_i8_conversion,
|
||||
)
|
||||
from pandas.core.dtypes.generic import (
|
||||
ABCIndex,
|
||||
ABCSeries,
|
||||
)
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays.numeric import NumericArray
|
||||
|
||||
|
||||
def to_numeric(arg, errors="raise", downcast=None):
    """
    Convert argument to a numeric type.

    The default return dtype is `float64` or `int64`
    depending on the data supplied. Use the `downcast` parameter
    to obtain other dtypes.

    Please note that precision loss may occur if really large numbers
    are passed in. Due to the internal limitations of `ndarray`, if
    numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min)
    or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are
    passed in, it is very likely they will be converted to float so that
    they can be stored in an `ndarray`. These warnings apply similarly to
    `Series` since it internally leverages `ndarray`.

    Parameters
    ----------
    arg : scalar, list, tuple, 1-d array, or Series
        Argument to be converted.
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception.
        - If 'coerce', then invalid parsing will be set as NaN.
        - If 'ignore', then invalid parsing will return the input.
    downcast : str, default None
        Can be 'integer', 'signed', 'unsigned', or 'float'.
        If not None, and if the data has been successfully cast to a
        numerical dtype (or if the data was numeric to begin with),
        downcast that resulting data to the smallest numerical dtype
        possible according to the following rules:

        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
        - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
        - 'float': smallest float dtype (min.: np.float32)

        As this behaviour is separate from the core conversion to
        numeric values, any errors raised during the downcasting
        will be surfaced regardless of the value of the 'errors' input.

        In addition, downcasting will only occur if the size
        of the resulting data's dtype is strictly larger than
        the dtype it is to be cast to, so if none of the dtypes
        checked satisfy that specification, no downcasting will be
        performed on the data.

    Returns
    -------
    ret
        Numeric if parsing succeeded.
        Return type depends on input. Series if Series, otherwise ndarray.

    See Also
    --------
    DataFrame.astype : Cast argument to a specified dtype.
    to_datetime : Convert argument to datetime.
    to_timedelta : Convert argument to timedelta.
    numpy.ndarray.astype : Cast a numpy array to a specified type.
    DataFrame.convert_dtypes : Convert dtypes.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    0    1.0
    1    2.0
    2   -3.0
    dtype: float64
    >>> pd.to_numeric(s, downcast='float')
    0    1.0
    1    2.0
    2   -3.0
    dtype: float32
    >>> pd.to_numeric(s, downcast='signed')
    0    1
    1    2
    2   -3
    dtype: int8
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    0    apple
    1      1.0
    2        2
    3       -3
    dtype: object
    >>> pd.to_numeric(s, errors='coerce')
    0    NaN
    1    1.0
    2    2.0
    3   -3.0
    dtype: float64

    Downcasting of nullable integer and floating dtypes is supported:

    >>> s = pd.Series([1, 2, 3], dtype="Int64")
    >>> pd.to_numeric(s, downcast="integer")
    0    1
    1    2
    2    3
    dtype: Int8
    >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64")
    >>> pd.to_numeric(s, downcast="float")
    0    1.0
    1    2.1
    2    3.0
    dtype: Float32
    """
    # Validate keyword options up front so bad options fail fast,
    # regardless of the input data.
    if downcast not in (None, "integer", "signed", "unsigned", "float"):
        raise ValueError("invalid downcasting method provided")

    if errors not in ("ignore", "raise", "coerce"):
        raise ValueError("invalid error value specified")

    # Remember the input kind so the result can be re-wrapped at the end.
    is_series = False
    is_index = False
    is_scalars = False

    # Normalize the accepted input types down to a 1-d array of values.
    if isinstance(arg, ABCSeries):
        is_series = True
        values = arg.values
    elif isinstance(arg, ABCIndex):
        is_index = True
        if needs_i8_conversion(arg.dtype):
            # datetime-like index: work on the underlying int64 values
            values = arg.asi8
        else:
            values = arg.values
    elif isinstance(arg, (list, tuple)):
        values = np.array(arg, dtype="O")
    elif is_scalar(arg):
        if is_decimal(arg):
            # decimal.Decimal scalar: convert directly to float
            return float(arg)
        if is_number(arg):
            # already a numeric scalar; nothing to do
            return arg
        is_scalars = True
        values = np.array([arg], dtype="O")
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError("arg must be a list, tuple, 1-d array, or Series")
    else:
        values = arg

    # GH33013: for IntegerArray & FloatingArray extract non-null values for casting
    # save mask to reconstruct the full array after casting
    mask: np.ndarray | None = None
    if isinstance(values, NumericArray):
        mask = values._mask
        values = values._data[~mask]

    # Convert to a numeric dtype unless the values already are numeric.
    values_dtype = getattr(values, "dtype", None)
    if is_numeric_dtype(values_dtype):
        pass
    elif is_datetime_or_timedelta_dtype(values_dtype):
        # datetime64/timedelta64 values become their int64 representation
        values = values.view(np.int64)
    else:
        values = ensure_object(values)
        coerce_numeric = errors not in ("ignore", "raise")
        try:
            values, _ = lib.maybe_convert_numeric(
                values, set(), coerce_numeric=coerce_numeric
            )
        except (ValueError, TypeError):
            if errors == "raise":
                raise
            # errors == "ignore": fall through with the original object values

    # attempt downcast only if the data has been successfully converted
    # to a numerical dtype and if a downcast method has been specified
    if downcast is not None and is_numeric_dtype(values.dtype):
        typecodes: str | None = None

        if downcast in ("integer", "signed"):
            typecodes = np.typecodes["Integer"]
        elif downcast == "unsigned" and (not len(values) or np.min(values) >= 0):
            # unsigned downcast is only valid when no value is negative
            typecodes = np.typecodes["UnsignedInteger"]
        elif downcast == "float":
            typecodes = np.typecodes["Float"]

            # pandas support goes only to np.float32,
            # as float dtypes smaller than that are
            # extremely rare and not well supported
            float_32_char = np.dtype(np.float32).char
            float_32_ind = typecodes.index(float_32_char)
            typecodes = typecodes[float_32_ind:]

        if typecodes is not None:
            # from smallest to largest
            for typecode in typecodes:
                dtype = np.dtype(typecode)
                if dtype.itemsize <= values.dtype.itemsize:
                    values = maybe_downcast_numeric(values, dtype)

                    # successful conversion
                    if values.dtype == dtype:
                        break

    # GH33013: for IntegerArray & FloatingArray need to reconstruct masked array
    if mask is not None:
        data = np.zeros(mask.shape, dtype=values.dtype)
        data[~mask] = values

        from pandas.core.arrays import (
            FloatingArray,
            IntegerArray,
        )

        klass = IntegerArray if is_integer_dtype(data.dtype) else FloatingArray
        values = klass(data, mask.copy())

    # Re-wrap the converted values to match the input container.
    if is_series:
        return arg._constructor(values, index=arg.index, name=arg.name)
    elif is_index:
        # because we want to coerce to numeric if possible,
        # do not use _shallow_copy
        return pd.Index(values, name=arg.name)
    elif is_scalars:
        return values[0]
    else:
        return values
|
196
.venv/Lib/site-packages/pandas/core/tools/timedeltas.py
Normal file
196
.venv/Lib/site-packages/pandas/core/tools/timedeltas.py
Normal file
@ -0,0 +1,196 @@
|
||||
"""
|
||||
timedelta support tools
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs import lib
|
||||
from pandas._libs.tslibs import (
|
||||
NaT,
|
||||
NaTType,
|
||||
)
|
||||
from pandas._libs.tslibs.timedeltas import (
|
||||
Timedelta,
|
||||
parse_timedelta_unit,
|
||||
)
|
||||
|
||||
from pandas.core.dtypes.common import is_list_like
|
||||
from pandas.core.dtypes.generic import (
|
||||
ABCIndex,
|
||||
ABCSeries,
|
||||
)
|
||||
|
||||
from pandas.core.arrays.timedeltas import sequence_to_td64ns
|
||||
|
||||
|
||||
def to_timedelta(arg, unit=None, errors="raise"):
    """
    Convert the input to a timedelta type.

    Timedeltas are absolute differences in times, expressed in difference
    units (e.g. days, hours, minutes, seconds). A recognized timedelta
    format / value is converted to a ``Timedelta`` scalar, a
    ``TimedeltaIndex`` or a ``Series`` of timedelta64 dtype, depending on
    the input.

    Parameters
    ----------
    arg : str, timedelta, list-like or Series
        The data to be converted to timedelta. Strings with units
        'M', 'Y' and 'y' do not represent unambiguous timedelta values
        and are rejected (deprecated since 1.2).
    unit : str, optional
        Denotes the unit of the arg for numeric `arg`. Defaults to
        ``"ns"``. Accepts e.g. 'W', 'D'/'days', 'h'/'hours',
        'm'/'minutes'/'T', 's'/'seconds', 'ms'/'L', 'us'/'U', 'ns'/'N'.
        Must not be specified when `arg` contains strings and
        ``errors="raise"`` (versionchanged 1.1.0).
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - 'raise': invalid parsing raises an exception.
        - 'coerce': invalid parsing is set to NaT.
        - 'ignore': invalid parsing returns the input unchanged.

    Returns
    -------
    timedelta
        If parsing succeeded. Return type depends on input:

        - list-like: TimedeltaIndex of timedelta64 dtype
        - Series: Series of timedelta64 dtype
        - scalar: Timedelta

    See Also
    --------
    DataFrame.astype : Cast argument to a specified dtype.
    to_datetime : Convert argument to datetime.
    convert_dtypes : Convert dtypes.

    Notes
    -----
    If the precision is higher than nanoseconds, the precision of the
    duration is truncated to nanoseconds for string inputs.
    """
    # Canonicalize the unit spelling first (may raise for unknown units).
    if unit is not None:
        unit = parse_timedelta_unit(unit)

    if errors not in ("ignore", "raise", "coerce"):
        raise ValueError("errors must be one of 'ignore', 'raise', or 'coerce'.")

    # Calendar units are ambiguous as durations and are rejected outright.
    if unit in {"Y", "y", "M"}:
        raise ValueError(
            "Units 'M', 'Y', and 'y' are no longer supported, as they do not "
            "represent unambiguous timedelta values durations."
        )

    if arg is None:
        return arg

    if isinstance(arg, ABCSeries):
        converted = _convert_listlike(arg._values, unit=unit, errors=errors)
        return arg._constructor(converted, index=arg.index, name=arg.name)

    if isinstance(arg, ABCIndex):
        return _convert_listlike(arg, unit=unit, errors=errors, name=arg.name)

    if isinstance(arg, np.ndarray) and arg.ndim == 0:
        # extract array scalar and process below as a plain scalar
        arg = lib.item_from_zerodim(arg)
    elif is_list_like(arg) and getattr(arg, "ndim", 1) == 1:
        return _convert_listlike(arg, unit=unit, errors=errors)
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, timedelta, list, tuple, 1-d array, or Series"
        )

    if isinstance(arg, str) and unit is not None:
        raise ValueError("unit must not be specified if the input is/contains a str")

    # ...so it must be a scalar value. Return scalar.
    return _coerce_scalar_to_timedelta_type(arg, unit=unit, errors=errors)
|
||||
|
||||
|
||||
def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"):
|
||||
"""Convert string 'r' to a timedelta object."""
|
||||
result: Timedelta | NaTType
|
||||
|
||||
try:
|
||||
result = Timedelta(r, unit)
|
||||
except ValueError:
|
||||
if errors == "raise":
|
||||
raise
|
||||
elif errors == "ignore":
|
||||
return r
|
||||
|
||||
# coerce
|
||||
result = NaT
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def _convert_listlike(arg, unit=None, errors="raise", name=None):
|
||||
"""Convert a list of objects to a timedelta index object."""
|
||||
if isinstance(arg, (list, tuple)) or not hasattr(arg, "dtype"):
|
||||
# This is needed only to ensure that in the case where we end up
|
||||
# returning arg (errors == "ignore"), and where the input is a
|
||||
# generator, we return a useful list-like instead of a
|
||||
# used-up generator
|
||||
arg = np.array(list(arg), dtype=object)
|
||||
|
||||
try:
|
||||
td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0]
|
||||
except ValueError:
|
||||
if errors == "ignore":
|
||||
return arg
|
||||
else:
|
||||
# This else-block accounts for the cases when errors='raise'
|
||||
# and errors='coerce'. If errors == 'raise', these errors
|
||||
# should be raised. If errors == 'coerce', we shouldn't
|
||||
# expect any errors to be raised, since all parsing errors
|
||||
# cause coercion to pd.NaT. However, if an error / bug is
|
||||
# introduced that causes an Exception to be raised, we would
|
||||
# like to surface it.
|
||||
raise
|
||||
|
||||
from pandas import TimedeltaIndex
|
||||
|
||||
value = TimedeltaIndex(td64arr, unit="ns", name=name)
|
||||
return value
|
146
.venv/Lib/site-packages/pandas/core/tools/times.py
Normal file
146
.venv/Lib/site-packages/pandas/core/tools/times.py
Normal file
@ -0,0 +1,146 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import (
|
||||
datetime,
|
||||
time,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs.lib import is_list_like
|
||||
|
||||
from pandas.core.dtypes.generic import (
|
||||
ABCIndex,
|
||||
ABCSeries,
|
||||
)
|
||||
from pandas.core.dtypes.missing import notna
|
||||
|
||||
|
||||
def to_time(arg, format=None, infer_time_format=False, errors="raise"):
    """
    Parse time strings to time objects using fixed strptime formats ("%H:%M",
    "%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
    "%I%M%S%p")

    Use infer_time_format if all the strings are in the same format to speed
    up conversion.

    Parameters
    ----------
    arg : string in time format, datetime.time, list, tuple, 1-d array, Series
    format : str, default None
        Format used to convert arg into a time object. If None, fixed formats
        are used.
    infer_time_format: bool, default False
        Infer the time format based on the first non-NaN element. If all
        strings are in the same format, this will speed up conversion.
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as None
        - If 'ignore', then invalid parsing will return the input

    Returns
    -------
    datetime.time
    """

    def _convert_listlike(arg, format):
        # Parse a 1-d list-like of time strings into a list of
        # datetime.time objects, honoring the enclosing `errors` policy.

        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype="O")

        elif getattr(arg, "ndim", 1) > 1:
            raise TypeError(
                "arg must be a string, datetime, list, tuple, 1-d array, or Series"
            )

        arg = np.asarray(arg, dtype="O")

        # Optionally infer a single format from the first non-NaN element.
        if infer_time_format and format is None:
            format = _guess_time_format_for_array(arg)

        times: list[time | None] = []
        if format is not None:
            # A single known format: parse every element with it.
            for element in arg:
                try:
                    times.append(datetime.strptime(element, format).time())
                except (ValueError, TypeError) as err:
                    if errors == "raise":
                        msg = (
                            f"Cannot convert {element} to a time with given "
                            f"format {format}"
                        )
                        raise ValueError(msg) from err
                    elif errors == "ignore":
                        return arg
                    else:
                        # errors == "coerce": unparseable element becomes None
                        times.append(None)
        else:
            # No format known: try each fixed format per element. Once a
            # format matches, move it to the front of the candidate list so
            # homogeneous inputs hit it first on subsequent elements.
            formats = _time_formats[:]
            format_found = False
            for element in arg:
                time_object = None
                for time_format in formats:
                    try:
                        time_object = datetime.strptime(element, time_format).time()
                        if not format_found:
                            # Put the found format in front
                            fmt = formats.pop(formats.index(time_format))
                            formats.insert(0, fmt)
                            format_found = True
                        break
                    except (ValueError, TypeError):
                        continue

                if time_object is not None:
                    times.append(time_object)
                elif errors == "raise":
                    raise ValueError(f"Cannot convert arg {arg} to a time")
                elif errors == "ignore":
                    return arg
                else:
                    # errors == "coerce"
                    times.append(None)

        return times

    # Dispatch on the input type, re-wrapping the result to match.
    if arg is None:
        return arg
    elif isinstance(arg, time):
        # already a datetime.time; nothing to parse
        return arg
    elif isinstance(arg, ABCSeries):
        values = _convert_listlike(arg._values, format)
        return arg._constructor(values, index=arg.index, name=arg.name)
    elif isinstance(arg, ABCIndex):
        return _convert_listlike(arg, format)
    elif is_list_like(arg):
        return _convert_listlike(arg, format)

    # Scalar: parse via a length-1 array and unwrap the single result.
    return _convert_listlike(np.array([arg]), format)[0]
|
||||
|
||||
|
||||
# Fixed time formats for time parsing, tried in this order when no explicit
# format is given. Minute-resolution formats come before second-resolution
# ones, and within each group 24-hour ("%H") variants precede 12-hour
# ("%I...%p") variants. Order matters: _guess_time_format_for_array returns
# the first match, and to_time promotes a matching format to the front of
# its working copy.
_time_formats = [
    "%H:%M",
    "%H%M",
    "%I:%M%p",
    "%I%M%p",
    "%H:%M:%S",
    "%H%M%S",
    "%I:%M:%S%p",
    "%I%M%S%p",
]
|
||||
|
||||
|
||||
def _guess_time_format_for_array(arr):
    """
    Guess a time format from the first non-NaN element of ``arr``.

    Returns the first entry of ``_time_formats`` that successfully parses
    that element, or None when the array has no non-NaN elements or no
    fixed format matches.
    """
    valid_positions = notna(arr).nonzero()[0]
    if not len(valid_positions):
        return None

    sample = arr[valid_positions[0]]
    for candidate in _time_formats:
        try:
            datetime.strptime(sample, candidate)
        except ValueError:
            continue
        return candidate

    return None
|
Reference in New Issue
Block a user