first commit

Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

File diff suppressed because it is too large


@@ -0,0 +1,243 @@
from __future__ import annotations
import numpy as np
from pandas._libs import lib
from pandas.core.dtypes.cast import maybe_downcast_numeric
from pandas.core.dtypes.common import (
ensure_object,
is_datetime_or_timedelta_dtype,
is_decimal,
is_integer_dtype,
is_number,
is_numeric_dtype,
is_scalar,
needs_i8_conversion,
)
from pandas.core.dtypes.generic import (
ABCIndex,
ABCSeries,
)
import pandas as pd
from pandas.core.arrays.numeric import NumericArray
def to_numeric(arg, errors="raise", downcast=None):
"""
Convert argument to a numeric type.
The default return dtype is `float64` or `int64`
depending on the data supplied. Use the `downcast` parameter
to obtain other dtypes.
Please note that precision loss may occur if really large numbers
are passed in. Due to the internal limitations of `ndarray`, if
numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min)
or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are
passed in, it is very likely they will be converted to float so that
they can be stored in an `ndarray`. These warnings apply similarly to
`Series` since it internally leverages `ndarray`.
Parameters
----------
arg : scalar, list, tuple, 1-d array, or Series
Argument to be converted.
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
- If 'raise', then invalid parsing will raise an exception.
- If 'coerce', then invalid parsing will be set as NaN.
- If 'ignore', then invalid parsing will return the input.
downcast : str, default None
Can be 'integer', 'signed', 'unsigned', or 'float'.
If not None, and if the data has been successfully cast to a
numerical dtype (or if the data was numeric to begin with),
downcast that resulting data to the smallest numerical dtype
possible according to the following rules:
- 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
- 'unsigned': smallest unsigned int dtype (min.: np.uint8)
- 'float': smallest float dtype (min.: np.float32)
As this behaviour is separate from the core conversion to
numeric values, any errors raised during the downcasting
will be surfaced regardless of the value of the 'errors' input.
In addition, downcasting will only occur if the size
of the resulting data's dtype is strictly larger than
the dtype it is to be cast to, so if none of the dtypes
checked satisfy that specification, no downcasting will be
performed on the data.
Returns
-------
ret
Numeric if parsing succeeded.
Return type depends on input. Series if Series, otherwise ndarray.
See Also
--------
DataFrame.astype : Cast argument to a specified dtype.
to_datetime : Convert argument to datetime.
to_timedelta : Convert argument to timedelta.
numpy.ndarray.astype : Cast a numpy array to a specified type.
DataFrame.convert_dtypes : Convert dtypes.
Examples
--------
Take separate series and convert to numeric, coercing when told to
>>> s = pd.Series(['1.0', '2', -3])
>>> pd.to_numeric(s)
0 1.0
1 2.0
2 -3.0
dtype: float64
>>> pd.to_numeric(s, downcast='float')
0 1.0
1 2.0
2 -3.0
dtype: float32
>>> pd.to_numeric(s, downcast='signed')
0 1
1 2
2 -3
dtype: int8
>>> s = pd.Series(['apple', '1.0', '2', -3])
>>> pd.to_numeric(s, errors='ignore')
0 apple
1 1.0
2 2
3 -3
dtype: object
>>> pd.to_numeric(s, errors='coerce')
0 NaN
1 1.0
2 2.0
3 -3.0
dtype: float64
Downcasting of nullable integer and floating dtypes is supported:
>>> s = pd.Series([1, 2, 3], dtype="Int64")
>>> pd.to_numeric(s, downcast="integer")
0 1
1 2
2 3
dtype: Int8
>>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64")
>>> pd.to_numeric(s, downcast="float")
0 1.0
1 2.1
2 3.0
dtype: Float32
"""
if downcast not in (None, "integer", "signed", "unsigned", "float"):
raise ValueError("invalid downcasting method provided")
if errors not in ("ignore", "raise", "coerce"):
raise ValueError("invalid error value specified")
is_series = False
is_index = False
is_scalars = False
if isinstance(arg, ABCSeries):
is_series = True
values = arg.values
elif isinstance(arg, ABCIndex):
is_index = True
if needs_i8_conversion(arg.dtype):
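            # datetime64/timedelta64/period-backed indexes are handled through their
            # underlying int64 (i8) representation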
values = arg.asi8
else:
values = arg.values
elif isinstance(arg, (list, tuple)):
values = np.array(arg, dtype="O")
elif is_scalar(arg):
if is_decimal(arg):
return float(arg)
if is_number(arg):
return arg
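        # remaining scalars (e.g. strings) are wrapped in a 1-element object array so
        # they can reuse the array parsing path below; the value is unwrapped at the end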
is_scalars = True
values = np.array([arg], dtype="O")
elif getattr(arg, "ndim", 1) > 1:
raise TypeError("arg must be a list, tuple, 1-d array, or Series")
else:
values = arg
# GH33013: for IntegerArray & FloatingArray extract non-null values for casting
# save mask to reconstruct the full array after casting
mask: np.ndarray | None = None
if isinstance(values, NumericArray):
mask = values._mask
values = values._data[~mask]
values_dtype = getattr(values, "dtype", None)
if is_numeric_dtype(values_dtype):
pass
elif is_datetime_or_timedelta_dtype(values_dtype):
values = values.view(np.int64)
else:
values = ensure_object(values)
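    # only errors="coerce" asks maybe_convert_numeric to turn unparseable values into
    # NaN; with "ignore" the parsing error is swallowed below and the original values
    # are returned unchanged, while "raise" propagates it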
coerce_numeric = errors not in ("ignore", "raise")
try:
values, _ = lib.maybe_convert_numeric(
values, set(), coerce_numeric=coerce_numeric
)
except (ValueError, TypeError):
if errors == "raise":
raise
# attempt downcast only if the data has been successfully converted
# to a numerical dtype and if a downcast method has been specified
if downcast is not None and is_numeric_dtype(values.dtype):
typecodes: str | None = None
if downcast in ("integer", "signed"):
typecodes = np.typecodes["Integer"]
elif downcast == "unsigned" and (not len(values) or np.min(values) >= 0):
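            # unsigned downcasting is only attempted when the data is empty or all
            # values are non-negative; otherwise typecodes stays None and the data is
            # left as-is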
typecodes = np.typecodes["UnsignedInteger"]
elif downcast == "float":
typecodes = np.typecodes["Float"]
# pandas support goes only to np.float32,
# as float dtypes smaller than that are
# extremely rare and not well supported
float_32_char = np.dtype(np.float32).char
float_32_ind = typecodes.index(float_32_char)
typecodes = typecodes[float_32_ind:]
if typecodes is not None:
# from smallest to largest
for typecode in typecodes:
dtype = np.dtype(typecode)
if dtype.itemsize <= values.dtype.itemsize:
values = maybe_downcast_numeric(values, dtype)
# successful conversion
if values.dtype == dtype:
break
# GH33013: for IntegerArray & FloatingArray need to reconstruct masked array
if mask is not None:
data = np.zeros(mask.shape, dtype=values.dtype)
data[~mask] = values
from pandas.core.arrays import (
FloatingArray,
IntegerArray,
)
klass = IntegerArray if is_integer_dtype(data.dtype) else FloatingArray
values = klass(data, mask.copy())
if is_series:
return arg._constructor(values, index=arg.index, name=arg.name)
elif is_index:
# because we want to coerce to numeric if possible,
# do not use _shallow_copy
return pd.Index(values, name=arg.name)
elif is_scalars:
return values[0]
else:
return values


@@ -0,0 +1,196 @@
"""
timedelta support tools
"""
from __future__ import annotations
import numpy as np
from pandas._libs import lib
from pandas._libs.tslibs import (
NaT,
NaTType,
)
from pandas._libs.tslibs.timedeltas import (
Timedelta,
parse_timedelta_unit,
)
from pandas.core.dtypes.common import is_list_like
from pandas.core.dtypes.generic import (
ABCIndex,
ABCSeries,
)
from pandas.core.arrays.timedeltas import sequence_to_td64ns
def to_timedelta(arg, unit=None, errors="raise"):
"""
Convert argument to timedelta.
Timedeltas are absolute differences in times, expressed in difference
units (e.g. days, hours, minutes, seconds). This method converts
an argument from a recognized timedelta format / value into
a Timedelta type.
Parameters
----------
arg : str, timedelta, list-like or Series
The data to be converted to timedelta.
.. deprecated:: 1.2
Strings with units 'M', 'Y' and 'y' do not represent
unambiguous timedelta values and will be removed in a future version.
unit : str, optional
Denotes the unit of the arg for numeric `arg`. Defaults to ``"ns"``.
Possible values:
* 'W'
* 'D' / 'days' / 'day'
* 'hours' / 'hour' / 'hr' / 'h'
* 'm' / 'minute' / 'min' / 'minutes' / 'T'
* 'S' / 'seconds' / 'sec' / 'second'
* 'ms' / 'milliseconds' / 'millisecond' / 'milli' / 'millis' / 'L'
* 'us' / 'microseconds' / 'microsecond' / 'micro' / 'micros' / 'U'
* 'ns' / 'nanoseconds' / 'nano' / 'nanos' / 'nanosecond' / 'N'
.. versionchanged:: 1.1.0
Must not be specified when `arg` contains strings and
``errors="raise"``.
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
- If 'raise', then invalid parsing will raise an exception.
- If 'coerce', then invalid parsing will be set as NaT.
- If 'ignore', then invalid parsing will return the input.
Returns
-------
timedelta
If parsing succeeded.
Return type depends on input:
- list-like: TimedeltaIndex of timedelta64 dtype
- Series: Series of timedelta64 dtype
- scalar: Timedelta
See Also
--------
DataFrame.astype : Cast argument to a specified dtype.
to_datetime : Convert argument to datetime.
convert_dtypes : Convert dtypes.
Notes
-----
If the precision is higher than nanoseconds, the precision of the duration is
truncated to nanoseconds for string inputs.
Examples
--------
Parsing a single string to a Timedelta:
>>> pd.to_timedelta('1 days 06:05:01.00003')
Timedelta('1 days 06:05:01.000030')
>>> pd.to_timedelta('15.5us')
Timedelta('0 days 00:00:00.000015500')
Parsing a list or array of strings:
>>> pd.to_timedelta(['1 days 06:05:01.00003', '15.5us', 'nan'])
TimedeltaIndex(['1 days 06:05:01.000030', '0 days 00:00:00.000015500', NaT],
dtype='timedelta64[ns]', freq=None)
Converting numbers by specifying the `unit` keyword argument:
>>> pd.to_timedelta(np.arange(5), unit='s')
TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01', '0 days 00:00:02',
'0 days 00:00:03', '0 days 00:00:04'],
dtype='timedelta64[ns]', freq=None)
>>> pd.to_timedelta(np.arange(5), unit='d')
TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
dtype='timedelta64[ns]', freq=None)
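    Strings that cannot be parsed become ``NaT`` with ``errors='coerce'`` (an
    illustrative example of the coercion behaviour described above):
    >>> pd.to_timedelta(['1 days', 'foo'], errors='coerce')
    TimedeltaIndex(['1 days', NaT], dtype='timedelta64[ns]', freq=None)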
"""
if unit is not None:
unit = parse_timedelta_unit(unit)
if errors not in ("ignore", "raise", "coerce"):
raise ValueError("errors must be one of 'ignore', 'raise', or 'coerce'.")
if unit in {"Y", "y", "M"}:
raise ValueError(
"Units 'M', 'Y', and 'y' are no longer supported, as they do not "
"represent unambiguous timedelta values durations."
)
if arg is None:
return arg
elif isinstance(arg, ABCSeries):
values = _convert_listlike(arg._values, unit=unit, errors=errors)
return arg._constructor(values, index=arg.index, name=arg.name)
elif isinstance(arg, ABCIndex):
return _convert_listlike(arg, unit=unit, errors=errors, name=arg.name)
elif isinstance(arg, np.ndarray) and arg.ndim == 0:
# extract array scalar and process below
arg = lib.item_from_zerodim(arg)
elif is_list_like(arg) and getattr(arg, "ndim", 1) == 1:
return _convert_listlike(arg, unit=unit, errors=errors)
elif getattr(arg, "ndim", 1) > 1:
raise TypeError(
"arg must be a string, timedelta, list, tuple, 1-d array, or Series"
)
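    # a string such as "2 days" already encodes its own unit, so combining it with an
    # explicit ``unit=`` would be ambiguous (see the versionchanged note above)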
if isinstance(arg, str) and unit is not None:
raise ValueError("unit must not be specified if the input is/contains a str")
# ...so it must be a scalar value. Return scalar.
return _coerce_scalar_to_timedelta_type(arg, unit=unit, errors=errors)
def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"):
"""Convert string 'r' to a timedelta object."""
result: Timedelta | NaTType
try:
result = Timedelta(r, unit)
except ValueError:
if errors == "raise":
raise
elif errors == "ignore":
return r
# coerce
result = NaT
return result
def _convert_listlike(arg, unit=None, errors="raise", name=None):
"""Convert a list of objects to a timedelta index object."""
if isinstance(arg, (list, tuple)) or not hasattr(arg, "dtype"):
# This is needed only to ensure that in the case where we end up
# returning arg (errors == "ignore"), and where the input is a
# generator, we return a useful list-like instead of a
# used-up generator
arg = np.array(list(arg), dtype=object)
try:
td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0]
except ValueError:
if errors == "ignore":
return arg
else:
# This else-block accounts for the cases when errors='raise'
# and errors='coerce'. If errors == 'raise', these errors
# should be raised. If errors == 'coerce', we shouldn't
# expect any errors to be raised, since all parsing errors
# cause coercion to pd.NaT. However, if an error / bug is
# introduced that causes an Exception to be raised, we would
# like to surface it.
raise
from pandas import TimedeltaIndex
value = TimedeltaIndex(td64arr, unit="ns", name=name)
return value


@@ -0,0 +1,146 @@
from __future__ import annotations
from datetime import (
datetime,
time,
)
import numpy as np
from pandas._libs.lib import is_list_like
from pandas.core.dtypes.generic import (
ABCIndex,
ABCSeries,
)
from pandas.core.dtypes.missing import notna
def to_time(arg, format=None, infer_time_format=False, errors="raise"):
"""
Parse time strings to time objects using fixed strptime formats ("%H:%M",
"%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
"%I%M%S%p")
Use infer_time_format if all the strings are in the same format to speed
up conversion.
Parameters
----------
arg : string in time format, datetime.time, list, tuple, 1-d array, Series
format : str, default None
Format used to convert arg into a time object. If None, fixed formats
are used.
infer_time_format : bool, default False
Infer the time format based on the first non-NaN element. If all
strings are in the same format, this will speed up conversion.
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
- If 'raise', then invalid parsing will raise an exception
- If 'coerce', then invalid parsing will be set as None
- If 'ignore', then invalid parsing will return the input
Returns
-------
datetime.time
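    Examples
    --------
    Illustrative usage of this helper (it is not exported in the public ``pd``
    namespace):
    >>> to_time("14:30")
    datetime.time(14, 30)
    >>> to_time(["14:30", "2:45PM"])
    [datetime.time(14, 30), datetime.time(14, 45)]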
"""
def _convert_listlike(arg, format):
if isinstance(arg, (list, tuple)):
arg = np.array(arg, dtype="O")
elif getattr(arg, "ndim", 1) > 1:
raise TypeError(
"arg must be a string, datetime, list, tuple, 1-d array, or Series"
)
arg = np.asarray(arg, dtype="O")
if infer_time_format and format is None:
format = _guess_time_format_for_array(arg)
times: list[time | None] = []
if format is not None:
for element in arg:
try:
times.append(datetime.strptime(element, format).time())
except (ValueError, TypeError) as err:
if errors == "raise":
msg = (
f"Cannot convert {element} to a time with given "
f"format {format}"
)
raise ValueError(msg) from err
elif errors == "ignore":
return arg
else:
times.append(None)
else:
formats = _time_formats[:]
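            # work on a copy so that promoting a matched format to the front does not
            # reorder the module-level _time_formats list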
format_found = False
for element in arg:
time_object = None
for time_format in formats:
try:
time_object = datetime.strptime(element, time_format).time()
if not format_found:
# Put the found format in front
fmt = formats.pop(formats.index(time_format))
formats.insert(0, fmt)
format_found = True
break
except (ValueError, TypeError):
continue
if time_object is not None:
times.append(time_object)
elif errors == "raise":
raise ValueError(f"Cannot convert arg {arg} to a time")
elif errors == "ignore":
return arg
else:
times.append(None)
return times
if arg is None:
return arg
elif isinstance(arg, time):
return arg
elif isinstance(arg, ABCSeries):
values = _convert_listlike(arg._values, format)
return arg._constructor(values, index=arg.index, name=arg.name)
elif isinstance(arg, ABCIndex):
return _convert_listlike(arg, format)
elif is_list_like(arg):
return _convert_listlike(arg, format)
return _convert_listlike(np.array([arg]), format)[0]
# Fixed time formats for time parsing
_time_formats = [
"%H:%M",
"%H%M",
"%I:%M%p",
"%I%M%p",
"%H:%M:%S",
"%H%M%S",
"%I:%M:%S%p",
"%I%M%S%p",
]
def _guess_time_format_for_array(arr):
# Try to guess the format based on the first non-NaN element
non_nan_elements = notna(arr).nonzero()[0]
if len(non_nan_elements):
element = arr[non_nan_elements[0]]
for time_format in _time_formats:
try:
datetime.strptime(element, time_format)
return time_format
except ValueError:
pass
return None