first commit

This commit is contained in:
Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

View File

@ -0,0 +1,63 @@
import numpy as np
import pytest
from pandas.core.arrays import TimedeltaArray
class TestTimedeltaArrayConstructor:
def test_only_1dim_accepted(self):
# GH#25282
arr = np.array([0, 1, 2, 3], dtype="m8[h]").astype("m8[ns]")
with pytest.raises(ValueError, match="Only 1-dimensional"):
# 3-dim, we allow 2D to sneak in for ops purposes GH#29853
TimedeltaArray(arr.reshape(2, 2, 1))
with pytest.raises(ValueError, match="Only 1-dimensional"):
# 0-dim
TimedeltaArray(arr[[0]].squeeze())
def test_freq_validation(self):
# ensure that the public constructor cannot create an invalid instance
arr = np.array([0, 0, 1], dtype=np.int64) * 3600 * 10**9
msg = (
"Inferred frequency None from passed values does not "
"conform to passed frequency D"
)
with pytest.raises(ValueError, match=msg):
TimedeltaArray(arr.view("timedelta64[ns]"), freq="D")
def test_non_array_raises(self):
with pytest.raises(ValueError, match="list"):
TimedeltaArray([1, 2, 3])
def test_other_type_raises(self):
with pytest.raises(ValueError, match="dtype bool cannot be converted"):
TimedeltaArray(np.array([1, 2, 3], dtype="bool"))
def test_incorrect_dtype_raises(self):
# TODO: why TypeError for 'category' but ValueError for i8?
with pytest.raises(
ValueError, match=r"category cannot be converted to timedelta64\[ns\]"
):
TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype="category")
with pytest.raises(
ValueError, match=r"dtype int64 cannot be converted to timedelta64\[ns\]"
):
TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype=np.dtype("int64"))
def test_copy(self):
data = np.array([1, 2, 3], dtype="m8[ns]")
arr = TimedeltaArray(data, copy=False)
assert arr._data is data
arr = TimedeltaArray(data, copy=True)
assert arr._data is not data
assert arr._data.base is not data
def test_from_sequence_dtype(self):
msg = "dtype .*object.* cannot be converted to timedelta64"
with pytest.raises(ValueError, match=msg):
TimedeltaArray._from_sequence([], dtype=object)

View File

@ -0,0 +1,214 @@
import numpy as np
import pytest
import pandas as pd
from pandas import Timedelta
import pandas._testing as tm
from pandas.core import nanops
from pandas.core.arrays import TimedeltaArray
class TestReductions:
@pytest.mark.parametrize("name", ["std", "min", "max", "median", "mean"])
@pytest.mark.parametrize("skipna", [True, False])
def test_reductions_empty(self, name, skipna):
tdi = pd.TimedeltaIndex([])
arr = tdi.array
result = getattr(tdi, name)(skipna=skipna)
assert result is pd.NaT
result = getattr(arr, name)(skipna=skipna)
assert result is pd.NaT
@pytest.mark.parametrize("skipna", [True, False])
def test_sum_empty(self, skipna):
tdi = pd.TimedeltaIndex([])
arr = tdi.array
result = tdi.sum(skipna=skipna)
assert isinstance(result, Timedelta)
assert result == Timedelta(0)
result = arr.sum(skipna=skipna)
assert isinstance(result, Timedelta)
assert result == Timedelta(0)
def test_min_max(self):
arr = TimedeltaArray._from_sequence(["3H", "3H", "NaT", "2H", "5H", "4H"])
result = arr.min()
expected = Timedelta("2H")
assert result == expected
result = arr.max()
expected = Timedelta("5H")
assert result == expected
result = arr.min(skipna=False)
assert result is pd.NaT
result = arr.max(skipna=False)
assert result is pd.NaT
def test_sum(self):
tdi = pd.TimedeltaIndex(["3H", "3H", "NaT", "2H", "5H", "4H"])
arr = tdi.array
result = arr.sum(skipna=True)
expected = Timedelta(hours=17)
assert isinstance(result, Timedelta)
assert result == expected
result = tdi.sum(skipna=True)
assert isinstance(result, Timedelta)
assert result == expected
result = arr.sum(skipna=False)
assert result is pd.NaT
result = tdi.sum(skipna=False)
assert result is pd.NaT
result = arr.sum(min_count=9)
assert result is pd.NaT
result = tdi.sum(min_count=9)
assert result is pd.NaT
result = arr.sum(min_count=1)
assert isinstance(result, Timedelta)
assert result == expected
result = tdi.sum(min_count=1)
assert isinstance(result, Timedelta)
assert result == expected
def test_npsum(self):
# GH#25282, GH#25335 np.sum should return a Timedelta, not timedelta64
tdi = pd.TimedeltaIndex(["3H", "3H", "2H", "5H", "4H"])
arr = tdi.array
result = np.sum(tdi)
expected = Timedelta(hours=17)
assert isinstance(result, Timedelta)
assert result == expected
result = np.sum(arr)
assert isinstance(result, Timedelta)
assert result == expected
def test_sum_2d_skipna_false(self):
arr = np.arange(8).astype(np.int64).view("m8[s]").astype("m8[ns]").reshape(4, 2)
arr[-1, -1] = "Nat"
tda = TimedeltaArray(arr)
result = tda.sum(skipna=False)
assert result is pd.NaT
result = tda.sum(axis=0, skipna=False)
expected = pd.TimedeltaIndex([Timedelta(seconds=12), pd.NaT])._values
tm.assert_timedelta_array_equal(result, expected)
result = tda.sum(axis=1, skipna=False)
expected = pd.TimedeltaIndex(
[
Timedelta(seconds=1),
Timedelta(seconds=5),
Timedelta(seconds=9),
pd.NaT,
]
)._values
tm.assert_timedelta_array_equal(result, expected)
# Adding a Timestamp makes this a test for DatetimeArray.std
@pytest.mark.parametrize(
"add",
[
Timedelta(0),
pd.Timestamp("2021-01-01"),
pd.Timestamp("2021-01-01", tz="UTC"),
pd.Timestamp("2021-01-01", tz="Asia/Tokyo"),
],
)
def test_std(self, add):
tdi = pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"]) + add
arr = tdi.array
result = arr.std(skipna=True)
expected = Timedelta(hours=2)
assert isinstance(result, Timedelta)
assert result == expected
result = tdi.std(skipna=True)
assert isinstance(result, Timedelta)
assert result == expected
if getattr(arr, "tz", None) is None:
result = nanops.nanstd(np.asarray(arr), skipna=True)
assert isinstance(result, Timedelta)
assert result == expected
result = arr.std(skipna=False)
assert result is pd.NaT
result = tdi.std(skipna=False)
assert result is pd.NaT
if getattr(arr, "tz", None) is None:
result = nanops.nanstd(np.asarray(arr), skipna=False)
assert result is pd.NaT
def test_median(self):
tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"])
arr = tdi.array
result = arr.median(skipna=True)
expected = Timedelta(hours=2)
assert isinstance(result, Timedelta)
assert result == expected
result = tdi.median(skipna=True)
assert isinstance(result, Timedelta)
assert result == expected
result = arr.median(skipna=False)
assert result is pd.NaT
result = tdi.median(skipna=False)
assert result is pd.NaT
def test_mean(self):
tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"])
arr = tdi._data
# manually verified result
expected = Timedelta(arr.dropna()._ndarray.mean())
result = arr.mean()
assert result == expected
result = arr.mean(skipna=False)
assert result is pd.NaT
result = arr.dropna().mean(skipna=False)
assert result == expected
result = arr.mean(axis=0)
assert result == expected
def test_mean_2d(self):
tdi = pd.timedelta_range("14 days", periods=6)
tda = tdi._data.reshape(3, 2)
result = tda.mean(axis=0)
expected = tda[1]
tm.assert_timedelta_array_equal(result, expected)
result = tda.mean(axis=1)
expected = tda[:, 0] + Timedelta(hours=12)
tm.assert_timedelta_array_equal(result, expected)
result = tda.mean(axis=None)
expected = tdi.mean()
assert result == expected