first commit

Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions


@@ -0,0 +1,171 @@
from datetime import datetime

import numpy as np
import pytest

from pandas import (
    DataFrame,
    Series,
)
from pandas.core.indexes.datetimes import date_range
from pandas.core.indexes.period import period_range

# The various methods we support
downsample_methods = [
    "min",
    "max",
    "first",
    "last",
    "sum",
    "mean",
    "sem",
    "median",
    "prod",
    "var",
    "std",
    "ohlc",
    "quantile",
]
upsample_methods = ["count", "size"]
series_methods = ["nunique"]
resample_methods = downsample_methods + upsample_methods + series_methods


@pytest.fixture(params=downsample_methods)
def downsample_method(request):
    """Fixture for parametrization of Grouper downsample methods."""
    return request.param


@pytest.fixture(params=resample_methods)
def resample_method(request):
    """Fixture for parametrization of Grouper resample methods."""
    return request.param


@pytest.fixture
def simple_date_range_series():
    """
    Series with date range index and random data for test purposes.
    """

    def _simple_date_range_series(start, end, freq="D"):
        rng = date_range(start, end, freq=freq)
        return Series(np.random.randn(len(rng)), index=rng)

    return _simple_date_range_series
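
# Illustrative usage (assumed example, not part of this commit): a test
# requests the factory fixture and calls it to build ad-hoc data, e.g.
#
#     def test_something(simple_date_range_series):
#         ts = simple_date_range_series("1/1/2000", "1/10/2000", freq="H")
#         ...
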
@pytest.fixture
def simple_period_range_series():
    """
    Series with period range index and random data for test purposes.
    """

    def _simple_period_range_series(start, end, freq="D"):
        rng = period_range(start, end, freq=freq)
        return Series(np.random.randn(len(rng)), index=rng)

    return _simple_period_range_series


@pytest.fixture
def _index_start():
    """Fixture for parametrization of index, series and frame."""
    return datetime(2005, 1, 1)


@pytest.fixture
def _index_end():
    """Fixture for parametrization of index, series and frame."""
    return datetime(2005, 1, 10)


@pytest.fixture
def _index_freq():
    """Fixture for parametrization of index, series and frame."""
    return "D"


@pytest.fixture
def _index_name():
    """Fixture for parametrization of index, series and frame."""
    return None


@pytest.fixture
def index(_index_factory, _index_start, _index_end, _index_freq, _index_name):
    """
    Fixture for parametrization of date_range, period_range and
    timedelta_range indexes
    """
    return _index_factory(_index_start, _index_end, freq=_index_freq, name=_index_name)


@pytest.fixture
def _static_values(index):
    """
    Fixture for parametrization of values used in parametrization of
    Series and DataFrames with date_range, period_range and
    timedelta_range indexes
    """
    return np.arange(len(index))


@pytest.fixture
def _series_name():
    """
    Fixture for parametrization of Series name for Series used with
    date_range, period_range and timedelta_range indexes
    """
    return None


@pytest.fixture
def series(index, _series_name, _static_values):
    """
    Fixture for parametrization of Series with date_range, period_range and
    timedelta_range indexes
    """
    return Series(_static_values, index=index, name=_series_name)


@pytest.fixture
def empty_series_dti(series):
    """
    Fixture for parametrization of empty Series with date_range,
    period_range and timedelta_range indexes
    """
    return series[:0]


@pytest.fixture
def frame(index, _series_name, _static_values):
    """
    Fixture for parametrization of DataFrame with date_range, period_range
    and timedelta_range indexes
    """
    # _series_name is intentionally unused
    return DataFrame({"value": _static_values}, index=index)


@pytest.fixture
def empty_frame_dti(series):
    """
    Fixture for parametrization of empty DataFrame with date_range,
    period_range and timedelta_range indexes
    """
    index = series.index[:0]
    return DataFrame(index=index)


@pytest.fixture(params=[Series, DataFrame])
def series_and_frame(request, series, frame):
    """
    Fixture for parametrization of Series and DataFrame with date_range,
    period_range and timedelta_range indexes
    """
    if request.param == Series:
        return series
    if request.param == DataFrame:
        return frame


@@ -0,0 +1,256 @@
from datetime import datetime
import numpy as np
import pytest
from pandas import (
DataFrame,
NaT,
PeriodIndex,
Series,
)
import pandas._testing as tm
from pandas.core.groupby.groupby import DataError
from pandas.core.groupby.grouper import Grouper
from pandas.core.indexes.datetimes import date_range
from pandas.core.indexes.period import period_range
from pandas.core.indexes.timedeltas import timedelta_range
from pandas.core.resample import _asfreq_compat
# a fixture value can be overridden by the test parameter value. Note that the
# value of the fixture can be overridden this way even if the test doesn't use
# it directly (doesn't mention it in the function prototype).
# see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa:E501
# in this module we override the fixture values defined in conftest.py
# tuples of '_index_factory,_series_name,_index_start,_index_end'
DATE_RANGE = (date_range, "dti", datetime(2005, 1, 1), datetime(2005, 1, 10))
PERIOD_RANGE = (period_range, "pi", datetime(2005, 1, 1), datetime(2005, 1, 10))
TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day")
all_ts = pytest.mark.parametrize(
"_index_factory,_series_name,_index_start,_index_end",
[DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE],
)
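
# Minimal sketch of the override mechanism described above (illustrative only,
# not part of this commit): giving a test a parametrized argument with the same
# name as a fixture replaces that fixture's value for that test, even if the
# test body never touches the argument.
#
#     @pytest.mark.parametrize("_index_start", [datetime(2010, 1, 1)])
#     def test_uses_overridden_start(index):
#         ...  # `index` is now built from the 2010 start, not the 2005 default
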
@pytest.fixture
def create_index(_index_factory):
def _create_index(*args, **kwargs):
"""return the _index_factory created using the args, kwargs"""
return _index_factory(*args, **kwargs)
return _create_index
@pytest.mark.parametrize("freq", ["2D", "1H"])
@pytest.mark.parametrize(
"_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq(series_and_frame, freq, create_index):
obj = series_and_frame
result = obj.resample(freq).asfreq()
new_index = create_index(obj.index[0], obj.index[-1], freq=freq)
expected = obj.reindex(new_index)
tm.assert_almost_equal(result, expected)
@pytest.mark.parametrize(
"_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq_fill_value(series, create_index):
# test for fill value during resampling, issue 3715
s = series
result = s.resample("1H").asfreq()
new_index = create_index(s.index[0], s.index[-1], freq="1H")
expected = s.reindex(new_index)
tm.assert_series_equal(result, expected)
frame = s.to_frame("value")
frame.iloc[1] = None
result = frame.resample("1H").asfreq(fill_value=4.0)
new_index = create_index(frame.index[0], frame.index[-1], freq="1H")
expected = frame.reindex(new_index, fill_value=4.0)
tm.assert_frame_equal(result, expected)
@all_ts
def test_resample_interpolate(frame):
# GH 12925
df = frame
tm.assert_frame_equal(
df.resample("1T").asfreq().interpolate(), df.resample("1T").interpolate()
)
def test_raises_on_non_datetimelike_index():
# this is a non-datetimelike index
xp = DataFrame()
msg = (
"Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, "
"but got an instance of 'Index'"
)
with pytest.raises(TypeError, match=msg):
xp.resample("A").mean()
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_empty_series(freq, empty_series_dti, resample_method):
# GH12771 & GH12868
if resample_method == "ohlc":
pytest.skip("need to test for ohlc from GH13083")
s = empty_series_dti
result = getattr(s.resample(freq), resample_method)()
expected = s.copy()
expected.index = _asfreq_compat(s.index, freq)
tm.assert_index_equal(result.index, expected.index)
assert result.index.freq == expected.index.freq
tm.assert_series_equal(result, expected, check_dtype=False)
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_nat_index_series(request, freq, series, resample_method):
# GH39227
if freq == "M":
request.node.add_marker(pytest.mark.xfail(reason="Don't know why this fails"))
s = series.copy()
s.index = PeriodIndex([NaT] * len(s), freq=freq)
result = getattr(s.resample(freq), resample_method)()
if resample_method == "ohlc":
expected = DataFrame(
[], index=s.index[:0].copy(), columns=["open", "high", "low", "close"]
)
tm.assert_frame_equal(result, expected, check_dtype=False)
else:
expected = s[:0].copy()
tm.assert_series_equal(result, expected, check_dtype=False)
tm.assert_index_equal(result.index, expected.index)
assert result.index.freq == expected.index.freq
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
@pytest.mark.parametrize("resample_method", ["count", "size"])
def test_resample_count_empty_series(freq, empty_series_dti, resample_method):
# GH28427
result = getattr(empty_series_dti.resample(freq), resample_method)()
index = _asfreq_compat(empty_series_dti.index, freq)
expected = Series([], dtype="int64", index=index, name=empty_series_dti.name)
tm.assert_series_equal(result, expected)
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_empty_dataframe(empty_frame_dti, freq, resample_method):
# GH13212
df = empty_frame_dti
# count retains dimensions too
result = getattr(df.resample(freq), resample_method)()
if resample_method != "size":
expected = df.copy()
else:
# GH14962
expected = Series([], dtype=object)
expected.index = _asfreq_compat(df.index, freq)
tm.assert_index_equal(result.index, expected.index)
assert result.index.freq == expected.index.freq
tm.assert_almost_equal(result, expected, check_dtype=False)
# test size for GH13212 (currently stays as df)
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_count_empty_dataframe(freq, empty_frame_dti):
# GH28427
empty_frame_dti["a"] = []
result = empty_frame_dti.resample(freq).count()
index = _asfreq_compat(empty_frame_dti.index, freq)
expected = DataFrame({"a": []}, dtype="int64", index=index)
tm.assert_frame_equal(result, expected)
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_size_empty_dataframe(freq, empty_frame_dti):
# GH28427
empty_frame_dti["a"] = []
result = empty_frame_dti.resample(freq).size()
index = _asfreq_compat(empty_frame_dti.index, freq)
expected = Series([], dtype="int64", index=index)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
@pytest.mark.parametrize("dtype", [float, int, object, "datetime64[ns]"])
def test_resample_empty_dtypes(index, dtype, resample_method):
# Empty series were sometimes causing a segfault (for the functions
# with Cython bounds-checking disabled) or an IndexError. We just run
# them to ensure they no longer do. (GH #10228)
empty_series_dti = Series([], index, dtype)
try:
getattr(empty_series_dti.resample("d"), resample_method)()
except DataError:
# Ignore these since some combinations are invalid
# (ex: doing mean with dtype of np.object_)
pass
@all_ts
def test_apply_to_empty_series(empty_series_dti):
# GH 14313
s = empty_series_dti
for freq in ["M", "D", "H"]:
result = s.resample(freq).apply(lambda x: 1)
expected = s.resample(freq).apply(np.sum)
tm.assert_series_equal(result, expected, check_dtype=False)
@all_ts
def test_resampler_is_iterable(series):
# GH 15314
freq = "H"
tg = Grouper(freq=freq, convention="start")
grouped = series.groupby(tg)
resampled = series.resample(freq)
for (rk, rv), (gk, gv) in zip(resampled, grouped):
assert rk == gk
tm.assert_series_equal(rv, gv)
@all_ts
def test_resample_quantile(series):
# GH 15023
s = series
q = 0.75
freq = "H"
result = s.resample(freq).quantile(q)
expected = s.resample(freq).agg(lambda x: x.quantile(q)).rename(s.name)
tm.assert_series_equal(result, expected)

File diff suppressed because it is too large.


@@ -0,0 +1,316 @@
from datetime import (
datetime,
timedelta,
)
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Series,
)
import pandas._testing as tm
from pandas.core.indexes.datetimes import date_range
from pandas.core.indexes.period import (
PeriodIndex,
period_range,
)
from pandas.core.indexes.timedeltas import timedelta_range
from pandas.tseries.offsets import (
BDay,
Minute,
)
DATE_RANGE = (date_range, "dti", datetime(2005, 1, 1), datetime(2005, 1, 10))
PERIOD_RANGE = (period_range, "pi", datetime(2005, 1, 1), datetime(2005, 1, 10))
TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day")
all_ts = pytest.mark.parametrize(
"_index_factory,_series_name,_index_start,_index_end",
[DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE],
)
@pytest.fixture()
def _index_factory():
return period_range
@pytest.fixture
def create_index(_index_factory):
def _create_index(*args, **kwargs):
"""return the _index_factory created using the args, kwargs"""
return _index_factory(*args, **kwargs)
return _create_index
# new test to check that all FutureWarnings are triggered
def test_deprecating_on_loffset_and_base():
# GH 31809
idx = date_range("2001-01-01", periods=4, freq="T")
df = DataFrame(data=4 * [range(2)], index=idx, columns=["a", "b"])
with tm.assert_produces_warning(FutureWarning):
pd.Grouper(freq="10s", base=0)
with tm.assert_produces_warning(FutureWarning):
pd.Grouper(freq="10s", loffset="0s")
# not checking the stacklevel for .groupby().resample() because it's complicated to
# reconcile it with the stacklevel for Series.resample() and DataFrame.resample();
# see GH #37603
with tm.assert_produces_warning(FutureWarning):
df.groupby("a").resample("3T", base=0).sum()
with tm.assert_produces_warning(FutureWarning):
df.groupby("a").resample("3T", loffset="0s").sum()
msg = "'offset' and 'base' cannot be present at the same time"
with tm.assert_produces_warning(FutureWarning):
with pytest.raises(ValueError, match=msg):
df.groupby("a").resample("3T", base=0, offset=0).sum()
with tm.assert_produces_warning(FutureWarning):
df.resample("3T", base=0).sum()
with tm.assert_produces_warning(FutureWarning):
df.resample("3T", loffset="0s").sum()
@all_ts
@pytest.mark.parametrize("arg", ["mean", {"value": "mean"}, ["mean"]])
def test_resample_loffset_arg_type(frame, create_index, arg):
# GH 13218, 15002
df = frame
expected_means = [df.values[i : i + 2].mean() for i in range(0, len(df.values), 2)]
expected_index = create_index(df.index[0], periods=len(df.index) / 2, freq="2D")
# loffset coerces PeriodIndex to DateTimeIndex
if isinstance(expected_index, PeriodIndex):
expected_index = expected_index.to_timestamp()
expected_index += timedelta(hours=2)
expected = DataFrame({"value": expected_means}, index=expected_index)
with tm.assert_produces_warning(FutureWarning):
result_agg = df.resample("2D", loffset="2H").agg(arg)
if isinstance(arg, list):
expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
tm.assert_frame_equal(result_agg, expected)
@pytest.mark.parametrize(
"loffset", [timedelta(minutes=1), "1min", Minute(1), np.timedelta64(1, "m")]
)
def test_resample_loffset(loffset):
# GH 7687
rng = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min")
s = Series(np.random.randn(14), index=rng)
with tm.assert_produces_warning(FutureWarning):
result = s.resample(
"5min", closed="right", label="right", loffset=loffset
).mean()
idx = date_range("1/1/2000", periods=4, freq="5min")
expected = Series(
[s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()],
index=idx + timedelta(minutes=1),
)
tm.assert_series_equal(result, expected)
assert result.index.freq == Minute(5)
# from daily
dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D")
ser = Series(np.random.rand(len(dti)), dti)
# to weekly
result = ser.resample("w-sun").last()
business_day_offset = BDay()
with tm.assert_produces_warning(FutureWarning):
expected = ser.resample("w-sun", loffset=-business_day_offset).last()
assert result.index[0] - business_day_offset == expected.index[0]
def test_resample_loffset_upsample():
# GH 20744
rng = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min")
s = Series(np.random.randn(14), index=rng)
with tm.assert_produces_warning(FutureWarning):
result = s.resample(
"5min", closed="right", label="right", loffset=timedelta(minutes=1)
).ffill()
idx = date_range("1/1/2000", periods=4, freq="5min")
expected = Series([s[0], s[5], s[10], s[-1]], index=idx + timedelta(minutes=1))
tm.assert_series_equal(result, expected)
def test_resample_loffset_count():
# GH 12725
start_time = "1/1/2000 00:00:00"
rng = date_range(start_time, periods=100, freq="S")
ts = Series(np.random.randn(len(rng)), index=rng)
with tm.assert_produces_warning(FutureWarning):
result = ts.resample("10S", loffset="1s").count()
expected_index = date_range(start_time, periods=10, freq="10S") + timedelta(
seconds=1
)
expected = Series(10, index=expected_index)
tm.assert_series_equal(result, expected)
# Same issue should apply to .size() since it goes through
# same code path
with tm.assert_produces_warning(FutureWarning):
result = ts.resample("10S", loffset="1s").size()
tm.assert_series_equal(result, expected)
def test_resample_base():
rng = date_range("1/1/2000 00:00:00", "1/1/2000 02:00", freq="s")
ts = Series(np.random.randn(len(rng)), index=rng)
with tm.assert_produces_warning(FutureWarning):
resampled = ts.resample("5min", base=2).mean()
exp_rng = date_range("12/31/1999 23:57:00", "1/1/2000 01:57", freq="5min")
tm.assert_index_equal(resampled.index, exp_rng)
def test_resample_float_base():
# GH25161
dt = pd.to_datetime(
["2018-11-26 16:17:43.51", "2018-11-26 16:17:44.51", "2018-11-26 16:17:45.51"]
)
s = Series(np.arange(3), index=dt)
base = 17 + 43.51 / 60
with tm.assert_produces_warning(FutureWarning):
result = s.resample("3min", base=base).size()
expected = Series(
3, index=pd.DatetimeIndex(["2018-11-26 16:17:43.51"], freq="3min")
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("kind", ["period", None, "timestamp"])
@pytest.mark.parametrize("agg_arg", ["mean", {"value": "mean"}, ["mean"]])
def test_loffset_returns_datetimeindex(frame, kind, agg_arg):
# make sure passing loffset returns DatetimeIndex in all cases
# basic method taken from Base.test_resample_loffset_arg_type()
df = frame
expected_means = [df.values[i : i + 2].mean() for i in range(0, len(df.values), 2)]
expected_index = period_range(df.index[0], periods=len(df.index) / 2, freq="2D")
# loffset coerces PeriodIndex to DateTimeIndex
expected_index = expected_index.to_timestamp()
expected_index += timedelta(hours=2)
expected = DataFrame({"value": expected_means}, index=expected_index)
with tm.assert_produces_warning(FutureWarning):
result_agg = df.resample("2D", loffset="2H", kind=kind).agg(agg_arg)
if isinstance(agg_arg, list):
expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
tm.assert_frame_equal(result_agg, expected)
@pytest.mark.parametrize(
"start,end,start_freq,end_freq,base,offset",
[
("19910905", "19910909 03:00", "H", "24H", 10, "10H"),
("19910905", "19910909 12:00", "H", "24H", 10, "10H"),
("19910905", "19910909 23:00", "H", "24H", 10, "10H"),
("19910905 10:00", "19910909", "H", "24H", 10, "10H"),
("19910905 10:00", "19910909 10:00", "H", "24H", 10, "10H"),
("19910905", "19910909 10:00", "H", "24H", 10, "10H"),
("19910905 12:00", "19910909", "H", "24H", 10, "10H"),
("19910905 12:00", "19910909 03:00", "H", "24H", 10, "10H"),
("19910905 12:00", "19910909 12:00", "H", "24H", 10, "10H"),
("19910905 12:00", "19910909 12:00", "H", "24H", 34, "34H"),
("19910905 12:00", "19910909 12:00", "H", "17H", 10, "10H"),
("19910905 12:00", "19910909 12:00", "H", "17H", 3, "3H"),
("19910905 12:00", "19910909 1:00", "H", "M", 3, "3H"),
("19910905", "19910913 06:00", "2H", "24H", 10, "10H"),
("19910905", "19910905 01:39", "Min", "5Min", 3, "3Min"),
("19910905", "19910905 03:18", "2Min", "5Min", 3, "3Min"),
],
)
def test_resample_with_non_zero_base(start, end, start_freq, end_freq, base, offset):
# GH 23882
s = Series(0, index=period_range(start, end, freq=start_freq))
s = s + np.arange(len(s))
with tm.assert_produces_warning(FutureWarning):
result = s.resample(end_freq, base=base).mean()
result = result.to_timestamp(end_freq)
# test that the replacement argument 'offset' works
result_offset = s.resample(end_freq, offset=offset).mean()
result_offset = result_offset.to_timestamp(end_freq)
tm.assert_series_equal(result, result_offset)
# to_timestamp casts 24H -> D
result = result.asfreq(end_freq) if end_freq == "24H" else result
with tm.assert_produces_warning(FutureWarning):
expected = s.to_timestamp().resample(end_freq, base=base).mean()
if end_freq == "M":
# TODO: is non-tick the relevant characteristic? (GH 33815)
expected.index = expected.index._with_freq(None)
tm.assert_series_equal(result, expected)
def test_resample_base_with_timedeltaindex():
# GH 10530
rng = timedelta_range(start="0s", periods=25, freq="s")
ts = Series(np.random.randn(len(rng)), index=rng)
with tm.assert_produces_warning(FutureWarning):
with_base = ts.resample("2s", base=5).mean()
without_base = ts.resample("2s").mean()
exp_without_base = timedelta_range(start="0s", end="25s", freq="2s")
exp_with_base = timedelta_range(start="5s", end="29s", freq="2s")
tm.assert_index_equal(without_base.index, exp_without_base)
tm.assert_index_equal(with_base.index, exp_with_base)
def test_interpolate_posargs_deprecation():
# GH 41485
idx = pd.to_datetime(["1992-08-27 07:46:48", "1992-08-27 07:46:59"])
s = Series([1, 4], index=idx)
msg = (
r"In a future version of pandas all arguments of Resampler\.interpolate "
r"except for the argument 'method' will be keyword-only"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.resample("3s").interpolate("linear", 0)
idx = pd.to_datetime(
[
"1992-08-27 07:46:48",
"1992-08-27 07:46:51",
"1992-08-27 07:46:54",
"1992-08-27 07:46:57",
]
)
expected = Series([1.0, 1.0, 1.0, 1.0], index=idx)
expected.index._data.freq = "3s"
tm.assert_series_equal(result, expected)
def test_pad_backfill_deprecation():
# GH 33396
s = Series([1, 2, 3], index=date_range("20180101", periods=3, freq="h"))
with tm.assert_produces_warning(FutureWarning, match="backfill"):
s.resample("30min").backfill()
with tm.assert_produces_warning(FutureWarning, match="pad"):
s.resample("30min").pad()


@@ -0,0 +1,878 @@
from datetime import datetime
import dateutil
import numpy as np
import pytest
import pytz
from pandas._libs.tslibs.ccalendar import (
DAYS,
MONTHS,
)
from pandas._libs.tslibs.period import IncompatibleFrequency
from pandas.errors import InvalidIndexError
import pandas as pd
from pandas import (
DataFrame,
Series,
Timestamp,
)
import pandas._testing as tm
from pandas.core.indexes.datetimes import date_range
from pandas.core.indexes.period import (
Period,
PeriodIndex,
period_range,
)
from pandas.core.resample import _get_period_range_edges
import pandas.tseries.offsets as offsets
@pytest.fixture()
def _index_factory():
return period_range
@pytest.fixture
def _series_name():
return "pi"
class TestPeriodIndex:
@pytest.mark.parametrize("freq", ["2D", "1H", "2H"])
@pytest.mark.parametrize("kind", ["period", None, "timestamp"])
def test_asfreq(self, series_and_frame, freq, kind):
# GH 12884, 15944
# make sure .asfreq() returns PeriodIndex (except kind='timestamp')
obj = series_and_frame
if kind == "timestamp":
expected = obj.to_timestamp().resample(freq).asfreq()
else:
start = obj.index[0].to_timestamp(how="start")
end = (obj.index[-1] + obj.index.freq).to_timestamp(how="start")
new_index = date_range(start=start, end=end, freq=freq, inclusive="left")
expected = obj.to_timestamp().reindex(new_index).to_period(freq)
result = obj.resample(freq, kind=kind).asfreq()
tm.assert_almost_equal(result, expected)
def test_asfreq_fill_value(self, series):
# test for fill value during resampling, issue 3715
s = series
new_index = date_range(
s.index[0].to_timestamp(how="start"),
(s.index[-1]).to_timestamp(how="start"),
freq="1H",
)
expected = s.to_timestamp().reindex(new_index, fill_value=4.0)
result = s.resample("1H", kind="timestamp").asfreq(fill_value=4.0)
tm.assert_series_equal(result, expected)
frame = s.to_frame("value")
new_index = date_range(
frame.index[0].to_timestamp(how="start"),
(frame.index[-1]).to_timestamp(how="start"),
freq="1H",
)
expected = frame.to_timestamp().reindex(new_index, fill_value=3.0)
result = frame.resample("1H", kind="timestamp").asfreq(fill_value=3.0)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("freq", ["H", "12H", "2D", "W"])
@pytest.mark.parametrize("kind", [None, "period", "timestamp"])
@pytest.mark.parametrize("kwargs", [{"on": "date"}, {"level": "d"}])
def test_selection(self, index, freq, kind, kwargs):
# This is a bug, these should be implemented
# GH 14008
rng = np.arange(len(index), dtype=np.int64)
df = DataFrame(
{"date": index, "a": rng},
index=pd.MultiIndex.from_arrays([rng, index], names=["v", "d"]),
)
msg = (
"Resampling from level= or on= selection with a PeriodIndex is "
r"not currently supported, use \.set_index\(\.\.\.\) to "
"explicitly set index"
)
with pytest.raises(NotImplementedError, match=msg):
df.resample(freq, kind=kind, **kwargs)
@pytest.mark.parametrize("month", MONTHS)
@pytest.mark.parametrize("meth", ["ffill", "bfill"])
@pytest.mark.parametrize("conv", ["start", "end"])
@pytest.mark.parametrize("targ", ["D", "B", "M"])
def test_annual_upsample_cases(
self, targ, conv, meth, month, simple_period_range_series
):
ts = simple_period_range_series("1/1/1990", "12/31/1991", freq=f"A-{month}")
result = getattr(ts.resample(targ, convention=conv), meth)()
expected = result.to_timestamp(targ, how=conv)
expected = expected.asfreq(targ, meth).to_period()
tm.assert_series_equal(result, expected)
def test_basic_downsample(self, simple_period_range_series):
ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="M")
result = ts.resample("a-dec").mean()
expected = ts.groupby(ts.index.year).mean()
expected.index = period_range("1/1/1990", "6/30/1995", freq="a-dec")
tm.assert_series_equal(result, expected)
# this is ok
tm.assert_series_equal(ts.resample("a-dec").mean(), result)
tm.assert_series_equal(ts.resample("a").mean(), result)
@pytest.mark.parametrize(
"rule,expected_error_msg",
[
("a-dec", "<YearEnd: month=12>"),
("q-mar", "<QuarterEnd: startingMonth=3>"),
("M", "<MonthEnd>"),
("w-thu", "<Week: weekday=3>"),
],
)
def test_not_subperiod(self, simple_period_range_series, rule, expected_error_msg):
# These are incompatible period rules for resampling
ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="w-wed")
msg = (
"Frequency <Week: weekday=2> cannot be resampled to "
f"{expected_error_msg}, as they are not sub or super periods"
)
with pytest.raises(IncompatibleFrequency, match=msg):
ts.resample(rule).mean()
@pytest.mark.parametrize("freq", ["D", "2D"])
def test_basic_upsample(self, freq, simple_period_range_series):
ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="M")
result = ts.resample("a-dec").mean()
resampled = result.resample(freq, convention="end").ffill()
expected = result.to_timestamp(freq, how="end")
expected = expected.asfreq(freq, "ffill").to_period(freq)
tm.assert_series_equal(resampled, expected)
def test_upsample_with_limit(self):
rng = period_range("1/1/2000", periods=5, freq="A")
ts = Series(np.random.randn(len(rng)), rng)
result = ts.resample("M", convention="end").ffill(limit=2)
expected = ts.asfreq("M").reindex(result.index, method="ffill", limit=2)
tm.assert_series_equal(result, expected)
def test_annual_upsample(self, simple_period_range_series):
ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="A-DEC")
df = DataFrame({"a": ts})
rdf = df.resample("D").ffill()
exp = df["a"].resample("D").ffill()
tm.assert_series_equal(rdf["a"], exp)
rng = period_range("2000", "2003", freq="A-DEC")
ts = Series([1, 2, 3, 4], index=rng)
result = ts.resample("M").ffill()
ex_index = period_range("2000-01", "2003-12", freq="M")
expected = ts.asfreq("M", how="start").reindex(ex_index, method="ffill")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("month", MONTHS)
@pytest.mark.parametrize("target", ["D", "B", "M"])
@pytest.mark.parametrize("convention", ["start", "end"])
def test_quarterly_upsample(
self, month, target, convention, simple_period_range_series
):
freq = f"Q-{month}"
ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq)
result = ts.resample(target, convention=convention).ffill()
expected = result.to_timestamp(target, how=convention)
expected = expected.asfreq(target, "ffill").to_period()
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("target", ["D", "B"])
@pytest.mark.parametrize("convention", ["start", "end"])
def test_monthly_upsample(self, target, convention, simple_period_range_series):
ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="M")
result = ts.resample(target, convention=convention).ffill()
expected = result.to_timestamp(target, how=convention)
expected = expected.asfreq(target, "ffill").to_period()
tm.assert_series_equal(result, expected)
def test_resample_basic(self):
# GH3609
s = Series(
range(100),
index=date_range("20130101", freq="s", periods=100, name="idx"),
dtype="float",
)
s[10:30] = np.nan
index = PeriodIndex(
[Period("2013-01-01 00:00", "T"), Period("2013-01-01 00:01", "T")],
name="idx",
)
expected = Series([34.5, 79.5], index=index)
result = s.to_period().resample("T", kind="period").mean()
tm.assert_series_equal(result, expected)
result2 = s.resample("T", kind="period").mean()
tm.assert_series_equal(result2, expected)
@pytest.mark.parametrize(
"freq,expected_vals", [("M", [31, 29, 31, 9]), ("2M", [31 + 29, 31 + 9])]
)
def test_resample_count(self, freq, expected_vals):
# GH12774
series = Series(1, index=period_range(start="2000", periods=100))
result = series.resample(freq).count()
expected_index = period_range(
start="2000", freq=freq, periods=len(expected_vals)
)
expected = Series(expected_vals, index=expected_index)
tm.assert_series_equal(result, expected)
def test_resample_same_freq(self, resample_method):
# GH12770
series = Series(range(3), index=period_range(start="2000", periods=3, freq="M"))
expected = series
result = getattr(series.resample("M"), resample_method)()
tm.assert_series_equal(result, expected)
def test_resample_incompat_freq(self):
msg = (
"Frequency <MonthEnd> cannot be resampled to <Week: weekday=6>, "
"as they are not sub or super periods"
)
with pytest.raises(IncompatibleFrequency, match=msg):
Series(
range(3), index=period_range(start="2000", periods=3, freq="M")
).resample("W").mean()
def test_with_local_timezone_pytz(self):
# see gh-5430
local_timezone = pytz.timezone("America/Los_Angeles")
start = datetime(year=2013, month=11, day=1, hour=0, minute=0, tzinfo=pytz.utc)
# 1 day later
end = datetime(year=2013, month=11, day=2, hour=0, minute=0, tzinfo=pytz.utc)
index = date_range(start, end, freq="H")
series = Series(1, index=index)
series = series.tz_convert(local_timezone)
result = series.resample("D", kind="period").mean()
# Create the expected series
# Index is moved back a day with the timezone conversion from UTC to
# Pacific
expected_index = period_range(start=start, end=end, freq="D") - offsets.Day()
expected = Series(1.0, index=expected_index)
tm.assert_series_equal(result, expected)
def test_resample_with_pytz(self):
# GH 13238
s = Series(
2, index=date_range("2017-01-01", periods=48, freq="H", tz="US/Eastern")
)
result = s.resample("D").mean()
expected = Series(
2.0,
index=pd.DatetimeIndex(
["2017-01-01", "2017-01-02"], tz="US/Eastern", freq="D"
),
)
tm.assert_series_equal(result, expected)
# Especially assert that the timezone is LMT for pytz
assert result.index.tz == pytz.timezone("US/Eastern")
def test_with_local_timezone_dateutil(self):
# see gh-5430
local_timezone = "dateutil/America/Los_Angeles"
start = datetime(
year=2013, month=11, day=1, hour=0, minute=0, tzinfo=dateutil.tz.tzutc()
)
# 1 day later
end = datetime(
year=2013, month=11, day=2, hour=0, minute=0, tzinfo=dateutil.tz.tzutc()
)
index = date_range(start, end, freq="H", name="idx")
series = Series(1, index=index)
series = series.tz_convert(local_timezone)
result = series.resample("D", kind="period").mean()
# Create the expected series
# Index is moved back a day with the timezone conversion from UTC to
# Pacific
expected_index = (
period_range(start=start, end=end, freq="D", name="idx") - offsets.Day()
)
expected = Series(1.0, index=expected_index)
tm.assert_series_equal(result, expected)
def test_resample_nonexistent_time_bin_edge(self):
# GH 19375
index = date_range("2017-03-12", "2017-03-12 1:45:00", freq="15T")
s = Series(np.zeros(len(index)), index=index)
expected = s.tz_localize("US/Pacific")
expected.index = pd.DatetimeIndex(expected.index, freq="900S")
result = expected.resample("900S").mean()
tm.assert_series_equal(result, expected)
# GH 23742
index = date_range(start="2017-10-10", end="2017-10-20", freq="1H")
index = index.tz_localize("UTC").tz_convert("America/Sao_Paulo")
df = DataFrame(data=list(range(len(index))), index=index)
result = df.groupby(pd.Grouper(freq="1D")).count()
expected = date_range(
start="2017-10-09",
end="2017-10-20",
freq="D",
tz="America/Sao_Paulo",
nonexistent="shift_forward",
inclusive="left",
)
tm.assert_index_equal(result.index, expected)
def test_resample_ambiguous_time_bin_edge(self):
# GH 10117
idx = date_range(
"2014-10-25 22:00:00", "2014-10-26 00:30:00", freq="30T", tz="Europe/London"
)
expected = Series(np.zeros(len(idx)), index=idx)
result = expected.resample("30T").mean()
tm.assert_series_equal(result, expected)
def test_fill_method_and_how_upsample(self):
# GH2073
s = Series(
np.arange(9, dtype="int64"),
index=date_range("2010-01-01", periods=9, freq="Q"),
)
last = s.resample("M").ffill()
both = s.resample("M").ffill().resample("M").last().astype("int64")
tm.assert_series_equal(last, both)
@pytest.mark.parametrize("day", DAYS)
@pytest.mark.parametrize("target", ["D", "B"])
@pytest.mark.parametrize("convention", ["start", "end"])
def test_weekly_upsample(self, day, target, convention, simple_period_range_series):
freq = f"W-{day}"
ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq)
result = ts.resample(target, convention=convention).ffill()
expected = result.to_timestamp(target, how=convention)
expected = expected.asfreq(target, "ffill").to_period()
tm.assert_series_equal(result, expected)
def test_resample_to_timestamps(self, simple_period_range_series):
ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="M")
result = ts.resample("A-DEC", kind="timestamp").mean()
expected = ts.to_timestamp(how="start").resample("A-DEC").mean()
tm.assert_series_equal(result, expected)
def test_resample_to_quarterly(self, simple_period_range_series):
for month in MONTHS:
ts = simple_period_range_series("1990", "1992", freq=f"A-{month}")
quar_ts = ts.resample(f"Q-{month}").ffill()
stamps = ts.to_timestamp("D", how="start")
qdates = period_range(
ts.index[0].asfreq("D", "start"),
ts.index[-1].asfreq("D", "end"),
freq=f"Q-{month}",
)
expected = stamps.reindex(qdates.to_timestamp("D", "s"), method="ffill")
expected.index = qdates
tm.assert_series_equal(quar_ts, expected)
# conforms, but different month
ts = simple_period_range_series("1990", "1992", freq="A-JUN")
for how in ["start", "end"]:
result = ts.resample("Q-MAR", convention=how).ffill()
expected = ts.asfreq("Q-MAR", how=how)
expected = expected.reindex(result.index, method="ffill")
# .to_timestamp('D')
# expected = expected.resample('Q-MAR').ffill()
tm.assert_series_equal(result, expected)
def test_resample_fill_missing(self):
rng = PeriodIndex([2000, 2005, 2007, 2009], freq="A")
s = Series(np.random.randn(4), index=rng)
stamps = s.to_timestamp()
filled = s.resample("A").ffill()
expected = stamps.resample("A").ffill().to_period("A")
tm.assert_series_equal(filled, expected)
def test_cant_fill_missing_dups(self):
rng = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq="A")
s = Series(np.random.randn(5), index=rng)
msg = "Reindexing only valid with uniquely valued Index objects"
with pytest.raises(InvalidIndexError, match=msg):
s.resample("A").ffill()
@pytest.mark.parametrize("freq", ["5min"])
@pytest.mark.parametrize("kind", ["period", None, "timestamp"])
def test_resample_5minute(self, freq, kind):
rng = period_range("1/1/2000", "1/5/2000", freq="T")
ts = Series(np.random.randn(len(rng)), index=rng)
expected = ts.to_timestamp().resample(freq).mean()
if kind != "timestamp":
expected = expected.to_period(freq)
result = ts.resample(freq, kind=kind).mean()
tm.assert_series_equal(result, expected)
def test_upsample_daily_business_daily(self, simple_period_range_series):
ts = simple_period_range_series("1/1/2000", "2/1/2000", freq="B")
result = ts.resample("D").asfreq()
expected = ts.asfreq("D").reindex(period_range("1/3/2000", "2/1/2000"))
tm.assert_series_equal(result, expected)
ts = simple_period_range_series("1/1/2000", "2/1/2000")
result = ts.resample("H", convention="s").asfreq()
exp_rng = period_range("1/1/2000", "2/1/2000 23:00", freq="H")
expected = ts.asfreq("H", how="s").reindex(exp_rng)
tm.assert_series_equal(result, expected)
def test_resample_irregular_sparse(self):
dr = date_range(start="1/1/2012", freq="5min", periods=1000)
s = Series(np.array(100), index=dr)
# subset the data.
subset = s[:"2012-01-04 06:55"]
result = subset.resample("10min").apply(len)
expected = s.resample("10min").apply(len).loc[result.index]
tm.assert_series_equal(result, expected)
def test_resample_weekly_all_na(self):
rng = date_range("1/1/2000", periods=10, freq="W-WED")
ts = Series(np.random.randn(len(rng)), index=rng)
result = ts.resample("W-THU").asfreq()
assert result.isna().all()
result = ts.resample("W-THU").asfreq().ffill()[:-1]
expected = ts.asfreq("W-THU").ffill()
tm.assert_series_equal(result, expected)
def test_resample_tz_localized(self):
dr = date_range(start="2012-4-13", end="2012-5-1")
ts = Series(range(len(dr)), index=dr)
ts_utc = ts.tz_localize("UTC")
ts_local = ts_utc.tz_convert("America/Los_Angeles")
result = ts_local.resample("W").mean()
ts_local_naive = ts_local.copy()
ts_local_naive.index = [
x.replace(tzinfo=None) for x in ts_local_naive.index.to_pydatetime()
]
exp = ts_local_naive.resample("W").mean().tz_localize("America/Los_Angeles")
exp.index = pd.DatetimeIndex(exp.index, freq="W")
tm.assert_series_equal(result, exp)
# it works
result = ts_local.resample("D").mean()
# #2245
idx = date_range(
"2001-09-20 15:59", "2001-09-20 16:00", freq="T", tz="Australia/Sydney"
)
s = Series([1, 2], index=idx)
result = s.resample("D", closed="right", label="right").mean()
ex_index = date_range("2001-09-21", periods=1, freq="D", tz="Australia/Sydney")
expected = Series([1.5], index=ex_index)
tm.assert_series_equal(result, expected)
# for good measure
result = s.resample("D", kind="period").mean()
ex_index = period_range("2001-09-20", periods=1, freq="D")
expected = Series([1.5], index=ex_index)
tm.assert_series_equal(result, expected)
# GH 6397
# comparing an offset that doesn't propagate tz's
rng = date_range("1/1/2011", periods=20000, freq="H")
rng = rng.tz_localize("EST")
ts = DataFrame(index=rng)
ts["first"] = np.random.randn(len(rng))
ts["second"] = np.cumsum(np.random.randn(len(rng)))
expected = DataFrame(
{
"first": ts.resample("A").sum()["first"],
"second": ts.resample("A").mean()["second"],
},
columns=["first", "second"],
)
result = (
ts.resample("A")
.agg({"first": np.sum, "second": np.mean})
.reindex(columns=["first", "second"])
)
tm.assert_frame_equal(result, expected)
def test_closed_left_corner(self):
# #1465
s = Series(
np.random.randn(21),
index=date_range(start="1/1/2012 9:30", freq="1min", periods=21),
)
s[0] = np.nan
result = s.resample("10min", closed="left", label="right").mean()
exp = s[1:].resample("10min", closed="left", label="right").mean()
tm.assert_series_equal(result, exp)
result = s.resample("10min", closed="left", label="left").mean()
exp = s[1:].resample("10min", closed="left", label="left").mean()
ex_index = date_range(start="1/1/2012 9:30", freq="10min", periods=3)
tm.assert_index_equal(result.index, ex_index)
tm.assert_series_equal(result, exp)
def test_quarterly_resampling(self):
rng = period_range("2000Q1", periods=10, freq="Q-DEC")
ts = Series(np.arange(10), index=rng)
result = ts.resample("A").mean()
exp = ts.to_timestamp().resample("A").mean().to_period()
tm.assert_series_equal(result, exp)
def test_resample_weekly_bug_1726(self):
# 8/6/12 is a Monday
ind = date_range(start="8/6/2012", end="8/26/2012", freq="D")
n = len(ind)
data = [[x] * 5 for x in range(n)]
df = DataFrame(data, columns=["open", "high", "low", "close", "vol"], index=ind)
# it works!
df.resample("W-MON", closed="left", label="left").first()
def test_resample_with_dst_time_change(self):
# GH 15549
index = (
pd.DatetimeIndex([1457537600000000000, 1458059600000000000])
.tz_localize("UTC")
.tz_convert("America/Chicago")
)
df = DataFrame([1, 2], index=index)
result = df.resample("12h", closed="right", label="right").last().ffill()
expected_index_values = [
"2016-03-09 12:00:00-06:00",
"2016-03-10 00:00:00-06:00",
"2016-03-10 12:00:00-06:00",
"2016-03-11 00:00:00-06:00",
"2016-03-11 12:00:00-06:00",
"2016-03-12 00:00:00-06:00",
"2016-03-12 12:00:00-06:00",
"2016-03-13 00:00:00-06:00",
"2016-03-13 13:00:00-05:00",
"2016-03-14 01:00:00-05:00",
"2016-03-14 13:00:00-05:00",
"2016-03-15 01:00:00-05:00",
"2016-03-15 13:00:00-05:00",
]
index = pd.to_datetime(expected_index_values, utc=True).tz_convert(
"America/Chicago"
)
index = pd.DatetimeIndex(index, freq="12h")
expected = DataFrame(
[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0],
index=index,
)
tm.assert_frame_equal(result, expected)
def test_resample_bms_2752(self):
# GH2753
foo = Series(index=pd.bdate_range("20000101", "20000201"), dtype=np.float64)
res1 = foo.resample("BMS").mean()
res2 = foo.resample("BMS").mean().resample("B").mean()
assert res1.index[0] == Timestamp("20000103")
assert res1.index[0] == res2.index[0]
# def test_monthly_convention_span(self):
# rng = period_range('2000-01', periods=3, freq='M')
# ts = Series(np.arange(3), index=rng)
# # hacky way to get same thing
# exp_index = period_range('2000-01-01', '2000-03-31', freq='D')
# expected = ts.asfreq('D', how='end').reindex(exp_index)
# expected = expected.fillna(method='bfill')
# result = ts.resample('D', convention='span').mean()
# tm.assert_series_equal(result, expected)
def test_default_right_closed_label(self):
end_freq = ["D", "Q", "M", "D"]
end_types = ["M", "A", "Q", "W"]
for from_freq, to_freq in zip(end_freq, end_types):
idx = date_range(start="8/15/2012", periods=100, freq=from_freq)
df = DataFrame(np.random.randn(len(idx), 2), idx)
resampled = df.resample(to_freq).mean()
tm.assert_frame_equal(
resampled, df.resample(to_freq, closed="right", label="right").mean()
)
def test_default_left_closed_label(self):
others = ["MS", "AS", "QS", "D", "H"]
others_freq = ["D", "Q", "M", "H", "T"]
for from_freq, to_freq in zip(others_freq, others):
idx = date_range(start="8/15/2012", periods=100, freq=from_freq)
df = DataFrame(np.random.randn(len(idx), 2), idx)
resampled = df.resample(to_freq).mean()
tm.assert_frame_equal(
resampled, df.resample(to_freq, closed="left", label="left").mean()
)
def test_all_values_single_bin(self):
# 2070
index = period_range(start="2012-01-01", end="2012-12-31", freq="M")
s = Series(np.random.randn(len(index)), index=index)
result = s.resample("A").mean()
tm.assert_almost_equal(result[0], s.mean())
def test_evenly_divisible_with_no_extra_bins(self):
# 4076
# when the frequency is evenly divisible, sometimes extra bins are created
df = DataFrame(np.random.randn(9, 3), index=date_range("2000-1-1", periods=9))
result = df.resample("5D").mean()
expected = pd.concat([df.iloc[0:5].mean(), df.iloc[5:].mean()], axis=1).T
expected.index = pd.DatetimeIndex(
[Timestamp("2000-1-1"), Timestamp("2000-1-6")], freq="5D"
)
tm.assert_frame_equal(result, expected)
index = date_range(start="2001-5-4", periods=28)
df = DataFrame(
[
{
"REST_KEY": 1,
"DLY_TRN_QT": 80,
"DLY_SLS_AMT": 90,
"COOP_DLY_TRN_QT": 30,
"COOP_DLY_SLS_AMT": 20,
}
]
* 28
+ [
{
"REST_KEY": 2,
"DLY_TRN_QT": 70,
"DLY_SLS_AMT": 10,
"COOP_DLY_TRN_QT": 50,
"COOP_DLY_SLS_AMT": 20,
}
]
* 28,
index=index.append(index),
).sort_index()
index = date_range("2001-5-4", periods=4, freq="7D")
expected = DataFrame(
[
{
"REST_KEY": 14,
"DLY_TRN_QT": 14,
"DLY_SLS_AMT": 14,
"COOP_DLY_TRN_QT": 14,
"COOP_DLY_SLS_AMT": 14,
}
]
* 4,
index=index,
)
result = df.resample("7D").count()
tm.assert_frame_equal(result, expected)
expected = DataFrame(
[
{
"REST_KEY": 21,
"DLY_TRN_QT": 1050,
"DLY_SLS_AMT": 700,
"COOP_DLY_TRN_QT": 560,
"COOP_DLY_SLS_AMT": 280,
}
]
* 4,
index=index,
)
result = df.resample("7D").sum()
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("freq, period_mult", [("H", 24), ("12H", 2)])
@pytest.mark.parametrize("kind", [None, "period"])
def test_upsampling_ohlc(self, freq, period_mult, kind):
# GH 13083
pi = period_range(start="2000", freq="D", periods=10)
s = Series(range(len(pi)), index=pi)
expected = s.to_timestamp().resample(freq).ohlc().to_period(freq)
# timestamp-based resampling doesn't include all sub-periods
# of the last original period, so extend accordingly:
new_index = period_range(start="2000", freq=freq, periods=period_mult * len(pi))
expected = expected.reindex(new_index)
result = s.resample(freq, kind=kind).ohlc()
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"periods, values",
[
(
[
pd.NaT,
"1970-01-01 00:00:00",
pd.NaT,
"1970-01-01 00:00:02",
"1970-01-01 00:00:03",
],
[2, 3, 5, 7, 11],
),
(
[
pd.NaT,
pd.NaT,
"1970-01-01 00:00:00",
pd.NaT,
pd.NaT,
pd.NaT,
"1970-01-01 00:00:02",
"1970-01-01 00:00:03",
pd.NaT,
pd.NaT,
],
[1, 2, 3, 5, 6, 8, 7, 11, 12, 13],
),
],
)
@pytest.mark.parametrize(
"freq, expected_values",
[
("1s", [3, np.NaN, 7, 11]),
("2s", [3, (7 + 11) / 2]),
("3s", [(3 + 7) / 2, 11]),
],
)
def test_resample_with_nat(self, periods, values, freq, expected_values):
# GH 13224
index = PeriodIndex(periods, freq="S")
frame = DataFrame(values, index=index)
expected_index = period_range(
"1970-01-01 00:00:00", periods=len(expected_values), freq=freq
)
expected = DataFrame(expected_values, index=expected_index)
result = frame.resample(freq).mean()
tm.assert_frame_equal(result, expected)
def test_resample_with_only_nat(self):
# GH 13224
pi = PeriodIndex([pd.NaT] * 3, freq="S")
frame = DataFrame([2, 3, 5], index=pi, columns=["a"])
expected_index = PeriodIndex(data=[], freq=pi.freq)
expected = DataFrame(index=expected_index, columns=["a"], dtype="float64")
result = frame.resample("1s").mean()
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"start,end,start_freq,end_freq,offset",
[
("19910905", "19910909 03:00", "H", "24H", "10H"),
("19910905", "19910909 12:00", "H", "24H", "10H"),
("19910905", "19910909 23:00", "H", "24H", "10H"),
("19910905 10:00", "19910909", "H", "24H", "10H"),
("19910905 10:00", "19910909 10:00", "H", "24H", "10H"),
("19910905", "19910909 10:00", "H", "24H", "10H"),
("19910905 12:00", "19910909", "H", "24H", "10H"),
("19910905 12:00", "19910909 03:00", "H", "24H", "10H"),
("19910905 12:00", "19910909 12:00", "H", "24H", "10H"),
("19910905 12:00", "19910909 12:00", "H", "24H", "34H"),
("19910905 12:00", "19910909 12:00", "H", "17H", "10H"),
("19910905 12:00", "19910909 12:00", "H", "17H", "3H"),
("19910905 12:00", "19910909 1:00", "H", "M", "3H"),
("19910905", "19910913 06:00", "2H", "24H", "10H"),
("19910905", "19910905 01:39", "Min", "5Min", "3Min"),
("19910905", "19910905 03:18", "2Min", "5Min", "3Min"),
],
)
def test_resample_with_offset(self, start, end, start_freq, end_freq, offset):
# GH 23882 & 31809
s = Series(0, index=period_range(start, end, freq=start_freq))
s = s + np.arange(len(s))
result = s.resample(end_freq, offset=offset).mean()
result = result.to_timestamp(end_freq)
expected = s.to_timestamp().resample(end_freq, offset=offset).mean()
if end_freq == "M":
# TODO: is non-tick the relevant characteristic? (GH 33815)
expected.index = expected.index._with_freq(None)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"first,last,freq,exp_first,exp_last",
[
("19910905", "19920406", "D", "19910905", "19920406"),
("19910905 00:00", "19920406 06:00", "D", "19910905", "19920406"),
(
"19910905 06:00",
"19920406 06:00",
"H",
"19910905 06:00",
"19920406 06:00",
),
("19910906", "19920406", "M", "1991-09", "1992-04"),
("19910831", "19920430", "M", "1991-08", "1992-04"),
("1991-08", "1992-04", "M", "1991-08", "1992-04"),
],
)
def test_get_period_range_edges(self, first, last, freq, exp_first, exp_last):
first = Period(first)
last = Period(last)
exp_first = Period(exp_first, freq=freq)
exp_last = Period(exp_last, freq=freq)
freq = pd.tseries.frequencies.to_offset(freq)
result = _get_period_range_edges(first, last, freq)
expected = (exp_first, exp_last)
assert result == expected
def test_sum_min_count(self):
# GH 19974
index = date_range(start="2018", freq="M", periods=6)
data = np.ones(6)
data[3:6] = np.nan
s = Series(data, index).to_period()
result = s.resample("Q").sum(min_count=1)
expected = Series(
[3.0, np.nan], index=PeriodIndex(["2018Q1", "2018Q2"], freq="Q-DEC")
)
tm.assert_series_equal(result, expected)


@@ -0,0 +1,738 @@
from datetime import datetime
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
NamedAgg,
Series,
)
import pandas._testing as tm
from pandas.core.indexes.datetimes import date_range
dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="Min")
test_series = Series(np.random.rand(len(dti)), dti)
_test_frame = DataFrame({"A": test_series, "B": test_series, "C": np.arange(len(dti))})
@pytest.fixture
def test_frame():
return _test_frame.copy()
def test_str():
r = test_series.resample("H")
assert (
"DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, "
"label=left, convention=start, origin=start_day]" in str(r)
)
r = test_series.resample("H", origin="2000-01-01")
assert (
"DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, "
"label=left, convention=start, origin=2000-01-01 00:00:00]" in str(r)
)
def test_api():
r = test_series.resample("H")
result = r.mean()
assert isinstance(result, Series)
assert len(result) == 217
r = test_series.to_frame().resample("H")
result = r.mean()
assert isinstance(result, DataFrame)
assert len(result) == 217
def test_groupby_resample_api():
# GH 12448
# .groupby(...).resample(...) hitting warnings
# when appropriate
df = DataFrame(
{
"date": date_range(start="2016-01-01", periods=4, freq="W"),
"group": [1, 1, 2, 2],
"val": [5, 6, 7, 8],
}
).set_index("date")
# replication step
i = (
date_range("2016-01-03", periods=8).tolist()
+ date_range("2016-01-17", periods=8).tolist()
)
index = pd.MultiIndex.from_arrays([[1] * 8 + [2] * 8, i], names=["group", "date"])
expected = DataFrame({"val": [5] * 7 + [6] + [7] * 7 + [8]}, index=index)
result = df.groupby("group").apply(lambda x: x.resample("1D").ffill())[["val"]]
tm.assert_frame_equal(result, expected)
def test_groupby_resample_on_api():
# GH 15021
# .groupby(...).resample(on=...) results in an unexpected
# keyword warning.
df = DataFrame(
{
"key": ["A", "B"] * 5,
"dates": date_range("2016-01-01", periods=10),
"values": np.random.randn(10),
}
)
expected = df.set_index("dates").groupby("key").resample("D").mean()
result = df.groupby("key").resample("D", on="dates").mean()
tm.assert_frame_equal(result, expected)
def test_pipe(test_frame):
# GH17905
# series
r = test_series.resample("H")
expected = r.max() - r.mean()
result = r.pipe(lambda x: x.max() - x.mean())
tm.assert_series_equal(result, expected)
# dataframe
r = test_frame.resample("H")
expected = r.max() - r.mean()
result = r.pipe(lambda x: x.max() - x.mean())
tm.assert_frame_equal(result, expected)
def test_getitem(test_frame):
r = test_frame.resample("H")
tm.assert_index_equal(r._selected_obj.columns, test_frame.columns)
r = test_frame.resample("H")["B"]
assert r._selected_obj.name == test_frame.columns[1]
# technically this is allowed
r = test_frame.resample("H")["A", "B"]
tm.assert_index_equal(r._selected_obj.columns, test_frame.columns[[0, 1]])
r = test_frame.resample("H")["A", "B"]
tm.assert_index_equal(r._selected_obj.columns, test_frame.columns[[0, 1]])
@pytest.mark.parametrize("key", [["D"], ["A", "D"]])
def test_select_bad_cols(key, test_frame):
g = test_frame.resample("H")
# 'A' should not be referenced as a bad column...
# will have to rethink regex if you change message!
msg = r"^\"Columns not found: 'D'\"$"
with pytest.raises(KeyError, match=msg):
g[key]
def test_attribute_access(test_frame):
r = test_frame.resample("H")
tm.assert_series_equal(r.A.sum(), r["A"].sum())
def test_api_compat_before_use():
# make sure that we are setting the binner
# on these attributes
for attr in ["groups", "ngroups", "indices"]:
rng = date_range("1/1/2012", periods=100, freq="S")
ts = Series(np.arange(len(rng)), index=rng)
rs = ts.resample("30s")
# before use
getattr(rs, attr)
# after grouper is initialized is ok
rs.mean()
getattr(rs, attr)
def tests_skip_nuisance(test_frame):
df = test_frame
df["D"] = "foo"
r = df.resample("H")
result = r[["A", "B"]].sum()
expected = pd.concat([r.A.sum(), r.B.sum()], axis=1)
tm.assert_frame_equal(result, expected)
expected = r[["A", "B", "C"]].sum()
result = r.sum()
tm.assert_frame_equal(result, expected)
def test_downsample_but_actually_upsampling():
# this is reindex / asfreq
rng = date_range("1/1/2012", periods=100, freq="S")
ts = Series(np.arange(len(rng), dtype="int64"), index=rng)
result = ts.resample("20s").asfreq()
expected = Series(
[0, 20, 40, 60, 80],
index=date_range("2012-01-01 00:00:00", freq="20s", periods=5),
)
tm.assert_series_equal(result, expected)
def test_combined_up_downsampling_of_irregular():
# since we are really doing an operation like this
# ts2.resample('2s').mean().ffill()
# preserve these semantics
rng = date_range("1/1/2012", periods=100, freq="S")
ts = Series(np.arange(len(rng)), index=rng)
ts2 = ts.iloc[[0, 1, 2, 3, 5, 7, 11, 15, 16, 25, 30]]
result = ts2.resample("2s").mean().ffill()
expected = Series(
[
0.5,
2.5,
5.0,
7.0,
7.0,
11.0,
11.0,
15.0,
16.0,
16.0,
16.0,
16.0,
25.0,
25.0,
25.0,
30.0,
],
index=pd.DatetimeIndex(
[
"2012-01-01 00:00:00",
"2012-01-01 00:00:02",
"2012-01-01 00:00:04",
"2012-01-01 00:00:06",
"2012-01-01 00:00:08",
"2012-01-01 00:00:10",
"2012-01-01 00:00:12",
"2012-01-01 00:00:14",
"2012-01-01 00:00:16",
"2012-01-01 00:00:18",
"2012-01-01 00:00:20",
"2012-01-01 00:00:22",
"2012-01-01 00:00:24",
"2012-01-01 00:00:26",
"2012-01-01 00:00:28",
"2012-01-01 00:00:30",
],
dtype="datetime64[ns]",
freq="2S",
),
)
tm.assert_series_equal(result, expected)
def test_transform():
r = test_series.resample("20min")
expected = test_series.groupby(pd.Grouper(freq="20min")).transform("mean")
result = r.transform("mean")
tm.assert_series_equal(result, expected)
def test_fillna():
# need to upsample here
rng = date_range("1/1/2012", periods=10, freq="2S")
ts = Series(np.arange(len(rng), dtype="int64"), index=rng)
r = ts.resample("s")
expected = r.ffill()
result = r.fillna(method="ffill")
tm.assert_series_equal(result, expected)
expected = r.bfill()
result = r.fillna(method="bfill")
tm.assert_series_equal(result, expected)
msg = (
r"Invalid fill method\. Expecting pad \(ffill\), backfill "
r"\(bfill\) or nearest\. Got 0"
)
with pytest.raises(ValueError, match=msg):
r.fillna(0)
def test_apply_without_aggregation():
# both resample and groupby should work w/o aggregation
r = test_series.resample("20min")
g = test_series.groupby(pd.Grouper(freq="20min"))
for t in [g, r]:
result = t.apply(lambda x: x)
tm.assert_series_equal(result, test_series)
def test_agg_consistency():
# make sure that we are consistent across
# similar aggregations with and w/o selection list
df = DataFrame(
np.random.randn(1000, 3),
index=date_range("1/1/2012", freq="S", periods=1000),
columns=["A", "B", "C"],
)
r = df.resample("3T")
msg = r"Column\(s\) \['r1', 'r2'\] do not exist"
with pytest.raises(KeyError, match=msg):
r.agg({"r1": "mean", "r2": "sum"})
def test_agg_consistency_int_str_column_mix():
# GH#39025
df = DataFrame(
np.random.randn(1000, 2),
index=date_range("1/1/2012", freq="S", periods=1000),
columns=[1, "a"],
)
r = df.resample("3T")
msg = r"Column\(s\) \[2, 'b'\] do not exist"
with pytest.raises(KeyError, match=msg):
r.agg({2: "mean", "b": "sum"})
# TODO(GH#14008): once GH 14008 is fixed, move these tests into
# `Base` test class
def test_agg():
# test with all three Resampler apis and TimeGrouper
np.random.seed(1234)
index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
index.name = "date"
df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
df_col = df.reset_index()
df_mult = df_col.copy()
df_mult.index = pd.MultiIndex.from_arrays(
[range(10), df.index], names=["index", "date"]
)
r = df.resample("2D")
cases = [
r,
df_col.resample("2D", on="date"),
df_mult.resample("2D", level="date"),
df.groupby(pd.Grouper(freq="2D")),
]
a_mean = r["A"].mean()
a_std = r["A"].std()
a_sum = r["A"].sum()
b_mean = r["B"].mean()
b_std = r["B"].std()
b_sum = r["B"].sum()
expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
for t in cases:
warn = FutureWarning if t in cases[1:3] else None
with tm.assert_produces_warning(
warn,
match=r"\['date'\] did not aggregate successfully",
):
# .var on dt64 column raises and is dropped
result = t.aggregate([np.mean, np.std])
tm.assert_frame_equal(result, expected)
expected = pd.concat([a_mean, b_std], axis=1)
for t in cases:
result = t.aggregate({"A": np.mean, "B": np.std})
tm.assert_frame_equal(result, expected, check_like=True)
result = t.aggregate(A=("A", np.mean), B=("B", np.std))
tm.assert_frame_equal(result, expected, check_like=True)
result = t.aggregate(A=NamedAgg("A", np.mean), B=NamedAgg("B", np.std))
tm.assert_frame_equal(result, expected, check_like=True)
expected = pd.concat([a_mean, a_std], axis=1)
expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")])
for t in cases:
result = t.aggregate({"A": ["mean", "std"]})
tm.assert_frame_equal(result, expected)
expected = pd.concat([a_mean, a_sum], axis=1)
expected.columns = ["mean", "sum"]
for t in cases:
result = t["A"].aggregate(["mean", "sum"])
tm.assert_frame_equal(result, expected)
result = t["A"].aggregate(mean="mean", sum="sum")
tm.assert_frame_equal(result, expected)
msg = "nested renamer is not supported"
for t in cases:
with pytest.raises(pd.core.base.SpecificationError, match=msg):
t.aggregate({"A": {"mean": "mean", "sum": "sum"}})
expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
expected.columns = pd.MultiIndex.from_tuples(
[("A", "mean"), ("A", "sum"), ("B", "mean2"), ("B", "sum2")]
)
for t in cases:
with pytest.raises(pd.core.base.SpecificationError, match=msg):
t.aggregate(
{
"A": {"mean": "mean", "sum": "sum"},
"B": {"mean2": "mean", "sum2": "sum"},
}
)
expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
expected.columns = pd.MultiIndex.from_tuples(
[("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")]
)
for t in cases:
result = t.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]})
tm.assert_frame_equal(result, expected, check_like=True)
expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
expected.columns = pd.MultiIndex.from_tuples(
[
("r1", "A", "mean"),
("r1", "A", "sum"),
("r2", "B", "mean"),
("r2", "B", "sum"),
]
)
def test_agg_misc():
# test with all three Resampler apis and TimeGrouper
np.random.seed(1234)
index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
index.name = "date"
df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
df_col = df.reset_index()
df_mult = df_col.copy()
df_mult.index = pd.MultiIndex.from_arrays(
[range(10), df.index], names=["index", "date"]
)
r = df.resample("2D")
cases = [
r,
df_col.resample("2D", on="date"),
df_mult.resample("2D", level="date"),
df.groupby(pd.Grouper(freq="2D")),
]
# passed lambda
for t in cases:
result = t.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)})
rcustom = t["B"].apply(lambda x: np.std(x, ddof=1))
expected = pd.concat([r["A"].sum(), rcustom], axis=1)
tm.assert_frame_equal(result, expected, check_like=True)
result = t.agg(A=("A", np.sum), B=("B", lambda x: np.std(x, ddof=1)))
tm.assert_frame_equal(result, expected, check_like=True)
result = t.agg(
A=NamedAgg("A", np.sum), B=NamedAgg("B", lambda x: np.std(x, ddof=1))
)
tm.assert_frame_equal(result, expected, check_like=True)
# agg with renamers
expected = pd.concat(
[t["A"].sum(), t["B"].sum(), t["A"].mean(), t["B"].mean()], axis=1
)
expected.columns = pd.MultiIndex.from_tuples(
[("result1", "A"), ("result1", "B"), ("result2", "A"), ("result2", "B")]
)
msg = r"Column\(s\) \['result1', 'result2'\] do not exist"
for t in cases:
with pytest.raises(KeyError, match=msg):
t[["A", "B"]].agg({"result1": np.sum, "result2": np.mean})
with pytest.raises(KeyError, match=msg):
t[["A", "B"]].agg(A=("result1", np.sum), B=("result2", np.mean))
with pytest.raises(KeyError, match=msg):
t[["A", "B"]].agg(
A=NamedAgg("result1", np.sum), B=NamedAgg("result2", np.mean)
)
# agg with different hows
expected = pd.concat(
[t["A"].sum(), t["A"].std(), t["B"].mean(), t["B"].std()], axis=1
)
expected.columns = pd.MultiIndex.from_tuples(
[("A", "sum"), ("A", "std"), ("B", "mean"), ("B", "std")]
)
for t in cases:
result = t.agg({"A": ["sum", "std"], "B": ["mean", "std"]})
tm.assert_frame_equal(result, expected, check_like=True)
    # equivalent with and without using a selection list
for t in cases:
result = t[["A", "B"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
tm.assert_frame_equal(result, expected, check_like=True)
msg = "nested renamer is not supported"
    # Series-like aggs
for t in cases:
with pytest.raises(pd.core.base.SpecificationError, match=msg):
t["A"].agg({"A": ["sum", "std"]})
with pytest.raises(pd.core.base.SpecificationError, match=msg):
t["A"].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
# errors
# invalid names in the agg specification
msg = r"Column\(s\) \['B'\] do not exist"
for t in cases:
with pytest.raises(KeyError, match=msg):
t[["A"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
def test_agg_nested_dicts():
np.random.seed(1234)
index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
index.name = "date"
df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
df_col = df.reset_index()
df_mult = df_col.copy()
df_mult.index = pd.MultiIndex.from_arrays(
[range(10), df.index], names=["index", "date"]
)
r = df.resample("2D")
cases = [
r,
df_col.resample("2D", on="date"),
df_mult.resample("2D", level="date"),
df.groupby(pd.Grouper(freq="2D")),
]
msg = "nested renamer is not supported"
for t in cases:
with pytest.raises(pd.core.base.SpecificationError, match=msg):
t.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}})
for t in cases:
with pytest.raises(pd.core.base.SpecificationError, match=msg):
t[["A", "B"]].agg(
{"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}
)
with pytest.raises(pd.core.base.SpecificationError, match=msg):
t.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})
def test_try_aggregate_non_existing_column():
# GH 16766
data = [
{"dt": datetime(2017, 6, 1, 0), "x": 1.0, "y": 2.0},
{"dt": datetime(2017, 6, 1, 1), "x": 2.0, "y": 2.0},
{"dt": datetime(2017, 6, 1, 2), "x": 3.0, "y": 1.5},
]
df = DataFrame(data).set_index("dt")
    # Error as we don't have a 'z' column
msg = r"Column\(s\) \['z'\] do not exist"
with pytest.raises(KeyError, match=msg):
df.resample("30T").agg({"x": ["mean"], "y": ["median"], "z": ["sum"]})
def test_selection_api_validation():
# GH 13500
index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
rng = np.arange(len(index), dtype=np.int64)
df = DataFrame(
{"date": index, "a": rng},
index=pd.MultiIndex.from_arrays([rng, index], names=["v", "d"]),
)
df_exp = DataFrame({"a": rng}, index=index)
# non DatetimeIndex
msg = (
"Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, "
"but got an instance of 'Int64Index'"
)
with pytest.raises(TypeError, match=msg):
df.resample("2D", level="v")
msg = "The Grouper cannot specify both a key and a level!"
with pytest.raises(ValueError, match=msg):
df.resample("2D", on="date", level="d")
msg = "unhashable type: 'list'"
with pytest.raises(TypeError, match=msg):
df.resample("2D", on=["a", "date"])
msg = r"\"Level \['a', 'date'\] not found\""
with pytest.raises(KeyError, match=msg):
df.resample("2D", level=["a", "date"])
# upsampling not allowed
msg = (
"Upsampling from level= or on= selection is not supported, use "
r"\.set_index\(\.\.\.\) to explicitly set index to datetime-like"
)
with pytest.raises(ValueError, match=msg):
df.resample("2D", level="d").asfreq()
with pytest.raises(ValueError, match=msg):
df.resample("2D", on="date").asfreq()
exp = df_exp.resample("2D").sum()
exp.index.name = "date"
tm.assert_frame_equal(exp, df.resample("2D", on="date").sum())
exp.index.name = "d"
tm.assert_frame_equal(exp, df.resample("2D", level="d").sum())
@pytest.mark.parametrize(
"col_name", ["t2", "t2x", "t2q", "T_2M", "t2p", "t2m", "t2m1", "T2M"]
)
def test_agg_with_datetime_index_list_agg_func(col_name):
# GH 22660
# The parametrized column names would get converted to dates by our
# date parser. Some would result in OutOfBoundsError (ValueError) while
# others would result in OverflowError when passed into Timestamp.
# We catch these errors and move on to the correct branch.
df = DataFrame(
list(range(200)),
index=date_range(
start="2017-01-01", freq="15min", periods=200, tz="Europe/Berlin"
),
columns=[col_name],
)
result = df.resample("1d").aggregate(["mean"])
expected = DataFrame(
[47.5, 143.5, 195.5],
index=date_range(start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"),
columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]),
)
tm.assert_frame_equal(result, expected)
def test_resample_agg_readonly():
# GH#31710 cython needs to allow readonly data
index = date_range("2020-01-01", "2020-01-02", freq="1h")
arr = np.zeros_like(index)
arr.setflags(write=False)
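    # np.zeros_like on a DatetimeIndex gives datetime64 zeros (the epoch), so
    # every aggregation below should return Timestamp(0) for both daily bins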
ser = Series(arr, index=index)
rs = ser.resample("1D")
expected = Series([pd.Timestamp(0), pd.Timestamp(0)], index=index[::24])
result = rs.agg("last")
tm.assert_series_equal(result, expected)
result = rs.agg("first")
tm.assert_series_equal(result, expected)
result = rs.agg("max")
tm.assert_series_equal(result, expected)
result = rs.agg("min")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"start,end,freq,data,resample_freq,origin,closed,exp_data,exp_end,exp_periods",
[
(
"2000-10-01 23:30:00",
"2000-10-02 00:26:00",
"7min",
[0, 3, 6, 9, 12, 15, 18, 21, 24],
"17min",
"end",
None,
[0, 18, 27, 63],
"20001002 00:26:00",
4,
),
(
"20200101 8:26:35",
"20200101 9:31:58",
"77s",
[1] * 51,
"7min",
"end",
"right",
[1, 6, 5, 6, 5, 6, 5, 6, 5, 6],
"2020-01-01 09:30:45",
10,
),
(
"2000-10-01 23:30:00",
"2000-10-02 00:26:00",
"7min",
[0, 3, 6, 9, 12, 15, 18, 21, 24],
"17min",
"end",
"left",
[0, 18, 27, 39, 24],
"20001002 00:43:00",
5,
),
(
"2000-10-01 23:30:00",
"2000-10-02 00:26:00",
"7min",
[0, 3, 6, 9, 12, 15, 18, 21, 24],
"17min",
"end_day",
None,
[3, 15, 45, 45],
"2000-10-02 00:29:00",
4,
),
],
)
def test_end_and_end_day_origin(
start,
end,
freq,
data,
resample_freq,
origin,
closed,
exp_data,
exp_end,
exp_periods,
):
rng = date_range(start, end, freq=freq)
ts = Series(data, index=rng)
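    # origin="end" anchors the bin edges to the last timestamp of the data,
    # while origin="end_day" anchors them to midnight at the end of the last day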
res = ts.resample(resample_freq, origin=origin, closed=closed).sum()
expected = Series(
exp_data,
index=date_range(end=exp_end, freq=resample_freq, periods=exp_periods),
)
tm.assert_series_equal(res, expected)

View File

@@ -0,0 +1,464 @@
from textwrap import dedent
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas.util._test_decorators import async_mark
import pandas as pd
from pandas import (
DataFrame,
Index,
Series,
TimedeltaIndex,
Timestamp,
)
import pandas._testing as tm
from pandas.core.api import Int64Index
from pandas.core.indexes.datetimes import date_range
test_frame = DataFrame(
{"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)},
index=date_range("1/1/2000", freq="s", periods=40),
)
@async_mark()
@td.check_file_leaks
async def test_tab_complete_ipython6_warning(ip):
from IPython.core.completer import provisionalcompleter
code = dedent(
"""\
import pandas._testing as tm
s = tm.makeTimeSeries()
rs = s.resample("D")
"""
)
await ip.run_code(code)
# GH 31324 newer jedi version raises Deprecation warning;
# appears resolved 2021-02-02
with tm.assert_produces_warning(None):
with provisionalcompleter("ignore"):
list(ip.Completer.completions("rs.", 1))
def test_deferred_with_groupby():
# GH 12486
# support deferred resample ops with groupby
data = [
["2010-01-01", "A", 2],
["2010-01-02", "A", 3],
["2010-01-05", "A", 8],
["2010-01-10", "A", 7],
["2010-01-13", "A", 3],
["2010-01-01", "B", 5],
["2010-01-03", "B", 2],
["2010-01-04", "B", 1],
["2010-01-11", "B", 7],
["2010-01-14", "B", 3],
]
df = DataFrame(data, columns=["date", "id", "score"])
df.date = pd.to_datetime(df.date)
def f(x):
return x.set_index("date").resample("D").asfreq()
expected = df.groupby("id").apply(f)
result = df.set_index("date").groupby("id").resample("D").asfreq()
tm.assert_frame_equal(result, expected)
df = DataFrame(
{
"date": date_range(start="2016-01-01", periods=4, freq="W"),
"group": [1, 1, 2, 2],
"val": [5, 6, 7, 8],
}
).set_index("date")
def f(x):
return x.resample("1D").ffill()
expected = df.groupby("group").apply(f)
result = df.groupby("group").resample("1D").ffill()
tm.assert_frame_equal(result, expected)
def test_getitem():
g = test_frame.groupby("A")
expected = g.B.apply(lambda x: x.resample("2s").mean())
result = g.resample("2s").B.mean()
tm.assert_series_equal(result, expected)
result = g.B.resample("2s").mean()
tm.assert_series_equal(result, expected)
result = g.resample("2s").mean().B
tm.assert_series_equal(result, expected)
def test_getitem_multiple():
# GH 13174
    # multiple calls after selection were causing an issue with aliasing
data = [{"id": 1, "buyer": "A"}, {"id": 2, "buyer": "B"}]
df = DataFrame(data, index=date_range("2016-01-01", periods=2))
r = df.groupby("id").resample("1D")
result = r["buyer"].count()
expected = Series(
[1, 1],
index=pd.MultiIndex.from_tuples(
[(1, Timestamp("2016-01-01")), (2, Timestamp("2016-01-02"))],
names=["id", None],
),
name="buyer",
)
tm.assert_series_equal(result, expected)
result = r["buyer"].count()
tm.assert_series_equal(result, expected)
def test_groupby_resample_on_api_with_getitem():
# GH 17813
df = DataFrame(
{"id": list("aabbb"), "date": date_range("1-1-2016", periods=5), "data": 1}
)
exp = df.set_index("date").groupby("id").resample("2D")["data"].sum()
result = df.groupby("id").resample("2D", on="date")["data"].sum()
tm.assert_series_equal(result, exp)
def test_groupby_with_origin():
# GH 31809
freq = "1399min" # prime number that is smaller than 24h
start, end = "1/1/2000 00:00:00", "1/31/2000 00:00"
middle = "1/15/2000 00:00:00"
rng = date_range(start, end, freq="1231min") # prime number
ts = Series(np.random.randn(len(rng)), index=rng)
ts2 = ts[middle:end]
    # proves that a grouper without a fixed origin does not work
# when dealing with unusual frequencies
simple_grouper = pd.Grouper(freq=freq)
count_ts = ts.groupby(simple_grouper).agg("count")
count_ts = count_ts[middle:end]
count_ts2 = ts2.groupby(simple_grouper).agg("count")
with pytest.raises(AssertionError, match="Index are different"):
tm.assert_index_equal(count_ts.index, count_ts2.index)
# test origin on 1970-01-01 00:00:00
origin = Timestamp(0)
adjusted_grouper = pd.Grouper(freq=freq, origin=origin)
adjusted_count_ts = ts.groupby(adjusted_grouper).agg("count")
adjusted_count_ts = adjusted_count_ts[middle:end]
adjusted_count_ts2 = ts2.groupby(adjusted_grouper).agg("count")
tm.assert_series_equal(adjusted_count_ts, adjusted_count_ts2)
# test origin on 2049-10-18 20:00:00
origin_future = Timestamp(0) + pd.Timedelta("1399min") * 30_000
adjusted_grouper2 = pd.Grouper(freq=freq, origin=origin_future)
adjusted2_count_ts = ts.groupby(adjusted_grouper2).agg("count")
adjusted2_count_ts = adjusted2_count_ts[middle:end]
adjusted2_count_ts2 = ts2.groupby(adjusted_grouper2).agg("count")
tm.assert_series_equal(adjusted2_count_ts, adjusted2_count_ts2)
    # both groupers use an adjusted timestamp that is a multiple of 1399 min
    # they should be equal even if the adjusted timestamp is in the future
tm.assert_series_equal(adjusted_count_ts, adjusted2_count_ts2)
def test_nearest():
# GH 17496
# Resample nearest
index = date_range("1/1/2000", periods=3, freq="T")
result = Series(range(3), index=index).resample("20s").nearest()
expected = Series(
[0, 0, 1, 1, 1, 2, 2],
index=pd.DatetimeIndex(
[
"2000-01-01 00:00:00",
"2000-01-01 00:00:20",
"2000-01-01 00:00:40",
"2000-01-01 00:01:00",
"2000-01-01 00:01:20",
"2000-01-01 00:01:40",
"2000-01-01 00:02:00",
],
dtype="datetime64[ns]",
freq="20S",
),
)
tm.assert_series_equal(result, expected)
def test_methods():
g = test_frame.groupby("A")
r = g.resample("2s")
for f in ["first", "last", "median", "sem", "sum", "mean", "min", "max"]:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
tm.assert_frame_equal(result, expected)
for f in ["size"]:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
tm.assert_series_equal(result, expected)
for f in ["count"]:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
tm.assert_frame_equal(result, expected)
# series only
for f in ["nunique"]:
result = getattr(r.B, f)()
expected = g.B.apply(lambda x: getattr(x.resample("2s"), f)())
tm.assert_series_equal(result, expected)
for f in ["nearest", "bfill", "ffill", "asfreq"]:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
tm.assert_frame_equal(result, expected)
result = r.ohlc()
expected = g.apply(lambda x: x.resample("2s").ohlc())
tm.assert_frame_equal(result, expected)
for f in ["std", "var"]:
result = getattr(r, f)(ddof=1)
expected = g.apply(lambda x: getattr(x.resample("2s"), f)(ddof=1))
tm.assert_frame_equal(result, expected)
def test_apply():
g = test_frame.groupby("A")
r = g.resample("2s")
# reduction
expected = g.resample("2s").sum()
def f(x):
return x.resample("2s").sum()
result = r.apply(f)
tm.assert_frame_equal(result, expected)
def f(x):
return x.resample("2s").apply(lambda y: y.sum())
result = g.apply(f)
# y.sum() results in int64 instead of int32 on 32-bit architectures
expected = expected.astype("int64")
tm.assert_frame_equal(result, expected)
def test_apply_with_mutated_index():
# GH 15169
index = date_range("1-1-2015", "12-31-15", freq="D")
df = DataFrame(data={"col1": np.random.rand(len(index))}, index=index)
def f(x):
s = Series([1, 2], index=["a", "b"])
return s
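    # f ignores its argument and returns a Series with a brand-new index, so the
    # per-group result index is "mutated" to ["a", "b"]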
expected = df.groupby(pd.Grouper(freq="M")).apply(f)
result = df.resample("M").apply(f)
tm.assert_frame_equal(result, expected)
# A case for series
expected = df["col1"].groupby(pd.Grouper(freq="M")).apply(f)
result = df["col1"].resample("M").apply(f)
tm.assert_series_equal(result, expected)
def test_apply_columns_multilevel():
# GH 16231
cols = pd.MultiIndex.from_tuples([("A", "a", "", "one"), ("B", "b", "i", "two")])
ind = date_range(start="2017-01-01", freq="15Min", periods=8)
df = DataFrame(np.array([0] * 16).reshape(8, 2), index=ind, columns=cols)
agg_dict = {col: (np.sum if col[3] == "one" else np.mean) for col in df.columns}
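    # map each MultiIndex column to an aggregator based on its fourth level:
    # sum for the "one" column, mean for the "two" column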
result = df.resample("H").apply(lambda x: agg_dict[x.name](x))
expected = DataFrame(
2 * [[0, 0.0]],
index=date_range(start="2017-01-01", freq="1H", periods=2),
columns=pd.MultiIndex.from_tuples(
[("A", "a", "", "one"), ("B", "b", "i", "two")]
),
)
tm.assert_frame_equal(result, expected)
def test_resample_groupby_with_label():
# GH 13235
index = date_range("2000-01-01", freq="2D", periods=5)
df = DataFrame(index=index, data={"col0": [0, 0, 1, 1, 2], "col1": [1, 1, 1, 1, 1]})
result = df.groupby("col0").resample("1W", label="left").sum()
mi = [
np.array([0, 0, 1, 2]),
pd.to_datetime(
np.array(["1999-12-26", "2000-01-02", "2000-01-02", "2000-01-02"])
),
]
mindex = pd.MultiIndex.from_arrays(mi, names=["col0", None])
expected = DataFrame(
data={"col0": [0, 0, 2, 2], "col1": [1, 1, 2, 1]}, index=mindex
)
tm.assert_frame_equal(result, expected)
def test_consistency_with_window():
# consistent return values with window
df = test_frame
expected = Int64Index([1, 2, 3], name="A")
result = df.groupby("A").resample("2s").mean()
assert result.index.nlevels == 2
tm.assert_index_equal(result.index.levels[0], expected)
result = df.groupby("A").rolling(20).mean()
assert result.index.nlevels == 2
tm.assert_index_equal(result.index.levels[0], expected)
def test_median_duplicate_columns():
# GH 14233
df = DataFrame(
np.random.randn(20, 3),
columns=list("aaa"),
index=date_range("2012-01-01", periods=20, freq="s"),
)
df2 = df.copy()
df2.columns = ["a", "b", "c"]
expected = df2.resample("5s").median()
result = df.resample("5s").median()
expected.columns = result.columns
tm.assert_frame_equal(result, expected)
def test_apply_to_one_column_of_df():
# GH: 36951
df = DataFrame(
{"col": range(10), "col1": range(10, 20)},
index=date_range("2012-01-01", periods=10, freq="20min"),
)
# access "col" via getattr -> make sure we handle AttributeError
result = df.resample("H").apply(lambda group: group.col.sum())
expected = Series(
[3, 12, 21, 9], index=date_range("2012-01-01", periods=4, freq="H")
)
tm.assert_series_equal(result, expected)
# access "col" via _getitem__ -> make sure we handle KeyErrpr
result = df.resample("H").apply(lambda group: group["col"].sum())
tm.assert_series_equal(result, expected)
def test_resample_groupby_agg():
# GH: 33548
df = DataFrame(
{
"cat": [
"cat_1",
"cat_1",
"cat_2",
"cat_1",
"cat_2",
"cat_1",
"cat_2",
"cat_1",
],
"num": [5, 20, 22, 3, 4, 30, 10, 50],
"date": [
"2019-2-1",
"2018-02-03",
"2020-3-11",
"2019-2-2",
"2019-2-2",
"2018-12-4",
"2020-3-11",
"2020-12-12",
],
}
)
df["date"] = pd.to_datetime(df["date"])
resampled = df.groupby("cat").resample("Y", on="date")
expected = resampled.sum()
result = resampled.agg({"num": "sum"})
tm.assert_frame_equal(result, expected)
def test_resample_groupby_agg_listlike():
# GH 42905
ts = Timestamp("2021-02-28 00:00:00")
df = DataFrame({"class": ["beta"], "value": [69]}, index=Index([ts], name="date"))
resampled = df.groupby("class").resample("M")["value"]
result = resampled.agg(["sum", "size"])
expected = DataFrame(
[[69, 1]],
index=pd.MultiIndex.from_tuples([("beta", ts)], names=["class", "date"]),
columns=["sum", "size"],
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
def test_empty(keys):
# GH 26411
df = DataFrame([], columns=["a", "b"], index=TimedeltaIndex([]))
result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean()
expected = DataFrame(columns=["a", "b"]).set_index(keys, drop=False)
if len(keys) == 1:
expected.index.name = keys[0]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("consolidate", [True, False])
def test_resample_groupby_agg_object_dtype_all_nan(consolidate):
# https://github.com/pandas-dev/pandas/issues/39329
dates = date_range("2020-01-01", periods=15, freq="D")
df1 = DataFrame({"key": "A", "date": dates, "col1": range(15), "col_object": "val"})
df2 = DataFrame({"key": "B", "date": dates, "col1": range(15)})
df = pd.concat([df1, df2], ignore_index=True)
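    # the consolidate parametrization exercises both block layouts: _consolidate
    # merges blocks of the same dtype into single blocks before grouping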
if consolidate:
df = df._consolidate()
result = df.groupby(["key"]).resample("W", on="date").min()
idx = pd.MultiIndex.from_arrays(
[
["A"] * 3 + ["B"] * 3,
pd.to_datetime(["2020-01-05", "2020-01-12", "2020-01-19"] * 2),
],
names=["key", "date"],
)
expected = DataFrame(
{
"key": ["A"] * 3 + ["B"] * 3,
"date": pd.to_datetime(["2020-01-01", "2020-01-06", "2020-01-13"] * 2),
"col1": [0, 5, 12] * 2,
"col_object": ["val"] * 3 + [np.nan] * 3,
},
index=idx,
)
tm.assert_frame_equal(result, expected)

View File

@@ -0,0 +1,357 @@
from datetime import datetime
from operator import methodcaller
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Series,
Timestamp,
)
import pandas._testing as tm
from pandas.core.groupby.grouper import Grouper
from pandas.core.indexes.datetimes import date_range
test_series = Series(np.random.randn(1000), index=date_range("1/1/2000", periods=1000))
def test_apply():
grouper = Grouper(freq="A", label="right", closed="right")
grouped = test_series.groupby(grouper)
def f(x):
return x.sort_values()[-3:]
applied = grouped.apply(f)
expected = test_series.groupby(lambda x: x.year).apply(f)
applied.index = applied.index.droplevel(0)
expected.index = expected.index.droplevel(0)
tm.assert_series_equal(applied, expected)
def test_count():
test_series[::3] = np.nan
expected = test_series.groupby(lambda x: x.year).count()
grouper = Grouper(freq="A", label="right", closed="right")
result = test_series.groupby(grouper).count()
expected.index = result.index
tm.assert_series_equal(result, expected)
result = test_series.resample("A").count()
expected.index = result.index
tm.assert_series_equal(result, expected)
def test_numpy_reduction():
result = test_series.resample("A", closed="right").prod()
expected = test_series.groupby(lambda x: x.year).agg(np.prod)
expected.index = result.index
tm.assert_series_equal(result, expected)
def test_apply_iteration():
# #2300
N = 1000
ind = date_range(start="2000-01-01", freq="D", periods=N)
df = DataFrame({"open": 1, "close": 2}, index=ind)
tg = Grouper(freq="M")
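    # _get_grouper is internal API; the middle element of the returned tuple is
    # the resolved grouping object that df.groupby consumes below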
_, grouper, _ = tg._get_grouper(df)
# Errors
grouped = df.groupby(grouper, group_keys=False)
def f(df):
return df["close"] / df["open"]
# it works!
result = grouped.apply(f)
tm.assert_index_equal(result.index, df.index)
@pytest.mark.parametrize(
"name, func",
[
("Int64Index", tm.makeIntIndex),
("Index", tm.makeUnicodeIndex),
("Float64Index", tm.makeFloatIndex),
("MultiIndex", lambda m: tm.makeCustomIndex(m, 2)),
],
)
def test_fails_on_no_datetime_index(name, func):
n = 2
index = func(n)
df = DataFrame({"a": np.random.randn(n)}, index=index)
msg = (
"Only valid with DatetimeIndex, TimedeltaIndex "
f"or PeriodIndex, but got an instance of '{name}'"
)
with pytest.raises(TypeError, match=msg):
df.groupby(Grouper(freq="D"))
def test_aaa_group_order():
# GH 12840
    # check that TimeGrouper performs stable sorts
n = 20
data = np.random.randn(n, 4)
df = DataFrame(data, columns=["A", "B", "C", "D"])
df["key"] = [
datetime(2013, 1, 1),
datetime(2013, 1, 2),
datetime(2013, 1, 3),
datetime(2013, 1, 4),
datetime(2013, 1, 5),
] * 4
grouped = df.groupby(Grouper(key="key", freq="D"))
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 1)), df[::5])
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 2)), df[1::5])
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 3)), df[2::5])
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 4)), df[3::5])
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 5)), df[4::5])
def test_aggregate_normal(resample_method):
"""Check TimeGrouper's aggregation is identical as normal groupby."""
data = np.random.randn(20, 4)
normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
normal_df["key"] = [1, 2, 3, 4, 5] * 4
dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
dt_df["key"] = [
datetime(2013, 1, 1),
datetime(2013, 1, 2),
datetime(2013, 1, 3),
datetime(2013, 1, 4),
datetime(2013, 1, 5),
] * 4
normal_grouped = normal_df.groupby("key")
dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))
expected = getattr(normal_grouped, resample_method)()
dt_result = getattr(dt_grouped, resample_method)()
expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key")
tm.assert_equal(expected, dt_result)
    # if TimeGrouper is used, 'nth' doesn't work yet
"""
for func in ['nth']:
expected = getattr(normal_grouped, func)(3)
expected.index = date_range(start='2013-01-01',
freq='D', periods=5, name='key')
dt_result = getattr(dt_grouped, func)(3)
tm.assert_frame_equal(expected, dt_result)
"""
@pytest.mark.parametrize(
"method, method_args, unit",
[
("sum", {}, 0),
("sum", {"min_count": 0}, 0),
("sum", {"min_count": 1}, np.nan),
("prod", {}, 1),
("prod", {"min_count": 0}, 1),
("prod", {"min_count": 1}, np.nan),
],
)
def test_resample_entirely_nat_window(method, method_args, unit):
s = Series([0] * 2 + [np.nan] * 2, index=date_range("2017", periods=4))
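    # the second 2-day bin contains only NaN, so min_count=1 turns it into NaN
    # while the default / min_count=0 keeps the reduction's identity value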
result = methodcaller(method, **method_args)(s.resample("2d"))
expected = Series(
[0.0, unit], index=pd.DatetimeIndex(["2017-01-01", "2017-01-03"], freq="2D")
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"func, fill_value",
[("min", np.nan), ("max", np.nan), ("sum", 0), ("prod", 1), ("count", 0)],
)
def test_aggregate_with_nat(func, fill_value):
    # check that TimeGrouper's aggregation is identical to a normal groupby;
    # if NaT is included, 'var', 'std', 'mean', 'first', 'last'
    # and 'nth' don't work yet
n = 20
data = np.random.randn(n, 4).astype("int64")
normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
normal_df["key"] = [1, 2, np.nan, 4, 5] * 4
dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
dt_df["key"] = [
datetime(2013, 1, 1),
datetime(2013, 1, 2),
pd.NaT,
datetime(2013, 1, 4),
datetime(2013, 1, 5),
] * 4
normal_grouped = normal_df.groupby("key")
dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))
normal_result = getattr(normal_grouped, func)()
dt_result = getattr(dt_grouped, func)()
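    # the plain groupby drops the nan/NaT key group, so pad the missing
    # 2013-01-03 bin back in with the reduction's fill value before aligning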
pad = DataFrame([[fill_value] * 4], index=[3], columns=["A", "B", "C", "D"])
expected = pd.concat([normal_result, pad])
expected = expected.sort_index()
dti = date_range(start="2013-01-01", freq="D", periods=5, name="key")
expected.index = dti._with_freq(None) # TODO: is this desired?
tm.assert_frame_equal(expected, dt_result)
assert dt_result.index.name == "key"
def test_aggregate_with_nat_size():
# GH 9925
n = 20
data = np.random.randn(n, 4).astype("int64")
normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
normal_df["key"] = [1, 2, np.nan, 4, 5] * 4
dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
dt_df["key"] = [
datetime(2013, 1, 1),
datetime(2013, 1, 2),
pd.NaT,
datetime(2013, 1, 4),
datetime(2013, 1, 5),
] * 4
normal_grouped = normal_df.groupby("key")
dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))
normal_result = normal_grouped.size()
dt_result = dt_grouped.size()
pad = Series([0], index=[3])
expected = pd.concat([normal_result, pad])
expected = expected.sort_index()
expected.index = date_range(
start="2013-01-01", freq="D", periods=5, name="key"
)._with_freq(None)
tm.assert_series_equal(expected, dt_result)
assert dt_result.index.name == "key"
def test_repr():
# GH18203
result = repr(Grouper(key="A", freq="H"))
expected = (
"TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, "
"closed='left', label='left', how='mean', "
"convention='e', origin='start_day')"
)
assert result == expected
result = repr(Grouper(key="A", freq="H", origin="2000-01-01"))
expected = (
"TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, "
"closed='left', label='left', how='mean', "
"convention='e', origin=Timestamp('2000-01-01 00:00:00'))"
)
assert result == expected
@pytest.mark.parametrize(
"method, method_args, expected_values",
[
("sum", {}, [1, 0, 1]),
("sum", {"min_count": 0}, [1, 0, 1]),
("sum", {"min_count": 1}, [1, np.nan, 1]),
("sum", {"min_count": 2}, [np.nan, np.nan, np.nan]),
("prod", {}, [1, 1, 1]),
("prod", {"min_count": 0}, [1, 1, 1]),
("prod", {"min_count": 1}, [1, np.nan, 1]),
("prod", {"min_count": 2}, [np.nan, np.nan, np.nan]),
],
)
def test_upsample_sum(method, method_args, expected_values):
s = Series(1, index=date_range("2017", periods=2, freq="H"))
resampled = s.resample("30T")
index = pd.DatetimeIndex(
["2017-01-01T00:00:00", "2017-01-01T00:30:00", "2017-01-01T01:00:00"],
freq="30T",
)
result = methodcaller(method, **method_args)(resampled)
expected = Series(expected_values, index=index)
tm.assert_series_equal(result, expected)
def test_groupby_resample_interpolate():
# GH 35325
d = {"price": [10, 11, 9], "volume": [50, 60, 50]}
df = DataFrame(d)
df["week_starting"] = date_range("01/01/2018", periods=3, freq="W")
result = (
df.set_index("week_starting")
.groupby("volume")
.resample("1D")
.interpolate(method="linear")
)
msg = "containing strings is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
expected_ind = pd.MultiIndex.from_tuples(
[
(50, "2018-01-07"),
(50, Timestamp("2018-01-08")),
(50, Timestamp("2018-01-09")),
(50, Timestamp("2018-01-10")),
(50, Timestamp("2018-01-11")),
(50, Timestamp("2018-01-12")),
(50, Timestamp("2018-01-13")),
(50, Timestamp("2018-01-14")),
(50, Timestamp("2018-01-15")),
(50, Timestamp("2018-01-16")),
(50, Timestamp("2018-01-17")),
(50, Timestamp("2018-01-18")),
(50, Timestamp("2018-01-19")),
(50, Timestamp("2018-01-20")),
(50, Timestamp("2018-01-21")),
(60, Timestamp("2018-01-14")),
],
names=["volume", "week_starting"],
)
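    # within the volume=50 group, price is interpolated linearly from 10 on
    # 2018-01-07 down to 9 on 2018-01-21, i.e. a step of 1/14 per day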
expected = DataFrame(
data={
"price": [
10.0,
9.928571428571429,
9.857142857142858,
9.785714285714286,
9.714285714285714,
9.642857142857142,
9.571428571428571,
9.5,
9.428571428571429,
9.357142857142858,
9.285714285714286,
9.214285714285714,
9.142857142857142,
9.071428571428571,
9.0,
11.0,
],
"volume": [50.0] * 15 + [60],
},
index=expected_ind,
)
tm.assert_frame_equal(result, expected)

View File

@@ -0,0 +1,193 @@
from datetime import timedelta
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Series,
)
import pandas._testing as tm
from pandas.core.indexes.timedeltas import timedelta_range
def test_asfreq_bug():
df = DataFrame(data=[1, 3], index=[timedelta(), timedelta(minutes=3)])
result = df.resample("1T").asfreq()
expected = DataFrame(
data=[1, np.nan, np.nan, 3],
index=timedelta_range("0 day", periods=4, freq="1T"),
)
tm.assert_frame_equal(result, expected)
def test_resample_with_nat():
# GH 13223
index = pd.to_timedelta(["0s", pd.NaT, "2s"])
result = DataFrame({"value": [2, 3, 5]}, index).resample("1s").mean()
expected = DataFrame(
{"value": [2.5, np.nan, 5.0]},
index=timedelta_range("0 day", periods=3, freq="1S"),
)
tm.assert_frame_equal(result, expected)
def test_resample_as_freq_with_subperiod():
# GH 13022
index = timedelta_range("00:00:00", "00:10:00", freq="5T")
df = DataFrame(data={"value": [1, 5, 10]}, index=index)
result = df.resample("2T").asfreq()
expected_data = {"value": [1, np.nan, np.nan, np.nan, np.nan, 10]}
expected = DataFrame(
data=expected_data, index=timedelta_range("00:00:00", "00:10:00", freq="2T")
)
tm.assert_frame_equal(result, expected)
def test_resample_with_timedeltas():
expected = DataFrame({"A": np.arange(1480)})
expected = expected.groupby(expected.index // 30).sum()
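    # grouping positions by // 30 sums every block of 30 consecutive rows, which
    # mirrors the 30T resample of the one-minute timedelta index below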
expected.index = timedelta_range("0 days", freq="30T", periods=50)
df = DataFrame(
{"A": np.arange(1480)}, index=pd.to_timedelta(np.arange(1480), unit="T")
)
result = df.resample("30T").sum()
tm.assert_frame_equal(result, expected)
s = df["A"]
result = s.resample("30T").sum()
tm.assert_series_equal(result, expected["A"])
def test_resample_single_period_timedelta():
s = Series(list(range(5)), index=timedelta_range("1 day", freq="s", periods=5))
result = s.resample("2s").sum()
expected = Series([1, 5, 4], index=timedelta_range("1 day", freq="2s", periods=3))
tm.assert_series_equal(result, expected)
def test_resample_timedelta_idempotency():
# GH 12072
index = timedelta_range("0", periods=9, freq="10L")
series = Series(range(9), index=index)
result = series.resample("10L").mean()
expected = series.astype(float)
tm.assert_series_equal(result, expected)
def test_resample_offset_with_timedeltaindex():
# GH 10530 & 31809
rng = timedelta_range(start="0s", periods=25, freq="s")
ts = Series(np.random.randn(len(rng)), index=rng)
with_base = ts.resample("2s", offset="5s").mean()
without_base = ts.resample("2s").mean()
exp_without_base = timedelta_range(start="0s", end="25s", freq="2s")
exp_with_base = timedelta_range(start="5s", end="29s", freq="2s")
tm.assert_index_equal(without_base.index, exp_without_base)
tm.assert_index_equal(with_base.index, exp_with_base)
def test_resample_categorical_data_with_timedeltaindex():
# GH #12169
df = DataFrame({"Group_obj": "A"}, index=pd.to_timedelta(list(range(20)), unit="s"))
df["Group"] = df["Group_obj"].astype("category")
result = df.resample("10s").agg(lambda x: (x.value_counts().index[0]))
expected = DataFrame(
{"Group_obj": ["A", "A"], "Group": ["A", "A"]},
index=pd.TimedeltaIndex([0, 10], unit="s", freq="10s"),
)
expected = expected.reindex(["Group_obj", "Group"], axis=1)
expected["Group"] = expected["Group_obj"]
tm.assert_frame_equal(result, expected)
def test_resample_timedelta_values():
# GH 13119
# check that timedelta dtype is preserved when NaT values are
# introduced by the resampling
times = timedelta_range("1 day", "6 day", freq="4D")
df = DataFrame({"time": times}, index=times)
times2 = timedelta_range("1 day", "6 day", freq="2D")
exp = Series(times2, index=times2, name="time")
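    # the 3-day bin has no source rows, which is where the resample introduces NaT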
exp.iloc[1] = pd.NaT
res = df.resample("2D").first()["time"]
tm.assert_series_equal(res, exp)
res = df["time"].resample("2D").first()
tm.assert_series_equal(res, exp)
@pytest.mark.parametrize(
"start, end, freq, resample_freq",
[
("8H", "21h59min50s", "10S", "3H"), # GH 30353 example
("3H", "22H", "1H", "5H"),
("527D", "5006D", "3D", "10D"),
("1D", "10D", "1D", "2D"), # GH 13022 example
# tests that worked before GH 33498:
("8H", "21h59min50s", "10S", "2H"),
("0H", "21h59min50s", "10S", "3H"),
("10D", "85D", "D", "2D"),
],
)
def test_resample_timedelta_edge_case(start, end, freq, resample_freq):
# GH 33498
    # check that the timedelta bins do not contain an extra bin
idx = timedelta_range(start=start, end=end, freq=freq)
s = Series(np.arange(len(idx)), index=idx)
result = s.resample(resample_freq).min()
expected_index = timedelta_range(freq=resample_freq, start=start, end=end)
tm.assert_index_equal(result.index, expected_index)
assert result.index.freq == expected_index.freq
assert not np.isnan(result[-1])
@pytest.mark.parametrize("duplicates", [True, False])
def test_resample_with_timedelta_yields_no_empty_groups(duplicates):
# GH 10603
df = DataFrame(
np.random.normal(size=(10000, 4)),
index=timedelta_range(start="0s", periods=10000, freq="3906250n"),
)
if duplicates:
# case with non-unique columns
df.columns = ["A", "B", "A", "C"]
result = df.loc["1s":, :].resample("3s").apply(lambda x: len(x))
expected = DataFrame(
[[768] * 4] * 12 + [[528] * 4],
index=timedelta_range(start="1s", periods=13, freq="3s"),
)
expected.columns = df.columns
tm.assert_frame_equal(result, expected)
def test_resample_quantile_timedelta():
# GH: 29485
df = DataFrame(
{"value": pd.to_timedelta(np.arange(4), unit="s")},
index=pd.date_range("20200101", periods=4, tz="UTC"),
)
result = df.resample("2D").quantile(0.99)
expected = DataFrame(
{
"value": [
pd.Timedelta("0 days 00:00:00.990000"),
pd.Timedelta("0 days 00:00:02.990000"),
]
},
index=pd.date_range("20200101", periods=2, tz="UTC", freq="2D"),
)
tm.assert_frame_equal(result, expected)