first commit

This commit is contained in:
Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

View File

@ -0,0 +1,7 @@
"""
Test files dedicated to individual (stand-alone) Series methods
Ideally these files/tests should correspond 1-to-1 with tests.frame.methods
These may also present opportunities for sharing/de-duplicating test code.
"""

View File

@ -0,0 +1,186 @@
import numpy as np
import pytest
import pytz
import pandas as pd
from pandas import (
Series,
date_range,
period_range,
)
import pandas._testing as tm
@pytest.mark.parametrize(
"first_slice,second_slice",
[
[[2, None], [None, -5]],
[[None, 0], [None, -5]],
[[None, -5], [None, 0]],
[[None, 0], [None, 0]],
],
)
@pytest.mark.parametrize("fill", [None, -1])
def test_align(datetime_series, first_slice, second_slice, join_type, fill):
a = datetime_series[slice(*first_slice)]
b = datetime_series[slice(*second_slice)]
aa, ab = a.align(b, join=join_type, fill_value=fill)
join_index = a.index.join(b.index, how=join_type)
if fill is not None:
diff_a = aa.index.difference(join_index)
diff_b = ab.index.difference(join_index)
if len(diff_a) > 0:
assert (aa.reindex(diff_a) == fill).all()
if len(diff_b) > 0:
assert (ab.reindex(diff_b) == fill).all()
ea = a.reindex(join_index)
eb = b.reindex(join_index)
if fill is not None:
ea = ea.fillna(fill)
eb = eb.fillna(fill)
tm.assert_series_equal(aa, ea)
tm.assert_series_equal(ab, eb)
assert aa.name == "ts"
assert ea.name == "ts"
assert ab.name == "ts"
assert eb.name == "ts"
@pytest.mark.parametrize(
"first_slice,second_slice",
[
[[2, None], [None, -5]],
[[None, 0], [None, -5]],
[[None, -5], [None, 0]],
[[None, 0], [None, 0]],
],
)
@pytest.mark.parametrize("method", ["pad", "bfill"])
@pytest.mark.parametrize("limit", [None, 1])
def test_align_fill_method(
datetime_series, first_slice, second_slice, join_type, method, limit
):
a = datetime_series[slice(*first_slice)]
b = datetime_series[slice(*second_slice)]
aa, ab = a.align(b, join=join_type, method=method, limit=limit)
join_index = a.index.join(b.index, how=join_type)
ea = a.reindex(join_index)
eb = b.reindex(join_index)
ea = ea.fillna(method=method, limit=limit)
eb = eb.fillna(method=method, limit=limit)
tm.assert_series_equal(aa, ea)
tm.assert_series_equal(ab, eb)
def test_align_nocopy(datetime_series):
b = datetime_series[:5].copy()
# do copy
a = datetime_series.copy()
ra, _ = a.align(b, join="left")
ra[:5] = 5
assert not (a[:5] == 5).any()
# do not copy
a = datetime_series.copy()
ra, _ = a.align(b, join="left", copy=False)
ra[:5] = 5
assert (a[:5] == 5).all()
# do copy
a = datetime_series.copy()
b = datetime_series[:5].copy()
_, rb = a.align(b, join="right")
rb[:3] = 5
assert not (b[:3] == 5).any()
# do not copy
a = datetime_series.copy()
b = datetime_series[:5].copy()
_, rb = a.align(b, join="right", copy=False)
rb[:2] = 5
assert (b[:2] == 5).all()
def test_align_same_index(datetime_series):
a, b = datetime_series.align(datetime_series, copy=False)
assert a.index is datetime_series.index
assert b.index is datetime_series.index
a, b = datetime_series.align(datetime_series, copy=True)
assert a.index is not datetime_series.index
assert b.index is not datetime_series.index
def test_align_multiindex():
# GH 10665
midx = pd.MultiIndex.from_product(
[range(2), range(3), range(2)], names=("a", "b", "c")
)
idx = pd.Index(range(2), name="b")
s1 = Series(np.arange(12, dtype="int64"), index=midx)
s2 = Series(np.arange(2, dtype="int64"), index=idx)
# these must be the same results (but flipped)
res1l, res1r = s1.align(s2, join="left")
res2l, res2r = s2.align(s1, join="right")
expl = s1
tm.assert_series_equal(expl, res1l)
tm.assert_series_equal(expl, res2r)
expr = Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
tm.assert_series_equal(expr, res1r)
tm.assert_series_equal(expr, res2l)
res1l, res1r = s1.align(s2, join="right")
res2l, res2r = s2.align(s1, join="left")
exp_idx = pd.MultiIndex.from_product(
[range(2), range(2), range(2)], names=("a", "b", "c")
)
expl = Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
tm.assert_series_equal(expl, res1l)
tm.assert_series_equal(expl, res2r)
expr = Series([0, 0, 1, 1] * 2, index=exp_idx)
tm.assert_series_equal(expr, res1r)
tm.assert_series_equal(expr, res2l)
@pytest.mark.parametrize("method", ["backfill", "bfill", "pad", "ffill", None])
def test_align_with_dataframe_method(method):
# GH31788
ser = Series(range(3), index=range(3))
df = pd.DataFrame(0.0, index=range(3), columns=range(3))
result_ser, result_df = ser.align(df, method=method)
tm.assert_series_equal(result_ser, ser)
tm.assert_frame_equal(result_df, df)
def test_align_dt64tzindex_mismatched_tzs():
idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern")
ser = Series(np.random.randn(len(idx1)), index=idx1)
ser_central = ser.tz_convert("US/Central")
# different timezones convert to UTC
new1, new2 = ser.align(ser_central)
assert new1.index.tz == pytz.UTC
assert new2.index.tz == pytz.UTC
def test_align_periodindex(join_type):
rng = period_range("1/1/2000", "1/1/2010", freq="A")
ts = Series(np.random.randn(len(rng)), index=rng)
# TODO: assert something?
ts.align(ts[::2], join=join_type)

View File

@ -0,0 +1,271 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Index,
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestSeriesAppend:
def test_append_preserve_name(self, datetime_series):
result = datetime_series[:5]._append(datetime_series[5:])
assert result.name == datetime_series.name
def test_append(self, datetime_series, string_series, object_series):
appended_series = string_series._append(object_series)
for idx, value in appended_series.items():
if idx in string_series.index:
assert value == string_series[idx]
elif idx in object_series.index:
assert value == object_series[idx]
else:
raise AssertionError("orphaned index!")
msg = "Indexes have overlapping values:"
with pytest.raises(ValueError, match=msg):
datetime_series._append(datetime_series, verify_integrity=True)
def test_append_many(self, datetime_series):
pieces = [datetime_series[:5], datetime_series[5:10], datetime_series[10:]]
result = pieces[0]._append(pieces[1:])
tm.assert_series_equal(result, datetime_series)
def test_append_duplicates(self):
# GH 13677
s1 = Series([1, 2, 3])
s2 = Series([4, 5, 6])
exp = Series([1, 2, 3, 4, 5, 6], index=[0, 1, 2, 0, 1, 2])
tm.assert_series_equal(s1._append(s2), exp)
tm.assert_series_equal(pd.concat([s1, s2]), exp)
# the result must have RangeIndex
exp = Series([1, 2, 3, 4, 5, 6])
tm.assert_series_equal(
s1._append(s2, ignore_index=True), exp, check_index_type=True
)
tm.assert_series_equal(
pd.concat([s1, s2], ignore_index=True), exp, check_index_type=True
)
msg = "Indexes have overlapping values:"
with pytest.raises(ValueError, match=msg):
s1._append(s2, verify_integrity=True)
with pytest.raises(ValueError, match=msg):
pd.concat([s1, s2], verify_integrity=True)
def test_append_tuples(self):
# GH 28410
s = Series([1, 2, 3])
list_input = [s, s]
tuple_input = (s, s)
expected = s._append(list_input)
result = s._append(tuple_input)
tm.assert_series_equal(expected, result)
def test_append_dataframe_raises(self):
# GH 31413
df = DataFrame({"A": [1, 2], "B": [3, 4]})
msg = "to_append should be a Series or list/tuple of Series, got DataFrame"
with pytest.raises(TypeError, match=msg):
df.A._append(df)
with pytest.raises(TypeError, match=msg):
df.A._append([df])
def test_append_raises_future_warning(self):
# GH#35407
with tm.assert_produces_warning(FutureWarning):
Series([1, 2]).append(Series([3, 4]))
class TestSeriesAppendWithDatetimeIndex:
def test_append(self):
rng = date_range("5/8/2012 1:45", periods=10, freq="5T")
ts = Series(np.random.randn(len(rng)), rng)
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
result = ts._append(ts)
result_df = df._append(df)
ex_index = DatetimeIndex(np.tile(rng.values, 2))
tm.assert_index_equal(result.index, ex_index)
tm.assert_index_equal(result_df.index, ex_index)
appended = rng.append(rng)
tm.assert_index_equal(appended, ex_index)
appended = rng.append([rng, rng])
ex_index = DatetimeIndex(np.tile(rng.values, 3))
tm.assert_index_equal(appended, ex_index)
# different index names
rng1 = rng.copy()
rng2 = rng.copy()
rng1.name = "foo"
rng2.name = "bar"
assert rng1.append(rng1).name == "foo"
assert rng1.append(rng2).name is None
def test_append_tz(self):
# see gh-2938
rng = date_range("5/8/2012 1:45", periods=10, freq="5T", tz="US/Eastern")
rng2 = date_range("5/8/2012 2:35", periods=10, freq="5T", tz="US/Eastern")
rng3 = date_range("5/8/2012 1:45", periods=20, freq="5T", tz="US/Eastern")
ts = Series(np.random.randn(len(rng)), rng)
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
ts2 = Series(np.random.randn(len(rng2)), rng2)
df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
result = ts._append(ts2)
result_df = df._append(df2)
tm.assert_index_equal(result.index, rng3)
tm.assert_index_equal(result_df.index, rng3)
appended = rng.append(rng2)
tm.assert_index_equal(appended, rng3)
def test_append_tz_explicit_pytz(self):
# see gh-2938
from pytz import timezone as timezone
rng = date_range(
"5/8/2012 1:45", periods=10, freq="5T", tz=timezone("US/Eastern")
)
rng2 = date_range(
"5/8/2012 2:35", periods=10, freq="5T", tz=timezone("US/Eastern")
)
rng3 = date_range(
"5/8/2012 1:45", periods=20, freq="5T", tz=timezone("US/Eastern")
)
ts = Series(np.random.randn(len(rng)), rng)
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
ts2 = Series(np.random.randn(len(rng2)), rng2)
df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
result = ts._append(ts2)
result_df = df._append(df2)
tm.assert_index_equal(result.index, rng3)
tm.assert_index_equal(result_df.index, rng3)
appended = rng.append(rng2)
tm.assert_index_equal(appended, rng3)
def test_append_tz_dateutil(self):
# see gh-2938
rng = date_range(
"5/8/2012 1:45", periods=10, freq="5T", tz="dateutil/US/Eastern"
)
rng2 = date_range(
"5/8/2012 2:35", periods=10, freq="5T", tz="dateutil/US/Eastern"
)
rng3 = date_range(
"5/8/2012 1:45", periods=20, freq="5T", tz="dateutil/US/Eastern"
)
ts = Series(np.random.randn(len(rng)), rng)
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
ts2 = Series(np.random.randn(len(rng2)), rng2)
df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2)
result = ts._append(ts2)
result_df = df._append(df2)
tm.assert_index_equal(result.index, rng3)
tm.assert_index_equal(result_df.index, rng3)
appended = rng.append(rng2)
tm.assert_index_equal(appended, rng3)
def test_series_append_aware(self):
rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern")
rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern")
ser1 = Series([1], index=rng1)
ser2 = Series([2], index=rng2)
ts_result = ser1._append(ser2)
exp_index = DatetimeIndex(
["2011-01-01 01:00", "2011-01-01 02:00"], tz="US/Eastern", freq="H"
)
exp = Series([1, 2], index=exp_index)
tm.assert_series_equal(ts_result, exp)
assert ts_result.index.tz == rng1.tz
rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="UTC")
rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="UTC")
ser1 = Series([1], index=rng1)
ser2 = Series([2], index=rng2)
ts_result = ser1._append(ser2)
exp_index = DatetimeIndex(
["2011-01-01 01:00", "2011-01-01 02:00"], tz="UTC", freq="H"
)
exp = Series([1, 2], index=exp_index)
tm.assert_series_equal(ts_result, exp)
utc = rng1.tz
assert utc == ts_result.index.tz
# GH#7795
# different tz coerces to object dtype, not UTC
rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern")
rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Central")
ser1 = Series([1], index=rng1)
ser2 = Series([2], index=rng2)
ts_result = ser1._append(ser2)
exp_index = Index(
[
Timestamp("1/1/2011 01:00", tz="US/Eastern"),
Timestamp("1/1/2011 02:00", tz="US/Central"),
]
)
exp = Series([1, 2], index=exp_index)
tm.assert_series_equal(ts_result, exp)
def test_series_append_aware_naive(self):
rng1 = date_range("1/1/2011 01:00", periods=1, freq="H")
rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern")
ser1 = Series(np.random.randn(len(rng1)), index=rng1)
ser2 = Series(np.random.randn(len(rng2)), index=rng2)
ts_result = ser1._append(ser2)
expected = ser1.index.astype(object).append(ser2.index.astype(object))
assert ts_result.index.equals(expected)
# mixed
rng1 = date_range("1/1/2011 01:00", periods=1, freq="H")
rng2 = range(100)
ser1 = Series(np.random.randn(len(rng1)), index=rng1)
ser2 = Series(np.random.randn(len(rng2)), index=rng2)
ts_result = ser1._append(ser2)
expected = ser1.index.astype(object).append(ser2.index)
assert ts_result.index.equals(expected)
def test_series_append_dst(self):
rng1 = date_range("1/1/2016 01:00", periods=3, freq="H", tz="US/Eastern")
rng2 = date_range("8/1/2016 01:00", periods=3, freq="H", tz="US/Eastern")
ser1 = Series([1, 2, 3], index=rng1)
ser2 = Series([10, 11, 12], index=rng2)
ts_result = ser1._append(ser2)
exp_index = DatetimeIndex(
[
"2016-01-01 01:00",
"2016-01-01 02:00",
"2016-01-01 03:00",
"2016-08-01 01:00",
"2016-08-01 02:00",
"2016-08-01 03:00",
],
tz="US/Eastern",
)
exp = Series([1, 2, 3, 10, 11, 12], index=exp_index)
tm.assert_series_equal(ts_result, exp)
assert ts_result.index.tz == rng1.tz

View File

@ -0,0 +1,67 @@
import numpy as np
import pytest
from pandas import (
Series,
Timestamp,
isna,
)
import pandas._testing as tm
class TestSeriesArgsort:
def _check_accum_op(self, name, ser, check_dtype=True):
func = getattr(np, name)
tm.assert_numpy_array_equal(
func(ser).values, func(np.array(ser)), check_dtype=check_dtype
)
# with missing values
ts = ser.copy()
ts[::2] = np.NaN
result = func(ts)[1::2]
expected = func(np.array(ts.dropna()))
tm.assert_numpy_array_equal(result.values, expected, check_dtype=False)
def test_argsort(self, datetime_series):
self._check_accum_op("argsort", datetime_series, check_dtype=False)
argsorted = datetime_series.argsort()
assert issubclass(argsorted.dtype.type, np.integer)
# GH#2967 (introduced bug in 0.11-dev I think)
s = Series([Timestamp(f"201301{i:02d}") for i in range(1, 6)])
assert s.dtype == "datetime64[ns]"
shifted = s.shift(-1)
assert shifted.dtype == "datetime64[ns]"
assert isna(shifted[4])
result = s.argsort()
expected = Series(range(5), dtype=np.intp)
tm.assert_series_equal(result, expected)
result = shifted.argsort()
expected = Series(list(range(4)) + [-1], dtype=np.intp)
tm.assert_series_equal(result, expected)
def test_argsort_stable(self):
s = Series(np.random.randint(0, 100, size=10000))
mindexer = s.argsort(kind="mergesort")
qindexer = s.argsort()
mexpected = np.argsort(s.values, kind="mergesort")
qexpected = np.argsort(s.values, kind="quicksort")
tm.assert_series_equal(mindexer.astype(np.intp), Series(mexpected))
tm.assert_series_equal(qindexer.astype(np.intp), Series(qexpected))
msg = (
r"ndarray Expected type <class 'numpy\.ndarray'>, "
r"found <class 'pandas\.core\.series\.Series'> instead"
)
with pytest.raises(AssertionError, match=msg):
tm.assert_numpy_array_equal(qindexer, mindexer)
def test_argsort_preserve_name(self, datetime_series):
result = datetime_series.argsort()
assert result.name == datetime_series.name

View File

@ -0,0 +1,207 @@
import numpy as np
import pytest
from pandas._libs.tslibs import IncompatibleFrequency
from pandas import (
DatetimeIndex,
Series,
Timestamp,
date_range,
isna,
notna,
offsets,
)
import pandas._testing as tm
class TestSeriesAsof:
def test_asof_nanosecond_index_access(self):
ts = Timestamp("20130101").value
dti = DatetimeIndex([ts + 50 + i for i in range(100)])
ser = Series(np.random.randn(100), index=dti)
first_value = ser.asof(ser.index[0])
# this used to not work bc parsing was done by dateutil that didn't
# handle nanoseconds
assert first_value == ser["2013-01-01 00:00:00.000000050+0000"]
expected_ts = np.datetime64("2013-01-01 00:00:00.000000050", "ns")
assert first_value == ser[Timestamp(expected_ts)]
def test_basic(self):
# array or list or dates
N = 50
rng = date_range("1/1/1990", periods=N, freq="53s")
ts = Series(np.random.randn(N), index=rng)
ts.iloc[15:30] = np.nan
dates = date_range("1/1/1990", periods=N * 3, freq="25s")
result = ts.asof(dates)
assert notna(result).all()
lb = ts.index[14]
ub = ts.index[30]
result = ts.asof(list(dates))
assert notna(result).all()
lb = ts.index[14]
ub = ts.index[30]
mask = (result.index >= lb) & (result.index < ub)
rs = result[mask]
assert (rs == ts[lb]).all()
val = result[result.index[result.index >= ub][0]]
assert ts[ub] == val
def test_scalar(self):
N = 30
rng = date_range("1/1/1990", periods=N, freq="53s")
ts = Series(np.arange(N), index=rng)
ts.iloc[5:10] = np.NaN
ts.iloc[15:20] = np.NaN
val1 = ts.asof(ts.index[7])
val2 = ts.asof(ts.index[19])
assert val1 == ts[4]
assert val2 == ts[14]
# accepts strings
val1 = ts.asof(str(ts.index[7]))
assert val1 == ts[4]
# in there
result = ts.asof(ts.index[3])
assert result == ts[3]
# no as of value
d = ts.index[0] - offsets.BDay()
assert np.isnan(ts.asof(d))
def test_with_nan(self):
# basic asof test
rng = date_range("1/1/2000", "1/2/2000", freq="4h")
s = Series(np.arange(len(rng)), index=rng)
r = s.resample("2h").mean()
result = r.asof(r.index)
expected = Series(
[0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6.0],
index=date_range("1/1/2000", "1/2/2000", freq="2h"),
)
tm.assert_series_equal(result, expected)
r.iloc[3:5] = np.nan
result = r.asof(r.index)
expected = Series(
[0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 5, 5, 6.0],
index=date_range("1/1/2000", "1/2/2000", freq="2h"),
)
tm.assert_series_equal(result, expected)
r.iloc[-3:] = np.nan
result = r.asof(r.index)
expected = Series(
[0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 4.0],
index=date_range("1/1/2000", "1/2/2000", freq="2h"),
)
tm.assert_series_equal(result, expected)
def test_periodindex(self):
from pandas import (
PeriodIndex,
period_range,
)
# array or list or dates
N = 50
rng = period_range("1/1/1990", periods=N, freq="H")
ts = Series(np.random.randn(N), index=rng)
ts.iloc[15:30] = np.nan
dates = date_range("1/1/1990", periods=N * 3, freq="37min")
result = ts.asof(dates)
assert notna(result).all()
lb = ts.index[14]
ub = ts.index[30]
result = ts.asof(list(dates))
assert notna(result).all()
lb = ts.index[14]
ub = ts.index[30]
pix = PeriodIndex(result.index.values, freq="H")
mask = (pix >= lb) & (pix < ub)
rs = result[mask]
assert (rs == ts[lb]).all()
ts.iloc[5:10] = np.nan
ts.iloc[15:20] = np.nan
val1 = ts.asof(ts.index[7])
val2 = ts.asof(ts.index[19])
assert val1 == ts[4]
assert val2 == ts[14]
# accepts strings
val1 = ts.asof(str(ts.index[7]))
assert val1 == ts[4]
# in there
assert ts.asof(ts.index[3]) == ts[3]
# no as of value
d = ts.index[0].to_timestamp() - offsets.BDay()
assert isna(ts.asof(d))
# Mismatched freq
msg = "Input has different freq"
with pytest.raises(IncompatibleFrequency, match=msg):
ts.asof(rng.asfreq("D"))
def test_errors(self):
s = Series(
[1, 2, 3],
index=[Timestamp("20130101"), Timestamp("20130103"), Timestamp("20130102")],
)
# non-monotonic
assert not s.index.is_monotonic
with pytest.raises(ValueError, match="requires a sorted index"):
s.asof(s.index[0])
# subset with Series
N = 10
rng = date_range("1/1/1990", periods=N, freq="53s")
s = Series(np.random.randn(N), index=rng)
with pytest.raises(ValueError, match="not valid for Series"):
s.asof(s.index[0], subset="foo")
def test_all_nans(self):
# GH 15713
# series is all nans
# testing non-default indexes
N = 50
rng = date_range("1/1/1990", periods=N, freq="53s")
dates = date_range("1/1/1990", periods=N * 3, freq="25s")
result = Series(np.nan, index=rng).asof(dates)
expected = Series(np.nan, index=dates)
tm.assert_series_equal(result, expected)
# testing scalar input
date = date_range("1/1/1990", periods=N * 3, freq="25s")[0]
result = Series(np.nan, index=rng).asof(date)
assert isna(result)
# test name is propagated
result = Series(np.nan, index=[1, 2, 3, 4], name="test").asof([4, 5])
expected = Series(np.nan, index=[4, 5], name="test")
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,586 @@
from datetime import (
datetime,
timedelta,
)
from importlib import reload
import string
import sys
import numpy as np
import pytest
from pandas._libs.tslibs import iNaT
import pandas.util._test_decorators as td
from pandas import (
NA,
Categorical,
CategoricalDtype,
DatetimeTZDtype,
Index,
Interval,
NaT,
Series,
Timedelta,
Timestamp,
cut,
date_range,
)
import pandas._testing as tm
class TestAstypeAPI:
def test_arg_for_errors_in_astype(self):
# see GH#14878
ser = Series([1, 2, 3])
msg = (
r"Expected value of kwarg 'errors' to be one of \['raise', "
r"'ignore'\]\. Supplied value is 'False'"
)
with pytest.raises(ValueError, match=msg):
ser.astype(np.float64, errors=False)
ser.astype(np.int8, errors="raise")
@pytest.mark.parametrize("dtype_class", [dict, Series])
def test_astype_dict_like(self, dtype_class):
# see GH#7271
ser = Series(range(0, 10, 2), name="abc")
dt1 = dtype_class({"abc": str})
result = ser.astype(dt1)
expected = Series(["0", "2", "4", "6", "8"], name="abc")
tm.assert_series_equal(result, expected)
dt2 = dtype_class({"abc": "float64"})
result = ser.astype(dt2)
expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype="float64", name="abc")
tm.assert_series_equal(result, expected)
dt3 = dtype_class({"abc": str, "def": str})
msg = (
"Only the Series name can be used for the key in Series dtype "
r"mappings\."
)
with pytest.raises(KeyError, match=msg):
ser.astype(dt3)
dt4 = dtype_class({0: str})
with pytest.raises(KeyError, match=msg):
ser.astype(dt4)
# GH#16717
# if dtypes provided is empty, it should error
if dtype_class is Series:
dt5 = dtype_class({}, dtype=object)
else:
dt5 = dtype_class({})
with pytest.raises(KeyError, match=msg):
ser.astype(dt5)
class TestAstype:
@pytest.mark.parametrize("dtype", np.typecodes["All"])
def test_astype_empty_constructor_equality(self, dtype):
# see GH#15524
if dtype not in (
"S",
"V", # poor support (if any) currently
"M",
"m", # Generic timestamps raise a ValueError. Already tested.
):
init_empty = Series([], dtype=dtype)
with tm.assert_produces_warning(FutureWarning):
as_type_empty = Series([]).astype(dtype)
tm.assert_series_equal(init_empty, as_type_empty)
@pytest.mark.parametrize("dtype", [str, np.str_])
@pytest.mark.parametrize(
"series",
[
Series([string.digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
Series([string.digits * 10, tm.rands(63), tm.rands(64), np.nan, 1.0]),
],
)
def test_astype_str_map(self, dtype, series):
# see GH#4405
result = series.astype(dtype)
expected = series.map(str)
tm.assert_series_equal(result, expected)
def test_astype_float_to_period(self):
result = Series([np.nan]).astype("period[D]")
expected = Series([NaT], dtype="period[D]")
tm.assert_series_equal(result, expected)
def test_astype_no_pandas_dtype(self):
# https://github.com/pandas-dev/pandas/pull/24866
ser = Series([1, 2], dtype="int64")
# Don't have PandasDtype in the public API, so we use `.array.dtype`,
# which is a PandasDtype.
result = ser.astype(ser.array.dtype)
tm.assert_series_equal(result, ser)
@pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64])
def test_astype_generic_timestamp_no_frequency(self, dtype, request):
# see GH#15524, GH#15987
data = [1]
ser = Series(data)
if np.dtype(dtype).name not in ["timedelta64", "datetime64"]:
mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit")
request.node.add_marker(mark)
msg = (
rf"The '{dtype.__name__}' dtype has no unit\. "
rf"Please pass in '{dtype.__name__}\[ns\]' instead."
)
with pytest.raises(ValueError, match=msg):
ser.astype(dtype)
def test_astype_dt64_to_str(self):
# GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
dti = date_range("2012-01-01", periods=3)
result = Series(dti).astype(str)
expected = Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype=object)
tm.assert_series_equal(result, expected)
def test_astype_dt64tz_to_str(self):
# GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
dti_tz = date_range("2012-01-01", periods=3, tz="US/Eastern")
result = Series(dti_tz).astype(str)
expected = Series(
[
"2012-01-01 00:00:00-05:00",
"2012-01-02 00:00:00-05:00",
"2012-01-03 00:00:00-05:00",
],
dtype=object,
)
tm.assert_series_equal(result, expected)
def test_astype_datetime(self):
ser = Series(iNaT, dtype="M8[ns]", index=range(5))
ser = ser.astype("O")
assert ser.dtype == np.object_
ser = Series([datetime(2001, 1, 2, 0, 0)])
ser = ser.astype("O")
assert ser.dtype == np.object_
ser = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)])
ser[1] = np.nan
assert ser.dtype == "M8[ns]"
ser = ser.astype("O")
assert ser.dtype == np.object_
def test_astype_datetime64tz(self):
ser = Series(date_range("20130101", periods=3, tz="US/Eastern"))
# astype
result = ser.astype(object)
expected = Series(ser.astype(object), dtype=object)
tm.assert_series_equal(result, expected)
result = Series(ser.values).dt.tz_localize("UTC").dt.tz_convert(ser.dt.tz)
tm.assert_series_equal(result, ser)
# astype - object, preserves on construction
result = Series(ser.astype(object))
expected = ser.astype(object)
tm.assert_series_equal(result, expected)
# astype - datetime64[ns, tz]
with tm.assert_produces_warning(FutureWarning):
# dt64->dt64tz astype deprecated
result = Series(ser.values).astype("datetime64[ns, US/Eastern]")
tm.assert_series_equal(result, ser)
with tm.assert_produces_warning(FutureWarning):
# dt64->dt64tz astype deprecated
result = Series(ser.values).astype(ser.dtype)
tm.assert_series_equal(result, ser)
result = ser.astype("datetime64[ns, CET]")
expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET"))
tm.assert_series_equal(result, expected)
def test_astype_str_cast_dt64(self):
# see GH#9757
ts = Series([Timestamp("2010-01-04 00:00:00")])
res = ts.astype(str)
expected = Series(["2010-01-04"])
tm.assert_series_equal(res, expected)
ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")])
res = ts.astype(str)
expected = Series(["2010-01-04 00:00:00-05:00"])
tm.assert_series_equal(res, expected)
def test_astype_str_cast_td64(self):
# see GH#9757
td = Series([Timedelta(1, unit="d")])
ser = td.astype(str)
expected = Series(["1 days"])
tm.assert_series_equal(ser, expected)
def test_dt64_series_astype_object(self):
dt64ser = Series(date_range("20130101", periods=3))
result = dt64ser.astype(object)
assert isinstance(result.iloc[0], datetime)
assert result.dtype == np.object_
def test_td64_series_astype_object(self):
tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]")
result = tdser.astype(object)
assert isinstance(result.iloc[0], timedelta)
assert result.dtype == np.object_
@pytest.mark.parametrize(
"data, dtype",
[
(["x", "y", "z"], "string[python]"),
pytest.param(
["x", "y", "z"],
"string[pyarrow]",
marks=td.skip_if_no("pyarrow", min_version="1.0.0"),
),
(["x", "y", "z"], "category"),
(3 * [Timestamp("2020-01-01", tz="UTC")], None),
(3 * [Interval(0, 1)], None),
],
)
@pytest.mark.parametrize("errors", ["raise", "ignore"])
def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors):
# https://github.com/pandas-dev/pandas/issues/35471
ser = Series(data, dtype=dtype)
if errors == "ignore":
expected = ser
result = ser.astype(float, errors="ignore")
tm.assert_series_equal(result, expected)
else:
msg = "(Cannot cast)|(could not convert)"
with pytest.raises((ValueError, TypeError), match=msg):
ser.astype(float, errors=errors)
@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
def test_astype_from_float_to_str(self, dtype):
# https://github.com/pandas-dev/pandas/issues/36451
ser = Series([0.1], dtype=dtype)
result = ser.astype(str)
expected = Series(["0.1"])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"value, string_value",
[
(None, "None"),
(np.nan, "nan"),
(NA, "<NA>"),
],
)
def test_astype_to_str_preserves_na(self, value, string_value):
# https://github.com/pandas-dev/pandas/issues/36904
ser = Series(["a", "b", value], dtype=object)
result = ser.astype(str)
expected = Series(["a", "b", string_value], dtype=object)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"])
def test_astype(self, dtype):
ser = Series(np.random.randn(5), name="foo")
as_typed = ser.astype(dtype)
assert as_typed.dtype == dtype
assert as_typed.name == ser.name
@pytest.mark.parametrize("value", [np.nan, np.inf])
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
def test_astype_cast_nan_inf_int(self, dtype, value):
# gh-14265: check NaN and inf raise error when converting to int
msg = "Cannot convert non-finite values \\(NA or inf\\) to integer"
ser = Series([value])
with pytest.raises(ValueError, match=msg):
ser.astype(dtype)
@pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
def test_astype_cast_object_int_fail(self, dtype):
arr = Series(["car", "house", "tree", "1"])
msg = r"invalid literal for int\(\) with base 10: 'car'"
with pytest.raises(ValueError, match=msg):
arr.astype(dtype)
def test_astype_cast_object_int(self):
arr = Series(["1", "2", "3", "4"], dtype=object)
result = arr.astype(int)
tm.assert_series_equal(result, Series(np.arange(1, 5)))
def test_astype_unicode(self):
# see GH#7758: A bit of magic is required to set
# default encoding to utf-8
digits = string.digits
test_series = [
Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
Series(["データーサイエンス、お前はもう死んでいる"]),
]
former_encoding = None
if sys.getdefaultencoding() == "utf-8":
test_series.append(Series(["野菜食べないとやばい".encode()]))
for ser in test_series:
res = ser.astype("unicode")
expec = ser.map(str)
tm.assert_series_equal(res, expec)
# Restore the former encoding
if former_encoding is not None and former_encoding != "utf-8":
reload(sys)
sys.setdefaultencoding(former_encoding)
def test_astype_bytes(self):
# GH#39474
result = Series(["foo", "bar", "baz"]).astype(bytes)
assert result.dtypes == np.dtype("S3")
def test_astype_nan_to_bool(self):
# GH#43018
ser = Series(np.nan, dtype="object")
result = ser.astype("bool")
expected = Series(True, dtype="bool")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"dtype",
tm.ALL_INT_EA_DTYPES + tm.FLOAT_EA_DTYPES,
)
def test_astype_ea_to_datetimetzdtype(self, dtype):
# GH37553
result = Series([4, 0, 9], dtype=dtype).astype(DatetimeTZDtype(tz="US/Pacific"))
expected = Series(
{
0: Timestamp("1969-12-31 16:00:00.000000004-08:00", tz="US/Pacific"),
1: Timestamp("1969-12-31 16:00:00.000000000-08:00", tz="US/Pacific"),
2: Timestamp("1969-12-31 16:00:00.000000009-08:00", tz="US/Pacific"),
}
)
if dtype in tm.FLOAT_EA_DTYPES:
expected = Series(
{
0: Timestamp(
"1970-01-01 00:00:00.000000004-08:00", tz="US/Pacific"
),
1: Timestamp(
"1970-01-01 00:00:00.000000000-08:00", tz="US/Pacific"
),
2: Timestamp(
"1970-01-01 00:00:00.000000009-08:00", tz="US/Pacific"
),
}
)
tm.assert_series_equal(result, expected)
def test_astype_retain_Attrs(self, any_numpy_dtype):
# GH#44414
ser = Series([0, 1, 2, 3])
ser.attrs["Location"] = "Michigan"
result = ser.astype(any_numpy_dtype).attrs
expected = ser.attrs
tm.assert_dict_equal(expected, result)
class TestAstypeString:
@pytest.mark.parametrize(
"data, dtype",
[
([True, NA], "boolean"),
(["A", NA], "category"),
(["2020-10-10", "2020-10-10"], "datetime64[ns]"),
(["2020-10-10", "2020-10-10", NaT], "datetime64[ns]"),
(
["2012-01-01 00:00:00-05:00", NaT],
"datetime64[ns, US/Eastern]",
),
([1, None], "UInt16"),
(["1/1/2021", "2/1/2021"], "period[M]"),
(["1/1/2021", "2/1/2021", NaT], "period[M]"),
(["1 Day", "59 Days", NaT], "timedelta64[ns]"),
# currently no way to parse IntervalArray from a list of strings
],
)
def test_astype_string_to_extension_dtype_roundtrip(
self, data, dtype, request, nullable_string_dtype
):
if dtype == "boolean" or (
dtype in ("period[M]", "datetime64[ns]", "timedelta64[ns]") and NaT in data
):
mark = pytest.mark.xfail(
reason="TODO StringArray.astype() with missing values #GH40566"
)
request.node.add_marker(mark)
# GH-40351
ser = Series(data, dtype=dtype)
# Note: just passing .astype(dtype) fails for dtype="category"
# with bc ser.dtype.categories will be object dtype whereas
# result.dtype.categories will have string dtype
result = ser.astype(nullable_string_dtype).astype(ser.dtype)
tm.assert_series_equal(result, ser)
class TestAstypeCategorical:
def test_astype_categorical_to_other(self):
cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)])
ser = Series(np.random.RandomState(0).randint(0, 10000, 100)).sort_values()
ser = cut(ser, range(0, 10500, 500), right=False, labels=cat)
expected = ser
tm.assert_series_equal(ser.astype("category"), expected)
tm.assert_series_equal(ser.astype(CategoricalDtype()), expected)
msg = r"Cannot cast object dtype to float64"
with pytest.raises(ValueError, match=msg):
ser.astype("float64")
cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]))
exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"])
tm.assert_series_equal(cat.astype("str"), exp)
s2 = Series(Categorical(["1", "2", "3", "4"]))
exp2 = Series([1, 2, 3, 4]).astype("int")
tm.assert_series_equal(s2.astype("int"), exp2)
# object don't sort correctly, so just compare that we have the same
# values
def cmp(a, b):
tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b)))
expected = Series(np.array(ser.values), name="value_group")
cmp(ser.astype("object"), expected)
cmp(ser.astype(np.object_), expected)
# array conversion
tm.assert_almost_equal(np.array(ser), np.array(ser.values))
tm.assert_series_equal(ser.astype("category"), ser)
tm.assert_series_equal(ser.astype(CategoricalDtype()), ser)
roundtrip_expected = ser.cat.set_categories(
ser.cat.categories.sort_values()
).cat.remove_unused_categories()
result = ser.astype("object").astype("category")
tm.assert_series_equal(result, roundtrip_expected)
result = ser.astype("object").astype(CategoricalDtype())
tm.assert_series_equal(result, roundtrip_expected)
def test_astype_categorical_invalid_conversions(self):
# invalid conversion (these are NOT a dtype)
cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)])
ser = Series(np.random.randint(0, 10000, 100)).sort_values()
ser = cut(ser, range(0, 10500, 500), right=False, labels=cat)
msg = (
"dtype '<class 'pandas.core.arrays.categorical.Categorical'>' "
"not understood"
)
with pytest.raises(TypeError, match=msg):
ser.astype(Categorical)
with pytest.raises(TypeError, match=msg):
ser.astype("object").astype(Categorical)
def test_astype_categoricaldtype(self):
ser = Series(["a", "b", "a"])
result = ser.astype(CategoricalDtype(["a", "b"], ordered=True))
expected = Series(Categorical(["a", "b", "a"], ordered=True))
tm.assert_series_equal(result, expected)
result = ser.astype(CategoricalDtype(["a", "b"], ordered=False))
expected = Series(Categorical(["a", "b", "a"], ordered=False))
tm.assert_series_equal(result, expected)
result = ser.astype(CategoricalDtype(["a", "b", "c"], ordered=False))
expected = Series(
Categorical(["a", "b", "a"], categories=["a", "b", "c"], ordered=False)
)
tm.assert_series_equal(result, expected)
tm.assert_index_equal(result.cat.categories, Index(["a", "b", "c"]))
@pytest.mark.parametrize("name", [None, "foo"])
@pytest.mark.parametrize("dtype_ordered", [True, False])
@pytest.mark.parametrize("series_ordered", [True, False])
def test_astype_categorical_to_categorical(
self, name, dtype_ordered, series_ordered
):
# GH#10696, GH#18593
s_data = list("abcaacbab")
s_dtype = CategoricalDtype(list("bac"), ordered=series_ordered)
ser = Series(s_data, dtype=s_dtype, name=name)
# unspecified categories
dtype = CategoricalDtype(ordered=dtype_ordered)
result = ser.astype(dtype)
exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered)
expected = Series(s_data, name=name, dtype=exp_dtype)
tm.assert_series_equal(result, expected)
# different categories
dtype = CategoricalDtype(list("adc"), dtype_ordered)
result = ser.astype(dtype)
expected = Series(s_data, name=name, dtype=dtype)
tm.assert_series_equal(result, expected)
if dtype_ordered is False:
# not specifying ordered, so only test once
expected = ser
result = ser.astype("category")
tm.assert_series_equal(result, expected)
def test_astype_bool_missing_to_categorical(self):
# GH-19182
ser = Series([True, False, np.nan])
assert ser.dtypes == np.object_
result = ser.astype(CategoricalDtype(categories=[True, False]))
expected = Series(Categorical([True, False, np.nan], categories=[True, False]))
tm.assert_series_equal(result, expected)
def test_astype_categories_raises(self):
# deprecated GH#17636, removed in GH#27141
ser = Series(["a", "b", "a"])
with pytest.raises(TypeError, match="got an unexpected"):
ser.astype("category", categories=["a", "b"], ordered=True)
@pytest.mark.parametrize("items", [["a", "b", "c", "a"], [1, 2, 3, 1]])
def test_astype_from_categorical(self, items):
ser = Series(items)
exp = Series(Categorical(items))
res = ser.astype("category")
tm.assert_series_equal(res, exp)
def test_astype_from_categorical_with_keywords(self):
# with keywords
lst = ["a", "b", "c", "a"]
ser = Series(lst)
exp = Series(Categorical(lst, ordered=True))
res = ser.astype(CategoricalDtype(None, ordered=True))
tm.assert_series_equal(res, exp)
exp = Series(Categorical(lst, categories=list("abcdef"), ordered=True))
res = ser.astype(CategoricalDtype(list("abcdef"), ordered=True))
tm.assert_series_equal(res, exp)

View File

@ -0,0 +1,30 @@
import numpy as np
class TestAutoCorr:
def test_autocorr(self, datetime_series):
# Just run the function
corr1 = datetime_series.autocorr()
# Now run it with the lag parameter
corr2 = datetime_series.autocorr(lag=1)
# corr() with lag needs Series of at least length 2
if len(datetime_series) <= 2:
assert np.isnan(corr1)
assert np.isnan(corr2)
else:
assert corr1 == corr2
# Choose a random lag between 1 and length of Series - 2
# and compare the result with the Series corr() function
n = 1 + np.random.randint(max(1, len(datetime_series) - 2))
corr1 = datetime_series.corr(datetime_series.shift(n))
corr2 = datetime_series.autocorr(lag=n)
# corr() with lag needs Series of at least length 2
if len(datetime_series) <= 2:
assert np.isnan(corr1)
assert np.isnan(corr2)
else:
assert corr1 == corr2

View File

@ -0,0 +1,84 @@
import numpy as np
import pytest
from pandas import (
Series,
bdate_range,
date_range,
period_range,
)
import pandas._testing as tm
class TestBetween:
def test_between(self):
series = Series(date_range("1/1/2000", periods=10))
left, right = series[[2, 7]]
result = series.between(left, right)
expected = (series >= left) & (series <= right)
tm.assert_series_equal(result, expected)
def test_between_datetime_object_dtype(self):
ser = Series(bdate_range("1/1/2000", periods=20).astype(object))
ser[::2] = np.nan
result = ser[ser.between(ser[3], ser[17])]
expected = ser[3:18].dropna()
tm.assert_series_equal(result, expected)
result = ser[ser.between(ser[3], ser[17], inclusive="neither")]
expected = ser[5:16].dropna()
tm.assert_series_equal(result, expected)
def test_between_period_values(self):
ser = Series(period_range("2000-01-01", periods=10, freq="D"))
left, right = ser[[2, 7]]
result = ser.between(left, right)
expected = (ser >= left) & (ser <= right)
tm.assert_series_equal(result, expected)
def test_between_inclusive_string(self): # :issue:`40628`
series = Series(date_range("1/1/2000", periods=10))
left, right = series[[2, 7]]
result = series.between(left, right, inclusive="both")
expected = (series >= left) & (series <= right)
tm.assert_series_equal(result, expected)
result = series.between(left, right, inclusive="left")
expected = (series >= left) & (series < right)
tm.assert_series_equal(result, expected)
result = series.between(left, right, inclusive="right")
expected = (series > left) & (series <= right)
tm.assert_series_equal(result, expected)
result = series.between(left, right, inclusive="neither")
expected = (series > left) & (series < right)
tm.assert_series_equal(result, expected)
def test_between_error_args(self): # :issue:`40628`
series = Series(date_range("1/1/2000", periods=10))
left, right = series[[2, 7]]
value_error_msg = (
"Inclusive has to be either string of 'both',"
"'left', 'right', or 'neither'."
)
with pytest.raises(ValueError, match=value_error_msg):
series = Series(date_range("1/1/2000", periods=10))
series.between(left, right, inclusive="yes")
def test_between_inclusive_warning(self):
series = Series(date_range("1/1/2000", periods=10))
left, right = series[[2, 7]]
with tm.assert_produces_warning(FutureWarning):
result = series.between(left, right, inclusive=False)
expected = (series > left) & (series < right)
tm.assert_series_equal(result, expected)
with tm.assert_produces_warning(FutureWarning):
result = series.between(left, right, inclusive=True)
expected = (series >= left) & (series <= right)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,151 @@
from datetime import datetime
import numpy as np
import pytest
import pandas as pd
from pandas import (
Series,
Timestamp,
isna,
notna,
)
import pandas._testing as tm
class TestSeriesClip:
def test_clip(self, datetime_series):
val = datetime_series.median()
assert datetime_series.clip(lower=val).min() == val
assert datetime_series.clip(upper=val).max() == val
result = datetime_series.clip(-0.5, 0.5)
expected = np.clip(datetime_series, -0.5, 0.5)
tm.assert_series_equal(result, expected)
assert isinstance(expected, Series)
def test_clip_types_and_nulls(self):
sers = [
Series([np.nan, 1.0, 2.0, 3.0]),
Series([None, "a", "b", "c"]),
Series(pd.to_datetime([np.nan, 1, 2, 3], unit="D")),
]
for s in sers:
thresh = s[2]
lower = s.clip(lower=thresh)
upper = s.clip(upper=thresh)
assert lower[notna(lower)].min() == thresh
assert upper[notna(upper)].max() == thresh
assert list(isna(s)) == list(isna(lower))
assert list(isna(s)) == list(isna(upper))
def test_series_clipping_with_na_values(self, any_numeric_ea_dtype, nulls_fixture):
# Ensure that clipping method can handle NA values with out failing
# GH#40581
if nulls_fixture is pd.NaT:
# constructor will raise, see
# test_constructor_mismatched_null_nullable_dtype
return
ser = Series([nulls_fixture, 1.0, 3.0], dtype=any_numeric_ea_dtype)
s_clipped_upper = ser.clip(upper=2.0)
s_clipped_lower = ser.clip(lower=2.0)
expected_upper = Series([nulls_fixture, 1.0, 2.0], dtype=any_numeric_ea_dtype)
expected_lower = Series([nulls_fixture, 2.0, 3.0], dtype=any_numeric_ea_dtype)
tm.assert_series_equal(s_clipped_upper, expected_upper)
tm.assert_series_equal(s_clipped_lower, expected_lower)
def test_clip_with_na_args(self):
"""Should process np.nan argument as None"""
# GH#17276
s = Series([1, 2, 3])
tm.assert_series_equal(s.clip(np.nan), Series([1, 2, 3]))
tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3]))
# GH#19992
tm.assert_series_equal(s.clip(lower=[0, 4, np.nan]), Series([1, 4, 3]))
tm.assert_series_equal(s.clip(upper=[1, np.nan, 1]), Series([1, 2, 1]))
# GH#40420
s = Series([1, 2, 3])
result = s.clip(0, [np.nan, np.nan, np.nan])
tm.assert_series_equal(s, result)
def test_clip_against_series(self):
# GH#6966
s = Series([1.0, 1.0, 4.0])
lower = Series([1.0, 2.0, 3.0])
upper = Series([1.5, 2.5, 3.5])
tm.assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5]))
tm.assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5]))
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])])
def test_clip_against_list_like(self, inplace, upper):
# GH#15390
original = Series([5, 6, 7])
result = original.clip(upper=upper, inplace=inplace)
expected = Series([1, 2, 3])
if inplace:
result = original
tm.assert_series_equal(result, expected, check_exact=True)
def test_clip_with_datetimes(self):
# GH#11838
# naive and tz-aware datetimes
t = Timestamp("2015-12-01 09:30:30")
s = Series([Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:31:00")])
result = s.clip(upper=t)
expected = Series(
[Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:30:30")]
)
tm.assert_series_equal(result, expected)
t = Timestamp("2015-12-01 09:30:30", tz="US/Eastern")
s = Series(
[
Timestamp("2015-12-01 09:30:00", tz="US/Eastern"),
Timestamp("2015-12-01 09:31:00", tz="US/Eastern"),
]
)
result = s.clip(upper=t)
expected = Series(
[
Timestamp("2015-12-01 09:30:00", tz="US/Eastern"),
Timestamp("2015-12-01 09:30:30", tz="US/Eastern"),
]
)
tm.assert_series_equal(result, expected)
def test_clip_with_timestamps_and_oob_datetimes(self):
# GH-42794
ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)])
result = ser.clip(lower=Timestamp.min, upper=Timestamp.max)
expected = Series([Timestamp.min, Timestamp.max], dtype="object")
tm.assert_series_equal(result, expected)
def test_clip_pos_args_deprecation(self):
# https://github.com/pandas-dev/pandas/issues/41485
ser = Series([1, 2, 3])
msg = (
r"In a future version of pandas all arguments of Series.clip except "
r"for the arguments 'lower' and 'upper' will be keyword-only"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.clip(0, 1, 0)
expected = Series([1, 1, 1])
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,17 @@
from pandas import Series
import pandas._testing as tm
class TestCombine:
def test_combine_scalar(self):
# GH#21248
# Note - combine() with another Series is tested elsewhere because
# it is used when testing operators
ser = Series([i * 10 for i in range(5)])
result = ser.combine(3, lambda x, y: x + y)
expected = Series([i * 10 + 3 for i in range(5)])
tm.assert_series_equal(result, expected)
result = ser.combine(22, lambda x, y: min(x, y))
expected = Series([min(i * 10, 22) for i in range(5)])
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,104 @@
from datetime import datetime
import numpy as np
import pandas as pd
from pandas import (
Period,
Series,
date_range,
period_range,
to_datetime,
)
import pandas._testing as tm
class TestCombineFirst:
def test_combine_first_period_datetime(self):
# GH#3367
didx = date_range(start="1950-01-31", end="1950-07-31", freq="M")
pidx = period_range(start=Period("1950-1"), end=Period("1950-7"), freq="M")
# check to be consistent with DatetimeIndex
for idx in [didx, pidx]:
a = Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx)
b = Series([9, 9, 9, 9, 9, 9, 9], index=idx)
result = a.combine_first(b)
expected = Series([1, 9, 9, 4, 5, 9, 7], index=idx, dtype=np.float64)
tm.assert_series_equal(result, expected)
def test_combine_first_name(self, datetime_series):
result = datetime_series.combine_first(datetime_series[:5])
assert result.name == datetime_series.name
def test_combine_first(self):
values = tm.makeIntIndex(20).values.astype(float)
series = Series(values, index=tm.makeIntIndex(20))
series_copy = series * 2
series_copy[::2] = np.NaN
# nothing used from the input
combined = series.combine_first(series_copy)
tm.assert_series_equal(combined, series)
# Holes filled from input
combined = series_copy.combine_first(series)
assert np.isfinite(combined).all()
tm.assert_series_equal(combined[::2], series[::2])
tm.assert_series_equal(combined[1::2], series_copy[1::2])
# mixed types
index = tm.makeStringIndex(20)
floats = Series(np.random.randn(20), index=index)
strings = Series(tm.makeStringIndex(10), index=index[::2])
combined = strings.combine_first(floats)
tm.assert_series_equal(strings, combined.loc[index[::2]])
tm.assert_series_equal(floats[1::2].astype(object), combined.loc[index[1::2]])
# corner case
ser = Series([1.0, 2, 3], index=[0, 1, 2])
empty = Series([], index=[], dtype=object)
result = ser.combine_first(empty)
ser.index = ser.index.astype("O")
tm.assert_series_equal(ser, result)
def test_combine_first_dt64(self):
s0 = to_datetime(Series(["2010", np.NaN]))
s1 = to_datetime(Series([np.NaN, "2011"]))
rs = s0.combine_first(s1)
xp = to_datetime(Series(["2010", "2011"]))
tm.assert_series_equal(rs, xp)
s0 = to_datetime(Series(["2010", np.NaN]))
s1 = Series([np.NaN, "2011"])
rs = s0.combine_first(s1)
msg = "containing strings is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
xp = Series([datetime(2010, 1, 1), "2011"])
tm.assert_series_equal(rs, xp)
def test_combine_first_dt_tz_values(self, tz_naive_fixture):
ser1 = Series(
pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture),
name="ser1",
)
ser2 = Series(
pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture),
index=[2, 3, 4],
name="ser2",
)
result = ser1.combine_first(ser2)
exp_vals = pd.DatetimeIndex(
["20150101", "20150102", "20150103", "20160515", "20160516"],
tz=tz_naive_fixture,
)
exp = Series(exp_vals, name="ser1")
tm.assert_series_equal(exp, result)

View File

@ -0,0 +1,116 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
@pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"])
def test_compare_axis(align_axis):
# GH#30429
s1 = pd.Series(["a", "b", "c"])
s2 = pd.Series(["x", "b", "z"])
result = s1.compare(s2, align_axis=align_axis)
if align_axis in (1, "columns"):
indices = pd.Index([0, 2])
columns = pd.Index(["self", "other"])
expected = pd.DataFrame(
[["a", "x"], ["c", "z"]], index=indices, columns=columns
)
tm.assert_frame_equal(result, expected)
else:
indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
expected = pd.Series(["a", "x", "c", "z"], index=indices)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"keep_shape, keep_equal",
[
(True, False),
(False, True),
(True, True),
# False, False case is already covered in test_compare_axis
],
)
def test_compare_various_formats(keep_shape, keep_equal):
s1 = pd.Series(["a", "b", "c"])
s2 = pd.Series(["x", "b", "z"])
result = s1.compare(s2, keep_shape=keep_shape, keep_equal=keep_equal)
if keep_shape:
indices = pd.Index([0, 1, 2])
columns = pd.Index(["self", "other"])
if keep_equal:
expected = pd.DataFrame(
[["a", "x"], ["b", "b"], ["c", "z"]], index=indices, columns=columns
)
else:
expected = pd.DataFrame(
[["a", "x"], [np.nan, np.nan], ["c", "z"]],
index=indices,
columns=columns,
)
else:
indices = pd.Index([0, 2])
columns = pd.Index(["self", "other"])
expected = pd.DataFrame(
[["a", "x"], ["c", "z"]], index=indices, columns=columns
)
tm.assert_frame_equal(result, expected)
def test_compare_with_equal_nulls():
# We want to make sure two NaNs are considered the same
# and dropped where applicable
s1 = pd.Series(["a", "b", np.nan])
s2 = pd.Series(["x", "b", np.nan])
result = s1.compare(s2)
expected = pd.DataFrame([["a", "x"]], columns=["self", "other"])
tm.assert_frame_equal(result, expected)
def test_compare_with_non_equal_nulls():
# We want to make sure the relevant NaNs do not get dropped
s1 = pd.Series(["a", "b", "c"])
s2 = pd.Series(["x", "b", np.nan])
result = s1.compare(s2, align_axis=0)
indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
expected = pd.Series(["a", "x", "c", np.nan], index=indices)
tm.assert_series_equal(result, expected)
def test_compare_multi_index():
index = pd.MultiIndex.from_arrays([[0, 0, 1], [0, 1, 2]])
s1 = pd.Series(["a", "b", "c"], index=index)
s2 = pd.Series(["x", "b", "z"], index=index)
result = s1.compare(s2, align_axis=0)
indices = pd.MultiIndex.from_arrays(
[[0, 0, 1, 1], [0, 0, 2, 2], ["self", "other", "self", "other"]]
)
expected = pd.Series(["a", "x", "c", "z"], index=indices)
tm.assert_series_equal(result, expected)
def test_compare_unaligned_objects():
# test Series with different indices
msg = "Can only compare identically-labeled Series objects"
with pytest.raises(ValueError, match=msg):
ser1 = pd.Series([1, 2, 3], index=["a", "b", "c"])
ser2 = pd.Series([1, 2, 3], index=["a", "b", "d"])
ser1.compare(ser2)
# test Series with different lengths
msg = "Can only compare identically-labeled Series objects"
with pytest.raises(ValueError, match=msg):
ser1 = pd.Series([1, 2, 3])
ser2 = pd.Series([1, 2, 3, 4])
ser1.compare(ser2)

View File

@ -0,0 +1,139 @@
from datetime import datetime
import numpy as np
import pytest
from pandas import (
Series,
Timestamp,
)
import pandas._testing as tm
class TestConvert:
def test_convert(self):
# GH#10265
dt = datetime(2001, 1, 1, 0, 0)
td = dt - datetime(2000, 1, 1, 0, 0)
# Test coercion with mixed types
ser = Series(["a", "3.1415", dt, td])
results = ser._convert(numeric=True)
expected = Series([np.nan, 3.1415, np.nan, np.nan])
tm.assert_series_equal(results, expected)
# Test standard conversion returns original
results = ser._convert(datetime=True)
tm.assert_series_equal(results, ser)
results = ser._convert(numeric=True)
expected = Series([np.nan, 3.1415, np.nan, np.nan])
tm.assert_series_equal(results, expected)
results = ser._convert(timedelta=True)
tm.assert_series_equal(results, ser)
def test_convert_numeric_strings_with_other_true_args(self):
# test pass-through and non-conversion when other types selected
ser = Series(["1.0", "2.0", "3.0"])
results = ser._convert(datetime=True, numeric=True, timedelta=True)
expected = Series([1.0, 2.0, 3.0])
tm.assert_series_equal(results, expected)
results = ser._convert(True, False, True)
tm.assert_series_equal(results, ser)
def test_convert_datetime_objects(self):
ser = Series(
[datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)], dtype="O"
)
results = ser._convert(datetime=True, numeric=True, timedelta=True)
expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)])
tm.assert_series_equal(results, expected)
results = ser._convert(datetime=False, numeric=True, timedelta=True)
tm.assert_series_equal(results, ser)
def test_convert_datetime64(self):
# no-op if already dt64 dtype
ser = Series(
[
datetime(2001, 1, 1, 0, 0),
datetime(2001, 1, 2, 0, 0),
datetime(2001, 1, 3, 0, 0),
]
)
result = ser._convert(datetime=True)
expected = Series(
[Timestamp("20010101"), Timestamp("20010102"), Timestamp("20010103")],
dtype="M8[ns]",
)
tm.assert_series_equal(result, expected)
result = ser._convert(datetime=True)
tm.assert_series_equal(result, expected)
def test_convert_timedeltas(self):
td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0)
ser = Series([td, td], dtype="O")
results = ser._convert(datetime=True, numeric=True, timedelta=True)
expected = Series([td, td])
tm.assert_series_equal(results, expected)
results = ser._convert(True, True, False)
tm.assert_series_equal(results, ser)
def test_convert_numeric_strings(self):
ser = Series([1.0, 2, 3], index=["a", "b", "c"])
result = ser._convert(numeric=True)
tm.assert_series_equal(result, ser)
# force numeric conversion
res = ser.copy().astype("O")
res["a"] = "1"
result = res._convert(numeric=True)
tm.assert_series_equal(result, ser)
res = ser.copy().astype("O")
res["a"] = "1."
result = res._convert(numeric=True)
tm.assert_series_equal(result, ser)
res = ser.copy().astype("O")
res["a"] = "garbled"
result = res._convert(numeric=True)
expected = ser.copy()
expected["a"] = np.nan
tm.assert_series_equal(result, expected)
def test_convert_mixed_type_noop(self):
# GH 4119, not converting a mixed type (e.g.floats and object)
ser = Series([1, "na", 3, 4])
result = ser._convert(datetime=True, numeric=True)
expected = Series([1, np.nan, 3, 4])
tm.assert_series_equal(result, expected)
ser = Series([1, "", 3, 4])
result = ser._convert(datetime=True, numeric=True)
tm.assert_series_equal(result, expected)
def test_convert_preserve_non_object(self):
# preserve if non-object
ser = Series([1], dtype="float32")
result = ser._convert(datetime=True)
tm.assert_series_equal(result, ser)
def test_convert_no_arg_error(self):
ser = Series(["1.0", "2"])
msg = r"At least one of datetime, numeric or timedelta must be True\."
with pytest.raises(ValueError, match=msg):
ser._convert()
def test_convert_preserve_bool(self):
ser = Series([1, True, 3, 5], dtype=object)
res = ser._convert(datetime=True, numeric=True)
expected = Series([1, 1, 3, 5], dtype="i8")
tm.assert_series_equal(res, expected)
def test_convert_preserve_all_bool(self):
ser = Series([False, True, False, False], dtype=object)
res = ser._convert(datetime=True, numeric=True)
expected = Series([False, True, False, False], dtype=bool)
tm.assert_series_equal(res, expected)

View File

@ -0,0 +1,237 @@
from itertools import product
import numpy as np
import pytest
from pandas.core.dtypes.common import is_interval_dtype
import pandas as pd
import pandas._testing as tm
# Each test case consists of a tuple with the data and dtype to create the
# test Series, the default dtype for the expected result (which is valid
# for most cases), and the specific cases where the result deviates from
# this default. Those overrides are defined as a dict with (keyword, val) as
# dictionary key. In case of multiple items, the last override takes precedence.
test_cases = [
(
# data
[1, 2, 3],
# original dtype
np.dtype("int32"),
# default expected dtype
"Int32",
# exceptions on expected dtype
{("convert_integer", False): np.dtype("int32")},
),
(
[1, 2, 3],
np.dtype("int64"),
"Int64",
{("convert_integer", False): np.dtype("int64")},
),
(
["x", "y", "z"],
np.dtype("O"),
pd.StringDtype(),
{("convert_string", False): np.dtype("O")},
),
(
[True, False, np.nan],
np.dtype("O"),
pd.BooleanDtype(),
{("convert_boolean", False): np.dtype("O")},
),
(
["h", "i", np.nan],
np.dtype("O"),
pd.StringDtype(),
{("convert_string", False): np.dtype("O")},
),
( # GH32117
["h", "i", 1],
np.dtype("O"),
np.dtype("O"),
{},
),
(
[10, np.nan, 20],
np.dtype("float"),
"Int64",
{
("convert_integer", False, "convert_floating", True): "Float64",
("convert_integer", False, "convert_floating", False): np.dtype("float"),
},
),
(
[np.nan, 100.5, 200],
np.dtype("float"),
"Float64",
{("convert_floating", False): np.dtype("float")},
),
(
[3, 4, 5],
"Int8",
"Int8",
{},
),
(
[[1, 2], [3, 4], [5]],
None,
np.dtype("O"),
{},
),
(
[4, 5, 6],
np.dtype("uint32"),
"UInt32",
{("convert_integer", False): np.dtype("uint32")},
),
(
[-10, 12, 13],
np.dtype("i1"),
"Int8",
{("convert_integer", False): np.dtype("i1")},
),
(
[1.2, 1.3],
np.dtype("float32"),
"Float32",
{("convert_floating", False): np.dtype("float32")},
),
(
[1, 2.0],
object,
"Int64",
{
("convert_integer", False): "Float64",
("convert_integer", False, "convert_floating", False): np.dtype("float"),
("infer_objects", False): np.dtype("object"),
},
),
(
[1, 2.5],
object,
"Float64",
{
("convert_floating", False): np.dtype("float"),
("infer_objects", False): np.dtype("object"),
},
),
(["a", "b"], pd.CategoricalDtype(), pd.CategoricalDtype(), {}),
(
pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
pd.DatetimeTZDtype(tz="UTC"),
pd.DatetimeTZDtype(tz="UTC"),
{},
),
(
pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
"datetime64[ns]",
np.dtype("datetime64[ns]"),
{},
),
(
pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
object,
np.dtype("datetime64[ns]"),
{("infer_objects", False): np.dtype("object")},
),
(pd.period_range("1/1/2011", freq="M", periods=3), None, pd.PeriodDtype("M"), {}),
(
pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]),
None,
pd.IntervalDtype("int64", "right"),
{},
),
]
class TestSeriesConvertDtypes:
@pytest.mark.parametrize(
"data, maindtype, expected_default, expected_other",
test_cases,
)
@pytest.mark.parametrize("params", product(*[(True, False)] * 5))
def test_convert_dtypes(
self, data, maindtype, params, expected_default, expected_other
):
warn = None
if (
hasattr(data, "dtype")
and data.dtype == "M8[ns]"
and isinstance(maindtype, pd.DatetimeTZDtype)
):
# this astype is deprecated in favor of tz_localize
warn = FutureWarning
if maindtype is not None:
with tm.assert_produces_warning(warn):
series = pd.Series(data, dtype=maindtype)
else:
series = pd.Series(data)
result = series.convert_dtypes(*params)
param_names = [
"infer_objects",
"convert_string",
"convert_integer",
"convert_boolean",
"convert_floating",
]
params_dict = dict(zip(param_names, params))
expected_dtype = expected_default
for spec, dtype in expected_other.items():
if all(params_dict[key] is val for key, val in zip(spec[::2], spec[1::2])):
expected_dtype = dtype
warn2 = None
if (
hasattr(data, "dtype")
and data.dtype == "M8[ns]"
and isinstance(expected_dtype, pd.DatetimeTZDtype)
):
# this astype is deprecated in favor of tz_localize
warn2 = FutureWarning
with tm.assert_produces_warning(warn2):
expected = pd.Series(data, dtype=expected_dtype)
tm.assert_series_equal(result, expected)
# Test that it is a copy
copy = series.copy(deep=True)
if is_interval_dtype(result.dtype) and result.dtype.subtype.kind in ["i", "u"]:
msg = "Cannot set float NaN to integer-backed IntervalArray"
with pytest.raises(ValueError, match=msg):
result[result.notna()] = np.nan
else:
result[result.notna()] = np.nan
# Make sure original not changed
tm.assert_series_equal(series, copy)
def test_convert_string_dtype(self, nullable_string_dtype):
# https://github.com/pandas-dev/pandas/issues/31731 -> converting columns
# that are already string dtype
df = pd.DataFrame(
{"A": ["a", "b", pd.NA], "B": ["ä", "ö", "ü"]}, dtype=nullable_string_dtype
)
result = df.convert_dtypes()
tm.assert_frame_equal(df, result)
def test_convert_bool_dtype(self):
# GH32287
df = pd.DataFrame({"A": pd.array([True])})
tm.assert_frame_equal(df, df.convert_dtypes())
def test_convert_byte_string_dtype(self):
# GH-43183
byte_str = b"binary-string"
df = pd.DataFrame(data={"A": byte_str}, index=[0])
result = df.convert_dtypes()
expected = df
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,74 @@
import numpy as np
import pytest
from pandas import (
Series,
Timestamp,
)
import pandas._testing as tm
class TestCopy:
@pytest.mark.parametrize("deep", [None, False, True])
def test_copy(self, deep):
ser = Series(np.arange(10), dtype="float64")
# default deep is True
if deep is None:
ser2 = ser.copy()
else:
ser2 = ser.copy(deep=deep)
ser2[::2] = np.NaN
if deep is None or deep is True:
# Did not modify original Series
assert np.isnan(ser2[0])
assert not np.isnan(ser[0])
else:
# we DID modify the original Series
assert np.isnan(ser2[0])
assert np.isnan(ser[0])
@pytest.mark.parametrize("deep", [None, False, True])
def test_copy_tzaware(self, deep):
# GH#11794
# copy of tz-aware
expected = Series([Timestamp("2012/01/01", tz="UTC")])
expected2 = Series([Timestamp("1999/01/01", tz="UTC")])
ser = Series([Timestamp("2012/01/01", tz="UTC")])
if deep is None:
ser2 = ser.copy()
else:
ser2 = ser.copy(deep=deep)
ser2[0] = Timestamp("1999/01/01", tz="UTC")
# default deep is True
if deep is None or deep is True:
# Did not modify original Series
tm.assert_series_equal(ser2, expected2)
tm.assert_series_equal(ser, expected)
else:
# we DID modify the original Series
tm.assert_series_equal(ser2, expected2)
tm.assert_series_equal(ser, expected2)
def test_copy_name(self, datetime_series):
result = datetime_series.copy()
assert result.name == datetime_series.name
def test_copy_index_name_checking(self, datetime_series):
# don't want to be able to modify the index stored elsewhere after
# making a copy
datetime_series.index.name = None
assert datetime_series.index.name is None
assert datetime_series is datetime_series
cp = datetime_series.copy()
cp.index.name = "foo"
assert datetime_series.index.name is None

View File

@ -0,0 +1,98 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Categorical,
MultiIndex,
Series,
)
import pandas._testing as tm
class TestSeriesCount:
def test_count_level_series(self):
index = MultiIndex(
levels=[["foo", "bar", "baz"], ["one", "two", "three", "four"]],
codes=[[0, 0, 0, 2, 2], [2, 0, 1, 1, 2]],
)
ser = Series(np.random.randn(len(index)), index=index)
with tm.assert_produces_warning(FutureWarning):
result = ser.count(level=0)
expected = ser.groupby(level=0).count()
tm.assert_series_equal(
result.astype("f8"), expected.reindex(result.index).fillna(0)
)
with tm.assert_produces_warning(FutureWarning):
result = ser.count(level=1)
expected = ser.groupby(level=1).count()
tm.assert_series_equal(
result.astype("f8"), expected.reindex(result.index).fillna(0)
)
def test_count_multiindex(self, series_with_multilevel_index):
ser = series_with_multilevel_index
series = ser.copy()
series.index.names = ["a", "b"]
with tm.assert_produces_warning(FutureWarning):
result = series.count(level="b")
with tm.assert_produces_warning(FutureWarning):
expect = ser.count(level=1).rename_axis("b")
tm.assert_series_equal(result, expect)
with tm.assert_produces_warning(FutureWarning):
result = series.count(level="a")
with tm.assert_produces_warning(FutureWarning):
expect = ser.count(level=0).rename_axis("a")
tm.assert_series_equal(result, expect)
msg = "Level x not found"
with pytest.raises(KeyError, match=msg):
with tm.assert_produces_warning(FutureWarning):
series.count("x")
def test_count_level_without_multiindex(self):
ser = Series(range(3))
msg = "Series.count level is only valid with a MultiIndex"
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(FutureWarning):
ser.count(level=1)
def test_count(self, datetime_series):
assert datetime_series.count() == len(datetime_series)
datetime_series[::2] = np.NaN
assert datetime_series.count() == np.isfinite(datetime_series).sum()
mi = MultiIndex.from_arrays([list("aabbcc"), [1, 2, 2, np.nan, 1, 2]])
ts = Series(np.arange(len(mi)), index=mi)
with tm.assert_produces_warning(FutureWarning):
left = ts.count(level=1)
right = Series([2, 3, 1], index=[1, 2, np.nan])
tm.assert_series_equal(left, right)
ts.iloc[[0, 3, 5]] = np.nan
with tm.assert_produces_warning(FutureWarning):
tm.assert_series_equal(ts.count(level=1), right - 1)
# GH#29478
with pd.option_context("use_inf_as_na", True):
assert Series([pd.Timestamp("1990/1/1")]).count() == 1
def test_count_categorical(self):
ser = Series(
Categorical(
[np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True
)
)
result = ser.count()
assert result == 2

View File

@ -0,0 +1,176 @@
import math
import numpy as np
import pytest
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
Series,
isna,
)
import pandas._testing as tm
class TestSeriesCov:
def test_cov(self, datetime_series):
# full overlap
tm.assert_almost_equal(
datetime_series.cov(datetime_series), datetime_series.std() ** 2
)
# partial overlap
tm.assert_almost_equal(
datetime_series[:15].cov(datetime_series[5:]),
datetime_series[5:15].std() ** 2,
)
# No overlap
assert np.isnan(datetime_series[::2].cov(datetime_series[1::2]))
# all NA
cp = datetime_series[:10].copy()
cp[:] = np.nan
assert isna(cp.cov(cp))
# min_periods
assert isna(datetime_series[:15].cov(datetime_series[5:], min_periods=12))
ts1 = datetime_series[:15].reindex(datetime_series.index)
ts2 = datetime_series[5:].reindex(datetime_series.index)
assert isna(ts1.cov(ts2, min_periods=12))
@pytest.mark.parametrize("test_ddof", [None, 0, 1, 2, 3])
def test_cov_ddof(self, test_ddof):
# GH#34611
np_array1 = np.random.rand(10)
np_array2 = np.random.rand(10)
s1 = Series(np_array1)
s2 = Series(np_array2)
result = s1.cov(s2, ddof=test_ddof)
expected = np.cov(np_array1, np_array2, ddof=test_ddof)[0][1]
assert math.isclose(expected, result)
class TestSeriesCorr:
@td.skip_if_no_scipy
def test_corr(self, datetime_series):
import scipy.stats as stats
# full overlap
tm.assert_almost_equal(datetime_series.corr(datetime_series), 1)
# partial overlap
tm.assert_almost_equal(datetime_series[:15].corr(datetime_series[5:]), 1)
assert isna(datetime_series[:15].corr(datetime_series[5:], min_periods=12))
ts1 = datetime_series[:15].reindex(datetime_series.index)
ts2 = datetime_series[5:].reindex(datetime_series.index)
assert isna(ts1.corr(ts2, min_periods=12))
# No overlap
assert np.isnan(datetime_series[::2].corr(datetime_series[1::2]))
# all NA
cp = datetime_series[:10].copy()
cp[:] = np.nan
assert isna(cp.corr(cp))
A = tm.makeTimeSeries()
B = tm.makeTimeSeries()
result = A.corr(B)
expected, _ = stats.pearsonr(A, B)
tm.assert_almost_equal(result, expected)
@td.skip_if_no_scipy
def test_corr_rank(self):
import scipy.stats as stats
# kendall and spearman
A = tm.makeTimeSeries()
B = tm.makeTimeSeries()
A[-5:] = A[:5]
result = A.corr(B, method="kendall")
expected = stats.kendalltau(A, B)[0]
tm.assert_almost_equal(result, expected)
result = A.corr(B, method="spearman")
expected = stats.spearmanr(A, B)[0]
tm.assert_almost_equal(result, expected)
# results from R
A = Series(
[
-0.89926396,
0.94209606,
-1.03289164,
-0.95445587,
0.76910310,
-0.06430576,
-2.09704447,
0.40660407,
-0.89926396,
0.94209606,
]
)
B = Series(
[
-1.01270225,
-0.62210117,
-1.56895827,
0.59592943,
-0.01680292,
1.17258718,
-1.06009347,
-0.10222060,
-0.89076239,
0.89372375,
]
)
kexp = 0.4319297
sexp = 0.5853767
tm.assert_almost_equal(A.corr(B, method="kendall"), kexp)
tm.assert_almost_equal(A.corr(B, method="spearman"), sexp)
def test_corr_invalid_method(self):
# GH PR #22298
s1 = Series(np.random.randn(10))
s2 = Series(np.random.randn(10))
msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, "
with pytest.raises(ValueError, match=msg):
s1.corr(s2, method="____")
def test_corr_callable_method(self, datetime_series):
# simple correlation example
# returns 1 if exact equality, 0 otherwise
my_corr = lambda a, b: 1.0 if (a == b).all() else 0.0
# simple example
s1 = Series([1, 2, 3, 4, 5])
s2 = Series([5, 4, 3, 2, 1])
expected = 0
tm.assert_almost_equal(s1.corr(s2, method=my_corr), expected)
# full overlap
tm.assert_almost_equal(
datetime_series.corr(datetime_series, method=my_corr), 1.0
)
# partial overlap
tm.assert_almost_equal(
datetime_series[:15].corr(datetime_series[5:], method=my_corr), 1.0
)
# No overlap
assert np.isnan(
datetime_series[::2].corr(datetime_series[1::2], method=my_corr)
)
# dataframe example
df = pd.DataFrame([s1, s2])
expected = pd.DataFrame([{0: 1.0, 1: 0}, {0: 0, 1: 1.0}])
tm.assert_almost_equal(df.transpose().corr(method=my_corr), expected)

View File

@ -0,0 +1,151 @@
import numpy as np
from pandas import (
Period,
Series,
Timedelta,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestSeriesDescribe:
def test_describe_ints(self):
ser = Series([0, 1, 2, 3, 4], name="int_data")
result = ser.describe()
expected = Series(
[5, 2, ser.std(), 0, 1, 2, 3, 4],
name="int_data",
index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
)
tm.assert_series_equal(result, expected)
def test_describe_bools(self):
ser = Series([True, True, False, False, False], name="bool_data")
result = ser.describe()
expected = Series(
[5, 2, False, 3], name="bool_data", index=["count", "unique", "top", "freq"]
)
tm.assert_series_equal(result, expected)
def test_describe_strs(self):
ser = Series(["a", "a", "b", "c", "d"], name="str_data")
result = ser.describe()
expected = Series(
[5, 4, "a", 2], name="str_data", index=["count", "unique", "top", "freq"]
)
tm.assert_series_equal(result, expected)
def test_describe_timedelta64(self):
ser = Series(
[
Timedelta("1 days"),
Timedelta("2 days"),
Timedelta("3 days"),
Timedelta("4 days"),
Timedelta("5 days"),
],
name="timedelta_data",
)
result = ser.describe()
expected = Series(
[5, ser[2], ser.std(), ser[0], ser[1], ser[2], ser[3], ser[4]],
name="timedelta_data",
index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
)
tm.assert_series_equal(result, expected)
def test_describe_period(self):
ser = Series(
[Period("2020-01", "M"), Period("2020-01", "M"), Period("2019-12", "M")],
name="period_data",
)
result = ser.describe()
expected = Series(
[3, 2, ser[0], 2],
name="period_data",
index=["count", "unique", "top", "freq"],
)
tm.assert_series_equal(result, expected)
def test_describe_empty_object(self):
# https://github.com/pandas-dev/pandas/issues/27183
s = Series([None, None], dtype=object)
result = s.describe()
expected = Series(
[0, 0, np.nan, np.nan],
dtype=object,
index=["count", "unique", "top", "freq"],
)
tm.assert_series_equal(result, expected)
result = s[:0].describe()
tm.assert_series_equal(result, expected)
# ensure NaN, not None
assert np.isnan(result.iloc[2])
assert np.isnan(result.iloc[3])
def test_describe_with_tz(self, tz_naive_fixture):
# GH 21332
tz = tz_naive_fixture
name = str(tz_naive_fixture)
start = Timestamp(2018, 1, 1)
end = Timestamp(2018, 1, 5)
s = Series(date_range(start, end, tz=tz), name=name)
result = s.describe(datetime_is_numeric=True)
expected = Series(
[
5,
Timestamp(2018, 1, 3).tz_localize(tz),
start.tz_localize(tz),
s[1],
s[2],
s[3],
end.tz_localize(tz),
],
name=name,
index=["count", "mean", "min", "25%", "50%", "75%", "max"],
)
tm.assert_series_equal(result, expected)
def test_describe_with_tz_warns(self):
name = tz = "CET"
start = Timestamp(2018, 1, 1)
end = Timestamp(2018, 1, 5)
s = Series(date_range(start, end, tz=tz), name=name)
with tm.assert_produces_warning(FutureWarning):
result = s.describe()
expected = Series(
[
5,
5,
s.value_counts().index[0],
1,
start.tz_localize(tz),
end.tz_localize(tz),
],
name=name,
index=["count", "unique", "top", "freq", "first", "last"],
)
tm.assert_series_equal(result, expected)
def test_datetime_is_numeric_includes_datetime(self):
s = Series(date_range("2012", periods=3))
result = s.describe(datetime_is_numeric=True)
expected = Series(
[
3,
Timestamp("2012-01-02"),
Timestamp("2012-01-01"),
Timestamp("2012-01-01T12:00:00"),
Timestamp("2012-01-02"),
Timestamp("2012-01-02T12:00:00"),
Timestamp("2012-01-03"),
],
index=["count", "mean", "min", "25%", "50%", "75%", "max"],
)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,84 @@
import numpy as np
import pytest
from pandas import (
Series,
TimedeltaIndex,
date_range,
)
import pandas._testing as tm
class TestSeriesDiff:
def test_diff_np(self):
# TODO(__array_function__): could make np.diff return a Series
# matching ser.diff()
ser = Series(np.arange(5))
res = np.diff(ser)
expected = np.array([1, 1, 1, 1])
tm.assert_numpy_array_equal(res, expected)
def test_diff_int(self):
# int dtype
a = 10000000000000000
b = a + 1
ser = Series([a, b])
result = ser.diff()
assert result[1] == 1
def test_diff_tz(self):
# Combined datetime diff, normal diff and boolean diff test
ts = tm.makeTimeSeries(name="ts")
ts.diff()
# neg n
result = ts.diff(-1)
expected = ts - ts.shift(-1)
tm.assert_series_equal(result, expected)
# 0
result = ts.diff(0)
expected = ts - ts
tm.assert_series_equal(result, expected)
def test_diff_dt64(self):
# datetime diff (GH#3100)
ser = Series(date_range("20130102", periods=5))
result = ser.diff()
expected = ser - ser.shift(1)
tm.assert_series_equal(result, expected)
# timedelta diff
result = result - result.shift(1) # previous result
expected = expected.diff() # previously expected
tm.assert_series_equal(result, expected)
def test_diff_dt64tz(self):
# with tz
ser = Series(
date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern"), name="foo"
)
result = ser.diff()
expected = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"input,output,diff",
[([False, True, True, False, False], [np.nan, True, False, True, False], 1)],
)
def test_diff_bool(self, input, output, diff):
# boolean series (test for fixing #17294)
ser = Series(input)
result = ser.diff()
expected = Series(output)
tm.assert_series_equal(result, expected)
def test_diff_object_dtype(self):
# object series
ser = Series([False, True, 5.0, np.nan, True, False])
result = ser.diff()
expected = ser - ser.shift(1)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,111 @@
import pytest
from pandas import (
Index,
Series,
)
import pandas._testing as tm
@pytest.mark.parametrize(
"data, index, drop_labels, axis, expected_data, expected_index",
[
# Unique Index
([1, 2], ["one", "two"], ["two"], 0, [1], ["one"]),
([1, 2], ["one", "two"], ["two"], "rows", [1], ["one"]),
([1, 1, 2], ["one", "two", "one"], ["two"], 0, [1, 2], ["one", "one"]),
# GH 5248 Non-Unique Index
([1, 1, 2], ["one", "two", "one"], "two", 0, [1, 2], ["one", "one"]),
([1, 1, 2], ["one", "two", "one"], ["one"], 0, [1], ["two"]),
([1, 1, 2], ["one", "two", "one"], "one", 0, [1], ["two"]),
],
)
def test_drop_unique_and_non_unique_index(
data, index, axis, drop_labels, expected_data, expected_index
):
s = Series(data=data, index=index)
result = s.drop(drop_labels, axis=axis)
expected = Series(data=expected_data, index=expected_index)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"data, index, drop_labels, axis, error_type, error_desc",
[
# single string/tuple-like
(range(3), list("abc"), "bc", 0, KeyError, "not found in axis"),
# bad axis
(range(3), list("abc"), ("a",), 0, KeyError, "not found in axis"),
(range(3), list("abc"), "one", "columns", ValueError, "No axis named columns"),
],
)
def test_drop_exception_raised(data, index, drop_labels, axis, error_type, error_desc):
ser = Series(data, index=index)
with pytest.raises(error_type, match=error_desc):
ser.drop(drop_labels, axis=axis)
def test_drop_with_ignore_errors():
# errors='ignore'
s = Series(range(3), index=list("abc"))
result = s.drop("bc", errors="ignore")
tm.assert_series_equal(result, s)
result = s.drop(["a", "d"], errors="ignore")
expected = s.iloc[1:]
tm.assert_series_equal(result, expected)
# GH 8522
s = Series([2, 3], index=[True, False])
assert s.index.is_object()
result = s.drop(True)
expected = Series([3], index=[False])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 3]])
@pytest.mark.parametrize("drop_labels", [[], [1], [3]])
def test_drop_empty_list(index, drop_labels):
# GH 21494
expected_index = [i for i in index if i not in drop_labels]
series = Series(index=index, dtype=object).drop(drop_labels)
expected = Series(index=expected_index, dtype=object)
tm.assert_series_equal(series, expected)
@pytest.mark.parametrize(
"data, index, drop_labels",
[
(None, [1, 2, 3], [1, 4]),
(None, [1, 2, 2], [1, 4]),
([2, 3], [0, 1], [False, True]),
],
)
def test_drop_non_empty_list(data, index, drop_labels):
# GH 21494 and GH 16877
dtype = object if data is None else None
ser = Series(data=data, index=index, dtype=dtype)
with pytest.raises(KeyError, match="not found in axis"):
ser.drop(drop_labels)
def test_drop_pos_args_deprecation():
# https://github.com/pandas-dev/pandas/issues/41485
ser = Series([1, 2, 3])
msg = (
r"In a future version of pandas all arguments of Series\.drop "
r"except for the argument 'labels' will be keyword-only"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.drop(1, 0)
expected = Series([1, 3], index=[0, 2])
tm.assert_series_equal(result, expected)
def test_drop_index_ea_dtype(any_numeric_ea_dtype):
# GH#45860
df = Series(100, index=Index([1, 2, 2], dtype=any_numeric_ea_dtype))
idx = Index([df.index[1]])
result = df.drop(idx)
expected = Series(100, index=Index([1], dtype=any_numeric_ea_dtype))
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,257 @@
import numpy as np
import pytest
from pandas import (
Categorical,
Series,
)
import pandas._testing as tm
@pytest.mark.parametrize(
"keep, expected",
[
("first", Series([False, False, False, False, True, True, False])),
("last", Series([False, True, True, False, False, False, False])),
(False, Series([False, True, True, False, True, True, False])),
],
)
def test_drop_duplicates(any_numpy_dtype, keep, expected):
tc = Series([1, 0, 3, 5, 3, 0, 4], dtype=np.dtype(any_numpy_dtype))
if tc.dtype == "bool":
pytest.skip("tested separately in test_drop_duplicates_bool")
tm.assert_series_equal(tc.duplicated(keep=keep), expected)
tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
sc = tc.copy()
return_value = sc.drop_duplicates(keep=keep, inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc[~expected])
@pytest.mark.parametrize(
"keep, expected",
[
("first", Series([False, False, True, True])),
("last", Series([True, True, False, False])),
(False, Series([True, True, True, True])),
],
)
def test_drop_duplicates_bool(keep, expected):
tc = Series([True, False, True, False])
tm.assert_series_equal(tc.duplicated(keep=keep), expected)
tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
sc = tc.copy()
return_value = sc.drop_duplicates(keep=keep, inplace=True)
tm.assert_series_equal(sc, tc[~expected])
assert return_value is None
@pytest.mark.parametrize("values", [[], list(range(5))])
def test_drop_duplicates_no_duplicates(any_numpy_dtype, keep, values):
tc = Series(values, dtype=np.dtype(any_numpy_dtype))
expected = Series([False] * len(tc), dtype="bool")
if tc.dtype == "bool":
# 0 -> False and 1-> True
# any other value would be duplicated
tc = tc[:2]
expected = expected[:2]
tm.assert_series_equal(tc.duplicated(keep=keep), expected)
result_dropped = tc.drop_duplicates(keep=keep)
tm.assert_series_equal(result_dropped, tc)
# validate shallow copy
assert result_dropped is not tc
class TestSeriesDropDuplicates:
@pytest.fixture(
params=["int_", "uint", "float_", "unicode_", "timedelta64[h]", "datetime64[D]"]
)
def dtype(self, request):
return request.param
@pytest.fixture
def cat_series_unused_category(self, dtype, ordered):
# Test case 1
cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype))
input1 = np.array([1, 2, 3, 3], dtype=np.dtype(dtype))
cat = Categorical(input1, categories=cat_array, ordered=ordered)
tc1 = Series(cat)
return tc1
def test_drop_duplicates_categorical_non_bool(self, cat_series_unused_category):
tc1 = cat_series_unused_category
expected = Series([False, False, False, True])
result = tc1.duplicated()
tm.assert_series_equal(result, expected)
result = tc1.drop_duplicates()
tm.assert_series_equal(result, tc1[~expected])
sc = tc1.copy()
return_value = sc.drop_duplicates(inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc1[~expected])
def test_drop_duplicates_categorical_non_bool_keeplast(
self, cat_series_unused_category
):
tc1 = cat_series_unused_category
expected = Series([False, False, True, False])
result = tc1.duplicated(keep="last")
tm.assert_series_equal(result, expected)
result = tc1.drop_duplicates(keep="last")
tm.assert_series_equal(result, tc1[~expected])
sc = tc1.copy()
return_value = sc.drop_duplicates(keep="last", inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc1[~expected])
def test_drop_duplicates_categorical_non_bool_keepfalse(
self, cat_series_unused_category
):
tc1 = cat_series_unused_category
expected = Series([False, False, True, True])
result = tc1.duplicated(keep=False)
tm.assert_series_equal(result, expected)
result = tc1.drop_duplicates(keep=False)
tm.assert_series_equal(result, tc1[~expected])
sc = tc1.copy()
return_value = sc.drop_duplicates(keep=False, inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc1[~expected])
@pytest.fixture
def cat_series(self, dtype, ordered):
# no unused categories, unlike cat_series_unused_category
cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype))
input2 = np.array([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(dtype))
cat = Categorical(input2, categories=cat_array, ordered=ordered)
tc2 = Series(cat)
return tc2
def test_drop_duplicates_categorical_non_bool2(self, cat_series):
tc2 = cat_series
expected = Series([False, False, False, False, True, True, False])
result = tc2.duplicated()
tm.assert_series_equal(result, expected)
result = tc2.drop_duplicates()
tm.assert_series_equal(result, tc2[~expected])
sc = tc2.copy()
return_value = sc.drop_duplicates(inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc2[~expected])
def test_drop_duplicates_categorical_non_bool2_keeplast(self, cat_series):
tc2 = cat_series
expected = Series([False, True, True, False, False, False, False])
result = tc2.duplicated(keep="last")
tm.assert_series_equal(result, expected)
result = tc2.drop_duplicates(keep="last")
tm.assert_series_equal(result, tc2[~expected])
sc = tc2.copy()
return_value = sc.drop_duplicates(keep="last", inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc2[~expected])
def test_drop_duplicates_categorical_non_bool2_keepfalse(self, cat_series):
tc2 = cat_series
expected = Series([False, True, True, False, True, True, False])
result = tc2.duplicated(keep=False)
tm.assert_series_equal(result, expected)
result = tc2.drop_duplicates(keep=False)
tm.assert_series_equal(result, tc2[~expected])
sc = tc2.copy()
return_value = sc.drop_duplicates(keep=False, inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc2[~expected])
def test_drop_duplicates_categorical_bool(self, ordered):
tc = Series(
Categorical(
[True, False, True, False], categories=[True, False], ordered=ordered
)
)
expected = Series([False, False, True, True])
tm.assert_series_equal(tc.duplicated(), expected)
tm.assert_series_equal(tc.drop_duplicates(), tc[~expected])
sc = tc.copy()
return_value = sc.drop_duplicates(inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc[~expected])
expected = Series([True, True, False, False])
tm.assert_series_equal(tc.duplicated(keep="last"), expected)
tm.assert_series_equal(tc.drop_duplicates(keep="last"), tc[~expected])
sc = tc.copy()
return_value = sc.drop_duplicates(keep="last", inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc[~expected])
expected = Series([True, True, True, True])
tm.assert_series_equal(tc.duplicated(keep=False), expected)
tm.assert_series_equal(tc.drop_duplicates(keep=False), tc[~expected])
sc = tc.copy()
return_value = sc.drop_duplicates(keep=False, inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc[~expected])
def test_drop_duplicates_categorical_bool_na(self, nulls_fixture):
# GH#44351
ser = Series(
Categorical(
[True, False, True, False, nulls_fixture],
categories=[True, False],
ordered=True,
)
)
result = ser.drop_duplicates()
expected = Series(
Categorical([True, False, np.nan], categories=[True, False], ordered=True),
index=[0, 1, 4],
)
tm.assert_series_equal(result, expected)
def test_drop_duplicates_pos_args_deprecation():
# GH#41485
s = Series(["a", "b", "c", "b"])
msg = (
"In a future version of pandas all arguments of "
"Series.drop_duplicates will be keyword-only"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.drop_duplicates("last")
expected = Series(["a", "c", "b"], index=[0, 2, 3])
tm.assert_series_equal(expected, result)

View File

@ -0,0 +1,115 @@
import numpy as np
import pytest
from pandas import (
DatetimeIndex,
IntervalIndex,
NaT,
Period,
Series,
Timestamp,
)
import pandas._testing as tm
class TestDropna:
def test_dropna_empty(self):
ser = Series([], dtype=object)
assert len(ser.dropna()) == 0
return_value = ser.dropna(inplace=True)
assert return_value is None
assert len(ser) == 0
# invalid axis
msg = "No axis named 1 for object type Series"
with pytest.raises(ValueError, match=msg):
ser.dropna(axis=1)
def test_dropna_preserve_name(self, datetime_series):
datetime_series[:5] = np.nan
result = datetime_series.dropna()
assert result.name == datetime_series.name
name = datetime_series.name
ts = datetime_series.copy()
return_value = ts.dropna(inplace=True)
assert return_value is None
assert ts.name == name
def test_dropna_no_nan(self):
for ser in [
Series([1, 2, 3], name="x"),
Series([False, True, False], name="x"),
]:
result = ser.dropna()
tm.assert_series_equal(result, ser)
assert result is not ser
s2 = ser.copy()
return_value = s2.dropna(inplace=True)
assert return_value is None
tm.assert_series_equal(s2, ser)
def test_dropna_intervals(self):
ser = Series(
[np.nan, 1, 2, 3],
IntervalIndex.from_arrays([np.nan, 0, 1, 2], [np.nan, 1, 2, 3]),
)
result = ser.dropna()
expected = ser.iloc[1:]
tm.assert_series_equal(result, expected)
def test_dropna_period_dtype(self):
# GH#13737
ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")])
result = ser.dropna()
expected = Series([Period("2011-01", freq="M")])
tm.assert_series_equal(result, expected)
def test_datetime64_tz_dropna(self):
# DatetimeLikeBlock
ser = Series(
[
Timestamp("2011-01-01 10:00"),
NaT,
Timestamp("2011-01-03 10:00"),
NaT,
]
)
result = ser.dropna()
expected = Series(
[Timestamp("2011-01-01 10:00"), Timestamp("2011-01-03 10:00")], index=[0, 2]
)
tm.assert_series_equal(result, expected)
# DatetimeTZBlock
idx = DatetimeIndex(
["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz="Asia/Tokyo"
)
ser = Series(idx)
assert ser.dtype == "datetime64[ns, Asia/Tokyo]"
result = ser.dropna()
expected = Series(
[
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
Timestamp("2011-01-03 10:00", tz="Asia/Tokyo"),
],
index=[0, 2],
)
assert result.dtype == "datetime64[ns, Asia/Tokyo]"
tm.assert_series_equal(result, expected)
def test_dropna_pos_args_deprecation(self):
# https://github.com/pandas-dev/pandas/issues/41485
ser = Series([1, 2, 3])
msg = (
r"In a future version of pandas all arguments of Series\.dropna "
r"will be keyword-only"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.dropna(0)
expected = Series([1, 2, 3])
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,8 @@
import numpy as np
class TestSeriesDtypes:
def test_dtype(self, datetime_series):
assert datetime_series.dtype == np.dtype("float64")
assert datetime_series.dtypes == np.dtype("float64")

View File

@ -0,0 +1,52 @@
import numpy as np
import pytest
from pandas import (
Categorical,
Series,
)
import pandas._testing as tm
@pytest.mark.parametrize(
"keep, expected",
[
("first", Series([False, False, True, False, True], name="name")),
("last", Series([True, True, False, False, False], name="name")),
(False, Series([True, True, True, False, True], name="name")),
],
)
def test_duplicated_keep(keep, expected):
ser = Series(["a", "b", "b", "c", "a"], name="name")
result = ser.duplicated(keep=keep)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"keep, expected",
[
("first", Series([False, False, True, False, True])),
("last", Series([True, True, False, False, False])),
(False, Series([True, True, True, False, True])),
],
)
def test_duplicated_nan_none(keep, expected):
ser = Series([np.nan, 3, 3, None, np.nan], dtype=object)
result = ser.duplicated(keep=keep)
tm.assert_series_equal(result, expected)
def test_duplicated_categorical_bool_na(nulls_fixture):
# GH#44351
ser = Series(
Categorical(
[True, False, True, False, nulls_fixture],
categories=[True, False],
ordered=True,
)
)
result = ser.duplicated()
expected = Series([False, False, True, True, False])
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,142 @@
from contextlib import nullcontext
import copy
import numpy as np
import pytest
from pandas._libs.missing import is_matching_na
from pandas.core.dtypes.common import is_float
from pandas import (
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
@pytest.mark.parametrize(
"arr, idx",
[
([1, 2, 3, 4], [0, 2, 1, 3]),
([1, np.nan, 3, np.nan], [0, 2, 1, 3]),
(
[1, np.nan, 3, np.nan],
MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c"), (3, "c")]),
),
],
)
def test_equals(arr, idx):
s1 = Series(arr, index=idx)
s2 = s1.copy()
assert s1.equals(s2)
s1[1] = 9
assert not s1.equals(s2)
@pytest.mark.parametrize(
"val", [1, 1.1, 1 + 1j, True, "abc", [1, 2], (1, 2), {1, 2}, {"a": 1}, None]
)
def test_equals_list_array(val):
# GH20676 Verify equals operator for list of Numpy arrays
arr = np.array([1, 2])
s1 = Series([arr, arr])
s2 = s1.copy()
assert s1.equals(s2)
s1[1] = val
cm = (
tm.assert_produces_warning(FutureWarning, check_stacklevel=False)
if isinstance(val, str)
else nullcontext()
)
with cm:
assert not s1.equals(s2)
def test_equals_false_negative():
# GH8437 Verify false negative behavior of equals function for dtype object
arr = [False, np.nan]
s1 = Series(arr)
s2 = s1.copy()
s3 = Series(index=range(2), dtype=object)
s4 = s3.copy()
s5 = s3.copy()
s6 = s3.copy()
s3[:-1] = s4[:-1] = s5[0] = s6[0] = False
assert s1.equals(s1)
assert s1.equals(s2)
assert s1.equals(s3)
assert s1.equals(s4)
assert s1.equals(s5)
assert s5.equals(s6)
def test_equals_matching_nas():
# matching but not identical NAs
left = Series([np.datetime64("NaT")], dtype=object)
right = Series([np.datetime64("NaT")], dtype=object)
assert left.equals(right)
assert Index(left).equals(Index(right))
assert left.array.equals(right.array)
left = Series([np.timedelta64("NaT")], dtype=object)
right = Series([np.timedelta64("NaT")], dtype=object)
assert left.equals(right)
assert Index(left).equals(Index(right))
assert left.array.equals(right.array)
left = Series([np.float64("NaN")], dtype=object)
right = Series([np.float64("NaN")], dtype=object)
assert left.equals(right)
assert Index(left, dtype=left.dtype).equals(Index(right, dtype=right.dtype))
assert left.array.equals(right.array)
def test_equals_mismatched_nas(nulls_fixture, nulls_fixture2):
# GH#39650
left = nulls_fixture
right = nulls_fixture2
if hasattr(right, "copy"):
right = right.copy()
else:
right = copy.copy(right)
ser = Series([left], dtype=object)
ser2 = Series([right], dtype=object)
if is_matching_na(left, right):
assert ser.equals(ser2)
elif (left is None and is_float(right)) or (right is None and is_float(left)):
assert ser.equals(ser2)
else:
assert not ser.equals(ser2)
def test_equals_none_vs_nan():
# GH#39650
ser = Series([1, None], dtype=object)
ser2 = Series([1, np.nan], dtype=object)
assert ser.equals(ser2)
assert Index(ser, dtype=ser.dtype).equals(Index(ser2, dtype=ser2.dtype))
assert ser.array.equals(ser2.array)
def test_equals_None_vs_float():
# GH#44190
left = Series([-np.inf, np.nan, -1.0, 0.0, 1.0, 10 / 3, np.inf], dtype=object)
right = Series([None] * len(left))
# these series were found to be equal due to a bug, check that they are correctly
# found to not equal
assert not left.equals(right)
assert not right.equals(left)
assert not left.to_frame().equals(right.to_frame())
assert not right.to_frame().equals(left.to_frame())
assert not Index(left, dtype="object").equals(Index(right, dtype="object"))
assert not Index(right, dtype="object").equals(Index(left, dtype="object"))

View File

@ -0,0 +1,144 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
def test_basic():
s = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd"), name="foo")
result = s.explode()
expected = pd.Series(
[0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object, name="foo"
)
tm.assert_series_equal(result, expected)
def test_mixed_type():
s = pd.Series(
[[0, 1, 2], np.nan, None, np.array([]), pd.Series(["a", "b"])], name="foo"
)
result = s.explode()
expected = pd.Series(
[0, 1, 2, np.nan, None, np.nan, "a", "b"],
index=[0, 0, 0, 1, 2, 3, 4, 4],
dtype=object,
name="foo",
)
tm.assert_series_equal(result, expected)
def test_empty():
s = pd.Series(dtype=object)
result = s.explode()
expected = s.copy()
tm.assert_series_equal(result, expected)
def test_nested_lists():
s = pd.Series([[[1, 2, 3]], [1, 2], 1])
result = s.explode()
expected = pd.Series([[1, 2, 3], 1, 2, 1], index=[0, 1, 1, 2])
tm.assert_series_equal(result, expected)
def test_multi_index():
s = pd.Series(
[[0, 1, 2], np.nan, [], (3, 4)],
name="foo",
index=pd.MultiIndex.from_product([list("ab"), range(2)], names=["foo", "bar"]),
)
result = s.explode()
index = pd.MultiIndex.from_tuples(
[("a", 0), ("a", 0), ("a", 0), ("a", 1), ("b", 0), ("b", 1), ("b", 1)],
names=["foo", "bar"],
)
expected = pd.Series(
[0, 1, 2, np.nan, np.nan, 3, 4], index=index, dtype=object, name="foo"
)
tm.assert_series_equal(result, expected)
def test_large():
s = pd.Series([range(256)]).explode()
result = s.explode()
tm.assert_series_equal(result, s)
def test_invert_array():
df = pd.DataFrame({"a": pd.date_range("20190101", periods=3, tz="UTC")})
listify = df.apply(lambda x: x.array, axis=1)
result = listify.explode()
tm.assert_series_equal(result, df["a"].rename())
@pytest.mark.parametrize(
"s", [pd.Series([1, 2, 3]), pd.Series(pd.date_range("2019", periods=3, tz="UTC"))]
)
def non_object_dtype(s):
result = s.explode()
tm.assert_series_equal(result, s)
def test_typical_usecase():
df = pd.DataFrame(
[{"var1": "a,b,c", "var2": 1}, {"var1": "d,e,f", "var2": 2}],
columns=["var1", "var2"],
)
exploded = df.var1.str.split(",").explode()
result = df[["var2"]].join(exploded)
expected = pd.DataFrame(
{"var2": [1, 1, 1, 2, 2, 2], "var1": list("abcdef")},
columns=["var2", "var1"],
index=[0, 0, 0, 1, 1, 1],
)
tm.assert_frame_equal(result, expected)
def test_nested_EA():
# a nested EA array
s = pd.Series(
[
pd.date_range("20170101", periods=3, tz="UTC"),
pd.date_range("20170104", periods=3, tz="UTC"),
]
)
result = s.explode()
expected = pd.Series(
pd.date_range("20170101", periods=6, tz="UTC"), index=[0, 0, 0, 1, 1, 1]
)
tm.assert_series_equal(result, expected)
def test_duplicate_index():
# GH 28005
s = pd.Series([[1, 2], [3, 4]], index=[0, 0])
result = s.explode()
expected = pd.Series([1, 2, 3, 4], index=[0, 0, 0, 0], dtype=object)
tm.assert_series_equal(result, expected)
def test_ignore_index():
# GH 34932
s = pd.Series([[1, 2], [3, 4]])
result = s.explode(ignore_index=True)
expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=object)
tm.assert_series_equal(result, expected)
def test_explode_sets():
# https://github.com/pandas-dev/pandas/issues/35614
s = pd.Series([{"a", "b", "c"}], index=[1])
result = s.explode().sort_values()
expected = pd.Series(["a", "b", "c"], index=[1, 1, 1])
tm.assert_series_equal(result, expected)
def test_explode_scalars_can_ignore_index():
# https://github.com/pandas-dev/pandas/issues/40487
s = pd.Series([1, 2, 3], index=["a", "b", "c"])
result = s.explode(ignore_index=True)
expected = pd.Series([1, 2, 3])
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,957 @@
from datetime import (
datetime,
timedelta,
timezone,
)
import numpy as np
import pytest
import pytz
from pandas import (
Categorical,
DataFrame,
DatetimeIndex,
NaT,
Period,
Series,
Timedelta,
Timestamp,
date_range,
isna,
)
import pandas._testing as tm
class TestSeriesFillNA:
def test_fillna_nat(self):
series = Series([0, 1, 2, NaT.value], dtype="M8[ns]")
filled = series.fillna(method="pad")
filled2 = series.fillna(value=series.values[2])
expected = series.copy()
expected.values[3] = expected.values[2]
tm.assert_series_equal(filled, expected)
tm.assert_series_equal(filled2, expected)
df = DataFrame({"A": series})
filled = df.fillna(method="pad")
filled2 = df.fillna(value=series.values[2])
expected = DataFrame({"A": expected})
tm.assert_frame_equal(filled, expected)
tm.assert_frame_equal(filled2, expected)
series = Series([NaT.value, 0, 1, 2], dtype="M8[ns]")
filled = series.fillna(method="bfill")
filled2 = series.fillna(value=series[1])
expected = series.copy()
expected[0] = expected[1]
tm.assert_series_equal(filled, expected)
tm.assert_series_equal(filled2, expected)
df = DataFrame({"A": series})
filled = df.fillna(method="bfill")
filled2 = df.fillna(value=series[1])
expected = DataFrame({"A": expected})
tm.assert_frame_equal(filled, expected)
tm.assert_frame_equal(filled2, expected)
def test_fillna_value_or_method(self, datetime_series):
msg = "Cannot specify both 'value' and 'method'"
with pytest.raises(ValueError, match=msg):
datetime_series.fillna(value=0, method="ffill")
def test_fillna(self):
ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5))
tm.assert_series_equal(ts, ts.fillna(method="ffill"))
ts[2] = np.NaN
exp = Series([0.0, 1.0, 1.0, 3.0, 4.0], index=ts.index)
tm.assert_series_equal(ts.fillna(method="ffill"), exp)
exp = Series([0.0, 1.0, 3.0, 3.0, 4.0], index=ts.index)
tm.assert_series_equal(ts.fillna(method="backfill"), exp)
exp = Series([0.0, 1.0, 5.0, 3.0, 4.0], index=ts.index)
tm.assert_series_equal(ts.fillna(value=5), exp)
msg = "Must specify a fill 'value' or 'method'"
with pytest.raises(ValueError, match=msg):
ts.fillna()
def test_fillna_nonscalar(self):
# GH#5703
s1 = Series([np.nan])
s2 = Series([1])
result = s1.fillna(s2)
expected = Series([1.0])
tm.assert_series_equal(result, expected)
result = s1.fillna({})
tm.assert_series_equal(result, s1)
result = s1.fillna(Series((), dtype=object))
tm.assert_series_equal(result, s1)
result = s2.fillna(s1)
tm.assert_series_equal(result, s2)
result = s1.fillna({0: 1})
tm.assert_series_equal(result, expected)
result = s1.fillna({1: 1})
tm.assert_series_equal(result, Series([np.nan]))
result = s1.fillna({0: 1, 1: 1})
tm.assert_series_equal(result, expected)
result = s1.fillna(Series({0: 1, 1: 1}))
tm.assert_series_equal(result, expected)
result = s1.fillna(Series({0: 1, 1: 1}, index=[4, 5]))
tm.assert_series_equal(result, s1)
def test_fillna_aligns(self):
s1 = Series([0, 1, 2], list("abc"))
s2 = Series([0, np.nan, 2], list("bac"))
result = s2.fillna(s1)
expected = Series([0, 0, 2.0], list("bac"))
tm.assert_series_equal(result, expected)
def test_fillna_limit(self):
ser = Series(np.nan, index=[0, 1, 2])
result = ser.fillna(999, limit=1)
expected = Series([999, np.nan, np.nan], index=[0, 1, 2])
tm.assert_series_equal(result, expected)
result = ser.fillna(999, limit=2)
expected = Series([999, 999, np.nan], index=[0, 1, 2])
tm.assert_series_equal(result, expected)
def test_fillna_dont_cast_strings(self):
# GH#9043
# make sure a string representation of int/float values can be filled
# correctly without raising errors or being converted
vals = ["0", "1.5", "-0.3"]
for val in vals:
ser = Series([0, 1, np.nan, np.nan, 4], dtype="float64")
result = ser.fillna(val)
expected = Series([0, 1, val, val, 4], dtype="object")
tm.assert_series_equal(result, expected)
def test_fillna_consistency(self):
# GH#16402
# fillna with a tz aware to a tz-naive, should result in object
ser = Series([Timestamp("20130101"), NaT])
result = ser.fillna(Timestamp("20130101", tz="US/Eastern"))
expected = Series(
[Timestamp("20130101"), Timestamp("2013-01-01", tz="US/Eastern")],
dtype="object",
)
tm.assert_series_equal(result, expected)
# where (we ignore the errors=)
result = ser.where(
[True, False], Timestamp("20130101", tz="US/Eastern"), errors="ignore"
)
tm.assert_series_equal(result, expected)
result = ser.where(
[True, False], Timestamp("20130101", tz="US/Eastern"), errors="ignore"
)
tm.assert_series_equal(result, expected)
# with a non-datetime
result = ser.fillna("foo")
expected = Series([Timestamp("20130101"), "foo"])
tm.assert_series_equal(result, expected)
# assignment
ser2 = ser.copy()
ser2[1] = "foo"
tm.assert_series_equal(ser2, expected)
def test_fillna_downcast(self):
# GH#15277
# infer int64 from float64
ser = Series([1.0, np.nan])
result = ser.fillna(0, downcast="infer")
expected = Series([1, 0])
tm.assert_series_equal(result, expected)
# infer int64 from float64 when fillna value is a dict
ser = Series([1.0, np.nan])
result = ser.fillna({1: 0}, downcast="infer")
expected = Series([1, 0])
tm.assert_series_equal(result, expected)
def test_fillna_downcast_infer_objects_to_numeric(self):
# GH#44241 if we have object-dtype, 'downcast="infer"' should
# _actually_ infer
arr = np.arange(5).astype(object)
arr[3] = np.nan
ser = Series(arr)
res = ser.fillna(3, downcast="infer")
expected = Series(np.arange(5), dtype=np.int64)
tm.assert_series_equal(res, expected)
res = ser.ffill(downcast="infer")
expected = Series([0, 1, 2, 2, 4], dtype=np.int64)
tm.assert_series_equal(res, expected)
res = ser.bfill(downcast="infer")
expected = Series([0, 1, 2, 4, 4], dtype=np.int64)
tm.assert_series_equal(res, expected)
# with a non-round float present, we will downcast to float64
ser[2] = 2.5
expected = Series([0, 1, 2.5, 3, 4], dtype=np.float64)
res = ser.fillna(3, downcast="infer")
tm.assert_series_equal(res, expected)
res = ser.ffill(downcast="infer")
expected = Series([0, 1, 2.5, 2.5, 4], dtype=np.float64)
tm.assert_series_equal(res, expected)
res = ser.bfill(downcast="infer")
expected = Series([0, 1, 2.5, 4, 4], dtype=np.float64)
tm.assert_series_equal(res, expected)
def test_timedelta_fillna(self, frame_or_series):
# GH#3371
ser = Series(
[
Timestamp("20130101"),
Timestamp("20130101"),
Timestamp("20130102"),
Timestamp("20130103 9:01:01"),
]
)
td = ser.diff()
obj = frame_or_series(td)
# reg fillna
result = obj.fillna(Timedelta(seconds=0))
expected = Series(
[
timedelta(0),
timedelta(0),
timedelta(1),
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
]
)
expected = frame_or_series(expected)
tm.assert_equal(result, expected)
# interpreted as seconds, no longer supported
msg = "value should be a 'Timedelta', 'NaT', or array of those. Got 'int'"
with pytest.raises(TypeError, match=msg):
obj.fillna(1)
result = obj.fillna(Timedelta(seconds=1))
expected = Series(
[
timedelta(seconds=1),
timedelta(0),
timedelta(1),
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
]
)
expected = frame_or_series(expected)
tm.assert_equal(result, expected)
result = obj.fillna(timedelta(days=1, seconds=1))
expected = Series(
[
timedelta(days=1, seconds=1),
timedelta(0),
timedelta(1),
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
]
)
expected = frame_or_series(expected)
tm.assert_equal(result, expected)
result = obj.fillna(np.timedelta64(10**9))
expected = Series(
[
timedelta(seconds=1),
timedelta(0),
timedelta(1),
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
]
)
expected = frame_or_series(expected)
tm.assert_equal(result, expected)
result = obj.fillna(NaT)
expected = Series(
[
NaT,
timedelta(0),
timedelta(1),
timedelta(days=1, seconds=9 * 3600 + 60 + 1),
],
dtype="m8[ns]",
)
expected = frame_or_series(expected)
tm.assert_equal(result, expected)
# ffill
td[2] = np.nan
obj = frame_or_series(td)
result = obj.ffill()
expected = td.fillna(Timedelta(seconds=0))
expected[0] = np.nan
expected = frame_or_series(expected)
tm.assert_equal(result, expected)
# bfill
td[2] = np.nan
obj = frame_or_series(td)
result = obj.bfill()
expected = td.fillna(Timedelta(seconds=0))
expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1)
expected = frame_or_series(expected)
tm.assert_equal(result, expected)
def test_datetime64_fillna(self):
ser = Series(
[
Timestamp("20130101"),
Timestamp("20130101"),
Timestamp("20130102"),
Timestamp("20130103 9:01:01"),
]
)
ser[2] = np.nan
# ffill
result = ser.ffill()
expected = Series(
[
Timestamp("20130101"),
Timestamp("20130101"),
Timestamp("20130101"),
Timestamp("20130103 9:01:01"),
]
)
tm.assert_series_equal(result, expected)
# bfill
result = ser.bfill()
expected = Series(
[
Timestamp("20130101"),
Timestamp("20130101"),
Timestamp("20130103 9:01:01"),
Timestamp("20130103 9:01:01"),
]
)
tm.assert_series_equal(result, expected)
def test_datetime64_fillna_backfill(self):
# GH#6587
# make sure that we are treating as integer when filling
msg = "containing strings is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
# this also tests inference of a datetime-like with NaT's
ser = Series([NaT, NaT, "2013-08-05 15:30:00.000001"])
expected = Series(
[
"2013-08-05 15:30:00.000001",
"2013-08-05 15:30:00.000001",
"2013-08-05 15:30:00.000001",
],
dtype="M8[ns]",
)
result = ser.fillna(method="backfill")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"])
def test_datetime64_tz_fillna(self, tz):
# DatetimeLikeBlock
ser = Series(
[
Timestamp("2011-01-01 10:00"),
NaT,
Timestamp("2011-01-03 10:00"),
NaT,
]
)
null_loc = Series([False, True, False, True])
result = ser.fillna(Timestamp("2011-01-02 10:00"))
expected = Series(
[
Timestamp("2011-01-01 10:00"),
Timestamp("2011-01-02 10:00"),
Timestamp("2011-01-03 10:00"),
Timestamp("2011-01-02 10:00"),
]
)
tm.assert_series_equal(expected, result)
# check s is not changed
tm.assert_series_equal(isna(ser), null_loc)
result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz))
expected = Series(
[
Timestamp("2011-01-01 10:00"),
Timestamp("2011-01-02 10:00", tz=tz),
Timestamp("2011-01-03 10:00"),
Timestamp("2011-01-02 10:00", tz=tz),
]
)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)
result = ser.fillna("AAA")
expected = Series(
[
Timestamp("2011-01-01 10:00"),
"AAA",
Timestamp("2011-01-03 10:00"),
"AAA",
],
dtype=object,
)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)
result = ser.fillna(
{
1: Timestamp("2011-01-02 10:00", tz=tz),
3: Timestamp("2011-01-04 10:00"),
}
)
expected = Series(
[
Timestamp("2011-01-01 10:00"),
Timestamp("2011-01-02 10:00", tz=tz),
Timestamp("2011-01-03 10:00"),
Timestamp("2011-01-04 10:00"),
]
)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)
result = ser.fillna(
{1: Timestamp("2011-01-02 10:00"), 3: Timestamp("2011-01-04 10:00")}
)
expected = Series(
[
Timestamp("2011-01-01 10:00"),
Timestamp("2011-01-02 10:00"),
Timestamp("2011-01-03 10:00"),
Timestamp("2011-01-04 10:00"),
]
)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)
# DatetimeTZBlock
idx = DatetimeIndex(["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz=tz)
ser = Series(idx)
assert ser.dtype == f"datetime64[ns, {tz}]"
tm.assert_series_equal(isna(ser), null_loc)
result = ser.fillna(Timestamp("2011-01-02 10:00"))
expected = Series(
[
Timestamp("2011-01-01 10:00", tz=tz),
Timestamp("2011-01-02 10:00"),
Timestamp("2011-01-03 10:00", tz=tz),
Timestamp("2011-01-02 10:00"),
]
)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)
result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz))
idx = DatetimeIndex(
[
"2011-01-01 10:00",
"2011-01-02 10:00",
"2011-01-03 10:00",
"2011-01-02 10:00",
],
tz=tz,
)
expected = Series(idx)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)
result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz).to_pydatetime())
idx = DatetimeIndex(
[
"2011-01-01 10:00",
"2011-01-02 10:00",
"2011-01-03 10:00",
"2011-01-02 10:00",
],
tz=tz,
)
expected = Series(idx)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)
result = ser.fillna("AAA")
expected = Series(
[
Timestamp("2011-01-01 10:00", tz=tz),
"AAA",
Timestamp("2011-01-03 10:00", tz=tz),
"AAA",
],
dtype=object,
)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)
result = ser.fillna(
{
1: Timestamp("2011-01-02 10:00", tz=tz),
3: Timestamp("2011-01-04 10:00"),
}
)
expected = Series(
[
Timestamp("2011-01-01 10:00", tz=tz),
Timestamp("2011-01-02 10:00", tz=tz),
Timestamp("2011-01-03 10:00", tz=tz),
Timestamp("2011-01-04 10:00"),
]
)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)
result = ser.fillna(
{
1: Timestamp("2011-01-02 10:00", tz=tz),
3: Timestamp("2011-01-04 10:00", tz=tz),
}
)
expected = Series(
[
Timestamp("2011-01-01 10:00", tz=tz),
Timestamp("2011-01-02 10:00", tz=tz),
Timestamp("2011-01-03 10:00", tz=tz),
Timestamp("2011-01-04 10:00", tz=tz),
]
)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)
# filling with a naive/other zone, coerce to object
result = ser.fillna(Timestamp("20130101"))
expected = Series(
[
Timestamp("2011-01-01 10:00", tz=tz),
Timestamp("2013-01-01"),
Timestamp("2011-01-03 10:00", tz=tz),
Timestamp("2013-01-01"),
]
)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)
with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"):
result = ser.fillna(Timestamp("20130101", tz="US/Pacific"))
expected = Series(
[
Timestamp("2011-01-01 10:00", tz=tz),
Timestamp("2013-01-01", tz="US/Pacific"),
Timestamp("2011-01-03 10:00", tz=tz),
Timestamp("2013-01-01", tz="US/Pacific"),
]
)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(isna(ser), null_loc)
def test_fillna_dt64tz_with_method(self):
# with timezone
# GH#15855
ser = Series([Timestamp("2012-11-11 00:00:00+01:00"), NaT])
exp = Series(
[
Timestamp("2012-11-11 00:00:00+01:00"),
Timestamp("2012-11-11 00:00:00+01:00"),
]
)
tm.assert_series_equal(ser.fillna(method="pad"), exp)
ser = Series([NaT, Timestamp("2012-11-11 00:00:00+01:00")])
exp = Series(
[
Timestamp("2012-11-11 00:00:00+01:00"),
Timestamp("2012-11-11 00:00:00+01:00"),
]
)
tm.assert_series_equal(ser.fillna(method="bfill"), exp)
def test_fillna_pytimedelta(self):
# GH#8209
ser = Series([np.nan, Timedelta("1 days")], index=["A", "B"])
result = ser.fillna(timedelta(1))
expected = Series(Timedelta("1 days"), index=["A", "B"])
tm.assert_series_equal(result, expected)
def test_fillna_period(self):
# GH#13737
ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")])
res = ser.fillna(Period("2012-01", freq="M"))
exp = Series([Period("2011-01", freq="M"), Period("2012-01", freq="M")])
tm.assert_series_equal(res, exp)
assert res.dtype == "Period[M]"
def test_fillna_dt64_timestamp(self, frame_or_series):
ser = Series(
[
Timestamp("20130101"),
Timestamp("20130101"),
Timestamp("20130102"),
Timestamp("20130103 9:01:01"),
]
)
ser[2] = np.nan
obj = frame_or_series(ser)
# reg fillna
result = obj.fillna(Timestamp("20130104"))
expected = Series(
[
Timestamp("20130101"),
Timestamp("20130101"),
Timestamp("20130104"),
Timestamp("20130103 9:01:01"),
]
)
expected = frame_or_series(expected)
tm.assert_equal(result, expected)
result = obj.fillna(NaT)
expected = obj
tm.assert_equal(result, expected)
def test_fillna_dt64_non_nao(self):
# GH#27419
ser = Series([Timestamp("2010-01-01"), NaT, Timestamp("2000-01-01")])
val = np.datetime64("1975-04-05", "ms")
result = ser.fillna(val)
expected = Series(
[Timestamp("2010-01-01"), Timestamp("1975-04-05"), Timestamp("2000-01-01")]
)
tm.assert_series_equal(result, expected)
def test_fillna_numeric_inplace(self):
x = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"])
y = x.copy()
return_value = y.fillna(value=0, inplace=True)
assert return_value is None
expected = x.fillna(value=0)
tm.assert_series_equal(y, expected)
# ---------------------------------------------------------------
# CategoricalDtype
@pytest.mark.parametrize(
"fill_value, expected_output",
[
("a", ["a", "a", "b", "a", "a"]),
({1: "a", 3: "b", 4: "b"}, ["a", "a", "b", "b", "b"]),
({1: "a"}, ["a", "a", "b", np.nan, np.nan]),
({1: "a", 3: "b"}, ["a", "a", "b", "b", np.nan]),
(Series("a"), ["a", np.nan, "b", np.nan, np.nan]),
(Series("a", index=[1]), ["a", "a", "b", np.nan, np.nan]),
(Series({1: "a", 3: "b"}), ["a", "a", "b", "b", np.nan]),
(Series(["a", "b"], index=[3, 4]), ["a", np.nan, "b", "a", "b"]),
],
)
def test_fillna_categorical(self, fill_value, expected_output):
# GH#17033
# Test fillna for a Categorical series
data = ["a", np.nan, "b", np.nan, np.nan]
ser = Series(Categorical(data, categories=["a", "b"]))
exp = Series(Categorical(expected_output, categories=["a", "b"]))
result = ser.fillna(fill_value)
tm.assert_series_equal(result, exp)
@pytest.mark.parametrize(
"fill_value, expected_output",
[
(Series(["a", "b", "c", "d", "e"]), ["a", "b", "b", "d", "e"]),
(Series(["b", "d", "a", "d", "a"]), ["a", "d", "b", "d", "a"]),
(
Series(
Categorical(
["b", "d", "a", "d", "a"], categories=["b", "c", "d", "e", "a"]
)
),
["a", "d", "b", "d", "a"],
),
],
)
def test_fillna_categorical_with_new_categories(self, fill_value, expected_output):
# GH#26215
data = ["a", np.nan, "b", np.nan, np.nan]
ser = Series(Categorical(data, categories=["a", "b", "c", "d", "e"]))
exp = Series(Categorical(expected_output, categories=["a", "b", "c", "d", "e"]))
result = ser.fillna(fill_value)
tm.assert_series_equal(result, exp)
def test_fillna_categorical_raises(self):
data = ["a", np.nan, "b", np.nan, np.nan]
ser = Series(Categorical(data, categories=["a", "b"]))
cat = ser._values
msg = "Cannot setitem on a Categorical with a new category"
with pytest.raises(TypeError, match=msg):
ser.fillna("d")
msg2 = "Length of 'value' does not match."
with pytest.raises(ValueError, match=msg2):
cat.fillna(Series("d"))
with pytest.raises(TypeError, match=msg):
ser.fillna({1: "d", 3: "a"})
msg = '"value" parameter must be a scalar or dict, but you passed a "list"'
with pytest.raises(TypeError, match=msg):
ser.fillna(["a", "b"])
msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"'
with pytest.raises(TypeError, match=msg):
ser.fillna(("a", "b"))
msg = (
'"value" parameter must be a scalar, dict '
'or Series, but you passed a "DataFrame"'
)
with pytest.raises(TypeError, match=msg):
ser.fillna(DataFrame({1: ["a"], 3: ["b"]}))
@pytest.mark.parametrize("dtype", [float, "float32", "float64"])
@pytest.mark.parametrize("fill_type", tm.ALL_REAL_NUMPY_DTYPES)
def test_fillna_float_casting(self, dtype, fill_type):
# GH-43424
ser = Series([np.nan, 1.2], dtype=dtype)
fill_values = Series([2, 2], dtype=fill_type)
result = ser.fillna(fill_values)
expected = Series([2.0, 1.2], dtype=dtype)
tm.assert_series_equal(result, expected)
def test_fillna_f32_upcast_with_dict(self):
# GH-43424
ser = Series([np.nan, 1.2], dtype=np.float32)
result = ser.fillna({0: 1})
expected = Series([1.0, 1.2], dtype=np.float32)
tm.assert_series_equal(result, expected)
# ---------------------------------------------------------------
# Invalid Usages
def test_fillna_invalid_method(self, datetime_series):
try:
datetime_series.fillna(method="ffil")
except ValueError as inst:
assert "ffil" in str(inst)
def test_fillna_listlike_invalid(self):
ser = Series(np.random.randint(-100, 100, 50))
msg = '"value" parameter must be a scalar or dict, but you passed a "list"'
with pytest.raises(TypeError, match=msg):
ser.fillna([1, 2])
msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"'
with pytest.raises(TypeError, match=msg):
ser.fillna((1, 2))
def test_fillna_method_and_limit_invalid(self):
# related GH#9217, make sure limit is an int and greater than 0
ser = Series([1, 2, 3, None])
msg = "|".join(
[
r"Cannot specify both 'value' and 'method'\.",
"Limit must be greater than 0",
"Limit must be an integer",
]
)
for limit in [-1, 0, 1.0, 2.0]:
for method in ["backfill", "bfill", "pad", "ffill", None]:
with pytest.raises(ValueError, match=msg):
ser.fillna(1, limit=limit, method=method)
def test_fillna_datetime64_with_timezone_tzinfo(self):
# https://github.com/pandas-dev/pandas/issues/38851
# different tzinfos representing UTC treated as equal
ser = Series(date_range("2020", periods=3, tz="UTC"))
expected = ser.copy()
ser[1] = NaT
result = ser.fillna(datetime(2020, 1, 2, tzinfo=timezone.utc))
tm.assert_series_equal(result, expected)
# but we dont (yet) consider distinct tzinfos for non-UTC tz equivalent
ts = Timestamp("2000-01-01", tz="US/Pacific")
ser2 = Series(ser._values.tz_convert("dateutil/US/Pacific"))
assert ser2.dtype.kind == "M"
with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"):
result = ser2.fillna(ts)
expected = Series([ser[0], ts, ser[2]], dtype=object)
# TODO(2.0): once deprecation is enforced
# expected = Series(
# [ser2[0], ts.tz_convert(ser2.dtype.tz), ser2[2]],
# dtype=ser2.dtype,
# )
tm.assert_series_equal(result, expected)
def test_fillna_pos_args_deprecation(self):
# https://github.com/pandas-dev/pandas/issues/41485
srs = Series([1, 2, 3, np.nan], dtype=float)
msg = (
r"In a future version of pandas all arguments of Series.fillna "
r"except for the argument 'value' will be keyword-only"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = srs.fillna(0, None, None)
expected = Series([1, 2, 3, 0], dtype=float)
tm.assert_series_equal(result, expected)
class TestFillnaPad:
def test_fillna_bug(self):
ser = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"])
filled = ser.fillna(method="ffill")
expected = Series([np.nan, 1.0, 1.0, 3.0, 3.0], ser.index)
tm.assert_series_equal(filled, expected)
filled = ser.fillna(method="bfill")
expected = Series([1.0, 1.0, 3.0, 3.0, np.nan], ser.index)
tm.assert_series_equal(filled, expected)
def test_ffill(self):
ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5))
ts[2] = np.NaN
tm.assert_series_equal(ts.ffill(), ts.fillna(method="ffill"))
def test_ffill_pos_args_deprecation(self):
# https://github.com/pandas-dev/pandas/issues/41485
ser = Series([1, 2, 3])
msg = (
r"In a future version of pandas all arguments of Series.ffill "
r"will be keyword-only"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.ffill(0)
expected = Series([1, 2, 3])
tm.assert_series_equal(result, expected)
def test_ffill_mixed_dtypes_without_missing_data(self):
# GH#14956
series = Series([datetime(2015, 1, 1, tzinfo=pytz.utc), 1])
result = series.ffill()
tm.assert_series_equal(series, result)
def test_bfill(self):
ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5))
ts[2] = np.NaN
tm.assert_series_equal(ts.bfill(), ts.fillna(method="bfill"))
def test_bfill_pos_args_deprecation(self):
# https://github.com/pandas-dev/pandas/issues/41485
ser = Series([1, 2, 3])
msg = (
r"In a future version of pandas all arguments of Series.bfill "
r"will be keyword-only"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.bfill(0)
expected = Series([1, 2, 3])
tm.assert_series_equal(result, expected)
def test_pad_nan(self):
x = Series(
[np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"], dtype=float
)
return_value = x.fillna(method="pad", inplace=True)
assert return_value is None
expected = Series(
[np.nan, 1.0, 1.0, 3.0, 3.0], ["z", "a", "b", "c", "d"], dtype=float
)
tm.assert_series_equal(x[1:], expected[1:])
assert np.isnan(x[0]), np.isnan(expected[0])
def test_series_fillna_limit(self):
index = np.arange(10)
s = Series(np.random.randn(10), index=index)
result = s[:2].reindex(index)
result = result.fillna(method="pad", limit=5)
expected = s[:2].reindex(index).fillna(method="pad")
expected[-3:] = np.nan
tm.assert_series_equal(result, expected)
result = s[-2:].reindex(index)
result = result.fillna(method="bfill", limit=5)
expected = s[-2:].reindex(index).fillna(method="backfill")
expected[:3] = np.nan
tm.assert_series_equal(result, expected)
def test_series_pad_backfill_limit(self):
index = np.arange(10)
s = Series(np.random.randn(10), index=index)
result = s[:2].reindex(index, method="pad", limit=5)
expected = s[:2].reindex(index).fillna(method="pad")
expected[-3:] = np.nan
tm.assert_series_equal(result, expected)
result = s[-2:].reindex(index, method="backfill", limit=5)
expected = s[-2:].reindex(index).fillna(method="backfill")
expected[:3] = np.nan
tm.assert_series_equal(result, expected)
def test_fillna_int(self):
ser = Series(np.random.randint(-100, 100, 50))
return_value = ser.fillna(method="ffill", inplace=True)
assert return_value is None
tm.assert_series_equal(ser.fillna(method="ffill", inplace=False), ser)
def test_datetime64tz_fillna_round_issue(self):
# GH#14872
data = Series(
[NaT, NaT, datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc)]
)
filled = data.fillna(method="bfill")
expected = Series(
[
datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc),
datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc),
datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc),
]
)
tm.assert_series_equal(filled, expected)

View File

@ -0,0 +1,29 @@
from pandas import (
Index,
Series,
date_range,
)
import pandas._testing as tm
class TestGetNumericData:
def test_get_numeric_data_preserve_dtype(self):
# get the numeric data
obj = Series([1, 2, 3])
result = obj._get_numeric_data()
tm.assert_series_equal(result, obj)
obj = Series([1, "2", 3.0])
result = obj._get_numeric_data()
expected = Series([], dtype=object, index=Index([], dtype=object))
tm.assert_series_equal(result, expected)
obj = Series([True, False, True])
result = obj._get_numeric_data()
tm.assert_series_equal(result, obj)
obj = Series(date_range("20130101", periods=3))
result = obj._get_numeric_data()
expected = Series([], dtype="M8[ns]", index=Index([], dtype=object))
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,8 @@
import pandas._testing as tm
def test_head_tail(string_series):
tm.assert_series_equal(string_series.head(), string_series[:5])
tm.assert_series_equal(string_series.head(0), string_series[0:0])
tm.assert_series_equal(string_series.tail(), string_series[-5:])
tm.assert_series_equal(string_series.tail(0), string_series[0:0])

View File

@ -0,0 +1,23 @@
import numpy as np
from pandas import Series
import pandas._testing as tm
class TestInferObjects:
def test_infer_objects_series(self):
# GH#11221
actual = Series(np.array([1, 2, 3], dtype="O")).infer_objects()
expected = Series([1, 2, 3])
tm.assert_series_equal(actual, expected)
actual = Series(np.array([1, 2, 3, None], dtype="O")).infer_objects()
expected = Series([1.0, 2.0, 3.0, np.nan])
tm.assert_series_equal(actual, expected)
# only soft conversions, unconvertable pass thru unchanged
actual = Series(np.array([1, 2, 3, None, "a"], dtype="O")).infer_objects()
expected = Series([1, 2, 3, None, "a"])
assert actual.dtype == "object"
tm.assert_series_equal(actual, expected)

View File

@ -0,0 +1,829 @@
import numpy as np
import pytest
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
Index,
MultiIndex,
Series,
date_range,
isna,
)
import pandas._testing as tm
from pandas.util.version import Version
@pytest.fixture(
params=[
"linear",
"index",
"values",
"nearest",
"slinear",
"zero",
"quadratic",
"cubic",
"barycentric",
"krogh",
"polynomial",
"spline",
"piecewise_polynomial",
"from_derivatives",
"pchip",
"akima",
"cubicspline",
]
)
def nontemporal_method(request):
"""Fixture that returns an (method name, required kwargs) pair.
This fixture does not include method 'time' as a parameterization; that
method requires a Series with a DatetimeIndex, and is generally tested
separately from these non-temporal methods.
"""
method = request.param
kwargs = {"order": 1} if method in ("spline", "polynomial") else {}
return method, kwargs
@pytest.fixture(
params=[
"linear",
"slinear",
"zero",
"quadratic",
"cubic",
"barycentric",
"krogh",
"polynomial",
"spline",
"piecewise_polynomial",
"from_derivatives",
"pchip",
"akima",
"cubicspline",
]
)
def interp_methods_ind(request):
"""Fixture that returns a (method name, required kwargs) pair to
be tested for various Index types.
This fixture does not include methods - 'time', 'index', 'nearest',
'values' as a parameterization
"""
method = request.param
kwargs = {"order": 1} if method in ("spline", "polynomial") else {}
return method, kwargs
class TestSeriesInterpolateData:
def test_interpolate(self, datetime_series, string_series):
ts = Series(np.arange(len(datetime_series), dtype=float), datetime_series.index)
ts_copy = ts.copy()
ts_copy[5:10] = np.NaN
linear_interp = ts_copy.interpolate(method="linear")
tm.assert_series_equal(linear_interp, ts)
ord_ts = Series(
[d.toordinal() for d in datetime_series.index], index=datetime_series.index
).astype(float)
ord_ts_copy = ord_ts.copy()
ord_ts_copy[5:10] = np.NaN
time_interp = ord_ts_copy.interpolate(method="time")
tm.assert_series_equal(time_interp, ord_ts)
def test_interpolate_time_raises_for_non_timeseries(self):
# When method='time' is used on a non-TimeSeries that contains a null
# value, a ValueError should be raised.
non_ts = Series([0, 1, 2, np.NaN])
msg = "time-weighted interpolation only works on Series.* with a DatetimeIndex"
with pytest.raises(ValueError, match=msg):
non_ts.interpolate(method="time")
@td.skip_if_no_scipy
def test_interpolate_cubicspline(self):
ser = Series([10, 11, 12, 13])
expected = Series(
[11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
)
# interpolate at new_index
new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
float
)
result = ser.reindex(new_index).interpolate(method="cubicspline")[1:3]
tm.assert_series_equal(result, expected)
@td.skip_if_no_scipy
def test_interpolate_pchip(self):
ser = Series(np.sort(np.random.uniform(size=100)))
# interpolate at new_index
new_index = ser.index.union(
Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75])
).astype(float)
interp_s = ser.reindex(new_index).interpolate(method="pchip")
# does not blow up, GH5977
interp_s[49:51]
@td.skip_if_no_scipy
def test_interpolate_akima(self):
ser = Series([10, 11, 12, 13])
# interpolate at new_index where `der` is zero
expected = Series(
[11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
)
new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
float
)
interp_s = ser.reindex(new_index).interpolate(method="akima")
tm.assert_series_equal(interp_s[1:3], expected)
# interpolate at new_index where `der` is a non-zero int
expected = Series(
[11.0, 1.0, 1.0, 1.0, 12.0, 1.0, 1.0, 1.0, 13.0],
index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
)
new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
float
)
interp_s = ser.reindex(new_index).interpolate(method="akima", der=1)
tm.assert_series_equal(interp_s[1:3], expected)
@td.skip_if_no_scipy
def test_interpolate_piecewise_polynomial(self):
ser = Series([10, 11, 12, 13])
expected = Series(
[11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
)
# interpolate at new_index
new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
float
)
interp_s = ser.reindex(new_index).interpolate(method="piecewise_polynomial")
tm.assert_series_equal(interp_s[1:3], expected)
@td.skip_if_no_scipy
def test_interpolate_from_derivatives(self):
ser = Series([10, 11, 12, 13])
expected = Series(
[11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
)
# interpolate at new_index
new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
float
)
interp_s = ser.reindex(new_index).interpolate(method="from_derivatives")
tm.assert_series_equal(interp_s[1:3], expected)
@pytest.mark.parametrize(
"kwargs",
[
{},
pytest.param(
{"method": "polynomial", "order": 1}, marks=td.skip_if_no_scipy
),
],
)
def test_interpolate_corners(self, kwargs):
s = Series([np.nan, np.nan])
tm.assert_series_equal(s.interpolate(**kwargs), s)
s = Series([], dtype=object).interpolate()
tm.assert_series_equal(s.interpolate(**kwargs), s)
def test_interpolate_index_values(self):
s = Series(np.nan, index=np.sort(np.random.rand(30)))
s[::3] = np.random.randn(10)
vals = s.index.values.astype(float)
result = s.interpolate(method="index")
expected = s.copy()
bad = isna(expected.values)
good = ~bad
expected = Series(
np.interp(vals[bad], vals[good], s.values[good]), index=s.index[bad]
)
tm.assert_series_equal(result[bad], expected)
# 'values' is synonymous with 'index' for the method kwarg
other_result = s.interpolate(method="values")
tm.assert_series_equal(other_result, result)
tm.assert_series_equal(other_result[bad], expected)
def test_interpolate_non_ts(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
msg = (
"time-weighted interpolation only works on Series or DataFrames "
"with a DatetimeIndex"
)
with pytest.raises(ValueError, match=msg):
s.interpolate(method="time")
@pytest.mark.parametrize(
"kwargs",
[
{},
pytest.param(
{"method": "polynomial", "order": 1}, marks=td.skip_if_no_scipy
),
],
)
def test_nan_interpolate(self, kwargs):
s = Series([0, 1, np.nan, 3])
result = s.interpolate(**kwargs)
expected = Series([0.0, 1.0, 2.0, 3.0])
tm.assert_series_equal(result, expected)
def test_nan_irregular_index(self):
s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9])
result = s.interpolate()
expected = Series([1.0, 2.0, 3.0, 4.0], index=[1, 3, 5, 9])
tm.assert_series_equal(result, expected)
def test_nan_str_index(self):
s = Series([0, 1, 2, np.nan], index=list("abcd"))
result = s.interpolate()
expected = Series([0.0, 1.0, 2.0, 2.0], index=list("abcd"))
tm.assert_series_equal(result, expected)
@td.skip_if_no_scipy
def test_interp_quad(self):
sq = Series([1, 4, np.nan, 16], index=[1, 2, 3, 4])
result = sq.interpolate(method="quadratic")
expected = Series([1.0, 4.0, 9.0, 16.0], index=[1, 2, 3, 4])
tm.assert_series_equal(result, expected)
@td.skip_if_no_scipy
def test_interp_scipy_basic(self):
s = Series([1, 3, np.nan, 12, np.nan, 25])
# slinear
expected = Series([1.0, 3.0, 7.5, 12.0, 18.5, 25.0])
result = s.interpolate(method="slinear")
tm.assert_series_equal(result, expected)
result = s.interpolate(method="slinear", downcast="infer")
tm.assert_series_equal(result, expected)
# nearest
expected = Series([1, 3, 3, 12, 12, 25])
result = s.interpolate(method="nearest")
tm.assert_series_equal(result, expected.astype("float"))
result = s.interpolate(method="nearest", downcast="infer")
tm.assert_series_equal(result, expected)
# zero
expected = Series([1, 3, 3, 12, 12, 25])
result = s.interpolate(method="zero")
tm.assert_series_equal(result, expected.astype("float"))
result = s.interpolate(method="zero", downcast="infer")
tm.assert_series_equal(result, expected)
# quadratic
# GH #15662.
expected = Series([1, 3.0, 6.823529, 12.0, 18.058824, 25.0])
result = s.interpolate(method="quadratic")
tm.assert_series_equal(result, expected)
result = s.interpolate(method="quadratic", downcast="infer")
tm.assert_series_equal(result, expected)
# cubic
expected = Series([1.0, 3.0, 6.8, 12.0, 18.2, 25.0])
result = s.interpolate(method="cubic")
tm.assert_series_equal(result, expected)
def test_interp_limit(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0])
result = s.interpolate(method="linear", limit=2)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("limit", [-1, 0])
def test_interpolate_invalid_nonpositive_limit(self, nontemporal_method, limit):
# GH 9217: make sure limit is greater than zero.
s = Series([1, 2, np.nan, 4])
method, kwargs = nontemporal_method
with pytest.raises(ValueError, match="Limit must be greater than 0"):
s.interpolate(limit=limit, method=method, **kwargs)
def test_interpolate_invalid_float_limit(self, nontemporal_method):
# GH 9217: make sure limit is an integer.
s = Series([1, 2, np.nan, 4])
method, kwargs = nontemporal_method
limit = 2.0
with pytest.raises(ValueError, match="Limit must be an integer"):
s.interpolate(limit=limit, method=method, **kwargs)
@pytest.mark.parametrize("invalid_method", [None, "nonexistent_method"])
def test_interp_invalid_method(self, invalid_method):
s = Series([1, 3, np.nan, 12, np.nan, 25])
msg = f"method must be one of.* Got '{invalid_method}' instead"
with pytest.raises(ValueError, match=msg):
s.interpolate(method=invalid_method)
# When an invalid method and invalid limit (such as -1) are
# provided, the error message reflects the invalid method.
with pytest.raises(ValueError, match=msg):
s.interpolate(method=invalid_method, limit=-1)
def test_interp_invalid_method_and_value(self):
# GH#36624
ser = Series([1, 3, np.nan, 12, np.nan, 25])
msg = "Cannot pass both fill_value and method"
with pytest.raises(ValueError, match=msg):
ser.interpolate(fill_value=3, method="pad")
def test_interp_limit_forward(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
# Provide 'forward' (the default) explicitly here.
expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0])
result = s.interpolate(method="linear", limit=2, limit_direction="forward")
tm.assert_series_equal(result, expected)
result = s.interpolate(method="linear", limit=2, limit_direction="FORWARD")
tm.assert_series_equal(result, expected)
def test_interp_unlimited(self):
# these test are for issue #16282 default Limit=None is unlimited
s = Series([np.nan, 1.0, 3.0, np.nan, np.nan, np.nan, 11.0, np.nan])
expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0])
result = s.interpolate(method="linear", limit_direction="both")
tm.assert_series_equal(result, expected)
expected = Series([np.nan, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0])
result = s.interpolate(method="linear", limit_direction="forward")
tm.assert_series_equal(result, expected)
expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, np.nan])
result = s.interpolate(method="linear", limit_direction="backward")
tm.assert_series_equal(result, expected)
def test_interp_limit_bad_direction(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
msg = (
r"Invalid limit_direction: expecting one of \['forward', "
r"'backward', 'both'\], got 'abc'"
)
with pytest.raises(ValueError, match=msg):
s.interpolate(method="linear", limit=2, limit_direction="abc")
# raises an error even if no limit is specified.
with pytest.raises(ValueError, match=msg):
s.interpolate(method="linear", limit_direction="abc")
# limit_area introduced GH #16284
def test_interp_limit_area(self):
# These tests are for issue #9218 -- fill NaNs in both directions.
s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan])
expected = Series([np.nan, np.nan, 3.0, 4.0, 5.0, 6.0, 7.0, np.nan, np.nan])
result = s.interpolate(method="linear", limit_area="inside")
tm.assert_series_equal(result, expected)
expected = Series(
[np.nan, np.nan, 3.0, 4.0, np.nan, np.nan, 7.0, np.nan, np.nan]
)
result = s.interpolate(method="linear", limit_area="inside", limit=1)
tm.assert_series_equal(result, expected)
expected = Series([np.nan, np.nan, 3.0, 4.0, np.nan, 6.0, 7.0, np.nan, np.nan])
result = s.interpolate(
method="linear", limit_area="inside", limit_direction="both", limit=1
)
tm.assert_series_equal(result, expected)
expected = Series([np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0])
result = s.interpolate(method="linear", limit_area="outside")
tm.assert_series_equal(result, expected)
expected = Series(
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan]
)
result = s.interpolate(method="linear", limit_area="outside", limit=1)
tm.assert_series_equal(result, expected)
expected = Series([np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan])
result = s.interpolate(
method="linear", limit_area="outside", limit_direction="both", limit=1
)
tm.assert_series_equal(result, expected)
expected = Series([3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan])
result = s.interpolate(
method="linear", limit_area="outside", limit_direction="backward"
)
tm.assert_series_equal(result, expected)
# raises an error even if limit type is wrong.
msg = r"Invalid limit_area: expecting one of \['inside', 'outside'\], got abc"
with pytest.raises(ValueError, match=msg):
s.interpolate(method="linear", limit_area="abc")
@pytest.mark.parametrize(
"method, limit_direction, expected",
[
("pad", "backward", "forward"),
("ffill", "backward", "forward"),
("backfill", "forward", "backward"),
("bfill", "forward", "backward"),
("pad", "both", "forward"),
("ffill", "both", "forward"),
("backfill", "both", "backward"),
("bfill", "both", "backward"),
],
)
def test_interp_limit_direction_raises(self, method, limit_direction, expected):
# https://github.com/pandas-dev/pandas/pull/34746
s = Series([1, 2, 3])
msg = f"`limit_direction` must be '{expected}' for method `{method}`"
with pytest.raises(ValueError, match=msg):
s.interpolate(method=method, limit_direction=limit_direction)
@pytest.mark.parametrize(
"data, expected_data, kwargs",
(
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan],
{"method": "pad", "limit_area": "inside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan],
{"method": "pad", "limit_area": "inside", "limit": 1},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0],
{"method": "pad", "limit_area": "outside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan],
{"method": "pad", "limit_area": "outside", "limit": 1},
),
(
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
{"method": "pad", "limit_area": "outside", "limit": 1},
),
(
range(5),
range(5),
{"method": "pad", "limit_area": "outside", "limit": 1},
),
),
)
def test_interp_limit_area_with_pad(self, data, expected_data, kwargs):
# GH26796
s = Series(data)
expected = Series(expected_data)
result = s.interpolate(**kwargs)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"data, expected_data, kwargs",
(
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan],
{"method": "bfill", "limit_area": "inside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan],
{"method": "bfill", "limit_area": "inside", "limit": 1},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
{"method": "bfill", "limit_area": "outside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
{"method": "bfill", "limit_area": "outside", "limit": 1},
),
),
)
def test_interp_limit_area_with_backfill(self, data, expected_data, kwargs):
# GH26796
s = Series(data)
expected = Series(expected_data)
result = s.interpolate(**kwargs)
tm.assert_series_equal(result, expected)
def test_interp_limit_direction(self):
# These tests are for issue #9218 -- fill NaNs in both directions.
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
expected = Series([1.0, 3.0, np.nan, 7.0, 9.0, 11.0])
result = s.interpolate(method="linear", limit=2, limit_direction="backward")
tm.assert_series_equal(result, expected)
expected = Series([1.0, 3.0, 5.0, np.nan, 9.0, 11.0])
result = s.interpolate(method="linear", limit=1, limit_direction="both")
tm.assert_series_equal(result, expected)
# Check that this works on a longer series of nans.
s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12, np.nan])
expected = Series([1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0])
result = s.interpolate(method="linear", limit=2, limit_direction="both")
tm.assert_series_equal(result, expected)
expected = Series(
[1.0, 3.0, 4.0, np.nan, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0]
)
result = s.interpolate(method="linear", limit=1, limit_direction="both")
tm.assert_series_equal(result, expected)
def test_interp_limit_to_ends(self):
# These test are for issue #10420 -- flow back to beginning.
s = Series([np.nan, np.nan, 5, 7, 9, np.nan])
expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, np.nan])
result = s.interpolate(method="linear", limit=2, limit_direction="backward")
tm.assert_series_equal(result, expected)
expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, 9.0])
result = s.interpolate(method="linear", limit=2, limit_direction="both")
tm.assert_series_equal(result, expected)
def test_interp_limit_before_ends(self):
# These test are for issue #11115 -- limit ends properly.
s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan])
expected = Series([np.nan, np.nan, 5.0, 7.0, 7.0, np.nan])
result = s.interpolate(method="linear", limit=1, limit_direction="forward")
tm.assert_series_equal(result, expected)
expected = Series([np.nan, 5.0, 5.0, 7.0, np.nan, np.nan])
result = s.interpolate(method="linear", limit=1, limit_direction="backward")
tm.assert_series_equal(result, expected)
expected = Series([np.nan, 5.0, 5.0, 7.0, 7.0, np.nan])
result = s.interpolate(method="linear", limit=1, limit_direction="both")
tm.assert_series_equal(result, expected)
@td.skip_if_no_scipy
def test_interp_all_good(self):
s = Series([1, 2, 3])
result = s.interpolate(method="polynomial", order=1)
tm.assert_series_equal(result, s)
# non-scipy
result = s.interpolate()
tm.assert_series_equal(result, s)
@pytest.mark.parametrize(
"check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)]
)
def test_interp_multiIndex(self, check_scipy):
idx = MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c")])
s = Series([1, 2, np.nan], index=idx)
expected = s.copy()
expected.loc[2] = 2
result = s.interpolate()
tm.assert_series_equal(result, expected)
msg = "Only `method=linear` interpolation is supported on MultiIndexes"
if check_scipy:
with pytest.raises(ValueError, match=msg):
s.interpolate(method="polynomial", order=1)
@td.skip_if_no_scipy
def test_interp_nonmono_raise(self):
s = Series([1, np.nan, 3], index=[0, 2, 1])
msg = "krogh interpolation requires that the index be monotonic"
with pytest.raises(ValueError, match=msg):
s.interpolate(method="krogh")
@td.skip_if_no_scipy
@pytest.mark.parametrize("method", ["nearest", "pad"])
def test_interp_datetime64(self, method, tz_naive_fixture):
df = Series(
[1, np.nan, 3], index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture)
)
result = df.interpolate(method=method)
expected = Series(
[1.0, 1.0, 3.0],
index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture),
)
tm.assert_series_equal(result, expected)
def test_interp_pad_datetime64tz_values(self):
# GH#27628 missing.interpolate_2d should handle datetimetz values
dti = date_range("2015-04-05", periods=3, tz="US/Central")
ser = Series(dti)
ser[1] = pd.NaT
result = ser.interpolate(method="pad")
expected = Series(dti)
expected[1] = expected[0]
tm.assert_series_equal(result, expected)
def test_interp_limit_no_nans(self):
# GH 7173
s = Series([1.0, 2.0, 3.0])
result = s.interpolate(limit=1)
expected = s
tm.assert_series_equal(result, expected)
@td.skip_if_no_scipy
@pytest.mark.parametrize("method", ["polynomial", "spline"])
def test_no_order(self, method):
# see GH-10633, GH-24014
s = Series([0, 1, np.nan, 3])
msg = "You must specify the order of the spline or polynomial"
with pytest.raises(ValueError, match=msg):
s.interpolate(method=method)
@td.skip_if_no_scipy
@pytest.mark.parametrize("order", [-1, -1.0, 0, 0.0, np.nan])
def test_interpolate_spline_invalid_order(self, order):
s = Series([0, 1, np.nan, 3])
msg = "order needs to be specified and greater than 0"
with pytest.raises(ValueError, match=msg):
s.interpolate(method="spline", order=order)
@td.skip_if_no_scipy
def test_spline(self):
s = Series([1, 2, np.nan, 4, 5, np.nan, 7])
result = s.interpolate(method="spline", order=1)
expected = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
tm.assert_series_equal(result, expected)
@td.skip_if_no_scipy
def test_spline_extrapolate(self):
s = Series([1, 2, 3, 4, np.nan, 6, np.nan])
result3 = s.interpolate(method="spline", order=1, ext=3)
expected3 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0])
tm.assert_series_equal(result3, expected3)
result1 = s.interpolate(method="spline", order=1, ext=0)
expected1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
tm.assert_series_equal(result1, expected1)
@td.skip_if_no_scipy
def test_spline_smooth(self):
s = Series([1, 2, np.nan, 4, 5.1, np.nan, 7])
assert (
s.interpolate(method="spline", order=3, s=0)[5]
!= s.interpolate(method="spline", order=3)[5]
)
@td.skip_if_no_scipy
def test_spline_interpolation(self):
s = Series(np.arange(10) ** 2)
s[np.random.randint(0, 9, 3)] = np.nan
result1 = s.interpolate(method="spline", order=1)
expected1 = s.interpolate(method="spline", order=1)
tm.assert_series_equal(result1, expected1)
def test_interp_timedelta64(self):
# GH 6424
df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 3]))
result = df.interpolate(method="time")
expected = Series([1.0, 2.0, 3.0], index=pd.to_timedelta([1, 2, 3]))
tm.assert_series_equal(result, expected)
# test for non uniform spacing
df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 4]))
result = df.interpolate(method="time")
expected = Series([1.0, 1.666667, 3.0], index=pd.to_timedelta([1, 2, 4]))
tm.assert_series_equal(result, expected)
def test_series_interpolate_method_values(self):
# GH#1646
rng = date_range("1/1/2000", "1/20/2000", freq="D")
ts = Series(np.random.randn(len(rng)), index=rng)
ts[::2] = np.nan
result = ts.interpolate(method="values")
exp = ts.interpolate()
tm.assert_series_equal(result, exp)
def test_series_interpolate_intraday(self):
# #1698
index = date_range("1/1/2012", periods=4, freq="12D")
ts = Series([0, 12, 24, 36], index)
new_index = index.append(index + pd.DateOffset(days=1)).sort_values()
exp = ts.reindex(new_index).interpolate(method="time")
index = date_range("1/1/2012", periods=4, freq="12H")
ts = Series([0, 12, 24, 36], index)
new_index = index.append(index + pd.DateOffset(hours=1)).sort_values()
result = ts.reindex(new_index).interpolate(method="time")
tm.assert_numpy_array_equal(result.values, exp.values)
@pytest.mark.parametrize(
"ind",
[
["a", "b", "c", "d"],
pd.period_range(start="2019-01-01", periods=4),
pd.interval_range(start=0, end=4),
],
)
def test_interp_non_timedelta_index(self, interp_methods_ind, ind):
# gh 21662
df = pd.DataFrame([0, 1, np.nan, 3], index=ind)
method, kwargs = interp_methods_ind
if method == "pchip":
pytest.importorskip("scipy")
if method == "linear":
result = df[0].interpolate(**kwargs)
expected = Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind)
tm.assert_series_equal(result, expected)
else:
expected_error = (
"Index column must be numeric or datetime type when "
f"using {method} method other than linear. "
"Try setting a numeric or datetime index column before "
"interpolating."
)
with pytest.raises(ValueError, match=expected_error):
df[0].interpolate(method=method, **kwargs)
@td.skip_if_no_scipy
def test_interpolate_timedelta_index(self, request, interp_methods_ind):
"""
Tests for non numerical index types - object, period, timedelta
Note that all methods except time, index, nearest and values
are tested here.
"""
# gh 21662
ind = pd.timedelta_range(start=1, periods=4)
df = pd.DataFrame([0, 1, np.nan, 3], index=ind)
method, kwargs = interp_methods_ind
import scipy
if method in {"cubic", "zero"} or (
method == "barycentric" and Version(scipy.__version__) < Version("1.5.0")
):
request.node.add_marker(
pytest.mark.xfail(
reason=f"{method} interpolation is not supported for TimedeltaIndex"
)
)
result = df[0].interpolate(method=method, **kwargs)
expected = Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ascending, expected_values",
[(True, [1, 2, 3, 9, 10]), (False, [10, 9, 3, 2, 1])],
)
def test_interpolate_unsorted_index(self, ascending, expected_values):
# GH 21037
ts = Series(data=[10, 9, np.nan, 2, 1], index=[10, 9, 3, 2, 1])
result = ts.sort_index(ascending=ascending).interpolate(method="index")
expected = Series(data=expected_values, index=expected_values, dtype=float)
tm.assert_series_equal(result, expected)
def test_interpolate_pos_args_deprecation(self):
# https://github.com/pandas-dev/pandas/issues/41485
ser = Series([1, 2, 3])
msg = (
r"In a future version of pandas all arguments of Series.interpolate except "
r"for the argument 'method' will be keyword-only"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.interpolate("pad", 0)
expected = Series([1, 2, 3])
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,28 @@
import numpy as np
from pandas import (
Series,
date_range,
)
class TestIsMonotonic:
def test_is_monotonic_numeric(self):
ser = Series(np.random.randint(0, 10, size=1000))
assert not ser.is_monotonic
ser = Series(np.arange(1000))
assert ser.is_monotonic is True
assert ser.is_monotonic_increasing is True
ser = Series(np.arange(1000, 0, -1))
assert ser.is_monotonic_decreasing is True
def test_is_monotonic_dt64(self):
ser = Series(date_range("20130101", periods=10))
assert ser.is_monotonic is True
assert ser.is_monotonic_increasing is True
ser = Series(list(reversed(ser)))
assert ser.is_monotonic is False
assert ser.is_monotonic_decreasing is True

View File

@ -0,0 +1,41 @@
import numpy as np
import pytest
from pandas import Series
from pandas.core.construction import create_series_with_explicit_dtype
@pytest.mark.parametrize(
"data, expected",
[
(np.random.randint(0, 10, size=1000), False),
(np.arange(1000), True),
([], True),
([np.nan], True),
(["foo", "bar", np.nan], True),
(["foo", "foo", np.nan], False),
(["foo", "bar", np.nan, np.nan], False),
],
)
def test_is_unique(data, expected):
# GH#11946 / GH#25180
ser = create_series_with_explicit_dtype(data, dtype_if_empty=object)
assert ser.is_unique is expected
def test_is_unique_class_ne(capsys):
# GH#20661
class Foo:
def __init__(self, val):
self._value = val
def __ne__(self, other):
raise Exception("NEQ not supported")
with capsys.disabled():
li = [Foo(i) for i in range(5)]
ser = Series(li, index=list(range(5)))
ser.is_unique
captured = capsys.readouterr()
assert len(captured.err) == 0

View File

@ -0,0 +1,203 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Series,
date_range,
)
import pandas._testing as tm
from pandas.core.arrays import PeriodArray
class TestSeriesIsIn:
def test_isin(self):
s = Series(["A", "B", "C", "a", "B", "B", "A", "C"])
result = s.isin(["A", "C"])
expected = Series([True, False, True, False, False, False, True, True])
tm.assert_series_equal(result, expected)
# GH#16012
# This specific issue has to have a series over 1e6 in len, but the
# comparison array (in_list) must be large enough so that numpy doesn't
# do a manual masking trick that will avoid this issue altogether
s = Series(list("abcdefghijk" * 10**5))
# If numpy doesn't do the manual comparison/mask, these
# unorderable mixed types are what cause the exception in numpy
in_list = [-1, "a", "b", "G", "Y", "Z", "E", "K", "E", "S", "I", "R", "R"] * 6
assert s.isin(in_list).sum() == 200000
def test_isin_with_string_scalar(self):
# GH#4763
s = Series(["A", "B", "C", "a", "B", "B", "A", "C"])
msg = (
r"only list-like objects are allowed to be passed to isin\(\), "
r"you passed a \[str\]"
)
with pytest.raises(TypeError, match=msg):
s.isin("a")
s = Series(["aaa", "b", "c"])
with pytest.raises(TypeError, match=msg):
s.isin("aaa")
def test_isin_with_i8(self):
# GH#5021
expected = Series([True, True, False, False, False])
expected2 = Series([False, True, False, False, False])
# datetime64[ns]
s = Series(date_range("jan-01-2013", "jan-05-2013"))
result = s.isin(s[0:2])
tm.assert_series_equal(result, expected)
result = s.isin(s[0:2].values)
tm.assert_series_equal(result, expected)
# fails on dtype conversion in the first place
result = s.isin(np.asarray(s[0:2].values).astype("datetime64[D]"))
tm.assert_series_equal(result, expected)
result = s.isin([s[1]])
tm.assert_series_equal(result, expected2)
result = s.isin([np.datetime64(s[1])])
tm.assert_series_equal(result, expected2)
result = s.isin(set(s[0:2]))
tm.assert_series_equal(result, expected)
# timedelta64[ns]
s = Series(pd.to_timedelta(range(5), unit="d"))
result = s.isin(s[0:2])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])])
def test_isin_empty(self, empty):
# see GH#16991
s = Series(["a", "b"])
expected = Series([False, False])
result = s.isin(empty)
tm.assert_series_equal(expected, result)
def test_isin_read_only(self):
# https://github.com/pandas-dev/pandas/issues/37174
arr = np.array([1, 2, 3])
arr.setflags(write=False)
s = Series([1, 2, 3])
result = s.isin(arr)
expected = Series([True, True, True])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", [object, None])
def test_isin_dt64_values_vs_ints(self, dtype):
# GH#36621 dont cast integers to datetimes for isin
dti = date_range("2013-01-01", "2013-01-05")
ser = Series(dti)
comps = np.asarray([1356998400000000000], dtype=dtype)
res = dti.isin(comps)
expected = np.array([False] * len(dti), dtype=bool)
tm.assert_numpy_array_equal(res, expected)
res = ser.isin(comps)
tm.assert_series_equal(res, Series(expected))
res = pd.core.algorithms.isin(ser, comps)
tm.assert_numpy_array_equal(res, expected)
def test_isin_tzawareness_mismatch(self):
dti = date_range("2013-01-01", "2013-01-05")
ser = Series(dti)
other = dti.tz_localize("UTC")
res = dti.isin(other)
expected = np.array([False] * len(dti), dtype=bool)
tm.assert_numpy_array_equal(res, expected)
res = ser.isin(other)
tm.assert_series_equal(res, Series(expected))
res = pd.core.algorithms.isin(ser, other)
tm.assert_numpy_array_equal(res, expected)
def test_isin_period_freq_mismatch(self):
dti = date_range("2013-01-01", "2013-01-05")
pi = dti.to_period("M")
ser = Series(pi)
# We construct another PeriodIndex with the same i8 values
# but different dtype
dtype = dti.to_period("Y").dtype
other = PeriodArray._simple_new(pi.asi8, dtype=dtype)
res = pi.isin(other)
expected = np.array([False] * len(pi), dtype=bool)
tm.assert_numpy_array_equal(res, expected)
res = ser.isin(other)
tm.assert_series_equal(res, Series(expected))
res = pd.core.algorithms.isin(ser, other)
tm.assert_numpy_array_equal(res, expected)
@pytest.mark.parametrize("values", [[-9.0, 0.0], [-9, 0]])
def test_isin_float_in_int_series(self, values):
# GH#19356 GH#21804
ser = Series(values)
result = ser.isin([-9, -0.5])
expected = Series([True, False])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", ["boolean", "Int64", "Float64"])
@pytest.mark.parametrize(
"data,values,expected",
[
([0, 1, 0], [1], [False, True, False]),
([0, 1, 0], [1, pd.NA], [False, True, False]),
([0, pd.NA, 0], [1, 0], [True, False, True]),
([0, 1, pd.NA], [1, pd.NA], [False, True, True]),
([0, 1, pd.NA], [1, np.nan], [False, True, False]),
([0, pd.NA, pd.NA], [np.nan, pd.NaT, None], [False, False, False]),
],
)
def test_isin_masked_types(self, dtype, data, values, expected):
# GH#42405
ser = Series(data, dtype=dtype)
result = ser.isin(values)
expected = Series(expected, dtype="boolean")
tm.assert_series_equal(result, expected)
@pytest.mark.slow
def test_isin_large_series_mixed_dtypes_and_nan():
# https://github.com/pandas-dev/pandas/issues/37094
# combination of object dtype for the values and > 1_000_000 elements
ser = Series([1, 2, np.nan] * 1_000_000)
result = ser.isin({"foo", "bar"})
expected = Series([False] * 3 * 1_000_000)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"array,expected",
[
(
[0, 1j, 1j, 1, 1 + 1j, 1 + 2j, 1 + 1j],
Series([False, True, True, False, True, True, True], dtype=bool),
)
],
)
def test_isin_complex_numbers(array, expected):
# GH 17927
result = Series(array).isin([1j, 1 + 1j, 1 + 2j])
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,35 @@
"""
We also test Series.notna in this file.
"""
import numpy as np
from pandas import (
Period,
Series,
)
import pandas._testing as tm
class TestIsna:
def test_isna_period_dtype(self):
# GH#13737
ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")])
expected = Series([False, True])
result = ser.isna()
tm.assert_series_equal(result, expected)
result = ser.notna()
tm.assert_series_equal(result, ~expected)
def test_isna(self):
ser = Series([0, 5.4, 3, np.nan, -0.001])
expected = Series([False, False, False, True, False])
tm.assert_series_equal(ser.isna(), expected)
tm.assert_series_equal(ser.notna(), ~expected)
ser = Series(["hi", "", np.nan])
expected = Series([False, False, True])
tm.assert_series_equal(ser.isna(), expected)
tm.assert_series_equal(ser.notna(), ~expected)

View File

@ -0,0 +1,59 @@
"""
Series.item method, mainly testing that we get python scalars as opposed to
numpy scalars.
"""
import pytest
from pandas import (
Series,
Timedelta,
Timestamp,
date_range,
)
class TestItem:
def test_item(self):
# We are testing that we get python scalars as opposed to numpy scalars
ser = Series([1])
result = ser.item()
assert result == 1
assert result == ser.iloc[0]
assert isinstance(result, int) # i.e. not np.int64
ser = Series([0.5], index=[3])
result = ser.item()
assert isinstance(result, float)
assert result == 0.5
ser = Series([1, 2])
msg = "can only convert an array of size 1"
with pytest.raises(ValueError, match=msg):
ser.item()
dti = date_range("2016-01-01", periods=2)
with pytest.raises(ValueError, match=msg):
dti.item()
with pytest.raises(ValueError, match=msg):
Series(dti).item()
val = dti[:1].item()
assert isinstance(val, Timestamp)
val = Series(dti)[:1].item()
assert isinstance(val, Timestamp)
tdi = dti - dti
with pytest.raises(ValueError, match=msg):
tdi.item()
with pytest.raises(ValueError, match=msg):
Series(tdi).item()
val = tdi[:1].item()
assert isinstance(val, Timedelta)
val = Series(tdi)[:1].item()
assert isinstance(val, Timedelta)
# Case where ser[0] would not work
ser = Series(dti, index=[5, 6])
val = ser[:1].item()
assert val == dti[0]

View File

@ -0,0 +1,78 @@
import operator
import numpy as np
import pytest
from pandas import (
DataFrame,
Series,
)
import pandas._testing as tm
class TestMatmul:
def test_matmul(self):
# matmul test is for GH#10259
a = Series(np.random.randn(4), index=["p", "q", "r", "s"])
b = DataFrame(
np.random.randn(3, 4), index=["1", "2", "3"], columns=["p", "q", "r", "s"]
).T
# Series @ DataFrame -> Series
result = operator.matmul(a, b)
expected = Series(np.dot(a.values, b.values), index=["1", "2", "3"])
tm.assert_series_equal(result, expected)
# DataFrame @ Series -> Series
result = operator.matmul(b.T, a)
expected = Series(np.dot(b.T.values, a.T.values), index=["1", "2", "3"])
tm.assert_series_equal(result, expected)
# Series @ Series -> scalar
result = operator.matmul(a, a)
expected = np.dot(a.values, a.values)
tm.assert_almost_equal(result, expected)
# GH#21530
# vector (1D np.array) @ Series (__rmatmul__)
result = operator.matmul(a.values, a)
expected = np.dot(a.values, a.values)
tm.assert_almost_equal(result, expected)
# GH#21530
# vector (1D list) @ Series (__rmatmul__)
result = operator.matmul(a.values.tolist(), a)
expected = np.dot(a.values, a.values)
tm.assert_almost_equal(result, expected)
# GH#21530
# matrix (2D np.array) @ Series (__rmatmul__)
result = operator.matmul(b.T.values, a)
expected = np.dot(b.T.values, a.values)
tm.assert_almost_equal(result, expected)
# GH#21530
# matrix (2D nested lists) @ Series (__rmatmul__)
result = operator.matmul(b.T.values.tolist(), a)
expected = np.dot(b.T.values, a.values)
tm.assert_almost_equal(result, expected)
# mixed dtype DataFrame @ Series
a["p"] = int(a.p)
result = operator.matmul(b.T, a)
expected = Series(np.dot(b.T.values, a.T.values), index=["1", "2", "3"])
tm.assert_series_equal(result, expected)
# different dtypes DataFrame @ Series
a = a.astype(int)
result = operator.matmul(b.T, a)
expected = Series(np.dot(b.T.values, a.T.values), index=["1", "2", "3"])
tm.assert_series_equal(result, expected)
msg = r"Dot product shape mismatch, \(4,\) vs \(3,\)"
# exception raised is of type Exception
with pytest.raises(Exception, match=msg):
a.dot(a.values[:3])
msg = "matrices are not aligned"
with pytest.raises(ValueError, match=msg):
a.dot(b.T)

View File

@ -0,0 +1,233 @@
"""
Note: for naming purposes, most tests are title with as e.g. "test_nlargest_foo"
but are implicitly also testing nsmallest_foo.
"""
from itertools import product
import numpy as np
import pytest
import pandas as pd
from pandas import Series
import pandas._testing as tm
main_dtypes = [
"datetime",
"datetimetz",
"timedelta",
"int8",
"int16",
"int32",
"int64",
"float32",
"float64",
"uint8",
"uint16",
"uint32",
"uint64",
]
@pytest.fixture
def s_main_dtypes():
"""
A DataFrame with many dtypes
* datetime
* datetimetz
* timedelta
* [u]int{8,16,32,64}
* float{32,64}
The columns are the name of the dtype.
"""
df = pd.DataFrame(
{
"datetime": pd.to_datetime(["2003", "2002", "2001", "2002", "2005"]),
"datetimetz": pd.to_datetime(
["2003", "2002", "2001", "2002", "2005"]
).tz_localize("US/Eastern"),
"timedelta": pd.to_timedelta(["3d", "2d", "1d", "2d", "5d"]),
}
)
for dtype in [
"int8",
"int16",
"int32",
"int64",
"float32",
"float64",
"uint8",
"uint16",
"uint32",
"uint64",
]:
df[dtype] = Series([3, 2, 1, 2, 5], dtype=dtype)
return df
@pytest.fixture(params=main_dtypes)
def s_main_dtypes_split(request, s_main_dtypes):
"""Each series in s_main_dtypes."""
return s_main_dtypes[request.param]
def assert_check_nselect_boundary(vals, dtype, method):
# helper function for 'test_boundary_{dtype}' tests
ser = Series(vals, dtype=dtype)
result = getattr(ser, method)(3)
expected_idxr = [0, 1, 2] if method == "nsmallest" else [3, 2, 1]
expected = ser.loc[expected_idxr]
tm.assert_series_equal(result, expected)
class TestSeriesNLargestNSmallest:
@pytest.mark.parametrize(
"r",
[
Series([3.0, 2, 1, 2, "5"], dtype="object"),
Series([3.0, 2, 1, 2, 5], dtype="object"),
# not supported on some archs
# Series([3., 2, 1, 2, 5], dtype='complex256'),
Series([3.0, 2, 1, 2, 5], dtype="complex128"),
Series(list("abcde")),
Series(list("abcde"), dtype="category"),
],
)
def test_nlargest_error(self, r):
dt = r.dtype
msg = f"Cannot use method 'n(largest|smallest)' with dtype {dt}"
args = 2, len(r), 0, -1
methods = r.nlargest, r.nsmallest
for method, arg in product(methods, args):
with pytest.raises(TypeError, match=msg):
method(arg)
def test_nsmallest_nlargest(self, s_main_dtypes_split):
# float, int, datetime64 (use i8), timedelts64 (same),
# object that are numbers, object that are strings
ser = s_main_dtypes_split
tm.assert_series_equal(ser.nsmallest(2), ser.iloc[[2, 1]])
tm.assert_series_equal(ser.nsmallest(2, keep="last"), ser.iloc[[2, 3]])
empty = ser.iloc[0:0]
tm.assert_series_equal(ser.nsmallest(0), empty)
tm.assert_series_equal(ser.nsmallest(-1), empty)
tm.assert_series_equal(ser.nlargest(0), empty)
tm.assert_series_equal(ser.nlargest(-1), empty)
tm.assert_series_equal(ser.nsmallest(len(ser)), ser.sort_values())
tm.assert_series_equal(ser.nsmallest(len(ser) + 1), ser.sort_values())
tm.assert_series_equal(ser.nlargest(len(ser)), ser.iloc[[4, 0, 1, 3, 2]])
tm.assert_series_equal(ser.nlargest(len(ser) + 1), ser.iloc[[4, 0, 1, 3, 2]])
def test_nlargest_misc(self):
ser = Series([3.0, np.nan, 1, 2, 5])
result = ser.nlargest()
expected = ser.iloc[[4, 0, 3, 2, 1]]
tm.assert_series_equal(result, expected)
result = ser.nsmallest()
expected = ser.iloc[[2, 3, 0, 4, 1]]
tm.assert_series_equal(result, expected)
msg = 'keep must be either "first", "last"'
with pytest.raises(ValueError, match=msg):
ser.nsmallest(keep="invalid")
with pytest.raises(ValueError, match=msg):
ser.nlargest(keep="invalid")
# GH#15297
ser = Series([1] * 5, index=[1, 2, 3, 4, 5])
expected_first = Series([1] * 3, index=[1, 2, 3])
expected_last = Series([1] * 3, index=[5, 4, 3])
result = ser.nsmallest(3)
tm.assert_series_equal(result, expected_first)
result = ser.nsmallest(3, keep="last")
tm.assert_series_equal(result, expected_last)
result = ser.nlargest(3)
tm.assert_series_equal(result, expected_first)
result = ser.nlargest(3, keep="last")
tm.assert_series_equal(result, expected_last)
@pytest.mark.parametrize("n", range(1, 5))
def test_nlargest_n(self, n):
# GH 13412
ser = Series([1, 4, 3, 2], index=[0, 0, 1, 1])
result = ser.nlargest(n)
expected = ser.sort_values(ascending=False).head(n)
tm.assert_series_equal(result, expected)
result = ser.nsmallest(n)
expected = ser.sort_values().head(n)
tm.assert_series_equal(result, expected)
def test_nlargest_boundary_integer(self, nselect_method, any_int_numpy_dtype):
# GH#21426
dtype_info = np.iinfo(any_int_numpy_dtype)
min_val, max_val = dtype_info.min, dtype_info.max
vals = [min_val, min_val + 1, max_val - 1, max_val]
assert_check_nselect_boundary(vals, any_int_numpy_dtype, nselect_method)
def test_nlargest_boundary_float(self, nselect_method, float_numpy_dtype):
# GH#21426
dtype_info = np.finfo(float_numpy_dtype)
min_val, max_val = dtype_info.min, dtype_info.max
min_2nd, max_2nd = np.nextafter([min_val, max_val], 0, dtype=float_numpy_dtype)
vals = [min_val, min_2nd, max_2nd, max_val]
assert_check_nselect_boundary(vals, float_numpy_dtype, nselect_method)
@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
def test_nlargest_boundary_datetimelike(self, nselect_method, dtype):
# GH#21426
# use int64 bounds and +1 to min_val since true minimum is NaT
# (include min_val/NaT at end to maintain same expected_idxr)
dtype_info = np.iinfo("int64")
min_val, max_val = dtype_info.min, dtype_info.max
vals = [min_val + 1, min_val + 2, max_val - 1, max_val, min_val]
assert_check_nselect_boundary(vals, dtype, nselect_method)
def test_nlargest_duplicate_keep_all_ties(self):
# see GH#16818
ser = Series([10, 9, 8, 7, 7, 7, 7, 6])
result = ser.nlargest(4, keep="all")
expected = Series([10, 9, 8, 7, 7, 7, 7])
tm.assert_series_equal(result, expected)
result = ser.nsmallest(2, keep="all")
expected = Series([6, 7, 7, 7, 7], index=[7, 3, 4, 5, 6])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"data,expected", [([True, False], [True]), ([True, False, True, True], [True])]
)
def test_nlargest_boolean(self, data, expected):
# GH#26154 : ensure True > False
ser = Series(data)
result = ser.nlargest(1)
expected = Series(expected)
tm.assert_series_equal(result, expected)
def test_nlargest_nullable(self, any_numeric_ea_dtype):
# GH#42816
dtype = any_numeric_ea_dtype
arr = np.random.randn(10).astype(dtype.lower(), copy=False)
ser = Series(arr.copy(), dtype=dtype)
ser[1] = pd.NA
result = ser.nlargest(5)
expected = (
Series(np.delete(arr, 1), index=ser.index.delete(1))
.nlargest(5)
.astype(dtype)
)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,24 @@
import numpy as np
from pandas import (
Categorical,
Series,
)
def test_nunique():
# basics.rst doc example
series = Series(np.random.randn(500))
series[20:500] = np.nan
series[10:20] = 5000
result = series.nunique()
assert result == 11
def test_nunique_categorical():
# GH#18051
ser = Series(Categorical([]))
assert ser.nunique() == 0
ser = Series(Categorical([np.nan]))
assert ser.nunique() == 0

View File

@ -0,0 +1,82 @@
import numpy as np
import pytest
from pandas import (
Series,
date_range,
)
import pandas._testing as tm
class TestSeriesPctChange:
def test_pct_change(self, datetime_series):
rs = datetime_series.pct_change(fill_method=None)
tm.assert_series_equal(rs, datetime_series / datetime_series.shift(1) - 1)
rs = datetime_series.pct_change(2)
filled = datetime_series.fillna(method="pad")
tm.assert_series_equal(rs, filled / filled.shift(2) - 1)
rs = datetime_series.pct_change(fill_method="bfill", limit=1)
filled = datetime_series.fillna(method="bfill", limit=1)
tm.assert_series_equal(rs, filled / filled.shift(1) - 1)
rs = datetime_series.pct_change(freq="5D")
filled = datetime_series.fillna(method="pad")
tm.assert_series_equal(
rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled)
)
def test_pct_change_with_duplicate_axis(self):
# GH#28664
common_idx = date_range("2019-11-14", periods=5, freq="D")
result = Series(range(5), common_idx).pct_change(freq="B")
# the reason that the expected should be like this is documented at PR 28681
expected = Series([np.NaN, np.inf, np.NaN, np.NaN, 3.0], common_idx)
tm.assert_series_equal(result, expected)
def test_pct_change_shift_over_nas(self):
s = Series([1.0, 1.5, np.nan, 2.5, 3.0])
chg = s.pct_change()
expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2])
tm.assert_series_equal(chg, expected)
@pytest.mark.parametrize(
"freq, periods, fill_method, limit",
[
("5B", 5, None, None),
("3B", 3, None, None),
("3B", 3, "bfill", None),
("7B", 7, "pad", 1),
("7B", 7, "bfill", 3),
("14B", 14, None, None),
],
)
def test_pct_change_periods_freq(
self, freq, periods, fill_method, limit, datetime_series
):
# GH#7292
rs_freq = datetime_series.pct_change(
freq=freq, fill_method=fill_method, limit=limit
)
rs_periods = datetime_series.pct_change(
periods, fill_method=fill_method, limit=limit
)
tm.assert_series_equal(rs_freq, rs_periods)
empty_ts = Series(index=datetime_series.index, dtype=object)
rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit)
rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit)
tm.assert_series_equal(rs_freq, rs_periods)
@pytest.mark.parametrize("fill_method", ["pad", "ffill", None])
def test_pct_change_with_duplicated_indices(fill_method):
# GH30463
s = Series([np.nan, 1, 2, 3, 9, 18], index=["a", "b"] * 3)
result = s.pct_change(fill_method=fill_method)
expected = Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=["a", "b"] * 3)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,13 @@
from pandas import Series
import pandas._testing as tm
def test_pop():
# GH#6600
ser = Series([0, 4, 0], index=["A", "B", "C"], name=4)
result = ser.pop("B")
assert result == 4
expected = Series([0, 0], index=["A", "C"], name=4)
tm.assert_series_equal(ser, expected)

View File

@ -0,0 +1,225 @@
import numpy as np
import pytest
from pandas.core.dtypes.common import is_integer
import pandas as pd
from pandas import (
Index,
Series,
)
import pandas._testing as tm
from pandas.core.indexes.datetimes import Timestamp
class TestSeriesQuantile:
def test_quantile(self, datetime_series):
q = datetime_series.quantile(0.1)
assert q == np.percentile(datetime_series.dropna(), 10)
q = datetime_series.quantile(0.9)
assert q == np.percentile(datetime_series.dropna(), 90)
# object dtype
q = Series(datetime_series, dtype=object).quantile(0.9)
assert q == np.percentile(datetime_series.dropna(), 90)
# datetime64[ns] dtype
dts = datetime_series.index.to_series()
q = dts.quantile(0.2)
assert q == Timestamp("2000-01-10 19:12:00")
# timedelta64[ns] dtype
tds = dts.diff()
q = tds.quantile(0.25)
assert q == pd.to_timedelta("24:00:00")
# GH7661
result = Series([np.timedelta64("NaT")]).sum()
assert result == pd.Timedelta(0)
msg = "percentiles should all be in the interval \\[0, 1\\]"
for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
with pytest.raises(ValueError, match=msg):
datetime_series.quantile(invalid)
def test_quantile_multi(self, datetime_series):
qs = [0.1, 0.9]
result = datetime_series.quantile(qs)
expected = Series(
[
np.percentile(datetime_series.dropna(), 10),
np.percentile(datetime_series.dropna(), 90),
],
index=qs,
name=datetime_series.name,
)
tm.assert_series_equal(result, expected)
dts = datetime_series.index.to_series()
dts.name = "xxx"
result = dts.quantile((0.2, 0.2))
expected = Series(
[Timestamp("2000-01-10 19:12:00"), Timestamp("2000-01-10 19:12:00")],
index=[0.2, 0.2],
name="xxx",
)
tm.assert_series_equal(result, expected)
result = datetime_series.quantile([])
expected = Series(
[], name=datetime_series.name, index=Index([], dtype=float), dtype="float64"
)
tm.assert_series_equal(result, expected)
def test_quantile_interpolation(self, datetime_series):
# see gh-10174
# interpolation = linear (default case)
q = datetime_series.quantile(0.1, interpolation="linear")
assert q == np.percentile(datetime_series.dropna(), 10)
q1 = datetime_series.quantile(0.1)
assert q1 == np.percentile(datetime_series.dropna(), 10)
# test with and without interpolation keyword
assert q == q1
def test_quantile_interpolation_dtype(self):
# GH #10174
# interpolation = linear (default case)
q = Series([1, 3, 4]).quantile(0.5, interpolation="lower")
assert q == np.percentile(np.array([1, 3, 4]), 50)
assert is_integer(q)
q = Series([1, 3, 4]).quantile(0.5, interpolation="higher")
assert q == np.percentile(np.array([1, 3, 4]), 50)
assert is_integer(q)
def test_quantile_nan(self):
# GH 13098
s = Series([1, 2, 3, 4, np.nan])
result = s.quantile(0.5)
expected = 2.5
assert result == expected
# all nan/empty
s1 = Series([], dtype=object)
cases = [s1, Series([np.nan, np.nan])]
for s in cases:
res = s.quantile(0.5)
assert np.isnan(res)
res = s.quantile([0.5])
tm.assert_series_equal(res, Series([np.nan], index=[0.5]))
res = s.quantile([0.2, 0.3])
tm.assert_series_equal(res, Series([np.nan, np.nan], index=[0.2, 0.3]))
@pytest.mark.parametrize(
"case",
[
[
Timestamp("2011-01-01"),
Timestamp("2011-01-02"),
Timestamp("2011-01-03"),
],
[
Timestamp("2011-01-01", tz="US/Eastern"),
Timestamp("2011-01-02", tz="US/Eastern"),
Timestamp("2011-01-03", tz="US/Eastern"),
],
[pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
# NaT
[
Timestamp("2011-01-01"),
Timestamp("2011-01-02"),
Timestamp("2011-01-03"),
pd.NaT,
],
[
Timestamp("2011-01-01", tz="US/Eastern"),
Timestamp("2011-01-02", tz="US/Eastern"),
Timestamp("2011-01-03", tz="US/Eastern"),
pd.NaT,
],
[
pd.Timedelta("1 days"),
pd.Timedelta("2 days"),
pd.Timedelta("3 days"),
pd.NaT,
],
],
)
def test_quantile_box(self, case):
s = Series(case, name="XXX")
res = s.quantile(0.5)
assert res == case[1]
res = s.quantile([0.5])
exp = Series([case[1]], index=[0.5], name="XXX")
tm.assert_series_equal(res, exp)
def test_datetime_timedelta_quantiles(self):
# covers #9694
assert pd.isna(Series([], dtype="M8[ns]").quantile(0.5))
assert pd.isna(Series([], dtype="m8[ns]").quantile(0.5))
def test_quantile_nat(self):
res = Series([pd.NaT, pd.NaT]).quantile(0.5)
assert res is pd.NaT
res = Series([pd.NaT, pd.NaT]).quantile([0.5])
tm.assert_series_equal(res, Series([pd.NaT], index=[0.5]))
@pytest.mark.parametrize(
"values, dtype",
[([0, 0, 0, 1, 2, 3], "Sparse[int]"), ([0.0, None, 1.0, 2.0], "Sparse[float]")],
)
def test_quantile_sparse(self, values, dtype):
ser = Series(values, dtype=dtype)
result = ser.quantile([0.5])
expected = Series(np.asarray(ser)).quantile([0.5])
tm.assert_series_equal(result, expected)
def test_quantile_empty(self):
# floats
s = Series([], dtype="float64")
res = s.quantile(0.5)
assert np.isnan(res)
res = s.quantile([0.5])
exp = Series([np.nan], index=[0.5])
tm.assert_series_equal(res, exp)
# int
s = Series([], dtype="int64")
res = s.quantile(0.5)
assert np.isnan(res)
res = s.quantile([0.5])
exp = Series([np.nan], index=[0.5])
tm.assert_series_equal(res, exp)
# datetime
s = Series([], dtype="datetime64[ns]")
res = s.quantile(0.5)
assert res is pd.NaT
res = s.quantile([0.5])
exp = Series([pd.NaT], index=[0.5])
tm.assert_series_equal(res, exp)
@pytest.mark.parametrize("dtype", [int, float, "Int64"])
def test_quantile_dtypes(self, dtype):
result = Series([1, 2, 3], dtype=dtype).quantile(np.arange(0, 1, 0.25))
expected = Series(np.arange(1, 3, 0.5), index=np.arange(0, 1, 0.25))
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,484 @@
from itertools import chain
import operator
import numpy as np
import pytest
from pandas._libs.algos import (
Infinity,
NegInfinity,
)
import pandas.util._test_decorators as td
from pandas import (
NaT,
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
from pandas.api.types import CategoricalDtype
@pytest.fixture
def ser():
return Series([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3])
@pytest.fixture(
params=[
["average", np.array([1.5, 5.5, 7.0, 3.5, np.nan, 3.5, 1.5, 8.0, np.nan, 5.5])],
["min", np.array([1, 5, 7, 3, np.nan, 3, 1, 8, np.nan, 5])],
["max", np.array([2, 6, 7, 4, np.nan, 4, 2, 8, np.nan, 6])],
["first", np.array([1, 5, 7, 3, np.nan, 4, 2, 8, np.nan, 6])],
["dense", np.array([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3])],
]
)
def results(request):
return request.param
class TestSeriesRank:
@td.skip_if_no_scipy
def test_rank(self, datetime_series):
from scipy.stats import rankdata
datetime_series[::2] = np.nan
datetime_series[:10][::3] = 4.0
ranks = datetime_series.rank()
oranks = datetime_series.astype("O").rank()
tm.assert_series_equal(ranks, oranks)
mask = np.isnan(datetime_series)
filled = datetime_series.fillna(np.inf)
# rankdata returns a ndarray
exp = Series(rankdata(filled), index=filled.index, name="ts")
exp[mask] = np.nan
tm.assert_series_equal(ranks, exp)
iseries = Series(np.arange(5).repeat(2))
iranks = iseries.rank()
exp = iseries.astype(float).rank()
tm.assert_series_equal(iranks, exp)
iseries = Series(np.arange(5)) + 1.0
exp = iseries / 5.0
iranks = iseries.rank(pct=True)
tm.assert_series_equal(iranks, exp)
iseries = Series(np.repeat(1, 100))
exp = Series(np.repeat(0.505, 100))
iranks = iseries.rank(pct=True)
tm.assert_series_equal(iranks, exp)
iseries[1] = np.nan
exp = Series(np.repeat(50.0 / 99.0, 100))
exp[1] = np.nan
iranks = iseries.rank(pct=True)
tm.assert_series_equal(iranks, exp)
iseries = Series(np.arange(5)) + 1.0
iseries[4] = np.nan
exp = iseries / 4.0
iranks = iseries.rank(pct=True)
tm.assert_series_equal(iranks, exp)
iseries = Series(np.repeat(np.nan, 100))
exp = iseries.copy()
iranks = iseries.rank(pct=True)
tm.assert_series_equal(iranks, exp)
iseries = Series(np.arange(5)) + 1
iseries[4] = np.nan
exp = iseries / 4.0
iranks = iseries.rank(pct=True)
tm.assert_series_equal(iranks, exp)
rng = date_range("1/1/1990", periods=5)
iseries = Series(np.arange(5), rng) + 1
iseries.iloc[4] = np.nan
exp = iseries / 4.0
iranks = iseries.rank(pct=True)
tm.assert_series_equal(iranks, exp)
iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1])
exp = Series([2, 1, 3, 5, 4, 6.0])
iranks = iseries.rank()
tm.assert_series_equal(iranks, exp)
# GH 5968
iseries = Series(["3 day", "1 day 10m", "-2 day", NaT], dtype="m8[ns]")
exp = Series([3, 2, 1, np.nan])
iranks = iseries.rank()
tm.assert_series_equal(iranks, exp)
values = np.array(
[-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40],
dtype="float64",
)
random_order = np.random.permutation(len(values))
iseries = Series(values[random_order])
exp = Series(random_order + 1.0, dtype="float64")
iranks = iseries.rank()
tm.assert_series_equal(iranks, exp)
def test_rank_categorical(self):
# GH issue #15420 rank incorrectly orders ordered categories
# Test ascending/descending ranking for ordered categoricals
exp = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
exp_desc = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0])
ordered = Series(
["first", "second", "third", "fourth", "fifth", "sixth"]
).astype(
CategoricalDtype(
categories=["first", "second", "third", "fourth", "fifth", "sixth"],
ordered=True,
)
)
tm.assert_series_equal(ordered.rank(), exp)
tm.assert_series_equal(ordered.rank(ascending=False), exp_desc)
# Unordered categoricals should be ranked as objects
unordered = Series(
["first", "second", "third", "fourth", "fifth", "sixth"]
).astype(
CategoricalDtype(
categories=["first", "second", "third", "fourth", "fifth", "sixth"],
ordered=False,
)
)
exp_unordered = Series([2.0, 4.0, 6.0, 3.0, 1.0, 5.0])
res = unordered.rank()
tm.assert_series_equal(res, exp_unordered)
unordered1 = Series([1, 2, 3, 4, 5, 6]).astype(
CategoricalDtype([1, 2, 3, 4, 5, 6], False)
)
exp_unordered1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
res1 = unordered1.rank()
tm.assert_series_equal(res1, exp_unordered1)
# Test na_option for rank data
na_ser = Series(
["first", "second", "third", "fourth", "fifth", "sixth", np.NaN]
).astype(
CategoricalDtype(
["first", "second", "third", "fourth", "fifth", "sixth", "seventh"],
True,
)
)
exp_top = Series([2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 1.0])
exp_bot = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
exp_keep = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, np.NaN])
tm.assert_series_equal(na_ser.rank(na_option="top"), exp_top)
tm.assert_series_equal(na_ser.rank(na_option="bottom"), exp_bot)
tm.assert_series_equal(na_ser.rank(na_option="keep"), exp_keep)
# Test na_option for rank data with ascending False
exp_top = Series([7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0])
exp_bot = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 7.0])
exp_keep = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, np.NaN])
tm.assert_series_equal(na_ser.rank(na_option="top", ascending=False), exp_top)
tm.assert_series_equal(
na_ser.rank(na_option="bottom", ascending=False), exp_bot
)
tm.assert_series_equal(na_ser.rank(na_option="keep", ascending=False), exp_keep)
# Test invalid values for na_option
msg = "na_option must be one of 'keep', 'top', or 'bottom'"
with pytest.raises(ValueError, match=msg):
na_ser.rank(na_option="bad", ascending=False)
# invalid type
with pytest.raises(ValueError, match=msg):
na_ser.rank(na_option=True, ascending=False)
# Test with pct=True
na_ser = Series(["first", "second", "third", "fourth", np.NaN]).astype(
CategoricalDtype(["first", "second", "third", "fourth"], True)
)
exp_top = Series([0.4, 0.6, 0.8, 1.0, 0.2])
exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.0])
exp_keep = Series([0.25, 0.5, 0.75, 1.0, np.NaN])
tm.assert_series_equal(na_ser.rank(na_option="top", pct=True), exp_top)
tm.assert_series_equal(na_ser.rank(na_option="bottom", pct=True), exp_bot)
tm.assert_series_equal(na_ser.rank(na_option="keep", pct=True), exp_keep)
def test_rank_signature(self):
s = Series([0, 1])
s.rank(method="average")
msg = "No axis named average for object type Series"
with pytest.raises(ValueError, match=msg):
s.rank("average")
@pytest.mark.parametrize("dtype", [None, object])
def test_rank_tie_methods(self, ser, results, dtype):
method, exp = results
ser = ser if dtype is None else ser.astype(dtype)
result = ser.rank(method=method)
tm.assert_series_equal(result, Series(exp))
@td.skip_if_no_scipy
@pytest.mark.parametrize("ascending", [True, False])
@pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"])
@pytest.mark.parametrize("na_option", ["top", "bottom", "keep"])
@pytest.mark.parametrize(
"dtype, na_value, pos_inf, neg_inf",
[
("object", None, Infinity(), NegInfinity()),
("float64", np.nan, np.inf, -np.inf),
],
)
def test_rank_tie_methods_on_infs_nans(
self, method, na_option, ascending, dtype, na_value, pos_inf, neg_inf
):
chunk = 3
in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk
iseries = Series(in_arr, dtype=dtype)
exp_ranks = {
"average": ([2, 2, 2], [5, 5, 5], [8, 8, 8]),
"min": ([1, 1, 1], [4, 4, 4], [7, 7, 7]),
"max": ([3, 3, 3], [6, 6, 6], [9, 9, 9]),
"first": ([1, 2, 3], [4, 5, 6], [7, 8, 9]),
"dense": ([1, 1, 1], [2, 2, 2], [3, 3, 3]),
}
ranks = exp_ranks[method]
if na_option == "top":
order = [ranks[1], ranks[0], ranks[2]]
elif na_option == "bottom":
order = [ranks[0], ranks[2], ranks[1]]
else:
order = [ranks[0], [np.nan] * chunk, ranks[1]]
expected = order if ascending else order[::-1]
expected = list(chain.from_iterable(expected))
result = iseries.rank(method=method, na_option=na_option, ascending=ascending)
tm.assert_series_equal(result, Series(expected, dtype="float64"))
def test_rank_desc_mix_nans_infs(self):
# GH 19538
# check descending ranking when mix nans and infs
iseries = Series([1, np.nan, np.inf, -np.inf, 25])
result = iseries.rank(ascending=False)
exp = Series([3, np.nan, 1, 4, 2], dtype="float64")
tm.assert_series_equal(result, exp)
@td.skip_if_no_scipy
@pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"])
@pytest.mark.parametrize(
"op, value",
[
[operator.add, 0],
[operator.add, 1e6],
[operator.mul, 1e-6],
],
)
def test_rank_methods_series(self, method, op, value):
from scipy.stats import rankdata
xs = np.random.randn(9)
xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates
np.random.shuffle(xs)
index = [chr(ord("a") + i) for i in range(len(xs))]
vals = op(xs, value)
ts = Series(vals, index=index)
result = ts.rank(method=method)
sprank = rankdata(vals, method if method != "first" else "ordinal")
expected = Series(sprank, index=index).astype("float64")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", ["O", "f8", "i8"])
@pytest.mark.parametrize(
"ser, exp",
[
([1], [1]),
([2], [1]),
([0], [1]),
([2, 2], [1, 1]),
([1, 2, 3], [1, 2, 3]),
([4, 2, 1], [3, 2, 1]),
([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]),
([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5]),
],
)
def test_rank_dense_method(self, dtype, ser, exp):
s = Series(ser).astype(dtype)
result = s.rank(method="dense")
expected = Series(exp).astype(result.dtype)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", ["O", "f8", "i8"])
def test_rank_descending(self, ser, results, dtype):
method, _ = results
if "i" in dtype:
s = ser.dropna()
else:
s = ser.astype(dtype)
res = s.rank(ascending=False)
expected = (s.max() - s).rank()
tm.assert_series_equal(res, expected)
expected = (s.max() - s).rank(method=method)
res2 = s.rank(method=method, ascending=False)
tm.assert_series_equal(res2, expected)
def test_rank_int(self, ser, results):
method, exp = results
s = ser.dropna().astype("i8")
result = s.rank(method=method)
expected = Series(exp).dropna()
expected.index = result.index
tm.assert_series_equal(result, expected)
def test_rank_object_bug(self):
# GH 13445
# smoke tests
Series([np.nan] * 32).astype(object).rank(ascending=True)
Series([np.nan] * 32).astype(object).rank(ascending=False)
def test_rank_modify_inplace(self):
# GH 18521
# Check rank does not mutate series
s = Series([Timestamp("2017-01-05 10:20:27.569000"), NaT])
expected = s.copy()
s.rank()
result = s
tm.assert_series_equal(result, expected)
# GH15630, pct should be on 100% basis when method='dense'
@pytest.mark.parametrize("dtype", ["O", "f8", "i8"])
@pytest.mark.parametrize(
"ser, exp",
[
([1], [1.0]),
([1, 2], [1.0 / 2, 2.0 / 2]),
([2, 2], [1.0, 1.0]),
([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([1, 2, 2], [1.0 / 2, 2.0 / 2, 2.0 / 2]),
([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
([1, 1, 5, 5, 3], [1.0 / 3, 1.0 / 3, 3.0 / 3, 3.0 / 3, 2.0 / 3]),
([1, 1, 3, 3, 5, 5], [1.0 / 3, 1.0 / 3, 2.0 / 3, 2.0 / 3, 3.0 / 3, 3.0 / 3]),
([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
],
)
def test_rank_dense_pct(dtype, ser, exp):
s = Series(ser).astype(dtype)
result = s.rank(method="dense", pct=True)
expected = Series(exp).astype(result.dtype)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", ["O", "f8", "i8"])
@pytest.mark.parametrize(
"ser, exp",
[
([1], [1.0]),
([1, 2], [1.0 / 2, 2.0 / 2]),
([2, 2], [1.0 / 2, 1.0 / 2]),
([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([1, 2, 2], [1.0 / 3, 2.0 / 3, 2.0 / 3]),
([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
([1, 1, 5, 5, 3], [1.0 / 5, 1.0 / 5, 4.0 / 5, 4.0 / 5, 3.0 / 5]),
([1, 1, 3, 3, 5, 5], [1.0 / 6, 1.0 / 6, 3.0 / 6, 3.0 / 6, 5.0 / 6, 5.0 / 6]),
([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
],
)
def test_rank_min_pct(dtype, ser, exp):
s = Series(ser).astype(dtype)
result = s.rank(method="min", pct=True)
expected = Series(exp).astype(result.dtype)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", ["O", "f8", "i8"])
@pytest.mark.parametrize(
"ser, exp",
[
([1], [1.0]),
([1, 2], [1.0 / 2, 2.0 / 2]),
([2, 2], [1.0, 1.0]),
([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([1, 2, 2], [1.0 / 3, 3.0 / 3, 3.0 / 3]),
([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
([1, 1, 5, 5, 3], [2.0 / 5, 2.0 / 5, 5.0 / 5, 5.0 / 5, 3.0 / 5]),
([1, 1, 3, 3, 5, 5], [2.0 / 6, 2.0 / 6, 4.0 / 6, 4.0 / 6, 6.0 / 6, 6.0 / 6]),
([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
],
)
def test_rank_max_pct(dtype, ser, exp):
s = Series(ser).astype(dtype)
result = s.rank(method="max", pct=True)
expected = Series(exp).astype(result.dtype)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", ["O", "f8", "i8"])
@pytest.mark.parametrize(
"ser, exp",
[
([1], [1.0]),
([1, 2], [1.0 / 2, 2.0 / 2]),
([2, 2], [1.5 / 2, 1.5 / 2]),
([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([1, 2, 2], [1.0 / 3, 2.5 / 3, 2.5 / 3]),
([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
([1, 1, 5, 5, 3], [1.5 / 5, 1.5 / 5, 4.5 / 5, 4.5 / 5, 3.0 / 5]),
([1, 1, 3, 3, 5, 5], [1.5 / 6, 1.5 / 6, 3.5 / 6, 3.5 / 6, 5.5 / 6, 5.5 / 6]),
([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
],
)
def test_rank_average_pct(dtype, ser, exp):
s = Series(ser).astype(dtype)
result = s.rank(method="average", pct=True)
expected = Series(exp).astype(result.dtype)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", ["f8", "i8"])
@pytest.mark.parametrize(
"ser, exp",
[
([1], [1.0]),
([1, 2], [1.0 / 2, 2.0 / 2]),
([2, 2], [1.0 / 2, 2.0 / 2.0]),
([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([1, 2, 2], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
([1, 1, 5, 5, 3], [1.0 / 5, 2.0 / 5, 4.0 / 5, 5.0 / 5, 3.0 / 5]),
([1, 1, 3, 3, 5, 5], [1.0 / 6, 2.0 / 6, 3.0 / 6, 4.0 / 6, 5.0 / 6, 6.0 / 6]),
([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
],
)
def test_rank_first_pct(dtype, ser, exp):
s = Series(ser).astype(dtype)
result = s.rank(method="first", pct=True)
expected = Series(exp).astype(result.dtype)
tm.assert_series_equal(result, expected)
@pytest.mark.single_cpu
@pytest.mark.high_memory
def test_pct_max_many_rows():
# GH 18271
s = Series(np.arange(2**24 + 1))
result = s.rank(pct=True).max()
assert result == 1

View File

@ -0,0 +1,410 @@
import numpy as np
import pytest
from pandas import (
Categorical,
Index,
MultiIndex,
NaT,
Period,
PeriodIndex,
Series,
Timedelta,
Timestamp,
date_range,
isna,
)
import pandas._testing as tm
def test_reindex(datetime_series, string_series):
identity = string_series.reindex(string_series.index)
# __array_interface__ is not defined for older numpies
# and on some pythons
try:
assert np.may_share_memory(string_series.index, identity.index)
except AttributeError:
pass
assert identity.index.is_(string_series.index)
assert identity.index.identical(string_series.index)
subIndex = string_series.index[10:20]
subSeries = string_series.reindex(subIndex)
for idx, val in subSeries.items():
assert val == string_series[idx]
subIndex2 = datetime_series.index[10:20]
subTS = datetime_series.reindex(subIndex2)
for idx, val in subTS.items():
assert val == datetime_series[idx]
stuffSeries = datetime_series.reindex(subIndex)
assert np.isnan(stuffSeries).all()
# This is extremely important for the Cython code to not screw up
nonContigIndex = datetime_series.index[::2]
subNonContig = datetime_series.reindex(nonContigIndex)
for idx, val in subNonContig.items():
assert val == datetime_series[idx]
# return a copy the same index here
result = datetime_series.reindex()
assert not (result is datetime_series)
def test_reindex_nan():
ts = Series([2, 3, 5, 7], index=[1, 4, np.nan, 8])
i, j = [np.nan, 1, np.nan, 8, 4, np.nan], [2, 0, 2, 3, 1, 2]
tm.assert_series_equal(ts.reindex(i), ts.iloc[j])
ts.index = ts.index.astype("object")
# reindex coerces index.dtype to float, loc/iloc doesn't
tm.assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False)
def test_reindex_series_add_nat():
rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s")
series = Series(rng)
result = series.reindex(range(15))
assert np.issubdtype(result.dtype, np.dtype("M8[ns]"))
mask = result.isna()
assert mask[-5:].all()
assert not mask[:-5].any()
def test_reindex_with_datetimes():
rng = date_range("1/1/2000", periods=20)
ts = Series(np.random.randn(20), index=rng)
result = ts.reindex(list(ts.index[5:10]))
expected = ts[5:10]
expected.index = expected.index._with_freq(None)
tm.assert_series_equal(result, expected)
result = ts[list(ts.index[5:10])]
tm.assert_series_equal(result, expected)
def test_reindex_corner(datetime_series):
# (don't forget to fix this) I think it's fixed
empty = Series(dtype=object)
empty.reindex(datetime_series.index, method="pad") # it works
# corner case: pad empty series
reindexed = empty.reindex(datetime_series.index, method="pad")
# pass non-Index
reindexed = datetime_series.reindex(list(datetime_series.index))
datetime_series.index = datetime_series.index._with_freq(None)
tm.assert_series_equal(datetime_series, reindexed)
# bad fill method
ts = datetime_series[::2]
msg = (
r"Invalid fill method\. Expecting pad \(ffill\), backfill "
r"\(bfill\) or nearest\. Got foo"
)
with pytest.raises(ValueError, match=msg):
ts.reindex(datetime_series.index, method="foo")
def test_reindex_pad():
s = Series(np.arange(10), dtype="int64")
s2 = s[::2]
reindexed = s2.reindex(s.index, method="pad")
reindexed2 = s2.reindex(s.index, method="ffill")
tm.assert_series_equal(reindexed, reindexed2)
expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10))
tm.assert_series_equal(reindexed, expected)
# GH4604
s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])
new_index = ["a", "g", "c", "f"]
expected = Series([1, 1, 3, 3], index=new_index)
# this changes dtype because the ffill happens after
result = s.reindex(new_index).ffill()
tm.assert_series_equal(result, expected.astype("float64"))
result = s.reindex(new_index).ffill(downcast="infer")
tm.assert_series_equal(result, expected)
expected = Series([1, 5, 3, 5], index=new_index)
result = s.reindex(new_index, method="ffill")
tm.assert_series_equal(result, expected)
# inference of new dtype
s = Series([True, False, False, True], index=list("abcd"))
new_index = "agc"
result = s.reindex(list(new_index)).ffill()
expected = Series([True, True, False], index=list(new_index))
tm.assert_series_equal(result, expected)
# GH4618 shifted series downcasting
s = Series(False, index=range(0, 5))
result = s.shift(1).fillna(method="bfill")
expected = Series(False, index=range(0, 5))
tm.assert_series_equal(result, expected)
def test_reindex_nearest():
s = Series(np.arange(10, dtype="int64"))
target = [0.1, 0.9, 1.5, 2.0]
result = s.reindex(target, method="nearest")
expected = Series(np.around(target).astype("int64"), target)
tm.assert_series_equal(expected, result)
result = s.reindex(target, method="nearest", tolerance=0.2)
expected = Series([0, 1, np.nan, 2], target)
tm.assert_series_equal(expected, result)
result = s.reindex(target, method="nearest", tolerance=[0.3, 0.01, 0.4, 3])
expected = Series([0, np.nan, np.nan, 2], target)
tm.assert_series_equal(expected, result)
def test_reindex_int(datetime_series):
ts = datetime_series[::2]
int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index)
# this should work fine
reindexed_int = int_ts.reindex(datetime_series.index)
# if NaNs introduced
assert reindexed_int.dtype == np.float_
# NO NaNs introduced
reindexed_int = int_ts.reindex(int_ts.index[::2])
assert reindexed_int.dtype == np.int_
def test_reindex_bool(datetime_series):
# A series other than float, int, string, or object
ts = datetime_series[::2]
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
# this should work fine
reindexed_bool = bool_ts.reindex(datetime_series.index)
# if NaNs introduced
assert reindexed_bool.dtype == np.object_
# NO NaNs introduced
reindexed_bool = bool_ts.reindex(bool_ts.index[::2])
assert reindexed_bool.dtype == np.bool_
def test_reindex_bool_pad(datetime_series):
# fail
ts = datetime_series[5:]
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
filled_bool = bool_ts.reindex(datetime_series.index, method="pad")
assert isna(filled_bool[:5]).all()
def test_reindex_categorical():
index = date_range("20000101", periods=3)
# reindexing to an invalid Categorical
s = Series(["a", "b", "c"], dtype="category")
result = s.reindex(index)
expected = Series(
Categorical(values=[np.nan, np.nan, np.nan], categories=["a", "b", "c"])
)
expected.index = index
tm.assert_series_equal(result, expected)
# partial reindexing
expected = Series(Categorical(values=["b", "c"], categories=["a", "b", "c"]))
expected.index = [1, 2]
result = s.reindex([1, 2])
tm.assert_series_equal(result, expected)
expected = Series(Categorical(values=["c", np.nan], categories=["a", "b", "c"]))
expected.index = [2, 3]
result = s.reindex([2, 3])
tm.assert_series_equal(result, expected)
def test_reindex_astype_order_consistency():
# GH#17444
ser = Series([1, 2, 3], index=[2, 0, 1])
new_index = [0, 1, 2]
temp_dtype = "category"
new_dtype = str
result = ser.reindex(new_index).astype(temp_dtype).astype(new_dtype)
expected = ser.astype(temp_dtype).reindex(new_index).astype(new_dtype)
tm.assert_series_equal(result, expected)
def test_reindex_fill_value():
# -----------------------------------------------------------
# floats
floats = Series([1.0, 2.0, 3.0])
result = floats.reindex([1, 2, 3])
expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3])
tm.assert_series_equal(result, expected)
result = floats.reindex([1, 2, 3], fill_value=0)
expected = Series([2.0, 3.0, 0], index=[1, 2, 3])
tm.assert_series_equal(result, expected)
# -----------------------------------------------------------
# ints
ints = Series([1, 2, 3])
result = ints.reindex([1, 2, 3])
expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3])
tm.assert_series_equal(result, expected)
# don't upcast
result = ints.reindex([1, 2, 3], fill_value=0)
expected = Series([2, 3, 0], index=[1, 2, 3])
assert issubclass(result.dtype.type, np.integer)
tm.assert_series_equal(result, expected)
# -----------------------------------------------------------
# objects
objects = Series([1, 2, 3], dtype=object)
result = objects.reindex([1, 2, 3])
expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object)
tm.assert_series_equal(result, expected)
result = objects.reindex([1, 2, 3], fill_value="foo")
expected = Series([2, 3, "foo"], index=[1, 2, 3], dtype=object)
tm.assert_series_equal(result, expected)
# ------------------------------------------------------------
# bools
bools = Series([True, False, True])
result = bools.reindex([1, 2, 3])
expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object)
tm.assert_series_equal(result, expected)
result = bools.reindex([1, 2, 3], fill_value=False)
expected = Series([False, True, False], index=[1, 2, 3])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
@pytest.mark.parametrize("fill_value", ["string", 0, Timedelta(0)])
def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value, using_array_manager):
# https://github.com/pandas-dev/pandas/issues/42921
if using_array_manager:
pytest.skip("Array manager does not promote dtype, hence we fail")
if dtype == "timedelta64[ns]" and fill_value == Timedelta(0):
# use the scalar that is not compatible with the dtype for this test
fill_value = Timestamp(0)
ser = Series([NaT], dtype=dtype)
result = ser.reindex([0, 1], fill_value=fill_value)
expected = Series([None, fill_value], index=[0, 1], dtype=object)
tm.assert_series_equal(result, expected)
def test_reindex_datetimeindexes_tz_naive_and_aware():
# GH 8306
idx = date_range("20131101", tz="America/Chicago", periods=7)
newidx = date_range("20131103", periods=10, freq="H")
s = Series(range(7), index=idx)
msg = (
r"Cannot compare dtypes datetime64\[ns, America/Chicago\] "
r"and datetime64\[ns\]"
)
with pytest.raises(TypeError, match=msg):
s.reindex(newidx, method="ffill")
def test_reindex_empty_series_tz_dtype():
# GH 20869
result = Series(dtype="datetime64[ns, UTC]").reindex([0, 1])
expected = Series([NaT] * 2, dtype="datetime64[ns, UTC]")
tm.assert_equal(result, expected)
@pytest.mark.parametrize(
"p_values, o_values, values, expected_values",
[
(
[Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
[Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC"), "All"],
[1.0, 1.0],
[1.0, 1.0, np.nan],
),
(
[Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
[Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
[1.0, 1.0],
[1.0, 1.0],
),
],
)
def test_reindex_periodindex_with_object(p_values, o_values, values, expected_values):
# GH#28337
period_index = PeriodIndex(p_values)
object_index = Index(o_values)
ser = Series(values, index=period_index)
result = ser.reindex(object_index)
expected = Series(expected_values, index=object_index)
tm.assert_series_equal(result, expected)
def test_reindex_too_many_args():
# GH 40980
ser = Series([1, 2])
with pytest.raises(
TypeError, match=r"Only one positional argument \('index'\) is allowed"
):
ser.reindex([2, 3], False)
def test_reindex_double_index():
# GH 40980
ser = Series([1, 2])
msg = r"'index' passed as both positional and keyword argument"
with pytest.raises(TypeError, match=msg):
ser.reindex([2, 3], index=[3, 4])
def test_reindex_no_posargs():
# GH 40980
ser = Series([1, 2])
result = ser.reindex(index=[1, 0])
expected = Series([2, 1], index=[1, 0])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
def test_reindex_empty_with_level(values):
# GH41170
ser = Series(
range(len(values[0])), index=MultiIndex.from_arrays(values), dtype="object"
)
result = ser.reindex(np.array(["b"]), level=0)
expected = Series(
index=MultiIndex(levels=[["b"], values[1]], codes=[[], []]), dtype="object"
)
tm.assert_series_equal(result, expected)
def test_reindex_missing_category():
# GH#18185
ser = Series([1, 2, 3, 1], dtype="category")
msg = r"Cannot setitem on a Categorical with a new category \(-1\)"
with pytest.raises(TypeError, match=msg):
ser.reindex([1, 2, 3, 4, 5], fill_value=-1)

View File

@ -0,0 +1,41 @@
from datetime import datetime
import numpy as np
from pandas import Series
import pandas._testing as tm
def test_reindex_like(datetime_series):
other = datetime_series[::2]
tm.assert_series_equal(
datetime_series.reindex(other.index), datetime_series.reindex_like(other)
)
# GH#7179
day1 = datetime(2013, 3, 5)
day2 = datetime(2013, 5, 5)
day3 = datetime(2014, 3, 5)
series1 = Series([5, None, None], [day1, day2, day3])
series2 = Series([None, None], [day1, day3])
result = series1.reindex_like(series2, method="pad")
expected = Series([5, np.nan], index=[day1, day3])
tm.assert_series_equal(result, expected)
def test_reindex_like_nearest():
ser = Series(np.arange(10, dtype="int64"))
target = [0.1, 0.9, 1.5, 2.0]
other = ser.reindex(target, method="nearest")
expected = Series(np.around(target).astype("int64"), target)
result = ser.reindex_like(other, method="nearest")
tm.assert_series_equal(expected, result)
result = ser.reindex_like(other, method="nearest", tolerance=1)
tm.assert_series_equal(expected, result)
result = ser.reindex_like(other, method="nearest", tolerance=[1, 2, 3, 4])
tm.assert_series_equal(expected, result)

View File

@ -0,0 +1,136 @@
from datetime import datetime
import numpy as np
import pytest
from pandas import (
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
class TestRename:
def test_rename(self, datetime_series):
ts = datetime_series
renamer = lambda x: x.strftime("%Y%m%d")
renamed = ts.rename(renamer)
assert renamed.index[0] == renamer(ts.index[0])
# dict
rename_dict = dict(zip(ts.index, renamed.index))
renamed2 = ts.rename(rename_dict)
tm.assert_series_equal(renamed, renamed2)
def test_rename_partial_dict(self):
# partial dict
ser = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64")
renamed = ser.rename({"b": "foo", "d": "bar"})
tm.assert_index_equal(renamed.index, Index(["a", "foo", "c", "bar"]))
def test_rename_retain_index_name(self):
# index with name
renamer = Series(
np.arange(4), index=Index(["a", "b", "c", "d"], name="name"), dtype="int64"
)
renamed = renamer.rename({})
assert renamed.index.name == renamer.index.name
def test_rename_by_series(self):
ser = Series(range(5), name="foo")
renamer = Series({1: 10, 2: 20})
result = ser.rename(renamer)
expected = Series(range(5), index=[0, 10, 20, 3, 4], name="foo")
tm.assert_series_equal(result, expected)
def test_rename_set_name(self):
ser = Series(range(4), index=list("abcd"))
for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
result = ser.rename(name)
assert result.name == name
tm.assert_numpy_array_equal(result.index.values, ser.index.values)
assert ser.name is None
def test_rename_set_name_inplace(self):
ser = Series(range(3), index=list("abc"))
for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
ser.rename(name, inplace=True)
assert ser.name == name
exp = np.array(["a", "b", "c"], dtype=np.object_)
tm.assert_numpy_array_equal(ser.index.values, exp)
def test_rename_axis_supported(self):
# Supporting axis for compatibility, detailed in GH-18589
ser = Series(range(5))
ser.rename({}, axis=0)
ser.rename({}, axis="index")
with pytest.raises(ValueError, match="No axis named 5"):
ser.rename({}, axis=5)
def test_rename_inplace(self, datetime_series):
renamer = lambda x: x.strftime("%Y%m%d")
expected = renamer(datetime_series.index[0])
datetime_series.rename(renamer, inplace=True)
assert datetime_series.index[0] == expected
def test_rename_with_custom_indexer(self):
# GH 27814
class MyIndexer:
pass
ix = MyIndexer()
ser = Series([1, 2, 3]).rename(ix)
assert ser.name is ix
def test_rename_with_custom_indexer_inplace(self):
# GH 27814
class MyIndexer:
pass
ix = MyIndexer()
ser = Series([1, 2, 3])
ser.rename(ix, inplace=True)
assert ser.name is ix
def test_rename_callable(self):
# GH 17407
ser = Series(range(1, 6), index=Index(range(2, 7), name="IntIndex"))
result = ser.rename(str)
expected = ser.rename(lambda i: str(i))
tm.assert_series_equal(result, expected)
assert result.name == expected.name
def test_rename_none(self):
# GH 40977
ser = Series([1, 2], name="foo")
result = ser.rename(None)
expected = Series([1, 2])
tm.assert_series_equal(result, expected)
def test_rename_series_with_multiindex(self):
# issue #43659
arrays = [
["bar", "baz", "baz", "foo", "qux"],
["one", "one", "two", "two", "one"],
]
index = MultiIndex.from_arrays(arrays, names=["first", "second"])
ser = Series(np.ones(5), index=index)
result = ser.rename(index={"one": "yes"}, level="second", errors="raise")
arrays_expected = [
["bar", "baz", "baz", "foo", "qux"],
["yes", "yes", "two", "two", "yes"],
]
index_expected = MultiIndex.from_arrays(
arrays_expected, names=["first", "second"]
)
series_expected = Series(np.ones(5), index=index_expected)
tm.assert_series_equal(result, series_expected)

View File

@ -0,0 +1,47 @@
import pytest
from pandas import (
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
class TestSeriesRenameAxis:
def test_rename_axis_mapper(self):
# GH 19978
mi = MultiIndex.from_product([["a", "b", "c"], [1, 2]], names=["ll", "nn"])
ser = Series(list(range(len(mi))), index=mi)
result = ser.rename_axis(index={"ll": "foo"})
assert result.index.names == ["foo", "nn"]
result = ser.rename_axis(index=str.upper, axis=0)
assert result.index.names == ["LL", "NN"]
result = ser.rename_axis(index=["foo", "goo"])
assert result.index.names == ["foo", "goo"]
with pytest.raises(TypeError, match="unexpected"):
ser.rename_axis(columns="wrong")
def test_rename_axis_inplace(self, datetime_series):
# GH 15704
expected = datetime_series.rename_axis("foo")
result = datetime_series
no_return = result.rename_axis("foo", inplace=True)
assert no_return is None
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("kwargs", [{"mapper": None}, {"index": None}, {}])
def test_rename_axis_none(self, kwargs):
# GH 25034
index = Index(list("abc"), name="foo")
ser = Series([1, 2, 3], index=index)
result = ser.rename_axis(**kwargs)
expected_index = index.rename(None) if kwargs else index
expected = Series([1, 2, 3], index=expected_index)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,40 @@
import numpy as np
import pytest
from pandas import (
MultiIndex,
Series,
)
import pandas._testing as tm
class TestRepeat:
def test_repeat(self):
ser = Series(np.random.randn(3), index=["a", "b", "c"])
reps = ser.repeat(5)
exp = Series(ser.values.repeat(5), index=ser.index.values.repeat(5))
tm.assert_series_equal(reps, exp)
to_rep = [2, 3, 4]
reps = ser.repeat(to_rep)
exp = Series(ser.values.repeat(to_rep), index=ser.index.values.repeat(to_rep))
tm.assert_series_equal(reps, exp)
def test_numpy_repeat(self):
ser = Series(np.arange(3), name="x")
expected = Series(
ser.values.repeat(2), name="x", index=ser.index.values.repeat(2)
)
tm.assert_series_equal(np.repeat(ser, 2), expected)
msg = "the 'axis' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.repeat(ser, 2, axis=0)
def test_repeat_with_multiindex(self):
# GH#9361, fixed by GH#7891
m_idx = MultiIndex.from_tuples([(1, 2), (3, 4), (5, 6), (7, 8)])
data = ["a", "b", "c", "d"]
m_df = Series(data, index=m_idx)
assert m_df.repeat(3).shape == (3 * len(data),)

View File

@ -0,0 +1,653 @@
import re
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import IntervalArray
class TestSeriesReplace:
def test_replace_explicit_none(self):
# GH#36984 if the user explicitly passes value=None, give it to them
ser = pd.Series([0, 0, ""], dtype=object)
result = ser.replace("", None)
expected = pd.Series([0, 0, None], dtype=object)
tm.assert_series_equal(result, expected)
df = pd.DataFrame(np.zeros((3, 3)))
df.iloc[2, 2] = ""
result = df.replace("", None)
expected = pd.DataFrame(
{
0: np.zeros(3),
1: np.zeros(3),
2: np.array([0.0, 0.0, None], dtype=object),
}
)
assert expected.iloc[2, 2] is None
tm.assert_frame_equal(result, expected)
# GH#19998 same thing with object dtype
ser = pd.Series([10, 20, 30, "a", "a", "b", "a"])
result = ser.replace("a", None)
expected = pd.Series([10, 20, 30, None, None, "b", None])
assert expected.iloc[-1] is None
tm.assert_series_equal(result, expected)
def test_replace_noop_doesnt_downcast(self):
# GH#44498
ser = pd.Series([None, None, pd.Timestamp("2021-12-16 17:31")], dtype=object)
res = ser.replace({np.nan: None}) # should be a no-op
tm.assert_series_equal(res, ser)
assert res.dtype == object
# same thing but different calling convention
res = ser.replace(np.nan, None)
tm.assert_series_equal(res, ser)
assert res.dtype == object
def test_replace(self):
N = 100
ser = pd.Series(np.random.randn(N))
ser[0:4] = np.nan
ser[6:10] = 0
# replace list with a single value
return_value = ser.replace([np.nan], -1, inplace=True)
assert return_value is None
exp = ser.fillna(-1)
tm.assert_series_equal(ser, exp)
rs = ser.replace(0.0, np.nan)
ser[ser == 0.0] = np.nan
tm.assert_series_equal(rs, ser)
ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object)
ser[:5] = np.nan
ser[6:10] = "foo"
ser[20:30] = "bar"
# replace list with a single value
rs = ser.replace([np.nan, "foo", "bar"], -1)
assert (rs[:5] == -1).all()
assert (rs[6:10] == -1).all()
assert (rs[20:30] == -1).all()
assert (pd.isna(ser[:5])).all()
# replace with different values
rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})
assert (rs[:5] == -1).all()
assert (rs[6:10] == -2).all()
assert (rs[20:30] == -3).all()
assert (pd.isna(ser[:5])).all()
# replace with different values with 2 lists
rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
tm.assert_series_equal(rs, rs2)
# replace inplace
return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True)
assert return_value is None
assert (ser[:5] == -1).all()
assert (ser[6:10] == -1).all()
assert (ser[20:30] == -1).all()
def test_replace_nan_with_inf(self):
ser = pd.Series([np.nan, 0, np.inf])
tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
ser = pd.Series([np.nan, 0, "foo", "bar", np.inf, None, pd.NaT])
tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
filled = ser.copy()
filled[4] = 0
tm.assert_series_equal(ser.replace(np.inf, 0), filled)
def test_replace_listlike_value_listlike_target(self, datetime_series):
ser = pd.Series(datetime_series.index)
tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
# malformed
msg = r"Replacement lists must match in length\. Expecting 3 got 2"
with pytest.raises(ValueError, match=msg):
ser.replace([1, 2, 3], [np.nan, 0])
# ser is dt64 so can't hold 1 or 2, so this replace is a no-op
result = ser.replace([1, 2], [np.nan, 0])
tm.assert_series_equal(result, ser)
ser = pd.Series([0, 1, 2, 3, 4])
result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])
tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0]))
def test_replace_gh5319(self):
# API change from 0.12?
# GH 5319
ser = pd.Series([0, np.nan, 2, 3, 4])
expected = ser.ffill()
result = ser.replace([np.nan])
tm.assert_series_equal(result, expected)
ser = pd.Series([0, np.nan, 2, 3, 4])
expected = ser.ffill()
result = ser.replace(np.nan)
tm.assert_series_equal(result, expected)
def test_replace_datetime64(self):
# GH 5797
ser = pd.Series(pd.date_range("20130101", periods=5))
expected = ser.copy()
expected.loc[2] = pd.Timestamp("20120101")
result = ser.replace({pd.Timestamp("20130103"): pd.Timestamp("20120101")})
tm.assert_series_equal(result, expected)
result = ser.replace(pd.Timestamp("20130103"), pd.Timestamp("20120101"))
tm.assert_series_equal(result, expected)
def test_replace_nat_with_tz(self):
# GH 11792: Test with replacing NaT in a list with tz data
ts = pd.Timestamp("2015/01/01", tz="UTC")
s = pd.Series([pd.NaT, pd.Timestamp("2015/01/01", tz="UTC")])
result = s.replace([np.nan, pd.NaT], pd.Timestamp.min)
expected = pd.Series([pd.Timestamp.min, ts], dtype=object)
tm.assert_series_equal(expected, result)
def test_replace_timedelta_td64(self):
tdi = pd.timedelta_range(0, periods=5)
ser = pd.Series(tdi)
# Using a single dict argument means we go through replace_list
result = ser.replace({ser[1]: ser[3]})
expected = pd.Series([ser[0], ser[3], ser[2], ser[3], ser[4]])
tm.assert_series_equal(result, expected)
def test_replace_with_single_list(self):
ser = pd.Series([0, 1, 2, 3, 4])
result = ser.replace([1, 2, 3])
tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4]))
s = ser.copy()
return_value = s.replace([1, 2, 3], inplace=True)
assert return_value is None
tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4]))
# make sure things don't get corrupted when fillna call fails
s = ser.copy()
msg = (
r"Invalid fill method\. Expecting pad \(ffill\) or backfill "
r"\(bfill\)\. Got crash_cymbal"
)
with pytest.raises(ValueError, match=msg):
return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal")
assert return_value is None
tm.assert_series_equal(s, ser)
def test_replace_mixed_types(self):
ser = pd.Series(np.arange(5), dtype="int64")
def check_replace(to_rep, val, expected):
sc = ser.copy()
result = ser.replace(to_rep, val)
return_value = sc.replace(to_rep, val, inplace=True)
assert return_value is None
tm.assert_series_equal(expected, result)
tm.assert_series_equal(expected, sc)
# 3.0 can still be held in our int64 series, so we do not upcast GH#44940
tr, v = [3], [3.0]
check_replace(tr, v, ser)
# Note this matches what we get with the scalars 3 and 3.0
check_replace(tr[0], v[0], ser)
# MUST upcast to float
e = pd.Series([0, 1, 2, 3.5, 4])
tr, v = [3], [3.5]
check_replace(tr, v, e)
# casts to object
e = pd.Series([0, 1, 2, 3.5, "a"])
tr, v = [3, 4], [3.5, "a"]
check_replace(tr, v, e)
# again casts to object
e = pd.Series([0, 1, 2, 3.5, pd.Timestamp("20130101")])
tr, v = [3, 4], [3.5, pd.Timestamp("20130101")]
check_replace(tr, v, e)
# casts to object
e = pd.Series([0, 1, 2, 3.5, True], dtype="object")
tr, v = [3, 4], [3.5, True]
check_replace(tr, v, e)
# test an object with dates + floats + integers + strings
dr = pd.Series(pd.date_range("1/1/2001", "1/10/2001", freq="D"))
result = dr.astype(object).replace([dr[0], dr[1], dr[2]], [1.0, 2, "a"])
expected = pd.Series([1.0, 2, "a"] + dr[3:].tolist(), dtype=object)
tm.assert_series_equal(result, expected)
def test_replace_bool_with_string_no_op(self):
s = pd.Series([True, False, True])
result = s.replace("fun", "in-the-sun")
tm.assert_series_equal(s, result)
def test_replace_bool_with_string(self):
# nonexistent elements
s = pd.Series([True, False, True])
result = s.replace(True, "2u")
expected = pd.Series(["2u", False, "2u"])
tm.assert_series_equal(expected, result)
def test_replace_bool_with_bool(self):
s = pd.Series([True, False, True])
result = s.replace(True, False)
expected = pd.Series([False] * len(s))
tm.assert_series_equal(expected, result)
def test_replace_with_dict_with_bool_keys(self):
s = pd.Series([True, False, True])
result = s.replace({"asdf": "asdb", True: "yes"})
expected = pd.Series(["yes", False, "yes"])
tm.assert_series_equal(result, expected)
def test_replace_Int_with_na(self, any_int_ea_dtype):
# GH 38267
result = pd.Series([0, None], dtype=any_int_ea_dtype).replace(0, pd.NA)
expected = pd.Series([pd.NA, pd.NA], dtype=any_int_ea_dtype)
tm.assert_series_equal(result, expected)
result = pd.Series([0, 1], dtype=any_int_ea_dtype).replace(0, pd.NA)
result.replace(1, pd.NA, inplace=True)
tm.assert_series_equal(result, expected)
def test_replace2(self):
N = 100
ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object)
ser[:5] = np.nan
ser[6:10] = "foo"
ser[20:30] = "bar"
# replace list with a single value
rs = ser.replace([np.nan, "foo", "bar"], -1)
assert (rs[:5] == -1).all()
assert (rs[6:10] == -1).all()
assert (rs[20:30] == -1).all()
assert (pd.isna(ser[:5])).all()
# replace with different values
rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})
assert (rs[:5] == -1).all()
assert (rs[6:10] == -2).all()
assert (rs[20:30] == -3).all()
assert (pd.isna(ser[:5])).all()
# replace with different values with 2 lists
rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
tm.assert_series_equal(rs, rs2)
# replace inplace
return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True)
assert return_value is None
assert (ser[:5] == -1).all()
assert (ser[6:10] == -1).all()
assert (ser[20:30] == -1).all()
def test_replace_with_dictlike_and_string_dtype(self, nullable_string_dtype):
# GH 32621, GH#44940
ser = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype)
expected = pd.Series(["1", "2", np.nan], dtype=nullable_string_dtype)
result = ser.replace({"one": "1", "two": "2"})
tm.assert_series_equal(expected, result)
def test_replace_with_empty_dictlike(self):
# GH 15289
s = pd.Series(list("abcd"))
tm.assert_series_equal(s, s.replace({}))
with tm.assert_produces_warning(FutureWarning):
empty_series = pd.Series([])
tm.assert_series_equal(s, s.replace(empty_series))
def test_replace_string_with_number(self):
# GH 15743
s = pd.Series([1, 2, 3])
result = s.replace("2", np.nan)
expected = pd.Series([1, 2, 3])
tm.assert_series_equal(expected, result)
def test_replace_replacer_equals_replacement(self):
# GH 20656
# make sure all replacers are matching against original values
s = pd.Series(["a", "b"])
expected = pd.Series(["b", "a"])
result = s.replace({"a": "b", "b": "a"})
tm.assert_series_equal(expected, result)
def test_replace_unicode_with_number(self):
# GH 15743
s = pd.Series([1, 2, 3])
result = s.replace("2", np.nan)
expected = pd.Series([1, 2, 3])
tm.assert_series_equal(expected, result)
def test_replace_mixed_types_with_string(self):
# Testing mixed
s = pd.Series([1, 2, 3, "4", 4, 5])
result = s.replace([2, "4"], np.nan)
expected = pd.Series([1, np.nan, 3, np.nan, 4, 5])
tm.assert_series_equal(expected, result)
@pytest.mark.parametrize(
"categorical, numeric",
[
(pd.Categorical(["A"], categories=["A", "B"]), [1]),
(pd.Categorical(["A", "B"], categories=["A", "B"]), [1, 2]),
],
)
def test_replace_categorical(self, categorical, numeric):
# GH 24971, GH#23305
ser = pd.Series(categorical)
result = ser.replace({"A": 1, "B": 2})
expected = pd.Series(numeric).astype("category")
if 2 not in expected.cat.categories:
# i.e. categories should be [1, 2] even if there are no "B"s present
# GH#44940
expected = expected.cat.add_categories(2)
tm.assert_series_equal(expected, result)
def test_replace_categorical_single(self):
# GH 26988
dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
s = pd.Series(dti)
c = s.astype("category")
expected = c.copy()
expected = expected.cat.add_categories("foo")
expected[2] = "foo"
expected = expected.cat.remove_unused_categories()
assert c[2] != "foo"
result = c.replace(c[2], "foo")
tm.assert_series_equal(expected, result)
assert c[2] != "foo" # ensure non-inplace call does not alter original
return_value = c.replace(c[2], "foo", inplace=True)
assert return_value is None
tm.assert_series_equal(expected, c)
first_value = c[0]
return_value = c.replace(c[1], c[0], inplace=True)
assert return_value is None
assert c[0] == c[1] == first_value # test replacing with existing value
def test_replace_with_no_overflowerror(self):
# GH 25616
# casts to object without Exception from OverflowError
s = pd.Series([0, 1, 2, 3, 4])
result = s.replace([3], ["100000000000000000000"])
expected = pd.Series([0, 1, 2, "100000000000000000000", 4])
tm.assert_series_equal(result, expected)
s = pd.Series([0, "100000000000000000000", "100000000000000000001"])
result = s.replace(["100000000000000000000"], [1])
expected = pd.Series([0, 1, "100000000000000000001"])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ser, to_replace, exp",
[
([1, 2, 3], {1: 2, 2: 3, 3: 4}, [2, 3, 4]),
(["1", "2", "3"], {"1": "2", "2": "3", "3": "4"}, ["2", "3", "4"]),
],
)
def test_replace_commutative(self, ser, to_replace, exp):
# GH 16051
# DataFrame.replace() overwrites when values are non-numeric
series = pd.Series(ser)
expected = pd.Series(exp)
result = series.replace(to_replace)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ser, exp", [([1, 2, 3], [1, True, 3]), (["x", 2, 3], ["x", True, 3])]
)
def test_replace_no_cast(self, ser, exp):
# GH 9113
# BUG: replace int64 dtype with bool coerces to int64
series = pd.Series(ser)
result = series.replace(2, True)
expected = pd.Series(exp)
tm.assert_series_equal(result, expected)
def test_replace_invalid_to_replace(self):
# GH 18634
# API: replace() should raise an exception if invalid argument is given
series = pd.Series(["a", "b", "c "])
msg = (
r"Expecting 'to_replace' to be either a scalar, array-like, "
r"dict or None, got invalid type.*"
)
with pytest.raises(TypeError, match=msg):
series.replace(lambda x: x.strip())
@pytest.mark.parametrize("frame", [False, True])
def test_replace_nonbool_regex(self, frame):
obj = pd.Series(["a", "b", "c "])
if frame:
obj = obj.to_frame()
msg = "'to_replace' must be 'None' if 'regex' is not a bool"
with pytest.raises(ValueError, match=msg):
obj.replace(to_replace=["a"], regex="foo")
@pytest.mark.parametrize("frame", [False, True])
def test_replace_empty_copy(self, frame):
obj = pd.Series([], dtype=np.float64)
if frame:
obj = obj.to_frame()
res = obj.replace(4, 5, inplace=True)
assert res is None
res = obj.replace(4, 5, inplace=False)
tm.assert_equal(res, obj)
assert res is not obj
def test_replace_only_one_dictlike_arg(self, fixed_now_ts):
# GH#33340
ser = pd.Series([1, 2, "A", fixed_now_ts, True])
to_replace = {0: 1, 2: "A"}
value = "foo"
msg = "Series.replace cannot use dict-like to_replace and non-None value"
with pytest.raises(ValueError, match=msg):
ser.replace(to_replace, value)
to_replace = 1
value = {0: "foo", 2: "bar"}
msg = "Series.replace cannot use dict-value and non-None to_replace"
with pytest.raises(ValueError, match=msg):
ser.replace(to_replace, value)
def test_replace_extension_other(self, frame_or_series):
# https://github.com/pandas-dev/pandas/issues/34530
obj = frame_or_series(pd.array([1, 2, 3], dtype="Int64"))
result = obj.replace("", "") # no exception
# should not have changed dtype
tm.assert_equal(obj, result)
def _check_replace_with_method(self, ser: pd.Series):
df = ser.to_frame()
res = ser.replace(ser[1], method="pad")
expected = pd.Series([ser[0], ser[0]] + list(ser[2:]), dtype=ser.dtype)
tm.assert_series_equal(res, expected)
res_df = df.replace(ser[1], method="pad")
tm.assert_frame_equal(res_df, expected.to_frame())
ser2 = ser.copy()
res2 = ser2.replace(ser[1], method="pad", inplace=True)
assert res2 is None
tm.assert_series_equal(ser2, expected)
res_df2 = df.replace(ser[1], method="pad", inplace=True)
assert res_df2 is None
tm.assert_frame_equal(df, expected.to_frame())
def test_replace_ea_dtype_with_method(self, any_numeric_ea_dtype):
arr = pd.array([1, 2, pd.NA, 4], dtype=any_numeric_ea_dtype)
ser = pd.Series(arr)
self._check_replace_with_method(ser)
@pytest.mark.parametrize("as_categorical", [True, False])
def test_replace_interval_with_method(self, as_categorical):
# in particular interval that can't hold NA
idx = pd.IntervalIndex.from_breaks(range(4))
ser = pd.Series(idx)
if as_categorical:
ser = ser.astype("category")
self._check_replace_with_method(ser)
@pytest.mark.parametrize("as_period", [True, False])
@pytest.mark.parametrize("as_categorical", [True, False])
def test_replace_datetimelike_with_method(self, as_period, as_categorical):
idx = pd.date_range("2016-01-01", periods=5, tz="US/Pacific")
if as_period:
idx = idx.tz_localize(None).to_period("D")
ser = pd.Series(idx)
ser.iloc[-2] = pd.NaT
if as_categorical:
ser = ser.astype("category")
self._check_replace_with_method(ser)
def test_replace_with_compiled_regex(self):
# https://github.com/pandas-dev/pandas/issues/35680
s = pd.Series(["a", "b", "c"])
regex = re.compile("^a$")
result = s.replace({regex: "z"}, regex=True)
expected = pd.Series(["z", "b", "c"])
tm.assert_series_equal(result, expected)
def test_pandas_replace_na(self):
# GH#43344
ser = pd.Series(["AA", "BB", "CC", "DD", "EE", "", pd.NA], dtype="string")
regex_mapping = {
"AA": "CC",
"BB": "CC",
"EE": "CC",
"CC": "CC-REPL",
}
result = ser.replace(regex_mapping, regex=True)
exp = pd.Series(["CC", "CC", "CC-REPL", "DD", "CC", "", pd.NA], dtype="string")
tm.assert_series_equal(result, exp)
@pytest.mark.parametrize(
"dtype, input_data, to_replace, expected_data",
[
("bool", [True, False], {True: False}, [False, False]),
("int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
("Int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
("float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
("Float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
("string", ["one", "two"], {"one": "1", "two": "2"}, ["1", "2"]),
(
pd.IntervalDtype("int64"),
IntervalArray([pd.Interval(1, 2), pd.Interval(2, 3)]),
{pd.Interval(1, 2): pd.Interval(10, 20)},
IntervalArray([pd.Interval(10, 20), pd.Interval(2, 3)]),
),
(
pd.IntervalDtype("float64"),
IntervalArray([pd.Interval(1.0, 2.7), pd.Interval(2.8, 3.1)]),
{pd.Interval(1.0, 2.7): pd.Interval(10.6, 20.8)},
IntervalArray([pd.Interval(10.6, 20.8), pd.Interval(2.8, 3.1)]),
),
(
pd.PeriodDtype("M"),
[pd.Period("2020-05", freq="M")],
{pd.Period("2020-05", freq="M"): pd.Period("2020-06", freq="M")},
[pd.Period("2020-06", freq="M")],
),
],
)
def test_replace_dtype(self, dtype, input_data, to_replace, expected_data):
# GH#33484
ser = pd.Series(input_data, dtype=dtype)
result = ser.replace(to_replace)
expected = pd.Series(expected_data, dtype=dtype)
tm.assert_series_equal(result, expected)
def test_replace_string_dtype(self):
# GH#40732, GH#44940
ser = pd.Series(["one", "two", np.nan], dtype="string")
res = ser.replace({"one": "1", "two": "2"})
expected = pd.Series(["1", "2", np.nan], dtype="string")
tm.assert_series_equal(res, expected)
# GH#31644
ser2 = pd.Series(["A", np.nan], dtype="string")
res2 = ser2.replace("A", "B")
expected2 = pd.Series(["B", np.nan], dtype="string")
tm.assert_series_equal(res2, expected2)
ser3 = pd.Series(["A", "B"], dtype="string")
res3 = ser3.replace("A", pd.NA)
expected3 = pd.Series([pd.NA, "B"], dtype="string")
tm.assert_series_equal(res3, expected3)
def test_replace_string_dtype_list_to_replace(self):
# GH#41215, GH#44940
ser = pd.Series(["abc", "def"], dtype="string")
res = ser.replace(["abc", "any other string"], "xyz")
expected = pd.Series(["xyz", "def"], dtype="string")
tm.assert_series_equal(res, expected)
def test_replace_string_dtype_regex(self):
# GH#31644
ser = pd.Series(["A", "B"], dtype="string")
res = ser.replace(r".", "C", regex=True)
expected = pd.Series(["C", "C"], dtype="string")
tm.assert_series_equal(res, expected)
def test_replace_nullable_numeric(self):
# GH#40732, GH#44940
floats = pd.Series([1.0, 2.0, 3.999, 4.4], dtype=pd.Float64Dtype())
assert floats.replace({1.0: 9}).dtype == floats.dtype
assert floats.replace(1.0, 9).dtype == floats.dtype
assert floats.replace({1.0: 9.0}).dtype == floats.dtype
assert floats.replace(1.0, 9.0).dtype == floats.dtype
res = floats.replace(to_replace=[1.0, 2.0], value=[9.0, 10.0])
assert res.dtype == floats.dtype
ints = pd.Series([1, 2, 3, 4], dtype=pd.Int64Dtype())
assert ints.replace({1: 9}).dtype == ints.dtype
assert ints.replace(1, 9).dtype == ints.dtype
assert ints.replace({1: 9.0}).dtype == ints.dtype
assert ints.replace(1, 9.0).dtype == ints.dtype
# FIXME: ints.replace({1: 9.5}) raises bc of incorrect _can_hold_element
@pytest.mark.parametrize("regex", [False, True])
def test_replace_regex_dtype_series(self, regex):
# GH-48644
series = pd.Series(["0"])
expected = pd.Series([1])
result = series.replace(to_replace="0", value=1, regex=regex)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,188 @@
from datetime import datetime
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
RangeIndex,
Series,
date_range,
)
import pandas._testing as tm
class TestResetIndex:
def test_reset_index_dti_round_trip(self):
dti = date_range(start="1/1/2001", end="6/1/2001", freq="D")._with_freq(None)
d1 = DataFrame({"v": np.random.rand(len(dti))}, index=dti)
d2 = d1.reset_index()
assert d2.dtypes[0] == np.dtype("M8[ns]")
d3 = d2.set_index("index")
tm.assert_frame_equal(d1, d3, check_names=False)
# GH#2329
stamp = datetime(2012, 11, 22)
df = DataFrame([[stamp, 12.1]], columns=["Date", "Value"])
df = df.set_index("Date")
assert df.index[0] == stamp
assert df.reset_index()["Date"][0] == stamp
def test_reset_index(self):
df = tm.makeDataFrame()[:5]
ser = df.stack()
ser.index.names = ["hash", "category"]
ser.name = "value"
df = ser.reset_index()
assert "value" in df
df = ser.reset_index(name="value2")
assert "value2" in df
# check inplace
s = ser.reset_index(drop=True)
s2 = ser
return_value = s2.reset_index(drop=True, inplace=True)
assert return_value is None
tm.assert_series_equal(s, s2)
# level
index = MultiIndex(
levels=[["bar"], ["one", "two", "three"], [0, 1]],
codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
)
s = Series(np.random.randn(6), index=index)
rs = s.reset_index(level=1)
assert len(rs.columns) == 2
rs = s.reset_index(level=[0, 2], drop=True)
tm.assert_index_equal(rs.index, Index(index.get_level_values(1)))
assert isinstance(rs, Series)
def test_reset_index_name(self):
s = Series([1, 2, 3], index=Index(range(3), name="x"))
assert s.reset_index().index.name is None
assert s.reset_index(drop=True).index.name is None
def test_reset_index_level(self):
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"])
for levels in ["A", "B"], [0, 1]:
# With MultiIndex
s = df.set_index(["A", "B"])["C"]
result = s.reset_index(level=levels[0])
tm.assert_frame_equal(result, df.set_index("B"))
result = s.reset_index(level=levels[:1])
tm.assert_frame_equal(result, df.set_index("B"))
result = s.reset_index(level=levels)
tm.assert_frame_equal(result, df)
result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True)
tm.assert_frame_equal(result, df[["C"]])
with pytest.raises(KeyError, match="Level E "):
s.reset_index(level=["A", "E"])
# With single-level Index
s = df.set_index("A")["B"]
result = s.reset_index(level=levels[0])
tm.assert_frame_equal(result, df[["A", "B"]])
result = s.reset_index(level=levels[:1])
tm.assert_frame_equal(result, df[["A", "B"]])
result = s.reset_index(level=levels[0], drop=True)
tm.assert_series_equal(result, df["B"])
with pytest.raises(IndexError, match="Too many levels"):
s.reset_index(level=[0, 1, 2])
# Check that .reset_index([],drop=True) doesn't fail
result = Series(range(4)).reset_index([], drop=True)
expected = Series(range(4))
tm.assert_series_equal(result, expected)
def test_reset_index_range(self):
# GH 12071
s = Series(range(2), name="A", dtype="int64")
series_result = s.reset_index()
assert isinstance(series_result.index, RangeIndex)
series_expected = DataFrame(
[[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2)
)
tm.assert_frame_equal(series_result, series_expected)
def test_reset_index_drop_errors(self):
# GH 20925
# KeyError raised for series index when passed level name is missing
s = Series(range(4))
with pytest.raises(KeyError, match="does not match index name"):
s.reset_index("wrong", drop=True)
with pytest.raises(KeyError, match="does not match index name"):
s.reset_index("wrong")
# KeyError raised for series when level to be dropped is missing
s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2))
with pytest.raises(KeyError, match="not found"):
s.reset_index("wrong", drop=True)
def test_reset_index_with_drop(self, series_with_multilevel_index):
ser = series_with_multilevel_index
deleveled = ser.reset_index()
assert isinstance(deleveled, DataFrame)
assert len(deleveled.columns) == len(ser.index.levels) + 1
assert deleveled.index.name == ser.index.name
deleveled = ser.reset_index(drop=True)
assert isinstance(deleveled, Series)
assert deleveled.index.name == ser.index.name
def test_drop_pos_args_deprecation(self):
# https://github.com/pandas-dev/pandas/issues/41485
ser = Series([1, 2, 3], index=Index([1, 2, 3], name="a"))
msg = (
r"In a future version of pandas all arguments of Series\.reset_index "
r"except for the argument 'level' will be keyword-only"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.reset_index("a", False)
expected = DataFrame({"a": [1, 2, 3], 0: [1, 2, 3]})
tm.assert_frame_equal(result, expected)
def test_reset_index_inplace_and_drop_ignore_name(self):
# GH#44575
ser = Series(range(2), name="old")
ser.reset_index(name="new", drop=True, inplace=True)
expected = Series(range(2), name="old")
tm.assert_series_equal(ser, expected)
@pytest.mark.parametrize(
"array, dtype",
[
(["a", "b"], object),
(
pd.period_range("12-1-2000", periods=2, freq="Q-DEC"),
pd.PeriodDtype(freq="Q-DEC"),
),
],
)
def test_reset_index_dtypes_on_empty_series_with_multiindex(array, dtype):
# GH 19602 - Preserve dtype on empty Series with MultiIndex
idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array])
result = Series(dtype=object, index=idx)[:0].reset_index().dtypes
expected = Series(
{"level_0": np.int64, "level_1": np.float64, "level_2": dtype, 0: object}
)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,64 @@
import numpy as np
import pytest
import pandas as pd
from pandas import Series
import pandas._testing as tm
class TestSeriesRound:
def test_round(self, datetime_series):
datetime_series.index.name = "index_name"
result = datetime_series.round(2)
expected = Series(
np.round(datetime_series.values, 2), index=datetime_series.index, name="ts"
)
tm.assert_series_equal(result, expected)
assert result.name == datetime_series.name
def test_round_numpy(self, any_float_dtype):
# See GH#12600
ser = Series([1.53, 1.36, 0.06], dtype=any_float_dtype)
out = np.round(ser, decimals=0)
expected = Series([2.0, 1.0, 0.0], dtype=any_float_dtype)
tm.assert_series_equal(out, expected)
msg = "the 'out' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.round(ser, decimals=0, out=ser)
def test_round_numpy_with_nan(self, any_float_dtype):
# See GH#14197
ser = Series([1.53, np.nan, 0.06], dtype=any_float_dtype)
with tm.assert_produces_warning(None):
result = ser.round()
expected = Series([2.0, np.nan, 0.0], dtype=any_float_dtype)
tm.assert_series_equal(result, expected)
def test_round_builtin(self, any_float_dtype):
ser = Series(
[1.123, 2.123, 3.123],
index=range(3),
dtype=any_float_dtype,
)
result = round(ser)
expected_rounded0 = Series(
[1.0, 2.0, 3.0], index=range(3), dtype=any_float_dtype
)
tm.assert_series_equal(result, expected_rounded0)
decimals = 2
expected_rounded = Series(
[1.12, 2.12, 3.12], index=range(3), dtype=any_float_dtype
)
result = round(ser, decimals)
tm.assert_series_equal(result, expected_rounded)
@pytest.mark.parametrize("method", ["round", "floor", "ceil"])
@pytest.mark.parametrize("freq", ["s", "5s", "min", "5min", "h", "5h"])
def test_round_nat(self, method, freq):
# GH14940
ser = Series([pd.NaT])
expected = Series(pd.NaT)
round_method = getattr(ser.dt, method)
tm.assert_series_equal(round_method(freq), expected)

View File

@ -0,0 +1,67 @@
import numpy as np
from pandas import (
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
from pandas.api.types import is_scalar
class TestSeriesSearchSorted:
def test_searchsorted(self):
ser = Series([1, 2, 3])
result = ser.searchsorted(1, side="left")
assert is_scalar(result)
assert result == 0
result = ser.searchsorted(1, side="right")
assert is_scalar(result)
assert result == 1
def test_searchsorted_numeric_dtypes_scalar(self):
ser = Series([1, 2, 90, 1000, 3e9])
res = ser.searchsorted(30)
assert is_scalar(res)
assert res == 2
res = ser.searchsorted([30])
exp = np.array([2], dtype=np.intp)
tm.assert_numpy_array_equal(res, exp)
def test_searchsorted_numeric_dtypes_vector(self):
ser = Series([1, 2, 90, 1000, 3e9])
res = ser.searchsorted([91, 2e6])
exp = np.array([3, 4], dtype=np.intp)
tm.assert_numpy_array_equal(res, exp)
def test_searchsorted_datetime64_scalar(self):
ser = Series(date_range("20120101", periods=10, freq="2D"))
val = Timestamp("20120102")
res = ser.searchsorted(val)
assert is_scalar(res)
assert res == 1
def test_searchsorted_datetime64_scalar_mixed_timezones(self):
# GH 30086
ser = Series(date_range("20120101", periods=10, freq="2D", tz="UTC"))
val = Timestamp("20120102", tz="America/New_York")
res = ser.searchsorted(val)
assert is_scalar(res)
assert res == 1
def test_searchsorted_datetime64_list(self):
ser = Series(date_range("20120101", periods=10, freq="2D"))
vals = [Timestamp("20120102"), Timestamp("20120104")]
res = ser.searchsorted(vals)
exp = np.array([1, 2], dtype=np.intp)
tm.assert_numpy_array_equal(res, exp)
def test_searchsorted_sorter(self):
# GH8490
ser = Series([3, 1, 2])
res = ser.searchsorted([0, 3], sorter=np.argsort(ser))
exp = np.array([0, 2], dtype=np.intp)
tm.assert_numpy_array_equal(res, exp)

View File

@ -0,0 +1,21 @@
from datetime import datetime
from pandas import Series
class TestSetName:
def test_set_name(self):
ser = Series([1, 2, 3])
ser2 = ser._set_name("foo")
assert ser2.name == "foo"
assert ser.name is None
assert ser is not ser2
def test_set_name_attribute(self):
ser = Series([1, 2, 3])
ser2 = Series([1, 2, 3], name="bar")
for name in [7, 7.0, "name", datetime(2001, 1, 1), (1,), "\u05D0"]:
ser.name = name
assert ser.name == name
ser2.name = name
assert ser2.name == name

View File

@ -0,0 +1,334 @@
import random
import numpy as np
import pytest
from pandas import (
DatetimeIndex,
IntervalIndex,
MultiIndex,
Series,
)
import pandas._testing as tm
@pytest.fixture(params=["quicksort", "mergesort", "heapsort", "stable"])
def sort_kind(request):
return request.param
class TestSeriesSortIndex:
def test_sort_index_name(self, datetime_series):
result = datetime_series.sort_index(ascending=False)
assert result.name == datetime_series.name
def test_sort_index(self, datetime_series):
datetime_series.index = datetime_series.index._with_freq(None)
rindex = list(datetime_series.index)
random.shuffle(rindex)
random_order = datetime_series.reindex(rindex)
sorted_series = random_order.sort_index()
tm.assert_series_equal(sorted_series, datetime_series)
# descending
sorted_series = random_order.sort_index(ascending=False)
tm.assert_series_equal(
sorted_series, datetime_series.reindex(datetime_series.index[::-1])
)
# compat on level
sorted_series = random_order.sort_index(level=0)
tm.assert_series_equal(sorted_series, datetime_series)
# compat on axis
sorted_series = random_order.sort_index(axis=0)
tm.assert_series_equal(sorted_series, datetime_series)
msg = "No axis named 1 for object type Series"
with pytest.raises(ValueError, match=msg):
random_order.sort_values(axis=1)
sorted_series = random_order.sort_index(level=0, axis=0)
tm.assert_series_equal(sorted_series, datetime_series)
with pytest.raises(ValueError, match=msg):
random_order.sort_index(level=0, axis=1)
def test_sort_index_inplace(self, datetime_series):
datetime_series.index = datetime_series.index._with_freq(None)
# For GH#11402
rindex = list(datetime_series.index)
random.shuffle(rindex)
# descending
random_order = datetime_series.reindex(rindex)
result = random_order.sort_index(ascending=False, inplace=True)
assert result is None
expected = datetime_series.reindex(datetime_series.index[::-1])
expected.index = expected.index._with_freq(None)
tm.assert_series_equal(random_order, expected)
# ascending
random_order = datetime_series.reindex(rindex)
result = random_order.sort_index(ascending=True, inplace=True)
assert result is None
expected = datetime_series.copy()
expected.index = expected.index._with_freq(None)
tm.assert_series_equal(random_order, expected)
def test_sort_index_level(self):
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
s = Series([1, 2], mi)
backwards = s.iloc[[1, 0]]
res = s.sort_index(level="A")
tm.assert_series_equal(backwards, res)
res = s.sort_index(level=["A", "B"])
tm.assert_series_equal(backwards, res)
res = s.sort_index(level="A", sort_remaining=False)
tm.assert_series_equal(s, res)
res = s.sort_index(level=["A", "B"], sort_remaining=False)
tm.assert_series_equal(s, res)
@pytest.mark.parametrize("level", ["A", 0]) # GH#21052
def test_sort_index_multiindex(self, level):
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
s = Series([1, 2], mi)
backwards = s.iloc[[1, 0]]
# implicit sort_remaining=True
res = s.sort_index(level=level)
tm.assert_series_equal(backwards, res)
# GH#13496
# sort has no effect without remaining lvls
res = s.sort_index(level=level, sort_remaining=False)
tm.assert_series_equal(s, res)
def test_sort_index_kind(self, sort_kind):
# GH#14444 & GH#13589: Add support for sort algo choosing
series = Series(index=[3, 2, 1, 4, 3], dtype=object)
expected_series = Series(index=[1, 2, 3, 3, 4], dtype=object)
index_sorted_series = series.sort_index(kind=sort_kind)
tm.assert_series_equal(expected_series, index_sorted_series)
def test_sort_index_na_position(self):
series = Series(index=[3, 2, 1, 4, 3, np.nan], dtype=object)
expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4], dtype=object)
index_sorted_series = series.sort_index(na_position="first")
tm.assert_series_equal(expected_series_first, index_sorted_series)
expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan], dtype=object)
index_sorted_series = series.sort_index(na_position="last")
tm.assert_series_equal(expected_series_last, index_sorted_series)
def test_sort_index_intervals(self):
s = Series(
[np.nan, 1, 2, 3], IntervalIndex.from_arrays([0, 1, 2, 3], [1, 2, 3, 4])
)
result = s.sort_index()
expected = s
tm.assert_series_equal(result, expected)
result = s.sort_index(ascending=False)
expected = Series(
[3, 2, 1, np.nan], IntervalIndex.from_arrays([3, 2, 1, 0], [4, 3, 2, 1])
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize(
"original_list, sorted_list, ascending, ignore_index, output_index",
[
([2, 3, 6, 1], [2, 3, 6, 1], True, True, [0, 1, 2, 3]),
([2, 3, 6, 1], [2, 3, 6, 1], True, False, [0, 1, 2, 3]),
([2, 3, 6, 1], [1, 6, 3, 2], False, True, [0, 1, 2, 3]),
([2, 3, 6, 1], [1, 6, 3, 2], False, False, [3, 2, 1, 0]),
],
)
def test_sort_index_ignore_index(
self, inplace, original_list, sorted_list, ascending, ignore_index, output_index
):
# GH 30114
ser = Series(original_list)
expected = Series(sorted_list, index=output_index)
kwargs = {
"ascending": ascending,
"ignore_index": ignore_index,
"inplace": inplace,
}
if inplace:
result_ser = ser.copy()
result_ser.sort_index(**kwargs)
else:
result_ser = ser.sort_index(**kwargs)
tm.assert_series_equal(result_ser, expected)
tm.assert_series_equal(ser, Series(original_list))
def test_sort_index_ascending_list(self):
# GH#16934
# Set up a Series with a three level MultiIndex
arrays = [
["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
["one", "two", "one", "two", "one", "two", "one", "two"],
[4, 3, 2, 1, 4, 3, 2, 1],
]
tuples = zip(*arrays)
mi = MultiIndex.from_tuples(tuples, names=["first", "second", "third"])
ser = Series(range(8), index=mi)
# Sort with boolean ascending
result = ser.sort_index(level=["third", "first"], ascending=False)
expected = ser.iloc[[4, 0, 5, 1, 6, 2, 7, 3]]
tm.assert_series_equal(result, expected)
# Sort with list of boolean ascending
result = ser.sort_index(level=["third", "first"], ascending=[False, True])
expected = ser.iloc[[0, 4, 1, 5, 2, 6, 3, 7]]
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ascending",
[
None,
(True, None),
(False, "True"),
],
)
def test_sort_index_ascending_bad_value_raises(self, ascending):
ser = Series(range(10), index=[0, 3, 2, 1, 4, 5, 7, 6, 8, 9])
match = 'For argument "ascending" expected type bool'
with pytest.raises(ValueError, match=match):
ser.sort_index(ascending=ascending)
class TestSeriesSortIndexKey:
def test_sort_index_multiindex_key(self):
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
s = Series([1, 2], mi)
backwards = s.iloc[[1, 0]]
result = s.sort_index(level="C", key=lambda x: -x)
tm.assert_series_equal(s, result)
result = s.sort_index(level="C", key=lambda x: x) # nothing happens
tm.assert_series_equal(backwards, result)
def test_sort_index_multiindex_key_multi_level(self):
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
s = Series([1, 2], mi)
backwards = s.iloc[[1, 0]]
result = s.sort_index(level=["A", "C"], key=lambda x: -x)
tm.assert_series_equal(s, result)
result = s.sort_index(level=["A", "C"], key=lambda x: x) # nothing happens
tm.assert_series_equal(backwards, result)
def test_sort_index_key(self):
series = Series(np.arange(6, dtype="int64"), index=list("aaBBca"))
result = series.sort_index()
expected = series.iloc[[2, 3, 0, 1, 5, 4]]
tm.assert_series_equal(result, expected)
result = series.sort_index(key=lambda x: x.str.lower())
expected = series.iloc[[0, 1, 5, 2, 3, 4]]
tm.assert_series_equal(result, expected)
result = series.sort_index(key=lambda x: x.str.lower(), ascending=False)
expected = series.iloc[[4, 2, 3, 0, 1, 5]]
tm.assert_series_equal(result, expected)
def test_sort_index_key_int(self):
series = Series(np.arange(6, dtype="int64"), index=np.arange(6, dtype="int64"))
result = series.sort_index()
tm.assert_series_equal(result, series)
result = series.sort_index(key=lambda x: -x)
expected = series.sort_index(ascending=False)
tm.assert_series_equal(result, expected)
result = series.sort_index(key=lambda x: 2 * x)
tm.assert_series_equal(result, series)
def test_sort_index_kind_key(self, sort_kind, sort_by_key):
# GH #14444 & #13589: Add support for sort algo choosing
series = Series(index=[3, 2, 1, 4, 3], dtype=object)
expected_series = Series(index=[1, 2, 3, 3, 4], dtype=object)
index_sorted_series = series.sort_index(kind=sort_kind, key=sort_by_key)
tm.assert_series_equal(expected_series, index_sorted_series)
def test_sort_index_kind_neg_key(self, sort_kind):
# GH #14444 & #13589: Add support for sort algo choosing
series = Series(index=[3, 2, 1, 4, 3], dtype=object)
expected_series = Series(index=[4, 3, 3, 2, 1], dtype=object)
index_sorted_series = series.sort_index(kind=sort_kind, key=lambda x: -x)
tm.assert_series_equal(expected_series, index_sorted_series)
def test_sort_index_na_position_key(self, sort_by_key):
series = Series(index=[3, 2, 1, 4, 3, np.nan], dtype=object)
expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4], dtype=object)
index_sorted_series = series.sort_index(na_position="first", key=sort_by_key)
tm.assert_series_equal(expected_series_first, index_sorted_series)
expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan], dtype=object)
index_sorted_series = series.sort_index(na_position="last", key=sort_by_key)
tm.assert_series_equal(expected_series_last, index_sorted_series)
def test_changes_length_raises(self):
s = Series([1, 2, 3])
with pytest.raises(ValueError, match="change the shape"):
s.sort_index(key=lambda x: x[:1])
def test_sort_values_key_type(self):
s = Series([1, 2, 3], DatetimeIndex(["2008-10-24", "2008-11-23", "2007-12-22"]))
result = s.sort_index(key=lambda x: x.month)
expected = s.iloc[[0, 1, 2]]
tm.assert_series_equal(result, expected)
result = s.sort_index(key=lambda x: x.day)
expected = s.iloc[[2, 1, 0]]
tm.assert_series_equal(result, expected)
result = s.sort_index(key=lambda x: x.year)
expected = s.iloc[[2, 0, 1]]
tm.assert_series_equal(result, expected)
result = s.sort_index(key=lambda x: x.month_name())
expected = s.iloc[[2, 1, 0]]
tm.assert_series_equal(result, expected)
def test_sort_index_pos_args_deprecation(self):
# https://github.com/pandas-dev/pandas/issues/41485
ser = Series([1, 2, 3])
msg = (
r"In a future version of pandas all arguments of Series.sort_index "
r"will be keyword-only"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.sort_index(0)
expected = Series([1, 2, 3])
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,256 @@
import numpy as np
import pytest
from pandas import (
Categorical,
DataFrame,
Series,
)
import pandas._testing as tm
class TestSeriesSortValues:
def test_sort_values(self, datetime_series):
# check indexes are reordered corresponding with the values
ser = Series([3, 2, 4, 1], ["A", "B", "C", "D"])
expected = Series([1, 2, 3, 4], ["D", "B", "A", "C"])
result = ser.sort_values()
tm.assert_series_equal(expected, result)
ts = datetime_series.copy()
ts[:5] = np.NaN
vals = ts.values
result = ts.sort_values()
assert np.isnan(result[-5:]).all()
tm.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:]))
# na_position
result = ts.sort_values(na_position="first")
assert np.isnan(result[:5]).all()
tm.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:]))
# something object-type
ser = Series(["A", "B"], [1, 2])
# no failure
ser.sort_values()
# ascending=False
ordered = ts.sort_values(ascending=False)
expected = np.sort(ts.dropna().values)[::-1]
tm.assert_almost_equal(expected, ordered.dropna().values)
ordered = ts.sort_values(ascending=False, na_position="first")
tm.assert_almost_equal(expected, ordered.dropna().values)
# ascending=[False] should behave the same as ascending=False
ordered = ts.sort_values(ascending=[False])
expected = ts.sort_values(ascending=False)
tm.assert_series_equal(expected, ordered)
ordered = ts.sort_values(ascending=[False], na_position="first")
expected = ts.sort_values(ascending=False, na_position="first")
tm.assert_series_equal(expected, ordered)
msg = 'For argument "ascending" expected type bool, received type NoneType.'
with pytest.raises(ValueError, match=msg):
ts.sort_values(ascending=None)
msg = r"Length of ascending \(0\) must be 1 for Series"
with pytest.raises(ValueError, match=msg):
ts.sort_values(ascending=[])
msg = r"Length of ascending \(3\) must be 1 for Series"
with pytest.raises(ValueError, match=msg):
ts.sort_values(ascending=[1, 2, 3])
msg = r"Length of ascending \(2\) must be 1 for Series"
with pytest.raises(ValueError, match=msg):
ts.sort_values(ascending=[False, False])
msg = 'For argument "ascending" expected type bool, received type str.'
with pytest.raises(ValueError, match=msg):
ts.sort_values(ascending="foobar")
# inplace=True
ts = datetime_series.copy()
return_value = ts.sort_values(ascending=False, inplace=True)
assert return_value is None
tm.assert_series_equal(ts, datetime_series.sort_values(ascending=False))
tm.assert_index_equal(
ts.index, datetime_series.sort_values(ascending=False).index
)
# GH#5856/5853
# Series.sort_values operating on a view
df = DataFrame(np.random.randn(10, 4))
s = df.iloc[:, 0]
msg = (
"This Series is a view of some other array, to sort in-place "
"you must create a copy"
)
with pytest.raises(ValueError, match=msg):
s.sort_values(inplace=True)
def test_sort_values_categorical(self):
c = Categorical(["a", "b", "b", "a"], ordered=False)
cat = Series(c.copy())
# sort in the categories order
expected = Series(
Categorical(["a", "a", "b", "b"], ordered=False), index=[0, 3, 1, 2]
)
result = cat.sort_values()
tm.assert_series_equal(result, expected)
cat = Series(Categorical(["a", "c", "b", "d"], ordered=True))
res = cat.sort_values()
exp = np.array(["a", "b", "c", "d"], dtype=np.object_)
tm.assert_numpy_array_equal(res.__array__(), exp)
cat = Series(
Categorical(
["a", "c", "b", "d"], categories=["a", "b", "c", "d"], ordered=True
)
)
res = cat.sort_values()
exp = np.array(["a", "b", "c", "d"], dtype=np.object_)
tm.assert_numpy_array_equal(res.__array__(), exp)
res = cat.sort_values(ascending=False)
exp = np.array(["d", "c", "b", "a"], dtype=np.object_)
tm.assert_numpy_array_equal(res.__array__(), exp)
raw_cat1 = Categorical(
["a", "b", "c", "d"], categories=["a", "b", "c", "d"], ordered=False
)
raw_cat2 = Categorical(
["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True
)
s = ["a", "b", "c", "d"]
df = DataFrame(
{"unsort": raw_cat1, "sort": raw_cat2, "string": s, "values": [1, 2, 3, 4]}
)
# Cats must be sorted in a dataframe
res = df.sort_values(by=["string"], ascending=False)
exp = np.array(["d", "c", "b", "a"], dtype=np.object_)
tm.assert_numpy_array_equal(res["sort"].values.__array__(), exp)
assert res["sort"].dtype == "category"
res = df.sort_values(by=["sort"], ascending=False)
exp = df.sort_values(by=["string"], ascending=True)
tm.assert_series_equal(res["values"], exp["values"])
assert res["sort"].dtype == "category"
assert res["unsort"].dtype == "category"
# unordered cat, but we allow this
df.sort_values(by=["unsort"], ascending=False)
# multi-columns sort
# GH#7848
df = DataFrame(
{"id": [6, 5, 4, 3, 2, 1], "raw_grade": ["a", "b", "b", "a", "a", "e"]}
)
df["grade"] = Categorical(df["raw_grade"], ordered=True)
df["grade"] = df["grade"].cat.set_categories(["b", "e", "a"])
# sorts 'grade' according to the order of the categories
result = df.sort_values(by=["grade"])
expected = df.iloc[[1, 2, 5, 0, 3, 4]]
tm.assert_frame_equal(result, expected)
# multi
result = df.sort_values(by=["grade", "id"])
expected = df.iloc[[2, 1, 5, 4, 3, 0]]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize(
"original_list, sorted_list, ignore_index, output_index",
[
([2, 3, 6, 1], [6, 3, 2, 1], True, [0, 1, 2, 3]),
([2, 3, 6, 1], [6, 3, 2, 1], False, [2, 1, 0, 3]),
],
)
def test_sort_values_ignore_index(
self, inplace, original_list, sorted_list, ignore_index, output_index
):
# GH 30114
ser = Series(original_list)
expected = Series(sorted_list, index=output_index)
kwargs = {"ignore_index": ignore_index, "inplace": inplace}
if inplace:
result_ser = ser.copy()
result_ser.sort_values(ascending=False, **kwargs)
else:
result_ser = ser.sort_values(ascending=False, **kwargs)
tm.assert_series_equal(result_ser, expected)
tm.assert_series_equal(ser, Series(original_list))
def test_sort_values_pos_args_deprecation(self):
# https://github.com/pandas-dev/pandas/issues/41485
ser = Series([1, 2, 3])
msg = (
r"In a future version of pandas all arguments of Series\.sort_values "
r"will be keyword-only"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.sort_values(0)
expected = Series([1, 2, 3])
tm.assert_series_equal(result, expected)
def test_mergesort_decending_stability(self):
# GH 28697
s = Series([1, 2, 1, 3], ["first", "b", "second", "c"])
result = s.sort_values(ascending=False, kind="mergesort")
expected = Series([3, 2, 1, 1], ["c", "b", "first", "second"])
tm.assert_series_equal(result, expected)
def test_sort_values_validate_ascending_for_value_error(self):
# GH41634
ser = Series([23, 7, 21])
msg = 'For argument "ascending" expected type bool, received type str.'
with pytest.raises(ValueError, match=msg):
ser.sort_values(ascending="False")
@pytest.mark.parametrize("ascending", [False, 0, 1, True])
def test_sort_values_validate_ascending_functional(self, ascending):
# GH41634
ser = Series([23, 7, 21])
expected = np.sort(ser.values)
sorted_ser = ser.sort_values(ascending=ascending)
if not ascending:
expected = expected[::-1]
result = sorted_ser.values
tm.assert_numpy_array_equal(result, expected)
class TestSeriesSortingKey:
def test_sort_values_key(self):
series = Series(np.array(["Hello", "goodbye"]))
result = series.sort_values(axis=0)
expected = series
tm.assert_series_equal(result, expected)
result = series.sort_values(axis=0, key=lambda x: x.str.lower())
expected = series[::-1]
tm.assert_series_equal(result, expected)
def test_sort_values_key_nan(self):
series = Series(np.array([0, 5, np.nan, 3, 2, np.nan]))
result = series.sort_values(axis=0)
expected = series.iloc[[0, 4, 3, 1, 2, 5]]
tm.assert_series_equal(result, expected)
result = series.sort_values(axis=0, key=lambda x: x + 5)
expected = series.iloc[[0, 4, 3, 1, 2, 5]]
tm.assert_series_equal(result, expected)
result = series.sort_values(axis=0, key=lambda x: -x, ascending=False)
expected = series.iloc[[0, 4, 3, 1, 2, 5]]
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,181 @@
from datetime import datetime
from io import StringIO
import numpy as np
import pytest
import pandas as pd
from pandas import Series
import pandas._testing as tm
from pandas.io.common import get_handle
class TestSeriesToCSV:
def read_csv(self, path, **kwargs):
params = {"index_col": 0, "header": None, "parse_dates": True}
params.update(**kwargs)
header = params.get("header")
out = pd.read_csv(path, **params).squeeze("columns")
if header is None:
out.name = out.index.name = None
return out
def test_from_csv(self, datetime_series, string_series):
# freq doesn't round-trip
datetime_series.index = datetime_series.index._with_freq(None)
with tm.ensure_clean() as path:
datetime_series.to_csv(path, header=False)
ts = self.read_csv(path)
tm.assert_series_equal(datetime_series, ts, check_names=False)
assert ts.name is None
assert ts.index.name is None
# see gh-10483
datetime_series.to_csv(path, header=True)
ts_h = self.read_csv(path, header=0)
assert ts_h.name == "ts"
string_series.to_csv(path, header=False)
series = self.read_csv(path)
tm.assert_series_equal(string_series, series, check_names=False)
assert series.name is None
assert series.index.name is None
string_series.to_csv(path, header=True)
series_h = self.read_csv(path, header=0)
assert series_h.name == "series"
with open(path, "w") as outfile:
outfile.write("1998-01-01|1.0\n1999-01-01|2.0")
series = self.read_csv(path, sep="|")
check_series = Series(
{datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0}
)
tm.assert_series_equal(check_series, series)
series = self.read_csv(path, sep="|", parse_dates=False)
check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0})
tm.assert_series_equal(check_series, series)
def test_to_csv(self, datetime_series):
with tm.ensure_clean() as path:
datetime_series.to_csv(path, header=False)
with open(path, newline=None) as f:
lines = f.readlines()
assert lines[1] != "\n"
datetime_series.to_csv(path, index=False, header=False)
arr = np.loadtxt(path)
tm.assert_almost_equal(arr, datetime_series.values)
def test_to_csv_unicode_index(self):
buf = StringIO()
s = Series(["\u05d0", "d2"], index=["\u05d0", "\u05d1"])
s.to_csv(buf, encoding="UTF-8", header=False)
buf.seek(0)
s2 = self.read_csv(buf, index_col=0, encoding="UTF-8")
tm.assert_series_equal(s, s2)
def test_to_csv_float_format(self):
with tm.ensure_clean() as filename:
ser = Series([0.123456, 0.234567, 0.567567])
ser.to_csv(filename, float_format="%.2f", header=False)
rs = self.read_csv(filename)
xp = Series([0.12, 0.23, 0.57])
tm.assert_series_equal(rs, xp)
def test_to_csv_list_entries(self):
s = Series(["jack and jill", "jesse and frank"])
split = s.str.split(r"\s+and\s+")
buf = StringIO()
split.to_csv(buf, header=False)
def test_to_csv_path_is_none(self):
# GH 8215
# Series.to_csv() was returning None, inconsistent with
# DataFrame.to_csv() which returned string
s = Series([1, 2, 3])
csv_str = s.to_csv(path_or_buf=None, header=False)
assert isinstance(csv_str, str)
@pytest.mark.parametrize(
"s,encoding",
[
(
Series([0.123456, 0.234567, 0.567567], index=["A", "B", "C"], name="X"),
None,
),
# GH 21241, 21118
(Series(["abc", "def", "ghi"], name="X"), "ascii"),
(Series(["123", "你好", "世界"], name="中文"), "gb2312"),
(Series(["123", "Γειά σου", "Κόσμε"], name="Ελληνικά"), "cp737"),
],
)
def test_to_csv_compression(self, s, encoding, compression):
with tm.ensure_clean() as filename:
s.to_csv(filename, compression=compression, encoding=encoding, header=True)
# test the round trip - to_csv -> read_csv
result = pd.read_csv(
filename,
compression=compression,
encoding=encoding,
index_col=0,
).squeeze("columns")
tm.assert_series_equal(s, result)
# test the round trip using file handle - to_csv -> read_csv
with get_handle(
filename, "w", compression=compression, encoding=encoding
) as handles:
s.to_csv(handles.handle, encoding=encoding, header=True)
result = pd.read_csv(
filename,
compression=compression,
encoding=encoding,
index_col=0,
).squeeze("columns")
tm.assert_series_equal(s, result)
# explicitly ensure file was compressed
with tm.decompress_file(filename, compression) as fh:
text = fh.read().decode(encoding or "utf8")
assert s.name in text
with tm.decompress_file(filename, compression) as fh:
tm.assert_series_equal(
s,
pd.read_csv(fh, index_col=0, encoding=encoding).squeeze("columns"),
)
def test_to_csv_interval_index(self):
# GH 28210
s = Series(["foo", "bar", "baz"], index=pd.interval_range(0, 3))
with tm.ensure_clean("__tmp_to_csv_interval_index__.csv") as path:
s.to_csv(path, header=False)
result = self.read_csv(path, index_col=0)
# can't roundtrip intervalindex via read_csv so check string repr (GH 23595)
expected = s.copy()
expected.index = expected.index.astype(str)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,38 @@
import collections
import numpy as np
import pytest
from pandas import Series
import pandas._testing as tm
class TestSeriesToDict:
@pytest.mark.parametrize(
"mapping", (dict, collections.defaultdict(list), collections.OrderedDict)
)
def test_to_dict(self, mapping, datetime_series):
# GH#16122
result = Series(datetime_series.to_dict(mapping), name="ts")
expected = datetime_series.copy()
expected.index = expected.index._with_freq(None)
tm.assert_series_equal(result, expected)
from_method = Series(datetime_series.to_dict(collections.Counter))
from_constructor = Series(collections.Counter(datetime_series.items()))
tm.assert_series_equal(from_method, from_constructor)
@pytest.mark.parametrize(
"input",
(
{"a": np.int64(64), "b": 10},
{"a": np.int64(64), "b": 10, "c": "ABC"},
{"a": np.uint64(64), "b": 10, "c": "ABC"},
),
)
def test_to_dict_return_types(self, input):
# GH25969
d = Series(input).to_dict()
assert isinstance(d["a"], int)
assert isinstance(d["b"], int)

View File

@ -0,0 +1,61 @@
from pandas import (
DataFrame,
Index,
Series,
)
import pandas._testing as tm
class TestToFrame:
def test_to_frame_respects_name_none(self):
# GH#44212 if we explicitly pass name=None, then that should be respected,
# not changed to 0
# GH-45448 this is first deprecated to only change in the future
ser = Series(range(3))
with tm.assert_produces_warning(FutureWarning):
result = ser.to_frame(None)
# exp_index = Index([None], dtype=object)
exp_index = Index([0])
tm.assert_index_equal(result.columns, exp_index)
with tm.assert_produces_warning(FutureWarning):
result = ser.rename("foo").to_frame(None)
exp_index = Index(["foo"], dtype=object)
tm.assert_index_equal(result.columns, exp_index)
def test_to_frame(self, datetime_series):
datetime_series.name = None
rs = datetime_series.to_frame()
xp = DataFrame(datetime_series.values, index=datetime_series.index)
tm.assert_frame_equal(rs, xp)
datetime_series.name = "testname"
rs = datetime_series.to_frame()
xp = DataFrame(
{"testname": datetime_series.values}, index=datetime_series.index
)
tm.assert_frame_equal(rs, xp)
rs = datetime_series.to_frame(name="testdifferent")
xp = DataFrame(
{"testdifferent": datetime_series.values}, index=datetime_series.index
)
tm.assert_frame_equal(rs, xp)
def test_to_frame_expanddim(self):
# GH#9762
class SubclassedSeries(Series):
@property
def _constructor_expanddim(self):
return SubclassedFrame
class SubclassedFrame(DataFrame):
pass
ser = SubclassedSeries([1, 2, 3], name="X")
result = ser.to_frame()
assert isinstance(result, SubclassedFrame)
expected = SubclassedFrame({"X": [1, 2, 3]})
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,65 @@
from datetime import datetime
import pandas as pd
from pandas import (
Series,
date_range,
)
import pandas._testing as tm
class TestTruncate:
def test_truncate_datetimeindex_tz(self):
# GH 9243
idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific")
s = Series(range(len(idx)), index=idx)
with tm.assert_produces_warning(FutureWarning):
# GH#36148 in the future will require tzawareness compat
s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4))
lb = idx[1]
ub = idx[3]
result = s.truncate(lb.to_pydatetime(), ub.to_pydatetime())
expected = Series([1, 2, 3], index=idx[1:4])
tm.assert_series_equal(result, expected)
def test_truncate_periodindex(self):
# GH 17717
idx1 = pd.PeriodIndex(
[pd.Period("2017-09-02"), pd.Period("2017-09-02"), pd.Period("2017-09-03")]
)
series1 = Series([1, 2, 3], index=idx1)
result1 = series1.truncate(after="2017-09-02")
expected_idx1 = pd.PeriodIndex(
[pd.Period("2017-09-02"), pd.Period("2017-09-02")]
)
tm.assert_series_equal(result1, Series([1, 2], index=expected_idx1))
idx2 = pd.PeriodIndex(
[pd.Period("2017-09-03"), pd.Period("2017-09-02"), pd.Period("2017-09-03")]
)
series2 = Series([1, 2, 3], index=idx2)
result2 = series2.sort_index().truncate(after="2017-09-02")
expected_idx2 = pd.PeriodIndex([pd.Period("2017-09-02")])
tm.assert_series_equal(result2, Series([2], index=expected_idx2))
def test_truncate_one_element_series(self):
# GH 35544
series = Series([0.1], index=pd.DatetimeIndex(["2020-08-04"]))
before = pd.Timestamp("2020-08-02")
after = pd.Timestamp("2020-08-04")
result = series.truncate(before=before, after=after)
# the input Series and the expected Series are the same
tm.assert_series_equal(result, series)
def test_truncate_index_only_one_unique_value(self):
# GH 42365
obj = Series(0, index=date_range("2021-06-30", "2021-06-30")).repeat(5)
truncated = obj.truncate("2021-06-28", "2021-07-01")
tm.assert_series_equal(truncated, obj)

View File

@ -0,0 +1,113 @@
import pytest
import pytz
from pandas._libs.tslibs import timezones
from pandas import (
DatetimeIndex,
NaT,
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestTZLocalize:
def test_series_tz_localize_ambiguous_bool(self):
# make sure that we are correctly accepting bool values as ambiguous
# GH#14402
ts = Timestamp("2015-11-01 01:00:03")
expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central")
expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central")
ser = Series([ts])
expected0 = Series([expected0])
expected1 = Series([expected1])
with tm.external_error_raised(pytz.AmbiguousTimeError):
ser.dt.tz_localize("US/Central")
result = ser.dt.tz_localize("US/Central", ambiguous=True)
tm.assert_series_equal(result, expected0)
result = ser.dt.tz_localize("US/Central", ambiguous=[True])
tm.assert_series_equal(result, expected0)
result = ser.dt.tz_localize("US/Central", ambiguous=False)
tm.assert_series_equal(result, expected1)
result = ser.dt.tz_localize("US/Central", ambiguous=[False])
tm.assert_series_equal(result, expected1)
def test_series_tz_localize_matching_index(self):
# Matching the index of the result with that of the original series
# GH 43080
dt_series = Series(
date_range(start="2021-01-01T02:00:00", periods=5, freq="1D"),
index=[2, 6, 7, 8, 11],
dtype="category",
)
result = dt_series.dt.tz_localize("Europe/Berlin")
expected = Series(
date_range(
start="2021-01-01T02:00:00", periods=5, freq="1D", tz="Europe/Berlin"
),
index=[2, 6, 7, 8, 11],
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"])
@pytest.mark.parametrize(
"method, exp",
[
["shift_forward", "2015-03-29 03:00:00"],
["NaT", NaT],
["raise", None],
["foo", "invalid"],
],
)
def test_tz_localize_nonexistent(self, tz, method, exp):
# GH 8917
n = 60
dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min")
ser = Series(1, index=dti)
df = ser.to_frame()
if method == "raise":
with tm.external_error_raised(pytz.NonExistentTimeError):
dti.tz_localize(tz, nonexistent=method)
with tm.external_error_raised(pytz.NonExistentTimeError):
ser.tz_localize(tz, nonexistent=method)
with tm.external_error_raised(pytz.NonExistentTimeError):
df.tz_localize(tz, nonexistent=method)
elif exp == "invalid":
with pytest.raises(ValueError, match="argument must be one of"):
dti.tz_localize(tz, nonexistent=method)
with pytest.raises(ValueError, match="argument must be one of"):
ser.tz_localize(tz, nonexistent=method)
with pytest.raises(ValueError, match="argument must be one of"):
df.tz_localize(tz, nonexistent=method)
else:
result = ser.tz_localize(tz, nonexistent=method)
expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz))
tm.assert_series_equal(result, expected)
result = df.tz_localize(tz, nonexistent=method)
expected = expected.to_frame()
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_series_tz_localize_empty(self, tzstr):
# GH#2248
ser = Series(dtype=object)
ser2 = ser.tz_localize("utc")
assert ser2.index.tz == pytz.utc
ser2 = ser.tz_localize(tzstr)
timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr))

View File

@ -0,0 +1,52 @@
import numpy as np
from pandas import (
Categorical,
Series,
)
import pandas._testing as tm
class TestUnique:
def test_unique_data_ownership(self):
# it works! GH#1807
Series(Series(["a", "c", "b"]).unique()).sort_values()
def test_unique(self):
# GH#714 also, dtype=float
ser = Series([1.2345] * 100)
ser[::2] = np.nan
result = ser.unique()
assert len(result) == 2
# explicit f4 dtype
ser = Series([1.2345] * 100, dtype="f4")
ser[::2] = np.nan
result = ser.unique()
assert len(result) == 2
def test_unique_nan_object_dtype(self):
# NAs in object arrays GH#714
ser = Series(["foo"] * 100, dtype="O")
ser[::2] = np.nan
result = ser.unique()
assert len(result) == 2
def test_unique_none(self):
# decision about None
ser = Series([1, 2, 3, None, None, None], dtype=object)
result = ser.unique()
expected = np.array([1, 2, 3, None], dtype=object)
tm.assert_numpy_array_equal(result, expected)
def test_unique_categorical(self):
# GH#18051
cat = Categorical([])
ser = Series(cat)
result = ser.unique()
tm.assert_categorical_equal(result, cat)
cat = Categorical([np.nan])
ser = Series(cat)
result = ser.unique()
tm.assert_categorical_equal(result, cat)

View File

@ -0,0 +1,149 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
MultiIndex,
Series,
)
import pandas._testing as tm
def test_unstack_preserves_object():
mi = MultiIndex.from_product([["bar", "foo"], ["one", "two"]])
ser = Series(np.arange(4.0), index=mi, dtype=object)
res1 = ser.unstack()
assert (res1.dtypes == object).all()
res2 = ser.unstack(level=0)
assert (res2.dtypes == object).all()
def test_unstack():
index = MultiIndex(
levels=[["bar", "foo"], ["one", "three", "two"]],
codes=[[1, 1, 0, 0], [0, 1, 0, 2]],
)
s = Series(np.arange(4.0), index=index)
unstacked = s.unstack()
expected = DataFrame(
[[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]],
index=["bar", "foo"],
columns=["one", "three", "two"],
)
tm.assert_frame_equal(unstacked, expected)
unstacked = s.unstack(level=0)
tm.assert_frame_equal(unstacked, expected.T)
index = MultiIndex(
levels=[["bar"], ["one", "two", "three"], [0, 1]],
codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
)
s = Series(np.random.randn(6), index=index)
exp_index = MultiIndex(
levels=[["one", "two", "three"], [0, 1]],
codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
)
expected = DataFrame({"bar": s.values}, index=exp_index).sort_index(level=0)
unstacked = s.unstack(0).sort_index()
tm.assert_frame_equal(unstacked, expected)
# GH5873
idx = MultiIndex.from_arrays([[101, 102], [3.5, np.nan]])
ts = Series([1, 2], index=idx)
left = ts.unstack()
right = DataFrame(
[[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5]
)
tm.assert_frame_equal(left, right)
idx = MultiIndex.from_arrays(
[
["cat", "cat", "cat", "dog", "dog"],
["a", "a", "b", "a", "b"],
[1, 2, 1, 1, np.nan],
]
)
ts = Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx)
right = DataFrame(
[[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]],
columns=["cat", "dog"],
)
tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)]
right.index = MultiIndex.from_tuples(tpls)
tm.assert_frame_equal(ts.unstack(level=0), right)
def test_unstack_tuplename_in_multiindex():
# GH 19966
idx = MultiIndex.from_product(
[["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")]
)
ser = Series(1, index=idx)
result = ser.unstack(("A", "a"))
expected = DataFrame(
[[1, 1, 1], [1, 1, 1], [1, 1, 1]],
columns=MultiIndex.from_tuples([("a",), ("b",), ("c",)], names=[("A", "a")]),
index=pd.Index([1, 2, 3], name=("B", "b")),
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"unstack_idx, expected_values, expected_index, expected_columns",
[
(
("A", "a"),
[[1, 1], [1, 1], [1, 1], [1, 1]],
MultiIndex.from_tuples([(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"]),
MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]),
),
(
(("A", "a"), "B"),
[[1, 1, 1, 1], [1, 1, 1, 1]],
pd.Index([3, 4], name="C"),
MultiIndex.from_tuples(
[("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"]
),
),
],
)
def test_unstack_mixed_type_name_in_multiindex(
unstack_idx, expected_values, expected_index, expected_columns
):
# GH 19966
idx = MultiIndex.from_product(
[["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"]
)
ser = Series(1, index=idx)
result = ser.unstack(unstack_idx)
expected = DataFrame(
expected_values, columns=expected_columns, index=expected_index
)
tm.assert_frame_equal(result, expected)
def test_unstack_multi_index_categorical_values():
mi = tm.makeTimeDataFrame().stack().index.rename(["major", "minor"])
ser = Series(["foo"] * len(mi), index=mi, name="category", dtype="category")
result = ser.unstack()
dti = ser.index.levels[0]
c = pd.Categorical(["foo"] * len(dti))
expected = DataFrame(
{"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()},
columns=pd.Index(list("ABCD"), name="minor"),
index=dti.rename("major"),
)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,129 @@
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import (
CategoricalDtype,
DataFrame,
NaT,
Series,
Timestamp,
)
import pandas._testing as tm
class TestUpdate:
def test_update(self):
s = Series([1.5, np.nan, 3.0, 4.0, np.nan])
s2 = Series([np.nan, 3.5, np.nan, 5.0])
s.update(s2)
expected = Series([1.5, 3.5, 3.0, 5.0, np.nan])
tm.assert_series_equal(s, expected)
# GH 3217
df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
df["c"] = np.nan
df["c"].update(Series(["foo"], index=[0]))
expected = DataFrame(
[[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"]
)
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize(
"other, dtype, expected",
[
# other is int
([61, 63], "int32", Series([10, 61, 12], dtype="int32")),
([61, 63], "int64", Series([10, 61, 12])),
([61, 63], float, Series([10.0, 61.0, 12.0])),
([61, 63], object, Series([10, 61, 12], dtype=object)),
# other is float, but can be cast to int
([61.0, 63.0], "int32", Series([10, 61, 12], dtype="int32")),
([61.0, 63.0], "int64", Series([10, 61, 12])),
([61.0, 63.0], float, Series([10.0, 61.0, 12.0])),
([61.0, 63.0], object, Series([10, 61.0, 12], dtype=object)),
# others is float, cannot be cast to int
([61.1, 63.1], "int32", Series([10.0, 61.1, 12.0])),
([61.1, 63.1], "int64", Series([10.0, 61.1, 12.0])),
([61.1, 63.1], float, Series([10.0, 61.1, 12.0])),
([61.1, 63.1], object, Series([10, 61.1, 12], dtype=object)),
# other is object, cannot be cast
([(61,), (63,)], "int32", Series([10, (61,), 12])),
([(61,), (63,)], "int64", Series([10, (61,), 12])),
([(61,), (63,)], float, Series([10.0, (61,), 12.0])),
([(61,), (63,)], object, Series([10, (61,), 12])),
],
)
def test_update_dtypes(self, other, dtype, expected):
ser = Series([10, 11, 12], dtype=dtype)
other = Series(other, index=[1, 3])
ser.update(other)
tm.assert_series_equal(ser, expected)
@pytest.mark.parametrize(
"series, other, expected",
[
# update by key
(
Series({"a": 1, "b": 2, "c": 3, "d": 4}),
{"b": 5, "c": np.nan},
Series({"a": 1, "b": 5, "c": 3, "d": 4}),
),
# update by position
(Series([1, 2, 3, 4]), [np.nan, 5, 1], Series([1, 5, 1, 4])),
],
)
def test_update_from_non_series(self, series, other, expected):
# GH 33215
series.update(other)
tm.assert_series_equal(series, expected)
@pytest.mark.parametrize(
"data, other, expected, dtype",
[
(["a", None], [None, "b"], ["a", "b"], "string[python]"),
pytest.param(
["a", None],
[None, "b"],
["a", "b"],
"string[pyarrow]",
marks=td.skip_if_no("pyarrow", min_version="1.0.0"),
),
([1, None], [None, 2], [1, 2], "Int64"),
([True, None], [None, False], [True, False], "boolean"),
(
["a", None],
[None, "b"],
["a", "b"],
CategoricalDtype(categories=["a", "b"]),
),
(
[Timestamp(year=2020, month=1, day=1, tz="Europe/London"), NaT],
[NaT, Timestamp(year=2020, month=1, day=1, tz="Europe/London")],
[Timestamp(year=2020, month=1, day=1, tz="Europe/London")] * 2,
"datetime64[ns, Europe/London]",
),
],
)
def test_update_extension_array_series(self, data, other, expected, dtype):
result = Series(data, dtype=dtype)
other = Series(other, dtype=dtype)
expected = Series(expected, dtype=dtype)
result.update(other)
tm.assert_series_equal(result, expected)
def test_update_with_categorical_type(self):
# GH 25744
dtype = CategoricalDtype(["a", "b", "c", "d"])
s1 = Series(["a", "b", "c"], index=[1, 2, 3], dtype=dtype)
s2 = Series(["b", "a"], index=[1, 2], dtype=dtype)
s1.update(s2)
result = s1
expected = Series(["b", "a", "c"], index=[1, 2, 3], dtype=dtype)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,228 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Categorical,
CategoricalIndex,
Series,
)
import pandas._testing as tm
class TestSeriesValueCounts:
def test_value_counts_datetime(self):
# most dtypes are tested in tests/base
values = [
pd.Timestamp("2011-01-01 09:00"),
pd.Timestamp("2011-01-01 10:00"),
pd.Timestamp("2011-01-01 11:00"),
pd.Timestamp("2011-01-01 09:00"),
pd.Timestamp("2011-01-01 09:00"),
pd.Timestamp("2011-01-01 11:00"),
]
exp_idx = pd.DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"]
)
exp = Series([3, 2, 1], index=exp_idx, name="xxx")
ser = Series(values, name="xxx")
tm.assert_series_equal(ser.value_counts(), exp)
# check DatetimeIndex outputs the same result
idx = pd.DatetimeIndex(values, name="xxx")
tm.assert_series_equal(idx.value_counts(), exp)
# normalize
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx")
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
def test_value_counts_datetime_tz(self):
values = [
pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"),
pd.Timestamp("2011-01-01 10:00", tz="US/Eastern"),
pd.Timestamp("2011-01-01 11:00", tz="US/Eastern"),
pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"),
pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"),
pd.Timestamp("2011-01-01 11:00", tz="US/Eastern"),
]
exp_idx = pd.DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"],
tz="US/Eastern",
)
exp = Series([3, 2, 1], index=exp_idx, name="xxx")
ser = Series(values, name="xxx")
tm.assert_series_equal(ser.value_counts(), exp)
idx = pd.DatetimeIndex(values, name="xxx")
tm.assert_series_equal(idx.value_counts(), exp)
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx")
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
def test_value_counts_period(self):
values = [
pd.Period("2011-01", freq="M"),
pd.Period("2011-02", freq="M"),
pd.Period("2011-03", freq="M"),
pd.Period("2011-01", freq="M"),
pd.Period("2011-01", freq="M"),
pd.Period("2011-03", freq="M"),
]
exp_idx = pd.PeriodIndex(["2011-01", "2011-03", "2011-02"], freq="M")
exp = Series([3, 2, 1], index=exp_idx, name="xxx")
ser = Series(values, name="xxx")
tm.assert_series_equal(ser.value_counts(), exp)
# check DatetimeIndex outputs the same result
idx = pd.PeriodIndex(values, name="xxx")
tm.assert_series_equal(idx.value_counts(), exp)
# normalize
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx")
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
def test_value_counts_categorical_ordered(self):
# most dtypes are tested in tests/base
values = Categorical([1, 2, 3, 1, 1, 3], ordered=True)
exp_idx = CategoricalIndex([1, 3, 2], categories=[1, 2, 3], ordered=True)
exp = Series([3, 2, 1], index=exp_idx, name="xxx")
ser = Series(values, name="xxx")
tm.assert_series_equal(ser.value_counts(), exp)
# check CategoricalIndex outputs the same result
idx = CategoricalIndex(values, name="xxx")
tm.assert_series_equal(idx.value_counts(), exp)
# normalize
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx")
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
def test_value_counts_categorical_not_ordered(self):
values = Categorical([1, 2, 3, 1, 1, 3], ordered=False)
exp_idx = CategoricalIndex([1, 3, 2], categories=[1, 2, 3], ordered=False)
exp = Series([3, 2, 1], index=exp_idx, name="xxx")
ser = Series(values, name="xxx")
tm.assert_series_equal(ser.value_counts(), exp)
# check CategoricalIndex outputs the same result
idx = CategoricalIndex(values, name="xxx")
tm.assert_series_equal(idx.value_counts(), exp)
# normalize
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx")
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
def test_value_counts_categorical(self):
# GH#12835
cats = Categorical(list("abcccb"), categories=list("cabd"))
ser = Series(cats, name="xxx")
res = ser.value_counts(sort=False)
exp_index = CategoricalIndex(list("cabd"), categories=cats.categories)
exp = Series([3, 1, 2, 0], name="xxx", index=exp_index)
tm.assert_series_equal(res, exp)
res = ser.value_counts(sort=True)
exp_index = CategoricalIndex(list("cbad"), categories=cats.categories)
exp = Series([3, 2, 1, 0], name="xxx", index=exp_index)
tm.assert_series_equal(res, exp)
# check object dtype handles the Series.name as the same
# (tested in tests/base)
ser = Series(["a", "b", "c", "c", "c", "b"], name="xxx")
res = ser.value_counts()
exp = Series([3, 2, 1], name="xxx", index=["c", "b", "a"])
tm.assert_series_equal(res, exp)
def test_value_counts_categorical_with_nan(self):
# see GH#9443
# sanity check
ser = Series(["a", "b", "a"], dtype="category")
exp = Series([2, 1], index=CategoricalIndex(["a", "b"]))
res = ser.value_counts(dropna=True)
tm.assert_series_equal(res, exp)
res = ser.value_counts(dropna=True)
tm.assert_series_equal(res, exp)
# same Series via two different constructions --> same behaviour
series = [
Series(["a", "b", None, "a", None, None], dtype="category"),
Series(
Categorical(["a", "b", None, "a", None, None], categories=["a", "b"])
),
]
for ser in series:
# None is a NaN value, so we exclude its count here
exp = Series([2, 1], index=CategoricalIndex(["a", "b"]))
res = ser.value_counts(dropna=True)
tm.assert_series_equal(res, exp)
# we don't exclude the count of None and sort by counts
exp = Series([3, 2, 1], index=CategoricalIndex([np.nan, "a", "b"]))
res = ser.value_counts(dropna=False)
tm.assert_series_equal(res, exp)
# When we aren't sorting by counts, and np.nan isn't a
# category, it should be last.
exp = Series([2, 1, 3], index=CategoricalIndex(["a", "b", np.nan]))
res = ser.value_counts(dropna=False, sort=False)
tm.assert_series_equal(res, exp)
@pytest.mark.parametrize(
"ser, dropna, exp",
[
(
Series([False, True, True, pd.NA]),
False,
Series([2, 1, 1], index=[True, False, pd.NA]),
),
(
Series([False, True, True, pd.NA]),
True,
Series([2, 1], index=[True, False]),
),
(
Series(range(3), index=[True, False, np.nan]).index,
False,
Series([1, 1, 1], index=[True, False, np.nan]),
),
],
)
def test_value_counts_bool_with_nan(self, ser, dropna, exp):
# GH32146
out = ser.value_counts(dropna=dropna)
tm.assert_series_equal(out, exp)
@pytest.mark.parametrize(
"input_array,expected",
[
(
[1 + 1j, 1 + 1j, 1, 3j, 3j, 3j],
Series([3, 2, 1], index=pd.Index([3j, 1 + 1j, 1], dtype=np.complex128)),
),
(
[1 + 1j, 1 + 1j, 1, 3j, 3j, 3j],
Series([3, 2, 1], index=pd.Index([3j, 1 + 1j, 1], dtype=np.complex64)),
),
],
)
def test_value_counts_complex_numbers(self, input_array, expected):
# GH 17927
# Complex Index dtype is cast to object
result = Series(input_array).value_counts()
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,29 @@
import numpy as np
import pytest
from pandas import (
IntervalIndex,
Series,
period_range,
)
import pandas._testing as tm
class TestValues:
@pytest.mark.parametrize(
"data",
[
period_range("2000", periods=4),
IntervalIndex.from_breaks([1, 2, 3, 4]),
],
)
def test_values_object_extension_dtypes(self, data):
# https://github.com/pandas-dev/pandas/issues/23995
result = Series(data).values
expected = np.array(data.astype(object))
tm.assert_numpy_array_equal(result, expected)
def test_values(self, datetime_series):
tm.assert_almost_equal(
datetime_series.values, datetime_series, check_dtype=False
)

View File

@ -0,0 +1,58 @@
import numpy as np
import pytest
from pandas import (
Index,
Series,
array,
date_range,
)
import pandas._testing as tm
class TestView:
def test_view_i8_to_datetimelike(self):
dti = date_range("2000", periods=4, tz="US/Central")
ser = Series(dti.asi8)
result = ser.view(dti.dtype)
tm.assert_datetime_array_equal(result._values, dti._data._with_freq(None))
pi = dti.tz_localize(None).to_period("D")
ser = Series(pi.asi8)
result = ser.view(pi.dtype)
tm.assert_period_array_equal(result._values, pi._data)
def test_view_tz(self):
# GH#24024
ser = Series(date_range("2000", periods=4, tz="US/Central"))
result = ser.view("i8")
expected = Series(
[
946706400000000000,
946792800000000000,
946879200000000000,
946965600000000000,
]
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"first", ["m8[ns]", "M8[ns]", "M8[ns, US/Central]", "period[D]"]
)
@pytest.mark.parametrize(
"second", ["m8[ns]", "M8[ns]", "M8[ns, US/Central]", "period[D]"]
)
@pytest.mark.parametrize("box", [Series, Index, array])
def test_view_between_datetimelike(self, first, second, box):
dti = date_range("2016-01-01", periods=3)
orig = box(dti)
obj = orig.view(first)
assert obj.dtype == first
tm.assert_numpy_array_equal(np.asarray(obj.view("i8")), dti.asi8)
res = obj.view(second)
assert res.dtype == second
tm.assert_numpy_array_equal(np.asarray(obj.view("i8")), dti.asi8)