2022-05-23 00:16:32 +04:00

768 lines
25 KiB
Python

import numpy as np
import pytest
from pandas.compat.numpy import np_percentile_argname
import pandas as pd
from pandas import (
DataFrame,
Index,
Series,
Timestamp,
)
import pandas._testing as tm
class TestDataFrameQuantile:
@pytest.mark.parametrize(
"df,expected",
[
[
DataFrame(
{
0: Series(pd.arrays.SparseArray([1, 2])),
1: Series(pd.arrays.SparseArray([3, 4])),
}
),
Series([1.5, 3.5], name=0.5),
],
[
DataFrame(Series([0.0, None, 1.0, 2.0], dtype="Sparse[float]")),
Series([1.0], name=0.5),
],
],
)
def test_quantile_sparse(self, df, expected):
# GH#17198
# GH#24600
result = df.quantile()
tm.assert_series_equal(result, expected)
def test_quantile(self, datetime_frame):
from numpy import percentile
df = datetime_frame
q = df.quantile(0.1, axis=0)
assert q["A"] == percentile(df["A"], 10)
tm.assert_index_equal(q.index, df.columns)
q = df.quantile(0.9, axis=1)
assert q["2000-01-17"] == percentile(df.loc["2000-01-17"], 90)
tm.assert_index_equal(q.index, df.index)
# test degenerate case
q = DataFrame({"x": [], "y": []}).quantile(0.1, axis=0)
assert np.isnan(q["x"]) and np.isnan(q["y"])
# non-numeric exclusion
df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]})
rs = df.quantile(0.5)
with tm.assert_produces_warning(FutureWarning, match="Select only valid"):
xp = df.median().rename(0.5)
tm.assert_series_equal(rs, xp)
# axis
df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3])
result = df.quantile(0.5, axis=1)
expected = Series([1.5, 2.5, 3.5], index=[1, 2, 3], name=0.5)
tm.assert_series_equal(result, expected)
result = df.quantile([0.5, 0.75], axis=1)
expected = DataFrame(
{1: [1.5, 1.75], 2: [2.5, 2.75], 3: [3.5, 3.75]}, index=[0.5, 0.75]
)
tm.assert_frame_equal(result, expected, check_index_type=True)
# We may want to break API in the future to change this
# so that we exclude non-numeric along the same axis
# See GH #7312
df = DataFrame([[1, 2, 3], ["a", "b", 4]])
result = df.quantile(0.5, axis=1)
expected = Series([3.0, 4.0], index=[0, 1], name=0.5)
tm.assert_series_equal(result, expected)
def test_quantile_date_range(self):
# GH 2460
dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
ser = Series(dti)
df = DataFrame(ser)
result = df.quantile(numeric_only=False)
expected = Series(
["2016-01-02 00:00:00"], name=0.5, dtype="datetime64[ns, US/Pacific]"
)
tm.assert_series_equal(result, expected)
def test_quantile_axis_mixed(self):
# mixed on axis=1
df = DataFrame(
{
"A": [1, 2, 3],
"B": [2.0, 3.0, 4.0],
"C": pd.date_range("20130101", periods=3),
"D": ["foo", "bar", "baz"],
}
)
result = df.quantile(0.5, axis=1)
expected = Series([1.5, 2.5, 3.5], name=0.5)
tm.assert_series_equal(result, expected)
# must raise
msg = "'<' not supported between instances of 'Timestamp' and 'float'"
with pytest.raises(TypeError, match=msg):
df.quantile(0.5, axis=1, numeric_only=False)
def test_quantile_axis_parameter(self):
# GH 9543/9544
df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3])
result = df.quantile(0.5, axis=0)
expected = Series([2.0, 3.0], index=["A", "B"], name=0.5)
tm.assert_series_equal(result, expected)
expected = df.quantile(0.5, axis="index")
tm.assert_series_equal(result, expected)
result = df.quantile(0.5, axis=1)
expected = Series([1.5, 2.5, 3.5], index=[1, 2, 3], name=0.5)
tm.assert_series_equal(result, expected)
result = df.quantile(0.5, axis="columns")
tm.assert_series_equal(result, expected)
msg = "No axis named -1 for object type DataFrame"
with pytest.raises(ValueError, match=msg):
df.quantile(0.1, axis=-1)
msg = "No axis named column for object type DataFrame"
with pytest.raises(ValueError, match=msg):
df.quantile(0.1, axis="column")
def test_quantile_interpolation(self):
# see gh-10174
# interpolation method other than default linear
df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3])
result = df.quantile(0.5, axis=1, interpolation="nearest")
expected = Series([1, 2, 3], index=[1, 2, 3], name=0.5)
tm.assert_series_equal(result, expected)
# cross-check interpolation=nearest results in original dtype
exp = np.percentile(
np.array([[1, 2, 3], [2, 3, 4]]),
0.5,
axis=0,
**{np_percentile_argname: "nearest"},
)
expected = Series(exp, index=[1, 2, 3], name=0.5, dtype="int64")
tm.assert_series_equal(result, expected)
# float
df = DataFrame({"A": [1.0, 2.0, 3.0], "B": [2.0, 3.0, 4.0]}, index=[1, 2, 3])
result = df.quantile(0.5, axis=1, interpolation="nearest")
expected = Series([1.0, 2.0, 3.0], index=[1, 2, 3], name=0.5)
tm.assert_series_equal(result, expected)
exp = np.percentile(
np.array([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]]),
0.5,
axis=0,
**{np_percentile_argname: "nearest"},
)
expected = Series(exp, index=[1, 2, 3], name=0.5, dtype="float64")
tm.assert_series_equal(result, expected)
# axis
result = df.quantile([0.5, 0.75], axis=1, interpolation="lower")
expected = DataFrame(
{1: [1.0, 1.0], 2: [2.0, 2.0], 3: [3.0, 3.0]}, index=[0.5, 0.75]
)
tm.assert_frame_equal(result, expected)
# test degenerate case
df = DataFrame({"x": [], "y": []})
q = df.quantile(0.1, axis=0, interpolation="higher")
assert np.isnan(q["x"]) and np.isnan(q["y"])
# multi
df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=["a", "b", "c"])
result = df.quantile([0.25, 0.5], interpolation="midpoint")
# https://github.com/numpy/numpy/issues/7163
expected = DataFrame(
[[1.5, 1.5, 1.5], [2.0, 2.0, 2.0]],
index=[0.25, 0.5],
columns=["a", "b", "c"],
)
tm.assert_frame_equal(result, expected)
def test_quantile_interpolation_datetime(self, datetime_frame):
# see gh-10174
# interpolation = linear (default case)
df = datetime_frame
q = df.quantile(0.1, axis=0, interpolation="linear")
assert q["A"] == np.percentile(df["A"], 10)
def test_quantile_interpolation_int(self, int_frame):
# see gh-10174
df = int_frame
# interpolation = linear (default case)
q = df.quantile(0.1)
assert q["A"] == np.percentile(df["A"], 10)
# test with and without interpolation keyword
q1 = df.quantile(0.1, axis=0, interpolation="linear")
assert q1["A"] == np.percentile(df["A"], 10)
tm.assert_series_equal(q, q1)
def test_quantile_multi(self):
df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=["a", "b", "c"])
result = df.quantile([0.25, 0.5])
expected = DataFrame(
[[1.5, 1.5, 1.5], [2.0, 2.0, 2.0]],
index=[0.25, 0.5],
columns=["a", "b", "c"],
)
tm.assert_frame_equal(result, expected)
# axis = 1
result = df.quantile([0.25, 0.5], axis=1)
expected = DataFrame(
[[1.5, 1.5, 1.5], [2.0, 2.0, 2.0]], index=[0.25, 0.5], columns=[0, 1, 2]
)
# empty
result = DataFrame({"x": [], "y": []}).quantile([0.1, 0.9], axis=0)
expected = DataFrame(
{"x": [np.nan, np.nan], "y": [np.nan, np.nan]}, index=[0.1, 0.9]
)
tm.assert_frame_equal(result, expected)
def test_quantile_datetime(self):
df = DataFrame({"a": pd.to_datetime(["2010", "2011"]), "b": [0, 5]})
# exclude datetime
result = df.quantile(0.5)
expected = Series([2.5], index=["b"])
# datetime
result = df.quantile(0.5, numeric_only=False)
expected = Series(
[Timestamp("2010-07-02 12:00:00"), 2.5], index=["a", "b"], name=0.5
)
tm.assert_series_equal(result, expected)
# datetime w/ multi
result = df.quantile([0.5], numeric_only=False)
expected = DataFrame(
[[Timestamp("2010-07-02 12:00:00"), 2.5]], index=[0.5], columns=["a", "b"]
)
tm.assert_frame_equal(result, expected)
# axis = 1
df["c"] = pd.to_datetime(["2011", "2012"])
result = df[["a", "c"]].quantile(0.5, axis=1, numeric_only=False)
expected = Series(
[Timestamp("2010-07-02 12:00:00"), Timestamp("2011-07-02 12:00:00")],
index=[0, 1],
name=0.5,
)
tm.assert_series_equal(result, expected)
result = df[["a", "c"]].quantile([0.5], axis=1, numeric_only=False)
expected = DataFrame(
[[Timestamp("2010-07-02 12:00:00"), Timestamp("2011-07-02 12:00:00")]],
index=[0.5],
columns=[0, 1],
)
tm.assert_frame_equal(result, expected)
# empty when numeric_only=True
result = df[["a", "c"]].quantile(0.5)
expected = Series([], index=[], dtype=np.float64, name=0.5)
tm.assert_series_equal(result, expected)
result = df[["a", "c"]].quantile([0.5])
expected = DataFrame(index=[0.5])
tm.assert_frame_equal(result, expected)
def test_quantile_invalid(self, datetime_frame):
msg = "percentiles should all be in the interval \\[0, 1\\]"
for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
with pytest.raises(ValueError, match=msg):
datetime_frame.quantile(invalid)
def test_quantile_box(self):
df = DataFrame(
{
"A": [
Timestamp("2011-01-01"),
Timestamp("2011-01-02"),
Timestamp("2011-01-03"),
],
"B": [
Timestamp("2011-01-01", tz="US/Eastern"),
Timestamp("2011-01-02", tz="US/Eastern"),
Timestamp("2011-01-03", tz="US/Eastern"),
],
"C": [
pd.Timedelta("1 days"),
pd.Timedelta("2 days"),
pd.Timedelta("3 days"),
],
}
)
res = df.quantile(0.5, numeric_only=False)
exp = Series(
[
Timestamp("2011-01-02"),
Timestamp("2011-01-02", tz="US/Eastern"),
pd.Timedelta("2 days"),
],
name=0.5,
index=["A", "B", "C"],
)
tm.assert_series_equal(res, exp)
res = df.quantile([0.5], numeric_only=False)
exp = DataFrame(
[
[
Timestamp("2011-01-02"),
Timestamp("2011-01-02", tz="US/Eastern"),
pd.Timedelta("2 days"),
]
],
index=[0.5],
columns=["A", "B", "C"],
)
tm.assert_frame_equal(res, exp)
# DatetimeLikeBlock may be consolidated and contain NaT in different loc
df = DataFrame(
{
"A": [
Timestamp("2011-01-01"),
pd.NaT,
Timestamp("2011-01-02"),
Timestamp("2011-01-03"),
],
"a": [
Timestamp("2011-01-01"),
Timestamp("2011-01-02"),
pd.NaT,
Timestamp("2011-01-03"),
],
"B": [
Timestamp("2011-01-01", tz="US/Eastern"),
pd.NaT,
Timestamp("2011-01-02", tz="US/Eastern"),
Timestamp("2011-01-03", tz="US/Eastern"),
],
"b": [
Timestamp("2011-01-01", tz="US/Eastern"),
Timestamp("2011-01-02", tz="US/Eastern"),
pd.NaT,
Timestamp("2011-01-03", tz="US/Eastern"),
],
"C": [
pd.Timedelta("1 days"),
pd.Timedelta("2 days"),
pd.Timedelta("3 days"),
pd.NaT,
],
"c": [
pd.NaT,
pd.Timedelta("1 days"),
pd.Timedelta("2 days"),
pd.Timedelta("3 days"),
],
},
columns=list("AaBbCc"),
)
res = df.quantile(0.5, numeric_only=False)
exp = Series(
[
Timestamp("2011-01-02"),
Timestamp("2011-01-02"),
Timestamp("2011-01-02", tz="US/Eastern"),
Timestamp("2011-01-02", tz="US/Eastern"),
pd.Timedelta("2 days"),
pd.Timedelta("2 days"),
],
name=0.5,
index=list("AaBbCc"),
)
tm.assert_series_equal(res, exp)
res = df.quantile([0.5], numeric_only=False)
exp = DataFrame(
[
[
Timestamp("2011-01-02"),
Timestamp("2011-01-02"),
Timestamp("2011-01-02", tz="US/Eastern"),
Timestamp("2011-01-02", tz="US/Eastern"),
pd.Timedelta("2 days"),
pd.Timedelta("2 days"),
]
],
index=[0.5],
columns=list("AaBbCc"),
)
tm.assert_frame_equal(res, exp)
def test_quantile_nan(self):
# GH 14357 - float block where some cols have missing values
df = DataFrame({"a": np.arange(1, 6.0), "b": np.arange(1, 6.0)})
df.iloc[-1, 1] = np.nan
res = df.quantile(0.5)
exp = Series([3.0, 2.5], index=["a", "b"], name=0.5)
tm.assert_series_equal(res, exp)
res = df.quantile([0.5, 0.75])
exp = DataFrame({"a": [3.0, 4.0], "b": [2.5, 3.25]}, index=[0.5, 0.75])
tm.assert_frame_equal(res, exp)
res = df.quantile(0.5, axis=1)
exp = Series(np.arange(1.0, 6.0), name=0.5)
tm.assert_series_equal(res, exp)
res = df.quantile([0.5, 0.75], axis=1)
exp = DataFrame([np.arange(1.0, 6.0)] * 2, index=[0.5, 0.75])
tm.assert_frame_equal(res, exp)
# full-nan column
df["b"] = np.nan
res = df.quantile(0.5)
exp = Series([3.0, np.nan], index=["a", "b"], name=0.5)
tm.assert_series_equal(res, exp)
res = df.quantile([0.5, 0.75])
exp = DataFrame({"a": [3.0, 4.0], "b": [np.nan, np.nan]}, index=[0.5, 0.75])
tm.assert_frame_equal(res, exp)
def test_quantile_nat(self):
# full NaT column
df = DataFrame({"a": [pd.NaT, pd.NaT, pd.NaT]})
res = df.quantile(0.5, numeric_only=False)
exp = Series([pd.NaT], index=["a"], name=0.5)
tm.assert_series_equal(res, exp)
res = df.quantile([0.5], numeric_only=False)
exp = DataFrame({"a": [pd.NaT]}, index=[0.5])
tm.assert_frame_equal(res, exp)
# mixed non-null / full null column
df = DataFrame(
{
"a": [
Timestamp("2012-01-01"),
Timestamp("2012-01-02"),
Timestamp("2012-01-03"),
],
"b": [pd.NaT, pd.NaT, pd.NaT],
}
)
res = df.quantile(0.5, numeric_only=False)
exp = Series([Timestamp("2012-01-02"), pd.NaT], index=["a", "b"], name=0.5)
tm.assert_series_equal(res, exp)
res = df.quantile([0.5], numeric_only=False)
exp = DataFrame(
[[Timestamp("2012-01-02"), pd.NaT]], index=[0.5], columns=["a", "b"]
)
tm.assert_frame_equal(res, exp)
def test_quantile_empty_no_rows_floats(self):
# floats
df = DataFrame(columns=["a", "b"], dtype="float64")
res = df.quantile(0.5)
exp = Series([np.nan, np.nan], index=["a", "b"], name=0.5)
tm.assert_series_equal(res, exp)
res = df.quantile([0.5])
exp = DataFrame([[np.nan, np.nan]], columns=["a", "b"], index=[0.5])
tm.assert_frame_equal(res, exp)
res = df.quantile(0.5, axis=1)
exp = Series([], index=[], dtype="float64", name=0.5)
tm.assert_series_equal(res, exp)
res = df.quantile([0.5], axis=1)
exp = DataFrame(columns=[], index=[0.5])
tm.assert_frame_equal(res, exp)
def test_quantile_empty_no_rows_ints(self):
# ints
df = DataFrame(columns=["a", "b"], dtype="int64")
res = df.quantile(0.5)
exp = Series([np.nan, np.nan], index=["a", "b"], name=0.5)
tm.assert_series_equal(res, exp)
def test_quantile_empty_no_rows_dt64(self):
# datetimes
df = DataFrame(columns=["a", "b"], dtype="datetime64[ns]")
res = df.quantile(0.5, numeric_only=False)
exp = Series(
[pd.NaT, pd.NaT], index=["a", "b"], dtype="datetime64[ns]", name=0.5
)
tm.assert_series_equal(res, exp)
# Mixed dt64/dt64tz
df["a"] = df["a"].dt.tz_localize("US/Central")
res = df.quantile(0.5, numeric_only=False)
exp = exp.astype(object)
tm.assert_series_equal(res, exp)
# both dt64tz
df["b"] = df["b"].dt.tz_localize("US/Central")
res = df.quantile(0.5, numeric_only=False)
exp = exp.astype(df["b"].dtype)
tm.assert_series_equal(res, exp)
def test_quantile_empty_no_columns(self):
# GH#23925 _get_numeric_data may drop all columns
df = DataFrame(pd.date_range("1/1/18", periods=5))
df.columns.name = "captain tightpants"
result = df.quantile(0.5)
expected = Series([], index=[], name=0.5, dtype=np.float64)
expected.index.name = "captain tightpants"
tm.assert_series_equal(result, expected)
result = df.quantile([0.5])
expected = DataFrame([], index=[0.5], columns=[])
expected.columns.name = "captain tightpants"
tm.assert_frame_equal(result, expected)
def test_quantile_item_cache(self, using_array_manager):
# previous behavior incorrect retained an invalid _item_cache entry
df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"])
df["D"] = df["A"] * 2
ser = df["A"]
if not using_array_manager:
assert len(df._mgr.blocks) == 2
df.quantile(numeric_only=False)
ser.values[0] = 99
assert df.iloc[0, 0] == df["A"][0]
class TestQuantileExtensionDtype:
# TODO: tests for axis=1?
# TODO: empty case?
@pytest.fixture(
params=[
pytest.param(
pd.IntervalIndex.from_breaks(range(10)),
marks=pytest.mark.xfail(reason="raises when trying to add Intervals"),
),
pd.period_range("2016-01-01", periods=9, freq="D"),
pd.date_range("2016-01-01", periods=9, tz="US/Pacific"),
pd.timedelta_range("1 Day", periods=9),
pd.array(np.arange(9), dtype="Int64"),
pd.array(np.arange(9), dtype="Float64"),
],
ids=lambda x: str(x.dtype),
)
def index(self, request):
# NB: not actually an Index object
idx = request.param
idx.name = "A"
return idx
@pytest.fixture
def obj(self, index, frame_or_series):
# bc index is not always an Index (yet), we need to re-patch .name
obj = frame_or_series(index).copy()
if frame_or_series is Series:
obj.name = "A"
else:
obj.columns = ["A"]
return obj
def compute_quantile(self, obj, qs):
if isinstance(obj, Series):
result = obj.quantile(qs)
else:
result = obj.quantile(qs, numeric_only=False)
return result
def test_quantile_ea(self, obj, index):
# result should be invariant to shuffling
indexer = np.arange(len(index), dtype=np.intp)
np.random.shuffle(indexer)
obj = obj.iloc[indexer]
qs = [0.5, 0, 1]
result = self.compute_quantile(obj, qs)
# expected here assumes len(index) == 9
expected = Series(
[index[4], index[0], index[-1]], dtype=index.dtype, index=qs, name="A"
)
expected = type(obj)(expected)
tm.assert_equal(result, expected)
def test_quantile_ea_with_na(self, obj, index):
obj.iloc[0] = index._na_value
obj.iloc[-1] = index._na_value
# result should be invariant to shuffling
indexer = np.arange(len(index), dtype=np.intp)
np.random.shuffle(indexer)
obj = obj.iloc[indexer]
qs = [0.5, 0, 1]
result = self.compute_quantile(obj, qs)
# expected here assumes len(index) == 9
expected = Series(
[index[4], index[1], index[-2]], dtype=index.dtype, index=qs, name="A"
)
expected = type(obj)(expected)
tm.assert_equal(result, expected)
# TODO(GH#39763): filtering can be removed after GH#39763 is fixed
@pytest.mark.filterwarnings("ignore:Using .astype to convert:FutureWarning")
def test_quantile_ea_all_na(
self, obj, index, frame_or_series, using_array_manager, request
):
if (
using_array_manager
and frame_or_series is DataFrame
and index.dtype == "m8[ns]"
):
mark = pytest.mark.xfail(
reason="obj.astype fails bc obj is incorrectly dt64 at this point"
)
request.node.add_marker(mark)
obj.iloc[:] = index._na_value
# TODO(ArrayManager): this casting should be unnecessary after GH#39763 is fixed
obj[:] = obj.astype(index.dtype)
assert np.all(obj.dtypes == index.dtype)
# result should be invariant to shuffling
indexer = np.arange(len(index), dtype=np.intp)
np.random.shuffle(indexer)
obj = obj.iloc[indexer]
qs = [0.5, 0, 1]
result = self.compute_quantile(obj, qs)
expected = index.take([-1, -1, -1], allow_fill=True, fill_value=index._na_value)
expected = Series(expected, index=qs, name="A")
expected = type(obj)(expected)
tm.assert_equal(result, expected)
def test_quantile_ea_scalar(self, obj, index):
# scalar qs
# result should be invariant to shuffling
indexer = np.arange(len(index), dtype=np.intp)
np.random.shuffle(indexer)
obj = obj.iloc[indexer]
qs = 0.5
result = self.compute_quantile(obj, qs)
expected = Series({"A": index[4]}, dtype=index.dtype, name=0.5)
if isinstance(obj, Series):
expected = expected["A"]
assert result == expected
else:
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"dtype, expected_data, expected_index, axis",
[
["float64", [], [], 1],
["int64", [], [], 1],
["float64", [np.nan, np.nan], ["a", "b"], 0],
["int64", [np.nan, np.nan], ["a", "b"], 0],
],
)
def test_empty_numeric(self, dtype, expected_data, expected_index, axis):
# GH 14564
df = DataFrame(columns=["a", "b"], dtype=dtype)
result = df.quantile(0.5, axis=axis)
expected = Series(
expected_data, name=0.5, index=Index(expected_index), dtype="float64"
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"dtype, expected_data, expected_index, axis, expected_dtype",
[
pytest.param(
"datetime64[ns]",
[],
[],
1,
"datetime64[ns]",
marks=pytest.mark.xfail(reason="#GH 41544"),
),
["datetime64[ns]", [pd.NaT, pd.NaT], ["a", "b"], 0, "datetime64[ns]"],
],
)
def test_empty_datelike(
self, dtype, expected_data, expected_index, axis, expected_dtype
):
# GH 14564
df = DataFrame(columns=["a", "b"], dtype=dtype)
result = df.quantile(0.5, axis=axis, numeric_only=False)
expected = Series(
expected_data, name=0.5, index=Index(expected_index), dtype=expected_dtype
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"expected_data, expected_index, axis",
[
[[np.nan, np.nan], range(2), 1],
[[], [], 0],
],
)
def test_datelike_numeric_only(self, expected_data, expected_index, axis):
# GH 14564
df = DataFrame(
{
"a": pd.to_datetime(["2010", "2011"]),
"b": [0, 5],
"c": pd.to_datetime(["2011", "2012"]),
}
)
result = df[["a", "c"]].quantile(0.5, axis=axis)
expected = Series(
expected_data, name=0.5, index=Index(expected_index), dtype=np.float64
)
tm.assert_series_equal(result, expected)