mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-07-03 14:49:07 +00:00
first commit
This commit is contained in:
@ -0,0 +1,52 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype,
|
||||
Int16Dtype,
|
||||
Int32Dtype,
|
||||
Int64Dtype,
|
||||
UInt8Dtype,
|
||||
UInt16Dtype,
|
||||
UInt32Dtype,
|
||||
UInt64Dtype,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        Int8Dtype,
        Int16Dtype,
        Int32Dtype,
        Int64Dtype,
        UInt8Dtype,
        UInt16Dtype,
        UInt32Dtype,
        UInt64Dtype,
    ]
)
def dtype(request):
    """Parametrized fixture yielding an instance of each listed integer dtype class."""
    # params holds the dtype classes; instantiate so tests receive a dtype object.
    return request.param()
|
||||
|
||||
|
||||
@pytest.fixture
def data(dtype):
    """Return an array of the given dtype holding 0..7, 10..97, 99, 100 and two NaN entries."""
    return pd.array(
        list(range(8)) + [np.nan] + list(range(10, 98)) + [np.nan] + [99, 100],
        dtype=dtype,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(dtype):
    """Return a two-element array of the given dtype: one missing value, then 1."""
    return pd.array([np.nan, 1], dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Parametrized fixture giving 'data' and 'data_missing'"""
    if request.param == "data":
        return data
    elif request.param == "data_missing":
        return data_missing
|
@ -0,0 +1,303 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import np_version_under1p20
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import FloatingArray
|
||||
import pandas.core.ops as ops
|
||||
|
||||
# Basic test for the arithmetic array ops
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "opname, exp",
    [("add", [1, 3, None, None, 9]), ("mul", [0, 2, None, None, 20])],
    ids=["add", "mul"],
)
def test_add_mul(dtype, opname, exp):
    """Check array + array and array * array, in both forward and reflected form."""
    a = pd.array([0, 1, None, 3, 4], dtype=dtype)
    b = pd.array([1, 2, 3, None, 5], dtype=dtype)

    # array / array
    expected = pd.array(exp, dtype=dtype)

    op = getattr(operator, opname)
    result = op(a, b)
    tm.assert_extension_array_equal(result, expected)

    # reflected variant looked up on pandas.core.ops ("radd" / "rmul")
    op = getattr(ops, "r" + opname)
    result = op(a, b)
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_sub(dtype):
    """Check element-wise subtraction; a missing value on either side gives a missing result."""
    a = pd.array([1, 2, 3, None, 5], dtype=dtype)
    b = pd.array([0, 1, None, 3, 4], dtype=dtype)

    result = a - b
    expected = pd.array([1, 1, None, None, 1], dtype=dtype)
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_div(dtype):
    """Check true division of two integer arrays against a Float64 expectation."""
    a = pd.array([1, 2, 3, None, 5], dtype=dtype)
    b = pd.array([0, 1, None, 3, 4], dtype=dtype)

    result = a / b
    # 1 / 0 is expected to be inf rather than a missing value
    expected = pd.array([np.inf, 2, None, None, 1.25], dtype="Float64")
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)])
def test_divide_by_zero(zero, negative):
    """Check Int64 array divided by an int/float zero scalar, including negative zero."""
    # https://github.com/pandas-dev/pandas/issues/27398, GH#22793
    a = pd.array([0, 1, -1, None], dtype="Int64")
    result = a / zero
    expected = FloatingArray(
        np.array([np.nan, np.inf, -np.inf, 1], dtype="float64"),
        np.array([False, False, False, True]),
    )
    if negative:
        # dividing by -0.0 flips the sign of the infinities
        expected *= -1
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_floordiv(dtype):
    """Check element-wise floor division of two integer arrays."""
    a = pd.array([1, 2, 3, None, 5], dtype=dtype)
    b = pd.array([0, 1, None, 3, 4], dtype=dtype)

    result = a // b
    # Series op sets 1//0 to np.inf, which IntegerArray does not do (yet)
    expected = pd.array([0, 2, None, None, 1], dtype=dtype)
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_mod(dtype):
    """Check element-wise modulo of two integer arrays."""
    a = pd.array([1, 2, 3, None, 5], dtype=dtype)
    b = pd.array([0, 1, None, 3, 4], dtype=dtype)

    result = a % b
    expected = pd.array([0, 0, None, None, 1], dtype=dtype)
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_pow_scalar():
    """Check Int64 array ** scalar and scalar ** Int64 array for 0, 1, pd.NA and np.nan."""
    a = pd.array([-1, 0, 1, None, 2], dtype="Int64")
    result = a**0
    expected = pd.array([1, 1, 1, 1, 1], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    result = a**1
    expected = pd.array([-1, 0, 1, None, 2], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    result = a**pd.NA
    expected = pd.array([None, None, 1, None, None], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    result = a**np.nan
    expected = FloatingArray(
        np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64"),
        np.array([False, False, False, True, False]),
    )
    tm.assert_extension_array_equal(result, expected)

    # reversed
    a = a[1:]  # Can't raise integers to negative powers.

    result = 0**a
    expected = pd.array([1, 0, None, 0], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    result = 1**a
    expected = pd.array([1, 1, 1, 1], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    result = pd.NA**a
    expected = pd.array([1, None, None, None], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    result = np.nan**a
    expected = FloatingArray(
        np.array([1, np.nan, np.nan, np.nan], dtype="float64"),
        np.array([False, False, True, False]),
    )
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_pow_array():
    """Check array ** array over every pairing of 0, 1 and a missing value."""
    a = pd.array([0, 0, 0, 1, 1, 1, None, None, None])
    b = pd.array([0, 1, None, 0, 1, None, 0, 1, None])
    result = a**b
    expected = pd.array([1, 0, None, 1, 1, 1, 1, None, None])
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_rpow_one_to_na():
    """Check that a NumPy float array raised to an all-missing Int64 array gives 1.0 for base 1."""
    # https://github.com/pandas-dev/pandas/issues/22022
    # https://github.com/pandas-dev/pandas/issues/29997
    arr = pd.array([np.nan, np.nan], dtype="Int64")
    result = np.array([1.0, 2.0]) ** arr
    expected = pd.array([1.0, np.nan], dtype="Float64")
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("other", [0, 0.5])
def test_numpy_zero_dim_ndarray(other):
    """Check that adding a zero-dimensional ndarray matches adding the plain scalar."""
    arr = pd.array([1, None, 2])
    result = arr + np.array(other)
    expected = arr + other
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
# Test generic characteristics / errors
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_error_invalid_values(data, all_arithmetic_operators):
    """Check that arithmetic with strings or datetimes raises TypeError."""
    op = all_arithmetic_operators
    s = pd.Series(data)
    # Bound arithmetic method on the Series; named so that it does not shadow
    # the module-level ``pandas.core.ops`` import.
    method = getattr(s, op)

    # invalid scalars
    msg = (
        r"(?:can only perform ops with numeric values)"
        r"|(?:IntegerArray cannot perform the operation mod)"
    )
    with pytest.raises(TypeError, match=msg):
        method("foo")
    with pytest.raises(TypeError, match=msg):
        method(pd.Timestamp("20180101"))

    # invalid array-likes
    with pytest.raises(TypeError, match=msg):
        method(pd.Series("foo", index=s.index))

    msg = "|".join(
        [
            "can only perform ops with numeric values",
            "cannot perform .* with this index type: DatetimeArray",
            "Addition/subtraction of integers and integer-arrays "
            "with DatetimeArray is no longer supported. *",
        ]
    )
    with pytest.raises(TypeError, match=msg):
        method(pd.Series(pd.date_range("20180101", periods=len(s))))
|
||||
|
||||
|
||||
# Various
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
# TODO test unsigned overflow
|
||||
|
||||
|
||||
def test_arith_coerce_scalar(data, all_arithmetic_operators):
    """Check that integer Series op float scalar matches the float Series result as Float64."""
    op = tm.get_op_from_name(all_arithmetic_operators)
    s = pd.Series(data)
    other = 0.01

    result = op(s, other)
    expected = op(s.astype(float), other)
    expected = expected.astype("Float64")
    # rfloordiv results in nan instead of inf
    if all_arithmetic_operators == "__rfloordiv__" and np_version_under1p20:
        # for numpy 1.20 https://github.com/numpy/numpy/pull/16161
        #  updated floordiv, now matches our behavior defined in core.ops
        mask = (
            ((expected == np.inf) | (expected == -np.inf)).fillna(False).to_numpy(bool)
        )
        expected.array._data[mask] = np.nan
    # rmod results in NaN that wasn't NA in original nullable Series -> unmask it
    elif all_arithmetic_operators == "__rmod__":
        mask = (s == 0).fillna(False).to_numpy(bool)
        expected.array._mask[mask] = False

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("other", [1.0, np.array(1.0)])
def test_arithmetic_conversion(all_arithmetic_operators, other):
    """Check that an Int64 Series combined with a float operand yields Float64."""
    # if we have a float operand we should have a float result
    # if that is equal to an integer
    op = tm.get_op_from_name(all_arithmetic_operators)

    s = pd.Series([1, 2, 3], dtype="Int64")
    result = op(s, other)
    assert result.dtype == "Float64"
|
||||
|
||||
|
||||
def test_cross_type_arithmetic():
    """Check arithmetic between Int64, UInt8 and plain integer columns of one DataFrame."""
    df = pd.DataFrame(
        {
            "A": pd.Series([1, 2, np.nan], dtype="Int64"),
            "B": pd.Series([1, np.nan, 3], dtype="UInt8"),
            "C": [1, 2, 3],
        }
    )

    result = df.A + df.C
    expected = pd.Series([2, 4, np.nan], dtype="Int64")
    tm.assert_series_equal(result, expected)

    result = (df.A + df.C) * 3 == 12
    expected = pd.Series([False, True, None], dtype="boolean")
    tm.assert_series_equal(result, expected)

    result = df.A + df.B
    expected = pd.Series([2, np.nan, np.nan], dtype="Int64")
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["mean"])
def test_reduce_to_float(op):
    """Check that the reduction returns a float on a Series and Float64 through groupby."""
    # some reduce ops always return float, even if the result
    # is a rounded number
    df = pd.DataFrame(
        {
            "A": ["a", "b", "b"],
            "B": [1, None, 3],
            "C": pd.array([1, None, 3], dtype="Int64"),
        }
    )

    # op
    result = getattr(df.C, op)()
    assert isinstance(result, float)

    # groupby
    result = getattr(df.groupby("A"), op)()

    expected = pd.DataFrame(
        {"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Float64")},
        index=pd.Index(["a", "b"], name="A"),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "source, neg_target, abs_target",
    [
        ([1, 2, 3], [-1, -2, -3], [1, 2, 3]),
        ([1, 2, None], [-1, -2, None], [1, 2, None]),
        ([-1, 0, 1], [1, 0, -1], [1, 0, 1]),
    ],
)
def test_unary_int_operators(any_signed_int_ea_dtype, source, neg_target, abs_target):
    """Check unary ``-``, unary ``+`` and ``abs`` on arrays of the supplied dtype."""
    dtype = any_signed_int_ea_dtype
    arr = pd.array(source, dtype=dtype)
    neg_result, pos_result, abs_result = -arr, +arr, abs(arr)
    neg_target = pd.array(neg_target, dtype=dtype)
    abs_target = pd.array(abs_target, dtype=dtype)

    tm.assert_extension_array_equal(neg_result, neg_target)
    tm.assert_extension_array_equal(pos_result, arr)
    # unary plus must return a new array, not a view on the input
    assert not tm.shares_memory(pos_result, arr)
    tm.assert_extension_array_equal(abs_result, abs_target)
|
@ -0,0 +1,38 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.tests.arrays.masked_shared import (
|
||||
ComparisonOps,
|
||||
NumericOps,
|
||||
)
|
||||
|
||||
|
||||
class TestComparisonOps(NumericOps, ComparisonOps):
    """Comparison tests built on the mixins from ``pandas.tests.arrays.masked_shared``."""

    @pytest.mark.parametrize("other", [True, False, pd.NA, -1, 0, 1])
    def test_scalar(self, other, comparison_op, dtype):
        """Run the inherited scalar comparison check for each listed scalar."""
        ComparisonOps.test_scalar(self, other, comparison_op, dtype)

    def test_compare_to_int(self, dtype, comparison_op):
        """Compare a Series of ``dtype`` with 2 against the equivalent float Series."""
        # GH 28930
        op_name = f"__{comparison_op.__name__}__"
        s1 = pd.Series([1, None, 3], dtype=dtype)
        s2 = pd.Series([1, None, 3], dtype="float")

        method = getattr(s1, op_name)
        result = method(2)

        method = getattr(s2, op_name)
        expected = method(2).astype("boolean")
        # the float comparison yields a value for NaN; mark those positions missing
        expected[s2.isna()] = pd.NA

        self.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_equals():
    """Check that arrays with equal values but different integer dtypes are not ``equals``."""
    # GH-30652
    # equals is generally tested in /tests/extension/base/methods, but this
    # specifically tests that two arrays of the same class but different dtype
    # do not evaluate equal
    a1 = pd.array([1, 2, None], dtype="Int64")
    a2 = pd.array([1, 2, None], dtype="Int32")
    assert a1.equals(a2) is False
|
@ -0,0 +1,65 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "to_concat_dtypes, result_dtype",
    [
        (["Int64", "Int64"], "Int64"),
        (["UInt64", "UInt64"], "UInt64"),
        (["Int8", "Int8"], "Int8"),
        (["Int8", "Int16"], "Int16"),
        (["UInt8", "Int8"], "Int16"),
        (["Int32", "UInt32"], "Int64"),
        (["Int64", "UInt64"], "Float64"),
        (["Int64", "boolean"], "Int64"),
        (["UInt8", "boolean"], "UInt8"),
    ],
)
def test_concat_series(to_concat_dtypes, result_dtype):
    """Check the result dtype of concatenating two Series with nullable dtypes."""
    result = pd.concat([pd.Series([0, 1, pd.NA], dtype=t) for t in to_concat_dtypes])
    expected = pd.concat([pd.Series([0, 1, pd.NA], dtype=object)] * 2).astype(
        result_dtype
    )
    tm.assert_series_equal(result, expected)

    # order doesn't matter for result
    result = pd.concat(
        [pd.Series([0, 1, pd.NA], dtype=t) for t in to_concat_dtypes[::-1]]
    )
    expected = pd.concat([pd.Series([0, 1, pd.NA], dtype=object)] * 2).astype(
        result_dtype
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "to_concat_dtypes, result_dtype",
    [
        (["Int64", "int64"], "Int64"),
        (["UInt64", "uint64"], "UInt64"),
        (["Int8", "int8"], "Int8"),
        (["Int8", "int16"], "Int16"),
        (["UInt8", "int8"], "Int16"),
        (["Int32", "uint32"], "Int64"),
        (["Int64", "uint64"], "Float64"),
        (["Int64", "bool"], "Int64"),
        (["UInt8", "bool"], "UInt8"),
    ],
)
def test_concat_series_with_numpy(to_concat_dtypes, result_dtype):
    """Check the result dtype of concatenating a nullable Series with a NumPy-backed one."""
    s1 = pd.Series([0, 1, pd.NA], dtype=to_concat_dtypes[0])
    s2 = pd.Series(np.array([0, 1], dtype=to_concat_dtypes[1]))
    result = pd.concat([s1, s2], ignore_index=True)
    expected = pd.Series([0, 1, pd.NA, 0, 1], dtype=object).astype(result_dtype)
    tm.assert_series_equal(result, expected)

    # order doesn't matter for result
    result = pd.concat([s2, s1], ignore_index=True)
    expected = pd.Series([0, 1, 0, 1, pd.NA], dtype=object).astype(result_dtype)
    tm.assert_series_equal(result, expected)
|
@ -0,0 +1,231 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import is_integer
|
||||
from pandas.core.arrays import IntegerArray
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype,
|
||||
Int32Dtype,
|
||||
Int64Dtype,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(params=[pd.array, IntegerArray._from_sequence])
def constructor(request):
    """Parametrized fixture yielding ``pd.array`` and ``IntegerArray._from_sequence``."""
    return request.param
|
||||
|
||||
|
||||
def test_uses_pandas_na():
    """Check that a missing element of an Int64 array is the ``pd.NA`` singleton."""
    a = pd.array([1, None], dtype=Int64Dtype())
    assert a[1] is pd.NA
|
||||
|
||||
|
||||
def test_from_dtype_from_float(data):
    """Check Series construction from float, list and integer-array input via the dtype name."""
    # construct from our dtype & string dtype
    dtype = data.dtype

    # from float
    expected = pd.Series(data)
    result = pd.Series(data.to_numpy(na_value=np.nan, dtype="float"), dtype=str(dtype))
    tm.assert_series_equal(result, expected)

    # from int / list
    expected = pd.Series(data)
    result = pd.Series(np.array(data).tolist(), dtype=str(dtype))
    tm.assert_series_equal(result, expected)

    # from int / array
    expected = pd.Series(data).dropna().reset_index(drop=True)
    dropped = np.array(data.dropna()).astype(np.dtype(dtype.type))
    result = pd.Series(dropped, dtype=str(dtype))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_conversions(data_missing):
    """Check conversion of an integer column to an object Series and object ndarray."""
    # astype to object series
    df = pd.DataFrame({"A": data_missing})
    result = df["A"].astype("object")
    expected = pd.Series(np.array([np.nan, 1], dtype=object), name="A")
    tm.assert_series_equal(result, expected)

    # convert to object ndarray
    # we assert that we are exactly equal
    # including type conversions of scalars
    result = df["A"].astype("object").values
    expected = np.array([pd.NA, 1], dtype=object)
    tm.assert_numpy_array_equal(result, expected)

    for r, e in zip(result, expected):
        if pd.isnull(r):
            assert pd.isnull(e)
        elif is_integer(r):
            assert r == e
            assert is_integer(e)
        else:
            assert r == e
            # exact type match: compare the type objects by identity
            assert type(r) is type(e)
|
||||
|
||||
|
||||
def test_integer_array_constructor():
    """Check ``IntegerArray(values, mask)`` and the TypeErrors raised for invalid arguments."""
    values = np.array([1, 2, 3, 4], dtype="int64")
    mask = np.array([False, False, False, True], dtype="bool")

    result = IntegerArray(values, mask)
    expected = pd.array([1, 2, 3, np.nan], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    msg = r".* should be .* numpy array. Use the 'pd.array' function instead"
    with pytest.raises(TypeError, match=msg):
        IntegerArray(values.tolist(), mask)

    with pytest.raises(TypeError, match=msg):
        IntegerArray(values, mask.tolist())

    with pytest.raises(TypeError, match=msg):
        IntegerArray(values.astype(float), mask)
    msg = r"__init__\(\) missing 1 required positional argument: 'mask'"
    with pytest.raises(TypeError, match=msg):
        IntegerArray(values)
|
||||
|
||||
|
||||
def test_integer_array_constructor_copy():
    """Check that ``IntegerArray`` keeps its inputs by default and copies with ``copy=True``."""
    values = np.array([1, 2, 3, 4], dtype="int64")
    mask = np.array([False, False, False, True], dtype="bool")

    result = IntegerArray(values, mask)
    assert result._data is values
    assert result._mask is mask

    result = IntegerArray(values, mask, copy=True)
    assert result._data is not values
    assert result._mask is not mask
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b",
    [
        ([1, None], [1, np.nan]),
        ([None], [np.nan]),
        ([None, np.nan], [np.nan, np.nan]),
        ([np.nan, np.nan], [np.nan, np.nan]),
    ],
)
def test_to_integer_array_none_is_nan(a, b):
    """Check that ``None`` and ``np.nan`` inputs produce equal Int64 arrays."""
    result = pd.array(a, dtype="Int64")
    expected = pd.array(b, dtype="Int64")
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values",
    [
        ["foo", "bar"],
        "foo",
        1,
        1.0,
        pd.date_range("20130101", periods=2),
        np.array(["foo"]),
        [[1, 2], [3, 4]],
        [np.nan, {"a": 1}],
    ],
)
def test_to_integer_array_error(values):
    """Check that both constructors reject values that cannot form an integer array."""
    # error in converting existing arrays to IntegerArrays
    msg = "|".join(
        [
            r"cannot be converted to an IntegerDtype",
            r"invalid literal for int\(\) with base 10:",
            r"values must be a 1D list-like",
            r"Cannot pass scalar",
        ]
    )
    with pytest.raises((ValueError, TypeError), match=msg):
        pd.array(values, dtype="Int64")

    with pytest.raises((ValueError, TypeError), match=msg):
        IntegerArray._from_sequence(values)
|
||||
|
||||
|
||||
def test_to_integer_array_inferred_dtype(constructor):
    """Check the dtype chosen by ``constructor`` when no ``dtype`` argument is given."""
    # if values has dtype -> respect it
    result = constructor(np.array([1, 2], dtype="int8"))
    assert result.dtype == Int8Dtype()
    result = constructor(np.array([1, 2], dtype="int32"))
    assert result.dtype == Int32Dtype()

    # if values have no dtype -> always int64
    result = constructor([1, 2])
    assert result.dtype == Int64Dtype()
|
||||
|
||||
|
||||
def test_to_integer_array_dtype_keyword(constructor):
    """Check that an explicit ``dtype`` argument decides the dtype of the result."""
    result = constructor([1, 2], dtype="Int8")
    assert result.dtype == Int8Dtype()

    # if values has dtype -> override it
    result = constructor(np.array([1, 2], dtype="int8"), dtype="Int32")
    assert result.dtype == Int32Dtype()
|
||||
|
||||
|
||||
def test_to_integer_array_float():
    """Check construction from whole-number floats and rejection of fractional floats."""
    result = IntegerArray._from_sequence([1.0, 2.0])
    expected = pd.array([1, 2], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    with pytest.raises(TypeError, match="cannot safely cast non-equivalent"):
        IntegerArray._from_sequence([1.5, 2.0])

    # for float dtypes, the itemsize is not preserved
    result = IntegerArray._from_sequence(np.array([1.0, 2.0], dtype="float32"))
    assert result.dtype == Int64Dtype()
|
||||
|
||||
|
||||
def test_to_integer_array_str():
    """Check construction from integer strings and rejection of non-integer strings."""
    result = IntegerArray._from_sequence(["1", "2", None])
    expected = pd.array([1, 2, np.nan], dtype="Int64")
    tm.assert_extension_array_equal(result, expected)

    with pytest.raises(
        ValueError, match=r"invalid literal for int\(\) with base 10: .*"
    ):
        IntegerArray._from_sequence(["1", "2", ""])

    with pytest.raises(
        ValueError, match=r"invalid literal for int\(\) with base 10: .*"
    ):
        IntegerArray._from_sequence(["1.5", "2.0"])
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "bool_values, int_values, target_dtype, expected_dtype",
    [
        ([False, True], [0, 1], Int64Dtype(), Int64Dtype()),
        ([False, True], [0, 1], "Int64", Int64Dtype()),
        ([False, True, np.nan], [0, 1, np.nan], Int64Dtype(), Int64Dtype()),
    ],
)
def test_to_integer_array_bool(
    constructor, bool_values, int_values, target_dtype, expected_dtype
):
    """Check that boolean input is converted to the equivalent integer array."""
    result = constructor(bool_values, dtype=target_dtype)
    assert result.dtype == expected_dtype
    expected = pd.array(int_values, dtype=target_dtype)
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values, to_dtype, result_dtype",
    [
        (np.array([1], dtype="int64"), None, Int64Dtype),
        (np.array([1, np.nan]), None, Int64Dtype),
        (np.array([1, np.nan]), "int8", Int8Dtype),
    ],
)
def test_to_integer_array(values, to_dtype, result_dtype):
    """Check ``IntegerArray._from_sequence`` on NumPy input, with and without ``dtype``."""
    # convert existing arrays to IntegerArrays
    result = IntegerArray._from_sequence(values, dtype=to_dtype)
    assert result.dtype == result_dtype()
    expected = pd.array(values, dtype=result_dtype())
    tm.assert_extension_array_equal(result, expected)
|
@ -0,0 +1,297 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.generic import ABCIndex
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype,
|
||||
UInt32Dtype,
|
||||
)
|
||||
|
||||
|
||||
def test_dtypes(dtype):
    """Check that the dtype's NumPy kind agrees with ``is_signed_integer`` and it has a name."""
    # smoke tests on auto dtype construction

    if dtype.is_signed_integer:
        assert np.dtype(dtype.type).kind == "i"
    else:
        assert np.dtype(dtype.type).kind == "u"
    assert dtype.name is not None
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["sum", "min", "max", "prod"])
def test_preserve_dtypes(op):
    """Check that the reduction returns ``np.int64`` on a Series and keeps Int64 in groupby."""
    # TODO(#22346): preserve Int64 dtype
    # for ops that enable (mean would actually work here
    # but generally it is a float return value)
    df = pd.DataFrame(
        {
            "A": ["a", "b", "b"],
            "B": [1, None, 3],
            "C": pd.array([1, None, 3], dtype="Int64"),
        }
    )

    # op
    result = getattr(df.C, op)()
    # every parametrized op returns a NumPy integer scalar
    assert isinstance(result, np.int64)

    # groupby
    result = getattr(df.groupby("A"), op)()

    expected = pd.DataFrame(
        {"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Int64")},
        index=pd.Index(["a", "b"], name="A"),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_nansafe():
    """Check that casting an array with a missing value to ``uint32`` raises ValueError."""
    # see gh-22343
    arr = pd.array([np.nan, 1, 2], dtype="Int8")
    msg = "cannot convert to 'uint32'-dtype NumPy array with missing values."

    with pytest.raises(ValueError, match=msg):
        arr.astype("uint32")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dropna", [True, False])
def test_construct_index(all_data, dropna):
    """Check that an Index built from integer-array data keeps the array's dtype."""
    # ensure that we do not coerce to Float64Index, rather
    # keep as Index

    all_data = all_data[:10]
    if dropna:
        other = np.array(all_data[~all_data.isna()])
    else:
        other = all_data

    result = pd.Index(pd.array(other, dtype=all_data.dtype))
    expected = pd.Index(other, dtype=all_data.dtype)
    assert all_data.dtype == expected.dtype  # dont coerce to object

    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dropna", [True, False])
def test_astype_index(all_data, dropna):
    """Check that ``Index.astype`` to the integer dtype matches going through object first."""
    # as an int/uint index to Index

    all_data = all_data[:10]
    if dropna:
        other = all_data[~all_data.isna()]
    else:
        other = all_data

    dtype = all_data.dtype
    idx = pd.Index._with_infer(np.array(other))
    assert isinstance(idx, ABCIndex)

    result = idx.astype(dtype)
    expected = idx.astype(object).astype(dtype)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype(all_data):
    """Check ``Series.astype`` to the same, another, NumPy and object dtype, with and without NA."""
    all_data = all_data[:10]

    ints = all_data[~all_data.isna()]
    mixed = all_data
    dtype = Int8Dtype()

    # coerce to same type - ints
    s = pd.Series(ints)
    result = s.astype(all_data.dtype)
    expected = pd.Series(ints)
    tm.assert_series_equal(result, expected)

    # coerce to same other - ints
    s = pd.Series(ints)
    result = s.astype(dtype)
    expected = pd.Series(ints, dtype=dtype)
    tm.assert_series_equal(result, expected)

    # coerce to same numpy_dtype - ints
    s = pd.Series(ints)
    result = s.astype(all_data.dtype.numpy_dtype)
    expected = pd.Series(ints._data.astype(all_data.dtype.numpy_dtype))
    tm.assert_series_equal(result, expected)

    # coerce to same type - mixed
    s = pd.Series(mixed)
    result = s.astype(all_data.dtype)
    expected = pd.Series(mixed)
    tm.assert_series_equal(result, expected)

    # coerce to same other - mixed
    s = pd.Series(mixed)
    result = s.astype(dtype)
    expected = pd.Series(mixed, dtype=dtype)
    tm.assert_series_equal(result, expected)

    # coerce to same numpy_dtype - mixed
    s = pd.Series(mixed)
    msg = r"cannot convert to .*-dtype NumPy array with missing values.*"
    with pytest.raises(ValueError, match=msg):
        s.astype(all_data.dtype.numpy_dtype)

    # coerce to object
    s = pd.Series(mixed)
    result = s.astype("object")
    expected = pd.Series(np.asarray(mixed))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_copy():
    """Check which ``astype`` calls share memory with the source array and which copy."""
    arr = pd.array([1, 2, 3, None], dtype="Int64")
    orig = pd.array([1, 2, 3, None], dtype="Int64")

    # copy=True -> ensure both data and mask are actual copies
    result = arr.astype("Int64", copy=True)
    assert result is not arr
    assert not tm.shares_memory(result, arr)
    result[0] = 10
    tm.assert_extension_array_equal(arr, orig)
    result[0] = pd.NA
    tm.assert_extension_array_equal(arr, orig)

    # copy=False
    result = arr.astype("Int64", copy=False)
    assert result is arr
    assert np.shares_memory(result._data, arr._data)
    assert np.shares_memory(result._mask, arr._mask)
    result[0] = 10
    assert arr[0] == 10
    result[0] = pd.NA
    assert arr[0] is pd.NA

    # astype to different dtype -> always needs a copy -> even with copy=False
    # we need to ensure that also the mask is actually copied
    arr = pd.array([1, 2, 3, None], dtype="Int64")
    orig = pd.array([1, 2, 3, None], dtype="Int64")

    result = arr.astype("Int32", copy=False)
    assert not tm.shares_memory(result, arr)
    result[0] = 10
    tm.assert_extension_array_equal(arr, orig)
    result[0] = pd.NA
    tm.assert_extension_array_equal(arr, orig)
|
||||
|
||||
|
||||
def test_astype_to_larger_numpy():
    """Check casting Int32/UInt32 arrays to the 64-bit NumPy dtypes."""
    a = pd.array([1, 2], dtype="Int32")
    result = a.astype("int64")
    expected = np.array([1, 2], dtype="int64")
    tm.assert_numpy_array_equal(result, expected)

    a = pd.array([1, 2], dtype="UInt32")
    result = a.astype("uint64")
    expected = np.array([1, 2], dtype="uint64")
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"])
def test_astype_specific_casting(dtype):
    """Check casting an Int64 Series to another integer dtype, with and without a missing value."""
    s = pd.Series([1, 2, 3], dtype="Int64")
    result = s.astype(dtype)
    expected = pd.Series([1, 2, 3], dtype=dtype)
    tm.assert_series_equal(result, expected)

    s = pd.Series([1, 2, 3, None], dtype="Int64")
    result = s.astype(dtype)
    expected = pd.Series([1, 2, 3, None], dtype=dtype)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_floating():
    """Check casting an Int64 array with a missing value to Float64."""
    arr = pd.array([1, 2, None], dtype="Int64")
    result = arr.astype("Float64")
    expected = pd.array([1.0, 2.0, None], dtype="Float64")
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_dt64():
    """Check casting an integer array of nanosecond counts to ``datetime64[ns]``."""
    # GH#32435
    arr = pd.array([1, 2, 3, pd.NA]) * 10**9

    result = arr.astype("datetime64[ns]")

    expected = np.array([1, 2, 3, "NaT"], dtype="M8[s]").astype("M8[ns]")
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_construct_cast_invalid(dtype):
    """Check that fractional floats cannot be constructed as or cast to an integer dtype."""
    msg = "cannot safely"
    arr = [1.2, 2.3, 3.7]
    with pytest.raises(TypeError, match=msg):
        pd.array(arr, dtype=dtype)

    with pytest.raises(TypeError, match=msg):
        pd.Series(arr).astype(dtype)

    arr = [1.2, 2.3, 3.7, np.nan]
    with pytest.raises(TypeError, match=msg):
        pd.array(arr, dtype=dtype)

    with pytest.raises(TypeError, match=msg):
        pd.Series(arr).astype(dtype)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("in_series", [True, False])
def test_to_numpy_na_nan(in_series):
    """Check ``to_numpy`` with an explicit ``na_value`` for float64, int64 and bool targets."""
    a = pd.array([0, 1, None], dtype="Int64")
    if in_series:
        a = pd.Series(a)

    result = a.to_numpy(dtype="float64", na_value=np.nan)
    expected = np.array([0.0, 1.0, np.nan], dtype="float64")
    tm.assert_numpy_array_equal(result, expected)

    result = a.to_numpy(dtype="int64", na_value=-1)
    expected = np.array([0, 1, -1], dtype="int64")
    tm.assert_numpy_array_equal(result, expected)

    result = a.to_numpy(dtype="bool", na_value=False)
    expected = np.array([False, True, False], dtype="bool")
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("in_series", [True, False])
@pytest.mark.parametrize("dtype", ["int32", "int64", "bool"])
def test_to_numpy_dtype(dtype, in_series):
    """Without missing values, ``to_numpy`` converts straight to the given dtype."""
    obj = pd.array([0, 1], dtype="Int64")
    if in_series:
        obj = pd.Series(obj)

    converted = obj.to_numpy(dtype=dtype)

    tm.assert_numpy_array_equal(converted, np.array([0, 1], dtype=dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["float64", "int64", "bool"])
def test_to_numpy_na_raises(dtype):
    """``to_numpy`` without ``na_value`` raises on NA, naming the dtype in the error."""
    with_na = pd.array([0, 1, None], dtype="Int64")

    # The dtype name doubles as the regex the error message must contain.
    with pytest.raises(ValueError, match=dtype):
        with_na.to_numpy(dtype=dtype)
|
||||
|
||||
|
||||
def test_astype_str():
    """``astype(str)`` and ``astype("str")`` both yield a "<U21" array with "<NA>"."""
    int_arr = pd.array([1, 2, None], dtype="Int64")
    expected = np.array(["1", "2", "<NA>"], dtype="<U21")

    # The builtin type and its string alias must behave the same.
    for target in (str, "str"):
        tm.assert_numpy_array_equal(int_arr.astype(target), expected)
|
||||
|
||||
|
||||
def test_astype_boolean():
    """Casting to "boolean" maps 0 to False, non-zero to True, and keeps NA."""
    # https://github.com/pandas-dev/pandas/issues/31102
    ints = pd.array([1, 0, -1, 2, None], dtype="Int64")
    expected = pd.array([True, False, True, True, None], dtype="boolean")

    tm.assert_extension_array_equal(ints.astype("boolean"), expected)
|
@ -0,0 +1,201 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import FloatingArray
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.abs, np.sign])
# np.sign emits a warning with nans, <https://github.com/numpy/numpy/issues/15127>
@pytest.mark.filterwarnings("ignore:invalid value encountered in sign")
def test_ufuncs_single_int(ufunc):
    """Unary ufuncs applied to an IntegerArray or Series give Int64 results."""
    int_arr = pd.array([1, 2, -3, np.nan])

    def reference():
        # Apply the ufunc to a float conversion, then cast back to Int64.
        return pd.array(ufunc(int_arr.astype(float)), dtype="Int64")

    tm.assert_extension_array_equal(ufunc(int_arr), reference())

    ser = pd.Series(int_arr)
    tm.assert_series_equal(ufunc(ser), pd.Series(reference()))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt])
def test_ufuncs_single_float(ufunc):
    """Float-valued unary ufuncs give a FloatingArray carrying the input's mask."""
    int_arr = pd.array([1, 2, -3, np.nan])

    # NumPy "invalid value" floating-point errors are ignored while computing.
    with np.errstate(invalid="ignore"):
        arr_result = ufunc(int_arr)
        expected = FloatingArray(ufunc(int_arr.astype(float)), mask=int_arr._mask)
    tm.assert_extension_array_equal(arr_result, expected)

    ser = pd.Series(int_arr)
    with np.errstate(invalid="ignore"):
        ser_result = ufunc(ser)
    tm.assert_series_equal(ser_result, pd.Series(expected))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.add, np.subtract])
def test_ufuncs_binary_int(ufunc):
    """Binary ufuncs return Int64 for IntegerArray, ndarray and scalar operands."""
    int_arr = pd.array([1, 2, -3, np.nan])
    other = np.array([1, 2, 3, 4])

    def as_float(operand):
        # The reference computation uses a float conversion of the IntegerArray.
        return int_arr.astype(float) if operand is int_arr else operand

    operand_pairs = [
        (int_arr, int_arr),  # two IntegerArrays
        (int_arr, other),  # IntegerArray with numpy array
        (other, int_arr),
        (int_arr, 1),  # IntegerArray with scalar
        (1, int_arr),
    ]
    for left, right in operand_pairs:
        result = ufunc(left, right)
        expected = pd.array(ufunc(as_float(left), as_float(right)), dtype="Int64")
        tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_ufunc_binary_output():
    """A two-output ufunc (``np.modf``) returns a 2-tuple of extension arrays."""
    int_arr = pd.array([1, 2, np.nan])
    result = np.modf(int_arr)

    # Reference: run modf on a plain float ndarray and wrap each output.
    first, second = np.modf(int_arr.to_numpy(na_value=np.nan, dtype="float"))
    expected = (pd.array(first), pd.array(second))

    assert isinstance(result, tuple)
    assert len(result) == 2

    for got, wanted in zip(result, expected):
        tm.assert_extension_array_equal(got, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("values", [[0, 1], [0, None]])
def test_ufunc_reduce_raises(values):
    """``np.add.reduce`` matches ``sum(skipna=False)`` on the same array."""
    int_arr = pd.array(values)

    reduced = np.add.reduce(int_arr)

    tm.assert_almost_equal(reduced, int_arr.sum(skipna=False))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "pandasmethname, kwargs",
    [
        ("var", {"ddof": 0}),
        ("var", {"ddof": 1}),
        ("kurtosis", {}),
        ("skew", {}),
        ("sem", {}),
    ],
)
def test_stat_method(pandasmethname, kwargs):
    """Trailing missing values leave the statistic of an Int64 Series unchanged."""
    with_na = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64")
    result = getattr(with_na, pandasmethname)(**kwargs)

    # Same data with the missing entries removed up front.
    without_na = pd.Series(data=[1, 2, 3, 4, 5, 6], dtype="Int64")
    expected = getattr(without_na, pandasmethname)(**kwargs)

    assert expected == result
|
||||
|
||||
|
||||
def test_value_counts_na():
|
||||
arr = pd.array([1, 2, 1, pd.NA], dtype="Int64")
|
||||
result = arr.value_counts(dropna=False)
|
||||
ex_index = pd.Index([1, 2, pd.NA], dtype="Int64")
|
||||
assert ex_index.dtype == "Int64"
|
||||
expected = pd.Series([2, 1, 1], index=ex_index, dtype="Int64")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = arr.value_counts(dropna=True)
|
||||
expected = pd.Series([2, 1], index=arr[:2], dtype="Int64")
|
||||
assert expected.index.dtype == arr.dtype
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_value_counts_empty():
|
||||
# https://github.com/pandas-dev/pandas/issues/33317
|
||||
ser = pd.Series([], dtype="Int64")
|
||||
result = ser.value_counts()
|
||||
idx = pd.Index([], dtype=ser.dtype)
|
||||
assert idx.dtype == ser.dtype
|
||||
expected = pd.Series([], index=idx, dtype="Int64")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_value_counts_with_normalize():
|
||||
# GH 33172
|
||||
ser = pd.Series([1, 2, 1, pd.NA], dtype="Int64")
|
||||
result = ser.value_counts(normalize=True)
|
||||
expected = pd.Series([2, 1], index=ser[:2], dtype="Float64") / 3
|
||||
assert expected.index.dtype == ser.dtype
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("min_count", [0, 4])
def test_integer_array_sum(skipna, min_count, any_int_ea_dtype):
    """``sum`` is 6 only when NA is skipped and ``min_count`` is 0; otherwise NA."""
    int_arr = pd.array([1, 2, 3, None], dtype=any_int_ea_dtype)
    total = int_arr.sum(skipna=skipna, min_count=min_count)

    if not skipna or min_count != 0:
        assert total is pd.NA
    else:
        assert total == 6
|
||||
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("method", ["min", "max"])
def test_integer_array_min_max(skipna, method, any_int_ea_dtype):
    """``min``/``max`` ignore NA when ``skipna`` is true and return NA otherwise."""
    int_arr = pd.array([0, 1, None], dtype=any_int_ea_dtype)
    result = getattr(int_arr, method)(skipna=skipna)

    if not skipna:
        assert result is pd.NA
        return

    wanted = 1 if method != "min" else 0
    assert result == wanted
|
||||
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("min_count", [0, 9])
def test_integer_array_prod(skipna, min_count, any_int_ea_dtype):
    """``prod`` is 2 only when NA is skipped and ``min_count`` is 0; otherwise NA."""
    int_arr = pd.array([1, 2, None], dtype=any_int_ea_dtype)
    product = int_arr.prod(skipna=skipna, min_count=min_count)

    if not skipna or min_count != 0:
        assert product is pd.NA
    else:
        assert product == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values, expected", [([1, 2, 3], 6), ([1, 2, 3, None], 6), ([None], 0)]
)
def test_integer_array_numpy_sum(values, expected):
    """``np.sum`` over an Int64 array equals the sum of its non-missing values."""
    int_arr = pd.array(values, dtype="Int64")
    total = np.sum(int_arr)
    assert total == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["sum", "prod", "min", "max"])
def test_dataframe_reductions(op):
    """DataFrame reductions over an Int64 column keep an integer result.

    Parameters
    ----------
    op : str
        Name of the DataFrame reduction method to call.
    """
    # https://github.com/pandas-dev/pandas/pull/32867
    # ensure the integers are not cast to float during reductions
    df = pd.DataFrame({"a": pd.array([1, 2], dtype="Int64")})
    # Dispatch on ``op`` so that every parametrized reduction is exercised,
    # not just ``max``.
    result = getattr(df, op)()
    assert isinstance(result["a"], np.int64)
|
||||
|
||||
|
||||
# TODO(jreback) - these need testing / are broken
|
||||
|
||||
# shift
|
||||
|
||||
# set_index (destroys type)
|
@ -0,0 +1,19 @@
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_array_setitem_nullable_boolean_mask():
    """``Series.where`` with a nullable boolean condition masks other rows as NA."""
    # GH 31446
    ser = pd.Series([1, 2], dtype="Int64")
    keep = ser > 1

    expected = pd.Series([pd.NA, 2], dtype="Int64")
    tm.assert_series_equal(ser.where(keep), expected)
|
||||
|
||||
|
||||
def test_array_setitem():
    """Assigning through a boolean mask built from the array updates it in place."""
    # GH 31446
    int_arr = pd.Series([1, 2], dtype="Int64").array
    above_one = int_arr > 1
    int_arr[above_one] = 1

    tm.assert_extension_array_equal(int_arr, pd.array([1, 1], dtype="Int64"))
|
@ -0,0 +1,68 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype,
|
||||
Int16Dtype,
|
||||
Int32Dtype,
|
||||
Int64Dtype,
|
||||
UInt8Dtype,
|
||||
UInt16Dtype,
|
||||
UInt32Dtype,
|
||||
UInt64Dtype,
|
||||
)
|
||||
|
||||
|
||||
def test_dtypes(dtype):
    """Smoke test: the NumPy kind matches the dtype's signedness and a name is set."""
    # smoke tests on auto dtype construction
    expected_kind = "i" if dtype.is_signed_integer else "u"

    assert np.dtype(dtype.type).kind == expected_kind
    assert dtype.name is not None
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, expected",
    [
        (Int8Dtype(), "Int8Dtype()"),
        (Int16Dtype(), "Int16Dtype()"),
        (Int32Dtype(), "Int32Dtype()"),
        (Int64Dtype(), "Int64Dtype()"),
        (UInt8Dtype(), "UInt8Dtype()"),
        (UInt16Dtype(), "UInt16Dtype()"),
        (UInt32Dtype(), "UInt32Dtype()"),
        (UInt64Dtype(), "UInt64Dtype()"),
    ],
)
def test_repr_dtype(dtype, expected):
    """The repr of each nullable integer dtype matches the paired expected text."""
    rendered = repr(dtype)
    assert rendered == expected
|
||||
|
||||
|
||||
def test_repr_array():
    """The repr of a short IntegerArray lists every value and shows NA as ``<NA>``."""
    expected = "<IntegerArray>\n[1, <NA>, 3]\nLength: 3, dtype: Int64"
    rendered = repr(pd.array([1, None, 3]))
    assert rendered == expected
|
||||
|
||||
|
||||
def test_repr_array_long():
    """The repr of a long IntegerArray is truncated with ``...`` in the middle.

    Values are right-aligned to the width of ``<NA>``, so the padding inside
    the expected text is significant and must not be collapsed.
    """
    data = pd.array([1, 2, None] * 1000)
    expected = (
        "<IntegerArray>\n"
        "[   1,    2, <NA>,    1,    2, <NA>,    1,    2, <NA>,    1,\n"
        " ...\n"
        " <NA>,    1,    2, <NA>,    1,    2, <NA>,    1,    2, <NA>]\n"
        "Length: 3000, dtype: Int64"
    )
    result = repr(data)
    assert result == expected
|
||||
|
||||
|
||||
def test_frame_repr(data_missing):
    """A DataFrame with a nullable integer column renders NA as ``<NA>``.

    Parameters
    ----------
    data_missing : ExtensionArray
        Fixture value used as column "A"; the expected text assumes it holds
        a missing value followed by 1.
    """
    df = pd.DataFrame({"A": data_missing})
    result = repr(df)
    # Cells are right-aligned to the width of "<NA>", so the padding in the
    # expected text is significant.
    expected = "      A\n0  <NA>\n1     1"
    assert result == expected
|
Reference in New Issue
Block a user