mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-07-03 14:49:07 +00:00
first commit
This commit is contained in:
@ -0,0 +1,121 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.arrays import FloatingArray
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data():
|
||||
return pd.array(
|
||||
[True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False],
|
||||
dtype="boolean",
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def left_array():
|
||||
return pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def right_array():
|
||||
return pd.array([True, False, None] * 3, dtype="boolean")
|
||||
|
||||
|
||||
# Basic test for the arithmetic array ops
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"opname, exp",
|
||||
[
|
||||
("add", [True, True, None, True, False, None, None, None, None]),
|
||||
("mul", [True, False, None, False, False, None, None, None, None]),
|
||||
],
|
||||
ids=["add", "mul"],
|
||||
)
|
||||
def test_add_mul(left_array, right_array, opname, exp):
|
||||
op = getattr(operator, opname)
|
||||
result = op(left_array, right_array)
|
||||
expected = pd.array(exp, dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_sub(left_array, right_array):
|
||||
msg = (
|
||||
r"numpy boolean subtract, the `-` operator, is (?:deprecated|not supported), "
|
||||
r"use the bitwise_xor, the `\^` operator, or the logical_xor function instead\."
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
left_array - right_array
|
||||
|
||||
|
||||
def test_div(left_array, right_array):
|
||||
result = left_array / right_array
|
||||
expected = FloatingArray(
|
||||
np.array(
|
||||
[1.0, np.inf, np.nan, 0.0, np.nan, np.nan, np.nan, np.nan, np.nan],
|
||||
dtype="float64",
|
||||
),
|
||||
np.array([False, False, True, False, False, True, True, True, True]),
|
||||
)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"opname",
|
||||
[
|
||||
"floordiv",
|
||||
"mod",
|
||||
pytest.param(
|
||||
"pow", marks=pytest.mark.xfail(reason="TODO follow int8 behaviour? GH34686")
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_op_int8(left_array, right_array, opname):
|
||||
op = getattr(operator, opname)
|
||||
result = op(left_array, right_array)
|
||||
expected = op(left_array.astype("Int8"), right_array.astype("Int8"))
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
# Test generic characteristics / errors
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_error_invalid_values(data, all_arithmetic_operators):
|
||||
# invalid ops
|
||||
|
||||
op = all_arithmetic_operators
|
||||
s = pd.Series(data)
|
||||
ops = getattr(s, op)
|
||||
|
||||
# invalid scalars
|
||||
msg = (
|
||||
"did not contain a loop with signature matching types|"
|
||||
"BooleanArray cannot perform the operation|"
|
||||
"not supported for the input types, and the inputs could not be safely coerced "
|
||||
"to any supported types according to the casting rule ''safe''"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ops("foo")
|
||||
msg = (
|
||||
r"unsupported operand type\(s\) for|"
|
||||
"Concatenation operation is not implemented for NumPy arrays"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ops(pd.Timestamp("20180101"))
|
||||
|
||||
# invalid array-likes
|
||||
if op not in ("__mul__", "__rmul__"):
|
||||
# TODO(extension) numpy's mul with object array sees booleans as numbers
|
||||
msg = (
|
||||
r"unsupported operand type\(s\) for|can only concatenate str|"
|
||||
"not all arguments converted during string formatting"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ops(pd.Series("foo", index=s.index))
|
@ -0,0 +1,53 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_astype():
|
||||
# with missing values
|
||||
arr = pd.array([True, False, None], dtype="boolean")
|
||||
|
||||
with pytest.raises(ValueError, match="cannot convert NA to integer"):
|
||||
arr.astype("int64")
|
||||
|
||||
with pytest.raises(ValueError, match="cannot convert float NaN to"):
|
||||
arr.astype("bool")
|
||||
|
||||
result = arr.astype("float64")
|
||||
expected = np.array([1, 0, np.nan], dtype="float64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = arr.astype("str")
|
||||
expected = np.array(["True", "False", "<NA>"], dtype="<U5")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# no missing values
|
||||
arr = pd.array([True, False, True], dtype="boolean")
|
||||
result = arr.astype("int64")
|
||||
expected = np.array([1, 0, 1], dtype="int64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = arr.astype("bool")
|
||||
expected = np.array([True, False, True], dtype="bool")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_to_boolean_array():
|
||||
# astype to BooleanArray
|
||||
arr = pd.array([True, False, None], dtype="boolean")
|
||||
|
||||
result = arr.astype("boolean")
|
||||
tm.assert_extension_array_equal(result, arr)
|
||||
result = arr.astype(pd.BooleanDtype())
|
||||
tm.assert_extension_array_equal(result, arr)
|
||||
|
||||
|
||||
def test_astype_to_integer_array():
|
||||
# astype to IntegerArray
|
||||
arr = pd.array([True, False, None], dtype="boolean")
|
||||
|
||||
result = arr.astype("Int64")
|
||||
expected = pd.array([1, 0, None], dtype="Int64")
|
||||
tm.assert_extension_array_equal(result, expected)
|
@ -0,0 +1,58 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.arrays import BooleanArray
|
||||
from pandas.tests.arrays.masked_shared import ComparisonOps
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data():
|
||||
return pd.array(
|
||||
[True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False],
|
||||
dtype="boolean",
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def dtype():
|
||||
return pd.BooleanDtype()
|
||||
|
||||
|
||||
class TestComparisonOps(ComparisonOps):
|
||||
def test_compare_scalar(self, data, comparison_op):
|
||||
self._compare_other(data, comparison_op, True)
|
||||
|
||||
def test_compare_array(self, data, comparison_op):
|
||||
other = pd.array([True] * len(data), dtype="boolean")
|
||||
self._compare_other(data, comparison_op, other)
|
||||
other = np.array([True] * len(data))
|
||||
self._compare_other(data, comparison_op, other)
|
||||
other = pd.Series([True] * len(data))
|
||||
self._compare_other(data, comparison_op, other)
|
||||
|
||||
@pytest.mark.parametrize("other", [True, False, pd.NA])
|
||||
def test_scalar(self, other, comparison_op, dtype):
|
||||
ComparisonOps.test_scalar(self, other, comparison_op, dtype)
|
||||
|
||||
def test_array(self, comparison_op):
|
||||
op = comparison_op
|
||||
a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
|
||||
b = pd.array([True, False, None] * 3, dtype="boolean")
|
||||
|
||||
result = op(a, b)
|
||||
|
||||
values = op(a._data, b._data)
|
||||
mask = a._mask | b._mask
|
||||
expected = BooleanArray(values, mask)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# ensure we haven't mutated anything inplace
|
||||
result[0] = None
|
||||
tm.assert_extension_array_equal(
|
||||
a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
|
||||
)
|
||||
tm.assert_extension_array_equal(
|
||||
b, pd.array([True, False, None] * 3, dtype="boolean")
|
||||
)
|
@ -0,0 +1,323 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.arrays import BooleanArray
|
||||
from pandas.core.arrays.boolean import coerce_to_array
|
||||
|
||||
|
||||
def test_boolean_array_constructor():
|
||||
values = np.array([True, False, True, False], dtype="bool")
|
||||
mask = np.array([False, False, False, True], dtype="bool")
|
||||
|
||||
result = BooleanArray(values, mask)
|
||||
expected = pd.array([True, False, True, None], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
with pytest.raises(TypeError, match="values should be boolean numpy array"):
|
||||
BooleanArray(values.tolist(), mask)
|
||||
|
||||
with pytest.raises(TypeError, match="mask should be boolean numpy array"):
|
||||
BooleanArray(values, mask.tolist())
|
||||
|
||||
with pytest.raises(TypeError, match="values should be boolean numpy array"):
|
||||
BooleanArray(values.astype(int), mask)
|
||||
|
||||
with pytest.raises(TypeError, match="mask should be boolean numpy array"):
|
||||
BooleanArray(values, None)
|
||||
|
||||
with pytest.raises(ValueError, match="values.shape must match mask.shape"):
|
||||
BooleanArray(values.reshape(1, -1), mask)
|
||||
|
||||
with pytest.raises(ValueError, match="values.shape must match mask.shape"):
|
||||
BooleanArray(values, mask.reshape(1, -1))
|
||||
|
||||
|
||||
def test_boolean_array_constructor_copy():
|
||||
values = np.array([True, False, True, False], dtype="bool")
|
||||
mask = np.array([False, False, False, True], dtype="bool")
|
||||
|
||||
result = BooleanArray(values, mask)
|
||||
assert result._data is values
|
||||
assert result._mask is mask
|
||||
|
||||
result = BooleanArray(values, mask, copy=True)
|
||||
assert result._data is not values
|
||||
assert result._mask is not mask
|
||||
|
||||
|
||||
def test_to_boolean_array():
|
||||
expected = BooleanArray(
|
||||
np.array([True, False, True]), np.array([False, False, False])
|
||||
)
|
||||
|
||||
result = pd.array([True, False, True], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
result = pd.array(np.array([True, False, True]), dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
result = pd.array(np.array([True, False, True], dtype=object), dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# with missing values
|
||||
expected = BooleanArray(
|
||||
np.array([True, False, True]), np.array([False, False, True])
|
||||
)
|
||||
|
||||
result = pd.array([True, False, None], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
result = pd.array(np.array([True, False, None], dtype=object), dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_boolean_array_all_none():
|
||||
expected = BooleanArray(np.array([True, True, True]), np.array([True, True, True]))
|
||||
|
||||
result = pd.array([None, None, None], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
result = pd.array(np.array([None, None, None], dtype=object), dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"a, b",
|
||||
[
|
||||
([True, False, None, np.nan, pd.NA], [True, False, None, None, None]),
|
||||
([True, np.nan], [True, None]),
|
||||
([True, pd.NA], [True, None]),
|
||||
([np.nan, np.nan], [None, None]),
|
||||
(np.array([np.nan, np.nan], dtype=float), [None, None]),
|
||||
],
|
||||
)
|
||||
def test_to_boolean_array_missing_indicators(a, b):
|
||||
result = pd.array(a, dtype="boolean")
|
||||
expected = pd.array(b, dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values",
|
||||
[
|
||||
["foo", "bar"],
|
||||
["1", "2"],
|
||||
# "foo",
|
||||
[1, 2],
|
||||
[1.0, 2.0],
|
||||
pd.date_range("20130101", periods=2),
|
||||
np.array(["foo"]),
|
||||
np.array([1, 2]),
|
||||
np.array([1.0, 2.0]),
|
||||
[np.nan, {"a": 1}],
|
||||
],
|
||||
)
|
||||
def test_to_boolean_array_error(values):
|
||||
# error in converting existing arrays to BooleanArray
|
||||
msg = "Need to pass bool-like value"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
pd.array(values, dtype="boolean")
|
||||
|
||||
|
||||
def test_to_boolean_array_from_integer_array():
|
||||
result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean")
|
||||
expected = pd.array([True, False, True, False], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# with missing values
|
||||
result = pd.array(np.array([1, 0, 1, None]), dtype="boolean")
|
||||
expected = pd.array([True, False, True, None], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_boolean_array_from_float_array():
|
||||
result = pd.array(np.array([1.0, 0.0, 1.0, 0.0]), dtype="boolean")
|
||||
expected = pd.array([True, False, True, False], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# with missing values
|
||||
result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean")
|
||||
expected = pd.array([True, False, True, None], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_boolean_array_integer_like():
|
||||
# integers of 0's and 1's
|
||||
result = pd.array([1, 0, 1, 0], dtype="boolean")
|
||||
expected = pd.array([True, False, True, False], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# with missing values
|
||||
result = pd.array([1, 0, 1, None], dtype="boolean")
|
||||
expected = pd.array([True, False, True, None], dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_coerce_to_array():
|
||||
# TODO this is currently not public API
|
||||
values = np.array([True, False, True, False], dtype="bool")
|
||||
mask = np.array([False, False, False, True], dtype="bool")
|
||||
result = BooleanArray(*coerce_to_array(values, mask=mask))
|
||||
expected = BooleanArray(values, mask)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
assert result._data is values
|
||||
assert result._mask is mask
|
||||
result = BooleanArray(*coerce_to_array(values, mask=mask, copy=True))
|
||||
expected = BooleanArray(values, mask)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
assert result._data is not values
|
||||
assert result._mask is not mask
|
||||
|
||||
# mixed missing from values and mask
|
||||
values = [True, False, None, False]
|
||||
mask = np.array([False, False, False, True], dtype="bool")
|
||||
result = BooleanArray(*coerce_to_array(values, mask=mask))
|
||||
expected = BooleanArray(
|
||||
np.array([True, False, True, True]), np.array([False, False, True, True])
|
||||
)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
result = BooleanArray(*coerce_to_array(np.array(values, dtype=object), mask=mask))
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
result = BooleanArray(*coerce_to_array(values, mask=mask.tolist()))
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# raise errors for wrong dimension
|
||||
values = np.array([True, False, True, False], dtype="bool")
|
||||
mask = np.array([False, False, False, True], dtype="bool")
|
||||
|
||||
with pytest.raises(ValueError, match="values.shape and mask.shape must match"):
|
||||
coerce_to_array(values.reshape(1, -1))
|
||||
|
||||
with pytest.raises(ValueError, match="values.shape and mask.shape must match"):
|
||||
coerce_to_array(values, mask=mask.reshape(1, -1))
|
||||
|
||||
|
||||
def test_coerce_to_array_from_boolean_array():
|
||||
# passing BooleanArray to coerce_to_array
|
||||
values = np.array([True, False, True, False], dtype="bool")
|
||||
mask = np.array([False, False, False, True], dtype="bool")
|
||||
arr = BooleanArray(values, mask)
|
||||
result = BooleanArray(*coerce_to_array(arr))
|
||||
tm.assert_extension_array_equal(result, arr)
|
||||
# no copy
|
||||
assert result._data is arr._data
|
||||
assert result._mask is arr._mask
|
||||
|
||||
result = BooleanArray(*coerce_to_array(arr), copy=True)
|
||||
tm.assert_extension_array_equal(result, arr)
|
||||
assert result._data is not arr._data
|
||||
assert result._mask is not arr._mask
|
||||
|
||||
with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"):
|
||||
coerce_to_array(arr, mask=mask)
|
||||
|
||||
|
||||
def test_coerce_to_numpy_array():
|
||||
# with missing values -> object dtype
|
||||
arr = pd.array([True, False, None], dtype="boolean")
|
||||
result = np.array(arr)
|
||||
expected = np.array([True, False, pd.NA], dtype="object")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# also with no missing values -> object dtype
|
||||
arr = pd.array([True, False, True], dtype="boolean")
|
||||
result = np.array(arr)
|
||||
expected = np.array([True, False, True], dtype="object")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# force bool dtype
|
||||
result = np.array(arr, dtype="bool")
|
||||
expected = np.array([True, False, True], dtype="bool")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
# with missing values will raise error
|
||||
arr = pd.array([True, False, None], dtype="boolean")
|
||||
msg = (
|
||||
"cannot convert to 'bool'-dtype NumPy array with missing values. "
|
||||
"Specify an appropriate 'na_value' for this dtype."
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.array(arr, dtype="bool")
|
||||
|
||||
|
||||
def test_to_boolean_array_from_strings():
|
||||
result = BooleanArray._from_sequence_of_strings(
|
||||
np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object)
|
||||
)
|
||||
expected = BooleanArray(
|
||||
np.array([True, False, True, True, False, False, False]),
|
||||
np.array([False, False, False, False, False, False, True]),
|
||||
)
|
||||
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_boolean_array_from_strings_invalid_string():
|
||||
with pytest.raises(ValueError, match="cannot be cast"):
|
||||
BooleanArray._from_sequence_of_strings(["donkey"])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
|
||||
def test_to_numpy(box):
|
||||
con = pd.Series if box else pd.array
|
||||
# default (with or without missing values) -> object dtype
|
||||
arr = con([True, False, True], dtype="boolean")
|
||||
result = arr.to_numpy()
|
||||
expected = np.array([True, False, True], dtype="object")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
arr = con([True, False, None], dtype="boolean")
|
||||
result = arr.to_numpy()
|
||||
expected = np.array([True, False, pd.NA], dtype="object")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
arr = con([True, False, None], dtype="boolean")
|
||||
result = arr.to_numpy(dtype="str")
|
||||
expected = np.array([True, False, pd.NA], dtype="<U5")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# no missing values -> can convert to bool, otherwise raises
|
||||
arr = con([True, False, True], dtype="boolean")
|
||||
result = arr.to_numpy(dtype="bool")
|
||||
expected = np.array([True, False, True], dtype="bool")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
arr = con([True, False, None], dtype="boolean")
|
||||
with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype"):
|
||||
result = arr.to_numpy(dtype="bool")
|
||||
|
||||
# specify dtype and na_value
|
||||
arr = con([True, False, None], dtype="boolean")
|
||||
result = arr.to_numpy(dtype=object, na_value=None)
|
||||
expected = np.array([True, False, None], dtype="object")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = arr.to_numpy(dtype=bool, na_value=False)
|
||||
expected = np.array([True, False, False], dtype="bool")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = arr.to_numpy(dtype="int64", na_value=-99)
|
||||
expected = np.array([1, 0, -99], dtype="int64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = arr.to_numpy(dtype="float64", na_value=np.nan)
|
||||
expected = np.array([1, 0, np.nan], dtype="float64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# converting to int or float without specifying na_value raises
|
||||
with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"):
|
||||
arr.to_numpy(dtype="int64")
|
||||
with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"):
|
||||
arr.to_numpy(dtype="float64")
|
||||
|
||||
|
||||
def test_to_numpy_copy():
|
||||
# to_numpy can be zero-copy if no missing values
|
||||
arr = pd.array([True, False, True], dtype="boolean")
|
||||
result = arr.to_numpy(dtype=bool)
|
||||
result[0] = False
|
||||
tm.assert_extension_array_equal(
|
||||
arr, pd.array([False, False, True], dtype="boolean")
|
||||
)
|
||||
|
||||
arr = pd.array([True, False, True], dtype="boolean")
|
||||
result = arr.to_numpy(dtype=bool, copy=True)
|
||||
result[0] = False
|
||||
tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean"))
|
@ -0,0 +1,126 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"ufunc", [np.add, np.logical_or, np.logical_and, np.logical_xor]
|
||||
)
|
||||
def test_ufuncs_binary(ufunc):
|
||||
# two BooleanArrays
|
||||
a = pd.array([True, False, None], dtype="boolean")
|
||||
result = ufunc(a, a)
|
||||
expected = pd.array(ufunc(a._data, a._data), dtype="boolean")
|
||||
expected[a._mask] = np.nan
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
s = pd.Series(a)
|
||||
result = ufunc(s, a)
|
||||
expected = pd.Series(ufunc(a._data, a._data), dtype="boolean")
|
||||
expected[a._mask] = np.nan
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# Boolean with numpy array
|
||||
arr = np.array([True, True, False])
|
||||
result = ufunc(a, arr)
|
||||
expected = pd.array(ufunc(a._data, arr), dtype="boolean")
|
||||
expected[a._mask] = np.nan
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = ufunc(arr, a)
|
||||
expected = pd.array(ufunc(arr, a._data), dtype="boolean")
|
||||
expected[a._mask] = np.nan
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# BooleanArray with scalar
|
||||
result = ufunc(a, True)
|
||||
expected = pd.array(ufunc(a._data, True), dtype="boolean")
|
||||
expected[a._mask] = np.nan
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = ufunc(True, a)
|
||||
expected = pd.array(ufunc(True, a._data), dtype="boolean")
|
||||
expected[a._mask] = np.nan
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# not handled types
|
||||
msg = r"operand type\(s\) all returned NotImplemented from __array_ufunc__"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ufunc(a, "test")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.logical_not])
|
||||
def test_ufuncs_unary(ufunc):
|
||||
a = pd.array([True, False, None], dtype="boolean")
|
||||
result = ufunc(a)
|
||||
expected = pd.array(ufunc(a._data), dtype="boolean")
|
||||
expected[a._mask] = np.nan
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
ser = pd.Series(a)
|
||||
result = ufunc(ser)
|
||||
expected = pd.Series(ufunc(a._data), dtype="boolean")
|
||||
expected[a._mask] = np.nan
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_ufunc_numeric():
|
||||
# np.sqrt on np.bool returns float16, which we upcast to Float32
|
||||
# bc we do not have Float16
|
||||
arr = pd.array([True, False, None], dtype="boolean")
|
||||
|
||||
res = np.sqrt(arr)
|
||||
|
||||
expected = pd.array([1, 0, None], dtype="Float32")
|
||||
tm.assert_extension_array_equal(res, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("values", [[True, False], [True, None]])
|
||||
def test_ufunc_reduce_raises(values):
|
||||
arr = pd.array(values, dtype="boolean")
|
||||
|
||||
res = np.add.reduce(arr)
|
||||
if arr[-1] is pd.NA:
|
||||
expected = pd.NA
|
||||
else:
|
||||
expected = arr._data.sum()
|
||||
tm.assert_almost_equal(res, expected)
|
||||
|
||||
|
||||
def test_value_counts_na():
|
||||
arr = pd.array([True, False, pd.NA], dtype="boolean")
|
||||
result = arr.value_counts(dropna=False)
|
||||
expected = pd.Series([1, 1, 1], index=arr, dtype="Int64")
|
||||
assert expected.index.dtype == arr.dtype
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = arr.value_counts(dropna=True)
|
||||
expected = pd.Series([1, 1], index=arr[:-1], dtype="Int64")
|
||||
assert expected.index.dtype == arr.dtype
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_value_counts_with_normalize():
|
||||
ser = pd.Series([True, False, pd.NA], dtype="boolean")
|
||||
result = ser.value_counts(normalize=True)
|
||||
expected = pd.Series([1, 1], index=ser[:-1], dtype="Float64") / 2
|
||||
assert expected.index.dtype == "boolean"
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_diff():
|
||||
a = pd.array(
|
||||
[True, True, False, False, True, None, True, None, False], dtype="boolean"
|
||||
)
|
||||
result = pd.core.algorithms.diff(a, 1)
|
||||
expected = pd.array(
|
||||
[None, False, True, False, True, None, None, None, None], dtype="boolean"
|
||||
)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
ser = pd.Series(a)
|
||||
result = ser.diff()
|
||||
expected = pd.Series(expected)
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,13 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("na", [None, np.nan, pd.NA])
|
||||
def test_setitem_missing_values(na):
|
||||
arr = pd.array([True, False, None], dtype="boolean")
|
||||
expected = pd.array([True, None, None], dtype="boolean")
|
||||
arr[1] = na
|
||||
tm.assert_extension_array_equal(arr, expected)
|
@ -0,0 +1,254 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.arrays import BooleanArray
|
||||
from pandas.core.ops.mask_ops import (
|
||||
kleene_and,
|
||||
kleene_or,
|
||||
kleene_xor,
|
||||
)
|
||||
from pandas.tests.extension.base import BaseOpsUtil
|
||||
|
||||
|
||||
class TestLogicalOps(BaseOpsUtil):
|
||||
def test_numpy_scalars_ok(self, all_logical_operators):
|
||||
a = pd.array([True, False, None], dtype="boolean")
|
||||
op = getattr(a, all_logical_operators)
|
||||
|
||||
tm.assert_extension_array_equal(op(True), op(np.bool_(True)))
|
||||
tm.assert_extension_array_equal(op(False), op(np.bool_(False)))
|
||||
|
||||
def get_op_from_name(self, op_name):
|
||||
short_opname = op_name.strip("_")
|
||||
short_opname = short_opname if "xor" in short_opname else short_opname + "_"
|
||||
try:
|
||||
op = getattr(operator, short_opname)
|
||||
except AttributeError:
|
||||
# Assume it is the reverse operator
|
||||
rop = getattr(operator, short_opname[1:])
|
||||
op = lambda x, y: rop(y, x)
|
||||
|
||||
return op
|
||||
|
||||
def test_empty_ok(self, all_logical_operators):
|
||||
a = pd.array([], dtype="boolean")
|
||||
op_name = all_logical_operators
|
||||
result = getattr(a, op_name)(True)
|
||||
tm.assert_extension_array_equal(a, result)
|
||||
|
||||
result = getattr(a, op_name)(False)
|
||||
tm.assert_extension_array_equal(a, result)
|
||||
|
||||
result = getattr(a, op_name)(pd.NA)
|
||||
tm.assert_extension_array_equal(a, result)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"other", ["a", pd.Timestamp(2017, 1, 1, 12), np.timedelta64(4)]
|
||||
)
|
||||
def test_eq_mismatched_type(self, other):
|
||||
# GH-44499
|
||||
arr = pd.array([True, False])
|
||||
result = arr == other
|
||||
expected = pd.array([False, False])
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = arr != other
|
||||
expected = pd.array([True, True])
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
def test_logical_length_mismatch_raises(self, all_logical_operators):
|
||||
op_name = all_logical_operators
|
||||
a = pd.array([True, False, None], dtype="boolean")
|
||||
msg = "Lengths must match to compare"
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
getattr(a, op_name)([True, False])
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
getattr(a, op_name)(np.array([True, False]))
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
getattr(a, op_name)(pd.array([True, False], dtype="boolean"))
|
||||
|
||||
def test_logical_nan_raises(self, all_logical_operators):
|
||||
op_name = all_logical_operators
|
||||
a = pd.array([True, False, None], dtype="boolean")
|
||||
msg = "Got float instead"
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
getattr(a, op_name)(np.nan)
|
||||
|
||||
@pytest.mark.parametrize("other", ["a", 1])
|
||||
def test_non_bool_or_na_other_raises(self, other, all_logical_operators):
|
||||
a = pd.array([True, False], dtype="boolean")
|
||||
with pytest.raises(TypeError, match=str(type(other).__name__)):
|
||||
getattr(a, all_logical_operators)(other)
|
||||
|
||||
def test_kleene_or(self):
|
||||
# A clear test of behavior.
|
||||
a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
|
||||
b = pd.array([True, False, None] * 3, dtype="boolean")
|
||||
result = a | b
|
||||
expected = pd.array(
|
||||
[True, True, True, True, False, None, True, None, None], dtype="boolean"
|
||||
)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = b | a
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# ensure we haven't mutated anything inplace
|
||||
tm.assert_extension_array_equal(
|
||||
a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
|
||||
)
|
||||
tm.assert_extension_array_equal(
|
||||
b, pd.array([True, False, None] * 3, dtype="boolean")
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"other, expected",
|
||||
[
|
||||
(pd.NA, [True, None, None]),
|
||||
(True, [True, True, True]),
|
||||
(np.bool_(True), [True, True, True]),
|
||||
(False, [True, False, None]),
|
||||
(np.bool_(False), [True, False, None]),
|
||||
],
|
||||
)
|
||||
def test_kleene_or_scalar(self, other, expected):
|
||||
# TODO: test True & False
|
||||
a = pd.array([True, False, None], dtype="boolean")
|
||||
result = a | other
|
||||
expected = pd.array(expected, dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = other | a
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# ensure we haven't mutated anything inplace
|
||||
tm.assert_extension_array_equal(
|
||||
a, pd.array([True, False, None], dtype="boolean")
|
||||
)
|
||||
|
||||
def test_kleene_and(self):
|
||||
# A clear test of behavior.
|
||||
a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
|
||||
b = pd.array([True, False, None] * 3, dtype="boolean")
|
||||
result = a & b
|
||||
expected = pd.array(
|
||||
[True, False, None, False, False, False, None, False, None], dtype="boolean"
|
||||
)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = b & a
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# ensure we haven't mutated anything inplace
|
||||
tm.assert_extension_array_equal(
|
||||
a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
|
||||
)
|
||||
tm.assert_extension_array_equal(
|
||||
b, pd.array([True, False, None] * 3, dtype="boolean")
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"other, expected",
|
||||
[
|
||||
(pd.NA, [None, False, None]),
|
||||
(True, [True, False, None]),
|
||||
(False, [False, False, False]),
|
||||
(np.bool_(True), [True, False, None]),
|
||||
(np.bool_(False), [False, False, False]),
|
||||
],
|
||||
)
|
||||
def test_kleene_and_scalar(self, other, expected):
|
||||
a = pd.array([True, False, None], dtype="boolean")
|
||||
result = a & other
|
||||
expected = pd.array(expected, dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = other & a
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# ensure we haven't mutated anything inplace
|
||||
tm.assert_extension_array_equal(
|
||||
a, pd.array([True, False, None], dtype="boolean")
|
||||
)
|
||||
|
||||
def test_kleene_xor(self):
|
||||
a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
|
||||
b = pd.array([True, False, None] * 3, dtype="boolean")
|
||||
result = a ^ b
|
||||
expected = pd.array(
|
||||
[False, True, None, True, False, None, None, None, None], dtype="boolean"
|
||||
)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = b ^ a
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# ensure we haven't mutated anything inplace
|
||||
tm.assert_extension_array_equal(
|
||||
a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
|
||||
)
|
||||
tm.assert_extension_array_equal(
|
||||
b, pd.array([True, False, None] * 3, dtype="boolean")
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"other, expected",
|
||||
[
|
||||
(pd.NA, [None, None, None]),
|
||||
(True, [False, True, None]),
|
||||
(np.bool_(True), [False, True, None]),
|
||||
(np.bool_(False), [True, False, None]),
|
||||
],
|
||||
)
|
||||
def test_kleene_xor_scalar(self, other, expected):
|
||||
a = pd.array([True, False, None], dtype="boolean")
|
||||
result = a ^ other
|
||||
expected = pd.array(expected, dtype="boolean")
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = other ^ a
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# ensure we haven't mutated anything inplace
|
||||
tm.assert_extension_array_equal(
|
||||
a, pd.array([True, False, None], dtype="boolean")
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize("other", [True, False, pd.NA, [True, False, None] * 3])
|
||||
def test_no_masked_assumptions(self, other, all_logical_operators):
|
||||
# The logical operations should not assume that masked values are False!
|
||||
a = pd.arrays.BooleanArray(
|
||||
np.array([True, True, True, False, False, False, True, False, True]),
|
||||
np.array([False] * 6 + [True, True, True]),
|
||||
)
|
||||
b = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
|
||||
if isinstance(other, list):
|
||||
other = pd.array(other, dtype="boolean")
|
||||
|
||||
result = getattr(a, all_logical_operators)(other)
|
||||
expected = getattr(b, all_logical_operators)(other)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
if isinstance(other, BooleanArray):
|
||||
other._data[other._mask] = True
|
||||
a._data[a._mask] = False
|
||||
|
||||
result = getattr(a, all_logical_operators)(other)
|
||||
expected = getattr(b, all_logical_operators)(other)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("operation", [kleene_or, kleene_xor, kleene_and])
|
||||
def test_error_both_scalar(operation):
|
||||
msg = r"Either `left` or `right` need to be a np\.ndarray."
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
# masks need to be non-None, otherwise it ends up in an infinite recursion
|
||||
operation(True, True, np.zeros(1), np.zeros(1))
|
@ -0,0 +1,27 @@
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestUnaryOps:
|
||||
def test_invert(self):
|
||||
a = pd.array([True, False, None], dtype="boolean")
|
||||
expected = pd.array([False, True, None], dtype="boolean")
|
||||
tm.assert_extension_array_equal(~a, expected)
|
||||
|
||||
expected = pd.Series(expected, index=["a", "b", "c"], name="name")
|
||||
result = ~pd.Series(a, index=["a", "b", "c"], name="name")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
df = pd.DataFrame({"A": a, "B": [True, False, False]}, index=["a", "b", "c"])
|
||||
result = ~df
|
||||
expected = pd.DataFrame(
|
||||
{"A": expected, "B": [False, True, True]}, index=["a", "b", "c"]
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_abs(self):
|
||||
# matching numpy behavior, abs is the identity function
|
||||
arr = pd.array([True, False, None], dtype="boolean")
|
||||
result = abs(arr)
|
||||
|
||||
tm.assert_extension_array_equal(result, arr)
|
@ -0,0 +1,60 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data():
|
||||
return pd.array(
|
||||
[True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False],
|
||||
dtype="boolean",
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values, exp_any, exp_all, exp_any_noskip, exp_all_noskip",
|
||||
[
|
||||
([True, pd.NA], True, True, True, pd.NA),
|
||||
([False, pd.NA], False, False, pd.NA, False),
|
||||
([pd.NA], False, True, pd.NA, pd.NA),
|
||||
([], False, True, False, True),
|
||||
# GH-33253: all True / all False values buggy with skipna=False
|
||||
([True, True], True, True, True, True),
|
||||
([False, False], False, False, False, False),
|
||||
],
|
||||
)
|
||||
def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
|
||||
# the methods return numpy scalars
|
||||
exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any)
|
||||
exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all)
|
||||
exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip)
|
||||
exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip)
|
||||
|
||||
for con in [pd.array, pd.Series]:
|
||||
a = con(values, dtype="boolean")
|
||||
assert a.any() is exp_any
|
||||
assert a.all() is exp_all
|
||||
assert a.any(skipna=False) is exp_any_noskip
|
||||
assert a.all(skipna=False) is exp_all_noskip
|
||||
|
||||
assert np.any(a.any()) is exp_any
|
||||
assert np.all(a.all()) is exp_all
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dropna", [True, False])
|
||||
def test_reductions_return_types(dropna, data, all_numeric_reductions):
|
||||
op = all_numeric_reductions
|
||||
s = pd.Series(data)
|
||||
if dropna:
|
||||
s = s.dropna()
|
||||
|
||||
if op == "sum":
|
||||
assert isinstance(getattr(s, op)(), np.int_)
|
||||
elif op == "prod":
|
||||
assert isinstance(getattr(s, op)(), np.int_)
|
||||
elif op in ("min", "max"):
|
||||
assert isinstance(getattr(s, op)(), np.bool_)
|
||||
else:
|
||||
# "mean", "std", "var", "median", "kurt", "skew"
|
||||
assert isinstance(getattr(s, op)(), np.float64)
|
@ -0,0 +1,13 @@
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def test_repr():
|
||||
df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")})
|
||||
expected = " A\n0 True\n1 False\n2 <NA>"
|
||||
assert repr(df) == expected
|
||||
|
||||
expected = "0 True\n1 False\n2 <NA>\nName: A, dtype: boolean"
|
||||
assert repr(df.A) == expected
|
||||
|
||||
expected = "<BooleanArray>\n[True, False, <NA>]\nLength: 3, dtype: boolean"
|
||||
assert repr(df.A.array) == expected
|
Reference in New Issue
Block a user