2022-05-23 00:16:32 +04:00

1088 lines
44 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from functools import lru_cache
import os
import re
import shutil
import subprocess
import sys
import pytest
import pyarrow as pa
pytestmark = pytest.mark.gdb
here = os.path.dirname(os.path.abspath(__file__))
# The GDB script may be found in the source tree (if available)
# or in another location given by the ARROW_GDB_SCRIPT environment variable.
gdb_script = (os.environ.get('ARROW_GDB_SCRIPT') or
os.path.join(here, "../../../cpp/gdb_arrow.py"))
gdb_command = ["gdb", "--nx"]
def environment_for_gdb():
env = {}
for var in ['PATH', 'LD_LIBRARY_PATH']:
try:
env[var] = os.environ[var]
except KeyError:
pass
return env
@lru_cache()
def is_gdb_available():
try:
# Try to use the same arguments as in GdbSession so that the
# same error return gets propagated.
proc = subprocess.run(gdb_command + ["--version"],
env=environment_for_gdb(), bufsize=0,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
except FileNotFoundError:
return False
return proc.returncode == 0
@lru_cache()
def python_executable():
path = shutil.which("python3")
assert path is not None, "Couldn't find python3 executable"
return path
def skip_if_gdb_unavailable():
if not is_gdb_available():
pytest.skip("gdb command unavailable")
def skip_if_gdb_script_unavailable():
if not os.path.exists(gdb_script):
pytest.skip("gdb script not found")
class GdbSession:
proc = None
verbose = True
def __init__(self, *args, **env):
# Let stderr through to let pytest display it separately on errors
gdb_env = environment_for_gdb()
gdb_env.update(env)
self.proc = subprocess.Popen(gdb_command + list(args),
env=gdb_env, bufsize=0,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
self.last_stdout = []
self.last_stdout_line = b""
def wait_until_ready(self):
"""
Record output until the gdb prompt displays. Return recorded output.
"""
# TODO: add timeout?
while (not self.last_stdout_line.startswith(b"(gdb) ") and
self.proc.poll() is None):
block = self.proc.stdout.read(4096)
if self.verbose:
sys.stdout.buffer.write(block)
sys.stdout.buffer.flush()
block, sep, last_line = block.rpartition(b"\n")
if sep:
self.last_stdout.append(self.last_stdout_line)
self.last_stdout.append(block + sep)
self.last_stdout_line = last_line
else:
assert block == b""
self.last_stdout_line += last_line
if self.proc.poll() is not None:
raise IOError("gdb session terminated unexpectedly")
out = b"".join(self.last_stdout).decode('utf-8')
self.last_stdout = []
self.last_stdout_line = b""
return out
def issue_command(self, line):
line = line.encode('utf-8') + b"\n"
if self.verbose:
sys.stdout.buffer.write(line)
sys.stdout.buffer.flush()
self.proc.stdin.write(line)
self.proc.stdin.flush()
def run_command(self, line):
self.issue_command(line)
return self.wait_until_ready()
def print_value(self, expr):
"""
Ask gdb to print the value of an expression and return the result.
"""
out = self.run_command(f"p {expr}")
out, n = re.subn(r"^\$\d+ = ", "", out)
assert n == 1, out
# gdb may add whitespace depending on result width, remove it
return out.strip()
def select_frame(self, func_name):
"""
Select the innermost frame with the given function name.
"""
# Ideally, we would use the "frame function" command,
# but it's not available on old GDB versions (such as 8.1.1),
# so instead parse the stack trace for a matching frame number.
out = self.run_command("info stack")
pat = r"(?mi)^#(\d+)\s+.* in " + re.escape(func_name) + " "
m = re.search(pat, out)
if m is None:
pytest.fail(f"Could not select frame for function {func_name}")
frame_num = int(m[1])
out = self.run_command(f"frame {frame_num}")
assert f"in {func_name}" in out
def join(self):
if self.proc is not None:
self.proc.stdin.close()
self.proc.stdout.close() # avoid ResourceWarning
self.proc.kill()
self.proc.wait()
self.proc = None
def __del__(self):
self.join()
@pytest.fixture(scope='session')
def gdb():
skip_if_gdb_unavailable()
gdb = GdbSession("-q", python_executable())
try:
gdb.wait_until_ready()
gdb.run_command("set confirm off")
gdb.run_command("set print array-indexes on")
# Make sure gdb formatting is not terminal-dependent
gdb.run_command("set width unlimited")
gdb.run_command("set charset UTF-8")
yield gdb
finally:
gdb.join()
@pytest.fixture(scope='session')
def gdb_arrow(gdb):
if 'deb' not in pa.cpp_build_info.build_type:
pytest.skip("Arrow C++ debug symbols not available")
skip_if_gdb_script_unavailable()
gdb.run_command(f"source {gdb_script}")
lib_path_var = 'PATH' if sys.platform == 'win32' else 'LD_LIBRARY_PATH'
lib_path = os.environ.get(lib_path_var)
if lib_path:
# GDB starts the inferior process in a pristine shell, need
# to propagate the library search path to find the Arrow DLL
gdb.run_command(f"set env {lib_path_var} {lib_path}")
code = "from pyarrow.lib import _gdb_test_session; _gdb_test_session()"
out = gdb.run_command(f"run -c '{code}'")
assert ("Trace/breakpoint trap" in out or
"received signal" in out), out
gdb.select_frame("arrow::gdb::TestSession")
return gdb
def test_gdb_session(gdb):
out = gdb.run_command("show version")
assert out.startswith("GNU gdb ("), out
def test_gdb_arrow(gdb_arrow):
s = gdb_arrow.print_value("42 + 1")
assert s == "43"
def check_stack_repr(gdb, expr, expected):
"""
Check printing a stack-located value.
"""
s = gdb.print_value(expr)
if isinstance(expected, re.Pattern):
assert expected.match(s), s
else:
assert s == expected
def check_heap_repr(gdb, expr, expected):
"""
Check printing a heap-located value, given its address.
"""
s = gdb.print_value(f"*{expr}")
# GDB may prefix the value with an address or type specification
if s != expected:
assert s.endswith(f" {expected}")
def test_status(gdb_arrow):
check_stack_repr(gdb_arrow, "ok_status", "arrow::Status::OK()")
check_stack_repr(gdb_arrow, "error_status",
'arrow::Status::IOError("This is an error")')
check_stack_repr(
gdb_arrow, "error_detail_status",
'arrow::Status::IOError("This is an error", '
'detail=[custom-detail-id] "This is a detail")')
check_stack_repr(gdb_arrow, "ok_result", "arrow::Result<int>(42)")
check_stack_repr(
gdb_arrow, "error_result",
'arrow::Result<int>(arrow::Status::IOError("This is an error"))')
check_stack_repr(
gdb_arrow, "error_detail_result",
'arrow::Result<int>(arrow::Status::IOError("This is an error", '
'detail=[custom-detail-id] "This is a detail"))')
def test_string_view(gdb_arrow):
check_stack_repr(gdb_arrow, "string_view_empty",
"arrow::util::string_view of size 0")
check_stack_repr(gdb_arrow, "string_view_abc",
'arrow::util::string_view of size 3, "abc"')
check_stack_repr(
gdb_arrow, "string_view_special_chars",
r'arrow::util::string_view of size 12, "foo\"bar\000\r\n\t\037"')
check_stack_repr(
gdb_arrow, "string_view_very_long",
'arrow::util::string_view of size 5006, '
'"abc", \'K\' <repeats 5000 times>...')
def test_buffer_stack(gdb_arrow):
check_stack_repr(gdb_arrow, "buffer_null",
"arrow::Buffer of size 0, read-only")
check_stack_repr(gdb_arrow, "buffer_abc",
'arrow::Buffer of size 3, read-only, "abc"')
check_stack_repr(
gdb_arrow, "buffer_special_chars",
r'arrow::Buffer of size 12, read-only, "foo\"bar\000\r\n\t\037"')
check_stack_repr(gdb_arrow, "buffer_mutable",
'arrow::MutableBuffer of size 3, mutable, "abc"')
def test_buffer_heap(gdb_arrow):
check_heap_repr(gdb_arrow, "heap_buffer",
'arrow::Buffer of size 3, read-only, "abc"')
check_heap_repr(gdb_arrow, "heap_buffer_mutable.get()",
'arrow::Buffer of size 3, mutable, "abc"')
def test_optionals(gdb_arrow):
check_stack_repr(gdb_arrow, "int_optional",
"arrow::util::optional<int>(42)")
check_stack_repr(gdb_arrow, "null_int_optional",
"arrow::util::optional<int>(nullopt)")
def test_variants(gdb_arrow):
check_stack_repr(
gdb_arrow, "int_variant",
"arrow::util::Variant of index 0 (actual type int), value 42")
check_stack_repr(
gdb_arrow, "bool_variant",
"arrow::util::Variant of index 1 (actual type bool), value false")
check_stack_repr(
gdb_arrow, "string_variant",
re.compile(r'^arrow::util::Variant of index 2 \(actual type '
r'std::.*string.*\), value .*"hello".*'))
def test_decimals(gdb_arrow):
v128 = "98765432109876543210987654321098765432"
check_stack_repr(gdb_arrow, "decimal128_zero", "arrow::Decimal128(0)")
check_stack_repr(gdb_arrow, "decimal128_pos",
f"arrow::Decimal128({v128})")
check_stack_repr(gdb_arrow, "decimal128_neg",
f"arrow::Decimal128(-{v128})")
check_stack_repr(gdb_arrow, "basic_decimal128_zero",
"arrow::BasicDecimal128(0)")
check_stack_repr(gdb_arrow, "basic_decimal128_pos",
f"arrow::BasicDecimal128({v128})")
check_stack_repr(gdb_arrow, "basic_decimal128_neg",
f"arrow::BasicDecimal128(-{v128})")
v256 = ("9876543210987654321098765432109876543210"
"987654321098765432109876543210987654")
check_stack_repr(gdb_arrow, "decimal256_zero", "arrow::Decimal256(0)")
check_stack_repr(gdb_arrow, "decimal256_pos",
f"arrow::Decimal256({v256})")
check_stack_repr(gdb_arrow, "decimal256_neg",
f"arrow::Decimal256(-{v256})")
check_stack_repr(gdb_arrow, "basic_decimal256_zero",
"arrow::BasicDecimal256(0)")
check_stack_repr(gdb_arrow, "basic_decimal256_pos",
f"arrow::BasicDecimal256({v256})")
check_stack_repr(gdb_arrow, "basic_decimal256_neg",
f"arrow::BasicDecimal256(-{v256})")
def test_metadata(gdb_arrow):
check_heap_repr(gdb_arrow, "empty_metadata.get()",
"arrow::KeyValueMetadata of size 0")
check_heap_repr(
gdb_arrow, "metadata.get()",
('arrow::KeyValueMetadata of size 2 = {'
'["key_text"] = "some value", ["key_binary"] = "z\\000\\037\\377"}'))
def test_types_stack(gdb_arrow):
check_stack_repr(gdb_arrow, "null_type", "arrow::null()")
check_stack_repr(gdb_arrow, "bool_type", "arrow::boolean()")
check_stack_repr(gdb_arrow, "date32_type", "arrow::date32()")
check_stack_repr(gdb_arrow, "date64_type", "arrow::date64()")
check_stack_repr(gdb_arrow, "time_type_s",
"arrow::time32(arrow::TimeUnit::SECOND)")
check_stack_repr(gdb_arrow, "time_type_ms",
"arrow::time32(arrow::TimeUnit::MILLI)")
check_stack_repr(gdb_arrow, "time_type_us",
"arrow::time64(arrow::TimeUnit::MICRO)")
check_stack_repr(gdb_arrow, "time_type_ns",
"arrow::time64(arrow::TimeUnit::NANO)")
check_stack_repr(gdb_arrow, "timestamp_type_s",
"arrow::timestamp(arrow::TimeUnit::SECOND)")
check_stack_repr(
gdb_arrow, "timestamp_type_ms_timezone",
'arrow::timestamp(arrow::TimeUnit::MILLI, "Europe/Paris")')
check_stack_repr(gdb_arrow, "timestamp_type_us",
"arrow::timestamp(arrow::TimeUnit::MICRO)")
check_stack_repr(
gdb_arrow, "timestamp_type_ns_timezone",
'arrow::timestamp(arrow::TimeUnit::NANO, "Europe/Paris")')
check_stack_repr(gdb_arrow, "day_time_interval_type",
"arrow::day_time_interval()")
check_stack_repr(gdb_arrow, "month_interval_type",
"arrow::month_interval()")
check_stack_repr(gdb_arrow, "month_day_nano_interval_type",
"arrow::month_day_nano_interval()")
check_stack_repr(gdb_arrow, "duration_type_s",
"arrow::duration(arrow::TimeUnit::SECOND)")
check_stack_repr(gdb_arrow, "duration_type_ns",
"arrow::duration(arrow::TimeUnit::NANO)")
check_stack_repr(gdb_arrow, "decimal128_type",
"arrow::decimal128(16, 5)")
check_stack_repr(gdb_arrow, "decimal256_type",
"arrow::decimal256(42, 12)")
check_stack_repr(gdb_arrow, "binary_type", "arrow::binary()")
check_stack_repr(gdb_arrow, "string_type", "arrow::utf8()")
check_stack_repr(gdb_arrow, "large_binary_type", "arrow::large_binary()")
check_stack_repr(gdb_arrow, "large_string_type", "arrow::large_utf8()")
check_stack_repr(gdb_arrow, "fixed_size_binary_type",
"arrow::fixed_size_binary(10)")
check_stack_repr(gdb_arrow, "list_type",
"arrow::list(arrow::uint8())")
check_stack_repr(gdb_arrow, "large_list_type",
"arrow::large_list(arrow::large_utf8())")
check_stack_repr(gdb_arrow, "fixed_size_list_type",
"arrow::fixed_size_list(arrow::float64(), 3)")
check_stack_repr(
gdb_arrow, "map_type_unsorted",
"arrow::map(arrow::utf8(), arrow::binary(), keys_sorted=false)")
check_stack_repr(
gdb_arrow, "map_type_sorted",
"arrow::map(arrow::utf8(), arrow::binary(), keys_sorted=true)")
check_stack_repr(gdb_arrow, "struct_type_empty",
"arrow::struct_({})")
check_stack_repr(
gdb_arrow, "struct_type",
('arrow::struct_({arrow::field("ints", arrow::int8()), '
'arrow::field("strs", arrow::utf8(), nullable=false)})'))
check_stack_repr(
gdb_arrow, "sparse_union_type",
('arrow::sparse_union(fields={arrow::field("ints", arrow::int8()), '
'arrow::field("strs", arrow::utf8(), nullable=false)}, '
'type_codes={7, 42})'))
check_stack_repr(
gdb_arrow, "dense_union_type",
('arrow::dense_union(fields={arrow::field("ints", arrow::int8()), '
'arrow::field("strs", arrow::utf8(), nullable=false)}, '
'type_codes={7, 42})'))
check_stack_repr(
gdb_arrow, "dict_type_unordered",
"arrow::dictionary(arrow::int16(), arrow::utf8(), ordered=false)")
check_stack_repr(
gdb_arrow, "dict_type_ordered",
"arrow::dictionary(arrow::int16(), arrow::utf8(), ordered=true)")
check_stack_repr(
gdb_arrow, "uuid_type",
('arrow::ExtensionType "extension<uuid>" '
'with storage type arrow::fixed_size_binary(16)'))
def test_types_heap(gdb_arrow):
check_heap_repr(gdb_arrow, "heap_null_type", "arrow::null()")
check_heap_repr(gdb_arrow, "heap_bool_type", "arrow::boolean()")
check_heap_repr(gdb_arrow, "heap_time_type_ns",
"arrow::time64(arrow::TimeUnit::NANO)")
check_heap_repr(
gdb_arrow, "heap_timestamp_type_ns_timezone",
'arrow::timestamp(arrow::TimeUnit::NANO, "Europe/Paris")')
check_heap_repr(gdb_arrow, "heap_decimal128_type",
"arrow::decimal128(16, 5)")
check_heap_repr(gdb_arrow, "heap_list_type",
"arrow::list(arrow::uint8())")
check_heap_repr(gdb_arrow, "heap_large_list_type",
"arrow::large_list(arrow::large_utf8())")
check_heap_repr(gdb_arrow, "heap_fixed_size_list_type",
"arrow::fixed_size_list(arrow::float64(), 3)")
check_heap_repr(
gdb_arrow, "heap_map_type",
"arrow::map(arrow::utf8(), arrow::binary(), keys_sorted=false)")
check_heap_repr(
gdb_arrow, "heap_struct_type",
('arrow::struct_({arrow::field("ints", arrow::int8()), '
'arrow::field("strs", arrow::utf8(), nullable=false)})'))
check_heap_repr(
gdb_arrow, "heap_dict_type",
"arrow::dictionary(arrow::int16(), arrow::utf8(), ordered=false)")
check_heap_repr(
gdb_arrow, "heap_uuid_type",
('arrow::ExtensionType "extension<uuid>" '
'with storage type arrow::fixed_size_binary(16)'))
def test_fields_stack(gdb_arrow):
check_stack_repr(gdb_arrow, "int_field",
'arrow::field("ints", arrow::int64())')
check_stack_repr(
gdb_arrow, "float_field",
'arrow::field("floats", arrow::float32(), nullable=false)')
def test_fields_heap(gdb_arrow):
check_heap_repr(gdb_arrow, "heap_int_field",
'arrow::field("ints", arrow::int64())')
def test_scalars_stack(gdb_arrow):
check_stack_repr(gdb_arrow, "null_scalar", "arrow::NullScalar")
check_stack_repr(gdb_arrow, "bool_scalar",
"arrow::BooleanScalar of value true")
check_stack_repr(gdb_arrow, "bool_scalar_null",
"arrow::BooleanScalar of null value")
check_stack_repr(gdb_arrow, "int8_scalar",
"arrow::Int8Scalar of value -42")
check_stack_repr(gdb_arrow, "uint8_scalar",
"arrow::UInt8Scalar of value 234")
check_stack_repr(gdb_arrow, "int64_scalar",
"arrow::Int64Scalar of value -9223372036854775808")
check_stack_repr(gdb_arrow, "uint64_scalar",
"arrow::UInt64Scalar of value 18446744073709551615")
check_stack_repr(gdb_arrow, "half_float_scalar",
"arrow::HalfFloatScalar of value -1.5 [48640]")
check_stack_repr(gdb_arrow, "float_scalar",
"arrow::FloatScalar of value 1.25")
check_stack_repr(gdb_arrow, "double_scalar",
"arrow::DoubleScalar of value 2.5")
check_stack_repr(gdb_arrow, "time_scalar_s",
"arrow::Time32Scalar of value 100s")
check_stack_repr(gdb_arrow, "time_scalar_ms",
"arrow::Time32Scalar of value 1000ms")
check_stack_repr(gdb_arrow, "time_scalar_us",
"arrow::Time64Scalar of value 10000us")
check_stack_repr(gdb_arrow, "time_scalar_ns",
"arrow::Time64Scalar of value 100000ns")
check_stack_repr(gdb_arrow, "time_scalar_null",
"arrow::Time64Scalar of null value [ns]")
check_stack_repr(gdb_arrow, "duration_scalar_s",
"arrow::DurationScalar of value -100s")
check_stack_repr(gdb_arrow, "duration_scalar_ms",
"arrow::DurationScalar of value -1000ms")
check_stack_repr(gdb_arrow, "duration_scalar_us",
"arrow::DurationScalar of value -10000us")
check_stack_repr(gdb_arrow, "duration_scalar_ns",
"arrow::DurationScalar of value -100000ns")
check_stack_repr(gdb_arrow, "duration_scalar_null",
"arrow::DurationScalar of null value [ns]")
check_stack_repr(
gdb_arrow, "timestamp_scalar_s",
"arrow::TimestampScalar of value 12345s [no timezone]")
check_stack_repr(
gdb_arrow, "timestamp_scalar_ms",
"arrow::TimestampScalar of value -123456ms [no timezone]")
check_stack_repr(
gdb_arrow, "timestamp_scalar_us",
"arrow::TimestampScalar of value 1234567us [no timezone]")
check_stack_repr(
gdb_arrow, "timestamp_scalar_ns",
"arrow::TimestampScalar of value -12345678ns [no timezone]")
check_stack_repr(
gdb_arrow, "timestamp_scalar_null",
"arrow::TimestampScalar of null value [ns, no timezone]")
check_stack_repr(
gdb_arrow, "timestamp_scalar_s_tz",
'arrow::TimestampScalar of value 12345s ["Europe/Paris"]')
check_stack_repr(
gdb_arrow, "timestamp_scalar_ms_tz",
'arrow::TimestampScalar of value -123456ms ["Europe/Paris"]')
check_stack_repr(
gdb_arrow, "timestamp_scalar_us_tz",
'arrow::TimestampScalar of value 1234567us ["Europe/Paris"]')
check_stack_repr(
gdb_arrow, "timestamp_scalar_ns_tz",
'arrow::TimestampScalar of value -12345678ns ["Europe/Paris"]')
check_stack_repr(
gdb_arrow, "timestamp_scalar_null_tz",
'arrow::TimestampScalar of null value [ns, "Europe/Paris"]')
check_stack_repr(gdb_arrow, "month_interval_scalar",
"arrow::MonthIntervalScalar of value 23M")
check_stack_repr(gdb_arrow, "month_interval_scalar_null",
"arrow::MonthIntervalScalar of null value")
check_stack_repr(gdb_arrow, "day_time_interval_scalar",
"arrow::DayTimeIntervalScalar of value 23d-456ms")
check_stack_repr(gdb_arrow, "day_time_interval_scalar_null",
"arrow::DayTimeIntervalScalar of null value")
check_stack_repr(
gdb_arrow, "month_day_nano_interval_scalar",
"arrow::MonthDayNanoIntervalScalar of value 1M23d-456ns")
check_stack_repr(
gdb_arrow, "month_day_nano_interval_scalar_null",
"arrow::MonthDayNanoIntervalScalar of null value")
check_stack_repr(gdb_arrow, "date32_scalar",
"arrow::Date32Scalar of value 23d [1970-01-24]")
check_stack_repr(gdb_arrow, "date32_scalar_null",
"arrow::Date32Scalar of null value")
check_stack_repr(gdb_arrow, "date64_scalar",
"arrow::Date64Scalar of value 3888000000ms [1970-02-15]")
check_stack_repr(gdb_arrow, "date64_scalar_null",
"arrow::Date64Scalar of null value")
check_stack_repr(
gdb_arrow, "decimal128_scalar_null",
"arrow::Decimal128Scalar of null value [precision=10, scale=4]")
check_stack_repr(
gdb_arrow, "decimal128_scalar_pos_scale_pos",
"arrow::Decimal128Scalar of value 123.4567 [precision=10, scale=4]")
check_stack_repr(
gdb_arrow, "decimal128_scalar_pos_scale_neg",
"arrow::Decimal128Scalar of value -123.4567 [precision=10, scale=4]")
check_stack_repr(
gdb_arrow, "decimal128_scalar_neg_scale_pos",
("arrow::Decimal128Scalar of value 1.234567e+10 "
"[precision=10, scale=-4]"))
check_stack_repr(
gdb_arrow, "decimal128_scalar_neg_scale_neg",
("arrow::Decimal128Scalar of value -1.234567e+10 "
"[precision=10, scale=-4]"))
check_stack_repr(
gdb_arrow, "decimal256_scalar_null",
"arrow::Decimal256Scalar of null value [precision=50, scale=4]")
check_stack_repr(
gdb_arrow, "decimal256_scalar_pos_scale_pos",
("arrow::Decimal256Scalar of value "
"123456789012345678901234567890123456789012.3456 "
"[precision=50, scale=4]"))
check_stack_repr(
gdb_arrow, "decimal256_scalar_pos_scale_neg",
("arrow::Decimal256Scalar of value "
"-123456789012345678901234567890123456789012.3456 "
"[precision=50, scale=4]"))
check_stack_repr(
gdb_arrow, "decimal256_scalar_neg_scale_pos",
("arrow::Decimal256Scalar of value "
"1.234567890123456789012345678901234567890123456e+49 "
"[precision=50, scale=-4]"))
check_stack_repr(
gdb_arrow, "decimal256_scalar_neg_scale_neg",
("arrow::Decimal256Scalar of value "
"-1.234567890123456789012345678901234567890123456e+49 "
"[precision=50, scale=-4]"))
check_stack_repr(
gdb_arrow, "binary_scalar_null",
"arrow::BinaryScalar of null value")
check_stack_repr(
gdb_arrow, "binary_scalar_unallocated",
"arrow::BinaryScalar of value <unallocated>")
check_stack_repr(
gdb_arrow, "binary_scalar_empty",
'arrow::BinaryScalar of size 0, value ""')
check_stack_repr(
gdb_arrow, "binary_scalar_abc",
'arrow::BinaryScalar of size 3, value "abc"')
check_stack_repr(
gdb_arrow, "binary_scalar_bytes",
r'arrow::BinaryScalar of size 3, value "\000\037\377"')
check_stack_repr(
gdb_arrow, "large_binary_scalar_abc",
'arrow::LargeBinaryScalar of size 3, value "abc"')
check_stack_repr(
gdb_arrow, "string_scalar_null",
"arrow::StringScalar of null value")
check_stack_repr(
gdb_arrow, "string_scalar_unallocated",
"arrow::StringScalar of value <unallocated>")
check_stack_repr(
gdb_arrow, "string_scalar_empty",
'arrow::StringScalar of size 0, value ""')
check_stack_repr(
gdb_arrow, "string_scalar_hehe",
'arrow::StringScalar of size 6, value "héhé"')
# FIXME: excessive escaping ('\\xff' vs. '\x00')
check_stack_repr(
gdb_arrow, "string_scalar_invalid_chars",
r'arrow::StringScalar of size 11, value "abc\x00def\\xffghi"')
check_stack_repr(
gdb_arrow, "large_string_scalar_hehe",
'arrow::LargeStringScalar of size 6, value "héhé"')
check_stack_repr(
gdb_arrow, "fixed_size_binary_scalar",
'arrow::FixedSizeBinaryScalar of size 3, value "abc"')
check_stack_repr(
gdb_arrow, "fixed_size_binary_scalar_null",
'arrow::FixedSizeBinaryScalar of size 3, null value')
check_stack_repr(
gdb_arrow, "dict_scalar",
re.compile(
(r'^arrow::DictionaryScalar of index '
r'arrow::Int8Scalar of value 42, '
r'dictionary arrow::StringArray ')))
check_stack_repr(
gdb_arrow, "dict_scalar_null",
('arrow::DictionaryScalar of type '
'arrow::dictionary(arrow::int8(), arrow::utf8(), ordered=false), '
'null value'))
check_stack_repr(
gdb_arrow, "list_scalar",
('arrow::ListScalar of value arrow::Int32Array of '
'length 3, offset 0, null count 0 = {[0] = 4, [1] = 5, [2] = 6}'))
check_stack_repr(
gdb_arrow, "list_scalar_null",
'arrow::ListScalar of type arrow::list(arrow::int32()), null value')
check_stack_repr(
gdb_arrow, "large_list_scalar",
('arrow::LargeListScalar of value arrow::Int32Array of '
'length 3, offset 0, null count 0 = {[0] = 4, [1] = 5, [2] = 6}'))
check_stack_repr(
gdb_arrow, "large_list_scalar_null",
('arrow::LargeListScalar of type arrow::large_list(arrow::int32()), '
'null value'))
check_stack_repr(
gdb_arrow, "fixed_size_list_scalar",
('arrow::FixedSizeListScalar of value arrow::Int32Array of '
'length 3, offset 0, null count 0 = {[0] = 4, [1] = 5, [2] = 6}'))
check_stack_repr(
gdb_arrow, "fixed_size_list_scalar_null",
('arrow::FixedSizeListScalar of type '
'arrow::fixed_size_list(arrow::int32(), 3), null value'))
check_stack_repr(
gdb_arrow, "struct_scalar",
('arrow::StructScalar = {["ints"] = arrow::Int32Scalar of value 42, '
'["strs"] = arrow::StringScalar of size 9, value "some text"}'))
check_stack_repr(
gdb_arrow, "struct_scalar_null",
('arrow::StructScalar of type arrow::struct_('
'{arrow::field("ints", arrow::int32()), '
'arrow::field("strs", arrow::utf8())}), null value'))
check_stack_repr(
gdb_arrow, "sparse_union_scalar",
('arrow::SparseUnionScalar of type code 7, '
'value arrow::Int32Scalar of value 43'))
check_stack_repr(
gdb_arrow, "sparse_union_scalar_null", re.compile(
r'^arrow::SparseUnionScalar of type arrow::sparse_union\(.*\), '
r'type code 7, null value$'))
check_stack_repr(
gdb_arrow, "dense_union_scalar",
('arrow::DenseUnionScalar of type code 7, '
'value arrow::Int32Scalar of value 43'))
check_stack_repr(
gdb_arrow, "dense_union_scalar_null", re.compile(
r'^arrow::DenseUnionScalar of type arrow::dense_union\(.*\), '
r'type code 7, null value$'))
check_stack_repr(
gdb_arrow, "extension_scalar",
('arrow::ExtensionScalar of type "extension<uuid>", '
'value arrow::FixedSizeBinaryScalar of size 16, '
'value "0123456789abcdef"'))
check_stack_repr(
gdb_arrow, "extension_scalar_null",
'arrow::ExtensionScalar of type "extension<uuid>", null value')
def test_scalars_heap(gdb_arrow):
check_heap_repr(gdb_arrow, "heap_null_scalar", "arrow::NullScalar")
check_heap_repr(gdb_arrow, "heap_bool_scalar",
"arrow::BooleanScalar of value true")
check_heap_repr(
gdb_arrow, "heap_decimal128_scalar",
"arrow::Decimal128Scalar of value 123.4567 [precision=10, scale=4]")
check_heap_repr(
gdb_arrow, "heap_decimal256_scalar",
("arrow::Decimal256Scalar of value "
"123456789012345678901234567890123456789012.3456 "
"[precision=50, scale=4]"))
check_heap_repr(
gdb_arrow, "heap_map_scalar",
('arrow::MapScalar of type arrow::map(arrow::utf8(), arrow::int32(), '
'keys_sorted=false), value length 2, offset 0, null count 0'))
check_heap_repr(
gdb_arrow, "heap_map_scalar_null",
('arrow::MapScalar of type arrow::map(arrow::utf8(), arrow::int32(), '
'keys_sorted=false), null value'))
def test_array_data(gdb_arrow):
check_stack_repr(
gdb_arrow, "int32_array_data",
("arrow::ArrayData of type arrow::int32(), length 4, offset 0, "
"null count 1 = {[0] = -5, [1] = 6, [2] = null, [3] = 42}"))
def test_arrays_stack(gdb_arrow):
check_stack_repr(
gdb_arrow, "int32_array",
("arrow::Int32Array of length 4, offset 0, null count 1 = "
"{[0] = -5, [1] = 6, [2] = null, [3] = 42}"))
check_stack_repr(
gdb_arrow, "list_array",
("arrow::ListArray of type arrow::list(arrow::int64()), "
"length 3, offset 0, null count 1"))
def test_arrays_heap(gdb_arrow):
# Null
check_heap_repr(
gdb_arrow, "heap_null_array",
"arrow::NullArray of length 2, offset 0, null count 2")
# Primitive
check_heap_repr(
gdb_arrow, "heap_int32_array",
("arrow::Int32Array of length 4, offset 0, null count 1 = {"
"[0] = -5, [1] = 6, [2] = null, [3] = 42}"))
check_heap_repr(
gdb_arrow, "heap_int32_array_no_nulls",
("arrow::Int32Array of length 4, offset 0, null count 0 = {"
"[0] = -5, [1] = 6, [2] = 3, [3] = 42}"))
check_heap_repr(
gdb_arrow, "heap_int32_array_sliced_1_9",
("arrow::Int32Array of length 9, offset 1, unknown null count = {"
"[0] = 2, [1] = -3, [2] = 4, [3] = null, [4] = -5, [5] = 6, "
"[6] = -7, [7] = 8, [8] = null}"))
check_heap_repr(
gdb_arrow, "heap_int32_array_sliced_2_6",
("arrow::Int32Array of length 6, offset 2, unknown null count = {"
"[0] = -3, [1] = 4, [2] = null, [3] = -5, [4] = 6, [5] = -7}"))
check_heap_repr(
gdb_arrow, "heap_int32_array_sliced_8_4",
("arrow::Int32Array of length 4, offset 8, unknown null count = {"
"[0] = 8, [1] = null, [2] = -9, [3] = -10}"))
check_heap_repr(
gdb_arrow, "heap_int32_array_sliced_empty",
"arrow::Int32Array of length 0, offset 6, unknown null count")
check_heap_repr(
gdb_arrow, "heap_double_array",
("arrow::DoubleArray of length 2, offset 0, null count 1 = {"
"[0] = -1.5, [1] = null}"))
check_heap_repr(
gdb_arrow, "heap_float16_array",
("arrow::HalfFloatArray of length 2, offset 0, null count 0 = {"
"[0] = 0.0, [1] = -1.5}"))
# Boolean
check_heap_repr(
gdb_arrow, "heap_bool_array",
("arrow::BooleanArray of length 18, offset 0, null count 6 = {"
"[0] = false, [1] = false, [2] = true, [3] = true, [4] = null, "
"[5] = null, [6] = false, [7] = false, [8] = true, [9] = true, "
"[10] = null, [11] = null, [12] = false, [13] = false, "
"[14] = true, [15] = true, [16] = null, [17] = null}"))
check_heap_repr(
gdb_arrow, "heap_bool_array_sliced_1_9",
("arrow::BooleanArray of length 9, offset 1, unknown null count = {"
"[0] = false, [1] = true, [2] = true, [3] = null, [4] = null, "
"[5] = false, [6] = false, [7] = true, [8] = true}"))
check_heap_repr(
gdb_arrow, "heap_bool_array_sliced_2_6",
("arrow::BooleanArray of length 6, offset 2, unknown null count = {"
"[0] = true, [1] = true, [2] = null, [3] = null, [4] = false, "
"[5] = false}"))
check_heap_repr(
gdb_arrow, "heap_bool_array_sliced_empty",
"arrow::BooleanArray of length 0, offset 6, unknown null count")
# Temporal
check_heap_repr(
gdb_arrow, "heap_date32_array",
("arrow::Date32Array of length 6, offset 0, null count 1 = {"
"[0] = 0d [1970-01-01], [1] = null, [2] = 18336d [2020-03-15], "
"[3] = -9004d [1945-05-08], [4] = -719162d [0001-01-01], "
"[5] = -719163d [year <= 0]}"))
check_heap_repr(
gdb_arrow, "heap_date64_array",
("arrow::Date64Array of length 5, offset 0, null count 0 = {"
"[0] = 1584230400000ms [2020-03-15], "
"[1] = -777945600000ms [1945-05-08], "
"[2] = -62135596800000ms [0001-01-01], "
"[3] = -62135683200000ms [year <= 0], "
"[4] = 123ms [non-multiple of 86400000]}"))
check_heap_repr(
gdb_arrow, "heap_time32_array_s",
("arrow::Time32Array of type arrow::time32(arrow::TimeUnit::SECOND), "
"length 3, offset 0, null count 1 = {"
"[0] = null, [1] = -123s, [2] = 456s}"))
check_heap_repr(
gdb_arrow, "heap_time32_array_ms",
("arrow::Time32Array of type arrow::time32(arrow::TimeUnit::MILLI), "
"length 3, offset 0, null count 1 = {"
"[0] = null, [1] = -123ms, [2] = 456ms}"))
check_heap_repr(
gdb_arrow, "heap_time64_array_us",
("arrow::Time64Array of type arrow::time64(arrow::TimeUnit::MICRO), "
"length 3, offset 0, null count 1 = {"
"[0] = null, [1] = -123us, [2] = 456us}"))
check_heap_repr(
gdb_arrow, "heap_time64_array_ns",
("arrow::Time64Array of type arrow::time64(arrow::TimeUnit::NANO), "
"length 3, offset 0, null count 1 = {"
"[0] = null, [1] = -123ns, [2] = 456ns}"))
check_heap_repr(
gdb_arrow, "heap_month_interval_array",
("arrow::MonthIntervalArray of length 3, offset 0, null count 1 = {"
"[0] = 123M, [1] = -456M, [2] = null}"))
check_heap_repr(
gdb_arrow, "heap_day_time_interval_array",
("arrow::DayTimeIntervalArray of length 2, offset 0, null count 1 = {"
"[0] = 1d-600ms, [1] = null}"))
check_heap_repr(
gdb_arrow, "heap_month_day_nano_interval_array",
("arrow::MonthDayNanoIntervalArray of length 2, offset 0, "
"null count 1 = {[0] = 1M-600d5000ns, [1] = null}"))
check_heap_repr(
gdb_arrow, "heap_duration_array_s",
("arrow::DurationArray of type arrow::duration"
"(arrow::TimeUnit::SECOND), length 2, offset 0, null count 1 = {"
"[0] = null, [1] = -1234567890123456789s}"))
check_heap_repr(
gdb_arrow, "heap_duration_array_ns",
("arrow::DurationArray of type arrow::duration"
"(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {"
"[0] = null, [1] = -1234567890123456789ns}"))
check_heap_repr(
gdb_arrow, "heap_timestamp_array_s",
("arrow::TimestampArray of type arrow::timestamp"
"(arrow::TimeUnit::SECOND), length 4, offset 0, null count 1 = {"
"[0] = null, [1] = 0s [1970-01-01 00:00:00], "
"[2] = -2203932304s [1900-02-28 12:34:56], "
"[3] = 63730281600s [3989-07-14 00:00:00]}"))
check_heap_repr(
gdb_arrow, "heap_timestamp_array_ms",
("arrow::TimestampArray of type arrow::timestamp"
"(arrow::TimeUnit::MILLI), length 3, offset 0, null count 1 = {"
"[0] = null, [1] = -2203932303877ms [1900-02-28 12:34:56.123], "
"[2] = 63730281600789ms [3989-07-14 00:00:00.789]}"))
check_heap_repr(
gdb_arrow, "heap_timestamp_array_us",
("arrow::TimestampArray of type arrow::timestamp"
"(arrow::TimeUnit::MICRO), length 3, offset 0, null count 1 = {"
"[0] = null, "
"[1] = -2203932303345679us [1900-02-28 12:34:56.654321], "
"[2] = 63730281600456789us [3989-07-14 00:00:00.456789]}"))
check_heap_repr(
gdb_arrow, "heap_timestamp_array_ns",
("arrow::TimestampArray of type arrow::timestamp"
"(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {"
"[0] = null, "
"[1] = -2203932303012345679ns [1900-02-28 12:34:56.987654321]}"))
# Decimal
check_heap_repr(
gdb_arrow, "heap_decimal128_array",
("arrow::Decimal128Array of type arrow::decimal128(30, 6), "
"length 3, offset 0, null count 1 = {"
"[0] = null, [1] = -1234567890123456789.012345, "
"[2] = 1234567890123456789.012345}"))
check_heap_repr(
gdb_arrow, "heap_decimal256_array",
("arrow::Decimal256Array of type arrow::decimal256(50, 6), "
"length 2, offset 0, null count 1 = {"
"[0] = null, "
"[1] = -123456789012345678901234567890123456789.012345}"))
check_heap_repr(
gdb_arrow, "heap_decimal128_array_sliced",
("arrow::Decimal128Array of type arrow::decimal128(30, 6), "
"length 1, offset 1, unknown null count = {"
"[0] = -1234567890123456789.012345}"))
# Binary-like
check_heap_repr(
gdb_arrow, "heap_fixed_size_binary_array",
(r'arrow::FixedSizeBinaryArray of type arrow::fixed_size_binary(3), '
r'length 3, offset 0, null count 1 = {'
r'[0] = null, [1] = "abc", [2] = "\000\037\377"}'))
check_heap_repr(
gdb_arrow, "heap_fixed_size_binary_array_zero_width",
(r'arrow::FixedSizeBinaryArray of type arrow::fixed_size_binary(0), '
r'length 2, offset 0, null count 1 = {[0] = null, [1] = ""}'))
check_heap_repr(
gdb_arrow, "heap_fixed_size_binary_array_sliced",
(r'arrow::FixedSizeBinaryArray of type arrow::fixed_size_binary(3), '
r'length 1, offset 1, unknown null count = {[0] = "abc"}'))
check_heap_repr(
gdb_arrow, "heap_binary_array",
(r'arrow::BinaryArray of length 3, offset 0, null count 1 = {'
r'[0] = null, [1] = "abcd", [2] = "\000\037\377"}'))
check_heap_repr(
gdb_arrow, "heap_large_binary_array",
(r'arrow::LargeBinaryArray of length 3, offset 0, null count 1 = {'
r'[0] = null, [1] = "abcd", [2] = "\000\037\377"}'))
check_heap_repr(
gdb_arrow, "heap_string_array",
(r'arrow::StringArray of length 3, offset 0, null count 1 = {'
r'[0] = null, [1] = "héhé", [2] = "invalid \\xff char"}'))
check_heap_repr(
gdb_arrow, "heap_large_string_array",
(r'arrow::LargeStringArray of length 3, offset 0, null count 1 = {'
r'[0] = null, [1] = "héhé", [2] = "invalid \\xff char"}'))
check_heap_repr(
gdb_arrow, "heap_binary_array_sliced",
(r'arrow::BinaryArray of length 1, offset 1, unknown null count = '
r'{[0] = "abcd"}'))
# Nested
check_heap_repr(
gdb_arrow, "heap_list_array",
("arrow::ListArray of type arrow::list(arrow::int64()), "
"length 3, offset 0, null count 1"))
def test_schema(gdb_arrow):
check_heap_repr(gdb_arrow, "schema_empty",
"arrow::Schema with 0 fields")
check_heap_repr(
gdb_arrow, "schema_non_empty",
('arrow::Schema with 2 fields = {["ints"] = arrow::int8(), '
'["strs"] = arrow::utf8()}'))
check_heap_repr(
gdb_arrow, "schema_with_metadata",
('arrow::Schema with 2 fields and 2 metadata items = '
'{["ints"] = arrow::int8(), ["strs"] = arrow::utf8()}'))
def test_chunked_array(gdb_arrow):
check_stack_repr(
gdb_arrow, "chunked_array",
("arrow::ChunkedArray of type arrow::int32(), length 5, null count 1 "
"with 2 chunks = {[0] = length 2, offset 0, null count 0, "
"[1] = length 3, offset 0, null count 1}"))
def test_record_batch(gdb_arrow):
expected_prefix = 'arrow::RecordBatch with 2 columns, 3 rows'
expected_suffix = (
'{["ints"] = arrow::ArrayData of type arrow::int32(), '
'length 3, offset 0, null count 0 = '
'{[0] = 1, [1] = 2, [2] = 3}, '
'["strs"] = arrow::ArrayData of type arrow::utf8(), '
'length 3, offset 0, null count 1 = '
'{[0] = "abc", [1] = null, [2] = "def"}}')
expected = f"{expected_prefix} = {expected_suffix}"
# Representations may differ between those two because of
# RecordBatch (base class) vs. SimpleRecordBatch (concrete class).
check_heap_repr(gdb_arrow, "batch", expected)
check_heap_repr(gdb_arrow, "batch.get()", expected)
expected = f"{expected_prefix}, 3 metadata items = {expected_suffix}"
check_heap_repr(gdb_arrow, "batch_with_metadata", expected)
def test_table(gdb_arrow):
expected_table = (
'arrow::Table with 2 columns, 5 rows = {'
'["ints"] = arrow::ChunkedArray of type arrow::int32(), '
'length 5, null count 0 with 2 chunks = '
'{[0] = length 3, offset 0, null count 0, '
'[1] = length 2, offset 0, null count 0}, '
'["strs"] = arrow::ChunkedArray of type arrow::utf8(), '
'length 5, null count 1 with 3 chunks = '
'{[0] = length 2, offset 0, null count 1, '
'[1] = length 1, offset 0, null count 0, '
'[2] = length 2, offset 0, null count 0}}')
# Same as RecordBatch above (Table vs. SimpleTable)
check_heap_repr(gdb_arrow, "table", expected_table)
check_heap_repr(gdb_arrow, "table.get()", expected_table)
def test_datum(gdb_arrow):
check_stack_repr(gdb_arrow, "empty_datum", "arrow::Datum (empty)")
check_stack_repr(
gdb_arrow, "scalar_datum",
"arrow::Datum of value arrow::BooleanScalar of null value")
check_stack_repr(
gdb_arrow, "array_datum",
re.compile(r"^arrow::Datum of value arrow::ArrayData of type "))
check_stack_repr(
gdb_arrow, "chunked_array_datum",
re.compile(r"^arrow::Datum of value arrow::ChunkedArray of type "))
check_stack_repr(
gdb_arrow, "batch_datum",
re.compile(r"^arrow::Datum of value arrow::RecordBatch "
r"with 2 columns, 3 rows "))
check_stack_repr(
gdb_arrow, "table_datum",
re.compile(r"^arrow::Datum of value arrow::Table "
r"with 2 columns, 5 rows "))