first commit

Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions


@@ -0,0 +1,42 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from libcpp.memory cimport shared_ptr
from pyarrow.includes.libarrow cimport (CArray, CBuffer, CDataType,
CField, CRecordBatch, CSchema,
CTable, CTensor, CSparseCOOTensor,
CSparseCSRMatrix, CSparseCSCMatrix,
CSparseCSFTensor)
cdef extern from "arrow/python/pyarrow.h" namespace "arrow::py":
cdef int import_pyarrow() except -1
cdef object wrap_buffer(const shared_ptr[CBuffer]& buffer)
cdef object wrap_data_type(const shared_ptr[CDataType]& type)
cdef object wrap_field(const shared_ptr[CField]& field)
cdef object wrap_schema(const shared_ptr[CSchema]& schema)
cdef object wrap_array(const shared_ptr[CArray]& sp_array)
cdef object wrap_tensor(const shared_ptr[CTensor]& sp_tensor)
cdef object wrap_sparse_tensor_coo(
const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor)
cdef object wrap_sparse_tensor_csr(
const shared_ptr[CSparseCSRMatrix]& sp_sparse_tensor)
cdef object wrap_sparse_tensor_csc(
const shared_ptr[CSparseCSCMatrix]& sp_sparse_tensor)
cdef object wrap_sparse_tensor_csf(
const shared_ptr[CSparseCSFTensor]& sp_sparse_tensor)
cdef object wrap_table(const shared_ptr[CTable]& ctable)
cdef object wrap_batch(const shared_ptr[CRecordBatch]& cbatch)
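These declarations mirror the C++ bridge helpers in arrow/python/pyarrow.h. As a minimal, hypothetical sketch (the cimport path pyarrow.includes.pyarrow is an assumption, since the file's path is not shown in this view), a downstream Cython module could convert C++ Arrow objects into Python objects like this:

# cython: language_level = 3
# Hypothetical consumer module; the cimport path below is assumed, not confirmed.
from libcpp.memory cimport shared_ptr
from pyarrow.includes.libarrow cimport CArray
from pyarrow.includes.pyarrow cimport import_pyarrow, wrap_array

# The pyarrow C API must be initialized once before any wrap_* helper is used.
import_pyarrow()

cdef object array_to_py(const shared_ptr[CArray]& sp_array):
    # Wrap a C++ arrow::Array as a pyarrow.Array Python object.
    return wrap_array(sp_array)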


@@ -0,0 +1,573 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# flake8: noqa
"""
PyArrow is the Python implementation of Apache Arrow.
Apache Arrow is a cross-language development platform for in-memory data.
It specifies a standardized language-independent columnar memory format for
flat and hierarchical data, organized for efficient analytic operations on
modern hardware. It also provides computational libraries and zero-copy
streaming messaging and interprocess communication.
For more information see the official page at https://arrow.apache.org
"""
import gc as _gc
import importlib as _importlib
import os as _os
import platform as _platform
import sys as _sys
import warnings as _warnings
try:
from ._generated_version import version as __version__
except ImportError:
# Package is not installed, parse git tag at runtime
try:
import setuptools_scm
# Code duplicated from setup.py to avoid a dependency on each other
def parse_git(root, **kwargs):
"""
Parse function for setuptools_scm that ignores tags for non-C++
subprojects, e.g. apache-arrow-js-XXX tags.
"""
from setuptools_scm.git import parse
kwargs['describe_command'] = \
"git describe --dirty --tags --long --match 'apache-arrow-[0-9].*'"
return parse(root, **kwargs)
__version__ = setuptools_scm.get_version('../',
parse=parse_git)
except ImportError:
__version__ = None
# ARROW-8684: Disable GC while initializing Cython extension module,
# to workaround Cython bug in https://github.com/cython/cython/issues/3603
_gc_enabled = _gc.isenabled()
_gc.disable()
import pyarrow.lib as _lib
if _gc_enabled:
_gc.enable()
from pyarrow.lib import (BuildInfo, RuntimeInfo, MonthDayNano,
VersionInfo, cpp_build_info, cpp_version,
cpp_version_info, runtime_info, cpu_count,
set_cpu_count, enable_signal_handlers,
io_thread_count, set_io_thread_count)
def show_versions():
"""
Print various version information, to help with error reporting.
"""
def print_entry(label, value):
print(f"{label: <26}: {value: <8}")
print("pyarrow version info\n--------------------")
print_entry("Package kind", cpp_build_info.package_kind
if len(cpp_build_info.package_kind) > 0
else "not indicated")
print_entry("Arrow C++ library version", cpp_build_info.version)
print_entry("Arrow C++ compiler",
f"{cpp_build_info.compiler_id} {cpp_build_info.compiler_version}")
print_entry("Arrow C++ compiler flags", cpp_build_info.compiler_flags)
print_entry("Arrow C++ git revision", cpp_build_info.git_id)
print_entry("Arrow C++ git description", cpp_build_info.git_description)
print_entry("Arrow C++ build type", cpp_build_info.build_type)
def _module_is_available(module):
try:
_importlib.import_module(f'pyarrow.{module}')
except ImportError:
return False
else:
return True
def _filesystem_is_available(fs):
try:
import pyarrow.fs
except ImportError:
return False
try:
getattr(pyarrow.fs, fs)
except (ImportError, AttributeError):
return False
else:
return True
def show_info():
"""
Print detailed version and platform information, for error reporting
"""
show_versions()
def print_entry(label, value):
print(f" {label: <20}: {value: <8}")
print("\nPlatform:")
print_entry("OS / Arch", f"{_platform.system()} {_platform.machine()}")
print_entry("SIMD Level", runtime_info().simd_level)
print_entry("Detected SIMD Level", runtime_info().detected_simd_level)
pool = default_memory_pool()
print("\nMemory:")
print_entry("Default backend", pool.backend_name)
print_entry("Bytes allocated", f"{pool.bytes_allocated()} bytes")
print_entry("Max memory", f"{pool.max_memory()} bytes")
print_entry("Supported Backends", ', '.join(supported_memory_backends()))
print("\nOptional modules:")
modules = ["csv", "cuda", "dataset", "feather", "flight", "fs", "gandiva", "json",
"orc", "parquet", "plasma"]
for module in modules:
status = "Enabled" if _module_is_available(module) else "-"
print(f" {module: <20}: {status: <8}")
print("\nFilesystems:")
filesystems = ["GcsFileSystem", "HadoopFileSystem", "S3FileSystem"]
for fs in filesystems:
status = "Enabled" if _filesystem_is_available(fs) else "-"
print(f" {fs: <20}: {status: <8}")
print("\nCompression Codecs:")
codecs = ["brotli", "bz2", "gzip", "lz4_frame", "lz4", "snappy", "zstd"]
for codec in codecs:
status = "Enabled" if Codec.is_available(codec) else "-"
print(f" {codec: <20}: {status: <8}")
from pyarrow.lib import (null, bool_,
int8, int16, int32, int64,
uint8, uint16, uint32, uint64,
time32, time64, timestamp, date32, date64, duration,
month_day_nano_interval,
float16, float32, float64,
binary, string, utf8,
large_binary, large_string, large_utf8,
decimal128, decimal256,
list_, large_list, map_, struct,
union, sparse_union, dense_union,
dictionary,
field,
type_for_alias,
DataType, DictionaryType, StructType,
ListType, LargeListType, MapType, FixedSizeListType,
UnionType, SparseUnionType, DenseUnionType,
TimestampType, Time32Type, Time64Type, DurationType,
FixedSizeBinaryType, Decimal128Type, Decimal256Type,
BaseExtensionType, ExtensionType,
PyExtensionType, UnknownExtensionType,
register_extension_type, unregister_extension_type,
DictionaryMemo,
KeyValueMetadata,
Field,
Schema,
schema,
unify_schemas,
Array, Tensor,
array, chunked_array, record_batch, nulls, repeat,
SparseCOOTensor, SparseCSRMatrix, SparseCSCMatrix,
SparseCSFTensor,
infer_type, from_numpy_dtype,
NullArray,
NumericArray, IntegerArray, FloatingPointArray,
BooleanArray,
Int8Array, UInt8Array,
Int16Array, UInt16Array,
Int32Array, UInt32Array,
Int64Array, UInt64Array,
ListArray, LargeListArray, MapArray,
FixedSizeListArray, UnionArray,
BinaryArray, StringArray,
LargeBinaryArray, LargeStringArray,
FixedSizeBinaryArray,
DictionaryArray,
Date32Array, Date64Array, TimestampArray,
Time32Array, Time64Array, DurationArray,
MonthDayNanoIntervalArray,
Decimal128Array, Decimal256Array, StructArray, ExtensionArray,
scalar, NA, _NULL as NULL, Scalar,
NullScalar, BooleanScalar,
Int8Scalar, Int16Scalar, Int32Scalar, Int64Scalar,
UInt8Scalar, UInt16Scalar, UInt32Scalar, UInt64Scalar,
HalfFloatScalar, FloatScalar, DoubleScalar,
Decimal128Scalar, Decimal256Scalar,
ListScalar, LargeListScalar, FixedSizeListScalar,
Date32Scalar, Date64Scalar,
Time32Scalar, Time64Scalar,
TimestampScalar, DurationScalar,
MonthDayNanoIntervalScalar,
BinaryScalar, LargeBinaryScalar,
StringScalar, LargeStringScalar,
FixedSizeBinaryScalar, DictionaryScalar,
MapScalar, StructScalar, UnionScalar,
ExtensionScalar)
# Buffers, allocation
from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
Codec, compress, decompress, allocate_buffer)
from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool,
total_allocated_bytes, set_memory_pool,
default_memory_pool, system_memory_pool,
jemalloc_memory_pool, mimalloc_memory_pool,
logging_memory_pool, proxy_memory_pool,
log_memory_allocations, jemalloc_set_decay_ms,
supported_memory_backends)
# I/O
from pyarrow.lib import (NativeFile, PythonFile,
BufferedInputStream, BufferedOutputStream,
CompressedInputStream, CompressedOutputStream,
TransformInputStream, transcoding_input_stream,
FixedSizeBufferWriter,
BufferReader, BufferOutputStream,
OSFile, MemoryMappedFile, memory_map,
create_memory_map, MockOutputStream,
input_stream, output_stream)
from pyarrow._hdfsio import HdfsFile, have_libhdfs
from pyarrow.lib import (ChunkedArray, RecordBatch, Table, table,
concat_arrays, concat_tables, TableGroupBy,
RecordBatchReader)
# Exceptions
from pyarrow.lib import (ArrowCancelled,
ArrowCapacityError,
ArrowException,
ArrowKeyError,
ArrowIndexError,
ArrowInvalid,
ArrowIOError,
ArrowMemoryError,
ArrowNotImplementedError,
ArrowTypeError,
ArrowSerializationError)
# Serialization
from pyarrow.lib import (deserialize_from, deserialize,
deserialize_components,
serialize, serialize_to, read_serialized,
SerializationCallbackError,
DeserializationCallbackError)
import pyarrow.hdfs as hdfs
from pyarrow.ipc import serialize_pandas, deserialize_pandas
import pyarrow.ipc as ipc
from pyarrow.serialization import (default_serialization_context,
register_default_serialization_handlers,
register_torch_serialization_handlers)
import pyarrow.types as types
# deprecated top-level access
from pyarrow.filesystem import FileSystem as _FileSystem
from pyarrow.filesystem import LocalFileSystem as _LocalFileSystem
from pyarrow.hdfs import HadoopFileSystem as _HadoopFileSystem
from pyarrow.lib import SerializationContext as _SerializationContext
from pyarrow.lib import SerializedPyObject as _SerializedPyObject
_localfs = _LocalFileSystem._get_instance()
_msg = (
"pyarrow.{0} is deprecated as of 2.0.0, please use pyarrow.fs.{1} instead."
)
_serialization_msg = (
"'pyarrow.{0}' is deprecated and will be removed in a future version. "
"Use pickle or the pyarrow IPC functionality instead."
)
_deprecated = {
"localfs": (_localfs, "LocalFileSystem"),
"FileSystem": (_FileSystem, "FileSystem"),
"LocalFileSystem": (_LocalFileSystem, "LocalFileSystem"),
"HadoopFileSystem": (_HadoopFileSystem, "HadoopFileSystem"),
}
_serialization_deprecated = {
"SerializationContext": _SerializationContext,
"SerializedPyObject": _SerializedPyObject,
}
def __getattr__(name):
if name in _deprecated:
obj, new_name = _deprecated[name]
_warnings.warn(_msg.format(name, new_name),
FutureWarning, stacklevel=2)
return obj
elif name in _serialization_deprecated:
_warnings.warn(_serialization_msg.format(name),
FutureWarning, stacklevel=2)
return _serialization_deprecated[name]
raise AttributeError(
"module 'pyarrow' has no attribute '{0}'".format(name)
)
# Entry point for starting the plasma store
def _plasma_store_entry_point():
"""Entry point for starting the plasma store.
This can be used by invoking e.g.
``plasma_store -s /tmp/plasma -m 1000000000``
from the command line and will start the plasma_store executable with the
given arguments.
"""
import pyarrow
plasma_store_executable = _os.path.join(pyarrow.__path__[0],
"plasma-store-server")
_os.execv(plasma_store_executable, _sys.argv)
# ----------------------------------------------------------------------
# Deprecations
from pyarrow.util import _deprecate_api, _deprecate_class
read_message = _deprecate_api("read_message", "ipc.read_message",
ipc.read_message, "0.17.0")
read_record_batch = _deprecate_api("read_record_batch",
"ipc.read_record_batch",
ipc.read_record_batch, "0.17.0")
read_schema = _deprecate_api("read_schema", "ipc.read_schema",
ipc.read_schema, "0.17.0")
read_tensor = _deprecate_api("read_tensor", "ipc.read_tensor",
ipc.read_tensor, "0.17.0")
write_tensor = _deprecate_api("write_tensor", "ipc.write_tensor",
ipc.write_tensor, "0.17.0")
get_record_batch_size = _deprecate_api("get_record_batch_size",
"ipc.get_record_batch_size",
ipc.get_record_batch_size, "0.17.0")
get_tensor_size = _deprecate_api("get_tensor_size",
"ipc.get_tensor_size",
ipc.get_tensor_size, "0.17.0")
open_stream = _deprecate_api("open_stream", "ipc.open_stream",
ipc.open_stream, "0.17.0")
open_file = _deprecate_api("open_file", "ipc.open_file", ipc.open_file,
"0.17.0")
def _deprecate_scalar(ty, symbol):
return _deprecate_class("{}Value".format(ty), symbol, "1.0.0")
ArrayValue = _deprecate_class("ArrayValue", Scalar, "1.0.0")
NullType = _deprecate_class("NullType", NullScalar, "1.0.0")
BooleanValue = _deprecate_scalar("Boolean", BooleanScalar)
Int8Value = _deprecate_scalar("Int8", Int8Scalar)
Int16Value = _deprecate_scalar("Int16", Int16Scalar)
Int32Value = _deprecate_scalar("Int32", Int32Scalar)
Int64Value = _deprecate_scalar("Int64", Int64Scalar)
UInt8Value = _deprecate_scalar("UInt8", UInt8Scalar)
UInt16Value = _deprecate_scalar("UInt16", UInt16Scalar)
UInt32Value = _deprecate_scalar("UInt32", UInt32Scalar)
UInt64Value = _deprecate_scalar("UInt64", UInt64Scalar)
HalfFloatValue = _deprecate_scalar("HalfFloat", HalfFloatScalar)
FloatValue = _deprecate_scalar("Float", FloatScalar)
DoubleValue = _deprecate_scalar("Double", DoubleScalar)
ListValue = _deprecate_scalar("List", ListScalar)
LargeListValue = _deprecate_scalar("LargeList", LargeListScalar)
MapValue = _deprecate_scalar("Map", MapScalar)
FixedSizeListValue = _deprecate_scalar("FixedSizeList", FixedSizeListScalar)
BinaryValue = _deprecate_scalar("Binary", BinaryScalar)
StringValue = _deprecate_scalar("String", StringScalar)
LargeBinaryValue = _deprecate_scalar("LargeBinary", LargeBinaryScalar)
LargeStringValue = _deprecate_scalar("LargeString", LargeStringScalar)
FixedSizeBinaryValue = _deprecate_scalar("FixedSizeBinary",
FixedSizeBinaryScalar)
Decimal128Value = _deprecate_scalar("Decimal128", Decimal128Scalar)
Decimal256Value = _deprecate_scalar("Decimal256", Decimal256Scalar)
UnionValue = _deprecate_scalar("Union", UnionScalar)
StructValue = _deprecate_scalar("Struct", StructScalar)
DictionaryValue = _deprecate_scalar("Dictionary", DictionaryScalar)
Date32Value = _deprecate_scalar("Date32", Date32Scalar)
Date64Value = _deprecate_scalar("Date64", Date64Scalar)
Time32Value = _deprecate_scalar("Time32", Time32Scalar)
Time64Value = _deprecate_scalar("Time64", Time64Scalar)
TimestampValue = _deprecate_scalar("Timestamp", TimestampScalar)
DurationValue = _deprecate_scalar("Duration", DurationScalar)
# TODO: Deprecate these somehow in the pyarrow namespace
from pyarrow.ipc import (Message, MessageReader, MetadataVersion,
RecordBatchFileReader, RecordBatchFileWriter,
RecordBatchStreamReader, RecordBatchStreamWriter)
# ----------------------------------------------------------------------
# Returning absolute path to the pyarrow include directory (if bundled, e.g. in
# wheels)
def get_include():
"""
Return absolute path to directory containing Arrow C++ include
headers. Similar to numpy.get_include
"""
return _os.path.join(_os.path.dirname(__file__), 'include')
def _get_pkg_config_executable():
return _os.environ.get('PKG_CONFIG', 'pkg-config')
def _has_pkg_config(pkgname):
import subprocess
try:
return subprocess.call([_get_pkg_config_executable(),
'--exists', pkgname]) == 0
except FileNotFoundError:
return False
def _read_pkg_config_variable(pkgname, cli_args):
import subprocess
cmd = [_get_pkg_config_executable(), pkgname] + cli_args
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, err = proc.communicate()
if proc.returncode != 0:
raise RuntimeError("pkg-config failed: " + err.decode('utf8'))
return out.rstrip().decode('utf8')
def get_libraries():
"""
Return list of library names to include in the `libraries` argument for C
or Cython extensions using pyarrow
"""
return ['arrow', 'arrow_python']
def create_library_symlinks():
"""
With Linux and macOS wheels, the bundled shared libraries have an embedded
ABI version like libarrow.so.17 or libarrow.17.dylib and so linking to them
with -larrow won't work unless we create symlinks at locations like
site-packages/pyarrow/libarrow.so. This unfortunate workaround addresses
prior problems we had with shipping two copies of the shared libraries to
permit third party projects like turbodbc to build their C++ extensions
against the pyarrow wheels.
This function must only be invoked once and only when the shared libraries
are bundled with the Python package, which should only apply to wheel-based
installs. It requires write access to the site-packages/pyarrow directory
and so depending on your system may need to be run with root.
"""
import glob
if _sys.platform == 'win32':
return
package_cwd = _os.path.dirname(__file__)
if _sys.platform == 'linux':
bundled_libs = glob.glob(_os.path.join(package_cwd, '*.so.*'))
def get_symlink_path(hard_path):
return hard_path.rsplit('.', 1)[0]
else:
bundled_libs = glob.glob(_os.path.join(package_cwd, '*.*.dylib'))
def get_symlink_path(hard_path):
return '.'.join((hard_path.rsplit('.', 2)[0], 'dylib'))
for lib_hard_path in bundled_libs:
symlink_path = get_symlink_path(lib_hard_path)
if _os.path.exists(symlink_path):
continue
try:
_os.symlink(lib_hard_path, symlink_path)
except PermissionError:
print("Tried creating symlink {}. If you need to link to "
"bundled shared libraries, run "
"pyarrow.create_library_symlinks() as root")
def get_library_dirs():
"""
Return a list of directories likely to contain Arrow C++ libraries for
linking C or Cython extensions using pyarrow
"""
package_cwd = _os.path.dirname(__file__)
library_dirs = [package_cwd]
def append_library_dir(library_dir):
if library_dir not in library_dirs:
library_dirs.append(library_dir)
# Search library paths via pkg-config. This is necessary if the user
# installed libarrow and the other shared libraries manually and they
# are not shipped inside the pyarrow package (see also ARROW-2976).
pkg_config_executable = _os.environ.get('PKG_CONFIG') or 'pkg-config'
for pkgname in ["arrow", "arrow_python"]:
if _has_pkg_config(pkgname):
library_dir = _read_pkg_config_variable(pkgname,
["--libs-only-L"])
# pkg-config output could be empty if Arrow is installed
# as a system package.
if library_dir:
if not library_dir.startswith("-L"):
raise ValueError(
"pkg-config --libs-only-L returned unexpected "
"value {!r}".format(library_dir))
append_library_dir(library_dir[2:])
if _sys.platform == 'win32':
# TODO(wesm): Is this necessary, or does setuptools within a conda
# installation add Library\lib to the linker path for MSVC?
python_base_install = _os.path.dirname(_sys.executable)
library_dir = _os.path.join(python_base_install, 'Library', 'lib')
if _os.path.exists(_os.path.join(library_dir, 'arrow.lib')):
append_library_dir(library_dir)
# ARROW-4074: Allow for ARROW_HOME to be set to some other directory
if _os.environ.get('ARROW_HOME'):
append_library_dir(_os.path.join(_os.environ['ARROW_HOME'], 'lib'))
else:
# Python wheels bundle the Arrow libraries in the pyarrow directory.
append_library_dir(_os.path.dirname(_os.path.abspath(__file__)))
return library_dirs
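get_include(), get_libraries() and get_library_dirs() above are intended to be consumed from a build script. A minimal sketch of a hypothetical setup.py for a Cython extension built against pyarrow (the module name myext and its source file are placeholders):

# Hypothetical setup.py fragment; "myext" / "myext.pyx" are placeholder names.
import pyarrow as pa
from setuptools import setup, Extension
from Cython.Build import cythonize

ext = Extension(
    "myext",
    sources=["myext.pyx"],
    include_dirs=[pa.get_include()],     # bundled Arrow C++ headers
    libraries=pa.get_libraries(),        # ['arrow', 'arrow_python']
    library_dirs=pa.get_library_dirs(),  # bundled libs and/or pkg-config paths
)

setup(name="myext", ext_modules=cythonize([ext]))

For wheel installs, create_library_symlinks() may need to run first so that plain -larrow / -larrow_python links resolve, as the docstring above explains.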


@@ -0,0 +1,54 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: language_level = 3
from pyarrow.lib cimport *
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
cdef class FunctionOptions(_Weakrefable):
cdef:
shared_ptr[CFunctionOptions] wrapped
cdef const CFunctionOptions* get_options(self) except NULL
cdef void init(self, const shared_ptr[CFunctionOptions]& sp)
cdef inline shared_ptr[CFunctionOptions] unwrap(self)
cdef CExpression _bind(Expression filter, Schema schema) except *
cdef class Expression(_Weakrefable):
cdef:
CExpression expr
cdef void init(self, const CExpression& sp)
@staticmethod
cdef wrap(const CExpression& sp)
cdef inline CExpression unwrap(self)
@staticmethod
cdef Expression _expr_or_scalar(object expr)
cdef CExpression _true

File diff suppressed because it is too large


@@ -0,0 +1,56 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Custom documentation additions for compute functions.
"""
function_doc_additions = {}
function_doc_additions["filter"] = """
Examples
--------
>>> import pyarrow as pa
>>> arr = pa.array(["a", "b", "c", None, "e"])
>>> mask = pa.array([True, False, None, False, True])
>>> arr.filter(mask)
<pyarrow.lib.StringArray object at 0x7fa826df9200>
[
"a",
"e"
]
>>> arr.filter(mask, null_selection_behavior='emit_null')
<pyarrow.lib.StringArray object at 0x7fa826df9200>
[
"a",
null,
"e"
]
"""
function_doc_additions["mode"] = """
Examples
--------
>>> import pyarrow as pa
>>> import pyarrow.compute as pc
>>> arr = pa.array([1, 1, 2, 2, 3, 2, 2, 2])
>>> modes = pc.mode(arr, 2)
>>> modes[0]
<pyarrow.StructScalar: {'mode': 2, 'count': 5}>
>>> modes[1]
<pyarrow.StructScalar: {'mode': 1, 'count': 2}>
"""


@@ -0,0 +1,55 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: language_level = 3
from pyarrow.includes.libarrow cimport *
from pyarrow.lib cimport _Weakrefable
cdef class ConvertOptions(_Weakrefable):
cdef:
unique_ptr[CCSVConvertOptions] options
@staticmethod
cdef ConvertOptions wrap(CCSVConvertOptions options)
cdef class ParseOptions(_Weakrefable):
cdef:
unique_ptr[CCSVParseOptions] options
object _invalid_row_handler
@staticmethod
cdef ParseOptions wrap(CCSVParseOptions options)
cdef class ReadOptions(_Weakrefable):
cdef:
unique_ptr[CCSVReadOptions] options
public object encoding
@staticmethod
cdef ReadOptions wrap(CCSVReadOptions options)
cdef class WriteOptions(_Weakrefable):
cdef:
unique_ptr[CCSVWriteOptions] options
@staticmethod
cdef WriteOptions wrap(CCSVWriteOptions options)
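The cdef classes above back the option types exposed by the public pyarrow.csv module. A minimal usage sketch through that public API (file names are placeholders):

# Read and write CSV through pyarrow.csv, configuring the option classes
# declared above via their public Python counterparts.
import pyarrow as pa
from pyarrow import csv

table = csv.read_csv(
    "data.csv",  # placeholder path
    read_options=csv.ReadOptions(skip_rows=0),
    parse_options=csv.ParseOptions(delimiter=","),
    convert_options=csv.ConvertOptions(column_types={"id": pa.int64()}),
)

# WriteOptions drives csv.write_csv in the same way.
csv.write_csv(table, "out.csv", write_options=csv.WriteOptions(include_header=True))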

File diff suppressed because it is too large


@@ -0,0 +1,67 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: language_level = 3
from pyarrow.lib cimport *
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_cuda cimport *
cdef class Context(_Weakrefable):
cdef:
shared_ptr[CCudaContext] context
int device_number
cdef void init(self, const shared_ptr[CCudaContext]& ctx)
cdef class IpcMemHandle(_Weakrefable):
cdef:
shared_ptr[CCudaIpcMemHandle] handle
cdef void init(self, shared_ptr[CCudaIpcMemHandle]& h)
cdef class CudaBuffer(Buffer):
cdef:
shared_ptr[CCudaBuffer] cuda_buffer
object base
cdef void init_cuda(self,
const shared_ptr[CCudaBuffer]& buffer,
object base)
cdef class HostBuffer(Buffer):
cdef:
shared_ptr[CCudaHostBuffer] host_buffer
cdef void init_host(self, const shared_ptr[CCudaHostBuffer]& buffer)
cdef class BufferReader(NativeFile):
cdef:
CCudaBufferReader* reader
CudaBuffer buffer
cdef class BufferWriter(NativeFile):
cdef:
CCudaBufferWriter* writer
CudaBuffer buffer

File diff suppressed because it is too large


@@ -0,0 +1,162 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: language_level = 3
"""Dataset is currently unstable. APIs subject to change without notice."""
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow_dataset cimport *
from pyarrow.lib cimport *
from pyarrow._fs cimport FileSystem
cdef CFileSource _make_file_source(object file, FileSystem filesystem=*)
cdef class DatasetFactory(_Weakrefable):
cdef:
shared_ptr[CDatasetFactory] wrapped
CDatasetFactory* factory
cdef init(self, const shared_ptr[CDatasetFactory]& sp)
@staticmethod
cdef wrap(const shared_ptr[CDatasetFactory]& sp)
cdef inline shared_ptr[CDatasetFactory] unwrap(self) nogil
cdef class Dataset(_Weakrefable):
cdef:
shared_ptr[CDataset] wrapped
CDataset* dataset
cdef void init(self, const shared_ptr[CDataset]& sp)
@staticmethod
cdef wrap(const shared_ptr[CDataset]& sp)
cdef shared_ptr[CDataset] unwrap(self) nogil
cdef class FragmentScanOptions(_Weakrefable):
cdef:
shared_ptr[CFragmentScanOptions] wrapped
cdef void init(self, const shared_ptr[CFragmentScanOptions]& sp)
@staticmethod
cdef wrap(const shared_ptr[CFragmentScanOptions]& sp)
cdef class FileFormat(_Weakrefable):
cdef:
shared_ptr[CFileFormat] wrapped
CFileFormat* format
cdef void init(self, const shared_ptr[CFileFormat]& sp)
@staticmethod
cdef wrap(const shared_ptr[CFileFormat]& sp)
cdef inline shared_ptr[CFileFormat] unwrap(self)
cdef _set_default_fragment_scan_options(self, FragmentScanOptions options)
# Return a WrittenFile after a file was written.
# May be overridden by subclasses, e.g. to add metadata.
cdef WrittenFile _finish_write(self, path, base_dir,
CFileWriter* file_writer)
cdef class FileWriteOptions(_Weakrefable):
cdef:
shared_ptr[CFileWriteOptions] wrapped
CFileWriteOptions* c_options
cdef void init(self, const shared_ptr[CFileWriteOptions]& sp)
@staticmethod
cdef wrap(const shared_ptr[CFileWriteOptions]& sp)
cdef inline shared_ptr[CFileWriteOptions] unwrap(self)
cdef class Fragment(_Weakrefable):
cdef:
shared_ptr[CFragment] wrapped
CFragment* fragment
cdef void init(self, const shared_ptr[CFragment]& sp)
@staticmethod
cdef wrap(const shared_ptr[CFragment]& sp)
cdef inline shared_ptr[CFragment] unwrap(self)
cdef class FileFragment(Fragment):
cdef:
CFileFragment* file_fragment
cdef void init(self, const shared_ptr[CFragment]& sp)
cdef class Partitioning(_Weakrefable):
cdef:
shared_ptr[CPartitioning] wrapped
CPartitioning* partitioning
cdef init(self, const shared_ptr[CPartitioning]& sp)
@staticmethod
cdef wrap(const shared_ptr[CPartitioning]& sp)
cdef inline shared_ptr[CPartitioning] unwrap(self)
cdef class PartitioningFactory(_Weakrefable):
cdef:
shared_ptr[CPartitioningFactory] wrapped
CPartitioningFactory* factory
cdef init(self, const shared_ptr[CPartitioningFactory]& sp)
@staticmethod
cdef wrap(const shared_ptr[CPartitioningFactory]& sp)
cdef inline shared_ptr[CPartitioningFactory] unwrap(self)
cdef class WrittenFile(_Weakrefable):
# The full path to the created file
cdef public str path
# Optional Parquet metadata
# This metadata will have the file path attribute set to the path of
# the written file.
cdef public object metadata

File diff suppressed because it is too large


@@ -0,0 +1,42 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: language_level = 3
"""Dataset support for ORC file format."""
from pyarrow.lib cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_dataset cimport *
from pyarrow._dataset cimport FileFormat
cdef class OrcFileFormat(FileFormat):
def __init__(self):
self.init(shared_ptr[CFileFormat](new COrcFileFormat()))
def equals(self, OrcFileFormat other):
return True
@property
def default_extname(self):
return "orc"
def __reduce__(self):
return OrcFileFormat, tuple()


@@ -0,0 +1,811 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: language_level = 3
"""Dataset support for Parquest file format."""
from cython.operator cimport dereference as deref
import os
import warnings
import pyarrow as pa
from pyarrow.lib cimport *
from pyarrow.lib import frombytes, tobytes
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_dataset cimport *
from pyarrow.includes.libarrow_dataset_parquet cimport *
from pyarrow._fs cimport FileSystem
from pyarrow.util import _is_path_like, _stringify_path
from pyarrow._compute cimport Expression, _bind
from pyarrow._dataset cimport (
_make_file_source,
DatasetFactory,
FileFormat,
FileFragment,
FileWriteOptions,
Fragment,
FragmentScanOptions,
Partitioning,
PartitioningFactory,
WrittenFile
)
from pyarrow._parquet cimport (
_create_writer_properties, _create_arrow_writer_properties,
FileMetaData, RowGroupMetaData, ColumnChunkMetaData
)
cdef Expression _true = Expression._scalar(True)
ctypedef CParquetFileWriter* _CParquetFileWriterPtr
cdef class ParquetFileFormat(FileFormat):
"""
FileFormat for Parquet
Parameters
----------
read_options : ParquetReadOptions
Read options for the file.
default_fragment_scan_options : ParquetFragmentScanOptions
Scan Options for the file.
**kwargs : dict
Additional keyword arguments routed to the read options or the scan options.
"""
cdef:
CParquetFileFormat* parquet_format
def __init__(self, read_options=None,
default_fragment_scan_options=None, **kwargs):
cdef:
shared_ptr[CParquetFileFormat] wrapped
CParquetFileFormatReaderOptions* options
# Read/scan options
read_options_args = {option: kwargs[option] for option in kwargs
if option in _PARQUET_READ_OPTIONS}
scan_args = {option: kwargs[option] for option in kwargs
if option not in _PARQUET_READ_OPTIONS}
if read_options and read_options_args:
duplicates = ', '.join(sorted(read_options_args))
raise ValueError(f'If `read_options` is given, '
f'cannot specify {duplicates}')
if default_fragment_scan_options and scan_args:
duplicates = ', '.join(sorted(scan_args))
raise ValueError(f'If `default_fragment_scan_options` is given, '
f'cannot specify {duplicates}')
if read_options is None:
read_options = ParquetReadOptions(**read_options_args)
elif isinstance(read_options, dict):
# For backwards compatibility
duplicates = []
for option, value in read_options.items():
if option in _PARQUET_READ_OPTIONS:
read_options_args[option] = value
else:
duplicates.append(option)
scan_args[option] = value
if duplicates:
duplicates = ", ".join(duplicates)
warnings.warn(f'The scan options {duplicates} should be '
'specified directly as keyword arguments')
read_options = ParquetReadOptions(**read_options_args)
elif not isinstance(read_options, ParquetReadOptions):
raise TypeError('`read_options` must be either a dictionary or an '
'instance of ParquetReadOptions')
if default_fragment_scan_options is None:
default_fragment_scan_options = ParquetFragmentScanOptions(
**scan_args)
elif isinstance(default_fragment_scan_options, dict):
default_fragment_scan_options = ParquetFragmentScanOptions(
**default_fragment_scan_options)
elif not isinstance(default_fragment_scan_options,
ParquetFragmentScanOptions):
raise TypeError('`default_fragment_scan_options` must be either a '
'dictionary or an instance of '
'ParquetFragmentScanOptions')
wrapped = make_shared[CParquetFileFormat]()
options = &(wrapped.get().reader_options)
if read_options.dictionary_columns is not None:
for column in read_options.dictionary_columns:
options.dict_columns.insert(tobytes(column))
options.coerce_int96_timestamp_unit = \
read_options._coerce_int96_timestamp_unit
self.init(<shared_ptr[CFileFormat]> wrapped)
self.default_fragment_scan_options = default_fragment_scan_options
cdef void init(self, const shared_ptr[CFileFormat]& sp):
FileFormat.init(self, sp)
self.parquet_format = <CParquetFileFormat*> sp.get()
cdef WrittenFile _finish_write(self, path, base_dir,
CFileWriter* file_writer):
cdef:
FileMetaData parquet_metadata
CParquetFileWriter* parquet_file_writer
parquet_metadata = None
parquet_file_writer = dynamic_cast[_CParquetFileWriterPtr](file_writer)
with nogil:
metadata = deref(
deref(parquet_file_writer).parquet_writer()).metadata()
if metadata:
parquet_metadata = FileMetaData()
parquet_metadata.init(metadata)
parquet_metadata.set_file_path(os.path.relpath(path, base_dir))
return WrittenFile(path, parquet_metadata)
@property
def read_options(self):
cdef CParquetFileFormatReaderOptions* options
options = &self.parquet_format.reader_options
parquet_read_options = ParquetReadOptions(
dictionary_columns={frombytes(col)
for col in options.dict_columns},
)
# The read options getter/setter works with strings, so set the
# private property, which uses the C type, directly
parquet_read_options._coerce_int96_timestamp_unit = \
options.coerce_int96_timestamp_unit
return parquet_read_options
def make_write_options(self, **kwargs):
opts = FileFormat.make_write_options(self)
(<ParquetFileWriteOptions> opts).update(**kwargs)
return opts
cdef _set_default_fragment_scan_options(self, FragmentScanOptions options):
if options.type_name == 'parquet':
self.parquet_format.default_fragment_scan_options = options.wrapped
else:
super()._set_default_fragment_scan_options(options)
def equals(self, ParquetFileFormat other):
return (
self.read_options.equals(other.read_options) and
self.default_fragment_scan_options ==
other.default_fragment_scan_options
)
@property
def default_extname(self):
return "parquet"
def __reduce__(self):
return ParquetFileFormat, (self.read_options,
self.default_fragment_scan_options)
def __repr__(self):
return f"<ParquetFileFormat read_options={self.read_options}>"
def make_fragment(self, file, filesystem=None,
Expression partition_expression=None, row_groups=None):
cdef:
vector[int] c_row_groups
if partition_expression is None:
partition_expression = _true
if row_groups is None:
return super().make_fragment(file, filesystem,
partition_expression)
c_source = _make_file_source(file, filesystem)
c_row_groups = [<int> row_group for row_group in set(row_groups)]
c_fragment = <shared_ptr[CFragment]> GetResultValue(
self.parquet_format.MakeFragment(move(c_source),
partition_expression.unwrap(),
<shared_ptr[CSchema]>nullptr,
move(c_row_groups)))
return Fragment.wrap(move(c_fragment))
class RowGroupInfo:
"""
A wrapper class for RowGroup information
Parameters
----------
id : the group id.
metadata : the rowgroup metadata.
schema : schema of the rows.
"""
def __init__(self, id, metadata, schema):
self.id = id
self.metadata = metadata
self.schema = schema
@property
def num_rows(self):
return self.metadata.num_rows
@property
def total_byte_size(self):
return self.metadata.total_byte_size
@property
def statistics(self):
def name_stats(i):
col = self.metadata.column(i)
stats = col.statistics
if stats is None or not stats.has_min_max:
return None, None
name = col.path_in_schema
field_index = self.schema.get_field_index(name)
if field_index < 0:
return None, None
typ = self.schema.field(field_index).type
return col.path_in_schema, {
'min': pa.scalar(stats.min, type=typ).as_py(),
'max': pa.scalar(stats.max, type=typ).as_py()
}
return {
name: stats for name, stats
in map(name_stats, range(self.metadata.num_columns))
if stats is not None
}
def __repr__(self):
return "RowGroupInfo({})".format(self.id)
def __eq__(self, other):
if isinstance(other, int):
return self.id == other
if not isinstance(other, RowGroupInfo):
return False
return self.id == other.id
cdef class ParquetFileFragment(FileFragment):
"""A Fragment representing a parquet file."""
cdef:
CParquetFileFragment* parquet_file_fragment
cdef void init(self, const shared_ptr[CFragment]& sp):
FileFragment.init(self, sp)
self.parquet_file_fragment = <CParquetFileFragment*> sp.get()
def __reduce__(self):
buffer = self.buffer
# parquet_file_fragment.row_groups() is empty if the metadata
# information of the file is not yet populated
if not bool(self.parquet_file_fragment.row_groups()):
row_groups = None
else:
row_groups = [row_group.id for row_group in self.row_groups]
return self.format.make_fragment, (
self.path if buffer is None else buffer,
self.filesystem,
self.partition_expression,
row_groups
)
def ensure_complete_metadata(self):
"""
Ensure that all metadata (statistics, physical schema, ...) have
been read and cached in this fragment.
"""
with nogil:
check_status(self.parquet_file_fragment.EnsureCompleteMetadata())
@property
def row_groups(self):
metadata = self.metadata
cdef vector[int] row_groups = self.parquet_file_fragment.row_groups()
return [RowGroupInfo(i, metadata.row_group(i), self.physical_schema)
for i in row_groups]
@property
def metadata(self):
self.ensure_complete_metadata()
cdef FileMetaData metadata = FileMetaData()
metadata.init(self.parquet_file_fragment.metadata())
return metadata
@property
def num_row_groups(self):
"""
Return the number of row groups viewed by this fragment (not the
number of row groups in the origin file).
"""
self.ensure_complete_metadata()
return self.parquet_file_fragment.row_groups().size()
def split_by_row_group(self, Expression filter=None,
Schema schema=None):
"""
Split the fragment into multiple fragments.
Yield a Fragment wrapping each row group in this ParquetFileFragment.
Row groups will be excluded whose metadata contradicts the optional
filter.
Parameters
----------
filter : Expression, default None
Only include the row groups which satisfy this predicate (using
the Parquet RowGroup statistics).
schema : Schema, default None
Schema to use when filtering row groups. Defaults to the
Fragment's physical schema
Returns
-------
A list of Fragments
"""
cdef:
vector[shared_ptr[CFragment]] c_fragments
CExpression c_filter
shared_ptr[CFragment] c_fragment
schema = schema or self.physical_schema
c_filter = _bind(filter, schema)
with nogil:
c_fragments = move(GetResultValue(
self.parquet_file_fragment.SplitByRowGroup(move(c_filter))))
return [Fragment.wrap(c_fragment) for c_fragment in c_fragments]
def subset(self, Expression filter=None, Schema schema=None,
object row_group_ids=None):
"""
Create a subset of the fragment (viewing a subset of the row groups).
Subset can be specified by either a filter predicate (with optional
schema) or by a list of row group IDs. Note that when using a filter,
the resulting fragment can be empty (viewing no row groups).
Parameters
----------
filter : Expression, default None
Only include the row groups which satisfy this predicate (using
the Parquet RowGroup statistics).
schema : Schema, default None
Schema to use when filtering row groups. Defaults to the
Fragment's physical schema
row_group_ids : list of ints
The row group IDs to include in the subset. Can only be specified
if `filter` is None.
Returns
-------
ParquetFileFragment
"""
cdef:
CExpression c_filter
vector[int] c_row_group_ids
shared_ptr[CFragment] c_fragment
if filter is not None and row_group_ids is not None:
raise ValueError(
"Cannot specify both 'filter' and 'row_group_ids'."
)
if filter is not None:
schema = schema or self.physical_schema
c_filter = _bind(filter, schema)
with nogil:
c_fragment = move(GetResultValue(
self.parquet_file_fragment.SubsetWithFilter(
move(c_filter))))
elif row_group_ids is not None:
c_row_group_ids = [
<int> row_group for row_group in sorted(set(row_group_ids))
]
with nogil:
c_fragment = move(GetResultValue(
self.parquet_file_fragment.SubsetWithIds(
move(c_row_group_ids))))
else:
raise ValueError(
"Need to specify one of 'filter' or 'row_group_ids'"
)
return Fragment.wrap(c_fragment)
cdef class ParquetReadOptions(_Weakrefable):
"""
Parquet format specific options for reading.
Parameters
----------
dictionary_columns : list of string, default None
Names of columns which should be dictionary encoded as
they are read.
coerce_int96_timestamp_unit : str, default None.
Cast timestamps that are stored in INT96 format to a particular
resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
and therefore INT96 timestamps will be inferred as timestamps
in nanoseconds.
"""
cdef public:
set dictionary_columns
TimeUnit _coerce_int96_timestamp_unit
# Also see _PARQUET_READ_OPTIONS
def __init__(self, dictionary_columns=None,
coerce_int96_timestamp_unit=None):
self.dictionary_columns = set(dictionary_columns or set())
self.coerce_int96_timestamp_unit = coerce_int96_timestamp_unit
@property
def coerce_int96_timestamp_unit(self):
return timeunit_to_string(self._coerce_int96_timestamp_unit)
@coerce_int96_timestamp_unit.setter
def coerce_int96_timestamp_unit(self, unit):
if unit is not None:
self._coerce_int96_timestamp_unit = string_to_timeunit(unit)
else:
self._coerce_int96_timestamp_unit = TimeUnit_NANO
def equals(self, ParquetReadOptions other):
return (self.dictionary_columns == other.dictionary_columns and
self.coerce_int96_timestamp_unit ==
other.coerce_int96_timestamp_unit)
def __eq__(self, other):
try:
return self.equals(other)
except TypeError:
return False
def __repr__(self):
return (
f"<ParquetReadOptions"
f" dictionary_columns={self.dictionary_columns}"
f" coerce_int96_timestamp_unit={self.coerce_int96_timestamp_unit}>"
)
cdef class ParquetFileWriteOptions(FileWriteOptions):
cdef:
CParquetFileWriteOptions* parquet_options
object _properties
def update(self, **kwargs):
arrow_fields = {
"use_deprecated_int96_timestamps",
"coerce_timestamps",
"allow_truncated_timestamps",
}
setters = set()
for name, value in kwargs.items():
if name not in self._properties:
raise TypeError("unexpected parquet write option: " + name)
self._properties[name] = value
if name in arrow_fields:
setters.add(self._set_arrow_properties)
else:
setters.add(self._set_properties)
for setter in setters:
setter()
def _set_properties(self):
cdef CParquetFileWriteOptions* opts = self.parquet_options
opts.writer_properties = _create_writer_properties(
use_dictionary=self._properties["use_dictionary"],
compression=self._properties["compression"],
version=self._properties["version"],
write_statistics=self._properties["write_statistics"],
data_page_size=self._properties["data_page_size"],
compression_level=self._properties["compression_level"],
use_byte_stream_split=(
self._properties["use_byte_stream_split"]
),
column_encoding=self._properties["column_encoding"],
data_page_version=self._properties["data_page_version"],
)
def _set_arrow_properties(self):
cdef CParquetFileWriteOptions* opts = self.parquet_options
opts.arrow_writer_properties = _create_arrow_writer_properties(
use_deprecated_int96_timestamps=(
self._properties["use_deprecated_int96_timestamps"]
),
coerce_timestamps=self._properties["coerce_timestamps"],
allow_truncated_timestamps=(
self._properties["allow_truncated_timestamps"]
),
writer_engine_version="V2",
use_compliant_nested_type=(
self._properties["use_compliant_nested_type"]
)
)
cdef void init(self, const shared_ptr[CFileWriteOptions]& sp):
FileWriteOptions.init(self, sp)
self.parquet_options = <CParquetFileWriteOptions*> sp.get()
self._properties = dict(
use_dictionary=True,
compression="snappy",
version="1.0",
write_statistics=None,
data_page_size=None,
compression_level=None,
use_byte_stream_split=False,
column_encoding=None,
data_page_version="1.0",
use_deprecated_int96_timestamps=False,
coerce_timestamps=None,
allow_truncated_timestamps=False,
use_compliant_nested_type=False,
)
self._set_properties()
self._set_arrow_properties()
cdef set _PARQUET_READ_OPTIONS = {
'dictionary_columns', 'coerce_int96_timestamp_unit'
}
cdef class ParquetFragmentScanOptions(FragmentScanOptions):
"""
Scan-specific options for Parquet fragments.
Parameters
----------
use_buffered_stream : bool, default False
Read files through buffered input streams rather than loading entire
row groups at once. This may be enabled to reduce memory overhead.
Disabled by default.
buffer_size : int, default 8192
Size of buffered stream, if enabled. Default is 8KB.
pre_buffer : bool, default False
If enabled, pre-buffer the raw Parquet data instead of issuing one
read per column chunk. This can improve performance on high-latency
filesystems.
"""
cdef:
CParquetFragmentScanOptions* parquet_options
# Avoid mistakenly creating attributes
__slots__ = ()
def __init__(self, bint use_buffered_stream=False,
buffer_size=8192,
bint pre_buffer=False):
self.init(shared_ptr[CFragmentScanOptions](
new CParquetFragmentScanOptions()))
self.use_buffered_stream = use_buffered_stream
self.buffer_size = buffer_size
self.pre_buffer = pre_buffer
cdef void init(self, const shared_ptr[CFragmentScanOptions]& sp):
FragmentScanOptions.init(self, sp)
self.parquet_options = <CParquetFragmentScanOptions*> sp.get()
cdef CReaderProperties* reader_properties(self):
return self.parquet_options.reader_properties.get()
cdef ArrowReaderProperties* arrow_reader_properties(self):
return self.parquet_options.arrow_reader_properties.get()
@property
def use_buffered_stream(self):
return self.reader_properties().is_buffered_stream_enabled()
@use_buffered_stream.setter
def use_buffered_stream(self, bint use_buffered_stream):
if use_buffered_stream:
self.reader_properties().enable_buffered_stream()
else:
self.reader_properties().disable_buffered_stream()
@property
def buffer_size(self):
return self.reader_properties().buffer_size()
@buffer_size.setter
def buffer_size(self, buffer_size):
if buffer_size <= 0:
raise ValueError("Buffer size must be larger than zero")
self.reader_properties().set_buffer_size(buffer_size)
@property
def pre_buffer(self):
return self.arrow_reader_properties().pre_buffer()
@pre_buffer.setter
def pre_buffer(self, bint pre_buffer):
self.arrow_reader_properties().set_pre_buffer(pre_buffer)
def equals(self, ParquetFragmentScanOptions other):
return (
self.use_buffered_stream == other.use_buffered_stream and
self.buffer_size == other.buffer_size and
self.pre_buffer == other.pre_buffer
)
def __reduce__(self):
return ParquetFragmentScanOptions, (
self.use_buffered_stream, self.buffer_size, self.pre_buffer
)
cdef class ParquetFactoryOptions(_Weakrefable):
"""
Influences the discovery of a Parquet dataset.
Parameters
----------
partition_base_dir : str, optional
For the purposes of applying the partitioning, paths will be
stripped of the partition_base_dir. Files not matching the
partition_base_dir prefix will be skipped for partitioning discovery.
The ignored files will still be part of the Dataset, but will not
have partition information.
partitioning : Partitioning, PartitioningFactory, optional
The partitioning scheme applied to fragments, see ``Partitioning``.
validate_column_chunk_paths : bool, default False
Assert that all ColumnChunk paths are consistent. The parquet spec
allows for ColumnChunk data to be stored in multiple files, but
ParquetDatasetFactory supports only a single file with all ColumnChunk
data. If this flag is set construction of a ParquetDatasetFactory will
raise an error if ColumnChunk data is not resident in a single file.
"""
cdef:
CParquetFactoryOptions options
__slots__ = () # avoid mistakenly creating attributes
def __init__(self, partition_base_dir=None, partitioning=None,
validate_column_chunk_paths=False):
if isinstance(partitioning, PartitioningFactory):
self.partitioning_factory = partitioning
elif isinstance(partitioning, Partitioning):
self.partitioning = partitioning
if partition_base_dir is not None:
self.partition_base_dir = partition_base_dir
self.options.validate_column_chunk_paths = validate_column_chunk_paths
cdef inline CParquetFactoryOptions unwrap(self):
return self.options
@property
def partitioning(self):
"""Partitioning to apply to discovered files.
NOTE: setting this property will overwrite partitioning_factory.
"""
c_partitioning = self.options.partitioning.partitioning()
if c_partitioning.get() == nullptr:
return None
return Partitioning.wrap(c_partitioning)
@partitioning.setter
def partitioning(self, Partitioning value):
self.options.partitioning = (<Partitioning> value).unwrap()
@property
def partitioning_factory(self):
"""PartitioningFactory to apply to discovered files and
discover a Partitioning.
NOTE: setting this property will overwrite partitioning.
"""
c_factory = self.options.partitioning.factory()
if c_factory.get() == nullptr:
return None
return PartitioningFactory.wrap(c_factory)
@partitioning_factory.setter
def partitioning_factory(self, PartitioningFactory value):
self.options.partitioning = (<PartitioningFactory> value).unwrap()
@property
def partition_base_dir(self):
"""
Base directory to strip paths before applying the partitioning.
"""
return frombytes(self.options.partition_base_dir)
@partition_base_dir.setter
def partition_base_dir(self, value):
self.options.partition_base_dir = tobytes(value)
@property
def validate_column_chunk_paths(self):
"""
Check that all ColumnChunk paths are consistent.
"""
return self.options.validate_column_chunk_paths
@validate_column_chunk_paths.setter
def validate_column_chunk_paths(self, value):
self.options.validate_column_chunk_paths = value
cdef class ParquetDatasetFactory(DatasetFactory):
"""
Create a ParquetDatasetFactory from a Parquet `_metadata` file.
Parameters
----------
metadata_path : str
Path to the `_metadata` parquet metadata-only file generated with
`pyarrow.parquet.write_metadata`.
filesystem : pyarrow.fs.FileSystem
Filesystem to read the metadata_path from, and subsequent parquet
files.
format : ParquetFileFormat
Parquet format options.
options : ParquetFactoryOptions, optional
Various flags influencing the discovery of filesystem paths.
"""
cdef:
CParquetDatasetFactory* parquet_factory
def __init__(self, metadata_path, FileSystem filesystem not None,
FileFormat format not None,
ParquetFactoryOptions options=None):
cdef:
c_string c_path
shared_ptr[CFileSystem] c_filesystem
shared_ptr[CParquetFileFormat] c_format
CResult[shared_ptr[CDatasetFactory]] result
CParquetFactoryOptions c_options
c_path = tobytes(metadata_path)
c_filesystem = filesystem.unwrap()
c_format = static_pointer_cast[CParquetFileFormat, CFileFormat](
format.unwrap())
options = options or ParquetFactoryOptions()
c_options = options.unwrap()
with nogil:
result = CParquetDatasetFactory.MakeFromMetaDataPath(
c_path, c_filesystem, c_format, c_options)
self.init(GetResultValue(result))
cdef init(self, shared_ptr[CDatasetFactory]& sp):
DatasetFactory.init(self, sp)
self.parquet_factory = <CParquetDatasetFactory*> sp.get()


@@ -0,0 +1,353 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# ---------------------------------------------------------------------
# Implement Internal ExecPlan bindings
# cython: profile=False
# distutils: language = c++
# cython: language_level = 3
from cython.operator cimport dereference as deref, preincrement as inc
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_dataset cimport *
from pyarrow.lib cimport (Table, check_status, pyarrow_unwrap_table, pyarrow_wrap_table)
from pyarrow.lib import tobytes
from pyarrow._compute cimport Expression, _true
from pyarrow._dataset cimport Dataset
from pyarrow._dataset import InMemoryDataset
Initialize() # Initialize support for Datasets in ExecPlan
cdef execplan(inputs, output_type, vector[CDeclaration] plan, c_bool use_threads=True):
"""
    Internal function to create an ExecPlan and run it.
Parameters
----------
inputs : list of Table or Dataset
The sources from which the ExecPlan should fetch data.
In most cases this is only one, unless the first node of the
plan is able to get data from multiple different sources.
output_type : Table or InMemoryDataset
In which format the output should be provided.
plan : vector[CDeclaration]
The nodes of the plan that should be applied to the sources
to produce the output.
use_threads : bool, default True
        Whether to use multithreading or not.
"""
cdef:
CExecutor *c_executor
shared_ptr[CExecContext] c_exec_context
shared_ptr[CExecPlan] c_exec_plan
vector[CDeclaration] c_decls
vector[CExecNode*] _empty
vector[CExecNode*] c_final_node_vec
CExecNode *c_node
CTable* c_table
shared_ptr[CTable] c_in_table
shared_ptr[CTable] c_out_table
shared_ptr[CTableSourceNodeOptions] c_tablesourceopts
shared_ptr[CScanNodeOptions] c_scanopts
shared_ptr[CExecNodeOptions] c_input_node_opts
shared_ptr[CSinkNodeOptions] c_sinkopts
shared_ptr[CAsyncExecBatchGenerator] c_async_exec_batch_gen
shared_ptr[CRecordBatchReader] c_recordbatchreader
vector[CDeclaration].iterator plan_iter
vector[CDeclaration.Input] no_c_inputs
CStatus c_plan_status
if use_threads:
c_executor = GetCpuThreadPool()
else:
c_executor = NULL
c_exec_context = make_shared[CExecContext](
c_default_memory_pool(), c_executor)
c_exec_plan = GetResultValue(CExecPlan.Make(c_exec_context.get()))
plan_iter = plan.begin()
# Create source nodes for each input
for ipt in inputs:
if isinstance(ipt, Table):
node_factory = "table_source"
c_in_table = pyarrow_unwrap_table(ipt)
c_tablesourceopts = make_shared[CTableSourceNodeOptions](
c_in_table, 1 << 20)
c_input_node_opts = static_pointer_cast[CExecNodeOptions, CTableSourceNodeOptions](
c_tablesourceopts)
elif isinstance(ipt, Dataset):
node_factory = "scan"
c_in_dataset = (<Dataset>ipt).unwrap()
c_scanopts = make_shared[CScanNodeOptions](
c_in_dataset, make_shared[CScanOptions]())
deref(deref(c_scanopts).scan_options).use_threads = use_threads
c_input_node_opts = static_pointer_cast[CExecNodeOptions, CScanNodeOptions](
c_scanopts)
else:
raise TypeError("Unsupported type")
if plan_iter != plan.end():
# Flag the source as the input of the first plan node.
deref(plan_iter).inputs.push_back(CDeclaration.Input(
CDeclaration(tobytes(node_factory),
no_c_inputs, c_input_node_opts)
))
else:
# Empty plan, make the source the first plan node.
c_decls.push_back(
CDeclaration(tobytes(node_factory),
no_c_inputs, c_input_node_opts)
)
    # Append the remaining plan nodes
while plan_iter != plan.end():
c_decls.push_back(deref(plan_iter))
inc(plan_iter)
# Add all CDeclarations to the plan
c_node = GetResultValue(
CDeclaration.Sequence(c_decls).AddToPlan(&deref(c_exec_plan))
)
c_final_node_vec.push_back(c_node)
# Create the output node
c_async_exec_batch_gen = make_shared[CAsyncExecBatchGenerator]()
c_sinkopts = make_shared[CSinkNodeOptions](c_async_exec_batch_gen.get())
GetResultValue(
MakeExecNode(tobytes("sink"), &deref(c_exec_plan),
c_final_node_vec, deref(c_sinkopts))
)
    # Convert the async generator to a sync batch reader
c_recordbatchreader = MakeGeneratorReader(c_node.output_schema(),
deref(c_async_exec_batch_gen),
deref(c_exec_context).memory_pool())
# Start execution of the ExecPlan
deref(c_exec_plan).Validate()
deref(c_exec_plan).StartProducing()
# Convert output to the expected one.
c_out_table = GetResultValue(
CTable.FromRecordBatchReader(c_recordbatchreader.get()))
if output_type == Table:
output = pyarrow_wrap_table(c_out_table)
elif output_type == InMemoryDataset:
output = InMemoryDataset(pyarrow_wrap_table(c_out_table))
else:
raise TypeError("Unsupported output type")
with nogil:
c_plan_status = deref(c_exec_plan).finished().status()
check_status(c_plan_status)
return output
def _perform_join(join_type, left_operand not None, left_keys,
right_operand not None, right_keys,
left_suffix=None, right_suffix=None,
use_threads=True, coalesce_keys=False,
output_type=Table):
"""
    Perform a join between two tables or datasets.
    The result will be an output table (or dataset) with the rows produced
    by the join operation.
Parameters
----------
join_type : str
One of supported join types.
left_operand : Table or Dataset
The left operand for the join operation.
left_keys : str or list[str]
The left key (or keys) on which the join operation should be performed.
right_operand : Table or Dataset
The right operand for the join operation.
right_keys : str or list[str]
The right key (or keys) on which the join operation should be performed.
    left_suffix : str, default None
        Which suffix to add to left column names. This prevents confusion
        when the columns in left and right operands have colliding names.
    right_suffix : str, default None
        Which suffix to add to right column names. This prevents confusion
        when the columns in left and right operands have colliding names.
    use_threads : bool, default True
        Whether to use multithreading or not.
    coalesce_keys : bool, default False
        Whether the duplicated join keys should be omitted from one of the
        sides in the join result.
    output_type : Table or InMemoryDataset
        The output type for the exec plan result.
Returns
-------
result_table : Table
"""
cdef:
vector[CFieldRef] c_left_keys
vector[CFieldRef] c_right_keys
vector[CFieldRef] c_left_columns
vector[CFieldRef] c_right_columns
vector[CDeclaration] c_decl_plan
vector[CExpression] c_projections
vector[c_string] c_projected_col_names
CJoinType c_join_type
# Prepare left and right tables Keys to send them to the C++ function
left_keys_order = {}
if isinstance(left_keys, str):
left_keys = [left_keys]
for idx, key in enumerate(left_keys):
left_keys_order[key] = idx
c_left_keys.push_back(CFieldRef(<c_string>tobytes(key)))
right_keys_order = {}
if isinstance(right_keys, str):
right_keys = [right_keys]
for idx, key in enumerate(right_keys):
right_keys_order[key] = idx
c_right_keys.push_back(CFieldRef(<c_string>tobytes(key)))
# By default expose all columns on both left and right table
if isinstance(left_operand, Table):
left_columns = left_operand.column_names
elif isinstance(left_operand, Dataset):
left_columns = left_operand.schema.names
else:
raise TypeError("Unsupported left join member type")
if isinstance(right_operand, Table):
right_columns = right_operand.column_names
elif isinstance(right_operand, Dataset):
right_columns = right_operand.schema.names
else:
raise TypeError("Unsupported right join member type")
# Pick the join type
if join_type == "left semi":
c_join_type = CJoinType_LEFT_SEMI
right_columns = []
elif join_type == "right semi":
c_join_type = CJoinType_RIGHT_SEMI
left_columns = []
elif join_type == "left anti":
c_join_type = CJoinType_LEFT_ANTI
right_columns = []
elif join_type == "right anti":
c_join_type = CJoinType_RIGHT_ANTI
left_columns = []
elif join_type == "inner":
c_join_type = CJoinType_INNER
right_columns = set(right_columns) - set(right_keys)
elif join_type == "left outer":
c_join_type = CJoinType_LEFT_OUTER
right_columns = set(right_columns) - set(right_keys)
elif join_type == "right outer":
c_join_type = CJoinType_RIGHT_OUTER
left_columns = set(left_columns) - set(left_keys)
elif join_type == "full outer":
c_join_type = CJoinType_FULL_OUTER
else:
raise ValueError("Unsupported join type")
# Turn the columns to vectors of FieldRefs
# and set aside indices of keys.
left_column_keys_indices = {}
for idx, colname in enumerate(left_columns):
c_left_columns.push_back(CFieldRef(<c_string>tobytes(colname)))
if colname in left_keys:
left_column_keys_indices[colname] = idx
right_column_keys_indices = {}
for idx, colname in enumerate(right_columns):
c_right_columns.push_back(CFieldRef(<c_string>tobytes(colname)))
if colname in right_keys:
right_column_keys_indices[colname] = idx
# Add the join node to the execplan
if coalesce_keys:
c_decl_plan.push_back(
CDeclaration(tobytes("hashjoin"), CHashJoinNodeOptions(
c_join_type, c_left_keys, c_right_keys,
c_left_columns, c_right_columns,
_true,
<c_string>tobytes(left_suffix or ""),
<c_string>tobytes(right_suffix or "")
))
)
if join_type == "full outer":
# In case of full outer joins, the join operation will output all columns
# so that we can coalesce the keys and exclude duplicates in a subsequent projection.
left_columns_set = set(left_columns)
right_columns_set = set(right_columns)
# Where the right table columns start.
right_operand_index = len(left_columns)
for idx, col in enumerate(left_columns + right_columns):
if idx < len(left_columns) and col in left_column_keys_indices:
# Include keys only once and coalesce left+right table keys.
c_projected_col_names.push_back(tobytes(col))
# Get the index of the right key that is being paired
# with this left key. We do so by retrieving the name
# of the right key that is in the same position in the provided keys
# and then looking up the index for that name in the right table.
right_key_index = right_column_keys_indices[right_keys[left_keys_order[col]]]
c_projections.push_back(Expression.unwrap(
Expression._call("coalesce", [
Expression._field(idx), Expression._field(
right_operand_index+right_key_index)
])
))
elif idx >= right_operand_index and col in right_column_keys_indices:
                # Do not include right table keys, as they would lead to duplicated keys.
continue
else:
                # For all the other columns, include them as they are.
                # Just recompute the suffixes that the join produced, as the
                # projection would lose them otherwise.
if left_suffix and idx < right_operand_index and col in right_columns_set:
col += left_suffix
if right_suffix and idx >= right_operand_index and col in left_columns_set:
col += right_suffix
c_projected_col_names.push_back(tobytes(col))
c_projections.push_back(
Expression.unwrap(Expression._field(idx)))
c_decl_plan.push_back(
CDeclaration(tobytes("project"), CProjectNodeOptions(
c_projections, c_projected_col_names))
)
else:
c_decl_plan.push_back(
CDeclaration(tobytes("hashjoin"), CHashJoinNodeOptions(
c_join_type, c_left_keys, c_right_keys,
_true,
<c_string>tobytes(left_suffix or ""),
<c_string>tobytes(right_suffix or "")
))
)
result_table = execplan([left_operand, right_operand],
plan=c_decl_plan,
output_type=output_type,
use_threads=use_threads)
return result_table
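
# A minimal sketch of how the helper above is exercised; in practice it is
# typically reached through the public ``Table.join`` API. The toy tables and
# column names below are made up for illustration.
def _example_table_join():  # illustrative helper only
    import pyarrow as pa

    left = pa.table({"id": [1, 2, 3], "colA": ["a", "b", "c"]})
    right = pa.table({"id": [2, 3, 4], "colB": ["x", "y", "z"]})

    # Table.join delegates to _perform_join above; only "id" is a shared key.
    return left.join(right, keys="id", join_type="left outer",
                     coalesce_keys=True)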

View File

@ -0,0 +1,117 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# ---------------------------------------------------------------------
# Implement Feather file format
# cython: profile=False
# distutils: language = c++
# cython: language_level=3
from cython.operator cimport dereference as deref
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_feather cimport *
from pyarrow.lib cimport (check_status, Table, _Weakrefable,
get_writer, get_reader, pyarrow_wrap_table)
from pyarrow.lib import tobytes
class FeatherError(Exception):
pass
def write_feather(Table table, object dest, compression=None,
compression_level=None, chunksize=None, version=2):
cdef shared_ptr[COutputStream] sink
get_writer(dest, &sink)
cdef CFeatherProperties properties
if version == 2:
properties.version = kFeatherV2Version
else:
properties.version = kFeatherV1Version
if compression == 'zstd':
properties.compression = CCompressionType_ZSTD
elif compression == 'lz4':
properties.compression = CCompressionType_LZ4_FRAME
else:
properties.compression = CCompressionType_UNCOMPRESSED
if chunksize is not None:
properties.chunksize = chunksize
if compression_level is not None:
properties.compression_level = compression_level
with nogil:
check_status(WriteFeather(deref(table.table), sink.get(),
properties))
cdef class FeatherReader(_Weakrefable):
cdef:
shared_ptr[CFeatherReader] reader
def __cinit__(self, source, c_bool use_memory_map, c_bool use_threads):
cdef:
shared_ptr[CRandomAccessFile] reader
CIpcReadOptions options = CIpcReadOptions.Defaults()
options.use_threads = use_threads
get_reader(source, use_memory_map, &reader)
with nogil:
self.reader = GetResultValue(CFeatherReader.Open(reader, options))
@property
def version(self):
return self.reader.get().version()
def read(self):
cdef shared_ptr[CTable] sp_table
with nogil:
check_status(self.reader.get()
.Read(&sp_table))
return pyarrow_wrap_table(sp_table)
def read_indices(self, indices):
cdef:
shared_ptr[CTable] sp_table
vector[int] c_indices
for index in indices:
c_indices.push_back(index)
with nogil:
check_status(self.reader.get()
.Read(c_indices, &sp_table))
return pyarrow_wrap_table(sp_table)
def read_names(self, names):
cdef:
shared_ptr[CTable] sp_table
vector[c_string] c_names
for name in names:
c_names.push_back(tobytes(name))
with nogil:
check_status(self.reader.get()
.Read(c_names, &sp_table))
return pyarrow_wrap_table(sp_table)
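
# A minimal round-trip sketch for the bindings above (normally used through the
# ``pyarrow.feather`` wrappers); the module path ``pyarrow._feather`` and the
# file name are assumptions of this sketch.
def _example_feather_roundtrip():  # illustrative helper only
    import pyarrow as pa
    from pyarrow._feather import FeatherReader, write_feather

    table = pa.table({"x": [1, 2, 3]})
    write_feather(table, "example.feather", compression="zstd", version=2)

    reader = FeatherReader("example.feather", use_memory_map=False,
                           use_threads=True)
    return reader.read()  # -> pyarrow.Table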

File diff suppressed because it is too large

View File

@ -0,0 +1,94 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: language_level = 3
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow_fs cimport *
from pyarrow.lib import _detect_compression, frombytes, tobytes
from pyarrow.lib cimport *
cpdef enum FileType:
NotFound = <int8_t> CFileType_NotFound
Unknown = <int8_t> CFileType_Unknown
File = <int8_t> CFileType_File
Directory = <int8_t> CFileType_Directory
cdef class FileInfo(_Weakrefable):
cdef:
CFileInfo info
@staticmethod
cdef wrap(CFileInfo info)
cdef inline CFileInfo unwrap(self) nogil
@staticmethod
cdef CFileInfo unwrap_safe(obj)
cdef class FileSelector(_Weakrefable):
cdef:
CFileSelector selector
@staticmethod
cdef FileSelector wrap(CFileSelector selector)
cdef inline CFileSelector unwrap(self) nogil
cdef class FileSystem(_Weakrefable):
cdef:
shared_ptr[CFileSystem] wrapped
CFileSystem* fs
cdef init(self, const shared_ptr[CFileSystem]& wrapped)
@staticmethod
cdef wrap(const shared_ptr[CFileSystem]& sp)
cdef inline shared_ptr[CFileSystem] unwrap(self) nogil
cdef class LocalFileSystem(FileSystem):
cdef:
CLocalFileSystem* localfs
cdef init(self, const shared_ptr[CFileSystem]& wrapped)
cdef class SubTreeFileSystem(FileSystem):
cdef:
CSubTreeFileSystem* subtreefs
cdef init(self, const shared_ptr[CFileSystem]& wrapped)
cdef class _MockFileSystem(FileSystem):
cdef:
CMockFileSystem* mockfs
cdef init(self, const shared_ptr[CFileSystem]& wrapped)
cdef class PyFileSystem(FileSystem):
cdef:
CPyFileSystem* pyfs
cdef init(self, const shared_ptr[CFileSystem]& wrapped)

File diff suppressed because it is too large

View File

@ -0,0 +1,5 @@
# coding: utf-8
# file generated by setuptools_scm
# don't change, don't track in version control
version = '8.0.0'
version_tuple = (8, 0, 0)

View File

@ -0,0 +1,149 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: language_level = 3
from pyarrow.lib cimport check_status
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_fs cimport *
from pyarrow._fs cimport FileSystem
from pyarrow.lib import frombytes, tobytes
from pyarrow.util import _stringify_path
cdef class HadoopFileSystem(FileSystem):
"""
HDFS backed FileSystem implementation
Parameters
----------
host : str
HDFS host to connect to. Set to "default" for fs.defaultFS from
core-site.xml.
port : int, default 8020
HDFS port to connect to. Set to 0 for default or logical (HA) nodes.
user : str, default None
Username when connecting to HDFS; None implies login user.
replication : int, default 3
Number of copies each block will have.
buffer_size : int, default 0
        If 0, no buffering will happen; otherwise, this is the size of the
        temporary read and write buffer.
default_block_size : int, default None
        None means the default configuration for HDFS; a typical block size is
        128 MB.
kerb_ticket : string or path, default None
If not None, the path to the Kerberos ticket cache.
extra_conf : dict, default None
Extra key/value pairs for configuration; will override any
hdfs-site.xml properties.
"""
cdef:
CHadoopFileSystem* hdfs
def __init__(self, str host, int port=8020, *, str user=None,
int replication=3, int buffer_size=0,
default_block_size=None, kerb_ticket=None,
extra_conf=None):
cdef:
CHdfsOptions options
shared_ptr[CHadoopFileSystem] wrapped
if not host.startswith(('hdfs://', 'viewfs://')) and host != "default":
# TODO(kszucs): do more sanitization
host = 'hdfs://{}'.format(host)
options.ConfigureEndPoint(tobytes(host), int(port))
options.ConfigureReplication(replication)
options.ConfigureBufferSize(buffer_size)
if user is not None:
options.ConfigureUser(tobytes(user))
if default_block_size is not None:
options.ConfigureBlockSize(default_block_size)
if kerb_ticket is not None:
options.ConfigureKerberosTicketCachePath(
tobytes(_stringify_path(kerb_ticket)))
if extra_conf is not None:
for k, v in extra_conf.items():
options.ConfigureExtraConf(tobytes(k), tobytes(v))
with nogil:
wrapped = GetResultValue(CHadoopFileSystem.Make(options))
self.init(<shared_ptr[CFileSystem]> wrapped)
cdef init(self, const shared_ptr[CFileSystem]& wrapped):
FileSystem.init(self, wrapped)
self.hdfs = <CHadoopFileSystem*> wrapped.get()
@staticmethod
def from_uri(uri):
"""
        Instantiate HadoopFileSystem object from a URI string.
        The following two calls are equivalent:
* ``HadoopFileSystem.from_uri('hdfs://localhost:8020/?user=test\
&replication=1')``
* ``HadoopFileSystem('localhost', port=8020, user='test', \
replication=1)``
Parameters
----------
uri : str
A string URI describing the connection to HDFS.
In order to change the user, replication, buffer_size or
default_block_size pass the values as query parts.
Returns
-------
HadoopFileSystem
"""
cdef:
HadoopFileSystem self = HadoopFileSystem.__new__(HadoopFileSystem)
shared_ptr[CHadoopFileSystem] wrapped
CHdfsOptions options
options = GetResultValue(CHdfsOptions.FromUriString(tobytes(uri)))
with nogil:
wrapped = GetResultValue(CHadoopFileSystem.Make(options))
self.init(<shared_ptr[CFileSystem]> wrapped)
return self
@classmethod
def _reconstruct(cls, kwargs):
return cls(**kwargs)
def __reduce__(self):
cdef CHdfsOptions opts = self.hdfs.options()
return (
HadoopFileSystem._reconstruct, (dict(
host=frombytes(opts.connection_config.host),
port=opts.connection_config.port,
user=frombytes(opts.connection_config.user),
replication=opts.replication,
buffer_size=opts.buffer_size,
default_block_size=opts.default_block_size,
kerb_ticket=frombytes(opts.connection_config.kerb_ticket),
extra_conf={frombytes(k): frombytes(v)
for k, v in opts.connection_config.extra_conf},
),)
)
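
# A minimal connection sketch for the class above; the host, port and user are
# placeholders, and a reachable HDFS cluster with libhdfs available is assumed.
def _example_hdfs_connect():  # illustrative helper only
    from pyarrow import fs

    hdfs = fs.HadoopFileSystem.from_uri(
        "hdfs://namenode.example.com:8020/?user=alice&replication=3")
    # Equivalent to: fs.HadoopFileSystem("namenode.example.com", port=8020,
    #                                    user="alice", replication=3)
    return hdfs.get_file_info(fs.FileSelector("/", recursive=False))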

View File

@ -0,0 +1,480 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# ----------------------------------------------------------------------
# HDFS IO implementation
# cython: language_level = 3
import re
from pyarrow.lib cimport check_status, _Weakrefable, NativeFile
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_fs cimport *
from pyarrow.lib import frombytes, tobytes, ArrowIOError
from queue import Queue, Empty as QueueEmpty, Full as QueueFull
_HDFS_PATH_RE = re.compile(r'hdfs://(.*):(\d+)(.*)')
def have_libhdfs():
try:
with nogil:
check_status(HaveLibHdfs())
return True
except Exception:
return False
def strip_hdfs_abspath(path):
m = _HDFS_PATH_RE.match(path)
if m:
return m.group(3)
else:
return path
cdef class HadoopFileSystem(_Weakrefable):
cdef:
shared_ptr[CIOHadoopFileSystem] client
cdef readonly:
bint is_open
object host
object user
object kerb_ticket
int port
dict extra_conf
def _connect(self, host, port, user, kerb_ticket, extra_conf):
cdef HdfsConnectionConfig conf
if host is not None:
conf.host = tobytes(host)
self.host = host
conf.port = port
self.port = port
if user is not None:
conf.user = tobytes(user)
self.user = user
if kerb_ticket is not None:
conf.kerb_ticket = tobytes(kerb_ticket)
self.kerb_ticket = kerb_ticket
with nogil:
check_status(HaveLibHdfs())
if extra_conf is not None and isinstance(extra_conf, dict):
conf.extra_conf = {tobytes(k): tobytes(v)
for k, v in extra_conf.items()}
self.extra_conf = extra_conf
with nogil:
check_status(CIOHadoopFileSystem.Connect(&conf, &self.client))
self.is_open = True
@classmethod
def connect(cls, *args, **kwargs):
return cls(*args, **kwargs)
def __dealloc__(self):
if self.is_open:
self.close()
def close(self):
"""
Disconnect from the HDFS cluster
"""
self._ensure_client()
with nogil:
check_status(self.client.get().Disconnect())
self.is_open = False
cdef _ensure_client(self):
if self.client.get() == NULL:
raise IOError('HDFS client improperly initialized')
elif not self.is_open:
raise IOError('HDFS client is closed')
def exists(self, path):
"""
        Returns True if the path is known to the cluster, False if it is not
        (or there is an RPC error).
"""
self._ensure_client()
cdef c_string c_path = tobytes(path)
cdef c_bool result
with nogil:
result = self.client.get().Exists(c_path)
return result
def isdir(self, path):
cdef HdfsPathInfo info
try:
self._path_info(path, &info)
except ArrowIOError:
return False
return info.kind == ObjectType_DIRECTORY
def isfile(self, path):
cdef HdfsPathInfo info
try:
self._path_info(path, &info)
except ArrowIOError:
return False
return info.kind == ObjectType_FILE
def get_capacity(self):
"""
Get reported total capacity of file system
Returns
-------
capacity : int
"""
cdef int64_t capacity = 0
with nogil:
check_status(self.client.get().GetCapacity(&capacity))
return capacity
def get_space_used(self):
"""
Get space used on file system
Returns
-------
space_used : int
"""
cdef int64_t space_used = 0
with nogil:
check_status(self.client.get().GetUsed(&space_used))
return space_used
def df(self):
"""
Return free space on disk, like the UNIX df command
Returns
-------
space : int
"""
return self.get_capacity() - self.get_space_used()
def rename(self, path, new_path):
cdef c_string c_path = tobytes(path)
cdef c_string c_new_path = tobytes(new_path)
with nogil:
check_status(self.client.get().Rename(c_path, c_new_path))
def info(self, path):
"""
Return detailed HDFS information for path
Parameters
----------
path : string
Path to file or directory
Returns
-------
path_info : dict
"""
cdef HdfsPathInfo info
self._path_info(path, &info)
return {
'path': frombytes(info.name),
'owner': frombytes(info.owner),
'group': frombytes(info.group),
'size': info.size,
'block_size': info.block_size,
'last_modified': info.last_modified_time,
'last_accessed': info.last_access_time,
'replication': info.replication,
'permissions': info.permissions,
'kind': ('directory' if info.kind == ObjectType_DIRECTORY
else 'file')
}
def stat(self, path):
"""
Return basic file system statistics about path
Parameters
----------
path : string
Path to file or directory
Returns
-------
stat : dict
"""
cdef FileStatistics info
cdef c_string c_path = tobytes(path)
with nogil:
check_status(self.client.get()
.Stat(c_path, &info))
return {
'size': info.size,
'kind': ('directory' if info.kind == ObjectType_DIRECTORY
else 'file')
}
cdef _path_info(self, path, HdfsPathInfo* info):
cdef c_string c_path = tobytes(path)
with nogil:
check_status(self.client.get()
.GetPathInfo(c_path, info))
def ls(self, path, bint full_info):
cdef:
c_string c_path = tobytes(path)
vector[HdfsPathInfo] listing
list results = []
int i
self._ensure_client()
with nogil:
check_status(self.client.get()
.ListDirectory(c_path, &listing))
cdef const HdfsPathInfo* info
for i in range(<int> listing.size()):
info = &listing[i]
# Try to trim off the hdfs://HOST:PORT piece
name = strip_hdfs_abspath(frombytes(info.name))
if full_info:
kind = ('file' if info.kind == ObjectType_FILE
else 'directory')
results.append({
'kind': kind,
'name': name,
'owner': frombytes(info.owner),
'group': frombytes(info.group),
'last_modified_time': info.last_modified_time,
'last_access_time': info.last_access_time,
'size': info.size,
'replication': info.replication,
'block_size': info.block_size,
'permissions': info.permissions
})
else:
results.append(name)
return results
def chmod(self, path, mode):
"""
Change file permissions
Parameters
----------
path : string
absolute path to file or directory
mode : int
POSIX-like bitmask
"""
self._ensure_client()
cdef c_string c_path = tobytes(path)
cdef int c_mode = mode
with nogil:
check_status(self.client.get()
.Chmod(c_path, c_mode))
def chown(self, path, owner=None, group=None):
"""
        Change file owner and/or group
Parameters
----------
path : string
absolute path to file or directory
owner : string, default None
New owner, None for no change
group : string, default None
New group, None for no change
"""
cdef:
c_string c_path
c_string c_owner
c_string c_group
const char* c_owner_ptr = NULL
const char* c_group_ptr = NULL
self._ensure_client()
c_path = tobytes(path)
if owner is not None:
c_owner = tobytes(owner)
c_owner_ptr = c_owner.c_str()
if group is not None:
c_group = tobytes(group)
c_group_ptr = c_group.c_str()
with nogil:
check_status(self.client.get()
.Chown(c_path, c_owner_ptr, c_group_ptr))
def mkdir(self, path):
"""
Create indicated directory and any necessary parent directories
"""
self._ensure_client()
cdef c_string c_path = tobytes(path)
with nogil:
check_status(self.client.get()
.MakeDirectory(c_path))
def delete(self, path, bint recursive=False):
"""
Delete the indicated file or directory
Parameters
----------
path : string
recursive : boolean, default False
If True, also delete child paths for directories
"""
self._ensure_client()
cdef c_string c_path = tobytes(path)
with nogil:
check_status(self.client.get()
.Delete(c_path, recursive == 1))
def open(self, path, mode='rb', buffer_size=None, replication=None,
default_block_size=None):
"""
Open HDFS file for reading or writing
Parameters
----------
mode : string
Must be one of 'rb', 'wb', 'ab'
Returns
-------
handle : HdfsFile
"""
self._ensure_client()
cdef HdfsFile out = HdfsFile()
if mode not in ('rb', 'wb', 'ab'):
raise Exception("Mode must be 'rb' (read), "
"'wb' (write, new file), or 'ab' (append)")
cdef c_string c_path = tobytes(path)
cdef c_bool append = False
# 0 in libhdfs means "use the default"
cdef int32_t c_buffer_size = buffer_size or 0
cdef int16_t c_replication = replication or 0
cdef int64_t c_default_block_size = default_block_size or 0
cdef shared_ptr[HdfsOutputStream] wr_handle
cdef shared_ptr[HdfsReadableFile] rd_handle
if mode in ('wb', 'ab'):
if mode == 'ab':
append = True
with nogil:
check_status(
self.client.get()
.OpenWritable(c_path, append, c_buffer_size,
c_replication, c_default_block_size,
&wr_handle))
out.set_output_stream(<shared_ptr[COutputStream]> wr_handle)
out.is_writable = True
else:
with nogil:
check_status(self.client.get()
.OpenReadable(c_path, &rd_handle))
out.set_random_access_file(
<shared_ptr[CRandomAccessFile]> rd_handle)
out.is_readable = True
assert not out.closed
if c_buffer_size == 0:
c_buffer_size = 2 ** 16
out.mode = mode
out.buffer_size = c_buffer_size
out.parent = _HdfsFileNanny(self, out)
out.own_file = True
return out
def download(self, path, stream, buffer_size=None):
with self.open(path, 'rb') as f:
f.download(stream, buffer_size=buffer_size)
def upload(self, path, stream, buffer_size=None):
"""
Upload file-like object to HDFS path
"""
with self.open(path, 'wb') as f:
f.upload(stream, buffer_size=buffer_size)
# ARROW-404: Helper class to ensure that files are closed before the
# client. During deallocation of the extension class, the attributes are
# decref'd which can cause the client to get closed first if the file has the
# last remaining reference
cdef class _HdfsFileNanny(_Weakrefable):
cdef:
object client
object file_handle_ref
def __cinit__(self, client, file_handle):
import weakref
self.client = client
self.file_handle_ref = weakref.ref(file_handle)
def __dealloc__(self):
fh = self.file_handle_ref()
if fh:
fh.close()
# avoid cyclic GC
self.file_handle_ref = None
self.client = None
cdef class HdfsFile(NativeFile):
cdef readonly:
int32_t buffer_size
object mode
object parent
def __dealloc__(self):
self.parent = None
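
# A minimal sketch of the legacy client above, reached through the deprecated
# ``pyarrow.hdfs.connect`` wrapper; connection details and paths are
# placeholders, and libhdfs must be available at runtime.
def _example_legacy_hdfs():  # illustrative helper only
    from pyarrow import hdfs

    client = hdfs.connect(host="namenode.example.com", port=8020, user="alice")
    client.mkdir("/tmp/pyarrow-example")
    with client.open("/tmp/pyarrow-example/data.bin", "wb") as f:
        f.write(b"hello hdfs")
    listing = client.ls("/tmp/pyarrow-example")
    client.delete("/tmp/pyarrow-example", recursive=True)
    client.close()
    return listing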

View File

@ -0,0 +1,261 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: profile=False
# distutils: language = c++
# cython: language_level = 3
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.lib cimport (check_status, _Weakrefable, Field, MemoryPool,
ensure_type, maybe_unbox_memory_pool,
get_input_stream, pyarrow_wrap_table,
pyarrow_wrap_data_type, pyarrow_unwrap_data_type,
pyarrow_wrap_schema, pyarrow_unwrap_schema)
cdef class ReadOptions(_Weakrefable):
"""
Options for reading JSON files.
Parameters
----------
use_threads : bool, optional (default True)
Whether to use multiple threads to accelerate reading
block_size : int, optional
        How many bytes to process at a time from the input stream.
This will determine multi-threading granularity as well as
the size of individual chunks in the Table.
"""
cdef:
CJSONReadOptions options
    # Avoid mistakenly creating attributes
__slots__ = ()
def __init__(self, use_threads=None, block_size=None):
self.options = CJSONReadOptions.Defaults()
if use_threads is not None:
self.use_threads = use_threads
if block_size is not None:
self.block_size = block_size
@property
def use_threads(self):
"""
Whether to use multiple threads to accelerate reading.
"""
return self.options.use_threads
@use_threads.setter
def use_threads(self, value):
self.options.use_threads = value
@property
def block_size(self):
"""
        How many bytes to process at a time from the input stream.
This will determine multi-threading granularity as well as the size of
individual chunks in the Table.
"""
return self.options.block_size
@block_size.setter
def block_size(self, value):
self.options.block_size = value
def __reduce__(self):
return ReadOptions, (
self.use_threads,
self.block_size
)
cdef class ParseOptions(_Weakrefable):
"""
Options for parsing JSON files.
Parameters
----------
explicit_schema : Schema, optional (default None)
Optional explicit schema (no type inference, ignores other fields).
newlines_in_values : bool, optional (default False)
Whether objects may be printed across multiple lines (for example
pretty printed). If false, input must end with an empty line.
unexpected_field_behavior : str, default "infer"
How JSON fields outside of explicit_schema (if given) are treated.
Possible behaviors:
- "ignore": unexpected JSON fields are ignored
- "error": error out on unexpected JSON fields
- "infer": unexpected JSON fields are type-inferred and included in
the output
"""
cdef:
CJSONParseOptions options
__slots__ = ()
def __init__(self, explicit_schema=None, newlines_in_values=None,
unexpected_field_behavior=None):
self.options = CJSONParseOptions.Defaults()
if explicit_schema is not None:
self.explicit_schema = explicit_schema
if newlines_in_values is not None:
self.newlines_in_values = newlines_in_values
if unexpected_field_behavior is not None:
self.unexpected_field_behavior = unexpected_field_behavior
def __reduce__(self):
return ParseOptions, (
self.explicit_schema,
self.newlines_in_values,
self.unexpected_field_behavior
)
@property
def explicit_schema(self):
"""
Optional explicit schema (no type inference, ignores other fields)
"""
if self.options.explicit_schema.get() == NULL:
return None
else:
return pyarrow_wrap_schema(self.options.explicit_schema)
@explicit_schema.setter
def explicit_schema(self, value):
self.options.explicit_schema = pyarrow_unwrap_schema(value)
@property
def newlines_in_values(self):
"""
Whether newline characters are allowed in JSON values.
Setting this to True reduces the performance of multi-threaded
JSON reading.
"""
return self.options.newlines_in_values
@newlines_in_values.setter
def newlines_in_values(self, value):
self.options.newlines_in_values = value
@property
def unexpected_field_behavior(self):
"""
How JSON fields outside of explicit_schema (if given) are treated.
Possible behaviors:
- "ignore": unexpected JSON fields are ignored
- "error": error out on unexpected JSON fields
- "infer": unexpected JSON fields are type-inferred and included in
the output
Set to "infer" by default.
"""
v = self.options.unexpected_field_behavior
if v == CUnexpectedFieldBehavior_Ignore:
return "ignore"
elif v == CUnexpectedFieldBehavior_Error:
return "error"
elif v == CUnexpectedFieldBehavior_InferType:
return "infer"
else:
raise ValueError('Unexpected value for unexpected_field_behavior')
@unexpected_field_behavior.setter
def unexpected_field_behavior(self, value):
cdef CUnexpectedFieldBehavior v
if value == "ignore":
v = CUnexpectedFieldBehavior_Ignore
elif value == "error":
v = CUnexpectedFieldBehavior_Error
elif value == "infer":
v = CUnexpectedFieldBehavior_InferType
else:
raise ValueError(
"Unexpected value `{}` for `unexpected_field_behavior`, pass "
"either `ignore`, `error` or `infer`.".format(value)
)
self.options.unexpected_field_behavior = v
cdef _get_reader(input_file, shared_ptr[CInputStream]* out):
use_memory_map = False
get_input_stream(input_file, use_memory_map, out)
cdef _get_read_options(ReadOptions read_options, CJSONReadOptions* out):
if read_options is None:
out[0] = CJSONReadOptions.Defaults()
else:
out[0] = read_options.options
cdef _get_parse_options(ParseOptions parse_options, CJSONParseOptions* out):
if parse_options is None:
out[0] = CJSONParseOptions.Defaults()
else:
out[0] = parse_options.options
def read_json(input_file, read_options=None, parse_options=None,
MemoryPool memory_pool=None):
"""
Read a Table from a stream of JSON data.
Parameters
----------
input_file : str, path or file-like object
The location of JSON data. Currently only the line-delimited JSON
format is supported.
read_options : pyarrow.json.ReadOptions, optional
Options for the JSON reader (see ReadOptions constructor for defaults).
parse_options : pyarrow.json.ParseOptions, optional
Options for the JSON parser
(see ParseOptions constructor for defaults).
memory_pool : MemoryPool, optional
Pool to allocate Table memory from.
Returns
-------
:class:`pyarrow.Table`
        Contents of the JSON file as an in-memory table.
"""
cdef:
shared_ptr[CInputStream] stream
CJSONReadOptions c_read_options
CJSONParseOptions c_parse_options
shared_ptr[CJSONReader] reader
shared_ptr[CTable] table
_get_reader(input_file, &stream)
_get_read_options(read_options, &c_read_options)
_get_parse_options(parse_options, &c_parse_options)
reader = GetResultValue(
CJSONReader.Make(maybe_unbox_memory_pool(memory_pool),
stream, c_read_options, c_parse_options))
with nogil:
table = GetResultValue(reader.get().Read())
return pyarrow_wrap_table(table)
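
# A minimal sketch of the reader above: parse newline-delimited JSON with
# explicit read/parse options. The file name and schema are made up for
# illustration.
def _example_read_json():  # illustrative helper only
    import pyarrow as pa
    from pyarrow import json

    read_opts = json.ReadOptions(use_threads=True, block_size=1 << 20)
    parse_opts = json.ParseOptions(
        explicit_schema=pa.schema([("id", pa.int64()), ("name", pa.string())]),
        unexpected_field_behavior="ignore",
    )
    return json.read_json("records.jsonl",
                          read_options=read_opts, parse_options=parse_opts)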

View File

@ -0,0 +1,134 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
# cython: language_level = 3
from libcpp cimport bool as c_bool
from libc.string cimport const_char
from libcpp.vector cimport vector as std_vector
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport (CArray, CSchema, CStatus,
CResult, CTable, CMemoryPool,
CKeyValueMetadata,
CRecordBatch,
CTable, CCompressionType,
CRandomAccessFile, COutputStream,
TimeUnit)
cdef extern from "arrow/adapters/orc/options.h" \
namespace "arrow::adapters::orc" nogil:
cdef enum CompressionStrategy \
" arrow::adapters::orc::CompressionStrategy":
_CompressionStrategy_SPEED \
" arrow::adapters::orc::CompressionStrategy::kSpeed"
_CompressionStrategy_COMPRESSION \
" arrow::adapters::orc::CompressionStrategy::kCompression"
cdef enum WriterId" arrow::adapters::orc::WriterId":
_WriterId_ORC_JAVA_WRITER" arrow::adapters::orc::WriterId::kOrcJava"
_WriterId_ORC_CPP_WRITER" arrow::adapters::orc::WriterId::kOrcCpp"
_WriterId_PRESTO_WRITER" arrow::adapters::orc::WriterId::kPresto"
_WriterId_SCRITCHLEY_GO \
" arrow::adapters::orc::WriterId::kScritchleyGo"
_WriterId_TRINO_WRITER" arrow::adapters::orc::WriterId::kTrino"
_WriterId_UNKNOWN_WRITER" arrow::adapters::orc::WriterId::kUnknown"
cdef enum WriterVersion" arrow::adapters::orc::WriterVersion":
_WriterVersion_ORIGINAL \
" arrow::adapters::orc::WriterVersion::kOriginal"
_WriterVersion_HIVE_8732 \
" arrow::adapters::orc::WriterVersion::kHive8732"
_WriterVersion_HIVE_4243 \
" arrow::adapters::orc::WriterVersion::kHive4243"
_WriterVersion_HIVE_12055 \
" arrow::adapters::orc::WriterVersion::kHive12055"
_WriterVersion_HIVE_13083 \
" arrow::adapters::orc::WriterVersion::kHive13083"
_WriterVersion_ORC_101" arrow::adapters::orc::WriterVersion::kOrc101"
_WriterVersion_ORC_135" arrow::adapters::orc::WriterVersion::kOrc135"
_WriterVersion_ORC_517" arrow::adapters::orc::WriterVersion::kOrc517"
_WriterVersion_ORC_203" arrow::adapters::orc::WriterVersion::kOrc203"
_WriterVersion_ORC_14" arrow::adapters::orc::WriterVersion::kOrc14"
_WriterVersion_MAX" arrow::adapters::orc::WriterVersion::kMax"
cdef cppclass FileVersion" arrow::adapters::orc::FileVersion":
FileVersion(uint32_t major_version, uint32_t minor_version)
uint32_t major_version()
uint32_t minor_version()
c_string ToString()
cdef struct WriteOptions" arrow::adapters::orc::WriteOptions":
int64_t batch_size
FileVersion file_version
int64_t stripe_size
CCompressionType compression
int64_t compression_block_size
CompressionStrategy compression_strategy
int64_t row_index_stride
double padding_tolerance
double dictionary_key_size_threshold
std_vector[int64_t] bloom_filter_columns
double bloom_filter_fpp
cdef extern from "arrow/adapters/orc/adapter.h" \
namespace "arrow::adapters::orc" nogil:
cdef cppclass ORCFileReader:
@staticmethod
CResult[unique_ptr[ORCFileReader]] Open(
const shared_ptr[CRandomAccessFile]& file,
CMemoryPool* pool)
CResult[shared_ptr[const CKeyValueMetadata]] ReadMetadata()
CResult[shared_ptr[CSchema]] ReadSchema()
CResult[shared_ptr[CRecordBatch]] ReadStripe(int64_t stripe)
CResult[shared_ptr[CRecordBatch]] ReadStripe(
int64_t stripe, std_vector[c_string])
CResult[shared_ptr[CTable]] Read()
CResult[shared_ptr[CTable]] Read(std_vector[c_string])
int64_t NumberOfStripes()
int64_t NumberOfRows()
FileVersion GetFileVersion()
c_string GetSoftwareVersion()
CResult[CCompressionType] GetCompression()
int64_t GetCompressionSize()
int64_t GetRowIndexStride()
WriterId GetWriterId()
int32_t GetWriterIdValue()
WriterVersion GetWriterVersion()
int64_t GetNumberOfStripeStatistics()
int64_t GetContentLength()
int64_t GetStripeStatisticsLength()
int64_t GetFileFooterLength()
int64_t GetFilePostscriptLength()
int64_t GetFileLength()
c_string GetSerializedFileTail()
cdef cppclass ORCFileWriter:
@staticmethod
CResult[unique_ptr[ORCFileWriter]] Open(
COutputStream* output_stream, const WriteOptions& writer_options)
CStatus Write(const CTable& table)
CStatus Close()

View File

@ -0,0 +1,449 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: profile=False
# distutils: language = c++
from cython.operator cimport dereference as deref
from libcpp.vector cimport vector as std_vector
from libcpp.utility cimport move
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.lib cimport (check_status, _Weakrefable,
MemoryPool, maybe_unbox_memory_pool,
Schema, pyarrow_wrap_schema,
KeyValueMetadata,
pyarrow_wrap_batch,
RecordBatch,
Table,
pyarrow_wrap_table,
pyarrow_unwrap_schema,
pyarrow_wrap_metadata,
pyarrow_unwrap_table,
get_reader,
get_writer)
from pyarrow.lib import frombytes, tobytes
from pyarrow.util import _stringify_path
cdef compression_type_from_enum(CCompressionType compression_type):
compression_map = {
CCompressionType_UNCOMPRESSED: 'UNCOMPRESSED',
CCompressionType_GZIP: 'ZLIB',
CCompressionType_SNAPPY: 'SNAPPY',
CCompressionType_LZ4: 'LZ4',
CCompressionType_ZSTD: 'ZSTD',
}
if compression_type in compression_map:
return compression_map[compression_type]
raise ValueError('Unsupported compression')
cdef CCompressionType compression_type_from_name(name) except *:
if not isinstance(name, str):
raise TypeError('compression must be a string')
name = name.upper()
if name == 'ZLIB':
return CCompressionType_GZIP
elif name == 'SNAPPY':
return CCompressionType_SNAPPY
elif name == 'LZ4':
return CCompressionType_LZ4
elif name == 'ZSTD':
return CCompressionType_ZSTD
elif name == 'UNCOMPRESSED':
return CCompressionType_UNCOMPRESSED
raise ValueError(f'Unknown CompressionKind: {name}')
cdef compression_strategy_from_enum(
CompressionStrategy compression_strategy
):
compression_strategy_map = {
_CompressionStrategy_SPEED: 'SPEED',
_CompressionStrategy_COMPRESSION: 'COMPRESSION',
}
if compression_strategy in compression_strategy_map:
return compression_strategy_map[compression_strategy]
raise ValueError('Unsupported compression strategy')
cdef CompressionStrategy compression_strategy_from_name(name) except *:
if not isinstance(name, str):
raise TypeError('compression strategy must be a string')
name = name.upper()
if name == 'COMPRESSION':
return _CompressionStrategy_COMPRESSION
elif name == 'SPEED':
return _CompressionStrategy_SPEED
raise ValueError(f'Unknown CompressionStrategy: {name}')
cdef file_version_from_class(FileVersion file_version):
return frombytes(file_version.ToString())
cdef writer_id_from_enum(WriterId writer_id):
writer_id_map = {
_WriterId_ORC_JAVA_WRITER: 'ORC_JAVA',
_WriterId_ORC_CPP_WRITER: 'ORC_CPP',
_WriterId_PRESTO_WRITER: 'PRESTO',
_WriterId_SCRITCHLEY_GO: 'SCRITCHLEY_GO',
        _WriterId_TRINO_WRITER: 'TRINO',
        _WriterId_UNKNOWN_WRITER: 'UNKNOWN',
}
if writer_id in writer_id_map:
return writer_id_map[writer_id]
raise ValueError('Unsupported writer ID')
cdef writer_version_from_enum(WriterVersion writer_version):
writer_version_map = {
_WriterVersion_ORIGINAL: 'ORIGINAL',
_WriterVersion_HIVE_8732: 'HIVE_8732',
_WriterVersion_HIVE_4243: 'HIVE_4243',
_WriterVersion_HIVE_12055: 'HIVE_12055',
_WriterVersion_HIVE_13083: 'HIVE_13083',
_WriterVersion_ORC_101: 'ORC_101',
_WriterVersion_ORC_135: 'ORC_135',
_WriterVersion_ORC_517: 'ORC_517',
_WriterVersion_ORC_203: 'ORC_203',
_WriterVersion_ORC_14: 'ORC_14',
}
if writer_version in writer_version_map:
return writer_version_map[writer_version]
raise ValueError('Unsupported writer version')
cdef shared_ptr[WriteOptions] _create_write_options(
file_version=None,
batch_size=None,
stripe_size=None,
compression=None,
compression_block_size=None,
compression_strategy=None,
row_index_stride=None,
padding_tolerance=None,
dictionary_key_size_threshold=None,
bloom_filter_columns=None,
bloom_filter_fpp=None
) except *:
"""General writer options"""
cdef:
shared_ptr[WriteOptions] options
options = make_shared[WriteOptions]()
# batch_size
if batch_size is not None:
if isinstance(batch_size, int) and batch_size > 0:
deref(options).batch_size = batch_size
else:
raise ValueError(f"Invalid ORC writer batch size: {batch_size}")
# file_version
if file_version is not None:
if file_version == "0.12":
deref(options).file_version = FileVersion(0, 12)
elif file_version == "0.11":
deref(options).file_version = FileVersion(0, 11)
else:
raise ValueError(f"Unsupported ORC file version: {file_version}")
# stripe_size
if stripe_size is not None:
if isinstance(stripe_size, int) and stripe_size > 0:
deref(options).stripe_size = stripe_size
else:
raise ValueError(f"Invalid ORC stripe size: {stripe_size}")
# compression
if compression is not None:
if isinstance(compression, str):
deref(options).compression = compression_type_from_name(
compression)
else:
raise TypeError("Unsupported ORC compression type: "
f"{compression}")
# compression_block_size
if compression_block_size is not None:
if (isinstance(compression_block_size, int) and
compression_block_size > 0):
deref(options).compression_block_size = compression_block_size
else:
raise ValueError("Invalid ORC compression block size: "
f"{compression_block_size}")
# compression_strategy
if compression_strategy is not None:
        if isinstance(compression_strategy, str):
deref(options).compression_strategy = \
compression_strategy_from_name(compression_strategy)
else:
raise TypeError("Unsupported ORC compression strategy: "
f"{compression_strategy}")
# row_index_stride
if row_index_stride is not None:
if isinstance(row_index_stride, int) and row_index_stride > 0:
deref(options).row_index_stride = row_index_stride
else:
raise ValueError("Invalid ORC row index stride: "
f"{row_index_stride}")
# padding_tolerance
if padding_tolerance is not None:
try:
padding_tolerance = float(padding_tolerance)
deref(options).padding_tolerance = padding_tolerance
except Exception:
raise ValueError("Invalid ORC padding tolerance: "
f"{padding_tolerance}")
# dictionary_key_size_threshold
if dictionary_key_size_threshold is not None:
try:
dictionary_key_size_threshold = float(
dictionary_key_size_threshold)
assert 0 <= dictionary_key_size_threshold <= 1
deref(options).dictionary_key_size_threshold = \
dictionary_key_size_threshold
except Exception:
raise ValueError("Invalid ORC dictionary key size threshold: "
f"{dictionary_key_size_threshold}")
# bloom_filter_columns
if bloom_filter_columns is not None:
try:
bloom_filter_columns = list(bloom_filter_columns)
for col in bloom_filter_columns:
assert isinstance(col, int) and col >= 0
deref(options).bloom_filter_columns = bloom_filter_columns
except Exception:
raise ValueError("Invalid ORC BloomFilter columns: "
f"{bloom_filter_columns}")
# Max false positive rate of the Bloom Filter
if bloom_filter_fpp is not None:
try:
bloom_filter_fpp = float(bloom_filter_fpp)
assert 0 <= bloom_filter_fpp <= 1
deref(options).bloom_filter_fpp = bloom_filter_fpp
except Exception:
raise ValueError("Invalid ORC BloomFilter false positive rate: "
f"{bloom_filter_fpp}")
return options
cdef class ORCReader(_Weakrefable):
cdef:
object source
CMemoryPool* allocator
unique_ptr[ORCFileReader] reader
def __cinit__(self, MemoryPool memory_pool=None):
self.allocator = maybe_unbox_memory_pool(memory_pool)
def open(self, object source, c_bool use_memory_map=True):
cdef:
shared_ptr[CRandomAccessFile] rd_handle
self.source = source
get_reader(source, use_memory_map, &rd_handle)
with nogil:
self.reader = move(GetResultValue(
ORCFileReader.Open(rd_handle, self.allocator)
))
def metadata(self):
"""
The arrow metadata for this file.
Returns
-------
metadata : pyarrow.KeyValueMetadata
"""
cdef:
shared_ptr[const CKeyValueMetadata] sp_arrow_metadata
with nogil:
sp_arrow_metadata = GetResultValue(
deref(self.reader).ReadMetadata()
)
return pyarrow_wrap_metadata(sp_arrow_metadata)
def schema(self):
"""
The arrow schema for this file.
Returns
-------
schema : pyarrow.Schema
"""
cdef:
shared_ptr[CSchema] sp_arrow_schema
with nogil:
sp_arrow_schema = GetResultValue(deref(self.reader).ReadSchema())
return pyarrow_wrap_schema(sp_arrow_schema)
def nrows(self):
return deref(self.reader).NumberOfRows()
def nstripes(self):
return deref(self.reader).NumberOfStripes()
def file_version(self):
return file_version_from_class(deref(self.reader).GetFileVersion())
def software_version(self):
return frombytes(deref(self.reader).GetSoftwareVersion())
def compression(self):
return compression_type_from_enum(
GetResultValue(deref(self.reader).GetCompression()))
def compression_size(self):
return deref(self.reader).GetCompressionSize()
def row_index_stride(self):
return deref(self.reader).GetRowIndexStride()
def writer(self):
writer_name = writer_id_from_enum(deref(self.reader).GetWriterId())
if writer_name == 'UNKNOWN':
return deref(self.reader).GetWriterIdValue()
else:
return writer_name
def writer_version(self):
return writer_version_from_enum(deref(self.reader).GetWriterVersion())
def nstripe_statistics(self):
return deref(self.reader).GetNumberOfStripeStatistics()
def content_length(self):
return deref(self.reader).GetContentLength()
def stripe_statistics_length(self):
return deref(self.reader).GetStripeStatisticsLength()
def file_footer_length(self):
return deref(self.reader).GetFileFooterLength()
def file_postscript_length(self):
return deref(self.reader).GetFilePostscriptLength()
def file_length(self):
return deref(self.reader).GetFileLength()
def serialized_file_tail(self):
return deref(self.reader).GetSerializedFileTail()
def read_stripe(self, n, columns=None):
cdef:
shared_ptr[CRecordBatch] sp_record_batch
RecordBatch batch
int64_t stripe
std_vector[c_string] c_names
stripe = n
if columns is None:
with nogil:
sp_record_batch = GetResultValue(
deref(self.reader).ReadStripe(stripe)
)
else:
c_names = [tobytes(name) for name in columns]
with nogil:
sp_record_batch = GetResultValue(
deref(self.reader).ReadStripe(stripe, c_names)
)
return pyarrow_wrap_batch(sp_record_batch)
def read(self, columns=None):
cdef:
shared_ptr[CTable] sp_table
std_vector[c_string] c_names
if columns is None:
with nogil:
sp_table = GetResultValue(deref(self.reader).Read())
else:
c_names = [tobytes(name) for name in columns]
with nogil:
sp_table = GetResultValue(deref(self.reader).Read(c_names))
return pyarrow_wrap_table(sp_table)
cdef class ORCWriter(_Weakrefable):
cdef:
unique_ptr[ORCFileWriter] writer
shared_ptr[COutputStream] sink
c_bool own_sink
def open(self, object where, *,
file_version=None,
batch_size=None,
stripe_size=None,
compression=None,
compression_block_size=None,
compression_strategy=None,
row_index_stride=None,
padding_tolerance=None,
dictionary_key_size_threshold=None,
bloom_filter_columns=None,
bloom_filter_fpp=None):
cdef:
shared_ptr[WriteOptions] write_options
c_string c_where
try:
where = _stringify_path(where)
except TypeError:
get_writer(where, &self.sink)
self.own_sink = False
else:
c_where = tobytes(where)
with nogil:
self.sink = GetResultValue(FileOutputStream.Open(c_where))
self.own_sink = True
write_options = _create_write_options(
file_version=file_version,
batch_size=batch_size,
stripe_size=stripe_size,
compression=compression,
compression_block_size=compression_block_size,
compression_strategy=compression_strategy,
row_index_stride=row_index_stride,
padding_tolerance=padding_tolerance,
dictionary_key_size_threshold=dictionary_key_size_threshold,
bloom_filter_columns=bloom_filter_columns,
bloom_filter_fpp=bloom_filter_fpp
)
with nogil:
self.writer = move(GetResultValue(
ORCFileWriter.Open(self.sink.get(),
deref(write_options))))
def write(self, Table table):
cdef:
shared_ptr[CTable] sp_table
sp_table = pyarrow_unwrap_table(table)
with nogil:
check_status(deref(self.writer).Write(deref(sp_table)))
def close(self):
with nogil:
check_status(deref(self.writer).Close())
if self.own_sink:
check_status(deref(self.sink).Close())
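
# A minimal ORC round trip through the public ``pyarrow.orc`` wrappers, which
# sit on top of the ORCReader/ORCWriter classes above; the file name and option
# values are illustrative only.
def _example_orc_roundtrip():  # illustrative helper only
    import pyarrow as pa
    from pyarrow import orc

    table = pa.table({"x": [1, 2, 3], "y": ["a", "b", "c"]})
    orc.write_table(table, "example.orc", compression="zstd")

    f = orc.ORCFile("example.orc")
    assert f.nstripes >= 1
    return f.read(columns=["x"])  # column projection, see Read(std_vector[c_string])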

View File

@ -0,0 +1,638 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
# cython: language_level = 3
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport (CChunkedArray, CScalar, CSchema, CStatus,
CTable, CMemoryPool, CBuffer,
CKeyValueMetadata,
CRandomAccessFile, COutputStream,
TimeUnit, CRecordBatchReader)
from pyarrow.lib cimport _Weakrefable
cdef extern from "parquet/api/schema.h" namespace "parquet::schema" nogil:
cdef cppclass Node:
pass
cdef cppclass GroupNode(Node):
pass
cdef cppclass PrimitiveNode(Node):
pass
cdef cppclass ColumnPath:
c_string ToDotString()
vector[c_string] ToDotVector()
cdef extern from "parquet/api/schema.h" namespace "parquet" nogil:
enum ParquetType" parquet::Type::type":
ParquetType_BOOLEAN" parquet::Type::BOOLEAN"
ParquetType_INT32" parquet::Type::INT32"
ParquetType_INT64" parquet::Type::INT64"
ParquetType_INT96" parquet::Type::INT96"
ParquetType_FLOAT" parquet::Type::FLOAT"
ParquetType_DOUBLE" parquet::Type::DOUBLE"
ParquetType_BYTE_ARRAY" parquet::Type::BYTE_ARRAY"
ParquetType_FIXED_LEN_BYTE_ARRAY" parquet::Type::FIXED_LEN_BYTE_ARRAY"
enum ParquetLogicalTypeId" parquet::LogicalType::Type::type":
ParquetLogicalType_UNDEFINED" parquet::LogicalType::Type::UNDEFINED"
ParquetLogicalType_STRING" parquet::LogicalType::Type::STRING"
ParquetLogicalType_MAP" parquet::LogicalType::Type::MAP"
ParquetLogicalType_LIST" parquet::LogicalType::Type::LIST"
ParquetLogicalType_ENUM" parquet::LogicalType::Type::ENUM"
ParquetLogicalType_DECIMAL" parquet::LogicalType::Type::DECIMAL"
ParquetLogicalType_DATE" parquet::LogicalType::Type::DATE"
ParquetLogicalType_TIME" parquet::LogicalType::Type::TIME"
ParquetLogicalType_TIMESTAMP" parquet::LogicalType::Type::TIMESTAMP"
ParquetLogicalType_INT" parquet::LogicalType::Type::INT"
ParquetLogicalType_JSON" parquet::LogicalType::Type::JSON"
ParquetLogicalType_BSON" parquet::LogicalType::Type::BSON"
ParquetLogicalType_UUID" parquet::LogicalType::Type::UUID"
ParquetLogicalType_NONE" parquet::LogicalType::Type::NONE"
enum ParquetTimeUnit" parquet::LogicalType::TimeUnit::unit":
ParquetTimeUnit_UNKNOWN" parquet::LogicalType::TimeUnit::UNKNOWN"
ParquetTimeUnit_MILLIS" parquet::LogicalType::TimeUnit::MILLIS"
ParquetTimeUnit_MICROS" parquet::LogicalType::TimeUnit::MICROS"
ParquetTimeUnit_NANOS" parquet::LogicalType::TimeUnit::NANOS"
enum ParquetConvertedType" parquet::ConvertedType::type":
ParquetConvertedType_NONE" parquet::ConvertedType::NONE"
ParquetConvertedType_UTF8" parquet::ConvertedType::UTF8"
ParquetConvertedType_MAP" parquet::ConvertedType::MAP"
ParquetConvertedType_MAP_KEY_VALUE \
" parquet::ConvertedType::MAP_KEY_VALUE"
ParquetConvertedType_LIST" parquet::ConvertedType::LIST"
ParquetConvertedType_ENUM" parquet::ConvertedType::ENUM"
ParquetConvertedType_DECIMAL" parquet::ConvertedType::DECIMAL"
ParquetConvertedType_DATE" parquet::ConvertedType::DATE"
ParquetConvertedType_TIME_MILLIS" parquet::ConvertedType::TIME_MILLIS"
ParquetConvertedType_TIME_MICROS" parquet::ConvertedType::TIME_MICROS"
ParquetConvertedType_TIMESTAMP_MILLIS \
" parquet::ConvertedType::TIMESTAMP_MILLIS"
ParquetConvertedType_TIMESTAMP_MICROS \
" parquet::ConvertedType::TIMESTAMP_MICROS"
ParquetConvertedType_UINT_8" parquet::ConvertedType::UINT_8"
ParquetConvertedType_UINT_16" parquet::ConvertedType::UINT_16"
ParquetConvertedType_UINT_32" parquet::ConvertedType::UINT_32"
ParquetConvertedType_UINT_64" parquet::ConvertedType::UINT_64"
ParquetConvertedType_INT_8" parquet::ConvertedType::INT_8"
ParquetConvertedType_INT_16" parquet::ConvertedType::INT_16"
ParquetConvertedType_INT_32" parquet::ConvertedType::INT_32"
ParquetConvertedType_INT_64" parquet::ConvertedType::INT_64"
ParquetConvertedType_JSON" parquet::ConvertedType::JSON"
ParquetConvertedType_BSON" parquet::ConvertedType::BSON"
ParquetConvertedType_INTERVAL" parquet::ConvertedType::INTERVAL"
enum ParquetRepetition" parquet::Repetition::type":
ParquetRepetition_REQUIRED" parquet::REPETITION::REQUIRED"
ParquetRepetition_OPTIONAL" parquet::REPETITION::OPTIONAL"
ParquetRepetition_REPEATED" parquet::REPETITION::REPEATED"
enum ParquetEncoding" parquet::Encoding::type":
ParquetEncoding_PLAIN" parquet::Encoding::PLAIN"
ParquetEncoding_PLAIN_DICTIONARY" parquet::Encoding::PLAIN_DICTIONARY"
ParquetEncoding_RLE" parquet::Encoding::RLE"
ParquetEncoding_BIT_PACKED" parquet::Encoding::BIT_PACKED"
ParquetEncoding_DELTA_BINARY_PACKED \
" parquet::Encoding::DELTA_BINARY_PACKED"
ParquetEncoding_DELTA_LENGTH_BYTE_ARRAY \
" parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY"
ParquetEncoding_DELTA_BYTE_ARRAY" parquet::Encoding::DELTA_BYTE_ARRAY"
ParquetEncoding_RLE_DICTIONARY" parquet::Encoding::RLE_DICTIONARY"
ParquetEncoding_BYTE_STREAM_SPLIT \
" parquet::Encoding::BYTE_STREAM_SPLIT"
enum ParquetCompression" parquet::Compression::type":
ParquetCompression_UNCOMPRESSED" parquet::Compression::UNCOMPRESSED"
ParquetCompression_SNAPPY" parquet::Compression::SNAPPY"
ParquetCompression_GZIP" parquet::Compression::GZIP"
ParquetCompression_LZO" parquet::Compression::LZO"
ParquetCompression_BROTLI" parquet::Compression::BROTLI"
ParquetCompression_LZ4" parquet::Compression::LZ4"
ParquetCompression_ZSTD" parquet::Compression::ZSTD"
enum ParquetVersion" parquet::ParquetVersion::type":
ParquetVersion_V1" parquet::ParquetVersion::PARQUET_1_0"
ParquetVersion_V2_0" parquet::ParquetVersion::PARQUET_2_0"
ParquetVersion_V2_4" parquet::ParquetVersion::PARQUET_2_4"
ParquetVersion_V2_6" parquet::ParquetVersion::PARQUET_2_6"
enum ParquetSortOrder" parquet::SortOrder::type":
ParquetSortOrder_SIGNED" parquet::SortOrder::SIGNED"
ParquetSortOrder_UNSIGNED" parquet::SortOrder::UNSIGNED"
ParquetSortOrder_UNKNOWN" parquet::SortOrder::UNKNOWN"
cdef cppclass CParquetLogicalType" parquet::LogicalType":
c_string ToString() const
c_string ToJSON() const
ParquetLogicalTypeId type() const
cdef cppclass CParquetDecimalType \
" parquet::DecimalLogicalType"(CParquetLogicalType):
int32_t precision() const
int32_t scale() const
cdef cppclass CParquetIntType \
" parquet::IntLogicalType"(CParquetLogicalType):
int bit_width() const
c_bool is_signed() const
cdef cppclass CParquetTimeType \
" parquet::TimeLogicalType"(CParquetLogicalType):
c_bool is_adjusted_to_utc() const
ParquetTimeUnit time_unit() const
cdef cppclass CParquetTimestampType \
" parquet::TimestampLogicalType"(CParquetLogicalType):
c_bool is_adjusted_to_utc() const
ParquetTimeUnit time_unit() const
cdef cppclass ColumnDescriptor" parquet::ColumnDescriptor":
c_bool Equals(const ColumnDescriptor& other)
shared_ptr[ColumnPath] path()
int16_t max_definition_level()
int16_t max_repetition_level()
ParquetType physical_type()
const shared_ptr[const CParquetLogicalType]& logical_type()
ParquetConvertedType converted_type()
const c_string& name()
int type_length()
int type_precision()
int type_scale()
cdef cppclass SchemaDescriptor:
const ColumnDescriptor* Column(int i)
shared_ptr[Node] schema()
GroupNode* group()
c_bool Equals(const SchemaDescriptor& other)
c_string ToString()
int num_columns()
cdef c_string FormatStatValue(ParquetType parquet_type, c_string val)
enum ParquetCipher" parquet::ParquetCipher::type":
ParquetCipher_AES_GCM_V1" parquet::ParquetCipher::AES_GCM_V1"
ParquetCipher_AES_GCM_CTR_V1" parquet::ParquetCipher::AES_GCM_CTR_V1"
struct AadMetadata:
c_string aad_prefix
c_string aad_file_unique
c_bool supply_aad_prefix
struct EncryptionAlgorithm:
ParquetCipher algorithm
AadMetadata aad
cdef extern from "parquet/api/reader.h" namespace "parquet" nogil:
cdef cppclass ColumnReader:
pass
cdef cppclass BoolReader(ColumnReader):
pass
cdef cppclass Int32Reader(ColumnReader):
pass
cdef cppclass Int64Reader(ColumnReader):
pass
cdef cppclass Int96Reader(ColumnReader):
pass
cdef cppclass FloatReader(ColumnReader):
pass
cdef cppclass DoubleReader(ColumnReader):
pass
cdef cppclass ByteArrayReader(ColumnReader):
pass
cdef cppclass RowGroupReader:
pass
cdef cppclass CEncodedStatistics" parquet::EncodedStatistics":
const c_string& max() const
const c_string& min() const
int64_t null_count
int64_t distinct_count
bint has_min
bint has_max
bint has_null_count
bint has_distinct_count
cdef cppclass ParquetByteArray" parquet::ByteArray":
uint32_t len
const uint8_t* ptr
cdef cppclass ParquetFLBA" parquet::FLBA":
const uint8_t* ptr
cdef cppclass CStatistics" parquet::Statistics":
int64_t null_count() const
int64_t distinct_count() const
int64_t num_values() const
bint HasMinMax()
bint HasNullCount()
bint HasDistinctCount()
c_bool Equals(const CStatistics&) const
void Reset()
c_string EncodeMin()
c_string EncodeMax()
CEncodedStatistics Encode()
void SetComparator()
ParquetType physical_type() const
const ColumnDescriptor* descr() const
cdef cppclass CBoolStatistics" parquet::BoolStatistics"(CStatistics):
c_bool min()
c_bool max()
cdef cppclass CInt32Statistics" parquet::Int32Statistics"(CStatistics):
int32_t min()
int32_t max()
cdef cppclass CInt64Statistics" parquet::Int64Statistics"(CStatistics):
int64_t min()
int64_t max()
cdef cppclass CFloatStatistics" parquet::FloatStatistics"(CStatistics):
float min()
float max()
cdef cppclass CDoubleStatistics" parquet::DoubleStatistics"(CStatistics):
double min()
double max()
cdef cppclass CByteArrayStatistics \
" parquet::ByteArrayStatistics"(CStatistics):
ParquetByteArray min()
ParquetByteArray max()
cdef cppclass CFLBAStatistics" parquet::FLBAStatistics"(CStatistics):
ParquetFLBA min()
ParquetFLBA max()
cdef cppclass CColumnCryptoMetaData" parquet::ColumnCryptoMetaData":
shared_ptr[ColumnPath] path_in_schema() const
c_bool encrypted_with_footer_key() const
const c_string& key_metadata() const
cdef cppclass CColumnChunkMetaData" parquet::ColumnChunkMetaData":
int64_t file_offset() const
const c_string& file_path() const
c_bool is_metadata_set() const
ParquetType type() const
int64_t num_values() const
shared_ptr[ColumnPath] path_in_schema() const
bint is_stats_set() const
shared_ptr[CStatistics] statistics() const
ParquetCompression compression() const
const vector[ParquetEncoding]& encodings() const
c_bool Equals(const CColumnChunkMetaData&) const
int64_t has_dictionary_page() const
int64_t dictionary_page_offset() const
int64_t data_page_offset() const
int64_t index_page_offset() const
int64_t total_compressed_size() const
int64_t total_uncompressed_size() const
unique_ptr[CColumnCryptoMetaData] crypto_metadata() const
cdef cppclass CRowGroupMetaData" parquet::RowGroupMetaData":
c_bool Equals(const CRowGroupMetaData&) const
int num_columns()
int64_t num_rows()
int64_t total_byte_size()
unique_ptr[CColumnChunkMetaData] ColumnChunk(int i) const
cdef cppclass CFileMetaData" parquet::FileMetaData":
c_bool Equals(const CFileMetaData&) const
uint32_t size()
int num_columns()
int64_t num_rows()
int num_row_groups()
ParquetVersion version()
const c_string created_by()
int num_schema_elements()
void set_file_path(const c_string& path)
void AppendRowGroups(const CFileMetaData& other) except +
unique_ptr[CRowGroupMetaData] RowGroup(int i)
const SchemaDescriptor* schema()
shared_ptr[const CKeyValueMetadata] key_value_metadata() const
void WriteTo(COutputStream* dst) const
inline c_bool is_encryption_algorithm_set() const
inline EncryptionAlgorithm encryption_algorithm() const
inline const c_string& footer_signing_key_metadata() const
cdef shared_ptr[CFileMetaData] CFileMetaData_Make \
" parquet::FileMetaData::Make"(const void* serialized_metadata,
uint32_t* metadata_len)
cdef cppclass CReaderProperties" parquet::ReaderProperties":
c_bool is_buffered_stream_enabled() const
void enable_buffered_stream()
void disable_buffered_stream()
void set_buffer_size(int64_t buf_size)
int64_t buffer_size() const
void file_decryption_properties(shared_ptr[CFileDecryptionProperties]
decryption)
shared_ptr[CFileDecryptionProperties] file_decryption_properties() \
const
CReaderProperties default_reader_properties()
cdef cppclass ArrowReaderProperties:
ArrowReaderProperties()
void set_read_dictionary(int column_index, c_bool read_dict)
c_bool read_dictionary()
void set_batch_size(int64_t batch_size)
int64_t batch_size()
void set_pre_buffer(c_bool pre_buffer)
c_bool pre_buffer() const
void set_coerce_int96_timestamp_unit(TimeUnit unit)
TimeUnit coerce_int96_timestamp_unit() const
ArrowReaderProperties default_arrow_reader_properties()
cdef cppclass ParquetFileReader:
shared_ptr[CFileMetaData] metadata()
cdef extern from "parquet/api/writer.h" namespace "parquet" nogil:
cdef cppclass WriterProperties:
cppclass Builder:
Builder* data_page_version(ParquetDataPageVersion version)
Builder* version(ParquetVersion version)
Builder* compression(ParquetCompression codec)
Builder* compression(const c_string& path,
ParquetCompression codec)
Builder* compression_level(int compression_level)
Builder* compression_level(const c_string& path,
int compression_level)
Builder* encryption(
shared_ptr[CFileEncryptionProperties]
file_encryption_properties)
Builder* disable_dictionary()
Builder* enable_dictionary()
Builder* enable_dictionary(const c_string& path)
Builder* disable_statistics()
Builder* enable_statistics()
Builder* enable_statistics(const c_string& path)
Builder* data_pagesize(int64_t size)
Builder* encoding(ParquetEncoding encoding)
Builder* encoding(const c_string& path,
ParquetEncoding encoding)
Builder* write_batch_size(int64_t batch_size)
Builder* dictionary_pagesize_limit(int64_t dictionary_pagesize_limit)
shared_ptr[WriterProperties] build()
cdef cppclass ArrowWriterProperties:
cppclass Builder:
Builder()
Builder* disable_deprecated_int96_timestamps()
Builder* enable_deprecated_int96_timestamps()
Builder* coerce_timestamps(TimeUnit unit)
Builder* allow_truncated_timestamps()
Builder* disallow_truncated_timestamps()
Builder* store_schema()
Builder* enable_compliant_nested_types()
Builder* disable_compliant_nested_types()
Builder* set_engine_version(ArrowWriterEngineVersion version)
shared_ptr[ArrowWriterProperties] build()
c_bool support_deprecated_int96_timestamps()
cdef extern from "parquet/arrow/reader.h" namespace "parquet::arrow" nogil:
cdef cppclass FileReader:
FileReader(CMemoryPool* pool, unique_ptr[ParquetFileReader] reader)
CStatus GetSchema(shared_ptr[CSchema]* out)
CStatus ReadColumn(int i, shared_ptr[CChunkedArray]* out)
CStatus ReadSchemaField(int i, shared_ptr[CChunkedArray]* out)
int num_row_groups()
CStatus ReadRowGroup(int i, shared_ptr[CTable]* out)
CStatus ReadRowGroup(int i, const vector[int]& column_indices,
shared_ptr[CTable]* out)
CStatus ReadRowGroups(const vector[int]& row_groups,
shared_ptr[CTable]* out)
CStatus ReadRowGroups(const vector[int]& row_groups,
const vector[int]& column_indices,
shared_ptr[CTable]* out)
CStatus GetRecordBatchReader(const vector[int]& row_group_indices,
const vector[int]& column_indices,
unique_ptr[CRecordBatchReader]* out)
CStatus GetRecordBatchReader(const vector[int]& row_group_indices,
unique_ptr[CRecordBatchReader]* out)
CStatus ReadTable(shared_ptr[CTable]* out)
CStatus ReadTable(const vector[int]& column_indices,
shared_ptr[CTable]* out)
CStatus ScanContents(vector[int] columns, int32_t column_batch_size,
int64_t* num_rows)
const ParquetFileReader* parquet_reader()
void set_use_threads(c_bool use_threads)
void set_batch_size(int64_t batch_size)
cdef cppclass FileReaderBuilder:
FileReaderBuilder()
CStatus Open(const shared_ptr[CRandomAccessFile]& file,
const CReaderProperties& properties,
const shared_ptr[CFileMetaData]& metadata)
ParquetFileReader* raw_reader()
FileReaderBuilder* memory_pool(CMemoryPool*)
FileReaderBuilder* properties(const ArrowReaderProperties&)
CStatus Build(unique_ptr[FileReader]* out)
CStatus FromParquetSchema(
const SchemaDescriptor* parquet_schema,
const ArrowReaderProperties& properties,
const shared_ptr[const CKeyValueMetadata]& key_value_metadata,
shared_ptr[CSchema]* out)
CStatus StatisticsAsScalars(const CStatistics& Statistics,
shared_ptr[CScalar]* min,
shared_ptr[CScalar]* max)
cdef extern from "parquet/arrow/schema.h" namespace "parquet::arrow" nogil:
CStatus ToParquetSchema(
const CSchema* arrow_schema,
const ArrowReaderProperties& properties,
const shared_ptr[const CKeyValueMetadata]& key_value_metadata,
shared_ptr[SchemaDescriptor]* out)
cdef extern from "parquet/properties.h" namespace "parquet" nogil:
cdef enum ArrowWriterEngineVersion:
V1 "parquet::ArrowWriterProperties::V1",
V2 "parquet::ArrowWriterProperties::V2"
cdef cppclass ParquetDataPageVersion:
pass
cdef ParquetDataPageVersion ParquetDataPageVersion_V1 \
" parquet::ParquetDataPageVersion::V1"
cdef ParquetDataPageVersion ParquetDataPageVersion_V2 \
" parquet::ParquetDataPageVersion::V2"
cdef extern from "parquet/arrow/writer.h" namespace "parquet::arrow" nogil:
cdef cppclass FileWriter:
@staticmethod
CStatus Open(const CSchema& schema, CMemoryPool* pool,
const shared_ptr[COutputStream]& sink,
const shared_ptr[WriterProperties]& properties,
const shared_ptr[ArrowWriterProperties]& arrow_properties,
unique_ptr[FileWriter]* writer)
CStatus WriteTable(const CTable& table, int64_t chunk_size)
CStatus NewRowGroup(int64_t chunk_size)
CStatus Close()
const shared_ptr[CFileMetaData] metadata() const
CStatus WriteMetaDataFile(
const CFileMetaData& file_metadata,
const COutputStream* sink)
cdef class FileEncryptionProperties:
"""File-level encryption properties for the low-level API"""
cdef:
shared_ptr[CFileEncryptionProperties] properties
@staticmethod
cdef inline FileEncryptionProperties wrap(
shared_ptr[CFileEncryptionProperties] properties):
result = FileEncryptionProperties()
result.properties = properties
return result
cdef inline shared_ptr[CFileEncryptionProperties] unwrap(self):
return self.properties
cdef shared_ptr[WriterProperties] _create_writer_properties(
use_dictionary=*,
compression=*,
version=*,
write_statistics=*,
data_page_size=*,
compression_level=*,
use_byte_stream_split=*,
column_encoding=*,
data_page_version=*,
FileEncryptionProperties encryption_properties=*,
write_batch_size=*,
dictionary_pagesize_limit=*) except *
cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties(
use_deprecated_int96_timestamps=*,
coerce_timestamps=*,
allow_truncated_timestamps=*,
writer_engine_version=*,
use_compliant_nested_type=*) except *
cdef class ParquetSchema(_Weakrefable):
cdef:
FileMetaData parent # the FileMetaData owning the SchemaDescriptor
const SchemaDescriptor* schema
cdef class FileMetaData(_Weakrefable):
cdef:
shared_ptr[CFileMetaData] sp_metadata
CFileMetaData* _metadata
ParquetSchema _schema
cdef inline init(self, const shared_ptr[CFileMetaData]& metadata):
self.sp_metadata = metadata
self._metadata = metadata.get()
cdef class RowGroupMetaData(_Weakrefable):
cdef:
int index # for pickling support
unique_ptr[CRowGroupMetaData] up_metadata
CRowGroupMetaData* metadata
FileMetaData parent
cdef class ColumnChunkMetaData(_Weakrefable):
cdef:
unique_ptr[CColumnChunkMetaData] up_metadata
CColumnChunkMetaData* metadata
RowGroupMetaData parent
cdef inline init(self, RowGroupMetaData parent, int i):
self.up_metadata = parent.metadata.ColumnChunk(i)
self.metadata = self.up_metadata.get()
self.parent = parent
cdef class Statistics(_Weakrefable):
cdef:
shared_ptr[CStatistics] statistics
ColumnChunkMetaData parent
cdef inline init(self, const shared_ptr[CStatistics]& statistics,
ColumnChunkMetaData parent):
self.statistics = statistics
self.parent = parent
cdef extern from "parquet/encryption/encryption.h" namespace "parquet" nogil:
cdef cppclass CFileDecryptionProperties\
" parquet::FileDecryptionProperties":
pass
cdef cppclass CFileEncryptionProperties\
" parquet::FileEncryptionProperties":
pass
cdef class FileDecryptionProperties:
"""File-level decryption properties for the low-level API"""
cdef:
shared_ptr[CFileDecryptionProperties] properties
@staticmethod
cdef inline FileDecryptionProperties wrap(
shared_ptr[CFileDecryptionProperties] properties):
result = FileDecryptionProperties()
result.properties = properties
return result
cdef inline shared_ptr[CFileDecryptionProperties] unwrap(self):
return self.properties
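# Illustrative sketch (not part of this header): the accompanying .pyx module
# typically consumes the WriterProperties declarations above with a
# stack-allocated Builder, roughly as follows (the enum values are the ones
# declared in this file):
#
#   cdef:
#       WriterProperties.Builder builder
#       shared_ptr[WriterProperties] props
#   builder.compression(ParquetCompression_SNAPPY)
#   builder.version(ParquetVersion_V2_6)
#   builder.data_pagesize(1024 * 1024)
#   props = builder.build()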

File diff suppressed because it is too large

View File

@ -0,0 +1,133 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
# cython: language_level = 3
from pyarrow.includes.common cimport *
from pyarrow._parquet cimport (ParquetCipher,
CFileEncryptionProperties,
CFileDecryptionProperties,
FileEncryptionProperties,
FileDecryptionProperties,
ParquetCipher_AES_GCM_V1,
ParquetCipher_AES_GCM_CTR_V1)
cdef extern from "parquet/encryption/kms_client.h" \
namespace "parquet::encryption" nogil:
cdef cppclass CKmsClient" parquet::encryption::KmsClient":
c_string WrapKey(const c_string& key_bytes,
const c_string& master_key_identifier) except +
c_string UnwrapKey(const c_string& wrapped_key,
const c_string& master_key_identifier) except +
cdef cppclass CKeyAccessToken" parquet::encryption::KeyAccessToken":
CKeyAccessToken(const c_string value)
void Refresh(const c_string& new_value)
const c_string& value() const
cdef cppclass CKmsConnectionConfig \
" parquet::encryption::KmsConnectionConfig":
CKmsConnectionConfig()
c_string kms_instance_id
c_string kms_instance_url
shared_ptr[CKeyAccessToken] refreshable_key_access_token
unordered_map[c_string, c_string] custom_kms_conf
# Callbacks for implementing Python kms clients
# Use typedef to emulate syntax for std::function<void(..)>
ctypedef void CallbackWrapKey(
object, const c_string&, const c_string&, c_string*)
ctypedef void CallbackUnwrapKey(
object, const c_string&, const c_string&, c_string*)
cdef extern from "parquet/encryption/kms_client_factory.h" \
namespace "parquet::encryption" nogil:
cdef cppclass CKmsClientFactory" parquet::encryption::KmsClientFactory":
shared_ptr[CKmsClient] CreateKmsClient(
const CKmsConnectionConfig& kms_connection_config) except +
# Callbacks for implementing Python kms client factories
# Use typedef to emulate syntax for std::function<void(..)>
ctypedef void CallbackCreateKmsClient(
object,
const CKmsConnectionConfig&, shared_ptr[CKmsClient]*)
cdef extern from "parquet/encryption/crypto_factory.h" \
namespace "parquet::encryption" nogil:
cdef cppclass CEncryptionConfiguration\
" parquet::encryption::EncryptionConfiguration":
CEncryptionConfiguration(const c_string& footer_key) except +
c_string footer_key
c_string column_keys
ParquetCipher encryption_algorithm
c_bool plaintext_footer
c_bool double_wrapping
double cache_lifetime_seconds
c_bool internal_key_material
int32_t data_key_length_bits
cdef cppclass CDecryptionConfiguration\
" parquet::encryption::DecryptionConfiguration":
CDecryptionConfiguration() except +
double cache_lifetime_seconds
cdef cppclass CCryptoFactory" parquet::encryption::CryptoFactory":
void RegisterKmsClientFactory(
shared_ptr[CKmsClientFactory] kms_client_factory) except +
shared_ptr[CFileEncryptionProperties] GetFileEncryptionProperties(
const CKmsConnectionConfig& kms_connection_config,
const CEncryptionConfiguration& encryption_config) except +*
shared_ptr[CFileDecryptionProperties] GetFileDecryptionProperties(
const CKmsConnectionConfig& kms_connection_config,
const CDecryptionConfiguration& decryption_config) except +*
void RemoveCacheEntriesForToken(const c_string& access_token) except +
void RemoveCacheEntriesForAllTokens() except +
cdef extern from "arrow/python/parquet_encryption.h" \
namespace "arrow::py::parquet::encryption" nogil:
cdef cppclass CPyKmsClientVtable \
" arrow::py::parquet::encryption::PyKmsClientVtable":
CPyKmsClientVtable()
function[CallbackWrapKey] wrap_key
function[CallbackUnwrapKey] unwrap_key
cdef cppclass CPyKmsClient\
" arrow::py::parquet::encryption::PyKmsClient"(CKmsClient):
CPyKmsClient(object handler, CPyKmsClientVtable vtable)
cdef cppclass CPyKmsClientFactoryVtable\
" arrow::py::parquet::encryption::PyKmsClientFactoryVtable":
CPyKmsClientFactoryVtable()
function[CallbackCreateKmsClient] create_kms_client
cdef cppclass CPyKmsClientFactory\
" arrow::py::parquet::encryption::PyKmsClientFactory"(
CKmsClientFactory):
CPyKmsClientFactory(object handler, CPyKmsClientFactoryVtable vtable)
cdef cppclass CPyCryptoFactory\
" arrow::py::parquet::encryption::PyCryptoFactory"(CCryptoFactory):
CResult[shared_ptr[CFileEncryptionProperties]] \
SafeGetFileEncryptionProperties(
const CKmsConnectionConfig& kms_connection_config,
const CEncryptionConfiguration& encryption_config)
CResult[shared_ptr[CFileDecryptionProperties]] \
SafeGetFileDecryptionProperties(
const CKmsConnectionConfig& kms_connection_config,
const CDecryptionConfiguration& decryption_config)
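# Illustrative sketch: the vtables declared above are filled with cdef
# callbacks matching the ctypedef signatures and then handed to the C++ shims;
# the same pattern appears in the accompanying .pyx module:
#
#   cdef CPyKmsClientVtable vtable = CPyKmsClientVtable()
#   vtable.wrap_key = _cb_wrap_key        # cdef function matching CallbackWrapKey
#   vtable.unwrap_key = _cb_unwrap_key    # cdef function matching CallbackUnwrapKey
#   client.reset(new CPyKmsClient(handler, vtable))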

View File

@ -0,0 +1,475 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: profile=False
# distutils: language = c++
from datetime import timedelta
import io
import warnings
from libcpp cimport nullptr
from cython.operator cimport dereference as deref
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.lib cimport _Weakrefable
from pyarrow.lib import (ArrowException,
tobytes, frombytes)
cimport cpython as cp
cdef ParquetCipher cipher_from_name(name):
name = name.upper()
if name == 'AES_GCM_V1':
return ParquetCipher_AES_GCM_V1
elif name == 'AES_GCM_CTR_V1':
return ParquetCipher_AES_GCM_CTR_V1
else:
raise ValueError(f'Invalid cipher name: {name!r}')
cdef cipher_to_name(ParquetCipher cipher):
if ParquetCipher_AES_GCM_V1 == cipher:
return 'AES_GCM_V1'
elif ParquetCipher_AES_GCM_CTR_V1 == cipher:
return 'AES_GCM_CTR_V1'
else:
raise ValueError('Invalid cipher value: {0}'.format(cipher))
cdef class EncryptionConfiguration(_Weakrefable):
"""Configuration of the encryption, such as which columns to encrypt"""
cdef:
shared_ptr[CEncryptionConfiguration] configuration
# Avoid mistakenly creating attributes
__slots__ = ()
def __init__(self, footer_key, *, column_keys=None,
encryption_algorithm=None,
plaintext_footer=None, double_wrapping=None,
cache_lifetime=None, internal_key_material=None,
data_key_length_bits=None):
self.configuration.reset(
new CEncryptionConfiguration(tobytes(footer_key)))
if column_keys is not None:
self.column_keys = column_keys
if encryption_algorithm is not None:
self.encryption_algorithm = encryption_algorithm
if plaintext_footer is not None:
self.plaintext_footer = plaintext_footer
if double_wrapping is not None:
self.double_wrapping = double_wrapping
if cache_lifetime is not None:
self.cache_lifetime = cache_lifetime
if internal_key_material is not None:
self.internal_key_material = internal_key_material
if data_key_length_bits is not None:
self.data_key_length_bits = data_key_length_bits
@property
def footer_key(self):
"""ID of the master key for footer encryption/signing"""
return frombytes(self.configuration.get().footer_key)
@property
def column_keys(self):
"""
List of columns to encrypt, with master key IDs.
"""
column_keys_str = frombytes(self.configuration.get().column_keys)
# Convert from "masterKeyID:colName,colName;masterKeyID:colName..."
# (see HIVE-21848) to dictionary of master key ID to column name lists
column_keys_to_key_list_str = dict(subString.replace(" ", "").split(
":") for subString in column_keys_str.split(";"))
column_keys_dict = {k: v.split(
",") for k, v in column_keys_to_key_list_str.items()}
return column_keys_dict
@column_keys.setter
def column_keys(self, dict value):
if value is not None:
# convert a dictionary such as
# '{"key1": ["col1 ", "col2"], "key2": ["col3 ", "col4"]}''
# to the string defined by the spec
# 'key1: col1 , col2; key2: col3 , col4'
column_keys = "; ".join(
["{}: {}".format(k, ", ".join(v)) for k, v in value.items()])
self.configuration.get().column_keys = tobytes(column_keys)
@property
def encryption_algorithm(self):
"""Parquet encryption algorithm.
Can be "AES_GCM_V1" (default), or "AES_GCM_CTR_V1"."""
return cipher_to_name(self.configuration.get().encryption_algorithm)
@encryption_algorithm.setter
def encryption_algorithm(self, value):
cipher = cipher_from_name(value)
self.configuration.get().encryption_algorithm = cipher
@property
def plaintext_footer(self):
"""Write files with plaintext footer."""
return self.configuration.get().plaintext_footer
@plaintext_footer.setter
def plaintext_footer(self, value):
self.configuration.get().plaintext_footer = value
@property
def double_wrapping(self):
"""Use double wrapping - where data encryption keys (DEKs) are
encrypted with key encryption keys (KEKs), which in turn are
encrypted with master keys.
If set to false, use single wrapping - where DEKs are
encrypted directly with master keys."""
return self.configuration.get().double_wrapping
@double_wrapping.setter
def double_wrapping(self, value):
self.configuration.get().double_wrapping = value
@property
def cache_lifetime(self):
"""Lifetime of cached entities (key encryption keys,
local wrapping keys, KMS client objects)."""
return timedelta(
seconds=self.configuration.get().cache_lifetime_seconds)
@cache_lifetime.setter
def cache_lifetime(self, value):
if not isinstance(value, timedelta):
raise TypeError("cache_lifetime should be a timedelta")
self.configuration.get().cache_lifetime_seconds = value.total_seconds()
@property
def internal_key_material(self):
"""Store key material inside Parquet file footers; this mode doesnt
produce additional files. If set to false, key material is stored in
separate files in the same folder, which enables key rotation for
immutable Parquet files."""
return self.configuration.get().internal_key_material
@internal_key_material.setter
def internal_key_material(self, value):
self.configuration.get().internal_key_material = value
@property
def data_key_length_bits(self):
"""Length of data encryption keys (DEKs), randomly generated by parquet key
management tools. Can be 128, 192 or 256 bits."""
return self.configuration.get().data_key_length_bits
@data_key_length_bits.setter
def data_key_length_bits(self, value):
self.configuration.get().data_key_length_bits = value
cdef inline shared_ptr[CEncryptionConfiguration] unwrap(self) nogil:
return self.configuration
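# Example (illustrative; "kf" and "kc1" are placeholder master key IDs): the
# column_keys dict below round-trips through the spec string
# 'kc1: col1, col2' handled by the property above. timedelta is imported at
# the top of this module.
#
#   config = EncryptionConfiguration(
#       footer_key="kf",
#       column_keys={"kc1": ["col1", "col2"]},
#       encryption_algorithm="AES_GCM_V1",
#       cache_lifetime=timedelta(minutes=5),
#       data_key_length_bits=256)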
cdef class DecryptionConfiguration(_Weakrefable):
"""Configuration of the decryption, such as cache timeout."""
cdef:
shared_ptr[CDecryptionConfiguration] configuration
# Avoid mistakenly creating attributes
__slots__ = ()
def __init__(self, *, cache_lifetime=None):
    self.configuration.reset(new CDecryptionConfiguration())
    if cache_lifetime is not None:
        self.cache_lifetime = cache_lifetime
@property
def cache_lifetime(self):
"""Lifetime of cached entities (key encryption keys,
local wrapping keys, KMS client objects)."""
return timedelta(
seconds=self.configuration.get().cache_lifetime_seconds)
@cache_lifetime.setter
def cache_lifetime(self, value):
self.configuration.get().cache_lifetime_seconds = value.total_seconds()
cdef inline shared_ptr[CDecryptionConfiguration] unwrap(self) nogil:
return self.configuration
cdef class KmsConnectionConfig(_Weakrefable):
"""Configuration of the connection to the Key Management Service (KMS)"""
cdef:
shared_ptr[CKmsConnectionConfig] configuration
# Avoid mistakenly creating attributes
__slots__ = ()
def __init__(self, *, kms_instance_id=None, kms_instance_url=None,
key_access_token=None, custom_kms_conf=None):
self.configuration.reset(new CKmsConnectionConfig())
if kms_instance_id is not None:
self.kms_instance_id = kms_instance_id
if kms_instance_url is not None:
self.kms_instance_url = kms_instance_url
if key_access_token is None:
self.key_access_token = b'DEFAULT'
else:
self.key_access_token = key_access_token
if custom_kms_conf is not None:
self.custom_kms_conf = custom_kms_conf
@property
def kms_instance_id(self):
"""ID of the KMS instance that will be used for encryption
(if multiple KMS instances are available)."""
return frombytes(self.configuration.get().kms_instance_id)
@kms_instance_id.setter
def kms_instance_id(self, value):
self.configuration.get().kms_instance_id = tobytes(value)
@property
def kms_instance_url(self):
"""URL of the KMS instance."""
return frombytes(self.configuration.get().kms_instance_url)
@kms_instance_url.setter
def kms_instance_url(self, value):
self.configuration.get().kms_instance_url = tobytes(value)
@property
def key_access_token(self):
"""Authorization token that will be passed to KMS."""
return frombytes(self.configuration.get()
.refreshable_key_access_token.get().value())
@key_access_token.setter
def key_access_token(self, value):
self.refresh_key_access_token(value)
@property
def custom_kms_conf(self):
"""A dictionary with KMS-type-specific configuration"""
custom_kms_conf = {
frombytes(k): frombytes(v)
for k, v in self.configuration.get().custom_kms_conf
}
return custom_kms_conf
@custom_kms_conf.setter
def custom_kms_conf(self, dict value):
if value is not None:
for k, v in value.items():
if isinstance(k, str) and isinstance(v, str):
self.configuration.get().custom_kms_conf[tobytes(k)] = \
tobytes(v)
else:
raise TypeError("Expected custom_kms_conf to be " +
"a dictionary of strings")
def refresh_key_access_token(self, value):
cdef:
shared_ptr[CKeyAccessToken] c_key_access_token = \
self.configuration.get().refreshable_key_access_token
c_key_access_token.get().Refresh(tobytes(value))
cdef inline shared_ptr[CKmsConnectionConfig] unwrap(self) nogil:
return self.configuration
@staticmethod
cdef wrap(const CKmsConnectionConfig& config):
result = KmsConnectionConfig()
result.configuration = make_shared[CKmsConnectionConfig](move(config))
return result
# Callback definitions for CPyKmsClientVtable
cdef void _cb_wrap_key(
handler, const c_string& key_bytes,
const c_string& master_key_identifier, c_string* out) except *:
mkid_str = frombytes(master_key_identifier)
wrapped_key = handler.wrap_key(key_bytes, mkid_str)
out[0] = tobytes(wrapped_key)
cdef void _cb_unwrap_key(
handler, const c_string& wrapped_key,
const c_string& master_key_identifier, c_string* out) except *:
mkid_str = frombytes(master_key_identifier)
wk_str = frombytes(wrapped_key)
key = handler.unwrap_key(wk_str, mkid_str)
out[0] = tobytes(key)
cdef class KmsClient(_Weakrefable):
"""The abstract base class for KmsClient implementations."""
cdef:
shared_ptr[CKmsClient] client
def __init__(self):
self.init()
cdef init(self):
cdef:
CPyKmsClientVtable vtable = CPyKmsClientVtable()
vtable.wrap_key = _cb_wrap_key
vtable.unwrap_key = _cb_unwrap_key
self.client.reset(new CPyKmsClient(self, vtable))
def wrap_key(self, key_bytes, master_key_identifier):
"""Wrap a key - encrypt it with the master key."""
raise NotImplementedError()
def unwrap_key(self, wrapped_key, master_key_identifier):
"""Unwrap a key - decrypt it with the master key."""
raise NotImplementedError()
cdef inline shared_ptr[CKmsClient] unwrap(self) nogil:
return self.client
# Callback definition for CPyKmsClientFactoryVtable
cdef void _cb_create_kms_client(
handler,
const CKmsConnectionConfig& kms_connection_config,
shared_ptr[CKmsClient]* out) except *:
connection_config = KmsConnectionConfig.wrap(kms_connection_config)
result = handler(connection_config)
if not isinstance(result, KmsClient):
raise TypeError(
"callable must return KmsClient instances, but got {}".format(
type(result)))
out[0] = (<KmsClient> result).unwrap()
cdef class CryptoFactory(_Weakrefable):
""" A factory that produces the low-level FileEncryptionProperties and
FileDecryptionProperties objects, from the high-level parameters."""
cdef:
unique_ptr[CPyCryptoFactory] factory
# Avoid mistakenly creating attributes
__slots__ = ()
def __init__(self, kms_client_factory):
"""Create CryptoFactory.
Parameters
----------
kms_client_factory : a callable that accepts KmsConnectionConfig
and returns a KmsClient
"""
self.factory.reset(new CPyCryptoFactory())
if callable(kms_client_factory):
self.init(kms_client_factory)
else:
raise TypeError("Parameter kms_client_factory must be a callable")
cdef init(self, callable_client_factory):
cdef:
CPyKmsClientFactoryVtable vtable
shared_ptr[CPyKmsClientFactory] kms_client_factory
vtable.create_kms_client = _cb_create_kms_client
kms_client_factory.reset(
new CPyKmsClientFactory(callable_client_factory, vtable))
# A KmsClientFactory object must be registered
# via this method before calling any of
# file_encryption_properties()/file_decryption_properties() methods.
self.factory.get().RegisterKmsClientFactory(
static_pointer_cast[CKmsClientFactory, CPyKmsClientFactory](
kms_client_factory))
def file_encryption_properties(self,
KmsConnectionConfig kms_connection_config,
EncryptionConfiguration encryption_config):
"""Create file encryption properties.
Parameters
----------
kms_connection_config : KmsConnectionConfig
Configuration of connection to KMS
encryption_config : EncryptionConfiguration
Configuration of the encryption, such as which columns to encrypt
Returns
-------
file_encryption_properties : FileEncryptionProperties
File encryption properties.
"""
cdef:
CResult[shared_ptr[CFileEncryptionProperties]] \
file_encryption_properties_result
with nogil:
file_encryption_properties_result = \
self.factory.get().SafeGetFileEncryptionProperties(
deref(kms_connection_config.unwrap().get()),
deref(encryption_config.unwrap().get()))
file_encryption_properties = GetResultValue(
file_encryption_properties_result)
return FileEncryptionProperties.wrap(file_encryption_properties)
def file_decryption_properties(
self,
KmsConnectionConfig kms_connection_config,
DecryptionConfiguration decryption_config=None):
"""Create file decryption properties.
Parameters
----------
kms_connection_config : KmsConnectionConfig
Configuration of connection to KMS
decryption_config : DecryptionConfiguration, default None
Configuration of the decryption, such as cache timeout.
Can be None.
Returns
-------
file_decryption_properties : FileDecryptionProperties
File decryption properties.
"""
cdef:
CDecryptionConfiguration c_decryption_config
CResult[shared_ptr[CFileDecryptionProperties]] \
c_file_decryption_properties
if decryption_config is None:
c_decryption_config = CDecryptionConfiguration()
else:
c_decryption_config = deref(decryption_config.unwrap().get())
with nogil:
c_file_decryption_properties = \
self.factory.get().SafeGetFileDecryptionProperties(
deref(kms_connection_config.unwrap().get()),
c_decryption_config)
file_decryption_properties = GetResultValue(
c_file_decryption_properties)
return FileDecryptionProperties.wrap(file_decryption_properties)
def remove_cache_entries_for_token(self, access_token):
self.factory.get().RemoveCacheEntriesForToken(tobytes(access_token))
def remove_cache_entries_for_all_tokens(self):
self.factory.get().RemoveCacheEntriesForAllTokens()
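# Illustrative sketch (hypothetical names; a real deployment would talk to an
# actual KMS service): a toy KmsClient subclass wired into the CryptoFactory
# defined above. Key "wrapping" here is plain base64, which is NOT secure.
#
#   import base64
#
#   class DemoKmsClient(KmsClient):
#       def wrap_key(self, key_bytes, master_key_identifier):
#           return base64.b64encode(key_bytes)
#       def unwrap_key(self, wrapped_key, master_key_identifier):
#           return base64.b64decode(wrapped_key)
#
#   factory = CryptoFactory(lambda kms_connection_config: DemoKmsClient())
#   encryption_properties = factory.file_encryption_properties(
#       KmsConnectionConfig(), EncryptionConfiguration(footer_key="kf"))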

View File

@ -0,0 +1,875 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: profile=False
# distutils: language = c++
# cython: language_level = 3
from libcpp cimport bool as c_bool, nullptr
from libcpp.memory cimport shared_ptr, unique_ptr, make_shared
from libcpp.string cimport string as c_string
from libcpp.vector cimport vector as c_vector
from libcpp.unordered_map cimport unordered_map
from libc.stdint cimport int64_t, uint8_t, uintptr_t
from cython.operator cimport dereference as deref, preincrement as inc
from cpython.pycapsule cimport *
from collections.abc import Sequence
import random
import socket
import warnings
import pyarrow
from pyarrow.lib cimport (Buffer, NativeFile, _Weakrefable,
check_status, pyarrow_wrap_buffer)
from pyarrow.lib import ArrowException, frombytes
from pyarrow.includes.libarrow cimport (CBuffer, CMutableBuffer,
CFixedSizeBufferWriter, CStatus)
from pyarrow.includes.libplasma cimport *
PLASMA_WAIT_TIMEOUT = 2 ** 30
cdef extern from "plasma/common.h" nogil:
cdef cppclass CCudaIpcPlaceholder" plasma::internal::CudaIpcPlaceholder":
pass
cdef cppclass CUniqueID" plasma::UniqueID":
@staticmethod
CUniqueID from_binary(const c_string& binary)
@staticmethod
CUniqueID from_random()
c_bool operator==(const CUniqueID& rhs) const
c_string hex() const
c_string binary() const
@staticmethod
int64_t size()
cdef enum CObjectState" plasma::ObjectState":
PLASMA_CREATED" plasma::ObjectState::PLASMA_CREATED"
PLASMA_SEALED" plasma::ObjectState::PLASMA_SEALED"
cdef struct CObjectTableEntry" plasma::ObjectTableEntry":
int fd
int device_num
int64_t map_size
ptrdiff_t offset
uint8_t* pointer
int64_t data_size
int64_t metadata_size
int ref_count
int64_t create_time
int64_t construct_duration
CObjectState state
shared_ptr[CCudaIpcPlaceholder] ipc_handle
ctypedef unordered_map[CUniqueID, unique_ptr[CObjectTableEntry]] \
CObjectTable" plasma::ObjectTable"
cdef extern from "plasma/common.h":
cdef int64_t kDigestSize" plasma::kDigestSize"
cdef extern from "plasma/client.h" nogil:
cdef cppclass CPlasmaClient" plasma::PlasmaClient":
CPlasmaClient()
CStatus Connect(const c_string& store_socket_name,
const c_string& manager_socket_name,
int release_delay, int num_retries)
CStatus Create(const CUniqueID& object_id,
int64_t data_size, const uint8_t* metadata, int64_t
metadata_size, const shared_ptr[CBuffer]* data)
CStatus CreateAndSeal(const CUniqueID& object_id,
const c_string& data, const c_string& metadata)
CStatus Get(const c_vector[CUniqueID] object_ids, int64_t timeout_ms,
c_vector[CObjectBuffer]* object_buffers)
CStatus Seal(const CUniqueID& object_id)
CStatus Evict(int64_t num_bytes, int64_t& num_bytes_evicted)
CStatus Hash(const CUniqueID& object_id, uint8_t* digest)
CStatus Release(const CUniqueID& object_id)
CStatus Contains(const CUniqueID& object_id, c_bool* has_object)
CStatus List(CObjectTable* objects)
CStatus Subscribe(int* fd)
CStatus DecodeNotifications(const uint8_t* buffer,
c_vector[CUniqueID]* object_ids,
c_vector[int64_t]* data_sizes,
c_vector[int64_t]* metadata_sizes)
CStatus GetNotification(int fd, CUniqueID* object_id,
int64_t* data_size, int64_t* metadata_size)
CStatus Disconnect()
CStatus Delete(const c_vector[CUniqueID] object_ids)
CStatus SetClientOptions(const c_string& client_name,
int64_t limit_output_memory)
c_string DebugString()
int64_t store_capacity()
cdef extern from "plasma/client.h" nogil:
cdef struct CObjectBuffer" plasma::ObjectBuffer":
shared_ptr[CBuffer] data
shared_ptr[CBuffer] metadata
def make_object_id(object_id):
return ObjectID(object_id)
cdef class ObjectID(_Weakrefable):
"""
An ObjectID represents a string of bytes used to identify Plasma objects.
"""
cdef:
CUniqueID data
def __cinit__(self, object_id):
if (not isinstance(object_id, bytes) or
len(object_id) != CUniqueID.size()):
raise ValueError("Object ID must by 20 bytes,"
" is " + str(object_id))
self.data = CUniqueID.from_binary(object_id)
def __eq__(self, other):
try:
return self.data == (<ObjectID?>other).data
except TypeError:
return False
def __hash__(self):
return hash(self.data.binary())
def __repr__(self):
return "ObjectID(" + self.data.hex().decode() + ")"
def __reduce__(self):
return (make_object_id, (self.data.binary(),))
def binary(self):
"""
Return the binary representation of this ObjectID.
Returns
-------
bytes
Binary representation of the ObjectID.
"""
return self.data.binary()
@staticmethod
def from_random():
"""
Returns a randomly generated ObjectID.
Returns
-------
ObjectID
A randomly generated ObjectID.
"""
random_id = bytes(bytearray(
random.getrandbits(8) for _ in range(CUniqueID.size())))
return ObjectID(random_id)
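# Example (illustrative): ObjectIDs are fixed-size 20-byte identifiers.
#
#   oid = ObjectID(b"a" * 20)          # explicit 20-byte id
#   rid = ObjectID.from_random()       # random id
#   assert len(rid.binary()) == 20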
cdef class ObjectNotAvailable(_Weakrefable):
"""
Placeholder for an object that was not available within the given timeout.
"""
pass
cdef class PlasmaBuffer(Buffer):
"""
This is the type returned by calls to get with a PlasmaClient.
We define our own class instead of directly returning a buffer object so
that we can add a custom destructor which notifies Plasma that the object
is no longer being used, so the memory in the Plasma store backing the
object can potentially be freed.
Attributes
----------
object_id : ObjectID
The ID of the object in the buffer.
client : PlasmaClient
The PlasmaClient that we use to communicate with the store and manager.
"""
cdef:
ObjectID object_id
PlasmaClient client
@staticmethod
cdef PlasmaBuffer create(ObjectID object_id, PlasmaClient client,
const shared_ptr[CBuffer]& buffer):
cdef PlasmaBuffer self = PlasmaBuffer.__new__(PlasmaBuffer)
self.object_id = object_id
self.client = client
self.init(buffer)
return self
def __init__(self):
raise TypeError("Do not call PlasmaBuffer's constructor directly, use "
"`PlasmaClient.create` instead.")
def __dealloc__(self):
"""
Notify Plasma that the object is no longer needed.
If the plasma client has been shut down, then don't do anything.
"""
self.client._release(self.object_id)
class PlasmaObjectNotFound(ArrowException):
pass
class PlasmaStoreFull(ArrowException):
pass
class PlasmaObjectExists(ArrowException):
pass
cdef int plasma_check_status(const CStatus& status) nogil except -1:
if status.ok():
return 0
with gil:
message = frombytes(status.message())
if IsPlasmaObjectExists(status):
raise PlasmaObjectExists(message)
elif IsPlasmaObjectNotFound(status):
raise PlasmaObjectNotFound(message)
elif IsPlasmaStoreFull(status):
raise PlasmaStoreFull(message)
return check_status(status)
def get_socket_from_fd(fileno, family, type):
import socket
return socket.socket(fileno=fileno, family=family, type=type)
cdef class PlasmaClient(_Weakrefable):
"""
The PlasmaClient is used to interface with a plasma store and manager.
The PlasmaClient can ask the PlasmaStore to allocate a new buffer, seal a
buffer, and get a buffer. Buffers are referred to by object IDs, which are
strings.
"""
cdef:
shared_ptr[CPlasmaClient] client
int notification_fd
c_string store_socket_name
def __cinit__(self):
self.client.reset(new CPlasmaClient())
self.notification_fd = -1
self.store_socket_name = b""
cdef _get_object_buffers(self, object_ids, int64_t timeout_ms,
c_vector[CObjectBuffer]* result):
cdef:
c_vector[CUniqueID] ids
ObjectID object_id
for object_id in object_ids:
ids.push_back(object_id.data)
with nogil:
plasma_check_status(self.client.get().Get(ids, timeout_ms, result))
# XXX C++ API should instead expose some kind of CreateAuto()
cdef _make_mutable_plasma_buffer(self, ObjectID object_id, uint8_t* data,
int64_t size):
cdef shared_ptr[CBuffer] buffer
buffer.reset(new CMutableBuffer(data, size))
return PlasmaBuffer.create(object_id, self, buffer)
@property
def store_socket_name(self):
return self.store_socket_name.decode()
def create(self, ObjectID object_id, int64_t data_size,
c_string metadata=b""):
"""
Create a new buffer in the PlasmaStore for a particular object ID.
The returned buffer is mutable until ``seal()`` is called.
Parameters
----------
object_id : ObjectID
The object ID used to identify an object.
data_size : int
The size in bytes of the created buffer.
metadata : bytes
An optional string of bytes encoding whatever metadata the user
wishes to encode.
Returns
-------
buffer : Buffer
A mutable buffer in which to write the object data.
Raises
------
PlasmaObjectExists
This exception is raised if the object could not be created because
there already is an object with the same ID in the plasma store.
PlasmaStoreFull
This exception is raised if the object could
not be created because the plasma store is unable to evict
enough objects to create room for it.
"""
cdef shared_ptr[CBuffer] data
with nogil:
plasma_check_status(
self.client.get().Create(object_id.data, data_size,
<uint8_t*>(metadata.data()),
metadata.size(), &data))
return self._make_mutable_plasma_buffer(object_id,
data.get().mutable_data(),
data_size)
def create_and_seal(self, ObjectID object_id, c_string data,
c_string metadata=b""):
"""
Store a new object in the PlasmaStore for a particular object ID.
Parameters
----------
object_id : ObjectID
The object ID used to identify an object.
data : bytes
The object to store.
metadata : bytes
An optional string of bytes encoding whatever metadata the user
wishes to encode.
Raises
------
PlasmaObjectExists
This exception is raised if the object could not be created because
there already is an object with the same ID in the plasma store.
PlasmaStoreFull
    This exception is raised if the object could not be created
    because the plasma store is unable to evict enough objects to
    create room for it.
"""
with nogil:
plasma_check_status(
self.client.get().CreateAndSeal(object_id.data, data,
metadata))
def get_buffers(self, object_ids, timeout_ms=-1, with_meta=False):
"""
Return data buffers from the PlasmaStore based on the given object IDs.
If the object has not been sealed yet, this call will block. The
retrieved buffer is immutable.
Parameters
----------
object_ids : list
A list of ObjectIDs used to identify some objects.
timeout_ms : int
The number of milliseconds that the get call should block before
timing out and returning. Pass -1 if the call should block and 0
if the call should return immediately.
with_meta : bool
    If True, also return the metadata bytes alongside each data buffer.
Returns
-------
list
If with_meta=False, this is a list of PlasmaBuffers for the data
associated with the object_ids and None if the object was not
available. If with_meta=True, this is a list of (metadata bytes,
PlasmaBuffer) tuples.
"""
cdef c_vector[CObjectBuffer] object_buffers
self._get_object_buffers(object_ids, timeout_ms, &object_buffers)
result = []
for i in range(object_buffers.size()):
if object_buffers[i].data.get() != nullptr:
data = pyarrow_wrap_buffer(object_buffers[i].data)
else:
data = None
if not with_meta:
result.append(data)
else:
if object_buffers[i].metadata.get() != nullptr:
size = object_buffers[i].metadata.get().size()
metadata = object_buffers[i].metadata.get().data()[:size]
else:
metadata = None
result.append((metadata, data))
return result
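# Usage sketch (illustrative; assumes a client obtained from the module-level
# connect() helper defined later in this file, and a hypothetical socket path):
#
#   client = connect("/tmp/plasma")
#   oid = ObjectID.from_random()
#   payload = b"hello"
#   buf = client.create(oid, len(payload))
#   stream = pyarrow.FixedSizeBufferWriter(buf)
#   stream.write(payload)
#   client.seal(oid)
#   [result] = client.get_buffers([oid])
#   assert result.to_pybytes() == payload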
def get_metadata(self, object_ids, timeout_ms=-1):
"""
Return metadata buffers from the PlasmaStore based on the given object IDs.
If the object has not been sealed yet, this call will block. The
retrieved buffer is immutable.
Parameters
----------
object_ids : list
A list of ObjectIDs used to identify some objects.
timeout_ms : int
The number of milliseconds that the get call should block before
timing out and returning. Pass -1 if the call should block and 0
if the call should return immediately.
Returns
-------
list
List of PlasmaBuffers for the metadata associated with the
object_ids and None if the object was not available.
"""
cdef c_vector[CObjectBuffer] object_buffers
self._get_object_buffers(object_ids, timeout_ms, &object_buffers)
result = []
for i in range(object_buffers.size()):
if object_buffers[i].metadata.get() != nullptr:
result.append(pyarrow_wrap_buffer(object_buffers[i].metadata))
else:
result.append(None)
return result
def put_raw_buffer(self, object value, ObjectID object_id=None,
c_string metadata=b"", int memcopy_threads=6):
"""
Store Python buffer into the object store.
Parameters
----------
value : Python object that implements the buffer protocol
A Python buffer object to store.
object_id : ObjectID, default None
If this is provided, the specified object ID will be used to refer
to the object.
metadata : bytes
An optional string of bytes encoding whatever metadata the user
wishes to encode.
memcopy_threads : int, default 6
The number of threads to use to write the serialized object into
the object store for large objects.
Returns
-------
ObjectID
The object ID associated with the Python buffer object.
"""
cdef ObjectID target_id = (object_id if object_id
else ObjectID.from_random())
cdef Buffer arrow_buffer = pyarrow.py_buffer(value)
write_buffer = self.create(target_id, len(value), metadata)
stream = pyarrow.FixedSizeBufferWriter(write_buffer)
stream.set_memcopy_threads(memcopy_threads)
stream.write(arrow_buffer)
self.seal(target_id)
return target_id
def put(self, object value, ObjectID object_id=None, int memcopy_threads=6,
serialization_context=None):
"""
Store a Python value into the object store.
Parameters
----------
value : object
A Python object to store.
object_id : ObjectID, default None
If this is provided, the specified object ID will be used to refer
to the object.
memcopy_threads : int, default 6
The number of threads to use to write the serialized object into
the object store for large objects.
serialization_context : pyarrow.SerializationContext, default None
Custom serialization and deserialization context.
Returns
-------
ObjectID
The object ID associated with the Python object.
"""
cdef ObjectID target_id = (object_id if object_id
else ObjectID.from_random())
if serialization_context is not None:
warnings.warn(
"'serialization_context' is deprecated and will be removed "
"in a future version.",
FutureWarning, stacklevel=2
)
serialized = pyarrow.lib._serialize(value, serialization_context)
buffer = self.create(target_id, serialized.total_bytes)
stream = pyarrow.FixedSizeBufferWriter(buffer)
stream.set_memcopy_threads(memcopy_threads)
serialized.write_to(stream)
self.seal(target_id)
return target_id
def get(self, object_ids, int timeout_ms=-1, serialization_context=None):
"""
Get one or more Python values from the object store.
Parameters
----------
object_ids : list or ObjectID
Object ID or list of object IDs associated with the values we get
from the store.
timeout_ms : int, default -1
The number of milliseconds that the get call should block before
timing out and returning. Pass -1 if the call should block and 0
if the call should return immediately.
serialization_context : pyarrow.SerializationContext, default None
Custom serialization and deserialization context.
Returns
-------
list or object
Python value or list of Python values for the data associated with
the object_ids and ObjectNotAvailable if the object was not
available.
"""
if serialization_context is not None:
warnings.warn(
"'serialization_context' is deprecated and will be removed "
"in a future version.",
FutureWarning, stacklevel=2
)
if isinstance(object_ids, Sequence):
results = []
buffers = self.get_buffers(object_ids, timeout_ms)
for i in range(len(object_ids)):
# buffers[i] is None if this object was not available within
# the timeout
if buffers[i]:
val = pyarrow.lib._deserialize(buffers[i],
serialization_context)
results.append(val)
else:
results.append(ObjectNotAvailable)
return results
else:
return self.get([object_ids], timeout_ms, serialization_context)[0]
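# Convenience round trip (illustrative) using put()/get(), which rely on the
# deprecated pyarrow serialization layer mentioned above:
#
#   oid = client.put({"x": 1, "y": [1, 2, 3]})
#   assert client.get(oid) == {"x": 1, "y": [1, 2, 3]}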
def seal(self, ObjectID object_id):
"""
Seal the buffer in the PlasmaStore for a particular object ID.
Once a buffer has been sealed, the buffer is immutable and can only be
accessed through get.
Parameters
----------
object_id : ObjectID
A string used to identify an object.
"""
with nogil:
plasma_check_status(self.client.get().Seal(object_id.data))
def _release(self, ObjectID object_id):
"""
Notify Plasma that the object is no longer needed.
Parameters
----------
object_id : ObjectID
A string used to identify an object.
"""
with nogil:
plasma_check_status(self.client.get().Release(object_id.data))
def contains(self, ObjectID object_id):
"""
Check if the object is present and sealed in the PlasmaStore.
Parameters
----------
object_id : ObjectID
A string used to identify an object.
"""
cdef c_bool is_contained
with nogil:
plasma_check_status(self.client.get().Contains(object_id.data,
&is_contained))
return is_contained
def hash(self, ObjectID object_id):
"""
Compute the checksum of an object in the object store.
Parameters
----------
object_id : ObjectID
A string used to identify an object.
Returns
-------
bytes
A digest string of the object's hash. If the object isn't in the object
store, the string will have length zero.
"""
cdef c_vector[uint8_t] digest = c_vector[uint8_t](kDigestSize)
with nogil:
plasma_check_status(self.client.get().Hash(object_id.data,
digest.data()))
return bytes(digest[:])
def evict(self, int64_t num_bytes):
"""
Evict some objects to recover some bytes.
Recover at least num_bytes bytes if possible.
Parameters
----------
num_bytes : int
The number of bytes to attempt to recover.
"""
cdef int64_t num_bytes_evicted = -1
with nogil:
plasma_check_status(
self.client.get().Evict(num_bytes, num_bytes_evicted))
return num_bytes_evicted
def subscribe(self):
"""Subscribe to notifications about sealed objects."""
with nogil:
plasma_check_status(
self.client.get().Subscribe(&self.notification_fd))
def get_notification_socket(self):
"""
Get the notification socket.
"""
return get_socket_from_fd(self.notification_fd,
family=socket.AF_UNIX,
type=socket.SOCK_STREAM)
def decode_notifications(self, const uint8_t* buf):
"""
Get the notification from the buffer.
Returns
-------
[ObjectID]
The list of object IDs in the notification message.
c_vector[int64_t]
The data sizes of the objects in the notification message.
c_vector[int64_t]
The metadata sizes of the objects in the notification message.
"""
cdef c_vector[CUniqueID] ids
cdef c_vector[int64_t] data_sizes
cdef c_vector[int64_t] metadata_sizes
with nogil:
status = self.client.get().DecodeNotifications(buf,
&ids,
&data_sizes,
&metadata_sizes)
plasma_check_status(status)
object_ids = []
for object_id in ids:
object_ids.append(ObjectID(object_id.binary()))
return object_ids, data_sizes, metadata_sizes
def get_next_notification(self):
"""
Get the next notification from the notification socket.
Returns
-------
ObjectID
The object ID of the object that was stored.
int
The data size of the object that was stored.
int
The metadata size of the object that was stored.
"""
cdef ObjectID object_id = ObjectID(CUniqueID.size() * b"\0")
cdef int64_t data_size
cdef int64_t metadata_size
with nogil:
status = self.client.get().GetNotification(self.notification_fd,
&object_id.data,
&data_size,
&metadata_size)
plasma_check_status(status)
return object_id, data_size, metadata_size
def to_capsule(self):
return PyCapsule_New(<void *>self.client.get(), "plasma", NULL)
def disconnect(self):
"""
Disconnect this client from the Plasma store.
"""
with nogil:
plasma_check_status(self.client.get().Disconnect())
def delete(self, object_ids):
"""
Delete the objects with the given IDs from the object store.
Parameters
----------
object_ids : list
A list of strings used to identify the objects.
"""
cdef c_vector[CUniqueID] ids
cdef ObjectID object_id
for object_id in object_ids:
ids.push_back(object_id.data)
with nogil:
plasma_check_status(self.client.get().Delete(ids))
def set_client_options(self, client_name, int64_t limit_output_memory):
cdef c_string name
name = client_name.encode()
with nogil:
plasma_check_status(
self.client.get().SetClientOptions(name, limit_output_memory))
def debug_string(self):
cdef c_string result
with nogil:
result = self.client.get().DebugString()
return result.decode()
def list(self):
"""
Experimental: List the objects in the store.
Returns
-------
dict
Dictionary from ObjectIDs to an "info" dictionary describing the
object. The "info" dictionary has the following entries:
data_size
size of the object in bytes
metadata_size
size of the object metadata in bytes
ref_count
Number of clients referencing the object buffer
create_time
Unix timestamp of the creation of the object
construct_duration
Time the creation of the object took in seconds
state
"created" if the object is still being created and
"sealed" if it is already sealed
"""
cdef CObjectTable objects
with nogil:
plasma_check_status(self.client.get().List(&objects))
result = dict()
cdef ObjectID object_id
cdef CObjectTableEntry entry
it = objects.begin()
while it != objects.end():
object_id = ObjectID(deref(it).first.binary())
entry = deref(deref(it).second)
if entry.state == CObjectState.PLASMA_CREATED:
state = "created"
else:
state = "sealed"
result[object_id] = {
"data_size": entry.data_size,
"metadata_size": entry.metadata_size,
"ref_count": entry.ref_count,
"create_time": entry.create_time,
"construct_duration": entry.construct_duration,
"state": state
}
inc(it)
return result
def store_capacity(self):
"""
Get the memory capacity of the store.
Returns
-------
int
The memory capacity of the store in bytes.
"""
return self.client.get().store_capacity()
def connect(store_socket_name, int num_retries=-1):
"""
Return a new PlasmaClient that is connected to a plasma store.
Parameters
----------
store_socket_name : str
Name of the socket the plasma store is listening at.
num_retries : int, default -1
Number of times to try to connect to plasma store. Default value of -1
uses the default (50)
"""
cdef PlasmaClient result = PlasmaClient()
cdef int deprecated_release_delay = 0
result.store_socket_name = store_socket_name.encode()
with nogil:
plasma_check_status(
result.client.get().Connect(result.store_socket_name, b"",
deprecated_release_delay, num_retries))
return result
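# A minimal end-to-end sketch (not executed here), assuming a Plasma store
# was started separately, e.g. ``plasma_store -m 1000000000 -s /tmp/plasma``;
# the socket path and the stored payload are illustrative only:
#
#   import pyarrow.plasma as plasma
#
#   client = plasma.connect("/tmp/plasma")
#   object_id = client.put({"answer": 42})          # serialize, write, seal
#   assert client.contains(object_id)
#   value = client.get(object_id, timeout_ms=1000)  # {'answer': 42}
#   print(client.list()[object_id]["state"])        # "sealed"
#   client.disconnect()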

View File

@ -0,0 +1,314 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: language_level = 3
from pyarrow.lib cimport (check_status, pyarrow_wrap_metadata,
pyarrow_unwrap_metadata)
from pyarrow.lib import frombytes, tobytes, KeyValueMetadata
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_fs cimport *
from pyarrow._fs cimport FileSystem
cpdef enum S3LogLevel:
Off = <int8_t> CS3LogLevel_Off
Fatal = <int8_t> CS3LogLevel_Fatal
Error = <int8_t> CS3LogLevel_Error
Warn = <int8_t> CS3LogLevel_Warn
Info = <int8_t> CS3LogLevel_Info
Debug = <int8_t> CS3LogLevel_Debug
Trace = <int8_t> CS3LogLevel_Trace
def initialize_s3(S3LogLevel log_level=S3LogLevel.Fatal):
"""
Initialize S3 support.
Parameters
----------
log_level : S3LogLevel
Level of logging.
"""
cdef CS3GlobalOptions options
options.log_level = <CS3LogLevel> log_level
check_status(CInitializeS3(options))
def finalize_s3():
check_status(CFinalizeS3())
def resolve_s3_region(bucket):
"""
Resolve the S3 region of a bucket.
Parameters
----------
bucket : str
An S3 bucket name
Returns
-------
region : str
An S3 region name
Examples
--------
>>> resolve_s3_region('ursa-labs-taxi-data')
'us-east-2'
"""
cdef:
c_string c_bucket
c_string c_region
c_bucket = tobytes(bucket)
with nogil:
c_region = GetResultValue(ResolveS3BucketRegion(c_bucket))
return frombytes(c_region)
cdef class S3FileSystem(FileSystem):
"""
S3-backed FileSystem implementation
If neither access_key nor secret_key is provided, and role_arn is also not
provided, the filesystem attempts to initialize from AWS environment
variables; otherwise both access_key and secret_key must be provided.
If role_arn is provided instead of access_key and secret_key, temporary
credentials will be fetched by issuing a request to STS to assume the
specified role.
Note: S3 buckets are special and the operations available on them may be
limited or more expensive than desired.
Parameters
----------
access_key : str, default None
AWS Access Key ID. Pass None to use the standard AWS environment
variables and/or configuration file.
secret_key : str, default None
AWS Secret Access key. Pass None to use the standard AWS environment
variables and/or configuration file.
session_token : str, default None
AWS Session Token. An optional session token, required if access_key
and secret_key are temporary credentials from STS.
anonymous : boolean, default False
Whether to connect anonymously if access_key and secret_key are None.
If true, will not attempt to look up credentials using standard AWS
configuration methods.
role_arn : str, default None
AWS Role ARN. If provided instead of access_key and secret_key,
temporary credentials will be fetched by assuming this role.
session_name : str, default None
An optional identifier for the assumed role session.
external_id : str, default None
An optional unique identifier that might be required when you assume
a role in another account.
load_frequency : int, default 900
The frequency (in seconds) with which temporary credentials from an
assumed role session will be refreshed.
region : str, default 'us-east-1'
AWS region to connect to.
scheme : str, default 'https'
S3 connection transport scheme.
endpoint_override : str, default None
Override region with a connect string such as "localhost:9000"
background_writes : boolean, default True
Whether file writes will be issued in the background, without
blocking.
default_metadata : mapping or pyarrow.KeyValueMetadata, default None
Default metadata for open_output_stream. This will be ignored if
non-empty metadata is passed to open_output_stream.
proxy_options : dict or str, default None
If a proxy is used, provide the options here. Supported options are:
'scheme' (str: 'http' or 'https'; required), 'host' (str; required),
'port' (int; required), 'username' (str; optional),
'password' (str; optional).
A proxy URI (str) can also be provided, in which case these options
will be derived from the provided URI.
The following are equivalent::
S3FileSystem(proxy_options='http://username:password@localhost:8020')
S3FileSystem(proxy_options={'scheme': 'http', 'host': 'localhost',
'port': 8020, 'username': 'username',
'password': 'password'})
"""
cdef:
CS3FileSystem* s3fs
def __init__(self, *, access_key=None, secret_key=None, session_token=None,
bint anonymous=False, region=None, scheme=None,
endpoint_override=None, bint background_writes=True,
default_metadata=None, role_arn=None, session_name=None,
external_id=None, load_frequency=900, proxy_options=None):
cdef:
CS3Options options
shared_ptr[CS3FileSystem] wrapped
if access_key is not None and secret_key is None:
raise ValueError(
'In order to initialize with explicit credentials both '
'access_key and secret_key must be provided, '
'`secret_key` is not set.'
)
elif access_key is None and secret_key is not None:
raise ValueError(
'In order to initialize with explicit credentials both '
'access_key and secret_key must be provided, '
'`access_key` is not set.'
)
elif session_token is not None and (access_key is None or
secret_key is None):
raise ValueError(
'In order to initialize a session with temporary credentials, '
'both secret_key and access_key must be provided in addition '
'to session_token.'
)
elif (access_key is not None or secret_key is not None):
if anonymous:
raise ValueError(
'Cannot pass anonymous=True together with access_key '
'and secret_key.')
if role_arn:
raise ValueError(
'Cannot provide role_arn with access_key and secret_key')
if session_token is None:
session_token = ""
options = CS3Options.FromAccessKey(
tobytes(access_key),
tobytes(secret_key),
tobytes(session_token)
)
elif anonymous:
if role_arn:
raise ValueError(
'Cannot provide role_arn with anonymous=True')
options = CS3Options.Anonymous()
elif role_arn:
options = CS3Options.FromAssumeRole(
tobytes(role_arn),
tobytes(session_name),
tobytes(external_id),
load_frequency
)
else:
options = CS3Options.Defaults()
if region is not None:
options.region = tobytes(region)
if scheme is not None:
options.scheme = tobytes(scheme)
if endpoint_override is not None:
options.endpoint_override = tobytes(endpoint_override)
if background_writes is not None:
options.background_writes = background_writes
if default_metadata is not None:
if not isinstance(default_metadata, KeyValueMetadata):
default_metadata = KeyValueMetadata(default_metadata)
options.default_metadata = pyarrow_unwrap_metadata(
default_metadata)
if proxy_options is not None:
if isinstance(proxy_options, dict):
options.proxy_options.scheme = tobytes(proxy_options["scheme"])
options.proxy_options.host = tobytes(proxy_options["host"])
options.proxy_options.port = proxy_options["port"]
proxy_username = proxy_options.get("username", None)
if proxy_username:
options.proxy_options.username = tobytes(proxy_username)
proxy_password = proxy_options.get("password", None)
if proxy_password:
options.proxy_options.password = tobytes(proxy_password)
elif isinstance(proxy_options, str):
options.proxy_options = GetResultValue(
CS3ProxyOptions.FromUriString(tobytes(proxy_options)))
else:
raise TypeError(
"'proxy_options': expected 'dict' or 'str', "
f"got {type(proxy_options)} instead.")
with nogil:
wrapped = GetResultValue(CS3FileSystem.Make(options))
self.init(<shared_ptr[CFileSystem]> wrapped)
cdef init(self, const shared_ptr[CFileSystem]& wrapped):
FileSystem.init(self, wrapped)
self.s3fs = <CS3FileSystem*> wrapped.get()
@classmethod
def _reconstruct(cls, kwargs):
return cls(**kwargs)
def __reduce__(self):
cdef CS3Options opts = self.s3fs.options()
# if creds were explicitly provided, then use them
# else obtain them as they were last time.
if opts.credentials_kind == CS3CredentialsKind_Explicit:
access_key = frombytes(opts.GetAccessKey())
secret_key = frombytes(opts.GetSecretKey())
session_token = frombytes(opts.GetSessionToken())
else:
access_key = None
secret_key = None
session_token = None
return (
S3FileSystem._reconstruct, (dict(
access_key=access_key,
secret_key=secret_key,
session_token=session_token,
anonymous=(opts.credentials_kind ==
CS3CredentialsKind_Anonymous),
region=frombytes(opts.region),
scheme=frombytes(opts.scheme),
endpoint_override=frombytes(opts.endpoint_override),
role_arn=frombytes(opts.role_arn),
session_name=frombytes(opts.session_name),
external_id=frombytes(opts.external_id),
load_frequency=opts.load_frequency,
background_writes=opts.background_writes,
default_metadata=pyarrow_wrap_metadata(opts.default_metadata),
proxy_options={'scheme': frombytes(opts.proxy_options.scheme),
'host': frombytes(opts.proxy_options.host),
'port': opts.proxy_options.port,
'username': frombytes(
opts.proxy_options.username),
'password': frombytes(
opts.proxy_options.password)}
),)
)
@property
def region(self):
"""
The AWS region this filesystem connects to.
"""
return frombytes(self.s3fs.region())
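# A short usage sketch (not executed here); the public ``ursa-labs-taxi-data``
# bucket and its ``us-east-2`` region are taken from the resolve_s3_region
# example above, everything else is illustrative:
#
#   from pyarrow.fs import S3FileSystem, FileSelector
#
#   fs = S3FileSystem(anonymous=True, region="us-east-2")
#   infos = fs.get_file_info(FileSelector("ursa-labs-taxi-data"))
#   for info in infos[:3]:
#       print(info.path, info.size)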

File diff suppressed because it is too large

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
def benchmark_PandasObjectIsNull(list obj):
Benchmark_PandasObjectIsNull(obj)

View File

@ -0,0 +1,21 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# flake8: noqa
from pyarrow.lib import benchmark_PandasObjectIsNull

View File

@ -0,0 +1,82 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
cdef class StringBuilder(_Weakrefable):
"""
Builder class for UTF8 strings.
This class exposes facilities for incrementally adding string values and
building the null bitmap for a pyarrow.Array (type='string').
"""
cdef:
unique_ptr[CStringBuilder] builder
def __cinit__(self, MemoryPool memory_pool=None):
cdef CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
self.builder.reset(new CStringBuilder(pool))
def append(self, value):
"""
Append a single value to the builder.
The value can either be a string/bytes object or a null value
(np.nan or None).
Parameters
----------
value : string/bytes or np.nan/None
The value to append to the string array builder.
"""
if value is None or value is np.nan:
self.builder.get().AppendNull()
elif isinstance(value, (bytes, str)):
self.builder.get().Append(tobytes(value))
else:
raise TypeError('StringBuilder only accepts string objects')
def append_values(self, values):
"""
Append all the values from an iterable.
Parameters
----------
values : iterable of string/bytes or np.nan/None values
The values to append to the string array builder.
"""
for value in values:
self.append(value)
def finish(self):
"""
Return result of builder as an Array object; also resets the builder.
Returns
-------
array : pyarrow.Array
"""
cdef shared_ptr[CArray] out
with nogil:
self.builder.get().Finish(&out)
return pyarrow_wrap_array(out)
@property
def null_count(self):
return self.builder.get().null_count()
def __len__(self):
return self.builder.get().length()
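# A minimal sketch of the intended usage (not executed here); the class is
# exposed to Python as ``pyarrow.lib.StringBuilder``:
#
#   from pyarrow.lib import StringBuilder
#
#   builder = StringBuilder()
#   builder.append("foo")
#   builder.append_values(["bar", None, "baz"])
#   arr = builder.finish()      # StringArray of length 4
#   assert arr.null_count == 1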

View File

@ -0,0 +1,71 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
import cffi
c_source = """
struct ArrowSchema {
// Array type description
const char* format;
const char* name;
const char* metadata;
int64_t flags;
int64_t n_children;
struct ArrowSchema** children;
struct ArrowSchema* dictionary;
// Release callback
void (*release)(struct ArrowSchema*);
// Opaque producer-specific data
void* private_data;
};
struct ArrowArray {
// Array data description
int64_t length;
int64_t null_count;
int64_t offset;
int64_t n_buffers;
int64_t n_children;
const void** buffers;
struct ArrowArray** children;
struct ArrowArray* dictionary;
// Release callback
void (*release)(struct ArrowArray*);
// Opaque producer-specific data
void* private_data;
};
struct ArrowArrayStream {
int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);
const char* (*get_last_error)(struct ArrowArrayStream*);
// Release callback
void (*release)(struct ArrowArrayStream*);
// Opaque producer-specific data
void* private_data;
};
"""
# TODO use out-of-line mode for faster import and avoid C parsing
ffi = cffi.FFI()
ffi.cdef(c_source)
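# A sketch of moving an array across the C data interface with these
# declarations (not executed here); ``_export_to_c`` and ``_import_from_c``
# are private pyarrow hooks and may change without notice:
#
#   import pyarrow as pa
#   from pyarrow.cffi import ffi
#
#   c_schema = ffi.new("struct ArrowSchema*")
#   c_array = ffi.new("struct ArrowArray*")
#   schema_ptr = int(ffi.cast("uintptr_t", c_schema))
#   array_ptr = int(ffi.cast("uintptr_t", c_array))
#
#   arr = pa.array([1, 2, 3])
#   arr._export_to_c(array_ptr, schema_ptr)
#   roundtripped = pa.Array._import_from_c(array_ptr, schema_ptr)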

View File

@ -0,0 +1,77 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
def encode_file_path(path):
if isinstance(path, str):
# POSIX systems can handle utf-8. UTF8 is converted to utf16-le in
# libarrow
encoded_path = path.encode('utf-8')
else:
encoded_path = path
# Windows file system requires utf-16le for file names; Arrow C++ libraries
# will convert utf8 to utf16
return encoded_path
# Starting with Python 3.7, dicts are guaranteed to be insertion-ordered.
ordered_dict = dict
try:
import pickle5 as builtin_pickle
except ImportError:
import pickle as builtin_pickle
try:
import cloudpickle as pickle
except ImportError:
pickle = builtin_pickle
def tobytes(o):
"""
Encode a unicode or bytes string to bytes.
Parameters
----------
o : str or bytes
Input string.
"""
if isinstance(o, str):
return o.encode('utf8')
else:
return o
def frombytes(o, *, safe=False):
"""
Decode the given bytestring to unicode.
Parameters
----------
o : bytes-like
Input object.
safe : bool, default False
If true, replace undecodable bytes with a replacement character
instead of raising an error.
"""
if safe:
return o.decode('utf8', errors='replace')
else:
return o.decode('utf8')
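# For illustration (sketch, not executed here):
#
#   >>> tobytes("héllo")
#   b'h\xc3\xa9llo'
#   >>> frombytes(b'h\xc3\xa9llo')
#   'héllo'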

View File

@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# flake8: noqa
import pyarrow.util as util
import warnings
warnings.warn("pyarrow.compat has been deprecated and will be removed in a "
"future release", FutureWarning)
guid = util._deprecate_api("compat.guid", "util.guid",
util.guid, "1.0.0")

View File

@ -0,0 +1,655 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from pyarrow._compute import ( # noqa
Function,
FunctionOptions,
FunctionRegistry,
HashAggregateFunction,
HashAggregateKernel,
Kernel,
ScalarAggregateFunction,
ScalarAggregateKernel,
ScalarFunction,
ScalarKernel,
VectorFunction,
VectorKernel,
# Option classes
ArraySortOptions,
AssumeTimezoneOptions,
CastOptions,
CountOptions,
DayOfWeekOptions,
DictionaryEncodeOptions,
ElementWiseAggregateOptions,
ExtractRegexOptions,
FilterOptions,
IndexOptions,
JoinOptions,
MakeStructOptions,
MapLookupOptions,
MatchSubstringOptions,
ModeOptions,
NullOptions,
PadOptions,
PartitionNthOptions,
QuantileOptions,
RandomOptions,
ReplaceSliceOptions,
ReplaceSubstringOptions,
RoundOptions,
RoundTemporalOptions,
RoundToMultipleOptions,
ScalarAggregateOptions,
SelectKOptions,
SetLookupOptions,
SliceOptions,
SortOptions,
SplitOptions,
SplitPatternOptions,
StrftimeOptions,
StrptimeOptions,
StructFieldOptions,
TakeOptions,
TDigestOptions,
TrimOptions,
Utf8NormalizeOptions,
VarianceOptions,
WeekOptions,
# Functions
call_function,
function_registry,
get_function,
list_functions,
_group_by,
# Expressions
Expression,
)
from collections import namedtuple
import inspect
from textwrap import dedent
import warnings
import pyarrow as pa
from pyarrow import _compute_docstrings
from pyarrow.vendored import docscrape
def _get_arg_names(func):
return func._doc.arg_names
_OptionsClassDoc = namedtuple('_OptionsClassDoc', ('params',))
def _scrape_options_class_doc(options_class):
if not options_class.__doc__:
return None
doc = docscrape.NumpyDocString(options_class.__doc__)
return _OptionsClassDoc(doc['Parameters'])
def _decorate_compute_function(wrapper, exposed_name, func, options_class):
# Decorate the given compute function wrapper with useful metadata
# and documentation.
cpp_doc = func._doc
wrapper.__arrow_compute_function__ = dict(
name=func.name,
arity=func.arity,
options_class=cpp_doc.options_class,
options_required=cpp_doc.options_required)
wrapper.__name__ = exposed_name
wrapper.__qualname__ = exposed_name
doc_pieces = []
# 1. One-line summary
summary = cpp_doc.summary
if not summary:
arg_str = "arguments" if func.arity > 1 else "argument"
summary = ("Call compute function {!r} with the given {}"
.format(func.name, arg_str))
doc_pieces.append(f"{summary}.\n\n")
# 2. Multi-line description
description = cpp_doc.description
if description:
doc_pieces.append(f"{description}\n\n")
doc_addition = _compute_docstrings.function_doc_additions.get(func.name)
# 3. Parameter description
doc_pieces.append(dedent("""\
Parameters
----------
"""))
# 3a. Compute function parameters
arg_names = _get_arg_names(func)
for arg_name in arg_names:
if func.kind in ('vector', 'scalar_aggregate'):
arg_type = 'Array-like'
else:
arg_type = 'Array-like or scalar-like'
doc_pieces.append(f"{arg_name} : {arg_type}\n")
doc_pieces.append(" Argument to compute function.\n")
# 3b. Compute function option values
if options_class is not None:
options_class_doc = _scrape_options_class_doc(options_class)
if options_class_doc:
for p in options_class_doc.params:
doc_pieces.append(f"{p.name} : {p.type}\n")
for s in p.desc:
doc_pieces.append(f" {s}\n")
else:
warnings.warn(f"Options class {options_class.__name__} "
f"does not have a docstring", RuntimeWarning)
options_sig = inspect.signature(options_class)
for p in options_sig.parameters.values():
doc_pieces.append(dedent("""\
{0} : optional
Parameter for {1} constructor. Either `options`
or `{0}` can be passed, but not both at the same time.
""".format(p.name, options_class.__name__)))
doc_pieces.append(dedent(f"""\
options : pyarrow.compute.{options_class.__name__}, optional
Alternative way of passing options.
"""))
doc_pieces.append(dedent("""\
memory_pool : pyarrow.MemoryPool, optional
If not passed, will allocate memory from the default memory pool.
"""))
# 4. Custom addition (e.g. examples)
if doc_addition is not None:
doc_pieces.append("\n{}\n".format(dedent(doc_addition).strip("\n")))
wrapper.__doc__ = "".join(doc_pieces)
return wrapper
def _get_options_class(func):
class_name = func._doc.options_class
if not class_name:
return None
try:
return globals()[class_name]
except KeyError:
warnings.warn("Python binding for {} not exposed"
.format(class_name), RuntimeWarning)
return None
def _handle_options(name, options_class, options, args, kwargs):
if args or kwargs:
if options is not None:
raise TypeError(
"Function {!r} called with both an 'options' argument "
"and additional arguments"
.format(name))
return options_class(*args, **kwargs)
if options is not None:
if isinstance(options, dict):
return options_class(**options)
elif isinstance(options, options_class):
return options
raise TypeError(
"Function {!r} expected a {} parameter, got {}"
.format(name, options_class, type(options)))
return None
def _make_generic_wrapper(func_name, func, options_class, arity):
if options_class is None:
def wrapper(*args, memory_pool=None):
if arity is not Ellipsis and len(args) != arity:
raise TypeError(
f"{func_name} takes {arity} positional argument(s), "
f"but {len(args)} were given"
)
if args and isinstance(args[0], Expression):
return Expression._call(func_name, list(args))
return func.call(args, None, memory_pool)
else:
def wrapper(*args, memory_pool=None, options=None, **kwargs):
if arity is not Ellipsis:
if len(args) < arity:
raise TypeError(
f"{func_name} takes {arity} positional argument(s), "
f"but {len(args)} were given"
)
option_args = args[arity:]
args = args[:arity]
else:
option_args = ()
options = _handle_options(func_name, options_class, options,
option_args, kwargs)
if args and isinstance(args[0], Expression):
return Expression._call(func_name, list(args), options)
return func.call(args, options, memory_pool)
return wrapper
def _make_signature(arg_names, var_arg_names, options_class):
from inspect import Parameter
params = []
for name in arg_names:
params.append(Parameter(name, Parameter.POSITIONAL_ONLY))
for name in var_arg_names:
params.append(Parameter(name, Parameter.VAR_POSITIONAL))
if options_class is not None:
options_sig = inspect.signature(options_class)
for p in options_sig.parameters.values():
assert p.kind in (Parameter.POSITIONAL_OR_KEYWORD,
Parameter.KEYWORD_ONLY)
if var_arg_names:
# Cannot have a positional argument after a *args
p = p.replace(kind=Parameter.KEYWORD_ONLY)
params.append(p)
params.append(Parameter("options", Parameter.KEYWORD_ONLY,
default=None))
params.append(Parameter("memory_pool", Parameter.KEYWORD_ONLY,
default=None))
return inspect.Signature(params)
def _wrap_function(name, func):
options_class = _get_options_class(func)
arg_names = _get_arg_names(func)
has_vararg = arg_names and arg_names[-1].startswith('*')
if has_vararg:
var_arg_names = [arg_names.pop().lstrip('*')]
else:
var_arg_names = []
wrapper = _make_generic_wrapper(
name, func, options_class, arity=func.arity)
wrapper.__signature__ = _make_signature(arg_names, var_arg_names,
options_class)
return _decorate_compute_function(wrapper, name, func, options_class)
def _make_global_functions():
"""
Make global functions wrapping each compute function.
Note that some of the automatically-generated wrappers may be overridden
by custom versions below.
"""
g = globals()
reg = function_registry()
# Avoid clashes with Python keywords
rewrites = {'and': 'and_',
'or': 'or_'}
for cpp_name in reg.list_functions():
name = rewrites.get(cpp_name, cpp_name)
func = reg.get_function(cpp_name)
if func.kind == "hash_aggregate":
# Hash aggregate functions are not callable,
# so let's not expose them at module level.
continue
assert name not in g, name
g[cpp_name] = g[name] = _wrap_function(name, func)
_make_global_functions()
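# A sketch of what the generated wrappers accept (not executed here): option
# values may be passed as keyword arguments or bundled into an options object,
# but not both at once; ``round``/``RoundOptions`` are one such generated pair:
#
#   import pyarrow as pa
#   import pyarrow.compute as pc
#
#   arr = pa.array([1.234, 5.678])
#   pc.round(arr, ndigits=1)                           # keyword form
#   pc.round(arr, options=pc.RoundOptions(ndigits=1))  # options object
#   pc.round(arr, options={"ndigits": 1})              # dict form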
def cast(arr, target_type, safe=True):
"""
Cast array values to another data type. Can also be invoked as an array
instance method.
Parameters
----------
arr : Array-like
target_type : DataType or str
Type to cast to
safe : bool, default True
Check for overflows or other unsafe conversions
Examples
--------
>>> from datetime import datetime
>>> import pyarrow as pa
>>> arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)])
>>> arr.type
TimestampType(timestamp[us])
You can use ``pyarrow.DataType`` objects to specify the target type:
>>> cast(arr, pa.timestamp('ms'))
<pyarrow.lib.TimestampArray object at 0x7fe93c0f6910>
[
2010-01-01 00:00:00.000,
2015-01-01 00:00:00.000
]
>>> cast(arr, pa.timestamp('ms')).type
TimestampType(timestamp[ms])
Alternatively, it is also supported to use the string aliases for these
types:
>>> arr.cast('timestamp[ms]')
<pyarrow.lib.TimestampArray object at 0x10420eb88>
[
1262304000000,
1420070400000
]
>>> arr.cast('timestamp[ms]').type
TimestampType(timestamp[ms])
Returns
-------
casted : Array
"""
if target_type is None:
raise ValueError("Cast target type must not be None")
if safe:
options = CastOptions.safe(target_type)
else:
options = CastOptions.unsafe(target_type)
return call_function("cast", [arr], options)
def index(data, value, start=None, end=None, *, memory_pool=None):
"""
Find the index of the first occurrence of a given value.
Parameters
----------
data : Array-like
value : Scalar-like object
The value to search for.
start : int, optional
end : int, optional
memory_pool : MemoryPool, optional
If not passed, will allocate memory from the default memory pool.
Returns
-------
index : int
the index, or -1 if not found
"""
if start is not None:
if end is not None:
data = data.slice(start, end - start)
else:
data = data.slice(start)
elif end is not None:
data = data.slice(0, end)
if not isinstance(value, pa.Scalar):
value = pa.scalar(value, type=data.type)
elif data.type != value.type:
value = pa.scalar(value.as_py(), type=data.type)
options = IndexOptions(value=value)
result = call_function('index', [data], options, memory_pool)
if start is not None and result.as_py() >= 0:
result = pa.scalar(result.as_py() + start, type=pa.int64())
return result
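# For illustration, the start/end handling above keeps the returned index
# relative to the original array (sketch, not executed here):
#
#   >>> import pyarrow as pa
#   >>> import pyarrow.compute as pc
#   >>> arr = pa.array(["a", "b", "c", "b"])
#   >>> pc.index(arr, "b").as_py()
#   1
#   >>> pc.index(arr, "b", start=2).as_py()
#   3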
def take(data, indices, *, boundscheck=True, memory_pool=None):
"""
Select values (or records) from array- or table-like data given integer
selection indices.
The result will be of the same type(s) as the input, with elements taken
from the input array (or record batch / table fields) at the given
indices. If an index is null then the corresponding value in the output
will be null.
Parameters
----------
data : Array, ChunkedArray, RecordBatch, or Table
indices : Array, ChunkedArray
Must be of integer type
boundscheck : boolean, default True
Whether to boundscheck the indices. If False and there is an out of
bounds index, will likely cause the process to crash.
memory_pool : MemoryPool, optional
If not passed, will allocate memory from the default memory pool.
Returns
-------
result : depends on inputs
Examples
--------
>>> import pyarrow as pa
>>> arr = pa.array(["a", "b", "c", None, "e", "f"])
>>> indices = pa.array([0, None, 4, 3])
>>> arr.take(indices)
<pyarrow.lib.StringArray object at 0x7ffa4fc7d368>
[
"a",
null,
"e",
null
]
"""
options = TakeOptions(boundscheck=boundscheck)
return call_function('take', [data, indices], options, memory_pool)
def fill_null(values, fill_value):
"""
Replace each null element in values with fill_value. The fill_value must be
the same type as values or able to be implicitly cast to the array's
type.
This is an alias for :func:`coalesce`.
Parameters
----------
values : Array, ChunkedArray, or Scalar-like object
Each null element is replaced with the corresponding value
from fill_value.
fill_value : Array, ChunkedArray, or Scalar-like object
If not same type as data will attempt to cast.
Returns
-------
result : depends on inputs
Examples
--------
>>> import pyarrow as pa
>>> arr = pa.array([1, 2, None, 3], type=pa.int8())
>>> fill_value = pa.scalar(5, type=pa.int8())
>>> arr.fill_null(fill_value)
<pyarrow.lib.Int8Array object at 0x7f95437f01a0>
[
1,
2,
5,
3
]
"""
if not isinstance(fill_value, (pa.Array, pa.ChunkedArray, pa.Scalar)):
fill_value = pa.scalar(fill_value, type=values.type)
elif values.type != fill_value.type:
fill_value = pa.scalar(fill_value.as_py(), type=values.type)
return call_function("coalesce", [values, fill_value])
def top_k_unstable(values, k, sort_keys=None, *, memory_pool=None):
"""
Select the indices of the top-k ordered elements from array- or table-like
data.
This is a specialization for :func:`select_k_unstable`. Output is not
guaranteed to be stable.
Parameters
----------
values : Array, ChunkedArray, RecordBatch, or Table
Data to sort and get top indices from.
k : int
The number of `k` elements to keep.
sort_keys : List-like
Column key names to order by when input is table-like data.
memory_pool : MemoryPool, optional
If not passed, will allocate memory from the default memory pool.
Returns
-------
result : Array of indices
Examples
--------
>>> import pyarrow as pa
>>> import pyarrow.compute as pc
>>> arr = pa.array(["a", "b", "c", None, "e", "f"])
>>> pc.top_k_unstable(arr, k=3)
<pyarrow.lib.UInt64Array object at 0x7fdcb19d7f30>
[
5,
4,
2
]
"""
if sort_keys is None:
sort_keys = []
if isinstance(values, (pa.Array, pa.ChunkedArray)):
sort_keys.append(("dummy", "descending"))
else:
sort_keys = map(lambda key_name: (key_name, "descending"), sort_keys)
options = SelectKOptions(k, sort_keys)
return call_function("select_k_unstable", [values], options, memory_pool)
def bottom_k_unstable(values, k, sort_keys=None, *, memory_pool=None):
"""
Select the indices of the bottom-k ordered elements from
array- or table-like data.
This is a specialization for :func:`select_k_unstable`. Output is not
guaranteed to be stable.
Parameters
----------
values : Array, ChunkedArray, RecordBatch, or Table
Data to sort and get bottom indices from.
k : int
The number of `k` elements to keep.
sort_keys : List-like
Column key names to order by when input is table-like data.
memory_pool : MemoryPool, optional
If not passed, will allocate memory from the default memory pool.
Returns
-------
result : Array of indices
Examples
--------
>>> import pyarrow as pa
>>> import pyarrow.compute as pc
>>> arr = pa.array(["a", "b", "c", None, "e", "f"])
>>> pc.bottom_k_unstable(arr, k=3)
<pyarrow.lib.UInt64Array object at 0x7fdcb19d7fa0>
[
0,
1,
2
]
"""
if sort_keys is None:
sort_keys = []
if isinstance(values, (pa.Array, pa.ChunkedArray)):
sort_keys.append(("dummy", "ascending"))
else:
sort_keys = map(lambda key_name: (key_name, "ascending"), sort_keys)
options = SelectKOptions(k, sort_keys)
return call_function("select_k_unstable", [values], options, memory_pool)
def field(*name_or_index):
"""Reference a column of the dataset.
Stores only the field's name. Type and other information are known only when
the expression is bound to a dataset having an explicit schema.
Nested references are allowed by passing multiple names or a tuple of
names. For example ``('foo', 'bar')`` references the field named "bar"
inside the field named "foo".
Parameters
----------
*name_or_index : string, multiple strings, tuple or int
The name or index of the (possibly nested) field the expression
references to.
Returns
-------
field_expr : Expression
Examples
--------
>>> import pyarrow.compute as pc
>>> pc.field("a")
<pyarrow.compute.Expression a>
>>> pc.field(1)
<pyarrow.compute.Expression FieldPath(1)>
>>> pc.field(("a", "b"))
<pyarrow.compute.Expression FieldRef.Nested(FieldRef.Name(a) ...
>>> pc.field("a", "b")
<pyarrow.compute.Expression FieldRef.Nested(FieldRef.Name(a) ...
"""
n = len(name_or_index)
if n == 1:
if isinstance(name_or_index[0], (str, int)):
return Expression._field(name_or_index[0])
elif isinstance(name_or_index[0], tuple):
return Expression._nested_field(name_or_index[0])
else:
raise TypeError(
"field reference should be str, multiple str, tuple or "
f"integer, got {type(name_or_index[0])}"
)
# In case of multiple strings not supplied in a tuple
else:
return Expression._nested_field(name_or_index)
def scalar(value):
"""Expression representing a scalar value.
Parameters
----------
value : bool, int, float or string
Python value of the scalar. Note that only a subset of types are
currently supported.
Returns
-------
scalar_expr : Expression
"""
return Expression._scalar(value)
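# A small sketch combining ``field`` and ``scalar`` into a filter expression
# (not executed here); the column name is illustrative:
#
#   import pyarrow.compute as pc
#
#   expr = (pc.field("year") >= pc.scalar(2009)) & (pc.field("year") < 2015)
#   # such expressions are typically passed to dataset scans,
#   # e.g. dataset.to_table(filter=expr)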

View File

@ -0,0 +1,76 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from pyarrow.includes.libarrow cimport GetBuildInfo
from collections import namedtuple
VersionInfo = namedtuple('VersionInfo', ('major', 'minor', 'patch'))
BuildInfo = namedtuple(
'BuildInfo',
('version', 'version_info', 'so_version', 'full_so_version',
'compiler_id', 'compiler_version', 'compiler_flags',
'git_id', 'git_description', 'package_kind', 'build_type'))
RuntimeInfo = namedtuple('RuntimeInfo',
('simd_level', 'detected_simd_level'))
cdef _build_info():
cdef:
const CBuildInfo* c_info
c_info = &GetBuildInfo()
return BuildInfo(version=frombytes(c_info.version_string),
version_info=VersionInfo(c_info.version_major,
c_info.version_minor,
c_info.version_patch),
so_version=frombytes(c_info.so_version),
full_so_version=frombytes(c_info.full_so_version),
compiler_id=frombytes(c_info.compiler_id),
compiler_version=frombytes(c_info.compiler_version),
compiler_flags=frombytes(c_info.compiler_flags),
git_id=frombytes(c_info.git_id),
git_description=frombytes(c_info.git_description),
package_kind=frombytes(c_info.package_kind),
build_type=frombytes(c_info.build_type).lower(),
)
cpp_build_info = _build_info()
cpp_version = cpp_build_info.version
cpp_version_info = cpp_build_info.version_info
def runtime_info():
"""
Get runtime information.
Returns
-------
info : pyarrow.RuntimeInfo
"""
cdef:
CRuntimeInfo c_info
c_info = GetRuntimeInfo()
return RuntimeInfo(
simd_level=frombytes(c_info.simd_level),
detected_simd_level=frombytes(c_info.detected_simd_level))
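# These values are re-exported at the pyarrow top level; a quick sketch of how
# they are typically inspected (actual values depend on the local build):
#
#   import pyarrow as pa
#
#   pa.cpp_build_info.version      # e.g. "7.0.0"
#   pa.cpp_version_info.major      # e.g. 7
#   pa.runtime_info().simd_level   # e.g. "avx2"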

View File

@ -0,0 +1,22 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from pyarrow._csv import ( # noqa
ReadOptions, ParseOptions, ConvertOptions, ISO8601,
open_csv, read_csv, CSVStreamingReader, write_csv,
WriteOptions, CSVWriter, InvalidRow)

View File

@ -0,0 +1,25 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# flake8: noqa
from pyarrow._cuda import (Context, IpcMemHandle, CudaBuffer,
HostBuffer, BufferReader, BufferWriter,
new_host_buffer,
serialize_record_batch, read_message,
read_record_batch)

View File

@ -0,0 +1,935 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Dataset is currently unstable. APIs subject to change without notice."""
import pyarrow as pa
from pyarrow.util import _is_iterable, _stringify_path, _is_path_like
from pyarrow._dataset import ( # noqa
CsvFileFormat,
CsvFragmentScanOptions,
Dataset,
DatasetFactory,
DirectoryPartitioning,
FilenamePartitioning,
FileFormat,
FileFragment,
FileSystemDataset,
FileSystemDatasetFactory,
FileSystemFactoryOptions,
FileWriteOptions,
Fragment,
FragmentScanOptions,
HivePartitioning,
IpcFileFormat,
IpcFileWriteOptions,
InMemoryDataset,
Partitioning,
PartitioningFactory,
Scanner,
TaggedRecordBatch,
UnionDataset,
UnionDatasetFactory,
_get_partition_keys,
_filesystemdataset_write,
)
# keep Expression functionality exposed here for backwards compatibility
from pyarrow.compute import Expression, scalar, field # noqa
_orc_available = False
_orc_msg = (
"The pyarrow installation is not built with support for the ORC file "
"format."
)
try:
from pyarrow._dataset_orc import OrcFileFormat
_orc_available = True
except ImportError:
pass
_parquet_available = False
_parquet_msg = (
"The pyarrow installation is not built with support for the Parquet file "
"format."
)
try:
from pyarrow._dataset_parquet import ( # noqa
ParquetDatasetFactory,
ParquetFactoryOptions,
ParquetFileFormat,
ParquetFileFragment,
ParquetFileWriteOptions,
ParquetFragmentScanOptions,
ParquetReadOptions,
RowGroupInfo,
)
_parquet_available = True
except ImportError:
pass
def __getattr__(name):
if name == "OrcFileFormat" and not _orc_available:
raise ImportError(_orc_msg)
if name == "ParquetFileFormat" and not _parquet_available:
raise ImportError(_parquet_msg)
raise AttributeError(
"module 'pyarrow.dataset' has no attribute '{0}'".format(name)
)
def partitioning(schema=None, field_names=None, flavor=None,
dictionaries=None):
"""
Specify a partitioning scheme.
The supported schemes include:
- "DirectoryPartitioning": this scheme expects one segment in the file path
for each field in the specified schema (all fields are required to be
present). For example given schema<year:int16, month:int8> the path
"/2009/11" would be parsed to ("year"_ == 2009 and "month"_ == 11).
- "HivePartitioning": a scheme for "/$key=$value/" nested directories as
found in Apache Hive. This is a multi-level, directory based partitioning
scheme. Data is partitioned by static values of a particular column in
the schema. Partition keys are represented in the form $key=$value in
directory names. Field order is ignored, as are missing or unrecognized
field names.
For example, given schema<year:int16, month:int8, day:int8>, a possible
path would be "/year=2009/month=11/day=15" (but the field order does not
need to match).
- "FilenamePartitioning": this scheme expects the partitions will have
filenames containing the field values separated by "_".
For example, given schema<year:int16, month:int8, day:int8>, a possible
partition filename "2009_11_part-0.parquet" would be parsed
to ("year"_ == 2009 and "month"_ == 11).
Parameters
----------
schema : pyarrow.Schema, default None
The schema that describes the partitions present in the file path.
If not specified, and `field_names` and/or `flavor` are specified,
the schema will be inferred from the file path (and a
PartitioningFactory is returned).
field_names : list of str, default None
A list of strings (field names). If specified, the schema's types are
inferred from the file paths (only valid for DirectoryPartitioning).
flavor : str, default None
The default is DirectoryPartitioning. Specify ``flavor="hive"`` for
a HivePartitioning, and ``flavor="filename"`` for a
FilenamePartitioning.
dictionaries : dict[str, Array]
If the type of any field of `schema` is a dictionary type, the
corresponding entry of `dictionaries` must be an array containing
every value which may be taken by the corresponding column or an
error will be raised in parsing. Alternatively, pass `infer` to have
Arrow discover the dictionary values, in which case a
PartitioningFactory is returned.
Returns
-------
Partitioning or PartitioningFactory
Examples
--------
Specify the Schema for paths like "/2009/June":
>>> partitioning(pa.schema([("year", pa.int16()), ("month", pa.string())]))
or let the types be inferred by only specifying the field names:
>>> partitioning(field_names=["year", "month"])
For paths like "/2009/June", the year will be inferred as int32 while month
will be inferred as string.
Specify a Schema with dictionary encoding, providing dictionary values:
>>> partitioning(
... pa.schema([
... ("year", pa.int16()),
... ("month", pa.dictionary(pa.int8(), pa.string()))
... ]),
... dictionaries={
... "month": pa.array(["January", "February", "March"]),
... })
Alternatively, specify a Schema with dictionary encoding, but have Arrow
infer the dictionary values:
>>> partitioning(
... pa.schema([
... ("year", pa.int16()),
... ("month", pa.dictionary(pa.int8(), pa.string()))
... ]),
... dictionaries="infer")
Create a Hive scheme for a path like "/year=2009/month=11":
>>> partitioning(
... pa.schema([("year", pa.int16()), ("month", pa.int8())]),
... flavor="hive")
A Hive scheme can also be discovered from the directory structure (and
types will be inferred):
>>> partitioning(flavor="hive")
"""
if flavor is None:
# default flavor
if schema is not None:
if field_names is not None:
raise ValueError(
"Cannot specify both 'schema' and 'field_names'")
if dictionaries == 'infer':
return DirectoryPartitioning.discover(schema=schema)
return DirectoryPartitioning(schema, dictionaries)
elif field_names is not None:
if isinstance(field_names, list):
return DirectoryPartitioning.discover(field_names)
else:
raise ValueError(
"Expected list of field names, got {}".format(
type(field_names)))
else:
raise ValueError(
"For the default directory flavor, need to specify "
"a Schema or a list of field names")
if flavor == "filename":
# default flavor
if schema is not None:
if field_names is not None:
raise ValueError(
"Cannot specify both 'schema' and 'field_names'")
if dictionaries == 'infer':
return FilenamePartitioning.discover(schema=schema)
return FilenamePartitioning(schema, dictionaries)
elif field_names is not None:
if isinstance(field_names, list):
return FilenamePartitioning.discover(field_names)
else:
raise ValueError(
"Expected list of field names, got {}".format(
type(field_names)))
else:
raise ValueError(
"For the filename flavor, need to specify "
"a Schema or a list of field names")
elif flavor == 'hive':
if field_names is not None:
raise ValueError("Cannot specify 'field_names' for flavor 'hive'")
elif schema is not None:
if isinstance(schema, pa.Schema):
if dictionaries == 'infer':
return HivePartitioning.discover(schema=schema)
return HivePartitioning(schema, dictionaries)
else:
raise ValueError(
"Expected Schema for 'schema', got {}".format(
type(schema)))
else:
return HivePartitioning.discover()
else:
raise ValueError("Unsupported flavor")
def _ensure_partitioning(scheme):
"""
Validate input and return a Partitioning(Factory).
It passes None through if no partitioning scheme is defined.
"""
if scheme is None:
pass
elif isinstance(scheme, str):
scheme = partitioning(flavor=scheme)
elif isinstance(scheme, list):
scheme = partitioning(field_names=scheme)
elif isinstance(scheme, (Partitioning, PartitioningFactory)):
pass
else:
ValueError("Expected Partitioning or PartitioningFactory, got {}"
.format(type(scheme)))
return scheme
def _ensure_format(obj):
if isinstance(obj, FileFormat):
return obj
elif obj == "parquet":
if not _parquet_available:
raise ValueError(_parquet_msg)
return ParquetFileFormat()
elif obj in {"ipc", "arrow", "feather"}:
return IpcFileFormat()
elif obj == "csv":
return CsvFileFormat()
elif obj == "orc":
if not _orc_available:
raise ValueError(_orc_msg)
return OrcFileFormat()
else:
raise ValueError("format '{}' is not supported".format(obj))
def _ensure_multiple_sources(paths, filesystem=None):
"""
Treat a list of paths as files belonging to a single file system.
If the file system is local, also validate that all paths reference
existing *files*; otherwise any non-file paths will be silently skipped
(for example on a remote filesystem).
Parameters
----------
paths : list of path-like
Note that URIs are not allowed.
filesystem : FileSystem or str, optional
If a URI is passed, then its path component will act as a prefix for
the file paths.
Returns
-------
(FileSystem, list of str)
File system object and a list of normalized paths.
Raises
------
TypeError
If the passed filesystem has wrong type.
IOError
If the file system is local and a referenced path is not available or
not a file.
"""
from pyarrow.fs import (
LocalFileSystem, SubTreeFileSystem, _MockFileSystem, FileType,
_ensure_filesystem
)
if filesystem is None:
# fall back to local file system as the default
filesystem = LocalFileSystem()
else:
# construct a filesystem if it is a valid URI
filesystem = _ensure_filesystem(filesystem)
is_local = (
isinstance(filesystem, (LocalFileSystem, _MockFileSystem)) or
(isinstance(filesystem, SubTreeFileSystem) and
isinstance(filesystem.base_fs, LocalFileSystem))
)
# allow normalizing irregular paths such as Windows local paths
paths = [filesystem.normalize_path(_stringify_path(p)) for p in paths]
# validate that all of the paths are pointing to existing *files*
# possible improvement is to group the file_infos by type and raise for
# multiple paths per error category
if is_local:
for info in filesystem.get_file_info(paths):
file_type = info.type
if file_type == FileType.File:
continue
elif file_type == FileType.NotFound:
raise FileNotFoundError(info.path)
elif file_type == FileType.Directory:
raise IsADirectoryError(
'Path {} points to a directory, but only file paths are '
'supported. To construct a nested or union dataset pass '
'a list of dataset objects instead.'.format(info.path)
)
else:
raise IOError(
'Path {} exists but its type is unknown (could be a '
'special file such as a Unix socket or character device, '
'or Windows NUL / CON / ...)'.format(info.path)
)
return filesystem, paths
def _ensure_single_source(path, filesystem=None):
"""
Treat path as either a recursively traversable directory or a single file.
Parameters
----------
path : path-like
filesystem : FileSystem or str, optional
If a URI is passed, then its path component will act as a prefix for
the file paths.
Returns
-------
(FileSystem, list of str or fs.Selector)
File system object and either a single item list pointing to a file or
an fs.Selector object pointing to a directory.
Raises
------
TypeError
If the passed filesystem has wrong type.
FileNotFoundError
If the referenced file or directory doesn't exist.
"""
from pyarrow.fs import FileType, FileSelector, _resolve_filesystem_and_path
# at this point we already checked that `path` is a path-like
filesystem, path = _resolve_filesystem_and_path(path, filesystem)
# ensure that the path is normalized before passing to dataset discovery
path = filesystem.normalize_path(path)
# retrieve the file descriptor
file_info = filesystem.get_file_info(path)
# depending on the path type either return with a recursive
# directory selector or as a list containing a single file
if file_info.type == FileType.Directory:
paths_or_selector = FileSelector(path, recursive=True)
elif file_info.type == FileType.File:
paths_or_selector = [path]
else:
raise FileNotFoundError(path)
return filesystem, paths_or_selector
def _filesystem_dataset(source, schema=None, filesystem=None,
partitioning=None, format=None,
partition_base_dir=None, exclude_invalid_files=None,
selector_ignore_prefixes=None):
"""
Create a FileSystemDataset which can be used to build a Dataset.
Parameters are documented in the dataset function.
Returns
-------
FileSystemDataset
"""
format = _ensure_format(format or 'parquet')
partitioning = _ensure_partitioning(partitioning)
if isinstance(source, (list, tuple)):
fs, paths_or_selector = _ensure_multiple_sources(source, filesystem)
else:
fs, paths_or_selector = _ensure_single_source(source, filesystem)
options = FileSystemFactoryOptions(
partitioning=partitioning,
partition_base_dir=partition_base_dir,
exclude_invalid_files=exclude_invalid_files,
selector_ignore_prefixes=selector_ignore_prefixes
)
factory = FileSystemDatasetFactory(fs, paths_or_selector, format, options)
return factory.finish(schema)
def _in_memory_dataset(source, schema=None, **kwargs):
if any(v is not None for v in kwargs.values()):
raise ValueError(
"For in-memory datasets, you cannot pass any additional arguments")
return InMemoryDataset(source, schema)
def _union_dataset(children, schema=None, **kwargs):
if any(v is not None for v in kwargs.values()):
raise ValueError(
"When passing a list of Datasets, you cannot pass any additional "
"arguments"
)
if schema is None:
# unify the children datasets' schemas
schema = pa.unify_schemas([child.schema for child in children])
# create datasets with the requested schema
children = [child.replace_schema(schema) for child in children]
return UnionDataset(schema, children)
def parquet_dataset(metadata_path, schema=None, filesystem=None, format=None,
partitioning=None, partition_base_dir=None):
"""
Create a FileSystemDataset from a `_metadata` file created via
`pyarrow.parquet.write_metadata`.
Parameters
----------
metadata_path : path
Path pointing to a single-file Parquet metadata file
schema : Schema, optional
Optionally provide the Schema for the Dataset, in which case it will
not be inferred from the source.
filesystem : FileSystem or URI string, default None
If a single path is given as source and filesystem is None, then the
filesystem will be inferred from the path.
If an URI string is passed, then a filesystem object is constructed
using the URI's optional path component as a directory prefix. See the
examples below.
Note that the URIs on Windows must follow 'file:///C:...' or
'file:/C:...' patterns.
format : ParquetFileFormat
An instance of a ParquetFileFormat if special options need to be
passed.
partitioning : Partitioning, PartitioningFactory, str, list of str
The partitioning scheme specified with the ``partitioning()``
function. A flavor string can be used as shortcut, and with a list of
field names a DirectoryPartitioning will be inferred.
partition_base_dir : str, optional
For the purposes of applying the partitioning, paths will be
stripped of the partition_base_dir. Files not matching the
partition_base_dir prefix will be skipped for partitioning discovery.
The ignored files will still be part of the Dataset, but will not
have partition information.
Returns
-------
FileSystemDataset
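Examples
--------
A minimal sketch, assuming a ``_metadata`` file was written beforehand
with ``pyarrow.parquet.write_metadata`` (the path below is hypothetical):
>>> dataset = parquet_dataset("/path/to/dataset/_metadata")
>>> table = dataset.to_table()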
"""
from pyarrow.fs import LocalFileSystem, _ensure_filesystem
if format is None:
format = ParquetFileFormat()
elif not isinstance(format, ParquetFileFormat):
raise ValueError("format argument must be a ParquetFileFormat")
if filesystem is None:
filesystem = LocalFileSystem()
else:
filesystem = _ensure_filesystem(filesystem)
metadata_path = filesystem.normalize_path(_stringify_path(metadata_path))
options = ParquetFactoryOptions(
partition_base_dir=partition_base_dir,
partitioning=_ensure_partitioning(partitioning)
)
factory = ParquetDatasetFactory(
metadata_path, filesystem, format, options=options)
return factory.finish(schema)
def dataset(source, schema=None, format=None, filesystem=None,
partitioning=None, partition_base_dir=None,
exclude_invalid_files=None, ignore_prefixes=None):
"""
Open a dataset.
Datasets provide functionality to efficiently work with tabular,
potentially larger-than-memory, and multi-file datasets.
- A unified interface for different sources, like Parquet and Feather
- Discovery of sources (crawling directories, handle directory-based
partitioned datasets, basic schema normalization)
- Optimized reading with predicate pushdown (filtering rows), projection
(selecting columns), parallel reading or fine-grained managing of tasks.
Note that this is the high-level API; to have more control over the
dataset construction, use the low-level API classes (FileSystemDataset,
FileSystemDatasetFactory, etc.)
Parameters
----------
source : path, list of paths, dataset, list of datasets, (list of) \
RecordBatch or Table, iterable of RecordBatch, RecordBatchReader, or URI
Path pointing to a single file:
Open a FileSystemDataset from a single file.
Path pointing to a directory:
The directory gets discovered recursively according to a
partitioning scheme if given.
List of file paths:
Create a FileSystemDataset from explicitly given files. The files
must be located on the same filesystem given by the filesystem
parameter.
Note that, contrary to construction from a single file, passing
URIs as paths is not allowed.
List of datasets:
A nested UnionDataset gets constructed; it allows arbitrary
composition of other datasets.
Note that additional keyword arguments are not allowed.
(List of) batches or tables, iterable of batches, or RecordBatchReader:
Create an InMemoryDataset. If an iterable or empty list is given,
a schema must also be given. If an iterable or RecordBatchReader
is given, the resulting dataset can only be scanned once; further
attempts will raise an error.
schema : Schema, optional
Optionally provide the Schema for the Dataset, in which case it will
not be inferred from the source.
format : FileFormat or str
Currently "parquet", "ipc"/"arrow"/"feather", "csv", and "orc" are
supported. For Feather, only version 2 files are supported.
filesystem : FileSystem or URI string, default None
If a single path is given as source and filesystem is None, then the
filesystem will be inferred from the path.
If an URI string is passed, then a filesystem object is constructed
using the URI's optional path component as a directory prefix. See the
examples below.
Note that the URIs on Windows must follow 'file:///C:...' or
'file:/C:...' patterns.
partitioning : Partitioning, PartitioningFactory, str, list of str
The partitioning scheme specified with the ``partitioning()``
function. A flavor string can be used as shortcut, and with a list of
field names a DirectoryPartitioning will be inferred.
partition_base_dir : str, optional
For the purposes of applying the partitioning, paths will be
stripped of the partition_base_dir. Files not matching the
partition_base_dir prefix will be skipped for partitioning discovery.
The ignored files will still be part of the Dataset, but will not
have partition information.
exclude_invalid_files : bool, optional (default True)
If True, invalid files will be excluded (file format specific check).
This will incur IO for each file in a serial and single-threaded
fashion. Disabling this feature will skip the IO, but unsupported
files may be present in the Dataset (resulting in an error at scan
time).
ignore_prefixes : list, optional
Files matching any of these prefixes will be ignored by the
discovery process. This is matched to the basename of a path.
By default this is ['.', '_'].
Note that discovery happens only if a directory is passed as source.
Returns
-------
dataset : Dataset
Either a FileSystemDataset or a UnionDataset depending on the source
parameter.
Examples
--------
Opening a single file:
>>> dataset("path/to/file.parquet", format="parquet")
Opening a single file with an explicit schema:
>>> dataset("path/to/file.parquet", schema=myschema, format="parquet")
Opening a dataset for a single directory:
>>> dataset("path/to/nyc-taxi/", format="parquet")
>>> dataset("s3://mybucket/nyc-taxi/", format="parquet")
Opening a dataset from a list of relative local paths:
>>> dataset([
... "part0/data.parquet",
... "part1/data.parquet",
... "part3/data.parquet",
... ], format='parquet')
With filesystem provided:
>>> paths = [
... 'part0/data.parquet',
... 'part1/data.parquet',
... 'part3/data.parquet',
... ]
>>> dataset(paths, filesystem='file:///directory/prefix', format='parquet')
Which is equivalent to:
>>> fs = SubTreeFileSystem("/directory/prefix", LocalFileSystem())
>>> dataset(paths, filesystem=fs, format='parquet')
With a remote filesystem URI:
>>> paths = [
... 'nested/directory/part0/data.parquet',
... 'nested/directory/part1/data.parquet',
... 'nested/directory/part3/data.parquet',
... ]
>>> dataset(paths, filesystem='s3://bucket/', format='parquet')
Similarly to the local example, the directory prefix may be included in the
filesystem URI:
>>> dataset(paths, filesystem='s3://bucket/nested/directory',
... format='parquet')
Construction of a nested dataset:
>>> dataset([
... dataset("s3://old-taxi-data", format="parquet"),
... dataset("local/path/to/data", format="ipc")
... ])
"""
# collect the keyword arguments for later reuse
kwargs = dict(
schema=schema,
filesystem=filesystem,
partitioning=partitioning,
format=format,
partition_base_dir=partition_base_dir,
exclude_invalid_files=exclude_invalid_files,
selector_ignore_prefixes=ignore_prefixes
)
if _is_path_like(source):
return _filesystem_dataset(source, **kwargs)
elif isinstance(source, (tuple, list)):
if all(_is_path_like(elem) for elem in source):
return _filesystem_dataset(source, **kwargs)
elif all(isinstance(elem, Dataset) for elem in source):
return _union_dataset(source, **kwargs)
elif all(isinstance(elem, (pa.RecordBatch, pa.Table))
for elem in source):
return _in_memory_dataset(source, **kwargs)
else:
unique_types = set(type(elem).__name__ for elem in source)
type_names = ', '.join('{}'.format(t) for t in unique_types)
raise TypeError(
'Expected a list of path-like or dataset objects, or a list '
'of batches or tables. The given list contains the following '
'types: {}'.format(type_names)
)
elif isinstance(source, (pa.RecordBatch, pa.Table)):
return _in_memory_dataset(source, **kwargs)
else:
raise TypeError(
'Expected a path-like, list of path-likes or a list of Datasets '
'instead of the given type: {}'.format(type(source).__name__)
)
def _ensure_write_partitioning(part, schema, flavor):
if isinstance(part, PartitioningFactory):
raise ValueError("A PartitioningFactory cannot be used. "
"Did you call the partitioning function "
"without supplying a schema?")
if isinstance(part, Partitioning) and flavor:
raise ValueError(
"Providing a partitioning_flavor with "
"a Partitioning object is not supported"
)
elif isinstance(part, (tuple, list)):
# Names of fields were provided instead of a partitioning object.
# Create a partitioning factory with those field names.
part = partitioning(
schema=pa.schema([schema.field(f) for f in part]),
flavor=flavor
)
elif part is None:
part = partitioning(pa.schema([]), flavor=flavor)
if not isinstance(part, Partitioning):
raise ValueError(
"partitioning must be a Partitioning object or "
"a list of column names"
)
return part
def write_dataset(data, base_dir, basename_template=None, format=None,
partitioning=None, partitioning_flavor=None, schema=None,
filesystem=None, file_options=None, use_threads=True,
max_partitions=None, max_open_files=None,
max_rows_per_file=None, min_rows_per_group=None,
max_rows_per_group=None, file_visitor=None,
existing_data_behavior='error', create_dir=True):
"""
Write a dataset to a given format and partitioning.
Parameters
----------
data : Dataset, Table/RecordBatch, RecordBatchReader, list of \
Table/RecordBatch, or iterable of RecordBatch
The data to write. This can be a Dataset instance or
in-memory Arrow data. If an iterable is given, the schema must
also be given.
base_dir : str
The root directory to write the dataset to.
basename_template : str, optional
A template string used to generate basenames of written data files.
The token '{i}' will be replaced with an automatically incremented
integer. If not specified, it defaults to
"part-{i}." + format.default_extname
format : FileFormat or str
The format in which to write the dataset. Currently supported:
"parquet", "ipc"/"arrow"/"feather", and "csv". If a FileSystemDataset
is being written and `format` is not specified, it defaults to the
same format as the specified FileSystemDataset. When writing a
Table or RecordBatch, this keyword is required.
partitioning : Partitioning or list[str], optional
The partitioning scheme specified with the ``partitioning()``
function or a list of field names. When providing a list of
field names, you can use ``partitioning_flavor`` to drive which
partitioning type should be used.
partitioning_flavor : str, optional
One of the partitioning flavors supported by
``pyarrow.dataset.partitioning``. If omitted will use the
default of ``partitioning()`` which is directory partitioning.
schema : Schema, optional
filesystem : FileSystem, optional
file_options : pyarrow.dataset.FileWriteOptions, optional
FileFormat specific write options, created using the
``FileFormat.make_write_options()`` function.
use_threads : bool, default True
Write files in parallel. If enabled, the maximum parallelism, as
determined by the number of available CPU cores, will be used.
max_partitions : int, default 1024
Maximum number of partitions any batch may be written into.
max_open_files : int, default 1024
If greater than 0 then this will limit the maximum number of
files that can be left open. If an attempt is made to open
too many files then the least recently used file will be closed.
If this setting is set too low you may end up fragmenting your
data into many small files.
max_rows_per_file : int, default 0
Maximum number of rows per file. If greater than 0 then this will
limit how many rows are placed in any single file. Otherwise there
will be no limit and one file will be created in each output
directory unless files need to be closed to respect max_open_files.
min_rows_per_group : int, default 0
Minimum number of rows per group. When the value is greater than 0,
the dataset writer will batch incoming data and only write the row
groups to the disk when sufficient rows have accumulated.
max_rows_per_group : int, default 1024 * 1024
Maximum number of rows per group. If the value is greater than 0,
then the dataset writer may split up large incoming batches into
multiple row groups. If this value is set, then min_rows_per_group
should also be set. Otherwise it could end up with very small row
groups.
file_visitor : function
If set, this function will be called with a WrittenFile instance
for each file created during the call. This object will have both
a path attribute and a metadata attribute.
The path attribute will be a string containing the path to
the created file.
The metadata attribute will be the parquet metadata of the file.
This metadata will have the file path attribute set and can be used
to build a _metadata file. The metadata attribute will be None if
the format is not parquet.
Example visitor which simply collects the filenames created::
visited_paths = []
def file_visitor(written_file):
visited_paths.append(written_file.path)
existing_data_behavior : 'error' | 'overwrite_or_ignore' | \
'delete_matching'
Controls how the dataset will handle data that already exists in
the destination. The default behavior ('error') is to raise an error
if any data exists in the destination.
'overwrite_or_ignore' will ignore any existing data and will
overwrite files with the same name as an output file. Other
existing files will be ignored. This behavior, in combination
with a unique basename_template for each write, will allow for
an append workflow.
'delete_matching' is useful when you are writing a partitioned
dataset. The first time each partition directory is encountered
the entire directory will be deleted. This allows you to overwrite
old partitions completely.
create_dir : bool, default True
If False, directories will not be created. This can be useful for
filesystems that do not require directories.
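Examples
--------
A minimal sketch writing an in-memory table as a partitioned Parquet
dataset (the table, output path and column names are hypothetical):
>>> import pyarrow as pa
>>> import pyarrow.dataset as ds
>>> table = pa.table({"year": [2020, 2021], "n": [1, 2]})
>>> ds.write_dataset(table, "path/to/output", format="parquet",
...                  partitioning=["year"])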
"""
from pyarrow.fs import _resolve_filesystem_and_path
if isinstance(data, (list, tuple)):
schema = schema or data[0].schema
data = InMemoryDataset(data, schema=schema)
elif isinstance(data, (pa.RecordBatch, pa.Table)):
schema = schema or data.schema
data = InMemoryDataset(data, schema=schema)
elif isinstance(data, pa.ipc.RecordBatchReader) or _is_iterable(data):
data = Scanner.from_batches(data, schema=schema)
schema = None
elif not isinstance(data, (Dataset, Scanner)):
raise ValueError(
"Only Dataset, Scanner, Table/RecordBatch, RecordBatchReader, "
"a list of Tables/RecordBatches, or iterable of batches are "
"supported."
)
if format is None and isinstance(data, FileSystemDataset):
format = data.format
else:
format = _ensure_format(format)
if file_options is None:
file_options = format.make_write_options()
if format != file_options.format:
raise TypeError("Supplied FileWriteOptions have format {}, "
"which doesn't match supplied FileFormat {}".format(
file_options.format, format))
if basename_template is None:
basename_template = "part-{i}." + format.default_extname
if max_partitions is None:
max_partitions = 1024
if max_open_files is None:
max_open_files = 1024
if max_rows_per_file is None:
max_rows_per_file = 0
if max_rows_per_group is None:
max_rows_per_group = 1 << 20
if min_rows_per_group is None:
min_rows_per_group = 0
# at this point data is a Scanner or a Dataset, anything else
# was converted to one of those two. So we can grab the schema
# to build the partitioning object from Dataset.
if isinstance(data, Scanner):
partitioning_schema = data.dataset_schema
else:
partitioning_schema = data.schema
partitioning = _ensure_write_partitioning(partitioning,
schema=partitioning_schema,
flavor=partitioning_flavor)
filesystem, base_dir = _resolve_filesystem_and_path(base_dir, filesystem)
if isinstance(data, Dataset):
scanner = data.scanner(use_threads=use_threads)
else:
# scanner was passed directly by the user, in which case a schema
# cannot be passed
if schema is not None:
raise ValueError("Cannot specify a schema when writing a Scanner")
scanner = data
_filesystemdataset_write(
scanner, base_dir, basename_template, filesystem, partitioning,
file_options, max_partitions, file_visitor, existing_data_behavior,
max_open_files, max_rows_per_file,
min_rows_per_group, max_rows_per_group, create_dir
)

View File

@ -0,0 +1,250 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from cpython.exc cimport PyErr_CheckSignals, PyErr_SetInterrupt
from pyarrow.includes.libarrow cimport CStatus
from pyarrow.includes.libarrow_python cimport IsPyError, RestorePyError
from pyarrow.includes.common cimport c_string
from contextlib import contextmanager
import os
import signal
import threading
from pyarrow.util import _break_traceback_cycle_from_frame
class ArrowException(Exception):
pass
class ArrowInvalid(ValueError, ArrowException):
pass
class ArrowMemoryError(MemoryError, ArrowException):
pass
class ArrowKeyError(KeyError, ArrowException):
def __str__(self):
# Override KeyError.__str__, as it uses the repr() of the key
return ArrowException.__str__(self)
class ArrowTypeError(TypeError, ArrowException):
pass
class ArrowNotImplementedError(NotImplementedError, ArrowException):
pass
class ArrowCapacityError(ArrowException):
pass
class ArrowIndexError(IndexError, ArrowException):
pass
class ArrowSerializationError(ArrowException):
pass
class ArrowCancelled(ArrowException):
def __init__(self, message, signum=None):
super().__init__(message)
self.signum = signum
# Compatibility alias
ArrowIOError = IOError
# This function could be written directly in C++ if we didn't
# define Arrow-specific subclasses (ArrowInvalid etc.)
cdef int check_status(const CStatus& status) nogil except -1:
if status.ok():
return 0
with gil:
if IsPyError(status):
RestorePyError(status)
return -1
# We don't use Status::ToString() as it would redundantly include
# the C++ class name.
message = frombytes(status.message(), safe=True)
detail = status.detail()
if detail != nullptr:
message += ". Detail: " + frombytes(detail.get().ToString(),
safe=True)
if status.IsInvalid():
raise ArrowInvalid(message)
elif status.IsIOError():
# Note: OSError constructor is
# OSError(message)
# or
# OSError(errno, message, filename=None)
# or (on Windows)
# OSError(errno, message, filename, winerror)
errno = ErrnoFromStatus(status)
winerror = WinErrorFromStatus(status)
if winerror != 0:
raise IOError(errno, message, None, winerror)
elif errno != 0:
raise IOError(errno, message)
else:
raise IOError(message)
elif status.IsOutOfMemory():
raise ArrowMemoryError(message)
elif status.IsKeyError():
raise ArrowKeyError(message)
elif status.IsNotImplemented():
raise ArrowNotImplementedError(message)
elif status.IsTypeError():
raise ArrowTypeError(message)
elif status.IsCapacityError():
raise ArrowCapacityError(message)
elif status.IsIndexError():
raise ArrowIndexError(message)
elif status.IsSerializationError():
raise ArrowSerializationError(message)
elif status.IsCancelled():
signum = SignalFromStatus(status)
if signum > 0:
raise ArrowCancelled(message, signum)
else:
raise ArrowCancelled(message)
else:
message = frombytes(status.ToString(), safe=True)
raise ArrowException(message)
# This is an API function for C++ PyArrow
cdef api int pyarrow_internal_check_status(const CStatus& status) \
nogil except -1:
return check_status(status)
cdef class StopToken:
cdef void init(self, CStopToken stop_token):
self.stop_token = move(stop_token)
cdef c_bool signal_handlers_enabled = True
def enable_signal_handlers(c_bool enable):
"""
Enable or disable interruption of long-running operations.
By default, certain long running operations will detect user
interruptions, such as by pressing Ctrl-C. This detection relies
on setting a signal handler for the duration of the long-running
operation, and may therefore interfere with other frameworks or
libraries (such as an event loop).
Parameters
----------
enable : bool
Whether to enable user interruption by setting a temporary
signal handler.
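Examples
--------
A minimal sketch, assuming this function is exposed as
``pyarrow.enable_signal_handlers`` (e.g. when running inside an event loop):
>>> import pyarrow as pa
>>> pa.enable_signal_handlers(False)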
"""
global signal_handlers_enabled
signal_handlers_enabled = enable
# For internal use
# Whether we need a workaround for https://bugs.python.org/issue42248
have_signal_refcycle = (sys.version_info < (3, 8, 10) or
(3, 9) <= sys.version_info < (3, 9, 5) or
sys.version_info[:2] == (3, 10))
cdef class SignalStopHandler:
cdef:
StopToken _stop_token
vector[int] _signals
c_bool _enabled
def __cinit__(self):
self._enabled = False
self._init_signals()
if have_signal_refcycle:
_break_traceback_cycle_from_frame(sys._getframe(0))
self._stop_token = StopToken()
if not self._signals.empty():
self._stop_token.init(GetResultValue(
SetSignalStopSource()).token())
self._enabled = True
def _init_signals(self):
if (signal_handlers_enabled and
threading.current_thread() is threading.main_thread()):
self._signals = [
sig for sig in (signal.SIGINT, signal.SIGTERM)
if signal.getsignal(sig) not in (signal.SIG_DFL,
signal.SIG_IGN, None)]
def __enter__(self):
if self._enabled:
check_status(RegisterCancellingSignalHandler(self._signals))
return self
def __exit__(self, exc_type, exc_value, exc_tb):
if self._enabled:
UnregisterCancellingSignalHandler()
if exc_value is None:
# Make sure we didn't lose a signal
try:
check_status(self._stop_token.stop_token.Poll())
except ArrowCancelled as e:
exc_value = e
if isinstance(exc_value, ArrowCancelled):
if exc_value.signum:
# Re-emit the exact same signal. We restored the Python signal
# handler above, so it should receive it.
if os.name == 'nt':
SendSignal(exc_value.signum)
else:
SendSignalToThread(exc_value.signum,
threading.main_thread().ident)
else:
# Simulate Python receiving a SIGINT
# (see https://bugs.python.org/issue43356 for why we can't
# simulate the exact signal number)
PyErr_SetInterrupt()
# Maximize chances of the Python signal handler being executed now.
# Otherwise a potential KeyboardInterrupt might be missed by an
# immediately enclosing try/except block.
PyErr_CheckSignals()
# ArrowCancelled will be re-raised if PyErr_CheckSignals()
# returned successfully.
def __dealloc__(self):
if self._enabled:
ResetSignalStopSource()
@property
def stop_token(self):
return self._stop_token

View File

@ -0,0 +1,279 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import os
from pyarrow.pandas_compat import _pandas_api # noqa
from pyarrow.lib import (Codec, Table, # noqa
concat_tables, schema)
import pyarrow.lib as ext
from pyarrow import _feather
from pyarrow._feather import FeatherError # noqa: F401
from pyarrow.vendored.version import Version
def _check_pandas_version():
if _pandas_api.loose_version < Version('0.17.0'):
raise ImportError("feather requires pandas >= 0.17.0")
class FeatherDataset:
"""
Encapsulates details of reading a list of Feather files.
Parameters
----------
path_or_paths : List[str]
A list of file names
validate_schema : bool, default True
Check that individual file schemas are all the same / compatible
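Examples
--------
A minimal sketch combining two files with compatible schemas (the file
names and column names are hypothetical):
>>> dataset = FeatherDataset(["data1.feather", "data2.feather"])
>>> table = dataset.read_table(columns=["a", "b"])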
"""
def __init__(self, path_or_paths, validate_schema=True):
self.paths = path_or_paths
self.validate_schema = validate_schema
def read_table(self, columns=None):
"""
Read multiple feather files as a single pyarrow.Table
Parameters
----------
columns : List[str]
Names of columns to read from the file
Returns
-------
pyarrow.Table
Content of the file as a table (of columns)
"""
_fil = read_table(self.paths[0], columns=columns)
self._tables = [_fil]
self.schema = _fil.schema
for path in self.paths[1:]:
table = read_table(path, columns=columns)
if self.validate_schema:
self.validate_schemas(path, table)
self._tables.append(table)
return concat_tables(self._tables)
def validate_schemas(self, piece, table):
if not self.schema.equals(table.schema):
raise ValueError('Schema in {!s} was different. \n'
'{!s}\n\nvs\n\n{!s}'
.format(piece, self.schema,
table.schema))
def read_pandas(self, columns=None, use_threads=True):
"""
Read multiple Feather files as a single pandas DataFrame
Parameters
----------
columns : List[str]
Names of columns to read from the file
use_threads : bool, default True
Use multiple threads when converting to pandas
Returns
-------
pandas.DataFrame
Content of the file as a pandas DataFrame (of columns)
"""
_check_pandas_version()
return self.read_table(columns=columns).to_pandas(
use_threads=use_threads)
def check_chunked_overflow(name, col):
if col.num_chunks == 1:
return
if col.type in (ext.binary(), ext.string()):
raise ValueError("Column '{}' exceeds 2GB maximum capacity of "
"a Feather binary column. This restriction may be "
"lifted in the future".format(name))
else:
# TODO(wesm): Not sure when else this might be reached
raise ValueError("Column '{}' of type {} was chunked on conversion "
"to Arrow and cannot be currently written to "
"Feather format".format(name, str(col.type)))
_FEATHER_SUPPORTED_CODECS = {'lz4', 'zstd', 'uncompressed'}
def write_feather(df, dest, compression=None, compression_level=None,
chunksize=None, version=2):
"""
Write a pandas.DataFrame to Feather format.
Parameters
----------
df : pandas.DataFrame or pyarrow.Table
Data to write out as Feather format.
dest : str
Local destination path.
compression : string, default None
Can be one of {"zstd", "lz4", "uncompressed"}. The default of None uses
LZ4 for V2 files if it is available, otherwise uncompressed.
compression_level : int, default None
Use a compression level particular to the chosen compressor. If None
use the default compression level
chunksize : int, default None
For V2 files, the internal maximum size of Arrow RecordBatch chunks
when writing the Arrow IPC file format. None means use the default,
which is currently 64K
version : int, default 2
Feather file version. Version 2 is the current version. Version 1 is
the more limited legacy format.
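Examples
--------
A minimal sketch, assuming pandas is installed (the DataFrame and
destination path are hypothetical):
>>> import pandas as pd
>>> df = pd.DataFrame({"a": [1, 2, 3]})
>>> write_feather(df, "/tmp/example.feather")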
"""
if _pandas_api.have_pandas:
_check_pandas_version()
if (_pandas_api.has_sparse and
isinstance(df, _pandas_api.pd.SparseDataFrame)):
df = df.to_dense()
if _pandas_api.is_data_frame(df):
# Feather v1 creates a new column in the resultant Table to
# store index information if index type is not RangeIndex
if version == 1:
preserve_index = False
elif version == 2:
preserve_index = None
else:
raise ValueError("Version value should either be 1 or 2")
table = Table.from_pandas(df, preserve_index=preserve_index)
if version == 1:
# Version 1 does not support chunking
for i, name in enumerate(table.schema.names):
col = table[i]
check_chunked_overflow(name, col)
else:
table = df
if version == 1:
if len(table.column_names) > len(set(table.column_names)):
raise ValueError("cannot serialize duplicate column names")
if compression is not None:
raise ValueError("Feather V1 files do not support compression "
"option")
if chunksize is not None:
raise ValueError("Feather V1 files do not support chunksize "
"option")
else:
if compression is None and Codec.is_available('lz4_frame'):
compression = 'lz4'
elif (compression is not None and
compression not in _FEATHER_SUPPORTED_CODECS):
raise ValueError('compression="{}" not supported, must be '
'one of {}'.format(compression,
_FEATHER_SUPPORTED_CODECS))
try:
_feather.write_feather(table, dest, compression=compression,
compression_level=compression_level,
chunksize=chunksize, version=version)
except Exception:
if isinstance(dest, str):
try:
os.remove(dest)
except os.error:
pass
raise
def read_feather(source, columns=None, use_threads=True, memory_map=True):
"""
Read a pandas.DataFrame from Feather format. To read as pyarrow.Table use
feather.read_table.
Parameters
----------
source : str file path, or file-like object
columns : sequence, optional
Only read a specific set of columns. If not provided, all columns are
read.
use_threads : bool, default True
Whether to parallelize reading using multiple threads. If false,
reading from the Feather format and the conversion to pandas are
both done on a single thread.
memory_map : boolean, default True
Use memory mapping when opening file on disk
Returns
-------
df : pandas.DataFrame
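Examples
--------
A minimal sketch reading back a previously written file (the path and
column name are hypothetical):
>>> df = read_feather("/tmp/example.feather", columns=["a"])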
"""
_check_pandas_version()
return (read_table(
source, columns=columns, memory_map=memory_map,
use_threads=use_threads).to_pandas(use_threads=use_threads))
def read_table(source, columns=None, memory_map=True, use_threads=True):
"""
Read a pyarrow.Table from Feather format
Parameters
----------
source : str file path, or file-like object
columns : sequence, optional
Only read a specific set of columns. If not provided, all columns are
read.
memory_map : boolean, default True
Use memory mapping when opening file on disk
use_threads : bool, default True
Whether to parallelize reading using multiple threads.
Returns
-------
table : pyarrow.Table
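Examples
--------
A minimal sketch (the path and column name are hypothetical):
>>> table = read_table("/tmp/example.feather", columns=["a"])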
"""
reader = _feather.FeatherReader(
source, use_memory_map=memory_map, use_threads=use_threads)
if columns is None:
return reader.read()
column_types = [type(column) for column in columns]
if all(map(lambda t: t == int, column_types)):
table = reader.read_indices(columns)
elif all(map(lambda t: t == str, column_types)):
table = reader.read_names(columns)
else:
column_type_names = [t.__name__ for t in column_types]
raise TypeError("Columns must be indices or names. "
"Got columns {} of types {}"
.format(columns, column_type_names))
# Feather v1 already respects the column selection
if reader.version < 3:
return table
# Feather v2 reads with sorted / deduplicated selection
elif sorted(set(columns)) == columns:
return table
else:
# follow exact order / selection of names
return table.select(columns)

View File

@ -0,0 +1,511 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import os
import posixpath
import sys
import urllib.parse
import warnings
from os.path import join as pjoin
import pyarrow as pa
from pyarrow.util import implements, _stringify_path, _is_path_like, _DEPR_MSG
_FS_DEPR_MSG = _DEPR_MSG.format(
"filesystem.LocalFileSystem", "2.0.0", "fs.LocalFileSystem"
)
class FileSystem:
"""
Abstract filesystem interface.
"""
def cat(self, path):
"""
Return contents of file as a bytes object.
Parameters
----------
path : str
File path to read content from.
Returns
-------
contents : bytes
"""
with self.open(path, 'rb') as f:
return f.read()
def ls(self, path):
"""
Return list of file paths.
Parameters
----------
path : str
Directory to list contents from.
"""
raise NotImplementedError
def delete(self, path, recursive=False):
"""
Delete the indicated file or directory.
Parameters
----------
path : str
Path to delete.
recursive : bool, default False
If True, also delete child paths for directories.
"""
raise NotImplementedError
def disk_usage(self, path):
"""
Compute bytes used by all contents under indicated path in file tree.
Parameters
----------
path : str
Can be a file path or directory.
Returns
-------
usage : int
"""
path = _stringify_path(path)
path_info = self.stat(path)
if path_info['kind'] == 'file':
return path_info['size']
total = 0
for root, directories, files in self.walk(path):
for child_path in files:
abspath = self._path_join(root, child_path)
total += self.stat(abspath)['size']
return total
def _path_join(self, *args):
return self.pathsep.join(args)
def stat(self, path):
"""
Information about a filesystem entry.
Returns
-------
stat : dict
"""
raise NotImplementedError('FileSystem.stat')
def rm(self, path, recursive=False):
"""
Alias for FileSystem.delete.
"""
return self.delete(path, recursive=recursive)
def mv(self, path, new_path):
"""
Alias for FileSystem.rename.
"""
return self.rename(path, new_path)
def rename(self, path, new_path):
"""
Rename file, like UNIX mv command.
Parameters
----------
path : str
Path to alter.
new_path : str
Path to move to.
"""
raise NotImplementedError('FileSystem.rename')
def mkdir(self, path, create_parents=True):
"""
Create a directory.
Parameters
----------
path : str
Path to the directory.
create_parents : bool, default True
If the parent directories don't exist, create them as well.
"""
raise NotImplementedError
def exists(self, path):
"""
Return True if path exists.
Parameters
----------
path : str
Path to check.
"""
raise NotImplementedError
def isdir(self, path):
"""
Return True if path is a directory.
Parameters
----------
path : str
Path to check.
"""
raise NotImplementedError
def isfile(self, path):
"""
Return True if path is a file.
Parameters
----------
path : str
Path to check.
"""
raise NotImplementedError
def _isfilestore(self):
"""
Returns True if this FileSystem is a unix-style file store with
directories.
"""
raise NotImplementedError
def read_parquet(self, path, columns=None, metadata=None, schema=None,
use_threads=True, use_pandas_metadata=False):
"""
Read Parquet data from path in file system. Can read from a single file
or a directory of files.
Parameters
----------
path : str
Single file path or directory
columns : List[str], optional
Subset of columns to read.
metadata : pyarrow.parquet.FileMetaData
Known metadata to validate files against.
schema : pyarrow.parquet.Schema
Known schema to validate files against. Alternative to metadata
argument.
use_threads : bool, default True
Perform multi-threaded column reads.
use_pandas_metadata : bool, default False
If True and file has custom pandas schema metadata, ensure that
index columns are also loaded.
Returns
-------
table : pyarrow.Table
"""
from pyarrow.parquet import ParquetDataset
dataset = ParquetDataset(path, schema=schema, metadata=metadata,
filesystem=self)
return dataset.read(columns=columns, use_threads=use_threads,
use_pandas_metadata=use_pandas_metadata)
def open(self, path, mode='rb'):
"""
Open file for reading or writing.
"""
raise NotImplementedError
@property
def pathsep(self):
return '/'
class LocalFileSystem(FileSystem):
_instance = None
def __init__(self):
warnings.warn(_FS_DEPR_MSG, FutureWarning, stacklevel=2)
super().__init__()
@classmethod
def _get_instance(cls):
if cls._instance is None:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
cls._instance = LocalFileSystem()
return cls._instance
@classmethod
def get_instance(cls):
warnings.warn(_FS_DEPR_MSG, FutureWarning, stacklevel=2)
return cls._get_instance()
@implements(FileSystem.ls)
def ls(self, path):
path = _stringify_path(path)
return sorted(pjoin(path, x) for x in os.listdir(path))
@implements(FileSystem.mkdir)
def mkdir(self, path, create_parents=True):
path = _stringify_path(path)
if create_parents:
os.makedirs(path)
else:
os.mkdir(path)
@implements(FileSystem.isdir)
def isdir(self, path):
path = _stringify_path(path)
return os.path.isdir(path)
@implements(FileSystem.isfile)
def isfile(self, path):
path = _stringify_path(path)
return os.path.isfile(path)
@implements(FileSystem._isfilestore)
def _isfilestore(self):
return True
@implements(FileSystem.exists)
def exists(self, path):
path = _stringify_path(path)
return os.path.exists(path)
@implements(FileSystem.open)
def open(self, path, mode='rb'):
"""
Open file for reading or writing.
"""
path = _stringify_path(path)
return open(path, mode=mode)
@property
def pathsep(self):
return os.path.sep
def walk(self, path):
"""
Directory tree generator, see os.walk.
"""
path = _stringify_path(path)
return os.walk(path)
class DaskFileSystem(FileSystem):
"""
Wraps Dask filesystem implementations such as s3fs, gcsfs, etc.
"""
def __init__(self, fs):
warnings.warn(
"The pyarrow.filesystem.DaskFileSystem/S3FSWrapper are deprecated "
"as of pyarrow 3.0.0, and will be removed in a future version.",
FutureWarning, stacklevel=2)
self.fs = fs
@implements(FileSystem.isdir)
def isdir(self, path):
raise NotImplementedError("Unsupported file system API")
@implements(FileSystem.isfile)
def isfile(self, path):
raise NotImplementedError("Unsupported file system API")
@implements(FileSystem._isfilestore)
def _isfilestore(self):
"""
Object Stores like S3 and GCSFS are based on key lookups, not true
file-paths.
"""
return False
@implements(FileSystem.delete)
def delete(self, path, recursive=False):
path = _stringify_path(path)
return self.fs.rm(path, recursive=recursive)
@implements(FileSystem.exists)
def exists(self, path):
path = _stringify_path(path)
return self.fs.exists(path)
@implements(FileSystem.mkdir)
def mkdir(self, path, create_parents=True):
path = _stringify_path(path)
if create_parents:
return self.fs.mkdirs(path)
else:
return self.fs.mkdir(path)
@implements(FileSystem.open)
def open(self, path, mode='rb'):
"""
Open file for reading or writing.
"""
path = _stringify_path(path)
return self.fs.open(path, mode=mode)
def ls(self, path, detail=False):
path = _stringify_path(path)
return self.fs.ls(path, detail=detail)
def walk(self, path):
"""
Directory tree generator, like os.walk.
"""
path = _stringify_path(path)
return self.fs.walk(path)
class S3FSWrapper(DaskFileSystem):
@implements(FileSystem.isdir)
def isdir(self, path):
path = _sanitize_s3(_stringify_path(path))
try:
contents = self.fs.ls(path)
if len(contents) == 1 and contents[0] == path:
return False
else:
return True
except OSError:
return False
@implements(FileSystem.isfile)
def isfile(self, path):
path = _sanitize_s3(_stringify_path(path))
try:
contents = self.fs.ls(path)
return len(contents) == 1 and contents[0] == path
except OSError:
return False
def walk(self, path, refresh=False):
"""
Directory tree generator, like os.walk.
Generator version of what is in s3fs, which yields a flattened list of
files.
"""
path = _sanitize_s3(_stringify_path(path))
directories = set()
files = set()
for key in list(self.fs._ls(path, refresh=refresh)):
path = key['Key']
if key['StorageClass'] == 'DIRECTORY':
directories.add(path)
elif key['StorageClass'] == 'BUCKET':
pass
else:
files.add(path)
# s3fs creates duplicate 'DIRECTORY' entries
files = sorted([posixpath.split(f)[1] for f in files
if f not in directories])
directories = sorted([posixpath.split(x)[1]
for x in directories])
yield path, directories, files
for directory in directories:
yield from self.walk(directory, refresh=refresh)
def _sanitize_s3(path):
if path.startswith('s3://'):
return path.replace('s3://', '')
else:
return path
def _ensure_filesystem(fs):
fs_type = type(fs)
# If the arrow filesystem was subclassed, assume it supports the full
# interface and return it
if not issubclass(fs_type, FileSystem):
if "fsspec" in sys.modules:
fsspec = sys.modules["fsspec"]
if isinstance(fs, fsspec.AbstractFileSystem):
# for recent fsspec versions that stop inheriting from
# pyarrow.filesystem.FileSystem, still allow fsspec
# filesystems (which should be compatible with our legacy fs)
return fs
raise OSError('Unrecognized filesystem: {}'.format(fs_type))
else:
return fs
def resolve_filesystem_and_path(where, filesystem=None):
"""
Return filesystem from path which could be an HDFS URI, a local URI,
or a plain filesystem path.
"""
if not _is_path_like(where):
if filesystem is not None:
raise ValueError("filesystem passed but where is file-like, so"
" there is nothing to open with filesystem.")
return filesystem, where
if filesystem is not None:
filesystem = _ensure_filesystem(filesystem)
if isinstance(filesystem, LocalFileSystem):
path = _stringify_path(where)
elif not isinstance(where, str):
raise TypeError(
"Expected string path; path-like objects are only allowed "
"with a local filesystem"
)
else:
path = where
return filesystem, path
path = _stringify_path(where)
parsed_uri = urllib.parse.urlparse(path)
if parsed_uri.scheme == 'hdfs' or parsed_uri.scheme == 'viewfs':
# Input is hdfs URI such as hdfs://host:port/myfile.parquet
netloc_split = parsed_uri.netloc.split(':')
host = netloc_split[0]
if host == '':
host = 'default'
else:
host = parsed_uri.scheme + "://" + host
port = 0
if len(netloc_split) == 2 and netloc_split[1].isnumeric():
port = int(netloc_split[1])
fs = pa.hdfs._connect(host=host, port=port)
fs_path = parsed_uri.path
elif parsed_uri.scheme == 'file':
# Input is local URI such as file:///home/user/myfile.parquet
fs = LocalFileSystem._get_instance()
fs_path = parsed_uri.path
else:
# Input is local path such as /home/user/myfile.parquet
fs = LocalFileSystem._get_instance()
fs_path = path
return fs, fs_path

View File

@ -0,0 +1,63 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from pyarrow._flight import ( # noqa:F401
connect,
Action,
ActionType,
BasicAuth,
CallInfo,
CertKeyPair,
ClientAuthHandler,
ClientMiddleware,
ClientMiddlewareFactory,
DescriptorType,
FlightCallOptions,
FlightCancelledError,
FlightClient,
FlightDataStream,
FlightDescriptor,
FlightEndpoint,
FlightError,
FlightInfo,
FlightInternalError,
FlightMetadataReader,
FlightMetadataWriter,
FlightMethod,
FlightServerBase,
FlightServerError,
FlightStreamChunk,
FlightStreamReader,
FlightStreamWriter,
FlightTimedOutError,
FlightUnauthenticatedError,
FlightUnauthorizedError,
FlightUnavailableError,
FlightWriteSizeExceededError,
GeneratorStream,
Location,
MetadataRecordBatchReader,
MetadataRecordBatchWriter,
RecordBatchStream,
Result,
SchemaResult,
ServerAuthHandler,
ServerCallContext,
ServerMiddleware,
ServerMiddlewareFactory,
Ticket,
)

View File

@ -0,0 +1,412 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
FileSystem abstraction to interact with various local and remote filesystems.
"""
from pyarrow.util import _is_path_like, _stringify_path
from pyarrow._fs import ( # noqa
FileSelector,
FileType,
FileInfo,
FileSystem,
LocalFileSystem,
SubTreeFileSystem,
_MockFileSystem,
FileSystemHandler,
PyFileSystem,
_copy_files,
_copy_files_selector,
)
# For backward compatibility.
FileStats = FileInfo
_not_imported = []
try:
from pyarrow._hdfs import HadoopFileSystem # noqa
except ImportError:
_not_imported.append("HadoopFileSystem")
try:
from pyarrow._s3fs import ( # noqa
S3FileSystem, S3LogLevel, initialize_s3, finalize_s3,
resolve_s3_region)
except ImportError:
_not_imported.append("S3FileSystem")
else:
initialize_s3()
def __getattr__(name):
if name in _not_imported:
raise ImportError(
"The pyarrow installation is not built with support for "
"'{0}'".format(name)
)
raise AttributeError(
"module 'pyarrow.fs' has no attribute '{0}'".format(name)
)
def _filesystem_from_str(uri):
# instantiate the file system from a URI; if the URI has a path
# component then it will be treated as a path prefix
filesystem, prefix = FileSystem.from_uri(uri)
prefix = filesystem.normalize_path(prefix)
if prefix:
# validate that the prefix is pointing to a directory
prefix_info = filesystem.get_file_info([prefix])[0]
if prefix_info.type != FileType.Directory:
raise ValueError(
"The path component of the filesystem URI must point to a "
"directory but it has a type: `{}`. The path component "
"is `{}` and the given filesystem URI is `{}`".format(
prefix_info.type.name, prefix_info.path, uri
)
)
filesystem = SubTreeFileSystem(prefix, filesystem)
return filesystem
def _ensure_filesystem(
filesystem, use_mmap=False, allow_legacy_filesystem=False
):
if isinstance(filesystem, FileSystem):
return filesystem
elif isinstance(filesystem, str):
if use_mmap:
raise ValueError(
"Specifying to use memory mapping not supported for "
"filesystem specified as an URI string"
)
return _filesystem_from_str(filesystem)
# handle fsspec-compatible filesystems
try:
import fsspec
except ImportError:
pass
else:
if isinstance(filesystem, fsspec.AbstractFileSystem):
if type(filesystem).__name__ == 'LocalFileSystem':
# In case its a simple LocalFileSystem, use native arrow one
return LocalFileSystem(use_mmap=use_mmap)
return PyFileSystem(FSSpecHandler(filesystem))
# map old filesystems to new ones
import pyarrow.filesystem as legacyfs
if isinstance(filesystem, legacyfs.LocalFileSystem):
return LocalFileSystem(use_mmap=use_mmap)
# TODO handle HDFS?
if allow_legacy_filesystem and isinstance(filesystem, legacyfs.FileSystem):
return filesystem
raise TypeError(
"Unrecognized filesystem: {}. `filesystem` argument must be a "
"FileSystem instance or a valid file system URI'".format(
type(filesystem))
)
def _resolve_filesystem_and_path(
path, filesystem=None, allow_legacy_filesystem=False
):
"""
Return filesystem/path from path which could be an URI or a plain
filesystem path.
"""
if not _is_path_like(path):
if filesystem is not None:
raise ValueError(
"'filesystem' passed but the specified path is file-like, so"
" there is nothing to open with 'filesystem'."
)
return filesystem, path
if filesystem is not None:
filesystem = _ensure_filesystem(
filesystem, allow_legacy_filesystem=allow_legacy_filesystem
)
if isinstance(filesystem, LocalFileSystem):
path = _stringify_path(path)
elif not isinstance(path, str):
raise TypeError(
"Expected string path; path-like objects are only allowed "
"with a local filesystem"
)
if not allow_legacy_filesystem:
path = filesystem.normalize_path(path)
return filesystem, path
path = _stringify_path(path)
# if filesystem is not given, try to automatically determine one
# first check if the file exists as a local (relative) file path
# if not then try to parse the path as an URI
filesystem = LocalFileSystem()
try:
file_info = filesystem.get_file_info(path)
except ValueError: # ValueError means path is likely an URI
file_info = None
exists_locally = False
else:
exists_locally = (file_info.type != FileType.NotFound)
# if the file or directory doesn't exist locally, then assume that
# the path is an URI describing the file system as well
if not exists_locally:
try:
filesystem, path = FileSystem.from_uri(path)
except ValueError as e:
# neither an URI nor a locally existing path, so assume that
# local path was given and propagate a nicer file not found error
# instead of a more confusing scheme parsing error
if "empty scheme" not in str(e):
raise
else:
path = filesystem.normalize_path(path)
return filesystem, path
def copy_files(source, destination,
source_filesystem=None, destination_filesystem=None,
*, chunk_size=1024*1024, use_threads=True):
"""
Copy files between FileSystems.
This function allows you to recursively copy directories of files from
one file system to another, such as from S3 to your local machine.
Parameters
----------
source : string
Source file path or URI to a single file or directory.
If a directory, files will be copied recursively from this path.
destination : string
Destination file path or URI. If `source` is a file, `destination`
is also interpreted as the destination file (not directory).
Directories will be created as necessary.
source_filesystem : FileSystem, optional
Source filesystem, needs to be specified if `source` is not a URI,
otherwise inferred.
destination_filesystem : FileSystem, optional
Destination filesystem, needs to be specified if `destination` is not
a URI, otherwise inferred.
chunk_size : int, default 1MB
The maximum size of block to read before flushing to the
destination file. A larger chunk_size will use more memory while
copying but may help accommodate high latency FileSystems.
use_threads : bool, default True
Whether to use multiple threads to accelerate copying.
Examples
--------
Copy an S3 bucket's files to a local directory:
>>> copy_files("s3://your-bucket-name", "local-directory")
Using a FileSystem object:
>>> copy_files("your-bucket-name", "local-directory",
... source_filesystem=S3FileSystem(...))
"""
source_fs, source_path = _resolve_filesystem_and_path(
source, source_filesystem
)
destination_fs, destination_path = _resolve_filesystem_and_path(
destination, destination_filesystem
)
file_info = source_fs.get_file_info(source_path)
if file_info.type == FileType.Directory:
source_sel = FileSelector(source_path, recursive=True)
_copy_files_selector(source_fs, source_sel,
destination_fs, destination_path,
chunk_size, use_threads)
else:
_copy_files(source_fs, source_path,
destination_fs, destination_path,
chunk_size, use_threads)
class FSSpecHandler(FileSystemHandler):
"""
Handler for fsspec-based Python filesystems.
https://filesystem-spec.readthedocs.io/en/latest/index.html
Parameters
----------
fs : FSSpec-compliant filesystem instance.
Examples
--------
>>> PyFileSystem(FSSpecHandler(fsspec_fs))
"""
def __init__(self, fs):
self.fs = fs
def __eq__(self, other):
if isinstance(other, FSSpecHandler):
return self.fs == other.fs
return NotImplemented
def __ne__(self, other):
if isinstance(other, FSSpecHandler):
return self.fs != other.fs
return NotImplemented
def get_type_name(self):
protocol = self.fs.protocol
if isinstance(protocol, list):
protocol = protocol[0]
return "fsspec+{0}".format(protocol)
def normalize_path(self, path):
return path
@staticmethod
def _create_file_info(path, info):
size = info["size"]
if info["type"] == "file":
ftype = FileType.File
elif info["type"] == "directory":
ftype = FileType.Directory
# some fsspec filesystems include a file size for directories
size = None
else:
ftype = FileType.Unknown
return FileInfo(path, ftype, size=size, mtime=info.get("mtime", None))
def get_file_info(self, paths):
infos = []
for path in paths:
try:
info = self.fs.info(path)
except FileNotFoundError:
infos.append(FileInfo(path, FileType.NotFound))
else:
infos.append(self._create_file_info(path, info))
return infos
def get_file_info_selector(self, selector):
if not self.fs.isdir(selector.base_dir):
if self.fs.exists(selector.base_dir):
raise NotADirectoryError(selector.base_dir)
else:
if selector.allow_not_found:
return []
else:
raise FileNotFoundError(selector.base_dir)
if selector.recursive:
maxdepth = None
else:
maxdepth = 1
infos = []
selected_files = self.fs.find(
selector.base_dir, maxdepth=maxdepth, withdirs=True, detail=True
)
for path, info in selected_files.items():
infos.append(self._create_file_info(path, info))
return infos
def create_dir(self, path, recursive):
# mkdir also raises FileNotFoundError when base directory is not found
try:
self.fs.mkdir(path, create_parents=recursive)
except FileExistsError:
pass
def delete_dir(self, path):
self.fs.rm(path, recursive=True)
def _delete_dir_contents(self, path, missing_dir_ok=False):
try:
subpaths = self.fs.listdir(path, detail=False)
except FileNotFoundError:
if missing_dir_ok:
return
raise
for subpath in subpaths:
if self.fs.isdir(subpath):
self.fs.rm(subpath, recursive=True)
elif self.fs.isfile(subpath):
self.fs.rm(subpath)
def delete_dir_contents(self, path, missing_dir_ok):
if path.strip("/") == "":
raise ValueError(
"delete_dir_contents called on path '{}'".format(path))
self._delete_dir_contents(path, missing_dir_ok)
def delete_root_dir_contents(self):
self._delete_dir_contents("/")
def delete_file(self, path):
# fs.rm correctly raises IsADirectoryError when `path` is a directory
# instead of a file and `recursive` is not set to True
if not self.fs.exists(path):
raise FileNotFoundError(path)
self.fs.rm(path)
def move(self, src, dest):
self.fs.mv(src, dest, recursive=True)
def copy_file(self, src, dest):
# fs.copy correctly raises IsADirectoryError when `src` is a directory
# instead of a file
self.fs.copy(src, dest)
# TODO can we read/pass metadata (e.g. Content-Type) in the methods below?
def open_input_stream(self, path):
from pyarrow import PythonFile
if not self.fs.isfile(path):
raise FileNotFoundError(path)
return PythonFile(self.fs.open(path, mode="rb"), mode="r")
def open_input_file(self, path):
from pyarrow import PythonFile
if not self.fs.isfile(path):
raise FileNotFoundError(path)
return PythonFile(self.fs.open(path, mode="rb"), mode="r")
def open_output_stream(self, path, metadata):
from pyarrow import PythonFile
return PythonFile(self.fs.open(path, mode="wb"), mode="w")
def open_append_stream(self, path, metadata):
from pyarrow import PythonFile
return PythonFile(self.fs.open(path, mode="ab"), mode="w")

View File

@ -0,0 +1,568 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: profile=False
# distutils: language = c++
# cython: language_level = 3
from libcpp cimport bool as c_bool, nullptr
from libcpp.memory cimport shared_ptr, unique_ptr, make_shared
from libcpp.string cimport string as c_string
from libcpp.vector cimport vector as c_vector
from libcpp.unordered_set cimport unordered_set as c_unordered_set
from libc.stdint cimport int64_t, int32_t, uint8_t, uintptr_t
from pyarrow.includes.libarrow cimport *
from pyarrow.lib cimport (Array, DataType, Field, MemoryPool, RecordBatch,
Schema, check_status, pyarrow_wrap_array,
pyarrow_wrap_data_type, ensure_type, _Weakrefable,
pyarrow_wrap_field)
from pyarrow.lib import frombytes
from pyarrow.includes.libgandiva cimport (
CCondition, CGandivaExpression,
CNode, CProjector, CFilter,
CSelectionVector,
CSelectionVector_Mode,
_ensure_selection_mode,
CConfiguration,
CConfigurationBuilder,
TreeExprBuilder_MakeExpression,
TreeExprBuilder_MakeFunction,
TreeExprBuilder_MakeBoolLiteral,
TreeExprBuilder_MakeUInt8Literal,
TreeExprBuilder_MakeUInt16Literal,
TreeExprBuilder_MakeUInt32Literal,
TreeExprBuilder_MakeUInt64Literal,
TreeExprBuilder_MakeInt8Literal,
TreeExprBuilder_MakeInt16Literal,
TreeExprBuilder_MakeInt32Literal,
TreeExprBuilder_MakeInt64Literal,
TreeExprBuilder_MakeFloatLiteral,
TreeExprBuilder_MakeDoubleLiteral,
TreeExprBuilder_MakeStringLiteral,
TreeExprBuilder_MakeBinaryLiteral,
TreeExprBuilder_MakeField,
TreeExprBuilder_MakeIf,
TreeExprBuilder_MakeAnd,
TreeExprBuilder_MakeOr,
TreeExprBuilder_MakeCondition,
TreeExprBuilder_MakeInExpressionInt32,
TreeExprBuilder_MakeInExpressionInt64,
TreeExprBuilder_MakeInExpressionTime32,
TreeExprBuilder_MakeInExpressionTime64,
TreeExprBuilder_MakeInExpressionDate32,
TreeExprBuilder_MakeInExpressionDate64,
TreeExprBuilder_MakeInExpressionTimeStamp,
TreeExprBuilder_MakeInExpressionString,
TreeExprBuilder_MakeInExpressionBinary,
SelectionVector_MakeInt16,
SelectionVector_MakeInt32,
SelectionVector_MakeInt64,
Projector_Make,
Filter_Make,
CFunctionSignature,
GetRegisteredFunctionSignatures)
cdef class Node(_Weakrefable):
cdef:
shared_ptr[CNode] node
def __init__(self):
raise TypeError("Do not call {}'s constructor directly, use the "
"TreeExprBuilder API directly"
.format(self.__class__.__name__))
@staticmethod
cdef create(shared_ptr[CNode] node):
cdef Node self = Node.__new__(Node)
self.node = node
return self
def __str__(self):
return self.node.get().ToString().decode()
def __repr__(self):
type_format = object.__repr__(self)
return '{0}\n{1}'.format(type_format, str(self))
def return_type(self):
return pyarrow_wrap_data_type(self.node.get().return_type())
cdef class Expression(_Weakrefable):
cdef:
shared_ptr[CGandivaExpression] expression
cdef void init(self, shared_ptr[CGandivaExpression] expression):
self.expression = expression
def __str__(self):
return self.expression.get().ToString().decode()
def __repr__(self):
type_format = object.__repr__(self)
return '{0}\n{1}'.format(type_format, str(self))
def root(self):
return Node.create(self.expression.get().root())
def result(self):
return pyarrow_wrap_field(self.expression.get().result())
cdef class Condition(_Weakrefable):
cdef:
shared_ptr[CCondition] condition
def __init__(self):
raise TypeError("Do not call {}'s constructor directly, use the "
"TreeExprBuilder API instead"
.format(self.__class__.__name__))
@staticmethod
cdef create(shared_ptr[CCondition] condition):
cdef Condition self = Condition.__new__(Condition)
self.condition = condition
return self
def __str__(self):
return self.condition.get().ToString().decode()
def __repr__(self):
type_format = object.__repr__(self)
return '{0}\n{1}'.format(type_format, str(self))
def root(self):
return Node.create(self.condition.get().root())
def result(self):
return pyarrow_wrap_field(self.condition.get().result())
cdef class SelectionVector(_Weakrefable):
cdef:
shared_ptr[CSelectionVector] selection_vector
def __init__(self):
raise TypeError("Do not call {}'s constructor directly."
.format(self.__class__.__name__))
@staticmethod
cdef create(shared_ptr[CSelectionVector] selection_vector):
cdef SelectionVector self = SelectionVector.__new__(SelectionVector)
self.selection_vector = selection_vector
return self
def to_array(self):
cdef shared_ptr[CArray] result = self.selection_vector.get().ToArray()
return pyarrow_wrap_array(result)
cdef class Projector(_Weakrefable):
cdef:
shared_ptr[CProjector] projector
MemoryPool pool
def __init__(self):
raise TypeError("Do not call {}'s constructor directly, use "
"make_projector instead"
.format(self.__class__.__name__))
@staticmethod
cdef create(shared_ptr[CProjector] projector, MemoryPool pool):
cdef Projector self = Projector.__new__(Projector)
self.projector = projector
self.pool = pool
return self
@property
def llvm_ir(self):
return self.projector.get().DumpIR().decode()
def evaluate(self, RecordBatch batch, SelectionVector selection=None):
cdef vector[shared_ptr[CArray]] results
if selection is None:
check_status(self.projector.get().Evaluate(
batch.sp_batch.get()[0], self.pool.pool, &results))
else:
check_status(
self.projector.get().Evaluate(
batch.sp_batch.get()[0], selection.selection_vector.get(),
self.pool.pool, &results))
cdef shared_ptr[CArray] result
arrays = []
for result in results:
arrays.append(pyarrow_wrap_array(result))
return arrays
cdef class Filter(_Weakrefable):
cdef:
shared_ptr[CFilter] filter
def __init__(self):
raise TypeError("Do not call {}'s constructor directly, use "
"make_filter instead"
.format(self.__class__.__name__))
@staticmethod
cdef create(shared_ptr[CFilter] filter):
cdef Filter self = Filter.__new__(Filter)
self.filter = filter
return self
@property
def llvm_ir(self):
return self.filter.get().DumpIR().decode()
def evaluate(self, RecordBatch batch, MemoryPool pool, dtype='int32'):
cdef:
DataType type = ensure_type(dtype)
shared_ptr[CSelectionVector] selection
if type.id == _Type_INT16:
check_status(SelectionVector_MakeInt16(
batch.num_rows, pool.pool, &selection))
elif type.id == _Type_INT32:
check_status(SelectionVector_MakeInt32(
batch.num_rows, pool.pool, &selection))
elif type.id == _Type_INT64:
check_status(SelectionVector_MakeInt64(
batch.num_rows, pool.pool, &selection))
else:
raise ValueError("'dtype' of the selection vector should be "
"one of 'int16', 'int32' and 'int64'.")
check_status(self.filter.get().Evaluate(
batch.sp_batch.get()[0], selection))
return SelectionVector.create(selection)
cdef class TreeExprBuilder(_Weakrefable):
def make_literal(self, value, dtype):
cdef:
DataType type = ensure_type(dtype)
shared_ptr[CNode] r
if type.id == _Type_BOOL:
r = TreeExprBuilder_MakeBoolLiteral(value)
elif type.id == _Type_UINT8:
r = TreeExprBuilder_MakeUInt8Literal(value)
elif type.id == _Type_UINT16:
r = TreeExprBuilder_MakeUInt16Literal(value)
elif type.id == _Type_UINT32:
r = TreeExprBuilder_MakeUInt32Literal(value)
elif type.id == _Type_UINT64:
r = TreeExprBuilder_MakeUInt64Literal(value)
elif type.id == _Type_INT8:
r = TreeExprBuilder_MakeInt8Literal(value)
elif type.id == _Type_INT16:
r = TreeExprBuilder_MakeInt16Literal(value)
elif type.id == _Type_INT32:
r = TreeExprBuilder_MakeInt32Literal(value)
elif type.id == _Type_INT64:
r = TreeExprBuilder_MakeInt64Literal(value)
elif type.id == _Type_FLOAT:
r = TreeExprBuilder_MakeFloatLiteral(value)
elif type.id == _Type_DOUBLE:
r = TreeExprBuilder_MakeDoubleLiteral(value)
elif type.id == _Type_STRING:
r = TreeExprBuilder_MakeStringLiteral(value.encode('UTF-8'))
elif type.id == _Type_BINARY:
r = TreeExprBuilder_MakeBinaryLiteral(value)
else:
raise TypeError("Didn't recognize dtype " + str(dtype))
return Node.create(r)
def make_expression(self, Node root_node, Field return_field):
cdef shared_ptr[CGandivaExpression] r = TreeExprBuilder_MakeExpression(
root_node.node, return_field.sp_field)
cdef Expression expression = Expression()
expression.init(r)
return expression
def make_function(self, name, children, DataType return_type):
cdef c_vector[shared_ptr[CNode]] c_children
cdef Node child
for child in children:
c_children.push_back(child.node)
cdef shared_ptr[CNode] r = TreeExprBuilder_MakeFunction(
name.encode(), c_children, return_type.sp_type)
return Node.create(r)
def make_field(self, Field field):
cdef shared_ptr[CNode] r = TreeExprBuilder_MakeField(field.sp_field)
return Node.create(r)
def make_if(self, Node condition, Node this_node,
Node else_node, DataType return_type):
cdef shared_ptr[CNode] r = TreeExprBuilder_MakeIf(
condition.node, this_node.node, else_node.node,
return_type.sp_type)
return Node.create(r)
def make_and(self, children):
cdef c_vector[shared_ptr[CNode]] c_children
cdef Node child
for child in children:
c_children.push_back(child.node)
cdef shared_ptr[CNode] r = TreeExprBuilder_MakeAnd(c_children)
return Node.create(r)
def make_or(self, children):
cdef c_vector[shared_ptr[CNode]] c_children
cdef Node child
for child in children:
c_children.push_back(child.node)
cdef shared_ptr[CNode] r = TreeExprBuilder_MakeOr(c_children)
return Node.create(r)
def _make_in_expression_int32(self, Node node, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[int32_t] c_values
cdef int32_t v
for v in values:
c_values.insert(v)
r = TreeExprBuilder_MakeInExpressionInt32(node.node, c_values)
return Node.create(r)
def _make_in_expression_int64(self, Node node, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[int64_t] c_values
cdef int64_t v
for v in values:
c_values.insert(v)
r = TreeExprBuilder_MakeInExpressionInt64(node.node, c_values)
return Node.create(r)
def _make_in_expression_time32(self, Node node, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[int32_t] c_values
cdef int32_t v
for v in values:
c_values.insert(v)
r = TreeExprBuilder_MakeInExpressionTime32(node.node, c_values)
return Node.create(r)
def _make_in_expression_time64(self, Node node, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[int64_t] c_values
cdef int64_t v
for v in values:
c_values.insert(v)
r = TreeExprBuilder_MakeInExpressionTime64(node.node, c_values)
return Node.create(r)
def _make_in_expression_date32(self, Node node, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[int32_t] c_values
cdef int32_t v
for v in values:
c_values.insert(v)
r = TreeExprBuilder_MakeInExpressionDate32(node.node, c_values)
return Node.create(r)
def _make_in_expression_date64(self, Node node, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[int64_t] c_values
cdef int64_t v
for v in values:
c_values.insert(v)
r = TreeExprBuilder_MakeInExpressionDate64(node.node, c_values)
return Node.create(r)
def _make_in_expression_timestamp(self, Node node, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[int64_t] c_values
cdef int64_t v
for v in values:
c_values.insert(v)
r = TreeExprBuilder_MakeInExpressionTimeStamp(node.node, c_values)
return Node.create(r)
def _make_in_expression_binary(self, Node node, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[c_string] c_values
cdef c_string v
for v in values:
c_values.insert(v)
r = TreeExprBuilder_MakeInExpressionString(node.node, c_values)
return Node.create(r)
def _make_in_expression_string(self, Node node, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[c_string] c_values
cdef c_string _v
for v in values:
_v = v.encode('UTF-8')
c_values.insert(_v)
r = TreeExprBuilder_MakeInExpressionString(node.node, c_values)
return Node.create(r)
def make_in_expression(self, Node node, values, dtype):
cdef DataType type = ensure_type(dtype)
if type.id == _Type_INT32:
return self._make_in_expression_int32(node, values)
elif type.id == _Type_INT64:
return self._make_in_expression_int64(node, values)
elif type.id == _Type_TIME32:
return self._make_in_expression_time32(node, values)
elif type.id == _Type_TIME64:
return self._make_in_expression_time64(node, values)
elif type.id == _Type_TIMESTAMP:
return self._make_in_expression_timestamp(node, values)
elif type.id == _Type_DATE32:
return self._make_in_expression_date32(node, values)
elif type.id == _Type_DATE64:
return self._make_in_expression_date64(node, values)
elif type.id == _Type_BINARY:
return self._make_in_expression_binary(node, values)
elif type.id == _Type_STRING:
return self._make_in_expression_string(node, values)
else:
raise TypeError("Data type " + str(dtype) + " not supported.")
def make_condition(self, Node condition):
cdef shared_ptr[CCondition] r = TreeExprBuilder_MakeCondition(
condition.node)
return Condition.create(r)
cpdef make_projector(Schema schema, children, MemoryPool pool,
str selection_mode="NONE"):
"""
Construct a projection using expressions.
A projector is built for a specific schema and vector of expressions.
Once the projector is built, it can be used to evaluate many row batches.
Parameters
----------
schema : pyarrow.Schema
Schema for the record batches, and the expressions.
children : list[pyarrow.gandiva.Expression]
List of projectable expression objects.
pool : pyarrow.MemoryPool
Memory pool used to allocate output arrays.
selection_mode : str, default "NONE"
Possible values are NONE, UINT16, UINT32, UINT64.
Returns
-------
Projector instance
"""
cdef:
Expression child
c_vector[shared_ptr[CGandivaExpression]] c_children
shared_ptr[CProjector] result
for child in children:
c_children.push_back(child.expression)
check_status(
Projector_Make(schema.sp_schema, c_children,
_ensure_selection_mode(selection_mode),
CConfigurationBuilder.DefaultConfiguration(),
&result))
return Projector.create(result, pool)
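
A usage sketch for the projector API above (assuming PyArrow was built with Gandiva enabled and is importable as pyarrow.gandiva):

import pyarrow as pa
import pyarrow.gandiva as gandiva

table = pa.table({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]})
builder = gandiva.TreeExprBuilder()
node_a = builder.make_field(table.schema.field("a"))
node_b = builder.make_field(table.schema.field("b"))
add_node = builder.make_function("add", [node_a, node_b], pa.float64())
expr = builder.make_expression(add_node, pa.field("a_plus_b", pa.float64()))
projector = gandiva.make_projector(table.schema, [expr], pa.default_memory_pool())

# Evaluate the projection batch by batch; each call returns a list of
# arrays, one per expression.
for batch in table.to_batches():
    print(projector.evaluate(batch))
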
cpdef make_filter(Schema schema, Condition condition):
"""
Construct a filter based on a condition.
A filter is built for a specific schema and condition. Once the filter is
built, it can be used to evaluate many row batches.
Parameters
----------
schema : pyarrow.Schema
Schema for the record batches, and the condition.
condition : pyarrow.gandiva.Condition
Filter condition.
Returns
-------
Filter instance
"""
cdef shared_ptr[CFilter] result
check_status(
Filter_Make(schema.sp_schema, condition.condition, &result))
return Filter.create(result)
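
A corresponding sketch for the filter API (same assumptions as the projector example above):

import pyarrow as pa
import pyarrow.gandiva as gandiva

table = pa.table({"a": [1.0, 2.0, 3.0]})
builder = gandiva.TreeExprBuilder()
node_a = builder.make_field(table.schema.field("a"))
threshold = builder.make_literal(1.5, pa.float64())
condition = builder.make_condition(
    builder.make_function("greater_than", [node_a, threshold], pa.bool_()))
filt = gandiva.make_filter(table.schema, condition)

# Filter.evaluate returns a SelectionVector with the indices of the rows
# that pass the condition.
for batch in table.to_batches():
    selection = filt.evaluate(batch, pa.default_memory_pool())
    print(selection.to_array())
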
cdef class FunctionSignature(_Weakrefable):
"""
Signature of a Gandiva function including name, parameter types
and return type.
"""
cdef:
shared_ptr[CFunctionSignature] signature
def __init__(self):
raise TypeError("Do not call {}'s constructor directly."
.format(self.__class__.__name__))
@staticmethod
cdef create(shared_ptr[CFunctionSignature] signature):
cdef FunctionSignature self = FunctionSignature.__new__(
FunctionSignature)
self.signature = signature
return self
def return_type(self):
return pyarrow_wrap_data_type(self.signature.get().ret_type())
def param_types(self):
result = []
cdef vector[shared_ptr[CDataType]] types = \
self.signature.get().param_types()
for t in types:
result.append(pyarrow_wrap_data_type(t))
return result
def name(self):
return self.signature.get().base_name().decode()
def __repr__(self):
signature = self.signature.get().ToString().decode()
return "FunctionSignature(" + signature + ")"
def get_registered_function_signatures():
"""
Return the list of function signatures registered in Gandiva's
ExpressionRegistry.
Returns
-------
registry: a list of registered function signatures
"""
results = []
cdef vector[shared_ptr[CFunctionSignature]] signatures = \
GetRegisteredFunctionSignatures()
for signature in signatures:
results.append(FunctionSignature.create(signature))
return results
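
The registry accessor above can be used to discover which functions the local Gandiva build supports, for example:

import pyarrow.gandiva as gandiva

for sig in gandiva.get_registered_function_signatures()[:5]:
    params = [str(t) for t in sig.param_types()]
    print(sig.name(), params, "->", sig.return_type())
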

View File

@ -0,0 +1,240 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import os
import posixpath
import sys
import warnings
from pyarrow.util import implements, _DEPR_MSG
from pyarrow.filesystem import FileSystem
import pyarrow._hdfsio as _hdfsio
class HadoopFileSystem(_hdfsio.HadoopFileSystem, FileSystem):
"""
DEPRECATED: FileSystem interface for HDFS cluster.
See pyarrow.hdfs.connect for full connection details
.. deprecated:: 2.0
``pyarrow.hdfs.HadoopFileSystem`` is deprecated,
please use ``pyarrow.fs.HadoopFileSystem`` instead.
"""
def __init__(self, host="default", port=0, user=None, kerb_ticket=None,
driver='libhdfs', extra_conf=None):
warnings.warn(
_DEPR_MSG.format(
"hdfs.HadoopFileSystem", "2.0.0", "fs.HadoopFileSystem"),
FutureWarning, stacklevel=2)
if driver == 'libhdfs':
_maybe_set_hadoop_classpath()
self._connect(host, port, user, kerb_ticket, extra_conf)
def __reduce__(self):
return (HadoopFileSystem, (self.host, self.port, self.user,
self.kerb_ticket, self.extra_conf))
def _isfilestore(self):
"""
Return True if this is a Unix-style file store with directories.
"""
return True
@implements(FileSystem.isdir)
def isdir(self, path):
return super().isdir(path)
@implements(FileSystem.isfile)
def isfile(self, path):
return super().isfile(path)
@implements(FileSystem.delete)
def delete(self, path, recursive=False):
return super().delete(path, recursive)
def mkdir(self, path, **kwargs):
"""
Create directory in HDFS.
Parameters
----------
path : str
Directory path to create, including any parent directories.
Notes
-----
libhdfs does not support create_parents=False, so we ignore this here
"""
return super().mkdir(path)
@implements(FileSystem.rename)
def rename(self, path, new_path):
return super().rename(path, new_path)
@implements(FileSystem.exists)
def exists(self, path):
return super().exists(path)
def ls(self, path, detail=False):
"""
Retrieve directory contents and metadata, if requested.
Parameters
----------
path : str
HDFS path to retrieve contents of.
detail : bool, default False
If False, only return list of paths.
Returns
-------
result : list of dicts (detail=True) or strings (detail=False)
"""
return super().ls(path, detail)
def walk(self, top_path):
"""
Directory tree generator for HDFS, like os.walk.
Parameters
----------
top_path : str
Root directory for tree traversal.
Returns
-------
Generator yielding 3-tuple (dirpath, dirnames, filenames)
"""
contents = self.ls(top_path, detail=True)
directories, files = _libhdfs_walk_files_dirs(top_path, contents)
yield top_path, directories, files
for dirname in directories:
yield from self.walk(self._path_join(top_path, dirname))
def _maybe_set_hadoop_classpath():
import re
if re.search(r'hadoop-common[^/]+.jar', os.environ.get('CLASSPATH', '')):
return
if 'HADOOP_HOME' in os.environ:
if sys.platform != 'win32':
classpath = _derive_hadoop_classpath()
else:
hadoop_bin = '{}/bin/hadoop'.format(os.environ['HADOOP_HOME'])
classpath = _hadoop_classpath_glob(hadoop_bin)
else:
classpath = _hadoop_classpath_glob('hadoop')
os.environ['CLASSPATH'] = classpath.decode('utf-8')
def _derive_hadoop_classpath():
import subprocess
find_args = ('find', '-L', os.environ['HADOOP_HOME'], '-name', '*.jar')
find = subprocess.Popen(find_args, stdout=subprocess.PIPE)
xargs_echo = subprocess.Popen(('xargs', 'echo'),
stdin=find.stdout,
stdout=subprocess.PIPE)
jars = subprocess.check_output(('tr', "' '", "':'"),
stdin=xargs_echo.stdout)
hadoop_conf = os.environ["HADOOP_CONF_DIR"] \
if "HADOOP_CONF_DIR" in os.environ \
else os.environ["HADOOP_HOME"] + "/etc/hadoop"
return (hadoop_conf + ":").encode("utf-8") + jars
def _hadoop_classpath_glob(hadoop_bin):
import subprocess
hadoop_classpath_args = (hadoop_bin, 'classpath', '--glob')
return subprocess.check_output(hadoop_classpath_args)
def _libhdfs_walk_files_dirs(top_path, contents):
files = []
directories = []
for c in contents:
scrubbed_name = posixpath.split(c['name'])[1]
if c['kind'] == 'file':
files.append(scrubbed_name)
else:
directories.append(scrubbed_name)
return directories, files
def connect(host="default", port=0, user=None, kerb_ticket=None,
extra_conf=None):
"""
DEPRECATED: Connect to an HDFS cluster.
All parameters are optional and should only be set if the defaults need
to be overridden.
Authentication should be automatic if the HDFS cluster uses Kerberos.
However, if a username is specified, then the ticket cache will likely
be required.
.. deprecated:: 2.0
``pyarrow.hdfs.connect`` is deprecated,
please use ``pyarrow.fs.HadoopFileSystem`` instead.
Parameters
----------
host : NameNode. Set to "default" for fs.defaultFS from core-site.xml.
port : NameNode's port. Set to 0 for default or logical (HA) nodes.
user : Username when connecting to HDFS; None implies login user.
kerb_ticket : Path to Kerberos ticket cache.
extra_conf : dict, default None
extra Key/Value pairs for config; Will override any
hdfs-site.xml properties
Notes
-----
The first time you call this method, it will take longer than usual due
to JNI spin-up time.
Returns
-------
filesystem : HadoopFileSystem
"""
warnings.warn(
_DEPR_MSG.format("hdfs.connect", "2.0.0", "fs.HadoopFileSystem"),
FutureWarning, stacklevel=2
)
return _connect(
host=host, port=port, user=user, kerb_ticket=kerb_ticket,
extra_conf=extra_conf
)
def _connect(host="default", port=0, user=None, kerb_ticket=None,
extra_conf=None):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
fs = HadoopFileSystem(host=host, port=port, user=user,
kerb_ticket=kerb_ticket,
extra_conf=extra_conf)
return fs
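
Since the module above is deprecated in favor of pyarrow.fs, here is a minimal sketch of the recommended replacement (assuming a reachable HDFS NameNode and a correctly configured CLASSPATH/libhdfs, as discussed above):

from pyarrow import fs

# Connects using fs.defaultFS from core-site.xml when host="default".
hdfs = fs.HadoopFileSystem("default", user=None)
infos = hdfs.get_file_info(fs.FileSelector("/", recursive=False))
for info in infos:
    print(info.path, info.type)
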

View File

@ -0,0 +1,128 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "tensorflow/core/framework/op.h"
#include "arrow/type.h"
// These utilities are supposed to be included in TensorFlow operators
// that need to be compiled separately from Arrow because of ABI issues.
// They therefore need to be header-only.
namespace arrow {
namespace adapters {
namespace tensorflow {
Status GetArrowType(::tensorflow::DataType dtype, std::shared_ptr<DataType>* out) {
switch (dtype) {
case ::tensorflow::DT_BOOL:
*out = arrow::boolean();
break;
case ::tensorflow::DT_FLOAT:
*out = arrow::float32();
break;
case ::tensorflow::DT_DOUBLE:
*out = arrow::float64();
break;
case ::tensorflow::DT_HALF:
*out = arrow::float16();
break;
case ::tensorflow::DT_INT8:
*out = arrow::int8();
break;
case ::tensorflow::DT_INT16:
*out = arrow::int16();
break;
case ::tensorflow::DT_INT32:
*out = arrow::int32();
break;
case ::tensorflow::DT_INT64:
*out = arrow::int64();
break;
case ::tensorflow::DT_UINT8:
*out = arrow::uint8();
break;
case ::tensorflow::DT_UINT16:
*out = arrow::uint16();
break;
case ::tensorflow::DT_UINT32:
*out = arrow::uint32();
break;
case ::tensorflow::DT_UINT64:
*out = arrow::uint64();
break;
default:
return Status::TypeError("TensorFlow data type is not supported");
}
return Status::OK();
}
Status GetTensorFlowType(std::shared_ptr<DataType> dtype, ::tensorflow::DataType* out) {
switch (dtype->id()) {
case Type::BOOL:
*out = ::tensorflow::DT_BOOL;
break;
case Type::UINT8:
*out = ::tensorflow::DT_UINT8;
break;
case Type::INT8:
*out = ::tensorflow::DT_INT8;
break;
case Type::UINT16:
*out = ::tensorflow::DT_UINT16;
break;
case Type::INT16:
*out = ::tensorflow::DT_INT16;
break;
case Type::UINT32:
*out = ::tensorflow::DT_UINT32;
break;
case Type::INT32:
*out = ::tensorflow::DT_INT32;
break;
case Type::UINT64:
*out = ::tensorflow::DT_UINT64;
break;
case Type::INT64:
*out = ::tensorflow::DT_INT64;
break;
case Type::HALF_FLOAT:
*out = ::tensorflow::DT_HALF;
break;
case Type::FLOAT:
*out = ::tensorflow::DT_FLOAT;
break;
case Type::DOUBLE:
*out = ::tensorflow::DT_DOUBLE;
break;
default:
return Status::TypeError("Arrow data type is not supported");
}
return arrow::Status::OK();
}
} // namespace tensorflow
} // namespace adapters
} // namespace arrow
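
The header above maps TensorFlow dtypes to Arrow types and back for operators compiled against both libraries. From Python, the same correspondence can be illustrated through NumPy as the common denominator (a sketch assuming TensorFlow 2.x eager mode and pyarrow are both installed):

import pyarrow as pa
import tensorflow as tf

tensor = tf.constant([1.0, 2.0, 3.0], dtype=tf.float32)
np_values = tensor.numpy()                          # DT_FLOAT -> numpy float32
arrow_type = pa.from_numpy_dtype(np_values.dtype)   # float32 -> arrow float
print(arrow_type)
print(pa.array(np_values).type)
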

View File

@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Coarse public API while the library is in development
#pragma once
#include "arrow/array.h" // IYWU pragma: export
#include "arrow/array/concatenate.h" // IYWU pragma: export
#include "arrow/buffer.h" // IYWU pragma: export
#include "arrow/builder.h" // IYWU pragma: export
#include "arrow/chunked_array.h" // IYWU pragma: export
#include "arrow/compare.h" // IYWU pragma: export
#include "arrow/config.h" // IYWU pragma: export
#include "arrow/datum.h" // IYWU pragma: export
#include "arrow/extension_type.h" // IYWU pragma: export
#include "arrow/memory_pool.h" // IYWU pragma: export
#include "arrow/pretty_print.h" // IYWU pragma: export
#include "arrow/record_batch.h" // IYWU pragma: export
#include "arrow/result.h" // IYWU pragma: export
#include "arrow/status.h" // IYWU pragma: export
#include "arrow/table.h" // IYWU pragma: export
#include "arrow/table_builder.h" // IYWU pragma: export
#include "arrow/tensor.h" // IYWU pragma: export
#include "arrow/type.h" // IYWU pragma: export
#include "arrow/util/key_value_metadata.h" // IWYU pragma: export
#include "arrow/visit_array_inline.h" // IYWU pragma: export
#include "arrow/visit_scalar_inline.h" // IYWU pragma: export
#include "arrow/visitor.h" // IYWU pragma: export
/// \brief Top-level namespace for Apache Arrow C++ API
namespace arrow {}

View File

@ -0,0 +1,44 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Kitchen-sink public API for arrow::Array data structures. C++ library code
// (especially header files) in Apache Arrow should use more specific headers
// unless it's a file that uses most or all Array types in which case using
// arrow/array.h is fine.
#pragma once
/// \defgroup numeric-arrays Concrete classes for numeric arrays
/// @{
/// @}
/// \defgroup binary-arrays Concrete classes for binary/string arrays
/// @{
/// @}
/// \defgroup nested-arrays Concrete classes for nested arrays
/// @{
/// @}
#include "arrow/array/array_base.h" // IWYU pragma: keep
#include "arrow/array/array_binary.h" // IWYU pragma: keep
#include "arrow/array/array_decimal.h" // IWYU pragma: keep
#include "arrow/array/array_dict.h" // IWYU pragma: keep
#include "arrow/array/array_nested.h" // IWYU pragma: keep
#include "arrow/array/array_primitive.h" // IWYU pragma: keep
#include "arrow/array/data.h" // IWYU pragma: keep
#include "arrow/array/util.h" // IWYU pragma: keep

View File

@ -0,0 +1,264 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <iosfwd>
#include <memory>
#include <string>
#include <vector>
#include "arrow/array/data.h"
#include "arrow/buffer.h"
#include "arrow/compare.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
#include "arrow/visitor.h"
namespace arrow {
// ----------------------------------------------------------------------
// User array accessor types
/// \brief Array base type
/// Immutable data array with some logical type and some length.
///
/// Any memory is owned by the respective Buffer instance (or its parents).
///
/// The base class is only required to have a null bitmap buffer if the null
/// count is greater than 0
///
/// If known, the null count can be provided in the base Array constructor. If
/// the null count is not known, pass -1 to indicate that the null count is to
/// be computed on the first call to null_count()
class ARROW_EXPORT Array {
public:
virtual ~Array() = default;
/// \brief Return true if value at index is null. Does not boundscheck
bool IsNull(int64_t i) const {
return null_bitmap_data_ != NULLPTR
? !bit_util::GetBit(null_bitmap_data_, i + data_->offset)
: data_->null_count == data_->length;
}
/// \brief Return true if value at index is valid (not null). Does not
/// boundscheck
bool IsValid(int64_t i) const {
return null_bitmap_data_ != NULLPTR
? bit_util::GetBit(null_bitmap_data_, i + data_->offset)
: data_->null_count != data_->length;
}
/// \brief Return a Scalar containing the value of this array at i
Result<std::shared_ptr<Scalar>> GetScalar(int64_t i) const;
/// Size in the number of elements this array contains.
int64_t length() const { return data_->length; }
/// A relative position into another array's data, to enable zero-copy
/// slicing. This value defaults to zero
int64_t offset() const { return data_->offset; }
/// The number of null entries in the array. If the null count was not known
/// at time of construction (and set to a negative value), then the null
/// count will be computed and cached on the first invocation of this
/// function
int64_t null_count() const;
std::shared_ptr<DataType> type() const { return data_->type; }
Type::type type_id() const { return data_->type->id(); }
/// Buffer for the validity (null) bitmap, if any. Note that Union types
/// never have a null bitmap.
///
/// Note that for `null_count == 0` or for null type, this will be null.
/// This buffer does not account for any slice offset
const std::shared_ptr<Buffer>& null_bitmap() const { return data_->buffers[0]; }
/// Raw pointer to the null bitmap.
///
/// Note that for `null_count == 0` or for null type, this will be null.
/// This buffer does not account for any slice offset
const uint8_t* null_bitmap_data() const { return null_bitmap_data_; }
/// Equality comparison with another array
bool Equals(const Array& arr, const EqualOptions& = EqualOptions::Defaults()) const;
bool Equals(const std::shared_ptr<Array>& arr,
const EqualOptions& = EqualOptions::Defaults()) const;
/// \brief Return the formatted unified diff of arrow::Diff between this
/// Array and another Array
std::string Diff(const Array& other) const;
/// Approximate equality comparison with another array
///
/// epsilon is only used if this is FloatArray or DoubleArray
bool ApproxEquals(const std::shared_ptr<Array>& arr,
const EqualOptions& = EqualOptions::Defaults()) const;
bool ApproxEquals(const Array& arr,
const EqualOptions& = EqualOptions::Defaults()) const;
/// Compare if the range of slots specified are equal for the given array and
/// this array. end_idx is exclusive. This method does not bounds check.
bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
const Array& other,
const EqualOptions& = EqualOptions::Defaults()) const;
bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
const std::shared_ptr<Array>& other,
const EqualOptions& = EqualOptions::Defaults()) const;
bool RangeEquals(const Array& other, int64_t start_idx, int64_t end_idx,
int64_t other_start_idx,
const EqualOptions& = EqualOptions::Defaults()) const;
bool RangeEquals(const std::shared_ptr<Array>& other, int64_t start_idx,
int64_t end_idx, int64_t other_start_idx,
const EqualOptions& = EqualOptions::Defaults()) const;
/// \brief Apply the ArrayVisitor::Visit() method specialized to the array type
Status Accept(ArrayVisitor* visitor) const;
/// Construct a zero-copy view of this array with the given type.
///
/// This method checks if the types are layout-compatible.
/// Nested types are traversed in depth-first order. Data buffers must have
/// the same item sizes, even though the logical types may be different.
/// An error is returned if the types are not layout-compatible.
Result<std::shared_ptr<Array>> View(const std::shared_ptr<DataType>& type) const;
/// Construct a zero-copy slice of the array with the indicated offset and
/// length
///
/// \param[in] offset the position of the first element in the constructed
/// slice
/// \param[in] length the length of the slice. If there are not enough
/// elements in the array, the length will be adjusted accordingly
///
/// \return a new object wrapped in std::shared_ptr<Array>
std::shared_ptr<Array> Slice(int64_t offset, int64_t length) const;
/// Slice from offset until end of the array
std::shared_ptr<Array> Slice(int64_t offset) const;
/// Input-checking variant of Array::Slice
Result<std::shared_ptr<Array>> SliceSafe(int64_t offset, int64_t length) const;
/// Input-checking variant of Array::Slice
Result<std::shared_ptr<Array>> SliceSafe(int64_t offset) const;
const std::shared_ptr<ArrayData>& data() const { return data_; }
int num_fields() const { return static_cast<int>(data_->child_data.size()); }
/// \return PrettyPrint representation of array suitable for debugging
std::string ToString() const;
/// \brief Perform cheap validation checks to determine obvious inconsistencies
/// within the array's internal data.
///
/// This is O(k) where k is the number of descendants.
///
/// \return Status
Status Validate() const;
/// \brief Perform extensive validation checks to determine inconsistencies
/// within the array's internal data.
///
/// This is potentially O(k*n) where k is the number of descendants and n
/// is the array length.
///
/// \return Status
Status ValidateFull() const;
protected:
Array() = default;
ARROW_DEFAULT_MOVE_AND_ASSIGN(Array);
std::shared_ptr<ArrayData> data_;
const uint8_t* null_bitmap_data_ = NULLPTR;
/// Protected method for constructors
void SetData(const std::shared_ptr<ArrayData>& data) {
if (data->buffers.size() > 0) {
null_bitmap_data_ = data->GetValuesSafe<uint8_t>(0, /*offset=*/0);
} else {
null_bitmap_data_ = NULLPTR;
}
data_ = data;
}
private:
ARROW_DISALLOW_COPY_AND_ASSIGN(Array);
ARROW_EXPORT friend void PrintTo(const Array& x, std::ostream* os);
};
static inline std::ostream& operator<<(std::ostream& os, const Array& x) {
os << x.ToString();
return os;
}
/// Base class for non-nested arrays
class ARROW_EXPORT FlatArray : public Array {
protected:
using Array::Array;
};
/// Base class for arrays of fixed-size logical types
class ARROW_EXPORT PrimitiveArray : public FlatArray {
public:
PrimitiveArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
/// Does not account for any slice offset
std::shared_ptr<Buffer> values() const { return data_->buffers[1]; }
protected:
PrimitiveArray() : raw_values_(NULLPTR) {}
void SetData(const std::shared_ptr<ArrayData>& data) {
this->Array::SetData(data);
raw_values_ = data->GetValuesSafe<uint8_t>(1, /*offset=*/0);
}
explicit PrimitiveArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
const uint8_t* raw_values_;
};
/// Degenerate null type Array
class ARROW_EXPORT NullArray : public FlatArray {
public:
using TypeClass = NullType;
explicit NullArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
explicit NullArray(int64_t length);
private:
void SetData(const std::shared_ptr<ArrayData>& data) {
null_bitmap_data_ = NULLPTR;
data->null_count = data->length;
data_ = data;
}
};
} // namespace arrow
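
The accessors declared above (null_count, IsNull, Slice, type) surface on pyarrow.Array in the Python bindings; a small sketch of the equivalent operations (the C++-to-Python mapping itself is assumed, not shown in this header):

import pyarrow as pa

arr = pa.array([1, None, 3], type=pa.int64())
print(arr.null_count)   # 1
print(arr.is_null())    # boolean array marking the null slots
print(arr.slice(1, 2))  # zero-copy slice: [null, 3]
print(arr.type)         # int64
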

View File

@ -0,0 +1,269 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Array accessor classes for Binary, LargeBinary, String, LargeString,
// FixedSizeBinary
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "arrow/array/array_base.h"
#include "arrow/array/data.h"
#include "arrow/buffer.h"
#include "arrow/stl_iterator.h"
#include "arrow/type.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/macros.h"
#include "arrow/util/string_view.h" // IWYU pragma: export
#include "arrow/util/visibility.h"
namespace arrow {
/// \addtogroup binary-arrays
///
/// @{
// ----------------------------------------------------------------------
// Binary and String
/// Base class for variable-sized binary arrays, regardless of offset size
/// and logical interpretation.
template <typename TYPE>
class BaseBinaryArray : public FlatArray {
public:
using TypeClass = TYPE;
using offset_type = typename TypeClass::offset_type;
using IteratorType = stl::ArrayIterator<BaseBinaryArray<TYPE>>;
/// Return the pointer to the given element's bytes
// XXX should GetValue(int64_t i) return a string_view?
const uint8_t* GetValue(int64_t i, offset_type* out_length) const {
// Account for base offset
i += data_->offset;
const offset_type pos = raw_value_offsets_[i];
*out_length = raw_value_offsets_[i + 1] - pos;
return raw_data_ + pos;
}
/// \brief Get binary value as a string_view
///
/// \param i the value index
/// \return the view over the selected value
util::string_view GetView(int64_t i) const {
// Account for base offset
i += data_->offset;
const offset_type pos = raw_value_offsets_[i];
return util::string_view(reinterpret_cast<const char*>(raw_data_ + pos),
raw_value_offsets_[i + 1] - pos);
}
util::optional<util::string_view> operator[](int64_t i) const {
return *IteratorType(*this, i);
}
/// \brief Get binary value as a string_view
/// Provided for consistency with other arrays.
///
/// \param i the value index
/// \return the view over the selected value
util::string_view Value(int64_t i) const { return GetView(i); }
/// \brief Get binary value as a std::string
///
/// \param i the value index
/// \return the value copied into a std::string
std::string GetString(int64_t i) const { return std::string(GetView(i)); }
/// Note that this buffer does not account for any slice offset
std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[1]; }
/// Note that this buffer does not account for any slice offset
std::shared_ptr<Buffer> value_data() const { return data_->buffers[2]; }
const offset_type* raw_value_offsets() const {
return raw_value_offsets_ + data_->offset;
}
const uint8_t* raw_data() const { return raw_data_; }
/// \brief Return the data buffer absolute offset of the data for the value
/// at the passed index.
///
/// Does not perform boundschecking
offset_type value_offset(int64_t i) const {
return raw_value_offsets_[i + data_->offset];
}
/// \brief Return the length of the data for the value at the passed index.
///
/// Does not perform boundschecking
offset_type value_length(int64_t i) const {
i += data_->offset;
return raw_value_offsets_[i + 1] - raw_value_offsets_[i];
}
/// \brief Return the total length of the memory in the data buffer
/// referenced by this array. If the array has been sliced then this may be
/// less than the size of the data buffer (data_->buffers[2]).
offset_type total_values_length() const {
if (data_->length > 0) {
return raw_value_offsets_[data_->length + data_->offset] -
raw_value_offsets_[data_->offset];
} else {
return 0;
}
}
IteratorType begin() const { return IteratorType(*this); }
IteratorType end() const { return IteratorType(*this, length()); }
protected:
// For subclasses
BaseBinaryArray() = default;
// Protected method for constructors
void SetData(const std::shared_ptr<ArrayData>& data) {
this->Array::SetData(data);
raw_value_offsets_ = data->GetValuesSafe<offset_type>(1, /*offset=*/0);
raw_data_ = data->GetValuesSafe<uint8_t>(2, /*offset=*/0);
}
const offset_type* raw_value_offsets_ = NULLPTR;
const uint8_t* raw_data_ = NULLPTR;
};
/// Concrete Array class for variable-size binary data
class ARROW_EXPORT BinaryArray : public BaseBinaryArray<BinaryType> {
public:
explicit BinaryArray(const std::shared_ptr<ArrayData>& data);
BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
protected:
// For subclasses such as StringArray
BinaryArray() : BaseBinaryArray() {}
};
/// Concrete Array class for variable-size string (utf-8) data
class ARROW_EXPORT StringArray : public BinaryArray {
public:
using TypeClass = StringType;
explicit StringArray(const std::shared_ptr<ArrayData>& data);
StringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
/// \brief Validate that this array contains only valid UTF8 entries
///
/// This check is also implied by ValidateFull()
Status ValidateUTF8() const;
};
/// Concrete Array class for large variable-size binary data
class ARROW_EXPORT LargeBinaryArray : public BaseBinaryArray<LargeBinaryType> {
public:
explicit LargeBinaryArray(const std::shared_ptr<ArrayData>& data);
LargeBinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
protected:
// For subclasses such as LargeStringArray
LargeBinaryArray() : BaseBinaryArray() {}
};
/// Concrete Array class for large variable-size string (utf-8) data
class ARROW_EXPORT LargeStringArray : public LargeBinaryArray {
public:
using TypeClass = LargeStringType;
explicit LargeStringArray(const std::shared_ptr<ArrayData>& data);
LargeStringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
/// \brief Validate that this array contains only valid UTF8 entries
///
/// This check is also implied by ValidateFull()
Status ValidateUTF8() const;
};
// ----------------------------------------------------------------------
// Fixed width binary
/// Concrete Array class for fixed-size binary data
class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {
public:
using TypeClass = FixedSizeBinaryType;
using IteratorType = stl::ArrayIterator<FixedSizeBinaryArray>;
explicit FixedSizeBinaryArray(const std::shared_ptr<ArrayData>& data);
FixedSizeBinaryArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
const uint8_t* GetValue(int64_t i) const;
const uint8_t* Value(int64_t i) const { return GetValue(i); }
util::string_view GetView(int64_t i) const {
return util::string_view(reinterpret_cast<const char*>(GetValue(i)), byte_width());
}
util::optional<util::string_view> operator[](int64_t i) const {
return *IteratorType(*this, i);
}
std::string GetString(int64_t i) const { return std::string(GetView(i)); }
int32_t byte_width() const { return byte_width_; }
const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width_; }
IteratorType begin() const { return IteratorType(*this); }
IteratorType end() const { return IteratorType(*this, length()); }
protected:
void SetData(const std::shared_ptr<ArrayData>& data) {
this->PrimitiveArray::SetData(data);
byte_width_ =
internal::checked_cast<const FixedSizeBinaryType&>(*type()).byte_width();
}
int32_t byte_width_;
};
/// @}
} // namespace arrow
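
As described above, variable-size binary/string arrays keep a validity bitmap, an offsets buffer, and a contiguous data buffer. The layout can be inspected from Python (a sketch; for string arrays the buffer order is validity, offsets, data):

import pyarrow as pa

arr = pa.array(["foo", None, "barbaz"])
validity, offsets, data = arr.buffers()
print(data.to_pybytes())     # b'foobarbaz'
print(arr.offset, len(arr))  # slice offset and length
print(arr[2].as_py())        # 'barbaz'
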

View File

@ -0,0 +1,72 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include "arrow/array/array_binary.h"
#include "arrow/array/data.h"
#include "arrow/type.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \addtogroup numeric-arrays
///
/// @{
// ----------------------------------------------------------------------
// Decimal128Array
/// Concrete Array class for 128-bit decimal data
class ARROW_EXPORT Decimal128Array : public FixedSizeBinaryArray {
public:
using TypeClass = Decimal128Type;
using FixedSizeBinaryArray::FixedSizeBinaryArray;
/// \brief Construct Decimal128Array from ArrayData instance
explicit Decimal128Array(const std::shared_ptr<ArrayData>& data);
std::string FormatValue(int64_t i) const;
};
// Backward compatibility
using DecimalArray = Decimal128Array;
// ----------------------------------------------------------------------
// Decimal256Array
/// Concrete Array class for 256-bit decimal data
class ARROW_EXPORT Decimal256Array : public FixedSizeBinaryArray {
public:
using TypeClass = Decimal256Type;
using FixedSizeBinaryArray::FixedSizeBinaryArray;
/// \brief Construct Decimal256Array from ArrayData instance
explicit Decimal256Array(const std::shared_ptr<ArrayData>& data);
std::string FormatValue(int64_t i) const;
};
/// @}
} // namespace arrow
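
Decimal128Array and Decimal256Array are fixed-size binary arrays whose type carries a precision and scale; from Python, for example:

import pyarrow as pa
from decimal import Decimal

arr = pa.array([Decimal("123.45"), None], type=pa.decimal128(7, 2))
print(arr.type)        # decimal128(7, 2)
print(arr[0].as_py())  # Decimal('123.45')
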

View File

@ -0,0 +1,180 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include "arrow/array/array_base.h"
#include "arrow/array/data.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
// ----------------------------------------------------------------------
// DictionaryArray
/// \brief Array type for dictionary-encoded data with a
/// data-dependent dictionary
///
/// A dictionary array contains an array of non-negative integers (the
/// "dictionary indices") along with a data type containing a "dictionary"
/// corresponding to the distinct values represented in the data.
///
/// For example, the array
///
/// ["foo", "bar", "foo", "bar", "foo", "bar"]
///
/// with dictionary ["bar", "foo"], would have dictionary array representation
///
/// indices: [1, 0, 1, 0, 1, 0]
/// dictionary: ["bar", "foo"]
///
/// The indices in principle may be any integer type.
class ARROW_EXPORT DictionaryArray : public Array {
public:
using TypeClass = DictionaryType;
explicit DictionaryArray(const std::shared_ptr<ArrayData>& data);
DictionaryArray(const std::shared_ptr<DataType>& type,
const std::shared_ptr<Array>& indices,
const std::shared_ptr<Array>& dictionary);
/// \brief Construct DictionaryArray from dictionary and indices
/// array and validate
///
/// This function does the validation of the indices and input type. It checks if
/// all indices are non-negative and smaller than the size of the dictionary.
///
/// \param[in] type a dictionary type
/// \param[in] dictionary the dictionary with same value type as the
/// type object
/// \param[in] indices an array of non-negative integers smaller than the
/// size of the dictionary
static Result<std::shared_ptr<Array>> FromArrays(
const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& indices,
const std::shared_ptr<Array>& dictionary);
static Result<std::shared_ptr<Array>> FromArrays(
const std::shared_ptr<Array>& indices, const std::shared_ptr<Array>& dictionary) {
return FromArrays(::arrow::dictionary(indices->type(), dictionary->type()), indices,
dictionary);
}
/// \brief Transpose this DictionaryArray
///
/// This method constructs a new dictionary array with the given dictionary
/// type, transposing indices using the transpose map. The type and the
/// transpose map are typically computed using DictionaryUnifier.
///
/// \param[in] type the new type object
/// \param[in] dictionary the new dictionary
/// \param[in] transpose_map transposition array of this array's indices
/// into the target array's indices
/// \param[in] pool a pool to allocate the array data from
Result<std::shared_ptr<Array>> Transpose(
const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& dictionary,
const int32_t* transpose_map, MemoryPool* pool = default_memory_pool()) const;
/// \brief Determine whether dictionary arrays may be compared without unification
bool CanCompareIndices(const DictionaryArray& other) const;
/// \brief Return the dictionary for this array, which is stored as
/// a member of the ArrayData internal structure
std::shared_ptr<Array> dictionary() const;
std::shared_ptr<Array> indices() const;
/// \brief Return the ith value of indices, cast to int64_t. Not recommended
/// for use in performance-sensitive code. Does not validate whether the
/// value is null or out-of-bounds.
int64_t GetValueIndex(int64_t i) const;
const DictionaryType* dict_type() const { return dict_type_; }
private:
void SetData(const std::shared_ptr<ArrayData>& data);
const DictionaryType* dict_type_;
std::shared_ptr<Array> indices_;
// Lazily initialized when invoking dictionary()
mutable std::shared_ptr<Array> dictionary_;
};
/// \brief Helper class for incremental dictionary unification
class ARROW_EXPORT DictionaryUnifier {
public:
virtual ~DictionaryUnifier() = default;
/// \brief Construct a DictionaryUnifier
/// \param[in] value_type the data type of the dictionaries
/// \param[in] pool MemoryPool to use for memory allocations
static Result<std::unique_ptr<DictionaryUnifier>> Make(
std::shared_ptr<DataType> value_type, MemoryPool* pool = default_memory_pool());
/// \brief Unify dictionaries across array chunks
///
/// The dictionaries in the array chunks will be unified, their indices
/// accordingly transposed.
///
/// Only dictionaries with a primitive value type are currently supported.
/// However, dictionaries nested inside a more complex type are correctly unified.
static Result<std::shared_ptr<ChunkedArray>> UnifyChunkedArray(
const std::shared_ptr<ChunkedArray>& array,
MemoryPool* pool = default_memory_pool());
/// \brief Unify dictionaries across the chunks of each table column
///
/// The dictionaries in each table column will be unified, their indices
/// accordingly transposed.
///
/// Only dictionaries with a primitive value type are currently supported.
/// However, dictionaries nested inside a more complex type are correctly unified.
static Result<std::shared_ptr<Table>> UnifyTable(
const Table& table, MemoryPool* pool = default_memory_pool());
/// \brief Append dictionary to the internal memo
virtual Status Unify(const Array& dictionary) = 0;
/// \brief Append dictionary and compute transpose indices
/// \param[in] dictionary the dictionary values to unify
/// \param[out] out_transpose a Buffer containing computed transpose indices
/// as int32_t values equal in length to the passed dictionary. The value in
/// each slot corresponds to the new index value for each original index
/// for a DictionaryArray with the old dictionary
virtual Status Unify(const Array& dictionary,
std::shared_ptr<Buffer>* out_transpose) = 0;
/// \brief Return a result DictionaryType with the smallest possible index
/// type to accommodate the unified dictionary. The unifier cannot be used
/// after this is called
virtual Status GetResult(std::shared_ptr<DataType>* out_type,
std::shared_ptr<Array>* out_dict) = 0;
/// \brief Return a unified dictionary with the given index type. If
/// the index type is not large enough then an invalid status will be returned.
/// The unifier cannot be used after this is called
virtual Status GetResultWithIndexType(const std::shared_ptr<DataType>& index_type,
std::shared_ptr<Array>* out_dict) = 0;
};
} // namespace arrow
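
A Python sketch of the indices/dictionary representation described in the comments above, mirroring the ["foo", "bar", ...] example:

import pyarrow as pa

indices = pa.array([1, 0, 1, 0, 1, 0], type=pa.int8())
dictionary = pa.array(["bar", "foo"])
dict_arr = pa.DictionaryArray.from_arrays(indices, dictionary)
print(dict_arr.to_pylist())  # ['foo', 'bar', 'foo', 'bar', 'foo', 'bar']
print(dict_arr.indices)      # the int8 index array
print(dict_arr.dictionary)   # ["bar", "foo"]
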

View File

@ -0,0 +1,569 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Array accessor classes for List, LargeList, FixedSizeList, Map, Struct, and
// Union
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "arrow/array/array_base.h"
#include "arrow/array/data.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_fwd.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \addtogroup nested-arrays
///
/// @{
// ----------------------------------------------------------------------
// ListArray
template <typename TYPE>
class BaseListArray;
namespace internal {
// Private helper for ListArray::SetData.
// Unfortunately, trying to define BaseListArray::SetData outside of this header
// doesn't play well with MSVC.
template <typename TYPE>
void SetListData(BaseListArray<TYPE>* self, const std::shared_ptr<ArrayData>& data,
Type::type expected_type_id = TYPE::type_id);
} // namespace internal
/// Base class for variable-sized list arrays, regardless of offset size.
template <typename TYPE>
class BaseListArray : public Array {
public:
using TypeClass = TYPE;
using offset_type = typename TypeClass::offset_type;
const TypeClass* list_type() const { return list_type_; }
/// \brief Return array object containing the list's values
std::shared_ptr<Array> values() const { return values_; }
/// Note that this buffer does not account for any slice offset
std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[1]; }
std::shared_ptr<DataType> value_type() const { return list_type_->value_type(); }
/// Return pointer to raw value offsets accounting for any slice offset
const offset_type* raw_value_offsets() const {
return raw_value_offsets_ + data_->offset;
}
// The following functions will not perform boundschecking
offset_type value_offset(int64_t i) const {
return raw_value_offsets_[i + data_->offset];
}
offset_type value_length(int64_t i) const {
i += data_->offset;
return raw_value_offsets_[i + 1] - raw_value_offsets_[i];
}
std::shared_ptr<Array> value_slice(int64_t i) const {
return values_->Slice(value_offset(i), value_length(i));
}
protected:
friend void internal::SetListData<TYPE>(BaseListArray<TYPE>* self,
const std::shared_ptr<ArrayData>& data,
Type::type expected_type_id);
const TypeClass* list_type_ = NULLPTR;
std::shared_ptr<Array> values_;
const offset_type* raw_value_offsets_ = NULLPTR;
};
/// Concrete Array class for list data
class ARROW_EXPORT ListArray : public BaseListArray<ListType> {
public:
explicit ListArray(std::shared_ptr<ArrayData> data);
ListArray(std::shared_ptr<DataType> type, int64_t length,
std::shared_ptr<Buffer> value_offsets, std::shared_ptr<Array> values,
std::shared_ptr<Buffer> null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
/// \brief Construct ListArray from array of offsets and child value array
///
/// This function does the bare minimum of validation of the offsets and
/// input types, and will allocate a new offsets array if necessary (i.e. if
/// the offsets contain any nulls). If the offsets do not have nulls, they
/// are assumed to be well-formed
///
/// \param[in] offsets Array containing n + 1 offsets encoding length and
/// size. Must be of int32 type
/// \param[in] values Array containing list values
/// \param[in] pool MemoryPool in case new offsets array needs to be
/// allocated because of null values
static Result<std::shared_ptr<ListArray>> FromArrays(
const Array& offsets, const Array& values,
MemoryPool* pool = default_memory_pool());
static Result<std::shared_ptr<ListArray>> FromArrays(
std::shared_ptr<DataType> type, const Array& offsets, const Array& values,
MemoryPool* pool = default_memory_pool());
/// \brief Return an Array that is a concatenation of the lists in this array.
///
/// Note that this differs from `values()` in that it takes this array's
/// offsets into account, as well as null elements backed by non-empty lists
/// (those are skipped, so copying may be needed).
Result<std::shared_ptr<Array>> Flatten(
MemoryPool* memory_pool = default_memory_pool()) const;
/// \brief Return list offsets as an Int32Array
///
/// The returned array will not have a validity bitmap, so you cannot expect
/// to pass it to ListArray::FromArrays() and get back the same list array
/// if the original one has nulls.
std::shared_ptr<Array> offsets() const;
protected:
// This constructor defers SetData to a derived array class
ListArray() = default;
void SetData(const std::shared_ptr<ArrayData>& data);
};
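// --- Illustrative usage sketch (added for this document, not part of the
// upstream header) ---
// FromArrays stitches an int32 offsets array of length n + 1 together with a
// child values array, and Flatten concatenates the lists back into a single
// array. This belongs in user code with <arrow/api.h> included; the function
// and parameter names are hypothetical.
inline Result<std::shared_ptr<Array>> RoundTripListExample(const Array& offsets,
                                                           const Array& values) {
  // e.g. offsets = [0, 2, 2, 5] and values = [1, 2, 3, 4, 5]
  // describe the lists [1, 2], [] and [3, 4, 5].
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ListArray> list_array,
                        ListArray::FromArrays(offsets, values));
  return list_array->Flatten();
}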
/// Concrete Array class for large list data (with 64-bit offsets)
class ARROW_EXPORT LargeListArray : public BaseListArray<LargeListType> {
public:
explicit LargeListArray(const std::shared_ptr<ArrayData>& data);
LargeListArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Array>& values,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
/// \brief Construct LargeListArray from array of offsets and child value array
///
/// This function does the bare minimum of validation of the offsets and
/// input types, and will allocate a new offsets array if necessary (i.e. if
/// the offsets contain any nulls). If the offsets do not have nulls, they
/// are assumed to be well-formed
///
/// \param[in] offsets Array containing n + 1 offsets encoding length and
/// size. Must be of int64 type
/// \param[in] values Array containing list values
/// \param[in] pool MemoryPool in case new offsets array needs to be
/// allocated because of null values
static Result<std::shared_ptr<LargeListArray>> FromArrays(
const Array& offsets, const Array& values,
MemoryPool* pool = default_memory_pool());
static Result<std::shared_ptr<LargeListArray>> FromArrays(
std::shared_ptr<DataType> type, const Array& offsets, const Array& values,
MemoryPool* pool = default_memory_pool());
/// \brief Return an Array that is a concatenation of the lists in this array.
///
/// Note that this differs from `values()` in that it takes this array's
/// offsets into account, as well as null elements backed by non-empty lists
/// (those are skipped, so copying may be needed).
Result<std::shared_ptr<Array>> Flatten(
MemoryPool* memory_pool = default_memory_pool()) const;
/// \brief Return list offsets as an Int64Array
std::shared_ptr<Array> offsets() const;
protected:
void SetData(const std::shared_ptr<ArrayData>& data);
};
// ----------------------------------------------------------------------
// MapArray
/// Concrete Array class for map data
///
/// NB: "value" in this context refers to a pair of a key and the corresponding item
class ARROW_EXPORT MapArray : public ListArray {
public:
using TypeClass = MapType;
explicit MapArray(const std::shared_ptr<ArrayData>& data);
MapArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
MapArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Array>& values,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
/// \brief Construct MapArray from array of offsets and child key, item arrays
///
/// This function does the bare minimum of validation of the offsets and
/// input types, and will allocate a new offsets array if necessary (i.e. if
/// the offsets contain any nulls). If the offsets do not have nulls, they
/// are assumed to be well-formed
///
/// \param[in] offsets Array containing n + 1 offsets encoding length and
/// size. Must be of int32 type
/// \param[in] keys Array containing key values
/// \param[in] items Array containing item values
/// \param[in] pool MemoryPool in case new offsets array needs to be
/// allocated because of null values
static Result<std::shared_ptr<Array>> FromArrays(
const std::shared_ptr<Array>& offsets, const std::shared_ptr<Array>& keys,
const std::shared_ptr<Array>& items, MemoryPool* pool = default_memory_pool());
static Result<std::shared_ptr<Array>> FromArrays(
std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
MemoryPool* pool = default_memory_pool());
const MapType* map_type() const { return map_type_; }
/// \brief Return array object containing all map keys
std::shared_ptr<Array> keys() const { return keys_; }
/// \brief Return array object containing all mapped items
std::shared_ptr<Array> items() const { return items_; }
/// Validate child data before constructing the actual MapArray.
static Status ValidateChildData(
const std::vector<std::shared_ptr<ArrayData>>& child_data);
protected:
void SetData(const std::shared_ptr<ArrayData>& data);
static Result<std::shared_ptr<Array>> FromArraysInternal(
std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
MemoryPool* pool);
private:
const MapType* map_type_;
std::shared_ptr<Array> keys_, items_;
};
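// --- Illustrative usage sketch (added for this document, not part of the
// upstream header) ---
// A MapArray is laid out like list<struct<key, item>>: FromArrays takes the
// int32 offsets plus separate key and item child arrays of equal length.
// This belongs in user code with <arrow/api.h> included; the names are
// hypothetical.
inline Result<std::shared_ptr<Array>> BuildMapExample(
    const std::shared_ptr<Array>& offsets, const std::shared_ptr<Array>& keys,
    const std::shared_ptr<Array>& items) {
  // offsets has one more element than there are map slots, exactly as for
  // ListArray::FromArrays.
  return MapArray::FromArrays(offsets, keys, items);
}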
// ----------------------------------------------------------------------
// FixedSizeListArray
/// Concrete Array class for fixed size list data
class ARROW_EXPORT FixedSizeListArray : public Array {
public:
using TypeClass = FixedSizeListType;
using offset_type = TypeClass::offset_type;
explicit FixedSizeListArray(const std::shared_ptr<ArrayData>& data);
FixedSizeListArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Array>& values,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
const FixedSizeListType* list_type() const;
/// \brief Return array object containing the list's values
std::shared_ptr<Array> values() const;
std::shared_ptr<DataType> value_type() const;
// The following functions will not perform boundschecking
int64_t value_offset(int64_t i) const {
i += data_->offset;
return list_size_ * i;
}
int32_t value_length(int64_t i = 0) const {
ARROW_UNUSED(i);
return list_size_;
}
std::shared_ptr<Array> value_slice(int64_t i) const {
return values_->Slice(value_offset(i), value_length(i));
}
/// \brief Return an Array that is a concatenation of the lists in this array.
///
/// Note that this differs from `values()` in that it takes null elements
/// into account (those are skipped, so copying may be needed).
Result<std::shared_ptr<Array>> Flatten(
MemoryPool* memory_pool = default_memory_pool()) const;
/// \brief Construct FixedSizeListArray from child value array and value_length
///
/// \param[in] values Array containing list values
/// \param[in] list_size The fixed length of each list
/// \return Will have length equal to values.length() / list_size
static Result<std::shared_ptr<Array>> FromArrays(const std::shared_ptr<Array>& values,
int32_t list_size);
/// \brief Construct FixedSizeListArray from child value array and type
///
/// \param[in] values Array containing list values
/// \param[in] type The fixed sized list type
/// \return Will have length equal to values.length() / type.list_size()
static Result<std::shared_ptr<Array>> FromArrays(const std::shared_ptr<Array>& values,
std::shared_ptr<DataType> type);
protected:
void SetData(const std::shared_ptr<ArrayData>& data);
int32_t list_size_;
private:
std::shared_ptr<Array> values_;
};
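// --- Illustrative usage sketch (added for this document, not part of the
// upstream header) ---
// Fixed-size lists need no offsets buffer: FromArrays only takes the child
// values and the list size, and the result has values.length() / list_size
// slots. This belongs in user code with <arrow/api.h> included; the names are
// hypothetical.
inline Result<std::shared_ptr<Array>> BuildFixedSizeListExample(
    const std::shared_ptr<Array>& values) {
  // e.g. a values array of length 6 with list_size 3 yields 2 lists.
  return FixedSizeListArray::FromArrays(values, /*list_size=*/3);
}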
// ----------------------------------------------------------------------
// Struct
/// Concrete Array class for struct data
class ARROW_EXPORT StructArray : public Array {
public:
using TypeClass = StructType;
explicit StructArray(const std::shared_ptr<ArrayData>& data);
StructArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::vector<std::shared_ptr<Array>>& children,
std::shared_ptr<Buffer> null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
/// \brief Return a StructArray from child arrays and field names.
///
/// The length and data type are automatically inferred from the arguments.
/// There should be at least one child array.
static Result<std::shared_ptr<StructArray>> Make(
const ArrayVector& children, const std::vector<std::string>& field_names,
std::shared_ptr<Buffer> null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
/// \brief Return a StructArray from child arrays and fields.
///
/// The length is automatically inferred from the arguments.
/// There should be at least one child array. This method does not
/// check that field types and child array types are consistent.
static Result<std::shared_ptr<StructArray>> Make(
const ArrayVector& children, const FieldVector& fields,
std::shared_ptr<Buffer> null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
const StructType* struct_type() const;
// Return a shared pointer in case the requestor desires to share ownership
// with this array. The returned array has its offset, length and null
// count adjusted.
std::shared_ptr<Array> field(int pos) const;
const ArrayVector& fields() const;
/// Returns null if name not found
std::shared_ptr<Array> GetFieldByName(const std::string& name) const;
/// \brief Flatten this array as a vector of arrays, one for each field
///
/// \param[in] pool The pool to allocate null bitmaps from, if necessary
Result<ArrayVector> Flatten(MemoryPool* pool = default_memory_pool()) const;
/// \brief Get one of the child arrays, combining its null bitmap
/// with the parent struct array's bitmap.
///
/// \param[in] index Which child array to get
/// \param[in] pool The pool to allocate null bitmaps from, if necessary
Result<std::shared_ptr<Array>> GetFlattenedField(
int index, MemoryPool* pool = default_memory_pool()) const;
private:
// For caching boxed child data
// XXX This is not handled in a thread-safe manner.
mutable ArrayVector boxed_fields_;
};
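// --- Illustrative usage sketch (added for this document, not part of the
// upstream header) ---
// StructArray::Make infers the struct type from the child arrays and field
// names, and GetFlattenedField recovers a child with the parent's validity
// bitmap merged in. This belongs in user code with <arrow/api.h> included;
// the names are hypothetical.
inline Result<std::shared_ptr<Array>> StructFieldExample(
    const std::shared_ptr<Array>& xs, const std::shared_ptr<Array>& ys) {
  ARROW_ASSIGN_OR_RAISE(
      std::shared_ptr<StructArray> points,
      StructArray::Make({xs, ys}, std::vector<std::string>{"x", "y"}));
  // Child 0 ("x"), with any struct-level nulls reflected in its bitmap.
  return points->GetFlattenedField(0);
}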
// ----------------------------------------------------------------------
// Union
/// Base class for SparseUnionArray and DenseUnionArray
class ARROW_EXPORT UnionArray : public Array {
public:
using type_code_t = int8_t;
/// Note that this buffer does not account for any slice offset
std::shared_ptr<Buffer> type_codes() const { return data_->buffers[1]; }
const type_code_t* raw_type_codes() const { return raw_type_codes_ + data_->offset; }
/// The logical type code of the value at index.
type_code_t type_code(int64_t i) const { return raw_type_codes_[i + data_->offset]; }
/// The physical child id containing value at index.
int child_id(int64_t i) const {
return union_type_->child_ids()[raw_type_codes_[i + data_->offset]];
}
const UnionType* union_type() const { return union_type_; }
UnionMode::type mode() const { return union_type_->mode(); }
/// \brief Return the given field as an individual array.
///
/// For sparse unions, the returned array has its offset, length and null
/// count adjusted.
std::shared_ptr<Array> field(int pos) const;
protected:
void SetData(std::shared_ptr<ArrayData> data);
const type_code_t* raw_type_codes_;
const UnionType* union_type_;
// For caching boxed child data
mutable std::vector<std::shared_ptr<Array>> boxed_fields_;
};
/// Concrete Array class for sparse union data
class ARROW_EXPORT SparseUnionArray : public UnionArray {
public:
using TypeClass = SparseUnionType;
explicit SparseUnionArray(std::shared_ptr<ArrayData> data);
SparseUnionArray(std::shared_ptr<DataType> type, int64_t length, ArrayVector children,
std::shared_ptr<Buffer> type_ids, int64_t offset = 0);
/// \brief Construct SparseUnionArray from type_ids and children
///
/// This function does the bare minimum of validation of the input types.
///
/// \param[in] type_ids An array of logical type ids for the union type
/// \param[in] children Vector of children Arrays containing the data for each type.
/// \param[in] type_codes Vector of type codes.
static Result<std::shared_ptr<Array>> Make(const Array& type_ids, ArrayVector children,
std::vector<type_code_t> type_codes) {
return Make(std::move(type_ids), std::move(children), std::vector<std::string>{},
std::move(type_codes));
}
/// \brief Construct SparseUnionArray with custom field names from type_ids and children
///
/// This function does the bare minimum of validation of the input types.
///
/// \param[in] type_ids An array of logical type ids for the union type
/// \param[in] children Vector of children Arrays containing the data for each type.
/// \param[in] field_names Vector of strings containing the name of each field.
/// \param[in] type_codes Vector of type codes.
static Result<std::shared_ptr<Array>> Make(const Array& type_ids, ArrayVector children,
std::vector<std::string> field_names = {},
std::vector<type_code_t> type_codes = {});
const SparseUnionType* union_type() const {
return internal::checked_cast<const SparseUnionType*>(union_type_);
}
/// \brief Get one of the child arrays, adjusting its null bitmap
/// where the union array type code does not match.
///
/// \param[in] index Which child array to get (i.e. the physical index, not the
/// type code)
/// \param[in] pool The pool to allocate null bitmaps from, if necessary
Result<std::shared_ptr<Array>> GetFlattenedField(
int index, MemoryPool* pool = default_memory_pool()) const;
protected:
void SetData(std::shared_ptr<ArrayData> data);
};
/// \brief Concrete Array class for dense union data
///
/// Note that union types do not have a validity bitmap
class ARROW_EXPORT DenseUnionArray : public UnionArray {
public:
using TypeClass = DenseUnionType;
explicit DenseUnionArray(const std::shared_ptr<ArrayData>& data);
DenseUnionArray(std::shared_ptr<DataType> type, int64_t length, ArrayVector children,
std::shared_ptr<Buffer> type_ids,
std::shared_ptr<Buffer> value_offsets = NULLPTR, int64_t offset = 0);
/// \brief Construct DenseUnionArray from type_ids, value_offsets, and children
///
/// This function does the bare minimum of validation of the offsets and
/// input types.
///
/// \param[in] type_ids An array of logical type ids for the union type
/// \param[in] value_offsets An array of signed int32 values indicating the
/// relative offset into the respective child array for the type in a given slot.
/// The respective offsets for each child value array must be in order / increasing.
/// \param[in] children Vector of children Arrays containing the data for each type.
/// \param[in] type_codes Vector of type codes.
static Result<std::shared_ptr<Array>> Make(const Array& type_ids,
const Array& value_offsets,
ArrayVector children,
std::vector<type_code_t> type_codes) {
return Make(type_ids, value_offsets, std::move(children), std::vector<std::string>{},
std::move(type_codes));
}
/// \brief Construct DenseUnionArray with custom field names from type_ids,
/// value_offsets, and children
///
/// This function does the bare minimum of validation of the offsets and
/// input types.
///
/// \param[in] type_ids An array of logical type ids for the union type
/// \param[in] value_offsets An array of signed int32 values indicating the
/// relative offset into the respective child array for the type in a given slot.
/// The respective offsets for each child value array must be in order / increasing.
/// \param[in] children Vector of children Arrays containing the data for each type.
/// \param[in] field_names Vector of strings containing the name of each field.
/// \param[in] type_codes Vector of type codes.
static Result<std::shared_ptr<Array>> Make(const Array& type_ids,
const Array& value_offsets,
ArrayVector children,
std::vector<std::string> field_names = {},
std::vector<type_code_t> type_codes = {});
const DenseUnionType* union_type() const {
return internal::checked_cast<const DenseUnionType*>(union_type_);
}
/// Note that this buffer does not account for any slice offset
std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[2]; }
int32_t value_offset(int64_t i) const { return raw_value_offsets_[i + data_->offset]; }
const int32_t* raw_value_offsets() const { return raw_value_offsets_ + data_->offset; }
protected:
const int32_t* raw_value_offsets_;
void SetData(const std::shared_ptr<ArrayData>& data);
};
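// --- Illustrative usage sketch (added for this document, not part of the
// upstream header) ---
// A dense union stores one int8 type code and one int32 offset per slot; the
// offset indexes into the child selected by the type code. Make performs only
// minimal validation. This belongs in user code with <arrow/api.h> included;
// the names are hypothetical.
inline Result<std::shared_ptr<Array>> BuildDenseUnionExample(
    const Array& type_ids, const Array& value_offsets,
    const std::shared_ptr<Array>& ints, const std::shared_ptr<Array>& strings) {
  // e.g. type_ids = [0, 1, 0] and value_offsets = [0, 0, 1] pick
  // ints[0], strings[0], ints[1] for the three slots.
  return DenseUnionArray::Make(type_ids, value_offsets, {ints, strings},
                               {"ints", "strings"}, {0, 1});
}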
/// @}
} // namespace arrow

View File

@ -0,0 +1,202 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Array accessor types for primitive/C-type-based arrays, such as numbers,
// boolean, and temporal types.
#pragma once
#include <cstdint>
#include <memory>
#include "arrow/array/array_base.h"
#include "arrow/array/data.h"
#include "arrow/stl_iterator.h"
#include "arrow/type.h"
#include "arrow/type_fwd.h" // IWYU pragma: export
#include "arrow/type_traits.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// Concrete Array class for boolean data
class ARROW_EXPORT BooleanArray : public PrimitiveArray {
public:
using TypeClass = BooleanType;
using IteratorType = stl::ArrayIterator<BooleanArray>;
explicit BooleanArray(const std::shared_ptr<ArrayData>& data);
BooleanArray(int64_t length, const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
bool Value(int64_t i) const {
return bit_util::GetBit(reinterpret_cast<const uint8_t*>(raw_values_),
i + data_->offset);
}
bool GetView(int64_t i) const { return Value(i); }
util::optional<bool> operator[](int64_t i) const { return *IteratorType(*this, i); }
/// \brief Return the number of false (0) values among the valid
/// values. Result is not cached.
int64_t false_count() const;
/// \brief Return the number of true (1) values among the valid
/// values. Result is not cached.
int64_t true_count() const;
IteratorType begin() const { return IteratorType(*this); }
IteratorType end() const { return IteratorType(*this, length()); }
protected:
using PrimitiveArray::PrimitiveArray;
};
/// \addtogroup numeric-arrays
///
/// @{
/// \brief Concrete Array class for numeric data with a corresponding C type
///
/// This class is templated on the corresponding DataType subclass for the
/// given data, for example NumericArray<Int8Type> or NumericArray<Date32Type>.
///
/// Note that convenience aliases are available for all accepted types
/// (for example Int8Array for NumericArray<Int8Type>).
template <typename TYPE>
class NumericArray : public PrimitiveArray {
public:
using TypeClass = TYPE;
using value_type = typename TypeClass::c_type;
using IteratorType = stl::ArrayIterator<NumericArray<TYPE>>;
explicit NumericArray(const std::shared_ptr<ArrayData>& data) : PrimitiveArray(data) {}
// Only enable this constructor without a type argument for types without additional
// metadata
template <typename T1 = TYPE>
NumericArray(enable_if_parameter_free<T1, int64_t> length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0)
: PrimitiveArray(TypeTraits<T1>::type_singleton(), length, data, null_bitmap,
null_count, offset) {}
const value_type* raw_values() const {
return reinterpret_cast<const value_type*>(raw_values_) + data_->offset;
}
value_type Value(int64_t i) const { return raw_values()[i]; }
// For API compatibility with BinaryArray etc.
value_type GetView(int64_t i) const { return Value(i); }
util::optional<value_type> operator[](int64_t i) const {
return *IteratorType(*this, i);
}
IteratorType begin() const { return IteratorType(*this); }
IteratorType end() const { return IteratorType(*this, length()); }
protected:
using PrimitiveArray::PrimitiveArray;
};
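// --- Illustrative usage sketch (added for this document, not part of the
// upstream header) ---
// Typical element access on a NumericArray: IsValid() consults the validity
// bitmap while Value() reads the slot without bounds checking, mirroring
// raw_values(). This belongs in user code with <arrow/api.h> included; the
// function name is hypothetical.
inline int64_t SumValidExample(const NumericArray<Int64Type>& arr) {  // aka Int64Array
  int64_t sum = 0;
  for (int64_t i = 0; i < arr.length(); ++i) {
    if (arr.IsValid(i)) {
      sum += arr.Value(i);
    }
  }
  return sum;
}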
/// DayTimeArray
/// ---------------------
/// \brief Array of Day and Millisecond values.
class ARROW_EXPORT DayTimeIntervalArray : public PrimitiveArray {
public:
using TypeClass = DayTimeIntervalType;
using IteratorType = stl::ArrayIterator<DayTimeIntervalArray>;
explicit DayTimeIntervalArray(const std::shared_ptr<ArrayData>& data);
DayTimeIntervalArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
DayTimeIntervalArray(int64_t length, const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
TypeClass::DayMilliseconds GetValue(int64_t i) const;
TypeClass::DayMilliseconds Value(int64_t i) const { return GetValue(i); }
// For compatibility with Take kernel.
TypeClass::DayMilliseconds GetView(int64_t i) const { return GetValue(i); }
IteratorType begin() const { return IteratorType(*this); }
IteratorType end() const { return IteratorType(*this, length()); }
util::optional<TypeClass::DayMilliseconds> operator[](int64_t i) const {
return *IteratorType(*this, i);
}
int32_t byte_width() const { return sizeof(TypeClass::DayMilliseconds); }
const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width(); }
};
/// \brief Array of Month, Day and nanosecond values.
class ARROW_EXPORT MonthDayNanoIntervalArray : public PrimitiveArray {
public:
using TypeClass = MonthDayNanoIntervalType;
using IteratorType = stl::ArrayIterator<MonthDayNanoIntervalArray>;
explicit MonthDayNanoIntervalArray(const std::shared_ptr<ArrayData>& data);
MonthDayNanoIntervalArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
MonthDayNanoIntervalArray(int64_t length, const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
TypeClass::MonthDayNanos GetValue(int64_t i) const;
TypeClass::MonthDayNanos Value(int64_t i) const { return GetValue(i); }
// For compatibility with Take kernel.
TypeClass::MonthDayNanos GetView(int64_t i) const { return GetValue(i); }
IteratorType begin() const { return IteratorType(*this); }
IteratorType end() const { return IteratorType(*this, length()); }
util::optional<TypeClass::MonthDayNanos> operator[](int64_t i) const {
return *IteratorType(*this, i);
}
int32_t byte_width() const { return sizeof(TypeClass::MonthDayNanos); }
const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width(); }
};
/// @}
} // namespace arrow

View File

@ -0,0 +1,213 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <cstring>
#include <memory>
#include <type_traits>
#include "arrow/array/builder_base.h"
#include "arrow/buffer.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \addtogroup numeric-builders
///
/// @{
namespace internal {
class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
public:
AdaptiveIntBuilderBase(uint8_t start_int_size, MemoryPool* pool);
explicit AdaptiveIntBuilderBase(MemoryPool* pool)
: AdaptiveIntBuilderBase(sizeof(uint8_t), pool) {}
/// \brief Append multiple nulls
/// \param[in] length the number of nulls to append
Status AppendNulls(int64_t length) final {
ARROW_RETURN_NOT_OK(CommitPendingData());
if (ARROW_PREDICT_TRUE(length > 0)) {
ARROW_RETURN_NOT_OK(Reserve(length));
memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
UnsafeSetNull(length);
}
return Status::OK();
}
Status AppendNull() final {
pending_data_[pending_pos_] = 0;
pending_valid_[pending_pos_] = 0;
pending_has_nulls_ = true;
++pending_pos_;
++length_;
++null_count_;
if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
return CommitPendingData();
}
return Status::OK();
}
Status AppendEmptyValues(int64_t length) final {
ARROW_RETURN_NOT_OK(CommitPendingData());
if (ARROW_PREDICT_TRUE(length > 0)) {
ARROW_RETURN_NOT_OK(Reserve(length));
memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
UnsafeSetNotNull(length);
}
return Status::OK();
}
Status AppendEmptyValue() final {
pending_data_[pending_pos_] = 0;
pending_valid_[pending_pos_] = 1;
++pending_pos_;
++length_;
if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
return CommitPendingData();
}
return Status::OK();
}
void Reset() override;
Status Resize(int64_t capacity) override;
protected:
Status AppendInternal(const uint64_t val) {
pending_data_[pending_pos_] = val;
pending_valid_[pending_pos_] = 1;
++pending_pos_;
++length_;
if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
return CommitPendingData();
}
return Status::OK();
}
virtual Status CommitPendingData() = 0;
template <typename new_type, typename old_type>
typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
ExpandIntSizeInternal();
template <typename new_type, typename old_type>
typename std::enable_if<(sizeof(old_type) < sizeof(new_type)), Status>::type
ExpandIntSizeInternal();
std::shared_ptr<ResizableBuffer> data_;
uint8_t* raw_data_ = NULLPTR;
const uint8_t start_int_size_;
uint8_t int_size_;
static constexpr int32_t pending_size_ = 1024;
uint8_t pending_valid_[pending_size_];
uint64_t pending_data_[pending_size_];
int32_t pending_pos_ = 0;
bool pending_has_nulls_ = false;
};
} // namespace internal
class ARROW_EXPORT AdaptiveUIntBuilder : public internal::AdaptiveIntBuilderBase {
public:
explicit AdaptiveUIntBuilder(uint8_t start_int_size,
MemoryPool* pool = default_memory_pool());
explicit AdaptiveUIntBuilder(MemoryPool* pool = default_memory_pool())
: AdaptiveUIntBuilder(sizeof(uint8_t), pool) {}
using ArrayBuilder::Advance;
using internal::AdaptiveIntBuilderBase::Reset;
/// Scalar append
Status Append(const uint64_t val) { return AppendInternal(val); }
/// \brief Append a sequence of elements in one shot
/// \param[in] values a contiguous C array of values
/// \param[in] length the number of values to append
/// \param[in] valid_bytes an optional sequence of bytes where non-zero
/// indicates a valid (non-null) value
/// \return Status
Status AppendValues(const uint64_t* values, int64_t length,
const uint8_t* valid_bytes = NULLPTR);
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
std::shared_ptr<DataType> type() const override;
protected:
Status CommitPendingData() override;
Status ExpandIntSize(uint8_t new_int_size);
Status AppendValuesInternal(const uint64_t* values, int64_t length,
const uint8_t* valid_bytes);
template <typename new_type>
Status ExpandIntSizeN();
};
class ARROW_EXPORT AdaptiveIntBuilder : public internal::AdaptiveIntBuilderBase {
public:
explicit AdaptiveIntBuilder(uint8_t start_int_size,
MemoryPool* pool = default_memory_pool());
explicit AdaptiveIntBuilder(MemoryPool* pool = default_memory_pool())
: AdaptiveIntBuilder(sizeof(uint8_t), pool) {}
using ArrayBuilder::Advance;
using internal::AdaptiveIntBuilderBase::Reset;
/// Scalar append
Status Append(const int64_t val) { return AppendInternal(static_cast<uint64_t>(val)); }
/// \brief Append a sequence of elements in one shot
/// \param[in] values a contiguous C array of values
/// \param[in] length the number of values to append
/// \param[in] valid_bytes an optional sequence of bytes where non-zero
/// indicates a valid (non-null) value
/// \return Status
Status AppendValues(const int64_t* values, int64_t length,
const uint8_t* valid_bytes = NULLPTR);
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
std::shared_ptr<DataType> type() const override;
protected:
Status CommitPendingData() override;
Status ExpandIntSize(uint8_t new_int_size);
Status AppendValuesInternal(const int64_t* values, int64_t length,
const uint8_t* valid_bytes);
template <typename new_type>
Status ExpandIntSizeN();
};
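// --- Illustrative usage sketch (added for this document, not part of the
// upstream header) ---
// AdaptiveIntBuilder starts with 1-byte storage and widens as larger values
// arrive, so the finished array uses the smallest sufficient integer type
// (presumably int32 after the third append below). This belongs in user code
// with <arrow/api.h> included; the function name is hypothetical.
inline Result<std::shared_ptr<Array>> AdaptiveIntExample() {
  AdaptiveIntBuilder builder;
  ARROW_RETURN_NOT_OK(builder.Append(1));        // fits in one byte
  ARROW_RETURN_NOT_OK(builder.AppendNull());
  ARROW_RETURN_NOT_OK(builder.Append(1 << 20));  // forces a wider int size
  std::shared_ptr<Array> out;
  ARROW_RETURN_NOT_OK(builder.Finish(&out));
  return out;
}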
/// @}
} // namespace arrow

View File

@ -0,0 +1,350 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm> // IWYU pragma: keep
#include <cstdint>
#include <limits>
#include <memory>
#include <utility>
#include <vector>
#include "arrow/array/array_base.h"
#include "arrow/array/array_primitive.h"
#include "arrow/buffer.h"
#include "arrow/buffer_builder.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \defgroup numeric-builders Concrete builder subclasses for numeric types
/// @{
/// @}
/// \defgroup temporal-builders Concrete builder subclasses for temporal types
/// @{
/// @}
/// \defgroup binary-builders Concrete builder subclasses for binary types
/// @{
/// @}
/// \defgroup nested-builders Concrete builder subclasses for nested types
/// @{
/// @}
/// \defgroup dictionary-builders Concrete builder subclasses for dictionary types
/// @{
/// @}
constexpr int64_t kMinBuilderCapacity = 1 << 5;
constexpr int64_t kListMaximumElements = std::numeric_limits<int32_t>::max() - 1;
/// Base class for all data array builders.
///
/// This class provides facilities for incrementally building the null bitmap
/// (see the Append methods) and, as a side effect, tracks the current number
/// of slots and the null count.
///
/// \note Users are expected to use builders as one of the concrete types below.
/// For example, ArrayBuilder* pointing to BinaryBuilder should be downcast before use.
class ARROW_EXPORT ArrayBuilder {
public:
explicit ArrayBuilder(MemoryPool* pool) : pool_(pool), null_bitmap_builder_(pool) {}
ARROW_DEFAULT_MOVE_AND_ASSIGN(ArrayBuilder);
virtual ~ArrayBuilder() = default;
/// For nested types. Since the objects are owned by this class instance, we
/// skip shared pointers and just return a raw pointer
ArrayBuilder* child(int i) { return children_[i].get(); }
const std::shared_ptr<ArrayBuilder>& child_builder(int i) const { return children_[i]; }
int num_children() const { return static_cast<int>(children_.size()); }
virtual int64_t length() const { return length_; }
int64_t null_count() const { return null_count_; }
int64_t capacity() const { return capacity_; }
/// \brief Ensure that enough memory has been allocated to fit the indicated
/// number of total elements in the builder, including any that have already
/// been appended. Does not account for reallocations that may be due to
/// variable size data, like binary values. To make space for incremental
/// appends, use Reserve instead.
///
/// \param[in] capacity the minimum number of total array values to
/// accommodate. Must be greater than the current capacity.
/// \return Status
virtual Status Resize(int64_t capacity);
/// \brief Ensure that there is enough space allocated to append the indicated
/// number of elements without any further reallocation. Overallocation is
/// used in order to minimize the impact of incremental Reserve() calls.
/// Note that additional_capacity is relative to the current number of elements
/// rather than to the current capacity, so calls to Reserve() which are not
/// interspersed with addition of new elements may not increase the capacity.
///
/// \param[in] additional_capacity the number of additional array values
/// \return Status
Status Reserve(int64_t additional_capacity) {
auto current_capacity = capacity();
auto min_capacity = length() + additional_capacity;
if (min_capacity <= current_capacity) return Status::OK();
// leave growth factor up to BufferBuilder
auto new_capacity = BufferBuilder::GrowByFactor(current_capacity, min_capacity);
return Resize(new_capacity);
}
/// Reset the builder.
virtual void Reset();
/// \brief Append a null value to builder
virtual Status AppendNull() = 0;
/// \brief Append a number of null values to builder
virtual Status AppendNulls(int64_t length) = 0;
/// \brief Append a non-null value to builder
///
/// The appended value is an implementation detail, but the corresponding
/// memory slot is guaranteed to be initialized.
/// This method is useful when appending a null value to a parent nested type.
virtual Status AppendEmptyValue() = 0;
/// \brief Append a number of non-null values to builder
///
/// The appended values are an implementation detail, but the corresponding
/// memory slot is guaranteed to be initialized.
/// This method is useful when appending null values to a parent nested type.
virtual Status AppendEmptyValues(int64_t length) = 0;
/// \brief Append a value from a scalar
Status AppendScalar(const Scalar& scalar) { return AppendScalar(scalar, 1); }
virtual Status AppendScalar(const Scalar& scalar, int64_t n_repeats);
virtual Status AppendScalars(const ScalarVector& scalars);
/// \brief Append a range of values from an array.
///
/// The given array must be the same type as the builder.
virtual Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) {
return Status::NotImplemented("AppendArraySlice for builder for ", *type());
}
/// For cases where raw data was memcpy'd into the internal buffers, allows us
/// to advance the length of the builder. It is the caller's responsibility to
/// keep the buffers and the advanced length consistent.
ARROW_DEPRECATED(
"Deprecated in 6.0.0. ArrayBuilder::Advance is poorly supported and mostly "
"untested.\nFor low-level control over buffer construction, use BufferBuilder "
"or TypedBufferBuilder directly.")
Status Advance(int64_t elements);
/// \brief Return result of builder as an internal generic ArrayData
/// object. Resets builder except for dictionary builder
///
/// \param[out] out the finalized ArrayData object
/// \return Status
virtual Status FinishInternal(std::shared_ptr<ArrayData>* out) = 0;
/// \brief Return result of builder as an Array object.
///
/// The builder is reset except for DictionaryBuilder.
///
/// \param[out] out the finalized Array object
/// \return Status
Status Finish(std::shared_ptr<Array>* out);
/// \brief Return result of builder as an Array object.
///
/// The builder is reset except for DictionaryBuilder.
///
/// \return The finalized Array object
Result<std::shared_ptr<Array>> Finish();
/// \brief Return the type of the built Array
virtual std::shared_ptr<DataType> type() const = 0;
protected:
/// Append to null bitmap
Status AppendToBitmap(bool is_valid);
/// Vector append. Treat each zero byte as a null. If valid_bytes is null
/// assume all of length bits are valid.
Status AppendToBitmap(const uint8_t* valid_bytes, int64_t length);
/// Uniform append. Append N times the same validity bit.
Status AppendToBitmap(int64_t num_bits, bool value);
/// Set the next length bits to not null (i.e. valid).
Status SetNotNull(int64_t length);
// Unsafe operations (don't check capacity/don't resize)
void UnsafeAppendNull() { UnsafeAppendToBitmap(false); }
// Append to null bitmap, update the length
void UnsafeAppendToBitmap(bool is_valid) {
null_bitmap_builder_.UnsafeAppend(is_valid);
++length_;
if (!is_valid) ++null_count_;
}
// Vector append. Treat each zero byte as a null. If valid_bytes is null,
// assume all of length bits are valid.
void UnsafeAppendToBitmap(const uint8_t* valid_bytes, int64_t length) {
if (valid_bytes == NULLPTR) {
return UnsafeSetNotNull(length);
}
null_bitmap_builder_.UnsafeAppend(valid_bytes, length);
length_ += length;
null_count_ = null_bitmap_builder_.false_count();
}
// Vector append. Copy from a given bitmap. If bitmap is null assume
// all of length bits are valid.
void UnsafeAppendToBitmap(const uint8_t* bitmap, int64_t offset, int64_t length) {
if (bitmap == NULLPTR) {
return UnsafeSetNotNull(length);
}
null_bitmap_builder_.UnsafeAppend(bitmap, offset, length);
length_ += length;
null_count_ = null_bitmap_builder_.false_count();
}
// Append the same validity value a given number of times.
void UnsafeAppendToBitmap(const int64_t num_bits, bool value) {
if (value) {
UnsafeSetNotNull(num_bits);
} else {
UnsafeSetNull(num_bits);
}
}
void UnsafeAppendToBitmap(const std::vector<bool>& is_valid);
// Set the next validity bits to not null (i.e. valid).
void UnsafeSetNotNull(int64_t length);
// Set the next validity bits to null (i.e. invalid).
void UnsafeSetNull(int64_t length);
static Status TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer);
/// \brief Finish to an array of the specified ArrayType
template <typename ArrayType>
Status FinishTyped(std::shared_ptr<ArrayType>* out) {
std::shared_ptr<Array> out_untyped;
ARROW_RETURN_NOT_OK(Finish(&out_untyped));
*out = std::static_pointer_cast<ArrayType>(std::move(out_untyped));
return Status::OK();
}
// Check the requested capacity for validity
Status CheckCapacity(int64_t new_capacity) {
if (ARROW_PREDICT_FALSE(new_capacity < 0)) {
return Status::Invalid(
"Resize capacity must be positive (requested: ", new_capacity, ")");
}
if (ARROW_PREDICT_FALSE(new_capacity < length_)) {
return Status::Invalid("Resize cannot downsize (requested: ", new_capacity,
", current length: ", length_, ")");
}
return Status::OK();
}
// Check for array type
Status CheckArrayType(const std::shared_ptr<DataType>& expected_type,
const Array& array, const char* message);
Status CheckArrayType(Type::type expected_type, const Array& array,
const char* message);
MemoryPool* pool_;
TypedBufferBuilder<bool> null_bitmap_builder_;
int64_t null_count_ = 0;
// Array length, so far. Also, the index of the next element to be added
int64_t length_ = 0;
int64_t capacity_ = 0;
// Child value array builders. These are owned by this class
std::vector<std::shared_ptr<ArrayBuilder>> children_;
private:
ARROW_DISALLOW_COPY_AND_ASSIGN(ArrayBuilder);
};
/// \brief Construct an empty ArrayBuilder corresponding to the data
/// type
/// \param[in] pool the MemoryPool to use for allocations
/// \param[in] type the data type to create the builder for
/// \param[out] out the created ArrayBuilder
ARROW_EXPORT
Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
std::unique_ptr<ArrayBuilder>* out);
inline Result<std::unique_ptr<ArrayBuilder>> MakeBuilder(
const std::shared_ptr<DataType>& type, MemoryPool* pool = default_memory_pool()) {
std::unique_ptr<ArrayBuilder> out;
ARROW_RETURN_NOT_OK(MakeBuilder(pool, type, &out));
return std::move(out);
}
/// \brief Construct an empty ArrayBuilder corresponding to the data
/// type, where any top-level or nested dictionary builders return the
/// exact index type specified by the type.
ARROW_EXPORT
Status MakeBuilderExactIndex(MemoryPool* pool, const std::shared_ptr<DataType>& type,
std::unique_ptr<ArrayBuilder>* out);
inline Result<std::unique_ptr<ArrayBuilder>> MakeBuilderExactIndex(
const std::shared_ptr<DataType>& type, MemoryPool* pool = default_memory_pool()) {
std::unique_ptr<ArrayBuilder> out;
ARROW_RETURN_NOT_OK(MakeBuilderExactIndex(pool, type, &out));
return std::move(out);
}
/// \brief Construct an empty DictionaryBuilder initialized optionally
/// with a pre-existing dictionary
/// \param[in] pool the MemoryPool to use for allocations
/// \param[in] type the dictionary type to create the builder for
/// \param[in] dictionary the initial dictionary, if any. May be nullptr
/// \param[out] out the created ArrayBuilder
ARROW_EXPORT
Status MakeDictionaryBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
const std::shared_ptr<Array>& dictionary,
std::unique_ptr<ArrayBuilder>* out);
inline Result<std::unique_ptr<ArrayBuilder>> MakeDictionaryBuilder(
const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& dictionary,
MemoryPool* pool = default_memory_pool()) {
std::unique_ptr<ArrayBuilder> out;
ARROW_RETURN_NOT_OK(MakeDictionaryBuilder(pool, type, dictionary, &out));
return std::move(out);
}
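// --- Illustrative usage sketch (added for this document, not part of the
// upstream header) ---
// MakeBuilder is the generic entry point when the data type is only known at
// runtime; the returned ArrayBuilder is then driven through the type-erased
// Append*/Finish API. This belongs in user code with <arrow/api.h> included;
// the function name is hypothetical.
inline Result<std::shared_ptr<Array>> BuildAllNullExample(
    const std::shared_ptr<DataType>& type, int64_t length) {
  ARROW_ASSIGN_OR_RAISE(std::unique_ptr<ArrayBuilder> builder, MakeBuilder(type));
  ARROW_RETURN_NOT_OK(builder->AppendNulls(length));
  return builder->Finish();
}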
} // namespace arrow

View File

@ -0,0 +1,703 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <numeric>
#include <string>
#include <vector>
#include "arrow/array/array_base.h"
#include "arrow/array/array_binary.h"
#include "arrow/array/builder_base.h"
#include "arrow/array/data.h"
#include "arrow/buffer.h"
#include "arrow/buffer_builder.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
#include "arrow/util/string_view.h" // IWYU pragma: export
#include "arrow/util/visibility.h"
namespace arrow {
/// \addtogroup binary-builders
///
/// @{
// ----------------------------------------------------------------------
// Binary and String
template <typename TYPE>
class BaseBinaryBuilder : public ArrayBuilder {
public:
using TypeClass = TYPE;
using offset_type = typename TypeClass::offset_type;
explicit BaseBinaryBuilder(MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool), offsets_builder_(pool), value_data_builder_(pool) {}
BaseBinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
: BaseBinaryBuilder(pool) {}
Status Append(const uint8_t* value, offset_type length) {
ARROW_RETURN_NOT_OK(Reserve(1));
ARROW_RETURN_NOT_OK(AppendNextOffset());
// Safety check for UBSAN.
if (ARROW_PREDICT_TRUE(length > 0)) {
ARROW_RETURN_NOT_OK(ValidateOverflow(length));
ARROW_RETURN_NOT_OK(value_data_builder_.Append(value, length));
}
UnsafeAppendToBitmap(true);
return Status::OK();
}
Status Append(const char* value, offset_type length) {
return Append(reinterpret_cast<const uint8_t*>(value), length);
}
Status Append(util::string_view value) {
return Append(value.data(), static_cast<offset_type>(value.size()));
}
/// Extend the last appended value by appending more data at the end
///
/// Unlike Append, this does not create a new offset.
Status ExtendCurrent(const uint8_t* value, offset_type length) {
// Safety check for UBSAN.
if (ARROW_PREDICT_TRUE(length > 0)) {
ARROW_RETURN_NOT_OK(ValidateOverflow(length));
ARROW_RETURN_NOT_OK(value_data_builder_.Append(value, length));
}
return Status::OK();
}
Status ExtendCurrent(util::string_view value) {
return ExtendCurrent(reinterpret_cast<const uint8_t*>(value.data()),
static_cast<offset_type>(value.size()));
}
Status AppendNulls(int64_t length) final {
const int64_t num_bytes = value_data_builder_.length();
ARROW_RETURN_NOT_OK(Reserve(length));
for (int64_t i = 0; i < length; ++i) {
offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
}
UnsafeAppendToBitmap(length, false);
return Status::OK();
}
Status AppendNull() final {
ARROW_RETURN_NOT_OK(AppendNextOffset());
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppendToBitmap(false);
return Status::OK();
}
Status AppendEmptyValue() final {
ARROW_RETURN_NOT_OK(AppendNextOffset());
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppendToBitmap(true);
return Status::OK();
}
Status AppendEmptyValues(int64_t length) final {
const int64_t num_bytes = value_data_builder_.length();
ARROW_RETURN_NOT_OK(Reserve(length));
for (int64_t i = 0; i < length; ++i) {
offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
}
UnsafeAppendToBitmap(length, true);
return Status::OK();
}
/// \brief Append without checking capacity
///
/// Offsets and data should have been presized using Reserve() and
/// ReserveData(), respectively.
void UnsafeAppend(const uint8_t* value, offset_type length) {
UnsafeAppendNextOffset();
value_data_builder_.UnsafeAppend(value, length);
UnsafeAppendToBitmap(true);
}
void UnsafeAppend(const char* value, offset_type length) {
UnsafeAppend(reinterpret_cast<const uint8_t*>(value), length);
}
void UnsafeAppend(const std::string& value) {
UnsafeAppend(value.c_str(), static_cast<offset_type>(value.size()));
}
void UnsafeAppend(util::string_view value) {
UnsafeAppend(value.data(), static_cast<offset_type>(value.size()));
}
/// Like ExtendCurrent, but do not check capacity
void UnsafeExtendCurrent(const uint8_t* value, offset_type length) {
value_data_builder_.UnsafeAppend(value, length);
}
void UnsafeExtendCurrent(util::string_view value) {
UnsafeExtendCurrent(reinterpret_cast<const uint8_t*>(value.data()),
static_cast<offset_type>(value.size()));
}
void UnsafeAppendNull() {
const int64_t num_bytes = value_data_builder_.length();
offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
UnsafeAppendToBitmap(false);
}
void UnsafeAppendEmptyValue() {
const int64_t num_bytes = value_data_builder_.length();
offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
UnsafeAppendToBitmap(true);
}
/// \brief Append a sequence of strings in one shot.
///
/// \param[in] values a vector of strings
/// \param[in] valid_bytes an optional sequence of bytes where non-zero
/// indicates a valid (non-null) value
/// \return Status
Status AppendValues(const std::vector<std::string>& values,
const uint8_t* valid_bytes = NULLPTR) {
std::size_t total_length = std::accumulate(
values.begin(), values.end(), 0ULL,
[](uint64_t sum, const std::string& str) { return sum + str.size(); });
ARROW_RETURN_NOT_OK(Reserve(values.size()));
ARROW_RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
ARROW_RETURN_NOT_OK(offsets_builder_.Reserve(values.size()));
if (valid_bytes != NULLPTR) {
for (std::size_t i = 0; i < values.size(); ++i) {
UnsafeAppendNextOffset();
if (valid_bytes[i]) {
value_data_builder_.UnsafeAppend(
reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size());
}
}
} else {
for (std::size_t i = 0; i < values.size(); ++i) {
UnsafeAppendNextOffset();
value_data_builder_.UnsafeAppend(
reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size());
}
}
UnsafeAppendToBitmap(valid_bytes, values.size());
return Status::OK();
}
/// \brief Append a sequence of nul-terminated strings in one shot.
/// If one of the values is NULL, it is processed as a null
/// value even if the corresponding valid_bytes entry is 1.
///
/// \param[in] values a contiguous C array of nul-terminated char *
/// \param[in] length the number of values to append
/// \param[in] valid_bytes an optional sequence of bytes where non-zero
/// indicates a valid (non-null) value
/// \return Status
Status AppendValues(const char** values, int64_t length,
const uint8_t* valid_bytes = NULLPTR) {
std::size_t total_length = 0;
std::vector<std::size_t> value_lengths(length);
bool have_null_value = false;
for (int64_t i = 0; i < length; ++i) {
if (values[i] != NULLPTR) {
auto value_length = strlen(values[i]);
value_lengths[i] = value_length;
total_length += value_length;
} else {
have_null_value = true;
}
}
ARROW_RETURN_NOT_OK(Reserve(length));
ARROW_RETURN_NOT_OK(ReserveData(total_length));
if (valid_bytes) {
int64_t valid_bytes_offset = 0;
for (int64_t i = 0; i < length; ++i) {
UnsafeAppendNextOffset();
if (valid_bytes[i]) {
if (values[i]) {
value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
value_lengths[i]);
} else {
UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset,
i - valid_bytes_offset);
UnsafeAppendToBitmap(false);
valid_bytes_offset = i + 1;
}
}
}
UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, length - valid_bytes_offset);
} else {
if (have_null_value) {
std::vector<uint8_t> valid_vector(length, 0);
for (int64_t i = 0; i < length; ++i) {
UnsafeAppendNextOffset();
if (values[i]) {
value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
value_lengths[i]);
valid_vector[i] = 1;
}
}
UnsafeAppendToBitmap(valid_vector.data(), length);
} else {
for (int64_t i = 0; i < length; ++i) {
UnsafeAppendNextOffset();
value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
value_lengths[i]);
}
UnsafeAppendToBitmap(NULLPTR, length);
}
}
return Status::OK();
}
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override {
auto bitmap = array.GetValues<uint8_t>(0, 0);
auto offsets = array.GetValues<offset_type>(1);
auto data = array.GetValues<uint8_t>(2, 0);
for (int64_t i = 0; i < length; i++) {
if (!bitmap || bit_util::GetBit(bitmap, array.offset + offset + i)) {
const offset_type start = offsets[offset + i];
const offset_type end = offsets[offset + i + 1];
ARROW_RETURN_NOT_OK(Append(data + start, end - start));
} else {
ARROW_RETURN_NOT_OK(AppendNull());
}
}
return Status::OK();
}
void Reset() override {
ArrayBuilder::Reset();
offsets_builder_.Reset();
value_data_builder_.Reset();
}
Status ValidateOverflow(int64_t new_bytes) {
auto new_size = value_data_builder_.length() + new_bytes;
if (ARROW_PREDICT_FALSE(new_size > memory_limit())) {
return Status::CapacityError("array cannot contain more than ", memory_limit(),
" bytes, have ", new_size);
} else {
return Status::OK();
}
}
Status Resize(int64_t capacity) override {
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
// One more than requested for offsets
ARROW_RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
return ArrayBuilder::Resize(capacity);
}
/// \brief Ensures there is enough allocated capacity to append the indicated
/// number of bytes to the value data buffer without additional allocations
Status ReserveData(int64_t elements) {
ARROW_RETURN_NOT_OK(ValidateOverflow(elements));
return value_data_builder_.Reserve(elements);
}
Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
// Write final offset (values length)
ARROW_RETURN_NOT_OK(AppendNextOffset());
// These buffers' padding is zeroed by BufferBuilder
std::shared_ptr<Buffer> offsets, value_data, null_bitmap;
ARROW_RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
ARROW_RETURN_NOT_OK(value_data_builder_.Finish(&value_data));
ARROW_RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
*out = ArrayData::Make(type(), length_, {null_bitmap, offsets, value_data},
null_count_, 0);
Reset();
return Status::OK();
}
/// \return data pointer of the value data builder
const uint8_t* value_data() const { return value_data_builder_.data(); }
/// \return size of values buffer so far
int64_t value_data_length() const { return value_data_builder_.length(); }
/// \return capacity of values buffer
int64_t value_data_capacity() const { return value_data_builder_.capacity(); }
/// \return pointer to the offsets data
const offset_type* offsets_data() const { return offsets_builder_.data(); }
/// Temporary access to a value.
///
/// This pointer becomes invalid on the next modifying operation.
const uint8_t* GetValue(int64_t i, offset_type* out_length) const {
const offset_type* offsets = offsets_builder_.data();
const auto offset = offsets[i];
if (i == (length_ - 1)) {
*out_length = static_cast<offset_type>(value_data_builder_.length()) - offset;
} else {
*out_length = offsets[i + 1] - offset;
}
return value_data_builder_.data() + offset;
}
offset_type offset(int64_t i) const { return offsets_data()[i]; }
/// Temporary access to a value.
///
/// This view becomes invalid on the next modifying operation.
util::string_view GetView(int64_t i) const {
offset_type value_length;
const uint8_t* value_data = GetValue(i, &value_length);
return util::string_view(reinterpret_cast<const char*>(value_data), value_length);
}
// Cannot make this a static attribute because of linking issues
static constexpr int64_t memory_limit() {
return std::numeric_limits<offset_type>::max() - 1;
}
protected:
TypedBufferBuilder<offset_type> offsets_builder_;
TypedBufferBuilder<uint8_t> value_data_builder_;
Status AppendNextOffset() {
const int64_t num_bytes = value_data_builder_.length();
return offsets_builder_.Append(static_cast<offset_type>(num_bytes));
}
void UnsafeAppendNextOffset() {
const int64_t num_bytes = value_data_builder_.length();
offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
}
};
/// \class BinaryBuilder
/// \brief Builder class for variable-length binary data
class ARROW_EXPORT BinaryBuilder : public BaseBinaryBuilder<BinaryType> {
public:
using BaseBinaryBuilder::BaseBinaryBuilder;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<BinaryArray>* out) { return FinishTyped(out); }
std::shared_ptr<DataType> type() const override { return binary(); }
};
/// \class StringBuilder
/// \brief Builder class for UTF8 strings
class ARROW_EXPORT StringBuilder : public BinaryBuilder {
public:
using BinaryBuilder::BinaryBuilder;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<StringArray>* out) { return FinishTyped(out); }
std::shared_ptr<DataType> type() const override { return utf8(); }
};
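// A minimal usage sketch for StringBuilder, assuming only the declarations
// above are in scope; the function name BuildStringArraySketch is hypothetical
// and the snippet is illustrative only. Values are appended one by one and the
// builder is consumed by Finish().
inline Status BuildStringArraySketch(std::shared_ptr<StringArray>* out) {
  StringBuilder builder;                         // uses default_memory_pool()
  ARROW_RETURN_NOT_OK(builder.Append("alpha"));  // one UTF8 value
  ARROW_RETURN_NOT_OK(builder.AppendNull());     // one null slot
  ARROW_RETURN_NOT_OK(builder.Append("gamma"));
  return builder.Finish(out);                    // ["alpha", null, "gamma"]
}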
/// \class LargeBinaryBuilder
/// \brief Builder class for large variable-length binary data
class ARROW_EXPORT LargeBinaryBuilder : public BaseBinaryBuilder<LargeBinaryType> {
public:
using BaseBinaryBuilder::BaseBinaryBuilder;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<LargeBinaryArray>* out) { return FinishTyped(out); }
std::shared_ptr<DataType> type() const override { return large_binary(); }
};
/// \class LargeStringBuilder
/// \brief Builder class for large UTF8 strings
class ARROW_EXPORT LargeStringBuilder : public LargeBinaryBuilder {
public:
using LargeBinaryBuilder::LargeBinaryBuilder;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<LargeStringArray>* out) { return FinishTyped(out); }
std::shared_ptr<DataType> type() const override { return large_utf8(); }
};
// ----------------------------------------------------------------------
// FixedSizeBinaryBuilder
class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
public:
using TypeClass = FixedSizeBinaryType;
explicit FixedSizeBinaryBuilder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool());
Status Append(const uint8_t* value) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppend(value);
return Status::OK();
}
Status Append(const char* value) {
return Append(reinterpret_cast<const uint8_t*>(value));
}
Status Append(const util::string_view& view) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppend(view);
return Status::OK();
}
Status Append(const std::string& s) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppend(s);
return Status::OK();
}
Status Append(const Buffer& s) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppend(util::string_view(s));
return Status::OK();
}
Status Append(const std::shared_ptr<Buffer>& s) { return Append(*s); }
template <size_t NBYTES>
Status Append(const std::array<uint8_t, NBYTES>& value) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppend(
util::string_view(reinterpret_cast<const char*>(value.data()), value.size()));
return Status::OK();
}
Status AppendValues(const uint8_t* data, int64_t length,
const uint8_t* valid_bytes = NULLPTR);
Status AppendValues(const uint8_t* data, int64_t length, const uint8_t* validity,
int64_t bitmap_offset);
Status AppendNull() final;
Status AppendNulls(int64_t length) final;
Status AppendEmptyValue() final;
Status AppendEmptyValues(int64_t length) final;
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override {
return AppendValues(
array.GetValues<uint8_t>(1, 0) + ((array.offset + offset) * byte_width_), length,
array.GetValues<uint8_t>(0, 0), array.offset + offset);
}
void UnsafeAppend(const uint8_t* value) {
UnsafeAppendToBitmap(true);
if (ARROW_PREDICT_TRUE(byte_width_ > 0)) {
byte_builder_.UnsafeAppend(value, byte_width_);
}
}
void UnsafeAppend(const char* value) {
UnsafeAppend(reinterpret_cast<const uint8_t*>(value));
}
void UnsafeAppend(util::string_view value) {
#ifndef NDEBUG
CheckValueSize(static_cast<size_t>(value.size()));
#endif
UnsafeAppend(reinterpret_cast<const uint8_t*>(value.data()));
}
void UnsafeAppend(const Buffer& s) { UnsafeAppend(util::string_view(s)); }
void UnsafeAppend(const std::shared_ptr<Buffer>& s) { UnsafeAppend(*s); }
void UnsafeAppendNull() {
UnsafeAppendToBitmap(false);
byte_builder_.UnsafeAppend(/*num_copies=*/byte_width_, 0);
}
Status ValidateOverflow(int64_t new_bytes) const {
auto new_size = byte_builder_.length() + new_bytes;
if (ARROW_PREDICT_FALSE(new_size > memory_limit())) {
return Status::CapacityError("array cannot contain more than ", memory_limit(),
" bytes, have ", new_size);
} else {
return Status::OK();
}
}
/// \brief Ensures there is enough allocated capacity to append the indicated
/// number of bytes to the value data buffer without additional allocations
Status ReserveData(int64_t elements) {
ARROW_RETURN_NOT_OK(ValidateOverflow(elements));
return byte_builder_.Reserve(elements);
}
void Reset() override;
Status Resize(int64_t capacity) override;
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<FixedSizeBinaryArray>* out) { return FinishTyped(out); }
/// \return size of values buffer so far
int64_t value_data_length() const { return byte_builder_.length(); }
int32_t byte_width() const { return byte_width_; }
/// Temporary access to a value.
///
/// This pointer becomes invalid on the next modifying operation.
const uint8_t* GetValue(int64_t i) const;
/// Temporary access to a value.
///
/// This view becomes invalid on the next modifying operation.
util::string_view GetView(int64_t i) const;
static constexpr int64_t memory_limit() {
return std::numeric_limits<int64_t>::max() - 1;
}
std::shared_ptr<DataType> type() const override {
return fixed_size_binary(byte_width_);
}
protected:
int32_t byte_width_;
BufferBuilder byte_builder_;
/// Temporary access to a value.
///
/// This pointer becomes invalid on the next modifying operation.
uint8_t* GetMutableValue(int64_t i) {
uint8_t* data_ptr = byte_builder_.mutable_data();
return data_ptr + i * byte_width_;
}
void CheckValueSize(int64_t size);
};
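// A minimal usage sketch for FixedSizeBinaryBuilder, assuming only the
// declarations above; the function name BuildFixedSizeBinarySketch is
// hypothetical and illustrative only. Every appended value occupies exactly
// byte_width() bytes.
inline Status BuildFixedSizeBinarySketch(std::shared_ptr<FixedSizeBinaryArray>* out) {
  FixedSizeBinaryBuilder builder(fixed_size_binary(4));  // 4 bytes per value
  ARROW_RETURN_NOT_OK(builder.Append("abcd"));           // exactly 4 bytes are read
  ARROW_RETURN_NOT_OK(builder.AppendNull());             // null slot (4 zero bytes of storage)
  ARROW_RETURN_NOT_OK(builder.Append("wxyz"));
  return builder.Finish(out);
}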
/// @}
// ----------------------------------------------------------------------
// Chunked builders: build a sequence of BinaryArray or StringArray that are
// limited to a particular size (to the upper limit of 2GB)
namespace internal {
class ARROW_EXPORT ChunkedBinaryBuilder {
public:
explicit ChunkedBinaryBuilder(int32_t max_chunk_value_length,
MemoryPool* pool = default_memory_pool());
ChunkedBinaryBuilder(int32_t max_chunk_value_length, int32_t max_chunk_length,
MemoryPool* pool = default_memory_pool());
virtual ~ChunkedBinaryBuilder() = default;
Status Append(const uint8_t* value, int32_t length) {
if (ARROW_PREDICT_FALSE(length + builder_->value_data_length() >
max_chunk_value_length_)) {
if (builder_->value_data_length() == 0) {
        // The current item is larger than max_chunk_value_length_;
        // this chunk will be oversized and hold *only* this item
ARROW_RETURN_NOT_OK(builder_->Append(value, length));
return NextChunk();
}
      // The current item would cause builder_->value_data_length() to exceed
      // max_chunk_value_length_, so finish this chunk and append the current item to
      // the next chunk
ARROW_RETURN_NOT_OK(NextChunk());
return Append(value, length);
}
if (ARROW_PREDICT_FALSE(builder_->length() == max_chunk_length_)) {
// The current item would cause builder_->length() to exceed max_chunk_length_, so
// finish this chunk and append the current item to the next chunk
ARROW_RETURN_NOT_OK(NextChunk());
}
return builder_->Append(value, length);
}
Status Append(const util::string_view& value) {
return Append(reinterpret_cast<const uint8_t*>(value.data()),
static_cast<int32_t>(value.size()));
}
Status AppendNull() {
if (ARROW_PREDICT_FALSE(builder_->length() == max_chunk_length_)) {
ARROW_RETURN_NOT_OK(NextChunk());
}
return builder_->AppendNull();
}
Status Reserve(int64_t values);
virtual Status Finish(ArrayVector* out);
protected:
Status NextChunk();
// maximum total character data size per chunk
int64_t max_chunk_value_length_;
// maximum elements allowed per chunk
int64_t max_chunk_length_ = kListMaximumElements;
// when Reserve() would cause builder_ to exceed its max_chunk_length_,
// add to extra_capacity_ instead and wait to reserve until the next chunk
int64_t extra_capacity_ = 0;
std::unique_ptr<BinaryBuilder> builder_;
std::vector<std::shared_ptr<Array>> chunks_;
};
class ARROW_EXPORT ChunkedStringBuilder : public ChunkedBinaryBuilder {
public:
using ChunkedBinaryBuilder::ChunkedBinaryBuilder;
Status Finish(ArrayVector* out) override;
};
} // namespace internal
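// A minimal usage sketch for the internal chunked builders, assuming only the
// declarations above; the function name BuildChunkedStringsSketch is
// hypothetical and illustrative only. Appended values are split into multiple
// StringArray chunks whose value data stays below the configured byte limit.
inline Status BuildChunkedStringsSketch(ArrayVector* out_chunks) {
  // Keep at most ~1 MB of character data per chunk.
  internal::ChunkedStringBuilder builder(/*max_chunk_value_length=*/1 << 20);
  for (int i = 0; i < 1000; ++i) {
    ARROW_RETURN_NOT_OK(builder.Append("some value"));
  }
  // Finish() emits one StringArray per accumulated chunk.
  return builder.Finish(out_chunks);
}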
} // namespace arrow

View File

@ -0,0 +1,100 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "arrow/array/array_decimal.h"
#include "arrow/array/builder_base.h"
#include "arrow/array/builder_binary.h"
#include "arrow/array/data.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \addtogroup numeric-builders
///
/// @{
class ARROW_EXPORT Decimal128Builder : public FixedSizeBinaryBuilder {
public:
using TypeClass = Decimal128Type;
using ValueType = Decimal128;
explicit Decimal128Builder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool());
using FixedSizeBinaryBuilder::Append;
using FixedSizeBinaryBuilder::AppendValues;
using FixedSizeBinaryBuilder::Reset;
Status Append(Decimal128 val);
void UnsafeAppend(Decimal128 val);
void UnsafeAppend(util::string_view val);
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<Decimal128Array>* out) { return FinishTyped(out); }
std::shared_ptr<DataType> type() const override { return decimal_type_; }
protected:
std::shared_ptr<Decimal128Type> decimal_type_;
};
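// A minimal usage sketch for Decimal128Builder, assuming the Decimal128
// definition from arrow/util/decimal.h is visible at this point; the function
// name BuildDecimalSketch is hypothetical and illustrative only.
inline Status BuildDecimalSketch(std::shared_ptr<Decimal128Array>* out) {
  // decimal128(10, 2): up to 10 significant digits, 2 of them after the point.
  Decimal128Builder builder(decimal128(/*precision=*/10, /*scale=*/2));
  ARROW_RETURN_NOT_OK(builder.Append(Decimal128(12345)));  // represents 123.45
  ARROW_RETURN_NOT_OK(builder.AppendNull());
  return builder.Finish(out);
}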
class ARROW_EXPORT Decimal256Builder : public FixedSizeBinaryBuilder {
public:
using TypeClass = Decimal256Type;
using ValueType = Decimal256;
explicit Decimal256Builder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool());
using FixedSizeBinaryBuilder::Append;
using FixedSizeBinaryBuilder::AppendValues;
using FixedSizeBinaryBuilder::Reset;
Status Append(const Decimal256& val);
void UnsafeAppend(const Decimal256& val);
void UnsafeAppend(util::string_view val);
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<Decimal256Array>* out) { return FinishTyped(out); }
std::shared_ptr<DataType> type() const override { return decimal_type_; }
protected:
std::shared_ptr<Decimal256Type> decimal_type_;
};
using DecimalBuilder = Decimal128Builder;
/// @}
} // namespace arrow

View File

@ -0,0 +1,722 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <cstdint>
#include <memory>
#include <type_traits>
#include "arrow/array/array_base.h"
#include "arrow/array/array_binary.h"
#include "arrow/array/builder_adaptive.h" // IWYU pragma: export
#include "arrow/array/builder_base.h" // IWYU pragma: export
#include "arrow/array/builder_primitive.h" // IWYU pragma: export
#include "arrow/array/data.h"
#include "arrow/array/util.h"
#include "arrow/scalar.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit_block_counter.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/decimal.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
// ----------------------------------------------------------------------
// Dictionary builder
namespace internal {
template <typename T, typename Enable = void>
struct DictionaryValue {
using type = typename T::c_type;
using PhysicalType = T;
};
template <typename T>
struct DictionaryValue<T, enable_if_base_binary<T>> {
using type = util::string_view;
using PhysicalType =
typename std::conditional<std::is_same<typename T::offset_type, int32_t>::value,
BinaryType, LargeBinaryType>::type;
};
template <typename T>
struct DictionaryValue<T, enable_if_fixed_size_binary<T>> {
using type = util::string_view;
using PhysicalType = BinaryType;
};
class ARROW_EXPORT DictionaryMemoTable {
public:
DictionaryMemoTable(MemoryPool* pool, const std::shared_ptr<DataType>& type);
DictionaryMemoTable(MemoryPool* pool, const std::shared_ptr<Array>& dictionary);
~DictionaryMemoTable();
Status GetArrayData(int64_t start_offset, std::shared_ptr<ArrayData>* out);
/// \brief Insert new memo values
Status InsertValues(const Array& values);
int32_t size() const;
template <typename T>
Status GetOrInsert(typename DictionaryValue<T>::type value, int32_t* out) {
// We want to keep the DictionaryMemoTable implementation private, also we can't
// use extern template classes because of compiler issues (MinGW?). Instead,
// we expose explicit function overrides for each supported physical type.
const typename DictionaryValue<T>::PhysicalType* physical_type = NULLPTR;
return GetOrInsert(physical_type, value, out);
}
private:
Status GetOrInsert(const BooleanType*, bool value, int32_t* out);
Status GetOrInsert(const Int8Type*, int8_t value, int32_t* out);
Status GetOrInsert(const Int16Type*, int16_t value, int32_t* out);
Status GetOrInsert(const Int32Type*, int32_t value, int32_t* out);
Status GetOrInsert(const Int64Type*, int64_t value, int32_t* out);
Status GetOrInsert(const UInt8Type*, uint8_t value, int32_t* out);
Status GetOrInsert(const UInt16Type*, uint16_t value, int32_t* out);
Status GetOrInsert(const UInt32Type*, uint32_t value, int32_t* out);
Status GetOrInsert(const UInt64Type*, uint64_t value, int32_t* out);
Status GetOrInsert(const DurationType*, int64_t value, int32_t* out);
Status GetOrInsert(const TimestampType*, int64_t value, int32_t* out);
Status GetOrInsert(const Date32Type*, int32_t value, int32_t* out);
Status GetOrInsert(const Date64Type*, int64_t value, int32_t* out);
Status GetOrInsert(const Time32Type*, int32_t value, int32_t* out);
Status GetOrInsert(const Time64Type*, int64_t value, int32_t* out);
Status GetOrInsert(const MonthDayNanoIntervalType*,
MonthDayNanoIntervalType::MonthDayNanos value, int32_t* out);
Status GetOrInsert(const DayTimeIntervalType*,
DayTimeIntervalType::DayMilliseconds value, int32_t* out);
Status GetOrInsert(const MonthIntervalType*, int32_t value, int32_t* out);
Status GetOrInsert(const FloatType*, float value, int32_t* out);
Status GetOrInsert(const DoubleType*, double value, int32_t* out);
Status GetOrInsert(const BinaryType*, util::string_view value, int32_t* out);
Status GetOrInsert(const LargeBinaryType*, util::string_view value, int32_t* out);
class DictionaryMemoTableImpl;
std::unique_ptr<DictionaryMemoTableImpl> impl_;
};
} // namespace internal
/// \addtogroup dictionary-builders
///
/// @{
namespace internal {
/// \brief Array builder for creating a dictionary-encoded DictionaryArray
/// from dense array data
///
/// Unlike other builders, dictionary builder does not completely
/// reset the state on Finish calls.
template <typename BuilderType, typename T>
class DictionaryBuilderBase : public ArrayBuilder {
public:
using TypeClass = DictionaryType;
using Value = typename DictionaryValue<T>::type;
// WARNING: the type given below is the value type, not the DictionaryType.
// The DictionaryType is instantiated on the Finish() call.
template <typename B = BuilderType, typename T1 = T>
DictionaryBuilderBase(uint8_t start_int_size,
enable_if_t<std::is_base_of<AdaptiveIntBuilderBase, B>::value &&
!is_fixed_size_binary_type<T1>::value,
const std::shared_ptr<DataType>&>
value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(-1),
indices_builder_(start_int_size, pool),
value_type_(value_type) {}
template <typename T1 = T>
explicit DictionaryBuilderBase(
enable_if_t<!is_fixed_size_binary_type<T1>::value, const std::shared_ptr<DataType>&>
value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(-1),
indices_builder_(pool),
value_type_(value_type) {}
template <typename T1 = T>
explicit DictionaryBuilderBase(
const std::shared_ptr<DataType>& index_type,
enable_if_t<!is_fixed_size_binary_type<T1>::value, const std::shared_ptr<DataType>&>
value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(-1),
indices_builder_(index_type, pool),
value_type_(value_type) {}
template <typename B = BuilderType, typename T1 = T>
DictionaryBuilderBase(uint8_t start_int_size,
enable_if_t<std::is_base_of<AdaptiveIntBuilderBase, B>::value &&
is_fixed_size_binary_type<T1>::value,
const std::shared_ptr<DataType>&>
value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(static_cast<const T1&>(*value_type).byte_width()),
indices_builder_(start_int_size, pool),
value_type_(value_type) {}
template <typename T1 = T>
explicit DictionaryBuilderBase(
enable_if_fixed_size_binary<T1, const std::shared_ptr<DataType>&> value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(static_cast<const T1&>(*value_type).byte_width()),
indices_builder_(pool),
value_type_(value_type) {}
template <typename T1 = T>
explicit DictionaryBuilderBase(
const std::shared_ptr<DataType>& index_type,
enable_if_fixed_size_binary<T1, const std::shared_ptr<DataType>&> value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(static_cast<const T1&>(*value_type).byte_width()),
indices_builder_(index_type, pool),
value_type_(value_type) {}
template <typename T1 = T>
explicit DictionaryBuilderBase(
enable_if_parameter_free<T1, MemoryPool*> pool = default_memory_pool())
: DictionaryBuilderBase<BuilderType, T1>(TypeTraits<T1>::type_singleton(), pool) {}
// This constructor doesn't check for errors. Use InsertMemoValues instead.
explicit DictionaryBuilderBase(const std::shared_ptr<Array>& dictionary,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
memo_table_(new internal::DictionaryMemoTable(pool, dictionary)),
delta_offset_(0),
byte_width_(-1),
indices_builder_(pool),
value_type_(dictionary->type()) {}
~DictionaryBuilderBase() override = default;
/// \brief The current number of entries in the dictionary
int64_t dictionary_length() const { return memo_table_->size(); }
/// \brief The value byte width (for FixedSizeBinaryType)
template <typename T1 = T>
enable_if_fixed_size_binary<T1, int32_t> byte_width() const {
return byte_width_;
}
/// \brief Append a scalar value
Status Append(Value value) {
ARROW_RETURN_NOT_OK(Reserve(1));
int32_t memo_index;
ARROW_RETURN_NOT_OK(memo_table_->GetOrInsert<T>(value, &memo_index));
ARROW_RETURN_NOT_OK(indices_builder_.Append(memo_index));
length_ += 1;
return Status::OK();
}
/// \brief Append a fixed-width string (only for FixedSizeBinaryType)
template <typename T1 = T>
enable_if_fixed_size_binary<T1, Status> Append(const uint8_t* value) {
return Append(util::string_view(reinterpret_cast<const char*>(value), byte_width_));
}
/// \brief Append a fixed-width string (only for FixedSizeBinaryType)
template <typename T1 = T>
enable_if_fixed_size_binary<T1, Status> Append(const char* value) {
return Append(util::string_view(value, byte_width_));
}
/// \brief Append a string (only for binary types)
template <typename T1 = T>
enable_if_binary_like<T1, Status> Append(const uint8_t* value, int32_t length) {
return Append(reinterpret_cast<const char*>(value), length);
}
/// \brief Append a string (only for binary types)
template <typename T1 = T>
enable_if_binary_like<T1, Status> Append(const char* value, int32_t length) {
return Append(util::string_view(value, length));
}
/// \brief Append a string (only for string types)
template <typename T1 = T>
enable_if_string_like<T1, Status> Append(const char* value, int32_t length) {
return Append(util::string_view(value, length));
}
/// \brief Append a decimal (only for Decimal128Type)
template <typename T1 = T>
enable_if_decimal128<T1, Status> Append(const Decimal128& value) {
uint8_t data[16];
value.ToBytes(data);
return Append(data, 16);
}
/// \brief Append a decimal (only for Decimal128Type)
template <typename T1 = T>
enable_if_decimal256<T1, Status> Append(const Decimal256& value) {
uint8_t data[32];
value.ToBytes(data);
return Append(data, 32);
}
/// \brief Append a scalar null value
Status AppendNull() final {
length_ += 1;
null_count_ += 1;
return indices_builder_.AppendNull();
}
Status AppendNulls(int64_t length) final {
length_ += length;
null_count_ += length;
return indices_builder_.AppendNulls(length);
}
Status AppendEmptyValue() final {
length_ += 1;
return indices_builder_.AppendEmptyValue();
}
Status AppendEmptyValues(int64_t length) final {
length_ += length;
return indices_builder_.AppendEmptyValues(length);
}
Status AppendScalar(const Scalar& scalar, int64_t n_repeats) override {
if (!scalar.is_valid) return AppendNulls(n_repeats);
const auto& dict_ty = internal::checked_cast<const DictionaryType&>(*scalar.type);
const DictionaryScalar& dict_scalar =
internal::checked_cast<const DictionaryScalar&>(scalar);
const auto& dict = internal::checked_cast<const typename TypeTraits<T>::ArrayType&>(
*dict_scalar.value.dictionary);
ARROW_RETURN_NOT_OK(Reserve(n_repeats));
switch (dict_ty.index_type()->id()) {
case Type::UINT8:
return AppendScalarImpl<UInt8Type>(dict, *dict_scalar.value.index, n_repeats);
case Type::INT8:
return AppendScalarImpl<Int8Type>(dict, *dict_scalar.value.index, n_repeats);
case Type::UINT16:
return AppendScalarImpl<UInt16Type>(dict, *dict_scalar.value.index, n_repeats);
case Type::INT16:
return AppendScalarImpl<Int16Type>(dict, *dict_scalar.value.index, n_repeats);
case Type::UINT32:
return AppendScalarImpl<UInt32Type>(dict, *dict_scalar.value.index, n_repeats);
case Type::INT32:
return AppendScalarImpl<Int32Type>(dict, *dict_scalar.value.index, n_repeats);
case Type::UINT64:
return AppendScalarImpl<UInt64Type>(dict, *dict_scalar.value.index, n_repeats);
case Type::INT64:
return AppendScalarImpl<Int64Type>(dict, *dict_scalar.value.index, n_repeats);
default:
return Status::TypeError("Invalid index type: ", dict_ty);
}
return Status::OK();
}
Status AppendScalars(const ScalarVector& scalars) override {
for (const auto& scalar : scalars) {
ARROW_RETURN_NOT_OK(AppendScalar(*scalar, /*n_repeats=*/1));
}
return Status::OK();
}
Status AppendArraySlice(const ArrayData& array, int64_t offset, int64_t length) final {
// Visit the indices and insert the unpacked values.
const auto& dict_ty = internal::checked_cast<const DictionaryType&>(*array.type);
const typename TypeTraits<T>::ArrayType dict(array.dictionary);
ARROW_RETURN_NOT_OK(Reserve(length));
switch (dict_ty.index_type()->id()) {
case Type::UINT8:
return AppendArraySliceImpl<uint8_t>(dict, array, offset, length);
case Type::INT8:
return AppendArraySliceImpl<int8_t>(dict, array, offset, length);
case Type::UINT16:
return AppendArraySliceImpl<uint16_t>(dict, array, offset, length);
case Type::INT16:
return AppendArraySliceImpl<int16_t>(dict, array, offset, length);
case Type::UINT32:
return AppendArraySliceImpl<uint32_t>(dict, array, offset, length);
case Type::INT32:
return AppendArraySliceImpl<int32_t>(dict, array, offset, length);
case Type::UINT64:
return AppendArraySliceImpl<uint64_t>(dict, array, offset, length);
case Type::INT64:
return AppendArraySliceImpl<int64_t>(dict, array, offset, length);
default:
return Status::TypeError("Invalid index type: ", dict_ty);
}
return Status::OK();
}
/// \brief Insert values into the dictionary's memo, but do not append any
/// indices. Can be used to initialize a new builder with known dictionary
/// values
/// \param[in] values dictionary values to add to memo. Type must match
/// builder type
Status InsertMemoValues(const Array& values) {
return memo_table_->InsertValues(values);
}
/// \brief Append a whole dense array to the builder
template <typename T1 = T>
enable_if_t<!is_fixed_size_binary_type<T1>::value, Status> AppendArray(
const Array& array) {
using ArrayType = typename TypeTraits<T>::ArrayType;
#ifndef NDEBUG
ARROW_RETURN_NOT_OK(ArrayBuilder::CheckArrayType(
value_type_, array, "Wrong value type of array to be appended"));
#endif
const auto& concrete_array = static_cast<const ArrayType&>(array);
for (int64_t i = 0; i < array.length(); i++) {
if (array.IsNull(i)) {
ARROW_RETURN_NOT_OK(AppendNull());
} else {
ARROW_RETURN_NOT_OK(Append(concrete_array.GetView(i)));
}
}
return Status::OK();
}
template <typename T1 = T>
enable_if_fixed_size_binary<T1, Status> AppendArray(const Array& array) {
#ifndef NDEBUG
ARROW_RETURN_NOT_OK(ArrayBuilder::CheckArrayType(
value_type_, array, "Wrong value type of array to be appended"));
#endif
const auto& concrete_array = static_cast<const FixedSizeBinaryArray&>(array);
for (int64_t i = 0; i < array.length(); i++) {
if (array.IsNull(i)) {
ARROW_RETURN_NOT_OK(AppendNull());
} else {
ARROW_RETURN_NOT_OK(Append(concrete_array.GetValue(i)));
}
}
return Status::OK();
}
void Reset() override {
// Perform a partial reset. Call ResetFull to also reset the accumulated
// dictionary values
ArrayBuilder::Reset();
indices_builder_.Reset();
}
/// \brief Reset and also clear accumulated dictionary values in memo table
void ResetFull() {
Reset();
memo_table_.reset(new internal::DictionaryMemoTable(pool_, value_type_));
}
Status Resize(int64_t capacity) override {
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
capacity = std::max(capacity, kMinBuilderCapacity);
ARROW_RETURN_NOT_OK(indices_builder_.Resize(capacity));
capacity_ = indices_builder_.capacity();
return Status::OK();
}
/// \brief Return dictionary indices and a delta dictionary since the last
/// time that Finish or FinishDelta were called, and reset state of builder
/// (except the memo table)
Status FinishDelta(std::shared_ptr<Array>* out_indices,
std::shared_ptr<Array>* out_delta) {
std::shared_ptr<ArrayData> indices_data;
std::shared_ptr<ArrayData> delta_data;
ARROW_RETURN_NOT_OK(FinishWithDictOffset(delta_offset_, &indices_data, &delta_data));
*out_indices = MakeArray(indices_data);
*out_delta = MakeArray(delta_data);
return Status::OK();
}
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<DictionaryArray>* out) { return FinishTyped(out); }
std::shared_ptr<DataType> type() const override {
return ::arrow::dictionary(indices_builder_.type(), value_type_);
}
protected:
template <typename c_type>
Status AppendArraySliceImpl(const typename TypeTraits<T>::ArrayType& dict,
const ArrayData& array, int64_t offset, int64_t length) {
const c_type* values = array.GetValues<c_type>(1) + offset;
return VisitBitBlocks(
array.buffers[0], array.offset + offset, length,
[&](const int64_t position) {
const int64_t index = static_cast<int64_t>(values[position]);
if (dict.IsValid(index)) {
return Append(dict.GetView(index));
}
return AppendNull();
},
[&]() { return AppendNull(); });
}
template <typename IndexType>
Status AppendScalarImpl(const typename TypeTraits<T>::ArrayType& dict,
const Scalar& index_scalar, int64_t n_repeats) {
using ScalarType = typename TypeTraits<IndexType>::ScalarType;
const auto index = internal::checked_cast<const ScalarType&>(index_scalar).value;
if (index_scalar.is_valid && dict.IsValid(index)) {
const auto& value = dict.GetView(index);
for (int64_t i = 0; i < n_repeats; i++) {
ARROW_RETURN_NOT_OK(Append(value));
}
return Status::OK();
}
return AppendNulls(n_repeats);
}
Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
std::shared_ptr<ArrayData> dictionary;
ARROW_RETURN_NOT_OK(FinishWithDictOffset(/*offset=*/0, out, &dictionary));
// Set type of array data to the right dictionary type
(*out)->type = type();
(*out)->dictionary = dictionary;
return Status::OK();
}
Status FinishWithDictOffset(int64_t dict_offset,
std::shared_ptr<ArrayData>* out_indices,
std::shared_ptr<ArrayData>* out_dictionary) {
// Finalize indices array
ARROW_RETURN_NOT_OK(indices_builder_.FinishInternal(out_indices));
// Generate dictionary array from hash table contents
ARROW_RETURN_NOT_OK(memo_table_->GetArrayData(dict_offset, out_dictionary));
delta_offset_ = memo_table_->size();
// Update internals for further uses of this DictionaryBuilder
ArrayBuilder::Reset();
return Status::OK();
}
std::unique_ptr<DictionaryMemoTable> memo_table_;
// The size of the dictionary memo at last invocation of Finish, to use in
// FinishDelta for computing dictionary deltas
int32_t delta_offset_;
// Only used for FixedSizeBinaryType
int32_t byte_width_;
BuilderType indices_builder_;
std::shared_ptr<DataType> value_type_;
};
template <typename BuilderType>
class DictionaryBuilderBase<BuilderType, NullType> : public ArrayBuilder {
public:
template <typename B = BuilderType>
DictionaryBuilderBase(
enable_if_t<std::is_base_of<AdaptiveIntBuilderBase, B>::value, uint8_t>
start_int_size,
const std::shared_ptr<DataType>& value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool), indices_builder_(start_int_size, pool) {}
explicit DictionaryBuilderBase(const std::shared_ptr<DataType>& value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool), indices_builder_(pool) {}
explicit DictionaryBuilderBase(const std::shared_ptr<DataType>& index_type,
const std::shared_ptr<DataType>& value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool), indices_builder_(index_type, pool) {}
template <typename B = BuilderType>
explicit DictionaryBuilderBase(
enable_if_t<std::is_base_of<AdaptiveIntBuilderBase, B>::value, uint8_t>
start_int_size,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool), indices_builder_(start_int_size, pool) {}
explicit DictionaryBuilderBase(MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool), indices_builder_(pool) {}
explicit DictionaryBuilderBase(const std::shared_ptr<Array>& dictionary,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool), indices_builder_(pool) {}
/// \brief Append a scalar null value
Status AppendNull() final {
length_ += 1;
null_count_ += 1;
return indices_builder_.AppendNull();
}
Status AppendNulls(int64_t length) final {
length_ += length;
null_count_ += length;
return indices_builder_.AppendNulls(length);
}
Status AppendEmptyValue() final {
length_ += 1;
return indices_builder_.AppendEmptyValue();
}
Status AppendEmptyValues(int64_t length) final {
length_ += length;
return indices_builder_.AppendEmptyValues(length);
}
/// \brief Append a whole dense array to the builder
Status AppendArray(const Array& array) {
#ifndef NDEBUG
ARROW_RETURN_NOT_OK(ArrayBuilder::CheckArrayType(
Type::NA, array, "Wrong value type of array to be appended"));
#endif
for (int64_t i = 0; i < array.length(); i++) {
ARROW_RETURN_NOT_OK(AppendNull());
}
return Status::OK();
}
Status Resize(int64_t capacity) override {
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
capacity = std::max(capacity, kMinBuilderCapacity);
ARROW_RETURN_NOT_OK(indices_builder_.Resize(capacity));
capacity_ = indices_builder_.capacity();
return Status::OK();
}
Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
ARROW_RETURN_NOT_OK(indices_builder_.FinishInternal(out));
(*out)->type = dictionary((*out)->type, null());
(*out)->dictionary = NullArray(0).data();
return Status::OK();
}
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<DictionaryArray>* out) { return FinishTyped(out); }
std::shared_ptr<DataType> type() const override {
return ::arrow::dictionary(indices_builder_.type(), null());
}
protected:
BuilderType indices_builder_;
};
} // namespace internal
/// \brief A DictionaryArray builder that uses AdaptiveIntBuilder to return the
/// smallest index size that can accommodate the dictionary indices
template <typename T>
class DictionaryBuilder : public internal::DictionaryBuilderBase<AdaptiveIntBuilder, T> {
public:
using BASE = internal::DictionaryBuilderBase<AdaptiveIntBuilder, T>;
using BASE::BASE;
/// \brief Append dictionary indices directly without modifying memo
///
/// NOTE: Experimental API
Status AppendIndices(const int64_t* values, int64_t length,
const uint8_t* valid_bytes = NULLPTR) {
int64_t null_count_before = this->indices_builder_.null_count();
ARROW_RETURN_NOT_OK(this->indices_builder_.AppendValues(values, length, valid_bytes));
this->capacity_ = this->indices_builder_.capacity();
this->length_ += length;
this->null_count_ += this->indices_builder_.null_count() - null_count_before;
return Status::OK();
}
};
/// \brief A DictionaryArray builder that always returns int32 dictionary
/// indices so that data cast to dictionary form will have a consistent index
/// type, e.g. for creating a ChunkedArray
template <typename T>
class Dictionary32Builder : public internal::DictionaryBuilderBase<Int32Builder, T> {
public:
using BASE = internal::DictionaryBuilderBase<Int32Builder, T>;
using BASE::BASE;
/// \brief Append dictionary indices directly without modifying memo
///
/// NOTE: Experimental API
Status AppendIndices(const int32_t* values, int64_t length,
const uint8_t* valid_bytes = NULLPTR) {
int64_t null_count_before = this->indices_builder_.null_count();
ARROW_RETURN_NOT_OK(this->indices_builder_.AppendValues(values, length, valid_bytes));
this->capacity_ = this->indices_builder_.capacity();
this->length_ += length;
this->null_count_ += this->indices_builder_.null_count() - null_count_before;
return Status::OK();
}
};
// ----------------------------------------------------------------------
// Binary / Unicode builders
// (compatibility aliases; those used to be derived classes with additional
// Append() overloads, but they have been folded into DictionaryBuilderBase)
using BinaryDictionaryBuilder = DictionaryBuilder<BinaryType>;
using StringDictionaryBuilder = DictionaryBuilder<StringType>;
using BinaryDictionary32Builder = Dictionary32Builder<BinaryType>;
using StringDictionary32Builder = Dictionary32Builder<StringType>;
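// A minimal usage sketch for the dictionary builders above, assuming only the
// declarations in this header; the function name BuildDictionarySketch is
// hypothetical and illustrative only. Repeated values share a single
// dictionary entry, and FinishDelta() returns plain Array handles for the
// indices and the (delta) dictionary.
inline Status BuildDictionarySketch(std::shared_ptr<Array>* out_indices,
                                    std::shared_ptr<Array>* out_dictionary) {
  StringDictionaryBuilder builder;
  ARROW_RETURN_NOT_OK(builder.Append("red"));
  ARROW_RETURN_NOT_OK(builder.Append("green"));
  ARROW_RETURN_NOT_OK(builder.Append("red"));  // reuses the entry for "red"
  ARROW_RETURN_NOT_OK(builder.AppendNull());
  // On the first call the delta dictionary is the whole dictionary:
  // ["red", "green"], with indices [0, 1, 0, null].
  return builder.FinishDelta(out_indices, out_dictionary);
}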
/// @}
} // namespace arrow

View File

@ -0,0 +1,561 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <limits>
#include <memory>
#include <utility>
#include <vector>
#include "arrow/array/array_nested.h"
#include "arrow/array/builder_base.h"
#include "arrow/array/data.h"
#include "arrow/buffer.h"
#include "arrow/buffer_builder.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \addtogroup nested-builders
///
/// @{
// ----------------------------------------------------------------------
// List builder
template <typename TYPE>
class BaseListBuilder : public ArrayBuilder {
public:
using TypeClass = TYPE;
using offset_type = typename TypeClass::offset_type;
/// Use this constructor to incrementally build the value array along with offsets and
/// null bitmap.
BaseListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> const& value_builder,
const std::shared_ptr<DataType>& type)
: ArrayBuilder(pool),
offsets_builder_(pool),
value_builder_(value_builder),
value_field_(type->field(0)->WithType(NULLPTR)) {}
BaseListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> const& value_builder)
: BaseListBuilder(pool, value_builder, list(value_builder->type())) {}
Status Resize(int64_t capacity) override {
if (capacity > maximum_elements()) {
return Status::CapacityError("List array cannot reserve space for more than ",
maximum_elements(), " got ", capacity);
}
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
// One more than requested for offsets
ARROW_RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
return ArrayBuilder::Resize(capacity);
}
void Reset() override {
ArrayBuilder::Reset();
offsets_builder_.Reset();
value_builder_->Reset();
}
/// \brief Vector append
///
/// If passed, valid_bytes is of equal length to values, and any zero byte
/// will be considered as a null for that slot
Status AppendValues(const offset_type* offsets, int64_t length,
const uint8_t* valid_bytes = NULLPTR) {
ARROW_RETURN_NOT_OK(Reserve(length));
UnsafeAppendToBitmap(valid_bytes, length);
offsets_builder_.UnsafeAppend(offsets, length);
return Status::OK();
}
/// \brief Start a new variable-length list slot
///
/// This function should be called before beginning to append elements to the
/// value builder
Status Append(bool is_valid = true) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppendToBitmap(is_valid);
return AppendNextOffset();
}
Status AppendNull() final { return Append(false); }
Status AppendNulls(int64_t length) final {
ARROW_RETURN_NOT_OK(Reserve(length));
ARROW_RETURN_NOT_OK(ValidateOverflow(0));
UnsafeAppendToBitmap(length, false);
const int64_t num_values = value_builder_->length();
for (int64_t i = 0; i < length; ++i) {
offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_values));
}
return Status::OK();
}
Status AppendEmptyValue() final { return Append(true); }
Status AppendEmptyValues(int64_t length) final {
ARROW_RETURN_NOT_OK(Reserve(length));
ARROW_RETURN_NOT_OK(ValidateOverflow(0));
UnsafeAppendToBitmap(length, true);
const int64_t num_values = value_builder_->length();
for (int64_t i = 0; i < length; ++i) {
offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_values));
}
return Status::OK();
}
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override {
const offset_type* offsets = array.GetValues<offset_type>(1);
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0]->data() : NULLPTR;
for (int64_t row = offset; row < offset + length; row++) {
if (!validity || bit_util::GetBit(validity, array.offset + row)) {
ARROW_RETURN_NOT_OK(Append());
int64_t slot_length = offsets[row + 1] - offsets[row];
ARROW_RETURN_NOT_OK(value_builder_->AppendArraySlice(*array.child_data[0],
offsets[row], slot_length));
} else {
ARROW_RETURN_NOT_OK(AppendNull());
}
}
return Status::OK();
}
Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
ARROW_RETURN_NOT_OK(AppendNextOffset());
// Offset padding zeroed by BufferBuilder
std::shared_ptr<Buffer> offsets, null_bitmap;
ARROW_RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
ARROW_RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
if (value_builder_->length() == 0) {
// Try to make sure we get a non-null values buffer (ARROW-2744)
ARROW_RETURN_NOT_OK(value_builder_->Resize(0));
}
std::shared_ptr<ArrayData> items;
ARROW_RETURN_NOT_OK(value_builder_->FinishInternal(&items));
*out = ArrayData::Make(type(), length_, {null_bitmap, offsets}, {std::move(items)},
null_count_);
Reset();
return Status::OK();
}
Status ValidateOverflow(int64_t new_elements) const {
auto new_length = value_builder_->length() + new_elements;
if (ARROW_PREDICT_FALSE(new_length > maximum_elements())) {
return Status::CapacityError("List array cannot contain more than ",
maximum_elements(), " elements, have ", new_elements);
} else {
return Status::OK();
}
}
ArrayBuilder* value_builder() const { return value_builder_.get(); }
// Cannot make this a static attribute because of linking issues
static constexpr int64_t maximum_elements() {
return std::numeric_limits<offset_type>::max() - 1;
}
std::shared_ptr<DataType> type() const override {
return std::make_shared<TYPE>(value_field_->WithType(value_builder_->type()));
}
protected:
TypedBufferBuilder<offset_type> offsets_builder_;
std::shared_ptr<ArrayBuilder> value_builder_;
std::shared_ptr<Field> value_field_;
Status AppendNextOffset() {
ARROW_RETURN_NOT_OK(ValidateOverflow(0));
const int64_t num_values = value_builder_->length();
return offsets_builder_.Append(static_cast<offset_type>(num_values));
}
};
/// \class ListBuilder
/// \brief Builder class for variable-length list array value types
///
/// To use this class, you must append values to the child array builder and use
/// the Append function to delimit each distinct list value (once the values
/// have been appended to the child array) or use the bulk API to append
/// a sequence of offsets and null values.
///
/// A note on types. Per arrow/type.h all types in the c++ implementation are
/// logical, so even though this class always builds a list array, it can
/// represent multiple different logical types. If no logical type is provided
/// at construction time, the class defaults to List<T> where T is taken from the
/// value_builder/values that the object is constructed with.
class ARROW_EXPORT ListBuilder : public BaseListBuilder<ListType> {
public:
using BaseListBuilder::BaseListBuilder;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<ListArray>* out) { return FinishTyped(out); }
};
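// A minimal usage sketch for ListBuilder, assuming Int32Builder from
// arrow/array/builder_primitive.h is also visible here; the function name
// BuildListSketch is hypothetical and illustrative only. Each call to Append()
// (or AppendNull()) delimits one list slot; the values themselves go through
// the shared child builder.
inline Status BuildListSketch(std::shared_ptr<ListArray>* out) {
  auto pool = default_memory_pool();
  auto value_builder = std::make_shared<Int32Builder>(pool);
  ListBuilder list_builder(pool, value_builder);
  // list 0: [1, 2]
  ARROW_RETURN_NOT_OK(list_builder.Append());
  ARROW_RETURN_NOT_OK(value_builder->Append(1));
  ARROW_RETURN_NOT_OK(value_builder->Append(2));
  // list 1: null
  ARROW_RETURN_NOT_OK(list_builder.AppendNull());
  // list 2: [] (valid but empty)
  ARROW_RETURN_NOT_OK(list_builder.Append());
  return list_builder.Finish(out);  // [[1, 2], null, []]
}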
/// \class LargeListBuilder
/// \brief Builder class for large variable-length list array value types
///
/// Like ListBuilder, but to create large list arrays (with 64-bit offsets).
class ARROW_EXPORT LargeListBuilder : public BaseListBuilder<LargeListType> {
public:
using BaseListBuilder::BaseListBuilder;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<LargeListArray>* out) { return FinishTyped(out); }
};
// ----------------------------------------------------------------------
// Map builder
/// \class MapBuilder
/// \brief Builder class for arrays of variable-size maps
///
/// To use this class, you must append values to the key and item array builders
/// and use the Append function to delimit each distinct map (once the keys and items
/// have been appended) or use the bulk API to append a sequence of offsets and null
/// maps.
///
/// Key uniqueness and ordering are not validated.
class ARROW_EXPORT MapBuilder : public ArrayBuilder {
public:
/// Use this constructor to define the built array's type explicitly. If key_builder
/// or item_builder has indeterminate type, this builder will also.
MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& key_builder,
const std::shared_ptr<ArrayBuilder>& item_builder,
const std::shared_ptr<DataType>& type);
/// Use this constructor to infer the built array's type. If key_builder or
/// item_builder has indeterminate type, this builder will also.
MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& key_builder,
const std::shared_ptr<ArrayBuilder>& item_builder, bool keys_sorted = false);
MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& item_builder,
const std::shared_ptr<DataType>& type);
Status Resize(int64_t capacity) override;
void Reset() override;
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<MapArray>* out) { return FinishTyped(out); }
/// \brief Vector append
///
/// If passed, valid_bytes is of equal length to values, and any zero byte
/// will be considered as a null for that slot
Status AppendValues(const int32_t* offsets, int64_t length,
const uint8_t* valid_bytes = NULLPTR);
/// \brief Start a new variable-length map slot
///
/// This function should be called before beginning to append elements to the
/// key and item builders
Status Append();
Status AppendNull() final;
Status AppendNulls(int64_t length) final;
Status AppendEmptyValue() final;
Status AppendEmptyValues(int64_t length) final;
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override {
const int32_t* offsets = array.GetValues<int32_t>(1);
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0]->data() : NULLPTR;
for (int64_t row = offset; row < offset + length; row++) {
if (!validity || bit_util::GetBit(validity, array.offset + row)) {
ARROW_RETURN_NOT_OK(Append());
const int64_t slot_length = offsets[row + 1] - offsets[row];
ARROW_RETURN_NOT_OK(key_builder_->AppendArraySlice(
*array.child_data[0]->child_data[0], offsets[row], slot_length));
ARROW_RETURN_NOT_OK(item_builder_->AppendArraySlice(
*array.child_data[0]->child_data[1], offsets[row], slot_length));
} else {
ARROW_RETURN_NOT_OK(AppendNull());
}
}
return Status::OK();
}
/// \brief Get builder to append keys.
///
  /// Appending a key with this builder should be followed by appending
/// an item or null value with item_builder().
ArrayBuilder* key_builder() const { return key_builder_.get(); }
/// \brief Get builder to append items
///
/// Appending an item with this builder should have been preceded
/// by appending a key with key_builder().
ArrayBuilder* item_builder() const { return item_builder_.get(); }
/// \brief Get builder to add Map entries as struct values.
///
/// This is used instead of key_builder()/item_builder() and allows
/// the Map to be built as a list of struct values.
ArrayBuilder* value_builder() const { return list_builder_->value_builder(); }
std::shared_ptr<DataType> type() const override {
// Key and Item builder may update types, but they don't contain the field names,
// so we need to reconstruct the type. (See ARROW-13735.)
return std::make_shared<MapType>(
field(entries_name_,
struct_({field(key_name_, key_builder_->type(), false),
field(item_name_, item_builder_->type(), item_nullable_)}),
false),
keys_sorted_);
}
Status ValidateOverflow(int64_t new_elements) {
return list_builder_->ValidateOverflow(new_elements);
}
protected:
inline Status AdjustStructBuilderLength();
protected:
bool keys_sorted_ = false;
bool item_nullable_ = false;
std::string entries_name_;
std::string key_name_;
std::string item_name_;
std::shared_ptr<ListBuilder> list_builder_;
std::shared_ptr<ArrayBuilder> key_builder_;
std::shared_ptr<ArrayBuilder> item_builder_;
};
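// A minimal usage sketch for MapBuilder, assuming StringBuilder and
// Int32Builder from the binary and primitive builder headers are also visible
// here; the function name BuildMapSketch is hypothetical and illustrative
// only. Append() delimits one map; keys and items are appended pairwise to the
// child builders.
inline Status BuildMapSketch(std::shared_ptr<MapArray>* out) {
  auto pool = default_memory_pool();
  auto key_builder = std::make_shared<StringBuilder>(pool);
  auto item_builder = std::make_shared<Int32Builder>(pool);
  MapBuilder map_builder(pool, key_builder, item_builder);
  // map 0: {"a": 1, "b": 2}
  ARROW_RETURN_NOT_OK(map_builder.Append());
  ARROW_RETURN_NOT_OK(key_builder->Append("a"));
  ARROW_RETURN_NOT_OK(item_builder->Append(1));
  ARROW_RETURN_NOT_OK(key_builder->Append("b"));
  ARROW_RETURN_NOT_OK(item_builder->Append(2));
  // map 1: null
  ARROW_RETURN_NOT_OK(map_builder.AppendNull());
  return map_builder.Finish(out);
}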
// ----------------------------------------------------------------------
// FixedSizeList builder
/// \class FixedSizeListBuilder
/// \brief Builder class for fixed-length list array value types
class ARROW_EXPORT FixedSizeListBuilder : public ArrayBuilder {
public:
/// Use this constructor to define the built array's type explicitly. If value_builder
/// has indeterminate type, this builder will also.
FixedSizeListBuilder(MemoryPool* pool,
std::shared_ptr<ArrayBuilder> const& value_builder,
int32_t list_size);
/// Use this constructor to infer the built array's type. If value_builder has
/// indeterminate type, this builder will also.
FixedSizeListBuilder(MemoryPool* pool,
std::shared_ptr<ArrayBuilder> const& value_builder,
const std::shared_ptr<DataType>& type);
Status Resize(int64_t capacity) override;
void Reset() override;
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<FixedSizeListArray>* out) { return FinishTyped(out); }
/// \brief Append a valid fixed length list.
///
/// This function affects only the validity bitmap; the child values must be appended
/// using the child array builder.
Status Append();
/// \brief Vector append
///
  /// If passed, valid_bytes will be read and any zero byte
/// will cause the corresponding slot to be null
///
/// This function affects only the validity bitmap; the child values must be appended
/// using the child array builder. This includes appending nulls for null lists.
/// XXX this restriction is confusing, should this method be omitted?
Status AppendValues(int64_t length, const uint8_t* valid_bytes = NULLPTR);
/// \brief Append a null fixed length list.
///
/// The child array builder will have the appropriate number of nulls appended
/// automatically.
Status AppendNull() final;
/// \brief Append length null fixed length lists.
///
/// The child array builder will have the appropriate number of nulls appended
/// automatically.
Status AppendNulls(int64_t length) final;
Status ValidateOverflow(int64_t new_elements);
Status AppendEmptyValue() final;
Status AppendEmptyValues(int64_t length) final;
Status AppendArraySlice(const ArrayData& array, int64_t offset, int64_t length) final {
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0]->data() : NULLPTR;
for (int64_t row = offset; row < offset + length; row++) {
if (!validity || bit_util::GetBit(validity, array.offset + row)) {
ARROW_RETURN_NOT_OK(value_builder_->AppendArraySlice(
*array.child_data[0], list_size_ * (array.offset + row), list_size_));
ARROW_RETURN_NOT_OK(Append());
} else {
ARROW_RETURN_NOT_OK(AppendNull());
}
}
return Status::OK();
}
ArrayBuilder* value_builder() const { return value_builder_.get(); }
std::shared_ptr<DataType> type() const override {
return fixed_size_list(value_field_->WithType(value_builder_->type()), list_size_);
}
// Cannot make this a static attribute because of linking issues
static constexpr int64_t maximum_elements() {
return std::numeric_limits<FixedSizeListType::offset_type>::max() - 1;
}
protected:
std::shared_ptr<Field> value_field_;
const int32_t list_size_;
std::shared_ptr<ArrayBuilder> value_builder_;
};
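// A minimal usage sketch for FixedSizeListBuilder, assuming FloatBuilder from
// arrow/array/builder_primitive.h is also visible here; the function name
// BuildFixedSizeListSketch is hypothetical and illustrative only. Append()
// marks one list slot; exactly list_size child values belong to each valid
// slot, and AppendNull() pads the child builder automatically.
inline Status BuildFixedSizeListSketch(std::shared_ptr<FixedSizeListArray>* out) {
  auto pool = default_memory_pool();
  auto value_builder = std::make_shared<FloatBuilder>(pool);
  FixedSizeListBuilder list_builder(pool, value_builder, /*list_size=*/3);
  // slot 0: [1.0, 2.0, 3.0]
  ARROW_RETURN_NOT_OK(list_builder.Append());
  ARROW_RETURN_NOT_OK(value_builder->Append(1.0f));
  ARROW_RETURN_NOT_OK(value_builder->Append(2.0f));
  ARROW_RETURN_NOT_OK(value_builder->Append(3.0f));
  // slot 1: null (three child nulls are appended automatically)
  ARROW_RETURN_NOT_OK(list_builder.AppendNull());
  return list_builder.Finish(out);
}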
// ----------------------------------------------------------------------
// Struct
// ---------------------------------------------------------------------------------
// StructArray builder
/// The Append, Resize and Reserve methods act on the StructBuilder itself.
/// Please make sure the corresponding methods of all child builders are called
/// consistently to maintain data-structure consistency.
class ARROW_EXPORT StructBuilder : public ArrayBuilder {
public:
/// If any of field_builders has indeterminate type, this builder will also
StructBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
std::vector<std::shared_ptr<ArrayBuilder>> field_builders);
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<StructArray>* out) { return FinishTyped(out); }
  /// The null bitmap is of equal length to every child field, and any zero byte
  /// will be considered as a null for that field. However, users must call the
  /// append or advance methods of the child builders independently to insert
  /// the actual data.
Status AppendValues(int64_t length, const uint8_t* valid_bytes) {
ARROW_RETURN_NOT_OK(Reserve(length));
UnsafeAppendToBitmap(valid_bytes, length);
return Status::OK();
}
/// Append an element to the Struct. All child-builders' Append method must
/// be called independently to maintain data-structure consistency.
Status Append(bool is_valid = true) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppendToBitmap(is_valid);
return Status::OK();
}
/// \brief Append a null value. Automatically appends an empty value to each child
/// builder.
Status AppendNull() final {
for (const auto& field : children_) {
ARROW_RETURN_NOT_OK(field->AppendEmptyValue());
}
return Append(false);
}
/// \brief Append multiple null values. Automatically appends empty values to each
/// child builder.
Status AppendNulls(int64_t length) final {
for (const auto& field : children_) {
ARROW_RETURN_NOT_OK(field->AppendEmptyValues(length));
}
ARROW_RETURN_NOT_OK(Reserve(length));
UnsafeAppendToBitmap(length, false);
return Status::OK();
}
Status AppendEmptyValue() final {
for (const auto& field : children_) {
ARROW_RETURN_NOT_OK(field->AppendEmptyValue());
}
return Append(true);
}
Status AppendEmptyValues(int64_t length) final {
for (const auto& field : children_) {
ARROW_RETURN_NOT_OK(field->AppendEmptyValues(length));
}
ARROW_RETURN_NOT_OK(Reserve(length));
UnsafeAppendToBitmap(length, true);
return Status::OK();
}
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override {
for (int i = 0; static_cast<size_t>(i) < children_.size(); i++) {
ARROW_RETURN_NOT_OK(children_[i]->AppendArraySlice(*array.child_data[i],
array.offset + offset, length));
}
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0]->data() : NULLPTR;
ARROW_RETURN_NOT_OK(Reserve(length));
UnsafeAppendToBitmap(validity, array.offset + offset, length);
return Status::OK();
}
void Reset() override;
ArrayBuilder* field_builder(int i) const { return children_[i].get(); }
int num_fields() const { return static_cast<int>(children_.size()); }
std::shared_ptr<DataType> type() const override;
private:
std::shared_ptr<DataType> type_;
};
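// A minimal usage sketch for StructBuilder, assuming Int32Builder and
// StringBuilder from the primitive and binary builder headers are also visible
// here; the function name BuildStructSketch is hypothetical and illustrative
// only. Each Append() adds one struct slot; the field values must be appended
// to the child builders independently.
inline Status BuildStructSketch(std::shared_ptr<StructArray>* out) {
  auto pool = default_memory_pool();
  auto id_builder = std::make_shared<Int32Builder>(pool);
  auto name_builder = std::make_shared<StringBuilder>(pool);
  auto struct_type = struct_({field("id", int32()), field("name", utf8())});
  StructBuilder struct_builder(struct_type, pool, {id_builder, name_builder});
  // row 0: {id: 1, name: "a"}
  ARROW_RETURN_NOT_OK(struct_builder.Append());
  ARROW_RETURN_NOT_OK(id_builder->Append(1));
  ARROW_RETURN_NOT_OK(name_builder->Append("a"));
  // row 1: null (empty values are appended to each child automatically)
  ARROW_RETURN_NOT_OK(struct_builder.AppendNull());
  return struct_builder.Finish(out);
}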
/// @}
} // namespace arrow

View File

@ -0,0 +1,539 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <memory>
#include <vector>
#include "arrow/array/builder_base.h"
#include "arrow/array/data.h"
#include "arrow/result.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
namespace arrow {
class ARROW_EXPORT NullBuilder : public ArrayBuilder {
public:
explicit NullBuilder(MemoryPool* pool = default_memory_pool()) : ArrayBuilder(pool) {}
explicit NullBuilder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool())
: NullBuilder(pool) {}
/// \brief Append the specified number of null elements
Status AppendNulls(int64_t length) final {
    if (length < 0) return Status::Invalid("length must be non-negative");
null_count_ += length;
length_ += length;
return Status::OK();
}
/// \brief Append a single null element
Status AppendNull() final { return AppendNulls(1); }
Status AppendEmptyValues(int64_t length) final { return AppendNulls(length); }
Status AppendEmptyValue() final { return AppendEmptyValues(1); }
Status Append(std::nullptr_t) { return AppendNull(); }
Status AppendArraySlice(const ArrayData&, int64_t, int64_t length) override {
return AppendNulls(length);
}
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
std::shared_ptr<DataType> type() const override { return null(); }
Status Finish(std::shared_ptr<NullArray>* out) { return FinishTyped(out); }
};
/// \addtogroup numeric-builders
///
/// @{
/// Base class for all Builders that emit an Array of a scalar numerical type.
template <typename T>
class NumericBuilder : public ArrayBuilder {
public:
using TypeClass = T;
using value_type = typename T::c_type;
using ArrayType = typename TypeTraits<T>::ArrayType;
template <typename T1 = T>
explicit NumericBuilder(
enable_if_parameter_free<T1, MemoryPool*> pool = default_memory_pool())
: ArrayBuilder(pool), type_(TypeTraits<T>::type_singleton()), data_builder_(pool) {}
NumericBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
: ArrayBuilder(pool), type_(type), data_builder_(pool) {}
/// Append a single scalar and increase the size if necessary.
Status Append(const value_type val) {
ARROW_RETURN_NOT_OK(ArrayBuilder::Reserve(1));
UnsafeAppend(val);
return Status::OK();
}
  /// \brief Append the specified number of null elements
  ///
  /// The memory at the corresponding data slots is set to 0 to prevent
  /// uninitialized memory access
Status AppendNulls(int64_t length) final {
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(length, value_type{}); // zero
UnsafeSetNull(length);
return Status::OK();
}
/// \brief Append a single null element
Status AppendNull() final {
ARROW_RETURN_NOT_OK(Reserve(1));
data_builder_.UnsafeAppend(value_type{}); // zero
UnsafeAppendToBitmap(false);
return Status::OK();
}
  /// \brief Append an empty element
Status AppendEmptyValue() final {
ARROW_RETURN_NOT_OK(Reserve(1));
data_builder_.UnsafeAppend(value_type{}); // zero
UnsafeAppendToBitmap(true);
return Status::OK();
}
/// \brief Append several empty elements
Status AppendEmptyValues(int64_t length) final {
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(length, value_type{}); // zero
UnsafeSetNotNull(length);
return Status::OK();
}
value_type GetValue(int64_t index) const { return data_builder_.data()[index]; }
void Reset() override { data_builder_.Reset(); }
Status Resize(int64_t capacity) override {
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
capacity = std::max(capacity, kMinBuilderCapacity);
ARROW_RETURN_NOT_OK(data_builder_.Resize(capacity));
return ArrayBuilder::Resize(capacity);
}
value_type operator[](int64_t index) const { return GetValue(index); }
value_type& operator[](int64_t index) {
return reinterpret_cast<value_type*>(data_builder_.mutable_data())[index];
}
/// \brief Append a sequence of elements in one shot
/// \param[in] values a contiguous C array of values
/// \param[in] length the number of values to append
/// \param[in] valid_bytes an optional sequence of bytes where non-zero
/// indicates a valid (non-null) value
/// \return Status
Status AppendValues(const value_type* values, int64_t length,
const uint8_t* valid_bytes = NULLPTR) {
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(values, length);
    // length_ is updated by this call
ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
return Status::OK();
}
/// \brief Append a sequence of elements in one shot
/// \param[in] values a contiguous C array of values
/// \param[in] length the number of values to append
/// \param[in] bitmap a validity bitmap to copy (may be null)
/// \param[in] bitmap_offset an offset into the validity bitmap
/// \return Status
Status AppendValues(const value_type* values, int64_t length, const uint8_t* bitmap,
int64_t bitmap_offset) {
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(values, length);
    // length_ is updated by this call
ArrayBuilder::UnsafeAppendToBitmap(bitmap, bitmap_offset, length);
return Status::OK();
}
/// \brief Append a sequence of elements in one shot
/// \param[in] values a contiguous C array of values
/// \param[in] length the number of values to append
/// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
/// (0). Equal in length to values
/// \return Status
Status AppendValues(const value_type* values, int64_t length,
const std::vector<bool>& is_valid) {
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(values, length);
    // length_ is updated by this call
ArrayBuilder::UnsafeAppendToBitmap(is_valid);
return Status::OK();
}
/// \brief Append a sequence of elements in one shot
/// \param[in] values a std::vector of values
/// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
/// (0). Equal in length to values
/// \return Status
Status AppendValues(const std::vector<value_type>& values,
const std::vector<bool>& is_valid) {
return AppendValues(values.data(), static_cast<int64_t>(values.size()), is_valid);
}
/// \brief Append a sequence of elements in one shot
/// \param[in] values a std::vector of values
/// \return Status
Status AppendValues(const std::vector<value_type>& values) {
return AppendValues(values.data(), static_cast<int64_t>(values.size()));
}
Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
ARROW_ASSIGN_OR_RAISE(auto null_bitmap,
null_bitmap_builder_.FinishWithLength(length_));
ARROW_ASSIGN_OR_RAISE(auto data, data_builder_.FinishWithLength(length_));
*out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_);
capacity_ = length_ = null_count_ = 0;
return Status::OK();
}
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<ArrayType>* out) { return FinishTyped(out); }
/// \brief Append a sequence of elements in one shot
/// \param[in] values_begin InputIterator to the beginning of the values
/// \param[in] values_end InputIterator pointing to the end of the values
/// \return Status
template <typename ValuesIter>
Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(values_begin, values_end);
// this updates the length_
UnsafeSetNotNull(length);
return Status::OK();
}
/// \brief Append a sequence of elements in one shot, with a specified nullmap
/// \param[in] values_begin InputIterator to the beginning of the values
/// \param[in] values_end InputIterator pointing to the end of the values
  /// \param[in] valid_begin InputIterator with elements indicating valid (1)
  /// or null (0) values.
/// \return Status
template <typename ValuesIter, typename ValidIter>
enable_if_t<!std::is_pointer<ValidIter>::value, Status> AppendValues(
ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
static_assert(!internal::is_null_pointer<ValidIter>::value,
"Don't pass a NULLPTR directly as valid_begin, use the 2-argument "
"version instead");
int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(values_begin, values_end);
null_bitmap_builder_.UnsafeAppend<true>(
length, [&valid_begin]() -> bool { return *valid_begin++; });
length_ = null_bitmap_builder_.length();
null_count_ = null_bitmap_builder_.false_count();
return Status::OK();
}
// Same as above, with a pointer type ValidIter
template <typename ValuesIter, typename ValidIter>
enable_if_t<std::is_pointer<ValidIter>::value, Status> AppendValues(
ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(values_begin, values_end);
// this updates the length_
if (valid_begin == NULLPTR) {
UnsafeSetNotNull(length);
} else {
null_bitmap_builder_.UnsafeAppend<true>(
length, [&valid_begin]() -> bool { return *valid_begin++; });
length_ = null_bitmap_builder_.length();
null_count_ = null_bitmap_builder_.false_count();
}
return Status::OK();
}
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override {
return AppendValues(array.GetValues<value_type>(1) + offset, length,
array.GetValues<uint8_t>(0, 0), array.offset + offset);
}
/// Append a single scalar under the assumption that the underlying Buffer is
/// large enough.
///
/// This method does not capacity-check; make sure to call Reserve
/// beforehand.
void UnsafeAppend(const value_type val) {
ArrayBuilder::UnsafeAppendToBitmap(true);
data_builder_.UnsafeAppend(val);
}
void UnsafeAppendNull() {
ArrayBuilder::UnsafeAppendToBitmap(false);
data_builder_.UnsafeAppend(value_type{}); // zero
}
std::shared_ptr<DataType> type() const override { return type_; }
protected:
std::shared_ptr<DataType> type_;
TypedBufferBuilder<value_type> data_builder_;
};
// Builders
using UInt8Builder = NumericBuilder<UInt8Type>;
using UInt16Builder = NumericBuilder<UInt16Type>;
using UInt32Builder = NumericBuilder<UInt32Type>;
using UInt64Builder = NumericBuilder<UInt64Type>;
using Int8Builder = NumericBuilder<Int8Type>;
using Int16Builder = NumericBuilder<Int16Type>;
using Int32Builder = NumericBuilder<Int32Type>;
using Int64Builder = NumericBuilder<Int64Type>;
using HalfFloatBuilder = NumericBuilder<HalfFloatType>;
using FloatBuilder = NumericBuilder<FloatType>;
using DoubleBuilder = NumericBuilder<DoubleType>;
/// @}
/// \addtogroup temporal-builders
///
/// @{
using Date32Builder = NumericBuilder<Date32Type>;
using Date64Builder = NumericBuilder<Date64Type>;
using Time32Builder = NumericBuilder<Time32Type>;
using Time64Builder = NumericBuilder<Time64Type>;
using TimestampBuilder = NumericBuilder<TimestampType>;
using MonthIntervalBuilder = NumericBuilder<MonthIntervalType>;
using DurationBuilder = NumericBuilder<DurationType>;
/// @}
class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
public:
using TypeClass = BooleanType;
using value_type = bool;
explicit BooleanBuilder(MemoryPool* pool = default_memory_pool());
BooleanBuilder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool());
  /// \brief Append the specified number of null elements
Status AppendNulls(int64_t length) final {
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(length, false);
UnsafeSetNull(length);
return Status::OK();
}
Status AppendNull() final {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppendNull();
return Status::OK();
}
Status AppendEmptyValue() final {
ARROW_RETURN_NOT_OK(Reserve(1));
data_builder_.UnsafeAppend(false);
UnsafeSetNotNull(1);
return Status::OK();
}
Status AppendEmptyValues(int64_t length) final {
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(length, false);
UnsafeSetNotNull(length);
return Status::OK();
}
/// Scalar append
Status Append(const bool val) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppend(val);
return Status::OK();
}
Status Append(const uint8_t val) { return Append(val != 0); }
/// Scalar append, without checking for capacity
void UnsafeAppend(const bool val) {
data_builder_.UnsafeAppend(val);
UnsafeAppendToBitmap(true);
}
void UnsafeAppendNull() {
data_builder_.UnsafeAppend(false);
UnsafeAppendToBitmap(false);
}
void UnsafeAppend(const uint8_t val) { UnsafeAppend(val != 0); }
/// \brief Append a sequence of elements in one shot
/// \param[in] values a contiguous array of bytes (non-zero is 1)
/// \param[in] length the number of values to append
/// \param[in] valid_bytes an optional sequence of bytes where non-zero
/// indicates a valid (non-null) value
/// \return Status
Status AppendValues(const uint8_t* values, int64_t length,
const uint8_t* valid_bytes = NULLPTR);
/// \brief Append a sequence of elements in one shot
/// \param[in] values a bitmap of values
/// \param[in] length the number of values to append
/// \param[in] validity a validity bitmap to copy (may be null)
/// \param[in] offset an offset into the values and validity bitmaps
/// \return Status
Status AppendValues(const uint8_t* values, int64_t length, const uint8_t* validity,
int64_t offset);
/// \brief Append a sequence of elements in one shot
/// \param[in] values a contiguous C array of values
/// \param[in] length the number of values to append
/// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
/// (0). Equal in length to values
/// \return Status
Status AppendValues(const uint8_t* values, int64_t length,
const std::vector<bool>& is_valid);
/// \brief Append a sequence of elements in one shot
/// \param[in] values a std::vector of bytes
/// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
/// (0). Equal in length to values
/// \return Status
Status AppendValues(const std::vector<uint8_t>& values,
const std::vector<bool>& is_valid);
/// \brief Append a sequence of elements in one shot
/// \param[in] values a std::vector of bytes
/// \return Status
Status AppendValues(const std::vector<uint8_t>& values);
/// \brief Append a sequence of elements in one shot
/// \param[in] values an std::vector<bool> indicating true (1) or false
/// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
/// (0). Equal in length to values
/// \return Status
Status AppendValues(const std::vector<bool>& values, const std::vector<bool>& is_valid);
/// \brief Append a sequence of elements in one shot
/// \param[in] values an std::vector<bool> indicating true (1) or false
/// \return Status
Status AppendValues(const std::vector<bool>& values);
/// \brief Append a sequence of elements in one shot
/// \param[in] values_begin InputIterator to the beginning of the values
  /// \param[in] values_end InputIterator pointing to the end of the values
/// \return Status
template <typename ValuesIter>
Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend<false>(
length, [&values_begin]() -> bool { return *values_begin++; });
// this updates length_
UnsafeSetNotNull(length);
return Status::OK();
}
/// \brief Append a sequence of elements in one shot, with a specified nullmap
/// \param[in] values_begin InputIterator to the beginning of the values
/// \param[in] values_end InputIterator pointing to the end of the values
  /// \param[in] valid_begin InputIterator with elements indicating valid (1)
  /// or null (0) values
/// \return Status
template <typename ValuesIter, typename ValidIter>
enable_if_t<!std::is_pointer<ValidIter>::value, Status> AppendValues(
ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
static_assert(!internal::is_null_pointer<ValidIter>::value,
"Don't pass a NULLPTR directly as valid_begin, use the 2-argument "
"version instead");
int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend<false>(
length, [&values_begin]() -> bool { return *values_begin++; });
null_bitmap_builder_.UnsafeAppend<true>(
length, [&valid_begin]() -> bool { return *valid_begin++; });
length_ = null_bitmap_builder_.length();
null_count_ = null_bitmap_builder_.false_count();
return Status::OK();
}
// Same as above, for a pointer type ValidIter
template <typename ValuesIter, typename ValidIter>
enable_if_t<std::is_pointer<ValidIter>::value, Status> AppendValues(
ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend<false>(
length, [&values_begin]() -> bool { return *values_begin++; });
if (valid_begin == NULLPTR) {
UnsafeSetNotNull(length);
} else {
null_bitmap_builder_.UnsafeAppend<true>(
length, [&valid_begin]() -> bool { return *valid_begin++; });
}
length_ = null_bitmap_builder_.length();
null_count_ = null_bitmap_builder_.false_count();
return Status::OK();
}
Status AppendValues(int64_t length, bool value);
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override {
return AppendValues(array.GetValues<uint8_t>(1, 0), length,
array.GetValues<uint8_t>(0, 0), array.offset + offset);
}
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<BooleanArray>* out) { return FinishTyped(out); }
void Reset() override;
Status Resize(int64_t capacity) override;
std::shared_ptr<DataType> type() const override { return boolean(); }
protected:
TypedBufferBuilder<bool> data_builder_;
};
} // namespace arrow
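
A brief usage sketch of the numeric and boolean builders declared above (illustrative only; values are arbitrary). It uses only the Append, AppendValues, AppendNull, and Finish overloads shown in this header.

#include <memory>
#include <vector>
#include "arrow/api.h"

arrow::Status BuildPrimitiveExample() {
  // Int64Builder: bulk append with a validity vector, then a trailing null.
  arrow::Int64Builder int_builder;
  std::vector<int64_t> values = {1, 2, 3};
  std::vector<bool> is_valid = {true, false, true};
  ARROW_RETURN_NOT_OK(int_builder.AppendValues(values, is_valid));
  ARROW_RETURN_NOT_OK(int_builder.AppendNull());
  std::shared_ptr<arrow::Int64Array> ints;
  ARROW_RETURN_NOT_OK(int_builder.Finish(&ints));

  // BooleanBuilder: scalar appends.
  arrow::BooleanBuilder bool_builder;
  ARROW_RETURN_NOT_OK(bool_builder.Append(true));
  ARROW_RETURN_NOT_OK(bool_builder.AppendNull());
  std::shared_ptr<arrow::BooleanArray> bools;
  return bool_builder.Finish(&bools);
}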

View File

@ -0,0 +1,62 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Contains declarations of time related Arrow builder types.
#pragma once
#include <memory>
#include "arrow/array/builder_base.h"
#include "arrow/array/builder_primitive.h"
namespace arrow {
/// \addtogroup temporal-builders
///
/// @{
// TODO(ARROW-7938): this class is untested
class ARROW_EXPORT DayTimeIntervalBuilder : public NumericBuilder<DayTimeIntervalType> {
public:
using DayMilliseconds = DayTimeIntervalType::DayMilliseconds;
explicit DayTimeIntervalBuilder(MemoryPool* pool = default_memory_pool())
: DayTimeIntervalBuilder(day_time_interval(), pool) {}
explicit DayTimeIntervalBuilder(std::shared_ptr<DataType> type,
MemoryPool* pool = default_memory_pool())
: NumericBuilder<DayTimeIntervalType>(type, pool) {}
};
class ARROW_EXPORT MonthDayNanoIntervalBuilder
: public NumericBuilder<MonthDayNanoIntervalType> {
public:
using MonthDayNanos = MonthDayNanoIntervalType::MonthDayNanos;
explicit MonthDayNanoIntervalBuilder(MemoryPool* pool = default_memory_pool())
: MonthDayNanoIntervalBuilder(month_day_nano_interval(), pool) {}
explicit MonthDayNanoIntervalBuilder(std::shared_ptr<DataType> type,
MemoryPool* pool = default_memory_pool())
: NumericBuilder<MonthDayNanoIntervalType>(type, pool) {}
};
/// @}
} // namespace arrow
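
A short illustrative sketch of the interval builders above. It assumes DayMilliseconds is an aggregate of {days, milliseconds}; the rest uses the NumericBuilder API from the previous header.

#include <memory>
#include "arrow/api.h"

arrow::Status BuildIntervalExample(std::shared_ptr<arrow::Array>* out) {
  arrow::DayTimeIntervalBuilder builder;
  // Assumed aggregate layout: {days, milliseconds}.
  ARROW_RETURN_NOT_OK(
      builder.Append(arrow::DayTimeIntervalType::DayMilliseconds{3, 250}));
  ARROW_RETURN_NOT_OK(builder.AppendNull());
  return builder.Finish(out);
}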

View File

@ -0,0 +1,248 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "arrow/array/array_nested.h"
#include "arrow/array/builder_base.h"
#include "arrow/array/data.h"
#include "arrow/buffer_builder.h"
#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \addtogroup nested-builders
///
/// @{
/// \brief Base class for union array builders.
///
/// Note that while we subclass ArrayBuilder, as union types do not have a
/// validity bitmap, the bitmap builder member of ArrayBuilder is not used.
class ARROW_EXPORT BasicUnionBuilder : public ArrayBuilder {
public:
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<UnionArray>* out) { return FinishTyped(out); }
/// \brief Make a new child builder available to the UnionArray
///
/// \param[in] new_child the child builder
/// \param[in] field_name the name of the field in the union array type
/// if type inference is used
/// \return child index, which is the "type" argument that needs
/// to be passed to the "Append" method to add a new element to
/// the union array.
int8_t AppendChild(const std::shared_ptr<ArrayBuilder>& new_child,
const std::string& field_name = "");
std::shared_ptr<DataType> type() const override;
int64_t length() const override { return types_builder_.length(); }
protected:
BasicUnionBuilder(MemoryPool* pool,
const std::vector<std::shared_ptr<ArrayBuilder>>& children,
const std::shared_ptr<DataType>& type);
int8_t NextTypeId();
std::vector<std::shared_ptr<Field>> child_fields_;
std::vector<int8_t> type_codes_;
UnionMode::type mode_;
std::vector<ArrayBuilder*> type_id_to_children_;
std::vector<int> type_id_to_child_id_;
// for all type_id < dense_type_id_, type_id_to_children_[type_id] != nullptr
int8_t dense_type_id_ = 0;
TypedBufferBuilder<int8_t> types_builder_;
};
/// \class DenseUnionBuilder
///
/// This API is EXPERIMENTAL.
class ARROW_EXPORT DenseUnionBuilder : public BasicUnionBuilder {
public:
/// Use this constructor to initialize the UnionBuilder with no child builders,
/// allowing type to be inferred. You will need to call AppendChild for each of the
/// children builders you want to use.
explicit DenseUnionBuilder(MemoryPool* pool)
: BasicUnionBuilder(pool, {}, dense_union(FieldVector{})), offsets_builder_(pool) {}
/// Use this constructor to specify the type explicitly.
/// You can still add child builders to the union after using this constructor
DenseUnionBuilder(MemoryPool* pool,
const std::vector<std::shared_ptr<ArrayBuilder>>& children,
const std::shared_ptr<DataType>& type)
: BasicUnionBuilder(pool, children, type), offsets_builder_(pool) {}
Status AppendNull() final {
const int8_t first_child_code = type_codes_[0];
ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
ARROW_RETURN_NOT_OK(
offsets_builder_.Append(static_cast<int32_t>(child_builder->length())));
// Append a null arbitrarily to the first child
return child_builder->AppendNull();
}
Status AppendNulls(int64_t length) final {
const int8_t first_child_code = type_codes_[0];
ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
ARROW_RETURN_NOT_OK(
offsets_builder_.Append(length, static_cast<int32_t>(child_builder->length())));
// Append just a single null to the first child
return child_builder->AppendNull();
}
Status AppendEmptyValue() final {
const int8_t first_child_code = type_codes_[0];
ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
ARROW_RETURN_NOT_OK(
offsets_builder_.Append(static_cast<int32_t>(child_builder->length())));
// Append an empty value arbitrarily to the first child
return child_builder->AppendEmptyValue();
}
Status AppendEmptyValues(int64_t length) final {
const int8_t first_child_code = type_codes_[0];
ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
ARROW_RETURN_NOT_OK(
offsets_builder_.Append(length, static_cast<int32_t>(child_builder->length())));
// Append just a single empty value to the first child
return child_builder->AppendEmptyValue();
}
/// \brief Append an element to the UnionArray. This must be followed
/// by an append to the appropriate child builder.
///
/// \param[in] next_type type_id of the child to which the next value will be appended.
///
/// The corresponding child builder must be appended to independently after this method
/// is called.
Status Append(int8_t next_type) {
ARROW_RETURN_NOT_OK(types_builder_.Append(next_type));
if (type_id_to_children_[next_type]->length() == kListMaximumElements) {
return Status::CapacityError(
"a dense UnionArray cannot contain more than 2^31 - 1 elements from a single "
"child");
}
auto offset = static_cast<int32_t>(type_id_to_children_[next_type]->length());
return offsets_builder_.Append(offset);
}
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override;
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
private:
TypedBufferBuilder<int32_t> offsets_builder_;
};
/// \class SparseUnionBuilder
///
/// This API is EXPERIMENTAL.
class ARROW_EXPORT SparseUnionBuilder : public BasicUnionBuilder {
public:
/// Use this constructor to initialize the UnionBuilder with no child builders,
/// allowing type to be inferred. You will need to call AppendChild for each of the
/// children builders you want to use.
explicit SparseUnionBuilder(MemoryPool* pool)
: BasicUnionBuilder(pool, {}, sparse_union(FieldVector{})) {}
/// Use this constructor to specify the type explicitly.
/// You can still add child builders to the union after using this constructor
SparseUnionBuilder(MemoryPool* pool,
const std::vector<std::shared_ptr<ArrayBuilder>>& children,
const std::shared_ptr<DataType>& type)
: BasicUnionBuilder(pool, children, type) {}
/// \brief Append a null value.
///
/// A null is appended to the first child, empty values to the other children.
Status AppendNull() final {
const auto first_child_code = type_codes_[0];
ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
ARROW_RETURN_NOT_OK(type_id_to_children_[first_child_code]->AppendNull());
for (int i = 1; i < static_cast<int>(type_codes_.size()); ++i) {
ARROW_RETURN_NOT_OK(type_id_to_children_[type_codes_[i]]->AppendEmptyValue());
}
return Status::OK();
}
/// \brief Append multiple null values.
///
/// Nulls are appended to the first child, empty values to the other children.
Status AppendNulls(int64_t length) final {
const auto first_child_code = type_codes_[0];
ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
ARROW_RETURN_NOT_OK(type_id_to_children_[first_child_code]->AppendNulls(length));
for (int i = 1; i < static_cast<int>(type_codes_.size()); ++i) {
ARROW_RETURN_NOT_OK(
type_id_to_children_[type_codes_[i]]->AppendEmptyValues(length));
}
return Status::OK();
}
Status AppendEmptyValue() final {
ARROW_RETURN_NOT_OK(types_builder_.Append(type_codes_[0]));
for (int8_t code : type_codes_) {
ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendEmptyValue());
}
return Status::OK();
}
Status AppendEmptyValues(int64_t length) final {
ARROW_RETURN_NOT_OK(types_builder_.Append(length, type_codes_[0]));
for (int8_t code : type_codes_) {
ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendEmptyValues(length));
}
return Status::OK();
}
/// \brief Append an element to the UnionArray. This must be followed
/// by an append to the appropriate child builder.
///
/// \param[in] next_type type_id of the child to which the next value will be appended.
///
/// The corresponding child builder must be appended to independently after this method
/// is called, and all other child builders must have null or empty value appended.
Status Append(int8_t next_type) { return types_builder_.Append(next_type); }
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override;
};
/// @}
} // namespace arrow
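
A minimal dense-union sketch based on the builders above (illustrative; field names are arbitrary). Each Append(type_id) is followed by an append to the corresponding child, as the comments require.

#include <memory>
#include "arrow/api.h"

arrow::Status BuildDenseUnionExample(std::shared_ptr<arrow::UnionArray>* out) {
  arrow::DenseUnionBuilder builder(arrow::default_memory_pool());
  auto ints = std::make_shared<arrow::Int32Builder>();
  auto strs = std::make_shared<arrow::StringBuilder>();
  const int8_t int_code = builder.AppendChild(ints, "i");
  const int8_t str_code = builder.AppendChild(strs, "s");
  // Record which child receives the next value, then append to that child.
  ARROW_RETURN_NOT_OK(builder.Append(int_code));
  ARROW_RETURN_NOT_OK(ints->Append(42));
  ARROW_RETURN_NOT_OK(builder.Append(str_code));
  ARROW_RETURN_NOT_OK(strs->Append("x"));
  return builder.Finish(out);
}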

View File

@ -0,0 +1,37 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \brief Concatenate arrays
///
/// \param[in] arrays a vector of arrays to be concatenated
/// \param[in] pool memory to store the result will be allocated from this memory pool
/// \return the concatenated array
ARROW_EXPORT
Result<std::shared_ptr<Array>> Concatenate(const ArrayVector& arrays,
MemoryPool* pool = default_memory_pool());
} // namespace arrow
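
An illustrative sketch of Concatenate; it assumes two same-typed arrays built elsewhere and simply forwards them.

#include <memory>
#include "arrow/api.h"

arrow::Result<std::shared_ptr<arrow::Array>> ConcatExample(
    const std::shared_ptr<arrow::Array>& first,
    const std::shared_ptr<arrow::Array>& second) {
  // Both inputs must have the same type; the result is one contiguous array.
  return arrow::Concatenate({first, second});
}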

View File

@ -0,0 +1,258 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <atomic> // IWYU pragma: export
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>
#include "arrow/buffer.h"
#include "arrow/result.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
// When slicing, we do not know the null count of the sliced range without
// doing some computation. To avoid doing this eagerly, we set the null count
// to -1 (any negative number will do). When Array::null_count is called the
// first time, the null count will be computed. See ARROW-33
constexpr int64_t kUnknownNullCount = -1;
// ----------------------------------------------------------------------
// Generic array data container
/// \class ArrayData
/// \brief Mutable container for generic Arrow array data
///
/// This data structure is a self-contained representation of the memory and
/// metadata inside an Arrow array data structure (called vectors in Java). The
/// classes arrow::Array and its subclasses provide strongly-typed accessors
/// with support for the visitor pattern and other affordances.
///
/// This class is designed for easy internal data manipulation, analytical data
/// processing, and data transport to and from IPC messages. For example, we
/// could cast from int64 to float64 like so:
///
/// Int64Array arr = GetMyData();
/// auto new_data = arr.data()->Copy();
/// new_data->type = arrow::float64();
/// DoubleArray double_arr(new_data);
///
/// This object is also useful in an analytics setting where memory may be
/// reused. For example, if we had a group of operations all returning doubles,
/// say:
///
/// Log(Sqrt(Expr(arr)))
///
/// Then the low-level implementations of each of these functions could have
/// the signatures
///
/// void Log(const ArrayData& values, ArrayData* out);
///
/// As another example a function may consume one or more memory buffers in an
/// input array and replace them with newly-allocated data, changing the output
/// data type as well.
struct ARROW_EXPORT ArrayData {
ArrayData() = default;
ArrayData(std::shared_ptr<DataType> type, int64_t length,
int64_t null_count = kUnknownNullCount, int64_t offset = 0)
: type(std::move(type)), length(length), null_count(null_count), offset(offset) {}
ArrayData(std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
int64_t null_count = kUnknownNullCount, int64_t offset = 0)
: ArrayData(std::move(type), length, null_count, offset) {
this->buffers = std::move(buffers);
}
ArrayData(std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
std::vector<std::shared_ptr<ArrayData>> child_data,
int64_t null_count = kUnknownNullCount, int64_t offset = 0)
: ArrayData(std::move(type), length, null_count, offset) {
this->buffers = std::move(buffers);
this->child_data = std::move(child_data);
}
static std::shared_ptr<ArrayData> Make(std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
int64_t null_count = kUnknownNullCount,
int64_t offset = 0);
static std::shared_ptr<ArrayData> Make(
std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
std::vector<std::shared_ptr<ArrayData>> child_data,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
static std::shared_ptr<ArrayData> Make(
std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
std::vector<std::shared_ptr<ArrayData>> child_data,
std::shared_ptr<ArrayData> dictionary, int64_t null_count = kUnknownNullCount,
int64_t offset = 0);
static std::shared_ptr<ArrayData> Make(std::shared_ptr<DataType> type, int64_t length,
int64_t null_count = kUnknownNullCount,
int64_t offset = 0);
// Move constructor
ArrayData(ArrayData&& other) noexcept
: type(std::move(other.type)),
length(other.length),
offset(other.offset),
buffers(std::move(other.buffers)),
child_data(std::move(other.child_data)),
dictionary(std::move(other.dictionary)) {
SetNullCount(other.null_count);
}
// Copy constructor
ArrayData(const ArrayData& other) noexcept
: type(other.type),
length(other.length),
offset(other.offset),
buffers(other.buffers),
child_data(other.child_data),
dictionary(other.dictionary) {
SetNullCount(other.null_count);
}
// Move assignment
ArrayData& operator=(ArrayData&& other) {
type = std::move(other.type);
length = other.length;
SetNullCount(other.null_count);
offset = other.offset;
buffers = std::move(other.buffers);
child_data = std::move(other.child_data);
dictionary = std::move(other.dictionary);
return *this;
}
// Copy assignment
ArrayData& operator=(const ArrayData& other) {
type = other.type;
length = other.length;
SetNullCount(other.null_count);
offset = other.offset;
buffers = other.buffers;
child_data = other.child_data;
dictionary = other.dictionary;
return *this;
}
std::shared_ptr<ArrayData> Copy() const { return std::make_shared<ArrayData>(*this); }
// Access a buffer's data as a typed C pointer
template <typename T>
inline const T* GetValues(int i, int64_t absolute_offset) const {
if (buffers[i]) {
return reinterpret_cast<const T*>(buffers[i]->data()) + absolute_offset;
} else {
return NULLPTR;
}
}
template <typename T>
inline const T* GetValues(int i) const {
return GetValues<T>(i, offset);
}
// Like GetValues, but returns NULLPTR instead of aborting if the underlying
// buffer is not a CPU buffer.
template <typename T>
inline const T* GetValuesSafe(int i, int64_t absolute_offset) const {
if (buffers[i] && buffers[i]->is_cpu()) {
return reinterpret_cast<const T*>(buffers[i]->data()) + absolute_offset;
} else {
return NULLPTR;
}
}
template <typename T>
inline const T* GetValuesSafe(int i) const {
return GetValuesSafe<T>(i, offset);
}
// Access a buffer's data as a typed C pointer
template <typename T>
inline T* GetMutableValues(int i, int64_t absolute_offset) {
if (buffers[i]) {
return reinterpret_cast<T*>(buffers[i]->mutable_data()) + absolute_offset;
} else {
return NULLPTR;
}
}
template <typename T>
inline T* GetMutableValues(int i) {
return GetMutableValues<T>(i, offset);
}
/// \brief Construct a zero-copy slice of the data with the given offset and length
std::shared_ptr<ArrayData> Slice(int64_t offset, int64_t length) const;
/// \brief Input-checking variant of Slice
///
/// An Invalid Status is returned if the requested slice falls out of bounds.
/// Note that unlike Slice, `length` isn't clamped to the available buffer size.
Result<std::shared_ptr<ArrayData>> SliceSafe(int64_t offset, int64_t length) const;
void SetNullCount(int64_t v) { null_count.store(v); }
/// \brief Return null count, or compute and set it if it's not known
int64_t GetNullCount() const;
bool MayHaveNulls() const {
// If an ArrayData is slightly malformed it may have kUnknownNullCount set
// but no buffer
return null_count.load() != 0 && buffers[0] != NULLPTR;
}
std::shared_ptr<DataType> type;
int64_t length = 0;
mutable std::atomic<int64_t> null_count{0};
// The logical start point into the physical buffers (in values, not bytes).
// Note that, for child data, this must be *added* to the child data's own offset.
int64_t offset = 0;
std::vector<std::shared_ptr<Buffer>> buffers;
std::vector<std::shared_ptr<ArrayData>> child_data;
// The dictionary for this Array, if any. Only used for dictionary type
std::shared_ptr<ArrayData> dictionary;
};
namespace internal {
/// Construct a zero-copy view of this ArrayData with the given type.
///
/// This method checks if the types are layout-compatible.
/// Nested types are traversed in depth-first order. Data buffers must have
/// the same item sizes, even though the logical types may be different.
/// An error is returned if the types are not layout-compatible.
ARROW_EXPORT
Result<std::shared_ptr<ArrayData>> GetArrayView(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<DataType>& type);
} // namespace internal
} // namespace arrow
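
A small sketch expanding the docstring's int64-to-float64 example above. Note this is a zero-copy reinterpretation of the shared buffers, not a value cast (illustrative only).

#include <memory>
#include <vector>
#include "arrow/api.h"

arrow::Status ReinterpretExample() {
  arrow::Int64Builder builder;
  std::vector<int64_t> values = {1, 2, 3};
  ARROW_RETURN_NOT_OK(builder.AppendValues(values));
  std::shared_ptr<arrow::Int64Array> ints;
  ARROW_RETURN_NOT_OK(builder.Finish(&ints));
  // Copy the ArrayData descriptor (buffers are shared), swap the logical type,
  // and wrap the same memory as a DoubleArray. int64 and float64 share an
  // 8-byte fixed-width layout, so the view is layout-compatible.
  std::shared_ptr<arrow::ArrayData> data = ints->data()->Copy();
  data->type = arrow::float64();
  arrow::DoubleArray doubles(data);
  (void)doubles;
  return arrow::Status::OK();
}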

View File

@ -0,0 +1,76 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <functional>
#include <iosfwd>
#include <memory>
#include "arrow/array/array_base.h"
#include "arrow/array/array_nested.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \brief Compare two arrays, returning an edit script which expresses the difference
/// between them
///
/// An edit script is an array of struct(insert: bool, run_length: int64_t).
/// Each element of "insert" determines whether an element was inserted into (true)
/// or deleted from (false) base. Each insertion or deletion is followed by a run of
/// elements which are unchanged from base to target; the length of this run is stored
/// in "run_length". (Note that the edit script begins and ends with a run of shared
/// elements but both fields of the struct must have the same length. To accommodate this
/// the first element of "insert" should be ignored.)
///
/// For example for base "hlloo" and target "hello", the edit script would be
/// [
/// {"insert": false, "run_length": 1}, // leading run of length 1 ("h")
/// {"insert": true, "run_length": 3}, // insert("e") then a run of length 3 ("llo")
/// {"insert": false, "run_length": 0} // delete("o") then an empty run
/// ]
///
/// Diffing arrays containing nulls is not currently supported.
///
/// \param[in] base baseline for comparison
/// \param[in] target an array of identical type to base whose elements differ from base's
/// \param[in] pool memory to store the result will be allocated from this memory pool
/// \return an edit script array which can be applied to base to produce target
ARROW_EXPORT
Result<std::shared_ptr<StructArray>> Diff(const Array& base, const Array& target,
MemoryPool* pool = default_memory_pool());
/// \brief visitor interface for easy traversal of an edit script
///
/// visitor will be called for each hunk of insertions and deletions.
ARROW_EXPORT Status VisitEditScript(
const Array& edits,
const std::function<Status(int64_t delete_begin, int64_t delete_end,
int64_t insert_begin, int64_t insert_end)>& visitor);
/// \brief return a function which will format an edit script in unified
/// diff format to os, given base and target arrays of type
ARROW_EXPORT Result<
std::function<Status(const Array& edits, const Array& base, const Array& target)>>
MakeUnifiedDiffFormatter(const DataType& type, std::ostream* os);
} // namespace arrow
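
A hedged sketch of Diff together with the unified-diff formatter declared above; the include path for this header is assumed, and the base/target arrays are supplied by the caller.

#include <iostream>
#include <memory>
#include "arrow/api.h"
#include "arrow/array/diff.h"  // assumed include path for Diff / MakeUnifiedDiffFormatter

arrow::Status PrintDiffExample(const arrow::Array& base, const arrow::Array& target) {
  // Compute the edit script, then print it in unified diff format to stdout.
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::StructArray> edits,
                        arrow::Diff(base, target));
  ARROW_ASSIGN_OR_RAISE(auto format,
                        arrow::MakeUnifiedDiffFormatter(*base.type(), &std::cout));
  return format(*edits, base, target);
}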

View File

@ -0,0 +1,89 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <vector>
#include "arrow/array/data.h"
#include "arrow/compare.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \brief Create a strongly-typed Array instance from generic ArrayData
/// \param[in] data the array contents
/// \return the resulting Array instance
ARROW_EXPORT
std::shared_ptr<Array> MakeArray(const std::shared_ptr<ArrayData>& data);
/// \brief Create a strongly-typed Array instance with all elements null
/// \param[in] type the array type
/// \param[in] length the array length
/// \param[in] pool the memory pool to allocate memory from
ARROW_EXPORT
Result<std::shared_ptr<Array>> MakeArrayOfNull(const std::shared_ptr<DataType>& type,
int64_t length,
MemoryPool* pool = default_memory_pool());
/// \brief Create an Array instance whose slots are the given scalar
/// \param[in] scalar the value with which to fill the array
/// \param[in] length the array length
/// \param[in] pool the memory pool to allocate memory from
ARROW_EXPORT
Result<std::shared_ptr<Array>> MakeArrayFromScalar(
const Scalar& scalar, int64_t length, MemoryPool* pool = default_memory_pool());
/// \brief Create an empty Array of a given type
///
/// The output Array will be of the given type.
///
/// \param[in] type the data type of the empty Array
/// \param[in] pool the memory pool to allocate memory from
/// \return the resulting Array
ARROW_EXPORT
Result<std::shared_ptr<Array>> MakeEmptyArray(std::shared_ptr<DataType> type,
MemoryPool* pool = default_memory_pool());
namespace internal {
/// \brief Swap endian of each element in a generic ArrayData
///
/// As dictionaries are often shared between different arrays, dictionaries
/// are not swapped by this function and should be handled separately.
///
/// \param[in] data the array contents
/// \return the resulting ArrayData whose elements were swapped
ARROW_EXPORT
Result<std::shared_ptr<ArrayData>> SwapEndianArrayData(
const std::shared_ptr<ArrayData>& data);
/// Given a number of ArrayVectors, treat each ArrayVector as the
/// chunks of a chunked array. Then rechunk each ArrayVector such that
/// all ArrayVectors are chunked identically. It is mandatory that
/// all ArrayVectors contain the same total number of elements.
ARROW_EXPORT
std::vector<ArrayVector> RechunkArraysConsistently(const std::vector<ArrayVector>&);
} // namespace internal
} // namespace arrow
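
Illustrative use of the array factory helpers declared above (values are arbitrary).

#include <memory>
#include "arrow/api.h"

arrow::Status FactoryExample() {
  // Five null int32 slots.
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> nulls,
                        arrow::MakeArrayOfNull(arrow::int32(), 5));
  // Five copies of the scalar 7.
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> sevens,
                        arrow::MakeArrayFromScalar(arrow::Int32Scalar(7), 5));
  // An empty float64 array.
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> empty,
                        arrow::MakeEmptyArray(arrow::float64()));
  (void)nulls;
  (void)sevens;
  (void)empty;
  return arrow::Status::OK();
}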

View File

@ -0,0 +1,56 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace internal {
// Internal functions implementing Array::Validate() and friends.
// O(1) array metadata validation
ARROW_EXPORT
Status ValidateArray(const Array& array);
ARROW_EXPORT
Status ValidateArray(const ArrayData& data);
// O(N) array data validation.
// Note that, starting from 7.0.0, "full" routines also validate metadata.
// Before, ValidateArray() needed to be called before ValidateArrayFull()
// to ensure metadata correctness, otherwise invalid memory accesses
// may occur.
ARROW_EXPORT
Status ValidateArrayFull(const Array& array);
ARROW_EXPORT
Status ValidateArrayFull(const ArrayData& data);
ARROW_EXPORT
Status ValidateUTF8(const Array& array);
ARROW_EXPORT
Status ValidateUTF8(const ArrayData& data);
} // namespace internal
} // namespace arrow
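
Per the comment above, these are internal entry points; user code would normally go through the corresponding public Array methods. A short hedged sketch:

#include "arrow/api.h"

arrow::Status ValidateExample(const arrow::Array& array) {
  // O(1) metadata checks.
  ARROW_RETURN_NOT_OK(array.Validate());
  // O(N) data checks (which, per the note above, also cover metadata since 7.0.0).
  return array.ValidateFull();
}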

View File

@ -0,0 +1,506 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "arrow/device.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/string_view.h"
#include "arrow/util/visibility.h"
namespace arrow {
// ----------------------------------------------------------------------
// Buffer classes
/// \class Buffer
/// \brief Object containing a pointer to a piece of contiguous memory with a
/// particular size.
///
/// Buffers have two related notions of length: size and capacity. Size is
/// the number of bytes that might have valid data. Capacity is the number
/// of bytes that were allocated for the buffer in total.
///
/// The Buffer base class does not own its memory, but subclasses often do.
///
/// The following invariant is always true: Size <= Capacity
class ARROW_EXPORT Buffer {
public:
/// \brief Construct from buffer and size without copying memory
///
/// \param[in] data a memory buffer
/// \param[in] size buffer size
///
/// \note The passed memory must be kept alive through some other means
Buffer(const uint8_t* data, int64_t size)
: is_mutable_(false), is_cpu_(true), data_(data), size_(size), capacity_(size) {
SetMemoryManager(default_cpu_memory_manager());
}
Buffer(const uint8_t* data, int64_t size, std::shared_ptr<MemoryManager> mm,
std::shared_ptr<Buffer> parent = NULLPTR)
: is_mutable_(false), data_(data), size_(size), capacity_(size), parent_(parent) {
SetMemoryManager(std::move(mm));
}
Buffer(uintptr_t address, int64_t size, std::shared_ptr<MemoryManager> mm,
std::shared_ptr<Buffer> parent = NULLPTR)
: Buffer(reinterpret_cast<const uint8_t*>(address), size, std::move(mm),
std::move(parent)) {}
/// \brief Construct from string_view without copying memory
///
/// \param[in] data a string_view object
///
/// \note The memory viewed by data must not be deallocated in the lifetime of the
/// Buffer; temporary rvalue strings must be stored in an lvalue somewhere
explicit Buffer(util::string_view data)
: Buffer(reinterpret_cast<const uint8_t*>(data.data()),
static_cast<int64_t>(data.size())) {}
virtual ~Buffer() = default;
/// An offset into data that is owned by another buffer, but we want to be
/// able to retain a valid pointer to it even after other shared_ptr's to the
/// parent buffer have been destroyed
///
/// This method makes no assertions about alignment or padding of the buffer but
  /// in general we expect buffers to be aligned and padded to 64 bytes. In the future
/// we might add utility methods to help determine if a buffer satisfies this contract.
Buffer(const std::shared_ptr<Buffer>& parent, const int64_t offset, const int64_t size)
: Buffer(parent->data_ + offset, size) {
parent_ = parent;
SetMemoryManager(parent->memory_manager_);
}
uint8_t operator[](std::size_t i) const { return data_[i]; }
/// \brief Construct a new std::string with a hexadecimal representation of the buffer.
/// \return std::string
std::string ToHexString();
/// Return true if both buffers are the same size and contain the same bytes
/// up to the number of compared bytes
bool Equals(const Buffer& other, int64_t nbytes) const;
/// Return true if both buffers are the same size and contain the same bytes
bool Equals(const Buffer& other) const;
/// Copy a section of the buffer into a new Buffer.
Result<std::shared_ptr<Buffer>> CopySlice(
const int64_t start, const int64_t nbytes,
MemoryPool* pool = default_memory_pool()) const;
/// Zero bytes in padding, i.e. bytes between size_ and capacity_.
void ZeroPadding() {
#ifndef NDEBUG
CheckMutable();
#endif
// A zero-capacity buffer can have a null data pointer
if (capacity_ != 0) {
memset(mutable_data() + size_, 0, static_cast<size_t>(capacity_ - size_));
}
}
/// \brief Construct an immutable buffer that takes ownership of the contents
/// of an std::string (without copying it).
///
/// \param[in] data a string to own
/// \return a new Buffer instance
static std::shared_ptr<Buffer> FromString(std::string data);
/// \brief Create buffer referencing typed memory with some length without
/// copying
/// \param[in] data the typed memory as C array
/// \param[in] length the number of values in the array
/// \return a new shared_ptr<Buffer>
template <typename T, typename SizeType = int64_t>
static std::shared_ptr<Buffer> Wrap(const T* data, SizeType length) {
return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data),
static_cast<int64_t>(sizeof(T) * length));
}
/// \brief Create buffer referencing std::vector with some length without
/// copying
/// \param[in] data the vector to be referenced. If this vector is changed,
/// the buffer may become invalid
/// \return a new shared_ptr<Buffer>
template <typename T>
static std::shared_ptr<Buffer> Wrap(const std::vector<T>& data) {
return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data.data()),
static_cast<int64_t>(sizeof(T) * data.size()));
}
/// \brief Copy buffer contents into a new std::string
/// \return std::string
/// \note Can throw std::bad_alloc if buffer is large
std::string ToString() const;
/// \brief View buffer contents as a util::string_view
/// \return util::string_view
explicit operator util::string_view() const {
return util::string_view(reinterpret_cast<const char*>(data_), size_);
}
/// \brief View buffer contents as a util::bytes_view
/// \return util::bytes_view
explicit operator util::bytes_view() const { return util::bytes_view(data_, size_); }
/// \brief Return a pointer to the buffer's data
///
/// The buffer has to be a CPU buffer (`is_cpu()` is true).
/// Otherwise, an assertion may be thrown or a null pointer may be returned.
///
/// To get the buffer's data address regardless of its device, call `address()`.
const uint8_t* data() const {
#ifndef NDEBUG
CheckCPU();
#endif
return ARROW_PREDICT_TRUE(is_cpu_) ? data_ : NULLPTR;
}
/// \brief Return a writable pointer to the buffer's data
///
/// The buffer has to be a mutable CPU buffer (`is_cpu()` and `is_mutable()`
/// are true). Otherwise, an assertion may be thrown or a null pointer may
/// be returned.
///
/// To get the buffer's mutable data address regardless of its device, call
/// `mutable_address()`.
uint8_t* mutable_data() {
#ifndef NDEBUG
CheckCPU();
CheckMutable();
#endif
return ARROW_PREDICT_TRUE(is_cpu_ && is_mutable_) ? const_cast<uint8_t*>(data_)
: NULLPTR;
}
/// \brief Return the device address of the buffer's data
uintptr_t address() const { return reinterpret_cast<uintptr_t>(data_); }
/// \brief Return a writable device address to the buffer's data
///
/// The buffer has to be a mutable buffer (`is_mutable()` is true).
/// Otherwise, an assertion may be thrown or 0 may be returned.
uintptr_t mutable_address() const {
#ifndef NDEBUG
CheckMutable();
#endif
return ARROW_PREDICT_TRUE(is_mutable_) ? reinterpret_cast<uintptr_t>(data_) : 0;
}
/// \brief Return the buffer's size in bytes
int64_t size() const { return size_; }
/// \brief Return the buffer's capacity (number of allocated bytes)
int64_t capacity() const { return capacity_; }
/// \brief Whether the buffer is directly CPU-accessible
///
/// If this function returns true, you can read directly from the buffer's
/// `data()` pointer. Otherwise, you'll have to `View()` or `Copy()` it.
bool is_cpu() const { return is_cpu_; }
/// \brief Whether the buffer is mutable
///
/// If this function returns true, you are allowed to modify buffer contents
/// using the pointer returned by `mutable_data()` or `mutable_address()`.
bool is_mutable() const { return is_mutable_; }
const std::shared_ptr<Device>& device() const { return memory_manager_->device(); }
const std::shared_ptr<MemoryManager>& memory_manager() const { return memory_manager_; }
std::shared_ptr<Buffer> parent() const { return parent_; }
/// \brief Get a RandomAccessFile for reading a buffer
///
/// The returned file object reads from this buffer's underlying memory.
static Result<std::shared_ptr<io::RandomAccessFile>> GetReader(std::shared_ptr<Buffer>);
/// \brief Get a OutputStream for writing to a buffer
///
/// The buffer must be mutable. The returned stream object writes into the buffer's
/// underlying memory (but it won't resize it).
static Result<std::shared_ptr<io::OutputStream>> GetWriter(std::shared_ptr<Buffer>);
/// \brief Copy buffer
///
/// The buffer contents will be copied into a new buffer allocated by the
/// given MemoryManager. This function supports cross-device copies.
static Result<std::shared_ptr<Buffer>> Copy(std::shared_ptr<Buffer> source,
const std::shared_ptr<MemoryManager>& to);
/// \brief Copy a non-owned buffer
///
/// This is useful for cases where the source memory area is externally managed
/// (its lifetime not tied to the source Buffer), otherwise please use Copy().
static Result<std::unique_ptr<Buffer>> CopyNonOwned(
const Buffer& source, const std::shared_ptr<MemoryManager>& to);
/// \brief View buffer
///
/// Return a Buffer that reflects this buffer, seen potentially from another
/// device, without making an explicit copy of the contents. The underlying
/// mechanism is typically implemented by the kernel or device driver, and may
/// involve lazy caching of parts of the buffer contents on the destination
/// device's memory.
///
/// If a non-copy view is unsupported for the buffer on the given device,
/// nullptr is returned. An error can be returned if some low-level
/// operation fails (such as an out-of-memory condition).
static Result<std::shared_ptr<Buffer>> View(std::shared_ptr<Buffer> source,
const std::shared_ptr<MemoryManager>& to);
/// \brief View or copy buffer
///
/// Try to view buffer contents on the given MemoryManager's device, but
/// fall back to copying if a no-copy view isn't supported.
static Result<std::shared_ptr<Buffer>> ViewOrCopy(
std::shared_ptr<Buffer> source, const std::shared_ptr<MemoryManager>& to);
protected:
bool is_mutable_;
bool is_cpu_;
const uint8_t* data_;
int64_t size_;
int64_t capacity_;
// null by default, but may be set
std::shared_ptr<Buffer> parent_;
private:
// private so that subclasses are forced to call SetMemoryManager()
std::shared_ptr<MemoryManager> memory_manager_;
protected:
void CheckMutable() const;
void CheckCPU() const;
void SetMemoryManager(std::shared_ptr<MemoryManager> mm) {
memory_manager_ = std::move(mm);
is_cpu_ = memory_manager_->is_cpu();
}
private:
Buffer() = delete;
ARROW_DISALLOW_COPY_AND_ASSIGN(Buffer);
};
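// ---------------------------------------------------------------------------
// Editor's sketch (not part of the original header): reading a Buffer that may
// live on a non-CPU device. `default_cpu_memory_manager()` is assumed to be
// declared in arrow/device.h; everything else is declared above.
//
//   arrow::Status ReadAnywhere(std::shared_ptr<arrow::Buffer> buf) {
//     if (!buf->is_cpu()) {
//       // View the contents on the CPU, falling back to a copy if a
//       // zero-copy view is not supported for the source device.
//       ARROW_ASSIGN_OR_RAISE(
//           buf, arrow::Buffer::ViewOrCopy(buf, arrow::default_cpu_memory_manager()));
//     }
//     const uint8_t* bytes = buf->data();  // valid: buf is now a CPU buffer
//     int64_t n = buf->size();
//     (void)bytes;
//     (void)n;
//     return arrow::Status::OK();
//   }
// ---------------------------------------------------------------------------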
/// \defgroup buffer-slicing-functions Functions for slicing buffers
///
/// @{
/// \brief Construct a view on a buffer at the given offset and length.
///
/// This function cannot fail and does not check for errors (except in debug builds)
static inline std::shared_ptr<Buffer> SliceBuffer(const std::shared_ptr<Buffer>& buffer,
const int64_t offset,
const int64_t length) {
return std::make_shared<Buffer>(buffer, offset, length);
}
/// \brief Construct a view on a buffer at the given offset, up to the buffer's end.
///
/// This function cannot fail and does not check for errors (except in debug builds)
static inline std::shared_ptr<Buffer> SliceBuffer(const std::shared_ptr<Buffer>& buffer,
const int64_t offset) {
int64_t length = buffer->size() - offset;
return SliceBuffer(buffer, offset, length);
}
/// \brief Input-checking version of SliceBuffer
///
/// An Invalid Status is returned if the requested slice falls out of bounds.
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> SliceBufferSafe(const std::shared_ptr<Buffer>& buffer,
int64_t offset);
/// \brief Input-checking version of SliceBuffer
///
/// An Invalid Status is returned if the requested slice falls out of bounds.
/// Note that unlike SliceBuffer, `length` isn't clamped to the available buffer size.
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> SliceBufferSafe(const std::shared_ptr<Buffer>& buffer,
int64_t offset, int64_t length);
/// \brief Like SliceBuffer, but construct a mutable buffer slice.
///
/// If the parent buffer is not mutable, behavior is undefined (it may abort
/// in debug builds).
ARROW_EXPORT
std::shared_ptr<Buffer> SliceMutableBuffer(const std::shared_ptr<Buffer>& buffer,
const int64_t offset, const int64_t length);
/// \brief Like SliceBuffer, but construct a mutable buffer slice.
///
/// If the parent buffer is not mutable, behavior is undefined (it may abort
/// in debug builds).
static inline std::shared_ptr<Buffer> SliceMutableBuffer(
const std::shared_ptr<Buffer>& buffer, const int64_t offset) {
int64_t length = buffer->size() - offset;
return SliceMutableBuffer(buffer, offset, length);
}
/// \brief Input-checking version of SliceMutableBuffer
///
/// An Invalid Status is returned if the requested slice falls out of bounds.
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> SliceMutableBufferSafe(
const std::shared_ptr<Buffer>& buffer, int64_t offset);
/// \brief Input-checking version of SliceMutableBuffer
///
/// An Invalid Status is returned if the requested slice falls out of bounds.
/// Note that unlike SliceBuffer, `length` isn't clamped to the available buffer size.
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> SliceMutableBufferSafe(
const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length);
/// @}
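// ---------------------------------------------------------------------------
// Editor's sketch (illustrative only): slicing a buffer with and without
// bounds checking. `Buffer::FromString` is assumed to be declared in the part
// of this header not shown here; the slicing helpers are the ones above.
//
//   arrow::Status SliceDemo() {
//     std::shared_ptr<arrow::Buffer> buf = arrow::Buffer::FromString("hello world");
//     // Zero-copy view of the last five bytes ("world"); no bounds checking.
//     std::shared_ptr<arrow::Buffer> tail = arrow::SliceBuffer(buf, 6);
//     // Bounds-checked variant: returns an Invalid Status for out-of-range slices.
//     ARROW_ASSIGN_OR_RAISE(auto head, arrow::SliceBufferSafe(buf, 0, 5));  // "hello"
//     return arrow::Status::OK();
//   }
// ---------------------------------------------------------------------------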
/// \class MutableBuffer
/// \brief A Buffer whose contents can be mutated. May or may not own its data.
class ARROW_EXPORT MutableBuffer : public Buffer {
public:
MutableBuffer(uint8_t* data, const int64_t size) : Buffer(data, size) {
is_mutable_ = true;
}
MutableBuffer(uint8_t* data, const int64_t size, std::shared_ptr<MemoryManager> mm)
: Buffer(data, size, std::move(mm)) {
is_mutable_ = true;
}
MutableBuffer(const std::shared_ptr<Buffer>& parent, const int64_t offset,
const int64_t size);
/// \brief Create buffer referencing typed memory with some length
/// \param[in] data the typed memory as C array
/// \param[in] length the number of values in the array
/// \return a new shared_ptr<Buffer>
template <typename T, typename SizeType = int64_t>
static std::shared_ptr<Buffer> Wrap(T* data, SizeType length) {
return std::make_shared<MutableBuffer>(reinterpret_cast<uint8_t*>(data),
static_cast<int64_t>(sizeof(T) * length));
}
protected:
MutableBuffer() : Buffer(NULLPTR, 0) {}
};
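// ---------------------------------------------------------------------------
// Editor's sketch (illustrative only): wrapping caller-owned typed memory with
// MutableBuffer::Wrap. Assumes <vector> is available; no data is copied, so
// `values` must outlive the returned buffer.
//
//   std::vector<int32_t> values = {1, 2, 3, 4};
//   std::shared_ptr<arrow::Buffer> wrapped =
//       arrow::MutableBuffer::Wrap(values.data(), values.size());
//   // wrapped->size() == 4 * sizeof(int32_t) == 16 bytes
// ---------------------------------------------------------------------------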
/// \class ResizableBuffer
/// \brief A mutable buffer that can be resized
class ARROW_EXPORT ResizableBuffer : public MutableBuffer {
public:
/// Change the buffer's reported size to the indicated size, allocating memory
/// if necessary. This ensures that the capacity of the buffer is a multiple
/// of 64 bytes, as defined in Layout.md.
/// Consider calling ZeroPadding afterwards to conform to the Arrow layout
/// specification.
///
/// @param new_size The new size for the buffer.
/// @param shrink_to_fit Whether to shrink the capacity if new size < current size
virtual Status Resize(const int64_t new_size, bool shrink_to_fit) = 0;
Status Resize(const int64_t new_size) {
return Resize(new_size, /*shrink_to_fit=*/true);
}
/// Ensure that the buffer has enough memory allocated to fit the indicated
/// capacity (and meets the 64-byte padding requirement in Layout.md).
/// This does not change the buffer's reported size and does not zero the padding.
virtual Status Reserve(const int64_t new_capacity) = 0;
template <class T>
Status TypedResize(const int64_t new_nb_elements, bool shrink_to_fit = true) {
return Resize(sizeof(T) * new_nb_elements, shrink_to_fit);
}
template <class T>
Status TypedReserve(const int64_t new_nb_elements) {
return Reserve(sizeof(T) * new_nb_elements);
}
protected:
ResizableBuffer(uint8_t* data, int64_t size) : MutableBuffer(data, size) {}
ResizableBuffer(uint8_t* data, int64_t size, std::shared_ptr<MemoryManager> mm)
: MutableBuffer(data, size, std::move(mm)) {}
};
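// ---------------------------------------------------------------------------
// Editor's sketch (illustrative only): the difference between Reserve and
// Resize, using AllocateResizableBuffer, which is declared just below.
//
//   arrow::Status GrowDemo() {
//     ARROW_ASSIGN_OR_RAISE(std::unique_ptr<arrow::ResizableBuffer> buf,
//                           arrow::AllocateResizableBuffer(64));
//     ARROW_RETURN_NOT_OK(buf->Reserve(1024));  // grows capacity, size stays 64
//     ARROW_RETURN_NOT_OK(buf->Resize(256));    // changes the reported size
//     return arrow::Status::OK();
//   }
// ---------------------------------------------------------------------------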
/// \defgroup buffer-allocation-functions Functions for allocating buffers
///
/// @{
/// \brief Allocate a fixed size mutable buffer from a memory pool, zero its padding.
///
/// \param[in] size size of buffer to allocate
/// \param[in] pool a memory pool
ARROW_EXPORT
Result<std::unique_ptr<Buffer>> AllocateBuffer(const int64_t size,
MemoryPool* pool = NULLPTR);
/// \brief Allocate a resizable buffer from a memory pool, zero its padding.
///
/// \param[in] size size of buffer to allocate
/// \param[in] pool a memory pool
ARROW_EXPORT
Result<std::unique_ptr<ResizableBuffer>> AllocateResizableBuffer(
const int64_t size, MemoryPool* pool = NULLPTR);
/// \brief Allocate a bitmap buffer from a memory pool
///
/// No guarantee is provided on the initial values of the bits.
///
/// \param[in] length size in bits of bitmap to allocate
/// \param[in] pool memory pool to allocate memory from
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> AllocateBitmap(int64_t length,
MemoryPool* pool = NULLPTR);
ARROW_EXPORT
Status AllocateBitmap(MemoryPool* pool, int64_t length, std::shared_ptr<Buffer>* out);
/// \brief Allocate a zero-initialized bitmap buffer from a memory pool
///
/// \param[in] length size in bits of bitmap to allocate
/// \param[in] pool memory pool to allocate memory from
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> AllocateEmptyBitmap(int64_t length,
MemoryPool* pool = NULLPTR);
/// \brief Concatenate multiple buffers into a single buffer
///
/// \param[in] buffers the buffers to be concatenated
/// \param[in] pool memory pool to allocate the new buffer from
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> ConcatenateBuffers(const BufferVector& buffers,
MemoryPool* pool = NULLPTR);
ARROW_EXPORT
Status ConcatenateBuffers(const BufferVector& buffers, MemoryPool* pool,
std::shared_ptr<Buffer>* out);
/// @}
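// ---------------------------------------------------------------------------
// Editor's sketch (illustrative only): allocating buffers and bitmaps from a
// memory pool and concatenating them. Only declarations from this group are
// used.
//
//   arrow::Status AllocDemo(arrow::MemoryPool* pool) {
//     ARROW_ASSIGN_OR_RAISE(std::unique_ptr<arrow::Buffer> data,
//                           arrow::AllocateBuffer(128, pool));
//     ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Buffer> bits,
//                           arrow::AllocateEmptyBitmap(100, pool));  // 100 bits, all zero
//     std::shared_ptr<arrow::Buffer> data_shared = std::move(data);
//     // Concatenate into a single contiguous buffer allocated from `pool`.
//     ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Buffer> combined,
//                           arrow::ConcatenateBuffers({data_shared, bits}, pool));
//     return arrow::Status::OK();
//   }
// ---------------------------------------------------------------------------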
} // namespace arrow

View File

@ -0,0 +1,459 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <utility>
#include "arrow/buffer.h"
#include "arrow/status.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_generate.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/macros.h"
#include "arrow/util/ubsan.h"
#include "arrow/util/visibility.h"
namespace arrow {
// ----------------------------------------------------------------------
// Buffer builder classes
/// \class BufferBuilder
/// \brief A class for incrementally building a contiguous chunk of in-memory
/// data
class ARROW_EXPORT BufferBuilder {
public:
explicit BufferBuilder(MemoryPool* pool = default_memory_pool())
: pool_(pool),
data_(/*ensure never null to make ubsan happy and avoid check penalties below*/
util::MakeNonNull<uint8_t>()),
capacity_(0),
size_(0) {}
/// \brief Constructs new Builder that will start using
/// the provided buffer until Finish/Reset are called.
/// The buffer is not resized.
explicit BufferBuilder(std::shared_ptr<ResizableBuffer> buffer,
MemoryPool* pool = default_memory_pool())
: buffer_(std::move(buffer)),
pool_(pool),
data_(buffer_->mutable_data()),
capacity_(buffer_->capacity()),
size_(buffer_->size()) {}
/// \brief Resize the buffer to the nearest multiple of 64 bytes
///
/// \param new_capacity the new capacity of the builder. Will be rounded
/// up to a multiple of 64 bytes for padding.
/// \param shrink_to_fit if the new capacity is smaller than the existing
/// capacity, reallocate the internal buffer. Set to false to avoid
/// reallocations when shrinking the builder.
/// \return Status
Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
if (buffer_ == NULLPTR) {
ARROW_ASSIGN_OR_RAISE(buffer_, AllocateResizableBuffer(new_capacity, pool_));
} else {
ARROW_RETURN_NOT_OK(buffer_->Resize(new_capacity, shrink_to_fit));
}
capacity_ = buffer_->capacity();
data_ = buffer_->mutable_data();
return Status::OK();
}
/// \brief Ensure that the builder can accommodate the given number of
/// additional bytes without the need to perform further allocations
///
/// \param[in] additional_bytes number of additional bytes to make space for
/// \return Status
Status Reserve(const int64_t additional_bytes) {
auto min_capacity = size_ + additional_bytes;
if (min_capacity <= capacity_) {
return Status::OK();
}
return Resize(GrowByFactor(capacity_, min_capacity), false);
}
/// \brief Return a capacity expanded by the desired growth factor
static int64_t GrowByFactor(int64_t current_capacity, int64_t new_capacity) {
// Doubling capacity except for large Reserve requests. 2x growth strategy
// (versus 1.5x) seems to have slightly better performance when using
// jemalloc, but significantly better performance when using the system
// allocator. See ARROW-6450 for further discussion
return std::max(new_capacity, current_capacity * 2);
}
/// \brief Append the given data to the buffer
///
/// The buffer is automatically expanded if necessary.
Status Append(const void* data, const int64_t length) {
if (ARROW_PREDICT_FALSE(size_ + length > capacity_)) {
ARROW_RETURN_NOT_OK(Resize(GrowByFactor(capacity_, size_ + length), false));
}
UnsafeAppend(data, length);
return Status::OK();
}
/// \brief Append copies of a value to the buffer
///
/// The buffer is automatically expanded if necessary.
Status Append(const int64_t num_copies, uint8_t value) {
ARROW_RETURN_NOT_OK(Reserve(num_copies));
UnsafeAppend(num_copies, value);
return Status::OK();
}
// Advance pointer and zero out memory
Status Advance(const int64_t length) { return Append(length, 0); }
// Advance pointer, but don't allocate or zero memory
void UnsafeAdvance(const int64_t length) { size_ += length; }
// Unsafe methods don't check existing size
void UnsafeAppend(const void* data, const int64_t length) {
memcpy(data_ + size_, data, static_cast<size_t>(length));
size_ += length;
}
void UnsafeAppend(const int64_t num_copies, uint8_t value) {
memset(data_ + size_, value, static_cast<size_t>(num_copies));
size_ += num_copies;
}
/// \brief Return result of builder as a Buffer object.
///
/// The builder is reset and can be reused afterwards.
///
/// \param[out] out the finalized Buffer object
/// \param shrink_to_fit if the buffer size is smaller than its capacity,
/// reallocate to fit more tightly in memory. Set to false to avoid
/// a reallocation, at the expense of potentially more memory consumption.
/// \return Status
Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
ARROW_RETURN_NOT_OK(Resize(size_, shrink_to_fit));
if (size_ != 0) buffer_->ZeroPadding();
*out = buffer_;
if (*out == NULLPTR) {
ARROW_ASSIGN_OR_RAISE(*out, AllocateBuffer(0, pool_));
}
Reset();
return Status::OK();
}
Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
std::shared_ptr<Buffer> out;
ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
return out;
}
/// \brief Like Finish, but override the final buffer size
///
/// This is useful after writing data directly into the builder memory
/// without calling the Append methods (basically, when using BufferBuilder
/// mostly for memory allocation).
Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
bool shrink_to_fit = true) {
size_ = final_length;
return Finish(shrink_to_fit);
}
void Reset() {
buffer_ = NULLPTR;
capacity_ = size_ = 0;
}
/// \brief Set the size to a smaller value without modifying the builder's
/// contents, so that a BufferBuilder instance can be reused
/// \param[in] position must be non-negative and less than or equal
/// to the current length()
void Rewind(int64_t position) { size_ = position; }
int64_t capacity() const { return capacity_; }
int64_t length() const { return size_; }
const uint8_t* data() const { return data_; }
uint8_t* mutable_data() { return data_; }
private:
std::shared_ptr<ResizableBuffer> buffer_;
MemoryPool* pool_;
uint8_t* data_;
int64_t capacity_;
int64_t size_;
};
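// ---------------------------------------------------------------------------
// Editor's sketch (illustrative only): incrementally building a buffer of raw
// bytes. Uses only the BufferBuilder methods declared above.
//
//   arrow::Result<std::shared_ptr<arrow::Buffer>> BuildGreeting() {
//     arrow::BufferBuilder builder;
//     ARROW_RETURN_NOT_OK(builder.Append("hello ", 6));
//     ARROW_RETURN_NOT_OK(builder.Append("world", 5));
//     // Finish() zero-pads the buffer, returns it, and resets the builder.
//     return builder.Finish();
//   }
// ---------------------------------------------------------------------------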
template <typename T, typename Enable = void>
class TypedBufferBuilder;
/// \brief A BufferBuilder for building a buffer of arithmetic elements
template <typename T>
class TypedBufferBuilder<
T, typename std::enable_if<std::is_arithmetic<T>::value ||
std::is_standard_layout<T>::value>::type> {
public:
explicit TypedBufferBuilder(MemoryPool* pool = default_memory_pool())
: bytes_builder_(pool) {}
explicit TypedBufferBuilder(std::shared_ptr<ResizableBuffer> buffer,
MemoryPool* pool = default_memory_pool())
: bytes_builder_(std::move(buffer), pool) {}
explicit TypedBufferBuilder(BufferBuilder builder)
: bytes_builder_(std::move(builder)) {}
BufferBuilder* bytes_builder() { return &bytes_builder_; }
Status Append(T value) {
return bytes_builder_.Append(reinterpret_cast<uint8_t*>(&value), sizeof(T));
}
Status Append(const T* values, int64_t num_elements) {
return bytes_builder_.Append(reinterpret_cast<const uint8_t*>(values),
num_elements * sizeof(T));
}
Status Append(const int64_t num_copies, T value) {
ARROW_RETURN_NOT_OK(Reserve(num_copies + length()));
UnsafeAppend(num_copies, value);
return Status::OK();
}
void UnsafeAppend(T value) {
bytes_builder_.UnsafeAppend(reinterpret_cast<uint8_t*>(&value), sizeof(T));
}
void UnsafeAppend(const T* values, int64_t num_elements) {
bytes_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values),
num_elements * sizeof(T));
}
template <typename Iter>
void UnsafeAppend(Iter values_begin, Iter values_end) {
int64_t num_elements = static_cast<int64_t>(std::distance(values_begin, values_end));
auto data = mutable_data() + length();
bytes_builder_.UnsafeAdvance(num_elements * sizeof(T));
std::copy(values_begin, values_end, data);
}
void UnsafeAppend(const int64_t num_copies, T value) {
auto data = mutable_data() + length();
bytes_builder_.UnsafeAdvance(num_copies * sizeof(T));
std::fill(data, data + num_copies, value);
}
Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
return bytes_builder_.Resize(new_capacity * sizeof(T), shrink_to_fit);
}
Status Reserve(const int64_t additional_elements) {
return bytes_builder_.Reserve(additional_elements * sizeof(T));
}
Status Advance(const int64_t length) {
return bytes_builder_.Advance(length * sizeof(T));
}
Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
return bytes_builder_.Finish(out, shrink_to_fit);
}
Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
std::shared_ptr<Buffer> out;
ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
return out;
}
/// \brief Like Finish, but override the final buffer size
///
/// This is useful after writing data directly into the builder memory
/// without calling the Append methods (basically, when using TypedBufferBuilder
/// only for memory allocation).
Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
bool shrink_to_fit = true) {
return bytes_builder_.FinishWithLength(final_length * sizeof(T), shrink_to_fit);
}
void Reset() { bytes_builder_.Reset(); }
int64_t length() const { return bytes_builder_.length() / sizeof(T); }
int64_t capacity() const { return bytes_builder_.capacity() / sizeof(T); }
const T* data() const { return reinterpret_cast<const T*>(bytes_builder_.data()); }
T* mutable_data() { return reinterpret_cast<T*>(bytes_builder_.mutable_data()); }
private:
BufferBuilder bytes_builder_;
};
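// ---------------------------------------------------------------------------
// Editor's sketch (illustrative only): building a buffer of int32_t values.
// Reserve/length/capacity are expressed in elements, not bytes.
//
//   arrow::Result<std::shared_ptr<arrow::Buffer>> BuildInt32s() {
//     arrow::TypedBufferBuilder<int32_t> builder;
//     ARROW_RETURN_NOT_OK(builder.Reserve(3));  // room for 3 elements
//     builder.UnsafeAppend(1);
//     builder.UnsafeAppend(2);
//     builder.UnsafeAppend(3);
//     // builder.length() == 3 elements; the finished buffer holds 12 bytes.
//     return builder.Finish();
//   }
// ---------------------------------------------------------------------------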
/// \brief A BufferBuilder for building a buffer containing a bitmap
template <>
class TypedBufferBuilder<bool> {
public:
explicit TypedBufferBuilder(MemoryPool* pool = default_memory_pool())
: bytes_builder_(pool) {}
explicit TypedBufferBuilder(BufferBuilder builder)
: bytes_builder_(std::move(builder)) {}
BufferBuilder* bytes_builder() { return &bytes_builder_; }
Status Append(bool value) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppend(value);
return Status::OK();
}
Status Append(const uint8_t* valid_bytes, int64_t num_elements) {
ARROW_RETURN_NOT_OK(Reserve(num_elements));
UnsafeAppend(valid_bytes, num_elements);
return Status::OK();
}
Status Append(const int64_t num_copies, bool value) {
ARROW_RETURN_NOT_OK(Reserve(num_copies));
UnsafeAppend(num_copies, value);
return Status::OK();
}
void UnsafeAppend(bool value) {
bit_util::SetBitTo(mutable_data(), bit_length_, value);
if (!value) {
++false_count_;
}
++bit_length_;
}
/// \brief Append bits from an array of bytes (one value per byte)
void UnsafeAppend(const uint8_t* bytes, int64_t num_elements) {
if (num_elements == 0) return;
int64_t i = 0;
internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements, [&] {
bool value = bytes[i++];
false_count_ += !value;
return value;
});
bit_length_ += num_elements;
}
/// \brief Append bits from a packed bitmap
void UnsafeAppend(const uint8_t* bitmap, int64_t offset, int64_t num_elements) {
if (num_elements == 0) return;
internal::CopyBitmap(bitmap, offset, num_elements, mutable_data(), bit_length_);
false_count_ += num_elements - internal::CountSetBits(bitmap, offset, num_elements);
bit_length_ += num_elements;
}
void UnsafeAppend(const int64_t num_copies, bool value) {
bit_util::SetBitsTo(mutable_data(), bit_length_, num_copies, value);
false_count_ += num_copies * !value;
bit_length_ += num_copies;
}
template <bool count_falses, typename Generator>
void UnsafeAppend(const int64_t num_elements, Generator&& gen) {
if (num_elements == 0) return;
if (count_falses) {
internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements, [&] {
bool value = gen();
false_count_ += !value;
return value;
});
} else {
internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements,
std::forward<Generator>(gen));
}
bit_length_ += num_elements;
}
Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
const int64_t old_byte_capacity = bytes_builder_.capacity();
ARROW_RETURN_NOT_OK(
bytes_builder_.Resize(bit_util::BytesForBits(new_capacity), shrink_to_fit));
// Resize() may have chosen a larger capacity (e.g. for padding),
// so ask it again before calling memset().
const int64_t new_byte_capacity = bytes_builder_.capacity();
if (new_byte_capacity > old_byte_capacity) {
// The additional buffer space is 0-initialized for convenience,
// so that other methods can simply bump the length.
memset(mutable_data() + old_byte_capacity, 0,
static_cast<size_t>(new_byte_capacity - old_byte_capacity));
}
return Status::OK();
}
Status Reserve(const int64_t additional_elements) {
return Resize(
BufferBuilder::GrowByFactor(bit_length_, bit_length_ + additional_elements),
false);
}
Status Advance(const int64_t length) {
ARROW_RETURN_NOT_OK(Reserve(length));
bit_length_ += length;
false_count_ += length;
return Status::OK();
}
Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
// set bytes_builder_.size_ == byte size of data
bytes_builder_.UnsafeAdvance(bit_util::BytesForBits(bit_length_) -
bytes_builder_.length());
bit_length_ = false_count_ = 0;
return bytes_builder_.Finish(out, shrink_to_fit);
}
Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
std::shared_ptr<Buffer> out;
ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
return out;
}
/// \brief Like Finish, but override the final buffer size
///
/// This is useful after writing data directly into the builder memory
/// without calling the Append methods (basically, when using TypedBufferBuilder
/// only for memory allocation).
Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
bool shrink_to_fit = true) {
const auto final_byte_length = bit_util::BytesForBits(final_length);
bytes_builder_.UnsafeAdvance(final_byte_length - bytes_builder_.length());
bit_length_ = false_count_ = 0;
return bytes_builder_.FinishWithLength(final_byte_length, shrink_to_fit);
}
void Reset() {
bytes_builder_.Reset();
bit_length_ = false_count_ = 0;
}
int64_t length() const { return bit_length_; }
int64_t capacity() const { return bytes_builder_.capacity() * 8; }
const uint8_t* data() const { return bytes_builder_.data(); }
uint8_t* mutable_data() { return bytes_builder_.mutable_data(); }
int64_t false_count() const { return false_count_; }
private:
BufferBuilder bytes_builder_;
int64_t bit_length_ = 0;
int64_t false_count_ = 0;
};
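// ---------------------------------------------------------------------------
// Editor's sketch (illustrative only): building a validity bitmap and tracking
// the number of false (null) bits with false_count().
//
//   arrow::Result<std::shared_ptr<arrow::Buffer>> BuildValidityBitmap() {
//     arrow::TypedBufferBuilder<bool> builder;
//     ARROW_RETURN_NOT_OK(builder.Append(/*num_copies=*/5, true));  // 5 valid bits
//     ARROW_RETURN_NOT_OK(builder.Append(false));                   // 1 null bit
//     // builder.length() == 6 bits, builder.false_count() == 1
//     return builder.Finish();
//   }
// ---------------------------------------------------------------------------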
} // namespace arrow

View File

@ -0,0 +1,32 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "arrow/array/builder_adaptive.h" // IWYU pragma: keep
#include "arrow/array/builder_base.h" // IWYU pragma: keep
#include "arrow/array/builder_binary.h" // IWYU pragma: keep
#include "arrow/array/builder_decimal.h" // IWYU pragma: keep
#include "arrow/array/builder_dict.h" // IWYU pragma: keep
#include "arrow/array/builder_nested.h" // IWYU pragma: keep
#include "arrow/array/builder_primitive.h" // IWYU pragma: keep
#include "arrow/array/builder_time.h" // IWYU pragma: keep
#include "arrow/array/builder_union.h" // IWYU pragma: keep
#include "arrow/status.h"
#include "arrow/util/visibility.h"

View File

@ -0,0 +1,103 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#define ARROW_FLAG_DICTIONARY_ORDERED 1
#define ARROW_FLAG_NULLABLE 2
#define ARROW_FLAG_MAP_KEYS_SORTED 4
struct ArrowSchema {
// Array type description
const char* format;
const char* name;
const char* metadata;
int64_t flags;
int64_t n_children;
struct ArrowSchema** children;
struct ArrowSchema* dictionary;
// Release callback
void (*release)(struct ArrowSchema*);
// Opaque producer-specific data
void* private_data;
};
struct ArrowArray {
// Array data description
int64_t length;
int64_t null_count;
int64_t offset;
int64_t n_buffers;
int64_t n_children;
const void** buffers;
struct ArrowArray** children;
struct ArrowArray* dictionary;
// Release callback
void (*release)(struct ArrowArray*);
// Opaque producer-specific data
void* private_data;
};
// EXPERIMENTAL: C stream interface
struct ArrowArrayStream {
// Callback to get the stream type
// (will be the same for all arrays in the stream).
//
// Return value: 0 if successful, an `errno`-compatible error code otherwise.
//
// If successful, the ArrowSchema must be released independently from the stream.
int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
// Callback to get the next array
// (if no error and the array is released, the stream has ended)
//
// Return value: 0 if successful, an `errno`-compatible error code otherwise.
//
// If successful, the ArrowArray must be released independently from the stream.
int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);
// Callback to get optional detailed error information.
// This must only be called if the last stream operation failed
// with a non-0 return code.
//
// Return value: pointer to a null-terminated character array describing
// the last error, or NULL if no description is available.
//
// The returned pointer is only valid until the next operation on this stream
// (including release).
const char* (*get_last_error)(struct ArrowArrayStream*);
// Release callback: release the stream's own resources.
// Note that arrays returned by `get_next` must be individually released.
void (*release)(struct ArrowArrayStream*);
// Opaque producer-specific data
void* private_data;
};
#ifdef __cplusplus
}
#endif
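// Editor's sketch (C++, not part of the original header): a consumer draining
// an ArrowArrayStream according to the protocol documented above. The stream
// itself is assumed to come from some producer library.
//
//   int DrainStream(struct ArrowArrayStream* stream) {
//     struct ArrowSchema schema;
//     int rc = stream->get_schema(stream, &schema);
//     if (rc != 0) return rc;                  // stream->get_last_error(stream) has details
//     while (true) {
//       struct ArrowArray array;
//       rc = stream->get_next(stream, &array);
//       if (rc != 0) break;
//       if (array.release == nullptr) break;   // released array => end of stream
//       // ... consume array.length, array.buffers, array.children using `schema` ...
//       array.release(&array);                 // hand the memory back to the producer
//     }
//     schema.release(&schema);                 // the schema is owned independently
//     stream->release(stream);
//     return rc;
//   }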

Some files were not shown because too many files have changed in this diff.