first commit

Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions


@@ -0,0 +1,128 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "tensorflow/core/framework/op.h"
#include "arrow/type.h"
// These utilities are supposed to be included in TensorFlow operators
// that need to be compiled separately from Arrow because of ABI issues.
// They therefore need to be header-only.
namespace arrow {
namespace adapters {
namespace tensorflow {
Status GetArrowType(::tensorflow::DataType dtype, std::shared_ptr<DataType>* out) {
switch (dtype) {
case ::tensorflow::DT_BOOL:
*out = arrow::boolean();
break;
case ::tensorflow::DT_FLOAT:
*out = arrow::float32();
break;
case ::tensorflow::DT_DOUBLE:
*out = arrow::float64();
break;
case ::tensorflow::DT_HALF:
*out = arrow::float16();
break;
case ::tensorflow::DT_INT8:
*out = arrow::int8();
break;
case ::tensorflow::DT_INT16:
*out = arrow::int16();
break;
case ::tensorflow::DT_INT32:
*out = arrow::int32();
break;
case ::tensorflow::DT_INT64:
*out = arrow::int64();
break;
case ::tensorflow::DT_UINT8:
*out = arrow::uint8();
break;
case ::tensorflow::DT_UINT16:
*out = arrow::uint16();
break;
case ::tensorflow::DT_UINT32:
*out = arrow::uint32();
break;
case ::tensorflow::DT_UINT64:
*out = arrow::uint64();
break;
default:
return Status::TypeError("TensorFlow data type is not supported");
}
return Status::OK();
}
Status GetTensorFlowType(std::shared_ptr<DataType> dtype, ::tensorflow::DataType* out) {
switch (dtype->id()) {
case Type::BOOL:
*out = ::tensorflow::DT_BOOL;
break;
case Type::UINT8:
*out = ::tensorflow::DT_UINT8;
break;
case Type::INT8:
*out = ::tensorflow::DT_INT8;
break;
case Type::UINT16:
*out = ::tensorflow::DT_UINT16;
break;
case Type::INT16:
*out = ::tensorflow::DT_INT16;
break;
case Type::UINT32:
*out = ::tensorflow::DT_UINT32;
break;
case Type::INT32:
*out = ::tensorflow::DT_INT32;
break;
case Type::UINT64:
*out = ::tensorflow::DT_UINT64;
break;
case Type::INT64:
*out = ::tensorflow::DT_INT64;
break;
case Type::HALF_FLOAT:
*out = ::tensorflow::DT_HALF;
break;
case Type::FLOAT:
*out = ::tensorflow::DT_FLOAT;
break;
case Type::DOUBLE:
*out = ::tensorflow::DT_DOUBLE;
break;
default:
return Status::TypeError("Arrow data type is not supported");
}
return arrow::Status::OK();
}
} // namespace tensorflow
} // namespace adapters
} // namespace arrow
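A minimal round-trip sketch of the two helpers above (assumes the header above and the TensorFlow framework headers are on the include path; CheckRoundTrip is a hypothetical name):

arrow::Status CheckRoundTrip() {
  std::shared_ptr<arrow::DataType> arrow_type;
  ARROW_RETURN_NOT_OK(arrow::adapters::tensorflow::GetArrowType(
      ::tensorflow::DT_FLOAT, &arrow_type));   // -> arrow::float32()
  ::tensorflow::DataType tf_type;
  ARROW_RETURN_NOT_OK(arrow::adapters::tensorflow::GetTensorFlowType(
      arrow_type, &tf_type));                  // -> ::tensorflow::DT_FLOAT
  return arrow::Status::OK();
}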


@@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Coarse public API while the library is in development
#pragma once
#include "arrow/array.h" // IYWU pragma: export
#include "arrow/array/concatenate.h" // IYWU pragma: export
#include "arrow/buffer.h" // IYWU pragma: export
#include "arrow/builder.h" // IYWU pragma: export
#include "arrow/chunked_array.h" // IYWU pragma: export
#include "arrow/compare.h" // IYWU pragma: export
#include "arrow/config.h" // IYWU pragma: export
#include "arrow/datum.h" // IYWU pragma: export
#include "arrow/extension_type.h" // IYWU pragma: export
#include "arrow/memory_pool.h" // IYWU pragma: export
#include "arrow/pretty_print.h" // IYWU pragma: export
#include "arrow/record_batch.h" // IYWU pragma: export
#include "arrow/result.h" // IYWU pragma: export
#include "arrow/status.h" // IYWU pragma: export
#include "arrow/table.h" // IYWU pragma: export
#include "arrow/table_builder.h" // IYWU pragma: export
#include "arrow/tensor.h" // IYWU pragma: export
#include "arrow/type.h" // IYWU pragma: export
#include "arrow/util/key_value_metadata.h" // IWYU pragma: export
#include "arrow/visit_array_inline.h" // IYWU pragma: export
#include "arrow/visit_scalar_inline.h" // IYWU pragma: export
#include "arrow/visitor.h" // IYWU pragma: export
/// \brief Top-level namespace for Apache Arrow C++ API
namespace arrow {}
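As a usage sketch, the umbrella header alone is enough to build and print an array (Int64Builder and the ARROW_RETURN_NOT_OK macro are standard Arrow API re-exported here; BuildExample is a hypothetical name):

#include <iostream>
#include <memory>
#include "arrow/api.h"

arrow::Status BuildExample() {
  arrow::Int64Builder builder;                  // from arrow/builder.h
  ARROW_RETURN_NOT_OK(builder.Append(1));
  ARROW_RETURN_NOT_OK(builder.AppendNull());
  ARROW_RETURN_NOT_OK(builder.Append(3));
  std::shared_ptr<arrow::Array> array;
  ARROW_RETURN_NOT_OK(builder.Finish(&array));
  std::cout << array->ToString() << std::endl;  // e.g. [1, null, 3]
  return arrow::Status::OK();
}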


@@ -0,0 +1,44 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Kitchen-sink public API for arrow::Array data structures. C++ library code
// (especially header files) in Apache Arrow should use more specific headers
// unless it's a file that uses most or all Array types in which case using
// arrow/array.h is fine.
#pragma once
/// \defgroup numeric-arrays Concrete classes for numeric arrays
/// @{
/// @}
/// \defgroup binary-arrays Concrete classes for binary/string arrays
/// @{
/// @}
/// \defgroup nested-arrays Concrete classes for nested arrays
/// @{
/// @}
#include "arrow/array/array_base.h" // IWYU pragma: keep
#include "arrow/array/array_binary.h" // IWYU pragma: keep
#include "arrow/array/array_decimal.h" // IWYU pragma: keep
#include "arrow/array/array_dict.h" // IWYU pragma: keep
#include "arrow/array/array_nested.h" // IWYU pragma: keep
#include "arrow/array/array_primitive.h" // IWYU pragma: keep
#include "arrow/array/data.h" // IWYU pragma: keep
#include "arrow/array/util.h" // IWYU pragma: keep


@@ -0,0 +1,264 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <iosfwd>
#include <memory>
#include <string>
#include <vector>
#include "arrow/array/data.h"
#include "arrow/buffer.h"
#include "arrow/compare.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
#include "arrow/visitor.h"
namespace arrow {
// ----------------------------------------------------------------------
// User array accessor types
/// \brief Array base type
/// Immutable data array with some logical type and some length.
///
/// Any memory is owned by the respective Buffer instance (or its parents).
///
/// The base class is only required to have a null bitmap buffer if the null
/// count is greater than 0
///
/// If known, the null count can be provided in the base Array constructor. If
/// the null count is not known, pass -1 to indicate that the null count is to
/// be computed on the first call to null_count()
class ARROW_EXPORT Array {
public:
virtual ~Array() = default;
/// \brief Return true if value at index is null. Does not boundscheck
bool IsNull(int64_t i) const {
return null_bitmap_data_ != NULLPTR
? !bit_util::GetBit(null_bitmap_data_, i + data_->offset)
: data_->null_count == data_->length;
}
/// \brief Return true if value at index is valid (not null). Does not
/// boundscheck
bool IsValid(int64_t i) const {
return null_bitmap_data_ != NULLPTR
? bit_util::GetBit(null_bitmap_data_, i + data_->offset)
: data_->null_count != data_->length;
}
/// \brief Return a Scalar containing the value of this array at i
Result<std::shared_ptr<Scalar>> GetScalar(int64_t i) const;
/// The number of elements this array contains.
int64_t length() const { return data_->length; }
/// A relative position into another array's data, to enable zero-copy
/// slicing. This value defaults to zero
int64_t offset() const { return data_->offset; }
/// The number of null entries in the array. If the null count was not known
/// at time of construction (and set to a negative value), then the null
/// count will be computed and cached on the first invocation of this
/// function
int64_t null_count() const;
std::shared_ptr<DataType> type() const { return data_->type; }
Type::type type_id() const { return data_->type->id(); }
/// Buffer for the validity (null) bitmap, if any. Note that Union types
/// never have a null bitmap.
///
/// Note that for `null_count == 0` or for null type, this will be null.
/// This buffer does not account for any slice offset
const std::shared_ptr<Buffer>& null_bitmap() const { return data_->buffers[0]; }
/// Raw pointer to the null bitmap.
///
/// Note that for `null_count == 0` or for null type, this will be null.
/// This buffer does not account for any slice offset
const uint8_t* null_bitmap_data() const { return null_bitmap_data_; }
/// Equality comparison with another array
bool Equals(const Array& arr, const EqualOptions& = EqualOptions::Defaults()) const;
bool Equals(const std::shared_ptr<Array>& arr,
const EqualOptions& = EqualOptions::Defaults()) const;
/// \brief Return the formatted unified diff of arrow::Diff between this
/// Array and another Array
std::string Diff(const Array& other) const;
/// Approximate equality comparison with another array
///
/// epsilon is only used if this is FloatArray or DoubleArray
bool ApproxEquals(const std::shared_ptr<Array>& arr,
const EqualOptions& = EqualOptions::Defaults()) const;
bool ApproxEquals(const Array& arr,
const EqualOptions& = EqualOptions::Defaults()) const;
/// Compare whether the specified range of slots is equal for the given array
/// and this array. end_idx is exclusive. This method does not bounds check.
bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
const Array& other,
const EqualOptions& = EqualOptions::Defaults()) const;
bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
const std::shared_ptr<Array>& other,
const EqualOptions& = EqualOptions::Defaults()) const;
bool RangeEquals(const Array& other, int64_t start_idx, int64_t end_idx,
int64_t other_start_idx,
const EqualOptions& = EqualOptions::Defaults()) const;
bool RangeEquals(const std::shared_ptr<Array>& other, int64_t start_idx,
int64_t end_idx, int64_t other_start_idx,
const EqualOptions& = EqualOptions::Defaults()) const;
/// \brief Apply the ArrayVisitor::Visit() method specialized to the array type
Status Accept(ArrayVisitor* visitor) const;
/// Construct a zero-copy view of this array with the given type.
///
/// This method checks if the types are layout-compatible.
/// Nested types are traversed in depth-first order. Data buffers must have
/// the same item sizes, even though the logical types may be different.
/// An error is returned if the types are not layout-compatible.
Result<std::shared_ptr<Array>> View(const std::shared_ptr<DataType>& type) const;
/// Construct a zero-copy slice of the array with the indicated offset and
/// length
///
/// \param[in] offset the position of the first element in the constructed
/// slice
/// \param[in] length the length of the slice. If there are not enough
/// elements in the array, the length will be adjusted accordingly
///
/// \return a new object wrapped in std::shared_ptr<Array>
std::shared_ptr<Array> Slice(int64_t offset, int64_t length) const;
/// Slice from offset until end of the array
std::shared_ptr<Array> Slice(int64_t offset) const;
/// Input-checking variant of Array::Slice
Result<std::shared_ptr<Array>> SliceSafe(int64_t offset, int64_t length) const;
/// Input-checking variant of Array::Slice
Result<std::shared_ptr<Array>> SliceSafe(int64_t offset) const;
const std::shared_ptr<ArrayData>& data() const { return data_; }
int num_fields() const { return static_cast<int>(data_->child_data.size()); }
/// \return PrettyPrint representation of array suitable for debugging
std::string ToString() const;
/// \brief Perform cheap validation checks to determine obvious inconsistencies
/// within the array's internal data.
///
/// This is O(k) where k is the number of descendants.
///
/// \return Status
Status Validate() const;
/// \brief Perform extensive validation checks to determine inconsistencies
/// within the array's internal data.
///
/// This is potentially O(k*n) where k is the number of descendants and n
/// is the array length.
///
/// \return Status
Status ValidateFull() const;
protected:
Array() = default;
ARROW_DEFAULT_MOVE_AND_ASSIGN(Array);
std::shared_ptr<ArrayData> data_;
const uint8_t* null_bitmap_data_ = NULLPTR;
/// Protected method for constructors
void SetData(const std::shared_ptr<ArrayData>& data) {
if (data->buffers.size() > 0) {
null_bitmap_data_ = data->GetValuesSafe<uint8_t>(0, /*offset=*/0);
} else {
null_bitmap_data_ = NULLPTR;
}
data_ = data;
}
private:
ARROW_DISALLOW_COPY_AND_ASSIGN(Array);
ARROW_EXPORT friend void PrintTo(const Array& x, std::ostream* os);
};
static inline std::ostream& operator<<(std::ostream& os, const Array& x) {
os << x.ToString();
return os;
}
/// Base class for non-nested arrays
class ARROW_EXPORT FlatArray : public Array {
protected:
using Array::Array;
};
/// Base class for arrays of fixed-size logical types
class ARROW_EXPORT PrimitiveArray : public FlatArray {
public:
PrimitiveArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
/// Does not account for any slice offset
std::shared_ptr<Buffer> values() const { return data_->buffers[1]; }
protected:
PrimitiveArray() : raw_values_(NULLPTR) {}
void SetData(const std::shared_ptr<ArrayData>& data) {
this->Array::SetData(data);
raw_values_ = data->GetValuesSafe<uint8_t>(1, /*offset=*/0);
}
explicit PrimitiveArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
const uint8_t* raw_values_;
};
/// Degenerate null type Array
class ARROW_EXPORT NullArray : public FlatArray {
public:
using TypeClass = NullType;
explicit NullArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
explicit NullArray(int64_t length);
private:
void SetData(const std::shared_ptr<ArrayData>& data) {
null_bitmap_data_ = NULLPTR;
data->null_count = data->length;
data_ = data;
}
};
} // namespace arrow
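A minimal sketch of the accessor and slicing API above (assumes `array` is any built std::shared_ptr<arrow::Array>; Describe is a hypothetical name):

arrow::Status Describe(const std::shared_ptr<arrow::Array>& array) {
  int64_t n = array->length();
  int64_t nulls = array->null_count();      // computed and cached if unknown (-1)
  if (n > 0 && array->IsValid(0)) {         // IsValid/IsNull do not boundscheck
    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Scalar> first, array->GetScalar(0));
  }
  std::shared_ptr<arrow::Array> tail = array->Slice(1);  // zero-copy view
  ARROW_RETURN_NOT_OK(tail->Validate());                 // cheap consistency checks
  return arrow::Status::OK();
}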


@@ -0,0 +1,269 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Array accessor classes for Binary, LargeBinary, String, LargeString,
// FixedSizeBinary
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "arrow/array/array_base.h"
#include "arrow/array/data.h"
#include "arrow/buffer.h"
#include "arrow/stl_iterator.h"
#include "arrow/type.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/macros.h"
#include "arrow/util/string_view.h" // IWYU pragma: export
#include "arrow/util/visibility.h"
namespace arrow {
/// \addtogroup binary-arrays
///
/// @{
// ----------------------------------------------------------------------
// Binary and String
/// Base class for variable-sized binary arrays, regardless of offset size
/// and logical interpretation.
template <typename TYPE>
class BaseBinaryArray : public FlatArray {
public:
using TypeClass = TYPE;
using offset_type = typename TypeClass::offset_type;
using IteratorType = stl::ArrayIterator<BaseBinaryArray<TYPE>>;
/// Return a pointer to the given element's bytes
// XXX should GetValue(int64_t i) return a string_view?
const uint8_t* GetValue(int64_t i, offset_type* out_length) const {
// Account for base offset
i += data_->offset;
const offset_type pos = raw_value_offsets_[i];
*out_length = raw_value_offsets_[i + 1] - pos;
return raw_data_ + pos;
}
/// \brief Get binary value as a string_view
///
/// \param i the value index
/// \return the view over the selected value
util::string_view GetView(int64_t i) const {
// Account for base offset
i += data_->offset;
const offset_type pos = raw_value_offsets_[i];
return util::string_view(reinterpret_cast<const char*>(raw_data_ + pos),
raw_value_offsets_[i + 1] - pos);
}
util::optional<util::string_view> operator[](int64_t i) const {
return *IteratorType(*this, i);
}
/// \brief Get binary value as a string_view
/// Provided for consistency with other arrays.
///
/// \param i the value index
/// \return the view over the selected value
util::string_view Value(int64_t i) const { return GetView(i); }
/// \brief Get binary value as a std::string
///
/// \param i the value index
/// \return the value copied into a std::string
std::string GetString(int64_t i) const { return std::string(GetView(i)); }
/// Note that this buffer does not account for any slice offset
std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[1]; }
/// Note that this buffer does not account for any slice offset
std::shared_ptr<Buffer> value_data() const { return data_->buffers[2]; }
const offset_type* raw_value_offsets() const {
return raw_value_offsets_ + data_->offset;
}
const uint8_t* raw_data() const { return raw_data_; }
/// \brief Return the data buffer absolute offset of the data for the value
/// at the passed index.
///
/// Does not perform boundschecking
offset_type value_offset(int64_t i) const {
return raw_value_offsets_[i + data_->offset];
}
/// \brief Return the length of the data for the value at the passed index.
///
/// Does not perform boundschecking
offset_type value_length(int64_t i) const {
i += data_->offset;
return raw_value_offsets_[i + 1] - raw_value_offsets_[i];
}
/// \brief Return the total length of the memory in the data buffer
/// referenced by this array. If the array has been sliced then this may be
/// less than the size of the data buffer (data_->buffers[2]).
offset_type total_values_length() const {
if (data_->length > 0) {
return raw_value_offsets_[data_->length + data_->offset] -
raw_value_offsets_[data_->offset];
} else {
return 0;
}
}
IteratorType begin() const { return IteratorType(*this); }
IteratorType end() const { return IteratorType(*this, length()); }
protected:
// For subclasses
BaseBinaryArray() = default;
// Protected method for constructors
void SetData(const std::shared_ptr<ArrayData>& data) {
this->Array::SetData(data);
raw_value_offsets_ = data->GetValuesSafe<offset_type>(1, /*offset=*/0);
raw_data_ = data->GetValuesSafe<uint8_t>(2, /*offset=*/0);
}
const offset_type* raw_value_offsets_ = NULLPTR;
const uint8_t* raw_data_ = NULLPTR;
};
/// Concrete Array class for variable-size binary data
class ARROW_EXPORT BinaryArray : public BaseBinaryArray<BinaryType> {
public:
explicit BinaryArray(const std::shared_ptr<ArrayData>& data);
BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
protected:
// For subclasses such as StringArray
BinaryArray() : BaseBinaryArray() {}
};
/// Concrete Array class for variable-size string (utf-8) data
class ARROW_EXPORT StringArray : public BinaryArray {
public:
using TypeClass = StringType;
explicit StringArray(const std::shared_ptr<ArrayData>& data);
StringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
/// \brief Validate that this array contains only valid UTF8 entries
///
/// This check is also implied by ValidateFull()
Status ValidateUTF8() const;
};
/// Concrete Array class for large variable-size binary data
class ARROW_EXPORT LargeBinaryArray : public BaseBinaryArray<LargeBinaryType> {
public:
explicit LargeBinaryArray(const std::shared_ptr<ArrayData>& data);
LargeBinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
protected:
// For subclasses such as LargeStringArray
LargeBinaryArray() : BaseBinaryArray() {}
};
/// Concrete Array class for large variable-size string (utf-8) data
class ARROW_EXPORT LargeStringArray : public LargeBinaryArray {
public:
using TypeClass = LargeStringType;
explicit LargeStringArray(const std::shared_ptr<ArrayData>& data);
LargeStringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
/// \brief Validate that this array contains only valid UTF8 entries
///
/// This check is also implied by ValidateFull()
Status ValidateUTF8() const;
};
// ----------------------------------------------------------------------
// Fixed width binary
/// Concrete Array class for fixed-size binary data
class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {
public:
using TypeClass = FixedSizeBinaryType;
using IteratorType = stl::ArrayIterator<FixedSizeBinaryArray>;
explicit FixedSizeBinaryArray(const std::shared_ptr<ArrayData>& data);
FixedSizeBinaryArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
const uint8_t* GetValue(int64_t i) const;
const uint8_t* Value(int64_t i) const { return GetValue(i); }
util::string_view GetView(int64_t i) const {
return util::string_view(reinterpret_cast<const char*>(GetValue(i)), byte_width());
}
util::optional<util::string_view> operator[](int64_t i) const {
return *IteratorType(*this, i);
}
std::string GetString(int64_t i) const { return std::string(GetView(i)); }
int32_t byte_width() const { return byte_width_; }
const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width_; }
IteratorType begin() const { return IteratorType(*this); }
IteratorType end() const { return IteratorType(*this, length()); }
protected:
void SetData(const std::shared_ptr<ArrayData>& data) {
this->PrimitiveArray::SetData(data);
byte_width_ =
internal::checked_cast<const FixedSizeBinaryType&>(*type()).byte_width();
}
int32_t byte_width_;
};
/// @}
} // namespace arrow
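A usage sketch for the variable-size string accessors above (ScanStrings is a hypothetical name; `strings` is assumed to be a built StringArray):

arrow::Status ScanStrings(const arrow::StringArray& strings) {
  for (int64_t i = 0; i < strings.length(); ++i) {
    if (strings.IsNull(i)) continue;
    arrow::util::string_view view = strings.GetView(i);  // zero-copy view into value_data()
    (void)view;
  }
  // STL-style iteration yields util::optional<util::string_view> per slot:
  for (const auto& slot : strings) {
    if (slot.has_value()) { /* use *slot */ }
  }
  return arrow::Status::OK();
}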


@@ -0,0 +1,72 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include "arrow/array/array_binary.h"
#include "arrow/array/data.h"
#include "arrow/type.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \addtogroup numeric-arrays
///
/// @{
// ----------------------------------------------------------------------
// Decimal128Array
/// Concrete Array class for 128-bit decimal data
class ARROW_EXPORT Decimal128Array : public FixedSizeBinaryArray {
public:
using TypeClass = Decimal128Type;
using FixedSizeBinaryArray::FixedSizeBinaryArray;
/// \brief Construct Decimal128Array from ArrayData instance
explicit Decimal128Array(const std::shared_ptr<ArrayData>& data);
std::string FormatValue(int64_t i) const;
};
// Backward compatibility
using DecimalArray = Decimal128Array;
// ----------------------------------------------------------------------
// Decimal256Array
/// Concrete Array class for 256-bit decimal data
class ARROW_EXPORT Decimal256Array : public FixedSizeBinaryArray {
public:
using TypeClass = Decimal256Type;
using FixedSizeBinaryArray::FixedSizeBinaryArray;
/// \brief Construct Decimal256Array from ArrayData instance
explicit Decimal256Array(const std::shared_ptr<ArrayData>& data);
std::string FormatValue(int64_t i) const;
};
/// @}
} // namespace arrow
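A hedged sketch of building and formatting a Decimal128Array (Decimal128Builder, the arrow::decimal128() factory, and arrow::Decimal128 are from the wider Arrow API, not this header; FormatFirst is a hypothetical name):

arrow::Status FormatFirst() {
  arrow::Decimal128Builder builder(arrow::decimal128(10, 2));     // precision 10, scale 2
  ARROW_RETURN_NOT_OK(builder.Append(arrow::Decimal128(12345)));  // stores 123.45
  std::shared_ptr<arrow::Array> out;
  ARROW_RETURN_NOT_OK(builder.Finish(&out));
  const auto& decimals = static_cast<const arrow::Decimal128Array&>(*out);
  std::string text = decimals.FormatValue(0);                     // "123.45"
  (void)text;
  return arrow::Status::OK();
}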


@@ -0,0 +1,180 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include "arrow/array/array_base.h"
#include "arrow/array/data.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
// ----------------------------------------------------------------------
// DictionaryArray
/// \brief Array type for dictionary-encoded data with a
/// data-dependent dictionary
///
/// A dictionary array contains an array of non-negative integers (the
/// "dictionary indices") along with a data type containing a "dictionary"
/// corresponding to the distinct values represented in the data.
///
/// For example, the array
///
/// ["foo", "bar", "foo", "bar", "foo", "bar"]
///
/// with dictionary ["bar", "foo"], would have dictionary array representation
///
/// indices: [1, 0, 1, 0, 1, 0]
/// dictionary: ["bar", "foo"]
///
/// The indices in principle may be any integer type.
class ARROW_EXPORT DictionaryArray : public Array {
public:
using TypeClass = DictionaryType;
explicit DictionaryArray(const std::shared_ptr<ArrayData>& data);
DictionaryArray(const std::shared_ptr<DataType>& type,
const std::shared_ptr<Array>& indices,
const std::shared_ptr<Array>& dictionary);
/// \brief Construct DictionaryArray from dictionary and indices
/// array and validate
///
/// This function validates the indices and the input type: it checks that
/// all indices are non-negative and smaller than the size of the dictionary.
///
/// \param[in] type a dictionary type
/// \param[in] dictionary the dictionary with same value type as the
/// type object
/// \param[in] indices an array of non-negative integers smaller than the
/// size of the dictionary
static Result<std::shared_ptr<Array>> FromArrays(
const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& indices,
const std::shared_ptr<Array>& dictionary);
static Result<std::shared_ptr<Array>> FromArrays(
const std::shared_ptr<Array>& indices, const std::shared_ptr<Array>& dictionary) {
return FromArrays(::arrow::dictionary(indices->type(), dictionary->type()), indices,
dictionary);
}
/// \brief Transpose this DictionaryArray
///
/// This method constructs a new dictionary array with the given dictionary
/// type, transposing indices using the transpose map. The type and the
/// transpose map are typically computed using DictionaryUnifier.
///
/// \param[in] type the new type object
/// \param[in] dictionary the new dictionary
/// \param[in] transpose_map transposition array of this array's indices
/// into the target array's indices
/// \param[in] pool a pool to allocate the array data from
Result<std::shared_ptr<Array>> Transpose(
const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& dictionary,
const int32_t* transpose_map, MemoryPool* pool = default_memory_pool()) const;
/// \brief Determine whether dictionary arrays may be compared without unification
bool CanCompareIndices(const DictionaryArray& other) const;
/// \brief Return the dictionary for this array, which is stored as
/// a member of the ArrayData internal structure
std::shared_ptr<Array> dictionary() const;
std::shared_ptr<Array> indices() const;
/// \brief Return the ith value of indices, cast to int64_t. Not recommended
/// for use in performance-sensitive code. Does not validate whether the
/// value is null or out-of-bounds.
int64_t GetValueIndex(int64_t i) const;
const DictionaryType* dict_type() const { return dict_type_; }
private:
void SetData(const std::shared_ptr<ArrayData>& data);
const DictionaryType* dict_type_;
std::shared_ptr<Array> indices_;
// Lazily initialized when invoking dictionary()
mutable std::shared_ptr<Array> dictionary_;
};
/// \brief Helper class for incremental dictionary unification
class ARROW_EXPORT DictionaryUnifier {
public:
virtual ~DictionaryUnifier() = default;
/// \brief Construct a DictionaryUnifier
/// \param[in] value_type the data type of the dictionaries
/// \param[in] pool MemoryPool to use for memory allocations
static Result<std::unique_ptr<DictionaryUnifier>> Make(
std::shared_ptr<DataType> value_type, MemoryPool* pool = default_memory_pool());
/// \brief Unify dictionaries across array chunks
///
/// The dictionaries in the array chunks will be unified, their indices
/// accordingly transposed.
///
/// Only dictionaries with a primitive value type are currently supported.
/// However, dictionaries nested inside a more complex type are correctly unified.
static Result<std::shared_ptr<ChunkedArray>> UnifyChunkedArray(
const std::shared_ptr<ChunkedArray>& array,
MemoryPool* pool = default_memory_pool());
/// \brief Unify dictionaries across the chunks of each table column
///
/// The dictionaries in each table column will be unified, their indices
/// accordingly transposed.
///
/// Only dictionaries with a primitive value type are currently supported.
/// However, dictionaries nested inside a more complex type are correctly unified.
static Result<std::shared_ptr<Table>> UnifyTable(
const Table& table, MemoryPool* pool = default_memory_pool());
/// \brief Append dictionary to the internal memo
virtual Status Unify(const Array& dictionary) = 0;
/// \brief Append dictionary and compute transpose indices
/// \param[in] dictionary the dictionary values to unify
/// \param[out] out_transpose a Buffer containing computed transpose indices
/// as int32_t values equal in length to the passed dictionary. The value in
/// each slot corresponds to the new index value for each original index
/// for a DictionaryArray with the old dictionary
virtual Status Unify(const Array& dictionary,
std::shared_ptr<Buffer>* out_transpose) = 0;
/// \brief Return a result DictionaryType with the smallest possible index
/// type to accommodate the unified dictionary. The unifier cannot be used
/// after this is called
virtual Status GetResult(std::shared_ptr<DataType>* out_type,
std::shared_ptr<Array>* out_dict) = 0;
/// \brief Return a unified dictionary with the given index type. If
/// the index type is not large enough then an invalid status will be returned.
/// The unifier cannot be used after this is called
virtual Status GetResultWithIndexType(const std::shared_ptr<DataType>& index_type,
std::shared_ptr<Array>* out_dict) = 0;
};
} // namespace arrow
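A sketch of DictionaryArray::FromArrays with builder-constructed inputs (StringBuilder and Int8Builder are from the wider Arrow API; MakeDictExample is a hypothetical name):

arrow::Result<std::shared_ptr<arrow::Array>> MakeDictExample() {
  // dictionary: ["bar", "foo"], indices: [1, 0, 1] as in the class comment above
  arrow::StringBuilder dict_builder;
  ARROW_RETURN_NOT_OK(dict_builder.AppendValues({"bar", "foo"}));
  std::shared_ptr<arrow::Array> dictionary;
  ARROW_RETURN_NOT_OK(dict_builder.Finish(&dictionary));
  arrow::Int8Builder index_builder;
  ARROW_RETURN_NOT_OK(index_builder.AppendValues({1, 0, 1}));
  std::shared_ptr<arrow::Array> indices;
  ARROW_RETURN_NOT_OK(index_builder.Finish(&indices));
  // Validates that all indices are non-negative and within the dictionary:
  return arrow::DictionaryArray::FromArrays(indices, dictionary);
}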


@@ -0,0 +1,569 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Array accessor classes for List, LargeList, FixedSizeList, Map, Struct, and
// Union
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "arrow/array/array_base.h"
#include "arrow/array/data.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_fwd.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \addtogroup nested-arrays
///
/// @{
// ----------------------------------------------------------------------
// ListArray
template <typename TYPE>
class BaseListArray;
namespace internal {
// Private helper for ListArray::SetData.
// Unfortunately, trying to define BaseListArray::SetData outside of this header
// doesn't play well with MSVC.
template <typename TYPE>
void SetListData(BaseListArray<TYPE>* self, const std::shared_ptr<ArrayData>& data,
Type::type expected_type_id = TYPE::type_id);
} // namespace internal
/// Base class for variable-sized list arrays, regardless of offset size.
template <typename TYPE>
class BaseListArray : public Array {
public:
using TypeClass = TYPE;
using offset_type = typename TypeClass::offset_type;
const TypeClass* list_type() const { return list_type_; }
/// \brief Return array object containing the list's values
std::shared_ptr<Array> values() const { return values_; }
/// Note that this buffer does not account for any slice offset
std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[1]; }
std::shared_ptr<DataType> value_type() const { return list_type_->value_type(); }
/// Return pointer to raw value offsets accounting for any slice offset
const offset_type* raw_value_offsets() const {
return raw_value_offsets_ + data_->offset;
}
// The following functions will not perform boundschecking
offset_type value_offset(int64_t i) const {
return raw_value_offsets_[i + data_->offset];
}
offset_type value_length(int64_t i) const {
i += data_->offset;
return raw_value_offsets_[i + 1] - raw_value_offsets_[i];
}
std::shared_ptr<Array> value_slice(int64_t i) const {
return values_->Slice(value_offset(i), value_length(i));
}
protected:
friend void internal::SetListData<TYPE>(BaseListArray<TYPE>* self,
const std::shared_ptr<ArrayData>& data,
Type::type expected_type_id);
const TypeClass* list_type_ = NULLPTR;
std::shared_ptr<Array> values_;
const offset_type* raw_value_offsets_ = NULLPTR;
};
/// Concrete Array class for list data
class ARROW_EXPORT ListArray : public BaseListArray<ListType> {
public:
explicit ListArray(std::shared_ptr<ArrayData> data);
ListArray(std::shared_ptr<DataType> type, int64_t length,
std::shared_ptr<Buffer> value_offsets, std::shared_ptr<Array> values,
std::shared_ptr<Buffer> null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
/// \brief Construct ListArray from array of offsets and child value array
///
/// This function does the bare minimum of validation of the offsets and
/// input types, and will allocate a new offsets array if necessary (i.e. if
/// the offsets contain any nulls). If the offsets do not have nulls, they
/// are assumed to be well-formed
///
/// \param[in] offsets Array containing n + 1 offsets encoding length and
/// size. Must be of int32 type
/// \param[in] values Array containing list values
/// \param[in] pool MemoryPool in case new offsets array needs to be
/// allocated because of null values
static Result<std::shared_ptr<ListArray>> FromArrays(
const Array& offsets, const Array& values,
MemoryPool* pool = default_memory_pool());
static Result<std::shared_ptr<ListArray>> FromArrays(
std::shared_ptr<DataType> type, const Array& offsets, const Array& values,
MemoryPool* pool = default_memory_pool());
/// \brief Return an Array that is a concatenation of the lists in this array.
///
/// Note that this differs from `values()` in that it takes into
/// account this array's offsets as well as null elements backed
/// by non-empty lists (they are skipped, so copying may be needed).
Result<std::shared_ptr<Array>> Flatten(
MemoryPool* memory_pool = default_memory_pool()) const;
/// \brief Return list offsets as an Int32Array
///
/// The returned array will not have a validity bitmap, so you cannot expect
/// to pass it to ListArray::FromArrays() and get back the same list array
/// if the original one has nulls.
std::shared_ptr<Array> offsets() const;
protected:
// This constructor defers SetData to a derived array class
ListArray() = default;
void SetData(const std::shared_ptr<ArrayData>& data);
};
/// Concrete Array class for large list data (with 64-bit offsets)
class ARROW_EXPORT LargeListArray : public BaseListArray<LargeListType> {
public:
explicit LargeListArray(const std::shared_ptr<ArrayData>& data);
LargeListArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Array>& values,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
/// \brief Construct LargeListArray from array of offsets and child value array
///
/// This function does the bare minimum of validation of the offsets and
/// input types, and will allocate a new offsets array if necessary (i.e. if
/// the offsets contain any nulls). If the offsets do not have nulls, they
/// are assumed to be well-formed
///
/// \param[in] offsets Array containing n + 1 offsets encoding length and
/// size. Must be of int64 type
/// \param[in] values Array containing list values
/// \param[in] pool MemoryPool in case new offsets array needs to be
/// allocated because of null values
static Result<std::shared_ptr<LargeListArray>> FromArrays(
const Array& offsets, const Array& values,
MemoryPool* pool = default_memory_pool());
static Result<std::shared_ptr<LargeListArray>> FromArrays(
std::shared_ptr<DataType> type, const Array& offsets, const Array& values,
MemoryPool* pool = default_memory_pool());
/// \brief Return an Array that is a concatenation of the lists in this array.
///
/// Note that this differs from `values()` in that it takes into
/// account this array's offsets as well as null elements backed
/// by non-empty lists (they are skipped, so copying may be needed).
Result<std::shared_ptr<Array>> Flatten(
MemoryPool* memory_pool = default_memory_pool()) const;
/// \brief Return list offsets as an Int64Array
std::shared_ptr<Array> offsets() const;
protected:
void SetData(const std::shared_ptr<ArrayData>& data);
};
// ----------------------------------------------------------------------
// MapArray
/// Concrete Array class for map data
///
/// NB: "value" in this context refers to a pair of a key and the corresponding item
class ARROW_EXPORT MapArray : public ListArray {
public:
using TypeClass = MapType;
explicit MapArray(const std::shared_ptr<ArrayData>& data);
MapArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
MapArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Array>& values,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
/// \brief Construct MapArray from array of offsets and child key, item arrays
///
/// This function does the bare minimum of validation of the offsets and
/// input types, and will allocate a new offsets array if necessary (i.e. if
/// the offsets contain any nulls). If the offsets do not have nulls, they
/// are assumed to be well-formed
///
/// \param[in] offsets Array containing n + 1 offsets encoding length and
/// size. Must be of int32 type
/// \param[in] keys Array containing key values
/// \param[in] items Array containing item values
/// \param[in] pool MemoryPool in case new offsets array needs to be
/// allocated because of null values
static Result<std::shared_ptr<Array>> FromArrays(
const std::shared_ptr<Array>& offsets, const std::shared_ptr<Array>& keys,
const std::shared_ptr<Array>& items, MemoryPool* pool = default_memory_pool());
static Result<std::shared_ptr<Array>> FromArrays(
std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
MemoryPool* pool = default_memory_pool());
const MapType* map_type() const { return map_type_; }
/// \brief Return array object containing all map keys
std::shared_ptr<Array> keys() const { return keys_; }
/// \brief Return array object containing all mapped items
std::shared_ptr<Array> items() const { return items_; }
/// Validate child data before constructing the actual MapArray.
static Status ValidateChildData(
const std::vector<std::shared_ptr<ArrayData>>& child_data);
protected:
void SetData(const std::shared_ptr<ArrayData>& data);
static Result<std::shared_ptr<Array>> FromArraysInternal(
std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
MemoryPool* pool);
private:
const MapType* map_type_;
std::shared_ptr<Array> keys_, items_;
};
// ----------------------------------------------------------------------
// FixedSizeListArray
/// Concrete Array class for fixed size list data
class ARROW_EXPORT FixedSizeListArray : public Array {
public:
using TypeClass = FixedSizeListType;
using offset_type = TypeClass::offset_type;
explicit FixedSizeListArray(const std::shared_ptr<ArrayData>& data);
FixedSizeListArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Array>& values,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
const FixedSizeListType* list_type() const;
/// \brief Return array object containing the list's values
std::shared_ptr<Array> values() const;
std::shared_ptr<DataType> value_type() const;
// The following functions will not perform boundschecking
int64_t value_offset(int64_t i) const {
i += data_->offset;
return list_size_ * i;
}
int32_t value_length(int64_t i = 0) const {
ARROW_UNUSED(i);
return list_size_;
}
std::shared_ptr<Array> value_slice(int64_t i) const {
return values_->Slice(value_offset(i), value_length(i));
}
/// \brief Return an Array that is a concatenation of the lists in this array.
///
/// Note that this differs from `values()` in that it takes into
/// account null elements (they are skipped, so copying may be needed).
Result<std::shared_ptr<Array>> Flatten(
MemoryPool* memory_pool = default_memory_pool()) const;
/// \brief Construct FixedSizeListArray from child value array and value_length
///
/// \param[in] values Array containing list values
/// \param[in] list_size The fixed length of each list
/// \return Will have length equal to values.length() / list_size
static Result<std::shared_ptr<Array>> FromArrays(const std::shared_ptr<Array>& values,
int32_t list_size);
/// \brief Construct FixedSizeListArray from child value array and type
///
/// \param[in] values Array containing list values
/// \param[in] type The fixed sized list type
/// \return Will have length equal to values.length() / type.list_size()
static Result<std::shared_ptr<Array>> FromArrays(const std::shared_ptr<Array>& values,
std::shared_ptr<DataType> type);
protected:
void SetData(const std::shared_ptr<ArrayData>& data);
int32_t list_size_;
private:
std::shared_ptr<Array> values_;
};
// ----------------------------------------------------------------------
// Struct
/// Concrete Array class for struct data
class ARROW_EXPORT StructArray : public Array {
public:
using TypeClass = StructType;
explicit StructArray(const std::shared_ptr<ArrayData>& data);
StructArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::vector<std::shared_ptr<Array>>& children,
std::shared_ptr<Buffer> null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
/// \brief Return a StructArray from child arrays and field names.
///
/// The length and data type are automatically inferred from the arguments.
/// There should be at least one child array.
static Result<std::shared_ptr<StructArray>> Make(
const ArrayVector& children, const std::vector<std::string>& field_names,
std::shared_ptr<Buffer> null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
/// \brief Return a StructArray from child arrays and fields.
///
/// The length is automatically inferred from the arguments.
/// There should be at least one child array. This method does not
/// check that field types and child array types are consistent.
static Result<std::shared_ptr<StructArray>> Make(
const ArrayVector& children, const FieldVector& fields,
std::shared_ptr<Buffer> null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
const StructType* struct_type() const;
// Return a shared pointer in case the requestor desires to share ownership
// with this array. The returned array has its offset, length and null
// count adjusted.
std::shared_ptr<Array> field(int pos) const;
const ArrayVector& fields() const;
/// Returns null if name not found
std::shared_ptr<Array> GetFieldByName(const std::string& name) const;
/// \brief Flatten this array as a vector of arrays, one for each field
///
/// \param[in] pool The pool to allocate null bitmaps from, if necessary
Result<ArrayVector> Flatten(MemoryPool* pool = default_memory_pool()) const;
/// \brief Get one of the child arrays, combining its null bitmap
/// with the parent struct array's bitmap.
///
/// \param[in] index Which child array to get
/// \param[in] pool The pool to allocate null bitmaps from, if necessary
Result<std::shared_ptr<Array>> GetFlattenedField(
int index, MemoryPool* pool = default_memory_pool()) const;
private:
// For caching boxed child data
// XXX This is not handled in a thread-safe manner.
mutable ArrayVector boxed_fields_;
};
// ----------------------------------------------------------------------
// Union
/// Base class for SparseUnionArray and DenseUnionArray
class ARROW_EXPORT UnionArray : public Array {
public:
using type_code_t = int8_t;
/// Note that this buffer does not account for any slice offset
std::shared_ptr<Buffer> type_codes() const { return data_->buffers[1]; }
const type_code_t* raw_type_codes() const { return raw_type_codes_ + data_->offset; }
/// The logical type code of the value at index.
type_code_t type_code(int64_t i) const { return raw_type_codes_[i + data_->offset]; }
/// The physical child id containing value at index.
int child_id(int64_t i) const {
return union_type_->child_ids()[raw_type_codes_[i + data_->offset]];
}
const UnionType* union_type() const { return union_type_; }
UnionMode::type mode() const { return union_type_->mode(); }
/// \brief Return the given field as an individual array.
///
/// For sparse unions, the returned array has its offset, length and null
/// count adjusted.
std::shared_ptr<Array> field(int pos) const;
protected:
void SetData(std::shared_ptr<ArrayData> data);
const type_code_t* raw_type_codes_;
const UnionType* union_type_;
// For caching boxed child data
mutable std::vector<std::shared_ptr<Array>> boxed_fields_;
};
/// Concrete Array class for sparse union data
class ARROW_EXPORT SparseUnionArray : public UnionArray {
public:
using TypeClass = SparseUnionType;
explicit SparseUnionArray(std::shared_ptr<ArrayData> data);
SparseUnionArray(std::shared_ptr<DataType> type, int64_t length, ArrayVector children,
std::shared_ptr<Buffer> type_ids, int64_t offset = 0);
/// \brief Construct SparseUnionArray from type_ids and children
///
/// This function does the bare minimum of validation of the input types.
///
/// \param[in] type_ids An array of logical type ids for the union type
/// \param[in] children Vector of children Arrays containing the data for each type.
/// \param[in] type_codes Vector of type codes.
static Result<std::shared_ptr<Array>> Make(const Array& type_ids, ArrayVector children,
std::vector<type_code_t> type_codes) {
return Make(type_ids, std::move(children), std::vector<std::string>{},
std::move(type_codes));
}
/// \brief Construct SparseUnionArray with custom field names from type_ids and children
///
/// This function does the bare minimum of validation of the input types.
///
/// \param[in] type_ids An array of logical type ids for the union type
/// \param[in] children Vector of children Arrays containing the data for each type.
/// \param[in] field_names Vector of strings containing the name of each field.
/// \param[in] type_codes Vector of type codes.
static Result<std::shared_ptr<Array>> Make(const Array& type_ids, ArrayVector children,
std::vector<std::string> field_names = {},
std::vector<type_code_t> type_codes = {});
const SparseUnionType* union_type() const {
return internal::checked_cast<const SparseUnionType*>(union_type_);
}
/// \brief Get one of the child arrays, adjusting its null bitmap
/// where the union array type code does not match.
///
/// \param[in] index Which child array to get (i.e. the physical index, not the type
/// code)
/// \param[in] pool The pool to allocate null bitmaps from, if necessary
Result<std::shared_ptr<Array>> GetFlattenedField(
int index, MemoryPool* pool = default_memory_pool()) const;
protected:
void SetData(std::shared_ptr<ArrayData> data);
};
/// \brief Concrete Array class for dense union data
///
/// Note that union types do not have a validity bitmap
class ARROW_EXPORT DenseUnionArray : public UnionArray {
public:
using TypeClass = DenseUnionType;
explicit DenseUnionArray(const std::shared_ptr<ArrayData>& data);
DenseUnionArray(std::shared_ptr<DataType> type, int64_t length, ArrayVector children,
std::shared_ptr<Buffer> type_ids,
std::shared_ptr<Buffer> value_offsets = NULLPTR, int64_t offset = 0);
/// \brief Construct DenseUnionArray from type_ids, value_offsets, and children
///
/// This function does the bare minimum of validation of the offsets and
/// input types.
///
/// \param[in] type_ids An array of logical type ids for the union type
/// \param[in] value_offsets An array of signed int32 values indicating the
/// relative offset into the respective child array for the type in a given slot.
/// The respective offsets for each child value array must be in order / increasing.
/// \param[in] children Vector of children Arrays containing the data for each type.
/// \param[in] type_codes Vector of type codes.
static Result<std::shared_ptr<Array>> Make(const Array& type_ids,
const Array& value_offsets,
ArrayVector children,
std::vector<type_code_t> type_codes) {
return Make(type_ids, value_offsets, std::move(children), std::vector<std::string>{},
std::move(type_codes));
}
/// \brief Construct DenseUnionArray with custom field names from type_ids,
/// value_offsets, and children
///
/// This function does the bare minimum of validation of the offsets and
/// input types.
///
/// \param[in] type_ids An array of logical type ids for the union type
/// \param[in] value_offsets An array of signed int32 values indicating the
/// relative offset into the respective child array for the type in a given slot.
/// The respective offsets for each child value array must be in order / increasing.
/// \param[in] children Vector of children Arrays containing the data for each type.
/// \param[in] field_names Vector of strings containing the name of each field.
/// \param[in] type_codes Vector of type codes.
static Result<std::shared_ptr<Array>> Make(const Array& type_ids,
const Array& value_offsets,
ArrayVector children,
std::vector<std::string> field_names = {},
std::vector<type_code_t> type_codes = {});
const DenseUnionType* union_type() const {
return internal::checked_cast<const DenseUnionType*>(union_type_);
}
/// Note that this buffer does not account for any slice offset
std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[2]; }
int32_t value_offset(int64_t i) const { return raw_value_offsets_[i + data_->offset]; }
const int32_t* raw_value_offsets() const { return raw_value_offsets_ + data_->offset; }
protected:
const int32_t* raw_value_offsets_;
void SetData(const std::shared_ptr<ArrayData>& data);
};
/// @}
} // namespace arrow
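A sketch of ListArray::FromArrays and Flatten (Int32Builder is from the wider Arrow API; ListExample is a hypothetical name and `values` is assumed to be a pre-built child array):

arrow::Result<std::shared_ptr<arrow::Array>> ListExample(
    const std::shared_ptr<arrow::Array>& values) {
  // offsets: n + 1 int32 entries; here two lists, [0, 2) and [2, values->length())
  arrow::Int32Builder offsets_builder;
  ARROW_RETURN_NOT_OK(offsets_builder.AppendValues(
      {0, 2, static_cast<int32_t>(values->length())}));
  std::shared_ptr<arrow::Array> offsets;
  ARROW_RETURN_NOT_OK(offsets_builder.Finish(&offsets));
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::ListArray> list,
                        arrow::ListArray::FromArrays(*offsets, *values));
  return list->Flatten();  // concatenates the lists back, skipping slots as documented
}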


@@ -0,0 +1,202 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Array accessor types for primitive/C-type-based arrays, such as numbers,
// boolean, and temporal types.
#pragma once
#include <cstdint>
#include <memory>
#include "arrow/array/array_base.h"
#include "arrow/array/data.h"
#include "arrow/stl_iterator.h"
#include "arrow/type.h"
#include "arrow/type_fwd.h" // IWYU pragma: export
#include "arrow/type_traits.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// Concrete Array class for boolean data
class ARROW_EXPORT BooleanArray : public PrimitiveArray {
public:
using TypeClass = BooleanType;
using IteratorType = stl::ArrayIterator<BooleanArray>;
explicit BooleanArray(const std::shared_ptr<ArrayData>& data);
BooleanArray(int64_t length, const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
bool Value(int64_t i) const {
return bit_util::GetBit(reinterpret_cast<const uint8_t*>(raw_values_),
i + data_->offset);
}
bool GetView(int64_t i) const { return Value(i); }
util::optional<bool> operator[](int64_t i) const { return *IteratorType(*this, i); }
/// \brief Return the number of false (0) values among the valid
/// values. Result is not cached.
int64_t false_count() const;
/// \brief Return the number of true (1) values among the valid
/// values. Result is not cached.
int64_t true_count() const;
IteratorType begin() const { return IteratorType(*this); }
IteratorType end() const { return IteratorType(*this, length()); }
protected:
using PrimitiveArray::PrimitiveArray;
};
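// Sketch (hypothetical helper, not from the original header): a typical read
// loop over a BooleanArray, combining IsValid() from the Array base class with
// Value(). For valid-only counts, true_count() above is the dedicated (but
// uncached) accessor.
inline int64_t ExampleCountTrue(const BooleanArray& values) {
  int64_t n = 0;
  for (int64_t i = 0; i < values.length(); ++i) {
    // Value(i) is unspecified for null slots, so check validity first.
    if (values.IsValid(i) && values.Value(i)) ++n;
  }
  return n;  // matches values.true_count()
}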
/// \addtogroup numeric-arrays
///
/// @{
/// \brief Concrete Array class for numeric data with a corresponding C type
///
/// This class is templated on the corresponding DataType subclass for the
/// given data, for example NumericArray<Int8Type> or NumericArray<Date32Type>.
///
/// Note that convenience aliases are available for all accepted types
/// (for example Int8Array for NumericArray<Int8Type>).
template <typename TYPE>
class NumericArray : public PrimitiveArray {
public:
using TypeClass = TYPE;
using value_type = typename TypeClass::c_type;
using IteratorType = stl::ArrayIterator<NumericArray<TYPE>>;
explicit NumericArray(const std::shared_ptr<ArrayData>& data) : PrimitiveArray(data) {}
// Only enable this constructor without a type argument for types without additional
// metadata
template <typename T1 = TYPE>
NumericArray(enable_if_parameter_free<T1, int64_t> length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0)
: PrimitiveArray(TypeTraits<T1>::type_singleton(), length, data, null_bitmap,
null_count, offset) {}
const value_type* raw_values() const {
return reinterpret_cast<const value_type*>(raw_values_) + data_->offset;
}
value_type Value(int64_t i) const { return raw_values()[i]; }
// For API compatibility with BinaryArray etc.
value_type GetView(int64_t i) const { return Value(i); }
util::optional<value_type> operator[](int64_t i) const {
return *IteratorType(*this, i);
}
IteratorType begin() const { return IteratorType(*this); }
IteratorType end() const { return IteratorType(*this, length()); }
protected:
using PrimitiveArray::PrimitiveArray;
};
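// Sketch (hypothetical helper; assumes the Int64Array alias mentioned above):
// the STL-style iterators yield util::optional values, encoding nulls without
// a separate validity check.
inline int64_t ExampleSumValid(const Int64Array& values) {
  int64_t sum = 0;
  for (util::optional<int64_t> v : values) {
    if (v.has_value()) sum += *v;  // skip null slots
  }
  return sum;
}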
/// DayTimeIntervalArray
/// ---------------------
/// \brief Array of Day and Millisecond values.
class ARROW_EXPORT DayTimeIntervalArray : public PrimitiveArray {
public:
using TypeClass = DayTimeIntervalType;
using IteratorType = stl::ArrayIterator<DayTimeIntervalArray>;
explicit DayTimeIntervalArray(const std::shared_ptr<ArrayData>& data);
DayTimeIntervalArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
DayTimeIntervalArray(int64_t length, const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
TypeClass::DayMilliseconds GetValue(int64_t i) const;
TypeClass::DayMilliseconds Value(int64_t i) const { return GetValue(i); }
// For compatibility with Take kernel.
TypeClass::DayMilliseconds GetView(int64_t i) const { return GetValue(i); }
IteratorType begin() const { return IteratorType(*this); }
IteratorType end() const { return IteratorType(*this, length()); }
util::optional<TypeClass::DayMilliseconds> operator[](int64_t i) const {
return *IteratorType(*this, i);
}
int32_t byte_width() const { return sizeof(TypeClass::DayMilliseconds); }
const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width(); }
};
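// Sketch (hypothetical helper; assumes DayMilliseconds exposes .days and
// .milliseconds, per DayTimeIntervalType): flatten one valid slot to a total
// millisecond count.
inline int64_t ExampleTotalMilliseconds(const DayTimeIntervalArray& values,
                                        int64_t i) {
  const auto v = values.Value(i);  // unspecified for null slots
  return static_cast<int64_t>(v.days) * 86400000LL + v.milliseconds;
}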
/// \brief Array of Month, Day and nanosecond values.
class ARROW_EXPORT MonthDayNanoIntervalArray : public PrimitiveArray {
public:
using TypeClass = MonthDayNanoIntervalType;
using IteratorType = stl::ArrayIterator<MonthDayNanoIntervalArray>;
explicit MonthDayNanoIntervalArray(const std::shared_ptr<ArrayData>& data);
MonthDayNanoIntervalArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
MonthDayNanoIntervalArray(int64_t length, const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
TypeClass::MonthDayNanos GetValue(int64_t i) const;
TypeClass::MonthDayNanos Value(int64_t i) const { return GetValue(i); }
// For compatibility with Take kernel.
TypeClass::MonthDayNanos GetView(int64_t i) const { return GetValue(i); }
IteratorType begin() const { return IteratorType(*this); }
IteratorType end() const { return IteratorType(*this, length()); }
util::optional<TypeClass::MonthDayNanos> operator[](int64_t i) const {
return *IteratorType(*this, i);
}
int32_t byte_width() const { return sizeof(TypeClass::MonthDayNanos); }
const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width(); }
};
/// @}
} // namespace arrow

View File

@@ -0,0 +1,213 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <cstring>
#include <memory>
#include <type_traits>
#include "arrow/array/builder_base.h"
#include "arrow/buffer.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \addtogroup numeric-builders
///
/// @{
namespace internal {
class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
public:
AdaptiveIntBuilderBase(uint8_t start_int_size, MemoryPool* pool);
explicit AdaptiveIntBuilderBase(MemoryPool* pool)
: AdaptiveIntBuilderBase(sizeof(uint8_t), pool) {}
/// \brief Append multiple nulls
/// \param[in] length the number of nulls to append
Status AppendNulls(int64_t length) final {
ARROW_RETURN_NOT_OK(CommitPendingData());
if (ARROW_PREDICT_TRUE(length > 0)) {
ARROW_RETURN_NOT_OK(Reserve(length));
memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
UnsafeSetNull(length);
}
return Status::OK();
}
Status AppendNull() final {
pending_data_[pending_pos_] = 0;
pending_valid_[pending_pos_] = 0;
pending_has_nulls_ = true;
++pending_pos_;
++length_;
++null_count_;
if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
return CommitPendingData();
}
return Status::OK();
}
Status AppendEmptyValues(int64_t length) final {
ARROW_RETURN_NOT_OK(CommitPendingData());
if (ARROW_PREDICT_TRUE(length > 0)) {
ARROW_RETURN_NOT_OK(Reserve(length));
memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
UnsafeSetNotNull(length);
}
return Status::OK();
}
Status AppendEmptyValue() final {
pending_data_[pending_pos_] = 0;
pending_valid_[pending_pos_] = 1;
++pending_pos_;
++length_;
if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
return CommitPendingData();
}
return Status::OK();
}
void Reset() override;
Status Resize(int64_t capacity) override;
protected:
Status AppendInternal(const uint64_t val) {
pending_data_[pending_pos_] = val;
pending_valid_[pending_pos_] = 1;
++pending_pos_;
++length_;
if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
return CommitPendingData();
}
return Status::OK();
}
virtual Status CommitPendingData() = 0;
template <typename new_type, typename old_type>
typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
ExpandIntSizeInternal();
template <typename new_type, typename old_type>
typename std::enable_if<(sizeof(old_type) < sizeof(new_type)), Status>::type
ExpandIntSizeInternal();
std::shared_ptr<ResizableBuffer> data_;
uint8_t* raw_data_ = NULLPTR;
const uint8_t start_int_size_;
uint8_t int_size_;
static constexpr int32_t pending_size_ = 1024;
uint8_t pending_valid_[pending_size_];
uint64_t pending_data_[pending_size_];
int32_t pending_pos_ = 0;
bool pending_has_nulls_ = false;
};
} // namespace internal
class ARROW_EXPORT AdaptiveUIntBuilder : public internal::AdaptiveIntBuilderBase {
public:
explicit AdaptiveUIntBuilder(uint8_t start_int_size,
MemoryPool* pool = default_memory_pool());
explicit AdaptiveUIntBuilder(MemoryPool* pool = default_memory_pool())
: AdaptiveUIntBuilder(sizeof(uint8_t), pool) {}
using ArrayBuilder::Advance;
using internal::AdaptiveIntBuilderBase::Reset;
/// Scalar append
Status Append(const uint64_t val) { return AppendInternal(val); }
/// \brief Append a sequence of elements in one shot
/// \param[in] values a contiguous C array of values
/// \param[in] length the number of values to append
/// \param[in] valid_bytes an optional sequence of bytes where non-zero
/// indicates a valid (non-null) value
/// \return Status
Status AppendValues(const uint64_t* values, int64_t length,
const uint8_t* valid_bytes = NULLPTR);
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
std::shared_ptr<DataType> type() const override;
protected:
Status CommitPendingData() override;
Status ExpandIntSize(uint8_t new_int_size);
Status AppendValuesInternal(const uint64_t* values, int64_t length,
const uint8_t* valid_bytes);
template <typename new_type>
Status ExpandIntSizeN();
};
class ARROW_EXPORT AdaptiveIntBuilder : public internal::AdaptiveIntBuilderBase {
public:
explicit AdaptiveIntBuilder(uint8_t start_int_size,
MemoryPool* pool = default_memory_pool());
explicit AdaptiveIntBuilder(MemoryPool* pool = default_memory_pool())
: AdaptiveIntBuilder(sizeof(uint8_t), pool) {}
using ArrayBuilder::Advance;
using internal::AdaptiveIntBuilderBase::Reset;
/// Scalar append
Status Append(const int64_t val) { return AppendInternal(static_cast<uint64_t>(val)); }
/// \brief Append a sequence of elements in one shot
/// \param[in] values a contiguous C array of values
/// \param[in] length the number of values to append
/// \param[in] valid_bytes an optional sequence of bytes where non-zero
/// indicates a valid (non-null) value
/// \return Status
Status AppendValues(const int64_t* values, int64_t length,
const uint8_t* valid_bytes = NULLPTR);
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
std::shared_ptr<DataType> type() const override;
protected:
Status CommitPendingData() override;
Status ExpandIntSize(uint8_t new_int_size);
Status AppendValuesInternal(const int64_t* values, int64_t length,
const uint8_t* valid_bytes);
template <typename new_type>
Status ExpandIntSizeN();
};
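// Sketch (hypothetical helper): the adaptive builders start at one byte per
// value and widen the physical integer size only when an appended value
// requires it, so the finished array is as narrow as the data allows.
inline Result<std::shared_ptr<Array>> ExampleAdaptiveInts() {
  AdaptiveIntBuilder builder;                    // starts at int8 width
  ARROW_RETURN_NOT_OK(builder.Append(1));        // still fits in one byte
  ARROW_RETURN_NOT_OK(builder.Append(1 << 20));  // forces widening to int32
  return builder.Finish();
}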
/// @}
} // namespace arrow

View File

@@ -0,0 +1,350 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm> // IWYU pragma: keep
#include <cstdint>
#include <limits>
#include <memory>
#include <utility>
#include <vector>
#include "arrow/array/array_base.h"
#include "arrow/array/array_primitive.h"
#include "arrow/buffer.h"
#include "arrow/buffer_builder.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \defgroup numeric-builders Concrete builder subclasses for numeric types
/// @{
/// @}
/// \defgroup temporal-builders Concrete builder subclasses for temporal types
/// @{
/// @}
/// \defgroup binary-builders Concrete builder subclasses for binary types
/// @{
/// @}
/// \defgroup nested-builders Concrete builder subclasses for nested types
/// @{
/// @}
/// \defgroup dictionary-builders Concrete builder subclasses for dictionary types
/// @{
/// @}
constexpr int64_t kMinBuilderCapacity = 1 << 5;
constexpr int64_t kListMaximumElements = std::numeric_limits<int32_t>::max() - 1;
/// Base class for all data array builders.
///
/// This class provides facilities for incrementally building the null bitmap
/// (see the Append methods) and, as a side effect, tracks the current number
/// of slots and the null count.
///
/// \note Users are expected to use builders as one of the concrete types below.
/// For example, ArrayBuilder* pointing to BinaryBuilder should be downcast before use.
class ARROW_EXPORT ArrayBuilder {
public:
explicit ArrayBuilder(MemoryPool* pool) : pool_(pool), null_bitmap_builder_(pool) {}
ARROW_DEFAULT_MOVE_AND_ASSIGN(ArrayBuilder);
virtual ~ArrayBuilder() = default;
/// For nested types. Since the objects are owned by this class instance, we
/// skip shared pointers and just return a raw pointer
ArrayBuilder* child(int i) { return children_[i].get(); }
const std::shared_ptr<ArrayBuilder>& child_builder(int i) const { return children_[i]; }
int num_children() const { return static_cast<int>(children_.size()); }
virtual int64_t length() const { return length_; }
int64_t null_count() const { return null_count_; }
int64_t capacity() const { return capacity_; }
/// \brief Ensure that enough memory has been allocated to fit the indicated
/// number of total elements in the builder, including any that have already
/// been appended. Does not account for reallocations that may be due to
/// variable size data, like binary values. To make space for incremental
/// appends, use Reserve instead.
///
/// \param[in] capacity the minimum number of total array values to
/// accommodate. Must be greater than the current capacity.
/// \return Status
virtual Status Resize(int64_t capacity);
/// \brief Ensure that there is enough space allocated to append the indicated
/// number of elements without any further reallocation. Overallocation is
/// used in order to minimize the impact of incremental Reserve() calls.
/// Note that additional_capacity is relative to the current number of elements
/// rather than to the current capacity, so calls to Reserve() which are not
/// interspersed with addition of new elements may not increase the capacity.
///
/// \param[in] additional_capacity the number of additional array values
/// \return Status
Status Reserve(int64_t additional_capacity) {
auto current_capacity = capacity();
auto min_capacity = length() + additional_capacity;
if (min_capacity <= current_capacity) return Status::OK();
// leave growth factor up to BufferBuilder
auto new_capacity = BufferBuilder::GrowByFactor(current_capacity, min_capacity);
return Resize(new_capacity);
}
/// Reset the builder.
virtual void Reset();
/// \brief Append a null value to builder
virtual Status AppendNull() = 0;
/// \brief Append a number of null values to builder
virtual Status AppendNulls(int64_t length) = 0;
/// \brief Append a non-null value to builder
///
/// The appended value is an implementation detail, but the corresponding
/// memory slot is guaranteed to be initialized.
/// This method is useful when appending a null value to a parent nested type.
virtual Status AppendEmptyValue() = 0;
/// \brief Append a number of non-null values to builder
///
/// The appended values are an implementation detail, but the corresponding
/// memory slot is guaranteed to be initialized.
/// This method is useful when appending null values to a parent nested type.
virtual Status AppendEmptyValues(int64_t length) = 0;
/// \brief Append a value from a scalar
Status AppendScalar(const Scalar& scalar) { return AppendScalar(scalar, 1); }
virtual Status AppendScalar(const Scalar& scalar, int64_t n_repeats);
virtual Status AppendScalars(const ScalarVector& scalars);
/// \brief Append a range of values from an array.
///
/// The given array must be the same type as the builder.
virtual Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) {
return Status::NotImplemented("AppendArraySlice for builder for ", *type());
}
/// For cases where raw data was memcpy'd into the internal buffers, allows us
/// to advance the length of the builder. The caller is responsible for using
/// this function correctly.
ARROW_DEPRECATED(
"Deprecated in 6.0.0. ArrayBuilder::Advance is poorly supported and mostly "
"untested.\nFor low-level control over buffer construction, use BufferBuilder "
"or TypedBufferBuilder directly.")
Status Advance(int64_t elements);
/// \brief Return result of builder as an internal generic ArrayData
/// object. Resets builder except for dictionary builder
///
/// \param[out] out the finalized ArrayData object
/// \return Status
virtual Status FinishInternal(std::shared_ptr<ArrayData>* out) = 0;
/// \brief Return result of builder as an Array object.
///
/// The builder is reset except for DictionaryBuilder.
///
/// \param[out] out the finalized Array object
/// \return Status
Status Finish(std::shared_ptr<Array>* out);
/// \brief Return result of builder as an Array object.
///
/// The builder is reset except for DictionaryBuilder.
///
/// \return The finalized Array object
Result<std::shared_ptr<Array>> Finish();
/// \brief Return the type of the built Array
virtual std::shared_ptr<DataType> type() const = 0;
protected:
/// Append to null bitmap
Status AppendToBitmap(bool is_valid);
/// Vector append. Treat each zero byte as a null. If valid_bytes is null
/// assume all of length bits are valid.
Status AppendToBitmap(const uint8_t* valid_bytes, int64_t length);
/// Uniform append. Append N times the same validity bit.
Status AppendToBitmap(int64_t num_bits, bool value);
/// Set the next length bits to not null (i.e. valid).
Status SetNotNull(int64_t length);
// Unsafe operations (don't check capacity/don't resize)
void UnsafeAppendNull() { UnsafeAppendToBitmap(false); }
// Append to null bitmap, update the length
void UnsafeAppendToBitmap(bool is_valid) {
null_bitmap_builder_.UnsafeAppend(is_valid);
++length_;
if (!is_valid) ++null_count_;
}
// Vector append. Treat each zero byte as a null. If valid_bytes is null
// assume all of length bits are valid.
void UnsafeAppendToBitmap(const uint8_t* valid_bytes, int64_t length) {
if (valid_bytes == NULLPTR) {
return UnsafeSetNotNull(length);
}
null_bitmap_builder_.UnsafeAppend(valid_bytes, length);
length_ += length;
null_count_ = null_bitmap_builder_.false_count();
}
// Vector append. Copy from a given bitmap. If bitmap is null assume
// all of length bits are valid.
void UnsafeAppendToBitmap(const uint8_t* bitmap, int64_t offset, int64_t length) {
if (bitmap == NULLPTR) {
return UnsafeSetNotNull(length);
}
null_bitmap_builder_.UnsafeAppend(bitmap, offset, length);
length_ += length;
null_count_ = null_bitmap_builder_.false_count();
}
// Append the same validity value a given number of times.
void UnsafeAppendToBitmap(const int64_t num_bits, bool value) {
if (value) {
UnsafeSetNotNull(num_bits);
} else {
UnsafeSetNull(num_bits);
}
}
void UnsafeAppendToBitmap(const std::vector<bool>& is_valid);
// Set the next validity bits to not null (i.e. valid).
void UnsafeSetNotNull(int64_t length);
// Set the next validity bits to null (i.e. invalid).
void UnsafeSetNull(int64_t length);
static Status TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer);
/// \brief Finish to an array of the specified ArrayType
template <typename ArrayType>
Status FinishTyped(std::shared_ptr<ArrayType>* out) {
std::shared_ptr<Array> out_untyped;
ARROW_RETURN_NOT_OK(Finish(&out_untyped));
*out = std::static_pointer_cast<ArrayType>(std::move(out_untyped));
return Status::OK();
}
// Check the requested capacity for validity
Status CheckCapacity(int64_t new_capacity) {
if (ARROW_PREDICT_FALSE(new_capacity < 0)) {
return Status::Invalid(
"Resize capacity must be positive (requested: ", new_capacity, ")");
}
if (ARROW_PREDICT_FALSE(new_capacity < length_)) {
return Status::Invalid("Resize cannot downsize (requested: ", new_capacity,
", current length: ", length_, ")");
}
return Status::OK();
}
// Check for array type
Status CheckArrayType(const std::shared_ptr<DataType>& expected_type,
const Array& array, const char* message);
Status CheckArrayType(Type::type expected_type, const Array& array,
const char* message);
MemoryPool* pool_;
TypedBufferBuilder<bool> null_bitmap_builder_;
int64_t null_count_ = 0;
// Array length, so far. Also, the index of the next element to be added
int64_t length_ = 0;
int64_t capacity_ = 0;
// Child value array builders. These are owned by this class
std::vector<std::shared_ptr<ArrayBuilder>> children_;
private:
ARROW_DISALLOW_COPY_AND_ASSIGN(ArrayBuilder);
};
/// \brief Construct an empty ArrayBuilder corresponding to the data
/// type
/// \param[in] pool the MemoryPool to use for allocations
/// \param[in] type the data type to create the builder for
/// \param[out] out the created ArrayBuilder
ARROW_EXPORT
Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
std::unique_ptr<ArrayBuilder>* out);
inline Result<std::unique_ptr<ArrayBuilder>> MakeBuilder(
const std::shared_ptr<DataType>& type, MemoryPool* pool = default_memory_pool()) {
std::unique_ptr<ArrayBuilder> out;
ARROW_RETURN_NOT_OK(MakeBuilder(pool, type, &out));
return std::move(out);
}
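// A minimal sketch of the generic construction path (hypothetical helper;
// assumes Int32Builder and internal::checked_cast are in scope via the usual
// Arrow headers): per the class note above, the generic ArrayBuilder is
// downcast to the concrete builder before typed appends.
inline Result<std::shared_ptr<Array>> ExampleBuildInt32() {
  ARROW_ASSIGN_OR_RAISE(auto builder, MakeBuilder(int32()));
  auto* typed = internal::checked_cast<Int32Builder*>(builder.get());
  ARROW_RETURN_NOT_OK(typed->Append(42));
  ARROW_RETURN_NOT_OK(typed->AppendNull());
  return builder->Finish();  // [42, null] as an int32 array
}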
/// \brief Construct an empty ArrayBuilder corresponding to the data
/// type, where any top-level or nested dictionary builders return the
/// exact index type specified by the type.
ARROW_EXPORT
Status MakeBuilderExactIndex(MemoryPool* pool, const std::shared_ptr<DataType>& type,
std::unique_ptr<ArrayBuilder>* out);
inline Result<std::unique_ptr<ArrayBuilder>> MakeBuilderExactIndex(
const std::shared_ptr<DataType>& type, MemoryPool* pool = default_memory_pool()) {
std::unique_ptr<ArrayBuilder> out;
ARROW_RETURN_NOT_OK(MakeBuilderExactIndex(pool, type, &out));
return std::move(out);
}
/// \brief Construct an empty DictionaryBuilder initialized optionally
/// with a pre-existing dictionary
/// \param[in] pool the MemoryPool to use for allocations
/// \param[in] type the dictionary type to create the builder for
/// \param[in] dictionary the initial dictionary, if any. May be nullptr
/// \param[out] out the created ArrayBuilder
ARROW_EXPORT
Status MakeDictionaryBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
const std::shared_ptr<Array>& dictionary,
std::unique_ptr<ArrayBuilder>* out);
inline Result<std::unique_ptr<ArrayBuilder>> MakeDictionaryBuilder(
const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& dictionary,
MemoryPool* pool = default_memory_pool()) {
std::unique_ptr<ArrayBuilder> out;
ARROW_RETURN_NOT_OK(MakeDictionaryBuilder(pool, type, dictionary, &out));
return std::move(out);
}
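// Sketch (hypothetical helper; assumes the dictionary() factory from
// arrow/type.h): seeding the builder with an initial dictionary keeps index
// assignments stable across batches.
inline Result<std::unique_ptr<ArrayBuilder>> ExampleSeededDictBuilder(
    const std::shared_ptr<Array>& initial_dictionary) {
  auto type = dictionary(int32(), initial_dictionary->type());
  return MakeDictionaryBuilder(type, initial_dictionary);
}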
} // namespace arrow

View File

@@ -0,0 +1,703 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <numeric>
#include <string>
#include <vector>
#include "arrow/array/array_base.h"
#include "arrow/array/array_binary.h"
#include "arrow/array/builder_base.h"
#include "arrow/array/data.h"
#include "arrow/buffer.h"
#include "arrow/buffer_builder.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
#include "arrow/util/string_view.h" // IWYU pragma: export
#include "arrow/util/visibility.h"
namespace arrow {
/// \addtogroup binary-builders
///
/// @{
// ----------------------------------------------------------------------
// Binary and String
template <typename TYPE>
class BaseBinaryBuilder : public ArrayBuilder {
public:
using TypeClass = TYPE;
using offset_type = typename TypeClass::offset_type;
explicit BaseBinaryBuilder(MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool), offsets_builder_(pool), value_data_builder_(pool) {}
BaseBinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
: BaseBinaryBuilder(pool) {}
Status Append(const uint8_t* value, offset_type length) {
ARROW_RETURN_NOT_OK(Reserve(1));
ARROW_RETURN_NOT_OK(AppendNextOffset());
// Safety check for UBSAN.
if (ARROW_PREDICT_TRUE(length > 0)) {
ARROW_RETURN_NOT_OK(ValidateOverflow(length));
ARROW_RETURN_NOT_OK(value_data_builder_.Append(value, length));
}
UnsafeAppendToBitmap(true);
return Status::OK();
}
Status Append(const char* value, offset_type length) {
return Append(reinterpret_cast<const uint8_t*>(value), length);
}
Status Append(util::string_view value) {
return Append(value.data(), static_cast<offset_type>(value.size()));
}
/// Extend the last appended value by appending more data at the end
///
/// Unlike Append, this does not create a new offset.
Status ExtendCurrent(const uint8_t* value, offset_type length) {
// Safety check for UBSAN.
if (ARROW_PREDICT_TRUE(length > 0)) {
ARROW_RETURN_NOT_OK(ValidateOverflow(length));
ARROW_RETURN_NOT_OK(value_data_builder_.Append(value, length));
}
return Status::OK();
}
Status ExtendCurrent(util::string_view value) {
return ExtendCurrent(reinterpret_cast<const uint8_t*>(value.data()),
static_cast<offset_type>(value.size()));
}
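// Usage sketch for ExtendCurrent (in comment form, as we are inside the
// class body): streaming one logical value in pieces fills a single slot,
//   builder.Append("hello ");        // opens slot i
//   builder.ExtendCurrent("world");  // slot i now holds "hello world"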
Status AppendNulls(int64_t length) final {
const int64_t num_bytes = value_data_builder_.length();
ARROW_RETURN_NOT_OK(Reserve(length));
for (int64_t i = 0; i < length; ++i) {
offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
}
UnsafeAppendToBitmap(length, false);
return Status::OK();
}
Status AppendNull() final {
ARROW_RETURN_NOT_OK(AppendNextOffset());
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppendToBitmap(false);
return Status::OK();
}
Status AppendEmptyValue() final {
ARROW_RETURN_NOT_OK(AppendNextOffset());
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppendToBitmap(true);
return Status::OK();
}
Status AppendEmptyValues(int64_t length) final {
const int64_t num_bytes = value_data_builder_.length();
ARROW_RETURN_NOT_OK(Reserve(length));
for (int64_t i = 0; i < length; ++i) {
offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
}
UnsafeAppendToBitmap(length, true);
return Status::OK();
}
/// \brief Append without checking capacity
///
/// Offsets and data should have been presized using Reserve() and
/// ReserveData(), respectively.
void UnsafeAppend(const uint8_t* value, offset_type length) {
UnsafeAppendNextOffset();
value_data_builder_.UnsafeAppend(value, length);
UnsafeAppendToBitmap(true);
}
void UnsafeAppend(const char* value, offset_type length) {
UnsafeAppend(reinterpret_cast<const uint8_t*>(value), length);
}
void UnsafeAppend(const std::string& value) {
UnsafeAppend(value.c_str(), static_cast<offset_type>(value.size()));
}
void UnsafeAppend(util::string_view value) {
UnsafeAppend(value.data(), static_cast<offset_type>(value.size()));
}
/// Like ExtendCurrent, but do not check capacity
void UnsafeExtendCurrent(const uint8_t* value, offset_type length) {
value_data_builder_.UnsafeAppend(value, length);
}
void UnsafeExtendCurrent(util::string_view value) {
UnsafeExtendCurrent(reinterpret_cast<const uint8_t*>(value.data()),
static_cast<offset_type>(value.size()));
}
void UnsafeAppendNull() {
const int64_t num_bytes = value_data_builder_.length();
offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
UnsafeAppendToBitmap(false);
}
void UnsafeAppendEmptyValue() {
const int64_t num_bytes = value_data_builder_.length();
offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
UnsafeAppendToBitmap(true);
}
/// \brief Append a sequence of strings in one shot.
///
/// \param[in] values a vector of strings
/// \param[in] valid_bytes an optional sequence of bytes where non-zero
/// indicates a valid (non-null) value
/// \return Status
Status AppendValues(const std::vector<std::string>& values,
const uint8_t* valid_bytes = NULLPTR) {
std::size_t total_length = std::accumulate(
values.begin(), values.end(), 0ULL,
[](uint64_t sum, const std::string& str) { return sum + str.size(); });
ARROW_RETURN_NOT_OK(Reserve(values.size()));
ARROW_RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
ARROW_RETURN_NOT_OK(offsets_builder_.Reserve(values.size()));
if (valid_bytes != NULLPTR) {
for (std::size_t i = 0; i < values.size(); ++i) {
UnsafeAppendNextOffset();
if (valid_bytes[i]) {
value_data_builder_.UnsafeAppend(
reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size());
}
}
} else {
for (std::size_t i = 0; i < values.size(); ++i) {
UnsafeAppendNextOffset();
value_data_builder_.UnsafeAppend(
reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size());
}
}
UnsafeAppendToBitmap(valid_bytes, values.size());
return Status::OK();
}
/// \brief Append a sequence of nul-terminated strings in one shot.
/// If one of the values is NULL, it is processed as a null
/// value even if the corresponding valid_bytes entry is 1.
///
/// \param[in] values a contiguous C array of nul-terminated char *
/// \param[in] length the number of values to append
/// \param[in] valid_bytes an optional sequence of bytes where non-zero
/// indicates a valid (non-null) value
/// \return Status
Status AppendValues(const char** values, int64_t length,
const uint8_t* valid_bytes = NULLPTR) {
std::size_t total_length = 0;
std::vector<std::size_t> value_lengths(length);
bool have_null_value = false;
for (int64_t i = 0; i < length; ++i) {
if (values[i] != NULLPTR) {
auto value_length = strlen(values[i]);
value_lengths[i] = value_length;
total_length += value_length;
} else {
have_null_value = true;
}
}
ARROW_RETURN_NOT_OK(Reserve(length));
ARROW_RETURN_NOT_OK(ReserveData(total_length));
if (valid_bytes) {
int64_t valid_bytes_offset = 0;
for (int64_t i = 0; i < length; ++i) {
UnsafeAppendNextOffset();
if (valid_bytes[i]) {
if (values[i]) {
value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
value_lengths[i]);
} else {
UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset,
i - valid_bytes_offset);
UnsafeAppendToBitmap(false);
valid_bytes_offset = i + 1;
}
}
}
UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, length - valid_bytes_offset);
} else {
if (have_null_value) {
std::vector<uint8_t> valid_vector(length, 0);
for (int64_t i = 0; i < length; ++i) {
UnsafeAppendNextOffset();
if (values[i]) {
value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
value_lengths[i]);
valid_vector[i] = 1;
}
}
UnsafeAppendToBitmap(valid_vector.data(), length);
} else {
for (int64_t i = 0; i < length; ++i) {
UnsafeAppendNextOffset();
value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
value_lengths[i]);
}
UnsafeAppendToBitmap(NULLPTR, length);
}
}
return Status::OK();
}
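// Sketch of the overload above (in comment form): a NULL entry produces a
// null slot even without a validity mask,
//   const char* values[] = {"a", nullptr, "c"};
//   builder.AppendValues(values, 3);  // slots: "a", null, "c"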
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override {
auto bitmap = array.GetValues<uint8_t>(0, 0);
auto offsets = array.GetValues<offset_type>(1);
auto data = array.GetValues<uint8_t>(2, 0);
for (int64_t i = 0; i < length; i++) {
if (!bitmap || bit_util::GetBit(bitmap, array.offset + offset + i)) {
const offset_type start = offsets[offset + i];
const offset_type end = offsets[offset + i + 1];
ARROW_RETURN_NOT_OK(Append(data + start, end - start));
} else {
ARROW_RETURN_NOT_OK(AppendNull());
}
}
return Status::OK();
}
void Reset() override {
ArrayBuilder::Reset();
offsets_builder_.Reset();
value_data_builder_.Reset();
}
Status ValidateOverflow(int64_t new_bytes) {
auto new_size = value_data_builder_.length() + new_bytes;
if (ARROW_PREDICT_FALSE(new_size > memory_limit())) {
return Status::CapacityError("array cannot contain more than ", memory_limit(),
" bytes, have ", new_size);
} else {
return Status::OK();
}
}
Status Resize(int64_t capacity) override {
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
// One more than requested for offsets
ARROW_RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
return ArrayBuilder::Resize(capacity);
}
/// \brief Ensures there is enough allocated capacity to append the indicated
/// number of bytes to the value data buffer without additional allocations
Status ReserveData(int64_t elements) {
ARROW_RETURN_NOT_OK(ValidateOverflow(elements));
return value_data_builder_.Reserve(elements);
}
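// Sketch of the presize-then-unsafe-append pattern these methods enable:
//   builder.Reserve(n);                  // offsets + validity for n slots
//   builder.ReserveData(total_bytes);    // value bytes for all n values
//   for (...) builder.UnsafeAppend(v);   // guaranteed not to reallocate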
Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
// Write final offset (values length)
ARROW_RETURN_NOT_OK(AppendNextOffset());
// These buffers' padding zeroed by BufferBuilder
std::shared_ptr<Buffer> offsets, value_data, null_bitmap;
ARROW_RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
ARROW_RETURN_NOT_OK(value_data_builder_.Finish(&value_data));
ARROW_RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
*out = ArrayData::Make(type(), length_, {null_bitmap, offsets, value_data},
null_count_, 0);
Reset();
return Status::OK();
}
/// \return data pointer of the value data builder
const uint8_t* value_data() const { return value_data_builder_.data(); }
/// \return size of values buffer so far
int64_t value_data_length() const { return value_data_builder_.length(); }
/// \return capacity of values buffer
int64_t value_data_capacity() const { return value_data_builder_.capacity(); }
/// \return data pointer of the offsets builder
const offset_type* offsets_data() const { return offsets_builder_.data(); }
/// Temporary access to a value.
///
/// This pointer becomes invalid on the next modifying operation.
const uint8_t* GetValue(int64_t i, offset_type* out_length) const {
const offset_type* offsets = offsets_builder_.data();
const auto offset = offsets[i];
if (i == (length_ - 1)) {
*out_length = static_cast<offset_type>(value_data_builder_.length()) - offset;
} else {
*out_length = offsets[i + 1] - offset;
}
return value_data_builder_.data() + offset;
}
offset_type offset(int64_t i) const { return offsets_data()[i]; }
/// Temporary access to a value.
///
/// This view becomes invalid on the next modifying operation.
util::string_view GetView(int64_t i) const {
offset_type value_length;
const uint8_t* value_data = GetValue(i, &value_length);
return util::string_view(reinterpret_cast<const char*>(value_data), value_length);
}
// Cannot make this a static attribute because of linking issues
static constexpr int64_t memory_limit() {
return std::numeric_limits<offset_type>::max() - 1;
}
protected:
TypedBufferBuilder<offset_type> offsets_builder_;
TypedBufferBuilder<uint8_t> value_data_builder_;
Status AppendNextOffset() {
const int64_t num_bytes = value_data_builder_.length();
return offsets_builder_.Append(static_cast<offset_type>(num_bytes));
}
void UnsafeAppendNextOffset() {
const int64_t num_bytes = value_data_builder_.length();
offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
}
};
/// \class BinaryBuilder
/// \brief Builder class for variable-length binary data
class ARROW_EXPORT BinaryBuilder : public BaseBinaryBuilder<BinaryType> {
public:
using BaseBinaryBuilder::BaseBinaryBuilder;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<BinaryArray>* out) { return FinishTyped(out); }
std::shared_ptr<DataType> type() const override { return binary(); }
};
/// \class StringBuilder
/// \brief Builder class for UTF8 strings
class ARROW_EXPORT StringBuilder : public BinaryBuilder {
public:
using BinaryBuilder::BinaryBuilder;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<StringArray>* out) { return FinishTyped(out); }
std::shared_ptr<DataType> type() const override { return utf8(); }
};
/// \class LargeBinaryBuilder
/// \brief Builder class for large variable-length binary data
class ARROW_EXPORT LargeBinaryBuilder : public BaseBinaryBuilder<LargeBinaryType> {
public:
using BaseBinaryBuilder::BaseBinaryBuilder;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<LargeBinaryArray>* out) { return FinishTyped(out); }
std::shared_ptr<DataType> type() const override { return large_binary(); }
};
/// \class LargeStringBuilder
/// \brief Builder class for large UTF8 strings
class ARROW_EXPORT LargeStringBuilder : public LargeBinaryBuilder {
public:
using LargeBinaryBuilder::LargeBinaryBuilder;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<LargeStringArray>* out) { return FinishTyped(out); }
std::shared_ptr<DataType> type() const override { return large_utf8(); }
};
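// Sketch (hypothetical helper): the typed Finish() overloads above avoid a
// manual downcast of the generic Finish(std::shared_ptr<Array>*) result.
inline Status ExampleBuildStrings(std::shared_ptr<StringArray>* out) {
  StringBuilder builder;
  ARROW_RETURN_NOT_OK(builder.Append("a"));
  ARROW_RETURN_NOT_OK(builder.AppendNull());
  return builder.Finish(out);  // ["a", null] as a StringArray
}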
// ----------------------------------------------------------------------
// FixedSizeBinaryBuilder
class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
public:
using TypeClass = FixedSizeBinaryType;
explicit FixedSizeBinaryBuilder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool());
Status Append(const uint8_t* value) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppend(value);
return Status::OK();
}
Status Append(const char* value) {
return Append(reinterpret_cast<const uint8_t*>(value));
}
Status Append(const util::string_view& view) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppend(view);
return Status::OK();
}
Status Append(const std::string& s) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppend(s);
return Status::OK();
}
Status Append(const Buffer& s) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppend(util::string_view(s));
return Status::OK();
}
Status Append(const std::shared_ptr<Buffer>& s) { return Append(*s); }
template <size_t NBYTES>
Status Append(const std::array<uint8_t, NBYTES>& value) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppend(
util::string_view(reinterpret_cast<const char*>(value.data()), value.size()));
return Status::OK();
}
Status AppendValues(const uint8_t* data, int64_t length,
const uint8_t* valid_bytes = NULLPTR);
Status AppendValues(const uint8_t* data, int64_t length, const uint8_t* validity,
int64_t bitmap_offset);
Status AppendNull() final;
Status AppendNulls(int64_t length) final;
Status AppendEmptyValue() final;
Status AppendEmptyValues(int64_t length) final;
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override {
return AppendValues(
array.GetValues<uint8_t>(1, 0) + ((array.offset + offset) * byte_width_), length,
array.GetValues<uint8_t>(0, 0), array.offset + offset);
}
void UnsafeAppend(const uint8_t* value) {
UnsafeAppendToBitmap(true);
if (ARROW_PREDICT_TRUE(byte_width_ > 0)) {
byte_builder_.UnsafeAppend(value, byte_width_);
}
}
void UnsafeAppend(const char* value) {
UnsafeAppend(reinterpret_cast<const uint8_t*>(value));
}
void UnsafeAppend(util::string_view value) {
#ifndef NDEBUG
CheckValueSize(static_cast<size_t>(value.size()));
#endif
UnsafeAppend(reinterpret_cast<const uint8_t*>(value.data()));
}
void UnsafeAppend(const Buffer& s) { UnsafeAppend(util::string_view(s)); }
void UnsafeAppend(const std::shared_ptr<Buffer>& s) { UnsafeAppend(*s); }
void UnsafeAppendNull() {
UnsafeAppendToBitmap(false);
byte_builder_.UnsafeAppend(/*num_copies=*/byte_width_, 0);
}
Status ValidateOverflow(int64_t new_bytes) const {
auto new_size = byte_builder_.length() + new_bytes;
if (ARROW_PREDICT_FALSE(new_size > memory_limit())) {
return Status::CapacityError("array cannot contain more than ", memory_limit(),
" bytes, have ", new_size);
} else {
return Status::OK();
}
}
/// \brief Ensures there is enough allocated capacity to append the indicated
/// number of bytes to the value data buffer without additional allocations
Status ReserveData(int64_t elements) {
ARROW_RETURN_NOT_OK(ValidateOverflow(elements));
return byte_builder_.Reserve(elements);
}
void Reset() override;
Status Resize(int64_t capacity) override;
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<FixedSizeBinaryArray>* out) { return FinishTyped(out); }
/// \return size of values buffer so far
int64_t value_data_length() const { return byte_builder_.length(); }
int32_t byte_width() const { return byte_width_; }
/// Temporary access to a value.
///
/// This pointer becomes invalid on the next modifying operation.
const uint8_t* GetValue(int64_t i) const;
/// Temporary access to a value.
///
/// This view becomes invalid on the next modifying operation.
util::string_view GetView(int64_t i) const;
static constexpr int64_t memory_limit() {
return std::numeric_limits<int64_t>::max() - 1;
}
std::shared_ptr<DataType> type() const override {
return fixed_size_binary(byte_width_);
}
protected:
int32_t byte_width_;
BufferBuilder byte_builder_;
/// Temporary access to a value.
///
/// This pointer becomes invalid on the next modifying operation.
uint8_t* GetMutableValue(int64_t i) {
uint8_t* data_ptr = byte_builder_.mutable_data();
return data_ptr + i * byte_width_;
}
void CheckValueSize(int64_t size);
};
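// Sketch (hypothetical helper; assumes the fixed_size_binary() factory from
// arrow/type.h): every appended value must supply exactly byte_width() bytes.
inline Status ExampleBuildFixed(std::shared_ptr<FixedSizeBinaryArray>* out) {
  FixedSizeBinaryBuilder builder(fixed_size_binary(4));
  ARROW_RETURN_NOT_OK(builder.Append("abcd"));  // exactly 4 bytes consumed
  ARROW_RETURN_NOT_OK(builder.AppendNull());
  return builder.Finish(out);
}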
/// @}
// ----------------------------------------------------------------------
// Chunked builders: build a sequence of BinaryArray or StringArray that are
// limited to a particular size (to the upper limit of 2GB)
namespace internal {
class ARROW_EXPORT ChunkedBinaryBuilder {
public:
explicit ChunkedBinaryBuilder(int32_t max_chunk_value_length,
MemoryPool* pool = default_memory_pool());
ChunkedBinaryBuilder(int32_t max_chunk_value_length, int32_t max_chunk_length,
MemoryPool* pool = default_memory_pool());
virtual ~ChunkedBinaryBuilder() = default;
Status Append(const uint8_t* value, int32_t length) {
if (ARROW_PREDICT_FALSE(length + builder_->value_data_length() >
max_chunk_value_length_)) {
if (builder_->value_data_length() == 0) {
// The current item is larger than max_chunk_value_length_;
// this chunk will be oversized and hold *only* this item
ARROW_RETURN_NOT_OK(builder_->Append(value, length));
return NextChunk();
}
// The current item would cause builder_->value_data_length() to exceed
// max_chunk_value_length_, so finish this chunk and append the current
// item to the next chunk
ARROW_RETURN_NOT_OK(NextChunk());
return Append(value, length);
}
if (ARROW_PREDICT_FALSE(builder_->length() == max_chunk_length_)) {
// The current item would cause builder_->length() to exceed max_chunk_length_, so
// finish this chunk and append the current item to the next chunk
ARROW_RETURN_NOT_OK(NextChunk());
}
return builder_->Append(value, length);
}
Status Append(const util::string_view& value) {
return Append(reinterpret_cast<const uint8_t*>(value.data()),
static_cast<int32_t>(value.size()));
}
Status AppendNull() {
if (ARROW_PREDICT_FALSE(builder_->length() == max_chunk_length_)) {
ARROW_RETURN_NOT_OK(NextChunk());
}
return builder_->AppendNull();
}
Status Reserve(int64_t values);
virtual Status Finish(ArrayVector* out);
protected:
Status NextChunk();
// maximum total character data size per chunk
int64_t max_chunk_value_length_;
// maximum elements allowed per chunk
int64_t max_chunk_length_ = kListMaximumElements;
// when Reserve() would cause builder_ to exceed its max_chunk_length_,
// add to extra_capacity_ instead and wait to reserve until the next chunk
int64_t extra_capacity_ = 0;
std::unique_ptr<BinaryBuilder> builder_;
std::vector<std::shared_ptr<Array>> chunks_;
};
class ARROW_EXPORT ChunkedStringBuilder : public ChunkedBinaryBuilder {
public:
using ChunkedBinaryBuilder::ChunkedBinaryBuilder;
Status Finish(ArrayVector* out) override;
};
} // namespace internal
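// Sketch (hypothetical helper): chunks roll over automatically once the
// configured per-chunk value-byte budget would be exceeded; Finish() returns
// all accumulated chunks.
inline Status ExampleBuildChunked(ArrayVector* out) {
  internal::ChunkedBinaryBuilder builder(/*max_chunk_value_length=*/1 << 20);
  ARROW_RETURN_NOT_OK(builder.Append("payload"));
  return builder.Finish(out);
}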
} // namespace arrow

View File

@@ -0,0 +1,100 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "arrow/array/array_decimal.h"
#include "arrow/array/builder_base.h"
#include "arrow/array/builder_binary.h"
#include "arrow/array/data.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \addtogroup numeric-builders
///
/// @{
class ARROW_EXPORT Decimal128Builder : public FixedSizeBinaryBuilder {
public:
using TypeClass = Decimal128Type;
using ValueType = Decimal128;
explicit Decimal128Builder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool());
using FixedSizeBinaryBuilder::Append;
using FixedSizeBinaryBuilder::AppendValues;
using FixedSizeBinaryBuilder::Reset;
Status Append(Decimal128 val);
void UnsafeAppend(Decimal128 val);
void UnsafeAppend(util::string_view val);
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<Decimal128Array>* out) { return FinishTyped(out); }
std::shared_ptr<DataType> type() const override { return decimal_type_; }
protected:
std::shared_ptr<Decimal128Type> decimal_type_;
};
class ARROW_EXPORT Decimal256Builder : public FixedSizeBinaryBuilder {
public:
using TypeClass = Decimal256Type;
using ValueType = Decimal256;
explicit Decimal256Builder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool());
using FixedSizeBinaryBuilder::Append;
using FixedSizeBinaryBuilder::AppendValues;
using FixedSizeBinaryBuilder::Reset;
Status Append(const Decimal256& val);
void UnsafeAppend(const Decimal256& val);
void UnsafeAppend(util::string_view val);
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<Decimal256Array>* out) { return FinishTyped(out); }
std::shared_ptr<DataType> type() const override { return decimal_type_; }
protected:
std::shared_ptr<Decimal256Type> decimal_type_;
};
using DecimalBuilder = Decimal128Builder;
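// Sketch (hypothetical helper; assumes Decimal128 from arrow/util/decimal.h
// and the decimal128() factory): the unscaled integer 12345 at scale 2
// represents 123.45.
inline Status ExampleBuildDecimals(std::shared_ptr<Decimal128Array>* out) {
  Decimal128Builder builder(decimal128(/*precision=*/10, /*scale=*/2));
  ARROW_RETURN_NOT_OK(builder.Append(Decimal128(12345)));  // 123.45
  return builder.Finish(out);
}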
/// @}
} // namespace arrow

View File

@@ -0,0 +1,722 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <cstdint>
#include <memory>
#include <type_traits>
#include "arrow/array/array_base.h"
#include "arrow/array/array_binary.h"
#include "arrow/array/builder_adaptive.h" // IWYU pragma: export
#include "arrow/array/builder_base.h" // IWYU pragma: export
#include "arrow/array/builder_primitive.h" // IWYU pragma: export
#include "arrow/array/data.h"
#include "arrow/array/util.h"
#include "arrow/scalar.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit_block_counter.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/decimal.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
// ----------------------------------------------------------------------
// Dictionary builder
namespace internal {
template <typename T, typename Enable = void>
struct DictionaryValue {
using type = typename T::c_type;
using PhysicalType = T;
};
template <typename T>
struct DictionaryValue<T, enable_if_base_binary<T>> {
using type = util::string_view;
using PhysicalType =
typename std::conditional<std::is_same<typename T::offset_type, int32_t>::value,
BinaryType, LargeBinaryType>::type;
};
template <typename T>
struct DictionaryValue<T, enable_if_fixed_size_binary<T>> {
using type = util::string_view;
using PhysicalType = BinaryType;
};
class ARROW_EXPORT DictionaryMemoTable {
public:
DictionaryMemoTable(MemoryPool* pool, const std::shared_ptr<DataType>& type);
DictionaryMemoTable(MemoryPool* pool, const std::shared_ptr<Array>& dictionary);
~DictionaryMemoTable();
Status GetArrayData(int64_t start_offset, std::shared_ptr<ArrayData>* out);
/// \brief Insert new memo values
Status InsertValues(const Array& values);
int32_t size() const;
template <typename T>
Status GetOrInsert(typename DictionaryValue<T>::type value, int32_t* out) {
// We want to keep the DictionaryMemoTable implementation private, also we can't
// use extern template classes because of compiler issues (MinGW?). Instead,
// we expose explicit function overrides for each supported physical type.
const typename DictionaryValue<T>::PhysicalType* physical_type = NULLPTR;
return GetOrInsert(physical_type, value, out);
}
private:
Status GetOrInsert(const BooleanType*, bool value, int32_t* out);
Status GetOrInsert(const Int8Type*, int8_t value, int32_t* out);
Status GetOrInsert(const Int16Type*, int16_t value, int32_t* out);
Status GetOrInsert(const Int32Type*, int32_t value, int32_t* out);
Status GetOrInsert(const Int64Type*, int64_t value, int32_t* out);
Status GetOrInsert(const UInt8Type*, uint8_t value, int32_t* out);
Status GetOrInsert(const UInt16Type*, uint16_t value, int32_t* out);
Status GetOrInsert(const UInt32Type*, uint32_t value, int32_t* out);
Status GetOrInsert(const UInt64Type*, uint64_t value, int32_t* out);
Status GetOrInsert(const DurationType*, int64_t value, int32_t* out);
Status GetOrInsert(const TimestampType*, int64_t value, int32_t* out);
Status GetOrInsert(const Date32Type*, int32_t value, int32_t* out);
Status GetOrInsert(const Date64Type*, int64_t value, int32_t* out);
Status GetOrInsert(const Time32Type*, int32_t value, int32_t* out);
Status GetOrInsert(const Time64Type*, int64_t value, int32_t* out);
Status GetOrInsert(const MonthDayNanoIntervalType*,
MonthDayNanoIntervalType::MonthDayNanos value, int32_t* out);
Status GetOrInsert(const DayTimeIntervalType*,
DayTimeIntervalType::DayMilliseconds value, int32_t* out);
Status GetOrInsert(const MonthIntervalType*, int32_t value, int32_t* out);
Status GetOrInsert(const FloatType*, float value, int32_t* out);
Status GetOrInsert(const DoubleType*, double value, int32_t* out);
Status GetOrInsert(const BinaryType*, util::string_view value, int32_t* out);
Status GetOrInsert(const LargeBinaryType*, util::string_view value, int32_t* out);
class DictionaryMemoTableImpl;
std::unique_ptr<DictionaryMemoTableImpl> impl_;
};
} // namespace internal
/// \addtogroup dictionary-builders
///
/// @{
namespace internal {
/// \brief Array builder for creating dictionary-encoded DictionaryArray
/// objects from dense array data
///
/// Unlike other builders, dictionary builder does not completely
/// reset the state on Finish calls.
template <typename BuilderType, typename T>
class DictionaryBuilderBase : public ArrayBuilder {
public:
using TypeClass = DictionaryType;
using Value = typename DictionaryValue<T>::type;
// WARNING: the type given below is the value type, not the DictionaryType.
// The DictionaryType is instantiated on the Finish() call.
template <typename B = BuilderType, typename T1 = T>
DictionaryBuilderBase(uint8_t start_int_size,
enable_if_t<std::is_base_of<AdaptiveIntBuilderBase, B>::value &&
!is_fixed_size_binary_type<T1>::value,
const std::shared_ptr<DataType>&>
value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(-1),
indices_builder_(start_int_size, pool),
value_type_(value_type) {}
template <typename T1 = T>
explicit DictionaryBuilderBase(
enable_if_t<!is_fixed_size_binary_type<T1>::value, const std::shared_ptr<DataType>&>
value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(-1),
indices_builder_(pool),
value_type_(value_type) {}
template <typename T1 = T>
explicit DictionaryBuilderBase(
const std::shared_ptr<DataType>& index_type,
enable_if_t<!is_fixed_size_binary_type<T1>::value, const std::shared_ptr<DataType>&>
value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(-1),
indices_builder_(index_type, pool),
value_type_(value_type) {}
template <typename B = BuilderType, typename T1 = T>
DictionaryBuilderBase(uint8_t start_int_size,
enable_if_t<std::is_base_of<AdaptiveIntBuilderBase, B>::value &&
is_fixed_size_binary_type<T1>::value,
const std::shared_ptr<DataType>&>
value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(static_cast<const T1&>(*value_type).byte_width()),
indices_builder_(start_int_size, pool),
value_type_(value_type) {}
template <typename T1 = T>
explicit DictionaryBuilderBase(
enable_if_fixed_size_binary<T1, const std::shared_ptr<DataType>&> value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(static_cast<const T1&>(*value_type).byte_width()),
indices_builder_(pool),
value_type_(value_type) {}
template <typename T1 = T>
explicit DictionaryBuilderBase(
const std::shared_ptr<DataType>& index_type,
enable_if_fixed_size_binary<T1, const std::shared_ptr<DataType>&> value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(static_cast<const T1&>(*value_type).byte_width()),
indices_builder_(index_type, pool),
value_type_(value_type) {}
template <typename T1 = T>
explicit DictionaryBuilderBase(
enable_if_parameter_free<T1, MemoryPool*> pool = default_memory_pool())
: DictionaryBuilderBase<BuilderType, T1>(TypeTraits<T1>::type_singleton(), pool) {}
// This constructor doesn't check for errors. Use InsertMemoValues instead.
explicit DictionaryBuilderBase(const std::shared_ptr<Array>& dictionary,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
memo_table_(new internal::DictionaryMemoTable(pool, dictionary)),
delta_offset_(0),
byte_width_(-1),
indices_builder_(pool),
value_type_(dictionary->type()) {}
~DictionaryBuilderBase() override = default;
/// \brief The current number of entries in the dictionary
int64_t dictionary_length() const { return memo_table_->size(); }
/// \brief The value byte width (for FixedSizeBinaryType)
template <typename T1 = T>
enable_if_fixed_size_binary<T1, int32_t> byte_width() const {
return byte_width_;
}
/// \brief Append a scalar value
Status Append(Value value) {
ARROW_RETURN_NOT_OK(Reserve(1));
int32_t memo_index;
ARROW_RETURN_NOT_OK(memo_table_->GetOrInsert<T>(value, &memo_index));
ARROW_RETURN_NOT_OK(indices_builder_.Append(memo_index));
length_ += 1;
return Status::OK();
}
/// \brief Append a fixed-width string (only for FixedSizeBinaryType)
template <typename T1 = T>
enable_if_fixed_size_binary<T1, Status> Append(const uint8_t* value) {
return Append(util::string_view(reinterpret_cast<const char*>(value), byte_width_));
}
/// \brief Append a fixed-width string (only for FixedSizeBinaryType)
template <typename T1 = T>
enable_if_fixed_size_binary<T1, Status> Append(const char* value) {
return Append(util::string_view(value, byte_width_));
}
/// \brief Append a string (only for binary types)
template <typename T1 = T>
enable_if_binary_like<T1, Status> Append(const uint8_t* value, int32_t length) {
return Append(reinterpret_cast<const char*>(value), length);
}
/// \brief Append a string (only for binary types)
template <typename T1 = T>
enable_if_binary_like<T1, Status> Append(const char* value, int32_t length) {
return Append(util::string_view(value, length));
}
/// \brief Append a string (only for string types)
template <typename T1 = T>
enable_if_string_like<T1, Status> Append(const char* value, int32_t length) {
return Append(util::string_view(value, length));
}
/// \brief Append a decimal (only for Decimal128Type)
template <typename T1 = T>
enable_if_decimal128<T1, Status> Append(const Decimal128& value) {
uint8_t data[16];
value.ToBytes(data);
return Append(data, 16);
}
/// \brief Append a decimal (only for Decimal128Type)
template <typename T1 = T>
enable_if_decimal256<T1, Status> Append(const Decimal256& value) {
uint8_t data[32];
value.ToBytes(data);
return Append(data, 32);
}
/// \brief Append a scalar null value
Status AppendNull() final {
length_ += 1;
null_count_ += 1;
return indices_builder_.AppendNull();
}
Status AppendNulls(int64_t length) final {
length_ += length;
null_count_ += length;
return indices_builder_.AppendNulls(length);
}
Status AppendEmptyValue() final {
length_ += 1;
return indices_builder_.AppendEmptyValue();
}
Status AppendEmptyValues(int64_t length) final {
length_ += length;
return indices_builder_.AppendEmptyValues(length);
}
Status AppendScalar(const Scalar& scalar, int64_t n_repeats) override {
if (!scalar.is_valid) return AppendNulls(n_repeats);
const auto& dict_ty = internal::checked_cast<const DictionaryType&>(*scalar.type);
const DictionaryScalar& dict_scalar =
internal::checked_cast<const DictionaryScalar&>(scalar);
const auto& dict = internal::checked_cast<const typename TypeTraits<T>::ArrayType&>(
*dict_scalar.value.dictionary);
ARROW_RETURN_NOT_OK(Reserve(n_repeats));
switch (dict_ty.index_type()->id()) {
case Type::UINT8:
return AppendScalarImpl<UInt8Type>(dict, *dict_scalar.value.index, n_repeats);
case Type::INT8:
return AppendScalarImpl<Int8Type>(dict, *dict_scalar.value.index, n_repeats);
case Type::UINT16:
return AppendScalarImpl<UInt16Type>(dict, *dict_scalar.value.index, n_repeats);
case Type::INT16:
return AppendScalarImpl<Int16Type>(dict, *dict_scalar.value.index, n_repeats);
case Type::UINT32:
return AppendScalarImpl<UInt32Type>(dict, *dict_scalar.value.index, n_repeats);
case Type::INT32:
return AppendScalarImpl<Int32Type>(dict, *dict_scalar.value.index, n_repeats);
case Type::UINT64:
return AppendScalarImpl<UInt64Type>(dict, *dict_scalar.value.index, n_repeats);
case Type::INT64:
return AppendScalarImpl<Int64Type>(dict, *dict_scalar.value.index, n_repeats);
default:
return Status::TypeError("Invalid index type: ", dict_ty);
}
return Status::OK();
}
Status AppendScalars(const ScalarVector& scalars) override {
for (const auto& scalar : scalars) {
ARROW_RETURN_NOT_OK(AppendScalar(*scalar, /*n_repeats=*/1));
}
return Status::OK();
}
Status AppendArraySlice(const ArrayData& array, int64_t offset, int64_t length) final {
// Visit the indices and insert the unpacked values.
const auto& dict_ty = internal::checked_cast<const DictionaryType&>(*array.type);
const typename TypeTraits<T>::ArrayType dict(array.dictionary);
ARROW_RETURN_NOT_OK(Reserve(length));
switch (dict_ty.index_type()->id()) {
case Type::UINT8:
return AppendArraySliceImpl<uint8_t>(dict, array, offset, length);
case Type::INT8:
return AppendArraySliceImpl<int8_t>(dict, array, offset, length);
case Type::UINT16:
return AppendArraySliceImpl<uint16_t>(dict, array, offset, length);
case Type::INT16:
return AppendArraySliceImpl<int16_t>(dict, array, offset, length);
case Type::UINT32:
return AppendArraySliceImpl<uint32_t>(dict, array, offset, length);
case Type::INT32:
return AppendArraySliceImpl<int32_t>(dict, array, offset, length);
case Type::UINT64:
return AppendArraySliceImpl<uint64_t>(dict, array, offset, length);
case Type::INT64:
return AppendArraySliceImpl<int64_t>(dict, array, offset, length);
default:
return Status::TypeError("Invalid index type: ", dict_ty);
}
return Status::OK();
}
/// \brief Insert values into the dictionary's memo, but do not append any
/// indices. Can be used to initialize a new builder with known dictionary
/// values
/// \param[in] values dictionary values to add to memo. Type must match
/// builder type
Status InsertMemoValues(const Array& values) {
return memo_table_->InsertValues(values);
}
/// \brief Append a whole dense array to the builder
template <typename T1 = T>
enable_if_t<!is_fixed_size_binary_type<T1>::value, Status> AppendArray(
const Array& array) {
using ArrayType = typename TypeTraits<T>::ArrayType;
#ifndef NDEBUG
ARROW_RETURN_NOT_OK(ArrayBuilder::CheckArrayType(
value_type_, array, "Wrong value type of array to be appended"));
#endif
const auto& concrete_array = static_cast<const ArrayType&>(array);
for (int64_t i = 0; i < array.length(); i++) {
if (array.IsNull(i)) {
ARROW_RETURN_NOT_OK(AppendNull());
} else {
ARROW_RETURN_NOT_OK(Append(concrete_array.GetView(i)));
}
}
return Status::OK();
}
template <typename T1 = T>
enable_if_fixed_size_binary<T1, Status> AppendArray(const Array& array) {
#ifndef NDEBUG
ARROW_RETURN_NOT_OK(ArrayBuilder::CheckArrayType(
value_type_, array, "Wrong value type of array to be appended"));
#endif
const auto& concrete_array = static_cast<const FixedSizeBinaryArray&>(array);
for (int64_t i = 0; i < array.length(); i++) {
if (array.IsNull(i)) {
ARROW_RETURN_NOT_OK(AppendNull());
} else {
ARROW_RETURN_NOT_OK(Append(concrete_array.GetValue(i)));
}
}
return Status::OK();
}
void Reset() override {
// Perform a partial reset. Call ResetFull to also reset the accumulated
// dictionary values
ArrayBuilder::Reset();
indices_builder_.Reset();
}
/// \brief Reset and also clear accumulated dictionary values in memo table
void ResetFull() {
Reset();
memo_table_.reset(new internal::DictionaryMemoTable(pool_, value_type_));
}
Status Resize(int64_t capacity) override {
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
capacity = std::max(capacity, kMinBuilderCapacity);
ARROW_RETURN_NOT_OK(indices_builder_.Resize(capacity));
capacity_ = indices_builder_.capacity();
return Status::OK();
}
/// \brief Return dictionary indices and a delta dictionary since the last
/// time that Finish or FinishDelta were called, and reset state of builder
/// (except the memo table)
Status FinishDelta(std::shared_ptr<Array>* out_indices,
std::shared_ptr<Array>* out_delta) {
std::shared_ptr<ArrayData> indices_data;
std::shared_ptr<ArrayData> delta_data;
ARROW_RETURN_NOT_OK(FinishWithDictOffset(delta_offset_, &indices_data, &delta_data));
*out_indices = MakeArray(indices_data);
*out_delta = MakeArray(delta_data);
return Status::OK();
}
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<DictionaryArray>* out) { return FinishTyped(out); }
std::shared_ptr<DataType> type() const override {
return ::arrow::dictionary(indices_builder_.type(), value_type_);
}
protected:
template <typename c_type>
Status AppendArraySliceImpl(const typename TypeTraits<T>::ArrayType& dict,
const ArrayData& array, int64_t offset, int64_t length) {
const c_type* values = array.GetValues<c_type>(1) + offset;
return VisitBitBlocks(
array.buffers[0], array.offset + offset, length,
[&](const int64_t position) {
const int64_t index = static_cast<int64_t>(values[position]);
if (dict.IsValid(index)) {
return Append(dict.GetView(index));
}
return AppendNull();
},
[&]() { return AppendNull(); });
}
template <typename IndexType>
Status AppendScalarImpl(const typename TypeTraits<T>::ArrayType& dict,
const Scalar& index_scalar, int64_t n_repeats) {
using ScalarType = typename TypeTraits<IndexType>::ScalarType;
const auto index = internal::checked_cast<const ScalarType&>(index_scalar).value;
if (index_scalar.is_valid && dict.IsValid(index)) {
const auto& value = dict.GetView(index);
for (int64_t i = 0; i < n_repeats; i++) {
ARROW_RETURN_NOT_OK(Append(value));
}
return Status::OK();
}
return AppendNulls(n_repeats);
}
Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
std::shared_ptr<ArrayData> dictionary;
ARROW_RETURN_NOT_OK(FinishWithDictOffset(/*offset=*/0, out, &dictionary));
// Set type of array data to the right dictionary type
(*out)->type = type();
(*out)->dictionary = dictionary;
return Status::OK();
}
Status FinishWithDictOffset(int64_t dict_offset,
std::shared_ptr<ArrayData>* out_indices,
std::shared_ptr<ArrayData>* out_dictionary) {
// Finalize indices array
ARROW_RETURN_NOT_OK(indices_builder_.FinishInternal(out_indices));
// Generate dictionary array from hash table contents
ARROW_RETURN_NOT_OK(memo_table_->GetArrayData(dict_offset, out_dictionary));
delta_offset_ = memo_table_->size();
// Update internals for further uses of this DictionaryBuilder
ArrayBuilder::Reset();
return Status::OK();
}
std::unique_ptr<DictionaryMemoTable> memo_table_;
// The size of the dictionary memo at last invocation of Finish, to use in
// FinishDelta for computing dictionary deltas
int32_t delta_offset_;
// Only used for FixedSizeBinaryType
int32_t byte_width_;
BuilderType indices_builder_;
std::shared_ptr<DataType> value_type_;
};
template <typename BuilderType>
class DictionaryBuilderBase<BuilderType, NullType> : public ArrayBuilder {
public:
template <typename B = BuilderType>
DictionaryBuilderBase(
enable_if_t<std::is_base_of<AdaptiveIntBuilderBase, B>::value, uint8_t>
start_int_size,
const std::shared_ptr<DataType>& value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool), indices_builder_(start_int_size, pool) {}
explicit DictionaryBuilderBase(const std::shared_ptr<DataType>& value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool), indices_builder_(pool) {}
explicit DictionaryBuilderBase(const std::shared_ptr<DataType>& index_type,
const std::shared_ptr<DataType>& value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool), indices_builder_(index_type, pool) {}
template <typename B = BuilderType>
explicit DictionaryBuilderBase(
enable_if_t<std::is_base_of<AdaptiveIntBuilderBase, B>::value, uint8_t>
start_int_size,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool), indices_builder_(start_int_size, pool) {}
explicit DictionaryBuilderBase(MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool), indices_builder_(pool) {}
explicit DictionaryBuilderBase(const std::shared_ptr<Array>& dictionary,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool), indices_builder_(pool) {}
/// \brief Append a scalar null value
Status AppendNull() final {
length_ += 1;
null_count_ += 1;
return indices_builder_.AppendNull();
}
Status AppendNulls(int64_t length) final {
length_ += length;
null_count_ += length;
return indices_builder_.AppendNulls(length);
}
Status AppendEmptyValue() final {
length_ += 1;
return indices_builder_.AppendEmptyValue();
}
Status AppendEmptyValues(int64_t length) final {
length_ += length;
return indices_builder_.AppendEmptyValues(length);
}
/// \brief Append a whole dense array to the builder
Status AppendArray(const Array& array) {
#ifndef NDEBUG
ARROW_RETURN_NOT_OK(ArrayBuilder::CheckArrayType(
Type::NA, array, "Wrong value type of array to be appended"));
#endif
for (int64_t i = 0; i < array.length(); i++) {
ARROW_RETURN_NOT_OK(AppendNull());
}
return Status::OK();
}
Status Resize(int64_t capacity) override {
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
capacity = std::max(capacity, kMinBuilderCapacity);
ARROW_RETURN_NOT_OK(indices_builder_.Resize(capacity));
capacity_ = indices_builder_.capacity();
return Status::OK();
}
Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
ARROW_RETURN_NOT_OK(indices_builder_.FinishInternal(out));
(*out)->type = dictionary((*out)->type, null());
(*out)->dictionary = NullArray(0).data();
return Status::OK();
}
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<DictionaryArray>* out) { return FinishTyped(out); }
std::shared_ptr<DataType> type() const override {
return ::arrow::dictionary(indices_builder_.type(), null());
}
protected:
BuilderType indices_builder_;
};
} // namespace internal
/// \brief A DictionaryArray builder that uses AdaptiveIntBuilder to return the
/// smallest index size that can accommodate the dictionary indices
template <typename T>
class DictionaryBuilder : public internal::DictionaryBuilderBase<AdaptiveIntBuilder, T> {
public:
using BASE = internal::DictionaryBuilderBase<AdaptiveIntBuilder, T>;
using BASE::BASE;
/// \brief Append dictionary indices directly without modifying memo
///
/// NOTE: Experimental API
Status AppendIndices(const int64_t* values, int64_t length,
const uint8_t* valid_bytes = NULLPTR) {
int64_t null_count_before = this->indices_builder_.null_count();
ARROW_RETURN_NOT_OK(this->indices_builder_.AppendValues(values, length, valid_bytes));
this->capacity_ = this->indices_builder_.capacity();
this->length_ += length;
this->null_count_ += this->indices_builder_.null_count() - null_count_before;
return Status::OK();
}
};
/// \brief A DictionaryArray builder that always returns int32 dictionary
/// indices so that data cast to dictionary form will have a consistent index
/// type, e.g. for creating a ChunkedArray
template <typename T>
class Dictionary32Builder : public internal::DictionaryBuilderBase<Int32Builder, T> {
public:
using BASE = internal::DictionaryBuilderBase<Int32Builder, T>;
using BASE::BASE;
/// \brief Append dictionary indices directly without modifying memo
///
/// NOTE: Experimental API
Status AppendIndices(const int32_t* values, int64_t length,
const uint8_t* valid_bytes = NULLPTR) {
int64_t null_count_before = this->indices_builder_.null_count();
ARROW_RETURN_NOT_OK(this->indices_builder_.AppendValues(values, length, valid_bytes));
this->capacity_ = this->indices_builder_.capacity();
this->length_ += length;
this->null_count_ += this->indices_builder_.null_count() - null_count_before;
return Status::OK();
}
};
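// A minimal usage sketch of the experimental AppendIndices API (editorial
// illustration, not part of the original header). Indices are appended
// verbatim without touching the memo table, so the dictionary must already
// be known, e.g. via InsertMemoValues. The helper name is hypothetical.
inline Status ExampleAppendRawIndices(Dictionary32Builder<StringType>* builder,
                                      const Array& known_dictionary) {
  // Seed the memo table so the raw indices below have something to point at.
  ARROW_RETURN_NOT_OK(builder->InsertMemoValues(known_dictionary));
  const int32_t indices[] = {0, 1, 0};
  return builder->AppendIndices(indices, /*length=*/3);
}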
// ----------------------------------------------------------------------
// Binary / Unicode builders
// (compatibility aliases; those used to be derived classes with additional
// Append() overloads, but they have been folded into DictionaryBuilderBase)
using BinaryDictionaryBuilder = DictionaryBuilder<BinaryType>;
using StringDictionaryBuilder = DictionaryBuilder<StringType>;
using BinaryDictionary32Builder = Dictionary32Builder<BinaryType>;
using StringDictionary32Builder = Dictionary32Builder<StringType>;
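// A minimal usage sketch (editorial illustration, not part of the original
// header): dictionary-encode strings, then use FinishDelta to emit only the
// dictionary entries added since the last Finish. The helper name is
// hypothetical.
inline Status ExampleDictionaryEncode(std::shared_ptr<Array>* out_indices,
                                      std::shared_ptr<Array>* out_delta) {
  StringDictionaryBuilder builder;
  ARROW_RETURN_NOT_OK(builder.Append("foo"));
  ARROW_RETURN_NOT_OK(builder.Append("bar"));
  ARROW_RETURN_NOT_OK(builder.Append("foo"));  // memoized: reuses index 0
  std::shared_ptr<Array> batch1;
  ARROW_RETURN_NOT_OK(builder.Finish(&batch1));  // dictionary ["foo", "bar"]
  // The memo table survives Finish, so only unseen values land in the delta.
  ARROW_RETURN_NOT_OK(builder.Append("foo"));
  ARROW_RETURN_NOT_OK(builder.Append("baz"));
  return builder.FinishDelta(out_indices, out_delta);  // delta ["baz"]
}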
/// @}
} // namespace arrow

View File

@@ -0,0 +1,561 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <limits>
#include <memory>
#include <utility>
#include <vector>
#include "arrow/array/array_nested.h"
#include "arrow/array/builder_base.h"
#include "arrow/array/data.h"
#include "arrow/buffer.h"
#include "arrow/buffer_builder.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \addtogroup nested-builders
///
/// @{
// ----------------------------------------------------------------------
// List builder
template <typename TYPE>
class BaseListBuilder : public ArrayBuilder {
public:
using TypeClass = TYPE;
using offset_type = typename TypeClass::offset_type;
/// Use this constructor to incrementally build the value array along with offsets and
/// null bitmap.
BaseListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> const& value_builder,
const std::shared_ptr<DataType>& type)
: ArrayBuilder(pool),
offsets_builder_(pool),
value_builder_(value_builder),
value_field_(type->field(0)->WithType(NULLPTR)) {}
BaseListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> const& value_builder)
: BaseListBuilder(pool, value_builder, list(value_builder->type())) {}
Status Resize(int64_t capacity) override {
if (capacity > maximum_elements()) {
      return Status::CapacityError("List array cannot reserve space for more than ",
                                   maximum_elements(), " elements, got ", capacity);
}
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
// One more than requested for offsets
ARROW_RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
return ArrayBuilder::Resize(capacity);
}
void Reset() override {
ArrayBuilder::Reset();
offsets_builder_.Reset();
value_builder_->Reset();
}
/// \brief Vector append
///
/// If passed, valid_bytes is of equal length to values, and any zero byte
/// will be considered as a null for that slot
Status AppendValues(const offset_type* offsets, int64_t length,
const uint8_t* valid_bytes = NULLPTR) {
ARROW_RETURN_NOT_OK(Reserve(length));
UnsafeAppendToBitmap(valid_bytes, length);
offsets_builder_.UnsafeAppend(offsets, length);
return Status::OK();
}
/// \brief Start a new variable-length list slot
///
/// This function should be called before beginning to append elements to the
/// value builder
Status Append(bool is_valid = true) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppendToBitmap(is_valid);
return AppendNextOffset();
}
Status AppendNull() final { return Append(false); }
Status AppendNulls(int64_t length) final {
ARROW_RETURN_NOT_OK(Reserve(length));
ARROW_RETURN_NOT_OK(ValidateOverflow(0));
UnsafeAppendToBitmap(length, false);
const int64_t num_values = value_builder_->length();
for (int64_t i = 0; i < length; ++i) {
offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_values));
}
return Status::OK();
}
Status AppendEmptyValue() final { return Append(true); }
Status AppendEmptyValues(int64_t length) final {
ARROW_RETURN_NOT_OK(Reserve(length));
ARROW_RETURN_NOT_OK(ValidateOverflow(0));
UnsafeAppendToBitmap(length, true);
const int64_t num_values = value_builder_->length();
for (int64_t i = 0; i < length; ++i) {
offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_values));
}
return Status::OK();
}
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override {
const offset_type* offsets = array.GetValues<offset_type>(1);
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0]->data() : NULLPTR;
for (int64_t row = offset; row < offset + length; row++) {
if (!validity || bit_util::GetBit(validity, array.offset + row)) {
ARROW_RETURN_NOT_OK(Append());
int64_t slot_length = offsets[row + 1] - offsets[row];
ARROW_RETURN_NOT_OK(value_builder_->AppendArraySlice(*array.child_data[0],
offsets[row], slot_length));
} else {
ARROW_RETURN_NOT_OK(AppendNull());
}
}
return Status::OK();
}
Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
ARROW_RETURN_NOT_OK(AppendNextOffset());
// Offset padding zeroed by BufferBuilder
std::shared_ptr<Buffer> offsets, null_bitmap;
ARROW_RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
ARROW_RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
if (value_builder_->length() == 0) {
// Try to make sure we get a non-null values buffer (ARROW-2744)
ARROW_RETURN_NOT_OK(value_builder_->Resize(0));
}
std::shared_ptr<ArrayData> items;
ARROW_RETURN_NOT_OK(value_builder_->FinishInternal(&items));
*out = ArrayData::Make(type(), length_, {null_bitmap, offsets}, {std::move(items)},
null_count_);
Reset();
return Status::OK();
}
Status ValidateOverflow(int64_t new_elements) const {
auto new_length = value_builder_->length() + new_elements;
if (ARROW_PREDICT_FALSE(new_length > maximum_elements())) {
      return Status::CapacityError("List array cannot contain more than ",
                                   maximum_elements(), " elements, have ", new_length);
} else {
return Status::OK();
}
}
ArrayBuilder* value_builder() const { return value_builder_.get(); }
// Cannot make this a static attribute because of linking issues
static constexpr int64_t maximum_elements() {
return std::numeric_limits<offset_type>::max() - 1;
}
std::shared_ptr<DataType> type() const override {
return std::make_shared<TYPE>(value_field_->WithType(value_builder_->type()));
}
protected:
TypedBufferBuilder<offset_type> offsets_builder_;
std::shared_ptr<ArrayBuilder> value_builder_;
std::shared_ptr<Field> value_field_;
Status AppendNextOffset() {
ARROW_RETURN_NOT_OK(ValidateOverflow(0));
const int64_t num_values = value_builder_->length();
return offsets_builder_.Append(static_cast<offset_type>(num_values));
}
};
/// \class ListBuilder
/// \brief Builder class for variable-length list array value types
///
/// To use this class, you must append values to the child array builder and use
/// the Append function to delimit each distinct list value (once the values
/// have been appended to the child array) or use the bulk API to append
/// a sequence of offsets and null values.
///
/// A note on types. Per arrow/type.h all types in the C++ implementation are
/// logical, so even though this class always builds a list array, it can
/// represent multiple different logical types. If no logical type is provided
/// at construction time, the class defaults to List<T> where T is taken from
/// the value_builder/values that the object is constructed with.
class ARROW_EXPORT ListBuilder : public BaseListBuilder<ListType> {
public:
using BaseListBuilder::BaseListBuilder;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<ListArray>* out) { return FinishTyped(out); }
};
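// A minimal usage sketch (editorial illustration, not part of the original
// header): build [[1, 2], null, []] by appending child values and delimiting
// each list slot with Append()/AppendNull(). Assumes Int32Builder from the
// primitive builder header is visible; the helper name is hypothetical.
inline Status ExampleBuildListOfInt32(MemoryPool* pool,
                                      std::shared_ptr<Array>* out) {
  auto values = std::make_shared<Int32Builder>(pool);
  ListBuilder builder(pool, values);
  ARROW_RETURN_NOT_OK(builder.Append());      // start slot [1, 2]
  ARROW_RETURN_NOT_OK(values->Append(1));
  ARROW_RETURN_NOT_OK(values->Append(2));
  ARROW_RETURN_NOT_OK(builder.AppendNull());  // null slot
  ARROW_RETURN_NOT_OK(builder.Append());      // empty list slot
  return builder.Finish(out);
}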
/// \class LargeListBuilder
/// \brief Builder class for large variable-length list array value types
///
/// Like ListBuilder, but to create large list arrays (with 64-bit offsets).
class ARROW_EXPORT LargeListBuilder : public BaseListBuilder<LargeListType> {
public:
using BaseListBuilder::BaseListBuilder;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<LargeListArray>* out) { return FinishTyped(out); }
};
// ----------------------------------------------------------------------
// Map builder
/// \class MapBuilder
/// \brief Builder class for arrays of variable-size maps
///
/// To use this class, you must append values to the key and item array builders
/// and use the Append function to delimit each distinct map (once the keys and items
/// have been appended) or use the bulk API to append a sequence of offsets and null
/// maps.
///
/// Key uniqueness and ordering are not validated.
class ARROW_EXPORT MapBuilder : public ArrayBuilder {
public:
/// Use this constructor to define the built array's type explicitly. If key_builder
/// or item_builder has indeterminate type, this builder will also.
MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& key_builder,
const std::shared_ptr<ArrayBuilder>& item_builder,
const std::shared_ptr<DataType>& type);
/// Use this constructor to infer the built array's type. If key_builder or
/// item_builder has indeterminate type, this builder will also.
MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& key_builder,
const std::shared_ptr<ArrayBuilder>& item_builder, bool keys_sorted = false);
MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& item_builder,
const std::shared_ptr<DataType>& type);
Status Resize(int64_t capacity) override;
void Reset() override;
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<MapArray>* out) { return FinishTyped(out); }
/// \brief Vector append
///
/// If passed, valid_bytes is of equal length to values, and any zero byte
/// will be considered as a null for that slot
Status AppendValues(const int32_t* offsets, int64_t length,
const uint8_t* valid_bytes = NULLPTR);
/// \brief Start a new variable-length map slot
///
/// This function should be called before beginning to append elements to the
/// key and item builders
Status Append();
Status AppendNull() final;
Status AppendNulls(int64_t length) final;
Status AppendEmptyValue() final;
Status AppendEmptyValues(int64_t length) final;
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override {
const int32_t* offsets = array.GetValues<int32_t>(1);
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0]->data() : NULLPTR;
for (int64_t row = offset; row < offset + length; row++) {
if (!validity || bit_util::GetBit(validity, array.offset + row)) {
ARROW_RETURN_NOT_OK(Append());
const int64_t slot_length = offsets[row + 1] - offsets[row];
ARROW_RETURN_NOT_OK(key_builder_->AppendArraySlice(
*array.child_data[0]->child_data[0], offsets[row], slot_length));
ARROW_RETURN_NOT_OK(item_builder_->AppendArraySlice(
*array.child_data[0]->child_data[1], offsets[row], slot_length));
} else {
ARROW_RETURN_NOT_OK(AppendNull());
}
}
return Status::OK();
}
/// \brief Get builder to append keys.
///
  /// Appending a key with this builder should be followed by appending
  /// an item or null value with item_builder().
ArrayBuilder* key_builder() const { return key_builder_.get(); }
/// \brief Get builder to append items
///
/// Appending an item with this builder should have been preceded
/// by appending a key with key_builder().
ArrayBuilder* item_builder() const { return item_builder_.get(); }
/// \brief Get builder to add Map entries as struct values.
///
/// This is used instead of key_builder()/item_builder() and allows
/// the Map to be built as a list of struct values.
ArrayBuilder* value_builder() const { return list_builder_->value_builder(); }
std::shared_ptr<DataType> type() const override {
// Key and Item builder may update types, but they don't contain the field names,
// so we need to reconstruct the type. (See ARROW-13735.)
return std::make_shared<MapType>(
field(entries_name_,
struct_({field(key_name_, key_builder_->type(), false),
field(item_name_, item_builder_->type(), item_nullable_)}),
false),
keys_sorted_);
}
Status ValidateOverflow(int64_t new_elements) {
return list_builder_->ValidateOverflow(new_elements);
}
protected:
inline Status AdjustStructBuilderLength();
protected:
bool keys_sorted_ = false;
bool item_nullable_ = false;
std::string entries_name_;
std::string key_name_;
std::string item_name_;
std::shared_ptr<ListBuilder> list_builder_;
std::shared_ptr<ArrayBuilder> key_builder_;
std::shared_ptr<ArrayBuilder> item_builder_;
};
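// A minimal usage sketch (editorial illustration, not part of the original
// header): build the map array [{"hello": 1}, null]. Assumes StringBuilder
// and Int32Builder from the binary/primitive builder headers; the helper
// name is hypothetical.
inline Status ExampleBuildMap(MemoryPool* pool, std::shared_ptr<Array>* out) {
  auto keys = std::make_shared<StringBuilder>(pool);
  auto items = std::make_shared<Int32Builder>(pool);
  MapBuilder builder(pool, keys, items);
  ARROW_RETURN_NOT_OK(builder.Append());  // delimit the first map slot
  ARROW_RETURN_NOT_OK(keys->Append("hello"));
  ARROW_RETURN_NOT_OK(items->Append(1));
  ARROW_RETURN_NOT_OK(builder.AppendNull());  // null map slot
  return builder.Finish(out);
}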
// ----------------------------------------------------------------------
// FixedSizeList builder
/// \class FixedSizeListBuilder
/// \brief Builder class for fixed-length list array value types
class ARROW_EXPORT FixedSizeListBuilder : public ArrayBuilder {
public:
/// Use this constructor to define the built array's type explicitly. If value_builder
/// has indeterminate type, this builder will also.
FixedSizeListBuilder(MemoryPool* pool,
std::shared_ptr<ArrayBuilder> const& value_builder,
int32_t list_size);
/// Use this constructor to infer the built array's type. If value_builder has
/// indeterminate type, this builder will also.
FixedSizeListBuilder(MemoryPool* pool,
std::shared_ptr<ArrayBuilder> const& value_builder,
const std::shared_ptr<DataType>& type);
Status Resize(int64_t capacity) override;
void Reset() override;
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<FixedSizeListArray>* out) { return FinishTyped(out); }
/// \brief Append a valid fixed length list.
///
/// This function affects only the validity bitmap; the child values must be appended
/// using the child array builder.
Status Append();
/// \brief Vector append
///
  /// If passed, valid_bytes will be read and any zero byte
  /// will cause the corresponding slot to be null
///
/// This function affects only the validity bitmap; the child values must be appended
/// using the child array builder. This includes appending nulls for null lists.
/// XXX this restriction is confusing, should this method be omitted?
Status AppendValues(int64_t length, const uint8_t* valid_bytes = NULLPTR);
/// \brief Append a null fixed length list.
///
/// The child array builder will have the appropriate number of nulls appended
/// automatically.
Status AppendNull() final;
/// \brief Append length null fixed length lists.
///
/// The child array builder will have the appropriate number of nulls appended
/// automatically.
Status AppendNulls(int64_t length) final;
Status ValidateOverflow(int64_t new_elements);
Status AppendEmptyValue() final;
Status AppendEmptyValues(int64_t length) final;
Status AppendArraySlice(const ArrayData& array, int64_t offset, int64_t length) final {
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0]->data() : NULLPTR;
for (int64_t row = offset; row < offset + length; row++) {
if (!validity || bit_util::GetBit(validity, array.offset + row)) {
ARROW_RETURN_NOT_OK(value_builder_->AppendArraySlice(
*array.child_data[0], list_size_ * (array.offset + row), list_size_));
ARROW_RETURN_NOT_OK(Append());
} else {
ARROW_RETURN_NOT_OK(AppendNull());
}
}
return Status::OK();
}
ArrayBuilder* value_builder() const { return value_builder_.get(); }
std::shared_ptr<DataType> type() const override {
return fixed_size_list(value_field_->WithType(value_builder_->type()), list_size_);
}
// Cannot make this a static attribute because of linking issues
static constexpr int64_t maximum_elements() {
return std::numeric_limits<FixedSizeListType::offset_type>::max() - 1;
}
protected:
std::shared_ptr<Field> value_field_;
const int32_t list_size_;
std::shared_ptr<ArrayBuilder> value_builder_;
};
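// A minimal usage sketch (editorial illustration, not part of the original
// header): fixed_size_list<int32>[2] with rows [1, 2] and null. Note that
// Append() only sets validity, while AppendNull() also pads the child
// builder. Assumes Int32Builder; the helper name is hypothetical.
inline Status ExampleBuildFixedSizeList(MemoryPool* pool,
                                        std::shared_ptr<Array>* out) {
  auto values = std::make_shared<Int32Builder>(pool);
  FixedSizeListBuilder builder(pool, values, /*list_size=*/2);
  ARROW_RETURN_NOT_OK(builder.Append());      // row 0 is valid...
  ARROW_RETURN_NOT_OK(values->Append(1));     // ...its two values follow
  ARROW_RETURN_NOT_OK(values->Append(2));
  ARROW_RETURN_NOT_OK(builder.AppendNull());  // child nulls added for us
  return builder.Finish(out);
}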
// ----------------------------------------------------------------------
// Struct
// ---------------------------------------------------------------------------------
// StructArray builder
/// The Append, Resize and Reserve methods act on the StructBuilder itself.
/// Make sure to call these methods consistently on the StructBuilder and on
/// all of its child builders to keep the data structure consistent.
class ARROW_EXPORT StructBuilder : public ArrayBuilder {
public:
/// If any of field_builders has indeterminate type, this builder will also
StructBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
std::vector<std::shared_ptr<ArrayBuilder>> field_builders);
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<StructArray>* out) { return FinishTyped(out); }
  /// Null bitmap is of equal length to every child field, and any zero byte
  /// will be considered as a null for that field, but users must use the
  /// append or advance methods of the child builders independently to
  /// insert data.
Status AppendValues(int64_t length, const uint8_t* valid_bytes) {
ARROW_RETURN_NOT_OK(Reserve(length));
UnsafeAppendToBitmap(valid_bytes, length);
return Status::OK();
}
/// Append an element to the Struct. All child-builders' Append method must
/// be called independently to maintain data-structure consistency.
Status Append(bool is_valid = true) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppendToBitmap(is_valid);
return Status::OK();
}
/// \brief Append a null value. Automatically appends an empty value to each child
/// builder.
Status AppendNull() final {
for (const auto& field : children_) {
ARROW_RETURN_NOT_OK(field->AppendEmptyValue());
}
return Append(false);
}
/// \brief Append multiple null values. Automatically appends empty values to each
/// child builder.
Status AppendNulls(int64_t length) final {
for (const auto& field : children_) {
ARROW_RETURN_NOT_OK(field->AppendEmptyValues(length));
}
ARROW_RETURN_NOT_OK(Reserve(length));
UnsafeAppendToBitmap(length, false);
return Status::OK();
}
Status AppendEmptyValue() final {
for (const auto& field : children_) {
ARROW_RETURN_NOT_OK(field->AppendEmptyValue());
}
return Append(true);
}
Status AppendEmptyValues(int64_t length) final {
for (const auto& field : children_) {
ARROW_RETURN_NOT_OK(field->AppendEmptyValues(length));
}
ARROW_RETURN_NOT_OK(Reserve(length));
UnsafeAppendToBitmap(length, true);
return Status::OK();
}
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override {
for (int i = 0; static_cast<size_t>(i) < children_.size(); i++) {
ARROW_RETURN_NOT_OK(children_[i]->AppendArraySlice(*array.child_data[i],
array.offset + offset, length));
}
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0]->data() : NULLPTR;
ARROW_RETURN_NOT_OK(Reserve(length));
UnsafeAppendToBitmap(validity, array.offset + offset, length);
return Status::OK();
}
void Reset() override;
ArrayBuilder* field_builder(int i) const { return children_[i].get(); }
int num_fields() const { return static_cast<int>(children_.size()); }
std::shared_ptr<DataType> type() const override;
private:
std::shared_ptr<DataType> type_;
};
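// A minimal usage sketch (editorial illustration, not part of the original
// header): struct<x: int32, s: utf8> with the single row {x: 1, s: "a"}.
// Assumes Int32Builder and StringBuilder; the helper name is hypothetical.
inline Status ExampleBuildStruct(MemoryPool* pool,
                                 std::shared_ptr<Array>* out) {
  auto x = std::make_shared<Int32Builder>(pool);
  auto s = std::make_shared<StringBuilder>(pool);
  StructBuilder builder(struct_({field("x", int32()), field("s", utf8())}),
                        pool, {x, s});
  ARROW_RETURN_NOT_OK(builder.Append());  // validity only; children follow
  ARROW_RETURN_NOT_OK(x->Append(1));
  ARROW_RETURN_NOT_OK(s->Append("a"));
  return builder.Finish(out);
}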
/// @}
} // namespace arrow

View File

@@ -0,0 +1,539 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <memory>
#include <vector>
#include "arrow/array/builder_base.h"
#include "arrow/array/data.h"
#include "arrow/result.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
namespace arrow {
class ARROW_EXPORT NullBuilder : public ArrayBuilder {
public:
explicit NullBuilder(MemoryPool* pool = default_memory_pool()) : ArrayBuilder(pool) {}
explicit NullBuilder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool())
: NullBuilder(pool) {}
/// \brief Append the specified number of null elements
Status AppendNulls(int64_t length) final {
    if (length < 0) return Status::Invalid("length must be non-negative");
null_count_ += length;
length_ += length;
return Status::OK();
}
/// \brief Append a single null element
Status AppendNull() final { return AppendNulls(1); }
Status AppendEmptyValues(int64_t length) final { return AppendNulls(length); }
Status AppendEmptyValue() final { return AppendEmptyValues(1); }
Status Append(std::nullptr_t) { return AppendNull(); }
Status AppendArraySlice(const ArrayData&, int64_t, int64_t length) override {
return AppendNulls(length);
}
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
std::shared_ptr<DataType> type() const override { return null(); }
Status Finish(std::shared_ptr<NullArray>* out) { return FinishTyped(out); }
};
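// A minimal usage sketch (editorial illustration, not part of the original
// header): a NullBuilder only tracks a length; Append(nullptr) and
// AppendNull() are equivalent. The helper name is hypothetical.
inline Status ExampleBuildNulls(std::shared_ptr<Array>* out) {
  NullBuilder builder;
  ARROW_RETURN_NOT_OK(builder.Append(nullptr));
  ARROW_RETURN_NOT_OK(builder.AppendNulls(2));
  return builder.Finish(out);  // NullArray of length 3
}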
/// \addtogroup numeric-builders
///
/// @{
/// Base class for all Builders that emit an Array of a scalar numerical type.
template <typename T>
class NumericBuilder : public ArrayBuilder {
public:
using TypeClass = T;
using value_type = typename T::c_type;
using ArrayType = typename TypeTraits<T>::ArrayType;
template <typename T1 = T>
explicit NumericBuilder(
enable_if_parameter_free<T1, MemoryPool*> pool = default_memory_pool())
: ArrayBuilder(pool), type_(TypeTraits<T>::type_singleton()), data_builder_(pool) {}
NumericBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
: ArrayBuilder(pool), type_(type), data_builder_(pool) {}
/// Append a single scalar and increase the size if necessary.
Status Append(const value_type val) {
ARROW_RETURN_NOT_OK(ArrayBuilder::Reserve(1));
UnsafeAppend(val);
return Status::OK();
}
  /// \brief Append the specified number of null elements
  ///
  /// The memory at the corresponding data slots is set to 0 to prevent
  /// uninitialized memory access
Status AppendNulls(int64_t length) final {
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(length, value_type{}); // zero
UnsafeSetNull(length);
return Status::OK();
}
/// \brief Append a single null element
Status AppendNull() final {
ARROW_RETURN_NOT_OK(Reserve(1));
data_builder_.UnsafeAppend(value_type{}); // zero
UnsafeAppendToBitmap(false);
return Status::OK();
}
  /// \brief Append an empty element
Status AppendEmptyValue() final {
ARROW_RETURN_NOT_OK(Reserve(1));
data_builder_.UnsafeAppend(value_type{}); // zero
UnsafeAppendToBitmap(true);
return Status::OK();
}
/// \brief Append several empty elements
Status AppendEmptyValues(int64_t length) final {
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(length, value_type{}); // zero
UnsafeSetNotNull(length);
return Status::OK();
}
value_type GetValue(int64_t index) const { return data_builder_.data()[index]; }
void Reset() override { data_builder_.Reset(); }
Status Resize(int64_t capacity) override {
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
capacity = std::max(capacity, kMinBuilderCapacity);
ARROW_RETURN_NOT_OK(data_builder_.Resize(capacity));
return ArrayBuilder::Resize(capacity);
}
value_type operator[](int64_t index) const { return GetValue(index); }
value_type& operator[](int64_t index) {
return reinterpret_cast<value_type*>(data_builder_.mutable_data())[index];
}
/// \brief Append a sequence of elements in one shot
/// \param[in] values a contiguous C array of values
/// \param[in] length the number of values to append
/// \param[in] valid_bytes an optional sequence of bytes where non-zero
/// indicates a valid (non-null) value
/// \return Status
Status AppendValues(const value_type* values, int64_t length,
const uint8_t* valid_bytes = NULLPTR) {
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(values, length);
    // length_ is updated by UnsafeAppendToBitmap
ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
return Status::OK();
}
/// \brief Append a sequence of elements in one shot
/// \param[in] values a contiguous C array of values
/// \param[in] length the number of values to append
/// \param[in] bitmap a validity bitmap to copy (may be null)
/// \param[in] bitmap_offset an offset into the validity bitmap
/// \return Status
Status AppendValues(const value_type* values, int64_t length, const uint8_t* bitmap,
int64_t bitmap_offset) {
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(values, length);
    // length_ is updated by UnsafeAppendToBitmap
ArrayBuilder::UnsafeAppendToBitmap(bitmap, bitmap_offset, length);
return Status::OK();
}
/// \brief Append a sequence of elements in one shot
/// \param[in] values a contiguous C array of values
/// \param[in] length the number of values to append
/// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
/// (0). Equal in length to values
/// \return Status
Status AppendValues(const value_type* values, int64_t length,
const std::vector<bool>& is_valid) {
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(values, length);
    // length_ is updated by UnsafeAppendToBitmap
ArrayBuilder::UnsafeAppendToBitmap(is_valid);
return Status::OK();
}
/// \brief Append a sequence of elements in one shot
/// \param[in] values a std::vector of values
/// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
/// (0). Equal in length to values
/// \return Status
Status AppendValues(const std::vector<value_type>& values,
const std::vector<bool>& is_valid) {
return AppendValues(values.data(), static_cast<int64_t>(values.size()), is_valid);
}
/// \brief Append a sequence of elements in one shot
/// \param[in] values a std::vector of values
/// \return Status
Status AppendValues(const std::vector<value_type>& values) {
return AppendValues(values.data(), static_cast<int64_t>(values.size()));
}
Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
ARROW_ASSIGN_OR_RAISE(auto null_bitmap,
null_bitmap_builder_.FinishWithLength(length_));
ARROW_ASSIGN_OR_RAISE(auto data, data_builder_.FinishWithLength(length_));
*out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_);
capacity_ = length_ = null_count_ = 0;
return Status::OK();
}
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<ArrayType>* out) { return FinishTyped(out); }
/// \brief Append a sequence of elements in one shot
/// \param[in] values_begin InputIterator to the beginning of the values
/// \param[in] values_end InputIterator pointing to the end of the values
/// \return Status
template <typename ValuesIter>
Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(values_begin, values_end);
// this updates the length_
UnsafeSetNotNull(length);
return Status::OK();
}
/// \brief Append a sequence of elements in one shot, with a specified nullmap
/// \param[in] values_begin InputIterator to the beginning of the values
/// \param[in] values_end InputIterator pointing to the end of the values
  /// \param[in] valid_begin InputIterator with elements indicating valid (1)
  /// or null (0) values.
/// \return Status
template <typename ValuesIter, typename ValidIter>
enable_if_t<!std::is_pointer<ValidIter>::value, Status> AppendValues(
ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
static_assert(!internal::is_null_pointer<ValidIter>::value,
"Don't pass a NULLPTR directly as valid_begin, use the 2-argument "
"version instead");
int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(values_begin, values_end);
null_bitmap_builder_.UnsafeAppend<true>(
length, [&valid_begin]() -> bool { return *valid_begin++; });
length_ = null_bitmap_builder_.length();
null_count_ = null_bitmap_builder_.false_count();
return Status::OK();
}
// Same as above, with a pointer type ValidIter
template <typename ValuesIter, typename ValidIter>
enable_if_t<std::is_pointer<ValidIter>::value, Status> AppendValues(
ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(values_begin, values_end);
// this updates the length_
if (valid_begin == NULLPTR) {
UnsafeSetNotNull(length);
} else {
null_bitmap_builder_.UnsafeAppend<true>(
length, [&valid_begin]() -> bool { return *valid_begin++; });
length_ = null_bitmap_builder_.length();
null_count_ = null_bitmap_builder_.false_count();
}
return Status::OK();
}
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override {
return AppendValues(array.GetValues<value_type>(1) + offset, length,
array.GetValues<uint8_t>(0, 0), array.offset + offset);
}
/// Append a single scalar under the assumption that the underlying Buffer is
/// large enough.
///
/// This method does not capacity-check; make sure to call Reserve
/// beforehand.
void UnsafeAppend(const value_type val) {
ArrayBuilder::UnsafeAppendToBitmap(true);
data_builder_.UnsafeAppend(val);
}
void UnsafeAppendNull() {
ArrayBuilder::UnsafeAppendToBitmap(false);
data_builder_.UnsafeAppend(value_type{}); // zero
}
std::shared_ptr<DataType> type() const override { return type_; }
protected:
std::shared_ptr<DataType> type_;
TypedBufferBuilder<value_type> data_builder_;
};
// Builders
using UInt8Builder = NumericBuilder<UInt8Type>;
using UInt16Builder = NumericBuilder<UInt16Type>;
using UInt32Builder = NumericBuilder<UInt32Type>;
using UInt64Builder = NumericBuilder<UInt64Type>;
using Int8Builder = NumericBuilder<Int8Type>;
using Int16Builder = NumericBuilder<Int16Type>;
using Int32Builder = NumericBuilder<Int32Type>;
using Int64Builder = NumericBuilder<Int64Type>;
using HalfFloatBuilder = NumericBuilder<HalfFloatType>;
using FloatBuilder = NumericBuilder<FloatType>;
using DoubleBuilder = NumericBuilder<DoubleType>;
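// A minimal usage sketch (editorial illustration, not part of the original
// header): bulk-append with a validity vector, then finish. The helper name
// is hypothetical.
inline Status ExampleBuildInt64(std::shared_ptr<Array>* out) {
  Int64Builder builder;
  ARROW_RETURN_NOT_OK(builder.AppendValues(std::vector<int64_t>{1, 2, 3},
                                           std::vector<bool>{true, false, true}));
  return builder.Finish(out);  // [1, null, 3]
}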
/// @}
/// \addtogroup temporal-builders
///
/// @{
using Date32Builder = NumericBuilder<Date32Type>;
using Date64Builder = NumericBuilder<Date64Type>;
using Time32Builder = NumericBuilder<Time32Type>;
using Time64Builder = NumericBuilder<Time64Type>;
using TimestampBuilder = NumericBuilder<TimestampType>;
using MonthIntervalBuilder = NumericBuilder<MonthIntervalType>;
using DurationBuilder = NumericBuilder<DurationType>;
/// @}
class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
public:
using TypeClass = BooleanType;
using value_type = bool;
explicit BooleanBuilder(MemoryPool* pool = default_memory_pool());
BooleanBuilder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool());
  /// \brief Append the specified number of null elements
Status AppendNulls(int64_t length) final {
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(length, false);
UnsafeSetNull(length);
return Status::OK();
}
Status AppendNull() final {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppendNull();
return Status::OK();
}
Status AppendEmptyValue() final {
ARROW_RETURN_NOT_OK(Reserve(1));
data_builder_.UnsafeAppend(false);
UnsafeSetNotNull(1);
return Status::OK();
}
Status AppendEmptyValues(int64_t length) final {
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(length, false);
UnsafeSetNotNull(length);
return Status::OK();
}
/// Scalar append
Status Append(const bool val) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppend(val);
return Status::OK();
}
Status Append(const uint8_t val) { return Append(val != 0); }
/// Scalar append, without checking for capacity
void UnsafeAppend(const bool val) {
data_builder_.UnsafeAppend(val);
UnsafeAppendToBitmap(true);
}
void UnsafeAppendNull() {
data_builder_.UnsafeAppend(false);
UnsafeAppendToBitmap(false);
}
void UnsafeAppend(const uint8_t val) { UnsafeAppend(val != 0); }
/// \brief Append a sequence of elements in one shot
/// \param[in] values a contiguous array of bytes (non-zero is 1)
/// \param[in] length the number of values to append
/// \param[in] valid_bytes an optional sequence of bytes where non-zero
/// indicates a valid (non-null) value
/// \return Status
Status AppendValues(const uint8_t* values, int64_t length,
const uint8_t* valid_bytes = NULLPTR);
/// \brief Append a sequence of elements in one shot
/// \param[in] values a bitmap of values
/// \param[in] length the number of values to append
/// \param[in] validity a validity bitmap to copy (may be null)
/// \param[in] offset an offset into the values and validity bitmaps
/// \return Status
Status AppendValues(const uint8_t* values, int64_t length, const uint8_t* validity,
int64_t offset);
/// \brief Append a sequence of elements in one shot
/// \param[in] values a contiguous C array of values
/// \param[in] length the number of values to append
/// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
/// (0). Equal in length to values
/// \return Status
Status AppendValues(const uint8_t* values, int64_t length,
const std::vector<bool>& is_valid);
/// \brief Append a sequence of elements in one shot
/// \param[in] values a std::vector of bytes
/// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
/// (0). Equal in length to values
/// \return Status
Status AppendValues(const std::vector<uint8_t>& values,
const std::vector<bool>& is_valid);
/// \brief Append a sequence of elements in one shot
/// \param[in] values a std::vector of bytes
/// \return Status
Status AppendValues(const std::vector<uint8_t>& values);
/// \brief Append a sequence of elements in one shot
/// \param[in] values an std::vector<bool> indicating true (1) or false
/// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
/// (0). Equal in length to values
/// \return Status
Status AppendValues(const std::vector<bool>& values, const std::vector<bool>& is_valid);
/// \brief Append a sequence of elements in one shot
/// \param[in] values an std::vector<bool> indicating true (1) or false
/// \return Status
Status AppendValues(const std::vector<bool>& values);
/// \brief Append a sequence of elements in one shot
/// \param[in] values_begin InputIterator to the beginning of the values
/// \param[in] values_end InputIterator pointing to the end of the values
/// \return Status
template <typename ValuesIter>
Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend<false>(
length, [&values_begin]() -> bool { return *values_begin++; });
// this updates length_
UnsafeSetNotNull(length);
return Status::OK();
}
/// \brief Append a sequence of elements in one shot, with a specified nullmap
/// \param[in] values_begin InputIterator to the beginning of the values
/// \param[in] values_end InputIterator pointing to the end of the values
  /// \param[in] valid_begin InputIterator with elements indicating valid (1)
  /// or null (0) values
/// \return Status
template <typename ValuesIter, typename ValidIter>
enable_if_t<!std::is_pointer<ValidIter>::value, Status> AppendValues(
ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
static_assert(!internal::is_null_pointer<ValidIter>::value,
"Don't pass a NULLPTR directly as valid_begin, use the 2-argument "
"version instead");
int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend<false>(
length, [&values_begin]() -> bool { return *values_begin++; });
null_bitmap_builder_.UnsafeAppend<true>(
length, [&valid_begin]() -> bool { return *valid_begin++; });
length_ = null_bitmap_builder_.length();
null_count_ = null_bitmap_builder_.false_count();
return Status::OK();
}
// Same as above, for a pointer type ValidIter
template <typename ValuesIter, typename ValidIter>
enable_if_t<std::is_pointer<ValidIter>::value, Status> AppendValues(
ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend<false>(
length, [&values_begin]() -> bool { return *values_begin++; });
if (valid_begin == NULLPTR) {
UnsafeSetNotNull(length);
} else {
null_bitmap_builder_.UnsafeAppend<true>(
length, [&valid_begin]() -> bool { return *valid_begin++; });
}
length_ = null_bitmap_builder_.length();
null_count_ = null_bitmap_builder_.false_count();
return Status::OK();
}
Status AppendValues(int64_t length, bool value);
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override {
return AppendValues(array.GetValues<uint8_t>(1, 0), length,
array.GetValues<uint8_t>(0, 0), array.offset + offset);
}
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<BooleanArray>* out) { return FinishTyped(out); }
void Reset() override;
Status Resize(int64_t capacity) override;
std::shared_ptr<DataType> type() const override { return boolean(); }
protected:
TypedBufferBuilder<bool> data_builder_;
};
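// A minimal usage sketch (editorial illustration, not part of the original
// header): values are bit-packed by TypedBufferBuilder<bool>, and the
// uint8_t overload treats any non-zero byte as true. Hypothetical helper.
inline Status ExampleBuildBoolean(std::shared_ptr<Array>* out) {
  BooleanBuilder builder;
  ARROW_RETURN_NOT_OK(builder.Append(true));
  ARROW_RETURN_NOT_OK(builder.Append(static_cast<uint8_t>(0)));  // false
  ARROW_RETURN_NOT_OK(builder.AppendNull());
  return builder.Finish(out);  // [true, false, null]
}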
} // namespace arrow

View File

@@ -0,0 +1,62 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Contains declarations of time related Arrow builder types.
#pragma once
#include <memory>
#include "arrow/array/builder_base.h"
#include "arrow/array/builder_primitive.h"
namespace arrow {
/// \addtogroup temporal-builders
///
/// @{
// TODO(ARROW-7938): this class is untested
class ARROW_EXPORT DayTimeIntervalBuilder : public NumericBuilder<DayTimeIntervalType> {
public:
using DayMilliseconds = DayTimeIntervalType::DayMilliseconds;
explicit DayTimeIntervalBuilder(MemoryPool* pool = default_memory_pool())
: DayTimeIntervalBuilder(day_time_interval(), pool) {}
explicit DayTimeIntervalBuilder(std::shared_ptr<DataType> type,
MemoryPool* pool = default_memory_pool())
: NumericBuilder<DayTimeIntervalType>(type, pool) {}
};
class ARROW_EXPORT MonthDayNanoIntervalBuilder
: public NumericBuilder<MonthDayNanoIntervalType> {
public:
using MonthDayNanos = MonthDayNanoIntervalType::MonthDayNanos;
explicit MonthDayNanoIntervalBuilder(MemoryPool* pool = default_memory_pool())
: MonthDayNanoIntervalBuilder(month_day_nano_interval(), pool) {}
explicit MonthDayNanoIntervalBuilder(std::shared_ptr<DataType> type,
MemoryPool* pool = default_memory_pool())
: NumericBuilder<MonthDayNanoIntervalType>(type, pool) {}
};
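// A minimal usage sketch (editorial illustration, not part of the original
// header): interval builders are plain NumericBuilder instantiations whose
// value_type is a small struct. The helper name is hypothetical.
inline Status ExampleBuildMonthDayNano(std::shared_ptr<Array>* out) {
  MonthDayNanoIntervalBuilder builder;
  ARROW_RETURN_NOT_OK(builder.Append(
      MonthDayNanoIntervalBuilder::MonthDayNanos{/*months=*/1, /*days=*/15,
                                                 /*nanoseconds=*/0}));
  return builder.Finish(out);
}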
/// @}
} // namespace arrow

View File

@@ -0,0 +1,248 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "arrow/array/array_nested.h"
#include "arrow/array/builder_base.h"
#include "arrow/array/data.h"
#include "arrow/buffer_builder.h"
#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \addtogroup nested-builders
///
/// @{
/// \brief Base class for union array builders.
///
/// Note that although this subclasses ArrayBuilder, union types do not have
/// a validity bitmap, so the bitmap builder member of ArrayBuilder is unused.
class ARROW_EXPORT BasicUnionBuilder : public ArrayBuilder {
public:
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \cond FALSE
using ArrayBuilder::Finish;
/// \endcond
Status Finish(std::shared_ptr<UnionArray>* out) { return FinishTyped(out); }
/// \brief Make a new child builder available to the UnionArray
///
/// \param[in] new_child the child builder
/// \param[in] field_name the name of the field in the union array type
/// if type inference is used
/// \return child index, which is the "type" argument that needs
/// to be passed to the "Append" method to add a new element to
/// the union array.
int8_t AppendChild(const std::shared_ptr<ArrayBuilder>& new_child,
const std::string& field_name = "");
std::shared_ptr<DataType> type() const override;
int64_t length() const override { return types_builder_.length(); }
protected:
BasicUnionBuilder(MemoryPool* pool,
const std::vector<std::shared_ptr<ArrayBuilder>>& children,
const std::shared_ptr<DataType>& type);
int8_t NextTypeId();
std::vector<std::shared_ptr<Field>> child_fields_;
std::vector<int8_t> type_codes_;
UnionMode::type mode_;
std::vector<ArrayBuilder*> type_id_to_children_;
std::vector<int> type_id_to_child_id_;
// for all type_id < dense_type_id_, type_id_to_children_[type_id] != nullptr
int8_t dense_type_id_ = 0;
TypedBufferBuilder<int8_t> types_builder_;
};
/// \class DenseUnionBuilder
///
/// This API is EXPERIMENTAL.
class ARROW_EXPORT DenseUnionBuilder : public BasicUnionBuilder {
public:
/// Use this constructor to initialize the UnionBuilder with no child builders,
/// allowing the type to be inferred. You will need to call AppendChild for each
/// of the child builders you want to use.
explicit DenseUnionBuilder(MemoryPool* pool)
: BasicUnionBuilder(pool, {}, dense_union(FieldVector{})), offsets_builder_(pool) {}
/// Use this constructor to specify the type explicitly.
/// You can still add child builders to the union after using this constructor.
DenseUnionBuilder(MemoryPool* pool,
const std::vector<std::shared_ptr<ArrayBuilder>>& children,
const std::shared_ptr<DataType>& type)
: BasicUnionBuilder(pool, children, type), offsets_builder_(pool) {}
Status AppendNull() final {
const int8_t first_child_code = type_codes_[0];
ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
ARROW_RETURN_NOT_OK(
offsets_builder_.Append(static_cast<int32_t>(child_builder->length())));
// Append a null arbitrarily to the first child
return child_builder->AppendNull();
}
Status AppendNulls(int64_t length) final {
const int8_t first_child_code = type_codes_[0];
ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
ARROW_RETURN_NOT_OK(
offsets_builder_.Append(length, static_cast<int32_t>(child_builder->length())));
// Append just a single null to the first child
return child_builder->AppendNull();
}
Status AppendEmptyValue() final {
const int8_t first_child_code = type_codes_[0];
ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
ARROW_RETURN_NOT_OK(
offsets_builder_.Append(static_cast<int32_t>(child_builder->length())));
// Append an empty value arbitrarily to the first child
return child_builder->AppendEmptyValue();
}
Status AppendEmptyValues(int64_t length) final {
const int8_t first_child_code = type_codes_[0];
ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
ARROW_RETURN_NOT_OK(
offsets_builder_.Append(length, static_cast<int32_t>(child_builder->length())));
// Append just a single empty value to the first child
return child_builder->AppendEmptyValue();
}
/// \brief Append an element to the UnionArray. This must be followed
/// by an append to the appropriate child builder.
///
/// \param[in] next_type type_id of the child to which the next value will be appended.
///
/// The corresponding child builder must be appended to independently after this method
/// is called.
Status Append(int8_t next_type) {
ARROW_RETURN_NOT_OK(types_builder_.Append(next_type));
if (type_id_to_children_[next_type]->length() == kListMaximumElements) {
return Status::CapacityError(
"a dense UnionArray cannot contain more than 2^31 - 1 elements from a single "
"child");
}
auto offset = static_cast<int32_t>(type_id_to_children_[next_type]->length());
return offsets_builder_.Append(offset);
}
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override;
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
private:
TypedBufferBuilder<int32_t> offsets_builder_;
};
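// A minimal usage sketch, assuming the type-inference constructor above (to be
// placed in a function returning arrow::Status; names are hypothetical):
//
//   arrow::DenseUnionBuilder builder(arrow::default_memory_pool());
//   auto ints = std::make_shared<arrow::Int32Builder>();
//   auto strs = std::make_shared<arrow::StringBuilder>();
//   const int8_t int_code = builder.AppendChild(ints, "ints");
//   const int8_t str_code = builder.AppendChild(strs, "strs");
//   ARROW_RETURN_NOT_OK(builder.Append(int_code));  // next value goes to "ints"
//   ARROW_RETURN_NOT_OK(ints->Append(42));
//   ARROW_RETURN_NOT_OK(builder.Append(str_code));  // next value goes to "strs"
//   ARROW_RETURN_NOT_OK(strs->Append("hello"));
//   std::shared_ptr<arrow::Array> out;
//   ARROW_RETURN_NOT_OK(builder.Finish(&out));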
/// \class SparseUnionBuilder
///
/// This API is EXPERIMENTAL.
class ARROW_EXPORT SparseUnionBuilder : public BasicUnionBuilder {
public:
/// Use this constructor to initialize the UnionBuilder with no child builders,
/// allowing the type to be inferred. You will need to call AppendChild for each
/// of the child builders you want to use.
explicit SparseUnionBuilder(MemoryPool* pool)
: BasicUnionBuilder(pool, {}, sparse_union(FieldVector{})) {}
/// Use this constructor to specify the type explicitly.
/// You can still add child builders to the union after using this constructor.
SparseUnionBuilder(MemoryPool* pool,
const std::vector<std::shared_ptr<ArrayBuilder>>& children,
const std::shared_ptr<DataType>& type)
: BasicUnionBuilder(pool, children, type) {}
/// \brief Append a null value.
///
/// A null is appended to the first child, empty values to the other children.
Status AppendNull() final {
const auto first_child_code = type_codes_[0];
ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
ARROW_RETURN_NOT_OK(type_id_to_children_[first_child_code]->AppendNull());
for (int i = 1; i < static_cast<int>(type_codes_.size()); ++i) {
ARROW_RETURN_NOT_OK(type_id_to_children_[type_codes_[i]]->AppendEmptyValue());
}
return Status::OK();
}
/// \brief Append multiple null values.
///
/// Nulls are appended to the first child, empty values to the other children.
Status AppendNulls(int64_t length) final {
const auto first_child_code = type_codes_[0];
ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
ARROW_RETURN_NOT_OK(type_id_to_children_[first_child_code]->AppendNulls(length));
for (int i = 1; i < static_cast<int>(type_codes_.size()); ++i) {
ARROW_RETURN_NOT_OK(
type_id_to_children_[type_codes_[i]]->AppendEmptyValues(length));
}
return Status::OK();
}
Status AppendEmptyValue() final {
ARROW_RETURN_NOT_OK(types_builder_.Append(type_codes_[0]));
for (int8_t code : type_codes_) {
ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendEmptyValue());
}
return Status::OK();
}
Status AppendEmptyValues(int64_t length) final {
ARROW_RETURN_NOT_OK(types_builder_.Append(length, type_codes_[0]));
for (int8_t code : type_codes_) {
ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendEmptyValues(length));
}
return Status::OK();
}
/// \brief Append an element to the UnionArray. This must be followed
/// by an append to the appropriate child builder.
///
/// \param[in] next_type type_id of the child to which the next value will be appended.
///
/// The corresponding child builder must be appended to independently after this method
/// is called, and a null or empty value must be appended to every other child builder.
Status Append(int8_t next_type) { return types_builder_.Append(next_type); }
Status AppendArraySlice(const ArrayData& array, int64_t offset,
int64_t length) override;
};
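// A minimal usage sketch; unlike the dense builder, every other child must
// receive a null or empty value for each appended slot so that all children
// stay aligned in length (names are hypothetical):
//
//   arrow::SparseUnionBuilder builder(arrow::default_memory_pool());
//   auto ints = std::make_shared<arrow::Int32Builder>();
//   auto floats = std::make_shared<arrow::FloatBuilder>();
//   const int8_t int_code = builder.AppendChild(ints, "i");
//   const int8_t float_code = builder.AppendChild(floats, "f");
//   ARROW_RETURN_NOT_OK(builder.Append(int_code));
//   ARROW_RETURN_NOT_OK(ints->Append(7));
//   ARROW_RETURN_NOT_OK(floats->AppendEmptyValue());  // keep children aligned
//   ARROW_RETURN_NOT_OK(builder.Append(float_code));
//   ARROW_RETURN_NOT_OK(floats->Append(1.5f));
//   ARROW_RETURN_NOT_OK(ints->AppendEmptyValue());
//   std::shared_ptr<arrow::Array> out;
//   ARROW_RETURN_NOT_OK(builder.Finish(&out));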
/// @}
} // namespace arrow

View File

@@ -0,0 +1,37 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \brief Concatenate arrays
///
/// \param[in] arrays a vector of arrays to be concatenated
/// \param[in] pool the memory pool to allocate memory for the result from
/// \return the concatenated array
ARROW_EXPORT
Result<std::shared_ptr<Array>> Concatenate(const ArrayVector& arrays,
MemoryPool* pool = default_memory_pool());
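// A minimal usage sketch, assuming `chunk1` and `chunk2` are hypothetical
// arrays of the same type (inside a function returning arrow::Status):
//
//   ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> joined,
//                         arrow::Concatenate({chunk1, chunk2}));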
} // namespace arrow

View File

@@ -0,0 +1,258 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <atomic> // IWYU pragma: export
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>
#include "arrow/buffer.h"
#include "arrow/result.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
// When slicing, we do not know the null count of the sliced range without
// doing some computation. To avoid doing this eagerly, we set the null count
// to -1 (any negative number will do). When Array::null_count is called the
// first time, the null count will be computed. See ARROW-33
constexpr int64_t kUnknownNullCount = -1;
// ----------------------------------------------------------------------
// Generic array data container
/// \class ArrayData
/// \brief Mutable container for generic Arrow array data
///
/// This data structure is a self-contained representation of the memory and
/// metadata inside an Arrow array data structure (called vectors in Java). The
/// classes arrow::Array and its subclasses provide strongly-typed accessors
/// with support for the visitor pattern and other affordances.
///
/// This class is designed for easy internal data manipulation, analytical data
/// processing, and data transport to and from IPC messages. For example, we
/// could cast from int64 to float64 like so:
///
/// Int64Array arr = GetMyData();
/// auto new_data = arr.data()->Copy();
/// new_data->type = arrow::float64();
/// DoubleArray double_arr(new_data);
///
/// This object is also useful in an analytics setting where memory may be
/// reused. For example, if we had a group of operations all returning doubles,
/// say:
///
/// Log(Sqrt(Expr(arr)))
///
/// Then the low-level implementations of each of these functions could have
/// the signatures
///
/// void Log(const ArrayData& values, ArrayData* out);
///
/// As another example, a function may consume one or more memory buffers in an
/// input array and replace them with newly-allocated data, changing the output
/// data type as well.
struct ARROW_EXPORT ArrayData {
ArrayData() = default;
ArrayData(std::shared_ptr<DataType> type, int64_t length,
int64_t null_count = kUnknownNullCount, int64_t offset = 0)
: type(std::move(type)), length(length), null_count(null_count), offset(offset) {}
ArrayData(std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
int64_t null_count = kUnknownNullCount, int64_t offset = 0)
: ArrayData(std::move(type), length, null_count, offset) {
this->buffers = std::move(buffers);
}
ArrayData(std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
std::vector<std::shared_ptr<ArrayData>> child_data,
int64_t null_count = kUnknownNullCount, int64_t offset = 0)
: ArrayData(std::move(type), length, null_count, offset) {
this->buffers = std::move(buffers);
this->child_data = std::move(child_data);
}
static std::shared_ptr<ArrayData> Make(std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
int64_t null_count = kUnknownNullCount,
int64_t offset = 0);
static std::shared_ptr<ArrayData> Make(
std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
std::vector<std::shared_ptr<ArrayData>> child_data,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
static std::shared_ptr<ArrayData> Make(
std::shared_ptr<DataType> type, int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
std::vector<std::shared_ptr<ArrayData>> child_data,
std::shared_ptr<ArrayData> dictionary, int64_t null_count = kUnknownNullCount,
int64_t offset = 0);
static std::shared_ptr<ArrayData> Make(std::shared_ptr<DataType> type, int64_t length,
int64_t null_count = kUnknownNullCount,
int64_t offset = 0);
// Move constructor
ArrayData(ArrayData&& other) noexcept
: type(std::move(other.type)),
length(other.length),
offset(other.offset),
buffers(std::move(other.buffers)),
child_data(std::move(other.child_data)),
dictionary(std::move(other.dictionary)) {
SetNullCount(other.null_count);
}
// Copy constructor
ArrayData(const ArrayData& other) noexcept
: type(other.type),
length(other.length),
offset(other.offset),
buffers(other.buffers),
child_data(other.child_data),
dictionary(other.dictionary) {
SetNullCount(other.null_count);
}
// Move assignment
ArrayData& operator=(ArrayData&& other) {
type = std::move(other.type);
length = other.length;
SetNullCount(other.null_count);
offset = other.offset;
buffers = std::move(other.buffers);
child_data = std::move(other.child_data);
dictionary = std::move(other.dictionary);
return *this;
}
// Copy assignment
ArrayData& operator=(const ArrayData& other) {
type = other.type;
length = other.length;
SetNullCount(other.null_count);
offset = other.offset;
buffers = other.buffers;
child_data = other.child_data;
dictionary = other.dictionary;
return *this;
}
std::shared_ptr<ArrayData> Copy() const { return std::make_shared<ArrayData>(*this); }
// Access a buffer's data as a typed C pointer
template <typename T>
inline const T* GetValues(int i, int64_t absolute_offset) const {
if (buffers[i]) {
return reinterpret_cast<const T*>(buffers[i]->data()) + absolute_offset;
} else {
return NULLPTR;
}
}
template <typename T>
inline const T* GetValues(int i) const {
return GetValues<T>(i, offset);
}
// Like GetValues, but returns NULLPTR instead of aborting if the underlying
// buffer is not a CPU buffer.
template <typename T>
inline const T* GetValuesSafe(int i, int64_t absolute_offset) const {
if (buffers[i] && buffers[i]->is_cpu()) {
return reinterpret_cast<const T*>(buffers[i]->data()) + absolute_offset;
} else {
return NULLPTR;
}
}
template <typename T>
inline const T* GetValuesSafe(int i) const {
return GetValuesSafe<T>(i, offset);
}
// Access a buffer's data as a typed C pointer
template <typename T>
inline T* GetMutableValues(int i, int64_t absolute_offset) {
if (buffers[i]) {
return reinterpret_cast<T*>(buffers[i]->mutable_data()) + absolute_offset;
} else {
return NULLPTR;
}
}
template <typename T>
inline T* GetMutableValues(int i) {
return GetMutableValues<T>(i, offset);
}
/// \brief Construct a zero-copy slice of the data with the given offset and length
std::shared_ptr<ArrayData> Slice(int64_t offset, int64_t length) const;
/// \brief Input-checking variant of Slice
///
/// An Invalid Status is returned if the requested slice falls out of bounds.
/// Note that unlike Slice, `length` isn't clamped to the available buffer size.
Result<std::shared_ptr<ArrayData>> SliceSafe(int64_t offset, int64_t length) const;
void SetNullCount(int64_t v) { null_count.store(v); }
/// \brief Return null count, or compute and set it if it's not known
int64_t GetNullCount() const;
bool MayHaveNulls() const {
// If an ArrayData is slightly malformed, it may have kUnknownNullCount set
// but no validity buffer
return null_count.load() != 0 && buffers[0] != NULLPTR;
}
std::shared_ptr<DataType> type;
int64_t length = 0;
mutable std::atomic<int64_t> null_count{0};
// The logical start point into the physical buffers (in values, not bytes).
// Note that, for child data, this must be *added* to the child data's own offset.
int64_t offset = 0;
std::vector<std::shared_ptr<Buffer>> buffers;
std::vector<std::shared_ptr<ArrayData>> child_data;
// The dictionary for this Array, if any. Only used for dictionary type
std::shared_ptr<ArrayData> dictionary;
};
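// A minimal access sketch, assuming `array` is a hypothetical
// std::shared_ptr<arrow::Int32Array> (for primitive arrays, values live in
// buffer 1; buffer 0 is the validity bitmap):
//
//   const arrow::ArrayData& data = *array->data();
//   const int32_t* values = data.GetValues<int32_t>(1);  // offset applied
//   int64_t null_count = data.GetNullCount();            // computed lazily
//   std::shared_ptr<arrow::ArrayData> tail = data.Slice(10, data.length - 10);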
namespace internal {
/// Construct a zero-copy view of this ArrayData with the given type.
///
/// This method checks if the types are layout-compatible.
/// Nested types are traversed in depth-first order. Data buffers must have
/// the same item sizes, even though the logical types may be different.
/// An error is returned if the types are not layout-compatible.
ARROW_EXPORT
Result<std::shared_ptr<ArrayData>> GetArrayView(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<DataType>& type);
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,76 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <functional>
#include <iosfwd>
#include <memory>
#include "arrow/array/array_base.h"
#include "arrow/array/array_nested.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \brief Compare two arrays, returning an edit script which expresses the difference
/// between them
///
/// An edit script is an array of struct(insert: bool, run_length: int64_t).
/// Each element of "insert" determines whether an element was inserted into (true)
/// or deleted from (false) base. Each insertion or deletion is followed by a run of
/// elements which are unchanged from base to target; the length of this run is stored
/// in "run_length". (Note that the edit script begins and ends with a run of shared
/// elements but both fields of the struct must have the same length. To accommodate this
/// the first element of "insert" should be ignored.)
///
/// For example for base "hlloo" and target "hello", the edit script would be
/// [
/// {"insert": false, "run_length": 1}, // leading run of length 1 ("h")
/// {"insert": true, "run_length": 3}, // insert("e") then a run of length 3 ("llo")
/// {"insert": false, "run_length": 0} // delete("o") then an empty run
/// ]
///
/// Diffing arrays containing nulls is not currently supported.
///
/// \param[in] base baseline for comparison
/// \param[in] target an array of identical type to base whose elements differ from base's
/// \param[in] pool the memory pool to allocate memory for the result from
/// \return an edit script array which can be applied to base to produce target
ARROW_EXPORT
Result<std::shared_ptr<StructArray>> Diff(const Array& base, const Array& target,
MemoryPool* pool = default_memory_pool());
/// \brief Visitor interface for easy traversal of an edit script
///
/// The visitor will be called for each hunk of insertions and deletions.
ARROW_EXPORT Status VisitEditScript(
const Array& edits,
const std::function<Status(int64_t delete_begin, int64_t delete_end,
int64_t insert_begin, int64_t insert_end)>& visitor);
/// \brief Return a function which will format an edit script in unified
/// diff format to `os`, given base and target arrays of the given type
ARROW_EXPORT Result<
std::function<Status(const Array& edits, const Array& base, const Array& target)>>
MakeUnifiedDiffFormatter(const DataType& type, std::ostream* os);
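// A minimal usage sketch, assuming `base` and `target` are hypothetical
// equal-typed arrays without nulls (inside a function returning arrow::Status):
//
//   ARROW_ASSIGN_OR_RAISE(auto edits, arrow::Diff(base, target));
//   ARROW_ASSIGN_OR_RAISE(auto format,
//                         arrow::MakeUnifiedDiffFormatter(*base.type(), &std::cout));
//   ARROW_RETURN_NOT_OK(format(*edits, base, target));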
} // namespace arrow

View File

@@ -0,0 +1,89 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <vector>
#include "arrow/array/data.h"
#include "arrow/compare.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \brief Create a strongly-typed Array instance from generic ArrayData
/// \param[in] data the array contents
/// \return the resulting Array instance
ARROW_EXPORT
std::shared_ptr<Array> MakeArray(const std::shared_ptr<ArrayData>& data);
/// \brief Create a strongly-typed Array instance with all elements null
/// \param[in] type the array type
/// \param[in] length the array length
/// \param[in] pool the memory pool to allocate memory from
ARROW_EXPORT
Result<std::shared_ptr<Array>> MakeArrayOfNull(const std::shared_ptr<DataType>& type,
int64_t length,
MemoryPool* pool = default_memory_pool());
/// \brief Create an Array instance whose slots are the given scalar
/// \param[in] scalar the value with which to fill the array
/// \param[in] length the array length
/// \param[in] pool the memory pool to allocate memory from
ARROW_EXPORT
Result<std::shared_ptr<Array>> MakeArrayFromScalar(
const Scalar& scalar, int64_t length, MemoryPool* pool = default_memory_pool());
/// \brief Create an empty Array of a given type
///
/// The output Array will be of the given type.
///
/// \param[in] type the data type of the empty Array
/// \param[in] pool the memory pool to allocate memory from
/// \return the resulting Array
ARROW_EXPORT
Result<std::shared_ptr<Array>> MakeEmptyArray(std::shared_ptr<DataType> type,
MemoryPool* pool = default_memory_pool());
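// A minimal usage sketch of the factory functions above (inside a function
// returning arrow::Status):
//
//   ARROW_ASSIGN_OR_RAISE(auto nulls, arrow::MakeArrayOfNull(arrow::int64(), 100));
//   arrow::Int64Scalar value(42);
//   ARROW_ASSIGN_OR_RAISE(auto repeated, arrow::MakeArrayFromScalar(value, 100));
//   ARROW_ASSIGN_OR_RAISE(auto empty, arrow::MakeEmptyArray(arrow::utf8()));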
namespace internal {
/// \brief Swap endian of each element in a generic ArrayData
///
/// As dictionaries are often shared between different arrays, they are not
/// swapped by this function and should be handled separately.
///
/// \param[in] data the array contents
/// \return the resulting ArrayData whose elements were swapped
ARROW_EXPORT
Result<std::shared_ptr<ArrayData>> SwapEndianArrayData(
const std::shared_ptr<ArrayData>& data);
/// Given a number of ArrayVectors, treat each ArrayVector as the
/// chunks of a chunked array. Then rechunk each ArrayVector such that
/// all ArrayVectors are chunked identically. It is mandatory that
/// all ArrayVectors contain the same total number of elements.
ARROW_EXPORT
std::vector<ArrayVector> RechunkArraysConsistently(const std::vector<ArrayVector>&);
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,56 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace internal {
// Internal functions implementing Array::Validate() and friends.
// O(1) array metadata validation
ARROW_EXPORT
Status ValidateArray(const Array& array);
ARROW_EXPORT
Status ValidateArray(const ArrayData& data);
// O(N) array data validation.
// Note that, starting from 7.0.0, "full" routines also validate metadata.
// Previously, ValidateArray() needed to be called before ValidateArrayFull()
// to ensure metadata correctness; otherwise, invalid memory accesses
// could occur.
ARROW_EXPORT
Status ValidateArrayFull(const Array& array);
ARROW_EXPORT
Status ValidateArrayFull(const ArrayData& data);
ARROW_EXPORT
Status ValidateUTF8(const Array& array);
ARROW_EXPORT
Status ValidateUTF8(const ArrayData& data);
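// A minimal usage sketch, assuming `array` is a hypothetical arrow::Array
// (inside a function returning arrow::Status):
//
//   ARROW_RETURN_NOT_OK(arrow::internal::ValidateArray(array));      // O(1)
//   ARROW_RETURN_NOT_OK(arrow::internal::ValidateArrayFull(array));  // O(N)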
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,506 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "arrow/device.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/string_view.h"
#include "arrow/util/visibility.h"
namespace arrow {
// ----------------------------------------------------------------------
// Buffer classes
/// \class Buffer
/// \brief Object containing a pointer to a piece of contiguous memory with a
/// particular size.
///
/// Buffers have two related notions of length: size and capacity. Size is
/// the number of bytes that might have valid data. Capacity is the number
/// of bytes that were allocated for the buffer in total.
///
/// The Buffer base class does not own its memory, but subclasses often do.
///
/// The following invariant is always true: Size <= Capacity
class ARROW_EXPORT Buffer {
public:
/// \brief Construct from buffer and size without copying memory
///
/// \param[in] data a memory buffer
/// \param[in] size buffer size
///
/// \note The passed memory must be kept alive through some other means
Buffer(const uint8_t* data, int64_t size)
: is_mutable_(false), is_cpu_(true), data_(data), size_(size), capacity_(size) {
SetMemoryManager(default_cpu_memory_manager());
}
Buffer(const uint8_t* data, int64_t size, std::shared_ptr<MemoryManager> mm,
std::shared_ptr<Buffer> parent = NULLPTR)
: is_mutable_(false), data_(data), size_(size), capacity_(size), parent_(parent) {
SetMemoryManager(std::move(mm));
}
Buffer(uintptr_t address, int64_t size, std::shared_ptr<MemoryManager> mm,
std::shared_ptr<Buffer> parent = NULLPTR)
: Buffer(reinterpret_cast<const uint8_t*>(address), size, std::move(mm),
std::move(parent)) {}
/// \brief Construct from string_view without copying memory
///
/// \param[in] data a string_view object
///
/// \note The memory viewed by data must not be deallocated in the lifetime of the
/// Buffer; temporary rvalue strings must be stored in an lvalue somewhere
explicit Buffer(util::string_view data)
: Buffer(reinterpret_cast<const uint8_t*>(data.data()),
static_cast<int64_t>(data.size())) {}
virtual ~Buffer() = default;
/// An offset into data that is owned by another buffer, but we want to be
/// able to retain a valid pointer to it even after other shared_ptr's to the
/// parent buffer have been destroyed
///
/// This constructor makes no assertions about alignment or padding of the buffer,
/// but in general we expect buffers to be aligned and padded to 64 bytes. In the
/// future we might add utility methods to help determine if a buffer satisfies this
/// contract.
Buffer(const std::shared_ptr<Buffer>& parent, const int64_t offset, const int64_t size)
: Buffer(parent->data_ + offset, size) {
parent_ = parent;
SetMemoryManager(parent->memory_manager_);
}
uint8_t operator[](std::size_t i) const { return data_[i]; }
/// \brief Construct a new std::string with a hexadecimal representation of the buffer.
/// \return std::string
std::string ToHexString();
/// Return true if both buffers contain the same bytes up to the number of
/// compared bytes
bool Equals(const Buffer& other, int64_t nbytes) const;
/// Return true if both buffers are the same size and contain the same bytes
bool Equals(const Buffer& other) const;
/// Copy a section of the buffer into a new Buffer.
Result<std::shared_ptr<Buffer>> CopySlice(
const int64_t start, const int64_t nbytes,
MemoryPool* pool = default_memory_pool()) const;
/// Zero bytes in padding, i.e. bytes between size_ and capacity_.
void ZeroPadding() {
#ifndef NDEBUG
CheckMutable();
#endif
// A zero-capacity buffer can have a null data pointer
if (capacity_ != 0) {
memset(mutable_data() + size_, 0, static_cast<size_t>(capacity_ - size_));
}
}
/// \brief Construct an immutable buffer that takes ownership of the contents
/// of an std::string (without copying it).
///
/// \param[in] data a string to own
/// \return a new Buffer instance
static std::shared_ptr<Buffer> FromString(std::string data);
/// \brief Create buffer referencing typed memory with some length without
/// copying
/// \param[in] data the typed memory as C array
/// \param[in] length the number of values in the array
/// \return a new shared_ptr<Buffer>
template <typename T, typename SizeType = int64_t>
static std::shared_ptr<Buffer> Wrap(const T* data, SizeType length) {
return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data),
static_cast<int64_t>(sizeof(T) * length));
}
/// \brief Create buffer referencing std::vector with some length without
/// copying
/// \param[in] data the vector to be referenced. If this vector is changed,
/// the buffer may become invalid
/// \return a new shared_ptr<Buffer>
template <typename T>
static std::shared_ptr<Buffer> Wrap(const std::vector<T>& data) {
return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data.data()),
static_cast<int64_t>(sizeof(T) * data.size()));
}
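  // A minimal usage sketch of Wrap; the wrapped memory must outlive the buffer:
  //
  //   std::vector<int32_t> values = {1, 2, 3, 4};
  //   std::shared_ptr<arrow::Buffer> buf = arrow::Buffer::Wrap(values);
  //   // buf->size() == values.size() * sizeof(int32_t); no bytes are copied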
/// \brief Copy buffer contents into a new std::string
/// \return std::string
/// \note Can throw std::bad_alloc if buffer is large
std::string ToString() const;
/// \brief View buffer contents as a util::string_view
/// \return util::string_view
explicit operator util::string_view() const {
return util::string_view(reinterpret_cast<const char*>(data_), size_);
}
/// \brief View buffer contents as a util::bytes_view
/// \return util::bytes_view
explicit operator util::bytes_view() const { return util::bytes_view(data_, size_); }
/// \brief Return a pointer to the buffer's data
///
/// The buffer has to be a CPU buffer (`is_cpu()` is true).
/// Otherwise, an assertion failure may be raised or a null pointer may be returned.
///
/// To get the buffer's data address regardless of its device, call `address()`.
const uint8_t* data() const {
#ifndef NDEBUG
CheckCPU();
#endif
return ARROW_PREDICT_TRUE(is_cpu_) ? data_ : NULLPTR;
}
/// \brief Return a writable pointer to the buffer's data
///
/// The buffer has to be a mutable CPU buffer (`is_cpu()` and `is_mutable()`
/// are true). Otherwise, an assertion failure may be raised or a null pointer
/// may be returned.
///
/// To get the buffer's mutable data address regardless of its device, call
/// `mutable_address()`.
uint8_t* mutable_data() {
#ifndef NDEBUG
CheckCPU();
CheckMutable();
#endif
return ARROW_PREDICT_TRUE(is_cpu_ && is_mutable_) ? const_cast<uint8_t*>(data_)
: NULLPTR;
}
/// \brief Return the device address of the buffer's data
uintptr_t address() const { return reinterpret_cast<uintptr_t>(data_); }
/// \brief Return a writable device address to the buffer's data
///
/// The buffer has to be a mutable buffer (`is_mutable()` is true).
/// Otherwise, an assertion failure may be raised or 0 may be returned.
uintptr_t mutable_address() const {
#ifndef NDEBUG
CheckMutable();
#endif
return ARROW_PREDICT_TRUE(is_mutable_) ? reinterpret_cast<uintptr_t>(data_) : 0;
}
/// \brief Return the buffer's size in bytes
int64_t size() const { return size_; }
/// \brief Return the buffer's capacity (number of allocated bytes)
int64_t capacity() const { return capacity_; }
/// \brief Whether the buffer is directly CPU-accessible
///
/// If this function returns true, you can read directly from the buffer's
/// `data()` pointer. Otherwise, you'll have to `View()` or `Copy()` it.
bool is_cpu() const { return is_cpu_; }
/// \brief Whether the buffer is mutable
///
/// If this function returns true, you are allowed to modify buffer contents
/// using the pointer returned by `mutable_data()` or `mutable_address()`.
bool is_mutable() const { return is_mutable_; }
const std::shared_ptr<Device>& device() const { return memory_manager_->device(); }
const std::shared_ptr<MemoryManager>& memory_manager() const { return memory_manager_; }
std::shared_ptr<Buffer> parent() const { return parent_; }
/// \brief Get a RandomAccessFile for reading a buffer
///
/// The returned file object reads from this buffer's underlying memory.
static Result<std::shared_ptr<io::RandomAccessFile>> GetReader(std::shared_ptr<Buffer>);
/// \brief Get a OutputStream for writing to a buffer
///
/// The buffer must be mutable. The returned stream object writes into the buffer's
/// underlying memory (but it won't resize it).
static Result<std::shared_ptr<io::OutputStream>> GetWriter(std::shared_ptr<Buffer>);
/// \brief Copy buffer
///
/// The buffer contents will be copied into a new buffer allocated by the
/// given MemoryManager. This function supports cross-device copies.
static Result<std::shared_ptr<Buffer>> Copy(std::shared_ptr<Buffer> source,
const std::shared_ptr<MemoryManager>& to);
/// \brief Copy a non-owned buffer
///
/// This is useful for cases where the source memory area is externally managed
/// (its lifetime is not tied to the source Buffer); otherwise, please use Copy().
static Result<std::unique_ptr<Buffer>> CopyNonOwned(
const Buffer& source, const std::shared_ptr<MemoryManager>& to);
/// \brief View buffer
///
/// Return a Buffer that reflects this buffer, seen potentially from another
/// device, without making an explicit copy of the contents. The underlying
/// mechanism is typically implemented by the kernel or device driver, and may
/// involve lazy caching of parts of the buffer contents on the destination
/// device's memory.
///
/// If a non-copy view is unsupported for the buffer on the given device,
/// nullptr is returned. An error can be returned if some low-level
/// operation fails (such as an out-of-memory condition).
static Result<std::shared_ptr<Buffer>> View(std::shared_ptr<Buffer> source,
const std::shared_ptr<MemoryManager>& to);
/// \brief View or copy buffer
///
/// Try to view buffer contents on the given MemoryManager's device, but
/// fall back to copying if a no-copy view isn't supported.
static Result<std::shared_ptr<Buffer>> ViewOrCopy(
std::shared_ptr<Buffer> source, const std::shared_ptr<MemoryManager>& to);
protected:
bool is_mutable_;
bool is_cpu_;
const uint8_t* data_;
int64_t size_;
int64_t capacity_;
// null by default, but may be set
std::shared_ptr<Buffer> parent_;
private:
// private so that subclasses are forced to call SetMemoryManager()
std::shared_ptr<MemoryManager> memory_manager_;
protected:
void CheckMutable() const;
void CheckCPU() const;
void SetMemoryManager(std::shared_ptr<MemoryManager> mm) {
memory_manager_ = std::move(mm);
is_cpu_ = memory_manager_->is_cpu();
}
private:
Buffer() = delete;
ARROW_DISALLOW_COPY_AND_ASSIGN(Buffer);
};
/// \defgroup buffer-slicing-functions Functions for slicing buffers
///
/// @{
/// \brief Construct a view on a buffer at the given offset and length.
///
/// This function cannot fail and does not check for errors (except in debug builds)
static inline std::shared_ptr<Buffer> SliceBuffer(const std::shared_ptr<Buffer>& buffer,
const int64_t offset,
const int64_t length) {
return std::make_shared<Buffer>(buffer, offset, length);
}
/// \brief Construct a view on a buffer at the given offset, up to the buffer's end.
///
/// This function cannot fail and does not check for errors (except in debug builds)
static inline std::shared_ptr<Buffer> SliceBuffer(const std::shared_ptr<Buffer>& buffer,
const int64_t offset) {
int64_t length = buffer->size() - offset;
return SliceBuffer(buffer, offset, length);
}
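// A minimal usage sketch, assuming `buf` is a hypothetical 100-byte buffer;
// slices share the parent's memory and copy nothing:
//
//   std::shared_ptr<arrow::Buffer> head = arrow::SliceBuffer(buf, 0, 10);
//   std::shared_ptr<arrow::Buffer> tail = arrow::SliceBuffer(buf, 90);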
/// \brief Input-checking version of SliceBuffer
///
/// An Invalid Status is returned if the requested slice falls out of bounds.
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> SliceBufferSafe(const std::shared_ptr<Buffer>& buffer,
int64_t offset);
/// \brief Input-checking version of SliceBuffer
///
/// An Invalid Status is returned if the requested slice falls out of bounds.
/// Note that unlike SliceBuffer, `length` isn't clamped to the available buffer size.
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> SliceBufferSafe(const std::shared_ptr<Buffer>& buffer,
int64_t offset, int64_t length);
/// \brief Like SliceBuffer, but construct a mutable buffer slice.
///
/// If the parent buffer is not mutable, behavior is undefined (it may abort
/// in debug builds).
ARROW_EXPORT
std::shared_ptr<Buffer> SliceMutableBuffer(const std::shared_ptr<Buffer>& buffer,
const int64_t offset, const int64_t length);
/// \brief Like SliceBuffer, but construct a mutable buffer slice.
///
/// If the parent buffer is not mutable, behavior is undefined (it may abort
/// in debug builds).
static inline std::shared_ptr<Buffer> SliceMutableBuffer(
const std::shared_ptr<Buffer>& buffer, const int64_t offset) {
int64_t length = buffer->size() - offset;
return SliceMutableBuffer(buffer, offset, length);
}
/// \brief Input-checking version of SliceMutableBuffer
///
/// An Invalid Status is returned if the requested slice falls out of bounds.
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> SliceMutableBufferSafe(
const std::shared_ptr<Buffer>& buffer, int64_t offset);
/// \brief Input-checking version of SliceMutableBuffer
///
/// An Invalid Status is returned if the requested slice falls out of bounds.
/// Note that unlike SliceBuffer, `length` isn't clamped to the available buffer size.
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> SliceMutableBufferSafe(
const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length);
/// @}
/// \class MutableBuffer
/// \brief A Buffer whose contents can be mutated. May or may not own its data.
class ARROW_EXPORT MutableBuffer : public Buffer {
public:
MutableBuffer(uint8_t* data, const int64_t size) : Buffer(data, size) {
is_mutable_ = true;
}
MutableBuffer(uint8_t* data, const int64_t size, std::shared_ptr<MemoryManager> mm)
: Buffer(data, size, std::move(mm)) {
is_mutable_ = true;
}
MutableBuffer(const std::shared_ptr<Buffer>& parent, const int64_t offset,
const int64_t size);
/// \brief Create buffer referencing typed memory with some length
/// \param[in] data the typed memory as C array
/// \param[in] length the number of values in the array
/// \return a new shared_ptr<Buffer>
template <typename T, typename SizeType = int64_t>
static std::shared_ptr<Buffer> Wrap(T* data, SizeType length) {
return std::make_shared<MutableBuffer>(reinterpret_cast<uint8_t*>(data),
static_cast<int64_t>(sizeof(T) * length));
}
protected:
MutableBuffer() : Buffer(NULLPTR, 0) {}
};
/// \class ResizableBuffer
/// \brief A mutable buffer that can be resized
class ARROW_EXPORT ResizableBuffer : public MutableBuffer {
public:
/// Change the buffer's reported size to the indicated size, allocating memory
/// if necessary. This will ensure that the capacity of the buffer is a multiple
/// of 64 bytes as defined in Layout.md.
/// Consider using ZeroPadding afterwards to conform to the Arrow layout
/// specification.
///
/// @param new_size The new size for the buffer.
/// @param shrink_to_fit Whether to shrink the capacity if new size < current size
virtual Status Resize(const int64_t new_size, bool shrink_to_fit) = 0;
Status Resize(const int64_t new_size) {
return Resize(new_size, /*shrink_to_fit=*/true);
}
/// Ensure that buffer has enough memory allocated to fit the indicated
/// capacity (and meets the 64 byte padding requirement in Layout.md).
/// It does not change buffer's reported size and doesn't zero the padding.
virtual Status Reserve(const int64_t new_capacity) = 0;
template <class T>
Status TypedResize(const int64_t new_nb_elements, bool shrink_to_fit = true) {
return Resize(sizeof(T) * new_nb_elements, shrink_to_fit);
}
template <class T>
Status TypedReserve(const int64_t new_nb_elements) {
return Reserve(sizeof(T) * new_nb_elements);
}
protected:
ResizableBuffer(uint8_t* data, int64_t size) : MutableBuffer(data, size) {}
ResizableBuffer(uint8_t* data, int64_t size, std::shared_ptr<MemoryManager> mm)
: MutableBuffer(data, size, std::move(mm)) {}
};
/// \defgroup buffer-allocation-functions Functions for allocating buffers
///
/// @{
/// \brief Allocate a fixed size mutable buffer from a memory pool, zero its padding.
///
/// \param[in] size size of buffer to allocate
/// \param[in] pool a memory pool
ARROW_EXPORT
Result<std::unique_ptr<Buffer>> AllocateBuffer(const int64_t size,
MemoryPool* pool = NULLPTR);
/// \brief Allocate a resizeable buffer from a memory pool, zero its padding.
///
/// \param[in] size size of buffer to allocate
/// \param[in] pool a memory pool
ARROW_EXPORT
Result<std::unique_ptr<ResizableBuffer>> AllocateResizableBuffer(
const int64_t size, MemoryPool* pool = NULLPTR);
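// A minimal allocation sketch (inside a function returning arrow::Status):
//
//   ARROW_ASSIGN_OR_RAISE(std::unique_ptr<arrow::Buffer> buf,
//                         arrow::AllocateBuffer(1024));
//   // buf is mutable; its padding bytes are zeroed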
/// \brief Allocate a bitmap buffer from a memory pool.
/// No guarantee on the contents of the bitmap is provided.
///
/// \param[in] length size in bits of bitmap to allocate
/// \param[in] pool memory pool to allocate memory from
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> AllocateBitmap(int64_t length,
MemoryPool* pool = NULLPTR);
ARROW_EXPORT
Status AllocateBitmap(MemoryPool* pool, int64_t length, std::shared_ptr<Buffer>* out);
/// \brief Allocate a zero-initialized bitmap buffer from a memory pool
///
/// \param[in] length size in bits of bitmap to allocate
/// \param[in] pool memory pool to allocate memory from
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> AllocateEmptyBitmap(int64_t length,
MemoryPool* pool = NULLPTR);
/// \brief Concatenate multiple buffers into a single buffer
///
/// \param[in] buffers to be concatenated
/// \param[in] pool memory pool to allocate the new buffer from
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> ConcatenateBuffers(const BufferVector& buffers,
MemoryPool* pool = NULLPTR);
ARROW_EXPORT
Status ConcatenateBuffers(const BufferVector& buffers, MemoryPool* pool,
std::shared_ptr<Buffer>* out);
/// @}
} // namespace arrow

View File

@@ -0,0 +1,459 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <utility>
#include "arrow/buffer.h"
#include "arrow/status.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_generate.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/macros.h"
#include "arrow/util/ubsan.h"
#include "arrow/util/visibility.h"
namespace arrow {
// ----------------------------------------------------------------------
// Buffer builder classes
/// \class BufferBuilder
/// \brief A class for incrementally building a contiguous chunk of in-memory
/// data
class ARROW_EXPORT BufferBuilder {
public:
explicit BufferBuilder(MemoryPool* pool = default_memory_pool())
: pool_(pool),
data_(/*ensure never null to make ubsan happy and avoid check penalties below*/
util::MakeNonNull<uint8_t>()),
capacity_(0),
size_(0) {}
/// \brief Constructs new Builder that will start using
/// the provided buffer until Finish/Reset are called.
/// The buffer is not resized.
explicit BufferBuilder(std::shared_ptr<ResizableBuffer> buffer,
MemoryPool* pool = default_memory_pool())
: buffer_(std::move(buffer)),
pool_(pool),
data_(buffer_->mutable_data()),
capacity_(buffer_->capacity()),
size_(buffer_->size()) {}
/// \brief Resize the buffer to the nearest multiple of 64 bytes
///
/// \param new_capacity the new capacity of the builder. Will be
/// rounded up to a multiple of 64 bytes for padding
/// \param shrink_to_fit if new capacity is smaller than the existing,
/// reallocate internal buffer. Set to false to avoid reallocations when
/// shrinking the builder.
/// \return Status
Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
if (buffer_ == NULLPTR) {
ARROW_ASSIGN_OR_RAISE(buffer_, AllocateResizableBuffer(new_capacity, pool_));
} else {
ARROW_RETURN_NOT_OK(buffer_->Resize(new_capacity, shrink_to_fit));
}
capacity_ = buffer_->capacity();
data_ = buffer_->mutable_data();
return Status::OK();
}
/// \brief Ensure that builder can accommodate the additional number of bytes
/// without the need to perform allocations
///
/// \param[in] additional_bytes number of additional bytes to make space for
/// \return Status
Status Reserve(const int64_t additional_bytes) {
auto min_capacity = size_ + additional_bytes;
if (min_capacity <= capacity_) {
return Status::OK();
}
return Resize(GrowByFactor(capacity_, min_capacity), false);
}
/// \brief Return a capacity expanded by the desired growth factor
static int64_t GrowByFactor(int64_t current_capacity, int64_t new_capacity) {
// Doubling capacity except for large Reserve requests. 2x growth strategy
// (versus 1.5x) seems to have slightly better performance when using
// jemalloc, but significantly better performance when using the system
// allocator. See ARROW-6450 for further discussion
return std::max(new_capacity, current_capacity * 2);
}
/// \brief Append the given data to the buffer
///
/// The buffer is automatically expanded if necessary.
Status Append(const void* data, const int64_t length) {
if (ARROW_PREDICT_FALSE(size_ + length > capacity_)) {
ARROW_RETURN_NOT_OK(Resize(GrowByFactor(capacity_, size_ + length), false));
}
UnsafeAppend(data, length);
return Status::OK();
}
/// \brief Append copies of a value to the buffer
///
/// The buffer is automatically expanded if necessary.
Status Append(const int64_t num_copies, uint8_t value) {
ARROW_RETURN_NOT_OK(Reserve(num_copies));
UnsafeAppend(num_copies, value);
return Status::OK();
}
// Advance pointer and zero out memory
Status Advance(const int64_t length) { return Append(length, 0); }
// Advance pointer, but don't allocate or zero memory
void UnsafeAdvance(const int64_t length) { size_ += length; }
// Unsafe methods don't check existing size
void UnsafeAppend(const void* data, const int64_t length) {
memcpy(data_ + size_, data, static_cast<size_t>(length));
size_ += length;
}
void UnsafeAppend(const int64_t num_copies, uint8_t value) {
memset(data_ + size_, value, static_cast<size_t>(num_copies));
size_ += num_copies;
}
/// \brief Return result of builder as a Buffer object.
///
/// The builder is reset and can be reused afterwards.
///
/// \param[out] out the finalized Buffer object
/// \param shrink_to_fit if the buffer size is smaller than its capacity,
/// reallocate to fit more tightly in memory. Set to false to avoid
/// a reallocation, at the expense of potentially more memory consumption.
/// \return Status
Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
ARROW_RETURN_NOT_OK(Resize(size_, shrink_to_fit));
if (size_ != 0) buffer_->ZeroPadding();
*out = buffer_;
if (*out == NULLPTR) {
ARROW_ASSIGN_OR_RAISE(*out, AllocateBuffer(0, pool_));
}
Reset();
return Status::OK();
}
Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
std::shared_ptr<Buffer> out;
ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
return out;
}
/// \brief Like Finish, but override the final buffer size
///
/// This is useful after writing data directly into the builder memory
/// without calling the Append methods (basically, when using BufferBuilder
/// mostly for memory allocation).
Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
bool shrink_to_fit = true) {
size_ = final_length;
return Finish(shrink_to_fit);
}
void Reset() {
buffer_ = NULLPTR;
capacity_ = size_ = 0;
}
/// \brief Set size to a smaller value without modifying builder
/// contents, for use with reusable BufferBuilder instances
/// \param[in] position must be non-negative and less than or equal
/// to the current length()
void Rewind(int64_t position) { size_ = position; }
int64_t capacity() const { return capacity_; }
int64_t length() const { return size_; }
const uint8_t* data() const { return data_; }
uint8_t* mutable_data() { return data_; }
private:
std::shared_ptr<ResizableBuffer> buffer_;
MemoryPool* pool_;
uint8_t* data_;
int64_t capacity_;
int64_t size_;
};
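// A minimal usage sketch of the Reserve/UnsafeAppend/Finish workflow (inside
// a function returning arrow::Status):
//
//   arrow::BufferBuilder builder;
//   ARROW_RETURN_NOT_OK(builder.Reserve(11));
//   builder.UnsafeAppend("hello world", 11);  // no size check; Reserve above
//   ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Buffer> buf, builder.Finish());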
template <typename T, typename Enable = void>
class TypedBufferBuilder;
/// \brief A BufferBuilder for building a buffer of arithmetic elements
template <typename T>
class TypedBufferBuilder<
T, typename std::enable_if<std::is_arithmetic<T>::value ||
std::is_standard_layout<T>::value>::type> {
public:
explicit TypedBufferBuilder(MemoryPool* pool = default_memory_pool())
: bytes_builder_(pool) {}
explicit TypedBufferBuilder(std::shared_ptr<ResizableBuffer> buffer,
MemoryPool* pool = default_memory_pool())
: bytes_builder_(std::move(buffer), pool) {}
explicit TypedBufferBuilder(BufferBuilder builder)
: bytes_builder_(std::move(builder)) {}
BufferBuilder* bytes_builder() { return &bytes_builder_; }
Status Append(T value) {
return bytes_builder_.Append(reinterpret_cast<uint8_t*>(&value), sizeof(T));
}
Status Append(const T* values, int64_t num_elements) {
return bytes_builder_.Append(reinterpret_cast<const uint8_t*>(values),
num_elements * sizeof(T));
}
Status Append(const int64_t num_copies, T value) {
ARROW_RETURN_NOT_OK(Reserve(num_copies + length()));
UnsafeAppend(num_copies, value);
return Status::OK();
}
void UnsafeAppend(T value) {
bytes_builder_.UnsafeAppend(reinterpret_cast<uint8_t*>(&value), sizeof(T));
}
void UnsafeAppend(const T* values, int64_t num_elements) {
bytes_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values),
num_elements * sizeof(T));
}
template <typename Iter>
void UnsafeAppend(Iter values_begin, Iter values_end) {
int64_t num_elements = static_cast<int64_t>(std::distance(values_begin, values_end));
auto data = mutable_data() + length();
bytes_builder_.UnsafeAdvance(num_elements * sizeof(T));
std::copy(values_begin, values_end, data);
}
void UnsafeAppend(const int64_t num_copies, T value) {
auto data = mutable_data() + length();
bytes_builder_.UnsafeAdvance(num_copies * sizeof(T));
std::fill(data, data + num_copies, value);
}
Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
return bytes_builder_.Resize(new_capacity * sizeof(T), shrink_to_fit);
}
Status Reserve(const int64_t additional_elements) {
return bytes_builder_.Reserve(additional_elements * sizeof(T));
}
Status Advance(const int64_t length) {
return bytes_builder_.Advance(length * sizeof(T));
}
Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
return bytes_builder_.Finish(out, shrink_to_fit);
}
Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
std::shared_ptr<Buffer> out;
ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
return out;
}
/// \brief Like Finish, but override the final buffer size
///
/// This is useful after writing data directly into the builder memory
/// without calling the Append methods (basically, when using TypedBufferBuilder
/// only for memory allocation).
Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
bool shrink_to_fit = true) {
return bytes_builder_.FinishWithLength(final_length * sizeof(T), shrink_to_fit);
}
void Reset() { bytes_builder_.Reset(); }
int64_t length() const { return bytes_builder_.length() / sizeof(T); }
int64_t capacity() const { return bytes_builder_.capacity() / sizeof(T); }
const T* data() const { return reinterpret_cast<const T*>(bytes_builder_.data()); }
T* mutable_data() { return reinterpret_cast<T*>(bytes_builder_.mutable_data()); }
private:
BufferBuilder bytes_builder_;
};
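// A minimal usage sketch of the arithmetic specialization (inside a function
// returning arrow::Status):
//
//   arrow::TypedBufferBuilder<int32_t> builder;
//   const int32_t values[] = {1, 2, 3};
//   ARROW_RETURN_NOT_OK(builder.Append(values, 3));
//   ARROW_ASSIGN_OR_RAISE(auto buf, builder.Finish());  // 12 bytes of data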
/// \brief A BufferBuilder for building a buffer containing a bitmap
template <>
class TypedBufferBuilder<bool> {
public:
explicit TypedBufferBuilder(MemoryPool* pool = default_memory_pool())
: bytes_builder_(pool) {}
explicit TypedBufferBuilder(BufferBuilder builder)
: bytes_builder_(std::move(builder)) {}
BufferBuilder* bytes_builder() { return &bytes_builder_; }
Status Append(bool value) {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppend(value);
return Status::OK();
}
Status Append(const uint8_t* valid_bytes, int64_t num_elements) {
ARROW_RETURN_NOT_OK(Reserve(num_elements));
UnsafeAppend(valid_bytes, num_elements);
return Status::OK();
}
Status Append(const int64_t num_copies, bool value) {
ARROW_RETURN_NOT_OK(Reserve(num_copies));
UnsafeAppend(num_copies, value);
return Status::OK();
}
void UnsafeAppend(bool value) {
bit_util::SetBitTo(mutable_data(), bit_length_, value);
if (!value) {
++false_count_;
}
++bit_length_;
}
/// \brief Append bits from an array of bytes (one value per byte)
void UnsafeAppend(const uint8_t* bytes, int64_t num_elements) {
if (num_elements == 0) return;
int64_t i = 0;
internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements, [&] {
bool value = bytes[i++];
false_count_ += !value;
return value;
});
bit_length_ += num_elements;
}
/// \brief Append bits from a packed bitmap
void UnsafeAppend(const uint8_t* bitmap, int64_t offset, int64_t num_elements) {
if (num_elements == 0) return;
internal::CopyBitmap(bitmap, offset, num_elements, mutable_data(), bit_length_);
false_count_ += num_elements - internal::CountSetBits(bitmap, offset, num_elements);
bit_length_ += num_elements;
}
void UnsafeAppend(const int64_t num_copies, bool value) {
bit_util::SetBitsTo(mutable_data(), bit_length_, num_copies, value);
false_count_ += num_copies * !value;
bit_length_ += num_copies;
}
template <bool count_falses, typename Generator>
void UnsafeAppend(const int64_t num_elements, Generator&& gen) {
if (num_elements == 0) return;
if (count_falses) {
internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements, [&] {
bool value = gen();
false_count_ += !value;
return value;
});
} else {
internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements,
std::forward<Generator>(gen));
}
bit_length_ += num_elements;
}
Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
const int64_t old_byte_capacity = bytes_builder_.capacity();
ARROW_RETURN_NOT_OK(
bytes_builder_.Resize(bit_util::BytesForBits(new_capacity), shrink_to_fit));
// Resize() may have chosen a larger capacity (e.g. for padding),
// so ask it again before calling memset().
const int64_t new_byte_capacity = bytes_builder_.capacity();
if (new_byte_capacity > old_byte_capacity) {
// The additional buffer space is 0-initialized for convenience,
// so that other methods can simply bump the length.
memset(mutable_data() + old_byte_capacity, 0,
static_cast<size_t>(new_byte_capacity - old_byte_capacity));
}
return Status::OK();
}
Status Reserve(const int64_t additional_elements) {
return Resize(
BufferBuilder::GrowByFactor(bit_length_, bit_length_ + additional_elements),
false);
}
Status Advance(const int64_t length) {
ARROW_RETURN_NOT_OK(Reserve(length));
bit_length_ += length;
false_count_ += length;
return Status::OK();
}
Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
// set bytes_builder_.size_ == byte size of data
bytes_builder_.UnsafeAdvance(bit_util::BytesForBits(bit_length_) -
bytes_builder_.length());
bit_length_ = false_count_ = 0;
return bytes_builder_.Finish(out, shrink_to_fit);
}
Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
std::shared_ptr<Buffer> out;
ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
return out;
}
/// \brief Like Finish, but override the final buffer size
///
/// This is useful after writing data directly into the builder memory
/// without calling the Append methods (basically, when using TypedBufferBuilder
/// only for memory allocation).
Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
bool shrink_to_fit = true) {
const auto final_byte_length = bit_util::BytesForBits(final_length);
bytes_builder_.UnsafeAdvance(final_byte_length - bytes_builder_.length());
bit_length_ = false_count_ = 0;
return bytes_builder_.FinishWithLength(final_byte_length, shrink_to_fit);
}
void Reset() {
bytes_builder_.Reset();
bit_length_ = false_count_ = 0;
}
int64_t length() const { return bit_length_; }
int64_t capacity() const { return bytes_builder_.capacity() * 8; }
const uint8_t* data() const { return bytes_builder_.data(); }
uint8_t* mutable_data() { return bytes_builder_.mutable_data(); }
int64_t false_count() const { return false_count_; }
private:
BufferBuilder bytes_builder_;
int64_t bit_length_ = 0;
int64_t false_count_ = 0;
};
} // namespace arrow
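
// --- Usage sketch (editorial illustration, not part of the original file) ---
// A minimal example of building a packed validity bitmap with
// TypedBufferBuilder<bool>. `BuildValidityBitmap` is a hypothetical helper
// name; errors propagate through the usual Status machinery.

#include <memory>

#include "arrow/buffer.h"
#include "arrow/status.h"

arrow::Status BuildValidityBitmap(std::shared_ptr<arrow::Buffer>* out) {
  arrow::TypedBufferBuilder<bool> builder;
  ARROW_RETURN_NOT_OK(builder.Append(true));                    // bit 0 set
  ARROW_RETURN_NOT_OK(builder.Append(false));                   // bit 1 clear
  ARROW_RETURN_NOT_OK(builder.Append(/*num_copies=*/6, true));  // bits 2..7
  // At this point builder.false_count() == 1 and builder.length() == 8;
  // Finish() hands back a buffer holding the single packed byte and resets
  // the builder for reuse.
  return builder.Finish(out);
}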

View File

@@ -0,0 +1,32 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "arrow/array/builder_adaptive.h" // IWYU pragma: keep
#include "arrow/array/builder_base.h" // IWYU pragma: keep
#include "arrow/array/builder_binary.h" // IWYU pragma: keep
#include "arrow/array/builder_decimal.h" // IWYU pragma: keep
#include "arrow/array/builder_dict.h" // IWYU pragma: keep
#include "arrow/array/builder_nested.h" // IWYU pragma: keep
#include "arrow/array/builder_primitive.h" // IWYU pragma: keep
#include "arrow/array/builder_time.h" // IWYU pragma: keep
#include "arrow/array/builder_union.h" // IWYU pragma: keep
#include "arrow/status.h"
#include "arrow/util/visibility.h"

View File

@@ -0,0 +1,103 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#define ARROW_FLAG_DICTIONARY_ORDERED 1
#define ARROW_FLAG_NULLABLE 2
#define ARROW_FLAG_MAP_KEYS_SORTED 4
struct ArrowSchema {
// Array type description
const char* format;
const char* name;
const char* metadata;
int64_t flags;
int64_t n_children;
struct ArrowSchema** children;
struct ArrowSchema* dictionary;
// Release callback
void (*release)(struct ArrowSchema*);
// Opaque producer-specific data
void* private_data;
};
struct ArrowArray {
// Array data description
int64_t length;
int64_t null_count;
int64_t offset;
int64_t n_buffers;
int64_t n_children;
const void** buffers;
struct ArrowArray** children;
struct ArrowArray* dictionary;
// Release callback
void (*release)(struct ArrowArray*);
// Opaque producer-specific data
void* private_data;
};
// EXPERIMENTAL: C stream interface
struct ArrowArrayStream {
// Callback to get the stream type
// (will be the same for all arrays in the stream).
//
// Return value: 0 if successful, an `errno`-compatible error code otherwise.
//
// If successful, the ArrowSchema must be released independently from the stream.
int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
// Callback to get the next array
// (if no error and the array is released, the stream has ended)
//
// Return value: 0 if successful, an `errno`-compatible error code otherwise.
//
// If successful, the ArrowArray must be released independently from the stream.
int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);
// Callback to get optional detailed error information.
// This must only be called if the last stream operation failed
// with a non-0 return code.
//
// Return value: pointer to a null-terminated character array describing
// the last error, or NULL if no description is available.
//
// The returned pointer is only valid until the next operation on this stream
// (including release).
const char* (*get_last_error)(struct ArrowArrayStream*);
// Release callback: release the stream's own resources.
// Note that arrays returned by `get_next` must be individually released.
void (*release)(struct ArrowArrayStream*);
// Opaque producer-specific data
void* private_data;
};
#ifdef __cplusplus
}
#endif
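
/* --- Consumer-side sketch (editorial illustration, not part of the file) ---
   Draining an ArrowArrayStream as the comments above describe: fetch the
   schema once, then pull arrays until get_next() hands back a released
   (ended) array. `DrainStream` is a hypothetical helper; the stream is
   assumed to come from some producer library. */

#include <stdio.h>

static int DrainStream(struct ArrowArrayStream* stream) {
  struct ArrowSchema schema;
  int errcode = stream->get_schema(stream, &schema);
  if (errcode != 0) return errcode;
  /* The schema must be released independently from the stream. */
  schema.release(&schema);

  for (;;) {
    struct ArrowArray array;
    errcode = stream->get_next(stream, &array);
    if (errcode != 0) {
      const char* msg = stream->get_last_error(stream);
      fprintf(stderr, "stream error: %s\n", msg ? msg : "(no details)");
      break;
    }
    if (array.release == NULL) break; /* stream has ended */
    /* ... consume array.length / array.buffers here ... */
    array.release(&array);
  }
  stream->release(stream);
  return errcode;
}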

View File

@@ -0,0 +1,197 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include "arrow/c/abi.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \defgroup c-data-interface Functions for working with the C data interface.
///
/// @{
/// \brief Export C++ DataType using the C data interface format.
///
/// The root type is considered to have empty name and metadata.
/// If you want the root type to have a name and/or metadata, pass
/// a Field instead.
///
/// \param[in] type DataType object to export
/// \param[out] out C struct where to export the datatype
ARROW_EXPORT
Status ExportType(const DataType& type, struct ArrowSchema* out);
/// \brief Export C++ Field using the C data interface format.
///
/// \param[in] field Field object to export
/// \param[out] out C struct where to export the field
ARROW_EXPORT
Status ExportField(const Field& field, struct ArrowSchema* out);
/// \brief Export C++ Schema using the C data interface format.
///
/// \param[in] schema Schema object to export
/// \param[out] out C struct where to export the schema
ARROW_EXPORT
Status ExportSchema(const Schema& schema, struct ArrowSchema* out);
/// \brief Export C++ Array using the C data interface format.
///
/// The resulting ArrowArray struct keeps the array data and buffers alive
/// until its release callback is called by the consumer.
///
/// \param[in] array Array object to export
/// \param[out] out C struct where to export the array
/// \param[out] out_schema optional C struct where to export the array type
ARROW_EXPORT
Status ExportArray(const Array& array, struct ArrowArray* out,
struct ArrowSchema* out_schema = NULLPTR);
/// \brief Export C++ RecordBatch using the C data interface format.
///
/// The record batch is exported as if it were a struct array.
/// The resulting ArrowArray struct keeps the record batch data and buffers alive
/// until its release callback is called by the consumer.
///
/// \param[in] batch Record batch to export
/// \param[out] out C struct where to export the record batch
/// \param[out] out_schema optional C struct where to export the record batch schema
ARROW_EXPORT
Status ExportRecordBatch(const RecordBatch& batch, struct ArrowArray* out,
struct ArrowSchema* out_schema = NULLPTR);
/// \brief Import C++ DataType from the C data interface.
///
/// The given ArrowSchema struct is released (as per the C data interface
/// specification), even if this function fails.
///
/// \param[in,out] schema C data interface struct representing the data type
/// \return Imported type object
ARROW_EXPORT
Result<std::shared_ptr<DataType>> ImportType(struct ArrowSchema* schema);
/// \brief Import C++ Field from the C data interface.
///
/// The given ArrowSchema struct is released (as per the C data interface
/// specification), even if this function fails.
///
/// \param[in,out] schema C data interface struct representing the field
/// \return Imported field object
ARROW_EXPORT
Result<std::shared_ptr<Field>> ImportField(struct ArrowSchema* schema);
/// \brief Import C++ Schema from the C data interface.
///
/// The given ArrowSchema struct is released (as per the C data interface
/// specification), even if this function fails.
///
/// \param[in,out] schema C data interface struct representing the schema
/// \return Imported schema object
ARROW_EXPORT
Result<std::shared_ptr<Schema>> ImportSchema(struct ArrowSchema* schema);
/// \brief Import C++ array from the C data interface.
///
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting array.
///
/// \param[in,out] array C data interface struct holding the array data
/// \param[in] type type of the imported array
/// \return Imported array object
ARROW_EXPORT
Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array,
std::shared_ptr<DataType> type);
/// \brief Import C++ array and its type from the C data interface.
///
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting array.
/// The ArrowSchema struct is released, even if this function fails.
///
/// \param[in,out] array C data interface struct holding the array data
/// \param[in,out] type C data interface struct holding the array type
/// \return Imported array object
ARROW_EXPORT
Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array,
struct ArrowSchema* type);
/// \brief Import C++ record batch from the C data interface.
///
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting record batch.
///
/// \param[in,out] array C data interface struct holding the record batch data
/// \param[in] schema schema of the imported record batch
/// \return Imported record batch object
ARROW_EXPORT
Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array,
std::shared_ptr<Schema> schema);
/// \brief Import C++ record batch and its schema from the C data interface.
///
/// The type represented by the ArrowSchema struct must be a struct type.
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting record batch.
/// The ArrowSchema struct is released, even if this function fails.
///
/// \param[in,out] array C data interface struct holding the record batch data
/// \param[in,out] schema C data interface struct holding the record batch schema
/// \return Imported record batch object
ARROW_EXPORT
Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array,
struct ArrowSchema* schema);
/// @}
/// \defgroup c-stream-interface Functions for working with the C stream interface.
///
/// @{
/// \brief EXPERIMENTAL: Export C++ RecordBatchReader using the C stream interface.
///
/// The resulting ArrowArrayStream struct keeps the record batch reader alive
/// until its release callback is called by the consumer.
///
/// \param[in] reader RecordBatchReader object to export
/// \param[out] out C struct where to export the stream
ARROW_EXPORT
Status ExportRecordBatchReader(std::shared_ptr<RecordBatchReader> reader,
struct ArrowArrayStream* out);
/// \brief EXPERIMENTAL: Import C++ RecordBatchReader from the C stream interface.
///
/// The ArrowArrayStream struct has its contents moved to a private object
/// held alive by the resulting record batch reader.
///
/// \param[in,out] stream C stream interface struct
/// \return Imported RecordBatchReader object
ARROW_EXPORT
Result<std::shared_ptr<RecordBatchReader>> ImportRecordBatchReader(
struct ArrowArrayStream* stream);
/// @}
} // namespace arrow
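
// --- Round-trip sketch (editorial illustration, not part of the file) ---
// Exporting an arrow::Array through the C data interface and importing it
// back, as the declarations above allow. In real use the two halves would
// typically live in different libraries or language runtimes; `RoundTrip`
// is a hypothetical helper name.

#include "arrow/array.h"

arrow::Result<std::shared_ptr<arrow::Array>> RoundTrip(
    const std::shared_ptr<arrow::Array>& array) {
  struct ArrowArray c_array;
  struct ArrowSchema c_schema;
  ARROW_RETURN_NOT_OK(arrow::ExportArray(*array, &c_array, &c_schema));
  // ImportArray moves the contents of both structs; c_schema is released
  // even on failure, per the contract documented above.
  return arrow::ImportArray(&c_array, &c_schema);
}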

View File

@@ -0,0 +1,117 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <assert.h>
#include <string.h>
#include "arrow/c/abi.h"
#ifdef __cplusplus
extern "C" {
#endif
/// Query whether the C schema is released
inline int ArrowSchemaIsReleased(const struct ArrowSchema* schema) {
return schema->release == NULL;
}
/// Mark the C schema released (for use in release callbacks)
inline void ArrowSchemaMarkReleased(struct ArrowSchema* schema) {
schema->release = NULL;
}
/// Move the C schema from `src` to `dest`
///
/// Note `dest` must *not* point to a valid schema already, otherwise there
/// will be a memory leak.
inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dest) {
assert(dest != src);
assert(!ArrowSchemaIsReleased(src));
memcpy(dest, src, sizeof(struct ArrowSchema));
ArrowSchemaMarkReleased(src);
}
/// Release the C schema, if necessary, by calling its release callback
inline void ArrowSchemaRelease(struct ArrowSchema* schema) {
if (!ArrowSchemaIsReleased(schema)) {
schema->release(schema);
assert(ArrowSchemaIsReleased(schema));
}
}
/// Query whether the C array is released
inline int ArrowArrayIsReleased(const struct ArrowArray* array) {
return array->release == NULL;
}
/// Mark the C array released (for use in release callbacks)
inline void ArrowArrayMarkReleased(struct ArrowArray* array) { array->release = NULL; }
/// Move the C array from `src` to `dest`
///
/// Note `dest` must *not* point to a valid array already, otherwise there
/// will be a memory leak.
inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dest) {
assert(dest != src);
assert(!ArrowArrayIsReleased(src));
memcpy(dest, src, sizeof(struct ArrowArray));
ArrowArrayMarkReleased(src);
}
/// Release the C array, if necessary, by calling its release callback
inline void ArrowArrayRelease(struct ArrowArray* array) {
if (!ArrowArrayIsReleased(array)) {
array->release(array);
assert(ArrowArrayIsReleased(array));
}
}
/// Query whether the C array stream is released
inline int ArrowArrayStreamIsReleased(const struct ArrowArrayStream* stream) {
return stream->release == NULL;
}
/// Mark the C array stream released (for use in release callbacks)
inline void ArrowArrayStreamMarkReleased(struct ArrowArrayStream* stream) {
stream->release = NULL;
}
/// Move the C array stream from `src` to `dest`
///
/// Note `dest` must *not* point to a valid stream already, otherwise there
/// will be a memory leak.
inline void ArrowArrayStreamMove(struct ArrowArrayStream* src,
struct ArrowArrayStream* dest) {
assert(dest != src);
assert(!ArrowArrayStreamIsReleased(src));
memcpy(dest, src, sizeof(struct ArrowArrayStream));
ArrowArrayStreamMarkReleased(src);
}
/// Release the C array stream, if necessary, by calling its release callback
inline void ArrowArrayStreamRelease(struct ArrowArrayStream* stream) {
if (!ArrowArrayStreamIsReleased(stream)) {
stream->release(stream);
assert(ArrowArrayStreamIsReleased(stream));
}
}
#ifdef __cplusplus
}
#endif
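
/* --- Usage sketch (editorial illustration, not part of the file) ---
   Moving a schema into a heap-allocated slot to extend its lifetime, then
   releasing it later. `StashSchema`/`DropSchema` are hypothetical helpers;
   malloc error handling is elided for brevity. */

#include <stdlib.h>

static struct ArrowSchema* StashSchema(struct ArrowSchema* source) {
  struct ArrowSchema* dest =
      (struct ArrowSchema*)malloc(sizeof(struct ArrowSchema));
  ArrowSchemaMove(source, dest); /* `source` is now marked released */
  return dest;
}

static void DropSchema(struct ArrowSchema* stashed) {
  ArrowSchemaRelease(stashed); /* a no-op if already released */
  free(stashed);
}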

View File

@@ -0,0 +1,104 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <atomic>
#include <cstdint>
#include <vector>
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
namespace arrow {
namespace internal {
struct ChunkLocation {
int64_t chunk_index, index_in_chunk;
};
// An object that resolves an array chunk given a logical index
struct ChunkResolver {
explicit ChunkResolver(const ArrayVector& chunks);
explicit ChunkResolver(const std::vector<const Array*>& chunks);
explicit ChunkResolver(const RecordBatchVector& batches);
ChunkResolver(ChunkResolver&& other)
: offsets_(std::move(other.offsets_)), cached_chunk_(other.cached_chunk_.load()) {}
ChunkResolver& operator=(ChunkResolver&& other) {
offsets_ = std::move(other.offsets_);
cached_chunk_.store(other.cached_chunk_.load());
return *this;
}
/// \brief Return a ChunkLocation containing the chunk index and in-chunk value
/// index of the chunked array at the given logical index
inline ChunkLocation Resolve(const int64_t index) const {
// It is common for the algorithms below to make consecutive accesses at
// a relatively small distance from each other, hence often falling in
// the same chunk.
// This is trivial when merging (assuming each side of the merge uses
// its own resolver), but also in the inner recursive invocations of
// partitioning.
if (offsets_.size() <= 1) {
return {0, index};
}
const auto cached_chunk = cached_chunk_.load();
const bool cache_hit =
(index >= offsets_[cached_chunk] && index < offsets_[cached_chunk + 1]);
if (ARROW_PREDICT_TRUE(cache_hit)) {
return {cached_chunk, index - offsets_[cached_chunk]};
}
auto chunk_index = Bisect(index);
cached_chunk_.store(chunk_index);
return {chunk_index, index - offsets_[chunk_index]};
}
protected:
// Find the chunk index corresponding to a value index using binary search
inline int64_t Bisect(const int64_t index) const {
// Like std::upper_bound(), but hand-written as it can help the compiler.
// Search [lo, lo + n)
int64_t lo = 0;
auto n = static_cast<int64_t>(offsets_.size());
while (n > 1) {
const int64_t m = n >> 1;
const int64_t mid = lo + m;
if (static_cast<int64_t>(index) >= offsets_[mid]) {
lo = mid;
n -= m;
} else {
n = m;
}
}
return lo;
}
private:
// Collection of starting offsets used for binary search
std::vector<int64_t> offsets_;
// Tracks the most recently used chunk index to allow fast
// access for consecutive indices corresponding to the same chunk
mutable std::atomic<int64_t> cached_chunk_;
};
} // namespace internal
} // namespace arrow
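
// --- Usage sketch (editorial illustration, not part of the file) ---
// Resolving a logical index into a (chunk, offset) pair. Note that
// ChunkResolver lives in arrow::internal and is not a stable public API;
// `Locate` is a hypothetical helper name.

#include "arrow/chunked_array.h"

arrow::internal::ChunkLocation Locate(const arrow::ChunkedArray& chunked,
                                      int64_t logical_index) {
  arrow::internal::ChunkResolver resolver(chunked.chunks());
  // With chunk lengths [3, 2], logical index 4 resolves to
  // {chunk_index: 1, index_in_chunk: 1}.
  return resolver.Resolve(logical_index);
}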

View File

@@ -0,0 +1,267 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "arrow/chunk_resolver.h"
#include "arrow/compare.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
class Array;
class DataType;
class MemoryPool;
/// \class ChunkedArray
/// \brief A data structure managing a list of primitive Arrow arrays logically
/// as one large array
///
/// Data chunking is treated throughout this project largely as an
/// implementation detail for performance and memory use optimization.
/// ChunkedArray allows Array objects to be collected and interpreted
/// as a single logical array without requiring an expensive concatenation
/// step.
///
/// In some cases, data produced by a function may exceed the capacity of an
/// Array (like BinaryArray or StringArray) and so returning multiple Arrays is
/// the only possibility. In these cases, we recommend returning a ChunkedArray
/// instead of a vector of Arrays or some alternative.
///
/// When data is processed in parallel, it may not be practical or possible to
/// create large contiguous memory allocations and write output into them. With
/// some data types, like binary and string types, it is not possible at all to
/// produce non-chunked array outputs without requiring a concatenation step at
/// the end of processing.
///
/// Application developers may tune chunk sizes based on analysis of
/// performance profiles but many developer-users will not need to be
/// especially concerned with the chunking details.
///
/// Preserving the chunk layout/sizes in processing steps is generally not
/// considered to be a contract in APIs. A function may decide to alter the
/// chunking of its result. Similarly, APIs accepting multiple ChunkedArray
/// inputs should not expect the chunk layout to be the same in each input.
class ARROW_EXPORT ChunkedArray {
public:
ChunkedArray(ChunkedArray&&) = default;
ChunkedArray& operator=(ChunkedArray&&) = default;
/// \brief Construct a chunked array from a single Array
explicit ChunkedArray(std::shared_ptr<Array> chunk)
: ChunkedArray(ArrayVector{std::move(chunk)}) {}
/// \brief Construct a chunked array from a vector of arrays and an optional data type
///
/// The vector elements must have the same data type.
/// If the data type is passed explicitly, the vector may be empty.
/// If the data type is omitted, the vector must be non-empty.
explicit ChunkedArray(ArrayVector chunks, std::shared_ptr<DataType> type = NULLPTR);
/// \brief Constructor with basic input validation.
static Result<std::shared_ptr<ChunkedArray>> Make(
ArrayVector chunks, std::shared_ptr<DataType> type = NULLPTR);
/// \brief Create an empty ChunkedArray of a given type
///
/// The output ChunkedArray will have one chunk with an empty
/// array of the given type.
///
/// \param[in] type the data type of the empty ChunkedArray
/// \param[in] pool the memory pool to allocate memory from
/// \return the resulting ChunkedArray
static Result<std::shared_ptr<ChunkedArray>> MakeEmpty(
std::shared_ptr<DataType> type, MemoryPool* pool = default_memory_pool());
/// \return the total length of the chunked array; computed on construction
int64_t length() const { return length_; }
/// \return the total number of nulls among all chunks
int64_t null_count() const { return null_count_; }
/// \return the total number of chunks in the chunked array
int num_chunks() const { return static_cast<int>(chunks_.size()); }
/// \return a particular chunk from the chunked array
std::shared_ptr<Array> chunk(int i) const { return chunks_[i]; }
/// \return an ArrayVector of chunks
const ArrayVector& chunks() const { return chunks_; }
/// \brief Construct a zero-copy slice of the chunked array with the
/// indicated offset and length
///
/// \param[in] offset the position of the first element in the constructed
/// slice
/// \param[in] length the length of the slice. If there are not enough
/// elements in the chunked array, the length will be adjusted accordingly
///
/// \return a new object wrapped in std::shared_ptr<ChunkedArray>
std::shared_ptr<ChunkedArray> Slice(int64_t offset, int64_t length) const;
/// \brief Slice from offset until end of the chunked array
std::shared_ptr<ChunkedArray> Slice(int64_t offset) const;
/// \brief Flatten this chunked array as a vector of chunked arrays, one
/// for each struct field
///
/// \param[in] pool The pool for buffer allocations, if any
Result<std::vector<std::shared_ptr<ChunkedArray>>> Flatten(
MemoryPool* pool = default_memory_pool()) const;
/// Construct a zero-copy view of this chunked array with the given
/// type. Calls Array::View on each constituent chunk. Always succeeds if
/// there are zero chunks
Result<std::shared_ptr<ChunkedArray>> View(const std::shared_ptr<DataType>& type) const;
/// \brief Return the type of the chunked array
const std::shared_ptr<DataType>& type() const { return type_; }
/// \brief Return a Scalar containing the value of this array at index
Result<std::shared_ptr<Scalar>> GetScalar(int64_t index) const;
/// \brief Determine if two chunked arrays are equal.
///
/// Two chunked arrays can be equal only if they have equal datatypes.
/// However, they may be equal even if they have different chunkings.
bool Equals(const ChunkedArray& other) const;
/// \brief Determine if two chunked arrays are equal.
bool Equals(const std::shared_ptr<ChunkedArray>& other) const;
/// \brief Determine if two chunked arrays are approximately equal
bool ApproxEquals(const ChunkedArray& other,
const EqualOptions& = EqualOptions::Defaults()) const;
/// \return PrettyPrint representation suitable for debugging
std::string ToString() const;
/// \brief Perform cheap validation checks to determine obvious inconsistencies
/// within the chunked array's internal data.
///
/// This is O(k*m) where k is the number of array descendants,
/// and m is the number of chunks.
///
/// \return Status
Status Validate() const;
/// \brief Perform extensive validation checks to determine inconsistencies
/// within the chunked array's internal data.
///
/// This is O(k*n) where k is the number of array descendants,
/// and n is the length in elements.
///
/// \return Status
Status ValidateFull() const;
protected:
ArrayVector chunks_;
std::shared_ptr<DataType> type_;
int64_t length_;
int64_t null_count_;
private:
internal::ChunkResolver chunk_resolver_;
ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray);
};
namespace internal {
/// \brief EXPERIMENTAL: Utility for incremental iteration over contiguous
/// pieces of potentially differently-chunked ChunkedArray objects
class ARROW_EXPORT MultipleChunkIterator {
public:
MultipleChunkIterator(const ChunkedArray& left, const ChunkedArray& right)
: left_(left),
right_(right),
pos_(0),
length_(left.length()),
chunk_idx_left_(0),
chunk_idx_right_(0),
chunk_pos_left_(0),
chunk_pos_right_(0) {}
bool Next(std::shared_ptr<Array>* next_left, std::shared_ptr<Array>* next_right);
int64_t position() const { return pos_; }
private:
const ChunkedArray& left_;
const ChunkedArray& right_;
// The amount of the entire ChunkedArray consumed
int64_t pos_;
// Length of the chunked array(s)
int64_t length_;
// Current left chunk
int chunk_idx_left_;
// Current right chunk
int chunk_idx_right_;
// Offset into the current left chunk
int64_t chunk_pos_left_;
// Offset into the current right chunk
int64_t chunk_pos_right_;
};
/// \brief Evaluate binary function on two ChunkedArray objects having possibly
/// different chunk layouts. The passed binary function / functor should have
/// the following signature.
///
/// Status(const Array&, const Array&, int64_t)
///
/// The third argument is the absolute position relative to the start of each
/// ChunkedArray. The function is executed against each contiguous pair of
/// array segments, slicing if necessary.
///
/// For example, if two arrays have chunk sizes
///
/// left: [10, 10, 20]
/// right: [15, 10, 15]
///
/// Then the following invocations take place (pseudocode)
///
/// func(left.chunk[0][0:10], right.chunk[0][0:10], 0)
/// func(left.chunk[1][0:5], right.chunk[0][10:15], 10)
/// func(left.chunk[1][5:10], right.chunk[1][0:5], 15)
/// func(left.chunk[2][0:5], right.chunk[1][5:10], 20)
/// func(left.chunk[2][5:20], right.chunk[2][:], 25)
template <typename Action>
Status ApplyBinaryChunked(const ChunkedArray& left, const ChunkedArray& right,
Action&& action) {
MultipleChunkIterator iterator(left, right);
std::shared_ptr<Array> left_piece, right_piece;
while (iterator.Next(&left_piece, &right_piece)) {
ARROW_RETURN_NOT_OK(action(*left_piece, *right_piece, iterator.position()));
}
return Status::OK();
}
} // namespace internal
} // namespace arrow
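
// --- Usage sketch (editorial illustration, not part of the file) ---
// Assembling a ChunkedArray from two same-typed chunks, then taking a
// zero-copy slice spanning the chunk boundary. `SliceAcrossChunks` is a
// hypothetical helper; `a` and `b` are assumed to be arrays of one type.

arrow::Result<std::shared_ptr<arrow::ChunkedArray>> SliceAcrossChunks(
    std::shared_ptr<arrow::Array> a, std::shared_ptr<arrow::Array> b) {
  ARROW_ASSIGN_OR_RAISE(
      auto chunked, arrow::ChunkedArray::Make({std::move(a), std::move(b)}));
  // Slicing is zero-copy; the result may itself hold multiple chunks, and
  // per the notes above callers should not rely on any particular layout.
  return chunked->Slice(/*offset=*/1, /*length=*/chunked->length() - 2);
}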

View File

@@ -0,0 +1,145 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Functions for comparing Arrow data structures
#pragma once
#include <cstdint>
#include <iosfwd>
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
class Array;
class DataType;
class Tensor;
class SparseTensor;
struct Scalar;
static constexpr double kDefaultAbsoluteTolerance = 1E-5;
/// A container of options for equality comparisons
class EqualOptions {
public:
/// Whether or not NaNs are considered equal.
bool nans_equal() const { return nans_equal_; }
/// Return a new EqualOptions object with the "nans_equal" property changed.
EqualOptions nans_equal(bool v) const {
auto res = EqualOptions(*this);
res.nans_equal_ = v;
return res;
}
/// Whether or not zeros with differing signs are considered equal.
bool signed_zeros_equal() const { return signed_zeros_equal_; }
/// Return a new EqualOptions object with the "signed_zeros_equal" property changed.
EqualOptions signed_zeros_equal(bool v) const {
auto res = EqualOptions(*this);
res.signed_zeros_equal_ = v;
return res;
}
/// The absolute tolerance for approximate comparisons of floating-point values.
double atol() const { return atol_; }
/// Return a new EqualOptions object with the "atol" property changed.
EqualOptions atol(double v) const {
auto res = EqualOptions(*this);
res.atol_ = v;
return res;
}
/// The ostream to which a diff will be formatted if arrays disagree.
/// If this is null (the default) no diff will be formatted.
std::ostream* diff_sink() const { return diff_sink_; }
/// Return a new EqualOptions object with the "diff_sink" property changed.
/// This option will be ignored if diff formatting is not supported for the
/// types of the compared arrays.
EqualOptions diff_sink(std::ostream* diff_sink) const {
auto res = EqualOptions(*this);
res.diff_sink_ = diff_sink;
return res;
}
static EqualOptions Defaults() { return {}; }
protected:
double atol_ = kDefaultAbsoluteTolerance;
bool nans_equal_ = false;
bool signed_zeros_equal_ = true;
std::ostream* diff_sink_ = NULLPTR;
};
/// Returns true if the arrays are exactly equal
bool ARROW_EXPORT ArrayEquals(const Array& left, const Array& right,
const EqualOptions& = EqualOptions::Defaults());
/// Returns true if the arrays are approximately equal. For non-floating point
/// types, this is equivalent to ArrayEquals(left, right)
bool ARROW_EXPORT ArrayApproxEquals(const Array& left, const Array& right,
const EqualOptions& = EqualOptions::Defaults());
/// Returns true if indicated equal-length segment of arrays are exactly equal
bool ARROW_EXPORT ArrayRangeEquals(const Array& left, const Array& right,
int64_t start_idx, int64_t end_idx,
int64_t other_start_idx,
const EqualOptions& = EqualOptions::Defaults());
/// Returns true if indicated equal-length segment of arrays are approximately equal
bool ARROW_EXPORT ArrayRangeApproxEquals(const Array& left, const Array& right,
int64_t start_idx, int64_t end_idx,
int64_t other_start_idx,
const EqualOptions& = EqualOptions::Defaults());
bool ARROW_EXPORT TensorEquals(const Tensor& left, const Tensor& right,
const EqualOptions& = EqualOptions::Defaults());
/// EXPERIMENTAL: Returns true if the given sparse tensors are exactly equal
bool ARROW_EXPORT SparseTensorEquals(const SparseTensor& left, const SparseTensor& right,
const EqualOptions& = EqualOptions::Defaults());
/// Returns true if the type metadata are exactly equal
/// \param[in] left a DataType
/// \param[in] right a DataType
/// \param[in] check_metadata whether to compare KeyValueMetadata for child
/// fields
bool ARROW_EXPORT TypeEquals(const DataType& left, const DataType& right,
bool check_metadata = true);
/// Returns true if scalars are equal
/// \param[in] left a Scalar
/// \param[in] right a Scalar
/// \param[in] options comparison options
bool ARROW_EXPORT ScalarEquals(const Scalar& left, const Scalar& right,
const EqualOptions& options = EqualOptions::Defaults());
/// Returns true if scalars are approximately equal
/// \param[in] left a Scalar
/// \param[in] right a Scalar
/// \param[in] options comparison options
bool ARROW_EXPORT
ScalarApproxEquals(const Scalar& left, const Scalar& right,
const EqualOptions& options = EqualOptions::Defaults());
} // namespace arrow
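
// --- Usage sketch (editorial illustration, not part of the file) ---
// Approximate comparison with non-default options: a wider absolute
// tolerance and NaNs compared as equal. `RoughlyEqual` is a hypothetical
// helper; each option setter returns a modified copy, so calls chain.

bool RoughlyEqual(const arrow::Array& left, const arrow::Array& right) {
  auto options = arrow::EqualOptions::Defaults().atol(1e-3).nans_equal(true);
  return arrow::ArrayApproxEquals(left, right, options);
}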

View File

@@ -0,0 +1,48 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// NOTE: API is EXPERIMENTAL and will change without going through a
// deprecation cycle
#pragma once
/// \defgroup compute-concrete-options Concrete option classes for compute functions
/// @{
/// @}
#include "arrow/compute/api_aggregate.h" // IWYU pragma: export
#include "arrow/compute/api_scalar.h" // IWYU pragma: export
#include "arrow/compute/api_vector.h" // IWYU pragma: export
#include "arrow/compute/cast.h" // IWYU pragma: export
#include "arrow/compute/exec.h" // IWYU pragma: export
#include "arrow/compute/function.h" // IWYU pragma: export
#include "arrow/compute/kernel.h" // IWYU pragma: export
#include "arrow/compute/registry.h" // IWYU pragma: export
#include "arrow/datum.h" // IWYU pragma: export
/// \defgroup execnode-expressions Utilities for creating expressions to
/// use in execution plans
/// @{
/// @}
#include "arrow/compute/exec/expression.h" // IWYU pragma: export
/// \defgroup execnode-options Concrete option classes for ExecNode options
/// @{
/// @}
#include "arrow/compute/exec/options.h" // IWYU pragma: export

View File

@@ -0,0 +1,494 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Eager evaluation convenience APIs for invoking common functions, including
// necessary memory allocations
#pragma once
#include "arrow/compute/function.h"
#include "arrow/datum.h"
#include "arrow/result.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
class Array;
namespace compute {
class ExecContext;
// ----------------------------------------------------------------------
// Aggregate functions
/// \addtogroup compute-concrete-options
/// @{
/// \brief Control general scalar aggregate kernel behavior
///
/// By default, null values are ignored (skip_nulls = true).
class ARROW_EXPORT ScalarAggregateOptions : public FunctionOptions {
public:
explicit ScalarAggregateOptions(bool skip_nulls = true, uint32_t min_count = 1);
static constexpr char const kTypeName[] = "ScalarAggregateOptions";
static ScalarAggregateOptions Defaults() { return ScalarAggregateOptions{}; }
/// If true (the default), null values are ignored. Otherwise, if any value is null,
/// emit null.
bool skip_nulls;
/// If less than this many non-null values are observed, emit null.
uint32_t min_count;
};
/// \brief Control count aggregate kernel behavior.
///
/// By default, only non-null values are counted.
class ARROW_EXPORT CountOptions : public FunctionOptions {
public:
enum CountMode {
/// Count only non-null values.
ONLY_VALID = 0,
/// Count only null values.
ONLY_NULL,
/// Count both non-null and null values.
ALL,
};
explicit CountOptions(CountMode mode = CountMode::ONLY_VALID);
static constexpr char const kTypeName[] = "CountOptions";
static CountOptions Defaults() { return CountOptions{}; }
CountMode mode;
};
/// \brief Control Mode kernel behavior
///
/// Returns top-n common values and counts.
/// By default, returns the most common value and count.
class ARROW_EXPORT ModeOptions : public FunctionOptions {
public:
explicit ModeOptions(int64_t n = 1, bool skip_nulls = true, uint32_t min_count = 0);
static constexpr char const kTypeName[] = "ModeOptions";
static ModeOptions Defaults() { return ModeOptions{}; }
int64_t n = 1;
/// If true (the default), null values are ignored. Otherwise, if any value is null,
/// emit null.
bool skip_nulls;
/// If less than this many non-null values are observed, emit null.
uint32_t min_count;
};
/// \brief Control Delta Degrees of Freedom (ddof) of Variance and Stddev kernel
///
/// The divisor used in calculations is N - ddof, where N is the number of elements.
/// By default, ddof is zero, and population variance or stddev is returned.
class ARROW_EXPORT VarianceOptions : public FunctionOptions {
public:
explicit VarianceOptions(int ddof = 0, bool skip_nulls = true, uint32_t min_count = 0);
static constexpr char const kTypeName[] = "VarianceOptions";
static VarianceOptions Defaults() { return VarianceOptions{}; }
int ddof = 0;
/// If true (the default), null values are ignored. Otherwise, if any value is null,
/// emit null.
bool skip_nulls;
/// If less than this many non-null values are observed, emit null.
uint32_t min_count;
};
/// \brief Control Quantile kernel behavior
///
/// By default, returns the median value.
class ARROW_EXPORT QuantileOptions : public FunctionOptions {
public:
/// Interpolation method to use when quantile lies between two data points
enum Interpolation {
LINEAR = 0,
LOWER,
HIGHER,
NEAREST,
MIDPOINT,
};
explicit QuantileOptions(double q = 0.5, enum Interpolation interpolation = LINEAR,
bool skip_nulls = true, uint32_t min_count = 0);
explicit QuantileOptions(std::vector<double> q,
enum Interpolation interpolation = LINEAR,
bool skip_nulls = true, uint32_t min_count = 0);
static constexpr char const kTypeName[] = "QuantileOptions";
static QuantileOptions Defaults() { return QuantileOptions{}; }
/// quantile must be between 0 and 1 inclusive
std::vector<double> q;
enum Interpolation interpolation;
/// If true (the default), null values are ignored. Otherwise, if any value is null,
/// emit null.
bool skip_nulls;
/// If less than this many non-null values are observed, emit null.
uint32_t min_count;
};
/// \brief Control TDigest approximate quantile kernel behavior
///
/// By default, returns the median value.
class ARROW_EXPORT TDigestOptions : public FunctionOptions {
public:
explicit TDigestOptions(double q = 0.5, uint32_t delta = 100,
uint32_t buffer_size = 500, bool skip_nulls = true,
uint32_t min_count = 0);
explicit TDigestOptions(std::vector<double> q, uint32_t delta = 100,
uint32_t buffer_size = 500, bool skip_nulls = true,
uint32_t min_count = 0);
static constexpr char const kTypeName[] = "TDigestOptions";
static TDigestOptions Defaults() { return TDigestOptions{}; }
/// quantile must be between 0 and 1 inclusive
std::vector<double> q;
/// compression parameter, default 100
uint32_t delta;
/// input buffer size, default 500
uint32_t buffer_size;
/// If true (the default), null values are ignored. Otherwise, if any value is null,
/// emit null.
bool skip_nulls;
/// If less than this many non-null values are observed, emit null.
uint32_t min_count;
};
/// \brief Control Index kernel behavior
class ARROW_EXPORT IndexOptions : public FunctionOptions {
public:
explicit IndexOptions(std::shared_ptr<Scalar> value);
// Default constructor for serialization
IndexOptions();
static constexpr char const kTypeName[] = "IndexOptions";
std::shared_ptr<Scalar> value;
};
/// @}
/// \brief Count values in an array.
///
/// \param[in] datum the datum to count
/// \param[in] options counting options, see CountOptions for more information
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Count(const Datum& datum,
const CountOptions& options = CountOptions::Defaults(),
ExecContext* ctx = NULLPTR);
/// \brief Compute the mean of a numeric array.
///
/// \param[in] value datum to compute the mean, expecting Array
/// \param[in] options see ScalarAggregateOptions for more information
/// \param[in] ctx the function execution context, optional
/// \return datum of the computed mean as a DoubleScalar
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Mean(
const Datum& value,
const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
ExecContext* ctx = NULLPTR);
/// \brief Compute the product of values of a numeric array.
///
/// \param[in] value datum to compute product of, expecting Array or ChunkedArray
/// \param[in] options see ScalarAggregateOptions for more information
/// \param[in] ctx the function execution context, optional
/// \return datum of the computed product as a Scalar
///
/// \since 6.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Product(
const Datum& value,
const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
ExecContext* ctx = NULLPTR);
/// \brief Sum values of a numeric array.
///
/// \param[in] value datum to sum, expecting Array or ChunkedArray
/// \param[in] options see ScalarAggregateOptions for more information
/// \param[in] ctx the function execution context, optional
/// \return datum of the computed sum as a Scalar
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Sum(
const Datum& value,
const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
ExecContext* ctx = NULLPTR);
/// \brief Calculate the min / max of a numeric array
///
/// This function returns both the min and max as a struct scalar, with type
/// struct<min: T, max: T>, where T is the input type
///
/// \param[in] value input datum, expecting Array or ChunkedArray
/// \param[in] options see ScalarAggregateOptions for more information
/// \param[in] ctx the function execution context, optional
/// \return resulting datum as a struct<min: T, max: T> scalar
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> MinMax(
const Datum& value,
const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
ExecContext* ctx = NULLPTR);
/// \brief Test whether any element in a boolean array evaluates to true.
///
/// This function returns true if any element in the array evaluates to true,
/// and false otherwise. Null values are ignored by default. If null values
/// are taken into account (by setting the ScalarAggregateOptions parameter
/// skip_nulls = false), then Kleene logic is used.
/// See KleeneOr for more details on Kleene logic.
///
/// \param[in] value input datum, expecting a boolean array
/// \param[in] options see ScalarAggregateOptions for more information
/// \param[in] ctx the function execution context, optional
/// \return resulting datum as a BooleanScalar
///
/// \since 3.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Any(
const Datum& value,
const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
ExecContext* ctx = NULLPTR);
/// \brief Test whether all elements in a boolean array evaluate to true.
///
/// This function returns true if all elements in the array evaluate to true,
/// and false otherwise. Null values are ignored by default. If null values
/// are taken into account (by setting the ScalarAggregateOptions parameter
/// skip_nulls = false), then Kleene logic is used.
/// See KleeneAnd for more details on Kleene logic.
///
/// \param[in] value input datum, expecting a boolean array
/// \param[in] options see ScalarAggregateOptions for more information
/// \param[in] ctx the function execution context, optional
/// \return resulting datum as a BooleanScalar
///
/// \since 3.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> All(
const Datum& value,
const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(),
ExecContext* ctx = NULLPTR);
/// \brief Calculate the modal (most common) value of a numeric array
///
/// This function returns the top-n most common values and the number of times
/// they occur as an array of `struct<mode: T, count: int64>`, where T is the
/// input type. Values with larger counts are returned before smaller ones.
/// If more than one value has the same count, the smaller value is returned
/// first.
///
/// \param[in] value input datum, expecting Array or ChunkedArray
/// \param[in] options see ModeOptions for more information
/// \param[in] ctx the function execution context, optional
/// \return resulting datum as an array of struct<mode: T, count: int64>
///
/// \since 2.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Mode(const Datum& value,
const ModeOptions& options = ModeOptions::Defaults(),
ExecContext* ctx = NULLPTR);
/// \brief Calculate the standard deviation of a numeric array
///
/// \param[in] value input datum, expecting Array or ChunkedArray
/// \param[in] options see VarianceOptions for more information
/// \param[in] ctx the function execution context, optional
/// \return datum of the computed standard deviation as a DoubleScalar
///
/// \since 2.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Stddev(const Datum& value,
const VarianceOptions& options = VarianceOptions::Defaults(),
ExecContext* ctx = NULLPTR);
/// \brief Calculate the variance of a numeric array
///
/// \param[in] value input datum, expecting Array or ChunkedArray
/// \param[in] options see VarianceOptions for more information
/// \param[in] ctx the function execution context, optional
/// \return datum of the computed variance as a DoubleScalar
///
/// \since 2.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Variance(const Datum& value,
const VarianceOptions& options = VarianceOptions::Defaults(),
ExecContext* ctx = NULLPTR);
/// \brief Calculate the quantiles of a numeric array
///
/// \param[in] value input datum, expecting Array or ChunkedArray
/// \param[in] options see QuantileOptions for more information
/// \param[in] ctx the function execution context, optional
/// \return resulting datum as an array
///
/// \since 4.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Quantile(const Datum& value,
const QuantileOptions& options = QuantileOptions::Defaults(),
ExecContext* ctx = NULLPTR);
/// \brief Calculate the approximate quantiles of a numeric array with T-Digest algorithm
///
/// \param[in] value input datum, expecting Array or ChunkedArray
/// \param[in] options see TDigestOptions for more information
/// \param[in] ctx the function execution context, optional
/// \return resulting datum as an array
///
/// \since 4.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> TDigest(const Datum& value,
const TDigestOptions& options = TDigestOptions::Defaults(),
ExecContext* ctx = NULLPTR);
/// \brief Find the first index of a value in an array.
///
/// \param[in] value The array to search.
/// \param[in] options The value to search for. See IndexOptions.
/// \param[in] ctx the function execution context, optional
/// \return a Scalar containing the index (or -1 if not found)
///
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Index(const Datum& value, const IndexOptions& options,
ExecContext* ctx = NULLPTR);
namespace internal {
/// Internal use only: streaming group identifier.
/// Consumes batches of keys and yields batches of the group ids.
class ARROW_EXPORT Grouper {
public:
virtual ~Grouper() = default;
/// Construct a Grouper which receives the specified key types
static Result<std::unique_ptr<Grouper>> Make(const std::vector<ValueDescr>& descrs,
ExecContext* ctx = default_exec_context());
/// Consume a batch of keys, producing the corresponding group ids as an integer array.
/// Currently only uint32 indices will be produced; eventually the bit width
/// will only be as wide as necessary.
virtual Result<Datum> Consume(const ExecBatch& batch) = 0;
/// Get current unique keys. May be called multiple times.
virtual Result<ExecBatch> GetUniques() = 0;
/// Get the current number of groups.
virtual uint32_t num_groups() const = 0;
/// \brief Assemble lists of indices of identical elements.
///
/// \param[in] ids An unsigned, all-valid integral array which will be
/// used as grouping criteria.
/// \param[in] num_groups An upper bound for the elements of ids
/// \return A num_groups-long ListArray where the slot at i contains a
/// list of indices where i appears in ids.
///
/// MakeGroupings([
/// 2,
/// 2,
/// 5,
/// 5,
/// 2,
/// 3
/// ], 8) == [
/// [],
/// [],
/// [0, 1, 4],
/// [5],
/// [],
/// [2, 3],
/// [],
/// []
/// ]
static Result<std::shared_ptr<ListArray>> MakeGroupings(
const UInt32Array& ids, uint32_t num_groups,
ExecContext* ctx = default_exec_context());
/// \brief Produce a ListArray whose slots are selections of `array` which correspond to
/// the provided groupings.
///
/// For example,
/// ApplyGroupings([
/// [],
/// [],
/// [0, 1, 4],
/// [5],
/// [],
/// [2, 3],
/// [],
/// []
/// ], [2, 2, 5, 5, 2, 3]) == [
/// [],
/// [],
/// [2, 2, 2],
/// [3],
/// [],
/// [5, 5],
/// [],
/// []
/// ]
static Result<std::shared_ptr<ListArray>> ApplyGroupings(
const ListArray& groupings, const Array& array,
ExecContext* ctx = default_exec_context());
};
/// \brief Configure a grouped aggregation
struct ARROW_EXPORT Aggregate {
/// the name of the aggregation function
std::string function;
/// options for the aggregation function
const FunctionOptions* options;
};
/// Internal use only: helper function for testing HashAggregateKernels.
/// This will be replaced by streaming execution operators.
ARROW_EXPORT
Result<Datum> GroupBy(const std::vector<Datum>& arguments, const std::vector<Datum>& keys,
const std::vector<Aggregate>& aggregates, bool use_threads = false,
ExecContext* ctx = default_exec_context());
} // namespace internal
} // namespace compute
} // namespace arrow
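
// --- Usage sketch (editorial illustration, not part of the file) ---
// Eagerly summing an array with the defaults documented above (nulls
// skipped, min_count = 1). `SumOf` is a hypothetical helper; the input is
// assumed to be a numeric array such as int64.

#include <memory>

arrow::Result<std::shared_ptr<arrow::Scalar>> SumOf(
    std::shared_ptr<arrow::Array> values) {
  ARROW_ASSIGN_OR_RAISE(
      arrow::Datum out, arrow::compute::Sum(arrow::Datum(std::move(values))));
  return out.scalar();  // e.g. an Int64Scalar for int64 input
}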

File diff suppressed because it is too large

View File

@@ -0,0 +1,534 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <utility>
#include "arrow/compute/function.h"
#include "arrow/datum.h"
#include "arrow/result.h"
#include "arrow/type_fwd.h"
namespace arrow {
namespace compute {
class ExecContext;
/// \addtogroup compute-concrete-options
/// @{
class ARROW_EXPORT FilterOptions : public FunctionOptions {
public:
/// Configure the action taken when a slot of the selection mask is null
enum NullSelectionBehavior {
/// The corresponding filtered value will be removed in the output.
DROP,
/// The corresponding filtered value will be null in the output.
EMIT_NULL,
};
explicit FilterOptions(NullSelectionBehavior null_selection = DROP);
static constexpr char const kTypeName[] = "FilterOptions";
static FilterOptions Defaults() { return FilterOptions(); }
NullSelectionBehavior null_selection_behavior = DROP;
};
class ARROW_EXPORT TakeOptions : public FunctionOptions {
public:
explicit TakeOptions(bool boundscheck = true);
static constexpr char const kTypeName[] = "TakeOptions";
static TakeOptions BoundsCheck() { return TakeOptions(true); }
static TakeOptions NoBoundsCheck() { return TakeOptions(false); }
static TakeOptions Defaults() { return BoundsCheck(); }
bool boundscheck = true;
};
/// \brief Options for the dictionary encode function
class ARROW_EXPORT DictionaryEncodeOptions : public FunctionOptions {
public:
/// Configure how null values will be encoded
enum NullEncodingBehavior {
/// The null value will be added to the dictionary with a proper index.
ENCODE,
/// The null value will be masked in the indices array.
MASK
};
explicit DictionaryEncodeOptions(NullEncodingBehavior null_encoding = MASK);
static constexpr char const kTypeName[] = "DictionaryEncodeOptions";
static DictionaryEncodeOptions Defaults() { return DictionaryEncodeOptions(); }
NullEncodingBehavior null_encoding_behavior = MASK;
};
enum class SortOrder {
/// Arrange values in increasing order
Ascending,
/// Arrange values in decreasing order
Descending,
};
enum class NullPlacement {
/// Place nulls and NaNs before any non-null values.
/// NaNs will come after nulls.
AtStart,
/// Place nulls and NaNs after any non-null values.
/// NaNs will come before nulls.
AtEnd,
};
/// \brief One sort key for PartitionNthIndices (TODO) and SortIndices
class ARROW_EXPORT SortKey : public util::EqualityComparable<SortKey> {
public:
explicit SortKey(FieldRef target, SortOrder order = SortOrder::Ascending)
: target(std::move(target)), order(order) {}
using util::EqualityComparable<SortKey>::Equals;
using util::EqualityComparable<SortKey>::operator==;
using util::EqualityComparable<SortKey>::operator!=;
bool Equals(const SortKey& other) const;
std::string ToString() const;
/// A FieldRef targeting the sort column.
FieldRef target;
/// How to order by this sort key.
SortOrder order;
};
class ARROW_EXPORT ArraySortOptions : public FunctionOptions {
public:
explicit ArraySortOptions(SortOrder order = SortOrder::Ascending,
NullPlacement null_placement = NullPlacement::AtEnd);
static constexpr char const kTypeName[] = "ArraySortOptions";
static ArraySortOptions Defaults() { return ArraySortOptions(); }
/// Sorting order
SortOrder order;
/// Whether nulls and NaNs are placed at the start or at the end
NullPlacement null_placement;
};
class ARROW_EXPORT SortOptions : public FunctionOptions {
public:
explicit SortOptions(std::vector<SortKey> sort_keys = {},
NullPlacement null_placement = NullPlacement::AtEnd);
static constexpr char const kTypeName[] = "SortOptions";
static SortOptions Defaults() { return SortOptions(); }
/// Column key(s) to order by and how to order by these sort keys.
std::vector<SortKey> sort_keys;
/// Whether nulls and NaNs are placed at the start or at the end
NullPlacement null_placement;
};
/// \brief SelectK options
class ARROW_EXPORT SelectKOptions : public FunctionOptions {
public:
explicit SelectKOptions(int64_t k = -1, std::vector<SortKey> sort_keys = {});
static constexpr char const kTypeName[] = "SelectKOptions";
static SelectKOptions Defaults() { return SelectKOptions(); }
static SelectKOptions TopKDefault(int64_t k, std::vector<std::string> key_names = {}) {
std::vector<SortKey> keys;
for (const auto& name : key_names) {
keys.emplace_back(SortKey(name, SortOrder::Descending));
}
if (key_names.empty()) {
keys.emplace_back(SortKey("not-used", SortOrder::Descending));
}
return SelectKOptions{k, keys};
}
static SelectKOptions BottomKDefault(int64_t k,
std::vector<std::string> key_names = {}) {
std::vector<SortKey> keys;
for (const auto& name : key_names) {
keys.emplace_back(SortKey(name, SortOrder::Ascending));
}
if (key_names.empty()) {
keys.emplace_back(SortKey("not-used", SortOrder::Ascending));
}
return SelectKOptions{k, keys};
}
/// The number of elements (`k`) to keep.
int64_t k;
/// Column key(s) to order by and how to order by these sort keys.
std::vector<SortKey> sort_keys;
};
/// \brief Partitioning options for NthToIndices
class ARROW_EXPORT PartitionNthOptions : public FunctionOptions {
public:
explicit PartitionNthOptions(int64_t pivot,
NullPlacement null_placement = NullPlacement::AtEnd);
PartitionNthOptions() : PartitionNthOptions(0) {}
static constexpr char const kTypeName[] = "PartitionNthOptions";
/// The index into the equivalent sorted array of the partition pivot element.
int64_t pivot;
/// Whether nulls and NaNs are partitioned at the start or at the end
NullPlacement null_placement;
};
/// @}
/// \brief Filter with a boolean selection filter
///
/// The output will be populated with values from the input at positions
/// where the selection filter is not 0. Nulls in the filter will be handled
/// based on options.null_selection_behavior.
///
/// For example given values = ["a", "b", "c", null, "e", "f"] and
/// filter = [0, 1, 1, 0, null, 1], the output will be
/// (null_selection_behavior == DROP) = ["b", "c", "f"]
/// (null_selection_behavior == EMIT_NULL) = ["b", "c", null, "f"]
///
/// \param[in] values array to filter
/// \param[in] filter indicates which values should be filtered out
/// \param[in] options configures null_selection_behavior
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
ARROW_EXPORT
Result<Datum> Filter(const Datum& values, const Datum& filter,
const FilterOptions& options = FilterOptions::Defaults(),
ExecContext* ctx = NULLPTR);
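// A minimal usage sketch (illustrative only, not part of the original header):
// keep null slots in the output wherever the selection mask is null, rather
// than dropping them. Assumes `values` and `mask` were built elsewhere.
inline Result<Datum> ExampleFilterKeepNulls(const Datum& values, const Datum& mask,
                                            ExecContext* ctx = NULLPTR) {
  return Filter(values, mask, FilterOptions(FilterOptions::EMIT_NULL), ctx);
}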
namespace internal {
// These internal functions are implemented in kernels/vector_selection.cc
/// \brief Return the number of selected indices in the boolean filter
ARROW_EXPORT
int64_t GetFilterOutputSize(const ArrayData& filter,
FilterOptions::NullSelectionBehavior null_selection);
/// \brief Compute uint64 selection indices for use with Take given a boolean
/// filter
ARROW_EXPORT
Result<std::shared_ptr<ArrayData>> GetTakeIndices(
const ArrayData& filter, FilterOptions::NullSelectionBehavior null_selection,
MemoryPool* memory_pool = default_memory_pool());
} // namespace internal
/// \brief ReplaceWithMask replaces each value in the array corresponding
/// to a true value in the mask with the next element from `replacements`.
///
/// \param[in] values Array input to replace
/// \param[in] mask Array or Scalar of Boolean mask values
/// \param[in] replacements The replacement values to draw from. There must
/// be as many replacement values as true values in the mask.
/// \param[in] ctx the function execution context, optional
///
/// \return the resulting datum
///
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> ReplaceWithMask(const Datum& values, const Datum& mask,
const Datum& replacements, ExecContext* ctx = NULLPTR);
/// \brief FillNullForward fill null values in forward direction
///
/// The output array will be of the same type as the input values
/// array, with replaced null values in forward direction.
///
/// For example given values = ["a", "b", "c", null, null, "f"],
/// the output will be = ["a", "b", "c", "c", "c", "f"]
///
/// \param[in] values datum from which to take
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
ARROW_EXPORT
Result<Datum> FillNullForward(const Datum& values, ExecContext* ctx = NULLPTR);
/// \brief FillNullBackward fill null values in backward direction
///
/// The output array will be of the same type as the input values
/// array, with replaced null values in backward direction.
///
/// For example given values = ["a", "b", "c", null, null, "f"],
/// the output will be = ["a", "b", "c", "f", "f", "f"]
///
/// \param[in] values datum from which to take
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
ARROW_EXPORT
Result<Datum> FillNullBackward(const Datum& values, ExecContext* ctx = NULLPTR);
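// Illustrative sketch (not part of the original header): forward-fill first,
// then backward-fill so that leading nulls are also replaced. Relies on
// ARROW_ASSIGN_OR_RAISE from arrow/result.h, which this header includes.
inline Result<Datum> ExampleFillAllNulls(const Datum& values,
                                         ExecContext* ctx = NULLPTR) {
  ARROW_ASSIGN_OR_RAISE(Datum forward_filled, FillNullForward(values, ctx));
  return FillNullBackward(forward_filled, ctx);
}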
/// \brief Take from an array of values at indices in another array
///
/// The output array will be of the same type as the input values
/// array, with elements taken from the values array at the given
/// indices. If an index is null then the taken element will be null.
///
/// For example given values = ["a", "b", "c", null, "e", "f"] and
/// indices = [2, 1, null, 3], the output will be
/// = [values[2], values[1], null, values[3]]
/// = ["c", "b", null, null]
///
/// \param[in] values datum from which to take
/// \param[in] indices which values to take
/// \param[in] options options
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
ARROW_EXPORT
Result<Datum> Take(const Datum& values, const Datum& indices,
const TakeOptions& options = TakeOptions::Defaults(),
ExecContext* ctx = NULLPTR);
/// \brief Take with Array inputs and output
ARROW_EXPORT
Result<std::shared_ptr<Array>> Take(const Array& values, const Array& indices,
const TakeOptions& options = TakeOptions::Defaults(),
ExecContext* ctx = NULLPTR);
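// Illustrative sketch (not part of the original header): gather values at the
// given indices while skipping bounds checking. Only safe when the indices
// are already known to be valid, e.g. because they came from SortIndices.
inline Result<std::shared_ptr<Array>> ExampleTakeUnchecked(const Array& values,
                                                           const Array& indices) {
  return Take(values, indices, TakeOptions::NoBoundsCheck());
}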
/// \brief Drop Null from an array of values
///
/// The output array will be of the same type as the input values
/// array, with elements taken from the values array without nulls.
///
/// For example given values = ["a", "b", "c", null, "e", "f"],
/// the output will be = ["a", "b", "c", "e", "f"]
///
/// \param[in] values datum from which to take
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
ARROW_EXPORT
Result<Datum> DropNull(const Datum& values, ExecContext* ctx = NULLPTR);
/// \brief DropNull with Array inputs and output
ARROW_EXPORT
Result<std::shared_ptr<Array>> DropNull(const Array& values, ExecContext* ctx = NULLPTR);
/// \brief Return indices that partition an array around n-th sorted element.
///
/// Find the index of the n-th (0-based) smallest value and perform an
/// indirect partition of the array around that element. Output indices
/// [0 ~ n-1] hold values no greater than the n-th element, and indices
/// [n+1 ~ end] hold values no less than the n-th element. Elements within
/// each partition are not sorted. Nulls will be partitioned to the end of
/// the output. The output is not guaranteed to be stable.
///
/// \param[in] values array to be partitioned
/// \param[in] n index of the sorted element to partition the array around
/// \param[in] ctx the function execution context, optional
/// \return offsets indices that would partition an array
ARROW_EXPORT
Result<std::shared_ptr<Array>> NthToIndices(const Array& values, int64_t n,
ExecContext* ctx = NULLPTR);
/// \brief Return indices that partition an array around n-th sorted element.
///
/// This overload takes a PartitionNthOptions specifying the pivot index
/// and the null handling.
///
/// \param[in] values array to be partitioned
/// \param[in] options options including pivot index and null handling
/// \param[in] ctx the function execution context, optional
/// \return offsets indices that would partition an array
ARROW_EXPORT
Result<std::shared_ptr<Array>> NthToIndices(const Array& values,
const PartitionNthOptions& options,
ExecContext* ctx = NULLPTR);
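// Illustrative sketch (not part of the original header): partition around a
// caller-supplied pivot while grouping nulls at the start instead of the
// default end.
inline Result<std::shared_ptr<Array>> ExamplePartitionNullsFirst(const Array& values,
                                                                 int64_t pivot) {
  PartitionNthOptions options(pivot, NullPlacement::AtStart);
  return NthToIndices(values, options);
}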
/// \brief Return indices that would select the first `k` elements.
///
/// Perform an indirect sort of the datum, keeping only the first `k` elements. The output
/// array will contain indices such that the item indicated by the k-th index will be in
/// the position it would be if the datum were sorted by `options.sort_keys`. However,
/// indices of null values will not be part of the output. The sort is not guaranteed to
/// be stable.
///
/// \param[in] datum datum to be partitioned
/// \param[in] options options
/// \param[in] ctx the function execution context, optional
/// \return an array of indices into `datum` selecting the first `k` elements
ARROW_EXPORT
Result<std::shared_ptr<Array>> SelectKUnstable(const Datum& datum,
const SelectKOptions& options,
ExecContext* ctx = NULLPTR);
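// Illustrative sketch (not part of the original header): indices of the k
// largest entries of a hypothetical "score" column; the column name is a
// placeholder for whatever the input table actually contains.
inline Result<std::shared_ptr<Array>> ExampleTopKByScore(const Datum& table, int64_t k) {
  return SelectKUnstable(table, SelectKOptions::TopKDefault(k, {"score"}));
}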
/// \brief Return the indices that would sort an array.
///
/// Perform an indirect sort of array. The output array will contain
/// indices that would sort an array, which would be the same length
/// as input. Nulls will be stably partitioned to the end of the output
/// regardless of order.
///
/// For example given array = [null, 1, 3.3, null, 2, 5.3] and order
/// = SortOrder::Descending, the output will be [5, 2, 4, 1, 0, 3].
///
/// \param[in] array array to sort
/// \param[in] order ascending or descending
/// \param[in] ctx the function execution context, optional
/// \return offsets indices that would sort an array
ARROW_EXPORT
Result<std::shared_ptr<Array>> SortIndices(const Array& array,
SortOrder order = SortOrder::Ascending,
ExecContext* ctx = NULLPTR);
/// \brief Return the indices that would sort an array.
///
/// This overload takes an ArraySortOptions specifying the sort order
/// and the null handling.
///
/// \param[in] array array to sort
/// \param[in] options options including sort order and null handling
/// \param[in] ctx the function execution context, optional
/// \return offsets indices that would sort an array
ARROW_EXPORT
Result<std::shared_ptr<Array>> SortIndices(const Array& array,
const ArraySortOptions& options,
ExecContext* ctx = NULLPTR);
/// \brief Return the indices that would sort a chunked array.
///
/// Perform an indirect sort of chunked array. The output array will
/// contain indices that would sort a chunked array, which would be
/// the same length as input. Nulls will be stably partitioned to the
/// end of the output regardless of order.
///
/// For example given chunked_array = [[null, 1], [3.3], [null, 2,
/// 5.3]] and order = SortOrder::Descending, the output will be
/// [5, 2, 4, 1, 0, 3].
///
/// \param[in] chunked_array chunked array to sort
/// \param[in] order ascending or descending
/// \param[in] ctx the function execution context, optional
/// \return offsets indices that would sort an array
ARROW_EXPORT
Result<std::shared_ptr<Array>> SortIndices(const ChunkedArray& chunked_array,
SortOrder order = SortOrder::Ascending,
ExecContext* ctx = NULLPTR);
/// \brief Return the indices that would sort a chunked array.
///
/// This overload takes an ArraySortOptions specifying the sort order
/// and the null handling.
///
/// \param[in] chunked_array chunked array to sort
/// \param[in] options options including sort order and null handling
/// \param[in] ctx the function execution context, optional
/// \return offsets indices that would sort an array
ARROW_EXPORT
Result<std::shared_ptr<Array>> SortIndices(const ChunkedArray& chunked_array,
const ArraySortOptions& options,
ExecContext* ctx = NULLPTR);
/// \brief Return the indices that would sort an input in the
/// specified order. The input is one of array, chunked array,
/// record batch or table.
///
/// Perform an indirect sort of input. The output array will contain
/// indices that would sort an input, which would be the same length
/// as input. Nulls will be stably partitioned to the start or to the end
/// of the output depending on SortOptions::null_placement.
///
/// For example given input (table) = {
/// "column1": [[null, 1], [ 3, null, 2, 1]],
/// "column2": [[ 5], [3, null, null, 5, 5]],
/// } and options = {
/// {"column1", SortOrder::Ascending},
/// {"column2", SortOrder::Descending},
/// }, the output will be [5, 1, 4, 2, 0, 3].
///
/// \param[in] datum array, chunked array, record batch or table to sort
/// \param[in] options options
/// \param[in] ctx the function execution context, optional
/// \return offsets indices that would sort a table
ARROW_EXPORT
Result<std::shared_ptr<Array>> SortIndices(const Datum& datum, const SortOptions& options,
ExecContext* ctx = NULLPTR);
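// Illustrative sketch (not part of the original header): a two-key table
// sort, ascending by "a" with ties broken descending by "b", nulls grouped
// first. The column names are placeholders.
inline Result<std::shared_ptr<Array>> ExampleTwoKeySort(const Datum& table) {
  SortOptions options({SortKey("a", SortOrder::Ascending),
                       SortKey("b", SortOrder::Descending)},
                      NullPlacement::AtStart);
  return SortIndices(table, options);
}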
/// \brief Compute unique elements from an array-like object
///
/// Note if a null occurs in the input it will NOT be included in the output.
///
/// \param[in] datum array-like input
/// \param[in] ctx the function execution context, optional
/// \return result as Array
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<std::shared_ptr<Array>> Unique(const Datum& datum, ExecContext* ctx = NULLPTR);
// Constants for accessing the output of ValueCounts
ARROW_EXPORT extern const char kValuesFieldName[];
ARROW_EXPORT extern const char kCountsFieldName[];
ARROW_EXPORT extern const int32_t kValuesFieldIndex;
ARROW_EXPORT extern const int32_t kCountsFieldIndex;
/// \brief Return counts of unique elements from an array-like object.
///
/// Note that the counts do not include counts for nulls in the array. These can be
/// obtained separately from metadata.
///
/// For floating point arrays there is no attempt to normalize -0.0, 0.0 and
/// NaN values, which can lead to unexpected results if the input Array has them.
///
/// \param[in] value array-like input
/// \param[in] ctx the function execution context, optional
/// \return counts An array of <input type "Values", int64_t "Counts"> structs.
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<std::shared_ptr<StructArray>> ValueCounts(const Datum& value,
ExecContext* ctx = NULLPTR);
/// \brief Dictionary-encode values in an array-like object
///
/// Any nulls encountered in the dictionary will be handled according to the
/// specified null encoding behavior.
///
/// For example, given values ["a", "b", null, "a", null] the output will be
/// (null_encoding == ENCODE) Indices: [0, 1, 2, 0, 2] / Dict: ["a", "b", null]
/// (null_encoding == MASK) Indices: [0, 1, null, 0, null] / Dict: ["a", "b"]
///
/// If the input is already dictionary encoded this function is a no-op unless
/// it needs to modify the null_encoding (TODO)
///
/// \param[in] data array-like input
/// \param[in] ctx the function execution context, optional
/// \param[in] options configures null encoding behavior
/// \return result with same shape and type as input
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> DictionaryEncode(
const Datum& data,
const DictionaryEncodeOptions& options = DictionaryEncodeOptions::Defaults(),
ExecContext* ctx = NULLPTR);
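// Illustrative sketch (not part of the original header): dictionary-encode
// while giving nulls their own dictionary entry instead of masking them in
// the indices.
inline Result<Datum> ExampleEncodeNulls(const Datum& data, ExecContext* ctx = NULLPTR) {
  DictionaryEncodeOptions options(DictionaryEncodeOptions::ENCODE);
  return DictionaryEncode(data, options, ctx);
}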
// ----------------------------------------------------------------------
// Deprecated functions
ARROW_DEPRECATED("Deprecated in 3.0.0. Use SortIndices()")
ARROW_EXPORT
Result<std::shared_ptr<Array>> SortToIndices(const Array& values,
ExecContext* ctx = NULLPTR);
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,167 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "arrow/compute/function.h"
#include "arrow/compute/kernel.h"
#include "arrow/datum.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
class Array;
namespace compute {
class ExecContext;
/// \addtogroup compute-concrete-options
/// @{
class ARROW_EXPORT CastOptions : public FunctionOptions {
public:
explicit CastOptions(bool safe = true);
static constexpr char const kTypeName[] = "CastOptions";
static CastOptions Safe(std::shared_ptr<DataType> to_type = NULLPTR) {
CastOptions safe(true);
safe.to_type = std::move(to_type);
return safe;
}
static CastOptions Unsafe(std::shared_ptr<DataType> to_type = NULLPTR) {
CastOptions unsafe(false);
unsafe.to_type = std::move(to_type);
return unsafe;
}
// Type to cast to. May be passed separately to the eager function
// compute::Cast
std::shared_ptr<DataType> to_type;
bool allow_int_overflow;
bool allow_time_truncate;
bool allow_time_overflow;
bool allow_decimal_truncate;
bool allow_float_truncate;
// Indicates whether conversions from Binary/FixedSizeBinary to string must
// validate the utf8 payload.
bool allow_invalid_utf8;
};
/// @}
// Cast functions are _not_ registered in the FunctionRegistry, though they use
// the same execution machinery
class CastFunction : public ScalarFunction {
public:
CastFunction(std::string name, Type::type out_type_id);
Type::type out_type_id() const { return out_type_id_; }
const std::vector<Type::type>& in_type_ids() const { return in_type_ids_; }
Status AddKernel(Type::type in_type_id, std::vector<InputType> in_types,
OutputType out_type, ArrayKernelExec exec,
NullHandling::type = NullHandling::INTERSECTION,
MemAllocation::type = MemAllocation::PREALLOCATE);
// Note, this function toggles off memory allocation and sets the init
// function to CastInit
Status AddKernel(Type::type in_type_id, ScalarKernel kernel);
Result<const Kernel*> DispatchExact(
const std::vector<ValueDescr>& values) const override;
private:
std::vector<Type::type> in_type_ids_;
const Type::type out_type_id_;
};
ARROW_EXPORT
Result<std::shared_ptr<CastFunction>> GetCastFunction(
const std::shared_ptr<DataType>& to_type);
/// \brief Return true if a cast function is defined
ARROW_EXPORT
bool CanCast(const DataType& from_type, const DataType& to_type);
// ----------------------------------------------------------------------
// Convenience invocation APIs for a number of kernels
/// \brief Cast from one array type to another
/// \param[in] value array to cast
/// \param[in] to_type type to cast to
/// \param[in] options casting options
/// \param[in] ctx the function execution context, optional
/// \return the resulting array
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<std::shared_ptr<Array>> Cast(const Array& value, std::shared_ptr<DataType> to_type,
const CastOptions& options = CastOptions::Safe(),
ExecContext* ctx = NULLPTR);
/// \brief Cast from one array type to another
/// \param[in] value array to cast
/// \param[in] options casting options. The "to_type" field must be populated
/// \param[in] ctx the function execution context, optional
/// \return the resulting array
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Cast(const Datum& value, const CastOptions& options,
ExecContext* ctx = NULLPTR);
/// \brief Cast from one value to another
/// \param[in] value datum to cast
/// \param[in] to_type type to cast to
/// \param[in] options casting options
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Cast(const Datum& value, std::shared_ptr<DataType> to_type,
const CastOptions& options = CastOptions::Safe(),
ExecContext* ctx = NULLPTR);
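// Illustrative sketch (not part of the original header): a safe cast of a
// numeric input to float64; this fails cleanly rather than silently
// truncating values.
inline Result<Datum> ExampleCastToDouble(const Datum& value,
                                         ExecContext* ctx = NULLPTR) {
  return Cast(value, float64(), CastOptions::Safe(), ctx);
}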
/// \brief Cast several values simultaneously. Safe cast options are used.
/// \param[in] values datums to cast
/// \param[in] descrs ValueDescrs to cast to
/// \param[in] ctx the function execution context, optional
/// \return the resulting datums
///
/// \since 4.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<std::vector<Datum>> Cast(std::vector<Datum> values, std::vector<ValueDescr> descrs,
ExecContext* ctx = NULLPTR);
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,277 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// NOTE: API is EXPERIMENTAL and will change without going through a
// deprecation cycle
#pragma once
#include <cstdint>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "arrow/array/data.h"
#include "arrow/compute/exec/expression.h"
#include "arrow/datum.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace internal {
class CpuInfo;
} // namespace internal
namespace compute {
class FunctionOptions;
class FunctionRegistry;
// It seems like 64K might be a good default chunksize to use for execution
// based on the experience of other query processing systems. The current
// default, though, is not to chunk contiguous arrays; this may change in the
// future once parallel execution is implemented
static constexpr int64_t kDefaultExecChunksize = UINT16_MAX;
/// \brief Context for expression-global variables and options used by
/// function evaluation
class ARROW_EXPORT ExecContext {
public:
// If no function registry passed, the default is used.
explicit ExecContext(MemoryPool* pool = default_memory_pool(),
::arrow::internal::Executor* executor = NULLPTR,
FunctionRegistry* func_registry = NULLPTR);
/// \brief The MemoryPool used for allocations, default is
/// default_memory_pool().
MemoryPool* memory_pool() const { return pool_; }
::arrow::internal::CpuInfo* cpu_info() const;
/// \brief An Executor which may be used to parallelize execution.
::arrow::internal::Executor* executor() const { return executor_; }
/// \brief The FunctionRegistry for looking up functions by name and
/// selecting kernels for execution. Defaults to the library-global function
/// registry provided by GetFunctionRegistry.
FunctionRegistry* func_registry() const { return func_registry_; }
/// \brief Set the maximum length of a unit of work for kernel execution.
/// Larger contiguous array inputs will be split into smaller chunks, and, if
/// possible and enabled, processed in parallel. The default chunksize is
/// INT64_MAX, so contiguous arrays are not split.
void set_exec_chunksize(int64_t chunksize) { exec_chunksize_ = chunksize; }
/// \brief Maximum length for ExecBatch data chunks processed by
/// kernels. Contiguous array inputs with longer length will be split into
/// smaller chunks.
int64_t exec_chunksize() const { return exec_chunksize_; }
/// \brief Set whether to use multiple threads for function execution. This
/// is not yet used.
void set_use_threads(bool use_threads = true) { use_threads_ = use_threads; }
/// \brief If true, then utilize multiple threads where relevant for function
/// execution. This is not yet used.
bool use_threads() const { return use_threads_; }
/// \brief Set the preallocation strategy for kernel execution as it relates
/// to chunked execution. For chunked execution, whether via ChunkedArray
/// inputs or splitting larger Array arguments into smaller pieces, contiguous
/// allocation (if permitted by the kernel) will allocate one large array to
/// write output into, yielding it to the caller at the end. If this option is
/// set to false, then preallocations will be performed independently for each
/// chunk of execution.
///
/// TODO: At some point we might want to limit the size of contiguous
/// preallocations. For example, even if the exec_chunksize is 64K or less, we
/// might limit contiguous allocations to 1M records, say.
void set_preallocate_contiguous(bool preallocate) {
preallocate_contiguous_ = preallocate;
}
/// \brief If contiguous preallocations should be used when doing chunked
/// execution as specified by exec_chunksize(). See
/// set_preallocate_contiguous() for more information.
bool preallocate_contiguous() const { return preallocate_contiguous_; }
private:
MemoryPool* pool_;
::arrow::internal::Executor* executor_;
FunctionRegistry* func_registry_;
int64_t exec_chunksize_ = std::numeric_limits<int64_t>::max();
bool preallocate_contiguous_ = true;
bool use_threads_ = true;
};
ARROW_EXPORT ExecContext* default_exec_context();
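// Illustrative sketch (not part of the original header): an ExecContext tuned
// to split large contiguous inputs into 64K-row chunks, preallocating per
// chunk rather than contiguously.
inline ExecContext MakeChunkedExecContext(MemoryPool* pool = default_memory_pool()) {
  ExecContext ctx(pool);
  ctx.set_exec_chunksize(kDefaultExecChunksize);
  ctx.set_preallocate_contiguous(false);
  return ctx;
}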
// TODO: Consider standardizing on uint16 selection vectors and only using
// them when we can ensure that each value is 64K in length or smaller
/// \brief Container for an array of value selection indices that were
/// materialized from a filter.
///
/// Columnar query engines (see e.g. [1]) have found that rather than
/// materializing filtered data, the filter can instead be converted to an
/// array of the "on" indices and then "fusing" these indices in operator
/// implementations. This is especially relevant for aggregations but also
/// applies to scalar operations.
///
/// We are not yet using this, so it is mostly a placeholder for now.
///
/// [1]: http://cidrdb.org/cidr2005/papers/P19.pdf
class ARROW_EXPORT SelectionVector {
public:
explicit SelectionVector(std::shared_ptr<ArrayData> data);
explicit SelectionVector(const Array& arr);
/// \brief Create SelectionVector from boolean mask
static Result<std::shared_ptr<SelectionVector>> FromMask(const BooleanArray& arr);
const int32_t* indices() const { return indices_; }
int32_t length() const;
private:
std::shared_ptr<ArrayData> data_;
const int32_t* indices_;
};
/// \brief A unit of work for kernel execution. It contains a collection of
/// Array and Scalar values and an optional SelectionVector indicating that
/// there is an unmaterialized filter that either must be materialized, or (if
/// the kernel supports it) pushed down into the kernel implementation.
///
/// ExecBatch is semantically similar to RecordBatch in that in a SQL context
/// it represents a collection of records, but constant "columns" are
/// represented by Scalar values rather than having to be converted into arrays
/// with repeated values.
///
/// TODO: Datum uses arrow/util/variant.h which may be a bit heavier-weight
/// than is desirable for this class. Microbenchmarks would help determine for
/// sure. See ARROW-8928.
struct ARROW_EXPORT ExecBatch {
ExecBatch() = default;
ExecBatch(std::vector<Datum> values, int64_t length)
: values(std::move(values)), length(length) {}
explicit ExecBatch(const RecordBatch& batch);
static Result<ExecBatch> Make(std::vector<Datum> values);
Result<std::shared_ptr<RecordBatch>> ToRecordBatch(
std::shared_ptr<Schema> schema, MemoryPool* pool = default_memory_pool()) const;
/// The values representing positional arguments to be passed to a kernel's
/// exec function for processing.
std::vector<Datum> values;
/// A deferred filter represented as an array of indices into the values.
///
/// For example, the filter [true, true, false, true] would be represented as
/// the selection vector [0, 1, 3]. When the selection vector is set,
/// ExecBatch::length is equal to the length of this array.
std::shared_ptr<SelectionVector> selection_vector;
/// A predicate Expression guaranteed to evaluate to true for all rows in this batch.
Expression guarantee = literal(true);
/// The semantic length of the ExecBatch. When the values are all scalars,
/// the length should be set to 1 for non-aggregate kernels, otherwise the
/// length is taken from the array values, except when there is a selection
/// vector. When there is a selection vector set, the length of the batch is
/// the length of the selection. Aggregate kernels can have an ExecBatch
/// formed by projecting just the partition columns from a batch, in which
/// case it would have scalar rows with length greater than 1.
///
/// If the array values are of length 0 then the length is 0 regardless of
/// whether any values are Scalar. In general ExecBatch objects are produced
/// by ExecBatchIterator which by design does not yield length-0 batches.
int64_t length;
/// \brief The sum of bytes in each buffer referenced by the batch
///
/// Note: Scalars are not counted
/// Note: Some values may reference only part of a buffer, for
/// example, an array with an offset. The actual data
/// visible to this batch will be smaller than the total
/// buffer size in this case.
int64_t TotalBufferSize() const;
/// \brief Return the value at the i-th index
template <typename index_type>
inline const Datum& operator[](index_type i) const {
return values[i];
}
bool Equals(const ExecBatch& other) const;
/// \brief A convenience for the number of values / arguments.
int num_values() const { return static_cast<int>(values.size()); }
ExecBatch Slice(int64_t offset, int64_t length) const;
/// \brief A convenience for returning the ValueDescr objects (types and
/// shapes) from the batch.
std::vector<ValueDescr> GetDescriptors() const {
std::vector<ValueDescr> result;
for (const auto& value : this->values) {
result.emplace_back(value.descr());
}
return result;
}
std::string ToString() const;
ARROW_EXPORT friend void PrintTo(const ExecBatch&, std::ostream*);
};
inline bool operator==(const ExecBatch& l, const ExecBatch& r) { return l.Equals(r); }
inline bool operator!=(const ExecBatch& l, const ExecBatch& r) { return !l.Equals(r); }
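// Illustrative sketch (not part of the original header): assembling an
// ExecBatch from positional argument Datums. Make() infers the batch length
// from the array values (scalars act as constant columns).
inline Result<ExecBatch> ExampleMakeBatch(std::vector<Datum> args) {
  return ExecBatch::Make(std::move(args));
}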
/// \defgroup compute-call-function One-shot calls to compute functions
///
/// @{
/// \brief One-shot invoker for all types of functions.
///
/// Does kernel dispatch, argument checking, iteration of ChunkedArray inputs,
/// and wrapping of outputs.
ARROW_EXPORT
Result<Datum> CallFunction(const std::string& func_name, const std::vector<Datum>& args,
const FunctionOptions* options, ExecContext* ctx = NULLPTR);
/// \brief Variant of CallFunction which uses a function's default options.
///
/// NB: Some functions require FunctionOptions be provided.
ARROW_EXPORT
Result<Datum> CallFunction(const std::string& func_name, const std::vector<Datum>& args,
ExecContext* ctx = NULLPTR);
/// @}
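// Illustrative sketch (not part of the original header): dispatching a named
// kernel through CallFunction. This assumes an "add" function is registered
// in the default function registry, as in stock Arrow builds.
inline Result<Datum> ExampleAdd(const Datum& left, const Datum& right,
                                ExecContext* ctx = NULLPTR) {
  return CallFunction("add", {left, right}, ctx);
}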
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,322 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#if defined(ARROW_HAVE_AVX2)
#include <immintrin.h>
#endif
#include <atomic>
#include <cstdint>
#include <memory>
#include "arrow/compute/exec/partition_util.h"
#include "arrow/compute/exec/util.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "arrow/status.h"
namespace arrow {
namespace compute {
// A set of pre-generated bit masks from a 64-bit word.
//
// It is used to map selected bits of hash to a bit mask that will be used in
// a Bloom filter.
//
// These bit masks need to look random and need to have a similar fraction of
// bits set in order for the Bloom filter to have a low false-positive rate.
//
struct ARROW_EXPORT BloomFilterMasks {
// Generate all masks as a single bit vector. Each bit offset in this bit
// vector corresponds to a single mask.
// In each consecutive kBitsPerMask bits, there must be between
// kMinBitsSet and kMaxBitsSet bits set.
//
BloomFilterMasks();
inline uint64_t mask(int bit_offset) {
#if ARROW_LITTLE_ENDIAN
return (util::SafeLoadAs<uint64_t>(masks_ + bit_offset / 8) >> (bit_offset % 8)) &
kFullMask;
#else
return (BYTESWAP(util::SafeLoadAs<uint64_t>(masks_ + bit_offset / 8)) >>
(bit_offset % 8)) &
kFullMask;
#endif
}
// Masks are 57 bits long because then they can be accessed at an
// arbitrary bit offset using a single unaligned 64-bit load instruction.
//
static constexpr int kBitsPerMask = 57;
static constexpr uint64_t kFullMask = (1ULL << kBitsPerMask) - 1;
// Minimum and maximum number of bits set in each mask.
// This constraint is enforced when generating the bit masks.
// Values should be close to each other and chosen as to minimize a Bloom
// filter false positives rate.
//
static constexpr int kMinBitsSet = 4;
static constexpr int kMaxBitsSet = 5;
// Number of generated masks.
// Having more masks to choose from improves the false-positive rate of the
// Bloom filter, but also uses more memory, which may lead to more CPU cache
// misses.
// The chosen value results in using only a few cache-lines for mask lookups,
// while providing a good variety of available bit masks.
//
static constexpr int kLogNumMasks = 10;
static constexpr int kNumMasks = 1 << kLogNumMasks;
// Data of masks. Masks are stored in a single bit vector. Nth mask is
// kBitsPerMask bits starting at bit offset N.
//
static constexpr int kTotalBytes = (kNumMasks + 64) / 8;
uint8_t masks_[kTotalBytes];
};
// A variant of a blocked Bloom filter implementation.
// A Bloom filter is a data structure that provides approximate membership test
// functionality based only on the hash of the key. The membership test may
// return false positives but not false negatives. In the general case (for
// arbitrary key data types), this approximation saves both memory and lookup
// cost compared to an accurate membership test.
// An accurate test may sometimes still be cheaper for specific data types
// and inputs, e.g. integers from a small range.
//
// This blocked Bloom filter is optimized for use in hash joins, to achieve a
// good balance between the size of the filter, the cost of building and
// querying it, and the rate of false positives.
//
class ARROW_EXPORT BlockedBloomFilter {
friend class BloomFilterBuilder_SingleThreaded;
friend class BloomFilterBuilder_Parallel;
public:
BlockedBloomFilter() : log_num_blocks_(0), num_blocks_(0), blocks_(NULLPTR) {}
inline bool Find(uint64_t hash) const {
uint64_t m = mask(hash);
uint64_t b = blocks_[block_id(hash)];
return (b & m) == m;
}
// Uses SIMD if available for smaller Bloom filters.
// Uses memory prefetching for larger Bloom filters.
//
void Find(int64_t hardware_flags, int64_t num_rows, const uint32_t* hashes,
uint8_t* result_bit_vector, bool enable_prefetch = true) const;
void Find(int64_t hardware_flags, int64_t num_rows, const uint64_t* hashes,
uint8_t* result_bit_vector, bool enable_prefetch = true) const;
int log_num_blocks() const { return log_num_blocks_; }
int NumHashBitsUsed() const;
bool IsSameAs(const BlockedBloomFilter* other) const;
int64_t NumBitsSet() const;
// Folding of a block Bloom filter after the initial version
// has been built.
//
// One of the parameters for the creation of a Bloom filter is the number
// of bits allocated for it. The more bits allocated, the lower the
// probability of false positives. A good heuristic is to aim for half of
// the bits set in the constructed Bloom filter. This should result in a
// good trade-off between size (and the accompanying cost of memory
// accesses) and the false-positive rate.
//
// There might have been many duplicate keys in the input provided
// to the Bloom filter builder. In that case the resulting bit vector
// would be sparser than originally intended. It is possible to
// easily correct that and cut the size of the Bloom filter in half
// after it has already been constructed. The process for doing so is
// approximately equal to OR-ing bits from the upper and lower halves
// (the way we address these bits when inserting or querying a hash
// makes such folding in half possible).
//
// We will keep folding as long as the fraction of bits set is less
// than 1/4. The resulting bit vector density should be in the [1/4,
// 1/2) range.
//
void Fold();
private:
Status CreateEmpty(int64_t num_rows_to_insert, MemoryPool* pool);
inline void Insert(uint64_t hash) {
uint64_t m = mask(hash);
uint64_t& b = blocks_[block_id(hash)];
b |= m;
}
void Insert(int64_t hardware_flags, int64_t num_rows, const uint32_t* hashes);
void Insert(int64_t hardware_flags, int64_t num_rows, const uint64_t* hashes);
inline uint64_t mask(uint64_t hash) const {
// The lowest bits of hash are used to pick mask index.
//
int mask_id = static_cast<int>(hash & (BloomFilterMasks::kNumMasks - 1));
uint64_t result = masks_.mask(mask_id);
// The next set of hash bits is used to pick the amount of bit
// rotation of the mask.
//
int rotation = (hash >> BloomFilterMasks::kLogNumMasks) & 63;
result = ROTL64(result, rotation);
return result;
}
inline int64_t block_id(uint64_t hash) const {
// The next set of hash bits following the bits used to select a
// mask is used to pick block id (index of 64-bit word in a bit
// vector).
//
return (hash >> (BloomFilterMasks::kLogNumMasks + 6)) & (num_blocks_ - 1);
}
template <typename T>
inline void InsertImp(int64_t num_rows, const T* hashes);
template <typename T>
inline void FindImp(int64_t num_rows, const T* hashes, uint8_t* result_bit_vector,
bool enable_prefetch) const;
void SingleFold(int num_folds);
#if defined(ARROW_HAVE_AVX2)
inline __m256i mask_avx2(__m256i hash) const;
inline __m256i block_id_avx2(__m256i hash) const;
int64_t Insert_avx2(int64_t num_rows, const uint32_t* hashes);
int64_t Insert_avx2(int64_t num_rows, const uint64_t* hashes);
template <typename T>
int64_t InsertImp_avx2(int64_t num_rows, const T* hashes);
int64_t Find_avx2(int64_t num_rows, const uint32_t* hashes,
uint8_t* result_bit_vector) const;
int64_t Find_avx2(int64_t num_rows, const uint64_t* hashes,
uint8_t* result_bit_vector) const;
template <typename T>
int64_t FindImp_avx2(int64_t num_rows, const T* hashes,
uint8_t* result_bit_vector) const;
#endif
bool UsePrefetch() const {
return num_blocks_ * sizeof(uint64_t) > kPrefetchLimitBytes;
}
static constexpr int64_t kPrefetchLimitBytes = 256 * 1024;
static BloomFilterMasks masks_;
// Total number of bits used by block Bloom filter must be a power
// of 2.
//
int log_num_blocks_;
int64_t num_blocks_;
// Buffer allocated to store an array of power of 2 64-bit blocks.
//
std::shared_ptr<Buffer> buf_;
// Pointer to mutable data owned by Buffer
//
uint64_t* blocks_;
};
// We have two separate implementations of building a Bloom filter, multi-threaded and
// single-threaded.
//
// The single-threaded version is useful in two ways:
// a) It allows verifying the parallel implementation in tests (the
// single-threaded one is simpler and can be used as the source of truth).
// b) It is preferred for small and medium-sized Bloom filters, because it
// skips the extra synchronization steps of the parallel variant
// (partitioning and taking locks).
//
enum class ARROW_EXPORT BloomFilterBuildStrategy {
SINGLE_THREADED = 0,
PARALLEL = 1,
};
class ARROW_EXPORT BloomFilterBuilder {
public:
virtual ~BloomFilterBuilder() = default;
virtual Status Begin(size_t num_threads, int64_t hardware_flags, MemoryPool* pool,
int64_t num_rows, int64_t num_batches,
BlockedBloomFilter* build_target) = 0;
virtual int64_t num_tasks() const { return 0; }
virtual Status PushNextBatch(size_t thread_index, int num_rows,
const uint32_t* hashes) = 0;
virtual Status PushNextBatch(size_t thread_index, int num_rows,
const uint64_t* hashes) = 0;
virtual void CleanUp() {}
static std::unique_ptr<BloomFilterBuilder> Make(BloomFilterBuildStrategy strategy);
};
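// Illustrative sketch (not part of the original header): a single-threaded
// build of a Bloom filter from one batch of precomputed 64-bit hashes,
// followed by an optional fold to densify the bit vector. Assumes <vector>
// is available transitively, as elsewhere in this header.
inline Status ExampleBuildBloomFilter(const std::vector<uint64_t>& hashes,
                                      BlockedBloomFilter* filter,
                                      MemoryPool* pool = default_memory_pool()) {
  auto builder = BloomFilterBuilder::Make(BloomFilterBuildStrategy::SINGLE_THREADED);
  ARROW_RETURN_NOT_OK(builder->Begin(/*num_threads=*/1, /*hardware_flags=*/0, pool,
                                     static_cast<int64_t>(hashes.size()),
                                     /*num_batches=*/1, filter));
  ARROW_RETURN_NOT_OK(builder->PushNextBatch(/*thread_index=*/0,
                                             static_cast<int>(hashes.size()),
                                             hashes.data()));
  builder->CleanUp();
  filter->Fold();  // folds only while the fraction of set bits stays below 1/4
  return Status::OK();
}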
class BloomFilterBuilder_SingleThreaded : public BloomFilterBuilder {
public:
Status Begin(size_t num_threads, int64_t hardware_flags, MemoryPool* pool,
int64_t num_rows, int64_t num_batches,
BlockedBloomFilter* build_target) override;
Status PushNextBatch(size_t /*thread_index*/, int num_rows,
const uint32_t* hashes) override;
Status PushNextBatch(size_t /*thread_index*/, int num_rows,
const uint64_t* hashes) override;
private:
template <typename T>
void PushNextBatchImp(int num_rows, const T* hashes);
int64_t hardware_flags_;
BlockedBloomFilter* build_target_;
};
class BloomFilterBuilder_Parallel : public BloomFilterBuilder {
public:
Status Begin(size_t num_threads, int64_t hardware_flags, MemoryPool* pool,
int64_t num_rows, int64_t num_batches,
BlockedBloomFilter* build_target) override;
Status PushNextBatch(size_t thread_id, int num_rows, const uint32_t* hashes) override;
Status PushNextBatch(size_t thread_id, int num_rows, const uint64_t* hashes) override;
void CleanUp() override;
private:
template <typename T>
void PushNextBatchImp(size_t thread_id, int num_rows, const T* hashes);
int64_t hardware_flags_;
BlockedBloomFilter* build_target_;
int log_num_prtns_;
struct ThreadLocalState {
std::vector<uint32_t> partitioned_hashes_32;
std::vector<uint64_t> partitioned_hashes_64;
std::vector<uint16_t> partition_ranges;
std::vector<int> unprocessed_partition_ids;
};
std::vector<ThreadLocalState> thread_local_states_;
PartitionLocks prtn_locks_;
};
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,460 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <vector>
#include "arrow/compute/exec.h"
#include "arrow/compute/exec/util.h"
#include "arrow/compute/type_fwd.h"
#include "arrow/type_fwd.h"
#include "arrow/util/async_util.h"
#include "arrow/util/cancel.h"
#include "arrow/util/key_value_metadata.h"
#include "arrow/util/macros.h"
#include "arrow/util/optional.h"
#include "arrow/util/tracing.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace compute {
class ARROW_EXPORT ExecPlan : public std::enable_shared_from_this<ExecPlan> {
public:
using NodeVector = std::vector<ExecNode*>;
virtual ~ExecPlan() = default;
ExecContext* exec_context() const { return exec_context_; }
/// Make an empty exec plan
static Result<std::shared_ptr<ExecPlan>> Make(
ExecContext* = default_exec_context(),
std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
ExecNode* AddNode(std::unique_ptr<ExecNode> node);
template <typename Node, typename... Args>
Node* EmplaceNode(Args&&... args) {
std::unique_ptr<Node> node{new Node{std::forward<Args>(args)...}};
auto out = node.get();
AddNode(std::move(node));
return out;
}
/// The initial inputs
const NodeVector& sources() const;
/// The final outputs
const NodeVector& sinks() const;
Status Validate();
/// \brief Start producing on all nodes
///
/// Nodes are started in reverse topological order, such that any node
/// is started before all of its inputs.
Status StartProducing();
/// \brief Stop producing on all nodes
///
/// Nodes are stopped in topological order, such that any node
/// is stopped before all of its outputs.
void StopProducing();
/// \brief A future which will be marked finished when all nodes have stopped producing.
Future<> finished();
/// \brief Return whether the plan has non-empty metadata
bool HasMetadata() const;
/// \brief Return the plan's attached metadata
std::shared_ptr<const KeyValueMetadata> metadata() const;
std::string ToString() const;
protected:
ExecContext* exec_context_;
explicit ExecPlan(ExecContext* exec_context) : exec_context_(exec_context) {}
};
class ARROW_EXPORT ExecNode {
public:
using NodeVector = std::vector<ExecNode*>;
virtual ~ExecNode() = default;
virtual const char* kind_name() const = 0;
// The number of inputs/outputs expected by this node
int num_inputs() const { return static_cast<int>(inputs_.size()); }
int num_outputs() const { return num_outputs_; }
/// This node's predecessors in the exec plan
const NodeVector& inputs() const { return inputs_; }
/// \brief Labels identifying the function of each input.
const std::vector<std::string>& input_labels() const { return input_labels_; }
/// This node's successors in the exec plan
const NodeVector& outputs() const { return outputs_; }
/// The datatypes for batches produced by this node
const std::shared_ptr<Schema>& output_schema() const { return output_schema_; }
/// This node's exec plan
ExecPlan* plan() { return plan_; }
/// \brief An optional label, for display and debugging
///
/// There is no guarantee that this value is non-empty or unique.
const std::string& label() const { return label_; }
void SetLabel(std::string label) { label_ = std::move(label); }
Status Validate() const;
/// Upstream API:
/// These functions are called by input nodes that want to inform this node
/// about an updated condition (a new input batch, an error, an impending
/// end of stream).
///
/// Implementation rules:
/// - these may be called anytime after StartProducing() has succeeded
/// (and even during or after StopProducing())
/// - these may be called concurrently
/// - these are allowed to call back into PauseProducing(), ResumeProducing()
/// and StopProducing()
/// Transfer input batch to ExecNode
virtual void InputReceived(ExecNode* input, ExecBatch batch) = 0;
/// Signal error to ExecNode
virtual void ErrorReceived(ExecNode* input, Status error) = 0;
/// Mark the inputs finished after the given number of batches.
///
/// This may be called before all inputs are received. This simply fixes
/// the total number of incoming batches for an input, so that the ExecNode
/// knows when it has received all input, regardless of order.
virtual void InputFinished(ExecNode* input, int total_batches) = 0;
/// Lifecycle API:
/// - start / stop to initiate and terminate production
/// - pause / resume to apply backpressure
///
/// Implementation rules:
/// - StartProducing() should not recurse into the inputs, as it is
/// handled by ExecPlan::StartProducing()
/// - PauseProducing(), ResumeProducing(), StopProducing() may be called
/// concurrently (but only after StartProducing() has returned successfully)
/// - PauseProducing(), ResumeProducing(), StopProducing() may be called
/// by the downstream nodes' InputReceived(), ErrorReceived(), InputFinished()
/// methods
/// - StopProducing() should recurse into the inputs
/// - StopProducing() must be idempotent
// XXX What happens if StartProducing() calls an output's InputReceived()
// synchronously, and InputReceived() decides to call back into StopProducing()
// (or PauseProducing()) because it received enough data?
//
// Right now, since synchronous calls happen in both directions (input to
// output and then output to input), a node must be careful to be reentrant
// against synchronous calls from its output, *and* also concurrent calls from
// other threads. The most reliable solution is to update the internal state
// first, and notify outputs only at the end.
//
// Alternate rules:
// - StartProducing(), ResumeProducing() can call synchronously into
// its outputs' consuming methods (InputReceived() etc.)
// - InputReceived(), ErrorReceived(), InputFinished() can call asynchronously
// into its inputs' PauseProducing(), StopProducing()
//
// Alternate API:
// - InputReceived(), ErrorReceived(), InputFinished() return a ProductionHint
// enum: either None (default), PauseProducing, ResumeProducing, StopProducing
// - A method allows passing a ProductionHint asynchronously from an output node
// (replacing PauseProducing(), ResumeProducing(), StopProducing())
// Concurrent calls to PauseProducing and ResumeProducing can be hard to sequence
// as they may travel at different speeds through the plan.
//
// For example, consider a resume that comes quickly after a pause. If the source
// receives the resume before the pause the source may think the destination is full
// and halt production which would lead to deadlock.
//
// To resolve this a counter is sent for all calls to pause/resume. Only the call with
// the highest counter value is valid. So if a call to PauseProducing(5) comes after
// a call to ResumeProducing(6) then the source should continue producing.
//
// If a node has multiple outputs it should emit a new counter value to its inputs
// whenever any of its outputs changes which means the counters sent to inputs may be
// larger than the counters received on its outputs.
//
// A node with multiple outputs will also need to ensure it is applying
// backpressure if any of its outputs is asking to pause.
/// \brief Start producing
///
/// This must only be called once. If this fails, then other lifecycle
/// methods must not be called.
///
/// This is typically called automatically by ExecPlan::StartProducing().
virtual Status StartProducing() = 0;
/// \brief Pause producing temporarily
///
/// \param output Pointer to the output that is full
/// \param counter Counter used to sequence calls to pause/resume
///
/// This call is a hint that an output node is currently not willing
/// to receive data.
///
/// This may be called any number of times after StartProducing() succeeds.
/// However, the node is still free to produce data (which may be difficult
/// to prevent anyway if data is produced using multiple threads).
virtual void PauseProducing(ExecNode* output, int32_t counter) = 0;
/// \brief Resume producing after a temporary pause
///
/// \param output Pointer to the output that is now free
/// \param counter Counter used to sequence calls to pause/resume
///
/// This call is a hint that an output node is willing to receive data again.
///
/// This may be called any number of times after StartProducing() succeeds.
virtual void ResumeProducing(ExecNode* output, int32_t counter) = 0;
/// \brief Stop producing definitively to a single output
///
/// This call is a hint that an output node has completed and is not willing
/// to receive any further data.
virtual void StopProducing(ExecNode* output) = 0;
/// \brief Stop producing definitively to all outputs
virtual void StopProducing() = 0;
/// \brief A future which will be marked finished when this node has stopped producing.
virtual Future<> finished() = 0;
std::string ToString(int indent = 0) const;
protected:
ExecNode(ExecPlan* plan, NodeVector inputs, std::vector<std::string> input_labels,
std::shared_ptr<Schema> output_schema, int num_outputs);
// A helper method to send an error status to all outputs.
// Returns true if the status was an error.
bool ErrorIfNotOk(Status status);
/// Provide extra info to include in the string representation.
virtual std::string ToStringExtra(int indent) const;
ExecPlan* plan_;
std::string label_;
NodeVector inputs_;
std::vector<std::string> input_labels_;
std::shared_ptr<Schema> output_schema_;
int num_outputs_;
NodeVector outputs_;
// Future to sync finished
Future<> finished_ = Future<>::MakeFinished();
util::tracing::Span span_;
};
/// \brief MapNode is an ExecNode base class for nodes that apply a task such
/// as filter/project (see the SubmitTask method) to each given ExecBatch.
/// Such nodes have one input, one output, and are pure functions of the input.
///
/// A simple parallel runner is created with a "map_fn", which is just a
/// function that takes a batch and returns a batch. This runner also needs an
/// executor (a simple synchronous runner is used if there is no executor)
class MapNode : public ExecNode {
public:
MapNode(ExecPlan* plan, std::vector<ExecNode*> inputs,
std::shared_ptr<Schema> output_schema, bool async_mode);
void ErrorReceived(ExecNode* input, Status error) override;
void InputFinished(ExecNode* input, int total_batches) override;
Status StartProducing() override;
void PauseProducing(ExecNode* output, int32_t counter) override;
void ResumeProducing(ExecNode* output, int32_t counter) override;
void StopProducing(ExecNode* output) override;
void StopProducing() override;
Future<> finished() override;
protected:
void SubmitTask(std::function<Result<ExecBatch>(ExecBatch)> map_fn, ExecBatch batch);
void Finish(Status finish_st = Status::OK());
protected:
// Counter for the number of batches received
AtomicCounter input_counter_;
::arrow::internal::Executor* executor_;
// Variable used to cancel remaining tasks in the executor
StopSource stop_source_;
};
/// \brief An extensible registry for factories of ExecNodes
class ARROW_EXPORT ExecFactoryRegistry {
public:
using Factory = std::function<Result<ExecNode*>(ExecPlan*, std::vector<ExecNode*>,
const ExecNodeOptions&)>;
virtual ~ExecFactoryRegistry() = default;
/// \brief Get the named factory from this registry
///
/// will raise if factory_name is not found
virtual Result<Factory> GetFactory(const std::string& factory_name) = 0;
/// \brief Add a factory to this registry with the provided name
///
/// will raise if factory_name is already in the registry
virtual Status AddFactory(std::string factory_name, Factory factory) = 0;
};
/// The default registry, which includes built-in factories.
ARROW_EXPORT
ExecFactoryRegistry* default_exec_factory_registry();
/// \brief Construct an ExecNode using the named factory
inline Result<ExecNode*> MakeExecNode(
const std::string& factory_name, ExecPlan* plan, std::vector<ExecNode*> inputs,
const ExecNodeOptions& options,
ExecFactoryRegistry* registry = default_exec_factory_registry()) {
ARROW_ASSIGN_OR_RAISE(auto factory, registry->GetFactory(factory_name));
return factory(plan, std::move(inputs), options);
}
/// \brief Helper class for declaring sets of ExecNodes efficiently
///
/// A Declaration represents an unconstructed ExecNode (and potentially more since its
/// inputs may also be Declarations). The node can be constructed and added to a plan
/// with Declaration::AddToPlan, which will recursively construct any inputs as necessary.
struct ARROW_EXPORT Declaration {
using Input = util::Variant<ExecNode*, Declaration>;
Declaration(std::string factory_name, std::vector<Input> inputs,
std::shared_ptr<ExecNodeOptions> options, std::string label)
: factory_name{std::move(factory_name)},
inputs{std::move(inputs)},
options{std::move(options)},
label{std::move(label)} {}
template <typename Options>
Declaration(std::string factory_name, std::vector<Input> inputs, Options options,
std::string label)
: Declaration{std::move(factory_name), std::move(inputs),
std::shared_ptr<ExecNodeOptions>(
std::make_shared<Options>(std::move(options))),
std::move(label)} {}
template <typename Options>
Declaration(std::string factory_name, std::vector<Input> inputs, Options options)
: Declaration{std::move(factory_name), std::move(inputs), std::move(options),
/*label=*/""} {}
template <typename Options>
Declaration(std::string factory_name, Options options)
: Declaration{std::move(factory_name), {}, std::move(options), /*label=*/""} {}
template <typename Options>
Declaration(std::string factory_name, Options options, std::string label)
: Declaration{std::move(factory_name), {}, std::move(options), std::move(label)} {}
/// \brief Convenience factory for the common case of a simple sequence of nodes.
///
/// Each of decls will be appended to the inputs of the subsequent declaration,
/// and the final modified declaration will be returned.
///
/// Without this convenience factory, constructing a sequence would require explicit,
/// difficult-to-read nesting:
///
/// Declaration{"n3",
/// {
/// Declaration{"n2",
/// {
/// Declaration{"n1",
/// {
/// Declaration{"n0", N0Opts{}},
/// },
/// N1Opts{}},
/// },
/// N2Opts{}},
/// },
/// N3Opts{}};
///
/// An equivalent Declaration can be constructed more tersely using Sequence:
///
/// Declaration::Sequence({
/// {"n0", N0Opts{}},
/// {"n1", N1Opts{}},
/// {"n2", N2Opts{}},
/// {"n3", N3Opts{}},
/// });
static Declaration Sequence(std::vector<Declaration> decls);
Result<ExecNode*> AddToPlan(ExecPlan* plan, ExecFactoryRegistry* registry =
default_exec_factory_registry()) const;
std::string factory_name;
std::vector<Input> inputs;
std::shared_ptr<ExecNodeOptions> options;
std::string label;
};
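// A sketch of declaring and constructing a small linear plan (the factory names and
// options types here are assumptions, not prescribed by this header):
//
//   Declaration decl = Declaration::Sequence({
//       {"source", SourceNodeOptions{schema, batch_generator}},
//       {"filter", FilterNodeOptions{filter_expression}},
//       {"sink", SinkNodeOptions{&sink_generator}},
//   });
//   ARROW_ASSIGN_OR_RAISE(ExecNode* sink, decl.AddToPlan(plan.get()));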
/// \brief Wrap an ExecBatch generator in a RecordBatchReader.
///
/// The RecordBatchReader does not impose any ordering on emitted batches.
ARROW_EXPORT
std::shared_ptr<RecordBatchReader> MakeGeneratorReader(
std::shared_ptr<Schema>, std::function<Future<util::optional<ExecBatch>>()>,
MemoryPool*);
constexpr int kDefaultBackgroundMaxQ = 32;
constexpr int kDefaultBackgroundQRestart = 16;
/// \brief Make a generator of ExecBatches from a RecordBatchReader
///
/// Useful as the batch source for a source node in an ExecPlan
ARROW_EXPORT
Result<std::function<Future<util::optional<ExecBatch>>()>> MakeReaderGenerator(
std::shared_ptr<RecordBatchReader> reader, arrow::internal::Executor* io_executor,
int max_q = kDefaultBackgroundMaxQ, int q_restart = kDefaultBackgroundQRestart);
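// A sketch of feeding a RecordBatchReader into a plan through a source node (the
// "source" factory name and SourceNodeOptions are assumptions):
//
//   ARROW_ASSIGN_OR_RAISE(
//       auto batch_gen, MakeReaderGenerator(std::move(reader), io_executor));
//   ARROW_ASSIGN_OR_RAISE(
//       ExecNode* source,
//       MakeExecNode("source", plan.get(), {},
//                    SourceNodeOptions{reader_schema, std::move(batch_gen)}));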
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,283 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This API is EXPERIMENTAL.
#pragma once
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "arrow/compute/type_fwd.h"
#include "arrow/datum.h"
#include "arrow/type_fwd.h"
#include "arrow/util/small_vector.h"
#include "arrow/util/variant.h"
namespace arrow {
namespace compute {
/// \defgroup expression-core Expressions to describe transformations in execution plans
///
/// @{
/// An unbound expression which maps a single Datum to another Datum.
/// An expression is one of
/// - A literal Datum.
/// - A reference to a single (potentially nested) field of the input Datum.
/// - A call to a compute function, with arguments specified by other Expressions.
class ARROW_EXPORT Expression {
public:
struct Call {
std::string function_name;
std::vector<Expression> arguments;
std::shared_ptr<FunctionOptions> options;
// Cached hash value
size_t hash;
// post-Bind properties:
std::shared_ptr<Function> function;
const Kernel* kernel = NULLPTR;
std::shared_ptr<KernelState> kernel_state;
ValueDescr descr;
void ComputeHash();
};
std::string ToString() const;
bool Equals(const Expression& other) const;
size_t hash() const;
struct Hash {
size_t operator()(const Expression& expr) const { return expr.hash(); }
};
/// Bind this expression to the given input type, looking up Kernels and field types.
/// Some expression simplification may be performed and implicit casts will be inserted.
/// Any state necessary for execution will be initialized and returned.
Result<Expression> Bind(const ValueDescr& in, ExecContext* = NULLPTR) const;
Result<Expression> Bind(const Schema& in_schema, ExecContext* = NULLPTR) const;
// XXX someday
// Clone all KernelState in this bound expression. If any function referenced by this
// expression has mutable KernelState, it is not safe to execute or apply simplification
// passes to it (or copies of it!) from multiple threads. Cloning state produces new
// KernelStates where necessary to ensure that Expressions may be manipulated safely
// on multiple threads.
// Result<ExpressionState> CloneState() const;
// Status SetState(ExpressionState);
/// Return true if all of this expression's field references have an explicit
/// ValueDescr and all of its functions' kernels have been looked up.
bool IsBound() const;
/// Return true if this expression is composed only of Scalar literals, field
/// references, and calls to ScalarFunctions.
bool IsScalarExpression() const;
/// Return true if this expression is literal and entirely null.
bool IsNullLiteral() const;
/// Return true if this expression could evaluate to true. Will return true for any
/// unbound, non-boolean, or unsimplified Expression.
bool IsSatisfiable() const;
// XXX someday
// Result<PipelineGraph> GetPipelines();
/// Access a Call or return nullptr if this expression is not a call
const Call* call() const;
/// Access a Datum or return nullptr if this expression is not a literal
const Datum* literal() const;
/// Access a FieldRef or return nullptr if this expression is not a field_ref
const FieldRef* field_ref() const;
/// The type and shape to which this expression will evaluate
ValueDescr descr() const;
const std::shared_ptr<DataType>& type() const;
// XXX someday
// NullGeneralization::type nullable() const;
struct Parameter {
FieldRef ref;
// post-bind properties
ValueDescr descr;
::arrow::internal::SmallVector<int, 2> indices;
};
const Parameter* parameter() const;
Expression() = default;
explicit Expression(Call call);
explicit Expression(Datum literal);
explicit Expression(Parameter parameter);
private:
using Impl = util::Variant<Datum, Parameter, Call>;
std::shared_ptr<Impl> impl_;
ARROW_EXPORT friend bool Identical(const Expression& l, const Expression& r);
ARROW_EXPORT friend void PrintTo(const Expression&, std::ostream*);
};
inline bool operator==(const Expression& l, const Expression& r) { return l.Equals(r); }
inline bool operator!=(const Expression& l, const Expression& r) { return !l.Equals(r); }
// Factories
ARROW_EXPORT
Expression literal(Datum lit);
template <typename Arg>
Expression literal(Arg&& arg) {
return literal(Datum(std::forward<Arg>(arg)));
}
ARROW_EXPORT
Expression field_ref(FieldRef ref);
ARROW_EXPORT
Expression call(std::string function, std::vector<Expression> arguments,
std::shared_ptr<FunctionOptions> options = NULLPTR);
template <typename Options, typename = typename std::enable_if<
std::is_base_of<FunctionOptions, Options>::value>::type>
Expression call(std::string function, std::vector<Expression> arguments,
Options options) {
return call(std::move(function), std::move(arguments),
std::make_shared<Options>(std::move(options)));
}
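// For example, the predicate "a < 3 and b is valid" can be assembled from the
// factories declared later in this file (a sketch):
//
//   Expression pred = and_(less(field_ref("a"), literal(3)),
//                          is_valid(field_ref("b")));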
/// Assemble a list of all fields referenced by an Expression at any depth.
ARROW_EXPORT
std::vector<FieldRef> FieldsInExpression(const Expression&);
/// Check if the expression references any fields.
ARROW_EXPORT
bool ExpressionHasFieldRefs(const Expression&);
struct ARROW_EXPORT KnownFieldValues;
/// Assemble a mapping from field references to known values. This derives known values
/// from "equal" and "is_null" Expressions referencing a field and a literal.
ARROW_EXPORT
Result<KnownFieldValues> ExtractKnownFieldValues(
const Expression& guaranteed_true_predicate);
/// @}
/// \defgroup expression-passes Functions for modification of Expressions
///
/// @{
///
/// These transform bound expressions. Some transforms utilize a guarantee, which is
/// provided as an Expression which is guaranteed to evaluate to true. The
/// guaranteed_true_predicate need not be bound, but canonicalization is currently
/// deferred to producers of guarantees. For example, in order to be recognized as a
/// guarantee on a field value, an Expression must be a call to "equal" with field_ref LHS
/// and literal RHS. Flipping the arguments, "is_in" with a one-long value_set, ... or
/// other semantically identical Expressions will not be recognized.
/// Weak canonicalization which establishes guarantees for subsequent passes. Even
/// equivalent Expressions may result in different canonicalized expressions.
/// TODO this could be a strong canonicalization
ARROW_EXPORT
Result<Expression> Canonicalize(Expression, ExecContext* = NULLPTR);
/// Simplify Expressions based on literal arguments (for example, add(null, x) will
/// always be null, so the call is replaced with a null literal). Includes early
/// evaluation of all calls whose arguments are entirely literal.
ARROW_EXPORT
Result<Expression> FoldConstants(Expression);
/// Simplify Expressions by substituting the known values of the fields they reference.
ARROW_EXPORT
Result<Expression> ReplaceFieldsWithKnownValues(const KnownFieldValues& known_values,
Expression);
/// Simplify an expression by replacing subexpressions based on a guarantee:
/// a boolean expression which is guaranteed to evaluate to `true`. For example, this is
/// used to remove redundant function calls from a filter expression or to replace a
/// reference to a constant-value field with a literal.
ARROW_EXPORT
Result<Expression> SimplifyWithGuarantee(Expression,
const Expression& guaranteed_true_predicate);
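// For example, given the guarantee x == 7, a filter like (x == 7 and y > 0) should
// simplify to (y > 0) (a sketch; "filter" is an assumed bound Expression):
//
//   ARROW_ASSIGN_OR_RAISE(
//       Expression simplified,
//       SimplifyWithGuarantee(filter, equal(field_ref("x"), literal(7))));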
/// @}
// Execution
/// Create an ExecBatch suitable for passing to ExecuteScalarExpression() from a
/// RecordBatch which may have missing or incorrectly ordered columns.
/// Missing fields will be replaced with null scalars.
ARROW_EXPORT Result<ExecBatch> MakeExecBatch(const Schema& full_schema,
const Datum& partial);
/// Execute a scalar expression against the provided state and input ExecBatch. This
/// expression must be bound.
ARROW_EXPORT
Result<Datum> ExecuteScalarExpression(const Expression&, const ExecBatch& input,
ExecContext* = NULLPTR);
/// Convenience function for invoking against a RecordBatch
ARROW_EXPORT
Result<Datum> ExecuteScalarExpression(const Expression&, const Schema& full_schema,
const Datum& partial_input, ExecContext* = NULLPTR);
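// A minimal sketch of binding and evaluating a scalar expression against a
// RecordBatch (the expr/schema/record_batch variables are assumptions):
//
//   ARROW_ASSIGN_OR_RAISE(Expression bound, expr.Bind(*schema));
//   ARROW_ASSIGN_OR_RAISE(ExecBatch input, MakeExecBatch(*schema, record_batch));
//   ARROW_ASSIGN_OR_RAISE(Datum result, ExecuteScalarExpression(bound, input));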
// Serialization
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> Serialize(const Expression&);
ARROW_EXPORT
Result<Expression> Deserialize(std::shared_ptr<Buffer>);
/// \defgroup expression-convenience Functions for convenient expression creation
///
/// @{
ARROW_EXPORT Expression project(std::vector<Expression> values,
std::vector<std::string> names);
ARROW_EXPORT Expression equal(Expression lhs, Expression rhs);
ARROW_EXPORT Expression not_equal(Expression lhs, Expression rhs);
ARROW_EXPORT Expression less(Expression lhs, Expression rhs);
ARROW_EXPORT Expression less_equal(Expression lhs, Expression rhs);
ARROW_EXPORT Expression greater(Expression lhs, Expression rhs);
ARROW_EXPORT Expression greater_equal(Expression lhs, Expression rhs);
ARROW_EXPORT Expression is_null(Expression lhs, bool nan_is_null = false);
ARROW_EXPORT Expression is_valid(Expression lhs);
ARROW_EXPORT Expression and_(Expression lhs, Expression rhs);
ARROW_EXPORT Expression and_(const std::vector<Expression>&);
ARROW_EXPORT Expression or_(Expression lhs, Expression rhs);
ARROW_EXPORT Expression or_(const std::vector<Expression>&);
ARROW_EXPORT Expression not_(Expression operand);
/// @}
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,122 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <functional>
#include <memory>
#include <vector>
#include "arrow/compute/exec/options.h"
#include "arrow/compute/exec/schema_util.h"
#include "arrow/compute/exec/task_util.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/tracing_internal.h"
namespace arrow {
namespace compute {
class ARROW_EXPORT HashJoinSchema {
public:
Status Init(JoinType join_type, const Schema& left_schema,
const std::vector<FieldRef>& left_keys, const Schema& right_schema,
const std::vector<FieldRef>& right_keys, const Expression& filter,
const std::string& left_field_name_prefix,
const std::string& right_field_name_prefix);
Status Init(JoinType join_type, const Schema& left_schema,
const std::vector<FieldRef>& left_keys,
const std::vector<FieldRef>& left_output, const Schema& right_schema,
const std::vector<FieldRef>& right_keys,
const std::vector<FieldRef>& right_output, const Expression& filter,
const std::string& left_field_name_prefix,
const std::string& right_field_name_prefix);
static Status ValidateSchemas(JoinType join_type, const Schema& left_schema,
const std::vector<FieldRef>& left_keys,
const std::vector<FieldRef>& left_output,
const Schema& right_schema,
const std::vector<FieldRef>& right_keys,
const std::vector<FieldRef>& right_output,
const std::string& left_field_name_prefix,
const std::string& right_field_name_prefix);
Result<Expression> BindFilter(Expression filter, const Schema& left_schema,
const Schema& right_schema);
std::shared_ptr<Schema> MakeOutputSchema(const std::string& left_field_name_suffix,
const std::string& right_field_name_suffix);
bool LeftPayloadIsEmpty() { return PayloadIsEmpty(0); }
bool RightPayloadIsEmpty() { return PayloadIsEmpty(1); }
static int kMissingField() {
return SchemaProjectionMaps<HashJoinProjection>::kMissingField;
}
SchemaProjectionMaps<HashJoinProjection> proj_maps[2];
private:
static bool IsTypeSupported(const DataType& type);
Status CollectFilterColumns(std::vector<FieldRef>& left_filter,
std::vector<FieldRef>& right_filter,
const Expression& filter, const Schema& left_schema,
const Schema& right_schema);
Expression RewriteFilterToUseFilterSchema(int right_filter_offset,
const SchemaProjectionMap& left_to_filter,
const SchemaProjectionMap& right_to_filter,
const Expression& filter);
bool PayloadIsEmpty(int side) {
ARROW_DCHECK(side == 0 || side == 1);
return proj_maps[side].num_cols(HashJoinProjection::PAYLOAD) == 0;
}
static Result<std::vector<FieldRef>> ComputePayload(const Schema& schema,
const std::vector<FieldRef>& output,
const std::vector<FieldRef>& filter,
const std::vector<FieldRef>& key);
};
class HashJoinImpl {
public:
using OutputBatchCallback = std::function<void(ExecBatch)>;
using FinishedCallback = std::function<void(int64_t)>;
virtual ~HashJoinImpl() = default;
virtual Status Init(ExecContext* ctx, JoinType join_type, bool use_sync_execution,
size_t num_threads, HashJoinSchema* schema_mgr,
std::vector<JoinKeyCmp> key_cmp, Expression filter,
OutputBatchCallback output_batch_callback,
FinishedCallback finished_callback,
TaskScheduler::ScheduleImpl schedule_task_callback) = 0;
virtual Status InputReceived(size_t thread_index, int side, ExecBatch batch) = 0;
virtual Status InputFinished(size_t thread_index, int side) = 0;
virtual void Abort(TaskScheduler::AbortContinuationImpl pos_abort_callback) = 0;
static Result<std::unique_ptr<HashJoinImpl>> MakeBasic();
protected:
util::tracing::Span span_;
};
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,315 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <unordered_map>
#include "arrow/compute/exec.h"
#include "arrow/compute/exec/schema_util.h"
#include "arrow/compute/kernels/row_encoder.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
// This file contains hash join logic related to handling of dictionary encoded key
// columns.
//
// A key column from the probe side of the join can be matched against a key column
// from the build side of the join, as long as the underlying value types are equal.
// That means that:
// - both scalars and arrays can be used and even mixed in the same column
// - a dictionary column can be matched against a non-dictionary column if the
// underlying value types are equal
// - a dictionary column can be matched against a dictionary column with a different
// index type, and potentially using a different dictionary, if the underlying value
// types are equal
//
// We currently require in hash join that for all dictionary encoded columns, the same
// dictionary is used in all input exec batches.
//
// In order to allow matching columns with different dictionaries, different dictionary
// index types, and dictionary key against non-dictionary key, internally comparisons will
// be evaluated after remapping values on both sides of the join to a common
// representation (which will be called "unified representation"). This common
// representation is a column of int32() type (not a dictionary column). It represents an
// index in the unified dictionary computed for the (only) dictionary present on build
// side (an empty dictionary is still created for an empty build side). A null value
// is always represented in this common representation as a null int32 value; the
// unified dictionary will never contain a null value (so there is no ambiguity
// between representing nulls as an index to a null entry in the dictionary and as a
// null index).
//
// The unified dictionary represents the values present on the build side. There may
// be values on the probe side that are not present in it. All such values that are
// not null are mapped in the common representation to a special constant
// kMissingValueId.
//
namespace arrow {
namespace compute {
using internal::RowEncoder;
/// Helper class with operations that are stateless and common to processing of dictionary
/// keys on both build and probe side.
class HashJoinDictUtil {
public:
// Null values in the unified representation are always represented as nulls whose
// corresponding integer is set to this constant
static constexpr int32_t kNullId = 0;
// Constant representing a non-null value that is missing from the build side, in the
// unified representation.
static constexpr int32_t kMissingValueId = -1;
// Check if the data types of a corresponding pair of key columns on the build and
// probe side are compatible
static bool KeyDataTypesValid(const std::shared_ptr<DataType>& probe_data_type,
const std::shared_ptr<DataType>& build_data_type);
// The input must be a dictionary array or a dictionary scalar.
// A precomputed lookup table, provided here as an int32() array, will be used to
// remap the input indices to the unified representation.
//
static Result<std::shared_ptr<ArrayData>> IndexRemapUsingLUT(
ExecContext* ctx, const Datum& indices, int64_t batch_length,
const std::shared_ptr<ArrayData>& map_array,
const std::shared_ptr<DataType>& data_type);
// Return an int32() array that contains the indices of the input dictionary array or
// scalar after type casting.
static Result<std::shared_ptr<ArrayData>> ConvertToInt32(
const std::shared_ptr<DataType>& from_type, const Datum& input,
int64_t batch_length, ExecContext* ctx);
// Return an array that contains elements of input int32() array after casting to a
// given integer type. This is used for mapping unified representation stored in the
// hash table on build side back to original input data type of hash join, when
// outputting hash join results to parent exec node.
//
static Result<std::shared_ptr<ArrayData>> ConvertFromInt32(
const std::shared_ptr<DataType>& to_type, const Datum& input, int64_t batch_length,
ExecContext* ctx);
// Return dictionary referenced in either dictionary array or dictionary scalar
static std::shared_ptr<Array> ExtractDictionary(const Datum& data);
};
/// Implements processing of dictionary arrays/scalars in key columns on the build side of
/// a hash join.
/// Each instance of this class corresponds to a single column and stores and
/// processes only the information related to that column.
/// Const methods are thread-safe, non-const methods are not (the caller must make sure
/// that only one thread at any time will access them).
///
class HashJoinDictBuild {
public:
// Returns true if the key column (described in input by its data type) requires any
// pre- or post-processing related to handling dictionaries.
//
static bool KeyNeedsProcessing(const std::shared_ptr<DataType>& build_data_type) {
return (build_data_type->id() == Type::DICTIONARY);
}
// Data type of unified representation
static std::shared_ptr<DataType> DataTypeAfterRemapping() { return int32(); }
// Should be called only once in hash join, before processing any build or probe
// batches.
//
// Takes a pointer to the dictionary for the corresponding key column on the build
// side as an input. If the build side is empty, it still needs to be called, but with
// the dictionary pointer set to null.
//
// Currently it is required that all input batches on the build side share the same
// dictionary. During pre-processing of each input batch, the dictionary will be
// checked and an error will be returned if it is different from the one provided in
// the call to this method.
//
// Unifies the dictionary. The order of the values is still preserved.
// Null and duplicate entries are removed. If the dictionary is already unified, its
// copy will be produced and stored within this class.
//
// Prepares the mapping from ids within original dictionary to the ids in the resulting
// dictionary. This is used later on to pre-process (map to unified representation) key
// column on build side.
//
// Prepares the reverse mapping (in the form of hash table) from values to the ids in
// the resulting dictionary. This will be used later on to pre-process (map to unified
// representation) key column on probe side. Values on probe side that are not present
// in the original dictionary will be mapped to a special constant kMissingValueId. An
// exception is made for nulls, which are always mapped to nulls (both when a null is
// represented as a dictionary id pointing to a null entry and as a null dictionary id).
//
Status Init(ExecContext* ctx, std::shared_ptr<Array> dictionary,
std::shared_ptr<DataType> index_type, std::shared_ptr<DataType> value_type);
// Remap array or scalar values into unified representation (array of int32()).
// Outputs kMissingValueId if input value is not found in the unified dictionary.
// Outputs null for null input value (with corresponding data set to kNullId).
//
Result<std::shared_ptr<ArrayData>> RemapInputValues(ExecContext* ctx,
const Datum& values,
int64_t batch_length) const;
// Remap dictionary array or dictionary scalar on build side to unified representation.
// Dictionary referenced in the input must match the dictionary that was
// given during initialization.
// The output is a dictionary array that references unified dictionary.
//
Result<std::shared_ptr<ArrayData>> RemapInput(
ExecContext* ctx, const Datum& indices, int64_t batch_length,
const std::shared_ptr<DataType>& data_type) const;
// Outputs dictionary array referencing unified dictionary, given an array with 32-bit
// ids.
// Used to post-process values looked up in a hash table on build side of the hash join
// before outputting to the parent exec node.
//
Result<std::shared_ptr<ArrayData>> RemapOutput(const ArrayData& indices32Bit,
ExecContext* ctx) const;
// Release shared pointers and memory
void CleanUp();
private:
// Data type of dictionary ids for the input dictionary on build side
std::shared_ptr<DataType> index_type_;
// Data type of values for the input dictionary on build side
std::shared_ptr<DataType> value_type_;
// Mapping from (encoded as string) values to the ids in unified dictionary
std::unordered_map<std::string, int32_t> hash_table_;
// Mapping from input dictionary ids to unified dictionary ids
std::shared_ptr<ArrayData> remapped_ids_;
// Input dictionary
std::shared_ptr<Array> dictionary_;
// Unified dictionary
std::shared_ptr<ArrayData> unified_dictionary_;
};
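// A usage sketch for a single dictionary-encoded key column on the build side (the
// ctx/dictionary/type/values variables are assumptions):
//
//   HashJoinDictBuild build;
//   ARROW_RETURN_NOT_OK(build.Init(ctx, dictionary, index_type, value_type));
//   // Map key values on either side of the join to the unified representation:
//   ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ArrayData> unified,
//                         build.RemapInputValues(ctx, values, batch_length));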
/// Implements processing of dictionary arrays/scalars in key columns on the probe side of
/// a hash join.
/// Each instance of this class corresponds to a single column and stores and
/// processes only the information related to that column.
/// It is not thread-safe - every participating thread should use its own instance of
/// this class.
///
class HashJoinDictProbe {
public:
static bool KeyNeedsProcessing(const std::shared_ptr<DataType>& probe_data_type,
const std::shared_ptr<DataType>& build_data_type);
// Data type of the result of remapping input key column.
//
// The result of remapping is what is used in hash join for matching keys on build and
// probe side. The exact data types may be different, as described below, and therefore
// a common representation is needed for simplifying comparisons of pairs of keys on
// both sides.
//
// We support matching a key of non-dictionary type with a key of dictionary type, as
// long as the underlying value types are equal. We support matching when both keys
// are of dictionary type, regardless of whether the underlying dictionary index types
// are the same or not.
//
static std::shared_ptr<DataType> DataTypeAfterRemapping(
const std::shared_ptr<DataType>& build_data_type);
// Should only be called if KeyNeedsProcessing method returns true for a pair of
// corresponding key columns from build and probe side.
// Converts values in order to match the common representation for
// both build and probe side used in hash table comparison.
// Supports arrays and scalars as input.
// Argument opt_build_side should be null if dictionary key on probe side is matched
// with non-dictionary key on build side.
//
Result<std::shared_ptr<ArrayData>> RemapInput(
const HashJoinDictBuild* opt_build_side, const Datum& data, int64_t batch_length,
const std::shared_ptr<DataType>& probe_data_type,
const std::shared_ptr<DataType>& build_data_type, ExecContext* ctx);
void CleanUp();
private:
// May be null if the probe side key is non-dictionary. Otherwise it is used to verify
// that only a single dictionary is referenced in the exec batches on the probe side of
// the hash join.
std::shared_ptr<Array> dictionary_;
// Mapping from dictionary on probe side of hash join (if it is used) to unified
// representation.
std::shared_ptr<ArrayData> remapped_ids_;
// Encoder of key columns that uses unified representation instead of original data type
// for key columns that need to use it (have dictionaries on either side of the join).
internal::RowEncoder encoder_;
};
// Encapsulates dictionary handling logic for build side of hash join.
//
class HashJoinDictBuildMulti {
public:
Status Init(const SchemaProjectionMaps<HashJoinProjection>& proj_map,
const ExecBatch* opt_non_empty_batch, ExecContext* ctx);
static void InitEncoder(const SchemaProjectionMaps<HashJoinProjection>& proj_map,
RowEncoder* encoder, ExecContext* ctx);
Status EncodeBatch(size_t thread_index,
const SchemaProjectionMaps<HashJoinProjection>& proj_map,
const ExecBatch& batch, RowEncoder* encoder, ExecContext* ctx) const;
Status PostDecode(const SchemaProjectionMaps<HashJoinProjection>& proj_map,
ExecBatch* decoded_key_batch, ExecContext* ctx);
const HashJoinDictBuild& get_dict_build(int icol) const { return remap_imp_[icol]; }
private:
std::vector<bool> needs_remap_;
std::vector<HashJoinDictBuild> remap_imp_;
};
// Encapsulates dictionary handling logic for probe side of hash join
//
class HashJoinDictProbeMulti {
public:
void Init(size_t num_threads);
bool BatchRemapNeeded(size_t thread_index,
const SchemaProjectionMaps<HashJoinProjection>& proj_map_probe,
const SchemaProjectionMaps<HashJoinProjection>& proj_map_build,
ExecContext* ctx);
Status EncodeBatch(size_t thread_index,
const SchemaProjectionMaps<HashJoinProjection>& proj_map_probe,
const SchemaProjectionMaps<HashJoinProjection>& proj_map_build,
const HashJoinDictBuildMulti& dict_build, const ExecBatch& batch,
RowEncoder** out_encoder, ExecBatch* opt_out_key_batch,
ExecContext* ctx);
private:
void InitLocalStateIfNeeded(
size_t thread_index, const SchemaProjectionMaps<HashJoinProjection>& proj_map_probe,
const SchemaProjectionMaps<HashJoinProjection>& proj_map_build, ExecContext* ctx);
static void InitEncoder(const SchemaProjectionMaps<HashJoinProjection>& proj_map_probe,
const SchemaProjectionMaps<HashJoinProjection>& proj_map_build,
RowEncoder* encoder, ExecContext* ctx);
struct ThreadLocalState {
bool is_initialized;
// Whether any key column needs remapping (because of dictionaries used) before doing
// join hash table lookups
bool any_needs_remap;
// Whether each key column needs remapping before doing join hash table lookups
std::vector<bool> needs_remap;
std::vector<HashJoinDictProbe> remap_imp;
// Encoder of key columns that uses unified representation instead of original data
// type for key columns that need to use it (have dictionaries on either side of the
// join).
RowEncoder post_remap_encoder;
};
std::vector<ThreadLocalState> local_states_;
};
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,70 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <flatbuffers/flatbuffers.h>
#include "arrow/compute/exec/exec_plan.h"
#include "arrow/compute/exec/expression.h"
#include "arrow/compute/exec/options.h"
#include "arrow/datum.h"
#include "arrow/result.h"
#include "arrow/util/visibility.h"
#include "generated/Plan_generated.h"
namespace arrow {
namespace flatbuf = org::apache::arrow::flatbuf;
namespace compute {
namespace ir = org::apache::arrow::computeir::flatbuf;
class ARROW_EXPORT CatalogSourceNodeOptions : public ExecNodeOptions {
public:
CatalogSourceNodeOptions(std::string name, std::shared_ptr<Schema> schema,
Expression filter = literal(true),
std::vector<FieldRef> projection = {})
: name(std::move(name)),
schema(std::move(schema)),
filter(std::move(filter)),
projection(std::move(projection)) {}
std::string name;
std::shared_ptr<Schema> schema;
Expression filter;
std::vector<FieldRef> projection;
};
ARROW_EXPORT
Result<Datum> Convert(const ir::Literal& lit);
ARROW_EXPORT
Result<Expression> Convert(const ir::Expression& expr);
ARROW_EXPORT
Result<Declaration> Convert(const ir::Relation& rel);
template <typename Ir>
auto ConvertRoot(const Buffer& buf) -> decltype(Convert(std::declval<Ir>())) {
return Convert(*flatbuffers::GetRoot<Ir>(buf.data()));
}
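// A usage sketch (assuming "buf" is a Buffer holding a serialized ir::Relation):
//
//   ARROW_ASSIGN_OR_RAISE(Declaration decl, ConvertRoot<ir::Relation>(buf));
//   ARROW_ASSIGN_OR_RAISE(ExecNode* node, decl.AddToPlan(plan.get()));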
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,134 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include "arrow/compute/exec/key_encode.h"
#include "arrow/compute/exec/util.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "arrow/status.h"
namespace arrow {
namespace compute {
class KeyCompare {
public:
// Returns a single selection vector (of 16-bit row ids) of rows that failed comparison.
// If there is an input selection on the left, the resulting selection is a filtered
// image of the input selection.
static void CompareColumnsToRows(
uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null,
const uint32_t* left_to_right_map, KeyEncoder::KeyEncoderContext* ctx,
uint32_t* out_num_rows, uint16_t* out_sel_left_maybe_same,
const std::vector<KeyColumnArray>& cols, const KeyEncoder::KeyRowArray& rows);
private:
template <bool use_selection>
static void NullUpdateColumnToRow(uint32_t id_col, uint32_t num_rows_to_compare,
const uint16_t* sel_left_maybe_null,
const uint32_t* left_to_right_map,
KeyEncoder::KeyEncoderContext* ctx,
const KeyColumnArray& col,
const KeyEncoder::KeyRowArray& rows,
uint8_t* match_bytevector);
template <bool use_selection, class COMPARE_FN>
static void CompareBinaryColumnToRowHelper(
uint32_t offset_within_row, uint32_t first_row_to_compare,
uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null,
const uint32_t* left_to_right_map, KeyEncoder::KeyEncoderContext* ctx,
const KeyColumnArray& col, const KeyEncoder::KeyRowArray& rows,
uint8_t* match_bytevector, COMPARE_FN compare_fn);
template <bool use_selection>
static void CompareBinaryColumnToRow(
uint32_t offset_within_row, uint32_t num_rows_to_compare,
const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
KeyEncoder::KeyEncoderContext* ctx, const KeyColumnArray& col,
const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector);
template <bool use_selection, bool is_first_varbinary_col>
static void CompareVarBinaryColumnToRow(
uint32_t id_varlen_col, uint32_t num_rows_to_compare,
const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
KeyEncoder::KeyEncoderContext* ctx, const KeyColumnArray& col,
const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector);
static void AndByteVectors(KeyEncoder::KeyEncoderContext* ctx, uint32_t num_elements,
uint8_t* bytevector_A, const uint8_t* bytevector_B);
#if defined(ARROW_HAVE_AVX2)
template <bool use_selection>
static uint32_t NullUpdateColumnToRowImp_avx2(
uint32_t id_col, uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null,
const uint32_t* left_to_right_map, KeyEncoder::KeyEncoderContext* ctx,
const KeyColumnArray& col, const KeyEncoder::KeyRowArray& rows,
uint8_t* match_bytevector);
template <bool use_selection, class COMPARE8_FN>
static uint32_t CompareBinaryColumnToRowHelper_avx2(
uint32_t offset_within_row, uint32_t num_rows_to_compare,
const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
KeyEncoder::KeyEncoderContext* ctx, const KeyColumnArray& col,
const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector,
COMPARE8_FN compare8_fn);
template <bool use_selection>
static uint32_t CompareBinaryColumnToRowImp_avx2(
uint32_t offset_within_row, uint32_t num_rows_to_compare,
const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
KeyEncoder::KeyEncoderContext* ctx, const KeyColumnArray& col,
const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector);
template <bool use_selection, bool is_first_varbinary_col>
static void CompareVarBinaryColumnToRowImp_avx2(
uint32_t id_varlen_col, uint32_t num_rows_to_compare,
const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
KeyEncoder::KeyEncoderContext* ctx, const KeyColumnArray& col,
const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector);
static uint32_t AndByteVectors_avx2(uint32_t num_elements, uint8_t* bytevector_A,
const uint8_t* bytevector_B);
static uint32_t NullUpdateColumnToRow_avx2(
bool use_selection, uint32_t id_col, uint32_t num_rows_to_compare,
const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
KeyEncoder::KeyEncoderContext* ctx, const KeyColumnArray& col,
const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector);
static uint32_t CompareBinaryColumnToRow_avx2(
bool use_selection, uint32_t offset_within_row, uint32_t num_rows_to_compare,
const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
KeyEncoder::KeyEncoderContext* ctx, const KeyColumnArray& col,
const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector);
static void CompareVarBinaryColumnToRow_avx2(
bool use_selection, bool is_first_varbinary_col, uint32_t id_varlen_col,
uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null,
const uint32_t* left_to_right_map, KeyEncoder::KeyEncoderContext* ctx,
const KeyColumnArray& col, const KeyEncoder::KeyRowArray& rows,
uint8_t* match_bytevector);
#endif
};
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,500 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <vector>
#include "arrow/compute/exec/util.h"
#include "arrow/compute/light_array.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/bit_util.h"
namespace arrow {
namespace compute {
/// Converts between a key representation as a collection of arrays for
/// individual columns and another representation as a single array of rows
/// combining data from all columns into one value.
/// This conversion is reversible.
/// Row-oriented storage is beneficial when there is a need for random access
/// of individual rows and, at the same time, all included columns are likely to
/// be accessed together, as in the case of a hash table key.
class KeyEncoder {
public:
struct KeyEncoderContext {
bool has_avx2() const {
return (hardware_flags & arrow::internal::CpuInfo::AVX2) > 0;
}
int64_t hardware_flags;
util::TempVectorStack* stack;
};
/// Description of a storage format for rows produced by encoder.
struct KeyRowMetadata {
/// Whether the row is varying-length binary, using an offsets array to find the
/// beginning of each row, or fixed-length binary.
bool is_fixed_length;
/// For a fixed-length binary row, the common size of rows in bytes,
/// rounded up to a multiple of the alignment.
///
/// For a varying-length binary row, the size of all encoded fixed-length key columns,
/// including the lengths of varying-length columns, rounded up to a multiple of the
/// string alignment.
uint32_t fixed_length;
/// Offset within a row to the array of 32-bit offsets within a row of
/// ends of varbinary fields.
/// Used only when the row is not fixed-length, zero for fixed-length row.
/// There are N elements for N varbinary fields.
/// Each element is the offset within a row of the first byte after
/// the corresponding varbinary field bytes in that row.
/// If varbinary fields begin at aligned addresses, then the end of the previous
/// varbinary field needs to be rounded up according to the specified alignment
/// to obtain the beginning of the next varbinary field.
/// The first varbinary field starts at offset specified by fixed_length,
/// which should already be aligned.
uint32_t varbinary_end_array_offset;
/// Fixed number of bytes per row that are used to encode null masks.
/// Null masks indicate for a single row which of its key columns are null.
/// Nth bit in the sequence of bytes assigned to a row represents null
/// information for Nth field according to the order in which they are encoded.
int null_masks_bytes_per_row;
/// Power of 2. Every row will start at the offset aligned to that number of bytes.
int row_alignment;
/// Power of 2. Must be no greater than row alignment.
/// Every non-power-of-2 binary field and every varbinary field bytes
/// will start aligned to that number of bytes.
int string_alignment;
/// Metadata of encoded columns in their original order.
std::vector<KeyColumnMetadata> column_metadatas;
/// Order in which fields are encoded.
std::vector<uint32_t> column_order;
/// Offsets within a row to fields in their encoding order.
std::vector<uint32_t> column_offsets;
/// Returns the padding needed to round the offset up to the nearest multiple of the
/// alignment value. Alignment must be a power of 2.
static inline uint32_t padding_for_alignment(uint32_t offset,
int required_alignment) {
ARROW_DCHECK(ARROW_POPCOUNT64(required_alignment) == 1);
return static_cast<uint32_t>((-static_cast<int32_t>(offset)) &
(required_alignment - 1));
}
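// For example, with offset = 13 and required_alignment = 8, the result is
// (-13) & 7 == 3, and adding 3 bytes of padding advances the offset to 16.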
/// Returns the padding needed to round the offset up to the beginning of the next
/// column, choosing the required alignment based on the data type of that column.
static inline uint32_t padding_for_alignment(uint32_t offset, int string_alignment,
const KeyColumnMetadata& col_metadata) {
if (!col_metadata.is_fixed_length ||
ARROW_POPCOUNT64(col_metadata.fixed_length) <= 1) {
return 0;
} else {
return padding_for_alignment(offset, string_alignment);
}
}
/// Returns an array of offsets within a row of ends of varbinary fields.
inline const uint32_t* varbinary_end_array(const uint8_t* row) const {
ARROW_DCHECK(!is_fixed_length);
return reinterpret_cast<const uint32_t*>(row + varbinary_end_array_offset);
}
inline uint32_t* varbinary_end_array(uint8_t* row) const {
ARROW_DCHECK(!is_fixed_length);
return reinterpret_cast<uint32_t*>(row + varbinary_end_array_offset);
}
/// Returns the offset within the row and length of the first varbinary field.
inline void first_varbinary_offset_and_length(const uint8_t* row, uint32_t* offset,
uint32_t* length) const {
ARROW_DCHECK(!is_fixed_length);
*offset = fixed_length;
*length = varbinary_end_array(row)[0] - fixed_length;
}
/// Returns the offset within the row and length of the second and further varbinary
/// fields.
inline void nth_varbinary_offset_and_length(const uint8_t* row, int varbinary_id,
uint32_t* out_offset,
uint32_t* out_length) const {
ARROW_DCHECK(!is_fixed_length);
ARROW_DCHECK(varbinary_id > 0);
const uint32_t* varbinary_end = varbinary_end_array(row);
uint32_t offset = varbinary_end[varbinary_id - 1];
offset += padding_for_alignment(offset, string_alignment);
*out_offset = offset;
*out_length = varbinary_end[varbinary_id] - offset;
}
uint32_t encoded_field_order(uint32_t icol) const { return column_order[icol]; }
uint32_t encoded_field_offset(uint32_t icol) const { return column_offsets[icol]; }
uint32_t num_cols() const { return static_cast<uint32_t>(column_metadatas.size()); }
uint32_t num_varbinary_cols() const;
void FromColumnMetadataVector(const std::vector<KeyColumnMetadata>& cols,
int in_row_alignment, int in_string_alignment);
bool is_compatible(const KeyRowMetadata& other) const;
};
class KeyRowArray {
public:
KeyRowArray();
Status Init(MemoryPool* pool, const KeyRowMetadata& metadata);
void Clean();
Status AppendEmpty(uint32_t num_rows_to_append, uint32_t num_extra_bytes_to_append);
Status AppendSelectionFrom(const KeyRowArray& from, uint32_t num_rows_to_append,
const uint16_t* source_row_ids);
const KeyRowMetadata& metadata() const { return metadata_; }
int64_t length() const { return num_rows_; }
const uint8_t* data(int i) const {
ARROW_DCHECK(i >= 0 && i <= max_buffers_);
return buffers_[i];
}
uint8_t* mutable_data(int i) {
ARROW_DCHECK(i >= 0 && i <= max_buffers_);
return mutable_buffers_[i];
}
const uint32_t* offsets() const { return reinterpret_cast<const uint32_t*>(data(1)); }
uint32_t* mutable_offsets() { return reinterpret_cast<uint32_t*>(mutable_data(1)); }
const uint8_t* null_masks() const { return null_masks_->data(); }
uint8_t* null_masks() { return null_masks_->mutable_data(); }
bool has_any_nulls(const KeyEncoderContext* ctx) const;
private:
Status ResizeFixedLengthBuffers(int64_t num_extra_rows);
Status ResizeOptionalVaryingLengthBuffer(int64_t num_extra_bytes);
int64_t size_null_masks(int64_t num_rows);
int64_t size_offsets(int64_t num_rows);
int64_t size_rows_fixed_length(int64_t num_rows);
int64_t size_rows_varying_length(int64_t num_bytes);
void update_buffer_pointers();
static constexpr int64_t padding_for_vectors = 64;
MemoryPool* pool_;
KeyRowMetadata metadata_;
/// Buffers can only expand during lifetime and never shrink.
std::unique_ptr<ResizableBuffer> null_masks_;
std::unique_ptr<ResizableBuffer> offsets_;
std::unique_ptr<ResizableBuffer> rows_;
static constexpr int max_buffers_ = 3;
const uint8_t* buffers_[max_buffers_];
uint8_t* mutable_buffers_[max_buffers_];
int64_t num_rows_;
int64_t rows_capacity_;
int64_t bytes_capacity_;
// Mutable to allow lazy evaluation
mutable int64_t num_rows_for_has_any_nulls_;
mutable bool has_any_nulls_;
};
void Init(const std::vector<KeyColumnMetadata>& cols, KeyEncoderContext* ctx,
int row_alignment, int string_alignment);
const KeyRowMetadata& row_metadata() { return row_metadata_; }
void PrepareEncodeSelected(int64_t start_row, int64_t num_rows,
const std::vector<KeyColumnArray>& cols);
Status EncodeSelected(KeyRowArray* rows, uint32_t num_selected,
const uint16_t* selection);
/// Decode a window of row oriented data into a corresponding
/// window of column oriented storage.
/// The output buffers need to be correctly allocated and sized before
/// calling each method.
/// For that reason decoding is split into two functions.
/// The output of the first one, which processes everything except for
/// varying-length buffers, can be used to determine the required sizes of the
/// varying-length buffers.
void DecodeFixedLengthBuffers(int64_t start_row_input, int64_t start_row_output,
int64_t num_rows, const KeyRowArray& rows,
std::vector<KeyColumnArray>* cols);
void DecodeVaryingLengthBuffers(int64_t start_row_input, int64_t start_row_output,
int64_t num_rows, const KeyRowArray& rows,
std::vector<KeyColumnArray>* cols);
const std::vector<KeyColumnArray>& GetBatchColumns() const { return batch_all_cols_; }
private:
/// Prepare column array vectors.
/// Output column arrays represent a range of input column arrays
/// specified by starting row and number of rows.
/// Three vectors are generated:
/// - all columns
/// - fixed-length columns only
/// - varying-length columns only
void PrepareKeyColumnArrays(int64_t start_row, int64_t num_rows,
const std::vector<KeyColumnArray>& cols_in);
class TransformBoolean {
public:
static KeyColumnArray ArrayReplace(const KeyColumnArray& column,
const KeyColumnArray& temp);
static void PostDecode(const KeyColumnArray& input, KeyColumnArray* output,
KeyEncoderContext* ctx);
};
class EncoderInteger {
public:
static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
const KeyRowArray& rows, KeyColumnArray* col,
KeyEncoderContext* ctx, KeyColumnArray* temp);
static bool UsesTransform(const KeyColumnArray& column);
static KeyColumnArray ArrayReplace(const KeyColumnArray& column,
const KeyColumnArray& temp);
static void PostDecode(const KeyColumnArray& input, KeyColumnArray* output,
KeyEncoderContext* ctx);
private:
static bool IsBoolean(const KeyColumnMetadata& metadata);
};
class EncoderBinary {
public:
static void EncodeSelected(uint32_t offset_within_row, KeyRowArray* rows,
const KeyColumnArray& col, uint32_t num_selected,
const uint16_t* selection);
static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
const KeyRowArray& rows, KeyColumnArray* col,
KeyEncoderContext* ctx, KeyColumnArray* temp);
static bool IsInteger(const KeyColumnMetadata& metadata);
private:
template <class COPY_FN, class SET_NULL_FN>
static void EncodeSelectedImp(uint32_t offset_within_row, KeyRowArray* rows,
const KeyColumnArray& col, uint32_t num_selected,
const uint16_t* selection, COPY_FN copy_fn,
SET_NULL_FN set_null_fn);
template <bool is_row_fixed_length, class COPY_FN>
static inline void DecodeHelper(uint32_t start_row, uint32_t num_rows,
uint32_t offset_within_row,
const KeyRowArray* rows_const,
KeyRowArray* rows_mutable_maybe_null,
const KeyColumnArray* col_const,
KeyColumnArray* col_mutable_maybe_null,
COPY_FN copy_fn);
template <bool is_row_fixed_length>
static void DecodeImp(uint32_t start_row, uint32_t num_rows,
uint32_t offset_within_row, const KeyRowArray& rows,
KeyColumnArray* col);
#if defined(ARROW_HAVE_AVX2)
static void DecodeHelper_avx2(bool is_row_fixed_length, uint32_t start_row,
uint32_t num_rows, uint32_t offset_within_row,
const KeyRowArray& rows, KeyColumnArray* col);
template <bool is_row_fixed_length>
static void DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
uint32_t offset_within_row, const KeyRowArray& rows,
KeyColumnArray* col);
#endif
};
class EncoderBinaryPair {
public:
static bool CanProcessPair(const KeyColumnMetadata& col1,
const KeyColumnMetadata& col2) {
return EncoderBinary::IsInteger(col1) && EncoderBinary::IsInteger(col2);
}
static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
const KeyRowArray& rows, KeyColumnArray* col1,
KeyColumnArray* col2, KeyEncoderContext* ctx,
KeyColumnArray* temp1, KeyColumnArray* temp2);
private:
template <bool is_row_fixed_length, typename col1_type, typename col2_type>
static void DecodeImp(uint32_t num_rows_to_skip, uint32_t start_row,
uint32_t num_rows, uint32_t offset_within_row,
const KeyRowArray& rows, KeyColumnArray* col1,
KeyColumnArray* col2);
#if defined(ARROW_HAVE_AVX2)
static uint32_t DecodeHelper_avx2(bool is_row_fixed_length, uint32_t col_width,
uint32_t start_row, uint32_t num_rows,
uint32_t offset_within_row, const KeyRowArray& rows,
KeyColumnArray* col1, KeyColumnArray* col2);
template <bool is_row_fixed_length, uint32_t col_width>
static uint32_t DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
uint32_t offset_within_row, const KeyRowArray& rows,
KeyColumnArray* col1, KeyColumnArray* col2);
#endif
};
class EncoderOffsets {
public:
static void GetRowOffsetsSelected(KeyRowArray* rows,
const std::vector<KeyColumnArray>& cols,
uint32_t num_selected, const uint16_t* selection);
static void EncodeSelected(KeyRowArray* rows, const std::vector<KeyColumnArray>& cols,
uint32_t num_selected, const uint16_t* selection);
static void Decode(uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows,
std::vector<KeyColumnArray>* varbinary_cols,
const std::vector<uint32_t>& varbinary_cols_base_offset,
KeyEncoderContext* ctx);
private:
template <bool has_nulls, bool is_first_varbinary>
static void EncodeSelectedImp(uint32_t ivarbinary, KeyRowArray* rows,
const std::vector<KeyColumnArray>& cols,
uint32_t num_selected, const uint16_t* selection);
};
class EncoderVarBinary {
public:
static void EncodeSelected(uint32_t ivarbinary, KeyRowArray* rows,
const KeyColumnArray& cols, uint32_t num_selected,
const uint16_t* selection);
static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t varbinary_col_id,
const KeyRowArray& rows, KeyColumnArray* col,
KeyEncoderContext* ctx);
private:
template <bool first_varbinary_col, class COPY_FN>
static inline void DecodeHelper(uint32_t start_row, uint32_t num_rows,
uint32_t varbinary_col_id,
const KeyRowArray* rows_const,
KeyRowArray* rows_mutable_maybe_null,
const KeyColumnArray* col_const,
KeyColumnArray* col_mutable_maybe_null,
COPY_FN copy_fn);
template <bool first_varbinary_col>
static void DecodeImp(uint32_t start_row, uint32_t num_rows,
uint32_t varbinary_col_id, const KeyRowArray& rows,
KeyColumnArray* col);
#if defined(ARROW_HAVE_AVX2)
static void DecodeHelper_avx2(uint32_t start_row, uint32_t num_rows,
uint32_t varbinary_col_id, const KeyRowArray& rows,
KeyColumnArray* col);
template <bool first_varbinary_col>
static void DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
uint32_t varbinary_col_id, const KeyRowArray& rows,
KeyColumnArray* col);
#endif
};
class EncoderNulls {
public:
static void EncodeSelected(KeyRowArray* rows, const std::vector<KeyColumnArray>& cols,
uint32_t num_selected, const uint16_t* selection);
static void Decode(uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows,
std::vector<KeyColumnArray>* cols);
};
KeyEncoderContext* ctx_;
// Data initialized once, based on data types of key columns
KeyRowMetadata row_metadata_;
// Data initialized for each input batch.
// All elements are ordered according to the order of encoded fields in a row.
std::vector<KeyColumnArray> batch_all_cols_;
std::vector<KeyColumnArray> batch_varbinary_cols_;
std::vector<uint32_t> batch_varbinary_cols_base_offsets_;
};
template <bool is_row_fixed_length, class COPY_FN>
inline void KeyEncoder::EncoderBinary::DecodeHelper(
uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
const KeyRowArray* rows_const, KeyRowArray* rows_mutable_maybe_null,
const KeyColumnArray* col_const, KeyColumnArray* col_mutable_maybe_null,
COPY_FN copy_fn) {
ARROW_DCHECK(col_const && col_const->metadata().is_fixed_length);
uint32_t col_width = col_const->metadata().fixed_length;
if (is_row_fixed_length) {
uint32_t row_width = rows_const->metadata().fixed_length;
for (uint32_t i = 0; i < num_rows; ++i) {
const uint8_t* src;
uint8_t* dst;
src = rows_const->data(1) + row_width * (start_row + i) + offset_within_row;
dst = col_mutable_maybe_null->mutable_data(1) + col_width * i;
copy_fn(dst, src, col_width);
}
} else {
const uint32_t* row_offsets = rows_const->offsets();
for (uint32_t i = 0; i < num_rows; ++i) {
const uint8_t* src;
uint8_t* dst;
src = rows_const->data(2) + row_offsets[start_row + i] + offset_within_row;
dst = col_mutable_maybe_null->mutable_data(1) + col_width * i;
copy_fn(dst, src, col_width);
}
}
}
template <bool first_varbinary_col, class COPY_FN>
inline void KeyEncoder::EncoderVarBinary::DecodeHelper(
uint32_t start_row, uint32_t num_rows, uint32_t varbinary_col_id,
const KeyRowArray* rows_const, KeyRowArray* rows_mutable_maybe_null,
const KeyColumnArray* col_const, KeyColumnArray* col_mutable_maybe_null,
COPY_FN copy_fn) {
// Column and rows need to be varying length
ARROW_DCHECK(!rows_const->metadata().is_fixed_length &&
!col_const->metadata().is_fixed_length);
const uint32_t* row_offsets_for_batch = rows_const->offsets() + start_row;
const uint32_t* col_offsets = col_const->offsets();
uint32_t col_offset_next = col_offsets[0];
for (uint32_t i = 0; i < num_rows; ++i) {
uint32_t col_offset = col_offset_next;
col_offset_next = col_offsets[i + 1];
uint32_t row_offset = row_offsets_for_batch[i];
const uint8_t* row = rows_const->data(2) + row_offset;
uint32_t offset_within_row;
uint32_t length;
if (first_varbinary_col) {
rows_const->metadata().first_varbinary_offset_and_length(row, &offset_within_row,
&length);
} else {
rows_const->metadata().nth_varbinary_offset_and_length(row, varbinary_col_id,
&offset_within_row, &length);
}
row_offset += offset_within_row;
const uint8_t* src;
uint8_t* dst;
src = rows_const->data(2) + row_offset;
dst = col_mutable_maybe_null->mutable_data(2) + col_offset;
copy_fn(dst, src, length);
}
}
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,213 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#if defined(ARROW_HAVE_AVX2)
#include <immintrin.h>
#endif
#include <cstdint>
#include "arrow/compute/exec/key_encode.h"
#include "arrow/compute/exec/util.h"
namespace arrow {
namespace compute {
// Forward declarations are only needed to make test functions friends of the classes
// in this file.
//
enum class BloomFilterBuildStrategy;
// Implementations are based on xxh3 32-bit algorithm description from:
// https://github.com/Cyan4973/xxHash/blob/dev/doc/xxhash_spec.md
//
class ARROW_EXPORT Hashing32 {
friend class TestVectorHash;
template <typename T>
friend void TestBloomLargeHashHelper(int64_t, int64_t, const std::vector<uint64_t>&,
int64_t, int, T*);
friend void TestBloomSmall(BloomFilterBuildStrategy, int64_t, int, bool, bool);
public:
static void HashMultiColumn(const std::vector<KeyColumnArray>& cols,
KeyEncoder::KeyEncoderContext* ctx, uint32_t* out_hash);
private:
static const uint32_t PRIME32_1 = 0x9E3779B1;
static const uint32_t PRIME32_2 = 0x85EBCA77;
static const uint32_t PRIME32_3 = 0xC2B2AE3D;
static const uint32_t PRIME32_4 = 0x27D4EB2F;
static const uint32_t PRIME32_5 = 0x165667B1;
static const uint32_t kCombineConst = 0x9e3779b9UL;
static const int64_t kStripeSize = 4 * sizeof(uint32_t);
static void HashFixed(int64_t hardware_flags, bool combine_hashes, uint32_t num_keys,
uint64_t length_key, const uint8_t* keys, uint32_t* hashes,
uint32_t* temp_hashes_for_combine);
static void HashVarLen(int64_t hardware_flags, bool combine_hashes, uint32_t num_rows,
const uint32_t* offsets, const uint8_t* concatenated_keys,
uint32_t* hashes, uint32_t* temp_hashes_for_combine);
static void HashVarLen(int64_t hardware_flags, bool combine_hashes, uint32_t num_rows,
const uint64_t* offsets, const uint8_t* concatenated_keys,
uint32_t* hashes, uint32_t* temp_hashes_for_combine);
static inline uint32_t Avalanche(uint32_t acc) {
acc ^= (acc >> 15);
acc *= PRIME32_2;
acc ^= (acc >> 13);
acc *= PRIME32_3;
acc ^= (acc >> 16);
return acc;
}
static inline uint32_t Round(uint32_t acc, uint32_t input);
static inline uint32_t CombineAccumulators(uint32_t acc1, uint32_t acc2, uint32_t acc3,
uint32_t acc4);
static inline uint32_t CombineHashesImp(uint32_t previous_hash, uint32_t hash) {
uint32_t next_hash = previous_hash ^ (hash + kCombineConst + (previous_hash << 6) +
(previous_hash >> 2));
return next_hash;
}
static inline void ProcessFullStripes(uint64_t num_stripes, const uint8_t* key,
uint32_t* out_acc1, uint32_t* out_acc2,
uint32_t* out_acc3, uint32_t* out_acc4);
static inline void ProcessLastStripe(uint32_t mask1, uint32_t mask2, uint32_t mask3,
uint32_t mask4, const uint8_t* last_stripe,
uint32_t* acc1, uint32_t* acc2, uint32_t* acc3,
uint32_t* acc4);
static inline void StripeMask(int i, uint32_t* mask1, uint32_t* mask2, uint32_t* mask3,
uint32_t* mask4);
template <bool T_COMBINE_HASHES>
static void HashFixedLenImp(uint32_t num_rows, uint64_t length, const uint8_t* keys,
uint32_t* hashes);
template <typename T, bool T_COMBINE_HASHES>
static void HashVarLenImp(uint32_t num_rows, const T* offsets,
const uint8_t* concatenated_keys, uint32_t* hashes);
template <bool T_COMBINE_HASHES>
static void HashBitImp(int64_t bit_offset, uint32_t num_keys, const uint8_t* keys,
uint32_t* hashes);
static void HashBit(bool combine_hashes, int64_t bit_offset, uint32_t num_keys,
const uint8_t* keys, uint32_t* hashes);
template <bool T_COMBINE_HASHES, typename T>
static void HashIntImp(uint32_t num_keys, const T* keys, uint32_t* hashes);
static void HashInt(bool combine_hashes, uint32_t num_keys, uint64_t length_key,
const uint8_t* keys, uint32_t* hashes);
#if defined(ARROW_HAVE_AVX2)
static inline __m256i Avalanche_avx2(__m256i hash);
static inline __m256i CombineHashesImp_avx2(__m256i previous_hash, __m256i hash);
template <bool T_COMBINE_HASHES>
static void AvalancheAll_avx2(uint32_t num_rows, uint32_t* hashes,
const uint32_t* hashes_temp_for_combine);
static inline __m256i Round_avx2(__m256i acc, __m256i input);
static inline uint64_t CombineAccumulators_avx2(__m256i acc);
static inline __m256i StripeMask_avx2(int i, int j);
template <bool two_equal_lengths>
static inline __m256i ProcessStripes_avx2(int64_t num_stripes_A, int64_t num_stripes_B,
__m256i mask_last_stripe, const uint8_t* keys,
int64_t offset_A, int64_t offset_B);
template <bool T_COMBINE_HASHES>
static uint32_t HashFixedLenImp_avx2(uint32_t num_rows, uint64_t length,
const uint8_t* keys, uint32_t* hashes,
uint32_t* hashes_temp_for_combine);
static uint32_t HashFixedLen_avx2(bool combine_hashes, uint32_t num_rows,
uint64_t length, const uint8_t* keys,
uint32_t* hashes, uint32_t* hashes_temp_for_combine);
template <typename T, bool T_COMBINE_HASHES>
static uint32_t HashVarLenImp_avx2(uint32_t num_rows, const T* offsets,
const uint8_t* concatenated_keys, uint32_t* hashes,
uint32_t* hashes_temp_for_combine);
static uint32_t HashVarLen_avx2(bool combine_hashes, uint32_t num_rows,
const uint32_t* offsets,
const uint8_t* concatenated_keys, uint32_t* hashes,
uint32_t* hashes_temp_for_combine);
static uint32_t HashVarLen_avx2(bool combine_hashes, uint32_t num_rows,
const uint64_t* offsets,
const uint8_t* concatenated_keys, uint32_t* hashes,
uint32_t* hashes_temp_for_combine);
#endif
};
class ARROW_EXPORT Hashing64 {
friend class TestVectorHash;
template <typename T>
friend void TestBloomLargeHashHelper(int64_t, int64_t, const std::vector<uint64_t>&,
int64_t, int, T*);
friend void TestBloomSmall(BloomFilterBuildStrategy, int64_t, int, bool, bool);
public:
static void HashMultiColumn(const std::vector<KeyColumnArray>& cols,
KeyEncoder::KeyEncoderContext* ctx, uint64_t* hashes);
private:
static const uint64_t PRIME64_1 = 0x9E3779B185EBCA87ULL;
static const uint64_t PRIME64_2 = 0xC2B2AE3D27D4EB4FULL;
static const uint64_t PRIME64_3 = 0x165667B19E3779F9ULL;
static const uint64_t PRIME64_4 = 0x85EBCA77C2B2AE63ULL;
static const uint64_t PRIME64_5 = 0x27D4EB2F165667C5ULL;
static const uint32_t kCombineConst = 0x9e3779b9UL;
static const int64_t kStripeSize = 4 * sizeof(uint64_t);
static void HashFixed(bool combine_hashes, uint32_t num_keys, uint64_t length_key,
const uint8_t* keys, uint64_t* hashes);
static void HashVarLen(bool combine_hashes, uint32_t num_rows, const uint32_t* offsets,
const uint8_t* concatenated_keys, uint64_t* hashes);
static void HashVarLen(bool combine_hashes, uint32_t num_rows, const uint64_t* offsets,
const uint8_t* concatenated_keys, uint64_t* hashes);
static inline uint64_t Avalanche(uint64_t acc);
static inline uint64_t Round(uint64_t acc, uint64_t input);
static inline uint64_t CombineAccumulators(uint64_t acc1, uint64_t acc2, uint64_t acc3,
uint64_t acc4);
static inline uint64_t CombineHashesImp(uint64_t previous_hash, uint64_t hash) {
uint64_t next_hash = previous_hash ^ (hash + kCombineConst + (previous_hash << 6) +
(previous_hash >> 2));
return next_hash;
}
static inline void ProcessFullStripes(uint64_t num_stripes, const uint8_t* key,
uint64_t* out_acc1, uint64_t* out_acc2,
uint64_t* out_acc3, uint64_t* out_acc4);
static inline void ProcessLastStripe(uint64_t mask1, uint64_t mask2, uint64_t mask3,
uint64_t mask4, const uint8_t* last_stripe,
uint64_t* acc1, uint64_t* acc2, uint64_t* acc3,
uint64_t* acc4);
static inline void StripeMask(int i, uint64_t* mask1, uint64_t* mask2, uint64_t* mask3,
uint64_t* mask4);
template <bool T_COMBINE_HASHES>
static void HashFixedLenImp(uint32_t num_rows, uint64_t length, const uint8_t* keys,
uint64_t* hashes);
template <typename T, bool T_COMBINE_HASHES>
static void HashVarLenImp(uint32_t num_rows, const T* offsets,
const uint8_t* concatenated_keys, uint64_t* hashes);
template <bool T_COMBINE_HASHES>
static void HashBitImp(int64_t bit_offset, uint32_t num_keys, const uint8_t* keys,
uint64_t* hashes);
static void HashBit(bool T_COMBINE_HASHES, int64_t bit_offset, uint32_t num_keys,
const uint8_t* keys, uint64_t* hashes);
template <bool T_COMBINE_HASHES, typename T>
static void HashIntImp(uint32_t num_keys, const T* keys, uint64_t* hashes);
static void HashInt(bool T_COMBINE_HASHES, uint32_t num_keys, uint64_t length_key,
const uint8_t* keys, uint64_t* hashes);
};
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,208 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <functional>
#include "arrow/compute/exec/util.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "arrow/status.h"
namespace arrow {
namespace compute {
class SwissTable {
public:
SwissTable() = default;
~SwissTable() { cleanup(); }
using EqualImpl =
std::function<void(int num_keys, const uint16_t* selection /* may be null */,
const uint32_t* group_ids, uint32_t* out_num_keys_mismatch,
uint16_t* out_selection_mismatch)>;
using AppendImpl = std::function<Status(int num_keys, const uint16_t* selection)>;
Status init(int64_t hardware_flags, MemoryPool* pool, util::TempVectorStack* temp_stack,
int log_minibatch, EqualImpl equal_impl, AppendImpl append_impl);
void cleanup();
void early_filter(const int num_keys, const uint32_t* hashes,
uint8_t* out_match_bitvector, uint8_t* out_local_slots) const;
void find(const int num_keys, const uint32_t* hashes, uint8_t* inout_match_bitvector,
const uint8_t* local_slots, uint32_t* out_group_ids) const;
Status map_new_keys(uint32_t num_ids, uint16_t* ids, const uint32_t* hashes,
uint32_t* group_ids);
private:
// Lookup helpers
  /// \brief Scan bytes in a block in reverse and stop as soon
  /// as a position of interest is found.
///
/// Positions of interest:
/// a) slot with a matching stamp is encountered,
/// b) first empty slot is encountered,
/// c) we reach the end of the block.
///
/// Optionally an index of the first slot to start the search from can be specified.
/// In this case slots before it will be ignored.
///
/// \param[in] block 8 byte block of hash table
/// \param[in] stamp 7 bits of hash used as a stamp
/// \param[in] start_slot Index of the first slot in the block to start search from. We
/// assume that this index always points to a non-empty slot, equivalently
/// that it comes before any empty slots. (Used only by one template
/// variant.)
/// \param[out] out_slot index corresponding to the discovered position of interest (8
/// represents end of block).
  /// \param[out] out_match_found an integer flag (0 or 1) indicating if we reached an
  /// empty slot (0) or not (1). Therefore 1 can mean either that an actual match was
  /// found (case a) above) or that we reached the end of a full block (case c) above).
///
template <bool use_start_slot>
inline void search_block(uint64_t block, int stamp, int start_slot, int* out_slot,
int* out_match_found) const;
/// \brief Extract group id for a given slot in a given block.
///
inline uint64_t extract_group_id(const uint8_t* block_ptr, int slot,
uint64_t group_id_mask) const;
void extract_group_ids(const int num_keys, const uint16_t* optional_selection,
const uint32_t* hashes, const uint8_t* local_slots,
uint32_t* out_group_ids) const;
template <typename T, bool use_selection>
void extract_group_ids_imp(const int num_keys, const uint16_t* selection,
const uint32_t* hashes, const uint8_t* local_slots,
uint32_t* out_group_ids, int elements_offset,
                             int element_multiplier) const;
inline uint64_t next_slot_to_visit(uint64_t block_index, int slot,
int match_found) const;
inline uint64_t num_groups_for_resize() const;
inline uint64_t wrap_global_slot_id(uint64_t global_slot_id) const;
void init_slot_ids(const int num_keys, const uint16_t* selection,
const uint32_t* hashes, const uint8_t* local_slots,
const uint8_t* match_bitvector, uint32_t* out_slot_ids) const;
void init_slot_ids_for_new_keys(uint32_t num_ids, const uint16_t* ids,
const uint32_t* hashes, uint32_t* slot_ids) const;
// Quickly filter out keys that have no matches based only on hash value and the
// corresponding starting 64-bit block of slot status bytes. May return false positives.
//
void early_filter_imp(const int num_keys, const uint32_t* hashes,
uint8_t* out_match_bitvector, uint8_t* out_local_slots) const;
#if defined(ARROW_HAVE_AVX2)
void early_filter_imp_avx2_x8(const int num_hashes, const uint32_t* hashes,
uint8_t* out_match_bitvector,
uint8_t* out_local_slots) const;
void early_filter_imp_avx2_x32(const int num_hashes, const uint32_t* hashes,
uint8_t* out_match_bitvector,
uint8_t* out_local_slots) const;
void extract_group_ids_avx2(const int num_keys, const uint32_t* hashes,
const uint8_t* local_slots, uint32_t* out_group_ids,
int byte_offset, int byte_multiplier, int byte_size) const;
#endif
void run_comparisons(const int num_keys, const uint16_t* optional_selection_ids,
const uint8_t* optional_selection_bitvector,
const uint32_t* groupids, int* out_num_not_equal,
uint16_t* out_not_equal_selection) const;
inline bool find_next_stamp_match(const uint32_t hash, const uint32_t in_slot_id,
uint32_t* out_slot_id, uint32_t* out_group_id) const;
inline void insert_into_empty_slot(uint32_t slot_id, uint32_t hash, uint32_t group_id);
// Slow processing of input keys in the most generic case.
// Handles inserting new keys.
  // Pre-existing keys will be handled correctly, although the intended use is for this
  // call to follow a call to the find() method, which would only pass on new keys that
  // were not present in the hash table.
//
Status map_new_keys_helper(const uint32_t* hashes, uint32_t* inout_num_selected,
uint16_t* inout_selection, bool* out_need_resize,
uint32_t* out_group_ids, uint32_t* out_next_slot_ids);
// Resize small hash tables when 50% full (up to 8KB).
// Resize large hash tables when 75% full.
Status grow_double();
static int num_groupid_bits_from_log_blocks(int log_blocks) {
int required_bits = log_blocks + 3;
return required_bits <= 8 ? 8
: required_bits <= 16 ? 16
: required_bits <= 32 ? 32
: 64;
}
// Use 32-bit hash for now
static constexpr int bits_hash_ = 32;
// Number of hash bits stored in slots in a block.
// The highest bits of hash determine block id.
// The next set of highest bits is a "stamp" stored in a slot in a block.
static constexpr int bits_stamp_ = 7;
// Padding bytes added at the end of buffers for ease of SIMD access
static constexpr int padding_ = 64;
int log_minibatch_;
// Base 2 log of the number of blocks
int log_blocks_ = 0;
// Number of keys inserted into hash table
uint32_t num_inserted_ = 0;
  // Data for blocks.
  // Each block has 8 status bytes for 8 slots, followed by 8 bit-packed group ids for
  // these slots. In the 8B status word, the order of bytes is reversed. Group ids are
  // in normal order. There is 64B padding at the end.
  //
  // byte 0 - slot 7 | byte 1 - slot 6 | ...
  // ---------------------------------------------------
  // | Empty bit* | Empty bit |
  // ---------------------------------------------------
  // | 7-bit hash | 7-bit hash |
  // ---------------------------------------------------
  // * An empty slot has the status byte 0x80; a non-empty slot has the highest bit set
  // to 0.
//
uint8_t* blocks_;
// Array of hashes of values inserted into slots.
// Undefined if the corresponding slot is empty.
// There is 64B padding at the end.
uint32_t* hashes_;
int64_t hardware_flags_;
MemoryPool* pool_;
util::TempVectorStack* temp_stack_;
EqualImpl equal_impl_;
AppendImpl append_impl_;
};
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,393 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <vector>
#include "arrow/compute/api_aggregate.h"
#include "arrow/compute/api_vector.h"
#include "arrow/compute/exec.h"
#include "arrow/compute/exec/expression.h"
#include "arrow/result.h"
#include "arrow/util/async_generator.h"
#include "arrow/util/async_util.h"
#include "arrow/util/optional.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace compute {
using AsyncExecBatchGenerator = AsyncGenerator<util::optional<ExecBatch>>;
/// \addtogroup execnode-options
/// @{
class ARROW_EXPORT ExecNodeOptions {
public:
virtual ~ExecNodeOptions() = default;
};
/// \brief Adapt an AsyncGenerator<ExecBatch> as a source node
///
/// plan->exec_context()->executor() will be used to parallelize pushing to
/// outputs, if provided.
class ARROW_EXPORT SourceNodeOptions : public ExecNodeOptions {
public:
SourceNodeOptions(std::shared_ptr<Schema> output_schema,
std::function<Future<util::optional<ExecBatch>>()> generator)
: output_schema(std::move(output_schema)), generator(std::move(generator)) {}
static Result<std::shared_ptr<SourceNodeOptions>> FromTable(const Table& table,
arrow::internal::Executor*);
std::shared_ptr<Schema> output_schema;
std::function<Future<util::optional<ExecBatch>>()> generator;
};
/// \brief An extended Source node which accepts a table
class ARROW_EXPORT TableSourceNodeOptions : public ExecNodeOptions {
public:
TableSourceNodeOptions(std::shared_ptr<Table> table, int64_t max_batch_size)
: table(table), max_batch_size(max_batch_size) {}
// arrow table which acts as the data source
std::shared_ptr<Table> table;
  // Size of batches to emit from this node.
  // If the table is larger, the node will emit multiple batches from the
  // table so that they can be processed in parallel.
int64_t max_batch_size;
};
/// \brief Make a node which excludes some rows from batches passed through it
///
/// filter_expression will be evaluated against each batch which is pushed to
/// this node. Any rows for which filter_expression does not evaluate to `true` will be
/// excluded in the batch emitted by this node.
class ARROW_EXPORT FilterNodeOptions : public ExecNodeOptions {
public:
explicit FilterNodeOptions(Expression filter_expression, bool async_mode = true)
: filter_expression(std::move(filter_expression)), async_mode(async_mode) {}
Expression filter_expression;
bool async_mode;
};
/// \brief Make a node which executes expressions on input batches, producing new batches.
///
/// Each expression will be evaluated against each batch which is pushed to
/// this node to produce a corresponding output column.
///
/// If names are not provided, the string representations of exprs will be used.
class ARROW_EXPORT ProjectNodeOptions : public ExecNodeOptions {
public:
explicit ProjectNodeOptions(std::vector<Expression> expressions,
std::vector<std::string> names = {}, bool async_mode = true)
: expressions(std::move(expressions)),
names(std::move(names)),
async_mode(async_mode) {}
std::vector<Expression> expressions;
std::vector<std::string> names;
bool async_mode;
};
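// A minimal usage sketch (an illustrative addition, not upstream API): building
// filter and projection options with the expression helpers declared in
// expression.h. The column name "x", the function names "greater" and "multiply",
// and the output name "x_times_2" are assumptions for the example.
inline void ExampleFilterAndProjectOptions() {
  // Keep only rows where x > 3.
  FilterNodeOptions filter_opts{call("greater", {field_ref("x"), literal(3)})};
  // Compute a single output column x * 2, named "x_times_2".
  ProjectNodeOptions project_opts({call("multiply", {field_ref("x"), literal(2)})},
                                  {"x_times_2"});
  (void)filter_opts;
  (void)project_opts;
}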
/// \brief Make a node which aggregates input batches, optionally grouped by keys.
class ARROW_EXPORT AggregateNodeOptions : public ExecNodeOptions {
public:
AggregateNodeOptions(std::vector<internal::Aggregate> aggregates,
std::vector<FieldRef> targets, std::vector<std::string> names,
std::vector<FieldRef> keys = {})
: aggregates(std::move(aggregates)),
targets(std::move(targets)),
names(std::move(names)),
keys(std::move(keys)) {}
  // aggregations which will be applied to the targeted fields
std::vector<internal::Aggregate> aggregates;
// fields to which aggregations will be applied
std::vector<FieldRef> targets;
// output field names for aggregations
std::vector<std::string> names;
// keys by which aggregations will be grouped
std::vector<FieldRef> keys;
};
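// A usage sketch (illustrative; it assumes internal::Aggregate pairs a function
// name with its options, as declared in api_aggregate.h, and uses hypothetical
// field names): sum "x" within each group of equal "key" values.
inline AggregateNodeOptions ExampleGroupedSumOptions() {
  return AggregateNodeOptions(/*aggregates=*/{{"hash_sum", NULLPTR}},
                              /*targets=*/{FieldRef("x")},
                              /*names=*/{"sum_x"},
                              /*keys=*/{FieldRef("key")});
}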
constexpr int32_t kDefaultBackpressureHighBytes = 1 << 30; // 1GiB
constexpr int32_t kDefaultBackpressureLowBytes = 1 << 28; // 256MiB
class ARROW_EXPORT BackpressureMonitor {
public:
virtual ~BackpressureMonitor() = default;
virtual uint64_t bytes_in_use() const = 0;
virtual bool is_paused() const = 0;
};
/// \brief Options to control backpressure behavior
struct ARROW_EXPORT BackpressureOptions {
/// \brief Create default options that perform no backpressure
BackpressureOptions() : resume_if_below(0), pause_if_above(0) {}
/// \brief Create options that will perform backpressure
///
/// \param resume_if_below The producer should resume producing if the backpressure
/// queue has fewer than resume_if_below items.
/// \param pause_if_above The producer should pause producing if the backpressure
/// queue has more than pause_if_above items.
BackpressureOptions(uint32_t resume_if_below, uint32_t pause_if_above)
: resume_if_below(resume_if_below), pause_if_above(pause_if_above) {}
static BackpressureOptions DefaultBackpressure() {
return BackpressureOptions(kDefaultBackpressureLowBytes,
kDefaultBackpressureHighBytes);
}
bool should_apply_backpressure() const { return pause_if_above > 0; }
uint64_t resume_if_below;
uint64_t pause_if_above;
};
/// \brief Add a sink node which forwards to an AsyncGenerator<ExecBatch>
///
/// Emitted batches will not be ordered.
class ARROW_EXPORT SinkNodeOptions : public ExecNodeOptions {
public:
explicit SinkNodeOptions(std::function<Future<util::optional<ExecBatch>>()>* generator,
BackpressureOptions backpressure = {},
BackpressureMonitor** backpressure_monitor = NULLPTR)
: generator(generator),
backpressure(std::move(backpressure)),
backpressure_monitor(backpressure_monitor) {}
/// \brief A pointer to a generator of batches.
///
/// This will be set when the node is added to the plan and should be used to consume
/// data from the plan. If this function is not called frequently enough then the sink
/// node will start to accumulate data and may apply backpressure.
std::function<Future<util::optional<ExecBatch>>()>* generator;
/// \brief Options to control when to apply backpressure
///
/// This is optional, the default is to never apply backpressure. If the plan is not
/// consumed quickly enough the system may eventually run out of memory.
BackpressureOptions backpressure;
/// \brief A pointer to a backpressure monitor
///
/// This will be set when the node is added to the plan. This can be used to inspect
/// the amount of data currently queued in the sink node. This is an optional utility
/// and backpressure can be applied even if this is not used.
BackpressureMonitor** backpressure_monitor;
};
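// A wiring sketch (illustrative): the plan fills in sink_gen when the node is
// added, and the caller then pulls batches through it. With DefaultBackpressure()
// the pause/resume thresholds are the byte-based defaults above.
inline SinkNodeOptions ExampleSinkOptionsWithBackpressure(
    std::function<Future<util::optional<ExecBatch>>()>* sink_gen,
    BackpressureMonitor** monitor) {
  return SinkNodeOptions(sink_gen, BackpressureOptions::DefaultBackpressure(), monitor);
}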
/// \brief Control used by a SinkNodeConsumer to pause & resume
///
/// Callers should ensure that they do not call Pause and Resume simultaneously, and
/// they should sequence things so that a call to Pause() is always followed by an
/// eventual call to Resume().
class ARROW_EXPORT BackpressureControl {
public:
virtual ~BackpressureControl() = default;
/// \brief Ask the input to pause
///
  /// This is best effort: batches may continue to arrive.
  /// It must eventually be followed by a call to Resume() or a deadlock will occur.
virtual void Pause() = 0;
/// \brief Ask the input to resume
virtual void Resume() = 0;
};
class ARROW_EXPORT SinkNodeConsumer {
public:
virtual ~SinkNodeConsumer() = default;
/// \brief Prepare any consumer state
///
/// This will be run once the schema is finalized as the plan is starting and
/// before any calls to Consume. A common use is to save off the schema so that
/// batches can be interpreted.
virtual Status Init(const std::shared_ptr<Schema>& schema,
BackpressureControl* backpressure_control) = 0;
/// \brief Consume a batch of data
virtual Status Consume(ExecBatch batch) = 0;
/// \brief Signal to the consumer that the last batch has been delivered
///
/// The returned future should only finish when all outstanding tasks have completed
virtual Future<> Finish() = 0;
};
/// \brief Add a sink node which consumes data within the exec plan run
class ARROW_EXPORT ConsumingSinkNodeOptions : public ExecNodeOptions {
public:
explicit ConsumingSinkNodeOptions(std::shared_ptr<SinkNodeConsumer> consumer)
: consumer(std::move(consumer)) {}
std::shared_ptr<SinkNodeConsumer> consumer;
};
/// \brief Make a node which sorts rows passed through it
///
/// All batches pushed to this node will be accumulated and then sorted by the given
/// fields. The sorted batches will then be forwarded to the generator in sorted order.
class ARROW_EXPORT OrderBySinkNodeOptions : public SinkNodeOptions {
public:
explicit OrderBySinkNodeOptions(
SortOptions sort_options,
std::function<Future<util::optional<ExecBatch>>()>* generator)
: SinkNodeOptions(generator), sort_options(std::move(sort_options)) {}
SortOptions sort_options;
};
/// @}
enum class JoinType {
LEFT_SEMI,
RIGHT_SEMI,
LEFT_ANTI,
RIGHT_ANTI,
INNER,
LEFT_OUTER,
RIGHT_OUTER,
FULL_OUTER
};
std::string ToString(JoinType t);
enum class JoinKeyCmp { EQ, IS };
/// \addtogroup execnode-options
/// @{
/// \brief Make a node which implements a join operation using a hash join strategy.
class ARROW_EXPORT HashJoinNodeOptions : public ExecNodeOptions {
public:
static constexpr const char* default_output_suffix_for_left = "";
static constexpr const char* default_output_suffix_for_right = "";
HashJoinNodeOptions(
JoinType in_join_type, std::vector<FieldRef> in_left_keys,
std::vector<FieldRef> in_right_keys, Expression filter = literal(true),
std::string output_suffix_for_left = default_output_suffix_for_left,
std::string output_suffix_for_right = default_output_suffix_for_right)
: join_type(in_join_type),
left_keys(std::move(in_left_keys)),
right_keys(std::move(in_right_keys)),
output_all(true),
output_suffix_for_left(std::move(output_suffix_for_left)),
output_suffix_for_right(std::move(output_suffix_for_right)),
filter(std::move(filter)) {
this->key_cmp.resize(this->left_keys.size());
for (size_t i = 0; i < this->left_keys.size(); ++i) {
this->key_cmp[i] = JoinKeyCmp::EQ;
}
}
HashJoinNodeOptions(
JoinType join_type, std::vector<FieldRef> left_keys,
std::vector<FieldRef> right_keys, std::vector<FieldRef> left_output,
std::vector<FieldRef> right_output, Expression filter = literal(true),
std::string output_suffix_for_left = default_output_suffix_for_left,
std::string output_suffix_for_right = default_output_suffix_for_right)
: join_type(join_type),
left_keys(std::move(left_keys)),
right_keys(std::move(right_keys)),
output_all(false),
left_output(std::move(left_output)),
right_output(std::move(right_output)),
output_suffix_for_left(std::move(output_suffix_for_left)),
output_suffix_for_right(std::move(output_suffix_for_right)),
filter(std::move(filter)) {
this->key_cmp.resize(this->left_keys.size());
for (size_t i = 0; i < this->left_keys.size(); ++i) {
this->key_cmp[i] = JoinKeyCmp::EQ;
}
}
HashJoinNodeOptions(
JoinType join_type, std::vector<FieldRef> left_keys,
std::vector<FieldRef> right_keys, std::vector<FieldRef> left_output,
std::vector<FieldRef> right_output, std::vector<JoinKeyCmp> key_cmp,
Expression filter = literal(true),
std::string output_suffix_for_left = default_output_suffix_for_left,
std::string output_suffix_for_right = default_output_suffix_for_right)
: join_type(join_type),
left_keys(std::move(left_keys)),
right_keys(std::move(right_keys)),
output_all(false),
left_output(std::move(left_output)),
right_output(std::move(right_output)),
key_cmp(std::move(key_cmp)),
output_suffix_for_left(std::move(output_suffix_for_left)),
output_suffix_for_right(std::move(output_suffix_for_right)),
filter(std::move(filter)) {}
// type of join (inner, left, semi...)
JoinType join_type;
// key fields from left input
std::vector<FieldRef> left_keys;
// key fields from right input
std::vector<FieldRef> right_keys;
  // if set, all valid fields from both left and right input will be output
// (and field ref vectors for output fields will be ignored)
bool output_all;
// output fields passed from left input
std::vector<FieldRef> left_output;
// output fields passed from right input
std::vector<FieldRef> right_output;
  // key comparison function (determines whether a null key is equal to another null
  // key or not)
std::vector<JoinKeyCmp> key_cmp;
// suffix added to names of output fields coming from left input (used to distinguish,
// if necessary, between fields of the same name in left and right input and can be left
// empty if there are no name collisions)
std::string output_suffix_for_left;
// suffix added to names of output fields coming from right input
std::string output_suffix_for_right;
// residual filter which is applied to matching rows. Rows that do not match
// the filter are not included. The filter is applied against the
// concatenated input schema (left fields then right fields) and can reference
// fields that are not included in the output.
Expression filter;
};
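// A usage sketch (illustrative, with hypothetical key names): an inner join on
// l_key == r_key that outputs all columns, suffixing any colliding names.
inline HashJoinNodeOptions ExampleInnerJoinOptions() {
  return HashJoinNodeOptions(JoinType::INNER,
                             /*in_left_keys=*/{FieldRef("l_key")},
                             /*in_right_keys=*/{FieldRef("r_key")},
                             /*filter=*/literal(true),
                             /*output_suffix_for_left=*/"_l",
                             /*output_suffix_for_right=*/"_r");
}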
/// \brief Make a node which selects the top_k/bottom_k rows passed through it
///
/// All batches pushed to this node will be accumulated, then the top-k (or bottom-k)
/// rows will be selected based on the given fields and forwarded to the generator in
/// sorted order.
class ARROW_EXPORT SelectKSinkNodeOptions : public SinkNodeOptions {
public:
explicit SelectKSinkNodeOptions(
SelectKOptions select_k_options,
std::function<Future<util::optional<ExecBatch>>()>* generator)
: SinkNodeOptions(generator), select_k_options(std::move(select_k_options)) {}
/// SelectK options
SelectKOptions select_k_options;
};
/// @}
/// \brief Adapt a Table as a sink node
///
/// Collects the output of an execution plan into a table pointer.
class ARROW_EXPORT TableSinkNodeOptions : public ExecNodeOptions {
public:
explicit TableSinkNodeOptions(std::shared_ptr<Table>* output_table)
: output_table(output_table) {}
std::shared_ptr<Table>* output_table;
};
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,53 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <functional>
#include <memory>
#include <vector>
#include "arrow/compute/exec/options.h"
#include "arrow/record_batch.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
namespace arrow {
namespace compute {
class OrderByImpl {
public:
virtual ~OrderByImpl() = default;
virtual void InputReceived(const std::shared_ptr<RecordBatch>& batch) = 0;
virtual Result<Datum> DoFinish() = 0;
virtual std::string ToString() const = 0;
static Result<std::unique_ptr<OrderByImpl>> MakeSort(
ExecContext* ctx, const std::shared_ptr<Schema>& output_schema,
const SortOptions& options);
static Result<std::unique_ptr<OrderByImpl>> MakeSelectK(
ExecContext* ctx, const std::shared_ptr<Schema>& output_schema,
const SelectKOptions& options);
};
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,136 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <atomic>
#include <cassert>
#include <cstdint>
#include <functional>
#include <random>
#include "arrow/buffer.h"
#include "arrow/compute/exec/util.h"
namespace arrow {
namespace compute {
class PartitionSort {
public:
/// \brief Bucket sort rows on partition ids in O(num_rows) time.
///
/// The output also includes the exclusive cumulative sum of bucket sizes.
/// This corresponds to ranges in the sorted array containing all row ids for
/// each of the partitions.
///
/// prtn_ranges must be allocated with at least num_prtns + 1 elements before the
/// call. When this method returns, prtn_ranges[i] will contain the total number of
/// elements in partitions 0 through i - 1, so prtn_ranges[0] will be 0.
///
/// prtn_id_impl must be a function that takes in a row id (int) and returns
/// a partition id (int). The returned partition id must be between 0 and
/// num_prtns (exclusive).
///
/// output_pos_impl is a function that takes in a row id (int) and a position (int)
/// in the bucket sorted output. The function should insert the row in the
/// output.
///
/// For example:
///
/// in_arr: [5, 7, 2, 3, 5, 4]
/// num_prtns: 3
/// prtn_id_impl: [&in_arr] (int row_id) { return in_arr[row_id] / 3; }
/// output_pos_impl: [&] (int row_id, int pos) { out_arr[pos] = in_arr[row_id]; }
///
/// After execution:
/// out_arr: [2, 5, 3, 5, 4, 7]
/// prtn_ranges: [0, 1, 5, 6]
template <class INPUT_PRTN_ID_FN, class OUTPUT_POS_FN>
static void Eval(int num_rows, int num_prtns, uint16_t* prtn_ranges,
INPUT_PRTN_ID_FN prtn_id_impl, OUTPUT_POS_FN output_pos_impl) {
ARROW_DCHECK(num_rows > 0 && num_rows <= (1 << 15));
ARROW_DCHECK(num_prtns >= 1 && num_prtns <= (1 << 15));
memset(prtn_ranges, 0, (num_prtns + 1) * sizeof(uint16_t));
for (int i = 0; i < num_rows; ++i) {
int prtn_id = static_cast<int>(prtn_id_impl(i));
++prtn_ranges[prtn_id + 1];
}
uint16_t sum = 0;
for (int i = 0; i < num_prtns; ++i) {
uint16_t sum_next = sum + prtn_ranges[i + 1];
prtn_ranges[i + 1] = sum;
sum = sum_next;
}
for (int i = 0; i < num_rows; ++i) {
int prtn_id = static_cast<int>(prtn_id_impl(i));
int pos = prtn_ranges[prtn_id + 1]++;
output_pos_impl(i, pos);
}
}
};
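// A runnable sketch of the example above (an illustrative addition, not upstream
// API): bucket-sorts six values into three partitions defined by value / 3.
inline void ExamplePartitionSort() {
  int in_arr[] = {5, 7, 2, 3, 5, 4};
  int out_arr[6];
  uint16_t prtn_ranges[4];  // num_prtns + 1 entries
  PartitionSort::Eval(
      /*num_rows=*/6, /*num_prtns=*/3, prtn_ranges,
      [&](int row_id) { return in_arr[row_id] / 3; },
      [&](int row_id, int pos) { out_arr[pos] = in_arr[row_id]; });
  // out_arr is now {2, 5, 3, 5, 4, 7} and prtn_ranges is {0, 1, 5, 6}.
}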
/// \brief A control for synchronizing threads on a partitionable workload
class PartitionLocks {
public:
PartitionLocks();
~PartitionLocks();
/// \brief Initializes the control, must be called before use
///
/// \param num_prtns Number of partitions to synchronize
void Init(int num_prtns);
/// \brief Cleans up the control, it should not be used after this call
void CleanUp();
  /// \brief Acquire a partition to work on
///
/// \param num_prtns Length of prtns_to_try, must be <= num_prtns used in Init
/// \param prtns_to_try An array of partitions that still have remaining work
/// \param limit_retries If false, this method will spinwait forever until success
/// \param max_retries Max times to attempt checking out work before returning false
/// \param[out] locked_prtn_id The id of the partition locked
/// \param[out] locked_prtn_id_pos The index of the partition locked in prtns_to_try
/// \return True if a partition was locked, false if max_retries was attempted
/// without successfully acquiring a lock
///
/// This method is thread safe
bool AcquirePartitionLock(int num_prtns, const int* prtns_to_try, bool limit_retries,
int max_retries, int* locked_prtn_id,
int* locked_prtn_id_pos);
/// \brief Release a partition so that other threads can work on it
void ReleasePartitionLock(int prtn_id);
private:
std::atomic<bool>* lock_ptr(int prtn_id);
int random_int(int num_values);
struct PartitionLock {
static constexpr int kCacheLineBytes = 64;
std::atomic<bool> lock;
uint8_t padding[kCacheLineBytes];
};
int num_prtns_;
std::unique_ptr<PartitionLock[]> locks_;
std::seed_seq rand_seed_;
std::mt19937 rand_engine_;
std::uniform_int_distribution<uint64_t> rand_distribution_;
};
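// A usage sketch (illustrative): a worker picks one partition that still has
// remaining work, processes it under the lock, then releases it.
inline void ExampleProcessOnePartition(PartitionLocks* locks, const int* prtns_to_try,
                                       int num_prtns_to_try) {
  int locked_prtn_id = -1;
  int locked_prtn_id_pos = -1;
  // With limit_retries == false this spin-waits until some partition is acquired,
  // so max_retries is ignored.
  if (locks->AcquirePartitionLock(num_prtns_to_try, prtns_to_try,
                                  /*limit_retries=*/false, /*max_retries=*/0,
                                  &locked_prtn_id, &locked_prtn_id_pos)) {
    // ... process rows belonging to partition locked_prtn_id ...
    locks->ReleasePartitionLock(locked_prtn_id);
  }
}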
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,215 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "arrow/compute/exec/key_encode.h" // for KeyColumnMetadata
#include "arrow/type.h" // for DataType, FieldRef, Field and Schema
#include "arrow/util/mutex.h"
namespace arrow {
using internal::checked_cast;
namespace compute {
// Identifiers for all different row schemas that are used in a join
//
enum class HashJoinProjection : int {
INPUT = 0,
KEY = 1,
PAYLOAD = 2,
FILTER = 3,
OUTPUT = 4
};
struct SchemaProjectionMap {
static constexpr int kMissingField = -1;
int num_cols;
const int* source_to_base;
const int* base_to_target;
inline int get(int i) const {
ARROW_DCHECK(i >= 0 && i < num_cols);
ARROW_DCHECK(source_to_base[i] != kMissingField);
return base_to_target[source_to_base[i]];
}
};
/// Helper class for managing different projections of the same row schema.
/// Used to efficiently map any field in one projection to a corresponding field in
/// another projection.
/// Materialized mappings are generated lazily at the time of the first access.
/// Thread-safe apart from initialization.
template <typename ProjectionIdEnum>
class SchemaProjectionMaps {
public:
static constexpr int kMissingField = -1;
Status Init(ProjectionIdEnum full_schema_handle, const Schema& schema,
const std::vector<ProjectionIdEnum>& projection_handles,
const std::vector<const std::vector<FieldRef>*>& projections) {
ARROW_DCHECK(projection_handles.size() == projections.size());
ARROW_RETURN_NOT_OK(RegisterSchema(full_schema_handle, schema));
for (size_t i = 0; i < projections.size(); ++i) {
ARROW_RETURN_NOT_OK(
RegisterProjectedSchema(projection_handles[i], *(projections[i]), schema));
}
RegisterEnd();
return Status::OK();
}
int num_cols(ProjectionIdEnum schema_handle) const {
int id = schema_id(schema_handle);
return static_cast<int>(schemas_[id].second.size());
}
const std::string& field_name(ProjectionIdEnum schema_handle, int field_id) const {
return field(schema_handle, field_id).field_name;
}
const std::shared_ptr<DataType>& data_type(ProjectionIdEnum schema_handle,
int field_id) const {
return field(schema_handle, field_id).data_type;
}
SchemaProjectionMap map(ProjectionIdEnum from, ProjectionIdEnum to) const {
int id_from = schema_id(from);
int id_to = schema_id(to);
SchemaProjectionMap result;
result.num_cols = num_cols(from);
result.source_to_base = mappings_[id_from].data();
result.base_to_target = inverse_mappings_[id_to].data();
return result;
}
protected:
struct FieldInfo {
int field_path;
std::string field_name;
std::shared_ptr<DataType> data_type;
};
Status RegisterSchema(ProjectionIdEnum handle, const Schema& schema) {
std::vector<FieldInfo> out_fields;
const FieldVector& in_fields = schema.fields();
out_fields.resize(in_fields.size());
for (size_t i = 0; i < in_fields.size(); ++i) {
const std::string& name = in_fields[i]->name();
const std::shared_ptr<DataType>& type = in_fields[i]->type();
out_fields[i].field_path = static_cast<int>(i);
out_fields[i].field_name = name;
out_fields[i].data_type = type;
}
schemas_.push_back(std::make_pair(handle, out_fields));
return Status::OK();
}
Status RegisterProjectedSchema(ProjectionIdEnum handle,
const std::vector<FieldRef>& selected_fields,
const Schema& full_schema) {
std::vector<FieldInfo> out_fields;
const FieldVector& in_fields = full_schema.fields();
out_fields.resize(selected_fields.size());
for (size_t i = 0; i < selected_fields.size(); ++i) {
// All fields must be found in schema without ambiguity
ARROW_ASSIGN_OR_RAISE(auto match, selected_fields[i].FindOne(full_schema));
const std::string& name = in_fields[match[0]]->name();
const std::shared_ptr<DataType>& type = in_fields[match[0]]->type();
out_fields[i].field_path = match[0];
out_fields[i].field_name = name;
out_fields[i].data_type = type;
}
schemas_.push_back(std::make_pair(handle, out_fields));
return Status::OK();
}
void RegisterEnd() {
size_t size = schemas_.size();
mappings_.resize(size);
inverse_mappings_.resize(size);
int id_base = 0;
for (size_t i = 0; i < size; ++i) {
GenerateMapForProjection(static_cast<int>(i), id_base);
}
}
int schema_id(ProjectionIdEnum schema_handle) const {
for (size_t i = 0; i < schemas_.size(); ++i) {
if (schemas_[i].first == schema_handle) {
return static_cast<int>(i);
}
}
// We should never get here
ARROW_DCHECK(false);
return -1;
}
const FieldInfo& field(ProjectionIdEnum schema_handle, int field_id) const {
int id = schema_id(schema_handle);
const std::vector<FieldInfo>& field_infos = schemas_[id].second;
return field_infos[field_id];
}
void GenerateMapForProjection(int id_proj, int id_base) {
int num_cols_proj = static_cast<int>(schemas_[id_proj].second.size());
int num_cols_base = static_cast<int>(schemas_[id_base].second.size());
std::vector<int>& mapping = mappings_[id_proj];
std::vector<int>& inverse_mapping = inverse_mappings_[id_proj];
mapping.resize(num_cols_proj);
inverse_mapping.resize(num_cols_base);
if (id_proj == id_base) {
for (int i = 0; i < num_cols_base; ++i) {
mapping[i] = inverse_mapping[i] = i;
}
} else {
const std::vector<FieldInfo>& fields_proj = schemas_[id_proj].second;
const std::vector<FieldInfo>& fields_base = schemas_[id_base].second;
for (int i = 0; i < num_cols_base; ++i) {
inverse_mapping[i] = SchemaProjectionMap::kMissingField;
}
for (int i = 0; i < num_cols_proj; ++i) {
int field_id = SchemaProjectionMap::kMissingField;
for (int j = 0; j < num_cols_base; ++j) {
if (fields_proj[i].field_path == fields_base[j].field_path) {
field_id = j;
// If there are multiple matches for the same input field,
// it will be mapped to the first match.
break;
}
}
ARROW_DCHECK(field_id != SchemaProjectionMap::kMissingField);
mapping[i] = field_id;
inverse_mapping[field_id] = i;
}
}
}
// vector used as a mapping from ProjectionIdEnum to fields
std::vector<std::pair<ProjectionIdEnum, std::vector<FieldInfo>>> schemas_;
std::vector<std::vector<int>> mappings_;
std::vector<std::vector<int>> inverse_mappings_;
};
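// A usage sketch (illustrative; the key field name "a" is hypothetical): register
// the full input schema plus a key projection, then map key columns back to their
// input columns.
inline Status ExampleProjectionMaps(const Schema& schema) {
  SchemaProjectionMaps<HashJoinProjection> maps;
  std::vector<FieldRef> key_fields = {FieldRef("a")};
  ARROW_RETURN_NOT_OK(maps.Init(HashJoinProjection::INPUT, schema,
                                {HashJoinProjection::KEY}, {&key_fields}));
  SchemaProjectionMap key_to_input =
      maps.map(HashJoinProjection::KEY, HashJoinProjection::INPUT);
  int input_col_of_first_key = key_to_input.get(0);  // index into the INPUT schema
  (void)input_col_of_first_key;
  return Status::OK();
}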
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,100 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <atomic>
#include <cstdint>
#include <functional>
#include <vector>
#include "arrow/status.h"
#include "arrow/util/logging.h"
namespace arrow {
namespace compute {
// Atomic value surrounded by padding bytes to avoid cache line invalidation
// whenever it is modified by a concurrent thread on a different CPU core.
//
template <typename T>
class AtomicWithPadding {
private:
static constexpr int kCacheLineSize = 64;
uint8_t padding_before[kCacheLineSize];
public:
std::atomic<T> value;
private:
uint8_t padding_after[kCacheLineSize];
};
// Used for asynchronous execution of operations that can be broken into
// a fixed number of symmetric tasks that can be executed concurrently.
//
// Implements priorities between multiple such operations, called task groups.
//
// Allows specifying the maximum number of in-flight tasks at any moment.
//
// Also allows for executing the next pending tasks immediately on the calling thread.
//
class TaskScheduler {
public:
using TaskImpl = std::function<Status(size_t, int64_t)>;
using TaskGroupContinuationImpl = std::function<Status(size_t)>;
using ScheduleImpl = std::function<Status(TaskGroupContinuationImpl)>;
using AbortContinuationImpl = std::function<void()>;
virtual ~TaskScheduler() = default;
// Order in which task groups are registered represents priorities of their tasks
// (the first group has the highest priority).
//
// Returns task group identifier that is used to request operations on the task group.
virtual int RegisterTaskGroup(TaskImpl task_impl,
TaskGroupContinuationImpl cont_impl) = 0;
virtual void RegisterEnd() = 0;
  // total_num_tasks may be zero, in which case the task group continuation will be
  // executed immediately.
virtual Status StartTaskGroup(size_t thread_id, int group_id,
int64_t total_num_tasks) = 0;
// Execute given number of tasks immediately using caller thread
virtual Status ExecuteMore(size_t thread_id, int num_tasks_to_execute,
bool execute_all) = 0;
// Begin scheduling tasks using provided callback and
// the limit on the number of in-flight tasks at any moment.
//
// Scheduling will continue as long as there are waiting tasks.
//
  // It will automatically resume whenever a new task group gets started.
virtual Status StartScheduling(size_t thread_id, ScheduleImpl schedule_impl,
int num_concurrent_tasks, bool use_sync_execution) = 0;
// Abort scheduling and execution.
// Used in case of being notified about unrecoverable error for the entire query.
virtual void Abort(AbortContinuationImpl impl) = 0;
static std::unique_ptr<TaskScheduler> Make();
};
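// A wiring sketch (illustrative; the synchronous schedule callback and the call
// order here are assumptions for the example): one task group whose tasks and
// continuation do nothing, executed on the calling thread.
inline Status ExampleRunTaskGroup() {
  std::unique_ptr<TaskScheduler> scheduler = TaskScheduler::Make();
  int group_id = scheduler->RegisterTaskGroup(
      [](size_t /*thread_id*/, int64_t /*task_id*/) { return Status::OK(); },
      [](size_t /*thread_id*/) { return Status::OK(); });
  scheduler->RegisterEnd();
  ARROW_RETURN_NOT_OK(scheduler->StartScheduling(
      /*thread_id=*/0,
      [](TaskScheduler::TaskGroupContinuationImpl cont) { return cont(0); },
      /*num_concurrent_tasks=*/1, /*use_sync_execution=*/true));
  return scheduler->StartTaskGroup(/*thread_id=*/0, group_id, /*total_num_tasks=*/4);
}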
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,132 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <arrow/testing/gtest_util.h>
#include <arrow/util/vector.h>
#include <functional>
#include <random>
#include <string>
#include <vector>
#include "arrow/compute/exec.h"
#include "arrow/compute/exec/exec_plan.h"
#include "arrow/testing/visibility.h"
#include "arrow/util/async_generator.h"
#include "arrow/util/pcg_random.h"
#include "arrow/util/string_view.h"
namespace arrow {
namespace compute {
using StartProducingFunc = std::function<Status(ExecNode*)>;
using StopProducingFunc = std::function<void(ExecNode*)>;
// Make a dummy node that has no execution behaviour
ARROW_TESTING_EXPORT
ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, std::vector<ExecNode*> inputs,
int num_outputs, StartProducingFunc = {}, StopProducingFunc = {});
ARROW_TESTING_EXPORT
ExecBatch ExecBatchFromJSON(const std::vector<ValueDescr>& descrs,
util::string_view json);
struct BatchesWithSchema {
std::vector<ExecBatch> batches;
std::shared_ptr<Schema> schema;
AsyncGenerator<util::optional<ExecBatch>> gen(bool parallel, bool slow) const {
auto opt_batches = ::arrow::internal::MapVector(
[](ExecBatch batch) { return util::make_optional(std::move(batch)); }, batches);
AsyncGenerator<util::optional<ExecBatch>> gen;
if (parallel) {
// emulate batches completing initial decode-after-scan on a cpu thread
gen = MakeBackgroundGenerator(MakeVectorIterator(std::move(opt_batches)),
::arrow::internal::GetCpuThreadPool())
.ValueOrDie();
// ensure that callbacks are not executed immediately on a background thread
gen =
MakeTransferredGenerator(std::move(gen), ::arrow::internal::GetCpuThreadPool());
} else {
gen = MakeVectorGenerator(std::move(opt_batches));
}
if (slow) {
gen =
MakeMappedGenerator(std::move(gen), [](const util::optional<ExecBatch>& batch) {
SleepABit();
return batch;
});
}
return gen;
}
};
ARROW_TESTING_EXPORT
Future<std::vector<ExecBatch>> StartAndCollect(
ExecPlan* plan, AsyncGenerator<util::optional<ExecBatch>> gen);
ARROW_TESTING_EXPORT
BatchesWithSchema MakeBasicBatches();
ARROW_TESTING_EXPORT
BatchesWithSchema MakeNestedBatches();
ARROW_TESTING_EXPORT
BatchesWithSchema MakeRandomBatches(const std::shared_ptr<Schema>& schema,
int num_batches = 10, int batch_size = 4);
ARROW_TESTING_EXPORT
Result<std::shared_ptr<Table>> SortTableOnAllFields(const std::shared_ptr<Table>& tab);
ARROW_TESTING_EXPORT
void AssertTablesEqual(const std::shared_ptr<Table>& exp,
const std::shared_ptr<Table>& act);
ARROW_TESTING_EXPORT
void AssertExecBatchesEqual(const std::shared_ptr<Schema>& schema,
const std::vector<ExecBatch>& exp,
const std::vector<ExecBatch>& act);
ARROW_TESTING_EXPORT
bool operator==(const Declaration&, const Declaration&);
ARROW_TESTING_EXPORT
void PrintTo(const Declaration& decl, std::ostream* os);
class Random64Bit {
public:
explicit Random64Bit(int32_t seed) : rng_(seed) {}
uint64_t next() { return dist_(rng_); }
template <typename T>
inline T from_range(const T& min_val, const T& max_val) {
return static_cast<T>(min_val + (next() % (max_val - min_val + 1)));
}
private:
random::pcg32_fast rng_;
std::uniform_int_distribution<uint64_t> dist_;
};
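// A small usage sketch: from_range is inclusive on both ends.
inline int ExampleDieRoll() {
  Random64Bit rng(/*seed=*/42);
  return rng.from_range(1, 6);  // uniform over {1, ..., 6}
}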
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,64 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "arrow/compute/type_fwd.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/optional.h"
namespace arrow {
namespace compute {
namespace internal {
class ARROW_EXPORT TpchGen {
public:
virtual ~TpchGen() = default;
/**
 * \brief Create a factory for nodes that generate TPC-H data
 *
 * Note: Individual tables will reference each other. It is important that you only
 * create a single TpchGen instance for each plan, and then create the nodes for each
 * table from that single instance. Note: Every batch will be scheduled as a new
 * task using the ExecPlan's scheduler.
 */
static Result<std::unique_ptr<TpchGen>> Make(
ExecPlan* plan, double scale_factor = 1.0, int64_t batch_size = 4096,
util::optional<int64_t> seed = util::nullopt);
// The below methods will create and add an ExecNode to the plan that generates
// data for the desired table. If columns is empty, all columns will be generated.
// The methods return the added ExecNode, which should be used for inputs.
virtual Result<ExecNode*> Supplier(std::vector<std::string> columns = {}) = 0;
virtual Result<ExecNode*> Part(std::vector<std::string> columns = {}) = 0;
virtual Result<ExecNode*> PartSupp(std::vector<std::string> columns = {}) = 0;
virtual Result<ExecNode*> Customer(std::vector<std::string> columns = {}) = 0;
virtual Result<ExecNode*> Orders(std::vector<std::string> columns = {}) = 0;
virtual Result<ExecNode*> Lineitem(std::vector<std::string> columns = {}) = 0;
virtual Result<ExecNode*> Nation(std::vector<std::string> columns = {}) = 0;
virtual Result<ExecNode*> Region(std::vector<std::string> columns = {}) = 0;
};
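// A usage sketch (illustrative; the column names are assumptions): add a node
// generating two columns of the lineitem table to an existing plan.
inline Result<ExecNode*> ExampleLineitemNode(ExecPlan* plan) {
  ARROW_ASSIGN_OR_RAISE(std::unique_ptr<TpchGen> gen, TpchGen::Make(plan));
  return gen->Lineitem({"l_orderkey", "l_quantity"});
}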
} // namespace internal
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,297 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <atomic>
#include <cstdint>
#include <thread>
#include <unordered_map>
#include <vector>
#include "arrow/buffer.h"
#include "arrow/compute/type_fwd.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/cpu_info.h"
#include "arrow/util/logging.h"
#include "arrow/util/mutex.h"
#include "arrow/util/optional.h"
#include "arrow/util/thread_pool.h"
#if defined(__clang__) || defined(__GNUC__)
#define BYTESWAP(x) __builtin_bswap64(x)
#define ROTL(x, n) (((x) << (n)) | ((x) >> ((-n) & 31)))
#define ROTL64(x, n) (((x) << (n)) | ((x) >> ((-n) & 63)))
#define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
#elif defined(_MSC_VER)
#include <intrin.h>
#define BYTESWAP(x) _byteswap_uint64(x)
#define ROTL(x, n) _rotl((x), (n))
#define ROTL64(x, n) _rotl64((x), (n))
#if defined(_M_X64) || defined(_M_IX86)
#include <mmintrin.h> // https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx
#define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
#else
#define PREFETCH(ptr) (void)(ptr) /* disabled */
#endif
#endif
namespace arrow {
namespace util {
template <typename T>
inline void CheckAlignment(const void* ptr) {
ARROW_DCHECK(reinterpret_cast<uint64_t>(ptr) % sizeof(T) == 0);
}
// Some platforms typedef int64_t as long int instead of long long int,
// which breaks the _mm256_i64gather_epi64 and _mm256_i32gather_epi64
// intrinsics, which require long long.
// We cast to the type below inside these intrinsics to make the code
// compile in all cases.
//
using int64_for_gather_t = const long long int;  // NOLINT runtime-int
// All MiniBatch... classes use TempVectorStack for vector allocations and can
// only work with vectors up to 1024 elements.
//
// They should only be allocated on the stack to guarantee the right sequence
// of allocation and deallocation of vectors from TempVectorStack.
//
class MiniBatch {
public:
static constexpr int kMiniBatchLength = 1024;
};
/// Storage used to allocate temporary vectors of a batch size.
/// Temporary vectors are the vectorized analogue of temporary variables
/// allocated on the stack: instead of a single value, each allocation
/// holds a vector of temporaries.
class TempVectorStack {
template <typename>
friend class TempVectorHolder;
public:
Status Init(MemoryPool* pool, int64_t size) {
num_vectors_ = 0;
top_ = 0;
buffer_size_ = size;
ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool));
// Ensure later operations don't accidentally read uninitialized memory.
std::memset(buffer->mutable_data(), 0xFF, size);
buffer_ = std::move(buffer);
return Status::OK();
}
private:
int64_t PaddedAllocationSize(int64_t num_bytes) {
// Round up allocation size to multiple of 8 bytes
// to avoid returning temp vectors with unaligned address.
//
// Also add padding at the end to facilitate loads and stores
// using SIMD when number of vector elements is not divisible
// by the number of SIMD lanes.
//
return ::arrow::bit_util::RoundUp(num_bytes, sizeof(int64_t)) + kPadding;
}
void alloc(uint32_t num_bytes, uint8_t** data, int* id) {
int64_t old_top = top_;
top_ += PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t);
// Stack overflow check
ARROW_DCHECK(top_ <= buffer_size_);
*data = buffer_->mutable_data() + old_top + sizeof(uint64_t);
// We set 8 bytes before the beginning of the allocated range and
// 8 bytes after the end to check for stack overflow (which would
// result in those known bytes being corrupted).
reinterpret_cast<uint64_t*>(buffer_->mutable_data() + old_top)[0] = kGuard1;
reinterpret_cast<uint64_t*>(buffer_->mutable_data() + top_)[-1] = kGuard2;
*id = num_vectors_++;
}
void release(int id, uint32_t num_bytes) {
ARROW_DCHECK(num_vectors_ == id + 1);
int64_t size = PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t);
ARROW_DCHECK(reinterpret_cast<const uint64_t*>(buffer_->mutable_data() + top_)[-1] ==
kGuard2);
ARROW_DCHECK(top_ >= size);
top_ -= size;
ARROW_DCHECK(reinterpret_cast<const uint64_t*>(buffer_->mutable_data() + top_)[0] ==
kGuard1);
--num_vectors_;
}
static constexpr uint64_t kGuard1 = 0x3141592653589793ULL;
static constexpr uint64_t kGuard2 = 0x0577215664901532ULL;
static constexpr int64_t kPadding = 64;
int num_vectors_;
int64_t top_;
std::unique_ptr<Buffer> buffer_;
int64_t buffer_size_;
};
template <typename T>
class TempVectorHolder {
friend class TempVectorStack;
public:
~TempVectorHolder() { stack_->release(id_, num_elements_ * sizeof(T)); }
T* mutable_data() { return reinterpret_cast<T*>(data_); }
TempVectorHolder(TempVectorStack* stack, uint32_t num_elements) {
stack_ = stack;
num_elements_ = num_elements;
stack_->alloc(num_elements * sizeof(T), &data_, &id_);
}
private:
TempVectorStack* stack_;
uint8_t* data_;
int id_;
uint32_t num_elements_;
};
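
// A minimal usage sketch (non-normative): borrowing a temporary vector for
// one mini-batch of work. The holder must be a stack (automatic) variable so
// that holders are destroyed, and their slots released, in reverse order of
// allocation. `stack` is assumed to be already initialized via Init().
inline void ExampleTempVector(TempVectorStack* stack) {
  TempVectorHolder<uint16_t> indexes(stack, MiniBatch::kMiniBatchLength);
  uint16_t* ids = indexes.mutable_data();
  for (int i = 0; i < MiniBatch::kMiniBatchLength; ++i) {
    ids[i] = static_cast<uint16_t>(i);
  }
  // The slot is released here, when `indexes` goes out of scope.
}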
class bit_util {
public:
static void bits_to_indexes(int bit_to_search, int64_t hardware_flags,
const int num_bits, const uint8_t* bits, int* num_indexes,
uint16_t* indexes, int bit_offset = 0);
static void bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
const int num_bits, const uint8_t* bits,
const uint16_t* input_indexes, int* num_indexes,
uint16_t* indexes, int bit_offset = 0);
// Input and output indexes may be pointing to the same data (in-place filtering).
static void bits_split_indexes(int64_t hardware_flags, const int num_bits,
const uint8_t* bits, int* num_indexes_bit0,
uint16_t* indexes_bit0, uint16_t* indexes_bit1,
int bit_offset = 0);
// Bit 1 is replaced with byte 0xFF.
static void bits_to_bytes(int64_t hardware_flags, const int num_bits,
const uint8_t* bits, uint8_t* bytes, int bit_offset = 0);
// Return highest bit of each byte.
static void bytes_to_bits(int64_t hardware_flags, const int num_bits,
const uint8_t* bytes, uint8_t* bits, int bit_offset = 0);
static bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
uint32_t num_bytes);
private:
inline static void bits_to_indexes_helper(uint64_t word, uint16_t base_index,
int* num_indexes, uint16_t* indexes);
inline static void bits_filter_indexes_helper(uint64_t word,
const uint16_t* input_indexes,
int* num_indexes, uint16_t* indexes);
template <int bit_to_search, bool filter_input_indexes>
static void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits,
const uint8_t* bits, const uint16_t* input_indexes,
int* num_indexes, uint16_t* indexes,
uint16_t base_index = 0);
#if defined(ARROW_HAVE_AVX2)
static void bits_to_indexes_avx2(int bit_to_search, const int num_bits,
const uint8_t* bits, int* num_indexes,
uint16_t* indexes, uint16_t base_index = 0);
static void bits_filter_indexes_avx2(int bit_to_search, const int num_bits,
const uint8_t* bits, const uint16_t* input_indexes,
int* num_indexes, uint16_t* indexes);
template <int bit_to_search>
static void bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
int* num_indexes, uint16_t* indexes,
uint16_t base_index = 0);
template <int bit_to_search>
static void bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
const uint16_t* input_indexes,
int* num_indexes, uint16_t* indexes);
static void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, uint8_t* bytes);
static void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, uint8_t* bits);
static bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes);
#endif
};
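
// A minimal usage sketch (non-normative): collecting the positions of set
// bits from a bitmap into a uint16_t index vector. `hardware_flags` would
// typically come from arrow::internal::CpuInfo (included above); num_bits is
// assumed to be at most MiniBatch::kMiniBatchLength.
inline void ExampleBitsToIndexes(int64_t hardware_flags, const uint8_t* bits,
                                 int num_bits) {
  uint16_t indexes[MiniBatch::kMiniBatchLength];
  int num_indexes = 0;
  bit_util::bits_to_indexes(/*bit_to_search=*/1, hardware_flags, num_bits, bits,
                            &num_indexes, indexes);
  // indexes[0..num_indexes) now lists the positions of the set bits.
}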
} // namespace util
namespace compute {
ARROW_EXPORT
Status ValidateExecNodeInputs(ExecPlan* plan, const std::vector<ExecNode*>& inputs,
int expected_num_inputs, const char* kind_name);
ARROW_EXPORT
Result<std::shared_ptr<Table>> TableFromExecBatches(
const std::shared_ptr<Schema>& schema, const std::vector<ExecBatch>& exec_batches);
class AtomicCounter {
public:
AtomicCounter() = default;
int count() const { return count_.load(); }
util::optional<int> total() const {
int total = total_.load();
if (total == -1) return {};
return total;
}
// return true if the counter is complete
bool Increment() {
DCHECK_NE(count_.load(), total_.load());
int count = count_.fetch_add(1) + 1;
if (count != total_.load()) return false;
return DoneOnce();
}
// return true if the counter is complete
bool SetTotal(int total) {
total_.store(total);
if (count_.load() != total) return false;
return DoneOnce();
}
// return true if the counter has not already been completed
bool Cancel() { return DoneOnce(); }
// return true if the counter has finished or been cancelled
bool Completed() { return complete_.load(); }
private:
// ensure there is only one true return from Increment(), SetTotal(), or Cancel()
bool DoneOnce() {
bool expected = false;
return complete_.compare_exchange_strong(expected, true);
}
std::atomic<int> count_{0}, total_{-1};
std::atomic<bool> complete_{false};
};
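
// A minimal usage sketch (non-normative): AtomicCounter coordinating the
// completion of a stream of batches whose total count is discovered late.
// Exactly one caller observes `true`, whether from Increment() or SetTotal().
inline void ExampleCounterUsage(AtomicCounter* counter, int total_batches) {
  // Consumer side, once per processed batch:
  if (counter->Increment()) {
    // ... this was the last batch; finish up here ...
  }
  // Producer side, once the total becomes known:
  if (counter->SetTotal(total_batches)) {
    // ... all batches had already been counted; finish up here ...
  }
}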
class ThreadIndexer {
public:
size_t operator()();
static size_t Capacity();
private:
static size_t Check(size_t thread_index);
util::Mutex mutex_;
std::unordered_map<std::thread::id, size_t> id_to_index_;
};
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,403 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// NOTE: API is EXPERIMENTAL and will change without going through a
// deprecation cycle.
#pragma once
#include <string>
#include <utility>
#include <vector>
#include "arrow/compute/kernel.h"
#include "arrow/compute/type_fwd.h"
#include "arrow/datum.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/compare.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace compute {
/// \defgroup compute-functions Abstract compute function API
///
/// @{
/// \brief Extension point for defining options outside libarrow (but
/// still within this project).
class ARROW_EXPORT FunctionOptionsType {
public:
virtual ~FunctionOptionsType() = default;
virtual const char* type_name() const = 0;
virtual std::string Stringify(const FunctionOptions&) const = 0;
virtual bool Compare(const FunctionOptions&, const FunctionOptions&) const = 0;
virtual Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const;
virtual Result<std::unique_ptr<FunctionOptions>> Deserialize(
const Buffer& buffer) const;
virtual std::unique_ptr<FunctionOptions> Copy(const FunctionOptions&) const = 0;
};
/// \brief Base class for specifying options configuring a function's behavior,
/// such as error handling.
class ARROW_EXPORT FunctionOptions : public util::EqualityComparable<FunctionOptions> {
public:
virtual ~FunctionOptions() = default;
const FunctionOptionsType* options_type() const { return options_type_; }
const char* type_name() const { return options_type()->type_name(); }
bool Equals(const FunctionOptions& other) const;
using util::EqualityComparable<FunctionOptions>::Equals;
using util::EqualityComparable<FunctionOptions>::operator==;
using util::EqualityComparable<FunctionOptions>::operator!=;
std::string ToString() const;
std::unique_ptr<FunctionOptions> Copy() const;
/// \brief Serialize an options struct to a buffer.
Result<std::shared_ptr<Buffer>> Serialize() const;
/// \brief Deserialize an options struct from a buffer.
/// Note: this will only look for `type_name` in the default FunctionRegistry;
/// to use a custom FunctionRegistry, look up the FunctionOptionsType, then
/// call FunctionOptionsType::Deserialize().
static Result<std::unique_ptr<FunctionOptions>> Deserialize(
const std::string& type_name, const Buffer& buffer);
protected:
explicit FunctionOptions(const FunctionOptionsType* type) : options_type_(type) {}
const FunctionOptionsType* options_type_;
};
ARROW_EXPORT void PrintTo(const FunctionOptions&, std::ostream*);
/// \brief Contains the number of required arguments for the function.
///
/// Naming conventions taken from https://en.wikipedia.org/wiki/Arity.
struct ARROW_EXPORT Arity {
/// \brief A function taking no arguments
static Arity Nullary() { return Arity(0, false); }
/// \brief A function taking 1 argument
static Arity Unary() { return Arity(1, false); }
/// \brief A function taking 2 arguments
static Arity Binary() { return Arity(2, false); }
/// \brief A function taking 3 arguments
static Arity Ternary() { return Arity(3, false); }
/// \brief A function taking a variable number of arguments
///
/// \param[in] min_args the minimum number of arguments required when
/// invoking the function
static Arity VarArgs(int min_args = 0) { return Arity(min_args, true); }
// NOTE: the 0-argument form (default constructor) is required for Cython
explicit Arity(int num_args = 0, bool is_varargs = false)
: num_args(num_args), is_varargs(is_varargs) {}
/// The number of required arguments (or the minimum number for varargs
/// functions).
int num_args;
  /// If true, num_args is the minimum number of required arguments.
bool is_varargs = false;
};
struct ARROW_EXPORT FunctionDoc {
/// \brief A one-line summary of the function, using a verb.
///
/// For example, "Add two numeric arrays or scalars".
std::string summary;
/// \brief A detailed description of the function, meant to follow the summary.
std::string description;
/// \brief Symbolic names (identifiers) for the function arguments.
///
/// Some bindings may use this to generate nicer function signatures.
std::vector<std::string> arg_names;
// TODO add argument descriptions?
/// \brief Name of the options class, if any.
std::string options_class;
/// \brief Whether options are required for function execution
///
/// If false, then either the function does not have an options class
/// or there is a usable default options value.
bool options_required;
FunctionDoc() = default;
FunctionDoc(std::string summary, std::string description,
std::vector<std::string> arg_names, std::string options_class = "",
bool options_required = false)
: summary(std::move(summary)),
description(std::move(description)),
arg_names(std::move(arg_names)),
options_class(std::move(options_class)),
options_required(options_required) {}
static const FunctionDoc& Empty();
};
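
// A minimal sketch (non-normative): the documentation block for a
// hypothetical binary "add"-style function. The names are illustrative only.
inline const FunctionDoc& ExampleAddDoc() {
  static const FunctionDoc doc{"Add two numeric arrays or scalars",
                               "Null values in either input yield null output.",
                               {"x", "y"}};
  return doc;
}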
/// \brief Base class for compute functions. Function implementations contain a
/// collection of "kernels" which are implementations of the function for
/// specific argument types. Selecting a viable kernel for executing a function
/// is referred to as "dispatching".
class ARROW_EXPORT Function {
public:
/// \brief The kind of function, which indicates in what contexts it is
/// valid for use.
enum Kind {
/// A function that performs scalar data operations on whole arrays of
/// data. Can generally process Array or Scalar values. The size of the
/// output will be the same as the size (or broadcasted size, in the case
/// of mixing Array and Scalar inputs) of the input.
SCALAR,
/// A function with array input and output whose behavior depends on the
/// values of the entire arrays passed, rather than the value of each scalar
/// value.
VECTOR,
/// A function that computes scalar summary statistics from array input.
SCALAR_AGGREGATE,
/// A function that computes grouped summary statistics from array input
/// and an array of group identifiers.
HASH_AGGREGATE,
/// A function that dispatches to other functions and does not contain its
/// own kernels.
META
};
virtual ~Function() = default;
  /// \brief The name of the function. The registry enforces uniqueness of names.
const std::string& name() const { return name_; }
  /// \brief The kind of function, which indicates in what contexts it is valid
  /// for use.
Function::Kind kind() const { return kind_; }
/// \brief Contains the number of arguments the function requires, or if the
/// function accepts variable numbers of arguments.
const Arity& arity() const { return arity_; }
/// \brief Return the function documentation
const FunctionDoc& doc() const { return *doc_; }
/// \brief Returns the number of registered kernels for this function.
virtual int num_kernels() const = 0;
/// \brief Return a kernel that can execute the function given the exact
/// argument types (without implicit type casts or scalar->array promotions).
///
/// NB: This function is overridden in CastFunction.
virtual Result<const Kernel*> DispatchExact(
const std::vector<ValueDescr>& values) const;
/// \brief Return a best-match kernel that can execute the function given the argument
/// types, after implicit casts are applied.
///
/// \param[in,out] values Argument types. An element may be modified to indicate that
/// the returned kernel only approximately matches the input value descriptors; callers
/// are responsible for casting inputs to the type and shape required by the kernel.
virtual Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const;
/// \brief Execute the function eagerly with the passed input arguments with
/// kernel dispatch, batch iteration, and memory allocation details taken
/// care of.
///
/// If the `options` pointer is null, then `default_options()` will be used.
///
/// This function can be overridden in subclasses.
virtual Result<Datum> Execute(const std::vector<Datum>& args,
const FunctionOptions* options, ExecContext* ctx) const;
/// \brief Returns the default options for this function.
///
/// Whatever option semantics a Function has, implementations must guarantee
/// that default_options() is valid to pass to Execute as options.
const FunctionOptions* default_options() const { return default_options_; }
virtual Status Validate() const;
protected:
Function(std::string name, Function::Kind kind, const Arity& arity,
const FunctionDoc* doc, const FunctionOptions* default_options)
: name_(std::move(name)),
kind_(kind),
arity_(arity),
doc_(doc ? doc : &FunctionDoc::Empty()),
default_options_(default_options) {}
Status CheckArity(const std::vector<InputType>&) const;
Status CheckArity(const std::vector<ValueDescr>&) const;
std::string name_;
Function::Kind kind_;
Arity arity_;
const FunctionDoc* doc_;
const FunctionOptions* default_options_ = NULLPTR;
};
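
// A minimal sketch (non-normative) of eager, one-shot execution through the
// Function interface. GetFunctionRegistry() is assumed to be available from
// arrow/compute/registry.h; "add" is a function expected to be registered.
inline Result<Datum> ExampleEagerAdd(const Datum& x, const Datum& y,
                                     ExecContext* ctx) {
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Function> add,
                        GetFunctionRegistry()->GetFunction("add"));
  return add->Execute({x, y}, /*options=*/NULLPTR, ctx);
}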
namespace detail {
template <typename KernelType>
class FunctionImpl : public Function {
public:
  /// \brief Return pointers to the currently-available kernels for inspection
std::vector<const KernelType*> kernels() const {
std::vector<const KernelType*> result;
for (const auto& kernel : kernels_) {
result.push_back(&kernel);
}
return result;
}
int num_kernels() const override { return static_cast<int>(kernels_.size()); }
protected:
FunctionImpl(std::string name, Function::Kind kind, const Arity& arity,
const FunctionDoc* doc, const FunctionOptions* default_options)
: Function(std::move(name), kind, arity, doc, default_options) {}
std::vector<KernelType> kernels_;
};
/// \brief Look up a kernel in a function. If no Kernel is found, nullptr is returned.
ARROW_EXPORT
const Kernel* DispatchExactImpl(const Function* func, const std::vector<ValueDescr>&);
/// \brief Return an error message if no Kernel is found.
ARROW_EXPORT
Status NoMatchingKernel(const Function* func, const std::vector<ValueDescr>&);
} // namespace detail
/// \brief A function that executes elementwise operations on arrays or
/// scalars, and therefore whose results generally do not depend on the order
/// of the values in the arguments. Accepts and returns arrays that are all of
/// the same size. These functions roughly correspond to the functions used in
/// SQL expressions.
class ARROW_EXPORT ScalarFunction : public detail::FunctionImpl<ScalarKernel> {
public:
using KernelType = ScalarKernel;
ScalarFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
const FunctionOptions* default_options = NULLPTR)
: detail::FunctionImpl<ScalarKernel>(std::move(name), Function::SCALAR, arity, doc,
default_options) {}
/// \brief Add a kernel with given input/output types, no required state
/// initialization, preallocation for fixed-width types, and default null
/// handling (intersect validity bitmaps of inputs).
Status AddKernel(std::vector<InputType> in_types, OutputType out_type,
ArrayKernelExec exec, KernelInit init = NULLPTR);
/// \brief Add a kernel (function implementation). Returns error if the
/// kernel's signature does not match the function's arity.
Status AddKernel(ScalarKernel kernel);
};
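
// A minimal sketch (non-normative): assembling a unary scalar function with a
// single int64 kernel. `example_exec` and the function name are hypothetical;
// the function would then typically be added to a FunctionRegistry.
inline Status ExampleMakeFunction(ArrayKernelExec example_exec) {
  auto func = std::make_shared<ScalarFunction>("example_incr", Arity::Unary(),
                                               &FunctionDoc::Empty());
  return func->AddKernel({InputType(int64())}, int64(),
                         std::move(example_exec));
}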
/// \brief A function that executes general array operations that may yield
/// outputs of different sizes or have results that depend on the whole array
/// contents. These functions roughly correspond to the functions found in
/// non-SQL array languages like APL and its derivatives.
class ARROW_EXPORT VectorFunction : public detail::FunctionImpl<VectorKernel> {
public:
using KernelType = VectorKernel;
VectorFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
const FunctionOptions* default_options = NULLPTR)
: detail::FunctionImpl<VectorKernel>(std::move(name), Function::VECTOR, arity, doc,
default_options) {}
/// \brief Add a simple kernel with given input/output types, no required
/// state initialization, no data preallocation, and no preallocation of the
/// validity bitmap.
Status AddKernel(std::vector<InputType> in_types, OutputType out_type,
ArrayKernelExec exec, KernelInit init = NULLPTR);
/// \brief Add a kernel (function implementation). Returns error if the
/// kernel's signature does not match the function's arity.
Status AddKernel(VectorKernel kernel);
};
class ARROW_EXPORT ScalarAggregateFunction
: public detail::FunctionImpl<ScalarAggregateKernel> {
public:
using KernelType = ScalarAggregateKernel;
ScalarAggregateFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
const FunctionOptions* default_options = NULLPTR)
: detail::FunctionImpl<ScalarAggregateKernel>(
std::move(name), Function::SCALAR_AGGREGATE, arity, doc, default_options) {}
/// \brief Add a kernel (function implementation). Returns error if the
/// kernel's signature does not match the function's arity.
Status AddKernel(ScalarAggregateKernel kernel);
};
class ARROW_EXPORT HashAggregateFunction
: public detail::FunctionImpl<HashAggregateKernel> {
public:
using KernelType = HashAggregateKernel;
HashAggregateFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
const FunctionOptions* default_options = NULLPTR)
: detail::FunctionImpl<HashAggregateKernel>(
std::move(name), Function::HASH_AGGREGATE, arity, doc, default_options) {}
/// \brief Add a kernel (function implementation). Returns error if the
/// kernel's signature does not match the function's arity.
Status AddKernel(HashAggregateKernel kernel);
};
/// \brief A function that dispatches to other functions. Must implement
/// MetaFunction::ExecuteImpl.
///
/// For Array, ChunkedArray, and Scalar Datum kinds, may rely on the execution
/// of concrete Function types, but must handle other Datum kinds on its own.
class ARROW_EXPORT MetaFunction : public Function {
public:
int num_kernels() const override { return 0; }
Result<Datum> Execute(const std::vector<Datum>& args, const FunctionOptions* options,
ExecContext* ctx) const override;
protected:
virtual Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
const FunctionOptions* options,
ExecContext* ctx) const = 0;
MetaFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
const FunctionOptions* default_options = NULLPTR)
: Function(std::move(name), Function::META, arity, doc, default_options) {}
};
/// @}
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,752 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// NOTE: API is EXPERIMENTAL and will change without going through a
// deprecation cycle
#pragma once
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "arrow/buffer.h"
#include "arrow/compute/exec.h"
#include "arrow/datum.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace compute {
class FunctionOptions;
/// \brief Base class for opaque kernel-specific state, for example when a
/// kernel requires some kind of one-time initialization.
struct ARROW_EXPORT KernelState {
virtual ~KernelState() = default;
};
/// \brief Context/state for the execution of a particular kernel.
class ARROW_EXPORT KernelContext {
public:
explicit KernelContext(ExecContext* exec_ctx) : exec_ctx_(exec_ctx) {}
/// \brief Allocate buffer from the context's memory pool. The contents are
/// not initialized.
Result<std::shared_ptr<ResizableBuffer>> Allocate(int64_t nbytes);
/// \brief Allocate buffer for bitmap from the context's memory pool. Like
/// Allocate, the contents of the buffer are not initialized but the last
/// byte is preemptively zeroed to help avoid ASAN or valgrind issues.
Result<std::shared_ptr<ResizableBuffer>> AllocateBitmap(int64_t num_bits);
  /// \brief Assign the active KernelState to be utilized for each stage of
  /// kernel execution. Ownership and memory lifetime of the KernelState must
  /// be managed separately.
void SetState(KernelState* state) { state_ = state; }
KernelState* state() { return state_; }
/// \brief Configuration related to function execution that is to be shared
/// across multiple kernels.
ExecContext* exec_context() { return exec_ctx_; }
/// \brief The memory pool to use for allocations. For now, it uses the
/// MemoryPool contained in the ExecContext used to create the KernelContext.
MemoryPool* memory_pool() { return exec_ctx_->memory_pool(); }
private:
ExecContext* exec_ctx_;
KernelState* state_ = NULLPTR;
};
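
// A minimal sketch (non-normative): allocating a scratch bitmap from within a
// kernel via the context's memory pool. The buffer owns its memory and frees
// it when the shared_ptr goes out of scope.
inline Status ExampleScratchBitmap(KernelContext* ctx, int64_t length) {
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ResizableBuffer> bitmap,
                        ctx->AllocateBitmap(length));
  // ... fill bitmap->mutable_data() with validity bits ...
  return Status::OK();
}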
/// \brief The standard kernel execution API that must be implemented for
/// SCALAR and VECTOR kernel types. This includes both stateless and stateful
/// kernels. Kernels depending on some execution state access that state via
/// subclasses of KernelState set on the KernelContext object. Implementations
/// should endeavor to write into pre-allocated memory if they are able,
/// though for some kernels (e.g. those that must employ a builder like
/// StringBuilder) this may not be possible.
using ArrayKernelExec = std::function<Status(KernelContext*, const ExecBatch&, Datum*)>;
/// \brief A type-checking interface to permit customizable validation rules
/// for use with InputType and KernelSignature. This is for scenarios where the
/// acceptance is not an exact type instance, such as a TIMESTAMP type for a
/// specific TimeUnit, but permitting any time zone.
struct ARROW_EXPORT TypeMatcher {
virtual ~TypeMatcher() = default;
/// \brief Return true if this matcher accepts the data type.
virtual bool Matches(const DataType& type) const = 0;
/// \brief A human-interpretable string representation of what the type
/// matcher checks for, usable when printing KernelSignature or formatting
/// error messages.
virtual std::string ToString() const = 0;
/// \brief Return true if this TypeMatcher contains the same matching rule as
/// the other. Currently depends on RTTI.
virtual bool Equals(const TypeMatcher& other) const = 0;
};
namespace match {
/// \brief Match any DataType instance having the same DataType::id.
ARROW_EXPORT std::shared_ptr<TypeMatcher> SameTypeId(Type::type type_id);
/// \brief Match any TimestampType instance having the same unit, but the time
/// zones can be different.
ARROW_EXPORT std::shared_ptr<TypeMatcher> TimestampTypeUnit(TimeUnit::type unit);
ARROW_EXPORT std::shared_ptr<TypeMatcher> Time32TypeUnit(TimeUnit::type unit);
ARROW_EXPORT std::shared_ptr<TypeMatcher> Time64TypeUnit(TimeUnit::type unit);
ARROW_EXPORT std::shared_ptr<TypeMatcher> DurationTypeUnit(TimeUnit::type unit);
/// \brief Match any integer type.
ARROW_EXPORT std::shared_ptr<TypeMatcher> Integer();
/// \brief Match types using a 32-bit varbinary representation.
ARROW_EXPORT std::shared_ptr<TypeMatcher> BinaryLike();
/// \brief Match types using a 64-bit varbinary representation.
ARROW_EXPORT std::shared_ptr<TypeMatcher> LargeBinaryLike();
/// \brief Match any fixed-size binary type.
ARROW_EXPORT std::shared_ptr<TypeMatcher> FixedSizeBinaryLike();
/// \brief Match any primitive type (boolean or any type representable as a C
/// type).
ARROW_EXPORT std::shared_ptr<TypeMatcher> Primitive();
} // namespace match
/// \brief An object used for type- and shape-checking arguments to be passed
/// to a kernel and stored in a KernelSignature. Distinguishes between ARRAY
/// and SCALAR arguments using ValueDescr::Shape. The type-checking rule can be
/// supplied either with an exact DataType instance or a custom TypeMatcher.
class ARROW_EXPORT InputType {
public:
/// \brief The kind of type-checking rule that the InputType contains.
enum Kind {
/// \brief Accept any value type.
ANY_TYPE,
    /// \brief Accept only a fixed arrow::DataType, matching values that have
    /// this exact type (e.g. the same TimestampType unit, the same decimal
    /// scale and precision, or the same nested child types).
EXACT_TYPE,
/// \brief Uses a TypeMatcher implementation to check the type.
USE_TYPE_MATCHER
};
/// \brief Accept any value type but with a specific shape (e.g. any Array or
/// any Scalar).
InputType(ValueDescr::Shape shape = ValueDescr::ANY) // NOLINT implicit construction
: kind_(ANY_TYPE), shape_(shape) {}
/// \brief Accept an exact value type.
InputType(std::shared_ptr<DataType> type, // NOLINT implicit construction
ValueDescr::Shape shape = ValueDescr::ANY)
: kind_(EXACT_TYPE), shape_(shape), type_(std::move(type)) {}
/// \brief Accept an exact value type and shape provided by a ValueDescr.
InputType(const ValueDescr& descr) // NOLINT implicit construction
: InputType(descr.type, descr.shape) {}
/// \brief Use the passed TypeMatcher to type check.
InputType(std::shared_ptr<TypeMatcher> type_matcher, // NOLINT implicit construction
ValueDescr::Shape shape = ValueDescr::ANY)
: kind_(USE_TYPE_MATCHER), shape_(shape), type_matcher_(std::move(type_matcher)) {}
/// \brief Match any type with the given Type::type. Uses a TypeMatcher for
/// its implementation.
explicit InputType(Type::type type_id, ValueDescr::Shape shape = ValueDescr::ANY)
: InputType(match::SameTypeId(type_id), shape) {}
InputType(const InputType& other) { CopyInto(other); }
void operator=(const InputType& other) { CopyInto(other); }
InputType(InputType&& other) { MoveInto(std::forward<InputType>(other)); }
void operator=(InputType&& other) { MoveInto(std::forward<InputType>(other)); }
  /// \brief Match an array with the given exact type. Convenience constructor.
  static InputType Array(std::shared_ptr<DataType> type) {
    return InputType(std::move(type), ValueDescr::ARRAY);
  }
  /// \brief Match a scalar with the given exact type. Convenience constructor.
  static InputType Scalar(std::shared_ptr<DataType> type) {
    return InputType(std::move(type), ValueDescr::SCALAR);
  }
  /// \brief Match an array with the given Type::type id. Convenience
  /// constructor.
  static InputType Array(Type::type id) { return InputType(id, ValueDescr::ARRAY); }
  /// \brief Match a scalar with the given Type::type id. Convenience
  /// constructor.
  static InputType Scalar(Type::type id) { return InputType(id, ValueDescr::SCALAR); }
/// \brief Return true if this input type matches the same type cases as the
/// other.
bool Equals(const InputType& other) const;
bool operator==(const InputType& other) const { return this->Equals(other); }
bool operator!=(const InputType& other) const { return !(*this == other); }
/// \brief Return hash code.
size_t Hash() const;
/// \brief Render a human-readable string representation.
std::string ToString() const;
/// \brief Return true if the value matches this argument kind in type
/// and shape.
bool Matches(const Datum& value) const;
/// \brief Return true if the value descriptor matches this argument kind in
/// type and shape.
bool Matches(const ValueDescr& value) const;
/// \brief The type matching rule that this InputType uses.
Kind kind() const { return kind_; }
/// \brief Indicates whether this InputType matches Array (ValueDescr::ARRAY),
/// Scalar (ValueDescr::SCALAR) values, or both (ValueDescr::ANY).
ValueDescr::Shape shape() const { return shape_; }
/// \brief For InputType::EXACT_TYPE kind, the exact type that this InputType
/// must match. Otherwise this function should not be used and will assert in
/// debug builds.
const std::shared_ptr<DataType>& type() const;
/// \brief For InputType::USE_TYPE_MATCHER, the TypeMatcher to be used for
/// checking the type of a value. Otherwise this function should not be used
/// and will assert in debug builds.
const TypeMatcher& type_matcher() const;
private:
void CopyInto(const InputType& other) {
this->kind_ = other.kind_;
this->shape_ = other.shape_;
this->type_ = other.type_;
this->type_matcher_ = other.type_matcher_;
}
void MoveInto(InputType&& other) {
this->kind_ = other.kind_;
this->shape_ = other.shape_;
this->type_ = std::move(other.type_);
this->type_matcher_ = std::move(other.type_matcher_);
}
Kind kind_;
ValueDescr::Shape shape_ = ValueDescr::ANY;
// For EXACT_TYPE Kind
std::shared_ptr<DataType> type_;
// For USE_TYPE_MATCHER Kind
std::shared_ptr<TypeMatcher> type_matcher_;
};
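
// A minimal sketch (non-normative): three ways of constraining an argument,
// from an exact type, to a type id, to a custom matcher.
inline void ExampleInputTypes() {
  InputType exact(int32());                // matches int32 only
  InputType by_id(Type::TIMESTAMP);        // matches any timestamp type
  InputType matched(match::BinaryLike());  // matches binary/string-like types
  (void)exact;
  (void)by_id;
  (void)matched;
}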
/// \brief Container to capture both exact and input-dependent output types.
///
/// Unless a user-defined resolver function handles it, the value shape
/// returned by Resolve is determined by broadcasting the shapes of the
/// input arguments:
///
/// * Any ARRAY shape -> output shape is ARRAY
/// * All SCALAR shapes -> output shape is SCALAR
class ARROW_EXPORT OutputType {
public:
/// \brief An enum indicating whether the value type is an invariant fixed
/// value or one that's computed by a kernel-defined resolver function.
enum ResolveKind { FIXED, COMPUTED };
  /// Type resolution function. Given input types and shapes, return output
  /// type and shape. This function MAY use the kernel state to decide the
  /// output type based on the FunctionOptions.
  ///
  /// This function SHOULD NOT be used to check for arity; that is performed
  /// one or more layers above.
using Resolver =
std::function<Result<ValueDescr>(KernelContext*, const std::vector<ValueDescr>&)>;
/// \brief Output an exact type, but with shape determined by promoting the
/// shapes of the inputs (any ARRAY argument yields ARRAY).
OutputType(std::shared_ptr<DataType> type) // NOLINT implicit construction
: kind_(FIXED), type_(std::move(type)) {}
/// \brief Output the exact type and shape provided by a ValueDescr
OutputType(ValueDescr descr); // NOLINT implicit construction
/// \brief Output a computed type depending on actual input types
OutputType(Resolver resolver) // NOLINT implicit construction
: kind_(COMPUTED), resolver_(std::move(resolver)) {}
OutputType(const OutputType& other) {
this->kind_ = other.kind_;
this->shape_ = other.shape_;
this->type_ = other.type_;
this->resolver_ = other.resolver_;
}
OutputType(OutputType&& other) {
this->kind_ = other.kind_;
this->type_ = std::move(other.type_);
this->shape_ = other.shape_;
this->resolver_ = other.resolver_;
}
OutputType& operator=(const OutputType&) = default;
OutputType& operator=(OutputType&&) = default;
/// \brief Return the shape and type of the expected output value of the
/// kernel given the value descriptors (shapes and types) of the input
/// arguments. The resolver may make use of state information kept in the
/// KernelContext.
Result<ValueDescr> Resolve(KernelContext* ctx,
const std::vector<ValueDescr>& args) const;
/// \brief The exact output value type for the FIXED kind.
const std::shared_ptr<DataType>& type() const;
  /// \brief For use with the COMPUTED resolution strategy; returns the
  /// kernel-defined resolver function. It may be more convenient to invoke
  /// the resolver through OutputType::Resolve.
  const Resolver& resolver() const;
/// \brief Render a human-readable string representation.
std::string ToString() const;
/// \brief Return the kind of type resolution of this output type, whether
/// fixed/invariant or computed by a resolver.
ResolveKind kind() const { return kind_; }
/// \brief If the shape is ANY, then Resolve will compute the shape based on
/// the input arguments.
ValueDescr::Shape shape() const { return shape_; }
private:
ResolveKind kind_;
// For FIXED resolution
std::shared_ptr<DataType> type_;
/// \brief The shape of the output type to return when using Resolve. If ANY
/// will promote the input shapes.
ValueDescr::Shape shape_ = ValueDescr::ANY;
// For COMPUTED resolution
Resolver resolver_;
};
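
// A minimal sketch (non-normative): a COMPUTED output type whose resolver
// simply propagates the type and shape of the first argument, as an
// identity-like kernel would.
inline OutputType ExampleFirstArgOutputType() {
  return OutputType(
      [](KernelContext*, const std::vector<ValueDescr>& args) -> Result<ValueDescr> {
        return args[0];
      });
}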
/// \brief Holds the input types and output type of the kernel.
///
/// VarArgs functions with minimum N arguments should pass up to N input types to be
/// used to validate the input types of a function invocation. The first N-1 types
/// will be matched against the first N-1 arguments, and the last type will be
/// matched against the remaining arguments.
class ARROW_EXPORT KernelSignature {
public:
KernelSignature(std::vector<InputType> in_types, OutputType out_type,
bool is_varargs = false);
/// \brief Convenience ctor since make_shared can be awkward
static std::shared_ptr<KernelSignature> Make(std::vector<InputType> in_types,
OutputType out_type,
bool is_varargs = false);
  /// \brief Return true if the signature is compatible with the list of input
  /// value descriptors.
bool MatchesInputs(const std::vector<ValueDescr>& descriptors) const;
/// \brief Returns true if the input types of each signature are
/// equal. Well-formed functions should have a deterministic output type
/// given input types, but currently it is the responsibility of the
/// developer to ensure this.
bool Equals(const KernelSignature& other) const;
bool operator==(const KernelSignature& other) const { return this->Equals(other); }
bool operator!=(const KernelSignature& other) const { return !(*this == other); }
/// \brief Compute a hash code for the signature
size_t Hash() const;
/// \brief The input types for the kernel. For VarArgs functions, this should
/// generally contain a single validator to use for validating all of the
/// function arguments.
const std::vector<InputType>& in_types() const { return in_types_; }
/// \brief The output type for the kernel. Use Resolve to return the exact
/// output given input argument ValueDescrs, since many kernels' output types
/// depend on their input types (or their type metadata).
const OutputType& out_type() const { return out_type_; }
/// \brief Render a human-readable string representation
std::string ToString() const;
bool is_varargs() const { return is_varargs_; }
private:
std::vector<InputType> in_types_;
OutputType out_type_;
bool is_varargs_;
// For caching the hash code after it's computed the first time
mutable uint64_t hash_code_;
};
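
// A minimal sketch (non-normative): a varargs signature in which the single
// InputType validates every argument; the utf8 output type is arbitrary.
inline std::shared_ptr<KernelSignature> ExampleVarArgsSignature() {
  return KernelSignature::Make({InputType(match::BinaryLike())}, OutputType(utf8()),
                               /*is_varargs=*/true);
}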
/// \brief A function may contain multiple variants of a kernel for a given
/// type combination for different SIMD levels. Based on the active system's
/// CPU info or the user's preferences, we can elect to use one over the other.
struct SimdLevel {
enum type { NONE = 0, SSE4_2, AVX, AVX2, AVX512, NEON, MAX };
};
/// \brief The strategy to use for propagating or otherwise populating the
/// validity bitmap of a kernel output.
struct NullHandling {
enum type {
/// Compute the output validity bitmap by intersecting the validity bitmaps
/// of the arguments using bitwise-and operations. This means that values
/// in the output are valid/non-null only if the corresponding values in
    /// all input arguments were valid/non-null. Kernels generally need not
/// touch the bitmap thereafter, but a kernel's exec function is permitted
/// to alter the bitmap after the null intersection is computed if it needs
/// to.
INTERSECTION,
/// Kernel expects a pre-allocated buffer to write the result bitmap
/// into. The preallocated memory is not zeroed (except for the last byte),
/// so the kernel should ensure to completely populate the bitmap.
COMPUTED_PREALLOCATE,
/// Kernel allocates and sets the validity bitmap of the output.
COMPUTED_NO_PREALLOCATE,
/// Kernel output is never null and a validity bitmap does not need to be
/// allocated.
OUTPUT_NOT_NULL
};
};
/// \brief The preference for memory preallocation of fixed-width type outputs
/// in kernel execution.
struct MemAllocation {
enum type {
// For data types that support pre-allocation (i.e. fixed-width), the
// kernel expects to be provided a pre-allocated data buffer to write
// into. Non-fixed-width types must always allocate their own data
    // buffers. The allocation is made for the same length as the execution
    // batch, so vector kernels yielding differently sized output should not
    // use this.
//
// It is valid for the data to not be preallocated but the validity bitmap
// is (or is computed using the intersection/bitwise-and method).
//
// For variable-size output types like BinaryType or StringType, or for
// nested types, this option has no effect.
PREALLOCATE,
// The kernel is responsible for allocating its own data buffer for
// fixed-width type outputs.
NO_PREALLOCATE
};
};
struct Kernel;
/// \brief Arguments to pass to a KernelInit function. A struct is used to help
/// avoid API breakage should the arguments passed need to be expanded.
struct KernelInitArgs {
/// \brief A pointer to the kernel being initialized. The init function may
/// depend on the kernel's KernelSignature or other data contained there.
const Kernel* kernel;
/// \brief The types and shapes of the input arguments that the kernel is
/// about to be executed against.
///
/// TODO: should this be const std::vector<ValueDescr>*? const-ref is being
/// used to avoid the cost of copying the struct into the args struct.
const std::vector<ValueDescr>& inputs;
/// \brief Opaque options specific to this kernel. May be nullptr for functions
/// that do not require options.
const FunctionOptions* options;
};
/// \brief Common initializer function for all kernel types.
using KernelInit = std::function<Result<std::unique_ptr<KernelState>>(
KernelContext*, const KernelInitArgs&)>;
/// \brief Base type for kernels. Contains the function signature and
/// optionally the state initialization function, along with some common
/// attributes.
struct Kernel {
Kernel() = default;
Kernel(std::shared_ptr<KernelSignature> sig, KernelInit init)
: signature(std::move(sig)), init(std::move(init)) {}
Kernel(std::vector<InputType> in_types, OutputType out_type, KernelInit init)
: Kernel(KernelSignature::Make(std::move(in_types), std::move(out_type)),
std::move(init)) {}
/// \brief The "signature" of the kernel containing the InputType input
/// argument validators and OutputType output type and shape resolver.
std::shared_ptr<KernelSignature> signature;
/// \brief Create a new KernelState for invocations of this kernel, e.g. to
/// set up any options or state relevant for execution.
KernelInit init;
/// \brief Create a vector of new KernelState for invocations of this kernel.
static Status InitAll(KernelContext*, const KernelInitArgs&,
std::vector<std::unique_ptr<KernelState>>*);
/// \brief Indicates whether execution can benefit from parallelization
/// (splitting large chunks into smaller chunks and using multiple
/// threads). Some kernels may not support parallel execution at
/// all. Synchronization and concurrency-related issues are currently the
/// responsibility of the Kernel's implementation.
bool parallelizable = true;
/// \brief Indicates the level of SIMD instruction support in the host CPU is
/// required to use the function. The intention is for functions to be able to
/// contain multiple kernels with the same signature but different levels of SIMD,
/// so that the most optimized kernel supported on a host's processor can be chosen.
SimdLevel::type simd_level = SimdLevel::NONE;
};
/// \brief Common kernel base data structure for ScalarKernel and
/// VectorKernel. It is called "ArrayKernel" because these kernels generally
/// output array values (as opposed to the scalar values produced by
/// aggregate functions).
struct ArrayKernel : public Kernel {
ArrayKernel() = default;
ArrayKernel(std::shared_ptr<KernelSignature> sig, ArrayKernelExec exec,
KernelInit init = NULLPTR)
: Kernel(std::move(sig), init), exec(std::move(exec)) {}
ArrayKernel(std::vector<InputType> in_types, OutputType out_type, ArrayKernelExec exec,
KernelInit init = NULLPTR)
: Kernel(std::move(in_types), std::move(out_type), std::move(init)),
exec(std::move(exec)) {}
/// \brief Perform a single invocation of this kernel. Depending on the
/// implementation, it may only write into preallocated memory, while in some
/// cases it will allocate its own memory. Any required state is managed
/// through the KernelContext.
ArrayKernelExec exec;
/// \brief Writing execution results into larger contiguous allocations
/// requires that the kernel be able to write into sliced output ArrayData*,
/// including sliced output validity bitmaps. Some kernel implementations may
/// not be able to do this, so setting this to false disables this
/// functionality.
bool can_write_into_slices = true;
};
/// \brief Kernel data structure for implementations of ScalarFunction. In
/// addition to the members found in ArrayKernel, contains the null handling
/// and memory pre-allocation preferences.
struct ScalarKernel : public ArrayKernel {
using ArrayKernel::ArrayKernel;
// For scalar functions preallocated data and intersecting arg validity
// bitmaps is a reasonable default
NullHandling::type null_handling = NullHandling::INTERSECTION;
MemAllocation::type mem_allocation = MemAllocation::PREALLOCATE;
};
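
// A minimal sketch (non-normative): overriding the scalar-kernel defaults for
// an exec function that computes its own validity bitmap and allocates its
// own (variable-width) output. `exec` is hypothetical.
inline ScalarKernel ExampleSelfAllocatingKernel(ArrayKernelExec exec) {
  ScalarKernel kernel({InputType(ValueDescr::ARRAY)}, OutputType(utf8()),
                      std::move(exec));
  kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
  kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
  return kernel;
}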
// ----------------------------------------------------------------------
// VectorKernel (for VectorFunction)
/// \brief See VectorKernel::finalize member for usage
using VectorFinalize = std::function<Status(KernelContext*, std::vector<Datum>*)>;
/// \brief Kernel data structure for implementations of VectorFunction. In
/// addition to the members found in ArrayKernel, contains an optional
/// finalizer function, the null handling and memory pre-allocation preferences
/// (which have different defaults from ScalarKernel), and some other
/// execution-related options.
struct VectorKernel : public ArrayKernel {
VectorKernel() = default;
VectorKernel(std::shared_ptr<KernelSignature> sig, ArrayKernelExec exec)
: ArrayKernel(std::move(sig), std::move(exec)) {}
VectorKernel(std::vector<InputType> in_types, OutputType out_type, ArrayKernelExec exec,
KernelInit init = NULLPTR, VectorFinalize finalize = NULLPTR)
: ArrayKernel(std::move(in_types), std::move(out_type), std::move(exec),
std::move(init)),
finalize(std::move(finalize)) {}
VectorKernel(std::shared_ptr<KernelSignature> sig, ArrayKernelExec exec,
KernelInit init = NULLPTR, VectorFinalize finalize = NULLPTR)
: ArrayKernel(std::move(sig), std::move(exec), std::move(init)),
finalize(std::move(finalize)) {}
/// \brief For VectorKernel, convert intermediate results into finalized
/// results. Mutates input argument. Some kernels may accumulate state
/// (example: hashing-related functions) through processing chunked inputs, and
/// then need to attach some accumulated state to each of the outputs of
/// processing each chunk of data.
VectorFinalize finalize;
  /// Since vector kernels are generally implemented quite differently from
  /// scalar/elementwise kernels (and they may not even yield arrays of the
  /// same size), we make the developer opt in to any memory preallocation
  /// rather than having to turn it off.
NullHandling::type null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
MemAllocation::type mem_allocation = MemAllocation::NO_PREALLOCATE;
/// Some vector kernels can do chunkwise execution using ExecBatchIterator,
/// in some cases accumulating some state. Other kernels (like Take) need to
/// be passed whole arrays and don't work on ChunkedArray inputs
bool can_execute_chunkwise = true;
/// Some kernels (like unique and value_counts) yield non-chunked output from
/// chunked-array inputs. This option controls how the results are boxed when
/// returned from ExecVectorFunction
///
/// true -> ChunkedArray
/// false -> Array
bool output_chunked = true;
};
// ----------------------------------------------------------------------
// ScalarAggregateKernel (for ScalarAggregateFunction)
using ScalarAggregateConsume = std::function<Status(KernelContext*, const ExecBatch&)>;
using ScalarAggregateMerge =
std::function<Status(KernelContext*, KernelState&&, KernelState*)>;
// Finalize returns Datum to permit multiple return values
using ScalarAggregateFinalize = std::function<Status(KernelContext*, Datum*)>;
/// \brief Kernel data structure for implementations of
/// ScalarAggregateFunction. The four necessary components of an aggregation
/// kernel are the init, consume, merge, and finalize functions.
///
/// * init: creates a new KernelState for a kernel.
/// * consume: processes an ExecBatch and updates the KernelState found in the
/// KernelContext.
/// * merge: combines one KernelState with another.
/// * finalize: produces the end result of the aggregation using the
/// KernelState in the KernelContext.
struct ScalarAggregateKernel : public Kernel {
ScalarAggregateKernel() = default;
ScalarAggregateKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
ScalarAggregateConsume consume, ScalarAggregateMerge merge,
ScalarAggregateFinalize finalize)
: Kernel(std::move(sig), std::move(init)),
consume(std::move(consume)),
merge(std::move(merge)),
finalize(std::move(finalize)) {}
ScalarAggregateKernel(std::vector<InputType> in_types, OutputType out_type,
KernelInit init, ScalarAggregateConsume consume,
ScalarAggregateMerge merge, ScalarAggregateFinalize finalize)
: ScalarAggregateKernel(
KernelSignature::Make(std::move(in_types), std::move(out_type)),
std::move(init), std::move(consume), std::move(merge), std::move(finalize)) {}
/// \brief Merge a vector of KernelStates into a single KernelState.
/// The merged state will be returned and will be set on the KernelContext.
static Result<std::unique_ptr<KernelState>> MergeAll(
const ScalarAggregateKernel* kernel, KernelContext* ctx,
std::vector<std::unique_ptr<KernelState>> states);
ScalarAggregateConsume consume;
ScalarAggregateMerge merge;
ScalarAggregateFinalize finalize;
};
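
// A minimal sketch (non-normative): the four pieces of a hypothetical
// "count rows" aggregate. State lives in a KernelState subclass and is
// reached through KernelContext::state().
struct ExampleCountState : public KernelState {
  int64_t count = 0;
};
inline ScalarAggregateKernel ExampleCountKernel() {
  KernelInit init = [](KernelContext*, const KernelInitArgs&)
      -> Result<std::unique_ptr<KernelState>> {
    return std::unique_ptr<KernelState>(new ExampleCountState());
  };
  ScalarAggregateConsume consume = [](KernelContext* ctx, const ExecBatch& batch) {
    static_cast<ExampleCountState*>(ctx->state())->count += batch.length;
    return Status::OK();
  };
  ScalarAggregateMerge merge = [](KernelContext*, KernelState&& src,
                                  KernelState* dst) {
    static_cast<ExampleCountState*>(dst)->count +=
        static_cast<ExampleCountState&>(src).count;
    return Status::OK();
  };
  ScalarAggregateFinalize finalize = [](KernelContext* ctx, Datum* out) {
    *out = Datum(static_cast<ExampleCountState*>(ctx->state())->count);
    return Status::OK();
  };
  return ScalarAggregateKernel({InputType(ValueDescr::ARRAY)}, int64(),
                               std::move(init), std::move(consume),
                               std::move(merge), std::move(finalize));
}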
// ----------------------------------------------------------------------
// HashAggregateKernel (for HashAggregateFunction)
using HashAggregateResize = std::function<Status(KernelContext*, int64_t)>;
using HashAggregateConsume = std::function<Status(KernelContext*, const ExecBatch&)>;
using HashAggregateMerge =
std::function<Status(KernelContext*, KernelState&&, const ArrayData&)>;
// Finalize returns Datum to permit multiple return values
using HashAggregateFinalize = std::function<Status(KernelContext*, Datum*)>;
/// \brief Kernel data structure for implementations of
/// HashAggregateFunction. The four necessary components of an aggregation
/// kernel are the init, consume, merge, and finalize functions.
///
/// * init: creates a new KernelState for a kernel.
/// * resize: ensure that the KernelState can accommodate the specified number of groups.
/// * consume: processes an ExecBatch (which includes the argument as well
/// as an array of group identifiers) and updates the KernelState found in the
/// KernelContext.
/// * merge: combines one KernelState with another.
/// * finalize: produces the end result of the aggregation using the
/// KernelState in the KernelContext.
struct HashAggregateKernel : public Kernel {
HashAggregateKernel() = default;
HashAggregateKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
HashAggregateResize resize, HashAggregateConsume consume,
HashAggregateMerge merge, HashAggregateFinalize finalize)
: Kernel(std::move(sig), std::move(init)),
resize(std::move(resize)),
consume(std::move(consume)),
merge(std::move(merge)),
finalize(std::move(finalize)) {}
HashAggregateKernel(std::vector<InputType> in_types, OutputType out_type,
KernelInit init, HashAggregateConsume consume,
HashAggregateResize resize, HashAggregateMerge merge,
HashAggregateFinalize finalize)
: HashAggregateKernel(
KernelSignature::Make(std::move(in_types), std::move(out_type)),
std::move(init), std::move(resize), std::move(consume), std::move(merge),
std::move(finalize)) {}
HashAggregateResize resize;
HashAggregateConsume consume;
HashAggregateMerge merge;
HashAggregateFinalize finalize;
};
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,382 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include "arrow/array.h"
#include "arrow/compute/exec.h"
#include "arrow/type.h"
#include "arrow/util/logging.h"
/// This file contains lightweight containers for Arrow buffers. These
/// containers make compromises in terms of strong ownership and the range of
/// data types supported in order to gain performance and reduce overhead.
namespace arrow {
namespace compute {
/// \brief Description of the layout of a "key" column
///
/// A "key" column is a non-nested, non-union column.
/// Every key column has either 0 (null), 2 (e.g. int32) or 3 (e.g. string) buffers
/// and no children.
///
/// This metadata object is a zero-allocation analogue of arrow::DataType
struct ARROW_EXPORT KeyColumnMetadata {
KeyColumnMetadata() = default;
KeyColumnMetadata(bool is_fixed_length_in, uint32_t fixed_length_in,
bool is_null_type_in = false)
: is_fixed_length(is_fixed_length_in),
is_null_type(is_null_type_in),
fixed_length(fixed_length_in) {}
/// \brief True if the column is not a varying-length binary type
///
/// If this is true the column will have a validity buffer and
/// a data buffer and the third buffer will be unused.
bool is_fixed_length;
/// \brief True if this column is the null type
bool is_null_type;
/// \brief The number of bytes for each item
///
/// Zero has a special meaning, indicating a bit vector with one bit per value if it
/// isn't a null type column.
///
/// For a varying-length binary column this represents the number of bytes per offset.
uint32_t fixed_length;
};
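
// A minimal sketch (non-normative): metadata for two common key layouts. For
// varying-length binary columns, fixed_length describes the offset width.
inline KeyColumnMetadata ExampleInt32Metadata() {
  return KeyColumnMetadata(/*is_fixed_length_in=*/true, sizeof(int32_t));
}
inline KeyColumnMetadata ExampleStringMetadata() {
  return KeyColumnMetadata(/*is_fixed_length_in=*/false, sizeof(uint32_t));
}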
/// \brief A lightweight view into a "key" array
///
/// A "key" column is a non-nested, non-union column \see KeyColumnMetadata
///
/// This metadata object is a zero-allocation analogue of arrow::ArrayData
class ARROW_EXPORT KeyColumnArray {
public:
/// \brief Create an uninitialized KeyColumnArray
KeyColumnArray() = default;
/// \brief Create a read-only view from buffers
///
/// This is a view only and does not take ownership of the buffers. The lifetime
/// of the buffers must exceed the lifetime of this view
KeyColumnArray(const KeyColumnMetadata& metadata, int64_t length,
const uint8_t* validity_buffer, const uint8_t* fixed_length_buffer,
const uint8_t* var_length_buffer, int bit_offset_validity = 0,
int bit_offset_fixed = 0);
/// \brief Create a mutable view from buffers
///
/// This is a view only and does not take ownership of the buffers. The lifetime
/// of the buffers must exceed the lifetime of this view
KeyColumnArray(const KeyColumnMetadata& metadata, int64_t length,
uint8_t* validity_buffer, uint8_t* fixed_length_buffer,
uint8_t* var_length_buffer, int bit_offset_validity = 0,
int bit_offset_fixed = 0);
/// \brief Create a sliced view of `this`
///
  /// The row offset must be divisible by 8 so that bit vectors are not
  /// split within a single byte.
KeyColumnArray Slice(int64_t offset, int64_t length) const;
/// \brief Create a copy of `this` with a buffer from `other`
///
/// The copy will be identical to `this` except the buffer at buffer_id_to_replace
/// will be replaced by the corresponding buffer in `other`.
KeyColumnArray WithBufferFrom(const KeyColumnArray& other,
int buffer_id_to_replace) const;
/// \brief Create a copy of `this` with new metadata
KeyColumnArray WithMetadata(const KeyColumnMetadata& metadata) const;
// Constants used for accessing buffers using data() and mutable_data().
static constexpr int kValidityBuffer = 0;
static constexpr int kFixedLengthBuffer = 1;
static constexpr int kVariableLengthBuffer = 2;
/// \brief Return one of the underlying mutable buffers
uint8_t* mutable_data(int i) {
    ARROW_DCHECK(i >= 0 && i < kMaxBuffers);
return mutable_buffers_[i];
}
/// \brief Return one of the underlying read-only buffers
const uint8_t* data(int i) const {
    ARROW_DCHECK(i >= 0 && i < kMaxBuffers);
return buffers_[i];
}
/// \brief Return a mutable version of the offsets buffer
///
/// Only valid if this is a view into a varbinary type
uint32_t* mutable_offsets() {
    ARROW_DCHECK(!metadata_.is_fixed_length);
return reinterpret_cast<uint32_t*>(mutable_data(kFixedLengthBuffer));
}
/// \brief Return a read-only version of the offsets buffer
///
/// Only valid if this is a view into a varbinary type
const uint32_t* offsets() const {
    ARROW_DCHECK(!metadata_.is_fixed_length);
return reinterpret_cast<const uint32_t*>(data(kFixedLengthBuffer));
}
/// \brief Return the type metadata
const KeyColumnMetadata& metadata() const { return metadata_; }
/// \brief Return the length (in rows) of the array
int64_t length() const { return length_; }
/// \brief Return the bit offset into the corresponding vector
///
  /// If i == 1 then this must be a bool array
  int bit_offset(int i) const {
    ARROW_DCHECK(i >= 0 && i < kMaxBuffers - 1);
return bit_offset_[i];
}
private:
static constexpr int kMaxBuffers = 3;
const uint8_t* buffers_[kMaxBuffers];
uint8_t* mutable_buffers_[kMaxBuffers];
KeyColumnMetadata metadata_;
int64_t length_;
// Starting bit offset within the first byte (between 0 and 7)
// to be used when accessing buffers that store bit vectors.
int bit_offset_[kMaxBuffers - 1];
};
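/// \par Example
/// A hypothetical sketch of wrapping caller-owned int32 buffers in a read-only
/// view; as documented above, the buffers must outlive the view:
/// \code
/// KeyColumnArray MakeInt32View(const uint8_t* validity, const uint8_t* values,
///                              int64_t num_rows) {
///   KeyColumnMetadata meta(/*is_fixed_length=*/true, /*fixed_length=*/4);
///   return KeyColumnArray(meta, num_rows, validity, values,
///                         /*var_length_buffer=*/NULLPTR);
/// }
/// \endcode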
/// \brief Create KeyColumnMetadata from a DataType
///
/// If `type` is a dictionary type then this will return the KeyColumnMetadata for
/// the indices type
///
/// This should only be called on "key" columns. Calling this with
/// a non-key column will return Status::TypeError.
ARROW_EXPORT Result<KeyColumnMetadata> ColumnMetadataFromDataType(
const std::shared_ptr<DataType>& type);
/// \brief Create KeyColumnArray from ArrayData
///
/// If `type` is a dictionary type then this will return the KeyColumnArray for
/// the indices array
///
/// The caller should ensure this is only called on "key" columns.
/// \see ColumnMetadataFromDataType for details
ARROW_EXPORT Result<KeyColumnArray> ColumnArrayFromArrayData(
const std::shared_ptr<ArrayData>& array_data, int start_row, int num_rows);
/// \brief Create KeyColumnMetadata instances from an ExecBatch
///
/// column_metadatas will be resized to fit
///
/// All columns in `batch` must be eligible "key" columns and have an array shape
/// \see ColumnMetadataFromDataType for more details
ARROW_EXPORT Status ColumnMetadatasFromExecBatch(
const ExecBatch& batch, std::vector<KeyColumnMetadata>* column_metadatas);
/// \brief Create KeyColumnArray instances from a slice of an ExecBatch
///
/// column_arrays will be resized to fit
///
/// All columns in `batch` must be eligible "key" columns and have an array shape
/// \see ColumnArrayFromArrayData for more details
ARROW_EXPORT Status ColumnArraysFromExecBatch(const ExecBatch& batch, int start_row,
int num_rows,
std::vector<KeyColumnArray>* column_arrays);
/// \brief Create KeyColumnArray instances from an ExecBatch
///
/// column_arrays will be resized to fit
///
/// All columns in `batch` must be eligible "key" columns and have an array shape
/// \see ColumnArrayFromArrayData for more details
ARROW_EXPORT Status ColumnArraysFromExecBatch(const ExecBatch& batch,
std::vector<KeyColumnArray>* column_arrays);
/// A lightweight resizable array for "key" columns
///
/// Unlike KeyColumnArray, this instance owns its buffers
///
/// Resizing is handled by arrow::ResizableBuffer and a doubling approach is
/// used so that resizes will always grow up to the next power of 2
class ARROW_EXPORT ResizableArrayData {
public:
/// \brief Create an uninitialized instance
///
  /// Init must be called before any other operation
ResizableArrayData()
: log_num_rows_min_(0),
pool_(NULLPTR),
num_rows_(0),
num_rows_allocated_(0),
var_len_buf_size_(0) {}
~ResizableArrayData() { Clear(true); }
/// \brief Initialize the array
/// \param data_type The data type this array is holding data for.
/// \param pool The pool to make allocations on
/// \param log_num_rows_min All resize operations will allocate at least enough
/// space for (1 << log_num_rows_min) rows
void Init(const std::shared_ptr<DataType>& data_type, MemoryPool* pool,
int log_num_rows_min);
/// \brief Resets the array back to an empty state
/// \param release_buffers If true then allocated memory is released and the
/// next resize operation will have to reallocate memory
void Clear(bool release_buffers);
/// \brief Resize the fixed length buffers
///
/// The buffers will be resized to hold at least `num_rows_new` rows of data
Status ResizeFixedLengthBuffers(int num_rows_new);
/// \brief Resize the varying length buffer if this array is a variable binary type
///
  /// This must be called after the offsets have been populated; the buffer will be
  /// resized to hold at least as much data as the offsets require
///
/// Does nothing if the array is not a variable binary type
Status ResizeVaryingLengthBuffer();
/// \brief The current length (in rows) of the array
int num_rows() const { return num_rows_; }
/// \brief A non-owning view into this array
KeyColumnArray column_array() const;
/// \brief A lightweight descriptor of the data held by this array
Result<KeyColumnMetadata> column_metadata() const {
return ColumnMetadataFromDataType(data_type_);
}
/// \brief Convert the data to an arrow::ArrayData
///
  /// This is a zero-copy operation and the created ArrayData will reference the
/// buffers held by this instance.
std::shared_ptr<ArrayData> array_data() const;
// Constants used for accessing buffers using mutable_data().
static constexpr int kValidityBuffer = 0;
static constexpr int kFixedLengthBuffer = 1;
static constexpr int kVariableLengthBuffer = 2;
  /// \brief A raw pointer to the requested buffer
  ///
  /// If i is 0 (kValidityBuffer) then this returns the validity buffer.
  /// If i is 1 (kFixedLengthBuffer) then this returns the buffer used for values
  /// (if this is a fixed-length data type) or offsets (if this is a variable
  /// binary type).
  /// If i is 2 (kVariableLengthBuffer) then this returns the buffer used for
  /// variable-length binary data.
uint8_t* mutable_data(int i) { return buffers_[i]->mutable_data(); }
private:
static constexpr int64_t kNumPaddingBytes = 64;
int log_num_rows_min_;
std::shared_ptr<DataType> data_type_;
MemoryPool* pool_;
int num_rows_;
int num_rows_allocated_;
int var_len_buf_size_;
static constexpr int kMaxBuffers = 3;
std::shared_ptr<ResizableBuffer> buffers_[kMaxBuffers];
};
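/// \par Example
/// A hedged sketch of the intended lifecycle (Init, resize, read out), based only
/// on the declarations above; how the buffers are filled is elided:
/// \code
/// Status BuildInt32Column(MemoryPool* pool) {
///   ResizableArrayData data;
///   data.Init(int32(), pool, /*log_num_rows_min=*/4);
///   ARROW_RETURN_NOT_OK(data.ResizeFixedLengthBuffers(/*num_rows_new=*/100));
///   // ... fill data.mutable_data(ResizableArrayData::kFixedLengthBuffer) ...
///   std::shared_ptr<ArrayData> array = data.array_data();  // zero-copy view
///   return Status::OK();
/// }
/// \endcode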
/// \brief A builder to concatenate batches of data into a larger batch
///
/// Stores at most num_rows_max() rows
class ARROW_EXPORT ExecBatchBuilder {
public:
/// \brief Add rows from `source` into `target` column
///
/// If `target` is uninitialized or cleared it will be initialized to use
/// the given pool.
static Status AppendSelected(const std::shared_ptr<ArrayData>& source,
ResizableArrayData* target, int num_rows_to_append,
const uint16_t* row_ids, MemoryPool* pool);
/// \brief Add nulls into `target` column
///
/// If `target` is uninitialized or cleared it will be initialized to use
/// the given pool.
static Status AppendNulls(const std::shared_ptr<DataType>& type,
ResizableArrayData& target, int num_rows_to_append,
MemoryPool* pool);
/// \brief Add selected rows from `batch`
///
  /// If `col_ids` is null then `num_cols` must be less than or equal to
  /// `batch.num_values()` and the first `num_cols` columns of `batch` will be appended.
///
/// All columns in `batch` must have array shape
Status AppendSelected(MemoryPool* pool, const ExecBatch& batch, int num_rows_to_append,
const uint16_t* row_ids, int num_cols,
const int* col_ids = NULLPTR);
/// \brief Add all-null rows
Status AppendNulls(MemoryPool* pool,
const std::vector<std::shared_ptr<DataType>>& types,
int num_rows_to_append);
/// \brief Create an ExecBatch with the data that has been appended so far
/// and clear this builder to be used again
///
/// Should only be called if num_rows() returns non-zero.
ExecBatch Flush();
int num_rows() const { return values_.empty() ? 0 : values_[0].num_rows(); }
static int num_rows_max() { return 1 << kLogNumRows; }
private:
static constexpr int kLogNumRows = 15;
  // Calculate how many rows to skip from the tail of the sequence of selected
  // rows, such that the total size of the skipped rows is at least equal to the
  // size specified by the caller.
  //
  // Skipping the tail rows allows the caller to process the remaining rows
  // faster, without checking buffer bounds (useful with SIMD or fixed-size
  // memory loads and stores).
//
// The sequence of row_ids provided must be non-decreasing.
//
static int NumRowsToSkip(const std::shared_ptr<ArrayData>& column, int num_rows,
const uint16_t* row_ids, int num_tail_bytes_to_skip);
// The supplied lambda will be called for each row in the given list of rows.
// The arguments given to it will be:
// - index of a row (within the set of selected rows),
// - pointer to the value,
// - byte length of the value.
//
// The information about nulls (validity bitmap) is not used in this call and
// has to be processed separately.
//
template <class PROCESS_VALUE_FN>
static void Visit(const std::shared_ptr<ArrayData>& column, int num_rows,
const uint16_t* row_ids, PROCESS_VALUE_FN process_value_fn);
template <bool OUTPUT_BYTE_ALIGNED>
static void CollectBitsImp(const uint8_t* input_bits, int64_t input_bits_offset,
uint8_t* output_bits, int64_t output_bits_offset,
int num_rows, const uint16_t* row_ids);
static void CollectBits(const uint8_t* input_bits, int64_t input_bits_offset,
uint8_t* output_bits, int64_t output_bits_offset, int num_rows,
const uint16_t* row_ids);
std::vector<ResizableArrayData> values_;
};
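/// \par Example
/// An illustrative sketch of accumulating a selection of rows and flushing them as
/// a single batch; assumes `row_ids` is non-decreasing and all columns of `batch`
/// have array shape, as required above:
/// \code
/// Status Accumulate(MemoryPool* pool, const ExecBatch& batch,
///                   const uint16_t* row_ids, int num_selected) {
///   ExecBatchBuilder builder;
///   ARROW_RETURN_NOT_OK(builder.AppendSelected(pool, batch, num_selected, row_ids,
///                                              batch.num_values()));
///   ExecBatch combined = builder.Flush();  // only call when num_rows() > 0
///   // ... process `combined` ...
///   return Status::OK();
/// }
/// \endcode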
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,93 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// NOTE: API is EXPERIMENTAL and will change without going through a
// deprecation cycle
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace compute {
class Function;
class FunctionOptionsType;
/// \brief A mutable central function registry for built-in functions as well
/// as user-defined functions. Functions are implementations of
/// arrow::compute::Function.
///
/// Generally, each function contains kernels which are implementations of a
/// function for a specific argument signature. After looking up a function in
/// the registry, one can either execute it eagerly with Function::Execute or
/// use one of the function's dispatch methods to pick a suitable kernel for
/// lower-level function execution.
class ARROW_EXPORT FunctionRegistry {
public:
~FunctionRegistry();
/// \brief Construct a new registry. Most users only need to use the global
/// registry
static std::unique_ptr<FunctionRegistry> Make();
  /// \brief Add a new function to the registry. Returns Status::KeyError if a
  /// function with the same name is already registered and `allow_overwrite` is false
Status AddFunction(std::shared_ptr<Function> function, bool allow_overwrite = false);
/// \brief Add aliases for the given function name. Returns Status::KeyError if the
/// function with the given name is not registered
Status AddAlias(const std::string& target_name, const std::string& source_name);
  /// \brief Add a new function options type to the registry. Returns Status::KeyError
  /// if a function options type with the same name is already registered and
  /// `allow_overwrite` is false
Status AddFunctionOptionsType(const FunctionOptionsType* options_type,
bool allow_overwrite = false);
/// \brief Retrieve a function by name from the registry
Result<std::shared_ptr<Function>> GetFunction(const std::string& name) const;
/// \brief Return vector of all entry names in the registry. Helpful for
/// displaying a manifest of available functions
std::vector<std::string> GetFunctionNames() const;
/// \brief Retrieve a function options type by name from the registry
Result<const FunctionOptionsType*> GetFunctionOptionsType(
const std::string& name) const;
/// \brief The number of currently registered functions
int num_functions() const;
private:
FunctionRegistry();
  // Use the PIMPL idiom to avoid exposing std::unordered_map in this header
class FunctionRegistryImpl;
std::unique_ptr<FunctionRegistryImpl> impl_;
};
/// \brief Return the process-global function registry
ARROW_EXPORT FunctionRegistry* GetFunctionRegistry();
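/// \par Example
/// A minimal lookup sketch; "add" is assumed to be the name of a registered
/// built-in function (nothing in this header guarantees any particular name):
/// \code
/// Status LookUpAdd() {
///   FunctionRegistry* registry = GetFunctionRegistry();
///   ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Function> func,
///                         registry->GetFunction("add"));
///   // `func` can now be executed eagerly with Function::Execute.
///   return Status::OK();
/// }
/// \endcode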
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,50 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
namespace arrow {
struct Datum;
struct ValueDescr;
namespace compute {
class Function;
class FunctionOptions;
class CastOptions;
struct ExecBatch;
class ExecContext;
class KernelContext;
struct Kernel;
struct ScalarKernel;
struct ScalarAggregateKernel;
struct VectorKernel;
struct KernelState;
class Expression;
class ExecNode;
class ExecPlan;
class ExecNodeOptions;
class ExecFactoryRegistry;
} // namespace compute
} // namespace arrow

View File

@@ -0,0 +1,98 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <string>
#include "arrow/status.h"
#include "arrow/util/config.h" // IWYU pragma: export
#include "arrow/util/optional.h"
#include "arrow/util/visibility.h"
namespace arrow {
struct BuildInfo {
/// The packed version number, e.g. 1002003 (decimal) for Arrow 1.2.3
int version;
/// The "major" version number, e.g. 1 for Arrow 1.2.3
int version_major;
/// The "minor" version number, e.g. 2 for Arrow 1.2.3
int version_minor;
/// The "patch" version number, e.g. 3 for Arrow 1.2.3
int version_patch;
/// The version string, e.g. "1.2.3"
std::string version_string;
std::string so_version;
std::string full_so_version;
/// The CMake compiler identifier, e.g. "GNU"
std::string compiler_id;
std::string compiler_version;
std::string compiler_flags;
/// The git changeset id, if available
std::string git_id;
/// The git changeset description, if available
std::string git_description;
std::string package_kind;
/// The uppercase build type, e.g. "DEBUG" or "RELEASE"
std::string build_type;
};
struct RuntimeInfo {
/// The enabled SIMD level
///
/// This can be less than `detected_simd_level` if the ARROW_USER_SIMD_LEVEL
/// environment variable is set to another value.
std::string simd_level;
/// The SIMD level available on the OS and CPU
std::string detected_simd_level;
/// Whether using the OS-based timezone database
/// This is set at compile-time.
bool using_os_timezone_db;
  /// The path to the timezone database; unset by default.
util::optional<std::string> timezone_db_path;
};
/// \brief Get runtime build info.
///
/// The returned values correspond to the exact loaded version of the Arrow library,
/// rather than the values frozen at application compile-time through the `ARROW_*`
/// preprocessor definitions.
ARROW_EXPORT
const BuildInfo& GetBuildInfo();
/// \brief Get runtime info.
///
ARROW_EXPORT
RuntimeInfo GetRuntimeInfo();
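/// \par Example
/// A small sketch of reporting version and SIMD information at startup
/// (assumes <cstdio> is included):
/// \code
/// void PrintArrowInfo() {
///   const BuildInfo& build = GetBuildInfo();
///   RuntimeInfo runtime = GetRuntimeInfo();
///   std::printf("Arrow %s, SIMD level: %s\n", build.version_string.c_str(),
///               runtime.simd_level.c_str());
/// }
/// \endcode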
struct GlobalOptions {
/// Path to text timezone database. This is only configurable on Windows,
/// which does not have a compatible OS timezone database.
util::optional<std::string> timezone_db_path;
};
ARROW_EXPORT
Status Initialize(const GlobalOptions& options) noexcept;
} // namespace arrow

View File

@@ -0,0 +1,27 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/csv/options.h"
#include "arrow/csv/reader.h"
// The writer depends on compute module for casting.
#include "arrow/util/config.h" // for ARROW_COMPUTE definition
#ifdef ARROW_COMPUTE
#include "arrow/csv/writer.h"
#endif

View File

@@ -0,0 +1,36 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include "arrow/csv/options.h"
#include "arrow/status.h"
#include "arrow/util/delimiting.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace csv {
ARROW_EXPORT
std::unique_ptr<Chunker> MakeChunker(const ParseOptions& options);
} // namespace csv
} // namespace arrow

View File

@@ -0,0 +1,78 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <utility>
#include "arrow/result.h"
#include "arrow/type_fwd.h"
#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace csv {
class BlockParser;
struct ConvertOptions;
class ARROW_EXPORT ColumnBuilder {
public:
virtual ~ColumnBuilder() = default;
/// Spawn a task that will try to convert and append the given CSV block.
  /// All calls to Append() should happen on the same thread; otherwise,
  /// call Insert() instead.
virtual void Append(const std::shared_ptr<BlockParser>& parser) = 0;
/// Spawn a task that will try to convert and insert the given CSV block
virtual void Insert(int64_t block_index,
const std::shared_ptr<BlockParser>& parser) = 0;
/// Return the final chunked array. The TaskGroup _must_ have finished!
virtual Result<std::shared_ptr<ChunkedArray>> Finish() = 0;
std::shared_ptr<arrow::internal::TaskGroup> task_group() { return task_group_; }
/// Construct a strictly-typed ColumnBuilder.
static Result<std::shared_ptr<ColumnBuilder>> Make(
MemoryPool* pool, const std::shared_ptr<DataType>& type, int32_t col_index,
const ConvertOptions& options,
const std::shared_ptr<arrow::internal::TaskGroup>& task_group);
/// Construct a type-inferring ColumnBuilder.
static Result<std::shared_ptr<ColumnBuilder>> Make(
MemoryPool* pool, int32_t col_index, const ConvertOptions& options,
const std::shared_ptr<arrow::internal::TaskGroup>& task_group);
/// Construct a ColumnBuilder for a column of nulls
/// (i.e. not present in the CSV file).
static Result<std::shared_ptr<ColumnBuilder>> MakeNull(
MemoryPool* pool, const std::shared_ptr<DataType>& type,
const std::shared_ptr<arrow::internal::TaskGroup>& task_group);
protected:
explicit ColumnBuilder(std::shared_ptr<arrow::internal::TaskGroup> task_group)
: task_group_(std::move(task_group)) {}
std::shared_ptr<arrow::internal::TaskGroup> task_group_;
};
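/// \par Example
/// A hedged sketch of building one type-inferred column from a parsed block;
/// assumes a serial TaskGroup (from arrow/util/task_group.h) whose Finish()
/// waits for the conversion tasks:
/// \code
/// Result<std::shared_ptr<ChunkedArray>> BuildColumn(
///     MemoryPool* pool, const ConvertOptions& options,
///     const std::shared_ptr<arrow::internal::TaskGroup>& task_group,
///     const std::shared_ptr<BlockParser>& parser) {
///   ARROW_ASSIGN_OR_RAISE(
///       auto builder, ColumnBuilder::Make(pool, /*col_index=*/0, options, task_group));
///   builder->Append(parser);
///   ARROW_RETURN_NOT_OK(task_group->Finish());
///   return builder->Finish();
/// }
/// \endcode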
} // namespace csv
} // namespace arrow

View File

@@ -0,0 +1,64 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <utility>
#include "arrow/result.h"
#include "arrow/type_fwd.h"
#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace csv {
class BlockParser;
struct ConvertOptions;
class ARROW_EXPORT ColumnDecoder {
public:
virtual ~ColumnDecoder() = default;
/// Spawn a task that will try to convert and insert the given CSV block
virtual Future<std::shared_ptr<Array>> Decode(
const std::shared_ptr<BlockParser>& parser) = 0;
/// Construct a strictly-typed ColumnDecoder.
static Result<std::shared_ptr<ColumnDecoder>> Make(MemoryPool* pool,
std::shared_ptr<DataType> type,
int32_t col_index,
const ConvertOptions& options);
/// Construct a type-inferring ColumnDecoder.
  /// Inference will run only on the first block; the type will be frozen afterwards.
static Result<std::shared_ptr<ColumnDecoder>> Make(MemoryPool* pool, int32_t col_index,
const ConvertOptions& options);
/// Construct a ColumnDecoder for a column of nulls
/// (i.e. not present in the CSV file).
static Result<std::shared_ptr<ColumnDecoder>> MakeNull(MemoryPool* pool,
std::shared_ptr<DataType> type);
protected:
ColumnDecoder() = default;
};
} // namespace csv
} // namespace arrow

View File

@@ -0,0 +1,82 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include "arrow/csv/options.h"
#include "arrow/result.h"
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace csv {
class BlockParser;
class ARROW_EXPORT Converter {
public:
Converter(const std::shared_ptr<DataType>& type, const ConvertOptions& options,
MemoryPool* pool);
virtual ~Converter() = default;
virtual Result<std::shared_ptr<Array>> Convert(const BlockParser& parser,
int32_t col_index) = 0;
std::shared_ptr<DataType> type() const { return type_; }
// Create a Converter for the given data type
static Result<std::shared_ptr<Converter>> Make(
const std::shared_ptr<DataType>& type, const ConvertOptions& options,
MemoryPool* pool = default_memory_pool());
protected:
ARROW_DISALLOW_COPY_AND_ASSIGN(Converter);
virtual Status Initialize() = 0;
// CAUTION: ConvertOptions can grow large (if it customizes hundreds or
// thousands of columns), so avoid copying it in each Converter.
const ConvertOptions& options_;
MemoryPool* pool_;
std::shared_ptr<DataType> type_;
};
class ARROW_EXPORT DictionaryConverter : public Converter {
public:
DictionaryConverter(const std::shared_ptr<DataType>& value_type,
const ConvertOptions& options, MemoryPool* pool);
// If the dictionary length goes above this value, conversion will fail
// with Status::IndexError.
virtual void SetMaxCardinality(int32_t max_length) = 0;
// Create a Converter for the given dictionary value type.
// The dictionary index type will always be Int32.
static Result<std::shared_ptr<DictionaryConverter>> Make(
const std::shared_ptr<DataType>& value_type, const ConvertOptions& options,
MemoryPool* pool = default_memory_pool());
protected:
std::shared_ptr<DataType> value_type_;
};
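/// \par Example
/// An illustrative sketch of converting the first column of an already-parsed
/// block to a UTF-8 array:
/// \code
/// Result<std::shared_ptr<Array>> ConvertFirstColumn(const BlockParser& parser) {
///   auto options = ConvertOptions::Defaults();
///   ARROW_ASSIGN_OR_RAISE(auto converter, Converter::Make(utf8(), options));
///   return converter->Convert(parser, /*col_index=*/0);
/// }
/// \endcode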
} // namespace csv
} // namespace arrow

View File

@@ -0,0 +1,56 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <functional>
#include "arrow/util/string_view.h"
namespace arrow {
namespace csv {
/// \brief Description of an invalid row
struct InvalidRow {
/// \brief Number of columns expected in the row
int32_t expected_columns;
/// \brief Actual number of columns found in the row
int32_t actual_columns;
  /// \brief The physical row number if known, or -1
///
/// This number is one-based and also accounts for non-data rows (such as
/// CSV header rows).
int64_t number;
  /// \brief View of the entire row. Memory will be freed after the callback returns
const util::string_view text;
};
/// \brief Result returned by an InvalidRowHandler
enum class InvalidRowResult {
// Generate an error describing this row
Error,
// Skip over this row
Skip
};
/// \brief Callback for handling a row with an invalid number of columns while parsing
/// \return a result indicating whether the parser should return an error or skip
/// the row
using InvalidRowHandler = std::function<InvalidRowResult(const InvalidRow&)>;
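/// \par Example
/// A sketch of a handler that skips rows with too few columns but reports an
/// error for any other mismatch:
/// \code
/// InvalidRowHandler handler = [](const InvalidRow& row) {
///   return row.actual_columns < row.expected_columns ? InvalidRowResult::Skip
///                                                    : InvalidRowResult::Error;
/// };
/// \endcode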
} // namespace csv
} // namespace arrow

View File

@@ -0,0 +1,220 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "arrow/csv/invalid_row.h"
#include "arrow/csv/type_fwd.h"
#include "arrow/io/interfaces.h"
#include "arrow/status.h"
#include "arrow/util/visibility.h"
namespace arrow {
class DataType;
class TimestampParser;
namespace csv {
// Silly workaround for https://github.com/michaeljones/breathe/issues/453
constexpr char kDefaultEscapeChar = '\\';
struct ARROW_EXPORT ParseOptions {
// Parsing options
/// Field delimiter
char delimiter = ',';
/// Whether quoting is used
bool quoting = true;
/// Quoting character (if `quoting` is true)
char quote_char = '"';
/// Whether a quote inside a value is double-quoted
bool double_quote = true;
/// Whether escaping is used
bool escaping = false;
/// Escaping character (if `escaping` is true)
char escape_char = kDefaultEscapeChar;
/// Whether values are allowed to contain CR (0x0d) and LF (0x0a) characters
bool newlines_in_values = false;
/// Whether empty lines are ignored. If false, an empty line represents
/// a single empty value (assuming a one-column CSV file).
bool ignore_empty_lines = true;
/// A handler function for rows which do not have the correct number of columns
InvalidRowHandler invalid_row_handler;
/// Create parsing options with default values
static ParseOptions Defaults();
/// \brief Test that all set options are valid
Status Validate() const;
};
struct ARROW_EXPORT ConvertOptions {
// Conversion options
/// Whether to check UTF8 validity of string columns
bool check_utf8 = true;
/// Optional per-column types (disabling type inference on those columns)
std::unordered_map<std::string, std::shared_ptr<DataType>> column_types;
/// Recognized spellings for null values
std::vector<std::string> null_values;
/// Recognized spellings for boolean true values
std::vector<std::string> true_values;
/// Recognized spellings for boolean false values
std::vector<std::string> false_values;
/// Whether string / binary columns can have null values.
///
/// If true, then strings in "null_values" are considered null for string columns.
/// If false, then all strings are valid string values.
bool strings_can_be_null = false;
/// Whether quoted values can be null.
///
/// If true, then strings in "null_values" are also considered null when they
/// appear quoted in the CSV file. Otherwise, quoted values are never considered null.
bool quoted_strings_can_be_null = true;
/// Whether to try to automatically dict-encode string / binary data.
/// If true, then when type inference detects a string or binary column,
/// it is dict-encoded up to `auto_dict_max_cardinality` distinct values
/// (per chunk), after which it switches to regular encoding.
///
/// This setting is ignored for non-inferred columns (those in `column_types`).
bool auto_dict_encode = false;
int32_t auto_dict_max_cardinality = 50;
/// Decimal point character for floating-point and decimal data
char decimal_point = '.';
// XXX Should we have a separate FilterOptions?
/// If non-empty, indicates the names of columns from the CSV file that should
/// be actually read and converted (in the vector's order).
/// Columns not in this vector will be ignored.
std::vector<std::string> include_columns;
/// If false, columns in `include_columns` but not in the CSV file will error out.
/// If true, columns in `include_columns` but not in the CSV file will produce
/// a column of nulls (whose type is selected using `column_types`,
/// or null by default)
/// This option is ignored if `include_columns` is empty.
bool include_missing_columns = false;
/// User-defined timestamp parsers, using the virtual parser interface in
/// arrow/util/value_parsing.h. More than one parser can be specified, and
/// the CSV conversion logic will try parsing values starting from the
/// beginning of this vector. If no parsers are specified, we use the default
/// built-in ISO-8601 parser.
std::vector<std::shared_ptr<TimestampParser>> timestamp_parsers;
/// Create conversion options with default values, including conventional
/// values for `null_values`, `true_values` and `false_values`
static ConvertOptions Defaults();
/// \brief Test that all set options are valid
Status Validate() const;
};
struct ARROW_EXPORT ReadOptions {
// Reader options
/// Whether to use the global CPU thread pool
bool use_threads = true;
/// \brief Block size we request from the IO layer.
///
/// This will determine multi-threading granularity as well as
/// the size of individual record batches.
  /// The minimum valid value for block size is 1
int32_t block_size = 1 << 20; // 1 MB
/// Number of header rows to skip (not including the row of column names, if any)
int32_t skip_rows = 0;
/// Number of rows to skip after the column names are read, if any
int32_t skip_rows_after_names = 0;
/// Column names for the target table.
/// If empty, fall back on autogenerate_column_names.
std::vector<std::string> column_names;
/// Whether to autogenerate column names if `column_names` is empty.
/// If true, column names will be of the form "f0", "f1"...
/// If false, column names will be read from the first CSV row after `skip_rows`.
bool autogenerate_column_names = false;
/// Create read options with default values
static ReadOptions Defaults();
/// \brief Test that all set options are valid
Status Validate() const;
};
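/// \par Example
/// A sketch of composing the reader-side option structs; the column name "id" is
/// illustrative and int64() is assumed to come from arrow/type_fwd.h:
/// \code
/// auto read_options = ReadOptions::Defaults();
/// read_options.block_size = 1 << 22;  // 4 MB blocks
/// auto parse_options = ParseOptions::Defaults();
/// parse_options.delimiter = ';';
/// auto convert_options = ConvertOptions::Defaults();
/// convert_options.column_types["id"] = int64();
/// \endcode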
/// \brief Quoting style for CSV writing
enum class ARROW_EXPORT QuotingStyle {
  /// Only enclose values in quotes which need them, because their CSV rendering can
  /// itself contain quotes (e.g. strings or binary values)
Needed,
  /// Enclose all valid values in quotes. Nulls are not quoted. May cause readers to
  /// interpret all values as strings if the schema is inferred.
AllValid,
  /// Do not enclose any values in quotes. Prevents values from containing quotes ("),
  /// cell delimiters (,) or line endings (\\r, \\n), following RFC 4180. If values
  /// contain these characters, an error is raised when attempting to write.
None
};
struct ARROW_EXPORT WriteOptions {
/// Whether to write an initial header line with column names
bool include_header = true;
/// \brief Maximum number of rows processed at a time
///
/// The CSV writer converts and writes data in batches of N rows.
/// This number can impact performance.
int32_t batch_size = 1024;
/// Field delimiter
char delimiter = ',';
/// \brief The string to write for null values. Quotes are not allowed in this string.
std::string null_string;
/// \brief IO context for writing.
io::IOContext io_context;
/// \brief The end of line character to use for ending rows
std::string eol = "\n";
/// \brief Quoting style
QuotingStyle quoting_style = QuotingStyle::Needed;
/// Create write options with default values
static WriteOptions Defaults();
/// \brief Test that all set options are valid
Status Validate() const;
};
} // namespace csv
} // namespace arrow

View File

@@ -0,0 +1,227 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>
#include "arrow/buffer.h"
#include "arrow/csv/options.h"
#include "arrow/csv/type_fwd.h"
#include "arrow/status.h"
#include "arrow/util/macros.h"
#include "arrow/util/string_view.h"
#include "arrow/util/visibility.h"
namespace arrow {
class MemoryPool;
namespace csv {
/// Skip at most num_rows from the given input. The input pointer is updated
/// and the number of rows actually skipped is returned (it may be less than
/// requested if the input is too short).
ARROW_EXPORT
int32_t SkipRows(const uint8_t* data, uint32_t size, int32_t num_rows,
const uint8_t** out_data);
class BlockParserImpl;
namespace detail {
struct ParsedValueDesc {
uint32_t offset : 31;
bool quoted : 1;
};
class ARROW_EXPORT DataBatch {
public:
explicit DataBatch(int32_t num_cols) : num_cols_(num_cols) {}
/// \brief Return the number of parsed rows (not skipped)
int32_t num_rows() const { return num_rows_; }
/// \brief Return the number of parsed columns
int32_t num_cols() const { return num_cols_; }
/// \brief Return the total size in bytes of parsed data
uint32_t num_bytes() const { return parsed_size_; }
/// \brief Return the number of skipped rows
int32_t num_skipped_rows() const { return static_cast<int32_t>(skipped_rows_.size()); }
template <typename Visitor>
Status VisitColumn(int32_t col_index, int64_t first_row, Visitor&& visit) const {
using detail::ParsedValueDesc;
int32_t batch_row = 0;
for (size_t buf_index = 0; buf_index < values_buffers_.size(); ++buf_index) {
const auto& values_buffer = values_buffers_[buf_index];
const auto values = reinterpret_cast<const ParsedValueDesc*>(values_buffer->data());
const auto max_pos =
static_cast<int32_t>(values_buffer->size() / sizeof(ParsedValueDesc)) - 1;
for (int32_t pos = col_index; pos < max_pos; pos += num_cols_, ++batch_row) {
auto start = values[pos].offset;
auto stop = values[pos + 1].offset;
auto quoted = values[pos + 1].quoted;
Status status = visit(parsed_ + start, stop - start, quoted);
if (ARROW_PREDICT_FALSE(!status.ok())) {
return DecorateWithRowNumber(std::move(status), first_row, batch_row);
}
}
}
return Status::OK();
}
template <typename Visitor>
Status VisitLastRow(Visitor&& visit) const {
using detail::ParsedValueDesc;
const auto& values_buffer = values_buffers_.back();
const auto values = reinterpret_cast<const ParsedValueDesc*>(values_buffer->data());
const auto start_pos =
static_cast<int32_t>(values_buffer->size() / sizeof(ParsedValueDesc)) -
num_cols_ - 1;
for (int32_t col_index = 0; col_index < num_cols_; ++col_index) {
auto start = values[start_pos + col_index].offset;
auto stop = values[start_pos + col_index + 1].offset;
auto quoted = values[start_pos + col_index + 1].quoted;
ARROW_RETURN_NOT_OK(visit(parsed_ + start, stop - start, quoted));
}
return Status::OK();
}
protected:
Status DecorateWithRowNumber(Status&& status, int64_t first_row,
int32_t batch_row) const {
if (first_row >= 0) {
// `skipped_rows_` is in ascending order by construction, so use bisection
// to find out how many rows were skipped before `batch_row`.
const auto skips_before =
std::upper_bound(skipped_rows_.begin(), skipped_rows_.end(), batch_row) -
skipped_rows_.begin();
status = status.WithMessage("Row #", batch_row + skips_before + first_row, ": ",
status.message());
}
    // Use ARROW_RETURN_IF_ so that extra error context is added when it is enabled
ARROW_RETURN_IF_(true, std::move(status), ARROW_STRINGIFY(status));
}
// The number of rows in this batch (not including any skipped ones)
int32_t num_rows_ = 0;
// The number of columns
int32_t num_cols_ = 0;
// XXX should we ensure the parsed buffer is padded with 8 or 16 excess zero bytes?
// It may help with null parsing...
std::vector<std::shared_ptr<Buffer>> values_buffers_;
std::shared_ptr<Buffer> parsed_buffer_;
const uint8_t* parsed_ = NULLPTR;
int32_t parsed_size_ = 0;
// Record the current num_rows_ each time a row is skipped
std::vector<int32_t> skipped_rows_;
friend class ::arrow::csv::BlockParserImpl;
};
} // namespace detail
constexpr int32_t kMaxParserNumRows = 100000;
/// \class BlockParser
/// \brief A reusable block-based parser for CSV data
///
/// The parser takes a block of CSV data and delimits rows and fields,
/// unquoting and unescaping them on the fly. Parsed data is owned by the
/// parser, so the original buffer can be discarded after Parse() returns.
///
/// If the block is truncated (i.e. not all data can be parsed), it is up
/// to the caller to arrange the next block to start with the trailing data.
/// Also, if the previous block ends with CR (0x0d) and a new block starts
/// with LF (0x0a), the parser will consider the leading newline as an empty
/// line; the caller should therefore strip it.
class ARROW_EXPORT BlockParser {
public:
explicit BlockParser(ParseOptions options, int32_t num_cols = -1,
int64_t first_row = -1, int32_t max_num_rows = kMaxParserNumRows);
explicit BlockParser(MemoryPool* pool, ParseOptions options, int32_t num_cols = -1,
int64_t first_row = -1, int32_t max_num_rows = kMaxParserNumRows);
~BlockParser();
/// \brief Parse a block of data
///
/// Parse a block of CSV data, ingesting up to max_num_rows rows.
/// The number of bytes actually parsed is returned in out_size.
Status Parse(util::string_view data, uint32_t* out_size);
/// \brief Parse sequential blocks of data
///
/// Only the last block is allowed to be truncated.
Status Parse(const std::vector<util::string_view>& data, uint32_t* out_size);
/// \brief Parse the final block of data
///
/// Like Parse(), but called with the final block in a file.
/// The last row may lack a trailing line separator.
Status ParseFinal(util::string_view data, uint32_t* out_size);
/// \brief Parse the final sequential blocks of data
///
/// Only the last block is allowed to be truncated.
Status ParseFinal(const std::vector<util::string_view>& data, uint32_t* out_size);
/// \brief Return the number of parsed rows
int32_t num_rows() const { return parsed_batch().num_rows(); }
/// \brief Return the number of parsed columns
int32_t num_cols() const { return parsed_batch().num_cols(); }
/// \brief Return the total size in bytes of parsed data
uint32_t num_bytes() const { return parsed_batch().num_bytes(); }
/// \brief Return the total number of rows including rows which were skipped
int32_t total_num_rows() const {
return parsed_batch().num_rows() + parsed_batch().num_skipped_rows();
}
/// \brief Return the row number of the first row in the block or -1 if unsupported
int64_t first_row_num() const;
/// \brief Visit parsed values in a column
///
/// The signature of the visitor is
/// Status(const uint8_t* data, uint32_t size, bool quoted)
template <typename Visitor>
Status VisitColumn(int32_t col_index, Visitor&& visit) const {
return parsed_batch().VisitColumn(col_index, first_row_num(),
std::forward<Visitor>(visit));
}
template <typename Visitor>
Status VisitLastRow(Visitor&& visit) const {
return parsed_batch().VisitLastRow(std::forward<Visitor>(visit));
}
protected:
std::unique_ptr<BlockParserImpl> impl_;
const detail::DataBatch& parsed_batch() const;
};
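/// \par Example
/// A sketch of parsing one complete in-memory block and visiting a column; the
/// visitor signature follows the documentation above:
/// \code
/// Status ParseWholeBlock(util::string_view csv_data) {
///   BlockParser parser(ParseOptions::Defaults());
///   uint32_t parsed_size = 0;
///   ARROW_RETURN_NOT_OK(parser.ParseFinal(csv_data, &parsed_size));
///   return parser.VisitColumn(
///       /*col_index=*/0, [](const uint8_t* data, uint32_t size, bool quoted) {
///         // ... inspect the value bytes ...
///         return Status::OK();
///       });
/// }
/// \endcode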
} // namespace csv
} // namespace arrow

View File

@@ -0,0 +1,125 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "arrow/csv/options.h" // IWYU pragma: keep
#include "arrow/io/interfaces.h"
#include "arrow/record_batch.h"
#include "arrow/result.h"
#include "arrow/type.h"
#include "arrow/type_fwd.h"
#include "arrow/util/future.h"
#include "arrow/util/thread_pool.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace io {
class InputStream;
} // namespace io
namespace csv {
/// A class that reads an entire CSV file into an Arrow Table
class ARROW_EXPORT TableReader {
public:
virtual ~TableReader() = default;
  /// Read the entire CSV file and convert it to an Arrow Table
virtual Result<std::shared_ptr<Table>> Read() = 0;
  /// Read the entire CSV file and convert it to an Arrow Table
virtual Future<std::shared_ptr<Table>> ReadAsync() = 0;
/// Create a TableReader instance
static Result<std::shared_ptr<TableReader>> Make(io::IOContext io_context,
std::shared_ptr<io::InputStream> input,
const ReadOptions&,
const ParseOptions&,
const ConvertOptions&);
ARROW_DEPRECATED(
"Deprecated in 4.0.0. "
"Use MemoryPool-less variant (the IOContext holds a pool already)")
static Result<std::shared_ptr<TableReader>> Make(
MemoryPool* pool, io::IOContext io_context, std::shared_ptr<io::InputStream> input,
const ReadOptions&, const ParseOptions&, const ConvertOptions&);
};
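/// \par Example
/// A sketch of reading a file into a Table; io::ReadableFile (from arrow/io/file.h)
/// and io::default_io_context() are assumed to be available:
/// \code
/// Result<std::shared_ptr<Table>> ReadCsvFile(const std::string& path) {
///   ARROW_ASSIGN_OR_RAISE(auto input, io::ReadableFile::Open(path));
///   ARROW_ASSIGN_OR_RAISE(
///       auto reader, TableReader::Make(io::default_io_context(), input,
///                                      ReadOptions::Defaults(), ParseOptions::Defaults(),
///                                      ConvertOptions::Defaults()));
///   return reader->Read();
/// }
/// \endcode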
/// \brief A class that reads a CSV file incrementally
///
/// Caveats:
/// - For now, this is always single-threaded (regardless of `ReadOptions::use_threads`).
/// - Type inference is done on the first block and types are frozen afterwards;
/// to make sure the right data types are inferred, either set
/// `ReadOptions::block_size` to a large enough value, or use
/// `ConvertOptions::column_types` to set the desired data types explicitly.
class ARROW_EXPORT StreamingReader : public RecordBatchReader {
public:
virtual ~StreamingReader() = default;
virtual Future<std::shared_ptr<RecordBatch>> ReadNextAsync() = 0;
/// \brief Return the number of bytes which have been read and processed
///
/// The returned number includes CSV bytes which the StreamingReader has
/// finished processing, but not bytes for which some processing (e.g.
/// CSV parsing or conversion to Arrow layout) is still ongoing.
///
/// Furthermore, the following rules apply:
/// - bytes skipped by `ReadOptions.skip_rows` are counted as being read before
/// any records are returned.
/// - bytes read while parsing the header are counted as being read before any
/// records are returned.
/// - bytes skipped by `ReadOptions.skip_rows_after_names` are counted after the
/// first batch is returned.
virtual int64_t bytes_read() const = 0;
/// Create a StreamingReader instance
///
  /// This involves some I/O, as the first batch must be loaded during the creation
  /// process, so it is returned as a future
///
/// Currently, the StreamingReader is not async-reentrant and does not do any fan-out
/// parsing (see ARROW-11889)
static Future<std::shared_ptr<StreamingReader>> MakeAsync(
io::IOContext io_context, std::shared_ptr<io::InputStream> input,
arrow::internal::Executor* cpu_executor, const ReadOptions&, const ParseOptions&,
const ConvertOptions&);
static Result<std::shared_ptr<StreamingReader>> Make(
io::IOContext io_context, std::shared_ptr<io::InputStream> input,
const ReadOptions&, const ParseOptions&, const ConvertOptions&);
ARROW_DEPRECATED("Deprecated in 4.0.0. Use IOContext-based overload")
static Result<std::shared_ptr<StreamingReader>> Make(
MemoryPool* pool, std::shared_ptr<io::InputStream> input,
const ReadOptions& read_options, const ParseOptions& parse_options,
const ConvertOptions& convert_options);
};
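/// \par Example
/// A sketch of consuming batches through the inherited RecordBatchReader interface:
/// \code
/// Status ConsumeBatches(const std::shared_ptr<StreamingReader>& reader) {
///   std::shared_ptr<RecordBatch> batch;
///   while (true) {
///     ARROW_RETURN_NOT_OK(reader->ReadNext(&batch));
///     if (batch == nullptr) break;  // end of stream
///     // ... process `batch` ...
///   }
///   return Status::OK();
/// }
/// \endcode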
/// \brief Count the logical rows of data in a CSV file (i.e. the
/// number of rows you would get if you read the file into a table).
ARROW_EXPORT
Future<int64_t> CountRowsAsync(io::IOContext io_context,
std::shared_ptr<io::InputStream> input,
arrow::internal::Executor* cpu_executor,
const ReadOptions&, const ParseOptions&);
} // namespace csv
} // namespace arrow

View File

@@ -0,0 +1,55 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <vector>
#include "arrow/csv/parser.h"
#include "arrow/testing/visibility.h"
namespace arrow {
namespace csv {
ARROW_TESTING_EXPORT
std::string MakeCSVData(std::vector<std::string> lines);
// Make a BlockParser from a vector of lines representing a CSV file
ARROW_TESTING_EXPORT
void MakeCSVParser(std::vector<std::string> lines, ParseOptions options, int32_t num_cols,
std::shared_ptr<BlockParser>* out);
ARROW_TESTING_EXPORT
void MakeCSVParser(std::vector<std::string> lines, ParseOptions options,
std::shared_ptr<BlockParser>* out);
ARROW_TESTING_EXPORT
void MakeCSVParser(std::vector<std::string> lines, std::shared_ptr<BlockParser>* out);
// Make a BlockParser from a vector of strings representing a single CSV column
ARROW_TESTING_EXPORT
void MakeColumnParser(std::vector<std::string> items, std::shared_ptr<BlockParser>* out);
ARROW_TESTING_EXPORT
Result<std::shared_ptr<Buffer>> MakeSampleCsvBuffer(
size_t num_rows, std::function<bool(size_t row_num)> is_valid = {});
} // namespace csv
} // namespace arrow

View File

@@ -0,0 +1,28 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
namespace arrow {
namespace csv {
class TableReader;
struct ConvertOptions;
struct ReadOptions;
struct ParseOptions;
struct WriteOptions;
} // namespace csv
} // namespace arrow

View File

@@ -0,0 +1,89 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "arrow/csv/options.h"
#include "arrow/io/interfaces.h"
#include "arrow/ipc/type_fwd.h"
#include "arrow/record_batch.h"
#include "arrow/table.h"
namespace arrow {
namespace csv {
// Functionality for converting Arrow data to comma-separated value (CSV) text.
// This library supports all primitive types that can be cast to a StringArray.
// It applies the following formatting rules:
// - For non-binary types no quotes surround values. Nulls are represented as the empty
// string.
// - For binary types all non-null data is quoted (and quotes within data are escaped
// with an additional quote).
// Null values are empty and unquoted.
/// \defgroup csv-write-functions High-level functions for writing CSV files
/// @{
/// \brief Convert table to CSV and write the result to output.
/// Experimental
ARROW_EXPORT Status WriteCSV(const Table& table, const WriteOptions& options,
arrow::io::OutputStream* output);
/// \brief Convert batch to CSV and write the result to output.
/// Experimental
ARROW_EXPORT Status WriteCSV(const RecordBatch& batch, const WriteOptions& options,
arrow::io::OutputStream* output);
/// \brief Convert batches read through a RecordBatchReader
/// to CSV and write the results to output.
/// Experimental
ARROW_EXPORT Status WriteCSV(const std::shared_ptr<RecordBatchReader>& reader,
const WriteOptions& options,
arrow::io::OutputStream* output);
/// @}
/// \defgroup csv-writer-factories Functions for creating an incremental CSV writer
/// @{
/// \brief Create a new CSV writer. The user is responsible for closing the
/// actual OutputStream.
///
/// \param[in] sink output stream to write to
/// \param[in] schema the schema of the record batches to be written
/// \param[in] options options for serialization
/// \return Result<std::shared_ptr<RecordBatchWriter>>
ARROW_EXPORT
Result<std::shared_ptr<ipc::RecordBatchWriter>> MakeCSVWriter(
std::shared_ptr<io::OutputStream> sink, const std::shared_ptr<Schema>& schema,
const WriteOptions& options = WriteOptions::Defaults());
/// \brief Create a new CSV writer.
///
/// \param[in] sink output stream to write to (does not take ownership)
/// \param[in] schema the schema of the record batches to be written
/// \param[in] options options for serialization
/// \return Result<std::shared_ptr<RecordBatchWriter>>
ARROW_EXPORT
Result<std::shared_ptr<ipc::RecordBatchWriter>> MakeCSVWriter(
io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
const WriteOptions& options = WriteOptions::Defaults());
/// @}
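/// \par Example
/// A sketch of writing a table to an in-memory buffer; io::BufferOutputStream
/// (from arrow/io/memory.h) is assumed to be available:
/// \code
/// Result<std::shared_ptr<Buffer>> TableToCsv(const Table& table) {
///   ARROW_ASSIGN_OR_RAISE(auto sink, io::BufferOutputStream::Create());
///   ARROW_RETURN_NOT_OK(WriteCSV(table, WriteOptions::Defaults(), sink.get()));
///   return sink->Finish();
/// }
/// \endcode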
} // namespace csv
} // namespace arrow

View File

@@ -0,0 +1,30 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This API is EXPERIMENTAL.
#pragma once
#include "arrow/compute/exec/expression.h"
#include "arrow/dataset/dataset.h"
#include "arrow/dataset/discovery.h"
#include "arrow/dataset/file_base.h"
#include "arrow/dataset/file_csv.h"
#include "arrow/dataset/file_ipc.h"
#include "arrow/dataset/file_orc.h"
#include "arrow/dataset/file_parquet.h"
#include "arrow/dataset/scanner.h"

View File

@@ -0,0 +1,251 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This API is EXPERIMENTAL.
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "arrow/compute/exec/expression.h"
#include "arrow/dataset/type_fwd.h"
#include "arrow/dataset/visibility.h"
#include "arrow/util/macros.h"
#include "arrow/util/mutex.h"
#include "arrow/util/optional.h"
namespace arrow {
namespace dataset {
using RecordBatchGenerator = std::function<Future<std::shared_ptr<RecordBatch>>()>;
/// \brief A granular piece of a Dataset, such as an individual file.
///
/// A Fragment can be read/scanned separately from other fragments. It yields a
/// collection of RecordBatches when scanned, encapsulated in one or more
/// ScanTasks.
///
/// Note that Fragments have well-defined physical schemas, which are reconciled by
/// the Datasets that contain them; these physical schemas may differ from a parent
/// Dataset's schema and from the physical schemas of sibling Fragments.
class ARROW_DS_EXPORT Fragment : public std::enable_shared_from_this<Fragment> {
public:
/// \brief Return the physical schema of the Fragment.
///
/// The physical schema is also called the writer schema.
/// This method is blocking and may incur high latency on slow filesystems.
/// The schema is cached after being read once, or may be specified at construction.
Result<std::shared_ptr<Schema>> ReadPhysicalSchema();
/// An asynchronous version of Scan
virtual Result<RecordBatchGenerator> ScanBatchesAsync(
const std::shared_ptr<ScanOptions>& options) = 0;
/// \brief Count the number of rows in this fragment matching the filter using metadata
/// only. That is, this method may perform I/O, but will not load data.
///
/// If this is not possible, resolve with an empty optional. The fragment can perform
/// I/O (e.g. to read metadata) before deciding whether it can satisfy the request.
virtual Future<util::optional<int64_t>> CountRows(
compute::Expression predicate, const std::shared_ptr<ScanOptions>& options);
virtual std::string type_name() const = 0;
virtual std::string ToString() const { return type_name(); }
/// \brief An expression which evaluates to true for all data viewed by this
/// Fragment.
const compute::Expression& partition_expression() const {
return partition_expression_;
}
virtual ~Fragment() = default;
protected:
Fragment() = default;
explicit Fragment(compute::Expression partition_expression,
std::shared_ptr<Schema> physical_schema);
virtual Result<std::shared_ptr<Schema>> ReadPhysicalSchemaImpl() = 0;
util::Mutex physical_schema_mutex_;
compute::Expression partition_expression_ = compute::literal(true);
std::shared_ptr<Schema> physical_schema_;
};
/// \brief Per-scan options for fragment(s) in a dataset.
///
/// These options are not intrinsic to the format or fragment itself, but do affect
/// the results of a scan. These are options which make sense to change between
/// repeated reads of the same dataset, such as format-specific conversion options
/// (that do not affect the schema).
///
/// \ingroup dataset-scanning
class ARROW_DS_EXPORT FragmentScanOptions {
public:
virtual std::string type_name() const = 0;
virtual std::string ToString() const { return type_name(); }
virtual ~FragmentScanOptions() = default;
};
/// \defgroup dataset-implementations Concrete implementations
///
/// @{
/// \brief A trivial Fragment that yields ScanTasks out of a fixed set of
/// RecordBatches.
class ARROW_DS_EXPORT InMemoryFragment : public Fragment {
public:
InMemoryFragment(std::shared_ptr<Schema> schema, RecordBatchVector record_batches,
compute::Expression = compute::literal(true));
explicit InMemoryFragment(RecordBatchVector record_batches,
compute::Expression = compute::literal(true));
Result<RecordBatchGenerator> ScanBatchesAsync(
const std::shared_ptr<ScanOptions>& options) override;
Future<util::optional<int64_t>> CountRows(
compute::Expression predicate,
const std::shared_ptr<ScanOptions>& options) override;
std::string type_name() const override { return "in-memory"; }
protected:
Result<std::shared_ptr<Schema>> ReadPhysicalSchemaImpl() override;
RecordBatchVector record_batches_;
};
/// @}
/// \brief A container of zero or more Fragments.
///
/// A Dataset acts as a union of Fragments, e.g. files deeply nested in a
/// directory. A Dataset has a schema to which Fragments must align during a
/// scan operation. This is analogous to Avro's reader and writer schema.
class ARROW_DS_EXPORT Dataset : public std::enable_shared_from_this<Dataset> {
public:
/// \brief Begin to build a new Scan operation against this Dataset
Result<std::shared_ptr<ScannerBuilder>> NewScan();
/// \brief GetFragments returns an iterator of Fragments given a predicate.
Result<FragmentIterator> GetFragments(compute::Expression predicate);
Result<FragmentIterator> GetFragments();
const std::shared_ptr<Schema>& schema() const { return schema_; }
/// \brief An expression which evaluates to true for all data viewed by this Dataset.
/// May be null, which indicates no information is available.
const compute::Expression& partition_expression() const {
return partition_expression_;
}
/// \brief The name identifying the kind of Dataset
virtual std::string type_name() const = 0;
/// \brief Return a copy of this Dataset with a different schema.
///
/// The copy will view the same Fragments. If the new schema is not compatible with the
/// original dataset's schema then an error will be raised.
virtual Result<std::shared_ptr<Dataset>> ReplaceSchema(
std::shared_ptr<Schema> schema) const = 0;
virtual ~Dataset() = default;
protected:
explicit Dataset(std::shared_ptr<Schema> schema) : schema_(std::move(schema)) {}
Dataset(std::shared_ptr<Schema> schema, compute::Expression partition_expression);
virtual Result<FragmentIterator> GetFragmentsImpl(compute::Expression predicate) = 0;
std::shared_ptr<Schema> schema_;
compute::Expression partition_expression_ = compute::literal(true);
};
/// \addtogroup dataset-implementations
///
/// @{
/// \brief A Source which yields fragments wrapping a stream of record batches.
///
/// The record batches must match the schema provided to the source at construction.
class ARROW_DS_EXPORT InMemoryDataset : public Dataset {
public:
class RecordBatchGenerator {
public:
virtual ~RecordBatchGenerator() = default;
virtual RecordBatchIterator Get() const = 0;
};
/// Construct a dataset from a schema and a factory of record batch iterators.
InMemoryDataset(std::shared_ptr<Schema> schema,
std::shared_ptr<RecordBatchGenerator> get_batches)
: Dataset(std::move(schema)), get_batches_(std::move(get_batches)) {}
/// Convenience constructor taking a fixed list of batches
InMemoryDataset(std::shared_ptr<Schema> schema, RecordBatchVector batches);
/// Convenience constructor taking a Table
explicit InMemoryDataset(std::shared_ptr<Table> table);
std::string type_name() const override { return "in-memory"; }
Result<std::shared_ptr<Dataset>> ReplaceSchema(
std::shared_ptr<Schema> schema) const override;
protected:
Result<FragmentIterator> GetFragmentsImpl(compute::Expression predicate) override;
std::shared_ptr<RecordBatchGenerator> get_batches_;
};
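/// \par Example
///
/// A minimal sketch of wrapping a Table and scanning it back out (the
/// scanner API lives in arrow/dataset/scanner.h):
///
/// \code
/// #include "arrow/dataset/dataset.h"
/// #include "arrow/dataset/scanner.h"
///
/// arrow::Result<std::shared_ptr<arrow::Table>> RoundTrip(
///     std::shared_ptr<arrow::Table> table) {
///   auto dataset =
///       std::make_shared<arrow::dataset::InMemoryDataset>(std::move(table));
///   ARROW_ASSIGN_OR_RAISE(auto builder, dataset->NewScan());
///   ARROW_ASSIGN_OR_RAISE(auto scanner, builder->Finish());
///   return scanner->ToTable();
/// }
/// \endcode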
/// \brief A Dataset wrapping child Datasets.
class ARROW_DS_EXPORT UnionDataset : public Dataset {
public:
/// \brief Construct a UnionDataset wrapping child Datasets.
///
/// \param[in] schema the schema of the resulting dataset.
/// \param[in] children one or more child Datasets. Their schemas must be identical to
/// schema.
static Result<std::shared_ptr<UnionDataset>> Make(std::shared_ptr<Schema> schema,
DatasetVector children);
const DatasetVector& children() const { return children_; }
std::string type_name() const override { return "union"; }
Result<std::shared_ptr<Dataset>> ReplaceSchema(
std::shared_ptr<Schema> schema) const override;
protected:
Result<FragmentIterator> GetFragmentsImpl(compute::Expression predicate) override;
explicit UnionDataset(std::shared_ptr<Schema> schema, DatasetVector children)
: Dataset(std::move(schema)), children_(std::move(children)) {}
DatasetVector children_;
friend class UnionDatasetFactory;
};
/// @}
} // namespace dataset
} // namespace arrow

View File

@@ -0,0 +1,98 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <string>
#include "arrow/dataset/file_base.h"
#include "arrow/record_batch.h"
#include "arrow/status.h"
#include "arrow/util/async_util.h"
#include "arrow/util/future.h"
namespace arrow {
namespace dataset {
namespace internal {
// This lines up with our other defaults in the scanner and execution plan
constexpr uint64_t kDefaultDatasetWriterMaxRowsQueued = 8 * 1024 * 1024;
/// \brief Utility class that manages a set of writers to different paths
///
/// Writers may be closed and reopened (and a new file created) based on the dataset
/// write options (for example, max_rows_per_file or max_open_files)
///
/// The dataset writer enforces its own backpressure based on the number of rows (as
/// opposed to the number of batches, which is how backpressure is typically enforced
/// elsewhere) and the number of files.
class ARROW_DS_EXPORT DatasetWriter {
public:
/// \brief Create a dataset writer
///
/// Will fail if basename_template is invalid or if there is existing data and
/// existing_data_behavior is kError
///
/// \param write_options options to control how the data should be written
/// \param max_rows_queued max # of rows allowed to be queued before the dataset_writer
/// will ask for backpressure
static Result<std::unique_ptr<DatasetWriter>> Make(
FileSystemDatasetWriteOptions write_options,
uint64_t max_rows_queued = kDefaultDatasetWriterMaxRowsQueued);
~DatasetWriter();
/// \brief Write a batch to the dataset
/// \param[in] batch The batch to write
/// \param[in] directory The directory to write to
///
/// Note: The written filename will be {directory}/{filename_factory(i)} where i is a
/// counter controlled by `max_open_files` and `max_rows_per_file`
///
/// If multiple WriteRecordBatch calls arrive with the same `directory` then the batches
/// may be written to the same file.
///
/// The returned future will be marked finished when the record batch has been queued
/// to be written. If the returned future is unfinished then this indicates the dataset
/// writer's queue is full and the data provider should pause.
///
/// This method is NOT async reentrant. The returned future will only be unfinished
/// if back pressure needs to be applied. Async reentrancy is not necessary for
/// concurrent writes to happen. Calling this method again before the previous future
/// completes will not just violate max_rows_queued but likely lead to race conditions.
///
/// One thing to note is that the ordering of your data can affect your maximum
/// potential parallelism. If this seems odd then consider a dataset where the first
/// 1000 batches go to the same directory and then the 1001st batch goes to a different
/// directory. The only way to get two parallel writes immediately would be to queue
/// all 1000 pending writes to the first directory.
Future<> WriteRecordBatch(std::shared_ptr<RecordBatch> batch,
const std::string& directory, const std::string& prefix = "");
/// Finish all pending writes and close any open files
Future<> Finish();
protected:
DatasetWriter(FileSystemDatasetWriteOptions write_options,
uint64_t max_rows_queued = kDefaultDatasetWriterMaxRowsQueued);
class DatasetWriterImpl;
std::unique_ptr<DatasetWriterImpl, util::DestroyingDeleter<DatasetWriterImpl>> impl_;
};
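/// \par Example
///
/// A minimal sketch of queueing batches with backpressure honored by blocking
/// (assumes `write_options` is fully populated; a real producer would pause
/// asynchronously rather than block):
///
/// \code
/// arrow::Status WriteAll(
///     arrow::dataset::FileSystemDatasetWriteOptions write_options,
///     const std::vector<std::shared_ptr<arrow::RecordBatch>>& batches) {
///   ARROW_ASSIGN_OR_RAISE(auto writer,
///                         arrow::dataset::internal::DatasetWriter::Make(
///                             std::move(write_options)));
///   for (const auto& batch : batches) {
///     // An unfinished future signals a full queue; wait for it to clear
///     // before issuing the next write.
///     ARROW_RETURN_NOT_OK(writer->WriteRecordBatch(batch, "").status());
///   }
///   return writer->Finish().status();
/// }
/// \endcode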
} // namespace internal
} // namespace dataset
} // namespace arrow

View File

@@ -0,0 +1,271 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
/// Logic for automatically determining the structure of a multi-file
/// dataset, with partition information discovered according to the
/// available partitioning scheme(s).
// This API is EXPERIMENTAL.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "arrow/dataset/partition.h"
#include "arrow/dataset/type_fwd.h"
#include "arrow/dataset/visibility.h"
#include "arrow/filesystem/type_fwd.h"
#include "arrow/result.h"
#include "arrow/util/macros.h"
#include "arrow/util/variant.h"
namespace arrow {
namespace dataset {
/// \defgroup dataset-discovery Discovery API
///
/// @{
struct InspectOptions {
/// See `fragments` property.
static constexpr int kInspectAllFragments = -1;
/// Indicate how many fragments should be inspected to infer the unified dataset
/// schema. Limiting the number of fragments accessed reduces the latency of
/// the discovery process when dealing with a high number of fragments and/or
/// high-latency file systems.
///
/// The default value of `1` inspects the schema of a single fragment only,
/// chosen in no particular order. If the dataset has a uniform schema for all fragments,
/// this default is the optimal value. In order to inspect all fragments and
/// robustly unify their potentially varying schemas, set this option to
/// `kInspectAllFragments`. A value of `0` disables inspection of fragments
/// altogether so only the partitioning schema will be inspected.
int fragments = 1;
};
struct FinishOptions {
/// Finalize the dataset with this given schema. If the schema is not
/// provided, it is inferred via Inspect; see the `inspect_options`
/// property.
std::shared_ptr<Schema> schema = NULLPTR;
/// If the schema is not provided, it will be discovered by passing the
/// following options to `DatasetFactory::Inspect`.
InspectOptions inspect_options{};
/// Indicate if the given Schema (when specified), should be validated against
/// the fragments' schemas. `inspect_options` will control how many fragments
/// are checked.
bool validate_fragments = false;
};
/// \brief DatasetFactory provides a way to inspect/discover a Dataset's expected
/// schema before materializing said Dataset.
class ARROW_DS_EXPORT DatasetFactory {
public:
/// \brief Get the schemas of the Fragments and Partitioning.
virtual Result<std::vector<std::shared_ptr<Schema>>> InspectSchemas(
InspectOptions options) = 0;
/// \brief Get unified schema for the resulting Dataset.
Result<std::shared_ptr<Schema>> Inspect(InspectOptions options = {});
/// \brief Create a Dataset
Result<std::shared_ptr<Dataset>> Finish();
/// \brief Create a Dataset with the given schema (see \a InspectOptions::schema)
Result<std::shared_ptr<Dataset>> Finish(std::shared_ptr<Schema> schema);
/// \brief Create a Dataset with the given options
virtual Result<std::shared_ptr<Dataset>> Finish(FinishOptions options) = 0;
/// \brief Optional root partition for the resulting Dataset.
const compute::Expression& root_partition() const { return root_partition_; }
/// \brief Set the root partition for the resulting Dataset.
Status SetRootPartition(compute::Expression partition) {
root_partition_ = std::move(partition);
return Status::OK();
}
virtual ~DatasetFactory() = default;
protected:
DatasetFactory();
compute::Expression root_partition_;
};
/// @}
/// \brief A DatasetFactory that aggregates the schemas and Datasets
/// produced by a set of child factories.
/// \ingroup dataset-implementations
class ARROW_DS_EXPORT UnionDatasetFactory : public DatasetFactory {
public:
static Result<std::shared_ptr<DatasetFactory>> Make(
std::vector<std::shared_ptr<DatasetFactory>> factories);
/// \brief Return the list of child DatasetFactory
const std::vector<std::shared_ptr<DatasetFactory>>& factories() const {
return factories_;
}
/// \brief Get the schemas of the Datasets.
///
/// The options are applied to each child factory rather than globally, so
/// `options.fragments` is not respected exactly; the spirit of inspecting
/// only the first fragments (or all of them) is preserved.
Result<std::vector<std::shared_ptr<Schema>>> InspectSchemas(
InspectOptions options) override;
/// \brief Create a Dataset.
Result<std::shared_ptr<Dataset>> Finish(FinishOptions options) override;
protected:
explicit UnionDatasetFactory(std::vector<std::shared_ptr<DatasetFactory>> factories);
std::vector<std::shared_ptr<DatasetFactory>> factories_;
};
/// \ingroup dataset-filesystem
struct FileSystemFactoryOptions {
/// Either an explicit Partitioning or a PartitioningFactory to discover one.
///
/// If a factory is provided, it will be used to infer a schema for partition fields
/// based on file and directory paths then construct a Partitioning. The default
/// is a Partitioning which will yield no partition information.
///
/// The (explicit or discovered) partitioning will be applied to discovered files
/// and the resulting partition information embedded in the Dataset.
PartitioningOrFactory partitioning{Partitioning::Default()};
/// For the purposes of applying the partitioning, paths will be stripped
/// of the partition_base_dir. Files not matching the partition_base_dir
/// prefix will be skipped for partition discovery. The ignored files will still
/// be part of the Dataset, but will not have partition information.
///
/// Example:
/// partition_base_dir = "/dataset";
///
/// - "/dataset/US/sales.csv" -> "US/sales.csv" will be given to the partitioning
///
/// - "/home/john/late_sales.csv" -> Will be ignored for partition discovery.
///
/// This is useful for partitionings that parse directory names in which
/// ordering is important, e.g. DirectoryPartitioning.
std::string partition_base_dir;
/// Invalid files (whether selected via a selector or listed explicitly) will be
/// excluded by checking with the FileFormat::IsSupported method. This incurs IO
/// for each file in a serial, single-threaded fashion. Disabling this feature
/// skips the IO, but unsupported files may then be present in the Dataset
/// (resulting in an error at scan time).
bool exclude_invalid_files = false;
/// When discovering from a Selector (and not from an explicit file list), ignore
/// files and directories matching any of these prefixes.
///
/// Example (with selector = "/dataset/**"):
/// selector_ignore_prefixes = {"_", ".DS_STORE" };
///
/// - "/dataset/data.csv" -> not ignored
/// - "/dataset/_metadata" -> ignored
/// - "/dataset/.DS_STORE" -> ignored
/// - "/dataset/_hidden/dat" -> ignored
/// - "/dataset/nested/.DS_STORE" -> ignored
std::vector<std::string> selector_ignore_prefixes = {
".",
"_",
};
};
/// \brief FileSystemDatasetFactory creates a Dataset from a vector of
/// fs::FileInfo or a fs::FileSelector.
/// \ingroup dataset-filesystem
class ARROW_DS_EXPORT FileSystemDatasetFactory : public DatasetFactory {
public:
/// \brief Build a FileSystemDatasetFactory from an explicit list of
/// paths.
///
/// \param[in] filesystem passed to FileSystemDataset
/// \param[in] paths passed to FileSystemDataset
/// \param[in] format passed to FileSystemDataset
/// \param[in] options see FileSystemFactoryOptions for more information.
static Result<std::shared_ptr<DatasetFactory>> Make(
std::shared_ptr<fs::FileSystem> filesystem, const std::vector<std::string>& paths,
std::shared_ptr<FileFormat> format, FileSystemFactoryOptions options);
/// \brief Build a FileSystemDatasetFactory from a fs::FileSelector.
///
/// The selector will expand to a vector of FileInfo. The expansion/crawling
/// is performed in this function call. Thus, the finalized Dataset works
/// with a snapshot of the filesystem.
///
/// If options.partition_base_dir is not provided, it will be overwritten
/// with selector.base_dir.
///
/// \param[in] filesystem passed to FileSystemDataset
/// \param[in] selector used to crawl and search files
/// \param[in] format passed to FileSystemDataset
/// \param[in] options see FileSystemFactoryOptions for more information.
static Result<std::shared_ptr<DatasetFactory>> Make(
std::shared_ptr<fs::FileSystem> filesystem, fs::FileSelector selector,
std::shared_ptr<FileFormat> format, FileSystemFactoryOptions options);
/// \brief Build a FileSystemDatasetFactory from a URI including filesystem
/// information.
///
/// \param[in] uri passed to FileSystemDataset
/// \param[in] format passed to FileSystemDataset
/// \param[in] options see FileSystemFactoryOptions for more information.
static Result<std::shared_ptr<DatasetFactory>> Make(std::string uri,
std::shared_ptr<FileFormat> format,
FileSystemFactoryOptions options);
/// \brief Build a FileSystemDatasetFactory from an explicit list of
/// file information.
///
/// \param[in] filesystem passed to FileSystemDataset
/// \param[in] files passed to FileSystemDataset
/// \param[in] format passed to FileSystemDataset
/// \param[in] options see FileSystemFactoryOptions for more information.
static Result<std::shared_ptr<DatasetFactory>> Make(
std::shared_ptr<fs::FileSystem> filesystem, const std::vector<fs::FileInfo>& files,
std::shared_ptr<FileFormat> format, FileSystemFactoryOptions options);
Result<std::vector<std::shared_ptr<Schema>>> InspectSchemas(
InspectOptions options) override;
Result<std::shared_ptr<Dataset>> Finish(FinishOptions options) override;
protected:
FileSystemDatasetFactory(std::vector<fs::FileInfo> files,
std::shared_ptr<fs::FileSystem> filesystem,
std::shared_ptr<FileFormat> format,
FileSystemFactoryOptions options);
Result<std::shared_ptr<Schema>> PartitionSchema();
std::vector<fs::FileInfo> files_;
std::shared_ptr<fs::FileSystem> fs_;
std::shared_ptr<FileFormat> format_;
FileSystemFactoryOptions options_;
};
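/// \par Example
///
/// A minimal sketch of the discovery flow over a local directory of Parquet
/// files (the filesystem and path are illustrative):
///
/// \code
/// #include "arrow/dataset/discovery.h"
/// #include "arrow/dataset/file_parquet.h"
/// #include "arrow/filesystem/localfs.h"
///
/// arrow::Result<std::shared_ptr<arrow::dataset::Dataset>> Discover() {
///   auto fs = std::make_shared<arrow::fs::LocalFileSystem>();
///   arrow::fs::FileSelector selector;
///   selector.base_dir = "/data/my_dataset";
///   selector.recursive = true;
///   auto format = std::make_shared<arrow::dataset::ParquetFileFormat>();
///   ARROW_ASSIGN_OR_RAISE(auto factory,
///                         arrow::dataset::FileSystemDatasetFactory::Make(
///                             fs, selector, format,
///                             arrow::dataset::FileSystemFactoryOptions{}));
///   return factory->Finish();
/// }
/// \endcode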
} // namespace dataset
} // namespace arrow

View File

@@ -0,0 +1,433 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This API is EXPERIMENTAL.
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "arrow/buffer.h"
#include "arrow/dataset/dataset.h"
#include "arrow/dataset/partition.h"
#include "arrow/dataset/scanner.h"
#include "arrow/dataset/type_fwd.h"
#include "arrow/dataset/visibility.h"
#include "arrow/filesystem/filesystem.h"
#include "arrow/io/file.h"
#include "arrow/util/compression.h"
namespace arrow {
namespace dataset {
/// \defgroup dataset-file-formats File formats for reading and writing datasets
/// \defgroup dataset-filesystem File system datasets
///
/// @{
/// \brief The path and filesystem where an actual file is located, or a buffer which
/// can be read like a file
class ARROW_DS_EXPORT FileSource {
public:
FileSource(std::string path, std::shared_ptr<fs::FileSystem> filesystem,
Compression::type compression = Compression::UNCOMPRESSED)
: file_info_(std::move(path)),
filesystem_(std::move(filesystem)),
compression_(compression) {}
FileSource(fs::FileInfo info, std::shared_ptr<fs::FileSystem> filesystem,
Compression::type compression = Compression::UNCOMPRESSED)
: file_info_(std::move(info)),
filesystem_(std::move(filesystem)),
compression_(compression) {}
explicit FileSource(std::shared_ptr<Buffer> buffer,
Compression::type compression = Compression::UNCOMPRESSED)
: buffer_(std::move(buffer)), compression_(compression) {}
using CustomOpen = std::function<Result<std::shared_ptr<io::RandomAccessFile>>()>;
explicit FileSource(CustomOpen open) : custom_open_(std::move(open)) {}
using CustomOpenWithCompression =
std::function<Result<std::shared_ptr<io::RandomAccessFile>>(Compression::type)>;
explicit FileSource(CustomOpenWithCompression open_with_compression,
Compression::type compression = Compression::UNCOMPRESSED)
: custom_open_(std::bind(std::move(open_with_compression), compression)),
compression_(compression) {}
explicit FileSource(std::shared_ptr<io::RandomAccessFile> file,
Compression::type compression = Compression::UNCOMPRESSED)
: custom_open_([=] { return ToResult(file); }), compression_(compression) {}
FileSource() : custom_open_(CustomOpen{&InvalidOpen}) {}
static std::vector<FileSource> FromPaths(const std::shared_ptr<fs::FileSystem>& fs,
std::vector<std::string> paths) {
std::vector<FileSource> sources;
for (auto&& path : paths) {
sources.emplace_back(std::move(path), fs);
}
return sources;
}
/// \brief Return the type of raw compression on the file, if any.
Compression::type compression() const { return compression_; }
/// \brief Return the file path, if any. Only valid when file source wraps a path.
const std::string& path() const {
static std::string buffer_path = "<Buffer>";
    static std::string custom_open_path = "<custom data source>";
return filesystem_ ? file_info_.path() : buffer_ ? buffer_path : custom_open_path;
}
/// \brief Return the filesystem, if any. Otherwise returns nullptr
const std::shared_ptr<fs::FileSystem>& filesystem() const { return filesystem_; }
/// \brief Return the buffer containing the file, if any. Otherwise returns nullptr
const std::shared_ptr<Buffer>& buffer() const { return buffer_; }
/// \brief Get a RandomAccessFile which views this file source
Result<std::shared_ptr<io::RandomAccessFile>> Open() const;
/// \brief Get an InputStream which views this file source (and decompresses if needed)
/// \param[in] compression If nullopt, guess the compression scheme from the
/// filename, else decompress with the given codec
Result<std::shared_ptr<io::InputStream>> OpenCompressed(
util::optional<Compression::type> compression = util::nullopt) const;
private:
static Result<std::shared_ptr<io::RandomAccessFile>> InvalidOpen() {
return Status::Invalid("Called Open() on an uninitialized FileSource");
}
fs::FileInfo file_info_;
std::shared_ptr<fs::FileSystem> filesystem_;
std::shared_ptr<Buffer> buffer_;
CustomOpen custom_open_;
Compression::type compression_ = Compression::UNCOMPRESSED;
};
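/// \par Example
///
/// A minimal sketch of the common constructions (paths are illustrative):
///
/// \code
/// #include "arrow/dataset/file_base.h"
/// #include "arrow/filesystem/localfs.h"
///
/// arrow::Status OpenSources(std::shared_ptr<arrow::Buffer> buffer) {
///   auto fs = std::make_shared<arrow::fs::LocalFileSystem>();
///   // From a path on a filesystem, with explicit compression:
///   arrow::dataset::FileSource from_path("/data/part-0.csv.gz", fs,
///                                        arrow::Compression::GZIP);
///   // From an in-memory buffer:
///   arrow::dataset::FileSource from_buffer(std::move(buffer));
///   // OpenCompressed decompresses the stream transparently.
///   ARROW_ASSIGN_OR_RAISE(auto stream, from_path.OpenCompressed());
///   return stream->Close();
/// }
/// \endcode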
/// \brief Base class for file format implementation
class ARROW_DS_EXPORT FileFormat : public std::enable_shared_from_this<FileFormat> {
public:
/// Options affecting how this format is scanned.
///
/// The options here can be overridden at scan time.
std::shared_ptr<FragmentScanOptions> default_fragment_scan_options;
virtual ~FileFormat() = default;
/// \brief The name identifying the kind of file format
virtual std::string type_name() const = 0;
virtual bool Equals(const FileFormat& other) const = 0;
/// \brief Indicate if the FileSource is supported/readable by this format.
virtual Result<bool> IsSupported(const FileSource& source) const = 0;
/// \brief Return the schema of the file if possible.
virtual Result<std::shared_ptr<Schema>> Inspect(const FileSource& source) const = 0;
virtual Result<RecordBatchGenerator> ScanBatchesAsync(
const std::shared_ptr<ScanOptions>& options,
const std::shared_ptr<FileFragment>& file) const = 0;
virtual Future<util::optional<int64_t>> CountRows(
const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
const std::shared_ptr<ScanOptions>& options);
/// \brief Open a fragment
virtual Result<std::shared_ptr<FileFragment>> MakeFragment(
FileSource source, compute::Expression partition_expression,
std::shared_ptr<Schema> physical_schema);
/// \brief Create a FileFragment for a FileSource.
Result<std::shared_ptr<FileFragment>> MakeFragment(
FileSource source, compute::Expression partition_expression);
/// \brief Create a FileFragment for a FileSource.
Result<std::shared_ptr<FileFragment>> MakeFragment(
FileSource source, std::shared_ptr<Schema> physical_schema = NULLPTR);
/// \brief Create a writer for this format.
virtual Result<std::shared_ptr<FileWriter>> MakeWriter(
std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
std::shared_ptr<FileWriteOptions> options,
fs::FileLocator destination_locator) const = 0;
/// \brief Get default write options for this format.
virtual std::shared_ptr<FileWriteOptions> DefaultWriteOptions() = 0;
};
/// \brief A Fragment that is stored in a file with a known format
class ARROW_DS_EXPORT FileFragment : public Fragment {
public:
Result<RecordBatchGenerator> ScanBatchesAsync(
const std::shared_ptr<ScanOptions>& options) override;
Future<util::optional<int64_t>> CountRows(
compute::Expression predicate,
const std::shared_ptr<ScanOptions>& options) override;
std::string type_name() const override { return format_->type_name(); }
std::string ToString() const override { return source_.path(); }
const FileSource& source() const { return source_; }
const std::shared_ptr<FileFormat>& format() const { return format_; }
protected:
FileFragment(FileSource source, std::shared_ptr<FileFormat> format,
compute::Expression partition_expression,
std::shared_ptr<Schema> physical_schema)
: Fragment(std::move(partition_expression), std::move(physical_schema)),
source_(std::move(source)),
format_(std::move(format)) {}
Result<std::shared_ptr<Schema>> ReadPhysicalSchemaImpl() override;
FileSource source_;
std::shared_ptr<FileFormat> format_;
friend class FileFormat;
};
/// \brief A Dataset of FileFragments.
///
/// A FileSystemDataset is composed of one or more FileFragment. The fragments
/// are independent and don't need to share the same format and/or filesystem.
class ARROW_DS_EXPORT FileSystemDataset : public Dataset {
public:
/// \brief Create a FileSystemDataset.
///
/// \param[in] schema the schema of the dataset
/// \param[in] root_partition the partition expression of the dataset
/// \param[in] format the format of each FileFragment.
/// \param[in] filesystem the filesystem of each FileFragment, or nullptr if the
/// fragments wrap buffers.
/// \param[in] fragments list of fragments to create the dataset from.
/// \param[in] partitioning the Partitioning object in case the dataset is created
/// with a known partitioning (e.g. from a discovered partitioning
/// through a DatasetFactory), or nullptr if not known.
///
/// Note that fragments wrapping files resident in differing filesystems are not
/// permitted; to work with multiple filesystems use a UnionDataset.
///
/// \return A constructed dataset.
static Result<std::shared_ptr<FileSystemDataset>> Make(
std::shared_ptr<Schema> schema, compute::Expression root_partition,
std::shared_ptr<FileFormat> format, std::shared_ptr<fs::FileSystem> filesystem,
std::vector<std::shared_ptr<FileFragment>> fragments,
std::shared_ptr<Partitioning> partitioning = NULLPTR);
/// \brief Write a dataset.
static Status Write(const FileSystemDatasetWriteOptions& write_options,
std::shared_ptr<Scanner> scanner);
/// \brief Return the type name of the dataset.
std::string type_name() const override { return "filesystem"; }
/// \brief Replace the schema of the dataset.
Result<std::shared_ptr<Dataset>> ReplaceSchema(
std::shared_ptr<Schema> schema) const override;
/// \brief Return the path of files.
std::vector<std::string> files() const;
/// \brief Return the format.
const std::shared_ptr<FileFormat>& format() const { return format_; }
/// \brief Return the filesystem. May be nullptr if the fragments wrap buffers.
const std::shared_ptr<fs::FileSystem>& filesystem() const { return filesystem_; }
/// \brief Return the partitioning. May be nullptr if the dataset was not constructed
/// with a partitioning.
const std::shared_ptr<Partitioning>& partitioning() const { return partitioning_; }
std::string ToString() const;
protected:
struct FragmentSubtrees;
explicit FileSystemDataset(std::shared_ptr<Schema> schema)
: Dataset(std::move(schema)) {}
FileSystemDataset(std::shared_ptr<Schema> schema,
compute::Expression partition_expression)
: Dataset(std::move(schema), partition_expression) {}
Result<FragmentIterator> GetFragmentsImpl(compute::Expression predicate) override;
void SetupSubtreePruning();
std::shared_ptr<FileFormat> format_;
std::shared_ptr<fs::FileSystem> filesystem_;
std::vector<std::shared_ptr<FileFragment>> fragments_;
std::shared_ptr<Partitioning> partitioning_;
std::shared_ptr<FragmentSubtrees> subtrees_;
};
/// \brief Options for writing a file of this format.
class ARROW_DS_EXPORT FileWriteOptions {
public:
virtual ~FileWriteOptions() = default;
const std::shared_ptr<FileFormat>& format() const { return format_; }
std::string type_name() const { return format_->type_name(); }
protected:
explicit FileWriteOptions(std::shared_ptr<FileFormat> format)
: format_(std::move(format)) {}
std::shared_ptr<FileFormat> format_;
};
/// \brief A writer for this format.
class ARROW_DS_EXPORT FileWriter {
public:
virtual ~FileWriter() = default;
/// \brief Write the given batch.
virtual Status Write(const std::shared_ptr<RecordBatch>& batch) = 0;
/// \brief Write all batches from the reader.
Status Write(RecordBatchReader* batches);
/// \brief Indicate that writing is done.
virtual Future<> Finish();
const std::shared_ptr<FileFormat>& format() const { return options_->format(); }
const std::shared_ptr<Schema>& schema() const { return schema_; }
const std::shared_ptr<FileWriteOptions>& options() const { return options_; }
const fs::FileLocator& destination() const { return destination_locator_; }
protected:
FileWriter(std::shared_ptr<Schema> schema, std::shared_ptr<FileWriteOptions> options,
std::shared_ptr<io::OutputStream> destination,
fs::FileLocator destination_locator)
: schema_(std::move(schema)),
options_(std::move(options)),
destination_(std::move(destination)),
destination_locator_(std::move(destination_locator)) {}
virtual Future<> FinishInternal() = 0;
std::shared_ptr<Schema> schema_;
std::shared_ptr<FileWriteOptions> options_;
std::shared_ptr<io::OutputStream> destination_;
fs::FileLocator destination_locator_;
};
/// \brief Options for writing a dataset.
struct ARROW_DS_EXPORT FileSystemDatasetWriteOptions {
/// Options for individual fragment writing.
std::shared_ptr<FileWriteOptions> file_write_options;
/// FileSystem into which a dataset will be written.
std::shared_ptr<fs::FileSystem> filesystem;
/// Root directory into which the dataset will be written.
std::string base_dir;
/// Partitioning used to generate fragment paths.
std::shared_ptr<Partitioning> partitioning;
/// Maximum number of partitions any batch may be written into, default is 1K.
int max_partitions = 1024;
/// Template string used to generate fragment basenames.
/// {i} will be replaced by an auto incremented integer.
std::string basename_template;
/// If greater than 0 then this will limit the maximum number of files that can be left
/// open. If an attempt is made to open too many files then the least recently used file
/// will be closed. If this setting is set too low you may end up fragmenting your data
/// into many small files.
///
/// The default is 900, which also leaves some file handles available for the
/// scanner before hitting the default Linux limit of 1024.
uint32_t max_open_files = 900;
/// If greater than 0 then this will limit how many rows are placed in any single file.
/// Otherwise there will be no limit and one file will be created in each output
/// directory unless files need to be closed to respect max_open_files
uint64_t max_rows_per_file = 0;
/// If greater than 0 then this will cause the dataset writer to batch incoming data
/// and only write the row groups to the disk when sufficient rows have accumulated.
/// The final row group size may be less than this value, and other options such as
/// `max_open_files` or `max_rows_per_file` may lead to smaller row group sizes.
uint64_t min_rows_per_group = 0;
/// If greater than 0 then the dataset writer may split up large incoming batches into
/// multiple row groups. If this value is set then min_rows_per_group should also be
/// set or else you may end up with very small row groups (e.g. if the incoming row
/// group size is just barely larger than this value).
uint64_t max_rows_per_group = 1 << 20;
/// Controls what happens if an output directory already exists.
ExistingDataBehavior existing_data_behavior = ExistingDataBehavior::kError;
/// \brief If false the dataset writer will not create directories.
/// This is mainly intended for filesystems that do not require directories, such as S3.
bool create_dir = true;
/// Callback to be invoked against all FileWriters before
/// they are finalized with FileWriter::Finish().
std::function<Status(FileWriter*)> writer_pre_finish = [](FileWriter*) {
return Status::OK();
};
/// Callback to be invoked against all FileWriters after they have
/// called FileWriter::Finish().
std::function<Status(FileWriter*)> writer_post_finish = [](FileWriter*) {
return Status::OK();
};
const std::shared_ptr<FileFormat>& format() const {
return file_write_options->format();
}
};
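/// \par Example
///
/// A minimal sketch of populating the options and writing a scanned dataset
/// (assumes a valid `scanner`; the base directory and basename template are
/// illustrative):
///
/// \code
/// arrow::Status WriteDataset(
///     std::shared_ptr<arrow::dataset::Scanner> scanner,
///     std::shared_ptr<arrow::fs::FileSystem> fs,
///     std::shared_ptr<arrow::dataset::FileFormat> format) {
///   arrow::dataset::FileSystemDatasetWriteOptions write_options;
///   write_options.file_write_options = format->DefaultWriteOptions();
///   write_options.filesystem = std::move(fs);
///   write_options.base_dir = "/data/out";
///   write_options.partitioning = arrow::dataset::Partitioning::Default();
///   write_options.basename_template = "part-{i}.dat";
///   return arrow::dataset::FileSystemDataset::Write(write_options,
///                                                   std::move(scanner));
/// }
/// \endcode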
/// \brief Wraps FileSystemDatasetWriteOptions for consumption as compute::ExecNodeOptions
class ARROW_DS_EXPORT WriteNodeOptions : public compute::ExecNodeOptions {
public:
explicit WriteNodeOptions(
FileSystemDatasetWriteOptions options,
std::shared_ptr<const KeyValueMetadata> custom_metadata = NULLPTR)
: write_options(std::move(options)), custom_metadata(std::move(custom_metadata)) {}
/// \brief Options to control how to write the dataset
FileSystemDatasetWriteOptions write_options;
/// \brief Optional metadata to attach to written batches
std::shared_ptr<const KeyValueMetadata> custom_metadata;
};
/// @}
namespace internal {
ARROW_DS_EXPORT void InitializeDatasetWriter(
arrow::compute::ExecFactoryRegistry* registry);
}
} // namespace dataset
} // namespace arrow

View File

@@ -0,0 +1,118 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include "arrow/csv/options.h"
#include "arrow/dataset/dataset.h"
#include "arrow/dataset/file_base.h"
#include "arrow/dataset/type_fwd.h"
#include "arrow/dataset/visibility.h"
#include "arrow/ipc/type_fwd.h"
#include "arrow/status.h"
#include "arrow/util/compression.h"
namespace arrow {
namespace dataset {
constexpr char kCsvTypeName[] = "csv";
/// \addtogroup dataset-file-formats
///
/// @{
/// \brief A FileFormat implementation that reads from and writes to CSV files
class ARROW_DS_EXPORT CsvFileFormat : public FileFormat {
public:
/// Options affecting the parsing of CSV files
csv::ParseOptions parse_options = csv::ParseOptions::Defaults();
std::string type_name() const override { return kCsvTypeName; }
bool Equals(const FileFormat& other) const override;
Result<bool> IsSupported(const FileSource& source) const override;
/// \brief Return the schema of the file if possible.
Result<std::shared_ptr<Schema>> Inspect(const FileSource& source) const override;
Result<RecordBatchGenerator> ScanBatchesAsync(
const std::shared_ptr<ScanOptions>& scan_options,
const std::shared_ptr<FileFragment>& file) const override;
Future<util::optional<int64_t>> CountRows(
const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
const std::shared_ptr<ScanOptions>& options) override;
Result<std::shared_ptr<FileWriter>> MakeWriter(
std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
std::shared_ptr<FileWriteOptions> options,
fs::FileLocator destination_locator) const override;
std::shared_ptr<FileWriteOptions> DefaultWriteOptions() override;
};
/// \brief Per-scan options for CSV fragments
struct ARROW_DS_EXPORT CsvFragmentScanOptions : public FragmentScanOptions {
std::string type_name() const override { return kCsvTypeName; }
/// CSV conversion options
csv::ConvertOptions convert_options = csv::ConvertOptions::Defaults();
/// CSV reading options
///
/// Note that use_threads is always ignored.
csv::ReadOptions read_options = csv::ReadOptions::Defaults();
};
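/// \par Example
///
/// A minimal sketch of configuring the format for semicolon-delimited files
/// (the delimiter and skipped row are illustrative):
///
/// \code
/// #include "arrow/dataset/file_csv.h"
///
/// std::shared_ptr<arrow::dataset::CsvFileFormat> MakeSemicolonCsvFormat() {
///   auto format = std::make_shared<arrow::dataset::CsvFileFormat>();
///   format->parse_options.delimiter = ';';
///   auto scan_options =
///       std::make_shared<arrow::dataset::CsvFragmentScanOptions>();
///   scan_options->read_options.skip_rows = 1;  // e.g. a leading comment line
///   format->default_fragment_scan_options = std::move(scan_options);
///   return format;
/// }
/// \endcode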
class ARROW_DS_EXPORT CsvFileWriteOptions : public FileWriteOptions {
public:
/// Options passed to csv::MakeCSVWriter.
std::shared_ptr<csv::WriteOptions> write_options;
protected:
using FileWriteOptions::FileWriteOptions;
friend class CsvFileFormat;
};
class ARROW_DS_EXPORT CsvFileWriter : public FileWriter {
public:
Status Write(const std::shared_ptr<RecordBatch>& batch) override;
private:
CsvFileWriter(std::shared_ptr<io::OutputStream> destination,
std::shared_ptr<ipc::RecordBatchWriter> writer,
std::shared_ptr<Schema> schema,
std::shared_ptr<CsvFileWriteOptions> options,
fs::FileLocator destination_locator);
Future<> FinishInternal() override;
std::shared_ptr<io::OutputStream> destination_;
std::shared_ptr<ipc::RecordBatchWriter> batch_writer_;
friend class CsvFileFormat;
};
/// @}
} // namespace dataset
} // namespace arrow

View File

@@ -0,0 +1,120 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This API is EXPERIMENTAL.
#pragma once
#include <memory>
#include <string>
#include "arrow/dataset/file_base.h"
#include "arrow/dataset/type_fwd.h"
#include "arrow/dataset/visibility.h"
#include "arrow/io/type_fwd.h"
#include "arrow/ipc/type_fwd.h"
#include "arrow/result.h"
namespace arrow {
namespace dataset {
/// \addtogroup dataset-file-formats
///
/// @{
constexpr char kIpcTypeName[] = "ipc";
/// \brief A FileFormat implementation that reads from and writes to IPC files
class ARROW_DS_EXPORT IpcFileFormat : public FileFormat {
public:
std::string type_name() const override { return kIpcTypeName; }
bool Equals(const FileFormat& other) const override {
return type_name() == other.type_name();
}
Result<bool> IsSupported(const FileSource& source) const override;
/// \brief Return the schema of the file if possible.
Result<std::shared_ptr<Schema>> Inspect(const FileSource& source) const override;
Result<RecordBatchGenerator> ScanBatchesAsync(
const std::shared_ptr<ScanOptions>& options,
const std::shared_ptr<FileFragment>& file) const override;
Future<util::optional<int64_t>> CountRows(
const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
const std::shared_ptr<ScanOptions>& options) override;
Result<std::shared_ptr<FileWriter>> MakeWriter(
std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
std::shared_ptr<FileWriteOptions> options,
fs::FileLocator destination_locator) const override;
std::shared_ptr<FileWriteOptions> DefaultWriteOptions() override;
};
/// \brief Per-scan options for IPC fragments
class ARROW_DS_EXPORT IpcFragmentScanOptions : public FragmentScanOptions {
public:
std::string type_name() const override { return kIpcTypeName; }
/// Options passed to the IPC file reader.
/// included_fields, memory_pool, and use_threads are ignored.
std::shared_ptr<ipc::IpcReadOptions> options;
/// If present, the async scanner will enable I/O coalescing.
/// This is ignored by the sync scanner.
std::shared_ptr<io::CacheOptions> cache_options;
};
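/// \par Example
///
/// A minimal sketch of enabling I/O coalescing for async scans (the lazy
/// cache defaults are one reasonable choice, not a requirement):
///
/// \code
/// #include "arrow/dataset/file_ipc.h"
/// #include "arrow/io/caching.h"
///
/// std::shared_ptr<arrow::dataset::IpcFileFormat> MakeCoalescingIpcFormat() {
///   auto format = std::make_shared<arrow::dataset::IpcFileFormat>();
///   auto scan_options =
///       std::make_shared<arrow::dataset::IpcFragmentScanOptions>();
///   scan_options->cache_options = std::make_shared<arrow::io::CacheOptions>(
///       arrow::io::CacheOptions::LazyDefaults());
///   format->default_fragment_scan_options = std::move(scan_options);
///   return format;
/// }
/// \endcode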
class ARROW_DS_EXPORT IpcFileWriteOptions : public FileWriteOptions {
public:
/// Options passed to ipc::MakeFileWriter. use_threads is ignored
std::shared_ptr<ipc::IpcWriteOptions> options;
/// custom_metadata written to the file's footer
std::shared_ptr<const KeyValueMetadata> metadata;
protected:
using FileWriteOptions::FileWriteOptions;
friend class IpcFileFormat;
};
class ARROW_DS_EXPORT IpcFileWriter : public FileWriter {
public:
Status Write(const std::shared_ptr<RecordBatch>& batch) override;
private:
IpcFileWriter(std::shared_ptr<io::OutputStream> destination,
std::shared_ptr<ipc::RecordBatchWriter> writer,
std::shared_ptr<Schema> schema,
std::shared_ptr<IpcFileWriteOptions> options,
fs::FileLocator destination_locator);
Future<> FinishInternal() override;
std::shared_ptr<io::OutputStream> destination_;
std::shared_ptr<ipc::RecordBatchWriter> batch_writer_;
friend class IpcFileFormat;
};
/// @}
} // namespace dataset
} // namespace arrow

View File

@@ -0,0 +1,73 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This API is EXPERIMENTAL.
#pragma once
#include <memory>
#include <string>
#include "arrow/dataset/file_base.h"
#include "arrow/dataset/type_fwd.h"
#include "arrow/dataset/visibility.h"
#include "arrow/io/type_fwd.h"
#include "arrow/result.h"
namespace arrow {
namespace dataset {
/// \addtogroup dataset-file-formats
///
/// @{
constexpr char kOrcTypeName[] = "orc";
/// \brief A FileFormat implementation that reads from and writes to ORC files
class ARROW_DS_EXPORT OrcFileFormat : public FileFormat {
public:
std::string type_name() const override { return kOrcTypeName; }
bool Equals(const FileFormat& other) const override {
return type_name() == other.type_name();
}
Result<bool> IsSupported(const FileSource& source) const override;
/// \brief Return the schema of the file if possible.
Result<std::shared_ptr<Schema>> Inspect(const FileSource& source) const override;
Result<RecordBatchGenerator> ScanBatchesAsync(
const std::shared_ptr<ScanOptions>& options,
const std::shared_ptr<FileFragment>& file) const override;
Future<util::optional<int64_t>> CountRows(
const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
const std::shared_ptr<ScanOptions>& options) override;
Result<std::shared_ptr<FileWriter>> MakeWriter(
std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
std::shared_ptr<FileWriteOptions> options,
fs::FileLocator destination_locator) const override;
std::shared_ptr<FileWriteOptions> DefaultWriteOptions() override;
};
/// @}
} // namespace dataset
} // namespace arrow

View File

@@ -0,0 +1,373 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This API is EXPERIMENTAL.
#pragma once
#include <memory>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
#include "arrow/dataset/discovery.h"
#include "arrow/dataset/file_base.h"
#include "arrow/dataset/type_fwd.h"
#include "arrow/dataset/visibility.h"
#include "arrow/io/caching.h"
#include "arrow/util/optional.h"
namespace parquet {
class ParquetFileReader;
class Statistics;
class ColumnChunkMetaData;
class RowGroupMetaData;
class FileMetaData;
class FileDecryptionProperties;
class FileEncryptionProperties;
class ReaderProperties;
class ArrowReaderProperties;
class WriterProperties;
class ArrowWriterProperties;
namespace arrow {
class FileReader;
class FileWriter;
struct SchemaManifest;
} // namespace arrow
} // namespace parquet
namespace arrow {
namespace dataset {
/// \addtogroup dataset-file-formats
///
/// @{
constexpr char kParquetTypeName[] = "parquet";
/// \brief A FileFormat implementation that reads from Parquet files
class ARROW_DS_EXPORT ParquetFileFormat : public FileFormat {
public:
ParquetFileFormat() = default;
/// Convenience constructor which copies properties from a parquet::ReaderProperties.
/// memory_pool will be ignored.
explicit ParquetFileFormat(const parquet::ReaderProperties& reader_properties);
std::string type_name() const override { return kParquetTypeName; }
bool Equals(const FileFormat& other) const override;
struct ReaderOptions {
/// \defgroup parquet-file-format-arrow-reader-properties properties which correspond
/// to members of parquet::ArrowReaderProperties.
///
/// We don't embed parquet::ReaderProperties directly because column names (rather
/// than indices) are used to indicate dictionary columns, and other options are
/// deferred to scan time.
///
/// @{
std::unordered_set<std::string> dict_columns;
arrow::TimeUnit::type coerce_int96_timestamp_unit = arrow::TimeUnit::NANO;
/// @}
} reader_options;
Result<bool> IsSupported(const FileSource& source) const override;
/// \brief Return the schema of the file if possible.
Result<std::shared_ptr<Schema>> Inspect(const FileSource& source) const override;
Result<RecordBatchGenerator> ScanBatchesAsync(
const std::shared_ptr<ScanOptions>& options,
const std::shared_ptr<FileFragment>& file) const override;
Future<util::optional<int64_t>> CountRows(
const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
const std::shared_ptr<ScanOptions>& options) override;
using FileFormat::MakeFragment;
/// \brief Create a Fragment targeting all RowGroups.
Result<std::shared_ptr<FileFragment>> MakeFragment(
FileSource source, compute::Expression partition_expression,
std::shared_ptr<Schema> physical_schema) override;
/// \brief Create a Fragment, restricted to the specified row groups.
Result<std::shared_ptr<ParquetFileFragment>> MakeFragment(
FileSource source, compute::Expression partition_expression,
std::shared_ptr<Schema> physical_schema, std::vector<int> row_groups);
/// \brief Return a FileReader on the given source.
Result<std::shared_ptr<parquet::arrow::FileReader>> GetReader(
const FileSource& source, const std::shared_ptr<ScanOptions>& options) const;
Future<std::shared_ptr<parquet::arrow::FileReader>> GetReaderAsync(
const FileSource& source, const std::shared_ptr<ScanOptions>& options) const;
Result<std::shared_ptr<FileWriter>> MakeWriter(
std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
std::shared_ptr<FileWriteOptions> options,
fs::FileLocator destination_locator) const override;
std::shared_ptr<FileWriteOptions> DefaultWriteOptions() override;
};
/// \brief A FileFragment with parquet logic.
///
/// ParquetFileFragment provides a lazy (with respect to IO) interface to
/// scan parquet files. Any heavy IO calls are deferred to the Scan() method.
///
/// The caller can provide an optional list of selected RowGroups to limit the
/// number of scanned RowGroups, or to partition the scans across multiple
/// threads.
///
/// Metadata can be explicitly provided, enabling predicate pushdown benefits without
/// the potentially heavy IO of loading metadata from the file system. This can yield a
/// significant performance boost when scanning high-latency file systems.
class ARROW_DS_EXPORT ParquetFileFragment : public FileFragment {
public:
Result<FragmentVector> SplitByRowGroup(compute::Expression predicate);
/// \brief Return the RowGroups selected by this fragment.
const std::vector<int>& row_groups() const {
if (row_groups_) return *row_groups_;
static std::vector<int> empty;
return empty;
}
/// \brief Return the FileMetaData associated with this fragment.
const std::shared_ptr<parquet::FileMetaData>& metadata() const { return metadata_; }
/// \brief Ensure this fragment's FileMetaData is in memory.
Status EnsureCompleteMetadata(parquet::arrow::FileReader* reader = NULLPTR);
/// \brief Return a fragment which selects a filtered subset of this fragment's RowGroups.
Result<std::shared_ptr<Fragment>> Subset(compute::Expression predicate);
Result<std::shared_ptr<Fragment>> Subset(std::vector<int> row_group_ids);
private:
ParquetFileFragment(FileSource source, std::shared_ptr<FileFormat> format,
compute::Expression partition_expression,
std::shared_ptr<Schema> physical_schema,
util::optional<std::vector<int>> row_groups);
Status SetMetadata(std::shared_ptr<parquet::FileMetaData> metadata,
std::shared_ptr<parquet::arrow::SchemaManifest> manifest);
// Overridden to opportunistically set metadata since a reader must be opened anyway.
Result<std::shared_ptr<Schema>> ReadPhysicalSchemaImpl() override {
ARROW_RETURN_NOT_OK(EnsureCompleteMetadata());
return physical_schema_;
}
/// Return a filtered subset of row group indices.
Result<std::vector<int>> FilterRowGroups(compute::Expression predicate);
/// Simplify the predicate against the statistics of each row group.
Result<std::vector<compute::Expression>> TestRowGroups(compute::Expression predicate);
/// Try to count rows matching the predicate using metadata. Expects
/// metadata to be present, and expects the predicate to have been
/// simplified against the partition expression already.
Result<util::optional<int64_t>> TryCountRows(compute::Expression predicate);
ParquetFileFormat& parquet_format_;
/// Indices of row groups selected by this fragment,
/// or util::nullopt if all row groups are selected.
util::optional<std::vector<int>> row_groups_;
std::vector<compute::Expression> statistics_expressions_;
std::vector<bool> statistics_expressions_complete_;
std::shared_ptr<parquet::FileMetaData> metadata_;
std::shared_ptr<parquet::arrow::SchemaManifest> manifest_;
friend class ParquetFileFormat;
friend class ParquetDatasetFactory;
};
/// \brief Per-scan options for Parquet fragments
class ARROW_DS_EXPORT ParquetFragmentScanOptions : public FragmentScanOptions {
public:
ParquetFragmentScanOptions();
std::string type_name() const override { return kParquetTypeName; }
/// Reader properties. Not all properties are respected: memory_pool comes from
/// ScanOptions.
std::shared_ptr<parquet::ReaderProperties> reader_properties;
/// Arrow reader properties. Not all properties are respected: batch_size comes from
/// ScanOptions. Additionally, dictionary columns come from
/// ParquetFileFormat::ReaderOptions::dict_columns.
std::shared_ptr<parquet::ArrowReaderProperties> arrow_reader_properties;
};
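// Example (illustrative sketch, not part of the original header): configuring
// Parquet-specific scan options. Assumes the default-constructed options hold
// non-null ReaderProperties/ArrowReaderProperties objects; the column index 0
// and the `scan_options` variable are hypothetical.
//
// auto parquet_scan_opts =
//     std::make_shared<arrow::dataset::ParquetFragmentScanOptions>();
// parquet_scan_opts->reader_properties->enable_buffered_stream();
// parquet_scan_opts->arrow_reader_properties->set_read_dictionary(0, true);
// // Attach to a ScanOptions (or pass via ScannerBuilder::FragmentScanOptions).
// scan_options->fragment_scan_options = parquet_scan_opts;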
class ARROW_DS_EXPORT ParquetFileWriteOptions : public FileWriteOptions {
public:
/// \brief Parquet writer properties.
std::shared_ptr<parquet::WriterProperties> writer_properties;
/// \brief Parquet Arrow writer properties.
std::shared_ptr<parquet::ArrowWriterProperties> arrow_writer_properties;
protected:
using FileWriteOptions::FileWriteOptions;
friend class ParquetFileFormat;
};
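// Example (illustrative sketch, not part of the original header): customizing
// the Parquet writer properties used for a dataset write. The SNAPPY codec is
// an illustrative choice.
//
// #include "parquet/properties.h"
//
// auto format = std::make_shared<arrow::dataset::ParquetFileFormat>();
// auto write_options =
//     std::static_pointer_cast<arrow::dataset::ParquetFileWriteOptions>(
//         format->DefaultWriteOptions());
// write_options->writer_properties = parquet::WriterProperties::Builder()
//                                        .compression(arrow::Compression::SNAPPY)
//                                        ->build();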
class ARROW_DS_EXPORT ParquetFileWriter : public FileWriter {
public:
const std::shared_ptr<parquet::arrow::FileWriter>& parquet_writer() const {
return parquet_writer_;
}
Status Write(const std::shared_ptr<RecordBatch>& batch) override;
private:
ParquetFileWriter(std::shared_ptr<io::OutputStream> destination,
std::shared_ptr<parquet::arrow::FileWriter> writer,
std::shared_ptr<ParquetFileWriteOptions> options,
fs::FileLocator destination_locator);
Future<> FinishInternal() override;
std::shared_ptr<parquet::arrow::FileWriter> parquet_writer_;
friend class ParquetFileFormat;
};
/// \brief Options for making a FileSystemDataset from a Parquet _metadata file.
struct ParquetFactoryOptions {
/// Either an explicit Partitioning or a PartitioningFactory to discover one.
///
/// If a factory is provided, it will be used to infer a schema for partition fields
/// based on file and directory paths, and then construct a Partitioning. The default
/// is a Partitioning which will yield no partition information.
///
/// The (explicit or discovered) partitioning will be applied to discovered files
/// and the resulting partition information embedded in the Dataset.
PartitioningOrFactory partitioning{Partitioning::Default()};
/// For the purposes of applying the partitioning, paths will be stripped
/// of the partition_base_dir. Files not matching the partition_base_dir
/// prefix will be skipped for partition discovery. The ignored files will still
/// be part of the Dataset, but will not have partition information.
///
/// Example:
/// partition_base_dir = "/dataset";
///
/// - "/dataset/US/sales.csv" -> "US/sales.csv" will be given to the partitioning
///
/// - "/home/john/late_sales.csv" -> Will be ignored for partition discovery.
///
/// This is useful for partitionings which parse directory paths when ordering
/// is important, e.g. DirectoryPartitioning.
std::string partition_base_dir;
/// Assert that all ColumnChunk paths are consistent. The parquet spec allows for
/// ColumnChunk data to be stored in multiple files, but ParquetDatasetFactory
/// supports only a single file with all ColumnChunk data. If this flag is set,
/// construction of a ParquetDatasetFactory will raise an error if ColumnChunk
/// data is not resident in a single file.
bool validate_column_chunk_paths = false;
};
/// \brief Create a FileSystemDataset from a custom `_metadata` cache file.
///
/// Dask and other systems will generate a cache metadata file by concatenating
/// the RowGroupMetaData of multiple parquet files into a single parquet file
/// that only contains metadata and no ColumnChunk data.
///
/// ParquetDatasetFactory creates a FileSystemDataset composed of
/// ParquetFileFragments, where each fragment is pre-populated with the exact
/// number of row groups and statistics for each column.
class ARROW_DS_EXPORT ParquetDatasetFactory : public DatasetFactory {
public:
/// \brief Create a ParquetDatasetFactory from a metadata path.
///
/// The `metadata_path` will be read from `filesystem`. Each file path referenced
/// by a RowGroup in the metadata file will be interpreted relative to
/// `dirname(metadata_path)`.
///
/// \param[in] metadata_path path of the metadata parquet file
/// \param[in] filesystem from which to open/read the path
/// \param[in] format to read the file with.
/// \param[in] options see ParquetFactoryOptions
static Result<std::shared_ptr<DatasetFactory>> Make(
const std::string& metadata_path, std::shared_ptr<fs::FileSystem> filesystem,
std::shared_ptr<ParquetFileFormat> format, ParquetFactoryOptions options);
/// \brief Create a ParquetDatasetFactory from a metadata source.
///
/// Similar to the previous Make definition, but the metadata can be a Buffer
/// and the base_path is given explicitly instead of being inferred from the
/// metadata path.
///
/// \param[in] metadata source to open the metadata parquet file from
/// \param[in] base_path used as the prefix of every parquet file referenced
/// \param[in] filesystem from which to read the files referenced.
/// \param[in] format to read the file with.
/// \param[in] options see ParquetFactoryOptions
static Result<std::shared_ptr<DatasetFactory>> Make(
const FileSource& metadata, const std::string& base_path,
std::shared_ptr<fs::FileSystem> filesystem,
std::shared_ptr<ParquetFileFormat> format, ParquetFactoryOptions options);
Result<std::vector<std::shared_ptr<Schema>>> InspectSchemas(
InspectOptions options) override;
Result<std::shared_ptr<Dataset>> Finish(FinishOptions options) override;
protected:
ParquetDatasetFactory(
std::shared_ptr<fs::FileSystem> filesystem,
std::shared_ptr<ParquetFileFormat> format,
std::shared_ptr<parquet::FileMetaData> metadata,
std::shared_ptr<parquet::arrow::SchemaManifest> manifest,
std::shared_ptr<Schema> physical_schema, std::string base_path,
ParquetFactoryOptions options,
std::vector<std::pair<std::string, std::vector<int>>> paths_with_row_group_ids)
: filesystem_(std::move(filesystem)),
format_(std::move(format)),
metadata_(std::move(metadata)),
manifest_(std::move(manifest)),
physical_schema_(std::move(physical_schema)),
base_path_(std::move(base_path)),
options_(std::move(options)),
paths_with_row_group_ids_(std::move(paths_with_row_group_ids)) {}
std::shared_ptr<fs::FileSystem> filesystem_;
std::shared_ptr<ParquetFileFormat> format_;
std::shared_ptr<parquet::FileMetaData> metadata_;
std::shared_ptr<parquet::arrow::SchemaManifest> manifest_;
std::shared_ptr<Schema> physical_schema_;
std::string base_path_;
ParquetFactoryOptions options_;
std::vector<std::pair<std::string, std::vector<int>>> paths_with_row_group_ids_;
private:
Result<std::vector<std::shared_ptr<FileFragment>>> CollectParquetFragments(
const Partitioning& partitioning);
Result<std::shared_ptr<Schema>> PartitionSchema();
};
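// Example (illustrative sketch, not part of the original header): building a
// dataset from a Dask-style `_metadata` file. The "/dataset/_metadata" path
// and the use of LocalFileSystem are hypothetical.
//
// #include "arrow/filesystem/localfs.h"
//
// arrow::Result<std::shared_ptr<arrow::dataset::Dataset>> OpenFromMetadata() {
//   auto fs = std::make_shared<arrow::fs::LocalFileSystem>();
//   auto format = std::make_shared<arrow::dataset::ParquetFileFormat>();
//   arrow::dataset::ParquetFactoryOptions factory_options;
//   ARROW_ASSIGN_OR_RAISE(
//       auto factory,
//       arrow::dataset::ParquetDatasetFactory::Make(
//           "/dataset/_metadata", fs, format, std::move(factory_options)));
//   return factory->Finish();
// }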
/// @}
} // namespace dataset
} // namespace arrow

View File

@@ -0,0 +1,409 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This API is EXPERIMENTAL.
#pragma once
#include <functional>
#include <iosfwd>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "arrow/compute/exec/expression.h"
#include "arrow/dataset/type_fwd.h"
#include "arrow/dataset/visibility.h"
#include "arrow/util/optional.h"
namespace arrow {
namespace dataset {
constexpr char kFilenamePartitionSep = '_';
// ----------------------------------------------------------------------
// Partitioning
/// \defgroup dataset-partitioning Partitioning API
///
/// @{
/// \brief Interface for parsing partition expressions from string partition
/// identifiers.
///
/// For example, the identifier "foo=5" might be parsed to an equality expression
/// between the "foo" field and the value 5.
///
/// Some partitionings may store the field names in a metadata
/// store instead of in file paths; for example,
/// dataset_root/2009/11/... could be used when the partition fields
/// are "year" and "month".
///
/// Paths are consumed from left to right. Paths must be relative to
/// the root of a partition; path prefixes must be removed before passing
/// the path to a partitioning for parsing.
class ARROW_DS_EXPORT Partitioning {
public:
virtual ~Partitioning() = default;
/// \brief The name identifying the kind of partitioning
virtual std::string type_name() const = 0;
/// \brief If the input batch shares any fields with this partitioning,
/// produce sub-batches which satisfy mutually exclusive Expressions.
struct PartitionedBatches {
RecordBatchVector batches;
std::vector<compute::Expression> expressions;
};
virtual Result<PartitionedBatches> Partition(
const std::shared_ptr<RecordBatch>& batch) const = 0;
/// \brief Parse a path into a partition expression
virtual Result<compute::Expression> Parse(const std::string& path) const = 0;
struct PartitionPathFormat {
std::string directory, prefix;
};
virtual Result<PartitionPathFormat> Format(const compute::Expression& expr) const = 0;
/// \brief A default Partitioning which always yields scalar(true)
static std::shared_ptr<Partitioning> Default();
/// \brief The partition schema.
const std::shared_ptr<Schema>& schema() { return schema_; }
protected:
explicit Partitioning(std::shared_ptr<Schema> schema) : schema_(std::move(schema)) {}
std::shared_ptr<Schema> schema_;
};
/// \brief The encoding of partition segments.
enum class SegmentEncoding : int8_t {
/// No encoding.
None = 0,
/// Segment values are URL-encoded.
Uri = 1,
};
ARROW_DS_EXPORT
std::ostream& operator<<(std::ostream& os, SegmentEncoding segment_encoding);
/// \brief Options for key-value based partitioning (hive/directory).
struct ARROW_DS_EXPORT KeyValuePartitioningOptions {
/// After splitting a path into components, decode the path components
/// before parsing according to this scheme.
SegmentEncoding segment_encoding = SegmentEncoding::Uri;
};
/// \brief Options for inferring a partitioning.
struct ARROW_DS_EXPORT PartitioningFactoryOptions {
/// When inferring a schema for partition fields, yield dictionary encoded types
/// instead of plain. This can be more efficient when materializing virtual
/// columns, and Expressions parsed by the finished Partitioning will include
/// dictionaries of all unique inspected values for each field.
bool infer_dictionary = false;
/// Optionally, an expected schema can be provided, in which case inference
/// will only check discovered fields against the schema and update internal
/// state (such as dictionaries).
std::shared_ptr<Schema> schema;
/// After splitting a path into components, decode the path components
/// before parsing according to this scheme.
SegmentEncoding segment_encoding = SegmentEncoding::Uri;
KeyValuePartitioningOptions AsPartitioningOptions() const;
};
/// \brief Options for inferring a hive-style partitioning.
struct ARROW_DS_EXPORT HivePartitioningFactoryOptions : PartitioningFactoryOptions {
/// The hive partitioning scheme maps null to a hard coded fallback string.
std::string null_fallback;
HivePartitioningOptions AsHivePartitioningOptions() const;
};
/// \brief PartitioningFactory provides creation of a partitioning when the
/// specific schema must be inferred from available paths (no explicit schema is known).
class ARROW_DS_EXPORT PartitioningFactory {
public:
virtual ~PartitioningFactory() = default;
/// \brief The name identifying the kind of partitioning
virtual std::string type_name() const = 0;
/// Get the schema for the resulting Partitioning.
/// This may reset internal state, for example dictionaries of unique representations.
virtual Result<std::shared_ptr<Schema>> Inspect(
const std::vector<std::string>& paths) = 0;
/// Create a partitioning using the provided schema
/// (fields may be dropped).
virtual Result<std::shared_ptr<Partitioning>> Finish(
const std::shared_ptr<Schema>& schema) const = 0;
};
/// \brief Subclass for the common case of a partitioning which yields an equality
/// expression for each segment
class ARROW_DS_EXPORT KeyValuePartitioning : public Partitioning {
public:
/// An unconverted equality expression consisting of a field name and the representation
/// of a scalar value
struct Key {
std::string name;
util::optional<std::string> value;
};
Result<PartitionedBatches> Partition(
const std::shared_ptr<RecordBatch>& batch) const override;
Result<compute::Expression> Parse(const std::string& path) const override;
Result<PartitionPathFormat> Format(const compute::Expression& expr) const override;
const ArrayVector& dictionaries() const { return dictionaries_; }
protected:
KeyValuePartitioning(std::shared_ptr<Schema> schema, ArrayVector dictionaries,
KeyValuePartitioningOptions options)
: Partitioning(std::move(schema)),
dictionaries_(std::move(dictionaries)),
options_(options) {
if (dictionaries_.empty()) {
dictionaries_.resize(schema_->num_fields());
}
}
virtual Result<std::vector<Key>> ParseKeys(const std::string& path) const = 0;
virtual Result<PartitionPathFormat> FormatValues(const ScalarVector& values) const = 0;
/// Convert a Key to a full expression.
Result<compute::Expression> ConvertKey(const Key& key) const;
Result<std::vector<std::string>> FormatPartitionSegments(
const ScalarVector& values) const;
Result<std::vector<Key>> ParsePartitionSegments(
const std::vector<std::string>& segments) const;
ArrayVector dictionaries_;
KeyValuePartitioningOptions options_;
};
/// \brief DirectoryPartitioning parses one segment of a path for each field in its
/// schema. All fields are required, so paths passed to DirectoryPartitioning::Parse
/// must contain segments for each field.
///
/// For example given schema<year:int16, month:int8> the path "/2009/11" would be
/// parsed to ("year"_ == 2009 and "month"_ == 11)
class ARROW_DS_EXPORT DirectoryPartitioning : public KeyValuePartitioning {
public:
/// If a field in schema is of dictionary type, the corresponding element of
/// dictionaries must contain the dictionary of values for that field.
explicit DirectoryPartitioning(std::shared_ptr<Schema> schema,
ArrayVector dictionaries = {},
KeyValuePartitioningOptions options = {});
std::string type_name() const override { return "directory"; }
/// \brief Create a factory for a directory partitioning.
///
/// \param[in] field_names The names for the partition fields. Types will be
/// inferred.
static std::shared_ptr<PartitioningFactory> MakeFactory(
std::vector<std::string> field_names, PartitioningFactoryOptions = {});
private:
Result<std::vector<Key>> ParseKeys(const std::string& path) const override;
Result<PartitionPathFormat> FormatValues(const ScalarVector& values) const override;
};
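// Example (illustrative sketch, not part of the original header): parsing the
// path from the class comment above into a partition expression.
//
// arrow::Status ParseDirectoryPath() {
//   auto schema = arrow::schema({arrow::field("year", arrow::int16()),
//                                arrow::field("month", arrow::int8())});
//   arrow::dataset::DirectoryPartitioning partitioning(schema);
//   // Yields ("year"_ == 2009 and "month"_ == 11).
//   ARROW_ASSIGN_OR_RAISE(auto expr, partitioning.Parse("/2009/11"));
//   return arrow::Status::OK();
// }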
/// \brief The default fallback used for null values in a Hive-style partitioning.
static constexpr char kDefaultHiveNullFallback[] = "__HIVE_DEFAULT_PARTITION__";
struct ARROW_DS_EXPORT HivePartitioningOptions : public KeyValuePartitioningOptions {
std::string null_fallback = kDefaultHiveNullFallback;
static HivePartitioningOptions DefaultsWithNullFallback(std::string fallback) {
HivePartitioningOptions options;
options.null_fallback = std::move(fallback);
return options;
}
};
/// \brief Multi-level, directory based partitioning
/// originating from Apache Hive with all data files stored in the
/// leaf directories. Data is partitioned by static values of a
/// particular column in the schema. Partition keys are represented in
/// the form $key=$value in directory names.
/// Field order is ignored, as are missing or unrecognized field names.
///
/// For example given schema<year:int16, month:int8, day:int8> the path
/// "/day=321/ignored=3.4/year=2009" parses to ("year"_ == 2009 and "day"_ == 321)
class ARROW_DS_EXPORT HivePartitioning : public KeyValuePartitioning {
public:
/// If a field in schema is of dictionary type, the corresponding element of
/// dictionaries must contain the dictionary of values for that field.
explicit HivePartitioning(std::shared_ptr<Schema> schema, ArrayVector dictionaries = {},
std::string null_fallback = kDefaultHiveNullFallback)
: KeyValuePartitioning(std::move(schema), std::move(dictionaries),
KeyValuePartitioningOptions()),
hive_options_(
HivePartitioningOptions::DefaultsWithNullFallback(std::move(null_fallback))) {
}
explicit HivePartitioning(std::shared_ptr<Schema> schema, ArrayVector dictionaries,
HivePartitioningOptions options)
: KeyValuePartitioning(std::move(schema), std::move(dictionaries), options),
hive_options_(options) {}
std::string type_name() const override { return "hive"; }
std::string null_fallback() const { return hive_options_.null_fallback; }
const HivePartitioningOptions& options() const { return hive_options_; }
static Result<util::optional<Key>> ParseKey(const std::string& segment,
const HivePartitioningOptions& options);
/// \brief Create a factory for a hive partitioning.
static std::shared_ptr<PartitioningFactory> MakeFactory(
HivePartitioningFactoryOptions = {});
private:
const HivePartitioningOptions hive_options_;
Result<std::vector<Key>> ParseKeys(const std::string& path) const override;
Result<PartitionPathFormat> FormatValues(const ScalarVector& values) const override;
};
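// Example (illustrative sketch, not part of the original header): hive-style
// parsing is order-independent and skips unrecognized keys, per the class
// comment above.
//
// arrow::Status ParseHivePath() {
//   auto schema = arrow::schema({arrow::field("year", arrow::int16()),
//                                arrow::field("month", arrow::int8()),
//                                arrow::field("day", arrow::int8())});
//   arrow::dataset::HivePartitioning partitioning(schema);
//   // Yields ("year"_ == 2009 and "day"_ == 321); "ignored" is skipped.
//   ARROW_ASSIGN_OR_RAISE(auto expr,
//                         partitioning.Parse("/day=321/ignored=3.4/year=2009"));
//   return arrow::Status::OK();
// }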
/// \brief Implementation provided by lambda or other callable
class ARROW_DS_EXPORT FunctionPartitioning : public Partitioning {
public:
using ParseImpl = std::function<Result<compute::Expression>(const std::string&)>;
using FormatImpl =
std::function<Result<PartitionPathFormat>(const compute::Expression&)>;
FunctionPartitioning(std::shared_ptr<Schema> schema, ParseImpl parse_impl,
FormatImpl format_impl = NULLPTR, std::string name = "function")
: Partitioning(std::move(schema)),
parse_impl_(std::move(parse_impl)),
format_impl_(std::move(format_impl)),
name_(std::move(name)) {}
std::string type_name() const override { return name_; }
Result<compute::Expression> Parse(const std::string& path) const override {
return parse_impl_(path);
}
Result<PartitionPathFormat> Format(const compute::Expression& expr) const override {
if (format_impl_) {
return format_impl_(expr);
}
return Status::NotImplemented("formatting paths from ", type_name(), " Partitioning");
}
Result<PartitionedBatches> Partition(
const std::shared_ptr<RecordBatch>& batch) const override {
return Status::NotImplemented("partitioning batches from ", type_name(),
" Partitioning");
}
private:
ParseImpl parse_impl_;
FormatImpl format_impl_;
std::string name_;
};
class ARROW_DS_EXPORT FilenamePartitioning : public KeyValuePartitioning {
public:
/// \brief Construct a FilenamePartitioning from its components.
///
/// If a field in schema is of dictionary type, the corresponding element of
/// dictionaries must contain the dictionary of values for that field.
explicit FilenamePartitioning(std::shared_ptr<Schema> schema,
ArrayVector dictionaries = {},
KeyValuePartitioningOptions options = {});
std::string type_name() const override { return "filename"; }
/// \brief Create a factory for a filename partitioning.
///
/// \param[in] field_names The names for the partition fields. Types will be
/// inferred.
static std::shared_ptr<PartitioningFactory> MakeFactory(
std::vector<std::string> field_names, PartitioningFactoryOptions = {});
private:
Result<std::vector<Key>> ParseKeys(const std::string& path) const override;
Result<PartitionPathFormat> FormatValues(const ScalarVector& values) const override;
};
/// \brief Remove a prefix and the filename of a path.
///
/// e.g., `StripPrefixAndFilename("/data/year=2019/c.txt", "/data") -> "year=2019"`
ARROW_DS_EXPORT std::string StripPrefixAndFilename(const std::string& path,
const std::string& prefix);
/// \brief Vector version of StripPrefixAndFilename.
ARROW_DS_EXPORT std::vector<std::string> StripPrefixAndFilename(
const std::vector<std::string>& paths, const std::string& prefix);
/// \brief Vector version of StripPrefixAndFilename.
ARROW_DS_EXPORT std::vector<std::string> StripPrefixAndFilename(
const std::vector<fs::FileInfo>& files, const std::string& prefix);
/// \brief Either a Partitioning or a PartitioningFactory
class ARROW_DS_EXPORT PartitioningOrFactory {
public:
explicit PartitioningOrFactory(std::shared_ptr<Partitioning> partitioning)
: partitioning_(std::move(partitioning)) {}
explicit PartitioningOrFactory(std::shared_ptr<PartitioningFactory> factory)
: factory_(std::move(factory)) {}
PartitioningOrFactory& operator=(std::shared_ptr<Partitioning> partitioning) {
return *this = PartitioningOrFactory(std::move(partitioning));
}
PartitioningOrFactory& operator=(std::shared_ptr<PartitioningFactory> factory) {
return *this = PartitioningOrFactory(std::move(factory));
}
/// \brief The partitioning (if given).
const std::shared_ptr<Partitioning>& partitioning() const { return partitioning_; }
/// \brief The partition factory (if given).
const std::shared_ptr<PartitioningFactory>& factory() const { return factory_; }
/// \brief Get the partition schema, inferring it with the given factory if needed.
Result<std::shared_ptr<Schema>> GetOrInferSchema(const std::vector<std::string>& paths);
private:
std::shared_ptr<PartitioningFactory> factory_;
std::shared_ptr<Partitioning> partitioning_;
};
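// Example (illustrative sketch, not part of the original header): inferring a
// partition schema with a factory. The paths are hypothetical and must already
// be stripped of prefix and filename (see StripPrefixAndFilename above).
//
// arrow::Status InferPartitionSchema() {
//   arrow::dataset::PartitioningOrFactory por{
//       arrow::dataset::DirectoryPartitioning::MakeFactory({"year", "month"})};
//   ARROW_ASSIGN_OR_RAISE(auto schema,
//                         por.GetOrInferSchema({"2009/11", "2010/01"}));
//   return arrow::Status::OK();
// }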
/// @}
} // namespace dataset
} // namespace arrow

View File

@@ -0,0 +1,27 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Often-used headers, for precompiling.
// If updating this header, please make sure you check compilation speed
// before checking in. Adding headers which are not used extremely often
// may incur a slowdown, since it makes the precompiled header heavier to load.
// This API is EXPERIMENTAL.
#include "arrow/dataset/dataset.h"
#include "arrow/dataset/scanner.h"
#include "arrow/pch.h"

View File

@@ -0,0 +1,33 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This API is EXPERIMENTAL.
#include "arrow/dataset/visibility.h"
namespace arrow {
namespace dataset {
namespace internal {
/// Register dataset-based exec nodes with the exec node registry
///
/// This function must be called before using dataset ExecNode factories
ARROW_DS_EXPORT void Initialize();
} // namespace internal
} // namespace dataset
} // namespace arrow

View File

@@ -0,0 +1,32 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This API is EXPERIMENTAL.
#pragma once
#include "arrow/dataset/visibility.h"
#include "arrow/type_fwd.h"
namespace arrow {
namespace dataset {
// FIXME this is superseded by compute::Expression::Bind
ARROW_DS_EXPORT Status CheckProjectable(const Schema& from, const Schema& to);
} // namespace dataset
} // namespace arrow

View File

@@ -0,0 +1,432 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This API is EXPERIMENTAL.
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "arrow/compute/exec/expression.h"
#include "arrow/compute/exec/options.h"
#include "arrow/compute/type_fwd.h"
#include "arrow/dataset/dataset.h"
#include "arrow/dataset/projector.h"
#include "arrow/dataset/type_fwd.h"
#include "arrow/dataset/visibility.h"
#include "arrow/io/interfaces.h"
#include "arrow/memory_pool.h"
#include "arrow/type_fwd.h"
#include "arrow/util/async_generator.h"
#include "arrow/util/iterator.h"
#include "arrow/util/thread_pool.h"
#include "arrow/util/type_fwd.h"
namespace arrow {
using RecordBatchGenerator = std::function<Future<std::shared_ptr<RecordBatch>>()>;
namespace dataset {
/// \defgroup dataset-scanning Scanning API
///
/// @{
constexpr int64_t kDefaultBatchSize = 1 << 17; // 128Ki rows
// Together, the two readahead defaults below yield 64 batches ~ 8Mi rows
// (16 batches per file across 4 files)
constexpr int32_t kDefaultBatchReadahead = 16;
constexpr int32_t kDefaultFragmentReadahead = 4;
/// Scan-specific options, which can be changed between scans of the same dataset.
struct ARROW_DS_EXPORT ScanOptions {
/// A row filter (which will be pushed down to partitioning/reading if supported).
compute::Expression filter = compute::literal(true);
/// A projection expression (which can add/remove/rename columns).
compute::Expression projection;
/// Schema with which batches will be read from fragments. This is also known as the
/// "reader schema"; it will be used (for example) when constructing CSV file readers
/// to identify column types for parsing. Usually only a subset of its fields (see
/// MaterializedFields) will be materialized during a scan.
std::shared_ptr<Schema> dataset_schema;
/// Schema of projected record batches. This is independent of dataset_schema as its
/// fields are derived from the projection. For example, let
///
/// dataset_schema = {"a": int32, "b": int32, "id": utf8}
/// projection = project({equal(field_ref("a"), field_ref("b"))}, {"a_plus_b"})
///
/// (no filter specified). In this case, the projected_schema would be
///
/// {"a_plus_b": int32}
std::shared_ptr<Schema> projected_schema;
/// Maximum row count for scanned batches.
int64_t batch_size = kDefaultBatchSize;
/// How many batches to read ahead within a file
///
/// Set to 0 to disable batch readahead
///
/// Note: May not be supported by all formats
/// Note: Will be ignored if use_threads is set to false
int32_t batch_readahead = kDefaultBatchReadahead;
/// How many files to read ahead
///
/// Set to 0 to disable fragment readahead
///
/// Note: May not be enforced by all scanners
/// Note: Will be ignored if use_threads is set to false
int32_t fragment_readahead = kDefaultFragmentReadahead;
/// A pool from which materialized and scanned arrays will be allocated.
MemoryPool* pool = arrow::default_memory_pool();
/// IOContext for any IO tasks
///
/// Note: The IOContext executor will be ignored if use_threads is set to false
io::IOContext io_context;
/// If true the scanner will scan in parallel
///
/// Note: If true, this will use threads from both the cpu_executor and the
/// io_context.executor
/// Note: This must be true in order for any readahead to happen
bool use_threads = false;
/// Fragment-specific scan options.
std::shared_ptr<FragmentScanOptions> fragment_scan_options;
/// Return a vector of FieldRefs that require materialization.
///
/// This is usually the union of the fields referenced in the projection and the
/// filter expression. Examples:
///
/// - `SELECT a, b WHERE a < 2 && c > 1` => ["a", "b", "a", "c"]
/// - `SELECT a + b < 3 WHERE a > 1` => ["a", "b"]
///
/// This is needed for expressions where a field may not be directly
/// used in the final projection but is still required to evaluate the
/// expression.
///
/// This is used by Fragment implementations to apply the column
/// sub-selection optimization.
std::vector<FieldRef> MaterializedFields() const;
/// Parameters which control when the plan should pause for a slow consumer
compute::BackpressureOptions backpressure =
compute::BackpressureOptions::DefaultBackpressure();
};
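// Example (illustrative sketch, not part of the original header): configuring a
// scan directly through ScanOptions. The field name "a" is hypothetical.
//
// auto scan_options = std::make_shared<arrow::dataset::ScanOptions>();
// scan_options->filter = arrow::compute::greater(arrow::compute::field_ref("a"),
//                                                arrow::compute::literal(1));
// scan_options->batch_size = 64 * 1024;  // rows per batch
// scan_options->use_threads = true;      // required for readahead to kick in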
/// \brief Describes a projection
struct ARROW_DS_EXPORT ProjectionDescr {
/// \brief The projection expression itself
/// This expression must be a call to make_struct
compute::Expression expression;
/// \brief The output schema of the projection.
/// This can be calculated from the input schema and the expression but it
/// is cached here for convenience.
std::shared_ptr<Schema> schema;
/// \brief Create a ProjectionDescr by binding an expression to the dataset schema
///
/// expression must return a struct type
static Result<ProjectionDescr> FromStructExpression(
const compute::Expression& expression, const Schema& dataset_schema);
/// \brief Create a ProjectionDescr from expressions/names for each field
static Result<ProjectionDescr> FromExpressions(std::vector<compute::Expression> exprs,
std::vector<std::string> names,
const Schema& dataset_schema);
/// \brief Create a default projection referencing fields in the dataset schema
static Result<ProjectionDescr> FromNames(std::vector<std::string> names,
const Schema& dataset_schema);
/// \brief Make a projection that projects every field in the dataset schema
static Result<ProjectionDescr> Default(const Schema& dataset_schema);
};
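// Example (illustrative sketch, not part of the original header): selecting two
// hypothetical columns "a" and "b" and storing the projection on a ScanOptions.
//
// arrow::Status ConfigureProjection(const arrow::Schema& dataset_schema,
//                                   arrow::dataset::ScanOptions* options) {
//   ARROW_ASSIGN_OR_RAISE(
//       auto projection,
//       arrow::dataset::ProjectionDescr::FromNames({"a", "b"}, dataset_schema));
//   arrow::dataset::SetProjection(options, std::move(projection));
//   return arrow::Status::OK();
// }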
/// \brief Utility method to set the projection expression and schema
ARROW_DS_EXPORT void SetProjection(ScanOptions* options, ProjectionDescr projection);
/// \brief Combines a record batch with the fragment that the record batch originated
/// from
///
/// Knowing the source fragment can be useful for debugging & understanding loaded
/// data
struct TaggedRecordBatch {
std::shared_ptr<RecordBatch> record_batch;
std::shared_ptr<Fragment> fragment;
};
using TaggedRecordBatchGenerator = std::function<Future<TaggedRecordBatch>()>;
using TaggedRecordBatchIterator = Iterator<TaggedRecordBatch>;
/// \brief Combines a tagged batch with positional information
///
/// This is returned when scanning batches in an unordered fashion. This information is
/// needed if you ever want to reassemble the batches in order
struct EnumeratedRecordBatch {
Enumerated<std::shared_ptr<RecordBatch>> record_batch;
Enumerated<std::shared_ptr<Fragment>> fragment;
};
using EnumeratedRecordBatchGenerator = std::function<Future<EnumeratedRecordBatch>()>;
using EnumeratedRecordBatchIterator = Iterator<EnumeratedRecordBatch>;
/// @}
} // namespace dataset
template <>
struct IterationTraits<dataset::TaggedRecordBatch> {
static dataset::TaggedRecordBatch End() {
return dataset::TaggedRecordBatch{NULLPTR, NULLPTR};
}
static bool IsEnd(const dataset::TaggedRecordBatch& val) {
return val.record_batch == NULLPTR;
}
};
template <>
struct IterationTraits<dataset::EnumeratedRecordBatch> {
static dataset::EnumeratedRecordBatch End() {
return dataset::EnumeratedRecordBatch{
IterationEnd<Enumerated<std::shared_ptr<RecordBatch>>>(),
IterationEnd<Enumerated<std::shared_ptr<dataset::Fragment>>>()};
}
static bool IsEnd(const dataset::EnumeratedRecordBatch& val) {
return IsIterationEnd(val.fragment);
}
};
namespace dataset {
/// \defgroup dataset-scanning Scanning API
///
/// @{
/// \brief A scanner glues together several dataset classes to load in data.
/// The dataset contains a collection of fragments and partitioning rules.
///
/// The fragments identify independently loadable units of data (i.e. each fragment has
/// a potentially unique schema and possibly even format; it should be possible to read
/// fragments in parallel if desired).
///
/// The fragment's format contains the logic necessary to actually create a task to load
/// the fragment into memory. That task may or may not support parallel execution of
/// its own.
///
/// The scanner is then responsible for creating scan tasks from every fragment in the
/// dataset and (potentially) sequencing the loaded record batches together.
///
/// The scanner should not buffer the entire dataset in memory (unless asked); instead
/// it yields record batches as soon as they are ready to scan. Various readahead
/// properties control how much data is allowed to be scanned before pausing to let a
/// slow consumer catch up.
///
/// Today the scanner also handles projection & filtering although that may change in
/// the future.
class ARROW_DS_EXPORT Scanner {
public:
virtual ~Scanner() = default;
/// \brief Apply a visitor to each RecordBatch as it is scanned. If multiple threads
/// are used (via use_threads), the visitor will be invoked from those threads and is
/// responsible for any synchronization.
virtual Status Scan(std::function<Status(TaggedRecordBatch)> visitor) = 0;
/// \brief Convert a Scanner into a Table.
///
/// Use this convenience utility with care. This will serially materialize the
/// Scan result in memory before creating the Table.
virtual Result<std::shared_ptr<Table>> ToTable() = 0;
/// \brief Scan the dataset into a stream of record batches. Each batch is tagged
/// with the fragment it originated from. The batches will arrive in order. The
/// order of fragments is determined by the dataset.
///
/// Note: The scanner will perform some readahead but will avoid materializing too
/// much in memory (this is governed by the readahead options and use_threads option).
/// If the readahead queue fills up then I/O will pause until the calling thread catches
/// up.
virtual Result<TaggedRecordBatchIterator> ScanBatches() = 0;
virtual Result<TaggedRecordBatchGenerator> ScanBatchesAsync() = 0;
virtual Result<TaggedRecordBatchGenerator> ScanBatchesAsync(
::arrow::internal::Executor* cpu_thread_pool) = 0;
/// \brief Scan the dataset into a stream of record batches. Unlike ScanBatches this
/// method may allow record batches to be returned out of order. This allows for more
/// efficient scanning: some fragments may be accessed more quickly than others (e.g.
/// may be cached in RAM or just happen to get scheduled earlier by the I/O)
///
/// To make up for the out-of-order iteration each batch is further tagged with
/// positional information.
virtual Result<EnumeratedRecordBatchIterator> ScanBatchesUnordered() = 0;
virtual Result<EnumeratedRecordBatchGenerator> ScanBatchesUnorderedAsync() = 0;
virtual Result<EnumeratedRecordBatchGenerator> ScanBatchesUnorderedAsync(
::arrow::internal::Executor* cpu_thread_pool) = 0;
/// \brief A convenience to synchronously load the given rows by index.
///
/// Will only consume as many batches as needed from ScanBatches().
virtual Result<std::shared_ptr<Table>> TakeRows(const Array& indices) = 0;
/// \brief Get the first N rows.
virtual Result<std::shared_ptr<Table>> Head(int64_t num_rows) = 0;
/// \brief Count rows matching a predicate.
///
/// This method will push down the predicate and compute the result based on fragment
/// metadata if possible.
virtual Result<int64_t> CountRows() = 0;
/// \brief Convert the Scanner to a RecordBatchReader so it can be
/// easily used with APIs that expect a reader.
virtual Result<std::shared_ptr<RecordBatchReader>> ToRecordBatchReader() = 0;
/// \brief Get the options for this scan.
const std::shared_ptr<ScanOptions>& options() const { return scan_options_; }
/// \brief Get the dataset that this scanner will scan
virtual const std::shared_ptr<Dataset>& dataset() const = 0;
protected:
explicit Scanner(std::shared_ptr<ScanOptions> scan_options)
: scan_options_(std::move(scan_options)) {}
Result<EnumeratedRecordBatchIterator> AddPositioningToInOrderScan(
TaggedRecordBatchIterator scan);
const std::shared_ptr<ScanOptions> scan_options_;
};
/// \brief ScannerBuilder is a factory class to construct a Scanner. It is used
/// to pass information, notably a potential filter expression and a subset of
/// columns to materialize.
class ARROW_DS_EXPORT ScannerBuilder {
public:
explicit ScannerBuilder(std::shared_ptr<Dataset> dataset);
ScannerBuilder(std::shared_ptr<Dataset> dataset,
std::shared_ptr<ScanOptions> scan_options);
ScannerBuilder(std::shared_ptr<Schema> schema, std::shared_ptr<Fragment> fragment,
std::shared_ptr<ScanOptions> scan_options);
/// \brief Make a scanner from a record batch reader.
///
/// The resulting scanner can be scanned only once. This is intended
/// to support writing data from streaming sources or other sources
/// that can be iterated only once.
static std::shared_ptr<ScannerBuilder> FromRecordBatchReader(
std::shared_ptr<RecordBatchReader> reader);
/// \brief Set the subset of columns to materialize.
///
/// Columns which are not referenced may not be read from fragments.
///
/// \param[in] columns list of columns to project. Order and duplicates will
/// be preserved.
///
/// \return Failure if any column name does not exist in the dataset's
/// Schema.
Status Project(std::vector<std::string> columns);
/// \brief Set expressions which will be evaluated to produce the materialized
/// columns.
///
/// Columns which are not referenced may not be read from fragments.
///
/// \param[in] exprs expressions to evaluate to produce columns.
/// \param[in] names list of names for the resulting columns.
///
/// \return Failure if any referenced column does not exist in the dataset's
/// Schema.
Status Project(std::vector<compute::Expression> exprs, std::vector<std::string> names);
/// \brief Set the filter expression to return only rows matching the filter.
///
/// The predicate will be passed down to Sources and corresponding
/// Fragments to exploit predicate pushdown if possible using
/// partition information or Fragment internal metadata, e.g. Parquet statistics.
/// Columns which are not referenced may not be read from fragments.
///
/// \param[in] filter expression to filter rows with.
///
/// \return Failure if any referenced column does not exist in the dataset's
/// Schema.
Status Filter(const compute::Expression& filter);
/// \brief Indicate if the Scanner should make use of the available
/// ThreadPool found in ScanOptions.
Status UseThreads(bool use_threads = true);
/// \brief Limit how many fragments the scanner will read at once
Status FragmentReadahead(int fragment_readahead);
/// \brief Set the maximum number of rows per RecordBatch.
///
/// \param[in] batch_size the maximum number of rows.
/// \returns An error if the batch size is not greater than 0.
///
/// This option provides a control limiting the memory owned by any RecordBatch.
Status BatchSize(int64_t batch_size);
/// \brief Set the pool from which materialized and scanned arrays will be allocated.
Status Pool(MemoryPool* pool);
/// \brief Set fragment-specific scan options.
Status FragmentScanOptions(std::shared_ptr<FragmentScanOptions> fragment_scan_options);
/// \brief Override default backpressure configuration
Status Backpressure(compute::BackpressureOptions backpressure);
/// \brief Return the constructed now-immutable Scanner object
Result<std::shared_ptr<Scanner>> Finish();
const std::shared_ptr<Schema>& schema() const;
const std::shared_ptr<Schema>& projected_schema() const;
private:
std::shared_ptr<Dataset> dataset_;
std::shared_ptr<ScanOptions> scan_options_ = std::make_shared<ScanOptions>();
};
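// Example (illustrative sketch, not part of the original header): a typical
// builder flow. The column names are hypothetical.
//
// arrow::Result<std::shared_ptr<arrow::Table>> ScanToTable(
//     std::shared_ptr<arrow::dataset::Dataset> dataset) {
//   arrow::dataset::ScannerBuilder builder(std::move(dataset));
//   ARROW_RETURN_NOT_OK(builder.Project({"a", "b"}));
//   ARROW_RETURN_NOT_OK(builder.Filter(arrow::compute::greater(
//       arrow::compute::field_ref("a"), arrow::compute::literal(1))));
//   ARROW_RETURN_NOT_OK(builder.UseThreads(true));
//   ARROW_ASSIGN_OR_RAISE(auto scanner, builder.Finish());
//   return scanner->ToTable();
// }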
/// \brief Construct a source ExecNode which yields batches from a dataset scan.
///
/// Does not construct associated filter or project nodes.
/// Yielded batches will be augmented with fragment/batch indices to enable stable
/// ordering for simple ExecPlans.
class ARROW_DS_EXPORT ScanNodeOptions : public compute::ExecNodeOptions {
public:
explicit ScanNodeOptions(std::shared_ptr<Dataset> dataset,
std::shared_ptr<ScanOptions> scan_options,
bool require_sequenced_output = false)
: dataset(std::move(dataset)),
scan_options(std::move(scan_options)),
require_sequenced_output(require_sequenced_output) {}
std::shared_ptr<Dataset> dataset;
std::shared_ptr<ScanOptions> scan_options;
bool require_sequenced_output;
};
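// Example (illustrative sketch, not part of the original header): wiring a scan
// node into an ExecPlan. Assumes the exec-plan API of the same Arrow snapshot
// ("scan" as the registered factory name) and pre-built dataset/scan_options
// and exec_context variables.
//
// arrow::dataset::internal::Initialize();  // register dataset exec nodes
// ARROW_ASSIGN_OR_RAISE(auto plan, arrow::compute::ExecPlan::Make(&exec_context));
// ARROW_ASSIGN_OR_RAISE(
//     auto scan_node,
//     arrow::compute::MakeExecNode(
//         "scan", plan.get(), {},
//         arrow::dataset::ScanNodeOptions{dataset, scan_options}));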
/// @}
namespace internal {
ARROW_DS_EXPORT void InitializeScanner(arrow::compute::ExecFactoryRegistry* registry);
} // namespace internal
} // namespace dataset
} // namespace arrow

File diff suppressed because it is too large

View File

@@ -0,0 +1,106 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This API is EXPERIMENTAL.
#pragma once
#include <memory>
#include <vector>
#include "arrow/compute/type_fwd.h" // IWYU pragma: export
#include "arrow/dataset/visibility.h"
#include "arrow/filesystem/type_fwd.h" // IWYU pragma: export
#include "arrow/type_fwd.h" // IWYU pragma: export
namespace arrow {
namespace dataset {
class Dataset;
class DatasetFactory;
using DatasetVector = std::vector<std::shared_ptr<Dataset>>;
class UnionDataset;
class UnionDatasetFactory;
class Fragment;
using FragmentIterator = Iterator<std::shared_ptr<Fragment>>;
using FragmentVector = std::vector<std::shared_ptr<Fragment>>;
class FragmentScanOptions;
class FileSource;
class FileFormat;
class FileFragment;
class FileWriter;
class FileWriteOptions;
class FileSystemDataset;
class FileSystemDatasetFactory;
struct FileSystemDatasetWriteOptions;
/// \brief Controls what happens if files exist in an output directory during a dataset
/// write
enum class ExistingDataBehavior : int8_t {
/// Deletes all files in a directory the first time that directory is encountered
kDeleteMatchingPartitions,
/// Ignores existing files, overwriting any that happen to have the same name as an
/// output file
kOverwriteOrIgnore,
/// Returns an error if there are any files or subdirectories in the output directory
kError,
};
class InMemoryDataset;
class CsvFileFormat;
class CsvFileWriter;
class CsvFileWriteOptions;
struct CsvFragmentScanOptions;
class IpcFileFormat;
class IpcFileWriter;
class IpcFileWriteOptions;
class IpcFragmentScanOptions;
class ParquetFileFormat;
class ParquetFileFragment;
class ParquetFragmentScanOptions;
class ParquetFileWriter;
class ParquetFileWriteOptions;
class Partitioning;
class PartitioningFactory;
class PartitioningOrFactory;
struct KeyValuePartitioningOptions;
class DirectoryPartitioning;
class HivePartitioning;
struct HivePartitioningOptions;
class FilenamePartitioning;
struct FilenamePartitioningOptions;
struct ScanOptions;
class Scanner;
class ScannerBuilder;
class ScanTask;
using ScanTaskVector = std::vector<std::shared_ptr<ScanTask>>;
using ScanTaskIterator = Iterator<std::shared_ptr<ScanTask>>;
} // namespace dataset
} // namespace arrow

View File

@@ -0,0 +1,50 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This API is EXPERIMENTAL.
#pragma once
#if defined(_WIN32) || defined(__CYGWIN__)
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4251)
#else
#pragma GCC diagnostic ignored "-Wattributes"
#endif
#ifdef ARROW_DS_STATIC
#define ARROW_DS_EXPORT
#elif defined(ARROW_DS_EXPORTING)
#define ARROW_DS_EXPORT __declspec(dllexport)
#else
#define ARROW_DS_EXPORT __declspec(dllimport)
#endif
#define ARROW_DS_NO_EXPORT
#else // Not Windows
#ifndef ARROW_DS_EXPORT
#define ARROW_DS_EXPORT __attribute__((visibility("default")))
#endif
#ifndef ARROW_DS_NO_EXPORT
#define ARROW_DS_NO_EXPORT __attribute__((visibility("hidden")))
#endif
#endif // Non-Windows
#if defined(_MSC_VER)
#pragma warning(pop)
#endif

View File

@@ -0,0 +1,288 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
#include "arrow/array/data.h"
#include "arrow/scalar.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/macros.h"
#include "arrow/util/variant.h" // IWYU pragma: export
#include "arrow/util/visibility.h"
namespace arrow {
class Array;
class ChunkedArray;
class RecordBatch;
class Table;
/// \brief A descriptor type that gives the shape (array or scalar) and
/// DataType of a Value, but without the data
struct ARROW_EXPORT ValueDescr {
std::shared_ptr<DataType> type;
enum Shape {
/// \brief Either Array or Scalar
ANY,
/// \brief Array type
ARRAY,
/// \brief Only Scalar arguments supported
SCALAR
};
Shape shape;
ValueDescr() : shape(ANY) {}
ValueDescr(std::shared_ptr<DataType> type, ValueDescr::Shape shape)
: type(std::move(type)), shape(shape) {}
ValueDescr(std::shared_ptr<DataType> type) // NOLINT implicit conversion
: type(std::move(type)), shape(ValueDescr::ANY) {}
/// \brief Convenience constructor for ANY descr
static ValueDescr Any(std::shared_ptr<DataType> type) {
return ValueDescr(std::move(type), ANY);
}
/// \brief Convenience constructor for Value::ARRAY descr
static ValueDescr Array(std::shared_ptr<DataType> type) {
return ValueDescr(std::move(type), ARRAY);
}
/// \brief Convenience constructor for Value::SCALAR descr
static ValueDescr Scalar(std::shared_ptr<DataType> type) {
return ValueDescr(std::move(type), SCALAR);
}
bool operator==(const ValueDescr& other) const {
if (shape != other.shape) return false;
if (type == other.type) return true;
return type && type->Equals(other.type);
}
bool operator!=(const ValueDescr& other) const { return !(*this == other); }
std::string ToString() const;
static std::string ToString(const std::vector<ValueDescr>&);
ARROW_EXPORT friend void PrintTo(const ValueDescr&, std::ostream*);
};
/// \brief For use with scalar functions, returns the broadcasted Value::Shape
/// given a vector of value descriptors. Returns SCALAR unless any value is
/// ARRAY.
ARROW_EXPORT
ValueDescr::Shape GetBroadcastShape(const std::vector<ValueDescr>& args);
/// \class Datum
/// \brief Variant type for various Arrow C++ data structures
struct ARROW_EXPORT Datum {
enum Kind { NONE, SCALAR, ARRAY, CHUNKED_ARRAY, RECORD_BATCH, TABLE };
struct Empty {};
// Datum variants may have a length. This special value indicates that the
// current variant does not have a length.
static constexpr int64_t kUnknownLength = -1;
util::Variant<Empty, std::shared_ptr<Scalar>, std::shared_ptr<ArrayData>,
std::shared_ptr<ChunkedArray>, std::shared_ptr<RecordBatch>,
std::shared_ptr<Table>>
value;
/// \brief Empty datum, to be populated elsewhere
Datum() = default;
Datum(const Datum& other) = default;
Datum& operator=(const Datum& other) = default;
Datum(Datum&& other) = default;
Datum& operator=(Datum&& other) = default;
Datum(std::shared_ptr<Scalar> value) // NOLINT implicit conversion
: value(std::move(value)) {}
Datum(std::shared_ptr<ArrayData> value) // NOLINT implicit conversion
: value(std::move(value)) {}
Datum(ArrayData arg) // NOLINT implicit conversion
: value(std::make_shared<ArrayData>(std::move(arg))) {}
Datum(const Array& value); // NOLINT implicit conversion
Datum(const std::shared_ptr<Array>& value); // NOLINT implicit conversion
Datum(std::shared_ptr<ChunkedArray> value); // NOLINT implicit conversion
Datum(std::shared_ptr<RecordBatch> value); // NOLINT implicit conversion
Datum(std::shared_ptr<Table> value); // NOLINT implicit conversion
// Explicit constructors from const-refs. Can be expensive, prefer the
// shared_ptr constructors
explicit Datum(const ChunkedArray& value);
explicit Datum(const RecordBatch& value);
explicit Datum(const Table& value);
// Cast from subtypes of Array or Scalar to Datum
template <typename T, bool IsArray = std::is_base_of<Array, T>::value,
bool IsScalar = std::is_base_of<Scalar, T>::value,
typename = enable_if_t<IsArray || IsScalar>>
Datum(std::shared_ptr<T> value) // NOLINT implicit conversion
: Datum(std::shared_ptr<typename std::conditional<IsArray, Array, Scalar>::type>(
std::move(value))) {}
// Cast from subtypes of Array or Scalar to Datum
template <typename T, typename TV = typename std::remove_reference<T>::type,
bool IsArray = std::is_base_of<Array, T>::value,
bool IsScalar = std::is_base_of<Scalar, T>::value,
typename = enable_if_t<IsArray || IsScalar>>
Datum(T&& value) // NOLINT implicit conversion
: Datum(std::make_shared<TV>(std::forward<T>(value))) {}
// Convenience constructors
explicit Datum(bool value);
explicit Datum(int8_t value);
explicit Datum(uint8_t value);
explicit Datum(int16_t value);
explicit Datum(uint16_t value);
explicit Datum(int32_t value);
explicit Datum(uint32_t value);
explicit Datum(int64_t value);
explicit Datum(uint64_t value);
explicit Datum(float value);
explicit Datum(double value);
explicit Datum(std::string value);
explicit Datum(const char* value);
Datum::Kind kind() const {
switch (this->value.index()) {
case 0:
return Datum::NONE;
case 1:
return Datum::SCALAR;
case 2:
return Datum::ARRAY;
case 3:
return Datum::CHUNKED_ARRAY;
case 4:
return Datum::RECORD_BATCH;
case 5:
return Datum::TABLE;
default:
return Datum::NONE;
}
}
const std::shared_ptr<ArrayData>& array() const {
return util::get<std::shared_ptr<ArrayData>>(this->value);
}
/// \brief The sum of bytes in each buffer referenced by the datum
/// Note: Scalars report a size of 0
/// \see arrow::util::TotalBufferSize for caveats
int64_t TotalBufferSize() const;
ArrayData* mutable_array() const { return this->array().get(); }
std::shared_ptr<Array> make_array() const;
const std::shared_ptr<ChunkedArray>& chunked_array() const {
return util::get<std::shared_ptr<ChunkedArray>>(this->value);
}
const std::shared_ptr<RecordBatch>& record_batch() const {
return util::get<std::shared_ptr<RecordBatch>>(this->value);
}
const std::shared_ptr<Table>& table() const {
return util::get<std::shared_ptr<Table>>(this->value);
}
const std::shared_ptr<Scalar>& scalar() const {
return util::get<std::shared_ptr<Scalar>>(this->value);
}
template <typename ExactType>
std::shared_ptr<ExactType> array_as() const {
return internal::checked_pointer_cast<ExactType>(this->make_array());
}
template <typename ExactType>
const ExactType& scalar_as() const {
return internal::checked_cast<const ExactType&>(*this->scalar());
}
bool is_array() const { return this->kind() == Datum::ARRAY; }
bool is_arraylike() const {
return this->kind() == Datum::ARRAY || this->kind() == Datum::CHUNKED_ARRAY;
}
bool is_scalar() const { return this->kind() == Datum::SCALAR; }
/// \brief True if Datum contains a scalar or array-like data
bool is_value() const { return this->is_arraylike() || this->is_scalar(); }
int64_t null_count() const;
/// \brief Return the shape (array or scalar) and type for supported kinds
/// (ARRAY, CHUNKED_ARRAY, and SCALAR). Debug asserts otherwise
ValueDescr descr() const;
/// \brief Return the shape (array or scalar) for supported kinds (ARRAY,
/// CHUNKED_ARRAY, and SCALAR). Debug asserts otherwise
ValueDescr::Shape shape() const;
/// \brief The value type of the variant, if any
///
/// \return nullptr if no type
const std::shared_ptr<DataType>& type() const;
/// \brief The schema of the variant, if any
///
/// \return nullptr if no schema
const std::shared_ptr<Schema>& schema() const;
/// \brief The value length of the variant, if any
///
/// \return kUnknownLength if no type
int64_t length() const;
/// \brief The array chunks of the variant, if any
///
/// \return empty if not arraylike
ArrayVector chunks() const;
bool Equals(const Datum& other) const;
bool operator==(const Datum& other) const { return Equals(other); }
bool operator!=(const Datum& other) const { return !Equals(other); }
std::string ToString() const;
ARROW_EXPORT friend void PrintTo(const Datum&, std::ostream*);
};
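// Example (illustrative sketch, not part of the original header): constructing
// Datums and querying which variant alternative they hold.
//
// #include <cassert>
// #include "arrow/array/util.h"  // arrow::MakeArrayFromScalar
//
// arrow::Status DatumBasics() {
//   arrow::Datum scalar_datum(int64_t{42});  // holds an Int64Scalar
//   ARROW_ASSIGN_OR_RAISE(
//       auto array,
//       arrow::MakeArrayFromScalar(*scalar_datum.scalar(), /*length=*/3));
//   arrow::Datum array_datum(array);  // holds ArrayData
//   // kind() reports which alternative the variant holds.
//   assert(scalar_datum.kind() == arrow::Datum::SCALAR);
//   assert(array_datum.is_array());
//   return arrow::Status::OK();
// }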
} // namespace arrow

View File

@@ -0,0 +1,240 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include "arrow/io/type_fwd.h"
#include "arrow/type_fwd.h"
#include "arrow/util/compare.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
class MemoryManager;
/// \brief EXPERIMENTAL: Abstract interface for hardware devices
///
/// This object represents a device with access to some memory spaces.
/// When handling a Buffer or raw memory address, it allows deciding in which
/// context the raw memory address should be interpreted
/// (e.g. CPU-accessible memory, or embedded memory on some particular GPU).
class ARROW_EXPORT Device : public std::enable_shared_from_this<Device>,
public util::EqualityComparable<Device> {
public:
virtual ~Device();
/// \brief A shorthand for this device's type.
///
/// The returned value is different for each device class, but is the
/// same for all instances of a given class. It can be used as a replacement
/// for RTTI.
virtual const char* type_name() const = 0;
/// \brief A human-readable description of the device.
///
/// The returned value should be detailed enough to distinguish between
/// different instances, where necessary.
virtual std::string ToString() const = 0;
/// \brief Whether this instance points to the same device as another one.
virtual bool Equals(const Device&) const = 0;
/// \brief Whether this device is the main CPU device.
///
/// This shorthand method is very useful when deciding whether a memory address
/// is CPU-accessible.
bool is_cpu() const { return is_cpu_; }
/// \brief Return a MemoryManager instance tied to this device
///
/// The returned instance uses default parameters for this device type's
/// MemoryManager implementation. Some devices also allow constructing
/// MemoryManager instances with non-default parameters.
virtual std::shared_ptr<MemoryManager> default_memory_manager() = 0;
protected:
ARROW_DISALLOW_COPY_AND_ASSIGN(Device);
explicit Device(bool is_cpu = false) : is_cpu_(is_cpu) {}
bool is_cpu_;
};
/// \brief EXPERIMENTAL: An object that provides memory management primitives
///
/// A MemoryManager is always tied to a particular Device instance.
/// It can also have additional parameters (such as a MemoryPool to
/// allocate CPU memory).
class ARROW_EXPORT MemoryManager : public std::enable_shared_from_this<MemoryManager> {
public:
virtual ~MemoryManager();
/// \brief The device this MemoryManager is tied to
const std::shared_ptr<Device>& device() const { return device_; }
/// \brief Whether this MemoryManager is tied to the main CPU device.
///
/// This shorthand method is very useful when deciding whether a memory address
/// is CPU-accessible.
bool is_cpu() const { return device_->is_cpu(); }
/// \brief Create a RandomAccessFile to read a particular buffer.
///
/// The given buffer must be tied to this MemoryManager.
///
/// See also the Buffer::GetReader shorthand.
virtual Result<std::shared_ptr<io::RandomAccessFile>> GetBufferReader(
std::shared_ptr<Buffer> buf) = 0;
/// \brief Create an OutputStream to write to a particular buffer.
///
/// The given buffer must be mutable and tied to this MemoryManager.
/// The returned stream object writes into the buffer's underlying memory
/// (but it won't resize it).
///
/// See also the Buffer::GetWriter shorthand.
virtual Result<std::shared_ptr<io::OutputStream>> GetBufferWriter(
std::shared_ptr<Buffer> buf) = 0;
/// \brief Allocate a (mutable) Buffer
///
/// The buffer will be allocated in the device's memory.
virtual Result<std::unique_ptr<Buffer>> AllocateBuffer(int64_t size) = 0;
/// \brief Copy a Buffer to a destination MemoryManager
///
/// See also the Buffer::Copy shorthand.
static Result<std::shared_ptr<Buffer>> CopyBuffer(
const std::shared_ptr<Buffer>& source, const std::shared_ptr<MemoryManager>& to);
/// \brief Copy a non-owned Buffer to a destination MemoryManager
///
/// This is useful for cases where the source memory area is externally managed
/// (its lifetime is not tied to the source Buffer); otherwise, please use CopyBuffer().
static Result<std::unique_ptr<Buffer>> CopyNonOwned(
const Buffer& source, const std::shared_ptr<MemoryManager>& to);
/// \brief Make a no-copy Buffer view in a destination MemoryManager
///
/// See also the Buffer::View shorthand.
static Result<std::shared_ptr<Buffer>> ViewBuffer(
const std::shared_ptr<Buffer>& source, const std::shared_ptr<MemoryManager>& to);
protected:
ARROW_DISALLOW_COPY_AND_ASSIGN(MemoryManager);
explicit MemoryManager(const std::shared_ptr<Device>& device) : device_(device) {}
// Default implementations always return nullptr; they should be overridden
// by subclasses that support data transfer.
// (returning nullptr means unsupported copy / view)
// In CopyBufferFrom and ViewBufferFrom, the `from` parameter is guaranteed to
// be equal to `buf->memory_manager()`.
virtual Result<std::shared_ptr<Buffer>> CopyBufferFrom(
const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& from);
virtual Result<std::shared_ptr<Buffer>> CopyBufferTo(
const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to);
virtual Result<std::unique_ptr<Buffer>> CopyNonOwnedFrom(
const Buffer& buf, const std::shared_ptr<MemoryManager>& from);
virtual Result<std::unique_ptr<Buffer>> CopyNonOwnedTo(
const Buffer& buf, const std::shared_ptr<MemoryManager>& to);
virtual Result<std::shared_ptr<Buffer>> ViewBufferFrom(
const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& from);
virtual Result<std::shared_ptr<Buffer>> ViewBufferTo(
const std::shared_ptr<Buffer>& buf, const std::shared_ptr<MemoryManager>& to);
std::shared_ptr<Device> device_;
};
// ----------------------------------------------------------------------
// CPU backend implementation
class ARROW_EXPORT CPUDevice : public Device {
public:
const char* type_name() const override;
std::string ToString() const override;
bool Equals(const Device&) const override;
std::shared_ptr<MemoryManager> default_memory_manager() override;
/// \brief Return the global CPUDevice instance
static std::shared_ptr<Device> Instance();
/// \brief Create a MemoryManager
///
/// The returned MemoryManager will use the given MemoryPool for allocations.
static std::shared_ptr<MemoryManager> memory_manager(MemoryPool* pool);
protected:
CPUDevice() : Device(true) {}
};
class ARROW_EXPORT CPUMemoryManager : public MemoryManager {
public:
Result<std::shared_ptr<io::RandomAccessFile>> GetBufferReader(
std::shared_ptr<Buffer> buf) override;
Result<std::shared_ptr<io::OutputStream>> GetBufferWriter(
std::shared_ptr<Buffer> buf) override;
Result<std::unique_ptr<Buffer>> AllocateBuffer(int64_t size) override;
/// \brief Return the MemoryPool associated with this MemoryManager.
MemoryPool* pool() const { return pool_; }
protected:
CPUMemoryManager(const std::shared_ptr<Device>& device, MemoryPool* pool)
: MemoryManager(device), pool_(pool) {}
static std::shared_ptr<MemoryManager> Make(const std::shared_ptr<Device>& device,
MemoryPool* pool = default_memory_pool());
Result<std::shared_ptr<Buffer>> CopyBufferFrom(
const std::shared_ptr<Buffer>& buf,
const std::shared_ptr<MemoryManager>& from) override;
Result<std::shared_ptr<Buffer>> CopyBufferTo(
const std::shared_ptr<Buffer>& buf,
const std::shared_ptr<MemoryManager>& to) override;
Result<std::unique_ptr<Buffer>> CopyNonOwnedFrom(
const Buffer& buf, const std::shared_ptr<MemoryManager>& from) override;
Result<std::unique_ptr<Buffer>> CopyNonOwnedTo(
const Buffer& buf, const std::shared_ptr<MemoryManager>& to) override;
Result<std::shared_ptr<Buffer>> ViewBufferFrom(
const std::shared_ptr<Buffer>& buf,
const std::shared_ptr<MemoryManager>& from) override;
Result<std::shared_ptr<Buffer>> ViewBufferTo(
const std::shared_ptr<Buffer>& buf,
const std::shared_ptr<MemoryManager>& to) override;
MemoryPool* pool_;
friend std::shared_ptr<MemoryManager> CPUDevice::memory_manager(MemoryPool* pool);
friend ARROW_EXPORT std::shared_ptr<MemoryManager> default_cpu_memory_manager();
};
/// \brief Return the default CPU MemoryManager instance
///
/// The returned singleton instance uses the default MemoryPool.
/// This function is a faster spelling of
/// `CPUDevice::Instance()->default_memory_manager()`.
ARROW_EXPORT
std::shared_ptr<MemoryManager> default_cpu_memory_manager();
} // namespace arrow
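
Usage note (not part of the diff): a minimal sketch of allocating through the default CPU MemoryManager and copying through the generic API (a plain memcpy for CPU-to-CPU); RoundTrip is an illustrative name and error handling relies on the ARROW_ASSIGN_OR_RAISE macro.

#include <memory>
#include <utility>
#include "arrow/buffer.h"
#include "arrow/device.h"
#include "arrow/result.h"

// Illustrative helper: allocate 64 bytes on the CPU device, then copy the
// buffer between (here, identical) memory managers.
arrow::Result<std::shared_ptr<arrow::Buffer>> RoundTrip() {
  std::shared_ptr<arrow::MemoryManager> mm = arrow::default_cpu_memory_manager();
  ARROW_ASSIGN_OR_RAISE(std::unique_ptr<arrow::Buffer> buf, mm->AllocateBuffer(64));
  return arrow::MemoryManager::CopyBuffer(std::move(buf), mm);
}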


@@ -0,0 +1,164 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
/// User-defined extension types.
/// \since 0.13.0
#pragma once
#include <memory>
#include <string>
#include "arrow/array/array_base.h"
#include "arrow/array/data.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_fwd.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \brief The base class for custom / user-defined types.
class ARROW_EXPORT ExtensionType : public DataType {
public:
static constexpr Type::type type_id = Type::EXTENSION;
static constexpr const char* type_name() { return "extension"; }
/// \brief The type of array used to represent this extension type's data
const std::shared_ptr<DataType>& storage_type() const { return storage_type_; }
/// \brief Return the type category of the storage type
Type::type storage_id() const override { return storage_type_->id(); }
DataTypeLayout layout() const override;
std::string ToString() const override;
std::string name() const override { return "extension"; }
/// \brief Unique name of the extension type, used to identify the type for
/// serialization
/// \return the string name of the extension
virtual std::string extension_name() const = 0;
/// \brief Determine if two instances of the same extension types are
/// equal. Invoked from ExtensionType::Equals
/// \param[in] other the type to compare this type with
/// \return bool true if type instances are equal
virtual bool ExtensionEquals(const ExtensionType& other) const = 0;
/// \brief Wrap built-in Array type in a user-defined ExtensionArray instance
/// \param[in] data the physical storage for the extension type
virtual std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const = 0;
/// \brief Create an instance of the ExtensionType given the actual storage
/// type and the serialized representation
/// \param[in] storage_type the physical storage type of the extension
/// \param[in] serialized_data the serialized representation produced by
/// Serialize
virtual Result<std::shared_ptr<DataType>> Deserialize(
std::shared_ptr<DataType> storage_type,
const std::string& serialized_data) const = 0;
/// \brief Create a serialized representation of the extension type's
/// metadata. The storage type will be handled automatically in IPC code
/// paths
/// \return the serialized representation
virtual std::string Serialize() const = 0;
/// \brief Wrap the given storage array as an extension array
static std::shared_ptr<Array> WrapArray(const std::shared_ptr<DataType>& ext_type,
const std::shared_ptr<Array>& storage);
/// \brief Wrap the given chunked storage array as a chunked extension array
static std::shared_ptr<ChunkedArray> WrapArray(
const std::shared_ptr<DataType>& ext_type,
const std::shared_ptr<ChunkedArray>& storage);
protected:
explicit ExtensionType(std::shared_ptr<DataType> storage_type)
: DataType(Type::EXTENSION), storage_type_(storage_type) {}
std::shared_ptr<DataType> storage_type_;
};
/// \brief Base array class for user-defined extension types
class ARROW_EXPORT ExtensionArray : public Array {
public:
/// \brief Construct an ExtensionArray from an ArrayData.
///
/// The ArrayData must have the right ExtensionType.
explicit ExtensionArray(const std::shared_ptr<ArrayData>& data);
/// \brief Construct an ExtensionArray from a type and the underlying storage.
ExtensionArray(const std::shared_ptr<DataType>& type,
const std::shared_ptr<Array>& storage);
const ExtensionType* extension_type() const {
return internal::checked_cast<const ExtensionType*>(data_->type.get());
}
/// \brief The physical storage for the extension array
const std::shared_ptr<Array>& storage() const { return storage_; }
protected:
void SetData(const std::shared_ptr<ArrayData>& data);
std::shared_ptr<Array> storage_;
};
class ARROW_EXPORT ExtensionTypeRegistry {
public:
/// \brief Provide access to the global registry, allowing code to control for
/// race conditions in registry teardown when some types need to be
/// unregistered and destroyed first.
static std::shared_ptr<ExtensionTypeRegistry> GetGlobalRegistry();
virtual ~ExtensionTypeRegistry() = default;
virtual Status RegisterType(std::shared_ptr<ExtensionType> type) = 0;
virtual Status UnregisterType(const std::string& type_name) = 0;
virtual std::shared_ptr<ExtensionType> GetType(const std::string& type_name) = 0;
};
/// \brief Register an extension type globally. The name returned by the type's
/// extension_name() method should be unique. This method is thread-safe
/// \param[in] type an instance of the extension type
/// \return Status
ARROW_EXPORT
Status RegisterExtensionType(std::shared_ptr<ExtensionType> type);
/// \brief Delete an extension type from the global registry. This method is
/// thread-safe
/// \param[in] type_name the unique name of a registered extension type
/// \return Status error if the type name is unknown
ARROW_EXPORT
Status UnregisterExtensionType(const std::string& type_name);
/// \brief Retrieve an extension type from the global registry. Returns nullptr
/// if not found. This method is thread-safe
/// \return the globally-registered extension type
ARROW_EXPORT
std::shared_ptr<ExtensionType> GetExtensionType(const std::string& type_name);
ARROW_EXPORT extern const char kExtensionTypeKeyName[];
ARROW_EXPORT extern const char kExtensionMetadataKeyName[];
} // namespace arrow
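
Usage note (not part of the diff): a minimal sketch of a user-defined extension type backed by fixed_size_binary(16). UuidType and the "example.uuid" name are hypothetical, not an existing Arrow type; they only illustrate the pure virtuals above and the registry entry points.

#include <memory>
#include <string>
#include "arrow/extension_type.h"

// Hypothetical extension type (illustrative only): a 16-byte "uuid"
// stored as fixed_size_binary(16).
class UuidType : public arrow::ExtensionType {
 public:
  UuidType() : arrow::ExtensionType(arrow::fixed_size_binary(16)) {}

  std::string extension_name() const override { return "example.uuid"; }

  bool ExtensionEquals(const arrow::ExtensionType& other) const override {
    return other.extension_name() == extension_name();
  }

  std::shared_ptr<arrow::Array> MakeArray(
      std::shared_ptr<arrow::ArrayData> data) const override {
    return std::make_shared<arrow::ExtensionArray>(data);
  }

  arrow::Result<std::shared_ptr<arrow::DataType>> Deserialize(
      std::shared_ptr<arrow::DataType> storage_type,
      const std::string& serialized_data) const override {
    if (serialized_data != "uuid-v1") {
      return arrow::Status::Invalid("unexpected uuid metadata");
    }
    return std::make_shared<UuidType>();
  }

  // The serialized metadata travels alongside the storage type in IPC.
  std::string Serialize() const override { return "uuid-v1"; }
};

Registering an instance once at startup, e.g. arrow::RegisterExtensionType(std::make_shared<UuidType>()), makes the name resolvable through GetExtensionType.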


@@ -0,0 +1,28 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/util/config.h" // IWYU pragma: export
#include "arrow/filesystem/filesystem.h" // IWYU pragma: export
#include "arrow/filesystem/hdfs.h" // IWYU pragma: export
#include "arrow/filesystem/localfs.h" // IWYU pragma: export
#include "arrow/filesystem/mockfs.h" // IWYU pragma: export
#ifdef ARROW_S3
#include "arrow/filesystem/s3fs.h" // IWYU pragma: export
#endif


@@ -0,0 +1,541 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <chrono>
#include <cstdint>
#include <functional>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "arrow/filesystem/type_fwd.h"
#include "arrow/io/interfaces.h"
#include "arrow/type_fwd.h"
#include "arrow/util/compare.h"
#include "arrow/util/macros.h"
#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"
#include "arrow/util/windows_fixup.h"
namespace arrow {
namespace fs {
// A system clock time point expressed as a 64-bit (or more) number of
// nanoseconds since the epoch.
using TimePoint =
std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;
ARROW_EXPORT std::string ToString(FileType);
ARROW_EXPORT std::ostream& operator<<(std::ostream& os, FileType);
static const int64_t kNoSize = -1;
static const TimePoint kNoTime = TimePoint(TimePoint::duration(-1));
/// \brief FileSystem entry info
struct ARROW_EXPORT FileInfo : public util::EqualityComparable<FileInfo> {
FileInfo() = default;
FileInfo(FileInfo&&) = default;
FileInfo& operator=(FileInfo&&) = default;
FileInfo(const FileInfo&) = default;
FileInfo& operator=(const FileInfo&) = default;
explicit FileInfo(std::string path, FileType type = FileType::Unknown)
: path_(std::move(path)), type_(type) {}
/// The file type
FileType type() const { return type_; }
void set_type(FileType type) { type_ = type; }
/// The full file path in the filesystem
const std::string& path() const { return path_; }
void set_path(std::string path) { path_ = std::move(path); }
/// The file base name (component after the last directory separator)
std::string base_name() const;
/// The directory base name (component before the file base name).
std::string dir_name() const;
/// The size in bytes, if available
///
/// Only regular files are guaranteed to have a size.
int64_t size() const { return size_; }
void set_size(int64_t size) { size_ = size; }
/// The file extension (excluding the dot)
std::string extension() const;
/// The time of last modification, if available
TimePoint mtime() const { return mtime_; }
void set_mtime(TimePoint mtime) { mtime_ = mtime; }
bool IsFile() const { return type_ == FileType::File; }
bool IsDirectory() const { return type_ == FileType::Directory; }
bool Equals(const FileInfo& other) const {
return type() == other.type() && path() == other.path() && size() == other.size() &&
mtime() == other.mtime();
}
std::string ToString() const;
/// Function object implementing less-than comparison and hashing by
/// path, to support sorting infos, using them as keys, and other
/// interactions with the STL.
struct ByPath {
bool operator()(const FileInfo& l, const FileInfo& r) const {
return l.path() < r.path();
}
size_t operator()(const FileInfo& i) const {
return std::hash<std::string>{}(i.path());
}
};
protected:
std::string path_;
FileType type_ = FileType::Unknown;
int64_t size_ = kNoSize;
TimePoint mtime_ = kNoTime;
};
ARROW_EXPORT std::ostream& operator<<(std::ostream& os, const FileInfo&);
/// \brief File selector for filesystem APIs
struct ARROW_EXPORT FileSelector {
/// The directory in which to select files.
/// If the path exists but doesn't point to a directory, this should be an error.
std::string base_dir;
/// The behavior if `base_dir` isn't found in the filesystem. If false,
/// an error is returned. If true, an empty selection is returned.
bool allow_not_found;
/// Whether to recurse into subdirectories.
bool recursive;
/// The maximum number of subdirectories to recurse into.
int32_t max_recursion;
FileSelector() : allow_not_found(false), recursive(false), max_recursion(INT32_MAX) {}
};
/// \brief FileSystem, path pair
struct ARROW_EXPORT FileLocator {
std::shared_ptr<FileSystem> filesystem;
std::string path;
};
using FileInfoVector = std::vector<FileInfo>;
using FileInfoGenerator = std::function<Future<FileInfoVector>()>;
} // namespace fs
template <>
struct IterationTraits<fs::FileInfoVector> {
static fs::FileInfoVector End() { return {}; }
static bool IsEnd(const fs::FileInfoVector& val) { return val.empty(); }
};
namespace fs {
/// \brief Abstract file system API
class ARROW_EXPORT FileSystem : public std::enable_shared_from_this<FileSystem> {
public:
virtual ~FileSystem();
virtual std::string type_name() const = 0;
/// EXPERIMENTAL: The IOContext associated with this filesystem.
const io::IOContext& io_context() const { return io_context_; }
/// Normalize path for the given filesystem
///
/// The default implementation of this method is a no-op, but subclasses
/// may allow normalizing irregular path forms (such as Windows local paths).
virtual Result<std::string> NormalizePath(std::string path);
virtual bool Equals(const FileSystem& other) const = 0;
virtual bool Equals(const std::shared_ptr<FileSystem>& other) const {
return Equals(*other);
}
/// Get info for the given target.
///
/// Any symlink is automatically dereferenced, recursively.
/// A nonexistent or unreachable file returns an Ok status and
/// has a FileType of value NotFound. An error status indicates
/// a truly exceptional condition (low-level I/O error, etc.).
virtual Result<FileInfo> GetFileInfo(const std::string& path) = 0;
/// Same, for many targets at once.
virtual Result<FileInfoVector> GetFileInfo(const std::vector<std::string>& paths);
/// Same, according to a selector.
///
/// The selector's base directory will not be part of the results, even if
/// it exists.
/// If it doesn't exist, see `FileSelector::allow_not_found`.
virtual Result<FileInfoVector> GetFileInfo(const FileSelector& select) = 0;
/// Async version of GetFileInfo
virtual Future<FileInfoVector> GetFileInfoAsync(const std::vector<std::string>& paths);
/// Streaming async version of GetFileInfo
///
/// The returned generator is not async-reentrant, i.e. you need to wait for
/// the returned future to complete before calling the generator again.
virtual FileInfoGenerator GetFileInfoGenerator(const FileSelector& select);
/// Create a directory and subdirectories.
///
/// This function succeeds if the directory already exists.
virtual Status CreateDir(const std::string& path, bool recursive = true) = 0;
/// Delete a directory and its contents, recursively.
virtual Status DeleteDir(const std::string& path) = 0;
/// Delete a directory's contents, recursively.
///
/// Like DeleteDir, but doesn't delete the directory itself.
/// Passing an empty path ("" or "/") is disallowed, see DeleteRootDirContents.
virtual Status DeleteDirContents(const std::string& path,
bool missing_dir_ok = false) = 0;
/// Async version of DeleteDirContents.
virtual Future<> DeleteDirContentsAsync(const std::string& path,
bool missing_dir_ok = false);
/// EXPERIMENTAL: Delete the root directory's contents, recursively.
///
/// Implementations may decide to raise an error if this operation is
/// too dangerous.
// NOTE: may decide to remove this if it's deemed not useful
virtual Status DeleteRootDirContents() = 0;
/// Delete a file.
virtual Status DeleteFile(const std::string& path) = 0;
/// Delete many files.
///
/// The default implementation issues individual delete operations in sequence.
virtual Status DeleteFiles(const std::vector<std::string>& paths);
/// Move / rename a file or directory.
///
/// If the destination exists:
/// - if it is a non-empty directory, an error is returned
/// - otherwise, if it has the same type as the source, it is replaced
/// - otherwise, behavior is unspecified (implementation-dependent).
virtual Status Move(const std::string& src, const std::string& dest) = 0;
/// Copy a file.
///
/// If the destination exists and is a directory, an error is returned.
/// Otherwise, it is replaced.
virtual Status CopyFile(const std::string& src, const std::string& dest) = 0;
/// Open an input stream for sequential reading.
virtual Result<std::shared_ptr<io::InputStream>> OpenInputStream(
const std::string& path) = 0;
/// Open an input stream for sequential reading.
///
/// This override assumes the given FileInfo validly represents the file's
/// characteristics, and may optimize access depending on them (for example
/// avoid querying the file size or its existence).
virtual Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info);
/// Open an input file for random access reading.
virtual Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
const std::string& path) = 0;
/// Open an input file for random access reading.
///
/// This override assumes the given FileInfo validly represents the file's
/// characteristics, and may optimize access depending on them (for example
/// avoid querying the file size or its existence).
virtual Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
const FileInfo& info);
/// Async version of OpenInputStream
virtual Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
const std::string& path);
/// Async version of OpenInputStream
virtual Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
const FileInfo& info);
/// Async version of OpenInputFile
virtual Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
const std::string& path);
/// Async version of OpenInputFile
virtual Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
const FileInfo& info);
/// Open an output stream for sequential writing.
///
/// If the target already exists, existing data is truncated.
virtual Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata) = 0;
Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(const std::string& path);
/// Open an output stream for appending.
///
/// If the target doesn't exist, a new empty file is created.
///
/// Note: some filesystem implementations do not support efficient appending
/// to an existing file, in which case this method will return NotImplemented.
/// Consider writing to multiple files (using e.g. the dataset layer) instead.
virtual Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata) = 0;
Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(const std::string& path);
protected:
explicit FileSystem(const io::IOContext& io_context = io::default_io_context())
: io_context_(io_context) {}
io::IOContext io_context_;
// Whether metadata operations (such as GetFileInfo or OpenInputStream)
// are cheap enough that the default async variants don't bother with
// a thread pool.
bool default_async_is_sync_ = true;
};
/// \brief A FileSystem implementation that delegates to another
/// implementation after prepending a fixed base path.
///
/// This is useful to expose a logical view of a subtree of a filesystem,
/// for example a directory in a LocalFileSystem.
/// This works on abstract paths, i.e. paths using forward slashes and
/// a single root "/". Windows paths are not guaranteed to work.
/// This makes no security guarantee. For example, symlinks may allow
/// "escaping" the subtree and accessing other parts of the underlying filesystem.
class ARROW_EXPORT SubTreeFileSystem : public FileSystem {
public:
// This constructor may abort if base_path is invalid.
explicit SubTreeFileSystem(const std::string& base_path,
std::shared_ptr<FileSystem> base_fs);
~SubTreeFileSystem() override;
std::string type_name() const override { return "subtree"; }
std::string base_path() const { return base_path_; }
std::shared_ptr<FileSystem> base_fs() const { return base_fs_; }
Result<std::string> NormalizePath(std::string path) override;
bool Equals(const FileSystem& other) const override;
/// \cond FALSE
using FileSystem::GetFileInfo;
/// \endcond
Result<FileInfo> GetFileInfo(const std::string& path) override;
Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;
FileInfoGenerator GetFileInfoGenerator(const FileSelector& select) override;
Status CreateDir(const std::string& path, bool recursive = true) override;
Status DeleteDir(const std::string& path) override;
Status DeleteDirContents(const std::string& path, bool missing_dir_ok = false) override;
Status DeleteRootDirContents() override;
Status DeleteFile(const std::string& path) override;
Status Move(const std::string& src, const std::string& dest) override;
Status CopyFile(const std::string& src, const std::string& dest) override;
Result<std::shared_ptr<io::InputStream>> OpenInputStream(
const std::string& path) override;
Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info) override;
Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
const std::string& path) override;
Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
const FileInfo& info) override;
Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
const std::string& path) override;
Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
const FileInfo& info) override;
Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
const std::string& path) override;
Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
const FileInfo& info) override;
Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
protected:
SubTreeFileSystem() {}
const std::string base_path_;
std::shared_ptr<FileSystem> base_fs_;
Result<std::string> PrependBase(const std::string& s) const;
Result<std::string> PrependBaseNonEmpty(const std::string& s) const;
Result<std::string> StripBase(const std::string& s) const;
Status FixInfo(FileInfo* info) const;
static Result<std::string> NormalizeBasePath(
std::string base_path, const std::shared_ptr<FileSystem>& base_fs);
};
/// \brief A FileSystem implementation that delegates to another
/// implementation but inserts latencies at various points.
class ARROW_EXPORT SlowFileSystem : public FileSystem {
public:
SlowFileSystem(std::shared_ptr<FileSystem> base_fs,
std::shared_ptr<io::LatencyGenerator> latencies);
SlowFileSystem(std::shared_ptr<FileSystem> base_fs, double average_latency);
SlowFileSystem(std::shared_ptr<FileSystem> base_fs, double average_latency,
int32_t seed);
std::string type_name() const override { return "slow"; }
bool Equals(const FileSystem& other) const override;
using FileSystem::GetFileInfo;
Result<FileInfo> GetFileInfo(const std::string& path) override;
Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;
Status CreateDir(const std::string& path, bool recursive = true) override;
Status DeleteDir(const std::string& path) override;
Status DeleteDirContents(const std::string& path, bool missing_dir_ok = false) override;
Status DeleteRootDirContents() override;
Status DeleteFile(const std::string& path) override;
Status Move(const std::string& src, const std::string& dest) override;
Status CopyFile(const std::string& src, const std::string& dest) override;
Result<std::shared_ptr<io::InputStream>> OpenInputStream(
const std::string& path) override;
Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info) override;
Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
const std::string& path) override;
Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
const FileInfo& info) override;
Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
protected:
std::shared_ptr<FileSystem> base_fs_;
std::shared_ptr<io::LatencyGenerator> latencies_;
};
/// \defgroup filesystem-factories Functions for creating FileSystem instances
///
/// @{
/// \brief Create a new FileSystem by URI
///
/// Recognized schemes are "file", "mock", "hdfs" and "s3fs".
///
/// \param[in] uri a URI-based path, ex: file:///some/local/path
/// \param[out] out_path (optional) Path inside the filesystem.
/// \return a FileSystem instance.
ARROW_EXPORT
Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri,
std::string* out_path = NULLPTR);
/// \brief Create a new FileSystem by URI with a custom IO context
///
/// Recognized schemes are "file", "mock", "hdfs" and "s3fs".
///
/// \param[in] uri a URI-based path, ex: file:///some/local/path
/// \param[in] io_context an IOContext which will be associated with the filesystem
/// \param[out] out_path (optional) Path inside the filesystem.
/// \return a FileSystem instance.
ARROW_EXPORT
Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri,
const io::IOContext& io_context,
std::string* out_path = NULLPTR);
/// \brief Create a new FileSystem by URI
///
/// Same as FileSystemFromUri, but in addition also recognize non-URIs
/// and treat them as local filesystem paths. Only absolute local filesystem
/// paths are allowed.
ARROW_EXPORT
Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
const std::string& uri, std::string* out_path = NULLPTR);
/// \brief Create a new FileSystem by URI with a custom IO context
///
/// Same as FileSystemFromUri, but in addition also recognize non-URIs
/// and treat them as local filesystem paths. Only absolute local filesystem
/// paths are allowed.
ARROW_EXPORT
Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
const std::string& uri, const io::IOContext& io_context,
std::string* out_path = NULLPTR);
/// @}
/// \brief Copy files, including from one FileSystem to another
///
/// If a source and destination are resident in the same FileSystem FileSystem::CopyFile
/// will be used, otherwise the file will be opened as a stream in both FileSystems and
/// chunks copied from the source to the destination. No directories will be created.
ARROW_EXPORT
Status CopyFiles(const std::vector<FileLocator>& sources,
const std::vector<FileLocator>& destinations,
const io::IOContext& io_context = io::default_io_context(),
int64_t chunk_size = 1024 * 1024, bool use_threads = true);
/// \brief Copy selected files, including from one FileSystem to another
///
/// Directories will be created under the destination base directory as needed.
ARROW_EXPORT
Status CopyFiles(const std::shared_ptr<FileSystem>& source_fs,
const FileSelector& source_sel,
const std::shared_ptr<FileSystem>& destination_fs,
const std::string& destination_base_dir,
const io::IOContext& io_context = io::default_io_context(),
int64_t chunk_size = 1024 * 1024, bool use_threads = true);
struct FileSystemGlobalOptions {
/// Path to a single PEM file holding all TLS CA certificates
///
/// If empty, the underlying TLS library's defaults will be used.
std::string tls_ca_file_path;
/// Path to a directory holding TLS CA certificates in individual PEM files
/// named along the OpenSSL "hashed" format.
///
/// If empty, the underlying TLS library's defaults will be used.
std::string tls_ca_dir_path;
};
/// EXPERIMENTAL: optional global initialization routine
///
/// This is for environments (such as manylinux) where the path
/// to TLS CA certificates needs to be configured at runtime.
ARROW_EXPORT
Status Initialize(const FileSystemGlobalOptions& options);
} // namespace fs
} // namespace arrow
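
Usage note (not part of the diff): a minimal sketch tying FileSystemFromUri, FileSelector and GetFileInfo together; ListDir is an illustrative helper and the URI is supplied by the caller.

#include <iostream>
#include <string>
#include "arrow/filesystem/filesystem.h"
#include "arrow/result.h"
#include "arrow/status.h"

// Illustrative helper: resolve a filesystem from a URI, then list it recursively.
arrow::Status ListDir(const std::string& uri) {
  std::string path;
  ARROW_ASSIGN_OR_RAISE(auto fs, arrow::fs::FileSystemFromUri(uri, &path));
  arrow::fs::FileSelector selector;
  selector.base_dir = path;
  selector.recursive = true;
  ARROW_ASSIGN_OR_RAISE(auto infos, fs->GetFileInfo(selector));
  for (const auto& info : infos) {
    std::cout << info.ToString() << std::endl;  // type, path, size, mtime
  }
  return arrow::Status::OK();
}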


@@ -0,0 +1,197 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "arrow/filesystem/filesystem.h"
#include "arrow/util/uri.h"
namespace arrow {
namespace fs {
struct GcsCredentials;
/// Options for the GcsFileSystem implementation.
struct ARROW_EXPORT GcsOptions {
std::shared_ptr<GcsCredentials> credentials;
std::string endpoint_override;
std::string scheme;
/// \brief Location to use for creating buckets.
std::string default_bucket_location;
/// \brief Default metadata for OpenOutputStream.
///
/// This will be ignored if non-empty metadata is passed to OpenOutputStream.
std::shared_ptr<const KeyValueMetadata> default_metadata;
bool Equals(const GcsOptions& other) const;
/// \brief Initialize with Google Default Credentials
///
/// Create options configured to use [Application Default Credentials][aip/4110]. The
/// details of this mechanism are too involved to describe here, but suffice it to say
/// that applications can override the defaults using an environment variable
/// (`GOOGLE_APPLICATION_CREDENTIALS`), that the defaults work with most Google
/// Cloud Platform deployment environments (GCE, GKE, Cloud Run, etc.), and that they
/// have the same behavior as the `gcloud` CLI tool on your workstation.
///
/// \see https://cloud.google.com/docs/authentication
///
/// [aip/4110]: https://google.aip.dev/auth/4110
static GcsOptions Defaults();
/// \brief Initialize with anonymous credentials
static GcsOptions Anonymous();
/// \brief Initialize with access token
///
/// These credentials are useful when using an out-of-band mechanism to fetch access
/// tokens. Note that access tokens are time-limited; you will need to manually refresh
/// the tokens created by the out-of-band mechanism.
static GcsOptions FromAccessToken(const std::string& access_token,
std::chrono::system_clock::time_point expiration);
/// \brief Initialize with service account impersonation
///
/// Service account impersonation allows one principal (a user or service account) to
/// impersonate a service account. It requires that the calling principal has the
/// necessary permissions *on* the service account.
static GcsOptions FromImpersonatedServiceAccount(
const GcsCredentials& base_credentials, const std::string& target_service_account);
/// Creates service account credentials from a JSON object in string form.
///
/// The @p json_object is expected to be in the format described by [aip/4112]. Such an
/// object contains the identity of a service account, as well as a private key that can
/// be used to sign tokens, proving that the caller holds the private key.
///
/// In GCP one can create several "keys" for each service account, and these keys are
/// downloaded as a JSON "key file". The contents of such a file are in the format
/// required by this function. Remember that key files and their contents should be
/// treated like any other secret with security implications: think of them as passwords
/// (because they are!), and don't store or output them where unauthorized persons may
/// read them.
///
/// Most applications should probably use default credentials, possibly pointing them to
/// a file with these contents. This function may be useful when the JSON object is
/// obtained from Cloud Secret Manager or a similar service.
///
/// [aip/4112]: https://google.aip.dev/auth/4112
static GcsOptions FromServiceAccountCredentials(const std::string& json_object);
/// Initialize from URIs such as "gs://bucket/object".
static Result<GcsOptions> FromUri(const arrow::internal::Uri& uri,
std::string* out_path);
static Result<GcsOptions> FromUri(const std::string& uri, std::string* out_path);
};
/// \brief GCS-backed FileSystem implementation.
///
/// GCS (Google Cloud Storage - https://cloud.google.com/storage) is a scalable object
/// storage system for any amount of data. The main abstractions in GCS are buckets and
/// objects. A bucket is a namespace for objects; buckets can store any number of objects,
/// and tens of millions or even billions are not uncommon. Each object contains a single
/// blob of data, up to 5TiB in size. Buckets are typically configured to keep a single
/// version of each object, but versioning can be enabled. Versioning is important because
/// objects are immutable: once created, one cannot append data to the object or modify
/// the object data in any way.
///
/// GCS buckets are in a global namespace: if a Google Cloud customer creates a bucket
/// named `foo`, no other customer can create a bucket with the same name. Note that a
/// principal (a user or service account) may only list the buckets they are entitled to,
/// and then only within a project. It is not possible to list "all" the buckets.
///
/// Within each bucket, objects are in a flat namespace. GCS does not have folders or
/// directories. However, following some conventions, it is possible to emulate
/// directories. To this end, this class:
///
/// - Treats all buckets as directories at the "root".
/// - Creates a new bucket when a root directory is created; this may be slower
///   than most GCS operations.
/// - Creates marker objects for a directory, using a metadata attribute to
///   annotate the file.
/// - Uses GCS's ability to list all the objects with a given prefix to emulate listing
///   of directories.
/// - Uses GCS's ability to summarize, in object lists, all the objects with a common
///   prefix as a single entry, to emulate non-recursive lists. Note that GCS list time
///   is proportional to the number of objects in the prefix. Listing recursively takes
///   almost the same time as non-recursive lists.
///
class ARROW_EXPORT GcsFileSystem : public FileSystem {
public:
~GcsFileSystem() override = default;
std::string type_name() const override;
bool Equals(const FileSystem& other) const override;
Result<FileInfo> GetFileInfo(const std::string& path) override;
Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;
Status CreateDir(const std::string& path, bool recursive) override;
Status DeleteDir(const std::string& path) override;
Status DeleteDirContents(const std::string& path, bool missing_dir_ok = false) override;
/// This is not implemented in GcsFileSystem, as it would be too dangerous.
Status DeleteRootDirContents() override;
Status DeleteFile(const std::string& path) override;
Status Move(const std::string& src, const std::string& dest) override;
Status CopyFile(const std::string& src, const std::string& dest) override;
Result<std::shared_ptr<io::InputStream>> OpenInputStream(
const std::string& path) override;
Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info) override;
Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
const std::string& path) override;
Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
const FileInfo& info) override;
Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata) override;
ARROW_DEPRECATED(
"Deprecated. "
"OpenAppendStream is unsupported on the GCS FileSystem.")
Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata) override;
/// Create a GcsFileSystem instance from the given options.
static std::shared_ptr<GcsFileSystem> Make(
const GcsOptions& options, const io::IOContext& = io::default_io_context());
private:
explicit GcsFileSystem(const GcsOptions& options, const io::IOContext& io_context);
class Impl;
std::shared_ptr<Impl> impl_;
};
} // namespace fs
} // namespace arrow
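
Usage note (not part of the diff): a minimal sketch of connecting with Application Default Credentials and opening an object for reading; the bucket and object path are placeholders.

#include <memory>
#include "arrow/filesystem/gcsfs.h"

// Illustrative helper: default credentials; bucket/object names are placeholders.
arrow::Result<std::shared_ptr<arrow::io::InputStream>> OpenGcsObject() {
  auto options = arrow::fs::GcsOptions::Defaults();
  std::shared_ptr<arrow::fs::GcsFileSystem> fs =
      arrow::fs::GcsFileSystem::Make(options);
  return fs->OpenInputStream("my-bucket/path/to/object");  // placeholder path
}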


@@ -0,0 +1,113 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "arrow/filesystem/filesystem.h"
#include "arrow/io/hdfs.h"
#include "arrow/util/uri.h"
namespace arrow {
namespace fs {
/// Options for the HDFS implementation.
struct ARROW_EXPORT HdfsOptions {
HdfsOptions() = default;
~HdfsOptions() = default;
/// HDFS configuration options; contains host, port, driver
io::HdfsConnectionConfig connection_config;
/// Used by the HDFS OpenWritable interface.
int32_t buffer_size = 0;
int16_t replication = 3;
int64_t default_block_size = 0;
void ConfigureEndPoint(std::string host, int port);
void ConfigureReplication(int16_t replication);
void ConfigureUser(std::string user_name);
void ConfigureBufferSize(int32_t buffer_size);
void ConfigureBlockSize(int64_t default_block_size);
void ConfigureKerberosTicketCachePath(std::string path);
void ConfigureExtraConf(std::string key, std::string val);
bool Equals(const HdfsOptions& other) const;
static Result<HdfsOptions> FromUri(const ::arrow::internal::Uri& uri);
static Result<HdfsOptions> FromUri(const std::string& uri);
};
/// HDFS-backed FileSystem implementation.
///
/// Implementation notes:
/// - This is a wrapper of arrow/io/hdfs, so the FileSystem API can be used to access HDFS.
class ARROW_EXPORT HadoopFileSystem : public FileSystem {
public:
~HadoopFileSystem() override;
std::string type_name() const override { return "hdfs"; }
HdfsOptions options() const;
bool Equals(const FileSystem& other) const override;
/// \cond FALSE
using FileSystem::GetFileInfo;
/// \endcond
Result<FileInfo> GetFileInfo(const std::string& path) override;
Result<std::vector<FileInfo>> GetFileInfo(const FileSelector& select) override;
Status CreateDir(const std::string& path, bool recursive = true) override;
Status DeleteDir(const std::string& path) override;
Status DeleteDirContents(const std::string& path, bool missing_dir_ok = false) override;
Status DeleteRootDirContents() override;
Status DeleteFile(const std::string& path) override;
Status Move(const std::string& src, const std::string& dest) override;
Status CopyFile(const std::string& src, const std::string& dest) override;
Result<std::shared_ptr<io::InputStream>> OpenInputStream(
const std::string& path) override;
Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
const std::string& path) override;
Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
/// Create a HdfsFileSystem instance from the given options.
static Result<std::shared_ptr<HadoopFileSystem>> Make(
const HdfsOptions& options, const io::IOContext& = io::default_io_context());
protected:
HadoopFileSystem(const HdfsOptions& options, const io::IOContext&);
class Impl;
std::unique_ptr<Impl> impl_;
};
} // namespace fs
} // namespace arrow
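
Usage note (not part of the diff): a minimal sketch of configuring the options and creating the filesystem; the host, port and user name are placeholders.

#include <memory>
#include "arrow/filesystem/hdfs.h"

// Illustrative helper: endpoint, user and replication values are placeholders.
arrow::Result<std::shared_ptr<arrow::fs::HadoopFileSystem>> ConnectHdfs() {
  arrow::fs::HdfsOptions options;
  options.ConfigureEndPoint("namenode.example.com", 8020);  // placeholder endpoint
  options.ConfigureUser("analyst");                         // placeholder user name
  options.ConfigureReplication(3);
  return arrow::fs::HadoopFileSystem::Make(options);
}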


@@ -0,0 +1,113 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "arrow/filesystem/filesystem.h"
namespace arrow {
namespace internal {
class Uri;
}
namespace fs {
/// Options for the LocalFileSystem implementation.
struct ARROW_EXPORT LocalFileSystemOptions {
/// Whether OpenInputStream and OpenInputFile return a mmap'ed file,
/// or a regular one.
bool use_mmap = false;
/// \brief Initialize with defaults
static LocalFileSystemOptions Defaults();
bool Equals(const LocalFileSystemOptions& other) const;
static Result<LocalFileSystemOptions> FromUri(const ::arrow::internal::Uri& uri,
std::string* out_path);
};
/// \brief A FileSystem implementation accessing files on the local machine.
///
/// This class handles only `/`-separated paths. If desired, conversion
/// from Windows backslash-separated paths should be done by the caller.
/// Details such as symlinks are abstracted away (symlinks are always
/// followed, except when deleting an entry).
class ARROW_EXPORT LocalFileSystem : public FileSystem {
public:
explicit LocalFileSystem(const io::IOContext& = io::default_io_context());
explicit LocalFileSystem(const LocalFileSystemOptions&,
const io::IOContext& = io::default_io_context());
~LocalFileSystem() override;
std::string type_name() const override { return "local"; }
Result<std::string> NormalizePath(std::string path) override;
bool Equals(const FileSystem& other) const override;
LocalFileSystemOptions options() const { return options_; }
/// \cond FALSE
using FileSystem::GetFileInfo;
/// \endcond
Result<FileInfo> GetFileInfo(const std::string& path) override;
Result<std::vector<FileInfo>> GetFileInfo(const FileSelector& select) override;
Status CreateDir(const std::string& path, bool recursive = true) override;
Status DeleteDir(const std::string& path) override;
Status DeleteDirContents(const std::string& path, bool missing_dir_ok = false) override;
Status DeleteRootDirContents() override;
Status DeleteFile(const std::string& path) override;
Status Move(const std::string& src, const std::string& dest) override;
Status CopyFile(const std::string& src, const std::string& dest) override;
Result<std::shared_ptr<io::InputStream>> OpenInputStream(
const std::string& path) override;
Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
const std::string& path) override;
Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
protected:
LocalFileSystemOptions options_;
};
namespace internal {
// Return whether the string is detected as a local absolute path.
ARROW_EXPORT
bool DetectAbsolutePath(const std::string& s);
} // namespace internal
} // namespace fs
} // namespace arrow
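
Usage note (not part of the diff): a minimal sketch of a write/read round trip on the local filesystem; the /tmp path is illustrative.

#include <memory>
#include "arrow/buffer.h"
#include "arrow/filesystem/localfs.h"
#include "arrow/result.h"
#include "arrow/status.h"

// Illustrative helper: write five bytes, then read them back.
arrow::Status WriteAndReadBack() {
  arrow::fs::LocalFileSystem fs;
  ARROW_ASSIGN_OR_RAISE(auto out, fs.OpenOutputStream("/tmp/example.bin"));
  ARROW_RETURN_NOT_OK(out->Write("hello", 5));
  ARROW_RETURN_NOT_OK(out->Close());
  ARROW_ASSIGN_OR_RAISE(auto in, fs.OpenInputStream("/tmp/example.bin"));
  ARROW_ASSIGN_OR_RAISE(auto buf, in->Read(5));
  return buf->size() == 5 ? arrow::Status::OK()
                          : arrow::Status::IOError("short read");
}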


@@ -0,0 +1,132 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <iosfwd>
#include <memory>
#include <string>
#include <vector>
#include "arrow/filesystem/filesystem.h"
#include "arrow/util/string_view.h"
#include "arrow/util/windows_fixup.h"
namespace arrow {
namespace fs {
namespace internal {
struct MockDirInfo {
std::string full_path;
TimePoint mtime;
bool operator==(const MockDirInfo& other) const {
return mtime == other.mtime && full_path == other.full_path;
}
friend ARROW_EXPORT std::ostream& operator<<(std::ostream&, const MockDirInfo&);
};
struct MockFileInfo {
std::string full_path;
TimePoint mtime;
util::string_view data;
bool operator==(const MockFileInfo& other) const {
return mtime == other.mtime && full_path == other.full_path && data == other.data;
}
friend ARROW_EXPORT std::ostream& operator<<(std::ostream&, const MockFileInfo&);
};
/// A mock FileSystem implementation that holds its contents in memory.
///
/// Useful for validating the FileSystem API, writing a conformance suite,
/// and bootstrapping FileSystem-based APIs.
class ARROW_EXPORT MockFileSystem : public FileSystem {
public:
explicit MockFileSystem(TimePoint current_time,
const io::IOContext& = io::default_io_context());
~MockFileSystem() override;
std::string type_name() const override { return "mock"; }
bool Equals(const FileSystem& other) const override;
// XXX It's not very practical to have to explicitly declare inheritance
// of default overrides.
using FileSystem::GetFileInfo;
Result<FileInfo> GetFileInfo(const std::string& path) override;
Result<std::vector<FileInfo>> GetFileInfo(const FileSelector& select) override;
Status CreateDir(const std::string& path, bool recursive = true) override;
Status DeleteDir(const std::string& path) override;
Status DeleteDirContents(const std::string& path, bool missing_dir_ok = false) override;
Status DeleteRootDirContents() override;
Status DeleteFile(const std::string& path) override;
Status Move(const std::string& src, const std::string& dest) override;
Status CopyFile(const std::string& src, const std::string& dest) override;
Result<std::shared_ptr<io::InputStream>> OpenInputStream(
const std::string& path) override;
Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
const std::string& path) override;
Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
// Contents-dumping helpers to ease testing.
// Output is lexicographically-ordered by full path.
std::vector<MockDirInfo> AllDirs();
std::vector<MockFileInfo> AllFiles();
// Create a file with its content given as a string.
Status CreateFile(const std::string& path, util::string_view content,
bool recursive = true);
// Create a MockFileSystem out of a vector of (empty) FileInfos. The content of
// every file is empty and of size 0. All directories will be created recursively.
static Result<std::shared_ptr<FileSystem>> Make(TimePoint current_time,
const std::vector<FileInfo>& infos);
class Impl;
protected:
std::unique_ptr<Impl> impl_;
};
class ARROW_EXPORT MockAsyncFileSystem : public MockFileSystem {
public:
explicit MockAsyncFileSystem(TimePoint current_time,
const io::IOContext& io_context = io::default_io_context())
: MockFileSystem(current_time, io_context) {
default_async_is_sync_ = false;
}
FileInfoGenerator GetFileInfoGenerator(const FileSelector& select) override;
};
} // namespace internal
} // namespace fs
} // namespace arrow
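
Usage note (not part of the diff): a minimal sketch of seeding a MockFileSystem for a test and dumping its contents; the paths and timestamp are arbitrary.

#include <chrono>
#include <iostream>
#include "arrow/filesystem/mockfs.h"
#include "arrow/status.h"

// Illustrative helper: seed an in-memory filesystem and print its files.
arrow::Status SeedMock() {
  auto now = std::chrono::time_point_cast<std::chrono::nanoseconds>(
      std::chrono::system_clock::now());
  arrow::fs::internal::MockFileSystem fs(now);
  ARROW_RETURN_NOT_OK(fs.CreateFile("dir/a.txt", "payload"));  // creates "dir/" too
  for (const auto& file : fs.AllFiles()) {
    std::cout << file.full_path << std::endl;  // lexicographic order by path
  }
  return arrow::Status::OK();
}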
