first commit

Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

View File

@@ -0,0 +1,20 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "parquet/exception.h"

View File

@@ -0,0 +1,35 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
// Column reader API
#include "parquet/column_reader.h"
#include "parquet/column_scanner.h"
#include "parquet/exception.h"
#include "parquet/file_reader.h"
#include "parquet/metadata.h"
#include "parquet/platform.h"
#include "parquet/printer.h"
#include "parquet/properties.h"
#include "parquet/statistics.h"
// Schemas
#include "parquet/api/schema.h"
// IO
#include "parquet/api/io.h"

View File

@@ -0,0 +1,21 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
// Schemas
#include "parquet/schema.h"

View File

@@ -0,0 +1,25 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "parquet/api/io.h"
#include "parquet/api/schema.h"
#include "parquet/column_writer.h"
#include "parquet/exception.h"
#include "parquet/file_writer.h"
#include "parquet/statistics.h"

View File

@@ -0,0 +1,344 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
// N.B. we don't include async_generator.h as it's relatively heavy
#include <functional>
#include <memory>
#include <vector>
#include "parquet/file_reader.h"
#include "parquet/platform.h"
#include "parquet/properties.h"
namespace arrow {
class ChunkedArray;
class KeyValueMetadata;
class RecordBatchReader;
struct Scalar;
class Schema;
class Table;
class RecordBatch;
} // namespace arrow
namespace parquet {
class FileMetaData;
class SchemaDescriptor;
namespace arrow {
class ColumnChunkReader;
class ColumnReader;
struct SchemaManifest;
class RowGroupReader;
/// \brief Arrow read adapter class for deserializing Parquet files as Arrow row batches.
///
/// This interface caters to different use cases and thus provides several
/// APIs. In its simplest form, it serves a user who wants to read the whole
/// Parquet file at once with the `FileReader::ReadTable` method.
///
/// More advanced users who want to implement parallelism on top of each
/// single Parquet file should do this at the RowGroup level. For this, they can
/// call `FileReader::RowGroup(i)->ReadTable` to receive only the specified
/// RowGroup as a table.
///
/// In the most advanced situation, where a consumer wants to independently read
/// RowGroups in parallel and consume each column individually, they can call
/// `FileReader::RowGroup(i)->Column(j)->Read` and receive an `arrow::Column`
/// instance.
///
/// The parquet format supports an optional integer field_id which can be assigned
/// to a field. Arrow will convert these field IDs to a metadata key named
/// PARQUET:field_id on the appropriate field.
// TODO(wesm): nested data does not always make sense with this user
// interface unless you are only reading a single leaf node from a branch of
// a table. For example:
//
// repeated group data {
// optional group record {
// optional int32 val1;
// optional byte_array val2;
// optional bool val3;
// }
// optional int32 val4;
// }
//
// In the Parquet file, there are 4 leaf nodes:
//
// * data.record.val1
// * data.record.val2
// * data.record.val3
// * data.val4
//
// When materializing this data in an Arrow array, we would have:
//
// data: list<struct<
// record: struct<
// val1: int32,
// val2: string (= list<uint8>),
// val3: bool,
// >,
// val4: int32
// >>
//
// However, in the Parquet format, each leaf node has its own repetition and
// definition levels describing the structure of the intermediate nodes in
// this array structure. Thus, we will need to scan the leaf data for a group
// of leaf nodes part of the same type tree to create a single result Arrow
// nested array structure.
//
// This is additionally complicated by "chunky" repeated fields or very large
// byte arrays.
class PARQUET_EXPORT FileReader {
public:
/// Factory function to create a FileReader from a ParquetFileReader and properties
static ::arrow::Status Make(::arrow::MemoryPool* pool,
std::unique_ptr<ParquetFileReader> reader,
const ArrowReaderProperties& properties,
std::unique_ptr<FileReader>* out);
/// Factory function to create a FileReader from a ParquetFileReader
static ::arrow::Status Make(::arrow::MemoryPool* pool,
std::unique_ptr<ParquetFileReader> reader,
std::unique_ptr<FileReader>* out);
// Since the distribution of columns amongst a Parquet file's row groups may
// be uneven (the number of values in each column chunk can be different), we
// provide a column-oriented read interface. The ColumnReader hides the
// details of paging through the file's row groups and yielding
// fully-materialized arrow::Array instances
//
// Returns error status if the column of interest is not flat.
virtual ::arrow::Status GetColumn(int i, std::unique_ptr<ColumnReader>* out) = 0;
/// \brief Return arrow schema for all the columns.
virtual ::arrow::Status GetSchema(std::shared_ptr<::arrow::Schema>* out) = 0;
/// \brief Read column as a whole into a chunked array.
///
/// The indicated column index is relative to the schema
virtual ::arrow::Status ReadColumn(int i,
std::shared_ptr<::arrow::ChunkedArray>* out) = 0;
// NOTE: Experimental API
// Reads a specific top level schema field into an Array
// The index i refers to the index of the top level schema field, which may
// be nested or flat, e.g.:
//
// 0 foo.bar
//     foo.bar.baz
//     foo.qux
// 1 foo2
// 2 foo3
//
// i=0 will read the entire foo struct, i=1 the foo2 primitive column, etc.
virtual ::arrow::Status ReadSchemaField(
int i, std::shared_ptr<::arrow::ChunkedArray>* out) = 0;
/// \brief Return a RecordBatchReader of row groups selected from row_group_indices.
///
/// Note that the ordering in row_group_indices matters. FileReaders must outlive
/// their RecordBatchReaders.
///
/// \returns error Status if row_group_indices contains an invalid index
virtual ::arrow::Status GetRecordBatchReader(
const std::vector<int>& row_group_indices,
std::unique_ptr<::arrow::RecordBatchReader>* out) = 0;
::arrow::Status GetRecordBatchReader(const std::vector<int>& row_group_indices,
std::shared_ptr<::arrow::RecordBatchReader>* out);
/// \brief Return a RecordBatchReader of row groups selected from
/// row_group_indices, whose columns are selected by column_indices.
///
/// Note that the ordering in row_group_indices and column_indices
/// matter. FileReaders must outlive their RecordBatchReaders.
///
/// \returns error Status if either row_group_indices or column_indices
/// contains an invalid index
virtual ::arrow::Status GetRecordBatchReader(
const std::vector<int>& row_group_indices, const std::vector<int>& column_indices,
std::unique_ptr<::arrow::RecordBatchReader>* out) = 0;
/// \brief Return a generator of record batches.
///
/// The FileReader must outlive the generator, so this requires that you pass in a
/// shared_ptr.
///
/// \returns error Result if either row_group_indices or column_indices contains an
/// invalid index
virtual ::arrow::Result<
std::function<::arrow::Future<std::shared_ptr<::arrow::RecordBatch>>()>>
GetRecordBatchGenerator(std::shared_ptr<FileReader> reader,
const std::vector<int> row_group_indices,
const std::vector<int> column_indices,
::arrow::internal::Executor* cpu_executor = NULLPTR,
int64_t rows_to_readahead = 0) = 0;
::arrow::Status GetRecordBatchReader(const std::vector<int>& row_group_indices,
const std::vector<int>& column_indices,
std::shared_ptr<::arrow::RecordBatchReader>* out);
/// Read all columns into a Table
virtual ::arrow::Status ReadTable(std::shared_ptr<::arrow::Table>* out) = 0;
/// \brief Read the given columns into a Table
///
/// The indicated column indices are relative to the schema
virtual ::arrow::Status ReadTable(const std::vector<int>& column_indices,
std::shared_ptr<::arrow::Table>* out) = 0;
virtual ::arrow::Status ReadRowGroup(int i, const std::vector<int>& column_indices,
std::shared_ptr<::arrow::Table>* out) = 0;
virtual ::arrow::Status ReadRowGroup(int i, std::shared_ptr<::arrow::Table>* out) = 0;
virtual ::arrow::Status ReadRowGroups(const std::vector<int>& row_groups,
const std::vector<int>& column_indices,
std::shared_ptr<::arrow::Table>* out) = 0;
virtual ::arrow::Status ReadRowGroups(const std::vector<int>& row_groups,
std::shared_ptr<::arrow::Table>* out) = 0;
/// \brief Scan file contents with one thread, return number of rows
virtual ::arrow::Status ScanContents(std::vector<int> columns,
const int32_t column_batch_size,
int64_t* num_rows) = 0;
/// \brief Return a reader for the RowGroup; this object must not outlive the
/// FileReader.
virtual std::shared_ptr<RowGroupReader> RowGroup(int row_group_index) = 0;
/// \brief The number of row groups in the file
virtual int num_row_groups() const = 0;
virtual ParquetFileReader* parquet_reader() const = 0;
/// Set whether to use multiple threads during reads of multiple columns.
/// By default only one thread is used.
virtual void set_use_threads(bool use_threads) = 0;
/// Set number of records to read per batch for the RecordBatchReader.
virtual void set_batch_size(int64_t batch_size) = 0;
virtual const ArrowReaderProperties& properties() const = 0;
virtual const SchemaManifest& manifest() const = 0;
virtual ~FileReader() = default;
};
class RowGroupReader {
public:
virtual ~RowGroupReader() = default;
virtual std::shared_ptr<ColumnChunkReader> Column(int column_index) = 0;
virtual ::arrow::Status ReadTable(const std::vector<int>& column_indices,
std::shared_ptr<::arrow::Table>* out) = 0;
virtual ::arrow::Status ReadTable(std::shared_ptr<::arrow::Table>* out) = 0;
private:
struct Iterator;
};
class ColumnChunkReader {
public:
virtual ~ColumnChunkReader() = default;
virtual ::arrow::Status Read(std::shared_ptr<::arrow::ChunkedArray>* out) = 0;
};
// At this point, the column reader is a stream iterator. It only knows how to
// read the next batch of values for a particular column from the file until it
// runs out.
//
// We also do not expose any internal Parquet details, such as row groups. This
// might change in the future.
class PARQUET_EXPORT ColumnReader {
public:
virtual ~ColumnReader() = default;
// Scan the next array of the indicated size. The actual size of the
// returned array may be less than the passed size, depending on how much data is
// available in the file.
//
// When all the data in the file has been exhausted, the result is set to
// nullptr.
//
// Returns Status::OK on a successful read, including if you have exhausted
// the data available in the file.
virtual ::arrow::Status NextBatch(int64_t batch_size,
std::shared_ptr<::arrow::ChunkedArray>* out) = 0;
};
/// \brief Experimental helper class for bindings (like Python) that struggle
/// with either std::move or C++ exceptions
class PARQUET_EXPORT FileReaderBuilder {
public:
FileReaderBuilder();
/// Create FileReaderBuilder from Arrow file and optional properties / metadata
::arrow::Status Open(std::shared_ptr<::arrow::io::RandomAccessFile> file,
const ReaderProperties& properties = default_reader_properties(),
std::shared_ptr<FileMetaData> metadata = NULLPTR);
ParquetFileReader* raw_reader() { return raw_reader_.get(); }
/// Set Arrow MemoryPool for memory allocation
FileReaderBuilder* memory_pool(::arrow::MemoryPool* pool);
/// Set Arrow reader properties
FileReaderBuilder* properties(const ArrowReaderProperties& arg_properties);
/// Build FileReader instance
::arrow::Status Build(std::unique_ptr<FileReader>* out);
private:
::arrow::MemoryPool* pool_;
ArrowReaderProperties properties_;
std::unique_ptr<ParquetFileReader> raw_reader_;
};
/// \defgroup parquet-arrow-reader-factories Factory functions for Parquet Arrow readers
///
/// @{
/// \brief Build FileReader from Arrow file and MemoryPool
///
/// Advanced settings are supported through the FileReaderBuilder class.
PARQUET_EXPORT
::arrow::Status OpenFile(std::shared_ptr<::arrow::io::RandomAccessFile>,
::arrow::MemoryPool* allocator,
std::unique_ptr<FileReader>* reader);
/// @}
PARQUET_EXPORT
::arrow::Status StatisticsAsScalars(const Statistics& statistics,
std::shared_ptr<::arrow::Scalar>* min,
std::shared_ptr<::arrow::Scalar>* max);
namespace internal {
PARQUET_EXPORT
::arrow::Status FuzzReader(const uint8_t* data, int64_t size);
} // namespace internal
} // namespace arrow
} // namespace parquet
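
A minimal usage sketch for the reader API above, not part of the commit: it opens an on-disk file (the path and the parquet/arrow/... include layout are assumptions), builds a FileReader through the OpenFile factory, then either reads the whole file into an ::arrow::Table or streams record batches from row group 0.

#include <memory>
#include <string>

#include "arrow/io/file.h"
#include "arrow/record_batch.h"
#include "arrow/table.h"
#include "parquet/arrow/reader.h"

// Read the entire file into a single arrow::Table.
::arrow::Status ReadWholeFile(const std::string& path,
                              std::shared_ptr<::arrow::Table>* table) {
  ARROW_ASSIGN_OR_RAISE(auto input, ::arrow::io::ReadableFile::Open(path));
  std::unique_ptr<parquet::arrow::FileReader> reader;
  ARROW_RETURN_NOT_OK(parquet::arrow::OpenFile(
      std::move(input), ::arrow::default_memory_pool(), &reader));
  return reader->ReadTable(table);
}

// Stream record batches from row group 0 only. Per the comment above, the
// FileReader must outlive its RecordBatchReader.
::arrow::Status ReadFirstRowGroup(parquet::arrow::FileReader* reader) {
  std::shared_ptr<::arrow::RecordBatchReader> batches;
  ARROW_RETURN_NOT_OK(reader->GetRecordBatchReader({0}, &batches));
  std::shared_ptr<::arrow::RecordBatch> batch;
  while (true) {
    ARROW_RETURN_NOT_OK(batches->ReadNext(&batch));
    if (batch == nullptr) break;  // end of stream
    // ... consume batch ...
  }
  return ::arrow::Status::OK();
}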

View File

@@ -0,0 +1,184 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cassert>
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_fwd.h"
#include "parquet/level_conversion.h"
#include "parquet/platform.h"
#include "parquet/schema.h"
namespace parquet {
class ArrowReaderProperties;
class ArrowWriterProperties;
class WriterProperties;
namespace arrow {
/// \defgroup arrow-to-parquet-schema-conversion Functions to convert an Arrow
/// schema into a Parquet schema.
///
/// @{
PARQUET_EXPORT
::arrow::Status FieldToNode(const std::shared_ptr<::arrow::Field>& field,
const WriterProperties& properties,
const ArrowWriterProperties& arrow_properties,
schema::NodePtr* out);
PARQUET_EXPORT
::arrow::Status ToParquetSchema(const ::arrow::Schema* arrow_schema,
const WriterProperties& properties,
const ArrowWriterProperties& arrow_properties,
std::shared_ptr<SchemaDescriptor>* out);
PARQUET_EXPORT
::arrow::Status ToParquetSchema(const ::arrow::Schema* arrow_schema,
const WriterProperties& properties,
std::shared_ptr<SchemaDescriptor>* out);
/// @}
/// \defgroup parquet-to-arrow-schema-conversion Functions to convert a Parquet
/// schema into an Arrow schema.
///
/// @{
PARQUET_EXPORT
::arrow::Status FromParquetSchema(
const SchemaDescriptor* parquet_schema, const ArrowReaderProperties& properties,
const std::shared_ptr<const ::arrow::KeyValueMetadata>& key_value_metadata,
std::shared_ptr<::arrow::Schema>* out);
PARQUET_EXPORT
::arrow::Status FromParquetSchema(const SchemaDescriptor* parquet_schema,
const ArrowReaderProperties& properties,
std::shared_ptr<::arrow::Schema>* out);
PARQUET_EXPORT
::arrow::Status FromParquetSchema(const SchemaDescriptor* parquet_schema,
std::shared_ptr<::arrow::Schema>* out);
/// @}
/// \brief Bridge between an arrow::Field and parquet column indices.
struct PARQUET_EXPORT SchemaField {
std::shared_ptr<::arrow::Field> field;
std::vector<SchemaField> children;
// Only set for leaf nodes
int column_index = -1;
parquet::internal::LevelInfo level_info;
bool is_leaf() const { return column_index != -1; }
};
/// \brief Bridge between a parquet Schema and an arrow Schema.
///
/// Expose parquet columns as a tree structure. Useful to traverse and link
/// between arrow's Schema and parquet's Schema.
struct PARQUET_EXPORT SchemaManifest {
static ::arrow::Status Make(
const SchemaDescriptor* schema,
const std::shared_ptr<const ::arrow::KeyValueMetadata>& metadata,
const ArrowReaderProperties& properties, SchemaManifest* manifest);
const SchemaDescriptor* descr;
std::shared_ptr<::arrow::Schema> origin_schema;
std::shared_ptr<const ::arrow::KeyValueMetadata> schema_metadata;
std::vector<SchemaField> schema_fields;
std::unordered_map<int, const SchemaField*> column_index_to_field;
std::unordered_map<const SchemaField*, const SchemaField*> child_to_parent;
::arrow::Status GetColumnField(int column_index, const SchemaField** out) const {
auto it = column_index_to_field.find(column_index);
if (it == column_index_to_field.end()) {
return ::arrow::Status::KeyError("Column index ", column_index,
" not found in schema manifest, may be malformed");
}
*out = it->second;
return ::arrow::Status::OK();
}
const SchemaField* GetParent(const SchemaField* field) const {
// Returns nullptr also if not found
auto it = child_to_parent.find(field);
if (it == child_to_parent.end()) {
return NULLPTR;
}
return it->second;
}
/// Coalesce a list of field indices (relative to the equivalent arrow::Schema) which
/// correspond to the column root (first node below the parquet schema's root group) of
/// each leaf referenced in column_indices.
///
/// For example, for leaves `a.b.c`, `a.b.d.e`, and `i.j.k` (column_indices=[0,1,3])
/// the roots are `a` and `i` (return=[0,2]).
///
/// root
/// -- a  <------
/// -- -- b  |  |
/// -- -- -- c  |
/// -- -- -- d  |
/// -- -- -- -- e
/// -- f
/// -- -- g
/// -- -- -- h
/// -- i  <---
/// -- -- j  |
/// -- -- -- k
::arrow::Result<std::vector<int>> GetFieldIndices(
const std::vector<int>& column_indices) const {
const schema::GroupNode* group = descr->group_node();
std::unordered_set<int> already_added;
std::vector<int> out;
for (int column_idx : column_indices) {
if (column_idx < 0 || column_idx >= descr->num_columns()) {
return ::arrow::Status::IndexError("Column index ", column_idx, " is not valid");
}
auto field_node = descr->GetColumnRoot(column_idx);
auto field_idx = group->FieldIndex(*field_node);
if (field_idx == -1) {
return ::arrow::Status::IndexError("Column index ", column_idx, " is not valid");
}
if (already_added.insert(field_idx).second) {
out.push_back(field_idx);
}
}
return out;
}
};
} // namespace arrow
} // namespace parquet
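
A short sketch of the conversion helpers above (the include paths and the default-constructed ArrowReaderProperties are assumptions): derive an Arrow schema from a Parquet SchemaDescriptor, and use SchemaManifest to map leaf column indices to their top-level field indices, as in the tree example in GetFieldIndices.

#include <memory>
#include <vector>

#include "parquet/arrow/schema.h"
#include "parquet/properties.h"

// Parquet schema -> Arrow schema, without key-value metadata.
::arrow::Status GetArrowSchema(const parquet::SchemaDescriptor* descr,
                               std::shared_ptr<::arrow::Schema>* out) {
  return parquet::arrow::FromParquetSchema(descr, out);
}

// Leaf column indices -> top-level field indices (e.g. [0,1,3] -> [0,2] in
// the tree example above).
::arrow::Result<std::vector<int>> ColumnRoots(
    const parquet::SchemaDescriptor* descr, const std::vector<int>& columns) {
  parquet::arrow::SchemaManifest manifest;
  ARROW_RETURN_NOT_OK(parquet::arrow::SchemaManifest::Make(
      descr, /*metadata=*/nullptr, parquet::ArrowReaderProperties(), &manifest));
  return manifest.GetFieldIndices(columns);
}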

View File

@@ -0,0 +1,507 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstring>
#include <limits>
#include <memory>
#include <random>
#include <string>
#include <utility>
#include <vector>
#include "arrow/array.h"
#include "arrow/array/builder_binary.h"
#include "arrow/array/builder_decimal.h"
#include "arrow/array/builder_primitive.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/random.h"
#include "arrow/type_fwd.h"
#include "arrow/type_traits.h"
#include "arrow/util/decimal.h"
#include "parquet/column_reader.h"
namespace parquet {
using internal::RecordReader;
namespace arrow {
using ::arrow::Array;
using ::arrow::ChunkedArray;
using ::arrow::Status;
template <int32_t PRECISION>
struct DecimalWithPrecisionAndScale {
static_assert(PRECISION >= 1 && PRECISION <= 38, "Invalid precision value");
using type = ::arrow::Decimal128Type;
static constexpr ::arrow::Type::type type_id = ::arrow::Decimal128Type::type_id;
static constexpr int32_t precision = PRECISION;
static constexpr int32_t scale = PRECISION - 1;
};
template <int32_t PRECISION>
struct Decimal256WithPrecisionAndScale {
static_assert(PRECISION >= 1 && PRECISION <= 76, "Invalid precision value");
using type = ::arrow::Decimal256Type;
static constexpr ::arrow::Type::type type_id = ::arrow::Decimal256Type::type_id;
static constexpr int32_t precision = PRECISION;
static constexpr int32_t scale = PRECISION - 1;
};
template <class ArrowType>
::arrow::enable_if_floating_point<ArrowType, Status> NonNullArray(
size_t size, std::shared_ptr<Array>* out) {
using c_type = typename ArrowType::c_type;
std::vector<c_type> values;
::arrow::random_real(size, 0, static_cast<c_type>(0), static_cast<c_type>(1), &values);
::arrow::NumericBuilder<ArrowType> builder;
RETURN_NOT_OK(builder.AppendValues(values.data(), values.size()));
return builder.Finish(out);
}
template <class ArrowType>
::arrow::enable_if_integer<ArrowType, Status> NonNullArray(size_t size,
std::shared_ptr<Array>* out) {
std::vector<typename ArrowType::c_type> values;
::arrow::randint(size, 0, 64, &values);
// Passing data type so this will work with TimestampType too
::arrow::NumericBuilder<ArrowType> builder(std::make_shared<ArrowType>(),
::arrow::default_memory_pool());
RETURN_NOT_OK(builder.AppendValues(values.data(), values.size()));
return builder.Finish(out);
}
template <class ArrowType>
::arrow::enable_if_date<ArrowType, Status> NonNullArray(size_t size,
std::shared_ptr<Array>* out) {
std::vector<typename ArrowType::c_type> values;
::arrow::randint(size, 0, 24, &values);
for (size_t i = 0; i < size; i++) {
values[i] *= 86400000;
}
// Passing data type so this will work with TimestampType too
::arrow::NumericBuilder<ArrowType> builder(std::make_shared<ArrowType>(),
::arrow::default_memory_pool());
RETURN_NOT_OK(builder.AppendValues(values.data(), values.size()));
return builder.Finish(out);
}
template <class ArrowType>
::arrow::enable_if_base_binary<ArrowType, Status> NonNullArray(
size_t size, std::shared_ptr<Array>* out) {
using BuilderType = typename ::arrow::TypeTraits<ArrowType>::BuilderType;
BuilderType builder;
for (size_t i = 0; i < size; i++) {
RETURN_NOT_OK(builder.Append("test-string"));
}
return builder.Finish(out);
}
template <typename ArrowType>
::arrow::enable_if_fixed_size_binary<ArrowType, Status> NonNullArray(
size_t size, std::shared_ptr<Array>* out) {
using BuilderType = typename ::arrow::TypeTraits<ArrowType>::BuilderType;
// set byte_width to the length of "fixed": 5
// TODO: find a way to generate test data with more diversity.
BuilderType builder(::arrow::fixed_size_binary(5));
for (size_t i = 0; i < size; i++) {
RETURN_NOT_OK(builder.Append("fixed"));
}
return builder.Finish(out);
}
static void random_decimals(int64_t n, uint32_t seed, int32_t precision, uint8_t* out) {
auto gen = ::arrow::random::RandomArrayGenerator(seed);
std::shared_ptr<Array> decimals;
int32_t byte_width = 0;
if (precision <= ::arrow::Decimal128Type::kMaxPrecision) {
decimals = gen.Decimal128(::arrow::decimal128(precision, 0), n);
byte_width = ::arrow::Decimal128Type::kByteWidth;
} else {
decimals = gen.Decimal256(::arrow::decimal256(precision, 0), n);
byte_width = ::arrow::Decimal256Type::kByteWidth;
}
std::memcpy(out, decimals->data()->GetValues<uint8_t>(1, 0), byte_width * n);
}
template <typename ArrowType, int32_t precision = ArrowType::precision>
::arrow::enable_if_t<
std::is_same<ArrowType, DecimalWithPrecisionAndScale<precision>>::value, Status>
NonNullArray(size_t size, std::shared_ptr<Array>* out) {
constexpr int32_t kDecimalPrecision = precision;
constexpr int32_t kDecimalScale = DecimalWithPrecisionAndScale<precision>::scale;
const auto type = ::arrow::decimal(kDecimalPrecision, kDecimalScale);
::arrow::Decimal128Builder builder(type);
const int32_t byte_width =
static_cast<const ::arrow::Decimal128Type&>(*type).byte_width();
constexpr int32_t seed = 0;
ARROW_ASSIGN_OR_RAISE(auto out_buf, ::arrow::AllocateBuffer(size * byte_width));
random_decimals(size, seed, kDecimalPrecision, out_buf->mutable_data());
RETURN_NOT_OK(builder.AppendValues(out_buf->data(), size));
return builder.Finish(out);
}
template <typename ArrowType, int32_t precision = ArrowType::precision>
::arrow::enable_if_t<
std::is_same<ArrowType, Decimal256WithPrecisionAndScale<precision>>::value, Status>
NonNullArray(size_t size, std::shared_ptr<Array>* out) {
constexpr int32_t kDecimalPrecision = precision;
constexpr int32_t kDecimalScale = Decimal256WithPrecisionAndScale<precision>::scale;
const auto type = ::arrow::decimal256(kDecimalPrecision, kDecimalScale);
::arrow::Decimal256Builder builder(type);
const int32_t byte_width =
static_cast<const ::arrow::Decimal256Type&>(*type).byte_width();
constexpr int32_t seed = 0;
ARROW_ASSIGN_OR_RAISE(auto out_buf, ::arrow::AllocateBuffer(size * byte_width));
random_decimals(size, seed, kDecimalPrecision, out_buf->mutable_data());
RETURN_NOT_OK(builder.AppendValues(out_buf->data(), size));
return builder.Finish(out);
}
template <class ArrowType>
::arrow::enable_if_boolean<ArrowType, Status> NonNullArray(size_t size,
std::shared_ptr<Array>* out) {
std::vector<uint8_t> values;
::arrow::randint(size, 0, 1, &values);
::arrow::BooleanBuilder builder;
RETURN_NOT_OK(builder.AppendValues(values.data(), values.size()));
return builder.Finish(out);
}
// This helper function only supports (size/2) nulls.
template <typename ArrowType>
::arrow::enable_if_floating_point<ArrowType, Status> NullableArray(
size_t size, size_t num_nulls, uint32_t seed, std::shared_ptr<Array>* out) {
using c_type = typename ArrowType::c_type;
std::vector<c_type> values;
::arrow::random_real(size, seed, static_cast<c_type>(-1e10), static_cast<c_type>(1e10),
&values);
std::vector<uint8_t> valid_bytes(size, 1);
for (size_t i = 0; i < num_nulls; i++) {
valid_bytes[i * 2] = 0;
}
::arrow::NumericBuilder<ArrowType> builder;
RETURN_NOT_OK(builder.AppendValues(values.data(), values.size(), valid_bytes.data()));
return builder.Finish(out);
}
// This helper function only supports (size/2) nulls.
template <typename ArrowType>
::arrow::enable_if_integer<ArrowType, Status> NullableArray(size_t size, size_t num_nulls,
uint32_t seed,
std::shared_ptr<Array>* out) {
std::vector<typename ArrowType::c_type> values;
// Seed is random in Arrow right now
(void)seed;
::arrow::randint(size, 0, 64, &values);
std::vector<uint8_t> valid_bytes(size, 1);
for (size_t i = 0; i < num_nulls; i++) {
valid_bytes[i * 2] = 0;
}
// Passing data type so this will work with TimestampType too
::arrow::NumericBuilder<ArrowType> builder(std::make_shared<ArrowType>(),
::arrow::default_memory_pool());
RETURN_NOT_OK(builder.AppendValues(values.data(), values.size(), valid_bytes.data()));
return builder.Finish(out);
}
template <typename ArrowType>
::arrow::enable_if_date<ArrowType, Status> NullableArray(size_t size, size_t num_nulls,
uint32_t seed,
std::shared_ptr<Array>* out) {
std::vector<typename ArrowType::c_type> values;
// Seed is random in Arrow right now
(void)seed;
::arrow::randint(size, 0, 24, &values);
for (size_t i = 0; i < size; i++) {
values[i] *= 86400000;
}
std::vector<uint8_t> valid_bytes(size, 1);
for (size_t i = 0; i < num_nulls; i++) {
valid_bytes[i * 2] = 0;
}
// Passing data type so this will work with TimestampType too
::arrow::NumericBuilder<ArrowType> builder(std::make_shared<ArrowType>(),
::arrow::default_memory_pool());
RETURN_NOT_OK(builder.AppendValues(values.data(), values.size(), valid_bytes.data()));
return builder.Finish(out);
}
// So far, this helper function only supports (size/2) nulls.
template <typename ArrowType>
::arrow::enable_if_base_binary<ArrowType, Status> NullableArray(
size_t size, size_t num_nulls, uint32_t seed, std::shared_ptr<::arrow::Array>* out) {
std::vector<uint8_t> valid_bytes(size, 1);
for (size_t i = 0; i < num_nulls; i++) {
valid_bytes[i * 2] = 0;
}
using BuilderType = typename ::arrow::TypeTraits<ArrowType>::BuilderType;
BuilderType builder;
const int kBufferSize = 10;
uint8_t buffer[kBufferSize];
for (size_t i = 0; i < size; i++) {
if (!valid_bytes[i]) {
RETURN_NOT_OK(builder.AppendNull());
} else {
::arrow::random_bytes(kBufferSize, seed + static_cast<uint32_t>(i), buffer);
if (ArrowType::is_utf8) {
// Trivially force data to be valid UTF8 by making it all ASCII
for (auto& byte : buffer) {
byte &= 0x7f;
}
}
RETURN_NOT_OK(builder.Append(buffer, kBufferSize));
}
}
return builder.Finish(out);
}
// So far, this helper function only supports (size/2) nulls,
// the same as NullableArray<String|Binary>(..)
template <typename ArrowType>
::arrow::enable_if_fixed_size_binary<ArrowType, Status> NullableArray(
size_t size, size_t num_nulls, uint32_t seed, std::shared_ptr<::arrow::Array>* out) {
std::vector<uint8_t> valid_bytes(size, 1);
for (size_t i = 0; i < num_nulls; i++) {
valid_bytes[i * 2] = 0;
}
using BuilderType = typename ::arrow::TypeTraits<ArrowType>::BuilderType;
const int byte_width = 10;
BuilderType builder(::arrow::fixed_size_binary(byte_width));
const int kBufferSize = byte_width;
uint8_t buffer[kBufferSize];
for (size_t i = 0; i < size; i++) {
if (!valid_bytes[i]) {
RETURN_NOT_OK(builder.AppendNull());
} else {
::arrow::random_bytes(kBufferSize, seed + static_cast<uint32_t>(i), buffer);
RETURN_NOT_OK(builder.Append(buffer));
}
}
return builder.Finish(out);
}
template <typename ArrowType, int32_t precision = ArrowType::precision>
::arrow::enable_if_t<
std::is_same<ArrowType, DecimalWithPrecisionAndScale<precision>>::value, Status>
NullableArray(size_t size, size_t num_nulls, uint32_t seed,
std::shared_ptr<::arrow::Array>* out) {
std::vector<uint8_t> valid_bytes(size, '\1');
for (size_t i = 0; i < num_nulls; ++i) {
valid_bytes[i * 2] = '\0';
}
constexpr int32_t kDecimalPrecision = precision;
constexpr int32_t kDecimalScale = DecimalWithPrecisionAndScale<precision>::scale;
const auto type = ::arrow::decimal(kDecimalPrecision, kDecimalScale);
const int32_t byte_width =
static_cast<const ::arrow::Decimal128Type&>(*type).byte_width();
ARROW_ASSIGN_OR_RAISE(auto out_buf, ::arrow::AllocateBuffer(size * byte_width));
random_decimals(size, seed, precision, out_buf->mutable_data());
::arrow::Decimal128Builder builder(type);
RETURN_NOT_OK(builder.AppendValues(out_buf->data(), size, valid_bytes.data()));
return builder.Finish(out);
}
template <typename ArrowType, int32_t precision = ArrowType::precision>
::arrow::enable_if_t<
std::is_same<ArrowType, Decimal256WithPrecisionAndScale<precision>>::value, Status>
NullableArray(size_t size, size_t num_nulls, uint32_t seed,
std::shared_ptr<::arrow::Array>* out) {
std::vector<uint8_t> valid_bytes(size, '\1');
for (size_t i = 0; i < num_nulls; ++i) {
valid_bytes[i * 2] = '\0';
}
constexpr int32_t kDecimalPrecision = precision;
constexpr int32_t kDecimalScale = Decimal256WithPrecisionAndScale<precision>::scale;
const auto type = ::arrow::decimal256(kDecimalPrecision, kDecimalScale);
const int32_t byte_width =
static_cast<const ::arrow::Decimal256Type&>(*type).byte_width();
ARROW_ASSIGN_OR_RAISE(auto out_buf, ::arrow::AllocateBuffer(size * byte_width));
random_decimals(size, seed, precision, out_buf->mutable_data());
::arrow::Decimal256Builder builder(type);
RETURN_NOT_OK(builder.AppendValues(out_buf->data(), size, valid_bytes.data()));
return builder.Finish(out);
}
// So far, this helper function only supports (size/2) nulls.
template <class ArrowType>
::arrow::enable_if_boolean<ArrowType, Status> NullableArray(size_t size, size_t num_nulls,
uint32_t seed,
std::shared_ptr<Array>* out) {
std::vector<uint8_t> values;
// Seed is random in Arrow right now
(void)seed;
::arrow::randint(size, 0, 1, &values);
std::vector<uint8_t> valid_bytes(size, 1);
for (size_t i = 0; i < num_nulls; i++) {
valid_bytes[i * 2] = 0;
}
::arrow::BooleanBuilder builder;
RETURN_NOT_OK(builder.AppendValues(values.data(), values.size(), valid_bytes.data()));
return builder.Finish(out);
}
/// Wrap an Array into a ListArray by splitting it up into `size` lists.
///
/// This helper function only supports (size/2) nulls.
Status MakeListArray(const std::shared_ptr<Array>& values, int64_t size,
int64_t null_count, const std::string& item_name,
bool nullable_values, std::shared_ptr<::arrow::ListArray>* out) {
// We always include an empty list
int64_t non_null_entries = size - null_count - 1;
int64_t length_per_entry = values->length() / non_null_entries;
auto offsets = AllocateBuffer();
RETURN_NOT_OK(offsets->Resize((size + 1) * sizeof(int32_t)));
int32_t* offsets_ptr = reinterpret_cast<int32_t*>(offsets->mutable_data());
auto null_bitmap = AllocateBuffer();
int64_t bitmap_size = ::arrow::bit_util::BytesForBits(size);
RETURN_NOT_OK(null_bitmap->Resize(bitmap_size));
uint8_t* null_bitmap_ptr = null_bitmap->mutable_data();
memset(null_bitmap_ptr, 0, bitmap_size);
int32_t current_offset = 0;
for (int64_t i = 0; i < size; i++) {
offsets_ptr[i] = current_offset;
if (!(((i % 2) == 0) && ((i / 2) < null_count))) {
// Non-null list (list with index 1 is always empty).
::arrow::bit_util::SetBit(null_bitmap_ptr, i);
if (i != 1) {
current_offset += static_cast<int32_t>(length_per_entry);
}
}
}
offsets_ptr[size] = static_cast<int32_t>(values->length());
auto value_field = ::arrow::field(item_name, values->type(), nullable_values);
*out = std::make_shared<::arrow::ListArray>(::arrow::list(value_field), size, offsets,
values, null_bitmap, null_count);
return Status::OK();
}
// Make an array containing only empty lists, with a null values array
Status MakeEmptyListsArray(int64_t size, std::shared_ptr<Array>* out_array) {
// Allocate an offsets buffer containing only zeroes
const int64_t offsets_nbytes = (size + 1) * sizeof(int32_t);
ARROW_ASSIGN_OR_RAISE(auto offsets_buffer, ::arrow::AllocateBuffer(offsets_nbytes));
memset(offsets_buffer->mutable_data(), 0, offsets_nbytes);
auto value_field =
::arrow::field("item", ::arrow::float64(), false /* nullable_values */);
auto list_type = ::arrow::list(value_field);
std::vector<std::shared_ptr<Buffer>> child_buffers = {nullptr /* null bitmap */,
nullptr /* values */};
auto child_data =
::arrow::ArrayData::Make(value_field->type(), 0, std::move(child_buffers));
std::vector<std::shared_ptr<Buffer>> buffers = {nullptr /* bitmap */,
std::move(offsets_buffer)};
auto array_data = ::arrow::ArrayData::Make(list_type, size, std::move(buffers));
array_data->child_data.push_back(child_data);
*out_array = ::arrow::MakeArray(array_data);
return Status::OK();
}
std::shared_ptr<::arrow::Table> MakeSimpleTable(
const std::shared_ptr<ChunkedArray>& values, bool nullable) {
auto schema = ::arrow::schema({::arrow::field("col", values->type(), nullable)});
return ::arrow::Table::Make(schema, {values});
}
std::shared_ptr<::arrow::Table> MakeSimpleTable(const std::shared_ptr<Array>& values,
bool nullable) {
auto carr = std::make_shared<::arrow::ChunkedArray>(values);
return MakeSimpleTable(carr, nullable);
}
template <typename T>
void ExpectArray(T* expected, Array* result) {
auto p_array = static_cast<::arrow::PrimitiveArray*>(result);
for (int i = 0; i < result->length(); i++) {
EXPECT_EQ(expected[i], reinterpret_cast<const T*>(p_array->values()->data())[i]);
}
}
template <typename ArrowType>
void ExpectArrayT(void* expected, Array* result) {
::arrow::PrimitiveArray* p_array = static_cast<::arrow::PrimitiveArray*>(result);
for (int64_t i = 0; i < result->length(); i++) {
EXPECT_EQ(reinterpret_cast<typename ArrowType::c_type*>(expected)[i],
reinterpret_cast<const typename ArrowType::c_type*>(
p_array->values()->data())[i]);
}
}
template <>
void ExpectArrayT<::arrow::BooleanType>(void* expected, Array* result) {
::arrow::BooleanBuilder builder;
ARROW_EXPECT_OK(
builder.AppendValues(reinterpret_cast<uint8_t*>(expected), result->length()));
std::shared_ptr<Array> expected_array;
ARROW_EXPECT_OK(builder.Finish(&expected_array));
EXPECT_TRUE(result->Equals(*expected_array));
}
} // namespace arrow
} // namespace parquet
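
A hypothetical gtest-style snippet using the helpers above, as it might appear inside a test in the parquet::arrow namespace (ASSERT_OK and ASSERT_EQ come from the gtest utilities this file already includes): build 100 random non-null doubles and wrap them in a one-column table.

std::shared_ptr<Array> values;
ASSERT_OK(NonNullArray<::arrow::DoubleType>(/*size=*/100, &values));
std::shared_ptr<::arrow::Table> table = MakeSimpleTable(values, /*nullable=*/false);
ASSERT_EQ(1, table->num_columns());
ASSERT_EQ(100, table->num_rows());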

View File

@@ -0,0 +1,109 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include "parquet/platform.h"
#include "parquet/properties.h"
namespace arrow {
class Array;
class ChunkedArray;
class Schema;
class Table;
} // namespace arrow
namespace parquet {
class FileMetaData;
class ParquetFileWriter;
namespace arrow {
/// \brief Iterative FileWriter class
///
/// Start a new RowGroup or Chunk with NewRowGroup, then write the whole
/// column chunk, column by column, with WriteColumnChunk.
///
/// If PARQUET:field_id is present as a metadata key on a field, and the corresponding
/// value is a nonnegative integer, then it will be used as the field_id in the parquet
/// file.
class PARQUET_EXPORT FileWriter {
public:
static ::arrow::Status Make(MemoryPool* pool, std::unique_ptr<ParquetFileWriter> writer,
std::shared_ptr<::arrow::Schema> schema,
std::shared_ptr<ArrowWriterProperties> arrow_properties,
std::unique_ptr<FileWriter>* out);
static ::arrow::Status Open(const ::arrow::Schema& schema, MemoryPool* pool,
std::shared_ptr<::arrow::io::OutputStream> sink,
std::shared_ptr<WriterProperties> properties,
std::unique_ptr<FileWriter>* writer);
static ::arrow::Status Open(const ::arrow::Schema& schema, MemoryPool* pool,
std::shared_ptr<::arrow::io::OutputStream> sink,
std::shared_ptr<WriterProperties> properties,
std::shared_ptr<ArrowWriterProperties> arrow_properties,
std::unique_ptr<FileWriter>* writer);
virtual std::shared_ptr<::arrow::Schema> schema() const = 0;
/// \brief Write a Table to Parquet.
virtual ::arrow::Status WriteTable(const ::arrow::Table& table, int64_t chunk_size) = 0;
virtual ::arrow::Status NewRowGroup(int64_t chunk_size) = 0;
virtual ::arrow::Status WriteColumnChunk(const ::arrow::Array& data) = 0;
/// \brief Write ColumnChunk in row group using slice of a ChunkedArray
virtual ::arrow::Status WriteColumnChunk(
const std::shared_ptr<::arrow::ChunkedArray>& data, int64_t offset,
int64_t size) = 0;
virtual ::arrow::Status WriteColumnChunk(
const std::shared_ptr<::arrow::ChunkedArray>& data) = 0;
virtual ::arrow::Status Close() = 0;
virtual ~FileWriter();
virtual MemoryPool* memory_pool() const = 0;
virtual const std::shared_ptr<FileMetaData> metadata() const = 0;
};
/// \brief Write Parquet file metadata only to indicated Arrow OutputStream
PARQUET_EXPORT
::arrow::Status WriteFileMetaData(const FileMetaData& file_metadata,
::arrow::io::OutputStream* sink);
/// \brief Write metadata-only Parquet file to indicated Arrow OutputStream
PARQUET_EXPORT
::arrow::Status WriteMetaDataFile(const FileMetaData& file_metadata,
::arrow::io::OutputStream* sink);
/// \brief Write a Table to Parquet.
::arrow::Status PARQUET_EXPORT
WriteTable(const ::arrow::Table& table, MemoryPool* pool,
std::shared_ptr<::arrow::io::OutputStream> sink, int64_t chunk_size,
std::shared_ptr<WriterProperties> properties = default_writer_properties(),
std::shared_ptr<ArrowWriterProperties> arrow_properties =
default_arrow_writer_properties());
} // namespace arrow
} // namespace parquet
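
A minimal sketch of the two write paths declared above (the output path, chunk size, and include layout are assumptions): the one-shot WriteTable helper, and the iterative FileWriter path that opens a row group and writes one column chunk per column.

#include <memory>

#include "arrow/io/file.h"
#include "arrow/table.h"
#include "parquet/arrow/writer.h"

// One-shot: row groups of up to 64Ki rows, default properties.
::arrow::Status WriteOneShot(const ::arrow::Table& table) {
  ARROW_ASSIGN_OR_RAISE(auto sink,
                        ::arrow::io::FileOutputStream::Open("/tmp/out.parquet"));
  return parquet::arrow::WriteTable(table, ::arrow::default_memory_pool(), sink,
                                    /*chunk_size=*/64 * 1024);
}

// Iterative: one row group, written column chunk by column chunk.
::arrow::Status WriteIteratively(const ::arrow::Table& table) {
  ARROW_ASSIGN_OR_RAISE(auto sink,
                        ::arrow::io::FileOutputStream::Open("/tmp/out.parquet"));
  std::unique_ptr<parquet::arrow::FileWriter> writer;
  ARROW_RETURN_NOT_OK(parquet::arrow::FileWriter::Open(
      *table.schema(), ::arrow::default_memory_pool(), sink,
      parquet::default_writer_properties(), &writer));
  ARROW_RETURN_NOT_OK(writer->NewRowGroup(table.num_rows()));
  for (int i = 0; i < table.num_columns(); ++i) {
    ARROW_RETURN_NOT_OK(writer->WriteColumnChunk(table.column(i)));
  }
  return writer->Close();
}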

View File

@@ -0,0 +1,247 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cmath>
#include <cstdint>
#include <memory>
#include "arrow/util/bit_util.h"
#include "arrow/util/logging.h"
#include "parquet/hasher.h"
#include "parquet/platform.h"
#include "parquet/types.h"
namespace parquet {
// A Bloom filter is a compact structure to indicate whether an item is not in a set or
// probably in a set. The Bloom filter usually consists of a bit set that represents a
// set of elements, a hash strategy and a Bloom filter algorithm.
class PARQUET_EXPORT BloomFilter {
public:
// Maximum Bloom filter size; it is set to the HDFS default block size of 128 MB.
// This value will be reconsidered when implementing Bloom filter producer.
static constexpr uint32_t kMaximumBloomFilterBytes = 128 * 1024 * 1024;
/// Determine whether an element exists in the set or not.
///
/// @param hash the hash of the value to check.
/// @return false if the value is definitely not in the set; true means the
/// value is PROBABLY in the set.
virtual bool FindHash(uint64_t hash) const = 0;
/// Insert an element into the set represented by the Bloom filter bitset.
/// @param hash the hash of the value to insert into the Bloom filter.
virtual void InsertHash(uint64_t hash) = 0;
/// Write this Bloom filter to an output stream. A Bloom filter structure should
/// include bitset length, hash strategy, algorithm, and bitset.
///
/// @param sink the output stream to write to
virtual void WriteTo(ArrowOutputStream* sink) const = 0;
/// Get the number of bytes in the bitset.
virtual uint32_t GetBitsetSize() const = 0;
/// Compute hash for 32 bits value by using its plain encoding result.
///
/// @param value the value to hash.
/// @return hash result.
virtual uint64_t Hash(int32_t value) const = 0;
/// Compute hash for 64 bits value by using its plain encoding result.
///
/// @param value the value to hash.
/// @return hash result.
virtual uint64_t Hash(int64_t value) const = 0;
/// Compute hash for float value by using its plain encoding result.
///
/// @param value the value to hash.
/// @return hash result.
virtual uint64_t Hash(float value) const = 0;
/// Compute hash for double value by using its plain encoding result.
///
/// @param value the value to hash.
/// @return hash result.
virtual uint64_t Hash(double value) const = 0;
/// Compute hash for Int96 value by using its plain encoding result.
///
/// @param value the value to hash.
/// @return hash result.
virtual uint64_t Hash(const Int96* value) const = 0;
/// Compute hash for ByteArray value by using its plain encoding result.
///
/// @param value the value to hash.
/// @return hash result.
virtual uint64_t Hash(const ByteArray* value) const = 0;
/// Compute hash for fixed byte array value by using its plain encoding result.
///
/// @param value the value address.
/// @param len the value length.
/// @return hash result.
virtual uint64_t Hash(const FLBA* value, uint32_t len) const = 0;
virtual ~BloomFilter() {}
protected:
// Hash strategy available for Bloom filter.
enum class HashStrategy : uint32_t { MURMUR3_X64_128 = 0 };
// Bloom filter algorithm.
enum class Algorithm : uint32_t { BLOCK = 0 };
};
// The BlockSplitBloomFilter is implemented using block-based Bloom filters from
// Putze et al.'s "Cache-, Hash- and Space-Efficient Bloom Filters". The basic idea is
// to hash the item to a tiny Bloom filter whose size fits a single cache line or smaller.
//
// This implementation sets 8 bits in each tiny Bloom filter. Each tiny Bloom
// filter is 32 bytes to take advantage of 32-byte SIMD instructions.
class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter {
public:
/// The constructor of BlockSplitBloomFilter. It uses murmur3_x64_128 as hash function.
BlockSplitBloomFilter();
/// Initialize the BlockSplitBloomFilter. num_bytes should be within
/// [kMinimumBloomFilterBytes, kMaximumBloomFilterBytes]; it will be clamped
/// to the nearest bound if it is out of range, and also rounded up to a power of 2.
///
/// @param num_bytes The number of bytes to store Bloom filter bitset.
void Init(uint32_t num_bytes);
/// Initialize the BlockSplitBloomFilter. It copies the bitset as underlying
/// bitset because the given bitset may not satisfy the 32-byte alignment requirement
/// which may lead to segfault when performing SIMD instructions. It is the caller's
/// responsibility to free the bitset passed in. This is used when reconstructing
/// a Bloom filter from a parquet file.
///
/// @param bitset The given bitset to initialize the Bloom filter.
/// @param num_bytes The number of bytes of given bitset.
void Init(const uint8_t* bitset, uint32_t num_bytes);
// Minimum Bloom filter size; it is set to 32 bytes to fit a tiny Bloom filter.
static constexpr uint32_t kMinimumBloomFilterBytes = 32;
/// Calculate optimal size according to the number of distinct values and false
/// positive probability.
///
/// @param ndv The number of distinct values.
/// @param fpp The false positive probability.
/// @return the optimal number of bits; it is always between
/// kMinimumBloomFilterBytes << 3 and kMaximumBloomFilterBytes << 3, and
/// always a power of 2
static uint32_t OptimalNumOfBits(uint32_t ndv, double fpp) {
DCHECK(fpp > 0.0 && fpp < 1.0);
const double m = -8.0 * ndv / log(1 - pow(fpp, 1.0 / 8));
uint32_t num_bits;
// Handle overflow.
if (m < 0 || m > kMaximumBloomFilterBytes << 3) {
num_bits = static_cast<uint32_t>(kMaximumBloomFilterBytes << 3);
} else {
num_bits = static_cast<uint32_t>(m);
}
// Round up to lower bound
if (num_bits < kMinimumBloomFilterBytes << 3) {
num_bits = kMinimumBloomFilterBytes << 3;
}
// Get next power of 2 if bits is not power of 2.
if ((num_bits & (num_bits - 1)) != 0) {
num_bits = static_cast<uint32_t>(::arrow::bit_util::NextPower2(num_bits));
}
// Round down to upper bound
if (num_bits > kMaximumBloomFilterBytes << 3) {
num_bits = kMaximumBloomFilterBytes << 3;
}
return num_bits;
}
bool FindHash(uint64_t hash) const override;
void InsertHash(uint64_t hash) override;
void WriteTo(ArrowOutputStream* sink) const override;
uint32_t GetBitsetSize() const override { return num_bytes_; }
uint64_t Hash(int64_t value) const override { return hasher_->Hash(value); }
uint64_t Hash(float value) const override { return hasher_->Hash(value); }
uint64_t Hash(double value) const override { return hasher_->Hash(value); }
uint64_t Hash(const Int96* value) const override { return hasher_->Hash(value); }
uint64_t Hash(const ByteArray* value) const override { return hasher_->Hash(value); }
uint64_t Hash(int32_t value) const override { return hasher_->Hash(value); }
uint64_t Hash(const FLBA* value, uint32_t len) const override {
return hasher_->Hash(value, len);
}
/// Deserialize the Bloom filter from an input stream. It is used when reconstructing
/// a Bloom filter from a Parquet file.
///
/// @param input_stream The input stream from which to construct the Bloom filter
/// @return The BlockSplitBloomFilter.
static BlockSplitBloomFilter Deserialize(ArrowInputStream* input_stream);
private:
// Bytes in a tiny Bloom filter block.
static constexpr int kBytesPerFilterBlock = 32;
// The number of bits to be set in each tiny Bloom filter
static constexpr int kBitsSetPerBlock = 8;
// A mask structure used to set bits in each tiny Bloom filter.
struct BlockMask {
uint32_t item[kBitsSetPerBlock];
};
// The block-based algorithm needs eight odd SALT values to calculate the eight
// bit indexes to set, one bit in each 32-bit word.
static constexpr uint32_t SALT[kBitsSetPerBlock] = {
0x47b6137bU, 0x44974d91U, 0x8824ad5bU, 0xa2b7289dU,
0x705495c7U, 0x2df1424bU, 0x9efc4947U, 0x5c6bfb31U};
/// Set bits in mask array according to input key.
/// @param key the value to calculate mask values.
/// @param mask the mask array used to set bits inside a block
void SetMask(uint32_t key, BlockMask& mask) const;
// Memory pool to allocate aligned buffer for bitset
::arrow::MemoryPool* pool_;
// The underlying buffer of bitset.
std::shared_ptr<Buffer> data_;
// The number of bytes of Bloom filter bitset.
uint32_t num_bytes_;
// Hash strategy used in this Bloom filter.
HashStrategy hash_strategy_;
// Algorithm used in this Bloom filter.
Algorithm algorithm_;
// The hash pointer points to actual hash class used.
std::unique_ptr<Hasher> hasher_;
};
} // namespace parquet
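
A short usage sketch for the filter above (the ndv and fpp values are arbitrary assumptions): size the bitset for roughly 1000 distinct values at a 1% false-positive rate, insert one value's hash, then probe.

#include <cstdint>

#include "parquet/bloom_filter.h"

bool ProbablyContains(int64_t candidate) {
  parquet::BlockSplitBloomFilter filter;
  // OptimalNumOfBits returns bits; Init takes bytes.
  filter.Init(parquet::BlockSplitBloomFilter::OptimalNumOfBits(
                  /*ndv=*/1000, /*fpp=*/0.01) / 8);
  filter.InsertHash(filter.Hash(static_cast<int64_t>(42)));
  // false: definitely absent; true: probably present.
  return filter.FindHash(filter.Hash(candidate));
}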

View File

@@ -0,0 +1,160 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This module defines an abstract interface for iterating through pages in a
// Parquet column chunk within a row group. It could be extended in the future
// to iterate through all data pages in all chunks in a file.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include "parquet/statistics.h"
#include "parquet/types.h"
namespace parquet {
// TODO: Parallel processing is not yet safe because of memory-ownership
// semantics (the PageReader may or may not own the memory referenced by a
// page)
//
// TODO(wesm): In the future Parquet implementations may store the crc code
// in format::PageHeader. parquet-mr currently does not, so we also skip it
// here, both on the read and write path
class Page {
public:
Page(const std::shared_ptr<Buffer>& buffer, PageType::type type)
: buffer_(buffer), type_(type) {}
PageType::type type() const { return type_; }
std::shared_ptr<Buffer> buffer() const { return buffer_; }
// @returns: a pointer to the page's data
const uint8_t* data() const { return buffer_->data(); }
// @returns: the total size in bytes of the page's data buffer
int32_t size() const { return static_cast<int32_t>(buffer_->size()); }
private:
std::shared_ptr<Buffer> buffer_;
PageType::type type_;
};
/// \brief Base type for DataPageV1 and DataPageV2 including common attributes
class DataPage : public Page {
public:
int32_t num_values() const { return num_values_; }
Encoding::type encoding() const { return encoding_; }
int64_t uncompressed_size() const { return uncompressed_size_; }
const EncodedStatistics& statistics() const { return statistics_; }
virtual ~DataPage() = default;
protected:
DataPage(PageType::type type, const std::shared_ptr<Buffer>& buffer, int32_t num_values,
Encoding::type encoding, int64_t uncompressed_size,
const EncodedStatistics& statistics = EncodedStatistics())
: Page(buffer, type),
num_values_(num_values),
encoding_(encoding),
uncompressed_size_(uncompressed_size),
statistics_(statistics) {}
int32_t num_values_;
Encoding::type encoding_;
int64_t uncompressed_size_;
EncodedStatistics statistics_;
};
class DataPageV1 : public DataPage {
public:
DataPageV1(const std::shared_ptr<Buffer>& buffer, int32_t num_values,
Encoding::type encoding, Encoding::type definition_level_encoding,
Encoding::type repetition_level_encoding, int64_t uncompressed_size,
const EncodedStatistics& statistics = EncodedStatistics())
: DataPage(PageType::DATA_PAGE, buffer, num_values, encoding, uncompressed_size,
statistics),
definition_level_encoding_(definition_level_encoding),
repetition_level_encoding_(repetition_level_encoding) {}
Encoding::type repetition_level_encoding() const { return repetition_level_encoding_; }
Encoding::type definition_level_encoding() const { return definition_level_encoding_; }
private:
Encoding::type definition_level_encoding_;
Encoding::type repetition_level_encoding_;
};
class DataPageV2 : public DataPage {
public:
DataPageV2(const std::shared_ptr<Buffer>& buffer, int32_t num_values, int32_t num_nulls,
int32_t num_rows, Encoding::type encoding,
int32_t definition_levels_byte_length, int32_t repetition_levels_byte_length,
int64_t uncompressed_size, bool is_compressed = false,
const EncodedStatistics& statistics = EncodedStatistics())
: DataPage(PageType::DATA_PAGE_V2, buffer, num_values, encoding, uncompressed_size,
statistics),
num_nulls_(num_nulls),
num_rows_(num_rows),
definition_levels_byte_length_(definition_levels_byte_length),
repetition_levels_byte_length_(repetition_levels_byte_length),
is_compressed_(is_compressed) {}
int32_t num_nulls() const { return num_nulls_; }
int32_t num_rows() const { return num_rows_; }
int32_t definition_levels_byte_length() const { return definition_levels_byte_length_; }
int32_t repetition_levels_byte_length() const { return repetition_levels_byte_length_; }
bool is_compressed() const { return is_compressed_; }
private:
int32_t num_nulls_;
int32_t num_rows_;
int32_t definition_levels_byte_length_;
int32_t repetition_levels_byte_length_;
bool is_compressed_;
};
class DictionaryPage : public Page {
public:
DictionaryPage(const std::shared_ptr<Buffer>& buffer, int32_t num_values,
Encoding::type encoding, bool is_sorted = false)
: Page(buffer, PageType::DICTIONARY_PAGE),
num_values_(num_values),
encoding_(encoding),
is_sorted_(is_sorted) {}
int32_t num_values() const { return num_values_; }
Encoding::type encoding() const { return encoding_; }
bool is_sorted() const { return is_sorted_; }
private:
int32_t num_values_;
Encoding::type encoding_;
bool is_sorted_;
};
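// Usage sketch (illustrative only, not part of the API): pages are normally
// produced by a PageReader (see column_reader.h). Given a hypothetical
// std::shared_ptr<Page> `page`, downcast according to type() to reach the
// subclass-specific attributes:
//
//   if (page->type() == PageType::DICTIONARY_PAGE) {
//     auto* dict_page = static_cast<const DictionaryPage*>(page.get());
//     // dict_page->num_values(), dict_page->is_sorted(), ...
//   } else if (page->type() == PageType::DATA_PAGE_V2) {
//     auto* data_page = static_cast<const DataPageV2*>(page.get());
//     // data_page->num_rows(), data_page->num_nulls(), ...
//   }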
} // namespace parquet

View File

@ -0,0 +1,376 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>
#include "parquet/exception.h"
#include "parquet/level_conversion.h"
#include "parquet/platform.h"
#include "parquet/schema.h"
#include "parquet/types.h"
namespace arrow {
class Array;
class ChunkedArray;
namespace bit_util {
class BitReader;
} // namespace bit_util
namespace util {
class RleDecoder;
} // namespace util
} // namespace arrow
namespace parquet {
class Decryptor;
class Page;
// 16 MB is the default maximum page header size
static constexpr uint32_t kDefaultMaxPageHeaderSize = 16 * 1024 * 1024;
// 16 KB is the default expected page header size
static constexpr uint32_t kDefaultPageHeaderSize = 16 * 1024;
class PARQUET_EXPORT LevelDecoder {
public:
LevelDecoder();
~LevelDecoder();
// Initialize the LevelDecoder state with new data
// and return the number of bytes consumed
int SetData(Encoding::type encoding, int16_t max_level, int num_buffered_values,
const uint8_t* data, int32_t data_size);
void SetDataV2(int32_t num_bytes, int16_t max_level, int num_buffered_values,
const uint8_t* data);
// Decodes a batch of levels into an array and returns the number of levels decoded
int Decode(int batch_size, int16_t* levels);
private:
int bit_width_;
int num_values_remaining_;
Encoding::type encoding_;
std::unique_ptr<::arrow::util::RleDecoder> rle_decoder_;
std::unique_ptr<::arrow::bit_util::BitReader> bit_packed_decoder_;
int16_t max_level_;
};
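// Usage sketch (assumptions: `data`/`data_size` point at RLE-encoded
// definition levels of a V1 data page with max_definition_level == 1 and
// 100 buffered values):
//
//   LevelDecoder decoder;
//   int bytes_consumed = decoder.SetData(Encoding::RLE, /*max_level=*/1,
//                                        /*num_buffered_values=*/100,
//                                        data, data_size);
//   std::vector<int16_t> def_levels(100);
//   int levels_decoded = decoder.Decode(/*batch_size=*/100, def_levels.data());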
struct CryptoContext {
CryptoContext(bool start_with_dictionary_page, int16_t rg_ordinal, int16_t col_ordinal,
std::shared_ptr<Decryptor> meta, std::shared_ptr<Decryptor> data)
: start_decrypt_with_dictionary_page(start_with_dictionary_page),
row_group_ordinal(rg_ordinal),
column_ordinal(col_ordinal),
meta_decryptor(std::move(meta)),
data_decryptor(std::move(data)) {}
CryptoContext() {}
bool start_decrypt_with_dictionary_page = false;
int16_t row_group_ordinal = -1;
int16_t column_ordinal = -1;
std::shared_ptr<Decryptor> meta_decryptor;
std::shared_ptr<Decryptor> data_decryptor;
};
// Abstract page iterator interface. This way, we can feed column pages to the
// ColumnReader through whatever mechanism we choose
class PARQUET_EXPORT PageReader {
public:
virtual ~PageReader() = default;
static std::unique_ptr<PageReader> Open(
std::shared_ptr<ArrowInputStream> stream, int64_t total_num_rows,
Compression::type codec, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool(),
const CryptoContext* ctx = NULLPTR);
  // @returns: shared_ptr<Page>(nullptr) on end of stream, otherwise a
  // std::shared_ptr<Page> containing the next Page
virtual std::shared_ptr<Page> NextPage() = 0;
virtual void set_max_page_header_size(uint32_t size) = 0;
};
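// Usage sketch (assumptions: `stream` is an open ArrowInputStream positioned
// at a column chunk, and `total_num_rows` comes from the column chunk
// metadata):
//
//   std::unique_ptr<PageReader> pager =
//       PageReader::Open(stream, total_num_rows, Compression::UNCOMPRESSED);
//   while (std::shared_ptr<Page> page = pager->NextPage()) {
//     // Inspect page->type(), page->size(), ...
//   }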
class PARQUET_EXPORT ColumnReader {
public:
virtual ~ColumnReader() = default;
static std::shared_ptr<ColumnReader> Make(
const ColumnDescriptor* descr, std::unique_ptr<PageReader> pager,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
// Returns true if there are still values in this column.
virtual bool HasNext() = 0;
virtual Type::type type() const = 0;
virtual const ColumnDescriptor* descr() const = 0;
// Get the encoding that can be exposed by this reader. If it returns
// dictionary encoding, then ReadBatchWithDictionary can be used to read data.
//
// \note API EXPERIMENTAL
virtual ExposedEncoding GetExposedEncoding() = 0;
protected:
friend class RowGroupReader;
// Set the encoding that can be exposed by this reader.
//
// \note API EXPERIMENTAL
virtual void SetExposedEncoding(ExposedEncoding encoding) = 0;
};
// API to read values from a single column. This is the main client-facing API.
template <typename DType>
class TypedColumnReader : public ColumnReader {
public:
typedef typename DType::c_type T;
// Read a batch of repetition levels, definition levels, and values from the
// column.
//
  // Since null values are not stored in the values buffer, the number of values read
// may be less than the number of repetition and definition levels. With
// nested data this is almost certainly true.
//
// Set def_levels or rep_levels to nullptr if you want to skip reading them.
// This is only safe if you know through some other source that there are no
// undefined values.
//
// To fully exhaust a row group, you must read batches until the number of
// values read reaches the number of stored values according to the metadata.
//
// This API is the same for both V1 and V2 of the DataPage
//
// @returns: actual number of levels read (see values_read for number of values read)
virtual int64_t ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
T* values, int64_t* values_read) = 0;
/// Read a batch of repetition levels, definition levels, and values from the
/// column and leave spaces for null entries on the lowest level in the values
/// buffer.
///
  /// In comparison to ReadBatch, the length of the repetition and definition
  /// levels is the same as the number of values read when max_definition_level == 1.
/// In the case of max_definition_level > 1, the repetition and definition
/// levels are larger than the values but the values include the null entries
/// with definition_level == (max_definition_level - 1).
///
/// To fully exhaust a row group, you must read batches until the number of
/// values read reaches the number of stored values according to the metadata.
///
/// @param batch_size the number of levels to read
/// @param[out] def_levels The Parquet definition levels, output has
/// the length levels_read.
/// @param[out] rep_levels The Parquet repetition levels, output has
/// the length levels_read.
/// @param[out] values The values in the lowest nested level including
/// spacing for nulls on the lowest levels; output has the length
/// values_read.
/// @param[out] valid_bits Memory allocated for a bitmap that indicates if
/// the row is null or on the maximum definition level. For performance
/// reasons the underlying buffer should be able to store 1 bit more than
/// required. If this requires an additional byte, this byte is only read
/// but never written to.
/// @param valid_bits_offset The offset in bits of the valid_bits where the
/// first relevant bit resides.
/// @param[out] levels_read The number of repetition/definition levels that were read.
/// @param[out] values_read The number of values read, this includes all
/// non-null entries as well as all null-entries on the lowest level
/// (i.e. definition_level == max_definition_level - 1)
/// @param[out] null_count The number of nulls on the lowest levels.
/// (i.e. (values_read - null_count) is total number of non-null entries)
///
/// \deprecated Since 4.0.0
ARROW_DEPRECATED("Doesn't handle nesting correctly and unused outside of unit tests.")
virtual int64_t ReadBatchSpaced(int64_t batch_size, int16_t* def_levels,
int16_t* rep_levels, T* values, uint8_t* valid_bits,
int64_t valid_bits_offset, int64_t* levels_read,
int64_t* values_read, int64_t* null_count) = 0;
// Skip reading levels
// Returns the number of levels skipped
virtual int64_t Skip(int64_t num_rows_to_skip) = 0;
// Read a batch of repetition levels, definition levels, and indices from the
// column. And read the dictionary if a dictionary page is encountered during
// reading pages. This API is similar to ReadBatch(), with ability to read
// dictionary and indices. It is only valid to call this method when the reader can
// expose dictionary encoding. (i.e., the reader's GetExposedEncoding() returns
// DICTIONARY).
//
// The dictionary is read along with the data page. When there's no data page,
// the dictionary won't be returned.
//
// @param batch_size The batch size to read
// @param[out] def_levels The Parquet definition levels.
// @param[out] rep_levels The Parquet repetition levels.
// @param[out] indices The dictionary indices.
// @param[out] indices_read The number of indices read.
// @param[out] dict The pointer to dictionary values. It will return nullptr if
// there's no data page. Each column chunk only has one dictionary page. The dictionary
// is owned by the reader, so the caller is responsible for copying the dictionary
// values before the reader gets destroyed.
// @param[out] dict_len The dictionary length. It will return 0 if there's no data
// page.
  // @returns: actual number of levels read (see indices_read for number of
  // indices read)
//
// \note API EXPERIMENTAL
virtual int64_t ReadBatchWithDictionary(int64_t batch_size, int16_t* def_levels,
int16_t* rep_levels, int32_t* indices,
int64_t* indices_read, const T** dict,
int32_t* dict_len) = 0;
};
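// Usage sketch (assumptions: `reader` is a std::shared_ptr<ColumnReader> for
// an INT32 column, e.g. obtained from RowGroupReader::Column(i)):
//
//   auto* int32_reader = static_cast<TypedColumnReader<Int32Type>*>(reader.get());
//   std::vector<int16_t> def_levels(1024), rep_levels(1024);
//   std::vector<int32_t> values(1024);
//   while (int32_reader->HasNext()) {
//     int64_t values_read = 0;
//     int64_t levels_read = int32_reader->ReadBatch(
//         1024, def_levels.data(), rep_levels.data(), values.data(), &values_read);
//     // values[0..values_read) holds the decoded non-null values.
//   }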
namespace internal {
/// \brief Stateful column reader that delimits semantic records for both flat
/// and nested columns
///
/// \note API EXPERIMENTAL
/// \since 1.3.0
class RecordReader {
public:
static std::shared_ptr<RecordReader> Make(
const ColumnDescriptor* descr, LevelInfo leaf_info,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool(),
const bool read_dictionary = false);
virtual ~RecordReader() = default;
/// \brief Attempt to read indicated number of records from column chunk
/// \return number of records read
virtual int64_t ReadRecords(int64_t num_records) = 0;
/// \brief Pre-allocate space for data. Results in better flat read performance
virtual void Reserve(int64_t num_values) = 0;
/// \brief Clear consumed values and repetition/definition levels as the
/// result of calling ReadRecords
virtual void Reset() = 0;
/// \brief Transfer filled values buffer to caller. A new one will be
/// allocated in subsequent ReadRecords calls
virtual std::shared_ptr<ResizableBuffer> ReleaseValues() = 0;
/// \brief Transfer filled validity bitmap buffer to caller. A new one will
/// be allocated in subsequent ReadRecords calls
virtual std::shared_ptr<ResizableBuffer> ReleaseIsValid() = 0;
/// \brief Return true if the record reader has more internal data yet to
/// process
virtual bool HasMoreData() const = 0;
/// \brief Advance record reader to the next row group
/// \param[in] reader obtained from RowGroupReader::GetColumnPageReader
virtual void SetPageReader(std::unique_ptr<PageReader> reader) = 0;
virtual void DebugPrintState() = 0;
/// \brief Decoded definition levels
int16_t* def_levels() const {
return reinterpret_cast<int16_t*>(def_levels_->mutable_data());
}
/// \brief Decoded repetition levels
int16_t* rep_levels() const {
return reinterpret_cast<int16_t*>(rep_levels_->mutable_data());
}
/// \brief Decoded values, including nulls, if any
uint8_t* values() const { return values_->mutable_data(); }
/// \brief Number of values written including nulls (if any)
int64_t values_written() const { return values_written_; }
/// \brief Number of definition / repetition levels (from those that have
/// been decoded) that have been consumed inside the reader.
int64_t levels_position() const { return levels_position_; }
/// \brief Number of definition / repetition levels that have been written
/// internally in the reader
int64_t levels_written() const { return levels_written_; }
/// \brief Number of nulls in the leaf
int64_t null_count() const { return null_count_; }
/// \brief True if the leaf values are nullable
bool nullable_values() const { return nullable_values_; }
/// \brief True if reading directly as Arrow dictionary-encoded
bool read_dictionary() const { return read_dictionary_; }
protected:
bool nullable_values_;
bool at_record_start_;
int64_t records_read_;
int64_t values_written_;
int64_t values_capacity_;
int64_t null_count_;
int64_t levels_written_;
int64_t levels_position_;
int64_t levels_capacity_;
std::shared_ptr<::arrow::ResizableBuffer> values_;
  // If false, the values buffer is not allocated (we read directly into builder
  // classes).
bool uses_values_;
std::shared_ptr<::arrow::ResizableBuffer> valid_bits_;
std::shared_ptr<::arrow::ResizableBuffer> def_levels_;
std::shared_ptr<::arrow::ResizableBuffer> rep_levels_;
bool read_dictionary_ = false;
};
class BinaryRecordReader : virtual public RecordReader {
public:
virtual std::vector<std::shared_ptr<::arrow::Array>> GetBuilderChunks() = 0;
};
/// \brief Read records directly to dictionary-encoded Arrow form (int32
/// indices). Only valid for BYTE_ARRAY columns
class DictionaryRecordReader : virtual public RecordReader {
public:
virtual std::shared_ptr<::arrow::ChunkedArray> GetResult() = 0;
};
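// Usage sketch (API experimental; assumptions: `descr` and `leaf_info` come
// from the file schema, and `pager` from RowGroupReader::GetColumnPageReader):
//
//   auto record_reader = RecordReader::Make(descr, leaf_info);
//   record_reader->SetPageReader(std::move(pager));
//   while (record_reader->HasMoreData()) {
//     int64_t records_read = record_reader->ReadRecords(/*num_records=*/1000);
//     if (records_read == 0) break;
//     // Consume values()/def_levels()/rep_levels(), then release buffers:
//     auto values = record_reader->ReleaseValues();
//     record_reader->Reset();
//   }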
} // namespace internal
using BoolReader = TypedColumnReader<BooleanType>;
using Int32Reader = TypedColumnReader<Int32Type>;
using Int64Reader = TypedColumnReader<Int64Type>;
using Int96Reader = TypedColumnReader<Int96Type>;
using FloatReader = TypedColumnReader<FloatType>;
using DoubleReader = TypedColumnReader<DoubleType>;
using ByteArrayReader = TypedColumnReader<ByteArrayType>;
using FixedLenByteArrayReader = TypedColumnReader<FLBAType>;
} // namespace parquet

View File

@ -0,0 +1,262 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <stdio.h>
#include <cstdint>
#include <memory>
#include <ostream>
#include <string>
#include <utility>
#include <vector>
#include "parquet/column_reader.h"
#include "parquet/exception.h"
#include "parquet/platform.h"
#include "parquet/schema.h"
#include "parquet/types.h"
namespace parquet {
static constexpr int64_t DEFAULT_SCANNER_BATCH_SIZE = 128;
class PARQUET_EXPORT Scanner {
public:
explicit Scanner(std::shared_ptr<ColumnReader> reader,
int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
: batch_size_(batch_size),
level_offset_(0),
levels_buffered_(0),
value_buffer_(AllocateBuffer(pool)),
value_offset_(0),
values_buffered_(0),
reader_(std::move(reader)) {
def_levels_.resize(descr()->max_definition_level() > 0 ? batch_size_ : 0);
rep_levels_.resize(descr()->max_repetition_level() > 0 ? batch_size_ : 0);
}
virtual ~Scanner() {}
static std::shared_ptr<Scanner> Make(
std::shared_ptr<ColumnReader> col_reader,
int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
virtual void PrintNext(std::ostream& out, int width, bool with_levels = false) = 0;
bool HasNext() { return level_offset_ < levels_buffered_ || reader_->HasNext(); }
const ColumnDescriptor* descr() const { return reader_->descr(); }
int64_t batch_size() const { return batch_size_; }
void SetBatchSize(int64_t batch_size) { batch_size_ = batch_size; }
protected:
int64_t batch_size_;
std::vector<int16_t> def_levels_;
std::vector<int16_t> rep_levels_;
int level_offset_;
int levels_buffered_;
std::shared_ptr<ResizableBuffer> value_buffer_;
int value_offset_;
int64_t values_buffered_;
std::shared_ptr<ColumnReader> reader_;
};
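// Usage sketch (assumptions: `col_reader` is a std::shared_ptr<ColumnReader>
// from an open file, and <iostream> is available at the call site):
//
//   std::shared_ptr<Scanner> scanner = Scanner::Make(col_reader);
//   while (scanner->HasNext()) {
//     scanner->PrintNext(std::cout, /*width=*/17);
//     std::cout << "\n";
//   }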
template <typename DType>
class PARQUET_TEMPLATE_CLASS_EXPORT TypedScanner : public Scanner {
public:
typedef typename DType::c_type T;
explicit TypedScanner(std::shared_ptr<ColumnReader> reader,
int64_t batch_size = DEFAULT_SCANNER_BATCH_SIZE,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool())
: Scanner(std::move(reader), batch_size, pool) {
typed_reader_ = static_cast<TypedColumnReader<DType>*>(reader_.get());
int value_byte_size = type_traits<DType::type_num>::value_byte_size;
PARQUET_THROW_NOT_OK(value_buffer_->Resize(batch_size_ * value_byte_size));
values_ = reinterpret_cast<T*>(value_buffer_->mutable_data());
}
virtual ~TypedScanner() {}
bool NextLevels(int16_t* def_level, int16_t* rep_level) {
if (level_offset_ == levels_buffered_) {
levels_buffered_ = static_cast<int>(
typed_reader_->ReadBatch(static_cast<int>(batch_size_), def_levels_.data(),
rep_levels_.data(), values_, &values_buffered_));
value_offset_ = 0;
level_offset_ = 0;
if (!levels_buffered_) {
return false;
}
}
*def_level = descr()->max_definition_level() > 0 ? def_levels_[level_offset_] : 0;
*rep_level = descr()->max_repetition_level() > 0 ? rep_levels_[level_offset_] : 0;
level_offset_++;
return true;
}
bool Next(T* val, int16_t* def_level, int16_t* rep_level, bool* is_null) {
if (level_offset_ == levels_buffered_) {
if (!HasNext()) {
// Out of data pages
return false;
}
}
NextLevels(def_level, rep_level);
*is_null = *def_level < descr()->max_definition_level();
if (*is_null) {
return true;
}
if (value_offset_ == values_buffered_) {
throw ParquetException("Value was non-null, but has not been buffered");
}
*val = values_[value_offset_++];
return true;
}
// Returns true if there is a next value
bool NextValue(T* val, bool* is_null) {
if (level_offset_ == levels_buffered_) {
if (!HasNext()) {
// Out of data pages
return false;
}
}
int16_t def_level = -1;
int16_t rep_level = -1;
NextLevels(&def_level, &rep_level);
*is_null = def_level < descr()->max_definition_level();
if (*is_null) {
return true;
}
if (value_offset_ == values_buffered_) {
throw ParquetException("Value was non-null, but has not been buffered");
}
*val = values_[value_offset_++];
return true;
}
virtual void PrintNext(std::ostream& out, int width, bool with_levels = false) {
T val{};
int16_t def_level = -1;
int16_t rep_level = -1;
bool is_null = false;
char buffer[80];
if (!Next(&val, &def_level, &rep_level, &is_null)) {
throw ParquetException("No more values buffered");
}
if (with_levels) {
out << " D:" << def_level << " R:" << rep_level << " ";
if (!is_null) {
out << "V:";
}
}
if (is_null) {
std::string null_fmt = format_fwf<ByteArrayType>(width);
snprintf(buffer, sizeof(buffer), null_fmt.c_str(), "NULL");
} else {
FormatValue(&val, buffer, sizeof(buffer), width);
}
out << buffer;
}
private:
// The ownership of this object is expressed through the reader_ variable in the base
TypedColumnReader<DType>* typed_reader_;
inline void FormatValue(void* val, char* buffer, int bufsize, int width);
T* values_;
};
template <typename DType>
inline void TypedScanner<DType>::FormatValue(void* val, char* buffer, int bufsize,
int width) {
std::string fmt = format_fwf<DType>(width);
snprintf(buffer, bufsize, fmt.c_str(), *reinterpret_cast<T*>(val));
}
template <>
inline void TypedScanner<Int96Type>::FormatValue(void* val, char* buffer, int bufsize,
int width) {
std::string fmt = format_fwf<Int96Type>(width);
std::string result = Int96ToString(*reinterpret_cast<Int96*>(val));
snprintf(buffer, bufsize, fmt.c_str(), result.c_str());
}
template <>
inline void TypedScanner<ByteArrayType>::FormatValue(void* val, char* buffer, int bufsize,
int width) {
std::string fmt = format_fwf<ByteArrayType>(width);
std::string result = ByteArrayToString(*reinterpret_cast<ByteArray*>(val));
snprintf(buffer, bufsize, fmt.c_str(), result.c_str());
}
template <>
inline void TypedScanner<FLBAType>::FormatValue(void* val, char* buffer, int bufsize,
int width) {
std::string fmt = format_fwf<FLBAType>(width);
std::string result = FixedLenByteArrayToString(
*reinterpret_cast<FixedLenByteArray*>(val), descr()->type_length());
snprintf(buffer, bufsize, fmt.c_str(), result.c_str());
}
typedef TypedScanner<BooleanType> BoolScanner;
typedef TypedScanner<Int32Type> Int32Scanner;
typedef TypedScanner<Int64Type> Int64Scanner;
typedef TypedScanner<Int96Type> Int96Scanner;
typedef TypedScanner<FloatType> FloatScanner;
typedef TypedScanner<DoubleType> DoubleScanner;
typedef TypedScanner<ByteArrayType> ByteArrayScanner;
typedef TypedScanner<FLBAType> FixedLenByteArrayScanner;
template <typename RType>
int64_t ScanAll(int32_t batch_size, int16_t* def_levels, int16_t* rep_levels,
uint8_t* values, int64_t* values_buffered,
parquet::ColumnReader* reader) {
typedef typename RType::T Type;
auto typed_reader = static_cast<RType*>(reader);
auto vals = reinterpret_cast<Type*>(&values[0]);
return typed_reader->ReadBatch(batch_size, def_levels, rep_levels, vals,
values_buffered);
}
int64_t PARQUET_EXPORT ScanAllValues(int32_t batch_size, int16_t* def_levels,
int16_t* rep_levels, uint8_t* values,
int64_t* values_buffered,
parquet::ColumnReader* reader);
} // namespace parquet

View File

@ -0,0 +1,270 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <cstring>
#include <memory>
#include "parquet/exception.h"
#include "parquet/platform.h"
#include "parquet/types.h"
namespace arrow {
class Array;
namespace bit_util {
class BitWriter;
} // namespace bit_util
namespace util {
class RleEncoder;
} // namespace util
} // namespace arrow
namespace parquet {
struct ArrowWriteContext;
class ColumnDescriptor;
class DataPage;
class DictionaryPage;
class ColumnChunkMetaDataBuilder;
class Encryptor;
class WriterProperties;
class PARQUET_EXPORT LevelEncoder {
public:
LevelEncoder();
~LevelEncoder();
static int MaxBufferSize(Encoding::type encoding, int16_t max_level,
int num_buffered_values);
// Initialize the LevelEncoder.
void Init(Encoding::type encoding, int16_t max_level, int num_buffered_values,
uint8_t* data, int data_size);
// Encodes a batch of levels from an array and returns the number of levels encoded
int Encode(int batch_size, const int16_t* levels);
int32_t len() {
if (encoding_ != Encoding::RLE) {
throw ParquetException("Only implemented for RLE encoding");
}
return rle_length_;
}
private:
int bit_width_;
int rle_length_;
Encoding::type encoding_;
std::unique_ptr<::arrow::util::RleEncoder> rle_encoder_;
std::unique_ptr<::arrow::bit_util::BitWriter> bit_packed_encoder_;
};
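// Usage sketch, mirroring LevelDecoder in column_reader.h (assumptions:
// `levels` holds `num_levels` definition levels with max_definition_level == 1):
//
//   LevelEncoder encoder;
//   int max_size =
//       LevelEncoder::MaxBufferSize(Encoding::RLE, /*max_level=*/1, num_levels);
//   std::vector<uint8_t> buffer(max_size);
//   encoder.Init(Encoding::RLE, /*max_level=*/1, num_levels,
//                buffer.data(), max_size);
//   int levels_encoded = encoder.Encode(num_levels, levels.data());
//   int32_t rle_bytes = encoder.len();  // RLE-encoded byte length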
class PARQUET_EXPORT PageWriter {
public:
virtual ~PageWriter() {}
static std::unique_ptr<PageWriter> Open(
std::shared_ptr<ArrowOutputStream> sink, Compression::type codec,
int compression_level, ColumnChunkMetaDataBuilder* metadata,
int16_t row_group_ordinal = -1, int16_t column_chunk_ordinal = -1,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool(),
bool buffered_row_group = false,
std::shared_ptr<Encryptor> header_encryptor = NULLPTR,
std::shared_ptr<Encryptor> data_encryptor = NULLPTR);
  // The ColumnWriter decides whether dictionary encoding was used and whether
  // the dictionary encoding fell back to plain encoding upon reaching the
  // dictionary page size limit
virtual void Close(bool has_dictionary, bool fallback) = 0;
// Return the number of uncompressed bytes written (including header size)
virtual int64_t WriteDataPage(const DataPage& page) = 0;
// Return the number of uncompressed bytes written (including header size)
virtual int64_t WriteDictionaryPage(const DictionaryPage& page) = 0;
virtual bool has_compressor() = 0;
virtual void Compress(const Buffer& src_buffer, ResizableBuffer* dest_buffer) = 0;
};
static constexpr int WRITE_BATCH_SIZE = 1000;
class PARQUET_EXPORT ColumnWriter {
public:
virtual ~ColumnWriter() = default;
static std::shared_ptr<ColumnWriter> Make(ColumnChunkMetaDataBuilder*,
std::unique_ptr<PageWriter>,
const WriterProperties* properties);
/// \brief Closes the ColumnWriter, commits any buffered values to pages.
/// \return Total size of the column in bytes
virtual int64_t Close() = 0;
/// \brief The physical Parquet type of the column
virtual Type::type type() const = 0;
/// \brief The schema for the column
virtual const ColumnDescriptor* descr() const = 0;
/// \brief The number of rows written so far
virtual int64_t rows_written() const = 0;
/// \brief The total size of the compressed pages + page headers. Some values
/// might be still buffered and not written to a page yet
virtual int64_t total_compressed_bytes() const = 0;
/// \brief The total number of bytes written as serialized data and
/// dictionary pages to the ColumnChunk so far
virtual int64_t total_bytes_written() const = 0;
/// \brief The file-level writer properties
virtual const WriterProperties* properties() = 0;
/// \brief Write Apache Arrow columnar data directly to ColumnWriter. Returns
/// error status if the array data type is not compatible with the concrete
/// writer type.
///
  /// leaf_array is always of a primitive (possibly dictionary-encoded) type.
  /// leaf_field_nullable indicates whether the leaf array is considered nullable
/// according to its schema in a Table or its parent array.
virtual ::arrow::Status WriteArrow(const int16_t* def_levels, const int16_t* rep_levels,
int64_t num_levels, const ::arrow::Array& leaf_array,
ArrowWriteContext* ctx,
bool leaf_field_nullable) = 0;
};
// API to write values to a single column. This is the main client facing API.
template <typename DType>
class TypedColumnWriter : public ColumnWriter {
public:
using T = typename DType::c_type;
// Write a batch of repetition levels, definition levels, and values to the
// column.
// `num_values` is the number of logical leaf values.
// `def_levels` (resp. `rep_levels`) can be null if the column's max definition level
// (resp. max repetition level) is 0.
// If not null, each of `def_levels` and `rep_levels` must have at least
// `num_values`.
//
// The number of physical values written (taken from `values`) is returned.
  // It can be smaller than `num_values` if there are some undefined (null) values.
virtual int64_t WriteBatch(int64_t num_values, const int16_t* def_levels,
const int16_t* rep_levels, const T* values) = 0;
/// Write a batch of repetition levels, definition levels, and values to the
/// column.
///
  /// In comparison to WriteBatch, the length of the repetition and definition
  /// levels is the same as the number of values written when max_definition_level == 1.
/// In the case of max_definition_level > 1, the repetition and definition
/// levels are larger than the values but the values include the null entries
  /// with definition_level == (max_definition_level - 1). Thus the parameters of
  /// this function must distinguish whether an input has length num_values or the
  /// _number of rows in the lowest nesting level_.
///
  /// If the innermost schema node is required, the _number of rows
  /// in the lowest nesting level_ is equal to the number of non-null values. If the
  /// innermost schema node is optional, the _number of rows in the lowest nesting level_
/// also includes all values with definition_level == (max_definition_level - 1).
///
/// @param num_values number of levels to write.
/// @param def_levels The Parquet definition levels, length is num_values
/// @param rep_levels The Parquet repetition levels, length is num_values
/// @param valid_bits Bitmap that indicates if the row is null on the lowest nesting
/// level. The length is number of rows in the lowest nesting level.
/// @param valid_bits_offset The offset in bits of the valid_bits where the
/// first relevant bit resides.
/// @param values The values in the lowest nested level including
/// spacing for nulls on the lowest levels; input has the length
/// of the number of rows on the lowest nesting level.
virtual void WriteBatchSpaced(int64_t num_values, const int16_t* def_levels,
const int16_t* rep_levels, const uint8_t* valid_bits,
int64_t valid_bits_offset, const T* values) = 0;
// Estimated size of the values that are not written to a page yet
virtual int64_t EstimatedBufferedValueBytes() const = 0;
};
using BoolWriter = TypedColumnWriter<BooleanType>;
using Int32Writer = TypedColumnWriter<Int32Type>;
using Int64Writer = TypedColumnWriter<Int64Type>;
using Int96Writer = TypedColumnWriter<Int96Type>;
using FloatWriter = TypedColumnWriter<FloatType>;
using DoubleWriter = TypedColumnWriter<DoubleType>;
using ByteArrayWriter = TypedColumnWriter<ByteArrayType>;
using FixedLenByteArrayWriter = TypedColumnWriter<FLBAType>;
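// Usage sketch (assumptions: `writer` is a ColumnWriter* for a required INT64
// column, e.g. obtained from RowGroupWriter::NextColumn()):
//
//   auto* int64_writer = static_cast<Int64Writer*>(writer);
//   std::vector<int64_t> values = {1, 2, 3};
//   // def_levels/rep_levels may be null since max def/rep levels are 0:
//   int64_writer->WriteBatch(static_cast<int64_t>(values.size()),
//                            /*def_levels=*/nullptr, /*rep_levels=*/nullptr,
//                            values.data());
//   int64_t total_column_bytes = int64_writer->Close();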
namespace internal {
/**
* Timestamp conversion constants
*/
constexpr int64_t kJulianEpochOffsetDays = INT64_C(2440588);
template <int64_t UnitPerDay, int64_t NanosecondsPerUnit>
inline void ArrowTimestampToImpalaTimestamp(const int64_t time, Int96* impala_timestamp) {
int64_t julian_days = (time / UnitPerDay) + kJulianEpochOffsetDays;
(*impala_timestamp).value[2] = (uint32_t)julian_days;
int64_t last_day_units = time % UnitPerDay;
auto last_day_nanos = last_day_units * NanosecondsPerUnit;
// impala_timestamp will be unaligned every other entry so do memcpy instead
// of assign and reinterpret cast to avoid undefined behavior.
std::memcpy(impala_timestamp, &last_day_nanos, sizeof(int64_t));
}
constexpr int64_t kSecondsInNanos = INT64_C(1000000000);
inline void SecondsToImpalaTimestamp(const int64_t seconds, Int96* impala_timestamp) {
ArrowTimestampToImpalaTimestamp<kSecondsPerDay, kSecondsInNanos>(seconds,
impala_timestamp);
}
constexpr int64_t kMillisecondsInNanos = kSecondsInNanos / INT64_C(1000);
inline void MillisecondsToImpalaTimestamp(const int64_t milliseconds,
Int96* impala_timestamp) {
ArrowTimestampToImpalaTimestamp<kMillisecondsPerDay, kMillisecondsInNanos>(
milliseconds, impala_timestamp);
}
constexpr int64_t kMicrosecondsInNanos = kMillisecondsInNanos / INT64_C(1000);
inline void MicrosecondsToImpalaTimestamp(const int64_t microseconds,
Int96* impala_timestamp) {
ArrowTimestampToImpalaTimestamp<kMicrosecondsPerDay, kMicrosecondsInNanos>(
microseconds, impala_timestamp);
}
constexpr int64_t kNanosecondsInNanos = INT64_C(1);
inline void NanosecondsToImpalaTimestamp(const int64_t nanoseconds,
Int96* impala_timestamp) {
ArrowTimestampToImpalaTimestamp<kNanosecondsPerDay, kNanosecondsInNanos>(
nanoseconds, impala_timestamp);
}
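// Worked example: 0 seconds since the Unix epoch (1970-01-01) lands on
// Julian day 2440588 with zero nanoseconds within the day:
//
//   Int96 ts;
//   SecondsToImpalaTimestamp(/*seconds=*/0, &ts);
//   // ts.value[2] == 2440588; the first 8 bytes hold 0 nanoseconds.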
} // namespace internal
} // namespace parquet

View File

@ -0,0 +1,460 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>
#include "arrow/util/spaced.h"
#include "parquet/exception.h"
#include "parquet/platform.h"
#include "parquet/types.h"
namespace arrow {
class Array;
class ArrayBuilder;
class BinaryArray;
class BinaryBuilder;
class BooleanBuilder;
class Int32Type;
class Int64Type;
class FloatType;
class DoubleType;
class FixedSizeBinaryType;
template <typename T>
class NumericBuilder;
class FixedSizeBinaryBuilder;
template <typename T>
class Dictionary32Builder;
} // namespace arrow
namespace parquet {
template <typename DType>
class TypedEncoder;
using BooleanEncoder = TypedEncoder<BooleanType>;
using Int32Encoder = TypedEncoder<Int32Type>;
using Int64Encoder = TypedEncoder<Int64Type>;
using Int96Encoder = TypedEncoder<Int96Type>;
using FloatEncoder = TypedEncoder<FloatType>;
using DoubleEncoder = TypedEncoder<DoubleType>;
using ByteArrayEncoder = TypedEncoder<ByteArrayType>;
using FLBAEncoder = TypedEncoder<FLBAType>;
template <typename DType>
class TypedDecoder;
class BooleanDecoder;
using Int32Decoder = TypedDecoder<Int32Type>;
using Int64Decoder = TypedDecoder<Int64Type>;
using Int96Decoder = TypedDecoder<Int96Type>;
using FloatDecoder = TypedDecoder<FloatType>;
using DoubleDecoder = TypedDecoder<DoubleType>;
using ByteArrayDecoder = TypedDecoder<ByteArrayType>;
class FLBADecoder;
template <typename T>
struct EncodingTraits;
template <>
struct EncodingTraits<BooleanType> {
using Encoder = BooleanEncoder;
using Decoder = BooleanDecoder;
using ArrowType = ::arrow::BooleanType;
using Accumulator = ::arrow::BooleanBuilder;
struct DictAccumulator {};
};
template <>
struct EncodingTraits<Int32Type> {
using Encoder = Int32Encoder;
using Decoder = Int32Decoder;
using ArrowType = ::arrow::Int32Type;
using Accumulator = ::arrow::NumericBuilder<::arrow::Int32Type>;
using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::Int32Type>;
};
template <>
struct EncodingTraits<Int64Type> {
using Encoder = Int64Encoder;
using Decoder = Int64Decoder;
using ArrowType = ::arrow::Int64Type;
using Accumulator = ::arrow::NumericBuilder<::arrow::Int64Type>;
using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::Int64Type>;
};
template <>
struct EncodingTraits<Int96Type> {
using Encoder = Int96Encoder;
using Decoder = Int96Decoder;
struct Accumulator {};
struct DictAccumulator {};
};
template <>
struct EncodingTraits<FloatType> {
using Encoder = FloatEncoder;
using Decoder = FloatDecoder;
using ArrowType = ::arrow::FloatType;
using Accumulator = ::arrow::NumericBuilder<::arrow::FloatType>;
using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::FloatType>;
};
template <>
struct EncodingTraits<DoubleType> {
using Encoder = DoubleEncoder;
using Decoder = DoubleDecoder;
using ArrowType = ::arrow::DoubleType;
using Accumulator = ::arrow::NumericBuilder<::arrow::DoubleType>;
using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::DoubleType>;
};
template <>
struct EncodingTraits<ByteArrayType> {
using Encoder = ByteArrayEncoder;
using Decoder = ByteArrayDecoder;
/// \brief Internal helper class for decoding BYTE_ARRAY data where we can
/// overflow the capacity of a single arrow::BinaryArray
struct Accumulator {
std::unique_ptr<::arrow::BinaryBuilder> builder;
std::vector<std::shared_ptr<::arrow::Array>> chunks;
};
using ArrowType = ::arrow::BinaryType;
using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::BinaryType>;
};
template <>
struct EncodingTraits<FLBAType> {
using Encoder = FLBAEncoder;
using Decoder = FLBADecoder;
using ArrowType = ::arrow::FixedSizeBinaryType;
using Accumulator = ::arrow::FixedSizeBinaryBuilder;
using DictAccumulator = ::arrow::Dictionary32Builder<::arrow::FixedSizeBinaryType>;
};
class ColumnDescriptor;
// Untyped base for all encoders
class Encoder {
public:
virtual ~Encoder() = default;
virtual int64_t EstimatedDataEncodedSize() = 0;
virtual std::shared_ptr<Buffer> FlushValues() = 0;
virtual Encoding::type encoding() const = 0;
virtual void Put(const ::arrow::Array& values) = 0;
virtual MemoryPool* memory_pool() const = 0;
};
// Base class for value encoders. Since encoders may or may not have state (e.g.,
// dictionary encoding) we use a class instance to maintain any state.
//
// Encode interfaces are internal, subject to change without deprecation.
template <typename DType>
class TypedEncoder : virtual public Encoder {
public:
typedef typename DType::c_type T;
using Encoder::Put;
virtual void Put(const T* src, int num_values) = 0;
virtual void Put(const std::vector<T>& src, int num_values = -1);
virtual void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
int64_t valid_bits_offset) = 0;
};
template <typename DType>
void TypedEncoder<DType>::Put(const std::vector<T>& src, int num_values) {
if (num_values == -1) {
num_values = static_cast<int>(src.size());
}
Put(src.data(), num_values);
}
template <>
inline void TypedEncoder<BooleanType>::Put(const std::vector<bool>& src, int num_values) {
// NOTE(wesm): This stub is here only to satisfy the compiler; it is
// overridden later with the actual implementation
}
// Base class for dictionary encoders
template <typename DType>
class DictEncoder : virtual public TypedEncoder<DType> {
public:
/// Writes out any buffered indices to buffer preceded by the bit width of this data.
/// Returns the number of bytes written.
/// If the supplied buffer is not big enough, returns -1.
/// buffer must be preallocated with buffer_len bytes. Use EstimatedDataEncodedSize()
/// to size buffer.
virtual int WriteIndices(uint8_t* buffer, int buffer_len) = 0;
virtual int dict_encoded_size() = 0;
virtual int bit_width() const = 0;
/// Writes out the encoded dictionary to buffer. buffer must be preallocated to
/// dict_encoded_size() bytes.
virtual void WriteDict(uint8_t* buffer) = 0;
virtual int num_entries() const = 0;
/// \brief EXPERIMENTAL: Append dictionary indices into the encoder. It is
/// assumed (without any boundschecking) that the indices reference
/// pre-existing dictionary values
/// \param[in] indices the dictionary index values. Only Int32Array currently
/// supported
virtual void PutIndices(const ::arrow::Array& indices) = 0;
/// \brief EXPERIMENTAL: Append dictionary into encoder, inserting indices
/// separately. Currently throws exception if the current dictionary memo is
/// non-empty
/// \param[in] values the dictionary values. Only valid for certain
/// Parquet/Arrow type combinations, like BYTE_ARRAY/BinaryArray
virtual void PutDictionary(const ::arrow::Array& values) = 0;
};
// ----------------------------------------------------------------------
// Value decoding
class Decoder {
public:
virtual ~Decoder() = default;
// Sets the data for a new page. This will be called multiple times on the same
// decoder and should reset all internal state.
virtual void SetData(int num_values, const uint8_t* data, int len) = 0;
// Returns the number of values left (for the last call to SetData()). This is
// the number of values left in this page.
virtual int values_left() const = 0;
virtual Encoding::type encoding() const = 0;
};
template <typename DType>
class TypedDecoder : virtual public Decoder {
public:
using T = typename DType::c_type;
/// \brief Decode values into a buffer
///
/// Subclasses may override the more specialized Decode methods below.
///
/// \param[in] buffer destination for decoded values
/// \param[in] max_values maximum number of values to decode
/// \return The number of values decoded. Should be identical to max_values except
/// at the end of the current data page.
virtual int Decode(T* buffer, int max_values) = 0;
/// \brief Decode the values in this data page but leave spaces for null entries.
///
/// \param[in] buffer destination for decoded values
/// \param[in] num_values size of the def_levels and buffer arrays including the number
/// of null slots
/// \param[in] null_count number of null slots
/// \param[in] valid_bits bitmap data indicating position of valid slots
/// \param[in] valid_bits_offset offset into valid_bits
/// \return The number of values decoded, including nulls.
virtual int DecodeSpaced(T* buffer, int num_values, int null_count,
const uint8_t* valid_bits, int64_t valid_bits_offset) {
if (null_count > 0) {
int values_to_read = num_values - null_count;
int values_read = Decode(buffer, values_to_read);
if (values_read != values_to_read) {
throw ParquetException("Number of values / definition_levels read did not match");
}
return ::arrow::util::internal::SpacedExpand<T>(buffer, num_values, null_count,
valid_bits, valid_bits_offset);
} else {
return Decode(buffer, num_values);
}
}
/// \brief Decode into an ArrayBuilder or other accumulator
///
/// This function assumes the definition levels were already decoded
/// as a validity bitmap in the given `valid_bits`. `null_count`
/// is the number of 0s in `valid_bits`.
/// As a space optimization, it is allowed for `valid_bits` to be null
/// if `null_count` is zero.
///
/// \return number of values decoded
virtual int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
int64_t valid_bits_offset,
typename EncodingTraits<DType>::Accumulator* out) = 0;
/// \brief Decode into an ArrayBuilder or other accumulator ignoring nulls
///
/// \return number of values decoded
int DecodeArrowNonNull(int num_values,
typename EncodingTraits<DType>::Accumulator* out) {
return DecodeArrow(num_values, 0, /*valid_bits=*/NULLPTR, 0, out);
}
/// \brief Decode into a DictionaryBuilder
///
/// This function assumes the definition levels were already decoded
/// as a validity bitmap in the given `valid_bits`. `null_count`
/// is the number of 0s in `valid_bits`.
/// As a space optimization, it is allowed for `valid_bits` to be null
/// if `null_count` is zero.
///
/// \return number of values decoded
virtual int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
int64_t valid_bits_offset,
typename EncodingTraits<DType>::DictAccumulator* builder) = 0;
/// \brief Decode into a DictionaryBuilder ignoring nulls
///
/// \return number of values decoded
int DecodeArrowNonNull(int num_values,
typename EncodingTraits<DType>::DictAccumulator* builder) {
return DecodeArrow(num_values, 0, /*valid_bits=*/NULLPTR, 0, builder);
}
};
template <typename DType>
class DictDecoder : virtual public TypedDecoder<DType> {
public:
using T = typename DType::c_type;
virtual void SetDict(TypedDecoder<DType>* dictionary) = 0;
/// \brief Insert dictionary values into the Arrow dictionary builder's memo,
/// but do not append any indices
virtual void InsertDictionary(::arrow::ArrayBuilder* builder) = 0;
/// \brief Decode only dictionary indices and append to dictionary
/// builder. The builder must have had the dictionary from this decoder
/// inserted already.
///
/// \warning Remember to reset the builder each time the dict decoder is initialized
/// with a new dictionary page
virtual int DecodeIndicesSpaced(int num_values, int null_count,
const uint8_t* valid_bits, int64_t valid_bits_offset,
::arrow::ArrayBuilder* builder) = 0;
/// \brief Decode only dictionary indices (no nulls)
///
/// \warning Remember to reset the builder each time the dict decoder is initialized
/// with a new dictionary page
virtual int DecodeIndices(int num_values, ::arrow::ArrayBuilder* builder) = 0;
/// \brief Decode only dictionary indices (no nulls). Same as above
/// DecodeIndices but target is an array instead of a builder.
///
/// \note API EXPERIMENTAL
virtual int DecodeIndices(int num_values, int32_t* indices) = 0;
/// \brief Get dictionary. The reader will call this API when it encounters a
/// new dictionary.
///
/// @param[out] dictionary The pointer to dictionary values. Dictionary is owned by
/// the decoder and is destroyed when the decoder is destroyed.
/// @param[out] dictionary_length The dictionary length.
///
/// \note API EXPERIMENTAL
virtual void GetDictionary(const T** dictionary, int32_t* dictionary_length) = 0;
};
// ----------------------------------------------------------------------
// TypedEncoder specializations, traits, and factory functions
class BooleanDecoder : virtual public TypedDecoder<BooleanType> {
public:
using TypedDecoder<BooleanType>::Decode;
virtual int Decode(uint8_t* buffer, int max_values) = 0;
};
class FLBADecoder : virtual public TypedDecoder<FLBAType> {
public:
using TypedDecoder<FLBAType>::DecodeSpaced;
// TODO(wesm): As possible follow-up to PARQUET-1508, we should examine if
// there is value in adding specialized read methods for
// FIXED_LEN_BYTE_ARRAY. If only Decimal data can occur with this data type
// then perhaps not
};
PARQUET_EXPORT
std::unique_ptr<Encoder> MakeEncoder(
Type::type type_num, Encoding::type encoding, bool use_dictionary = false,
const ColumnDescriptor* descr = NULLPTR,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
template <typename DType>
std::unique_ptr<typename EncodingTraits<DType>::Encoder> MakeTypedEncoder(
Encoding::type encoding, bool use_dictionary = false,
const ColumnDescriptor* descr = NULLPTR,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) {
using OutType = typename EncodingTraits<DType>::Encoder;
std::unique_ptr<Encoder> base =
MakeEncoder(DType::type_num, encoding, use_dictionary, descr, pool);
return std::unique_ptr<OutType>(dynamic_cast<OutType*>(base.release()));
}
PARQUET_EXPORT
std::unique_ptr<Decoder> MakeDecoder(Type::type type_num, Encoding::type encoding,
const ColumnDescriptor* descr = NULLPTR);
namespace detail {
PARQUET_EXPORT
std::unique_ptr<Decoder> MakeDictDecoder(Type::type type_num,
const ColumnDescriptor* descr,
::arrow::MemoryPool* pool);
} // namespace detail
template <typename DType>
std::unique_ptr<DictDecoder<DType>> MakeDictDecoder(
const ColumnDescriptor* descr = NULLPTR,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) {
using OutType = DictDecoder<DType>;
auto decoder = detail::MakeDictDecoder(DType::type_num, descr, pool);
return std::unique_ptr<OutType>(dynamic_cast<OutType*>(decoder.release()));
}
template <typename DType>
std::unique_ptr<typename EncodingTraits<DType>::Decoder> MakeTypedDecoder(
Encoding::type encoding, const ColumnDescriptor* descr = NULLPTR) {
using OutType = typename EncodingTraits<DType>::Decoder;
std::unique_ptr<Decoder> base = MakeDecoder(DType::type_num, encoding, descr);
return std::unique_ptr<OutType>(dynamic_cast<OutType*>(base.release()));
}
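// Roundtrip sketch with the factories above (PLAIN encoding; no column
// descriptor is needed for INT32):
//
//   auto encoder = MakeTypedEncoder<Int32Type>(Encoding::PLAIN);
//   std::vector<int32_t> values = {1, 2, 3};
//   encoder->Put(values.data(), static_cast<int>(values.size()));
//   std::shared_ptr<Buffer> buf = encoder->FlushValues();
//
//   auto decoder = MakeTypedDecoder<Int32Type>(Encoding::PLAIN);
//   decoder->SetData(/*num_values=*/3, buf->data(), static_cast<int>(buf->size()));
//   std::vector<int32_t> out(3);
//   int num_decoded = decoder->Decode(out.data(), /*max_values=*/3);  // == 3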
} // namespace parquet

View File

@ -0,0 +1,135 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "parquet/encryption/encryption.h"
#include "parquet/encryption/file_key_wrapper.h"
#include "parquet/encryption/key_toolkit.h"
#include "parquet/encryption/kms_client_factory.h"
#include "parquet/platform.h"
namespace parquet {
namespace encryption {
static constexpr ParquetCipher::type kDefaultEncryptionAlgorithm =
ParquetCipher::AES_GCM_V1;
static constexpr bool kDefaultPlaintextFooter = false;
static constexpr bool kDefaultDoubleWrapping = true;
static constexpr double kDefaultCacheLifetimeSeconds = 600; // 10 minutes
static constexpr bool kDefaultInternalKeyMaterial = true;
static constexpr bool kDefaultUniformEncryption = false;
static constexpr int32_t kDefaultDataKeyLengthBits = 128;
struct PARQUET_EXPORT EncryptionConfiguration {
explicit EncryptionConfiguration(const std::string& footer_key)
: footer_key(footer_key) {}
/// ID of the master key for footer encryption/signing
std::string footer_key;
/// List of columns to encrypt, with master key IDs (see HIVE-21848).
/// Format: "masterKeyID:colName,colName;masterKeyID:colName..."
/// Either
/// (1) column_keys must be set
/// or
/// (2) uniform_encryption must be set to true
/// If neither (1) nor (2) is set, or if both are set, an exception will be
/// thrown.
std::string column_keys;
/// Encrypt footer and all columns with the same encryption key.
bool uniform_encryption = kDefaultUniformEncryption;
/// Parquet encryption algorithm. Can be "AES_GCM_V1" (default), or "AES_GCM_CTR_V1".
ParquetCipher::type encryption_algorithm = kDefaultEncryptionAlgorithm;
/// Write files with plaintext footer.
/// The default is false - files are written with encrypted footer.
bool plaintext_footer = kDefaultPlaintextFooter;
/// Use double wrapping - where data encryption keys (DEKs) are encrypted with key
/// encryption keys (KEKs), which in turn are encrypted with master keys.
/// The default is true. If set to false, use single wrapping - where DEKs are
/// encrypted directly with master keys.
bool double_wrapping = kDefaultDoubleWrapping;
/// Lifetime of cached entities (key encryption keys, local wrapping keys, KMS client
/// objects).
/// The default is 600 (10 minutes).
double cache_lifetime_seconds = kDefaultCacheLifetimeSeconds;
/// Store key material inside Parquet file footers; this mode doesn't produce
/// additional files. By default, true. If set to false, key material is stored in
/// separate files in the same folder, which enables key rotation for immutable
/// Parquet files.
bool internal_key_material = kDefaultInternalKeyMaterial;
/// Length of data encryption keys (DEKs), randomly generated by parquet key
/// management tools. Can be 128, 192 or 256 bits.
/// The default is 128 bits.
int32_t data_key_length_bits = kDefaultDataKeyLengthBits;
};
struct PARQUET_EXPORT DecryptionConfiguration {
/// Lifetime of cached entities (key encryption keys, local wrapping keys, KMS client
/// objects).
/// The default is 600 (10 minutes).
double cache_lifetime_seconds = kDefaultCacheLifetimeSeconds;
};
/// This is a core class that translates the parameters of high-level encryption
/// (like the names of encrypted columns and of master keys) into the parameters
/// of low-level encryption (like key metadata and DEKs). It is a factory that
/// produces low-level FileEncryptionProperties and FileDecryptionProperties
/// objects from the high-level parameters.
class PARQUET_EXPORT CryptoFactory {
public:
/// A KmsClientFactory object must be registered via this method before calling
/// either of the GetFileEncryptionProperties()/GetFileDecryptionProperties() methods.
void RegisterKmsClientFactory(std::shared_ptr<KmsClientFactory> kms_client_factory);
std::shared_ptr<FileEncryptionProperties> GetFileEncryptionProperties(
const KmsConnectionConfig& kms_connection_config,
const EncryptionConfiguration& encryption_config);
/// The returned FileDecryptionProperties object uses the cache inside this
/// CryptoFactory object, so keep the CryptoFactory alive for as long as the
/// returned FileDecryptionProperties object is in use.
std::shared_ptr<FileDecryptionProperties> GetFileDecryptionProperties(
const KmsConnectionConfig& kms_connection_config,
const DecryptionConfiguration& decryption_config);
void RemoveCacheEntriesForToken(const std::string& access_token) {
key_toolkit_.RemoveCacheEntriesForToken(access_token);
}
void RemoveCacheEntriesForAllTokens() { key_toolkit_.RemoveCacheEntriesForAllTokens(); }
private:
ColumnPathToEncryptionPropertiesMap GetColumnEncryptionProperties(
int dek_length, const std::string& column_keys, FileKeyWrapper* key_wrapper);
/// Key utilities object for kms client initialization and cache control
KeyToolkit key_toolkit_;
};
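// Usage sketch (assumptions: `MyKmsClientFactory` is an application-provided
// KmsClientFactory implementation and `kms_config` is a populated
// KmsConnectionConfig):
//
//   CryptoFactory crypto_factory;
//   crypto_factory.RegisterKmsClientFactory(std::make_shared<MyKmsClientFactory>());
//   EncryptionConfiguration encryption_config("footer_master_key_id");
//   encryption_config.column_keys = "key1:colA,colB;key2:colC";
//   auto file_encryption_properties = crypto_factory.GetFileEncryptionProperties(
//       kms_config, encryption_config);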
} // namespace encryption
} // namespace parquet

View File

@ -0,0 +1,510 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <utility>
#include "parquet/exception.h"
#include "parquet/schema.h"
#include "parquet/types.h"
namespace parquet {
static constexpr ParquetCipher::type kDefaultEncryptionAlgorithm =
ParquetCipher::AES_GCM_V1;
static constexpr int32_t kMaximalAadMetadataLength = 256;
static constexpr bool kDefaultEncryptedFooter = true;
static constexpr bool kDefaultCheckSignature = true;
static constexpr bool kDefaultAllowPlaintextFiles = false;
static constexpr int32_t kAadFileUniqueLength = 8;
class ColumnDecryptionProperties;
using ColumnPathToDecryptionPropertiesMap =
std::map<std::string, std::shared_ptr<ColumnDecryptionProperties>>;
class ColumnEncryptionProperties;
using ColumnPathToEncryptionPropertiesMap =
std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>>;
class PARQUET_EXPORT DecryptionKeyRetriever {
public:
virtual std::string GetKey(const std::string& key_metadata) = 0;
virtual ~DecryptionKeyRetriever() {}
};
/// Simple integer key retriever
class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever {
public:
void PutKey(uint32_t key_id, const std::string& key);
std::string GetKey(const std::string& key_metadata) override;
private:
std::map<uint32_t, std::string> key_map_;
};
// Simple string key retriever
class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever {
public:
void PutKey(const std::string& key_id, const std::string& key);
std::string GetKey(const std::string& key_metadata) override;
private:
std::map<std::string, std::string> key_map_;
};
class PARQUET_EXPORT HiddenColumnException : public ParquetException {
public:
explicit HiddenColumnException(const std::string& columnPath)
: ParquetException(columnPath.c_str()) {}
};
class PARQUET_EXPORT KeyAccessDeniedException : public ParquetException {
public:
explicit KeyAccessDeniedException(const std::string& columnPath)
: ParquetException(columnPath.c_str()) {}
};
inline const uint8_t* str2bytes(const std::string& str) {
if (str.empty()) return NULLPTR;
char* cbytes = const_cast<char*>(str.c_str());
return reinterpret_cast<const uint8_t*>(cbytes);
}
class PARQUET_EXPORT ColumnEncryptionProperties {
public:
class PARQUET_EXPORT Builder {
public:
/// Convenience builder for encrypted columns.
explicit Builder(const std::string& name) : Builder(name, true) {}
/// Convenience builder for encrypted columns.
explicit Builder(const std::shared_ptr<schema::ColumnPath>& path)
: Builder(path->ToDotString(), true) {}
/// Set a column-specific key.
/// If key is not set on an encrypted column, the column will
/// be encrypted with the footer key.
/// The key length must be either 16, 24 or 32 bytes.
/// The key is cloned, and will be wiped out (array values set to 0) upon completion
/// of file writing.
/// Caller is responsible for wiping out the input key array.
Builder* key(std::string column_key);
/// Set key retrieval metadata.
/// Use either key_metadata() or key_id(), not both.
Builder* key_metadata(const std::string& key_metadata);
/// A convenience function to set key retrieval metadata using a string id;
/// key_id will be converted to metadata (UTF-8 array).
/// Use either key_metadata() or key_id(), not both.
Builder* key_id(const std::string& key_id);
std::shared_ptr<ColumnEncryptionProperties> build() {
return std::shared_ptr<ColumnEncryptionProperties>(
new ColumnEncryptionProperties(encrypted_, column_path_, key_, key_metadata_));
}
private:
const std::string column_path_;
bool encrypted_;
std::string key_;
std::string key_metadata_;
Builder(const std::string path, bool encrypted)
: column_path_(path), encrypted_(encrypted) {}
};
std::string column_path() const { return column_path_; }
bool is_encrypted() const { return encrypted_; }
bool is_encrypted_with_footer_key() const { return encrypted_with_footer_key_; }
std::string key() const { return key_; }
std::string key_metadata() const { return key_metadata_; }
/// Upon completion of file writing, the encryption key
/// will be wiped out.
void WipeOutEncryptionKey() { key_.clear(); }
bool is_utilized() {
if (key_.empty())
return false; // can re-use column properties without encryption keys
return utilized_;
}
/// ColumnEncryptionProperties object can be used for writing one file only.
/// Mark ColumnEncryptionProperties as utilized once it is used in
/// FileEncryptionProperties as the encryption key will be wiped out upon
/// completion of file writing.
void set_utilized() { utilized_ = true; }
std::shared_ptr<ColumnEncryptionProperties> DeepClone() {
std::string key_copy = key_;
return std::shared_ptr<ColumnEncryptionProperties>(new ColumnEncryptionProperties(
encrypted_, column_path_, key_copy, key_metadata_));
}
ColumnEncryptionProperties() = default;
ColumnEncryptionProperties(const ColumnEncryptionProperties& other) = default;
ColumnEncryptionProperties(ColumnEncryptionProperties&& other) = default;
private:
const std::string column_path_;
bool encrypted_;
bool encrypted_with_footer_key_;
std::string key_;
std::string key_metadata_;
bool utilized_;
explicit ColumnEncryptionProperties(bool encrypted, const std::string& column_path,
const std::string& key,
const std::string& key_metadata);
};
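// Usage sketch (not from the original source): building the per-column
// properties declared above. The column name and the 16-byte key value are
// hypothetical placeholders.
inline std::shared_ptr<ColumnEncryptionProperties> ExampleColumnEncryptionProperties() {
  ColumnEncryptionProperties::Builder builder("sensitive.column");
  builder.key("0123456789012345")  // key length must be 16, 24 or 32 bytes
      ->key_id("kc1");             // key-retrieval metadata, as a string id
  return builder.build();
}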
class PARQUET_EXPORT ColumnDecryptionProperties {
public:
class PARQUET_EXPORT Builder {
public:
explicit Builder(const std::string& name) : column_path_(name) {}
explicit Builder(const std::shared_ptr<schema::ColumnPath>& path)
: Builder(path->ToDotString()) {}
/// Set an explicit column key. If applied on a file that contains
/// key metadata for this column, the metadata will be ignored and
/// the column will be decrypted with this key.
/// The key length must be either 16, 24 or 32 bytes.
Builder* key(const std::string& key);
std::shared_ptr<ColumnDecryptionProperties> build();
private:
const std::string column_path_;
std::string key_;
};
ColumnDecryptionProperties() = default;
ColumnDecryptionProperties(const ColumnDecryptionProperties& other) = default;
ColumnDecryptionProperties(ColumnDecryptionProperties&& other) = default;
std::string column_path() const { return column_path_; }
std::string key() const { return key_; }
bool is_utilized() { return utilized_; }
/// ColumnDecryptionProperties object can be used for reading one file only.
/// Mark ColumnDecryptionProperties as utilized once it is used in
/// FileDecryptionProperties as the encryption key will be wiped out upon
/// completion of file reading.
void set_utilized() { utilized_ = true; }
/// Upon completion of file reading, the encryption key
/// will be wiped out.
void WipeOutDecryptionKey();
std::shared_ptr<ColumnDecryptionProperties> DeepClone();
private:
const std::string column_path_;
std::string key_;
bool utilized_;
/// This class is only required for setting explicit column decryption keys -
/// to override key retriever (or to provide keys when key metadata and/or
/// key retriever are not available)
explicit ColumnDecryptionProperties(const std::string& column_path,
const std::string& key);
};
class PARQUET_EXPORT AADPrefixVerifier {
public:
/// Verifies the identity (AAD Prefix) of an individual file,
/// or of a file collection in a data set.
/// Throws an exception if an AAD prefix is wrong.
/// In a data set, AAD Prefixes should be collected,
/// and then checked for missing files.
virtual void Verify(const std::string& aad_prefix) = 0;
virtual ~AADPrefixVerifier() {}
};
class PARQUET_EXPORT FileDecryptionProperties {
public:
class PARQUET_EXPORT Builder {
public:
Builder() {
check_plaintext_footer_integrity_ = kDefaultCheckSignature;
plaintext_files_allowed_ = kDefaultAllowPlaintextFiles;
}
/// Set an explicit footer key. If applied on a file that contains
/// footer key metadata, the metadata will be ignored and the footer
/// will be decrypted/verified with this key.
/// If an explicit key is not set, the footer key will be fetched from
/// the key retriever.
/// With explicit keys or an AAD prefix, a new properties object must be
/// created for each encrypted file.
/// Explicit encryption keys (footer and column) are cloned.
/// Upon completion of file reading, the cloned encryption keys in the
/// properties will be wiped out (array values set to 0).
/// The caller is responsible for wiping out the input key array.
/// The footer key length must be either 16, 24 or 32 bytes.
Builder* footer_key(const std::string footer_key);
/// Set explicit column keys (decryption properties).
/// It's also possible to set a key retriever on this property object.
/// Upon file decryption, availability of explicit keys is checked before
/// invocation of the retriever callback.
/// If an explicit key is available for a footer or a column,
/// its key metadata will be ignored.
Builder* column_keys(
const ColumnPathToDecryptionPropertiesMap& column_decryption_properties);
/// Set a key retriever callback. It's also possible to
/// set explicit footer or column keys on this file property object.
/// Upon file decryption, availability of explicit keys is checked before
/// invocation of the retriever callback.
/// If an explicit key is available for a footer or a column,
/// its key metadata will be ignored.
Builder* key_retriever(const std::shared_ptr<DecryptionKeyRetriever>& key_retriever);
/// Skip integrity verification of plaintext footers.
/// If not called, the integrity of plaintext footers will be checked at runtime,
/// and an exception will be thrown in the following situations:
/// - the footer signing key is not available
///   (not passed, or not found by the key retriever)
/// - the footer content and signature don't match
Builder* disable_footer_signature_verification() {
check_plaintext_footer_integrity_ = false;
return this;
}
/// Explicitly supply the file AAD prefix.
/// Required when a prefix is used for file encryption but is not stored
/// in the file.
/// If the AAD prefix is stored in the file, it will be compared to the
/// explicitly supplied value, and an exception will be thrown if they differ.
Builder* aad_prefix(const std::string& aad_prefix);
/// Set callback for verification of AAD Prefixes stored in file.
Builder* aad_prefix_verifier(std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier);
/// By default, reading plaintext (unencrypted) files is not allowed when
/// using a decryptor, in order to detect files that were not encrypted
/// by mistake.
/// However, the default behavior can be overridden by calling this method.
/// The caller should then use a different method to ensure encryption
/// of files with sensitive data.
Builder* plaintext_files_allowed() {
plaintext_files_allowed_ = true;
return this;
}
std::shared_ptr<FileDecryptionProperties> build() {
return std::shared_ptr<FileDecryptionProperties>(new FileDecryptionProperties(
footer_key_, key_retriever_, check_plaintext_footer_integrity_, aad_prefix_,
aad_prefix_verifier_, column_decryption_properties_, plaintext_files_allowed_));
}
private:
std::string footer_key_;
std::string aad_prefix_;
std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
bool check_plaintext_footer_integrity_;
bool plaintext_files_allowed_;
};
std::string column_key(const std::string& column_path) const;
std::string footer_key() const { return footer_key_; }
std::string aad_prefix() const { return aad_prefix_; }
const std::shared_ptr<DecryptionKeyRetriever>& key_retriever() const {
return key_retriever_;
}
bool check_plaintext_footer_integrity() const {
return check_plaintext_footer_integrity_;
}
bool plaintext_files_allowed() const { return plaintext_files_allowed_; }
const std::shared_ptr<AADPrefixVerifier>& aad_prefix_verifier() const {
return aad_prefix_verifier_;
}
/// Upon completion of file reading, the encryption keys in the properties
/// will be wiped out (array values set to 0).
void WipeOutDecryptionKeys();
bool is_utilized();
/// FileDecryptionProperties object can be used for reading one file only.
/// Mark FileDecryptionProperties as utilized once it is used to read a file as the
/// encryption keys will be wiped out upon completion of file reading.
void set_utilized() { utilized_ = true; }
/// A FileDecryptionProperties object can be used for reading one file only
/// (unless the object keeps only the key retriever callback, and no explicit
/// keys or AAD prefix).
/// At the end, the keys are wiped out in memory.
/// This method allows cloning identical properties for another file,
/// with an option to update the AAD prefix (if new_aad_prefix is empty,
/// the AAD prefix will be cloned too).
std::shared_ptr<FileDecryptionProperties> DeepClone(std::string new_aad_prefix = "");
private:
std::string footer_key_;
std::string aad_prefix_;
std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
const std::string empty_string_ = "";
ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
bool check_plaintext_footer_integrity_;
bool plaintext_files_allowed_;
bool utilized_;
FileDecryptionProperties(
const std::string& footer_key,
std::shared_ptr<DecryptionKeyRetriever> key_retriever,
bool check_plaintext_footer_integrity, const std::string& aad_prefix,
std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier,
const ColumnPathToDecryptionPropertiesMap& column_decryption_properties,
bool plaintext_files_allowed);
};
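// Usage sketch (not from the original source): building decryption properties
// that rely on a StringKeyIdRetriever. The key ids and key bytes below are
// hypothetical placeholders.
inline std::shared_ptr<FileDecryptionProperties> ExampleFileDecryptionProperties() {
  auto retriever = std::make_shared<StringKeyIdRetriever>();
  retriever->PutKey("kf", "0123456789012345");   // footer key
  retriever->PutKey("kc1", "1234567890123450");  // column key
  FileDecryptionProperties::Builder builder;
  builder.key_retriever(retriever);
  return builder.build();
}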
class PARQUET_EXPORT FileEncryptionProperties {
public:
class PARQUET_EXPORT Builder {
public:
explicit Builder(const std::string& footer_key)
: parquet_cipher_(kDefaultEncryptionAlgorithm),
encrypted_footer_(kDefaultEncryptedFooter) {
footer_key_ = footer_key;
store_aad_prefix_in_file_ = false;
}
/// Create files with plaintext footer.
/// If not called, the files will be created with encrypted footer (default).
Builder* set_plaintext_footer() {
encrypted_footer_ = false;
return this;
}
/// Set encryption algorithm.
/// If not called, files will be encrypted with AES_GCM_V1 (default).
Builder* algorithm(ParquetCipher::type parquet_cipher) {
parquet_cipher_ = parquet_cipher;
return this;
}
/// Set key retrieval metadata (converted from String).
/// Use either footer_key_metadata or footer_key_id, not both.
Builder* footer_key_id(const std::string& key_id);
/// Set key retrieval metadata.
/// Use either footer_key_metadata or footer_key_id, not both.
Builder* footer_key_metadata(const std::string& footer_key_metadata);
/// Set the file AAD Prefix.
Builder* aad_prefix(const std::string& aad_prefix);
/// Skip storing AAD Prefix in file.
/// If not called, and if AAD Prefix is set, it will be stored.
Builder* disable_aad_prefix_storage();
/// Set the list of encrypted columns and their properties (keys etc).
/// If not called, all columns will be encrypted with the footer key.
/// If called, the file columns not in the list will be left unencrypted.
Builder* encrypted_columns(
const ColumnPathToEncryptionPropertiesMap& encrypted_columns);
std::shared_ptr<FileEncryptionProperties> build() {
return std::shared_ptr<FileEncryptionProperties>(new FileEncryptionProperties(
parquet_cipher_, footer_key_, footer_key_metadata_, encrypted_footer_,
aad_prefix_, store_aad_prefix_in_file_, encrypted_columns_));
}
private:
ParquetCipher::type parquet_cipher_;
bool encrypted_footer_;
std::string footer_key_;
std::string footer_key_metadata_;
std::string aad_prefix_;
bool store_aad_prefix_in_file_;
ColumnPathToEncryptionPropertiesMap encrypted_columns_;
};
bool encrypted_footer() const { return encrypted_footer_; }
EncryptionAlgorithm algorithm() const { return algorithm_; }
std::string footer_key() const { return footer_key_; }
std::string footer_key_metadata() const { return footer_key_metadata_; }
std::string file_aad() const { return file_aad_; }
std::shared_ptr<ColumnEncryptionProperties> column_encryption_properties(
const std::string& column_path);
bool is_utilized() const { return utilized_; }
/// FileEncryptionProperties object can be used for writing one file only.
/// Mark FileEncryptionProperties as utilized once it is used to write a file as the
/// encryption keys will be wiped out upon completion of file writing.
void set_utilized() { utilized_ = true; }
/// Upon completion of file writing, the encryption keys
/// will be wiped out (array values set to 0).
void WipeOutEncryptionKeys();
/// A FileEncryptionProperties object can be used for writing one file only
/// (at the end, the keys are wiped out in memory).
/// This method allows cloning identical properties for another file,
/// with an option to update the AAD prefix (if new_aad_prefix is empty,
/// the AAD prefix will be cloned too).
std::shared_ptr<FileEncryptionProperties> DeepClone(std::string new_aad_prefix = "");
ColumnPathToEncryptionPropertiesMap encrypted_columns() const {
return encrypted_columns_;
}
private:
EncryptionAlgorithm algorithm_;
std::string footer_key_;
std::string footer_key_metadata_;
bool encrypted_footer_;
std::string file_aad_;
std::string aad_prefix_;
bool utilized_;
bool store_aad_prefix_in_file_;
ColumnPathToEncryptionPropertiesMap encrypted_columns_;
FileEncryptionProperties(ParquetCipher::type cipher, const std::string& footer_key,
const std::string& footer_key_metadata, bool encrypted_footer,
const std::string& aad_prefix, bool store_aad_prefix_in_file,
const ColumnPathToEncryptionPropertiesMap& encrypted_columns);
};
} // namespace parquet
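// Usage sketch (not from the original source): assembling file-level
// encryption properties with one encrypted column. The column name, key ids
// and 16-byte keys are hypothetical placeholders.
namespace parquet {
inline std::shared_ptr<FileEncryptionProperties> ExampleFileEncryptionProperties() {
  ColumnEncryptionProperties::Builder column_builder("sensitive.column");
  column_builder.key("1234567890123450")->key_id("kc1");
  ColumnPathToEncryptionPropertiesMap encrypted_columns;
  encrypted_columns["sensitive.column"] = column_builder.build();
  FileEncryptionProperties::Builder file_builder("0123456789012345");  // footer key
  file_builder.footer_key_id("kf")->encrypted_columns(encrypted_columns);
  return file_builder.build();
}
}  // namespace parquet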

View File

@ -0,0 +1,31 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License") = 0; you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
namespace parquet {
namespace encryption {
// Key material can be stored outside the Parquet file, for example in a separate small
// file in the same folder. This is important for "key rotation", when MEKs have to be
// changed (if compromised, or periodically, just in case) without modifying the Parquet
// files (which are often immutable).
// TODO: details will be implemented later
class FileKeyMaterialStore {};
} // namespace encryption
} // namespace parquet

View File

@ -0,0 +1,66 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/util/concurrent_map.h"
#include "parquet/encryption/encryption.h"
#include "parquet/encryption/key_material.h"
#include "parquet/encryption/key_toolkit.h"
#include "parquet/encryption/key_toolkit_internal.h"
#include "parquet/encryption/kms_client.h"
#include "parquet/platform.h"
namespace parquet {
namespace encryption {
// This class will retrieve the key from "key metadata", following these steps:
// 1. Parse "key metadata" (see structure in KeyMetadata class).
// 2. Retrieve "key material", which can be stored inside or outside "key metadata".
//    Currently we don't support the case where "key material" is stored outside
//    "key metadata".
// 3. Unwrap the "data encryption key" from "key material". There are 2 modes:
//    3.1. single wrapping: the wrapped "data encryption key" is decrypted directly
//         with the "master encryption key".
//    3.2. double wrapping, in 2 steps:
//         3.2.1. the "key encryption key" is decrypted with the "master encryption key".
//         3.2.2. the "data encryption key" is decrypted with the above
//                "key encryption key".
class PARQUET_EXPORT FileKeyUnwrapper : public DecryptionKeyRetriever {
public:
/// key_toolkit and kms_connection_config are used to get a KmsClient from the
/// cache, or to create one if it is not in the cache yet.
/// cache_lifetime_seconds is the lifetime of a KmsClient in the cache.
FileKeyUnwrapper(KeyToolkit* key_toolkit,
const KmsConnectionConfig& kms_connection_config,
double cache_lifetime_seconds);
std::string GetKey(const std::string& key_metadata) override;
private:
internal::KeyWithMasterId GetDataEncryptionKey(const KeyMaterial& key_material);
std::shared_ptr<KmsClient> GetKmsClientFromConfigOrKeyMaterial(
const KeyMaterial& key_material);
/// A map of Key Encryption Key (KEK) ID -> KEK bytes, for the current token
std::shared_ptr<::arrow::util::ConcurrentMap<std::string, std::string>> kek_per_kek_id_;
KeyToolkit* key_toolkit_;
KmsConnectionConfig kms_connection_config_;
const double cache_entry_lifetime_seconds_;
};
} // namespace encryption
} // namespace parquet
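// Usage sketch (not from the original source): plugging FileKeyUnwrapper into
// FileDecryptionProperties as the key retriever. The toolkit, KMS
// configuration and cache lifetime are assumed to be provided by the caller.
namespace parquet {
namespace encryption {
inline std::shared_ptr<FileDecryptionProperties> ExampleDecryptionWithUnwrapper(
    KeyToolkit* key_toolkit, const KmsConnectionConfig& kms_connection_config) {
  auto unwrapper = std::make_shared<FileKeyUnwrapper>(
      key_toolkit, kms_connection_config, /*cache_lifetime_seconds=*/600.0);
  FileDecryptionProperties::Builder builder;
  builder.key_retriever(unwrapper);
  return builder.build();
}
}  // namespace encryption
}  // namespace parquet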

View File

@ -0,0 +1,82 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "arrow/util/concurrent_map.h"
#include "parquet/encryption/file_key_material_store.h"
#include "parquet/encryption/key_encryption_key.h"
#include "parquet/encryption/key_toolkit.h"
#include "parquet/encryption/kms_client.h"
#include "parquet/platform.h"
namespace parquet {
namespace encryption {
// This class will generate "key metadata" from "data encryption key" and "master key",
// following these steps:
// 1. Wrap "data encryption key". There are 2 modes:
// 1.1. single wrapping: encrypt "data encryption key" directly with "master encryption
// key"
// 1.2. double wrapping: 2 steps:
// 1.2.1. "key encryption key" is randomized (see KeyEncryptionKey class)
// 1.2.2. "data encryption key" is encrypted with the above "key encryption key"
// 2. Create "key material" (see structure in KeyMaterial class)
// 3. Create "key metadata" with "key material" inside or a reference to outside "key
// material" (see structure in KeyMetadata class).
// We don't support the case where "key material" is stored outside "key metadata" yet.
class PARQUET_EXPORT FileKeyWrapper {
public:
static constexpr int kKeyEncryptionKeyLength = 16;
static constexpr int kKeyEncryptionKeyIdLength = 16;
/// key_toolkit and kms_connection_config are used to get a KmsClient from the
/// cache, or to create one if it is not in the cache yet.
/// cache_entry_lifetime_seconds is the lifetime of a KmsClient in the cache.
/// key_material_store is used to store "key material" outside the parquet file;
/// pass NULL if "key material" is stored inside the parquet file.
FileKeyWrapper(KeyToolkit* key_toolkit,
const KmsConnectionConfig& kms_connection_config,
std::shared_ptr<FileKeyMaterialStore> key_material_store,
double cache_entry_lifetime_seconds, bool double_wrapping);
/// Creates key_metadata field for a given data key, via wrapping the key with the
/// master key
std::string GetEncryptionKeyMetadata(const std::string& data_key,
const std::string& master_key_id,
bool is_footer_key);
private:
KeyEncryptionKey CreateKeyEncryptionKey(const std::string& master_key_id);
/// A map of Master Encryption Key ID -> KeyEncryptionKey, for the current token
std::shared_ptr<::arrow::util::ConcurrentMap<std::string, KeyEncryptionKey>>
kek_per_master_key_id_;
std::shared_ptr<KmsClient> kms_client_;
KmsConnectionConfig kms_connection_config_;
std::shared_ptr<FileKeyMaterialStore> key_material_store_;
const double cache_entry_lifetime_seconds_;
const bool double_wrapping_;
};
} // namespace encryption
} // namespace parquet
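// Usage sketch (not from the original source): wrapping a data encryption key
// and obtaining the serialized "key metadata" for the file footer. The DEK
// bytes, master key id and cache lifetime are hypothetical placeholders.
namespace parquet {
namespace encryption {
inline std::string ExampleWrapFooterKey(
    KeyToolkit* key_toolkit, const KmsConnectionConfig& kms_connection_config) {
  FileKeyWrapper wrapper(key_toolkit, kms_connection_config,
                         /*key_material_store=*/nullptr,
                         /*cache_entry_lifetime_seconds=*/600.0,
                         /*double_wrapping=*/true);
  std::string data_key = "0123456789012345";  // a real DEK would be random bytes
  return wrapper.GetEncryptionKeyMetadata(data_key, /*master_key_id=*/"kf",
                                          /*is_footer_key=*/true);
}
}  // namespace encryption
}  // namespace parquet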

View File

@ -0,0 +1,59 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <vector>
#include "arrow/util/base64.h"
namespace parquet {
namespace encryption {
// In the double wrapping mode, each "data encryption key" (DEK) is encrypted with a
// "key encryption key" (KEK), that in turn is encrypted with a "master encryption key"
// (MEK). In a writer process, a random KEK is generated for each MEK ID, and cached in
// a <MEK-ID : KEK> map. This allows interacting with a KMS server only once for each
// MEK, in order to wrap its KEK. "Data encryption key" (DEK) wrapping is performed
// locally, and does not involve an interaction with a KMS server.
class KeyEncryptionKey {
public:
KeyEncryptionKey(std::string kek_bytes, std::string kek_id,
std::string encoded_wrapped_kek)
: kek_bytes_(std::move(kek_bytes)),
kek_id_(std::move(kek_id)),
encoded_kek_id_(::arrow::util::base64_encode(kek_id_)),
encoded_wrapped_kek_(std::move(encoded_wrapped_kek)) {}
const std::string& kek_bytes() const { return kek_bytes_; }
const std::string& kek_id() const { return kek_id_; }
const std::string& encoded_kek_id() const { return encoded_kek_id_; }
const std::string& encoded_wrapped_kek() const { return encoded_wrapped_kek_; }
private:
std::string kek_bytes_;
std::string kek_id_;
std::string encoded_kek_id_;
std::string encoded_wrapped_kek_;
};
} // namespace encryption
} // namespace parquet
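// Usage sketch (not from the original source): constructing a KeyEncryptionKey
// for the cache described above. All byte strings are hypothetical
// placeholders; in practice the KEK bytes and id are generated randomly, and
// the wrapped KEK comes back from the KMS server.
namespace parquet {
namespace encryption {
inline KeyEncryptionKey ExampleKeyEncryptionKey() {
  std::string kek_bytes = "0123456789012345";  // 16 random bytes in practice
  std::string kek_id = "kek-0001";
  std::string encoded_wrapped_kek = "<KEK wrapped by the MEK, base64-encoded>";
  KeyEncryptionKey kek(kek_bytes, kek_id, encoded_wrapped_kek);
  // kek.encoded_kek_id() now holds base64_encode(kek_id).
  return kek;
}
}  // namespace encryption
}  // namespace parquet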

View File

@ -0,0 +1,131 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <string>
#include "parquet/platform.h"
namespace arrow {
namespace json {
namespace internal {
class ObjectParser;
} // namespace internal
} // namespace json
} // namespace arrow
namespace parquet {
namespace encryption {
// KeyMaterial class represents the "key material", keeping the information that allows
// readers to recover an encryption key (see description of the KeyMetadata class). The
// keytools package (PARQUET-1373) implements the "envelope encryption" pattern, in a
// "single wrapping" or "double wrapping" mode. In the single wrapping mode, the key
// material is generated by encrypting the "data encryption key" (DEK) by a "master key".
// In the double wrapping mode, the key material is generated by encrypting the DEK by a
// "key encryption key" (KEK), that in turn is encrypted by a "master key".
//
// Key material is kept in a flat json object, with the following fields:
// 1. "keyMaterialType" - a String, with the type of key material. In the current
// version, only one value is allowed - "PKMT1" (stands
// for "parquet key management tools, version 1"). For external key material storage,
// this field is written in both "key metadata" and "key material" jsons. For internal
// key material storage, this field is written only once in the common json.
// 2. "isFooterKey" - a boolean. If true, means that the material belongs to a file footer
// key, and keeps additional information (such as
// KMS instance ID and URL). If false, means that the material belongs to a column
// key.
// 3. "kmsInstanceID" - a String, with the KMS Instance ID. Written only in footer key
// material.
// 4. "kmsInstanceURL" - a String, with the KMS Instance URL. Written only in footer key
// material.
// 5. "masterKeyID" - a String, with the ID of the master key used to generate the
// material.
// 6. "wrappedDEK" - a String, with the wrapped DEK (base64 encoding).
// 7. "doubleWrapping" - a boolean. If true, the material was generated in double
//    wrapping mode; if false, in single wrapping mode.
// 8. "keyEncryptionKeyID" - a String, with the ID of the KEK used to generate the
// material. Written only in double wrapping mode.
// 9. "wrappedKEK" - a String, with the wrapped KEK (base64 encoding). Written only in
// double wrapping mode.
class PARQUET_EXPORT KeyMaterial {
public:
// these fields are defined in a specification and should never be changed
static constexpr const char kKeyMaterialTypeField[] = "keyMaterialType";
static constexpr const char kKeyMaterialType1[] = "PKMT1";
static constexpr const char kFooterKeyIdInFile[] = "footerKey";
static constexpr const char kColumnKeyIdInFilePrefix[] = "columnKey";
static constexpr const char kIsFooterKeyField[] = "isFooterKey";
static constexpr const char kDoubleWrappingField[] = "doubleWrapping";
static constexpr const char kKmsInstanceIdField[] = "kmsInstanceID";
static constexpr const char kKmsInstanceUrlField[] = "kmsInstanceURL";
static constexpr const char kMasterKeyIdField[] = "masterKeyID";
static constexpr const char kWrappedDataEncryptionKeyField[] = "wrappedDEK";
static constexpr const char kKeyEncryptionKeyIdField[] = "keyEncryptionKeyID";
static constexpr const char kWrappedKeyEncryptionKeyField[] = "wrappedKEK";
public:
KeyMaterial() = default;
static KeyMaterial Parse(const std::string& key_material_string);
static KeyMaterial Parse(
const ::arrow::json::internal::ObjectParser* key_material_json);
/// This method returns a json string that will be stored either inside a parquet file
/// or in a key material store outside the parquet file.
static std::string SerializeToJson(bool is_footer_key,
const std::string& kms_instance_id,
const std::string& kms_instance_url,
const std::string& master_key_id,
bool is_double_wrapped, const std::string& kek_id,
const std::string& encoded_wrapped_kek,
const std::string& encoded_wrapped_dek,
bool is_internal_storage);
bool is_footer_key() const { return is_footer_key_; }
bool is_double_wrapped() const { return is_double_wrapped_; }
const std::string& master_key_id() const { return master_key_id_; }
const std::string& wrapped_dek() const { return encoded_wrapped_dek_; }
const std::string& kek_id() const { return kek_id_; }
const std::string& wrapped_kek() const { return encoded_wrapped_kek_; }
const std::string& kms_instance_id() const { return kms_instance_id_; }
const std::string& kms_instance_url() const { return kms_instance_url_; }
private:
KeyMaterial(bool is_footer_key, const std::string& kms_instance_id,
const std::string& kms_instance_url, const std::string& master_key_id,
bool is_double_wrapped, const std::string& kek_id,
const std::string& encoded_wrapped_kek,
const std::string& encoded_wrapped_dek);
bool is_footer_key_;
std::string kms_instance_id_;
std::string kms_instance_url_;
std::string master_key_id_;
bool is_double_wrapped_;
std::string kek_id_;
std::string encoded_wrapped_kek_;
std::string encoded_wrapped_dek_;
};
} // namespace encryption
} // namespace parquet
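// Usage sketch (not from the original source): a serialize/parse round trip
// through the json structure described above. Every field value below is a
// hypothetical placeholder.
namespace parquet {
namespace encryption {
inline KeyMaterial ExampleKeyMaterialRoundTrip() {
  std::string json = KeyMaterial::SerializeToJson(
      /*is_footer_key=*/true, /*kms_instance_id=*/"DEFAULT",
      /*kms_instance_url=*/"DEFAULT", /*master_key_id=*/"kf",
      /*is_double_wrapped=*/true, /*kek_id=*/"kek-0001",
      /*encoded_wrapped_kek=*/"<wrapped KEK, base64>",
      /*encoded_wrapped_dek=*/"<wrapped DEK, base64>",
      /*is_internal_storage=*/true);
  return KeyMaterial::Parse(json);
}
}  // namespace encryption
}  // namespace parquet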

View File

@ -0,0 +1,94 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <string>
#include "arrow/util/variant.h"
#include "parquet/encryption/key_material.h"
#include "parquet/exception.h"
#include "parquet/platform.h"
namespace parquet {
namespace encryption {
// Parquet encryption specification defines "key metadata" as an arbitrary byte array,
// generated by file writers for each encryption key, and passed to the low level API for
// storage in the file footer. The "key metadata" field is made available to file readers
// to enable recovery of the key. This interface can be utilized for implementation
// of any key management scheme.
//
// The keytools package (PARQUET-1373) implements one approach, of many possible, to key
// management and to generation of the "key metadata" fields. This approach, based on the
// "envelope encryption" pattern, allows integration with KMS servers. It keeps the actual
// material, required to recover a key, in a "key material" object (see the KeyMaterial
// class for details). This class is implemented to support version 1 of the parquet key
// management tools specification.
//
// KeyMetadata writes (and reads) the "key metadata" field as a flat json object,
// with the following fields:
// 1. "keyMaterialType" - a String, with the type of key material.
// 2. "internalStorage" - a boolean. If true, means that "key material" is kept inside the
// "key metadata" field. If false, "key material" is kept externally (outside Parquet
// files) - in this case, "key metadata" keeps a reference to the external "key material".
// 3. "keyReference" - a String, with the reference to the external "key material".
// Written only if internalStorage is false.
//
// If internalStorage is true, "key material" is a part of "key metadata", and the json
// keeps additional fields, described in the KeyMaterial class.
class PARQUET_EXPORT KeyMetadata {
public:
static constexpr const char kKeyMaterialInternalStorageField[] = "internalStorage";
static constexpr const char kKeyReferenceField[] = "keyReference";
/// key_metadata_bytes is the key metadata field stored in the parquet file,
/// in the serialized json object format.
static KeyMetadata Parse(const std::string& key_metadata_bytes);
static std::string CreateSerializedForExternalMaterial(
const std::string& key_reference);
bool key_material_stored_internally() const { return is_internal_storage_; }
const KeyMaterial& key_material() const {
if (!is_internal_storage_) {
throw ParquetException("key material is stored externally.");
}
return ::arrow::util::get<KeyMaterial>(key_material_or_reference_);
}
const std::string& key_reference() const {
if (is_internal_storage_) {
throw ParquetException("key material is stored internally.");
}
return ::arrow::util::get<std::string>(key_material_or_reference_);
}
private:
explicit KeyMetadata(const KeyMaterial& key_material);
explicit KeyMetadata(const std::string& key_reference);
bool is_internal_storage_;
/// If is_internal_storage_ is true, a KeyMaterial is set;
/// otherwise, a string referencing an outside "key material" is set.
::arrow::util::Variant<KeyMaterial, std::string> key_material_or_reference_;
};
} // namespace encryption
} // namespace parquet
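// Usage sketch (not from the original source): parsing a "key metadata" field
// and branching on where the key material lives, per the description above.
namespace parquet {
namespace encryption {
inline std::string ExampleMasterKeyId(const std::string& key_metadata_bytes) {
  KeyMetadata key_metadata = KeyMetadata::Parse(key_metadata_bytes);
  if (key_metadata.key_material_stored_internally()) {
    return key_metadata.key_material().master_key_id();
  }
  // External storage: only a reference to the "key material" is available here.
  return key_metadata.key_reference();
}
}  // namespace encryption
}  // namespace parquet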

View File

@ -0,0 +1,76 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include "parquet/encryption/key_encryption_key.h"
#include "parquet/encryption/kms_client.h"
#include "parquet/encryption/kms_client_factory.h"
#include "parquet/encryption/two_level_cache_with_expiration.h"
#include "parquet/platform.h"
namespace parquet {
namespace encryption {
// KeyToolkit is a utility that keeps various tools for key management (such as key
// rotation, kms client instantiation, cache control, etc), plus a number of auxiliary
// classes for internal use.
class PARQUET_EXPORT KeyToolkit {
public:
/// KMS client two level cache: token -> KMSInstanceId -> KmsClient
TwoLevelCacheWithExpiration<std::shared_ptr<KmsClient>>& kms_client_cache_per_token() {
return kms_client_cache_;
}
/// Key encryption key two level cache for wrapping: token -> MasterEncryptionKeyId ->
/// KeyEncryptionKey
TwoLevelCacheWithExpiration<KeyEncryptionKey>& kek_write_cache_per_token() {
return key_encryption_key_write_cache_;
}
/// Key encryption key two level cache for unwrapping: token -> KeyEncryptionKeyId ->
/// KeyEncryptionKeyBytes
TwoLevelCacheWithExpiration<std::string>& kek_read_cache_per_token() {
return key_encryption_key_read_cache_;
}
std::shared_ptr<KmsClient> GetKmsClient(
const KmsConnectionConfig& kms_connection_config,
double cache_entry_lifetime_seconds);
/// Flush any caches that are tied to the (compromised) access_token
void RemoveCacheEntriesForToken(const std::string& access_token);
void RemoveCacheEntriesForAllTokens();
void RegisterKmsClientFactory(std::shared_ptr<KmsClientFactory> kms_client_factory) {
if (kms_client_factory_ != NULL) {
throw ParquetException("KMS client factory has already been registered.");
}
kms_client_factory_ = kms_client_factory;
}
private:
TwoLevelCacheWithExpiration<std::shared_ptr<KmsClient>> kms_client_cache_;
TwoLevelCacheWithExpiration<KeyEncryptionKey> key_encryption_key_write_cache_;
TwoLevelCacheWithExpiration<std::string> key_encryption_key_read_cache_;
std::shared_ptr<KmsClientFactory> kms_client_factory_;
};
} // namespace encryption
} // namespace parquet
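// Usage sketch (not from the original source): registering a factory once and
// then obtaining (possibly cached) KMS clients through the toolkit. The
// factory instance and cache lifetime are assumed to come from the caller.
namespace parquet {
namespace encryption {
inline std::shared_ptr<KmsClient> ExampleGetKmsClient(
    KeyToolkit* toolkit, std::shared_ptr<KmsClientFactory> factory,
    const KmsConnectionConfig& kms_connection_config) {
  toolkit->RegisterKmsClientFactory(factory);  // throws if one is already registered
  return toolkit->GetKmsClient(kms_connection_config,
                               /*cache_entry_lifetime_seconds=*/600.0);
}
}  // namespace encryption
}  // namespace parquet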

View File

@ -0,0 +1,95 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "arrow/util/mutex.h"
#include "parquet/exception.h"
#include "parquet/platform.h"
namespace parquet {
namespace encryption {
/// This class wraps the key access token of a KMS server. If your token changes
/// over time, you should keep a reference to the KeyAccessToken object and call
/// its Refresh() method every time you have a new token.
class PARQUET_EXPORT KeyAccessToken {
public:
KeyAccessToken() = default;
explicit KeyAccessToken(const std::string value) : value_(value) {}
void Refresh(const std::string& new_value) {
auto lock = mutex_.Lock();
value_ = new_value;
}
const std::string& value() const {
auto lock = mutex_.Lock();
return value_;
}
private:
std::string value_;
mutable ::arrow::util::Mutex mutex_;
};
struct PARQUET_EXPORT KmsConnectionConfig {
std::string kms_instance_id;
std::string kms_instance_url;
/// If the access token is changed in the future, you should keep a reference to
/// this object and call Refresh() on it whenever there is a new access token.
std::shared_ptr<KeyAccessToken> refreshable_key_access_token;
std::unordered_map<std::string, std::string> custom_kms_conf;
KmsConnectionConfig();
const std::string& key_access_token() const {
if (refreshable_key_access_token == NULL ||
refreshable_key_access_token->value().empty()) {
throw ParquetException("key access token is not set!");
}
return refreshable_key_access_token->value();
}
void SetDefaultIfEmpty();
};
class PARQUET_EXPORT KmsClient {
public:
static constexpr const char kKmsInstanceIdDefault[] = "DEFAULT";
static constexpr const char kKmsInstanceUrlDefault[] = "DEFAULT";
static constexpr const char kKeyAccessTokenDefault[] = "DEFAULT";
/// Wraps a key - encrypts it with the master key, encodes the result
/// and potentially adds a KMS-specific metadata.
virtual std::string WrapKey(const std::string& key_bytes,
const std::string& master_key_identifier) = 0;
/// Decrypts (unwraps) a key with the master key.
virtual std::string UnwrapKey(const std::string& wrapped_key,
const std::string& master_key_identifier) = 0;
virtual ~KmsClient() {}
};
} // namespace encryption
} // namespace parquet
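// Usage sketch (not from the original source): configuring a KMS connection
// and refreshing the shared access token later, as the comments above
// describe. Instance ids, urls and token values are hypothetical placeholders.
namespace parquet {
namespace encryption {
inline KmsConnectionConfig ExampleKmsConnectionConfig() {
  KmsConnectionConfig config;
  config.kms_instance_id = "DEFAULT";
  config.kms_instance_url = "DEFAULT";
  config.refreshable_key_access_token =
      std::make_shared<KeyAccessToken>("initial-token");
  // Later, when the token rotates, refresh it through the same shared object:
  config.refreshable_key_access_token->Refresh("rotated-token");
  return config;
}
}  // namespace encryption
}  // namespace parquet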

View File

@ -0,0 +1,40 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "parquet/encryption/kms_client.h"
#include "parquet/platform.h"
namespace parquet {
namespace encryption {
class PARQUET_EXPORT KmsClientFactory {
public:
explicit KmsClientFactory(bool wrap_locally = false) : wrap_locally_(wrap_locally) {}
virtual ~KmsClientFactory() = default;
virtual std::shared_ptr<KmsClient> CreateKmsClient(
const KmsConnectionConfig& kms_connection_config) = 0;
protected:
bool wrap_locally_;
};
} // namespace encryption
} // namespace parquet

View File

@ -0,0 +1,96 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <unordered_map>
#include <vector>
#include "arrow/util/concurrent_map.h"
#include "parquet/encryption/kms_client.h"
#include "parquet/platform.h"
namespace parquet {
namespace encryption {
/// This class supports the local wrapping mode: master keys are fetched from the KMS
/// server and used to encrypt other keys (data encryption keys or key encryption keys).
class PARQUET_EXPORT LocalWrapKmsClient : public KmsClient {
public:
static constexpr const char kLocalWrapNoKeyVersion[] = "NO_VERSION";
explicit LocalWrapKmsClient(const KmsConnectionConfig& kms_connection_config);
std::string WrapKey(const std::string& key_bytes,
const std::string& master_key_identifier) override;
std::string UnwrapKey(const std::string& wrapped_key,
const std::string& master_key_identifier) override;
protected:
/// Get master key from the remote KMS server.
/// Note: this function might be called by multiple threads
virtual std::string GetMasterKeyFromServer(
const std::string& master_key_identifier) = 0;
private:
/// KMS systems wrap keys by encrypting them with master keys, and attaching
/// additional information (such as the version number of the master key) to the
/// result of the encryption. The master key version is required for key rotation.
/// Currently, the local wrapping mode does not support key rotation (because not
/// all KMS systems allow fetching a master key by its ID and version number).
/// Still, the local wrapping mode adds a placeholder for the master key version,
/// which will enable support for key rotation in this mode in the future, with
/// appropriate KMS systems. This will also enable backward compatibility, where
/// future readers will be able to extract the master key version from files
/// written by the current code.
///
/// LocalKeyWrap class writes (and reads) the "key wrap" as a flat json with the
/// following fields:
/// 1. "masterKeyVersion" - a String, with the master key version. In the current
/// version, only one value is allowed - "NO_VERSION".
/// 2. "encryptedKey" - a String, with the key encrypted by the master key
/// (base64-encoded).
class LocalKeyWrap {
public:
static constexpr const char kLocalWrapKeyVersionField[] = "masterKeyVersion";
static constexpr const char kLocalWrapEncryptedKeyField[] = "encryptedKey";
LocalKeyWrap(std::string master_key_version, std::string encrypted_encoded_key);
static std::string CreateSerialized(const std::string& encrypted_encoded_key);
static LocalKeyWrap Parse(const std::string& wrapped_key);
const std::string& master_key_version() const { return master_key_version_; }
const std::string& encrypted_encoded_key() const { return encrypted_encoded_key_; }
private:
std::string encrypted_encoded_key_;
std::string master_key_version_;
};
std::string GetKeyFromServer(const std::string& key_identifier);
protected:
KmsConnectionConfig kms_connection_config_;
::arrow::util::ConcurrentMap<std::string, std::string> master_key_cache_;
};
} // namespace encryption
} // namespace parquet
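// Usage sketch (not from the original source): the minimal subclass a
// local-wrapping deployment would provide. FetchKeyFromMyService is a
// hypothetical helper standing in for a deployment-specific key service call.
namespace parquet {
namespace encryption {
class ExampleLocalWrapKmsClient : public LocalWrapKmsClient {
 public:
  explicit ExampleLocalWrapKmsClient(const KmsConnectionConfig& kms_connection_config)
      : LocalWrapKmsClient(kms_connection_config) {}

 protected:
  // May be called from multiple threads, as noted above; the base class keeps
  // fetched keys in master_key_cache_.
  std::string GetMasterKeyFromServer(
      const std::string& master_key_identifier) override {
    return FetchKeyFromMyService(master_key_identifier);
  }

 private:
  static std::string FetchKeyFromMyService(const std::string& master_key_identifier);
};
}  // namespace encryption
}  // namespace parquet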

View File

@ -0,0 +1,118 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This module defines constants and helper classes used by the Parquet
// encryption tests, including fixed test keys and utilities for writing and
// reading encrypted files.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <gtest/gtest.h>
#include "arrow/util/io_util.h"
#include "parquet/encryption/encryption.h"
#include "parquet/test_util.h"
namespace parquet {
class ParquetFileReader;
namespace encryption {
namespace test {
using ::arrow::internal::TemporaryDir;
constexpr int kFixedLength = 10;
const char kFooterEncryptionKey[] = "0123456789012345";  // 16 bytes = 128 bits
const char kColumnEncryptionKey1[] = "1234567890123450";
const char kColumnEncryptionKey2[] = "1234567890123451";
const char kFileName[] = "tester";
// Get the path of a file inside the parquet test data directory
std::string data_file(const char* file);
// A temporary directory that contains the encrypted files generated in the tests.
extern std::unique_ptr<TemporaryDir> temp_dir;
inline ::arrow::Result<std::unique_ptr<TemporaryDir>> temp_data_dir() {
return TemporaryDir::Make("parquet-encryption-test-");
}
const char kDoubleFieldName[] = "double_field";
const char kFloatFieldName[] = "float_field";
const char kBooleanFieldName[] = "boolean_field";
const char kInt32FieldName[] = "int32_field";
const char kInt64FieldName[] = "int64_field";
const char kInt96FieldName[] = "int96_field";
const char kByteArrayFieldName[] = "ba_field";
const char kFixedLenByteArrayFieldName[] = "flba_field";
const char kFooterMasterKey[] = "0123456789112345";
const char kFooterMasterKeyId[] = "kf";
const char* const kColumnMasterKeys[] = {"1234567890123450", "1234567890123451",
"1234567890123452", "1234567890123453",
"1234567890123454", "1234567890123455"};
const char* const kColumnMasterKeyIds[] = {"kc1", "kc2", "kc3", "kc4", "kc5", "kc6"};
// The result of this function will be passed to TestOnlyInMemoryKmsClientFactory
// as the key mapping to look up.
std::unordered_map<std::string, std::string> BuildKeyMap(const char* const* column_ids,
const char* const* column_keys,
const char* footer_id,
const char* footer_key);
// The result of this function will be set into EncryptionConfiguration
// as column keys.
std::string BuildColumnKeyMapping();
// FileEncryptor and FileDecryptor are helper classes to write/read an encrypted
// parquet file corresponding to each pair of
// FileEncryptionProperties/FileDecryptionProperties.
// FileEncryptor writes the file with fixed data values, and FileDecryptor reads
// the file and verifies the correctness of the data values.
class FileEncryptor {
public:
FileEncryptor();
void EncryptFile(
std::string file,
std::shared_ptr<parquet::FileEncryptionProperties> encryption_configurations);
private:
std::shared_ptr<schema::GroupNode> SetupEncryptionSchema();
int num_rowgroups_ = 5;
int rows_per_rowgroup_ = 50;
std::shared_ptr<schema::GroupNode> schema_;
};
class FileDecryptor {
public:
void DecryptFile(std::string file_name,
std::shared_ptr<FileDecryptionProperties> file_decryption_properties);
private:
void CheckFile(parquet::ParquetFileReader* file_reader,
FileDecryptionProperties* file_decryption_properties);
};
} // namespace test
} // namespace encryption
} // namespace parquet
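// Usage sketch (not from the original source): how these helpers are meant to
// fit together in a test - encrypt a file with one set of properties, then
// read it back and verify. The output path is a hypothetical placeholder.
namespace parquet {
namespace encryption {
namespace test {
inline void ExampleEncryptDecryptRoundTrip(
    std::shared_ptr<FileEncryptionProperties> encryption_properties,
    std::shared_ptr<FileDecryptionProperties> decryption_properties) {
  FileEncryptor encryptor;
  encryptor.EncryptFile("/tmp/encrypted.parquet", encryption_properties);
  FileDecryptor decryptor;
  decryptor.DecryptFile("/tmp/encrypted.parquet", decryption_properties);
}
}  // namespace test
}  // namespace encryption
}  // namespace parquet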

View File

@ -0,0 +1,89 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <unordered_map>
#include "arrow/util/base64.h"
#include "parquet/encryption/kms_client_factory.h"
#include "parquet/encryption/local_wrap_kms_client.h"
#include "parquet/platform.h"
namespace parquet {
namespace encryption {
// This is a mock class, built for testing only. Don't use it as an example of
// LocalWrapKmsClient implementation.
class TestOnlyLocalWrapInMemoryKms : public LocalWrapKmsClient {
public:
explicit TestOnlyLocalWrapInMemoryKms(const KmsConnectionConfig& kms_connection_config);
static void InitializeMasterKeys(
const std::unordered_map<std::string, std::string>& master_keys_map);
protected:
std::string GetMasterKeyFromServer(const std::string& master_key_identifier) override;
private:
static std::unordered_map<std::string, std::string> master_key_map_;
};
// This is a mock class, built for testing only. Don't use it as an example of KmsClient
// implementation.
class TestOnlyInServerWrapKms : public KmsClient {
public:
static void InitializeMasterKeys(
const std::unordered_map<std::string, std::string>& master_keys_map);
std::string WrapKey(const std::string& key_bytes,
const std::string& master_key_identifier) override;
std::string UnwrapKey(const std::string& wrapped_key,
const std::string& master_key_identifier) override;
private:
std::string GetMasterKeyFromServer(const std::string& master_key_identifier);
static std::unordered_map<std::string, std::string> master_key_map_;
};
// This is a mock class, built for testing only. Don't use it as an example of
// KmsClientFactory implementation.
class TestOnlyInMemoryKmsClientFactory : public KmsClientFactory {
public:
TestOnlyInMemoryKmsClientFactory(
bool wrap_locally,
const std::unordered_map<std::string, std::string>& master_keys_map)
: KmsClientFactory(wrap_locally) {
TestOnlyLocalWrapInMemoryKms::InitializeMasterKeys(master_keys_map);
TestOnlyInServerWrapKms::InitializeMasterKeys(master_keys_map);
}
std::shared_ptr<KmsClient> CreateKmsClient(
const KmsConnectionConfig& kms_connection_config) override {
if (wrap_locally_) {
return std::make_shared<TestOnlyLocalWrapInMemoryKms>(kms_connection_config);
} else {
return std::make_shared<TestOnlyInServerWrapKms>();
}
}
};
} // namespace encryption
} // namespace parquet

View File

@ -0,0 +1,159 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <chrono>
#include <unordered_map>
#include "arrow/util/concurrent_map.h"
#include "arrow/util/mutex.h"
namespace parquet {
namespace encryption {
using ::arrow::util::ConcurrentMap;
namespace internal {
using TimePoint =
std::chrono::time_point<std::chrono::system_clock, std::chrono::duration<double>>;
inline TimePoint CurrentTimePoint() { return std::chrono::system_clock::now(); }
template <typename E>
class ExpiringCacheEntry {
public:
ExpiringCacheEntry() = default;
ExpiringCacheEntry(E cached_item, double expiration_interval_seconds)
: expiration_timestamp_(CurrentTimePoint() +
std::chrono::duration<double>(expiration_interval_seconds)),
cached_item_(std::move(cached_item)) {}
bool IsExpired() const {
const auto now = CurrentTimePoint();
return (now > expiration_timestamp_);
}
E cached_item() { return cached_item_; }
private:
const TimePoint expiration_timestamp_;
E cached_item_;
};
// This class is to avoid the below warning when compiling KeyToolkit class with VS2015
// warning C4503: decorated name length exceeded, name was truncated
template <typename V>
class ExpiringCacheMapEntry {
public:
ExpiringCacheMapEntry() = default;
explicit ExpiringCacheMapEntry(
std::shared_ptr<ConcurrentMap<std::string, V>> cached_item,
double expiration_interval_seconds)
: map_cache_(cached_item, expiration_interval_seconds) {}
bool IsExpired() { return map_cache_.IsExpired(); }
std::shared_ptr<ConcurrentMap<std::string, V>> cached_item() {
return map_cache_.cached_item();
}
private:
// ConcurrentMap object may be accessed and modified at many places at the same time,
// from multiple threads, or even removed from cache.
ExpiringCacheEntry<std::shared_ptr<ConcurrentMap<std::string, V>>> map_cache_;
};
} // namespace internal
// Two-level cache with expiration of internal caches according to token lifetime.
// External cache is per token, internal is per string key.
// Wrapper class around:
// std::unordered_map<std::string,
// internal::ExpiringCacheEntry<std::unordered_map<std::string, V>>>
// This cache is safe to be shared between threads.
template <typename V>
class TwoLevelCacheWithExpiration {
public:
TwoLevelCacheWithExpiration() {
last_cache_cleanup_timestamp_ = internal::CurrentTimePoint();
}
std::shared_ptr<ConcurrentMap<std::string, V>> GetOrCreateInternalCache(
const std::string& access_token, double cache_entry_lifetime_seconds) {
auto lock = mutex_.Lock();
auto external_cache_entry = cache_.find(access_token);
if (external_cache_entry == cache_.end() ||
external_cache_entry->second.IsExpired()) {
cache_.insert({access_token, internal::ExpiringCacheMapEntry<V>(
std::shared_ptr<ConcurrentMap<std::string, V>>(
new ConcurrentMap<std::string, V>()),
cache_entry_lifetime_seconds)});
}
return cache_[access_token].cached_item();
}
void CheckCacheForExpiredTokens(double cache_cleanup_period_seconds) {
auto lock = mutex_.Lock();
const auto now = internal::CurrentTimePoint();
if (now > (last_cache_cleanup_timestamp_ +
std::chrono::duration<double>(cache_cleanup_period_seconds))) {
RemoveExpiredEntriesNoMutex();
last_cache_cleanup_timestamp_ =
now + std::chrono::duration<double>(cache_cleanup_period_seconds);
}
}
void RemoveExpiredEntriesFromCache() {
auto lock = mutex_.Lock();
RemoveExpiredEntriesNoMutex();
}
void Remove(const std::string& access_token) {
auto lock = mutex_.Lock();
cache_.erase(access_token);
}
void Clear() {
auto lock = mutex_.Lock();
cache_.clear();
}
private:
void RemoveExpiredEntriesNoMutex() {
for (auto it = cache_.begin(); it != cache_.end();) {
if (it->second.IsExpired()) {
it = cache_.erase(it);
} else {
++it;
}
}
}
std::unordered_map<std::string, internal::ExpiringCacheMapEntry<V>> cache_;
internal::TimePoint last_cache_cleanup_timestamp_;
::arrow::util::Mutex mutex_;
};
} // namespace encryption
} // namespace parquet
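// Usage sketch (not from the original source): a per-token cache of string
// values with a 10-minute entry lifetime and a 1-minute cleanup period. The
// token and key values are hypothetical; Insert() is assumed to be provided
// by arrow::util::ConcurrentMap.
namespace parquet {
namespace encryption {
inline void ExampleTwoLevelCache() {
  TwoLevelCacheWithExpiration<std::string> cache;
  auto per_token_map = cache.GetOrCreateInternalCache(
      "access-token-A", /*cache_entry_lifetime_seconds=*/600.0);
  per_token_map->Insert("kek-0001", "kek-bytes");
  // Periodically drop whole per-token maps whose lifetime has elapsed:
  cache.CheckCacheForExpiredTokens(/*cache_cleanup_period_seconds=*/60.0);
  // If a token is compromised, remove its entries eagerly:
  cache.Remove("access-token-A");
}
}  // namespace encryption
}  // namespace parquet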

View File

@ -0,0 +1,158 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <exception>
#include <sstream>
#include <string>
#include <utility>
#include "arrow/type_fwd.h"
#include "arrow/util/string_builder.h"
#include "parquet/platform.h"
// PARQUET-1085
#if !defined(ARROW_UNUSED)
#define ARROW_UNUSED(x) UNUSED(x)
#endif
// Parquet exception to Arrow Status
#define BEGIN_PARQUET_CATCH_EXCEPTIONS try {
#define END_PARQUET_CATCH_EXCEPTIONS \
} \
catch (const ::parquet::ParquetStatusException& e) { \
return e.status(); \
} \
catch (const ::parquet::ParquetException& e) { \
return ::arrow::Status::IOError(e.what()); \
}
// clang-format off
#define PARQUET_CATCH_NOT_OK(s) \
BEGIN_PARQUET_CATCH_EXCEPTIONS \
(s); \
END_PARQUET_CATCH_EXCEPTIONS
// clang-format on
#define PARQUET_CATCH_AND_RETURN(s) \
BEGIN_PARQUET_CATCH_EXCEPTIONS \
return (s); \
END_PARQUET_CATCH_EXCEPTIONS
// Arrow Status to Parquet exception
#define PARQUET_IGNORE_NOT_OK(s) \
do { \
::arrow::Status _s = ::arrow::internal::GenericToStatus(s); \
ARROW_UNUSED(_s); \
} while (0)
#define PARQUET_THROW_NOT_OK(s) \
do { \
::arrow::Status _s = ::arrow::internal::GenericToStatus(s); \
if (!_s.ok()) { \
throw ::parquet::ParquetStatusException(std::move(_s)); \
} \
} while (0)
#define PARQUET_ASSIGN_OR_THROW_IMPL(status_name, lhs, rexpr) \
auto status_name = (rexpr); \
PARQUET_THROW_NOT_OK(status_name.status()); \
lhs = std::move(status_name).ValueOrDie();
#define PARQUET_ASSIGN_OR_THROW(lhs, rexpr) \
PARQUET_ASSIGN_OR_THROW_IMPL(ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \
lhs, rexpr);
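// Usage sketch for the bridging macros above (illustrative only; the helper
// function is hypothetical):
//
//   std::shared_ptr<::arrow::Buffer> ReadWholeFile(
//       ::arrow::io::RandomAccessFile* file) {
//     PARQUET_ASSIGN_OR_THROW(int64_t size, file->GetSize());
//     PARQUET_ASSIGN_OR_THROW(auto buffer, file->ReadAt(0, size));
//     return buffer;
//   }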
namespace parquet {
class ParquetException : public std::exception {
public:
PARQUET_NORETURN static void EofException(const std::string& msg = "") {
static std::string prefix = "Unexpected end of stream";
if (msg.empty()) {
throw ParquetException(prefix);
}
throw ParquetException(prefix, ": ", msg);
}
PARQUET_NORETURN static void NYI(const std::string& msg = "") {
throw ParquetException("Not yet implemented: ", msg, ".");
}
template <typename... Args>
explicit ParquetException(Args&&... args)
: msg_(::arrow::util::StringBuilder(std::forward<Args>(args)...)) {}
explicit ParquetException(std::string msg) : msg_(std::move(msg)) {}
explicit ParquetException(const char* msg, const std::exception&) : msg_(msg) {}
ParquetException(const ParquetException&) = default;
ParquetException& operator=(const ParquetException&) = default;
ParquetException(ParquetException&&) = default;
ParquetException& operator=(ParquetException&&) = default;
const char* what() const noexcept override { return msg_.c_str(); }
private:
std::string msg_;
};
// Support printing a ParquetException.
// This is needed for clang-on-MSVC, where operator<< is not defined for
// std::exception.
PARQUET_EXPORT
std::ostream& operator<<(std::ostream& os, const ParquetException& exception);
class ParquetStatusException : public ParquetException {
public:
explicit ParquetStatusException(::arrow::Status status)
: ParquetException(status.ToString()), status_(std::move(status)) {}
const ::arrow::Status& status() const { return status_; }
private:
::arrow::Status status_;
};
// This class exists for the purpose of detecting an invalid or corrupted file.
class ParquetInvalidOrCorruptedFileException : public ParquetStatusException {
public:
ParquetInvalidOrCorruptedFileException(const ParquetInvalidOrCorruptedFileException&) =
default;
template <typename Arg,
typename std::enable_if<
!std::is_base_of<ParquetInvalidOrCorruptedFileException, Arg>::value,
int>::type = 0,
typename... Args>
explicit ParquetInvalidOrCorruptedFileException(Arg arg, Args&&... args)
: ParquetStatusException(::arrow::Status::Invalid(std::forward<Arg>(arg),
std::forward<Args>(args)...)) {}
};
template <typename StatusReturnBlock>
void ThrowNotOk(StatusReturnBlock&& b) {
PARQUET_THROW_NOT_OK(b());
}
} // namespace parquet


@ -0,0 +1,188 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "arrow/io/caching.h"
#include "arrow/util/type_fwd.h"
#include "parquet/metadata.h" // IWYU pragma: keep
#include "parquet/platform.h"
#include "parquet/properties.h"
namespace parquet {
class ColumnReader;
class FileMetaData;
class PageReader;
class RowGroupMetaData;
class PARQUET_EXPORT RowGroupReader {
public:
// Forward declare a virtual class 'Contents' to aid dependency injection and more
// easily create test fixtures
// An implementation of the Contents class is defined in the .cc file
struct Contents {
virtual ~Contents() {}
virtual std::unique_ptr<PageReader> GetColumnPageReader(int i) = 0;
virtual const RowGroupMetaData* metadata() const = 0;
virtual const ReaderProperties* properties() const = 0;
};
explicit RowGroupReader(std::unique_ptr<Contents> contents);
// Returns the rowgroup metadata
const RowGroupMetaData* metadata() const;
// Construct a ColumnReader for the indicated row group-relative
// column. Ownership is shared with the RowGroupReader.
std::shared_ptr<ColumnReader> Column(int i);
// Construct a ColumnReader, trying to enable exposed encoding.
//
// For dictionary encoding, currently we only support column chunks that are fully
// dictionary encoded, i.e., all data pages in the column chunk are dictionary encoded.
// If a column chunk uses dictionary encoding but then falls back to plain encoding, the
// encoding will not be exposed.
//
// The returned column reader provides an API GetExposedEncoding() for the
// users to check the exposed encoding and determine how to read the batches.
//
// \note API EXPERIMENTAL
std::shared_ptr<ColumnReader> ColumnWithExposeEncoding(
int i, ExposedEncoding encoding_to_expose);
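  //
  // Usage sketch (illustrative only):
  //
  //   auto col = row_group->ColumnWithExposeEncoding(
  //       /*i=*/0, ExposedEncoding::DICTIONARY);
  //   if (col->GetExposedEncoding() == ExposedEncoding::DICTIONARY) {
  //     // Read dictionary and indices through the typed reader instead of
  //     // materialized values.
  //   }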
std::unique_ptr<PageReader> GetColumnPageReader(int i);
private:
// Holds a pointer to an instance of Contents implementation
std::unique_ptr<Contents> contents_;
};
class PARQUET_EXPORT ParquetFileReader {
public:
// Declare a virtual class 'Contents' to aid dependency injection and more
// easily create test fixtures
// An implementation of the Contents class is defined in the .cc file
struct PARQUET_EXPORT Contents {
static std::unique_ptr<Contents> Open(
std::shared_ptr<::arrow::io::RandomAccessFile> source,
const ReaderProperties& props = default_reader_properties(),
std::shared_ptr<FileMetaData> metadata = NULLPTR);
static ::arrow::Future<std::unique_ptr<Contents>> OpenAsync(
std::shared_ptr<::arrow::io::RandomAccessFile> source,
const ReaderProperties& props = default_reader_properties(),
std::shared_ptr<FileMetaData> metadata = NULLPTR);
virtual ~Contents() = default;
// Perform any cleanup associated with the file contents
virtual void Close() = 0;
virtual std::shared_ptr<RowGroupReader> GetRowGroup(int i) = 0;
virtual std::shared_ptr<FileMetaData> metadata() const = 0;
};
ParquetFileReader();
~ParquetFileReader();
// Create a file reader instance from an Arrow file object. Thread-safety is
// the responsibility of the file implementation
static std::unique_ptr<ParquetFileReader> Open(
std::shared_ptr<::arrow::io::RandomAccessFile> source,
const ReaderProperties& props = default_reader_properties(),
std::shared_ptr<FileMetaData> metadata = NULLPTR);
// API Convenience to open a serialized Parquet file on disk, using Arrow IO
// interfaces.
static std::unique_ptr<ParquetFileReader> OpenFile(
const std::string& path, bool memory_map = true,
const ReaderProperties& props = default_reader_properties(),
std::shared_ptr<FileMetaData> metadata = NULLPTR);
// Asynchronously open a file reader from an Arrow file object.
// Does not throw - all errors are reported through the Future.
static ::arrow::Future<std::unique_ptr<ParquetFileReader>> OpenAsync(
std::shared_ptr<::arrow::io::RandomAccessFile> source,
const ReaderProperties& props = default_reader_properties(),
std::shared_ptr<FileMetaData> metadata = NULLPTR);
void Open(std::unique_ptr<Contents> contents);
void Close();
// The RowGroupReader is owned by the FileReader
std::shared_ptr<RowGroupReader> RowGroup(int i);
// Returns the file metadata. Only one instance is ever created
std::shared_ptr<FileMetaData> metadata() const;
/// Pre-buffer the specified column indices in all row groups.
///
/// Readers can optionally call this to cache the necessary slices
/// of the file in-memory before deserialization. Arrow readers can
/// automatically do this via an option. This is intended to
/// increase performance when reading from high-latency filesystems
/// (e.g. Amazon S3).
///
/// After calling this, creating readers for row groups/column
/// indices that were not buffered may fail. Creating multiple
  /// readers for a subset of the buffered regions is
/// acceptable. This may be called again to buffer a different set
/// of row groups/columns.
///
/// If memory usage is a concern, note that data will remain
/// buffered in memory until either \a PreBuffer() is called again,
/// or the reader itself is destructed. Reading - and buffering -
/// only one row group at a time may be useful.
///
/// This method may throw.
void PreBuffer(const std::vector<int>& row_groups,
const std::vector<int>& column_indices,
const ::arrow::io::IOContext& ctx,
const ::arrow::io::CacheOptions& options);
/// Wait for the specified row groups and column indices to be pre-buffered.
///
/// After the returned Future completes, reading the specified row
/// groups/columns will not block.
///
/// PreBuffer must be called first. This method does not throw.
::arrow::Future<> WhenBuffered(const std::vector<int>& row_groups,
const std::vector<int>& column_indices) const;
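  // Pre-buffering sketch (illustrative only; a local file path is assumed):
  //
  //   auto reader = ParquetFileReader::OpenFile("data.parquet");
  //   ::arrow::io::IOContext ctx;
  //   auto cache_options = ::arrow::io::CacheOptions::Defaults();
  //   reader->PreBuffer(/*row_groups=*/{0}, /*column_indices=*/{0, 1},
  //                     ctx, cache_options);
  //   reader->WhenBuffered({0}, {0, 1}).Wait();  // later reads won't block
  //   auto row_group = reader->RowGroup(0);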
private:
// Holds a pointer to an instance of Contents implementation
std::unique_ptr<Contents> contents_;
};
// Read only Parquet file metadata
std::shared_ptr<FileMetaData> PARQUET_EXPORT
ReadMetaData(const std::shared_ptr<::arrow::io::RandomAccessFile>& source);
/// \brief Scan all values in file. Useful for performance testing
/// \param[in] columns the column numbers to scan. If empty scans all
/// \param[in] column_batch_size number of values to read at a time when scanning column
/// \param[in] reader a ParquetFileReader instance
/// \return number of semantic rows in file
PARQUET_EXPORT
int64_t ScanFileContents(std::vector<int> columns, const int32_t column_batch_size,
ParquetFileReader* reader);
} // namespace parquet


@ -0,0 +1,234 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <utility>
#include "parquet/metadata.h"
#include "parquet/platform.h"
#include "parquet/properties.h"
#include "parquet/schema.h"
namespace parquet {
class ColumnWriter;
// FIXME: copied from reader-internal.cc
static constexpr uint8_t kParquetMagic[4] = {'P', 'A', 'R', '1'};
static constexpr uint8_t kParquetEMagic[4] = {'P', 'A', 'R', 'E'};
class PARQUET_EXPORT RowGroupWriter {
public:
// Forward declare a virtual class 'Contents' to aid dependency injection and more
// easily create test fixtures
// An implementation of the Contents class is defined in the .cc file
struct Contents {
virtual ~Contents() = default;
virtual int num_columns() const = 0;
virtual int64_t num_rows() const = 0;
// to be used only with ParquetFileWriter::AppendRowGroup
virtual ColumnWriter* NextColumn() = 0;
// to be used only with ParquetFileWriter::AppendBufferedRowGroup
virtual ColumnWriter* column(int i) = 0;
virtual int current_column() const = 0;
virtual void Close() = 0;
// total bytes written by the page writer
virtual int64_t total_bytes_written() const = 0;
// total bytes still compressed but not written
virtual int64_t total_compressed_bytes() const = 0;
};
explicit RowGroupWriter(std::unique_ptr<Contents> contents);
/// Construct a ColumnWriter for the indicated row group-relative column.
///
/// To be used only with ParquetFileWriter::AppendRowGroup
/// Ownership is solely within the RowGroupWriter. The ColumnWriter is only
/// valid until the next call to NextColumn or Close. As the contents are
/// directly written to the sink, once a new column is started, the contents
/// of the previous one cannot be modified anymore.
ColumnWriter* NextColumn();
/// Index of currently written column. Equal to -1 if NextColumn()
/// has not been called yet.
int current_column();
void Close();
int num_columns() const;
/// Construct a ColumnWriter for the indicated row group column.
///
/// To be used only with ParquetFileWriter::AppendBufferedRowGroup
/// Ownership is solely within the RowGroupWriter. The ColumnWriter is
/// valid until Close. The contents are buffered in memory and written to sink
/// on Close
ColumnWriter* column(int i);
  /// Number of rows that shall be written as part of this RowGroup.
int64_t num_rows() const;
int64_t total_bytes_written() const;
int64_t total_compressed_bytes() const;
private:
// Holds a pointer to an instance of Contents implementation
std::unique_ptr<Contents> contents_;
};
PARQUET_EXPORT
void WriteFileMetaData(const FileMetaData& file_metadata,
::arrow::io::OutputStream* sink);
PARQUET_EXPORT
void WriteMetaDataFile(const FileMetaData& file_metadata,
::arrow::io::OutputStream* sink);
PARQUET_EXPORT
void WriteEncryptedFileMetadata(const FileMetaData& file_metadata,
ArrowOutputStream* sink,
const std::shared_ptr<Encryptor>& encryptor,
bool encrypt_footer);
PARQUET_EXPORT
void WriteEncryptedFileMetadata(const FileMetaData& file_metadata,
::arrow::io::OutputStream* sink,
const std::shared_ptr<Encryptor>& encryptor = NULLPTR,
bool encrypt_footer = false);
PARQUET_EXPORT
void WriteFileCryptoMetaData(const FileCryptoMetaData& crypto_metadata,
::arrow::io::OutputStream* sink);
class PARQUET_EXPORT ParquetFileWriter {
public:
// Forward declare a virtual class 'Contents' to aid dependency injection and more
// easily create test fixtures
// An implementation of the Contents class is defined in the .cc file
struct Contents {
Contents(std::shared_ptr<::parquet::schema::GroupNode> schema,
std::shared_ptr<const KeyValueMetadata> key_value_metadata)
: schema_(), key_value_metadata_(std::move(key_value_metadata)) {
schema_.Init(std::move(schema));
}
virtual ~Contents() {}
// Perform any cleanup associated with the file contents
virtual void Close() = 0;
/// \note Deprecated since 1.3.0
RowGroupWriter* AppendRowGroup(int64_t num_rows);
virtual RowGroupWriter* AppendRowGroup() = 0;
virtual RowGroupWriter* AppendBufferedRowGroup() = 0;
virtual int64_t num_rows() const = 0;
virtual int num_columns() const = 0;
virtual int num_row_groups() const = 0;
virtual const std::shared_ptr<WriterProperties>& properties() const = 0;
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata() const {
return key_value_metadata_;
}
// Return const-pointer to make it clear that this object is not to be copied
const SchemaDescriptor* schema() const { return &schema_; }
SchemaDescriptor schema_;
/// This should be the only place this is stored. Everything else is a const reference
std::shared_ptr<const KeyValueMetadata> key_value_metadata_;
const std::shared_ptr<FileMetaData>& metadata() const { return file_metadata_; }
std::shared_ptr<FileMetaData> file_metadata_;
};
ParquetFileWriter();
~ParquetFileWriter();
static std::unique_ptr<ParquetFileWriter> Open(
std::shared_ptr<::arrow::io::OutputStream> sink,
std::shared_ptr<schema::GroupNode> schema,
std::shared_ptr<WriterProperties> properties = default_writer_properties(),
std::shared_ptr<const KeyValueMetadata> key_value_metadata = NULLPTR);
void Open(std::unique_ptr<Contents> contents);
void Close();
// Construct a RowGroupWriter for the indicated number of rows.
//
// Ownership is solely within the ParquetFileWriter. The RowGroupWriter is only valid
// until the next call to AppendRowGroup or AppendBufferedRowGroup or Close.
// @param num_rows The number of rows that are stored in the new RowGroup
//
// \deprecated Since 1.3.0
RowGroupWriter* AppendRowGroup(int64_t num_rows);
/// Construct a RowGroupWriter with an arbitrary number of rows.
///
/// Ownership is solely within the ParquetFileWriter. The RowGroupWriter is only valid
/// until the next call to AppendRowGroup or AppendBufferedRowGroup or Close.
RowGroupWriter* AppendRowGroup();
/// Construct a RowGroupWriter that buffers all the values until the RowGroup is ready.
/// Use this if you want to write a RowGroup based on a certain size
///
/// Ownership is solely within the ParquetFileWriter. The RowGroupWriter is only valid
/// until the next call to AppendRowGroup or AppendBufferedRowGroup or Close.
RowGroupWriter* AppendBufferedRowGroup();
/// Number of columns.
///
/// This number is fixed during the lifetime of the writer as it is determined via
/// the schema.
int num_columns() const;
  /// Number of rows in the RowGroups started so far.
  ///
  /// Changes when a new RowGroup is appended.
int64_t num_rows() const;
/// Number of started RowGroups.
int num_row_groups() const;
/// Configuration passed to the writer, e.g. the used Parquet format version.
const std::shared_ptr<WriterProperties>& properties() const;
/// Returns the file schema descriptor
const SchemaDescriptor* schema() const;
/// Returns a column descriptor in schema
const ColumnDescriptor* descr(int i) const;
/// Returns the file custom metadata
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata() const;
/// Returns the file metadata, only available after calling Close().
const std::shared_ptr<FileMetaData> metadata() const;
private:
// Holds a pointer to an instance of Contents implementation
std::unique_ptr<Contents> contents_;
std::shared_ptr<FileMetaData> file_metadata_;
};
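// Minimal write-path sketch (illustrative only; schema construction and
// typed-column writing are elided):
//
//   PARQUET_ASSIGN_OR_THROW(
//       auto sink, ::arrow::io::FileOutputStream::Open("/tmp/out.parquet"));
//   auto writer = ParquetFileWriter::Open(sink, schema);
//   RowGroupWriter* rg_writer = writer->AppendRowGroup();
//   ColumnWriter* column = rg_writer->NextColumn();
//   // ... cast `column` to the matching typed writer (e.g. Int64Writer)
//   // and call WriteBatch() ...
//   rg_writer->Close();
//   writer->Close();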
} // namespace parquet


@ -0,0 +1,72 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include "parquet/types.h"
namespace parquet {
// Abstract class for hash
class Hasher {
public:
/// Compute hash for 32 bits value by using its plain encoding result.
///
/// @param value the value to hash.
/// @return hash result.
virtual uint64_t Hash(int32_t value) const = 0;
/// Compute hash for 64 bits value by using its plain encoding result.
///
/// @param value the value to hash.
/// @return hash result.
virtual uint64_t Hash(int64_t value) const = 0;
/// Compute hash for float value by using its plain encoding result.
///
/// @param value the value to hash.
/// @return hash result.
virtual uint64_t Hash(float value) const = 0;
/// Compute hash for double value by using its plain encoding result.
///
/// @param value the value to hash.
/// @return hash result.
virtual uint64_t Hash(double value) const = 0;
/// Compute hash for Int96 value by using its plain encoding result.
///
/// @param value the value to hash.
/// @return hash result.
virtual uint64_t Hash(const Int96* value) const = 0;
/// Compute hash for ByteArray value by using its plain encoding result.
///
/// @param value the value to hash.
/// @return hash result.
virtual uint64_t Hash(const ByteArray* value) const = 0;
/// Compute hash for fixed byte array value by using its plain encoding result.
///
/// @param value the value address.
  /// @param len the value length.
  /// @return hash result.
virtual uint64_t Hash(const FLBA* value, uint32_t len) const = 0;
virtual ~Hasher() = default;
};
} // namespace parquet


@ -0,0 +1,40 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <cstdint>
#include "parquet/platform.h"
namespace parquet {
namespace internal {
/// Builds a bitmap where each set bit indicates the corresponding level is greater
/// than rhs.
uint64_t PARQUET_EXPORT GreaterThanBitmap(const int16_t* levels, int64_t num_levels,
int16_t rhs);
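// Example (illustrative): levels = {0, 2, 1, 3} with rhs = 1 yields the
// bitmap 0b1010 in least-significant-bit-first order, since only
// levels[1] == 2 and levels[3] == 3 are greater than 1.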
struct MinMax {
int16_t min;
int16_t max;
};
MinMax FindMinMax(const int16_t* levels, int64_t num_levels);
} // namespace internal
} // namespace parquet


@ -0,0 +1,65 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/util/bit_util.h"
#include "arrow/util/endian.h"
#include "parquet/level_comparison.h"
// Used to make sure ODR rule isn't violated.
#ifndef PARQUET_IMPL_NAMESPACE
#error "PARQUET_IMPL_NAMESPACE must be defined"
#endif
namespace parquet {
namespace internal {
namespace PARQUET_IMPL_NAMESPACE {
/// Builds a bitmap by applying predicate to the level vector provided.
///
/// \param[in] levels Rep or def level array.
/// \param[in] num_levels The number of levels to process (must be [0, 64])
/// \param[in] predicate The predicate to apply (must have the signature `bool
/// predicate(int16_t)`).
/// \returns The bitmap using least significant "bit" ordering.
///
template <typename Predicate>
inline uint64_t LevelsToBitmap(const int16_t* levels, int64_t num_levels,
Predicate predicate) {
// Both clang and GCC can vectorize this automatically with SSE4/AVX2.
uint64_t mask = 0;
for (int x = 0; x < num_levels; x++) {
mask |= static_cast<uint64_t>(predicate(levels[x]) ? 1 : 0) << x;
}
return ::arrow::bit_util::ToLittleEndian(mask);
}
inline MinMax FindMinMaxImpl(const int16_t* levels, int64_t num_levels) {
MinMax out{std::numeric_limits<int16_t>::max(), std::numeric_limits<int16_t>::min()};
for (int x = 0; x < num_levels; x++) {
out.min = std::min(levels[x], out.min);
out.max = std::max(levels[x], out.max);
}
return out;
}
inline uint64_t GreaterThanBitmapImpl(const int16_t* levels, int64_t num_levels,
int16_t rhs) {
return LevelsToBitmap(levels, num_levels, [rhs](int16_t value) { return value > rhs; });
}
} // namespace PARQUET_IMPL_NAMESPACE
} // namespace internal
} // namespace parquet


@ -0,0 +1,199 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include "arrow/util/endian.h"
#include "parquet/platform.h"
#include "parquet/schema.h"
namespace parquet {
namespace internal {
struct PARQUET_EXPORT LevelInfo {
LevelInfo()
: null_slot_usage(1), def_level(0), rep_level(0), repeated_ancestor_def_level(0) {}
LevelInfo(int32_t null_slots, int32_t definition_level, int32_t repetition_level,
int32_t repeated_ancestor_definition_level)
: null_slot_usage(null_slots),
def_level(definition_level),
rep_level(repetition_level),
repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
bool operator==(const LevelInfo& b) const {
return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
rep_level == b.rep_level &&
repeated_ancestor_def_level == b.repeated_ancestor_def_level;
}
bool HasNullableValues() const { return repeated_ancestor_def_level < def_level; }
// How many slots an undefined but present (i.e. null) element in
// parquet consumes when decoding to Arrow.
// "Slot" is used in the same context as the Arrow specification
// (i.e. a value holder).
  // This is only ever >1 for descendants of FixedSizeList.
int32_t null_slot_usage = 1;
// The definition level at which the value for the field
// is considered not null (definition levels greater than
// or equal to this value indicate a not-null
// value for the field). For list fields definition levels
// greater than or equal to this field indicate a present,
// possibly null, child value.
int16_t def_level = 0;
// The repetition level corresponding to this element
// or the closest repeated ancestor. Any repetition
// level less than this indicates either a new list OR
// an empty list (which is determined in conjunction
// with definition levels).
int16_t rep_level = 0;
// The definition level indicating the level at which the closest
// repeated ancestor is not empty. This is used to discriminate
// between a value less than |def_level| being null or excluded entirely.
// For instance if we have an arrow schema like:
  // list(struct(f0: int)). Then there are the following
// definition levels:
// 0 = null list
// 1 = present but empty list.
// 2 = a null value in the list
// 3 = a non null struct but null integer.
// 4 = a present integer.
// When reconstructing, the struct and integer arrays'
// repeated_ancestor_def_level would be 2. Any
// def_level < 2 indicates that there isn't a corresponding
// child value in the list.
// i.e. [null, [], [null], [{f0: null}], [{f0: 1}]]
// has the def levels [0, 1, 2, 3, 4]. The actual
// struct array is only of length 3: [not-set, set, set] and
// the int array is also of length 3: [N/A, null, 1].
//
int16_t repeated_ancestor_def_level = 0;
/// Increments levels according to the cardinality of node.
void Increment(const schema::Node& node) {
if (node.is_repeated()) {
IncrementRepeated();
return;
}
if (node.is_optional()) {
IncrementOptional();
return;
}
}
  /// Increments level for an optional node.
void IncrementOptional() { def_level++; }
/// Increments levels for the repeated node. Returns
/// the previous ancestor_list_def_level.
int16_t IncrementRepeated() {
int16_t last_repeated_ancestor = repeated_ancestor_def_level;
// Repeated fields add both a repetition and definition level. This is used
// to distinguish between an empty list and a list with an item in it.
++rep_level;
++def_level;
    // For levels >= repeated_ancestor_def_level it indicates the list was
    // non-null and had at least one element. This is important
    // for later decoding because we need to add a slot for these
    // values. For levels < current_def_level no slots are added
    // to arrays.
repeated_ancestor_def_level = def_level;
return last_repeated_ancestor;
}
friend std::ostream& operator<<(std::ostream& os, const LevelInfo& levels) {
// This print method is to silence valgrind issues. What's printed
// is not important because all asserts happen directly on
// members.
os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
<< ", repeated_ancestor_def=" << levels.repeated_ancestor_def_level;
if (levels.null_slot_usage > 1) {
os << ", null_slot_usage=" << levels.null_slot_usage;
}
os << "}";
return os;
}
};
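// Sketch of how the Increment* methods accumulate levels while walking a
// schema (illustrative only), e.g. for an optional list of optional int32:
//
//   LevelInfo info;            // {def=0, rep=0, repeated_ancestor_def=0}
//   info.IncrementOptional();  // optional list field   -> def=1
//   info.IncrementRepeated();  // repeated list element -> def=2, rep=1,
//                              //   repeated_ancestor_def=2
//   info.IncrementOptional();  // optional leaf value   -> def=3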
// Input/Output structure for reconstructed validity bitmaps.
struct PARQUET_EXPORT ValidityBitmapInputOutput {
// Input only.
// The maximum number of values_read expected (actual
// values read must be less than or equal to this value).
// If this number is exceeded methods will throw a
// ParquetException. Exceeding this limit indicates
// either a corrupt or incorrectly written file.
int64_t values_read_upper_bound = 0;
  // Output only. The number of values encountered (this is logically
  // the number of elements in the resulting Arrow array).
int64_t values_read = 0;
// Input/Output. The number of nulls encountered.
int64_t null_count = 0;
  // Output only. The validity bitmap to populate. May be null only
  // for DefRepLevelsToList (if all that is needed is list offsets).
uint8_t* valid_bits = NULLPTR;
// Input only, offset into valid_bits to start at.
int64_t valid_bits_offset = 0;
};
// Converts def_levels to validity bitmaps for non-list arrays and for structs
// that have at least one member that is not a list and has no list descendants.
// For lists, use DefRepLevelsToList; for structs where all descendants contain
// a list, use DefRepLevelsToBitmap.
void PARQUET_EXPORT DefLevelsToBitmap(const int16_t* def_levels, int64_t num_def_levels,
LevelInfo level_info,
ValidityBitmapInputOutput* output);
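// Example (illustrative, no repeated parent): def_levels = {3, 0, 2, 3} with
// level_info.def_level == 3 produces the validity bits {1, 0, 0, 1}
// (values_read = 4 and null_count increased by 2).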
// Reconstructs a validity bitmap and list offsets for list arrays based on
// def/rep levels. The first element of offsets will not be modified if rep_levels
// starts with a new list. The first element of offsets will be used when calculating
// the next offset. See documentation on DefLevelsToBitmap for when to use this
// method vs the other ones in this file for reconstruction.
//
// Offsets must be sized to 1 + values_read_upper_bound.
void PARQUET_EXPORT DefRepLevelsToList(const int16_t* def_levels,
const int16_t* rep_levels, int64_t num_def_levels,
LevelInfo level_info,
ValidityBitmapInputOutput* output,
int32_t* offsets);
void PARQUET_EXPORT DefRepLevelsToList(const int16_t* def_levels,
const int16_t* rep_levels, int64_t num_def_levels,
LevelInfo level_info,
ValidityBitmapInputOutput* output,
int64_t* offsets);
// Reconstructs a validity bitmap for a struct whose every member is a list or
// has a list descendant. See the documentation on DefLevelsToBitmap for more
// details on this method compared to the other ones defined above.
void PARQUET_EXPORT DefRepLevelsToBitmap(const int16_t* def_levels,
const int16_t* rep_levels,
int64_t num_def_levels, LevelInfo level_info,
ValidityBitmapInputOutput* output);
// This is exposed to ensure we can properly test a software simulated pext function
// (i.e. it isn't hidden by runtime dispatch).
uint64_t PARQUET_EXPORT TestOnlyExtractBitsSoftware(uint64_t bitmap, uint64_t selection);
} // namespace internal
} // namespace parquet


@ -0,0 +1,357 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "parquet/level_conversion.h"
#include <algorithm>
#include <cstdint>
#include <limits>
#include "arrow/util/bit_run_reader.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_writer.h"
#include "arrow/util/logging.h"
#include "arrow/util/simd.h"
#include "parquet/exception.h"
#include "parquet/level_comparison.h"
namespace parquet {
namespace internal {
#ifndef PARQUET_IMPL_NAMESPACE
#error "PARQUET_IMPL_NAMESPACE must be defined"
#endif
namespace PARQUET_IMPL_NAMESPACE {
// clang-format off
/* Python code to generate lookup table:
kLookupBits = 5
count = 0
print('constexpr int kLookupBits = {};'.format(kLookupBits))
print('constexpr uint8_t kPextTable[1 << kLookupBits][1 << kLookupBits] = {')
print(' ', end = '')
for mask in range(1 << kLookupBits):
for data in range(1 << kLookupBits):
bit_value = 0
bit_len = 0
for i in range(kLookupBits):
if mask & (1 << i):
bit_value |= (((data >> i) & 1) << bit_len)
bit_len += 1
out = '0x{:02X},'.format(bit_value)
count += 1
if count % (1 << kLookupBits) == 1:
print(' {')
if count % 8 == 1:
print(' ', end = '')
if count % 8 == 0:
print(out, end = '\n')
else:
print(out, end = ' ')
if count % (1 << kLookupBits) == 0:
print(' },', end = '')
print('\n};')
*/
// clang-format on
constexpr int kLookupBits = 5;
constexpr uint8_t kPextTable[1 << kLookupBits][1 << kLookupBits] = {
{
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
{
0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
},
{
0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01,
0x01, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00,
0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
},
{
0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02,
0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01,
0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03,
},
{
0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00,
0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
},
{
0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x00, 0x01, 0x00,
0x01, 0x02, 0x03, 0x02, 0x03, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03,
0x02, 0x03, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03,
},
{
0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x00, 0x00, 0x01,
0x01, 0x02, 0x02, 0x03, 0x03, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02,
0x03, 0x03, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03,
},
{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02,
0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
},
{
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
},
{
0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02,
0x03, 0x02, 0x03, 0x02, 0x03, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03,
},
{
0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03,
0x03, 0x02, 0x02, 0x03, 0x03, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00,
0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
},
{
0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
0x07, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01,
0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07,
},
{
0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
0x02, 0x03, 0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03,
},
{
0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04,
0x05, 0x06, 0x07, 0x06, 0x07, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03,
0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07,
},
{
0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05,
0x05, 0x06, 0x06, 0x07, 0x07, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02,
0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07,
},
{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A,
0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
},
{
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
},
{
0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
0x01, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03,
0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03, 0x02, 0x03,
},
{
0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01,
0x01, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x02, 0x02,
0x03, 0x03, 0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
},
{
0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02,
0x03, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05,
0x06, 0x07, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07,
},
{
0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00,
0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03,
0x03, 0x03, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03,
},
{
0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x00, 0x01, 0x00,
0x01, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07,
0x06, 0x07, 0x04, 0x05, 0x04, 0x05, 0x06, 0x07, 0x06, 0x07,
},
{
0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x00, 0x00, 0x01,
0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,
0x07, 0x07, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07,
},
{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02,
0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
},
{
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
},
{
0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02,
0x03, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04, 0x05, 0x04, 0x05,
0x04, 0x05, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
},
{
0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03,
0x03, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x04, 0x04,
0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
},
{
0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
0x07, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x08, 0x09,
0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x0C, 0x0D, 0x0E, 0x0F,
},
{
0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x05, 0x05,
0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07,
},
{
0x00, 0x01, 0x00, 0x01, 0x02, 0x03, 0x02, 0x03, 0x04, 0x05, 0x04,
0x05, 0x06, 0x07, 0x06, 0x07, 0x08, 0x09, 0x08, 0x09, 0x0A, 0x0B,
0x0A, 0x0B, 0x0C, 0x0D, 0x0C, 0x0D, 0x0E, 0x0F, 0x0E, 0x0F,
},
{
0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05,
0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0A, 0x0A,
0x0B, 0x0B, 0x0C, 0x0C, 0x0D, 0x0D, 0x0E, 0x0E, 0x0F, 0x0F,
},
{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A,
0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
},
};
inline uint64_t ExtractBitsSoftware(uint64_t bitmap, uint64_t select_bitmap) {
// A software emulation of _pext_u64
// These checks should be inline and are likely to be common cases.
if (select_bitmap == ~uint64_t{0}) {
return bitmap;
} else if (select_bitmap == 0) {
return 0;
}
// Fallback to lookup table method
uint64_t bit_value = 0;
int bit_len = 0;
constexpr uint8_t kLookupMask = (1U << kLookupBits) - 1;
while (select_bitmap != 0) {
const auto mask_len = ARROW_POPCOUNT32(select_bitmap & kLookupMask);
const uint64_t value = kPextTable[select_bitmap & kLookupMask][bitmap & kLookupMask];
bit_value |= (value << bit_len);
bit_len += mask_len;
bitmap >>= kLookupBits;
select_bitmap >>= kLookupBits;
}
return bit_value;
}
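// Example (illustrative): bitmap = 0b1011 with select_bitmap = 0b0110 selects
// bits 1 and 2 of bitmap (values 1 and 0) and packs them into the low bits of
// the result, giving 0b01, matching the _pext_u64 contract.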
#ifdef ARROW_HAVE_BMI2
// Use _pext_u64 on 64-bit builds, _pext_u32 on 32-bit builds.
#if UINTPTR_MAX == 0xFFFFFFFF
using extract_bitmap_t = uint32_t;
inline extract_bitmap_t ExtractBits(extract_bitmap_t bitmap,
extract_bitmap_t select_bitmap) {
return _pext_u32(bitmap, select_bitmap);
}
#else
using extract_bitmap_t = uint64_t;
inline extract_bitmap_t ExtractBits(extract_bitmap_t bitmap,
extract_bitmap_t select_bitmap) {
return _pext_u64(bitmap, select_bitmap);
}
#endif
#else // !defined(ARROW_HAVE_BMI2)
// Use 64-bit pext emulation when BMI2 isn't available.
using extract_bitmap_t = uint64_t;
inline extract_bitmap_t ExtractBits(extract_bitmap_t bitmap,
extract_bitmap_t select_bitmap) {
return ExtractBitsSoftware(bitmap, select_bitmap);
}
#endif
static constexpr int64_t kExtractBitsSize = 8 * sizeof(extract_bitmap_t);
template <bool has_repeated_parent>
int64_t DefLevelsBatchToBitmap(const int16_t* def_levels, const int64_t batch_size,
int64_t upper_bound_remaining, LevelInfo level_info,
::arrow::internal::FirstTimeBitmapWriter* writer) {
DCHECK_LE(batch_size, kExtractBitsSize);
// Greater than level_info.def_level - 1 implies >= the def_level
auto defined_bitmap = static_cast<extract_bitmap_t>(
internal::GreaterThanBitmap(def_levels, batch_size, level_info.def_level - 1));
if (has_repeated_parent) {
// Greater than level_info.repeated_ancestor_def_level - 1 implies >= the
// repeated_ancestor_def_level
auto present_bitmap = static_cast<extract_bitmap_t>(internal::GreaterThanBitmap(
def_levels, batch_size, level_info.repeated_ancestor_def_level - 1));
auto selected_bits = ExtractBits(defined_bitmap, present_bitmap);
int64_t selected_count = ::arrow::bit_util::PopCount(present_bitmap);
if (ARROW_PREDICT_FALSE(selected_count > upper_bound_remaining)) {
throw ParquetException("Values read exceeded upper bound");
}
writer->AppendWord(selected_bits, selected_count);
return ::arrow::bit_util::PopCount(selected_bits);
} else {
if (ARROW_PREDICT_FALSE(batch_size > upper_bound_remaining)) {
std::stringstream ss;
ss << "Values read exceeded upper bound";
throw ParquetException(ss.str());
}
writer->AppendWord(defined_bitmap, batch_size);
return ::arrow::bit_util::PopCount(defined_bitmap);
}
}
template <bool has_repeated_parent>
void DefLevelsToBitmapSimd(const int16_t* def_levels, int64_t num_def_levels,
LevelInfo level_info, ValidityBitmapInputOutput* output) {
::arrow::internal::FirstTimeBitmapWriter writer(
output->valid_bits,
/*start_offset=*/output->valid_bits_offset,
/*length=*/output->values_read_upper_bound);
int64_t set_count = 0;
output->values_read = 0;
int64_t values_read_remaining = output->values_read_upper_bound;
while (num_def_levels > kExtractBitsSize) {
set_count += DefLevelsBatchToBitmap<has_repeated_parent>(
def_levels, kExtractBitsSize, values_read_remaining, level_info, &writer);
def_levels += kExtractBitsSize;
num_def_levels -= kExtractBitsSize;
values_read_remaining = output->values_read_upper_bound - writer.position();
}
set_count += DefLevelsBatchToBitmap<has_repeated_parent>(
def_levels, num_def_levels, values_read_remaining, level_info, &writer);
output->values_read = writer.position();
output->null_count += output->values_read - set_count;
writer.Finish();
}
} // namespace PARQUET_IMPL_NAMESPACE
} // namespace internal
} // namespace parquet


@ -0,0 +1,489 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "parquet/platform.h"
#include "parquet/properties.h"
#include "parquet/schema.h"
#include "parquet/types.h"
namespace parquet {
class ColumnDescriptor;
class EncodedStatistics;
class Statistics;
class SchemaDescriptor;
class FileCryptoMetaData;
class InternalFileDecryptor;
class Decryptor;
class Encryptor;
class FooterSigningEncryptor;
namespace schema {
class ColumnPath;
} // namespace schema
using KeyValueMetadata = ::arrow::KeyValueMetadata;
class PARQUET_EXPORT ApplicationVersion {
public:
// Known Versions with Issues
static const ApplicationVersion& PARQUET_251_FIXED_VERSION();
static const ApplicationVersion& PARQUET_816_FIXED_VERSION();
static const ApplicationVersion& PARQUET_CPP_FIXED_STATS_VERSION();
static const ApplicationVersion& PARQUET_MR_FIXED_STATS_VERSION();
// Application that wrote the file. e.g. "IMPALA"
std::string application_;
// Build name
std::string build_;
// Version of the application that wrote the file, expressed as
// (<major>.<minor>.<patch>). Unmatched parts default to 0.
// "1.2.3" => {1, 2, 3}
// "1.2" => {1, 2, 0}
// "1.2-cdh5" => {1, 2, 0}
struct {
int major;
int minor;
int patch;
std::string unknown;
std::string pre_release;
std::string build_info;
} version;
ApplicationVersion() = default;
explicit ApplicationVersion(const std::string& created_by);
ApplicationVersion(std::string application, int major, int minor, int patch);
// Returns true if version is strictly less than other_version
bool VersionLt(const ApplicationVersion& other_version) const;
// Returns true if version is strictly equal with other_version
bool VersionEq(const ApplicationVersion& other_version) const;
// Checks if the Version has the correct statistics for a given column
bool HasCorrectStatistics(Type::type primitive, EncodedStatistics& statistics,
SortOrder::type sort_order = SortOrder::SIGNED) const;
};
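// Usage sketch (illustrative only; the created_by string follows the common
// "<application> version <version> (build <hash>)" convention):
//
//   ApplicationVersion v("parquet-cpp version 1.5.0");
//   bool older = v.VersionLt(ApplicationVersion("parquet-cpp", 1, 6, 0));
//   // older == true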
class PARQUET_EXPORT ColumnCryptoMetaData {
public:
static std::unique_ptr<ColumnCryptoMetaData> Make(const uint8_t* metadata);
~ColumnCryptoMetaData();
bool Equals(const ColumnCryptoMetaData& other) const;
std::shared_ptr<schema::ColumnPath> path_in_schema() const;
bool encrypted_with_footer_key() const;
const std::string& key_metadata() const;
private:
explicit ColumnCryptoMetaData(const uint8_t* metadata);
class ColumnCryptoMetaDataImpl;
std::unique_ptr<ColumnCryptoMetaDataImpl> impl_;
};
/// \brief Public struct for Thrift PageEncodingStats in ColumnChunkMetaData
struct PageEncodingStats {
PageType::type page_type;
Encoding::type encoding;
int32_t count;
};
/// \brief ColumnChunkMetaData is a proxy around format::ColumnChunkMetaData.
class PARQUET_EXPORT ColumnChunkMetaData {
public:
// API convenience to get a MetaData accessor
static std::unique_ptr<ColumnChunkMetaData> Make(
const void* metadata, const ColumnDescriptor* descr,
const ApplicationVersion* writer_version = NULLPTR, int16_t row_group_ordinal = -1,
int16_t column_ordinal = -1,
std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
~ColumnChunkMetaData();
bool Equals(const ColumnChunkMetaData& other) const;
// column chunk
int64_t file_offset() const;
// parameter is only used when a dataset is spread across multiple files
const std::string& file_path() const;
// column metadata
bool is_metadata_set() const;
Type::type type() const;
int64_t num_values() const;
std::shared_ptr<schema::ColumnPath> path_in_schema() const;
bool is_stats_set() const;
std::shared_ptr<Statistics> statistics() const;
Compression::type compression() const;
// Indicate if the ColumnChunk compression is supported by the current
// compiled parquet library.
bool can_decompress() const;
const std::vector<Encoding::type>& encodings() const;
const std::vector<PageEncodingStats>& encoding_stats() const;
bool has_dictionary_page() const;
int64_t dictionary_page_offset() const;
int64_t data_page_offset() const;
bool has_index_page() const;
int64_t index_page_offset() const;
int64_t total_compressed_size() const;
int64_t total_uncompressed_size() const;
std::unique_ptr<ColumnCryptoMetaData> crypto_metadata() const;
private:
explicit ColumnChunkMetaData(
const void* metadata, const ColumnDescriptor* descr, int16_t row_group_ordinal,
int16_t column_ordinal, const ApplicationVersion* writer_version = NULLPTR,
std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
// PIMPL Idiom
class ColumnChunkMetaDataImpl;
std::unique_ptr<ColumnChunkMetaDataImpl> impl_;
};
/// \brief RowGroupMetaData is a proxy around format::RowGroupMetaData.
class PARQUET_EXPORT RowGroupMetaData {
public:
/// \brief Create a RowGroupMetaData from a serialized thrift message.
static std::unique_ptr<RowGroupMetaData> Make(
const void* metadata, const SchemaDescriptor* schema,
const ApplicationVersion* writer_version = NULLPTR,
std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
~RowGroupMetaData();
bool Equals(const RowGroupMetaData& other) const;
/// \brief The number of columns in this row group. The order must match the
/// parent's column ordering.
int num_columns() const;
/// \brief Return the ColumnChunkMetaData of the corresponding column ordinal.
///
  /// WARNING, the returned object references a memory location in its parent
/// (RowGroupMetaData) object. Hence, the parent must outlive the returned
/// object.
///
/// \param[in] index of the ColumnChunkMetaData to retrieve.
///
/// \throws ParquetException if the index is out of bound.
std::unique_ptr<ColumnChunkMetaData> ColumnChunk(int index) const;
/// \brief Number of rows in this row group.
int64_t num_rows() const;
/// \brief Total byte size of all the uncompressed column data in this row group.
int64_t total_byte_size() const;
/// \brief Total byte size of all the compressed (and potentially encrypted)
/// column data in this row group.
///
/// This information is optional and may be 0 if omitted.
int64_t total_compressed_size() const;
/// \brief Byte offset from beginning of file to first page (data or
/// dictionary) in this row group
///
/// The file_offset field that this method exposes is optional. This method
/// will return 0 if that field is not set to a meaningful value.
int64_t file_offset() const;
// Return const-pointer to make it clear that this object is not to be copied
const SchemaDescriptor* schema() const;
// Indicate if all of the RowGroup's ColumnChunks can be decompressed.
bool can_decompress() const;
private:
explicit RowGroupMetaData(
const void* metadata, const SchemaDescriptor* schema,
const ApplicationVersion* writer_version = NULLPTR,
std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
// PIMPL Idiom
class RowGroupMetaDataImpl;
std::unique_ptr<RowGroupMetaDataImpl> impl_;
};
class FileMetaDataBuilder;
/// \brief FileMetaData is a proxy around format::FileMetaData.
class PARQUET_EXPORT FileMetaData {
public:
/// \brief Create a FileMetaData from a serialized thrift message.
static std::shared_ptr<FileMetaData> Make(
const void* serialized_metadata, uint32_t* inout_metadata_len,
std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
~FileMetaData();
bool Equals(const FileMetaData& other) const;
/// \brief The number of top-level columns in the schema.
///
/// Parquet thrift definition requires that nested schema elements are
/// flattened. This method returns the number of columns in the un-flattened
/// version.
int num_columns() const;
/// \brief The number of flattened schema elements.
///
/// Parquet thrift definition requires that nested schema elements are
/// flattened. This method returns the total number of elements in the
/// flattened list.
int num_schema_elements() const;
/// \brief The total number of rows.
int64_t num_rows() const;
/// \brief The number of row groups in the file.
int num_row_groups() const;
/// \brief Return the RowGroupMetaData of the corresponding row group ordinal.
///
  /// WARNING, the returned object references a memory location in its parent
/// (FileMetaData) object. Hence, the parent must outlive the returned object.
///
/// \param[in] index of the RowGroup to retrieve.
///
/// \throws ParquetException if the index is out of bound.
std::unique_ptr<RowGroupMetaData> RowGroup(int index) const;
/// \brief Return the "version" of the file
///
/// WARNING: The value returned by this method is unreliable as 1) the Parquet
/// file metadata stores the version as a single integer and 2) some producers
/// are known to always write a hardcoded value. Therefore, you cannot use
/// this value to know which features are used in the file.
ParquetVersion::type version() const;
/// \brief Return the application's user-agent string of the writer.
const std::string& created_by() const;
/// \brief Return the application's version of the writer.
const ApplicationVersion& writer_version() const;
/// \brief Size of the original thrift encoded metadata footer.
uint32_t size() const;
/// \brief Indicate if all of the FileMetadata's RowGroups can be decompressed.
///
/// This will return false if any of the RowGroup's page is compressed with a
/// compression format which is not compiled in the current parquet library.
bool can_decompress() const;
bool is_encryption_algorithm_set() const;
EncryptionAlgorithm encryption_algorithm() const;
const std::string& footer_signing_key_metadata() const;
/// \brief Verify signature of FileMetaData when file is encrypted but footer
/// is not encrypted (plaintext footer).
bool VerifySignature(const void* signature);
void WriteTo(::arrow::io::OutputStream* dst,
const std::shared_ptr<Encryptor>& encryptor = NULLPTR) const;
/// \brief Return Thrift-serialized representation of the metadata as a
/// string
std::string SerializeToString() const;
// Return const-pointer to make it clear that this object is not to be copied
const SchemaDescriptor* schema() const;
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata() const;
  /// \brief Set the file path for all ColumnChunks in all RowGroups.
  ///
  /// Commonly used by systems (Dask, Spark) that generate a metadata-only
  /// parquet file. The path is usually relative to that index file.
///
/// \param[in] path to set.
void set_file_path(const std::string& path);
/// \brief Merge row groups from another metadata file into this one.
///
/// The schema of the input FileMetaData must be equal to the
/// schema of this object.
///
/// This is used by systems that create an aggregate metadata-only file by
/// concatenating the row groups of multiple files. The newly created
/// metadata file acts as an index of all available row groups.
///
/// \param[in] other FileMetaData to merge the row groups from.
///
/// \throws ParquetException if schemas are not equal.
void AppendRowGroups(const FileMetaData& other);
/// \brief Return a FileMetaData containing a subset of the row groups in this
/// FileMetaData.
std::shared_ptr<FileMetaData> Subset(const std::vector<int>& row_groups) const;
private:
friend FileMetaDataBuilder;
friend class SerializedFile;
explicit FileMetaData(const void* serialized_metadata, uint32_t* metadata_len,
std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
void set_file_decryptor(std::shared_ptr<InternalFileDecryptor> file_decryptor);
// PIMPL Idiom
FileMetaData();
class FileMetaDataImpl;
std::unique_ptr<FileMetaDataImpl> impl_;
};
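// A minimal usage sketch (hypothetical buffer/variable names): deserializing a
// Thrift footer into a FileMetaData and walking its row groups. Note that each
// RowGroupMetaData must not outlive its parent FileMetaData.
//
//   const void* footer = /* serialized Thrift footer bytes */;
//   uint32_t len = footer_len;  // in: buffer size, out: bytes consumed
//   std::shared_ptr<FileMetaData> md = FileMetaData::Make(footer, &len);
//   for (int i = 0; i < md->num_row_groups(); ++i) {
//     std::unique_ptr<RowGroupMetaData> rg = md->RowGroup(i);
//     // inspect rg while md is still alive
//   }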
class PARQUET_EXPORT FileCryptoMetaData {
public:
// API convenience to get a MetaData accessor
static std::shared_ptr<FileCryptoMetaData> Make(const uint8_t* serialized_metadata,
uint32_t* metadata_len);
~FileCryptoMetaData();
EncryptionAlgorithm encryption_algorithm() const;
const std::string& key_metadata() const;
void WriteTo(::arrow::io::OutputStream* dst) const;
private:
friend FileMetaDataBuilder;
FileCryptoMetaData(const uint8_t* serialized_metadata, uint32_t* metadata_len);
// PIMPL Idiom
FileCryptoMetaData();
class FileCryptoMetaDataImpl;
std::unique_ptr<FileCryptoMetaDataImpl> impl_;
};
// Builder API
class PARQUET_EXPORT ColumnChunkMetaDataBuilder {
public:
// API convenience to get a MetaData builder
static std::unique_ptr<ColumnChunkMetaDataBuilder> Make(
std::shared_ptr<WriterProperties> props, const ColumnDescriptor* column);
static std::unique_ptr<ColumnChunkMetaDataBuilder> Make(
std::shared_ptr<WriterProperties> props, const ColumnDescriptor* column,
void* contents);
~ColumnChunkMetaDataBuilder();
// column chunk
// Used when a dataset is spread across multiple files
void set_file_path(const std::string& path);
// column metadata
void SetStatistics(const EncodedStatistics& stats);
// get the column descriptor
const ColumnDescriptor* descr() const;
int64_t total_compressed_size() const;
// commit the metadata
void Finish(int64_t num_values, int64_t dictionary_page_offset,
int64_t index_page_offset, int64_t data_page_offset,
int64_t compressed_size, int64_t uncompressed_size, bool has_dictionary,
bool dictionary_fallback,
const std::map<Encoding::type, int32_t>& dict_encoding_stats_,
const std::map<Encoding::type, int32_t>& data_encoding_stats_,
const std::shared_ptr<Encryptor>& encryptor = NULLPTR);
// The metadata contents, suitable for passing to ColumnChunkMetaData::Make
const void* contents() const;
// For writing metadata at end of column chunk
void WriteTo(::arrow::io::OutputStream* sink);
private:
explicit ColumnChunkMetaDataBuilder(std::shared_ptr<WriterProperties> props,
const ColumnDescriptor* column);
explicit ColumnChunkMetaDataBuilder(std::shared_ptr<WriterProperties> props,
const ColumnDescriptor* column, void* contents);
// PIMPL Idiom
class ColumnChunkMetaDataBuilderImpl;
std::unique_ptr<ColumnChunkMetaDataBuilderImpl> impl_;
};
class PARQUET_EXPORT RowGroupMetaDataBuilder {
public:
// API convenience to get a MetaData builder
static std::unique_ptr<RowGroupMetaDataBuilder> Make(
std::shared_ptr<WriterProperties> props, const SchemaDescriptor* schema_,
void* contents);
~RowGroupMetaDataBuilder();
ColumnChunkMetaDataBuilder* NextColumnChunk();
int num_columns();
int64_t num_rows();
int current_column() const;
void set_num_rows(int64_t num_rows);
// commit the metadata
void Finish(int64_t total_bytes_written, int16_t row_group_ordinal = -1);
private:
explicit RowGroupMetaDataBuilder(std::shared_ptr<WriterProperties> props,
const SchemaDescriptor* schema_, void* contents);
// PIMPL Idiom
class RowGroupMetaDataBuilderImpl;
std::unique_ptr<RowGroupMetaDataBuilderImpl> impl_;
};
class PARQUET_EXPORT FileMetaDataBuilder {
public:
// API convenience to get a MetaData builder
static std::unique_ptr<FileMetaDataBuilder> Make(
const SchemaDescriptor* schema, std::shared_ptr<WriterProperties> props,
std::shared_ptr<const KeyValueMetadata> key_value_metadata = NULLPTR);
~FileMetaDataBuilder();
// The prior RowGroupMetaDataBuilder (if any) is destroyed
RowGroupMetaDataBuilder* AppendRowGroup();
// Complete the Thrift structure
std::unique_ptr<FileMetaData> Finish();
// crypto metadata
std::unique_ptr<FileCryptoMetaData> GetCryptoMetaData();
private:
explicit FileMetaDataBuilder(
const SchemaDescriptor* schema, std::shared_ptr<WriterProperties> props,
std::shared_ptr<const KeyValueMetadata> key_value_metadata = NULLPTR);
// PIMPL Idiom
class FileMetaDataBuilderImpl;
std::unique_ptr<FileMetaDataBuilderImpl> impl_;
};
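// A hedged sketch of the builder flow (the schema and props variables are
// assumed to exist; per-column-chunk details are elided): append a row group,
// record its row count, then finalize the Thrift structure.
//
//   auto builder = FileMetaDataBuilder::Make(schema, props);
//   RowGroupMetaDataBuilder* rg = builder->AppendRowGroup();
//   rg->set_num_rows(num_rows);
//   // ... finish each ColumnChunkMetaDataBuilder via rg->NextColumnChunk() ...
//   rg->Finish(total_bytes_written);
//   std::unique_ptr<FileMetaData> metadata = builder->Finish();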
PARQUET_EXPORT std::string ParquetVersionToString(ParquetVersion::type ver);
} // namespace parquet

View File

@ -0,0 +1,54 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#pragma once
#include <cstdint>
#include "parquet/hasher.h"
#include "parquet/platform.h"
#include "parquet/types.h"
namespace parquet {
/// Source:
/// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
/// (Modified to adapt to coding conventions and to inherit the Hasher abstract class)
class PARQUET_EXPORT MurmurHash3 : public Hasher {
public:
MurmurHash3() : seed_(DEFAULT_SEED) {}
uint64_t Hash(int32_t value) const override;
uint64_t Hash(int64_t value) const override;
uint64_t Hash(float value) const override;
uint64_t Hash(double value) const override;
uint64_t Hash(const Int96* value) const override;
uint64_t Hash(const ByteArray* value) const override;
uint64_t Hash(const FLBA* val, uint32_t len) const override;
private:
// Default seed for the hash, which comes from the Bloom filter in parquet-mr;
// it was generated by Java's System.nanoTime().
static constexpr int DEFAULT_SEED = 1361930890;
uint32_t seed_;
};
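// Usage is straightforward; a small sketch hashing a primitive value through
// the Hasher interface declared above:
//
//   MurmurHash3 hasher;
//   uint64_t h = hasher.Hash(static_cast<int64_t>(42));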
} // namespace parquet

View File

@ -0,0 +1,31 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef PARQUET_VERSION_H
#define PARQUET_VERSION_H
#define PARQUET_VERSION_MAJOR 8
#define PARQUET_VERSION_MINOR 0
#define PARQUET_VERSION_PATCH 0
#define PARQUET_SO_VERSION "800"
#define PARQUET_FULL_SO_VERSION "800.0.0"
// define the parquet created by version
#define CREATED_BY_VERSION "parquet-cpp-arrow version 8.0.0"
#endif // PARQUET_VERSION_H
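// These macros allow compile-time gating against the library version, e.g.
// (a sketch):
//
//   #if PARQUET_VERSION_MAJOR >= 8
//   // code paths that rely on parquet-cpp-arrow >= 8.0.0
//   #endif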

View File

@ -0,0 +1,28 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Often-used headers, for precompiling.
// If updating this header, please make sure you check compilation speed
// before checking in. Adding headers which are not used extremely often
// may incur a slowdown, since it makes the precompiled header heavier to load.
#include "parquet/encoding.h"
#include "parquet/exception.h"
#include "parquet/metadata.h"
#include "parquet/properties.h"
#include "parquet/schema.h"
#include "parquet/types.h"

View File

@ -0,0 +1,111 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include "arrow/buffer.h" // IWYU pragma: export
#include "arrow/io/interfaces.h" // IWYU pragma: export
#include "arrow/status.h" // IWYU pragma: export
#include "arrow/type_fwd.h" // IWYU pragma: export
#include "arrow/util/macros.h" // IWYU pragma: export
#if defined(_WIN32) || defined(__CYGWIN__)
#if defined(_MSC_VER)
#pragma warning(push)
// Disable warning for STL types usage in DLL interface
// https://web.archive.org/web/20130317015847/http://connect.microsoft.com/VisualStudio/feedback/details/696593/vc-10-vs-2010-basic-string-exports
#pragma warning(disable : 4275 4251)
// Disable diamond inheritance warnings
#pragma warning(disable : 4250)
// Disable macro redefinition warnings
#pragma warning(disable : 4005)
// Disable extern before exported template warnings
#pragma warning(disable : 4910)
#else
#pragma GCC diagnostic ignored "-Wattributes"
#endif
#ifdef PARQUET_STATIC
#define PARQUET_EXPORT
#elif defined(PARQUET_EXPORTING)
#define PARQUET_EXPORT __declspec(dllexport)
#else
#define PARQUET_EXPORT __declspec(dllimport)
#endif
#define PARQUET_NO_EXPORT
#else // Not Windows
#ifndef PARQUET_EXPORT
#define PARQUET_EXPORT __attribute__((visibility("default")))
#endif
#ifndef PARQUET_NO_EXPORT
#define PARQUET_NO_EXPORT __attribute__((visibility("hidden")))
#endif
#endif // Non-Windows
// This is a complicated topic, some reading on it:
// http://www.codesynthesis.com/~boris/blog/2010/01/18/dll-export-cxx-templates/
#if defined(_MSC_VER) || defined(__clang__)
#define PARQUET_TEMPLATE_CLASS_EXPORT
#define PARQUET_TEMPLATE_EXPORT PARQUET_EXPORT
#else
#define PARQUET_TEMPLATE_CLASS_EXPORT PARQUET_EXPORT
#define PARQUET_TEMPLATE_EXPORT
#endif
#define PARQUET_DISALLOW_COPY_AND_ASSIGN ARROW_DISALLOW_COPY_AND_ASSIGN
#define PARQUET_NORETURN ARROW_NORETURN
#define PARQUET_DEPRECATED ARROW_DEPRECATED
// If ARROW_VALGRIND set when compiling unit tests, also define
// PARQUET_VALGRIND
#ifdef ARROW_VALGRIND
#define PARQUET_VALGRIND
#endif
namespace parquet {
using Buffer = ::arrow::Buffer;
using Codec = ::arrow::util::Codec;
using Compression = ::arrow::Compression;
using MemoryPool = ::arrow::MemoryPool;
using MutableBuffer = ::arrow::MutableBuffer;
using ResizableBuffer = ::arrow::ResizableBuffer;
using ArrowInputFile = ::arrow::io::RandomAccessFile;
using ArrowInputStream = ::arrow::io::InputStream;
using ArrowOutputStream = ::arrow::io::OutputStream;
constexpr int64_t kDefaultOutputStreamSize = 1024;
constexpr int16_t kNonPageOrdinal = static_cast<int16_t>(-1);
PARQUET_EXPORT
std::shared_ptr<::arrow::io::BufferOutputStream> CreateOutputStream(
::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
PARQUET_EXPORT
std::shared_ptr<ResizableBuffer> AllocateBuffer(
::arrow::MemoryPool* pool = ::arrow::default_memory_pool(), int64_t size = 0);
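// A small sketch of the two helpers above (sizes illustrative): an in-memory
// output stream backed by the default pool, and a resizable scratch buffer.
//
//   auto sink = CreateOutputStream();
//   std::shared_ptr<ResizableBuffer> scratch =
//       AllocateBuffer(::arrow::default_memory_pool(), /*size=*/1024);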
} // namespace parquet

View File

@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <iosfwd>
#include <list>
#include "parquet/platform.h"
namespace parquet {
class ParquetFileReader;
class PARQUET_EXPORT ParquetFilePrinter {
private:
ParquetFileReader* fileReader;
public:
explicit ParquetFilePrinter(ParquetFileReader* reader) : fileReader(reader) {}
~ParquetFilePrinter() {}
void DebugPrint(std::ostream& stream, std::list<int> selected_columns,
bool print_values = false, bool format_dump = false,
bool print_key_value_metadata = false,
const char* filename = "No Name");
void JSONPrint(std::ostream& stream, std::list<int> selected_columns,
const char* filename = "No Name");
};
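// A hedged usage sketch, assuming a reader obtained from
// ParquetFileReader::OpenFile() (declared in parquet/file_reader.h) and
// <iostream> for std::cout; an empty column list is illustrative only:
//
//   std::unique_ptr<ParquetFileReader> reader =
//       ParquetFileReader::OpenFile("example.parquet");
//   ParquetFilePrinter printer(reader.get());
//   printer.DebugPrint(std::cout, /*selected_columns=*/{}, /*print_values=*/false);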
} // namespace parquet

View File

@ -0,0 +1,837 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include "arrow/io/caching.h"
#include "arrow/type.h"
#include "arrow/util/compression.h"
#include "parquet/encryption/encryption.h"
#include "parquet/exception.h"
#include "parquet/parquet_version.h"
#include "parquet/platform.h"
#include "parquet/schema.h"
#include "parquet/type_fwd.h"
#include "parquet/types.h"
namespace parquet {
/// Controls serialization format of data pages. parquet-format v2.0.0
/// introduced a new data page metadata type, DataPageV2, and a new serialized
/// page structure (for example, encoded levels are no longer compressed). Prior to
/// the completion of PARQUET-457 in 2020, this library did not implement
/// DataPageV2 correctly, so if you use the V2 data page format, you may have
/// forward compatibility issues (older versions of the library will be unable
/// to read the files). Note that some Parquet implementations do not implement
/// DataPageV2 at all.
enum class ParquetDataPageVersion { V1, V2 };
/// Align the default buffer size to a small multiple of a page size.
constexpr int64_t kDefaultBufferSize = 4096 * 4;
class PARQUET_EXPORT ReaderProperties {
public:
explicit ReaderProperties(MemoryPool* pool = ::arrow::default_memory_pool())
: pool_(pool) {}
MemoryPool* memory_pool() const { return pool_; }
std::shared_ptr<ArrowInputStream> GetStream(std::shared_ptr<ArrowInputFile> source,
int64_t start, int64_t num_bytes);
/// Buffered stream reading allows the user to control the memory usage of
/// parquet readers. This ensures that all `RandomAccessFile::ReadAt` calls are
/// wrapped in a buffered reader that uses a fixed-size buffer (of size
/// `buffer_size()`) instead of reading the full extent of the ReadAt at once.
///
/// The primary purpose of this control knob is resource management, not
/// performance.
bool is_buffered_stream_enabled() const { return buffered_stream_enabled_; }
void enable_buffered_stream() { buffered_stream_enabled_ = true; }
void disable_buffered_stream() { buffered_stream_enabled_ = false; }
int64_t buffer_size() const { return buffer_size_; }
void set_buffer_size(int64_t size) { buffer_size_ = size; }
void file_decryption_properties(std::shared_ptr<FileDecryptionProperties> decryption) {
file_decryption_properties_ = std::move(decryption);
}
const std::shared_ptr<FileDecryptionProperties>& file_decryption_properties() const {
return file_decryption_properties_;
}
private:
MemoryPool* pool_;
int64_t buffer_size_ = kDefaultBufferSize;
bool buffered_stream_enabled_ = false;
std::shared_ptr<FileDecryptionProperties> file_decryption_properties_;
};
ReaderProperties PARQUET_EXPORT default_reader_properties();
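// A sketch of capping reader memory with a buffered stream (sizes are
// illustrative): every ReadAt then goes through a fixed 1 MiB buffer instead
// of allocating the full read extent.
//
//   ReaderProperties props;            // uses the default memory pool
//   props.enable_buffered_stream();
//   props.set_buffer_size(1 << 20);    // 1 MiB instead of the 16 KiB default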
static constexpr int64_t kDefaultDataPageSize = 1024 * 1024;
static constexpr bool DEFAULT_IS_DICTIONARY_ENABLED = true;
static constexpr int64_t DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT = kDefaultDataPageSize;
static constexpr int64_t DEFAULT_WRITE_BATCH_SIZE = 1024;
static constexpr int64_t DEFAULT_MAX_ROW_GROUP_LENGTH = 64 * 1024 * 1024;
static constexpr bool DEFAULT_ARE_STATISTICS_ENABLED = true;
static constexpr int64_t DEFAULT_MAX_STATISTICS_SIZE = 4096;
static constexpr Encoding::type DEFAULT_ENCODING = Encoding::PLAIN;
static const char DEFAULT_CREATED_BY[] = CREATED_BY_VERSION;
static constexpr Compression::type DEFAULT_COMPRESSION_TYPE = Compression::UNCOMPRESSED;
class PARQUET_EXPORT ColumnProperties {
public:
ColumnProperties(Encoding::type encoding = DEFAULT_ENCODING,
Compression::type codec = DEFAULT_COMPRESSION_TYPE,
bool dictionary_enabled = DEFAULT_IS_DICTIONARY_ENABLED,
bool statistics_enabled = DEFAULT_ARE_STATISTICS_ENABLED,
size_t max_stats_size = DEFAULT_MAX_STATISTICS_SIZE)
: encoding_(encoding),
codec_(codec),
dictionary_enabled_(dictionary_enabled),
statistics_enabled_(statistics_enabled),
max_stats_size_(max_stats_size),
compression_level_(Codec::UseDefaultCompressionLevel()) {}
void set_encoding(Encoding::type encoding) { encoding_ = encoding; }
void set_compression(Compression::type codec) { codec_ = codec; }
void set_dictionary_enabled(bool dictionary_enabled) {
dictionary_enabled_ = dictionary_enabled;
}
void set_statistics_enabled(bool statistics_enabled) {
statistics_enabled_ = statistics_enabled;
}
void set_max_statistics_size(size_t max_stats_size) {
max_stats_size_ = max_stats_size;
}
void set_compression_level(int compression_level) {
compression_level_ = compression_level;
}
Encoding::type encoding() const { return encoding_; }
Compression::type compression() const { return codec_; }
bool dictionary_enabled() const { return dictionary_enabled_; }
bool statistics_enabled() const { return statistics_enabled_; }
size_t max_statistics_size() const { return max_stats_size_; }
int compression_level() const { return compression_level_; }
private:
Encoding::type encoding_;
Compression::type codec_;
bool dictionary_enabled_;
bool statistics_enabled_;
size_t max_stats_size_;
int compression_level_;
};
class PARQUET_EXPORT WriterProperties {
public:
class Builder {
public:
Builder()
: pool_(::arrow::default_memory_pool()),
dictionary_pagesize_limit_(DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT),
write_batch_size_(DEFAULT_WRITE_BATCH_SIZE),
max_row_group_length_(DEFAULT_MAX_ROW_GROUP_LENGTH),
pagesize_(kDefaultDataPageSize),
version_(ParquetVersion::PARQUET_1_0),
data_page_version_(ParquetDataPageVersion::V1),
created_by_(DEFAULT_CREATED_BY) {}
virtual ~Builder() {}
/// Specify the memory pool for the writer. Defaults to ::arrow::default_memory_pool().
Builder* memory_pool(MemoryPool* pool) {
pool_ = pool;
return this;
}
/// Enable dictionary encoding in general for all columns. Default enabled.
Builder* enable_dictionary() {
default_column_properties_.set_dictionary_enabled(true);
return this;
}
/// Disable dictionary encoding in general for all columns. Default enabled.
Builder* disable_dictionary() {
default_column_properties_.set_dictionary_enabled(false);
return this;
}
/// Enable dictionary encoding for column specified by `path`. Default enabled.
Builder* enable_dictionary(const std::string& path) {
dictionary_enabled_[path] = true;
return this;
}
/// Enable dictionary encoding for column specified by `path`. Default enabled.
Builder* enable_dictionary(const std::shared_ptr<schema::ColumnPath>& path) {
return this->enable_dictionary(path->ToDotString());
}
/// Disable dictionary encoding for column specified by `path`. Default enabled.
Builder* disable_dictionary(const std::string& path) {
dictionary_enabled_[path] = false;
return this;
}
/// Disable dictionary encoding for column specified by `path`. Default enabled.
Builder* disable_dictionary(const std::shared_ptr<schema::ColumnPath>& path) {
return this->disable_dictionary(path->ToDotString());
}
/// Specify the dictionary page size limit per row group. Default 1MB.
Builder* dictionary_pagesize_limit(int64_t dictionary_psize_limit) {
dictionary_pagesize_limit_ = dictionary_psize_limit;
return this;
}
/// Specify the write batch size while writing batches of Arrow values into Parquet.
/// Default 1024.
Builder* write_batch_size(int64_t write_batch_size) {
write_batch_size_ = write_batch_size;
return this;
}
/// Specify the max row group length.
/// Default 64M.
Builder* max_row_group_length(int64_t max_row_group_length) {
max_row_group_length_ = max_row_group_length;
return this;
}
/// Specify the data page size.
/// Default 1MB.
Builder* data_pagesize(int64_t pg_size) {
pagesize_ = pg_size;
return this;
}
/// Specify the data page version.
/// Default V1.
Builder* data_page_version(ParquetDataPageVersion data_page_version) {
data_page_version_ = data_page_version;
return this;
}
/// Specify the Parquet file version.
/// Default PARQUET_1_0.
Builder* version(ParquetVersion::type version) {
version_ = version;
return this;
}
Builder* created_by(const std::string& created_by) {
created_by_ = created_by;
return this;
}
/// \brief Define the encoding that is used when we don't utilise dictionary encoding.
///
/// This applies either when dictionary encoding is disabled or when we fall
/// back because the dictionary grew too large.
Builder* encoding(Encoding::type encoding_type) {
if (encoding_type == Encoding::PLAIN_DICTIONARY ||
encoding_type == Encoding::RLE_DICTIONARY) {
throw ParquetException("Can't use dictionary encoding as fallback encoding");
}
default_column_properties_.set_encoding(encoding_type);
return this;
}
/// \brief Define the encoding that is used when we don't utilise dictionary encoding.
///
/// This applies either when dictionary encoding is disabled or when we fall
/// back because the dictionary grew too large.
Builder* encoding(const std::string& path, Encoding::type encoding_type) {
if (encoding_type == Encoding::PLAIN_DICTIONARY ||
encoding_type == Encoding::RLE_DICTIONARY) {
throw ParquetException("Can't use dictionary encoding as fallback encoding");
}
encodings_[path] = encoding_type;
return this;
}
/// \brief Define the encoding that is used when we don't utilise dictionary encoding.
///
/// This applies either when dictionary encoding is disabled or when we fall
/// back because the dictionary grew too large.
Builder* encoding(const std::shared_ptr<schema::ColumnPath>& path,
Encoding::type encoding_type) {
return this->encoding(path->ToDotString(), encoding_type);
}
/// Specify compression codec in general for all columns.
/// Default UNCOMPRESSED.
Builder* compression(Compression::type codec) {
default_column_properties_.set_compression(codec);
return this;
}
/// Specify max statistics size to store min max value.
/// Default 4KB.
Builder* max_statistics_size(size_t max_stats_sz) {
default_column_properties_.set_max_statistics_size(max_stats_sz);
return this;
}
/// Specify compression codec for the column specified by `path`.
/// Default UNCOMPRESSED.
Builder* compression(const std::string& path, Compression::type codec) {
codecs_[path] = codec;
return this;
}
/// Specify compression codec for the column specified by `path`.
/// Default UNCOMPRESSED.
Builder* compression(const std::shared_ptr<schema::ColumnPath>& path,
Compression::type codec) {
return this->compression(path->ToDotString(), codec);
}
/// \brief Specify the default compression level for the compressor in
/// every column. In case a column does not have an explicitly specified
/// compression level, the default one would be used.
///
/// The provided compression level is compressor specific. The user would
/// have to familiarize oneself with the available levels for the selected
/// compressor. If the compressor does not allow for selecting different
/// compression levels, calling this function would not have any effect.
/// Parquet and Arrow do not validate the passed compression level. If no
/// level is selected by the user or if the special
/// std::numeric_limits<int>::min() value is passed, then Arrow selects the
/// compression level.
Builder* compression_level(int compression_level) {
default_column_properties_.set_compression_level(compression_level);
return this;
}
/// \brief Specify a compression level for the compressor for the column
/// described by path.
///
/// The provided compression level is compressor specific. The user would
/// have to familiarize oneself with the available levels for the selected
/// compressor. If the compressor does not allow for selecting different
/// compression levels, calling this function would not have any effect.
/// Parquet and Arrow do not validate the passed compression level. If no
/// level is selected by the user or if the special
/// std::numeric_limits<int>::min() value is passed, then Arrow selects the
/// compression level.
Builder* compression_level(const std::string& path, int compression_level) {
codecs_compression_level_[path] = compression_level;
return this;
}
/// \brief Specify a compression level for the compressor for the column
/// described by path.
///
/// The provided compression level is compressor specific. The user would
/// have to familiarize oneself with the available levels for the selected
/// compressor. If the compressor does not allow for selecting different
/// compression levels, calling this function would not have any effect.
/// Parquet and Arrow do not validate the passed compression level. If no
/// level is selected by the user or if the special
/// std::numeric_limits<int>::min() value is passed, then Arrow selects the
/// compression level.
Builder* compression_level(const std::shared_ptr<schema::ColumnPath>& path,
int compression_level) {
return this->compression_level(path->ToDotString(), compression_level);
}
/// Define the file encryption properties.
/// Default NULL.
Builder* encryption(
std::shared_ptr<FileEncryptionProperties> file_encryption_properties) {
file_encryption_properties_ = std::move(file_encryption_properties);
return this;
}
/// Enable statistics in general.
/// Default enabled.
Builder* enable_statistics() {
default_column_properties_.set_statistics_enabled(true);
return this;
}
/// Disable statistics in general.
/// Default enabled.
Builder* disable_statistics() {
default_column_properties_.set_statistics_enabled(false);
return this;
}
/// Enable statistics for the column specified by `path`.
/// Default enabled.
Builder* enable_statistics(const std::string& path) {
statistics_enabled_[path] = true;
return this;
}
/// Enable statistics for the column specified by `path`.
/// Default enabled.
Builder* enable_statistics(const std::shared_ptr<schema::ColumnPath>& path) {
return this->enable_statistics(path->ToDotString());
}
/// Disable statistics for the column specified by `path`.
/// Default enabled.
Builder* disable_statistics(const std::string& path) {
statistics_enabled_[path] = false;
return this;
}
/// Disable statistics for the column specified by `path`.
/// Default enabled.
Builder* disable_statistics(const std::shared_ptr<schema::ColumnPath>& path) {
return this->disable_statistics(path->ToDotString());
}
/// \brief Build the WriterProperties with the builder parameters.
/// \return The WriterProperties defined by the builder.
std::shared_ptr<WriterProperties> build() {
std::unordered_map<std::string, ColumnProperties> column_properties;
auto get = [&](const std::string& key) -> ColumnProperties& {
auto it = column_properties.find(key);
if (it == column_properties.end())
return column_properties[key] = default_column_properties_;
else
return it->second;
};
for (const auto& item : encodings_) get(item.first).set_encoding(item.second);
for (const auto& item : codecs_) get(item.first).set_compression(item.second);
for (const auto& item : codecs_compression_level_)
get(item.first).set_compression_level(item.second);
for (const auto& item : dictionary_enabled_)
get(item.first).set_dictionary_enabled(item.second);
for (const auto& item : statistics_enabled_)
get(item.first).set_statistics_enabled(item.second);
return std::shared_ptr<WriterProperties>(new WriterProperties(
pool_, dictionary_pagesize_limit_, write_batch_size_, max_row_group_length_,
pagesize_, version_, created_by_, std::move(file_encryption_properties_),
default_column_properties_, column_properties, data_page_version_));
}
private:
MemoryPool* pool_;
int64_t dictionary_pagesize_limit_;
int64_t write_batch_size_;
int64_t max_row_group_length_;
int64_t pagesize_;
ParquetVersion::type version_;
ParquetDataPageVersion data_page_version_;
std::string created_by_;
std::shared_ptr<FileEncryptionProperties> file_encryption_properties_;
// Settings used for each column unless overridden in any of the maps below
ColumnProperties default_column_properties_;
std::unordered_map<std::string, Encoding::type> encodings_;
std::unordered_map<std::string, Compression::type> codecs_;
std::unordered_map<std::string, int32_t> codecs_compression_level_;
std::unordered_map<std::string, bool> dictionary_enabled_;
std::unordered_map<std::string, bool> statistics_enabled_;
};
inline MemoryPool* memory_pool() const { return pool_; }
inline int64_t dictionary_pagesize_limit() const { return dictionary_pagesize_limit_; }
inline int64_t write_batch_size() const { return write_batch_size_; }
inline int64_t max_row_group_length() const { return max_row_group_length_; }
inline int64_t data_pagesize() const { return pagesize_; }
inline ParquetDataPageVersion data_page_version() const {
return parquet_data_page_version_;
}
inline ParquetVersion::type version() const { return parquet_version_; }
inline std::string created_by() const { return parquet_created_by_; }
inline Encoding::type dictionary_index_encoding() const {
if (parquet_version_ == ParquetVersion::PARQUET_1_0) {
return Encoding::PLAIN_DICTIONARY;
} else {
return Encoding::RLE_DICTIONARY;
}
}
inline Encoding::type dictionary_page_encoding() const {
if (parquet_version_ == ParquetVersion::PARQUET_1_0) {
return Encoding::PLAIN_DICTIONARY;
} else {
return Encoding::PLAIN;
}
}
const ColumnProperties& column_properties(
const std::shared_ptr<schema::ColumnPath>& path) const {
auto it = column_properties_.find(path->ToDotString());
if (it != column_properties_.end()) return it->second;
return default_column_properties_;
}
Encoding::type encoding(const std::shared_ptr<schema::ColumnPath>& path) const {
return column_properties(path).encoding();
}
Compression::type compression(const std::shared_ptr<schema::ColumnPath>& path) const {
return column_properties(path).compression();
}
int compression_level(const std::shared_ptr<schema::ColumnPath>& path) const {
return column_properties(path).compression_level();
}
bool dictionary_enabled(const std::shared_ptr<schema::ColumnPath>& path) const {
return column_properties(path).dictionary_enabled();
}
bool statistics_enabled(const std::shared_ptr<schema::ColumnPath>& path) const {
return column_properties(path).statistics_enabled();
}
size_t max_statistics_size(const std::shared_ptr<schema::ColumnPath>& path) const {
return column_properties(path).max_statistics_size();
}
inline FileEncryptionProperties* file_encryption_properties() const {
return file_encryption_properties_.get();
}
std::shared_ptr<ColumnEncryptionProperties> column_encryption_properties(
const std::string& path) const {
if (file_encryption_properties_) {
return file_encryption_properties_->column_encryption_properties(path);
} else {
return NULLPTR;
}
}
private:
explicit WriterProperties(
MemoryPool* pool, int64_t dictionary_pagesize_limit, int64_t write_batch_size,
int64_t max_row_group_length, int64_t pagesize, ParquetVersion::type version,
const std::string& created_by,
std::shared_ptr<FileEncryptionProperties> file_encryption_properties,
const ColumnProperties& default_column_properties,
const std::unordered_map<std::string, ColumnProperties>& column_properties,
ParquetDataPageVersion data_page_version)
: pool_(pool),
dictionary_pagesize_limit_(dictionary_pagesize_limit),
write_batch_size_(write_batch_size),
max_row_group_length_(max_row_group_length),
pagesize_(pagesize),
parquet_data_page_version_(data_page_version),
parquet_version_(version),
parquet_created_by_(created_by),
file_encryption_properties_(file_encryption_properties),
default_column_properties_(default_column_properties),
column_properties_(column_properties) {}
MemoryPool* pool_;
int64_t dictionary_pagesize_limit_;
int64_t write_batch_size_;
int64_t max_row_group_length_;
int64_t pagesize_;
ParquetDataPageVersion parquet_data_page_version_;
ParquetVersion::type parquet_version_;
std::string parquet_created_by_;
std::shared_ptr<FileEncryptionProperties> file_encryption_properties_;
ColumnProperties default_column_properties_;
std::unordered_map<std::string, ColumnProperties> column_properties_;
};
PARQUET_EXPORT const std::shared_ptr<WriterProperties>& default_writer_properties();
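// A hedged sketch of configuring a writer via the Builder (note the builder
// methods return Builder*, so chaining uses ->; codec availability depends on
// how the library was built):
//
//   WriterProperties::Builder builder;
//   builder.compression(Compression::SNAPPY)
//       ->enable_dictionary()
//       ->max_row_group_length(1 << 20);
//   std::shared_ptr<WriterProperties> props = builder.build();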
// ----------------------------------------------------------------------
// Properties specific to Apache Arrow columnar read and write
static constexpr bool kArrowDefaultUseThreads = false;
// Default number of rows to read when using ::arrow::RecordBatchReader
static constexpr int64_t kArrowDefaultBatchSize = 64 * 1024;
/// EXPERIMENTAL: Properties for configuring FileReader behavior.
class PARQUET_EXPORT ArrowReaderProperties {
public:
explicit ArrowReaderProperties(bool use_threads = kArrowDefaultUseThreads)
: use_threads_(use_threads),
read_dict_indices_(),
batch_size_(kArrowDefaultBatchSize),
pre_buffer_(false),
cache_options_(::arrow::io::CacheOptions::Defaults()),
coerce_int96_timestamp_unit_(::arrow::TimeUnit::NANO) {}
void set_use_threads(bool use_threads) { use_threads_ = use_threads; }
bool use_threads() const { return use_threads_; }
void set_read_dictionary(int column_index, bool read_dict) {
if (read_dict) {
read_dict_indices_.insert(column_index);
} else {
read_dict_indices_.erase(column_index);
}
}
bool read_dictionary(int column_index) const {
if (read_dict_indices_.find(column_index) != read_dict_indices_.end()) {
return true;
} else {
return false;
}
}
void set_batch_size(int64_t batch_size) { batch_size_ = batch_size; }
int64_t batch_size() const { return batch_size_; }
/// Enable read coalescing.
///
/// When enabled, the Arrow reader will pre-buffer necessary regions
/// of the file in-memory. This is intended to improve performance on
/// high-latency filesystems (e.g. Amazon S3).
void set_pre_buffer(bool pre_buffer) { pre_buffer_ = pre_buffer; }
bool pre_buffer() const { return pre_buffer_; }
/// Set options for read coalescing. This can be used to tune the
/// implementation for characteristics of different filesystems.
void set_cache_options(::arrow::io::CacheOptions options) { cache_options_ = options; }
const ::arrow::io::CacheOptions& cache_options() const { return cache_options_; }
/// Set execution context for read coalescing.
void set_io_context(const ::arrow::io::IOContext& ctx) { io_context_ = ctx; }
const ::arrow::io::IOContext& io_context() const { return io_context_; }
/// Set timestamp unit to use for deprecated INT96-encoded timestamps
/// (default is NANO).
void set_coerce_int96_timestamp_unit(::arrow::TimeUnit::type unit) {
coerce_int96_timestamp_unit_ = unit;
}
::arrow::TimeUnit::type coerce_int96_timestamp_unit() const {
return coerce_int96_timestamp_unit_;
}
private:
bool use_threads_;
std::unordered_set<int> read_dict_indices_;
int64_t batch_size_;
bool pre_buffer_;
::arrow::io::IOContext io_context_;
::arrow::io::CacheOptions cache_options_;
::arrow::TimeUnit::type coerce_int96_timestamp_unit_;
};
/// EXPERIMENTAL: Constructs the default ArrowReaderProperties
PARQUET_EXPORT
ArrowReaderProperties default_arrow_reader_properties();
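// A small sketch (values illustrative): enabling parallel column decoding,
// smaller record batches, and read coalescing for high-latency filesystems.
//
//   ArrowReaderProperties props(/*use_threads=*/true);
//   props.set_batch_size(32 * 1024);
//   props.set_pre_buffer(true);  // pre-buffer file regions, e.g. for S3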
class PARQUET_EXPORT ArrowWriterProperties {
public:
enum EngineVersion {
V1, // Supports only nested lists.
V2 // Full support for all nesting combinations
};
class Builder {
public:
Builder()
: write_timestamps_as_int96_(false),
coerce_timestamps_enabled_(false),
coerce_timestamps_unit_(::arrow::TimeUnit::SECOND),
truncated_timestamps_allowed_(false),
store_schema_(false),
// TODO: At some point we should flip this.
compliant_nested_types_(false),
engine_version_(V2) {}
virtual ~Builder() = default;
Builder* disable_deprecated_int96_timestamps() {
write_timestamps_as_int96_ = false;
return this;
}
Builder* enable_deprecated_int96_timestamps() {
write_timestamps_as_int96_ = true;
return this;
}
Builder* coerce_timestamps(::arrow::TimeUnit::type unit) {
coerce_timestamps_enabled_ = true;
coerce_timestamps_unit_ = unit;
return this;
}
Builder* allow_truncated_timestamps() {
truncated_timestamps_allowed_ = true;
return this;
}
Builder* disallow_truncated_timestamps() {
truncated_timestamps_allowed_ = false;
return this;
}
/// \brief EXPERIMENTAL: Write binary serialized Arrow schema to the file,
/// to enable certain read options (like "read_dictionary") to be set
/// automatically
Builder* store_schema() {
store_schema_ = true;
return this;
}
Builder* enable_compliant_nested_types() {
compliant_nested_types_ = true;
return this;
}
Builder* disable_compliant_nested_types() {
compliant_nested_types_ = false;
return this;
}
Builder* set_engine_version(EngineVersion version) {
engine_version_ = version;
return this;
}
std::shared_ptr<ArrowWriterProperties> build() {
return std::shared_ptr<ArrowWriterProperties>(new ArrowWriterProperties(
write_timestamps_as_int96_, coerce_timestamps_enabled_, coerce_timestamps_unit_,
truncated_timestamps_allowed_, store_schema_, compliant_nested_types_,
engine_version_));
}
private:
bool write_timestamps_as_int96_;
bool coerce_timestamps_enabled_;
::arrow::TimeUnit::type coerce_timestamps_unit_;
bool truncated_timestamps_allowed_;
bool store_schema_;
bool compliant_nested_types_;
EngineVersion engine_version_;
};
bool support_deprecated_int96_timestamps() const { return write_timestamps_as_int96_; }
bool coerce_timestamps_enabled() const { return coerce_timestamps_enabled_; }
::arrow::TimeUnit::type coerce_timestamps_unit() const {
return coerce_timestamps_unit_;
}
bool truncated_timestamps_allowed() const { return truncated_timestamps_allowed_; }
bool store_schema() const { return store_schema_; }
/// \brief Enable nested type naming according to the parquet specification.
///
/// Older versions of Arrow wrote out field names for nested lists based on the
/// name of the field. According to the Parquet specification they should
/// always be "element".
bool compliant_nested_types() const { return compliant_nested_types_; }
/// \brief The underlying engine version to use when writing Arrow data.
///
/// V2 is currently the latest; V1 is considered deprecated but is left in
/// place in case bugs are detected in V2.
EngineVersion engine_version() const { return engine_version_; }
private:
explicit ArrowWriterProperties(bool write_nanos_as_int96,
bool coerce_timestamps_enabled,
::arrow::TimeUnit::type coerce_timestamps_unit,
bool truncated_timestamps_allowed, bool store_schema,
bool compliant_nested_types,
EngineVersion engine_version)
: write_timestamps_as_int96_(write_nanos_as_int96),
coerce_timestamps_enabled_(coerce_timestamps_enabled),
coerce_timestamps_unit_(coerce_timestamps_unit),
truncated_timestamps_allowed_(truncated_timestamps_allowed),
store_schema_(store_schema),
compliant_nested_types_(compliant_nested_types),
engine_version_(engine_version) {}
const bool write_timestamps_as_int96_;
const bool coerce_timestamps_enabled_;
const ::arrow::TimeUnit::type coerce_timestamps_unit_;
const bool truncated_timestamps_allowed_;
const bool store_schema_;
const bool compliant_nested_types_;
const EngineVersion engine_version_;
};
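// A hedged sketch of the Builder (methods return Builder*, hence ->): store
// the Arrow schema for round-tripping and coerce timestamps to milliseconds,
// tolerating truncation.
//
//   ArrowWriterProperties::Builder builder;
//   builder.store_schema()
//       ->coerce_timestamps(::arrow::TimeUnit::MILLI)
//       ->allow_truncated_timestamps();
//   std::shared_ptr<ArrowWriterProperties> arrow_props = builder.build();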
/// \brief State object used for writing Arrow data directly to a Parquet
/// column chunk. API possibly not stable
struct ArrowWriteContext {
ArrowWriteContext(MemoryPool* memory_pool, ArrowWriterProperties* properties)
: memory_pool(memory_pool),
properties(properties),
data_buffer(AllocateBuffer(memory_pool)),
def_levels_buffer(AllocateBuffer(memory_pool)) {}
template <typename T>
::arrow::Status GetScratchData(const int64_t num_values, T** out) {
ARROW_RETURN_NOT_OK(this->data_buffer->Resize(num_values * sizeof(T), false));
*out = reinterpret_cast<T*>(this->data_buffer->mutable_data());
return ::arrow::Status::OK();
}
MemoryPool* memory_pool;
const ArrowWriterProperties* properties;
// Buffer used for storing the data of an array converted to the physical type
// as expected by parquet-cpp.
std::shared_ptr<ResizableBuffer> data_buffer;
// We use the shared ownership of this buffer
std::shared_ptr<ResizableBuffer> def_levels_buffer;
};
PARQUET_EXPORT
std::shared_ptr<ArrowWriterProperties> default_arrow_writer_properties();
} // namespace parquet

View File

@ -0,0 +1,492 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This module contains the logical parquet-cpp types (independent of Thrift
// structures), schema nodes, and related type tools
#pragma once
#include <cstdint>
#include <memory>
#include <ostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "parquet/platform.h"
#include "parquet/types.h"
#include "parquet/windows_fixup.h" // for OPTIONAL
namespace parquet {
class SchemaDescriptor;
namespace schema {
class Node;
// List encodings: using the terminology from Impala to define different styles
// of representing logical lists (a.k.a. ARRAY types) in Parquet schemas. Since
// the converted type named in the Parquet metadata is ConvertedType::LIST we
// use that terminology here. It also helps distinguish from the *_ARRAY
// primitive types.
//
// One-level encoding: Only allows required lists with required cells
// repeated value_type name
//
// Two-level encoding: Enables optional lists with only required cells
// <required/optional> group list
// repeated value_type item
//
// Three-level encoding: Enables optional lists with optional cells
// <required/optional> group bag
// repeated group list
// <required/optional> value_type item
//
// 2- and 1-level encoding are respectively equivalent to 3-level encoding with
// the non-repeated nodes set to required.
//
// The "official" encoding recommended in the Parquet spec is the 3-level, and
// we use that as the default when creating list types. For semantic completeness
// we allow the other two. Since all types of encodings will occur "in the
// wild" we need to be able to interpret the associated definition levels in
// the context of the actual encoding used in the file.
//
// NB: Some Parquet writers may not set ConvertedType::LIST on the repeated
// SchemaElement, which could make things challenging if we are trying to infer
// that a sequence of nodes semantically represents an array according to one
// of these encodings (versus a struct containing an array). We should refuse
// the temptation to guess, as they say.
struct ListEncoding {
enum type { ONE_LEVEL, TWO_LEVEL, THREE_LEVEL };
};
class PARQUET_EXPORT ColumnPath {
public:
ColumnPath() : path_() {}
explicit ColumnPath(const std::vector<std::string>& path) : path_(path) {}
explicit ColumnPath(std::vector<std::string>&& path) : path_(std::move(path)) {}
static std::shared_ptr<ColumnPath> FromDotString(const std::string& dotstring);
static std::shared_ptr<ColumnPath> FromNode(const Node& node);
std::shared_ptr<ColumnPath> extend(const std::string& node_name) const;
std::string ToDotString() const;
const std::vector<std::string>& ToDotVector() const;
protected:
std::vector<std::string> path_;
};
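// A short sketch of path manipulation (we are inside namespace parquet::schema
// here, so ColumnPath is unqualified):
//
//   std::shared_ptr<ColumnPath> path = ColumnPath::FromDotString("a.b.c");
//   std::shared_ptr<ColumnPath> leaf = path->extend("d");
//   std::string dotted = leaf->ToDotString();  // "a.b.c.d"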
// Base class for logical schema types. A type has a name, repetition level,
// and optionally a logical type (ConvertedType in Parquet metadata parlance)
class PARQUET_EXPORT Node {
public:
enum type { PRIMITIVE, GROUP };
virtual ~Node() {}
bool is_primitive() const { return type_ == Node::PRIMITIVE; }
bool is_group() const { return type_ == Node::GROUP; }
bool is_optional() const { return repetition_ == Repetition::OPTIONAL; }
bool is_repeated() const { return repetition_ == Repetition::REPEATED; }
bool is_required() const { return repetition_ == Repetition::REQUIRED; }
virtual bool Equals(const Node* other) const = 0;
const std::string& name() const { return name_; }
Node::type node_type() const { return type_; }
Repetition::type repetition() const { return repetition_; }
ConvertedType::type converted_type() const { return converted_type_; }
const std::shared_ptr<const LogicalType>& logical_type() const { return logical_type_; }
/// \brief The field_id value for the serialized SchemaElement. If the
/// field_id is less than 0 (e.g. -1), it will not be set when serialized to
/// Thrift.
int field_id() const { return field_id_; }
const Node* parent() const { return parent_; }
const std::shared_ptr<ColumnPath> path() const;
virtual void ToParquet(void* element) const = 0;
// Node::Visitor abstract class for walking schemas with the visitor pattern
class Visitor {
public:
virtual ~Visitor() {}
virtual void Visit(Node* node) = 0;
};
class ConstVisitor {
public:
virtual ~ConstVisitor() {}
virtual void Visit(const Node* node) = 0;
};
virtual void Visit(Visitor* visitor) = 0;
virtual void VisitConst(ConstVisitor* visitor) const = 0;
protected:
friend class GroupNode;
Node(Node::type type, const std::string& name, Repetition::type repetition,
ConvertedType::type converted_type = ConvertedType::NONE, int field_id = -1)
: type_(type),
name_(name),
repetition_(repetition),
converted_type_(converted_type),
field_id_(field_id),
parent_(NULLPTR) {}
Node(Node::type type, const std::string& name, Repetition::type repetition,
std::shared_ptr<const LogicalType> logical_type, int field_id = -1)
: type_(type),
name_(name),
repetition_(repetition),
logical_type_(std::move(logical_type)),
field_id_(field_id),
parent_(NULLPTR) {}
Node::type type_;
std::string name_;
Repetition::type repetition_;
ConvertedType::type converted_type_;
std::shared_ptr<const LogicalType> logical_type_;
int field_id_;
// Nodes should not be shared, they have a single parent.
const Node* parent_;
bool EqualsInternal(const Node* other) const;
void SetParent(const Node* p_parent);
private:
PARQUET_DISALLOW_COPY_AND_ASSIGN(Node);
};
// Save our breath all over the place with these typedefs
typedef std::shared_ptr<Node> NodePtr;
typedef std::vector<NodePtr> NodeVector;
// A type that is one of the primitive Parquet storage types. In addition to
// the other type metadata (name, repetition level, logical type), also has the
// physical storage type and their type-specific metadata (byte width, decimal
// parameters)
class PARQUET_EXPORT PrimitiveNode : public Node {
public:
static std::unique_ptr<Node> FromParquet(const void* opaque_element);
// A field_id -1 (or any negative value) will be serialized as null in Thrift
static inline NodePtr Make(const std::string& name, Repetition::type repetition,
Type::type type,
ConvertedType::type converted_type = ConvertedType::NONE,
int length = -1, int precision = -1, int scale = -1,
int field_id = -1) {
return NodePtr(new PrimitiveNode(name, repetition, type, converted_type, length,
precision, scale, field_id));
}
// If no logical type, pass LogicalType::None() or nullptr
// A field_id -1 (or any negative value) will be serialized as null in Thrift
static inline NodePtr Make(const std::string& name, Repetition::type repetition,
std::shared_ptr<const LogicalType> logical_type,
Type::type primitive_type, int primitive_length = -1,
int field_id = -1) {
return NodePtr(new PrimitiveNode(name, repetition, logical_type, primitive_type,
primitive_length, field_id));
}
bool Equals(const Node* other) const override;
Type::type physical_type() const { return physical_type_; }
ColumnOrder column_order() const { return column_order_; }
void SetColumnOrder(ColumnOrder column_order) { column_order_ = column_order; }
int32_t type_length() const { return type_length_; }
const DecimalMetadata& decimal_metadata() const { return decimal_metadata_; }
void ToParquet(void* element) const override;
void Visit(Visitor* visitor) override;
void VisitConst(ConstVisitor* visitor) const override;
private:
PrimitiveNode(const std::string& name, Repetition::type repetition, Type::type type,
ConvertedType::type converted_type = ConvertedType::NONE, int length = -1,
int precision = -1, int scale = -1, int field_id = -1);
PrimitiveNode(const std::string& name, Repetition::type repetition,
std::shared_ptr<const LogicalType> logical_type,
Type::type primitive_type, int primitive_length = -1, int field_id = -1);
Type::type physical_type_;
int32_t type_length_;
DecimalMetadata decimal_metadata_;
ColumnOrder column_order_;
// For FIXED_LEN_BYTE_ARRAY
void SetTypeLength(int32_t length) { type_length_ = length; }
bool EqualsInternal(const PrimitiveNode* other) const;
FRIEND_TEST(TestPrimitiveNode, Attrs);
FRIEND_TEST(TestPrimitiveNode, Equals);
FRIEND_TEST(TestPrimitiveNode, PhysicalLogicalMapping);
FRIEND_TEST(TestPrimitiveNode, FromParquet);
};
class PARQUET_EXPORT GroupNode : public Node {
public:
static std::unique_ptr<Node> FromParquet(const void* opaque_element,
NodeVector fields = {});
// A field_id -1 (or any negative value) will be serialized as null in Thrift
static inline NodePtr Make(const std::string& name, Repetition::type repetition,
const NodeVector& fields,
ConvertedType::type converted_type = ConvertedType::NONE,
int field_id = -1) {
return NodePtr(new GroupNode(name, repetition, fields, converted_type, field_id));
}
// If no logical type, pass nullptr
// A field_id -1 (or any negative value) will be serialized as null in Thrift
static inline NodePtr Make(const std::string& name, Repetition::type repetition,
const NodeVector& fields,
std::shared_ptr<const LogicalType> logical_type,
int field_id = -1) {
return NodePtr(new GroupNode(name, repetition, fields, logical_type, field_id));
}
bool Equals(const Node* other) const override;
NodePtr field(int i) const { return fields_[i]; }
// Get the index of a field by its name, or negative value if not found.
// If several fields share the same name, it is unspecified which one
// is returned.
int FieldIndex(const std::string& name) const;
// Get the index of a field by its node, or negative value if not found.
int FieldIndex(const Node& node) const;
int field_count() const { return static_cast<int>(fields_.size()); }
void ToParquet(void* element) const override;
void Visit(Visitor* visitor) override;
void VisitConst(ConstVisitor* visitor) const override;
/// \brief Return true if this node or any child node has REPEATED repetition
/// type
bool HasRepeatedFields() const;
private:
GroupNode(const std::string& name, Repetition::type repetition,
const NodeVector& fields,
ConvertedType::type converted_type = ConvertedType::NONE, int field_id = -1);
GroupNode(const std::string& name, Repetition::type repetition,
const NodeVector& fields, std::shared_ptr<const LogicalType> logical_type,
int field_id = -1);
NodeVector fields_;
bool EqualsInternal(const GroupNode* other) const;
// Mapping from field name to field index
std::unordered_multimap<std::string, int> field_name_to_idx_;
FRIEND_TEST(TestGroupNode, Attrs);
FRIEND_TEST(TestGroupNode, Equals);
FRIEND_TEST(TestGroupNode, FieldIndex);
FRIEND_TEST(TestGroupNode, FieldIndexDuplicateName);
};
// ----------------------------------------------------------------------
// Convenience primitive type factory functions
#define PRIMITIVE_FACTORY(FuncName, TYPE) \
static inline NodePtr FuncName(const std::string& name, \
Repetition::type repetition = Repetition::OPTIONAL, \
int field_id = -1) { \
return PrimitiveNode::Make(name, repetition, Type::TYPE, ConvertedType::NONE, \
/*length=*/-1, /*precision=*/-1, /*scale=*/-1, field_id); \
}
PRIMITIVE_FACTORY(Boolean, BOOLEAN)
PRIMITIVE_FACTORY(Int32, INT32)
PRIMITIVE_FACTORY(Int64, INT64)
PRIMITIVE_FACTORY(Int96, INT96)
PRIMITIVE_FACTORY(Float, FLOAT)
PRIMITIVE_FACTORY(Double, DOUBLE)
PRIMITIVE_FACTORY(ByteArray, BYTE_ARRAY)
void PARQUET_EXPORT PrintSchema(const schema::Node* schema, std::ostream& stream,
int indent_width = 2);
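// A sketch tying the factory functions together with the three-level list
// encoding described above (names are illustrative; <iostream> assumed for
// std::cout):
//
//   NodePtr item = Int64("item", Repetition::OPTIONAL);
//   NodePtr list = GroupNode::Make("list", Repetition::REPEATED, {item});
//   NodePtr bag = GroupNode::Make("bag", Repetition::OPTIONAL, {list},
//                                 ConvertedType::LIST);
//   NodePtr root = GroupNode::Make("schema", Repetition::REQUIRED, {bag});
//   PrintSchema(root.get(), std::cout);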
} // namespace schema
// The ColumnDescriptor encapsulates information necessary to interpret
// primitive column data in the context of a particular schema. We have to
// examine the node structure of a column's path to the root in the schema tree
// to be able to reassemble the nested structure from the repetition and
// definition levels.
class PARQUET_EXPORT ColumnDescriptor {
public:
ColumnDescriptor(schema::NodePtr node, int16_t max_definition_level,
int16_t max_repetition_level,
const SchemaDescriptor* schema_descr = NULLPTR);
bool Equals(const ColumnDescriptor& other) const;
int16_t max_definition_level() const { return max_definition_level_; }
int16_t max_repetition_level() const { return max_repetition_level_; }
Type::type physical_type() const { return primitive_node_->physical_type(); }
ConvertedType::type converted_type() const { return primitive_node_->converted_type(); }
const std::shared_ptr<const LogicalType>& logical_type() const {
return primitive_node_->logical_type();
}
ColumnOrder column_order() const { return primitive_node_->column_order(); }
SortOrder::type sort_order() const {
auto la = logical_type();
auto pt = physical_type();
return la ? GetSortOrder(la, pt) : GetSortOrder(converted_type(), pt);
}
const std::string& name() const { return primitive_node_->name(); }
const std::shared_ptr<schema::ColumnPath> path() const;
const schema::NodePtr& schema_node() const { return node_; }
std::string ToString() const;
int type_length() const;
int type_precision() const;
int type_scale() const;
private:
schema::NodePtr node_;
const schema::PrimitiveNode* primitive_node_;
int16_t max_definition_level_;
int16_t max_repetition_level_;
};
// Container for the converted Parquet schema with computed information from
// the schema analysis needed for file reading
//
// * Column index to Node
// * Max repetition / definition levels for each primitive node
//
// The ColumnDescriptor objects produced by this class can be used to assist in
// the reconstruction of fully materialized data structures from the
// repetition-definition level encoding of nested data
//
// TODO(wesm): this object can be recomputed from a Schema
class PARQUET_EXPORT SchemaDescriptor {
public:
SchemaDescriptor() {}
~SchemaDescriptor() {}
// Analyze the schema
void Init(std::unique_ptr<schema::Node> schema);
void Init(schema::NodePtr schema);
const ColumnDescriptor* Column(int i) const;
// Get the index of a column by its dotstring path, or negative value if not found.
// If several columns share the same dotstring path, it is unspecified which one
// is returned.
int ColumnIndex(const std::string& node_path) const;
// Get the index of a column by its node, or negative value if not found.
int ColumnIndex(const schema::Node& node) const;
bool Equals(const SchemaDescriptor& other) const;
// The number of physical columns appearing in the file
int num_columns() const { return static_cast<int>(leaves_.size()); }
const schema::NodePtr& schema_root() const { return schema_; }
const schema::GroupNode* group_node() const { return group_node_; }
// Returns the root (child of the schema root) node of the leaf (column) node
const schema::Node* GetColumnRoot(int i) const;
const std::string& name() const { return group_node_->name(); }
std::string ToString() const;
void updateColumnOrders(const std::vector<ColumnOrder>& column_orders);
/// \brief Return column index corresponding to a particular
/// PrimitiveNode. Returns -1 if not found
int GetColumnIndex(const schema::PrimitiveNode& node) const;
/// \brief Return true if any field or their children have REPEATED repetition
/// type
bool HasRepeatedFields() const;
private:
friend class ColumnDescriptor;
// Root Node
schema::NodePtr schema_;
// Root node as a group node
const schema::GroupNode* group_node_;
void BuildTree(const schema::NodePtr& node, int16_t max_def_level,
int16_t max_rep_level, const schema::NodePtr& base);
// Result of leaf node / tree analysis
std::vector<ColumnDescriptor> leaves_;
std::unordered_map<const schema::PrimitiveNode*, int> node_to_leaf_index_;
// Mapping between leaf nodes and root group of leaf (first node
// below the schema's root group)
//
// For example, the leaf `a.b.c.d` would have a link back to `a`
//
// -- a <------
// -- -- b |
// -- -- -- c |
// -- -- -- -- d
std::unordered_map<int, schema::NodePtr> leaf_to_base_;
// Mapping from ColumnPath DotString to leaf index
std::unordered_multimap<std::string, int> leaf_to_idx_;
};
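// A short usage sketch (illustrative only): Init() runs the schema
// analysis, after which per-column descriptors and dotstring lookups are
// available. Assumes <iostream> for the printing shown here;
// `ExampleDescribeSchema` is a hypothetical name.
inline void ExampleDescribeSchema(const schema::NodePtr& root) {
  SchemaDescriptor descr;
  descr.Init(root);
  for (int i = 0; i < descr.num_columns(); ++i) {
    const ColumnDescriptor* col = descr.Column(i);
    std::cout << col->path()->ToDotString() << " def="
              << col->max_definition_level() << " rep="
              << col->max_repetition_level() << std::endl;
  }
}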
} // namespace parquet

View File

@ -0,0 +1,367 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include "parquet/platform.h"
#include "parquet/types.h"
namespace arrow {
class Array;
class BinaryArray;
} // namespace arrow
namespace parquet {
class ColumnDescriptor;
// ----------------------------------------------------------------------
// Value comparator interfaces
/// \brief Base class for value comparators. Generally used with
/// TypedComparator<T>
class PARQUET_EXPORT Comparator {
public:
virtual ~Comparator() {}
/// \brief Create a comparator explicitly from physical type and
/// sort order
/// \param[in] physical_type the physical type for the typed
/// comparator
/// \param[in] sort_order either SortOrder::SIGNED or
/// SortOrder::UNSIGNED
/// \param[in] type_length for FIXED_LEN_BYTE_ARRAY only
static std::shared_ptr<Comparator> Make(Type::type physical_type,
SortOrder::type sort_order,
int type_length = -1);
/// \brief Create typed comparator inferring default sort order from
/// ColumnDescriptor
/// \param[in] descr the Parquet column schema
static std::shared_ptr<Comparator> Make(const ColumnDescriptor* descr);
};
/// \brief Interface for comparison of physical types according to the
/// semantics of a particular logical type.
template <typename DType>
class TypedComparator : public Comparator {
public:
using T = typename DType::c_type;
/// \brief Scalar comparison of two elements, return true if first
/// is strictly less than the second
virtual bool Compare(const T& a, const T& b) = 0;
/// \brief Compute maximum and minimum elements in a batch of
/// elements without any nulls
virtual std::pair<T, T> GetMinMax(const T* values, int64_t length) = 0;
/// \brief Compute minimum and maximum elements from an Arrow array. Only
/// valid for certain Parquet Type / Arrow Type combinations, like BYTE_ARRAY
/// / arrow::BinaryArray
virtual std::pair<T, T> GetMinMax(const ::arrow::Array& values) = 0;
/// \brief Compute maximum and minimum elements in a batch of
/// elements with accompanying bitmap indicating which elements are
/// included (bit set) and excluded (bit not set)
///
/// \param[in] values the sequence of values
/// \param[in] length the length of the sequence
/// \param[in] valid_bits a bitmap indicating which elements are
/// included (1) or excluded (0)
/// \param[in] valid_bits_offset the bit offset into the bitmap of
/// the first element in the sequence
virtual std::pair<T, T> GetMinMaxSpaced(const T* values, int64_t length,
const uint8_t* valid_bits,
int64_t valid_bits_offset) = 0;
};
/// \brief Typed version of Comparator::Make
template <typename DType>
std::shared_ptr<TypedComparator<DType>> MakeComparator(Type::type physical_type,
SortOrder::type sort_order,
int type_length = -1) {
return std::static_pointer_cast<TypedComparator<DType>>(
Comparator::Make(physical_type, sort_order, type_length));
}
/// \brief Typed version of Comparator::Make
template <typename DType>
std::shared_ptr<TypedComparator<DType>> MakeComparator(const ColumnDescriptor* descr) {
return std::static_pointer_cast<TypedComparator<DType>>(Comparator::Make(descr));
}
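// Sketch of the typed comparator in use (illustrative): scalar comparison
// plus a batch min/max scan under the column's sort order. `descr` is
// assumed to describe an INT32 column.
inline void ExampleComparator(const ColumnDescriptor* descr) {
  std::shared_ptr<TypedComparator<Int32Type>> cmp = MakeComparator<Int32Type>(descr);
  const int32_t values[] = {5, -1, 42, 7};
  bool less = cmp->Compare(values[0], values[1]);  // false: 5 is not < -1
  std::pair<int32_t, int32_t> min_max = cmp->GetMinMax(values, /*length=*/4);
  // With SIGNED sort order: min_max.first == -1, min_max.second == 42.
  (void)less;
  (void)min_max;
}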
// ----------------------------------------------------------------------
/// \brief Structure representing encoded statistics to be written to
/// and read from Parquet serialized metadata
class PARQUET_EXPORT EncodedStatistics {
std::shared_ptr<std::string> max_, min_;
bool is_signed_ = false;
public:
EncodedStatistics()
: max_(std::make_shared<std::string>()), min_(std::make_shared<std::string>()) {}
const std::string& max() const { return *max_; }
const std::string& min() const { return *min_; }
int64_t null_count = 0;
int64_t distinct_count = 0;
bool has_min = false;
bool has_max = false;
bool has_null_count = false;
bool has_distinct_count = false;
// From parquet-mr
// Rather than truncating, don't write stats larger than the max size at all. The
// rationale is that some engines may use the minimum value in the page as
// the true minimum for aggregations and there is no way to mark that a
// value has been truncated and is a lower bound and not in the page.
void ApplyStatSizeLimits(size_t length) {
if (max_->length() > length) {
has_max = false;
}
if (min_->length() > length) {
has_min = false;
}
}
bool is_set() const {
return has_min || has_max || has_null_count || has_distinct_count;
}
bool is_signed() const { return is_signed_; }
void set_is_signed(bool is_signed) { is_signed_ = is_signed; }
EncodedStatistics& set_max(const std::string& value) {
*max_ = value;
has_max = true;
return *this;
}
EncodedStatistics& set_min(const std::string& value) {
*min_ = value;
has_min = true;
return *this;
}
EncodedStatistics& set_null_count(int64_t value) {
null_count = value;
has_null_count = true;
return *this;
}
EncodedStatistics& set_distinct_count(int64_t value) {
distinct_count = value;
has_distinct_count = true;
return *this;
}
};
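// Sketch of the builder-style setters above. Note that ApplyStatSizeLimits
// drops an over-long min/max entirely instead of truncating it, for the
// reason given in the comment above.
inline EncodedStatistics ExampleEncodedStats() {
  EncodedStatistics stats;
  stats.set_min("apple").set_max("zebra").set_null_count(0);
  stats.ApplyStatSizeLimits(4);  // both 5-byte values exceed 4: has_min/has_max reset
  return stats;                  // is_set() still true via has_null_count
}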
/// \brief Base type for computing column statistics while writing a file
class PARQUET_EXPORT Statistics {
public:
virtual ~Statistics() {}
/// \brief Create a new statistics instance given a column schema
/// definition
/// \param[in] descr the column schema
/// \param[in] pool a memory pool to use for any memory allocations, optional
static std::shared_ptr<Statistics> Make(
const ColumnDescriptor* descr,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
/// \brief Create a new statistics instance given a column schema
/// definition and pre-existing state
/// \param[in] descr the column schema
/// \param[in] encoded_min the encoded minimum value
/// \param[in] encoded_max the encoded maximum value
/// \param[in] num_values total number of values
/// \param[in] null_count number of null values
/// \param[in] distinct_count number of distinct values
/// \param[in] has_min_max whether the min/max statistics are set
/// \param[in] has_null_count whether the null_count statistics are set
/// \param[in] has_distinct_count whether the distinct_count statistics are set
/// \param[in] pool a memory pool to use for any memory allocations, optional
static std::shared_ptr<Statistics> Make(
const ColumnDescriptor* descr, const std::string& encoded_min,
const std::string& encoded_max, int64_t num_values, int64_t null_count,
int64_t distinct_count, bool has_min_max, bool has_null_count,
bool has_distinct_count,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool());
/// \brief Return true if the count of null values is set
virtual bool HasNullCount() const = 0;
/// \brief The number of null values, may not be set
virtual int64_t null_count() const = 0;
/// \brief Return true if the count of distinct values is set
virtual bool HasDistinctCount() const = 0;
/// \brief The number of distinct values, may not be set
virtual int64_t distinct_count() const = 0;
/// \brief The total number of values in the column
virtual int64_t num_values() const = 0;
/// \brief Return true if the min and max statistics are set. Obtain
/// with TypedStatistics<T>::min and max
virtual bool HasMinMax() const = 0;
/// \brief Reset state of object to initial (no data observed) state
virtual void Reset() = 0;
/// \brief Plain-encoded minimum value
virtual std::string EncodeMin() const = 0;
/// \brief Plain-encoded maximum value
virtual std::string EncodeMax() const = 0;
/// \brief The finalized encoded form of the statistics for transport
virtual EncodedStatistics Encode() = 0;
/// \brief The physical type of the column schema
virtual Type::type physical_type() const = 0;
/// \brief The full type descriptor from the column schema
virtual const ColumnDescriptor* descr() const = 0;
/// \brief Check two Statistics for equality
virtual bool Equals(const Statistics& other) const = 0;
protected:
static std::shared_ptr<Statistics> Make(Type::type physical_type, const void* min,
const void* max, int64_t num_values,
int64_t null_count, int64_t distinct_count);
};
/// \brief A typed implementation of Statistics
template <typename DType>
class TypedStatistics : public Statistics {
public:
using T = typename DType::c_type;
/// \brief The current minimum value
virtual const T& min() const = 0;
/// \brief The current maximum value
virtual const T& max() const = 0;
/// \brief Update state with state of another Statistics object
virtual void Merge(const TypedStatistics<DType>& other) = 0;
/// \brief Batch statistics update
virtual void Update(const T* values, int64_t num_not_null, int64_t num_null) = 0;
/// \brief Batch statistics update with supplied validity bitmap
/// \param[in] values pointer to column values
/// \param[in] valid_bits Pointer to bitmap representing if values are non-null.
/// \param[in] valid_bits_offset Offset into valid_bits where the slice of
/// data begins.
/// \param[in] num_spaced_values The length of values in values/valid_bits to inspect
/// when calculating statistics. This can be smaller than
/// num_not_null+num_null as num_null can include nulls
/// from parents while num_spaced_values does not.
/// \param[in] num_not_null Number of values that are not null.
/// \param[in] num_null Number of values that are null.
virtual void UpdateSpaced(const T* values, const uint8_t* valid_bits,
int64_t valid_bits_offset, int64_t num_spaced_values,
int64_t num_not_null, int64_t num_null) = 0;
/// \brief EXPERIMENTAL: Update statistics with an Arrow array without
/// conversion to a primitive Parquet C type. Only implemented for certain
/// Parquet type / Arrow type combinations like BYTE_ARRAY /
/// arrow::BinaryArray
///
/// If update_counts is true then the null_count and num_values will be updated
/// based on the null_count of values. Set to false if these are updated
/// elsewhere (e.g. when updating a dictionary where the counts are taken from
/// the indices and not the values)
virtual void Update(const ::arrow::Array& values, bool update_counts = true) = 0;
/// \brief Set min and max values to particular values
virtual void SetMinMax(const T& min, const T& max) = 0;
/// \brief Increments the null count directly
/// Use Update to extract the null count from data. Use this if you determine
/// the null count through some other means (e.g. dictionary arrays where the
/// null count is determined from the indices)
virtual void IncrementNullCount(int64_t n) = 0;
/// \brief Increments the number of values directly
/// The same note on IncrementNullCount applies here
virtual void IncrementNumValues(int64_t n) = 0;
};
using BoolStatistics = TypedStatistics<BooleanType>;
using Int32Statistics = TypedStatistics<Int32Type>;
using Int64Statistics = TypedStatistics<Int64Type>;
using FloatStatistics = TypedStatistics<FloatType>;
using DoubleStatistics = TypedStatistics<DoubleType>;
using ByteArrayStatistics = TypedStatistics<ByteArrayType>;
using FLBAStatistics = TypedStatistics<FLBAType>;
/// \brief Typed version of Statistics::Make
template <typename DType>
std::shared_ptr<TypedStatistics<DType>> MakeStatistics(
const ColumnDescriptor* descr,
::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) {
return std::static_pointer_cast<TypedStatistics<DType>>(Statistics::Make(descr, pool));
}
/// \brief Create Statistics initialized to a particular state
/// \param[in] min the minimum value
/// \param[in] max the maximum value
/// \param[in] num_values number of values
/// \param[in] null_count number of null values
/// \param[in] distinct_count number of distinct values
template <typename DType>
std::shared_ptr<TypedStatistics<DType>> MakeStatistics(const typename DType::c_type& min,
const typename DType::c_type& max,
int64_t num_values,
int64_t null_count,
int64_t distinct_count) {
return std::static_pointer_cast<TypedStatistics<DType>>(Statistics::Make(
DType::type_num, &min, &max, num_values, null_count, distinct_count));
}
/// \brief Typed version of Statistics::Make
template <typename DType>
std::shared_ptr<TypedStatistics<DType>> MakeStatistics(
const ColumnDescriptor* descr, const std::string& encoded_min,
const std::string& encoded_max, int64_t num_values, int64_t null_count,
int64_t distinct_count, bool has_min_max, bool has_null_count,
bool has_distinct_count, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) {
return std::static_pointer_cast<TypedStatistics<DType>>(Statistics::Make(
descr, encoded_min, encoded_max, num_values, null_count, distinct_count,
has_min_max, has_null_count, has_distinct_count, pool));
}
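// Illustrative sketch: accumulate statistics over a small batch, then
// encode them for serialized metadata. `descr` is assumed to describe a
// required INT64 column.
inline EncodedStatistics ExampleCollectStats(const ColumnDescriptor* descr) {
  std::shared_ptr<TypedStatistics<Int64Type>> stats =
      MakeStatistics<Int64Type>(descr);
  const int64_t values[] = {3, 1, 2};
  stats->Update(values, /*num_not_null=*/3, /*num_null=*/0);
  // After the update, HasMinMax() is true with min() == 1 and max() == 3.
  return stats->Encode();
}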
} // namespace parquet

View File

@ -0,0 +1,299 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <array>
#include <chrono>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <vector>
#include "arrow/util/optional.h"
#include "parquet/column_reader.h"
#include "parquet/file_reader.h"
#include "parquet/stream_writer.h"
namespace parquet {
/// \brief A class for reading Parquet files using an input stream type API.
///
/// The values given must be of the correct type i.e. the type must
/// match the file schema exactly otherwise a ParquetException will be
/// thrown.
///
/// The user must explicitly advance to the next row using the
/// EndRow() function or EndRow input manipulator.
///
/// Required and optional fields are supported:
/// - Required fields are read using operator>>(T)
/// - Optional fields are read with
/// operator>>(arrow::util::optional<T>)
///
/// Note that operator>>(arrow::util::optional<T>) can be used to read
/// required fields.
///
/// Similarly, operator>>(T) can be used to read optional fields.
/// However, if the value is not present then a ParquetException will
/// be raised.
///
/// Currently there is no support for repeated fields.
///
class PARQUET_EXPORT StreamReader {
public:
template <typename T>
using optional = ::arrow::util::optional<T>;
// N.B. Default constructed objects are not usable. This
// constructor is provided so that the object may be move
// assigned afterwards.
StreamReader() = default;
explicit StreamReader(std::unique_ptr<ParquetFileReader> reader);
~StreamReader() = default;
bool eof() const { return eof_; }
int current_column() const { return column_index_; }
int64_t current_row() const { return current_row_; }
int num_columns() const;
int64_t num_rows() const;
// Moving is possible.
StreamReader(StreamReader&&) = default;
StreamReader& operator=(StreamReader&&) = default;
// Copying is not allowed.
StreamReader(const StreamReader&) = delete;
StreamReader& operator=(const StreamReader&) = delete;
StreamReader& operator>>(bool& v);
StreamReader& operator>>(int8_t& v);
StreamReader& operator>>(uint8_t& v);
StreamReader& operator>>(int16_t& v);
StreamReader& operator>>(uint16_t& v);
StreamReader& operator>>(int32_t& v);
StreamReader& operator>>(uint32_t& v);
StreamReader& operator>>(int64_t& v);
StreamReader& operator>>(uint64_t& v);
StreamReader& operator>>(std::chrono::milliseconds& v);
StreamReader& operator>>(std::chrono::microseconds& v);
StreamReader& operator>>(float& v);
StreamReader& operator>>(double& v);
StreamReader& operator>>(char& v);
template <int N>
StreamReader& operator>>(char (&v)[N]) {
ReadFixedLength(v, N);
return *this;
}
template <std::size_t N>
StreamReader& operator>>(std::array<char, N>& v) {
ReadFixedLength(v.data(), static_cast<int>(N));
return *this;
}
// N.B. Cannot allow reading to an arbitrary char pointer as the
// length cannot be verified. Also it would shadow the
// char[N] input operator.
// StreamReader& operator>>(char * v);
StreamReader& operator>>(std::string& v);
// Input operators for optional fields.
StreamReader& operator>>(optional<bool>& v);
StreamReader& operator>>(optional<int8_t>& v);
StreamReader& operator>>(optional<uint8_t>& v);
StreamReader& operator>>(optional<int16_t>& v);
StreamReader& operator>>(optional<uint16_t>& v);
StreamReader& operator>>(optional<int32_t>& v);
StreamReader& operator>>(optional<uint32_t>& v);
StreamReader& operator>>(optional<int64_t>& v);
StreamReader& operator>>(optional<uint64_t>& v);
StreamReader& operator>>(optional<float>& v);
StreamReader& operator>>(optional<double>& v);
StreamReader& operator>>(optional<std::chrono::milliseconds>& v);
StreamReader& operator>>(optional<std::chrono::microseconds>& v);
StreamReader& operator>>(optional<char>& v);
StreamReader& operator>>(optional<std::string>& v);
template <std::size_t N>
StreamReader& operator>>(optional<std::array<char, N>>& v) {
CheckColumn(Type::FIXED_LEN_BYTE_ARRAY, ConvertedType::NONE, N);
FixedLenByteArray flba;
if (ReadOptional(&flba)) {
v = std::array<char, N>{};
std::memcpy(v->data(), flba.ptr, N);
} else {
v.reset();
}
return *this;
}
/// \brief Terminate current row and advance to next one.
/// \throws ParquetException if all columns in the row were not
/// read or skipped.
void EndRow();
/// \brief Skip the data in the next columns.
/// If the number of columns exceeds the columns remaining on the
/// current row then skipping is terminated - it does _not_ continue
/// skipping columns on the next row.
/// Skipping columns still requires the use of EndRow() even if all
/// remaining columns were skipped.
/// \return Number of columns actually skipped.
int64_t SkipColumns(int64_t num_columns_to_skip);
/// \brief Skip the data in the next rows.
/// Skipping of rows is not allowed if reading of data for the
/// current row is not finished.
/// Skipping of rows will be terminated if the end of file is
/// reached.
/// \return Number of rows actually skipped.
int64_t SkipRows(int64_t num_rows_to_skip);
protected:
[[noreturn]] void ThrowReadFailedException(
const std::shared_ptr<schema::PrimitiveNode>& node);
template <typename ReaderType, typename T>
void Read(T* v) {
const auto& node = nodes_[column_index_];
auto reader = static_cast<ReaderType*>(column_readers_[column_index_++].get());
int16_t def_level;
int16_t rep_level;
int64_t values_read;
reader->ReadBatch(kBatchSizeOne, &def_level, &rep_level, v, &values_read);
if (values_read != 1) {
ThrowReadFailedException(node);
}
}
template <typename ReaderType, typename ReadType, typename T>
void Read(T* v) {
const auto& node = nodes_[column_index_];
auto reader = static_cast<ReaderType*>(column_readers_[column_index_++].get());
int16_t def_level;
int16_t rep_level;
ReadType tmp;
int64_t values_read;
reader->ReadBatch(kBatchSizeOne, &def_level, &rep_level, &tmp, &values_read);
if (values_read == 1) {
*v = tmp;
} else {
ThrowReadFailedException(node);
}
}
template <typename ReaderType, typename ReadType = typename ReaderType::T, typename T>
void ReadOptional(optional<T>* v) {
const auto& node = nodes_[column_index_];
auto reader = static_cast<ReaderType*>(column_readers_[column_index_++].get());
int16_t def_level;
int16_t rep_level;
ReadType tmp;
int64_t values_read;
reader->ReadBatch(kBatchSizeOne, &def_level, &rep_level, &tmp, &values_read);
if (values_read == 1) {
*v = T(tmp);
} else if ((values_read == 0) && (def_level == 0)) {
v->reset();
} else {
ThrowReadFailedException(node);
}
}
void ReadFixedLength(char* ptr, int len);
void Read(ByteArray* v);
void Read(FixedLenByteArray* v);
bool ReadOptional(ByteArray* v);
bool ReadOptional(FixedLenByteArray* v);
void NextRowGroup();
void CheckColumn(Type::type physical_type, ConvertedType::type converted_type,
int length = 0);
void SkipRowsInColumn(ColumnReader* reader, int64_t num_rows_to_skip);
void SetEof();
private:
std::unique_ptr<ParquetFileReader> file_reader_;
std::shared_ptr<FileMetaData> file_metadata_;
std::shared_ptr<RowGroupReader> row_group_reader_;
std::vector<std::shared_ptr<ColumnReader>> column_readers_;
std::vector<std::shared_ptr<schema::PrimitiveNode>> nodes_;
bool eof_{true};
int row_group_index_{0};
int column_index_{0};
int64_t current_row_{0};
int64_t row_group_row_offset_{0};
static constexpr int64_t kBatchSizeOne = 1;
};
PARQUET_EXPORT
StreamReader& operator>>(StreamReader&, EndRowType);
} // namespace parquet
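// End-to-end usage sketch (not part of the header). The file name and its
// schema - a required int64 column followed by an optional string column -
// are assumptions for illustration; assumes "arrow/io/file.h" is included.
inline void ExampleStreamRead() {
  std::shared_ptr<::arrow::io::ReadableFile> infile;
  PARQUET_ASSIGN_OR_THROW(infile,
                          ::arrow::io::ReadableFile::Open("example.parquet"));
  parquet::StreamReader stream{parquet::ParquetFileReader::Open(infile)};
  int64_t id;
  parquet::StreamReader::optional<std::string> name;
  while (!stream.eof()) {
    stream >> id >> name >> parquet::EndRow;  // name is empty when null
  }
}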

View File

@ -0,0 +1,243 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <array>
#include <chrono>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "arrow/util/optional.h"
#include "arrow/util/string_view.h"
#include "parquet/column_writer.h"
#include "parquet/file_writer.h"
namespace parquet {
/// \brief A class for writing Parquet files using an output stream type API.
///
/// The values given must be of the correct type i.e. the type must
/// match the file schema exactly otherwise a ParquetException will be
/// thrown.
///
/// The user must explicitly indicate the end of the row using the
/// EndRow() function or EndRow output manipulator.
///
/// A maximum row group size can be configured; the default size is
/// 512MB. Alternatively the row group size can be set to zero and the
/// user can create new row groups by calling the EndRowGroup()
/// function or using the EndRowGroup output manipulator.
///
/// Required and optional fields are supported:
/// - Required fields are written using operator<<(T)
/// - Optional fields are written using
/// operator<<(arrow::util::optional<T>).
///
/// Note that operator<<(T) can be used to write optional fields.
///
/// Similarly, operator<<(arrow::util::optional<T>) can be used to
/// write required fields. However, if the optional parameter does not
/// have a value (i.e. it is nullopt) then a ParquetException will be
/// raised.
///
/// Currently there is no support for repeated fields.
///
class PARQUET_EXPORT StreamWriter {
public:
template <typename T>
using optional = ::arrow::util::optional<T>;
// N.B. Default constructed objects are not usable. This
// constructor is provided so that the object may be move
// assigned afterwards.
StreamWriter() = default;
explicit StreamWriter(std::unique_ptr<ParquetFileWriter> writer);
~StreamWriter() = default;
static void SetDefaultMaxRowGroupSize(int64_t max_size);
void SetMaxRowGroupSize(int64_t max_size);
int current_column() const { return column_index_; }
int64_t current_row() const { return current_row_; }
int num_columns() const;
// Moving is possible.
StreamWriter(StreamWriter&&) = default;
StreamWriter& operator=(StreamWriter&&) = default;
// Copying is not allowed.
StreamWriter(const StreamWriter&) = delete;
StreamWriter& operator=(const StreamWriter&) = delete;
/// \brief Output operators for required fields.
/// These can also be used for optional fields when a value must be set.
StreamWriter& operator<<(bool v);
StreamWriter& operator<<(int8_t v);
StreamWriter& operator<<(uint8_t v);
StreamWriter& operator<<(int16_t v);
StreamWriter& operator<<(uint16_t v);
StreamWriter& operator<<(int32_t v);
StreamWriter& operator<<(uint32_t v);
StreamWriter& operator<<(int64_t v);
StreamWriter& operator<<(uint64_t v);
StreamWriter& operator<<(const std::chrono::milliseconds& v);
StreamWriter& operator<<(const std::chrono::microseconds& v);
StreamWriter& operator<<(float v);
StreamWriter& operator<<(double v);
StreamWriter& operator<<(char v);
/// \brief Helper class to write fixed length strings.
/// This is useful as the standard string view (such as
/// arrow::util::string_view) is for variable length data.
struct PARQUET_EXPORT FixedStringView {
FixedStringView() = default;
explicit FixedStringView(const char* data_ptr);
FixedStringView(const char* data_ptr, std::size_t data_len);
const char* data{NULLPTR};
std::size_t size{0};
};
/// \brief Output operators for fixed length strings.
template <int N>
StreamWriter& operator<<(const char (&v)[N]) {
return WriteFixedLength(v, N);
}
template <std::size_t N>
StreamWriter& operator<<(const std::array<char, N>& v) {
return WriteFixedLength(v.data(), N);
}
StreamWriter& operator<<(FixedStringView v);
/// \brief Output operators for variable length strings.
StreamWriter& operator<<(const char* v);
StreamWriter& operator<<(const std::string& v);
StreamWriter& operator<<(::arrow::util::string_view v);
/// \brief Output operator for optional fields.
template <typename T>
StreamWriter& operator<<(const optional<T>& v) {
if (v) {
return operator<<(*v);
}
SkipOptionalColumn();
return *this;
}
/// \brief Skip the next N columns of optional data. If there are
/// fewer than N columns remaining then the excess columns are
/// ignored.
/// \throws ParquetException if there is an attempt to skip any
/// required column.
/// \return Number of columns actually skipped.
int64_t SkipColumns(int num_columns_to_skip);
/// \brief Terminate the current row and advance to next one.
/// \throws ParquetException if all columns in the row were not
/// written or skipped.
void EndRow();
/// \brief Terminate the current row group and create new one.
void EndRowGroup();
protected:
template <typename WriterType, typename T>
StreamWriter& Write(const T v) {
auto writer = static_cast<WriterType*>(row_group_writer_->column(column_index_++));
writer->WriteBatch(kBatchSizeOne, &kDefLevelOne, &kRepLevelZero, &v);
if (max_row_group_size_ > 0) {
row_group_size_ += writer->EstimatedBufferedValueBytes();
}
return *this;
}
StreamWriter& WriteVariableLength(const char* data_ptr, std::size_t data_len);
StreamWriter& WriteFixedLength(const char* data_ptr, std::size_t data_len);
void CheckColumn(Type::type physical_type, ConvertedType::type converted_type,
int length = -1);
/// \brief Skip the next column which must be optional.
/// \throws ParquetException if the next column does not exist or is
/// not optional.
void SkipOptionalColumn();
void WriteNullValue(ColumnWriter* writer);
private:
using node_ptr_type = std::shared_ptr<schema::PrimitiveNode>;
struct null_deleter {
void operator()(void*) {}
};
int32_t column_index_{0};
int64_t current_row_{0};
int64_t row_group_size_{0};
int64_t max_row_group_size_{default_row_group_size_};
std::unique_ptr<ParquetFileWriter> file_writer_;
std::unique_ptr<RowGroupWriter, null_deleter> row_group_writer_;
std::vector<node_ptr_type> nodes_;
static constexpr int16_t kDefLevelZero = 0;
static constexpr int16_t kDefLevelOne = 1;
static constexpr int16_t kRepLevelZero = 0;
static constexpr int64_t kBatchSizeOne = 1;
static int64_t default_row_group_size_;
};
struct PARQUET_EXPORT EndRowType {};
constexpr EndRowType EndRow = {};
struct PARQUET_EXPORT EndRowGroupType {};
constexpr EndRowGroupType EndRowGroup = {};
PARQUET_EXPORT
StreamWriter& operator<<(StreamWriter&, EndRowType);
PARQUET_EXPORT
StreamWriter& operator<<(StreamWriter&, EndRowGroupType);
} // namespace parquet
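// Companion usage sketch for the writer (illustrative). The schema is
// assumed to match the reader example above: a required int64 column and
// an optional string column; assumes "arrow/io/file.h" is included.
inline void ExampleStreamWrite(
    const std::shared_ptr<parquet::schema::GroupNode>& schema) {
  std::shared_ptr<::arrow::io::FileOutputStream> outfile;
  PARQUET_ASSIGN_OR_THROW(outfile,
                          ::arrow::io::FileOutputStream::Open("example.parquet"));
  parquet::StreamWriter os{parquet::ParquetFileWriter::Open(outfile, schema)};
  os << int64_t{1} << std::string("alice") << parquet::EndRow;
  os << int64_t{2} << parquet::StreamWriter::optional<std::string>{}  // null
     << parquet::EndRow;
  os.EndRowGroup();  // explicit; otherwise driven by the max row group size
}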

View File

@ -0,0 +1,715 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This module defines an abstract interface for iterating through pages in a
// Parquet column chunk within a row group. It could be extended in the future
// to iterate through all data pages in all chunks in a file.
#pragma once
#include <algorithm>
#include <limits>
#include <memory>
#include <random>
#include <string>
#include <utility>
#include <vector>
#include <gtest/gtest.h>
#include "arrow/io/memory.h"
#include "arrow/testing/util.h"
#include "parquet/column_page.h"
#include "parquet/column_reader.h"
#include "parquet/column_writer.h"
#include "parquet/encoding.h"
#include "parquet/platform.h"
namespace parquet {
static constexpr int FLBA_LENGTH = 12;
inline bool operator==(const FixedLenByteArray& a, const FixedLenByteArray& b) {
return 0 == memcmp(a.ptr, b.ptr, FLBA_LENGTH);
}
namespace test {
typedef ::testing::Types<BooleanType, Int32Type, Int64Type, Int96Type, FloatType,
DoubleType, ByteArrayType, FLBAType>
ParquetTypes;
class ParquetTestException : public parquet::ParquetException {
using ParquetException::ParquetException;
};
const char* get_data_dir();
std::string get_bad_data_dir();
std::string get_data_file(const std::string& filename, bool is_good = true);
template <typename T>
static inline void assert_vector_equal(const std::vector<T>& left,
const std::vector<T>& right) {
ASSERT_EQ(left.size(), right.size());
for (size_t i = 0; i < left.size(); ++i) {
ASSERT_EQ(left[i], right[i]) << i;
}
}
template <typename T>
static inline bool vector_equal(const std::vector<T>& left, const std::vector<T>& right) {
if (left.size() != right.size()) {
return false;
}
for (size_t i = 0; i < left.size(); ++i) {
if (left[i] != right[i]) {
std::cerr << "index " << i << " left was " << left[i] << " right was " << right[i]
<< std::endl;
return false;
}
}
return true;
}
template <typename T>
static std::vector<T> slice(const std::vector<T>& values, int start, int end) {
if (end < start) {
return std::vector<T>(0);
}
std::vector<T> out(end - start);
for (int i = start; i < end; ++i) {
out[i - start] = values[i];
}
return out;
}
void random_bytes(int n, uint32_t seed, std::vector<uint8_t>* out);
void random_bools(int n, double p, uint32_t seed, bool* out);
template <typename T>
inline void random_numbers(int n, uint32_t seed, T min_value, T max_value, T* out) {
std::default_random_engine gen(seed);
std::uniform_int_distribution<T> d(min_value, max_value);
for (int i = 0; i < n; ++i) {
out[i] = d(gen);
}
}
template <>
inline void random_numbers(int n, uint32_t seed, float min_value, float max_value,
float* out) {
std::default_random_engine gen(seed);
std::uniform_real_distribution<float> d(min_value, max_value);
for (int i = 0; i < n; ++i) {
out[i] = d(gen);
}
}
template <>
inline void random_numbers(int n, uint32_t seed, double min_value, double max_value,
double* out) {
std::default_random_engine gen(seed);
std::uniform_real_distribution<double> d(min_value, max_value);
for (int i = 0; i < n; ++i) {
out[i] = d(gen);
}
}
void random_Int96_numbers(int n, uint32_t seed, int32_t min_value, int32_t max_value,
Int96* out);
void random_fixed_byte_array(int n, uint32_t seed, uint8_t* buf, int len, FLBA* out);
void random_byte_array(int n, uint32_t seed, uint8_t* buf, ByteArray* out, int min_size,
int max_size);
void random_byte_array(int n, uint32_t seed, uint8_t* buf, ByteArray* out, int max_size);
template <typename Type, typename Sequence>
std::shared_ptr<Buffer> EncodeValues(Encoding::type encoding, bool use_dictionary,
const Sequence& values, int length,
const ColumnDescriptor* descr) {
auto encoder = MakeTypedEncoder<Type>(encoding, use_dictionary, descr);
encoder->Put(values, length);
return encoder->FlushValues();
}
template <typename T>
static void InitValues(int num_values, std::vector<T>& values,
std::vector<uint8_t>& buffer) {
random_numbers(num_values, 0, std::numeric_limits<T>::min(),
std::numeric_limits<T>::max(), values.data());
}
template <typename T>
static void InitDictValues(int num_values, int num_dicts, std::vector<T>& values,
std::vector<uint8_t>& buffer) {
int repeat_factor = num_values / num_dicts;
InitValues<T>(num_dicts, values, buffer);
// add some repeated values
for (int j = 1; j < repeat_factor; ++j) {
for (int i = 0; i < num_dicts; ++i) {
std::memcpy(&values[num_dicts * j + i], &values[i], sizeof(T));
}
}
// Only num_dicts * repeat_factor values (possibly < num_values) have been
// written so far; fill in the remainder by repeating earlier values.
for (int i = num_dicts * repeat_factor; i < num_values; ++i) {
std::memcpy(&values[i], &values[i - num_dicts * repeat_factor], sizeof(T));
}
}
template <>
inline void InitDictValues<bool>(int num_values, int num_dicts, std::vector<bool>& values,
std::vector<uint8_t>& buffer) {
// No op for bool
}
class MockPageReader : public PageReader {
public:
explicit MockPageReader(const std::vector<std::shared_ptr<Page>>& pages)
: pages_(pages), page_index_(0) {}
std::shared_ptr<Page> NextPage() override {
if (page_index_ == static_cast<int>(pages_.size())) {
// EOS to consumer
return std::shared_ptr<Page>(nullptr);
}
return pages_[page_index_++];
}
// No-op
void set_max_page_header_size(uint32_t size) override {}
private:
std::vector<std::shared_ptr<Page>> pages_;
int page_index_;
};
// TODO(wesm): this is only used for testing for now. Refactor to form part of
// primary file write path
template <typename Type>
class DataPageBuilder {
public:
using c_type = typename Type::c_type;
// This class writes data and metadata to the passed inputs
explicit DataPageBuilder(ArrowOutputStream* sink)
: sink_(sink),
num_values_(0),
encoding_(Encoding::PLAIN),
definition_level_encoding_(Encoding::RLE),
repetition_level_encoding_(Encoding::RLE),
have_def_levels_(false),
have_rep_levels_(false),
have_values_(false) {}
void AppendDefLevels(const std::vector<int16_t>& levels, int16_t max_level,
Encoding::type encoding = Encoding::RLE) {
AppendLevels(levels, max_level, encoding);
num_values_ = std::max(static_cast<int32_t>(levels.size()), num_values_);
definition_level_encoding_ = encoding;
have_def_levels_ = true;
}
void AppendRepLevels(const std::vector<int16_t>& levels, int16_t max_level,
Encoding::type encoding = Encoding::RLE) {
AppendLevels(levels, max_level, encoding);
num_values_ = std::max(static_cast<int32_t>(levels.size()), num_values_);
repetition_level_encoding_ = encoding;
have_rep_levels_ = true;
}
void AppendValues(const ColumnDescriptor* d, const std::vector<c_type>& values,
Encoding::type encoding = Encoding::PLAIN) {
std::shared_ptr<Buffer> values_sink = EncodeValues<Type>(
encoding, false, values.data(), static_cast<int>(values.size()), d);
PARQUET_THROW_NOT_OK(sink_->Write(values_sink->data(), values_sink->size()));
num_values_ = std::max(static_cast<int32_t>(values.size()), num_values_);
encoding_ = encoding;
have_values_ = true;
}
int32_t num_values() const { return num_values_; }
Encoding::type encoding() const { return encoding_; }
Encoding::type rep_level_encoding() const { return repetition_level_encoding_; }
Encoding::type def_level_encoding() const { return definition_level_encoding_; }
private:
ArrowOutputStream* sink_;
int32_t num_values_;
Encoding::type encoding_;
Encoding::type definition_level_encoding_;
Encoding::type repetition_level_encoding_;
bool have_def_levels_;
bool have_rep_levels_;
bool have_values_;
// Used internally for both repetition and definition levels
void AppendLevels(const std::vector<int16_t>& levels, int16_t max_level,
Encoding::type encoding) {
if (encoding != Encoding::RLE) {
ParquetException::NYI("only rle encoding currently implemented");
}
std::vector<uint8_t> encode_buffer(LevelEncoder::MaxBufferSize(
Encoding::RLE, max_level, static_cast<int>(levels.size())));
// We encode into separate memory from the output stream because the
// RLE-encoded bytes have to be preceded in the stream by their absolute
// size.
LevelEncoder encoder;
encoder.Init(encoding, max_level, static_cast<int>(levels.size()),
encode_buffer.data(), static_cast<int>(encode_buffer.size()));
encoder.Encode(static_cast<int>(levels.size()), levels.data());
int32_t rle_bytes = encoder.len();
PARQUET_THROW_NOT_OK(
sink_->Write(reinterpret_cast<const uint8_t*>(&rle_bytes), sizeof(int32_t)));
PARQUET_THROW_NOT_OK(sink_->Write(encode_buffer.data(), rle_bytes));
}
};
template <>
inline void DataPageBuilder<BooleanType>::AppendValues(const ColumnDescriptor* d,
const std::vector<bool>& values,
Encoding::type encoding) {
if (encoding != Encoding::PLAIN) {
ParquetException::NYI("only plain encoding currently implemented");
}
auto encoder = MakeTypedEncoder<BooleanType>(Encoding::PLAIN, false, d);
dynamic_cast<BooleanEncoder*>(encoder.get())
->Put(values, static_cast<int>(values.size()));
std::shared_ptr<Buffer> buffer = encoder->FlushValues();
PARQUET_THROW_NOT_OK(sink_->Write(buffer->data(), buffer->size()));
num_values_ = std::max(static_cast<int32_t>(values.size()), num_values_);
encoding_ = encoding;
have_values_ = true;
}
template <typename Type>
static std::shared_ptr<DataPageV1> MakeDataPage(
const ColumnDescriptor* d, const std::vector<typename Type::c_type>& values,
int num_vals, Encoding::type encoding, const uint8_t* indices, int indices_size,
const std::vector<int16_t>& def_levels, int16_t max_def_level,
const std::vector<int16_t>& rep_levels, int16_t max_rep_level) {
int num_values = 0;
auto page_stream = CreateOutputStream();
test::DataPageBuilder<Type> page_builder(page_stream.get());
if (!rep_levels.empty()) {
page_builder.AppendRepLevels(rep_levels, max_rep_level);
}
if (!def_levels.empty()) {
page_builder.AppendDefLevels(def_levels, max_def_level);
}
if (encoding == Encoding::PLAIN) {
page_builder.AppendValues(d, values, encoding);
num_values = std::max(page_builder.num_values(), num_vals);
} else { // DICTIONARY PAGES
PARQUET_THROW_NOT_OK(page_stream->Write(indices, indices_size));
num_values = std::max(page_builder.num_values(), num_vals);
}
PARQUET_ASSIGN_OR_THROW(auto buffer, page_stream->Finish());
return std::make_shared<DataPageV1>(buffer, num_values, encoding,
page_builder.def_level_encoding(),
page_builder.rep_level_encoding(), buffer->size());
}
template <typename TYPE>
class DictionaryPageBuilder {
public:
typedef typename TYPE::c_type TC;
static constexpr int TN = TYPE::type_num;
using SpecializedEncoder = typename EncodingTraits<TYPE>::Encoder;
// This class writes data and metadata to the passed inputs
explicit DictionaryPageBuilder(const ColumnDescriptor* d)
: num_dict_values_(0), have_values_(false) {
auto encoder = MakeTypedEncoder<TYPE>(Encoding::PLAIN, true, d);
dict_traits_ = dynamic_cast<DictEncoder<TYPE>*>(encoder.get());
encoder_.reset(dynamic_cast<SpecializedEncoder*>(encoder.release()));
}
~DictionaryPageBuilder() {}
std::shared_ptr<Buffer> AppendValues(const std::vector<TC>& values) {
int num_values = static_cast<int>(values.size());
// Dictionary encoding
encoder_->Put(values.data(), num_values);
num_dict_values_ = dict_traits_->num_entries();
have_values_ = true;
return encoder_->FlushValues();
}
std::shared_ptr<Buffer> WriteDict() {
std::shared_ptr<Buffer> dict_buffer =
AllocateBuffer(::arrow::default_memory_pool(), dict_traits_->dict_encoded_size());
dict_traits_->WriteDict(dict_buffer->mutable_data());
return dict_buffer;
}
int32_t num_values() const { return num_dict_values_; }
private:
DictEncoder<TYPE>* dict_traits_;
std::unique_ptr<SpecializedEncoder> encoder_;
int32_t num_dict_values_;
bool have_values_;
};
template <>
inline DictionaryPageBuilder<BooleanType>::DictionaryPageBuilder(
const ColumnDescriptor* d) {
ParquetException::NYI("only plain encoding currently implemented for boolean");
}
template <>
inline std::shared_ptr<Buffer> DictionaryPageBuilder<BooleanType>::WriteDict() {
ParquetException::NYI("only plain encoding currently implemented for boolean");
return nullptr;
}
template <>
inline std::shared_ptr<Buffer> DictionaryPageBuilder<BooleanType>::AppendValues(
const std::vector<TC>& values) {
ParquetException::NYI("only plain encoding currently implemented for boolean");
return nullptr;
}
template <typename Type>
inline static std::shared_ptr<DictionaryPage> MakeDictPage(
const ColumnDescriptor* d, const std::vector<typename Type::c_type>& values,
const std::vector<int>& values_per_page, Encoding::type encoding,
std::vector<std::shared_ptr<Buffer>>& rle_indices) {
test::DictionaryPageBuilder<Type> page_builder(d);
int num_pages = static_cast<int>(values_per_page.size());
int value_start = 0;
for (int i = 0; i < num_pages; i++) {
rle_indices.push_back(page_builder.AppendValues(
slice(values, value_start, value_start + values_per_page[i])));
value_start += values_per_page[i];
}
auto buffer = page_builder.WriteDict();
return std::make_shared<DictionaryPage>(buffer, page_builder.num_values(),
Encoding::PLAIN);
}
// Given def/rep levels and values create multiple dict pages
template <typename Type>
inline static void PaginateDict(const ColumnDescriptor* d,
const std::vector<typename Type::c_type>& values,
const std::vector<int16_t>& def_levels,
int16_t max_def_level,
const std::vector<int16_t>& rep_levels,
int16_t max_rep_level, int num_levels_per_page,
const std::vector<int>& values_per_page,
std::vector<std::shared_ptr<Page>>& pages,
Encoding::type encoding = Encoding::RLE_DICTIONARY) {
int num_pages = static_cast<int>(values_per_page.size());
std::vector<std::shared_ptr<Buffer>> rle_indices;
std::shared_ptr<DictionaryPage> dict_page =
MakeDictPage<Type>(d, values, values_per_page, encoding, rle_indices);
pages.push_back(dict_page);
int def_level_start = 0;
int def_level_end = 0;
int rep_level_start = 0;
int rep_level_end = 0;
for (int i = 0; i < num_pages; i++) {
if (max_def_level > 0) {
def_level_start = i * num_levels_per_page;
def_level_end = (i + 1) * num_levels_per_page;
}
if (max_rep_level > 0) {
rep_level_start = i * num_levels_per_page;
rep_level_end = (i + 1) * num_levels_per_page;
}
std::shared_ptr<DataPageV1> data_page = MakeDataPage<Int32Type>(
d, {}, values_per_page[i], encoding, rle_indices[i]->data(),
static_cast<int>(rle_indices[i]->size()),
slice(def_levels, def_level_start, def_level_end), max_def_level,
slice(rep_levels, rep_level_start, rep_level_end), max_rep_level);
pages.push_back(data_page);
}
}
// Given def/rep levels and values create multiple plain pages
template <typename Type>
static inline void PaginatePlain(const ColumnDescriptor* d,
const std::vector<typename Type::c_type>& values,
const std::vector<int16_t>& def_levels,
int16_t max_def_level,
const std::vector<int16_t>& rep_levels,
int16_t max_rep_level, int num_levels_per_page,
const std::vector<int>& values_per_page,
std::vector<std::shared_ptr<Page>>& pages,
Encoding::type encoding = Encoding::PLAIN) {
int num_pages = static_cast<int>(values_per_page.size());
int def_level_start = 0;
int def_level_end = 0;
int rep_level_start = 0;
int rep_level_end = 0;
int value_start = 0;
for (int i = 0; i < num_pages; i++) {
if (max_def_level > 0) {
def_level_start = i * num_levels_per_page;
def_level_end = (i + 1) * num_levels_per_page;
}
if (max_rep_level > 0) {
rep_level_start = i * num_levels_per_page;
rep_level_end = (i + 1) * num_levels_per_page;
}
std::shared_ptr<DataPage> page = MakeDataPage<Type>(
d, slice(values, value_start, value_start + values_per_page[i]),
values_per_page[i], encoding, nullptr, 0,
slice(def_levels, def_level_start, def_level_end), max_def_level,
slice(rep_levels, rep_level_start, rep_level_end), max_rep_level);
pages.push_back(page);
value_start += values_per_page[i];
}
}
// Generates pages from randomly generated data
template <typename Type>
static inline int MakePages(const ColumnDescriptor* d, int num_pages, int levels_per_page,
std::vector<int16_t>& def_levels,
std::vector<int16_t>& rep_levels,
std::vector<typename Type::c_type>& values,
std::vector<uint8_t>& buffer,
std::vector<std::shared_ptr<Page>>& pages,
Encoding::type encoding = Encoding::PLAIN) {
int num_levels = levels_per_page * num_pages;
int num_values = 0;
uint32_t seed = 0;
int16_t zero = 0;
int16_t max_def_level = d->max_definition_level();
int16_t max_rep_level = d->max_repetition_level();
std::vector<int> values_per_page(num_pages, levels_per_page);
// Create definition levels
if (max_def_level > 0) {
def_levels.resize(num_levels);
random_numbers(num_levels, seed, zero, max_def_level, def_levels.data());
for (int p = 0; p < num_pages; p++) {
int num_values_per_page = 0;
for (int i = 0; i < levels_per_page; i++) {
if (def_levels[i + p * levels_per_page] == max_def_level) {
num_values_per_page++;
num_values++;
}
}
values_per_page[p] = num_values_per_page;
}
} else {
num_values = num_levels;
}
// Create repetition levels
if (max_rep_level > 0) {
rep_levels.resize(num_levels);
random_numbers(num_levels, seed, zero, max_rep_level, rep_levels.data());
}
// Create values
values.resize(num_values);
if (encoding == Encoding::PLAIN) {
InitValues<typename Type::c_type>(num_values, values, buffer);
PaginatePlain<Type>(d, values, def_levels, max_def_level, rep_levels, max_rep_level,
levels_per_page, values_per_page, pages);
} else if (encoding == Encoding::RLE_DICTIONARY ||
encoding == Encoding::PLAIN_DICTIONARY) {
// Calls InitValues and repeats the data
InitDictValues<typename Type::c_type>(num_values, levels_per_page, values, buffer);
PaginateDict<Type>(d, values, def_levels, max_def_level, rep_levels, max_rep_level,
levels_per_page, values_per_page, pages);
}
return num_values;
}
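// Illustrative sketch tying the helpers above together: generate random
// pages for a column, then serve them through MockPageReader (e.g. as
// input to a column reader under test). `descr` is assumed to be a leaf
// INT32 ColumnDescriptor.
static inline void ExampleMockPages(const ColumnDescriptor* descr) {
  std::vector<int16_t> def_levels;
  std::vector<int16_t> rep_levels;
  std::vector<int32_t> values;
  std::vector<uint8_t> buffer;
  std::vector<std::shared_ptr<Page>> pages;
  MakePages<Int32Type>(descr, /*num_pages=*/2, /*levels_per_page=*/100,
                       def_levels, rep_levels, values, buffer, pages);
  auto pager = std::unique_ptr<PageReader>(new MockPageReader(pages));
  // `pager` can now be handed to e.g. ColumnReader::Make(descr, std::move(pager)).
}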
// ----------------------------------------------------------------------
// Test data generation
template <>
void inline InitValues<bool>(int num_values, std::vector<bool>& values,
std::vector<uint8_t>& buffer) {
values = {};
::arrow::random_is_valid(num_values, 0.5, &values,
static_cast<int>(::arrow::random_seed()));
}
template <>
inline void InitValues<ByteArray>(int num_values, std::vector<ByteArray>& values,
std::vector<uint8_t>& buffer) {
int max_byte_array_len = 12;
int num_bytes = static_cast<int>(max_byte_array_len + sizeof(uint32_t));
size_t nbytes = num_values * num_bytes;
buffer.resize(nbytes);
random_byte_array(num_values, 0, buffer.data(), values.data(), max_byte_array_len);
}
inline void InitWideByteArrayValues(int num_values, std::vector<ByteArray>& values,
std::vector<uint8_t>& buffer, int min_len,
int max_len) {
int num_bytes = static_cast<int>(max_len + sizeof(uint32_t));
size_t nbytes = num_values * num_bytes;
buffer.resize(nbytes);
random_byte_array(num_values, 0, buffer.data(), values.data(), min_len, max_len);
}
template <>
inline void InitValues<FLBA>(int num_values, std::vector<FLBA>& values,
std::vector<uint8_t>& buffer) {
size_t nbytes = num_values * FLBA_LENGTH;
buffer.resize(nbytes);
random_fixed_byte_array(num_values, 0, buffer.data(), FLBA_LENGTH, values.data());
}
template <>
inline void InitValues<Int96>(int num_values, std::vector<Int96>& values,
std::vector<uint8_t>& buffer) {
random_Int96_numbers(num_values, 0, std::numeric_limits<int32_t>::min(),
std::numeric_limits<int32_t>::max(), values.data());
}
inline std::string TestColumnName(int i) {
std::stringstream col_name;
col_name << "column_" << i;
return col_name.str();
}
// This class lives here because of its dependency on the InitValues specializations.
template <typename TestType>
class PrimitiveTypedTest : public ::testing::Test {
public:
using c_type = typename TestType::c_type;
void SetUpSchema(Repetition::type repetition, int num_columns = 1) {
std::vector<schema::NodePtr> fields;
for (int i = 0; i < num_columns; ++i) {
std::string name = TestColumnName(i);
fields.push_back(schema::PrimitiveNode::Make(name, repetition, TestType::type_num,
ConvertedType::NONE, FLBA_LENGTH));
}
node_ = schema::GroupNode::Make("schema", Repetition::REQUIRED, fields);
schema_.Init(node_);
}
void GenerateData(int64_t num_values);
void SetupValuesOut(int64_t num_values);
void SyncValuesOut();
protected:
schema::NodePtr node_;
SchemaDescriptor schema_;
// Input buffers
std::vector<c_type> values_;
std::vector<int16_t> def_levels_;
std::vector<uint8_t> buffer_;
// Pointer to the values, needed as we cannot use std::vector<bool>::data()
c_type* values_ptr_;
std::vector<uint8_t> bool_buffer_;
// Output buffers
std::vector<c_type> values_out_;
std::vector<uint8_t> bool_buffer_out_;
c_type* values_out_ptr_;
};
template <typename TestType>
inline void PrimitiveTypedTest<TestType>::SyncValuesOut() {}
template <>
inline void PrimitiveTypedTest<BooleanType>::SyncValuesOut() {
std::vector<uint8_t>::const_iterator source_iterator = bool_buffer_out_.begin();
std::vector<c_type>::iterator destination_iterator = values_out_.begin();
while (source_iterator != bool_buffer_out_.end()) {
*destination_iterator++ = *source_iterator++ != 0;
}
}
template <typename TestType>
inline void PrimitiveTypedTest<TestType>::SetupValuesOut(int64_t num_values) {
values_out_.clear();
values_out_.resize(num_values);
values_out_ptr_ = values_out_.data();
}
template <>
inline void PrimitiveTypedTest<BooleanType>::SetupValuesOut(int64_t num_values) {
values_out_.clear();
values_out_.resize(num_values);
bool_buffer_out_.clear();
bool_buffer_out_.resize(num_values);
// Write once to all values so we can copy it without getting Valgrind errors
// about uninitialised values.
std::fill(bool_buffer_out_.begin(), bool_buffer_out_.end(), true);
values_out_ptr_ = reinterpret_cast<bool*>(bool_buffer_out_.data());
}
template <typename TestType>
inline void PrimitiveTypedTest<TestType>::GenerateData(int64_t num_values) {
def_levels_.resize(num_values);
values_.resize(num_values);
InitValues<c_type>(static_cast<int>(num_values), values_, buffer_);
values_ptr_ = values_.data();
std::fill(def_levels_.begin(), def_levels_.end(), 1);
}
template <>
inline void PrimitiveTypedTest<BooleanType>::GenerateData(int64_t num_values) {
def_levels_.resize(num_values);
values_.resize(num_values);
InitValues<c_type>(static_cast<int>(num_values), values_, buffer_);
bool_buffer_.resize(num_values);
std::copy(values_.begin(), values_.end(), bool_buffer_.begin());
values_ptr_ = reinterpret_cast<bool*>(bool_buffer_.data());
std::fill(def_levels_.begin(), def_levels_.end(), 1);
}
} // namespace test
} // namespace parquet

View File

@ -0,0 +1,88 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
namespace parquet {
/// \brief Feature selection when writing Parquet files
///
/// `ParquetVersion::type` governs which data types are allowed and how they
/// are represented. For example, uint32_t data will be written differently
/// depending on this value (as INT64 for PARQUET_1_0, as UINT32 for other
/// versions).
///
/// However, some features - such as compression algorithms, encryption,
/// or the improved "v2" data page format - must be enabled separately in
/// ArrowWriterProperties.
struct ParquetVersion {
enum type : int {
/// Enable only pre-2.2 Parquet format features when writing
///
/// This setting is useful for maximum compatibility with legacy readers.
/// Note that logical types may still be emitted, as long as they have a
/// corresponding converted type.
PARQUET_1_0,
/// DEPRECATED: Enable Parquet format 2.6 features
///
/// This misleadingly named enum value is roughly similar to PARQUET_2_6.
PARQUET_2_0 ARROW_DEPRECATED_ENUM_VALUE("use PARQUET_2_4 or PARQUET_2_6 "
"for fine-grained feature selection"),
/// Enable Parquet format 2.4 and earlier features when writing
///
/// This enables UINT32 as well as logical types which don't have
/// a corresponding converted type.
///
/// Note: Parquet format 2.4.0 was released in October 2017.
PARQUET_2_4,
/// Enable Parquet format 2.6 and earlier features when writing
///
/// This enables the NANOS time unit in addition to the PARQUET_2_4
/// features.
///
/// Note: Parquet format 2.6.0 was released in September 2018.
PARQUET_2_6,
/// Enable latest Parquet format 2.x features
///
/// This value is equal to the greatest 2.x version supported by
/// this library.
PARQUET_2_LATEST = PARQUET_2_6
};
};
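// Illustrative sketch of selecting a format version (commented because
// WriterProperties is only forward-declared below; the builder lives in
// parquet/properties.h):
//
//   parquet::WriterProperties::Builder builder;
//   builder.version(parquet::ParquetVersion::PARQUET_2_6);
//   std::shared_ptr<parquet::WriterProperties> props = builder.build();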
class FileMetaData;
class SchemaDescriptor;
class ReaderProperties;
class ArrowReaderProperties;
class WriterProperties;
class WriterPropertiesBuilder;
class ArrowWriterProperties;
class ArrowWriterPropertiesBuilder;
namespace arrow {
class FileWriter;
class FileReader;
} // namespace arrow
} // namespace parquet

View File

@ -0,0 +1,758 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <iterator>  // for std::ostream_iterator
#include <memory>
#include <sstream>
#include <string>
#include "arrow/util/string_view.h"
#include "parquet/platform.h"
#include "parquet/type_fwd.h"
#include "parquet/windows_fixup.h" // for OPTIONAL
namespace arrow {
namespace util {
class Codec;
} // namespace util
} // namespace arrow
namespace parquet {
// ----------------------------------------------------------------------
// Metadata enums to match Thrift metadata
//
// The reason we maintain our own enums is to avoid transitive dependency on
// the compiled Thrift headers (and thus thrift/Thrift.h) for users of the
// public API. After building parquet-cpp, you should not need to include
// Thrift headers in your application. This means some boilerplate to convert
// between our types and Parquet's Thrift types.
//
// We can also add special values like NONE to distinguish between metadata
// values being set and not set. As an example, consider ConvertedType and
// CompressionCodec.
// Mirrors parquet::Type
struct Type {
enum type {
BOOLEAN = 0,
INT32 = 1,
INT64 = 2,
INT96 = 3,
FLOAT = 4,
DOUBLE = 5,
BYTE_ARRAY = 6,
FIXED_LEN_BYTE_ARRAY = 7,
// Should always be last element.
UNDEFINED = 8
};
};
// Mirrors parquet::ConvertedType
struct ConvertedType {
enum type {
NONE, // Not a real converted type, but means no converted type is specified
UTF8,
MAP,
MAP_KEY_VALUE,
LIST,
ENUM,
DECIMAL,
DATE,
TIME_MILLIS,
TIME_MICROS,
TIMESTAMP_MILLIS,
TIMESTAMP_MICROS,
UINT_8,
UINT_16,
UINT_32,
UINT_64,
INT_8,
INT_16,
INT_32,
INT_64,
JSON,
BSON,
INTERVAL,
// DEPRECATED INVALID ConvertedType for all-null data.
// Only useful for reading legacy files written out by interim Parquet C++ releases.
// For writing, always emit LogicalType::Null instead.
// See PARQUET-1990.
NA = 25,
UNDEFINED = 26 // Not a real converted type; should always be last element
};
};
// forward declaration
namespace format {
class LogicalType;
}
// Mirrors parquet::FieldRepetitionType
struct Repetition {
enum type { REQUIRED = 0, OPTIONAL = 1, REPEATED = 2, /*Always last*/ UNDEFINED = 3 };
};
// Reference:
// parquet-mr/parquet-hadoop/src/main/java/org/apache/parquet/
// format/converter/ParquetMetadataConverter.java
// Sort order for page and column statistics. Types are associated with sort
// orders (e.g., UTF8 columns should use UNSIGNED) and column stats are
// aggregated using a sort order. As of parquet-format version 2.3.1, the
// order used to aggregate stats is always SIGNED and is not stored in the
// Parquet file. These stats are discarded for types that need unsigned.
// See PARQUET-686.
struct SortOrder {
enum type { SIGNED, UNSIGNED, UNKNOWN };
};
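// Example: a sketch of how the sort-order helpers declared near the end of
// this header behave, assuming the usual defaults:
//
//   parquet::DefaultSortOrder(parquet::Type::INT32);  // SortOrder::SIGNED
//   parquet::GetSortOrder(parquet::ConvertedType::UTF8,
//                         parquet::Type::BYTE_ARRAY);  // SortOrder::UNSIGNED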
namespace schema {
struct DecimalMetadata {
bool isset;
int32_t scale;
int32_t precision;
};
} // namespace schema
/// \brief Implementation of parquet.thrift LogicalType types.
class PARQUET_EXPORT LogicalType {
public:
struct Type {
enum type {
UNDEFINED = 0, // Not a real logical type
STRING = 1,
MAP,
LIST,
ENUM,
DECIMAL,
DATE,
TIME,
TIMESTAMP,
INTERVAL,
INT,
NIL, // Thrift NullType: annotates data that is always null
JSON,
BSON,
UUID,
NONE // Not a real logical type; should always be last element
};
};
struct TimeUnit {
enum unit { UNKNOWN = 0, MILLIS = 1, MICROS, NANOS };
};
/// \brief If possible, return a logical type equivalent to the given legacy
/// converted type (and decimal metadata if applicable).
static std::shared_ptr<const LogicalType> FromConvertedType(
const parquet::ConvertedType::type converted_type,
const parquet::schema::DecimalMetadata converted_decimal_metadata = {false, -1,
-1});
/// \brief Return the logical type represented by the Thrift intermediary object.
static std::shared_ptr<const LogicalType> FromThrift(
const parquet::format::LogicalType& thrift_logical_type);
/// \brief Return the explicitly requested logical type.
static std::shared_ptr<const LogicalType> String();
static std::shared_ptr<const LogicalType> Map();
static std::shared_ptr<const LogicalType> List();
static std::shared_ptr<const LogicalType> Enum();
static std::shared_ptr<const LogicalType> Decimal(int32_t precision, int32_t scale = 0);
static std::shared_ptr<const LogicalType> Date();
static std::shared_ptr<const LogicalType> Time(bool is_adjusted_to_utc,
LogicalType::TimeUnit::unit time_unit);
/// \brief Create a Timestamp logical type
/// \param[in] is_adjusted_to_utc set true if the data is UTC-normalized
/// \param[in] time_unit the resolution of the timestamp
/// \param[in] is_from_converted_type if true, the timestamp was generated
/// by translating a legacy converted type of TIMESTAMP_MILLIS or
/// TIMESTAMP_MICROS. Default is false.
/// \param[in] force_set_converted_type if true, always set the
/// legacy ConvertedType TIMESTAMP_MICROS and TIMESTAMP_MILLIS
/// metadata. Default is false
static std::shared_ptr<const LogicalType> Timestamp(
bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit,
bool is_from_converted_type = false, bool force_set_converted_type = false);
static std::shared_ptr<const LogicalType> Interval();
static std::shared_ptr<const LogicalType> Int(int bit_width, bool is_signed);
/// \brief Create a logical type for data that's always null
///
/// Any physical type can be annotated with this logical type.
static std::shared_ptr<const LogicalType> Null();
static std::shared_ptr<const LogicalType> JSON();
static std::shared_ptr<const LogicalType> BSON();
static std::shared_ptr<const LogicalType> UUID();
/// \brief Create a placeholder for when no logical type is specified
static std::shared_ptr<const LogicalType> None();
/// \brief Return true if this logical type is consistent with the given underlying
/// physical type.
bool is_applicable(parquet::Type::type primitive_type,
int32_t primitive_length = -1) const;
/// \brief Return true if this logical type is equivalent to the given legacy converted
/// type (and decimal metadata if applicable).
bool is_compatible(parquet::ConvertedType::type converted_type,
parquet::schema::DecimalMetadata converted_decimal_metadata = {
false, -1, -1}) const;
/// \brief If possible, return the legacy converted type (and decimal metadata if
/// applicable) equivalent to this logical type.
parquet::ConvertedType::type ToConvertedType(
parquet::schema::DecimalMetadata* out_decimal_metadata) const;
/// \brief Return a printable representation of this logical type.
std::string ToString() const;
/// \brief Return a JSON representation of this logical type.
std::string ToJSON() const;
/// \brief Return a serializable Thrift object for this logical type.
parquet::format::LogicalType ToThrift() const;
/// \brief Return true if the given logical type is equivalent to this logical type.
bool Equals(const LogicalType& other) const;
/// \brief Return the enumerated type of this logical type.
LogicalType::Type::type type() const;
/// \brief Return the appropriate sort order for this logical type.
SortOrder::type sort_order() const;
// Type checks ...
bool is_string() const;
bool is_map() const;
bool is_list() const;
bool is_enum() const;
bool is_decimal() const;
bool is_date() const;
bool is_time() const;
bool is_timestamp() const;
bool is_interval() const;
bool is_int() const;
bool is_null() const;
bool is_JSON() const;
bool is_BSON() const;
bool is_UUID() const;
bool is_none() const;
/// \brief Return true if this logical type is of a known type.
bool is_valid() const;
bool is_invalid() const;
/// \brief Return true if this logical type is suitable for a schema GroupNode.
bool is_nested() const;
bool is_nonnested() const;
/// \brief Return true if this logical type is included in the Thrift output for its
/// node.
bool is_serialized() const;
LogicalType(const LogicalType&) = delete;
LogicalType& operator=(const LogicalType&) = delete;
virtual ~LogicalType() noexcept;
protected:
LogicalType();
class Impl;
std::unique_ptr<const Impl> impl_;
};
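// Example: constructing and inspecting a logical type. A sketch; the exact
// ToString() output shown is illustrative only:
//
//   auto t = parquet::LogicalType::Timestamp(
//       /*is_adjusted_to_utc=*/true, parquet::LogicalType::TimeUnit::MICROS);
//   t->is_timestamp();                       // true
//   t->is_applicable(parquet::Type::INT64);  // true: timestamps use INT64
//   t->ToString();  // e.g. "Timestamp(isAdjustedToUTC=true, ...)"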
/// \brief Allowed for physical type BYTE_ARRAY, must be encoded as UTF-8.
class PARQUET_EXPORT StringLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make();
private:
StringLogicalType() = default;
};
/// \brief Allowed for group nodes only.
class PARQUET_EXPORT MapLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make();
private:
MapLogicalType() = default;
};
/// \brief Allowed for group nodes only.
class PARQUET_EXPORT ListLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make();
private:
ListLogicalType() = default;
};
/// \brief Allowed for physical type BYTE_ARRAY, must be encoded as UTF-8.
class PARQUET_EXPORT EnumLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make();
private:
EnumLogicalType() = default;
};
/// \brief Allowed for physical type INT32, INT64, FIXED_LEN_BYTE_ARRAY, or BYTE_ARRAY,
/// depending on the precision.
class PARQUET_EXPORT DecimalLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make(int32_t precision, int32_t scale = 0);
int32_t precision() const;
int32_t scale() const;
private:
DecimalLogicalType() = default;
};
/// \brief Allowed for physical type INT32.
class PARQUET_EXPORT DateLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make();
private:
DateLogicalType() = default;
};
/// \brief Allowed for physical type INT32 (for MILLIS) or INT64 (for MICROS and NANOS).
class PARQUET_EXPORT TimeLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make(bool is_adjusted_to_utc,
LogicalType::TimeUnit::unit time_unit);
bool is_adjusted_to_utc() const;
LogicalType::TimeUnit::unit time_unit() const;
private:
TimeLogicalType() = default;
};
/// \brief Allowed for physical type INT64.
class PARQUET_EXPORT TimestampLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make(bool is_adjusted_to_utc,
LogicalType::TimeUnit::unit time_unit,
bool is_from_converted_type = false,
bool force_set_converted_type = false);
bool is_adjusted_to_utc() const;
LogicalType::TimeUnit::unit time_unit() const;
/// \brief If true, will not set LogicalType in Thrift metadata
bool is_from_converted_type() const;
/// \brief If true, will set ConvertedType for micros and millis
/// resolution in legacy ConvertedType Thrift metadata
bool force_set_converted_type() const;
private:
TimestampLogicalType() = default;
};
/// \brief Allowed for physical type FIXED_LEN_BYTE_ARRAY with length 12
class PARQUET_EXPORT IntervalLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make();
private:
IntervalLogicalType() = default;
};
/// \brief Allowed for physical type INT32 (for bit widths 8, 16, and 32) and INT64
/// (for bit width 64).
class PARQUET_EXPORT IntLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make(int bit_width, bool is_signed);
int bit_width() const;
bool is_signed() const;
private:
IntLogicalType() = default;
};
/// \brief Allowed for any physical type.
class PARQUET_EXPORT NullLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make();
private:
NullLogicalType() = default;
};
/// \brief Allowed for physical type BYTE_ARRAY.
class PARQUET_EXPORT JSONLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make();
private:
JSONLogicalType() = default;
};
/// \brief Allowed for physical type BYTE_ARRAY.
class PARQUET_EXPORT BSONLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make();
private:
BSONLogicalType() = default;
};
/// \brief Allowed for physical type FIXED_LEN_BYTE_ARRAY with length 16,
/// must encode raw UUID bytes.
class PARQUET_EXPORT UUIDLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make();
private:
UUIDLogicalType() = default;
};
/// \brief Allowed for any physical type.
class PARQUET_EXPORT NoLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make();
private:
NoLogicalType() = default;
};
// Internal API, for unrecognized logical types
class PARQUET_EXPORT UndefinedLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make();
private:
UndefinedLogicalType() = default;
};
// Data encodings. Mirrors parquet::Encoding
struct Encoding {
enum type {
PLAIN = 0,
PLAIN_DICTIONARY = 2,
RLE = 3,
BIT_PACKED = 4,
DELTA_BINARY_PACKED = 5,
DELTA_LENGTH_BYTE_ARRAY = 6,
DELTA_BYTE_ARRAY = 7,
RLE_DICTIONARY = 8,
BYTE_STREAM_SPLIT = 9,
// Should always be last element (except UNKNOWN)
UNDEFINED = 10,
UNKNOWN = 999
};
};
// Exposed data encodings: the encoding of the data as returned to the caller,
// rather than the encoding of the data stored in the file. E.g., data encoded
// as RLE_DICTIONARY in the file can be returned as raw dictionary indices
// after RLE decoding, in which case the exposed encoding is DICTIONARY.
enum class ExposedEncoding {
NO_ENCODING = 0, // data is not encoded, i.e. already decoded during reading
DICTIONARY = 1
};
/// \brief Return true if Parquet supports indicated compression type
PARQUET_EXPORT
bool IsCodecSupported(Compression::type codec);
PARQUET_EXPORT
std::unique_ptr<Codec> GetCodec(Compression::type codec);
PARQUET_EXPORT
std::unique_ptr<Codec> GetCodec(Compression::type codec, int compression_level);
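// Example: obtaining a (de)compressor. A sketch, assuming Snappy support was
// compiled into the Arrow build:
//
//   if (parquet::IsCodecSupported(::arrow::Compression::SNAPPY)) {
//     std::unique_ptr<::arrow::util::Codec> codec =
//         parquet::GetCodec(::arrow::Compression::SNAPPY);
//   }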
struct ParquetCipher {
enum type { AES_GCM_V1 = 0, AES_GCM_CTR_V1 = 1 };
};
struct AadMetadata {
std::string aad_prefix;
std::string aad_file_unique;
bool supply_aad_prefix;
};
struct EncryptionAlgorithm {
ParquetCipher::type algorithm;
AadMetadata aad;
};
// parquet::PageType
struct PageType {
enum type {
DATA_PAGE,
INDEX_PAGE,
DICTIONARY_PAGE,
DATA_PAGE_V2,
// Should always be last element
UNDEFINED
};
};
class ColumnOrder {
public:
enum type { UNDEFINED, TYPE_DEFINED_ORDER };
explicit ColumnOrder(ColumnOrder::type column_order) : column_order_(column_order) {}
// Default to Type Defined Order
ColumnOrder() : column_order_(type::TYPE_DEFINED_ORDER) {}
ColumnOrder::type get_order() { return column_order_; }
static ColumnOrder undefined_;
static ColumnOrder type_defined_;
private:
ColumnOrder::type column_order_;
};
// ----------------------------------------------------------------------
struct ByteArray {
ByteArray() : len(0), ptr(NULLPTR) {}
ByteArray(uint32_t len, const uint8_t* ptr) : len(len), ptr(ptr) {}
ByteArray(::arrow::util::string_view view) // NOLINT implicit conversion
: ByteArray(static_cast<uint32_t>(view.size()),
reinterpret_cast<const uint8_t*>(view.data())) {}
uint32_t len;
const uint8_t* ptr;
};
inline bool operator==(const ByteArray& left, const ByteArray& right) {
return left.len == right.len &&
(left.len == 0 || std::memcmp(left.ptr, right.ptr, left.len) == 0);
}
inline bool operator!=(const ByteArray& left, const ByteArray& right) {
return !(left == right);
}
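// Example: ByteArray does not own its bytes; equality compares lengths and
// contents. A sketch:
//
//   ::arrow::util::string_view sv = "hello";
//   parquet::ByteArray a(sv);      // captures length and pointer implicitly
//   parquet::ByteArray b(5, a.ptr);
//   bool same = (a == b);          // true: same length and bytes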
struct FixedLenByteArray {
FixedLenByteArray() : ptr(NULLPTR) {}
explicit FixedLenByteArray(const uint8_t* ptr) : ptr(ptr) {}
const uint8_t* ptr;
};
using FLBA = FixedLenByteArray;
// Julian day at unix epoch.
//
// The Julian Day Number (JDN) is the integer assigned to a whole solar day in
// the Julian day count starting from noon Universal time, with Julian day
// number 0 assigned to the day starting at noon on Monday, January 1, 4713 BC,
// proleptic Julian calendar (November 24, 4714 BC, in the proleptic Gregorian
// calendar).
constexpr int64_t kJulianToUnixEpochDays = INT64_C(2440588);
constexpr int64_t kSecondsPerDay = INT64_C(60 * 60 * 24);
constexpr int64_t kMillisecondsPerDay = kSecondsPerDay * INT64_C(1000);
constexpr int64_t kMicrosecondsPerDay = kMillisecondsPerDay * INT64_C(1000);
constexpr int64_t kNanosecondsPerDay = kMicrosecondsPerDay * INT64_C(1000);
MANUALLY_ALIGNED_STRUCT(1) Int96 { uint32_t value[3]; };
STRUCT_END(Int96, 12);
inline bool operator==(const Int96& left, const Int96& right) {
return std::equal(left.value, left.value + 3, right.value);
}
inline bool operator!=(const Int96& left, const Int96& right) { return !(left == right); }
static inline std::string ByteArrayToString(const ByteArray& a) {
return std::string(reinterpret_cast<const char*>(a.ptr), a.len);
}
static inline void Int96SetNanoSeconds(parquet::Int96& i96, int64_t nanoseconds) {
std::memcpy(&i96.value, &nanoseconds, sizeof(nanoseconds));
}
struct DecodedInt96 {
uint64_t days_since_epoch;
uint64_t nanoseconds;
};
static inline DecodedInt96 DecodeInt96Timestamp(const parquet::Int96& i96) {
// We do the computations in the unsigned domain to avoid undefined behaviour
// on overflow.
DecodedInt96 result;
result.days_since_epoch = i96.value[2] - static_cast<uint64_t>(kJulianToUnixEpochDays);
result.nanoseconds = 0;
std::memcpy(&result.nanoseconds, &i96.value, sizeof(uint64_t));
return result;
}
static inline int64_t Int96GetNanoSeconds(const parquet::Int96& i96) {
const auto decoded = DecodeInt96Timestamp(i96);
return static_cast<int64_t>(decoded.days_since_epoch * kNanosecondsPerDay +
decoded.nanoseconds);
}
static inline int64_t Int96GetMicroSeconds(const parquet::Int96& i96) {
const auto decoded = DecodeInt96Timestamp(i96);
uint64_t microseconds = decoded.nanoseconds / static_cast<uint64_t>(1000);
return static_cast<int64_t>(decoded.days_since_epoch * kMicrosecondsPerDay +
microseconds);
}
static inline int64_t Int96GetMilliSeconds(const parquet::Int96& i96) {
const auto decoded = DecodeInt96Timestamp(i96);
uint64_t milliseconds = decoded.nanoseconds / static_cast<uint64_t>(1000000);
return static_cast<int64_t>(decoded.days_since_epoch * kMillisecondsPerDay +
milliseconds);
}
static inline int64_t Int96GetSeconds(const parquet::Int96& i96) {
const auto decoded = DecodeInt96Timestamp(i96);
uint64_t seconds = decoded.nanoseconds / static_cast<uint64_t>(1000000000);
return static_cast<int64_t>(decoded.days_since_epoch * kSecondsPerDay + seconds);
}
static inline std::string Int96ToString(const Int96& a) {
std::ostringstream result;
std::copy(a.value, a.value + 3, std::ostream_iterator<uint32_t>(result, " "));
return result.str();
}
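// Example: round-tripping an Int96 timestamp. A sketch; per the helpers above,
// value[2] holds the Julian day and the first eight bytes hold nanoseconds
// within that day:
//
//   parquet::Int96 ts;
//   ts.value[2] = static_cast<uint32_t>(parquet::kJulianToUnixEpochDays);
//   parquet::Int96SetNanoSeconds(ts, INT64_C(1500000000));
//   parquet::Int96GetNanoSeconds(ts);   // 1500000000 (1.5 s after the epoch)
//   parquet::Int96GetMilliSeconds(ts);  // 1500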
static inline std::string FixedLenByteArrayToString(const FixedLenByteArray& a, int len) {
std::ostringstream result;
std::copy(a.ptr, a.ptr + len, std::ostream_iterator<uint32_t>(result, " "));
return result.str();
}
template <Type::type TYPE>
struct type_traits {};
template <>
struct type_traits<Type::BOOLEAN> {
using value_type = bool;
static constexpr int value_byte_size = 1;
static constexpr const char* printf_code = "d";
};
template <>
struct type_traits<Type::INT32> {
using value_type = int32_t;
static constexpr int value_byte_size = 4;
static constexpr const char* printf_code = "d";
};
template <>
struct type_traits<Type::INT64> {
using value_type = int64_t;
static constexpr int value_byte_size = 8;
static constexpr const char* printf_code =
(sizeof(long) == sizeof(int64_t)) ? "ld" : "lld";  // NOLINT: runtime/int
};
template <>
struct type_traits<Type::INT96> {
using value_type = Int96;
static constexpr int value_byte_size = 12;
static constexpr const char* printf_code = "s";
};
template <>
struct type_traits<Type::FLOAT> {
using value_type = float;
static constexpr int value_byte_size = 4;
static constexpr const char* printf_code = "f";
};
template <>
struct type_traits<Type::DOUBLE> {
using value_type = double;
static constexpr int value_byte_size = 8;
static constexpr const char* printf_code = "lf";
};
template <>
struct type_traits<Type::BYTE_ARRAY> {
using value_type = ByteArray;
static constexpr int value_byte_size = sizeof(ByteArray);
static constexpr const char* printf_code = "s";
};
template <>
struct type_traits<Type::FIXED_LEN_BYTE_ARRAY> {
using value_type = FixedLenByteArray;
static constexpr int value_byte_size = sizeof(FixedLenByteArray);
static constexpr const char* printf_code = "s";
};
template <Type::type TYPE>
struct PhysicalType {
using c_type = typename type_traits<TYPE>::value_type;
static constexpr Type::type type_num = TYPE;
};
using BooleanType = PhysicalType<Type::BOOLEAN>;
using Int32Type = PhysicalType<Type::INT32>;
using Int64Type = PhysicalType<Type::INT64>;
using Int96Type = PhysicalType<Type::INT96>;
using FloatType = PhysicalType<Type::FLOAT>;
using DoubleType = PhysicalType<Type::DOUBLE>;
using ByteArrayType = PhysicalType<Type::BYTE_ARRAY>;
using FLBAType = PhysicalType<Type::FIXED_LEN_BYTE_ARRAY>;
template <typename Type>
inline std::string format_fwf(int width) {
std::stringstream ss;
ss << "%-" << width << type_traits<Type::type_num>::printf_code;
return ss.str();
}
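// Example: format_fwf builds a left-aligned fixed-width printf format string
// from the type's printf_code. A sketch:
//
//   parquet::format_fwf<parquet::Int32Type>(10);  // "%-10d"
//   parquet::format_fwf<parquet::FloatType>(12);  // "%-12f"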
PARQUET_EXPORT std::string EncodingToString(Encoding::type t);
PARQUET_EXPORT std::string ConvertedTypeToString(ConvertedType::type t);
PARQUET_EXPORT std::string TypeToString(Type::type t);
PARQUET_EXPORT std::string FormatStatValue(Type::type parquet_type,
::arrow::util::string_view val);
PARQUET_EXPORT int GetTypeByteSize(Type::type t);
PARQUET_EXPORT SortOrder::type DefaultSortOrder(Type::type primitive);
PARQUET_EXPORT SortOrder::type GetSortOrder(ConvertedType::type converted,
Type::type primitive);
PARQUET_EXPORT SortOrder::type GetSortOrder(
const std::shared_ptr<const LogicalType>& logical_type, Type::type primitive);
} // namespace parquet

View File

@ -0,0 +1,21 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/util/windows_compatibility.h"
#include "parquet/windows_fixup.h"

View File

@ -0,0 +1,29 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This header needs to be included multiple times.
#include "arrow/util/windows_fixup.h"
#ifdef _WIN32
// parquet.thrift's OPTIONAL RepetitionType conflicts with a Windows #define
#ifdef OPTIONAL
#undef OPTIONAL
#endif
#endif // _WIN32