first commit

This commit is contained in:
Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

View File

@@ -0,0 +1,139 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from libc.stdint cimport *
from libcpp cimport bool as c_bool, nullptr
from libcpp.functional cimport function
from libcpp.memory cimport shared_ptr, unique_ptr, make_shared
from libcpp.string cimport string as c_string
from libcpp.utility cimport pair
from libcpp.vector cimport vector
from libcpp.unordered_map cimport unordered_map
from libcpp.unordered_set cimport unordered_set
from cpython cimport PyObject
from cpython.datetime cimport PyDateTime_DateTime
cimport cpython
# Makes std::static_pointer_cast available to Cython code. The block is
# declared nogil, so the function may be called without holding the GIL.
cdef extern from * namespace "std" nogil:
    cdef shared_ptr[T] static_pointer_cast[T, U](shared_ptr[U])
# vendored from the cymove project https://github.com/ozars/cymove
# The verbatim C++ in the string below defines two cymove::cymove templates,
# one taking T& and one taking T&&; both return std::move(t).
# NOTE(review): indentation is lost in this view; the string and the
# declaration after it presumably form the body of this extern block.
cdef extern from * namespace "cymove" nogil:
"""
#include <type_traits>
#include <utility>
namespace cymove {
template <typename T>
inline typename std::remove_reference<T>::type&& cymove(T& t) {
return std::move(t);
}
template <typename T>
inline typename std::remove_reference<T>::type&& cymove(T&& t) {
return std::move(t);
}
} // namespace cymove
"""
# Cython-side name `move` is bound to the C++ symbol cymove::cymove.
cdef T move" cymove::cymove"[T](T)
# The verbatim C++ in the string below defines two arrow::py::to_shared
# templates, one taking std::unique_ptr<T>& and one taking
# std::unique_ptr<T>&&; both return std::move(t) as a std::shared_ptr<T>.
# NOTE(review): indentation is lost in this view; the string and the
# declaration after it presumably form the body of this extern block.
cdef extern from * namespace "arrow::py" nogil:
"""
#include <memory>
#include <utility>
namespace arrow {
namespace py {
template <typename T>
std::shared_ptr<T> to_shared(std::unique_ptr<T>& t) {
return std::move(t);
}
template <typename T>
std::shared_ptr<T> to_shared(std::unique_ptr<T>&& t) {
return std::move(t);
}
} // namespace py
} // namespace arrow
"""
# Cython-side name `to_shared` is bound to the C++ symbol
# arrow::py::to_shared.
cdef shared_ptr[T] to_shared" arrow::py::to_shared"[T](unique_ptr[T])
# No symbols are declared from this header; the empty extern block only
# names arrow/python/platform.h as a header for the generated C++.
cdef extern from "arrow/python/platform.h":
    pass
# CPython reference-count helpers taken directly from <Python.h>.
cdef extern from "<Python.h>":
    void Py_XDECREF(PyObject* o)
    Py_ssize_t Py_REFCNT(PyObject* o)
# npy_half is declared to Cython as a uint16_t typedef coming from
# numpy/halffloat.h.
cdef extern from "numpy/halffloat.h":
    ctypedef uint16_t npy_half
# arrow::Status and arrow::StatusDetail as seen from Cython.
cdef extern from "arrow/api.h" namespace "arrow" nogil:
    # Status factory functions, bound to the static arrow::Status::*
    # constructors. Only a subset is declared; more can be added as needed.
    cdef CStatus CStatus_OK "arrow::Status::OK"()
    cdef CStatus CStatus_Invalid "arrow::Status::Invalid"()
    cdef CStatus CStatus_NotImplemented \
        "arrow::Status::NotImplemented"(const c_string& msg)
    cdef CStatus CStatus_UnknownError \
        "arrow::Status::UnknownError"(const c_string& msg)

    # arrow::Status: accessors for the message/detail plus one predicate
    # per status category.
    cdef cppclass CStatus "arrow::Status":
        CStatus()

        c_string ToString()
        c_string message()
        shared_ptr[CStatusDetail] detail()

        c_bool ok()
        c_bool IsIOError()
        c_bool IsOutOfMemory()
        c_bool IsInvalid()
        c_bool IsKeyError()
        c_bool IsNotImplemented()
        c_bool IsTypeError()
        c_bool IsCapacityError()
        c_bool IsIndexError()
        c_bool IsSerializationError()
        c_bool IsCancelled()

    # arrow::StatusDetail: only its string rendering is declared here.
    cdef cppclass CStatusDetail "arrow::StatusDetail":
        c_string ToString()
# arrow::Result<T>, declared as the templated Cython class CResult[T].
cdef extern from "arrow/result.h" namespace "arrow" nogil:
    cdef cppclass CResult "arrow::Result"[T]:
        # Constructors: default, from a status, or from a value.
        CResult()
        CResult(CStatus)
        CResult(T)

        c_bool ok()
        CStatus status()
        # Value() takes a T* out-parameter and returns a CStatus;
        # operator* returns a T.
        CStatus Value(T*)
        T operator*()
# Helpers declared from arrow/python/common.h (namespace arrow::py).
cdef extern from "arrow/python/common.h" namespace "arrow::py" nogil:
    # Declared `except *`, so Cython checks for a pending Python exception
    # after every call.
    T GetResultValue[T](CResult[T]) except *
    # Takes a raw pointer, a Python object and variadic extra arguments,
    # and returns a function[F].
    cdef function[F] BindFunction[F](void* unbound, object bound, ...)
cdef inline object PyObject_to_object(PyObject* o):
    # Convert a raw PyObject* into a Cython-managed `object`.
    #
    # The <object> cast increments the reference count; the Py_DECREF right
    # after it drops one reference again, so this function leaves the count
    # where it was on entry.
    # NOTE(review): presumably the caller hands over a reference it owns, so
    # that the returned object stays alive after the decref -- confirm at the
    # call sites.
    cdef object owned = <object> o
    cpython.Py_DECREF(owned)
    return owned

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,107 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.libarrow cimport *
# Cython declarations for the Arrow CUDA C++ API (arrow/gpu/cuda_api.h,
# namespace arrow::cuda). Each Cython name is bound to the quoted C++ name.
# NOTE(review): indentation is lost in this view, so the nesting of the
# declarations below is inferred from their order -- confirm against the
# original file.
cdef extern from "arrow/gpu/cuda_api.h" namespace "arrow::cuda" nogil:
# arrow::cuda::CudaDeviceManager: static Instance() accessor, context
# lookup by GPU number, host allocation and device count.
cdef cppclass CCudaDeviceManager" arrow::cuda::CudaDeviceManager":
@staticmethod
CResult[CCudaDeviceManager*] Instance()
CResult[shared_ptr[CCudaContext]] GetContext(int gpu_number)
CResult[shared_ptr[CCudaContext]] GetSharedContext(int gpu_number,
void* handle)
CStatus AllocateHost(int device_number, int64_t nbytes,
shared_ptr[CCudaHostBuffer]* buffer)
int num_devices() const
# arrow::cuda::CudaContext: allocation, views over existing memory,
# IPC buffer opening, synchronization and basic accessors.
cdef cppclass CCudaContext" arrow::cuda::CudaContext":
CResult[shared_ptr[CCudaBuffer]] Allocate(int64_t nbytes)
CResult[shared_ptr[CCudaBuffer]] View(uint8_t* data, int64_t nbytes)
CResult[shared_ptr[CCudaBuffer]] OpenIpcBuffer(
const CCudaIpcMemHandle& ipc_handle)
CStatus Synchronize()
int64_t bytes_allocated() const
const void* handle() const
int device_number() const
CResult[uintptr_t] GetDeviceAddress(uintptr_t addr)
# arrow::cuda::CudaIpcMemHandle: built from an opaque handle pointer via
# the static FromBuffer(), and serializable into a CBuffer.
cdef cppclass CCudaIpcMemHandle" arrow::cuda::CudaIpcMemHandle":
@staticmethod
CResult[shared_ptr[CCudaIpcMemHandle]] FromBuffer(
const void* opaque_handle)
CResult[shared_ptr[CBuffer]] Serialize(CMemoryPool* pool) const
# arrow::cuda::CudaBuffer, declared as a subclass of CBuffer. Two
# constructors are declared (from raw data + context, or as an
# offset/size slice of a parent buffer), plus copy helpers and IPC export.
cdef cppclass CCudaBuffer" arrow::cuda::CudaBuffer"(CBuffer):
CCudaBuffer(uint8_t* data, int64_t size,
const shared_ptr[CCudaContext]& context,
c_bool own_data=false, c_bool is_ipc=false)
CCudaBuffer(const shared_ptr[CCudaBuffer]& parent,
const int64_t offset, const int64_t size)
@staticmethod
CResult[shared_ptr[CCudaBuffer]] FromBuffer(shared_ptr[CBuffer] buf)
CStatus CopyToHost(const int64_t position, const int64_t nbytes,
void* out) const
CStatus CopyFromHost(const int64_t position, const void* data,
int64_t nbytes)
CStatus CopyFromDevice(const int64_t position, const void* data,
int64_t nbytes)
CStatus CopyFromAnotherDevice(const shared_ptr[CCudaContext]& src_ctx,
const int64_t position, const void* data,
int64_t nbytes)
CResult[shared_ptr[CCudaIpcMemHandle]] ExportForIpc()
shared_ptr[CCudaContext] context() const
# arrow::cuda::CudaHostBuffer, declared as a subclass of CMutableBuffer
# with no additional members.
cdef cppclass \
CCudaHostBuffer" arrow::cuda::CudaHostBuffer"(CMutableBuffer):
pass
# arrow::cuda::CudaBufferReader, declared as a subclass of CBufferReader,
# with two Read overloads (into a caller buffer, or returning a CBuffer).
cdef cppclass \
CCudaBufferReader" arrow::cuda::CudaBufferReader"(CBufferReader):
CCudaBufferReader(const shared_ptr[CBuffer]& buffer)
CResult[int64_t] Read(int64_t nbytes, void* buffer)
CResult[shared_ptr[CBuffer]] Read(int64_t nbytes)
# arrow::cuda::CudaBufferWriter, declared as a subclass of WritableFile,
# with write methods and buffer-size accessors.
cdef cppclass \
CCudaBufferWriter" arrow::cuda::CudaBufferWriter"(WritableFile):
CCudaBufferWriter(const shared_ptr[CCudaBuffer]& buffer)
CStatus Close()
CStatus Write(const void* data, int64_t nbytes)
CStatus WriteAt(int64_t position, const void* data, int64_t nbytes)
CStatus SetBufferSize(const int64_t buffer_size)
int64_t buffer_size()
int64_t num_bytes_buffered() const
# NOTE(review): the three declarations below are presumably free functions
# of arrow::cuda rather than members of the class above -- confirm.
CResult[shared_ptr[CCudaHostBuffer]] AllocateCudaHostBuffer(
int device_number, const int64_t size)
# Cuda prefix is added to avoid picking up arrow::cuda functions
# from arrow namespace.
CResult[shared_ptr[CCudaBuffer]] \
CudaSerializeRecordBatch" arrow::cuda::SerializeRecordBatch"\
(const CRecordBatch& batch,
CCudaContext* ctx)
CResult[shared_ptr[CRecordBatch]] \
CudaReadRecordBatch" arrow::cuda::ReadRecordBatch"\
(const shared_ptr[CSchema]& schema,
CDictionaryMemo* dictionary_memo,
const shared_ptr[CCudaBuffer]& buffer,
CMemoryPool* pool)

View File

@@ -0,0 +1,394 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from libcpp.unordered_map cimport unordered_map
from libcpp cimport bool as c_bool
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_fs cimport *
cdef extern from "arrow/api.h" namespace "arrow" nogil:
    # arrow::RecordBatchIterator, declared as a CIterator over
    # shared_ptr[CRecordBatch] with no additional members.
    cdef cppclass CRecordBatchIterator "arrow::RecordBatchIterator"(
            CIterator[shared_ptr[CRecordBatch]]):
        pass
# Initialize() from arrow/dataset/plan.h (namespace
# arrow::dataset::internal); it takes no arguments and returns nothing.
cdef extern from "arrow/dataset/plan.h" namespace "arrow::dataset::internal" nogil:
    cdef void Initialize()
# Function-type aliases for file-writer finish callbacks.
# cb_writer_finish_internal takes a CFileWriter* and returns a CStatus;
# cb_writer_finish takes a dict and a CFileWriter* and returns nothing.
# NOTE(review): indentation is lost in this view; these are presumably
# module-level ctypedefs rather than part of the extern block above -- confirm.
ctypedef CStatus cb_writer_finish_internal(CFileWriter*)
ctypedef void cb_writer_finish(dict, CFileWriter*)
# Cython declarations for the Arrow Datasets C++ API (arrow/dataset/api.h,
# namespace arrow::dataset). Each Cython name is bound to the quoted C++ name.
# NOTE(review): indentation is lost in this view, so the nesting of the
# declarations below is inferred from their order -- confirm against the
# original file.
cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
# arrow::dataset::ExistingDataBehavior; the members are bound to
# kDeleteMatchingPartitions, kOverwriteOrIgnore and kError.
cdef enum ExistingDataBehavior" arrow::dataset::ExistingDataBehavior":
ExistingDataBehavior_DELETE_MATCHING" \
arrow::dataset::ExistingDataBehavior::kDeleteMatchingPartitions"
ExistingDataBehavior_OVERWRITE_OR_IGNORE" \
arrow::dataset::ExistingDataBehavior::kOverwriteOrIgnore"
ExistingDataBehavior_ERROR" \
arrow::dataset::ExistingDataBehavior::kError"
# arrow::dataset::ScanOptions: only the dataset/projected schemas and the
# use_threads flag are declared.
cdef cppclass CScanOptions "arrow::dataset::ScanOptions":
shared_ptr[CSchema] dataset_schema
shared_ptr[CSchema] projected_schema
c_bool use_threads
# arrow::dataset::ScanNodeOptions, declared as a subclass of
# CExecNodeOptions; constructed from a dataset and scan options.
cdef cppclass CScanNodeOptions "arrow::dataset::ScanNodeOptions"(CExecNodeOptions):
CScanNodeOptions(shared_ptr[CDataset] dataset, shared_ptr[CScanOptions] scan_options)
shared_ptr[CScanOptions] scan_options
# arrow::dataset::FragmentScanOptions: only type_name() is declared.
cdef cppclass CFragmentScanOptions "arrow::dataset::FragmentScanOptions":
c_string type_name() const
# Iterator alias over shared_ptr[CScanTask].
ctypedef CIterator[shared_ptr[CScanTask]] CScanTaskIterator \
"arrow::dataset::ScanTaskIterator"
# arrow::dataset::ScanTask: Execute() yields a record batch iterator.
cdef cppclass CScanTask" arrow::dataset::ScanTask":
CResult[CRecordBatchIterator] Execute()
# arrow::dataset::Fragment: schema reading, scanning and basic accessors.
cdef cppclass CFragment "arrow::dataset::Fragment":
CResult[shared_ptr[CSchema]] ReadPhysicalSchema()
CResult[CScanTaskIterator] Scan(shared_ptr[CScanOptions] options)
c_bool splittable() const
c_string type_name() const
const CExpression& partition_expression() const
# Vector and iterator aliases over shared_ptr[CFragment].
ctypedef vector[shared_ptr[CFragment]] CFragmentVector \
"arrow::dataset::FragmentVector"
ctypedef CIterator[shared_ptr[CFragment]] CFragmentIterator \
"arrow::dataset::FragmentIterator"
# arrow::dataset::InMemoryFragment, declared as a subclass of CFragment;
# constructed from record batches and a partition expression.
cdef cppclass CInMemoryFragment "arrow::dataset::InMemoryFragment"(
CFragment):
CInMemoryFragment(vector[shared_ptr[CRecordBatch]] record_batches,
CExpression partition_expression)
# arrow::dataset::TaggedRecordBatch: a record batch together with a
# fragment pointer.
cdef cppclass CTaggedRecordBatch "arrow::dataset::TaggedRecordBatch":
shared_ptr[CRecordBatch] record_batch
shared_ptr[CFragment] fragment
# Iterator alias over CTaggedRecordBatch values.
ctypedef CIterator[CTaggedRecordBatch] CTaggedRecordBatchIterator \
"arrow::dataset::TaggedRecordBatchIterator"
# arrow::dataset::Scanner: constructible from a dataset or a fragment plus
# scan options; the methods return results as iterators, tables, counts or
# a record batch reader.
cdef cppclass CScanner "arrow::dataset::Scanner":
CScanner(shared_ptr[CDataset], shared_ptr[CScanOptions])
CScanner(shared_ptr[CFragment], shared_ptr[CScanOptions])
CResult[CScanTaskIterator] Scan()
CResult[CTaggedRecordBatchIterator] ScanBatches()
CResult[shared_ptr[CTable]] ToTable()
CResult[shared_ptr[CTable]] TakeRows(const CArray& indices)
CResult[shared_ptr[CTable]] Head(int64_t num_rows)
CResult[int64_t] CountRows()
CResult[CFragmentIterator] GetFragments()
CResult[shared_ptr[CRecordBatchReader]] ToRecordBatchReader()
const shared_ptr[CScanOptions]& options()
# arrow::dataset::ScannerBuilder. ProjectColumns and Project are both bound
# to the C++ name "Project"; they are two overloads under distinct Cython
# names.
cdef cppclass CScannerBuilder "arrow::dataset::ScannerBuilder":
CScannerBuilder(shared_ptr[CDataset],
shared_ptr[CScanOptions] scan_options)
CScannerBuilder(shared_ptr[CSchema], shared_ptr[CFragment],
shared_ptr[CScanOptions] scan_options)
@staticmethod
shared_ptr[CScannerBuilder] FromRecordBatchReader(
shared_ptr[CRecordBatchReader] reader)
CStatus ProjectColumns "Project"(const vector[c_string]& columns)
CStatus Project(vector[CExpression]& exprs, vector[c_string]& columns)
CStatus Filter(CExpression filter)
CStatus UseThreads(c_bool use_threads)
CStatus Pool(CMemoryPool* pool)
CStatus BatchSize(int64_t batch_size)
CStatus FragmentScanOptions(
shared_ptr[CFragmentScanOptions] fragment_scan_options)
CResult[shared_ptr[CScanner]] Finish()
shared_ptr[CSchema] schema() const
# Vector alias over shared_ptr[CDataset].
ctypedef vector[shared_ptr[CDataset]] CDatasetVector \
"arrow::dataset::DatasetVector"
# arrow::dataset::Dataset: schema and fragment access, schema replacement
# and scan construction.
cdef cppclass CDataset "arrow::dataset::Dataset":
const shared_ptr[CSchema] & schema()
CResult[CFragmentIterator] GetFragments()
CResult[CFragmentIterator] GetFragments(CExpression predicate)
const CExpression & partition_expression()
c_string type_name()
CResult[shared_ptr[CDataset]] ReplaceSchema(shared_ptr[CSchema])
CResult[shared_ptr[CScannerBuilder]] NewScan()
# arrow::dataset::InMemoryDataset, declared as a subclass of CDataset;
# constructible from a record batch reader or a table.
cdef cppclass CInMemoryDataset "arrow::dataset::InMemoryDataset"(
CDataset):
CInMemoryDataset(shared_ptr[CRecordBatchReader])
CInMemoryDataset(shared_ptr[CTable])
# arrow::dataset::UnionDataset, declared as a subclass of CDataset; built
# via the static Make() from a schema and child datasets.
cdef cppclass CUnionDataset "arrow::dataset::UnionDataset"(
CDataset):
@staticmethod
CResult[shared_ptr[CUnionDataset]] Make(shared_ptr[CSchema] schema,
CDatasetVector children)
const CDatasetVector& children() const
# arrow::dataset::InspectOptions: only the `fragments` field is declared.
cdef cppclass CInspectOptions "arrow::dataset::InspectOptions":
int fragments
# arrow::dataset::FinishOptions: schema, inspect options and a
# validate_fragments flag.
cdef cppclass CFinishOptions "arrow::dataset::FinishOptions":
shared_ptr[CSchema] schema
CInspectOptions inspect_options
c_bool validate_fragments
# arrow::dataset::DatasetFactory. FinishWithSchema and Finish are both bound
# to the C++ name "Finish" (with and without a schema argument).
cdef cppclass CDatasetFactory "arrow::dataset::DatasetFactory":
CResult[vector[shared_ptr[CSchema]]] InspectSchemas(CInspectOptions)
CResult[shared_ptr[CSchema]] Inspect(CInspectOptions)
CResult[shared_ptr[CDataset]] FinishWithSchema "Finish"(
const shared_ptr[CSchema]& schema)
CResult[shared_ptr[CDataset]] Finish()
const CExpression& root_partition()
CStatus SetRootPartition(CExpression partition)
# arrow::dataset::UnionDatasetFactory: static Make() from a vector of
# dataset factories.
cdef cppclass CUnionDatasetFactory "arrow::dataset::UnionDatasetFactory":
@staticmethod
CResult[shared_ptr[CDatasetFactory]] Make(
vector[shared_ptr[CDatasetFactory]] factories)
# arrow::dataset::FileSource: path/filesystem/buffer accessors and a
# catch-all variadic constructor (see HACK note below).
cdef cppclass CFileSource "arrow::dataset::FileSource":
const c_string& path() const
const shared_ptr[CFileSystem]& filesystem() const
const shared_ptr[CBuffer]& buffer() const
# HACK: Cython can't handle all the overloads so don't declare them.
# This means invalid construction of CFileSource won't be caught in
# the C++ generation phase (though it will still be caught when
# the generated C++ is compiled).
CFileSource(...)
# arrow::dataset::FileWriteOptions: format() and type_name() accessors.
cdef cppclass CFileWriteOptions \
"arrow::dataset::FileWriteOptions":
const shared_ptr[CFileFormat]& format() const
c_string type_name() const
# arrow::dataset::FileWriter: accessors for format, schema, options and
# destination.
cdef cppclass CFileWriter \
"arrow::dataset::FileWriter":
const shared_ptr[CFileFormat]& format() const
const shared_ptr[CSchema]& schema() const
const shared_ptr[CFileWriteOptions]& options() const
const CFileLocator& destination() const
# arrow::dataset::FileFormat: default scan options field, schema
# inspection, fragment creation and default write options.
cdef cppclass CFileFormat "arrow::dataset::FileFormat":
shared_ptr[CFragmentScanOptions] default_fragment_scan_options
c_string type_name() const
CResult[shared_ptr[CSchema]] Inspect(const CFileSource&) const
CResult[shared_ptr[CFileFragment]] MakeFragment(
CFileSource source,
CExpression partition_expression,
shared_ptr[CSchema] physical_schema)
shared_ptr[CFileWriteOptions] DefaultWriteOptions()
# arrow::dataset::FileFragment, declared as a subclass of CFragment, adding
# source() and format() accessors.
cdef cppclass CFileFragment "arrow::dataset::FileFragment"(
CFragment):
const CFileSource& source() const
const shared_ptr[CFileFormat]& format() const
# arrow::dataset::FileSystemDatasetWriteOptions: the fields of the write
# options struct, including the two writer finish callbacks typed as
# function[cb_writer_finish_internal].
cdef cppclass CFileSystemDatasetWriteOptions \
"arrow::dataset::FileSystemDatasetWriteOptions":
shared_ptr[CFileWriteOptions] file_write_options
shared_ptr[CFileSystem] filesystem
c_string base_dir
shared_ptr[CPartitioning] partitioning
int max_partitions
c_string basename_template
function[cb_writer_finish_internal] writer_pre_finish
function[cb_writer_finish_internal] writer_post_finish
ExistingDataBehavior existing_data_behavior
c_bool create_dir
uint32_t max_open_files
uint64_t max_rows_per_file
uint64_t min_rows_per_group
uint64_t max_rows_per_group
# arrow::dataset::FileSystemDataset, declared as a subclass of CDataset,
# with static Make() and Write() plus accessors.
cdef cppclass CFileSystemDataset \
"arrow::dataset::FileSystemDataset"(CDataset):
@staticmethod
CResult[shared_ptr[CDataset]] Make(
shared_ptr[CSchema] schema,
CExpression source_partition,
shared_ptr[CFileFormat] format,
shared_ptr[CFileSystem] filesystem,
vector[shared_ptr[CFileFragment]] fragments)
@staticmethod
CStatus Write(
const CFileSystemDatasetWriteOptions& write_options,
shared_ptr[CScanner] scanner)
c_string type()
vector[c_string] files()
const shared_ptr[CFileFormat]& format() const
const shared_ptr[CFileSystem]& filesystem() const
const shared_ptr[CPartitioning]& partitioning() const
# IPC write options and the IPC / ORC file formats are declared as
# subclasses with no additional members.
cdef cppclass CIpcFileWriteOptions \
"arrow::dataset::IpcFileWriteOptions"(CFileWriteOptions):
pass
cdef cppclass CIpcFileFormat "arrow::dataset::IpcFileFormat"(
CFileFormat):
pass
cdef cppclass COrcFileFormat "arrow::dataset::OrcFileFormat"(
CFileFormat):
pass
# CSV-specific write options, file format and fragment scan options; each
# is declared as a subclass of the generic class named in parentheses.
cdef cppclass CCsvFileWriteOptions \
"arrow::dataset::CsvFileWriteOptions"(CFileWriteOptions):
shared_ptr[CCSVWriteOptions] write_options
CMemoryPool* pool
cdef cppclass CCsvFileFormat "arrow::dataset::CsvFileFormat"(
CFileFormat):
CCSVParseOptions parse_options
cdef cppclass CCsvFragmentScanOptions \
"arrow::dataset::CsvFragmentScanOptions"(CFragmentScanOptions):
CCSVConvertOptions convert_options
CCSVReadOptions read_options
# arrow::dataset::Partitioning: type name, path parsing and schema access.
cdef cppclass CPartitioning "arrow::dataset::Partitioning":
c_string type_name() const
CResult[CExpression] Parse(const c_string & path) const
const shared_ptr[CSchema] & schema()
# arrow::dataset::SegmentEncoding is declared as a cppclass with no
# members; its None and Uri values are exposed as the two typed constants
# that follow.
cdef cppclass CSegmentEncoding" arrow::dataset::SegmentEncoding":
pass
CSegmentEncoding CSegmentEncodingNone\
" arrow::dataset::SegmentEncoding::None"
CSegmentEncoding CSegmentEncodingUri\
" arrow::dataset::SegmentEncoding::Uri"
# Option structs for constructing partitionings and partitioning factories.
cdef cppclass CKeyValuePartitioningOptions \
"arrow::dataset::KeyValuePartitioningOptions":
CSegmentEncoding segment_encoding
cdef cppclass CHivePartitioningOptions \
"arrow::dataset::HivePartitioningOptions":
CSegmentEncoding segment_encoding
c_string null_fallback
cdef cppclass CPartitioningFactoryOptions \
"arrow::dataset::PartitioningFactoryOptions":
c_bool infer_dictionary
shared_ptr[CSchema] schema
CSegmentEncoding segment_encoding
cdef cppclass CHivePartitioningFactoryOptions \
"arrow::dataset::HivePartitioningFactoryOptions":
c_bool infer_dictionary
c_string null_fallback
shared_ptr[CSchema] schema
CSegmentEncoding segment_encoding
# arrow::dataset::PartitioningFactory: only type_name() is declared.
cdef cppclass CPartitioningFactory "arrow::dataset::PartitioningFactory":
c_string type_name() const
# Concrete partitioning classes, each declared as a subclass of
# CPartitioning with a constructor and a dictionaries() accessor. The
# Directory, Hive and Filename variants also declare a static MakeFactory().
cdef cppclass CKeyValuePartitioning \
"arrow::dataset::KeyValuePartitioning"(CPartitioning):
CKeyValuePartitioning(shared_ptr[CSchema] schema,
vector[shared_ptr[CArray]] dictionaries,
CKeyValuePartitioningOptions options)
vector[shared_ptr[CArray]] dictionaries() const
cdef cppclass CDirectoryPartitioning \
"arrow::dataset::DirectoryPartitioning"(CPartitioning):
CDirectoryPartitioning(shared_ptr[CSchema] schema,
vector[shared_ptr[CArray]] dictionaries)
@staticmethod
shared_ptr[CPartitioningFactory] MakeFactory(
vector[c_string] field_names, CPartitioningFactoryOptions)
vector[shared_ptr[CArray]] dictionaries() const
cdef cppclass CHivePartitioning \
"arrow::dataset::HivePartitioning"(CPartitioning):
CHivePartitioning(shared_ptr[CSchema] schema,
vector[shared_ptr[CArray]] dictionaries,
CHivePartitioningOptions options)
@staticmethod
shared_ptr[CPartitioningFactory] MakeFactory(
CHivePartitioningFactoryOptions)
vector[shared_ptr[CArray]] dictionaries() const
cdef cppclass CFilenamePartitioning \
"arrow::dataset::FilenamePartitioning"(CPartitioning):
CFilenamePartitioning(shared_ptr[CSchema] schema,
vector[shared_ptr[CArray]] dictionaries)
@staticmethod
shared_ptr[CPartitioningFactory] MakeFactory(
vector[c_string] field_names, CPartitioningFactoryOptions)
vector[shared_ptr[CArray]] dictionaries() const
# arrow::dataset::PartitioningOrFactory: constructible and assignable from
# either a partitioning or a partitioning factory pointer, with an accessor
# for each.
cdef cppclass CPartitioningOrFactory \
"arrow::dataset::PartitioningOrFactory":
CPartitioningOrFactory(shared_ptr[CPartitioning])
CPartitioningOrFactory(shared_ptr[CPartitioningFactory])
CPartitioningOrFactory & operator = (shared_ptr[CPartitioning])
CPartitioningOrFactory & operator = (
shared_ptr[CPartitioningFactory])
shared_ptr[CPartitioning] partitioning() const
shared_ptr[CPartitioningFactory] factory() const
# arrow::dataset::FileSystemFactoryOptions: fields of the options struct.
cdef cppclass CFileSystemFactoryOptions \
"arrow::dataset::FileSystemFactoryOptions":
CPartitioningOrFactory partitioning
c_string partition_base_dir
c_bool exclude_invalid_files
vector[c_string] selector_ignore_prefixes
# arrow::dataset::FileSystemDatasetFactory, declared as a subclass of
# CDatasetFactory. MakeFromPaths and MakeFromSelector are both bound to the
# C++ name "Make"; they differ in taking a path vector versus a
# CFileSelector.
cdef cppclass CFileSystemDatasetFactory \
"arrow::dataset::FileSystemDatasetFactory"(
CDatasetFactory):
@staticmethod
CResult[shared_ptr[CDatasetFactory]] MakeFromPaths "Make"(
shared_ptr[CFileSystem] filesystem,
vector[c_string] paths,
shared_ptr[CFileFormat] format,
CFileSystemFactoryOptions options
)
@staticmethod
CResult[shared_ptr[CDatasetFactory]] MakeFromSelector "Make"(
shared_ptr[CFileSystem] filesystem,
CFileSelector,
shared_ptr[CFileFormat] format,
CFileSystemFactoryOptions options
)

View File

@@ -0,0 +1,89 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.libarrow_dataset cimport *
from pyarrow._parquet cimport *
# Parquet-specific parts of the Arrow Datasets C++ API, declared from
# arrow/dataset/api.h in namespace arrow::dataset.
cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:

    # File writer subclass exposing the wrapped parquet FileWriter.
    cdef cppclass CParquetFileWriter \
            "arrow::dataset::ParquetFileWriter"(CFileWriter):
        const shared_ptr[FileWriter]& parquet_writer() const

    # Write options subclass carrying the parquet and Arrow writer
    # properties.
    cdef cppclass CParquetFileWriteOptions \
            "arrow::dataset::ParquetFileWriteOptions"(CFileWriteOptions):
        shared_ptr[WriterProperties] writer_properties
        shared_ptr[ArrowWriterProperties] arrow_writer_properties

    # File fragment subclass with row-group level operations.
    # SubsetWithFilter and SubsetWithIds are both bound to the C++ name
    # "Subset" (predicate overload vs. row-group-id overload).
    cdef cppclass CParquetFileFragment "arrow::dataset::ParquetFileFragment"(
            CFileFragment):
        const vector[int]& row_groups() const
        shared_ptr[CFileMetaData] metadata() const
        CResult[vector[shared_ptr[CFragment]]] SplitByRowGroup(
            CExpression predicate)
        CResult[shared_ptr[CFragment]] SubsetWithFilter "Subset"(
            CExpression predicate)
        CResult[shared_ptr[CFragment]] SubsetWithIds "Subset"(
            vector[int] row_group_ids)
        CStatus EnsureCompleteMetadata()

    # The nested ParquetFileFormat::ReaderOptions struct.
    cdef cppclass CParquetFileFormatReaderOptions \
            "arrow::dataset::ParquetFileFormat::ReaderOptions":
        unordered_set[c_string] dict_columns
        TimeUnit coerce_int96_timestamp_unit

    # File format subclass; this MakeFragment overload additionally takes
    # the row groups to include.
    cdef cppclass CParquetFileFormat "arrow::dataset::ParquetFileFormat"(
            CFileFormat):
        CParquetFileFormatReaderOptions reader_options
        CResult[shared_ptr[CFileFragment]] MakeFragment(
            CFileSource source,
            CExpression partition_expression,
            shared_ptr[CSchema] physical_schema,
            vector[int] row_groups)

    # Fragment scan options subclass carrying the parquet and Arrow reader
    # properties.
    cdef cppclass CParquetFragmentScanOptions \
            "arrow::dataset::ParquetFragmentScanOptions"(CFragmentScanOptions):
        shared_ptr[CReaderProperties] reader_properties
        shared_ptr[ArrowReaderProperties] arrow_reader_properties

    # Options struct for the parquet dataset factory.
    cdef cppclass CParquetFactoryOptions \
            "arrow::dataset::ParquetFactoryOptions":
        CPartitioningOrFactory partitioning
        c_string partition_base_dir
        c_bool validate_column_chunk_paths

    # Dataset factory subclass. MakeFromMetaDataPath and
    # MakeFromMetaDataSource are both bound to the C++ name "Make"; they
    # differ in how the metadata file is identified (path string vs.
    # CFileSource plus base path).
    cdef cppclass CParquetDatasetFactory \
            "arrow::dataset::ParquetDatasetFactory"(CDatasetFactory):
        @staticmethod
        CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataPath "Make"(
            const c_string& metadata_path,
            shared_ptr[CFileSystem] filesystem,
            shared_ptr[CParquetFileFormat] format,
            CParquetFactoryOptions options
        )

        @staticmethod
        CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataSource "Make"(
            const CFileSource& metadata_path,
            const c_string& base_path,
            shared_ptr[CFileSystem] filesystem,
            shared_ptr[CParquetFileFormat] format,
            CParquetFactoryOptions options
        )

View File

@@ -0,0 +1,50 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.libarrow cimport (CCompressionType, CStatus, CTable,
COutputStream, CResult, shared_ptr,
vector, CRandomAccessFile, CSchema,
c_string, CIpcReadOptions)
# Feather reader/writer declarations. The block namespace is arrow::ipc, but
# every symbol is bound explicitly to its arrow::ipc::feather C++ name.
cdef extern from "arrow/ipc/api.h" namespace "arrow::ipc" nogil:
    # Format version constants.
    int kFeatherV1Version" arrow::ipc::feather::kFeatherV1Version"
    int kFeatherV2Version" arrow::ipc::feather::kFeatherV2Version"

    # arrow::ipc::feather::WriteProperties: settings passed to WriteFeather.
    cdef cppclass CFeatherProperties" arrow::ipc::feather::WriteProperties":
        int version
        int chunksize
        CCompressionType compression
        int compression_level

    # Bound to arrow::ipc::feather::WriteTable; takes a table, an output
    # stream and the write properties, and returns a CStatus.
    CStatus WriteFeather" arrow::ipc::feather::WriteTable" \
        (const CTable& table, COutputStream* out,
         CFeatherProperties properties)

    # arrow::ipc::feather::Reader: opened through the static Open(); Read is
    # overloaded for the whole table, column indices, or column names.
    cdef cppclass CFeatherReader" arrow::ipc::feather::Reader":
        @staticmethod
        CResult[shared_ptr[CFeatherReader]] Open(
            const shared_ptr[CRandomAccessFile]& file,
            const CIpcReadOptions& options)

        int version()
        shared_ptr[CSchema] schema()

        CStatus Read(shared_ptr[CTable]* out)
        CStatus Read(const vector[int] indices, shared_ptr[CTable]* out)
        CStatus Read(const vector[c_string] names, shared_ptr[CTable]* out)

View File

@@ -0,0 +1,554 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:
cdef char* CPyServerMiddlewareName\
" arrow::py::flight::kPyServerMiddlewareName"
cdef cppclass CActionType" arrow::flight::ActionType":
c_string type
c_string description
cdef cppclass CAction" arrow::flight::Action":
c_string type
shared_ptr[CBuffer] body
cdef cppclass CFlightResult" arrow::flight::Result":
CFlightResult()
CFlightResult(CFlightResult)
shared_ptr[CBuffer] body
cdef cppclass CBasicAuth" arrow::flight::BasicAuth":
CBasicAuth()
CBasicAuth(CBuffer)
CBasicAuth(CBasicAuth)
c_string username
c_string password
CResult[c_string] SerializeToString()
@staticmethod
CResult[CBasicAuth] Deserialize(const c_string& serialized)
cdef cppclass CResultStream" arrow::flight::ResultStream":
CResult[unique_ptr[CFlightResult]] Next()
cdef cppclass CDescriptorType \
" arrow::flight::FlightDescriptor::DescriptorType":
bint operator==(CDescriptorType)
CDescriptorType CDescriptorTypeUnknown\
" arrow::flight::FlightDescriptor::UNKNOWN"
CDescriptorType CDescriptorTypePath\
" arrow::flight::FlightDescriptor::PATH"
CDescriptorType CDescriptorTypeCmd\
" arrow::flight::FlightDescriptor::CMD"
cdef cppclass CFlightDescriptor" arrow::flight::FlightDescriptor":
CDescriptorType type
c_string cmd
vector[c_string] path
CResult[c_string] SerializeToString()
@staticmethod
CResult[CFlightDescriptor] Deserialize(const c_string& serialized)
bint operator==(CFlightDescriptor)
cdef cppclass CTicket" arrow::flight::Ticket":
CTicket()
c_string ticket
bint operator==(CTicket)
CResult[c_string] SerializeToString()
@staticmethod
CResult[CTicket] Deserialize(const c_string& serialized)
cdef cppclass CCriteria" arrow::flight::Criteria":
CCriteria()
c_string expression
cdef cppclass CLocation" arrow::flight::Location":
CLocation()
c_string ToString()
c_bool Equals(const CLocation& other)
@staticmethod
CResult[CLocation] Parse(c_string& uri_string)
@staticmethod
CResult[CLocation] ForGrpcTcp(c_string& host, int port)
@staticmethod
CResult[CLocation] ForGrpcTls(c_string& host, int port)
@staticmethod
CResult[CLocation] ForGrpcUnix(c_string& path)
cdef cppclass CFlightEndpoint" arrow::flight::FlightEndpoint":
CFlightEndpoint()
CTicket ticket
vector[CLocation] locations
bint operator==(CFlightEndpoint)
cdef cppclass CFlightInfo" arrow::flight::FlightInfo":
CFlightInfo(CFlightInfo info)
int64_t total_records()
int64_t total_bytes()
CResult[shared_ptr[CSchema]] GetSchema(CDictionaryMemo* memo)
CFlightDescriptor& descriptor()
const vector[CFlightEndpoint]& endpoints()
CResult[c_string] SerializeToString()
@staticmethod
CResult[unique_ptr[CFlightInfo]] Deserialize(
const c_string& serialized)
cdef cppclass CSchemaResult" arrow::flight::SchemaResult":
CSchemaResult(CSchemaResult result)
CResult[shared_ptr[CSchema]] GetSchema(CDictionaryMemo* memo)
cdef cppclass CFlightListing" arrow::flight::FlightListing":
CResult[unique_ptr[CFlightInfo]] Next()
cdef cppclass CSimpleFlightListing" arrow::flight::SimpleFlightListing":
CSimpleFlightListing(vector[CFlightInfo]&& info)
cdef cppclass CFlightPayload" arrow::flight::FlightPayload":
shared_ptr[CBuffer] descriptor
shared_ptr[CBuffer] app_metadata
CIpcPayload ipc_message
cdef cppclass CFlightDataStream" arrow::flight::FlightDataStream":
shared_ptr[CSchema] schema()
CResult[CFlightPayload] Next()
cdef cppclass CFlightStreamChunk" arrow::flight::FlightStreamChunk":
CFlightStreamChunk()
shared_ptr[CRecordBatch] data
shared_ptr[CBuffer] app_metadata
cdef cppclass CMetadataRecordBatchReader \
" arrow::flight::MetadataRecordBatchReader":
CResult[shared_ptr[CSchema]] GetSchema()
CResult[CFlightStreamChunk] Next()
CResult[shared_ptr[CTable]] ToTable()
CResult[shared_ptr[CRecordBatchReader]] MakeRecordBatchReader\
" arrow::flight::MakeRecordBatchReader"(
shared_ptr[CMetadataRecordBatchReader])
cdef cppclass CMetadataRecordBatchWriter \
" arrow::flight::MetadataRecordBatchWriter"(CRecordBatchWriter):
CStatus Begin(shared_ptr[CSchema] schema,
const CIpcWriteOptions& options)
CStatus WriteMetadata(shared_ptr[CBuffer] app_metadata)
CStatus WriteWithMetadata(const CRecordBatch& batch,
shared_ptr[CBuffer] app_metadata)
cdef cppclass CFlightStreamReader \
" arrow::flight::FlightStreamReader"(CMetadataRecordBatchReader):
void Cancel()
CResult[shared_ptr[CTable]] ToTableWithStopToken" ToTable"\
(const CStopToken& stop_token)
cdef cppclass CFlightMessageReader \
" arrow::flight::FlightMessageReader"(CMetadataRecordBatchReader):
CFlightDescriptor& descriptor()
cdef cppclass CFlightMessageWriter \
" arrow::flight::FlightMessageWriter"(CMetadataRecordBatchWriter):
pass
cdef cppclass CFlightStreamWriter \
" arrow::flight::FlightStreamWriter"(CMetadataRecordBatchWriter):
CStatus DoneWriting()
# Binding for arrow::flight::RecordBatchStream, a CFlightDataStream built
# from a shared CRecordBatchReader and IPC write options.
cdef cppclass CRecordBatchStream \
" arrow::flight::RecordBatchStream"(CFlightDataStream):
CRecordBatchStream(shared_ptr[CRecordBatchReader]& reader,
const CIpcWriteOptions& options)
# Binding for arrow::flight::FlightMetadataReader; ReadMetadata fills the
# caller-supplied shared_ptr[CBuffer] out-parameter.
cdef cppclass CFlightMetadataReader" arrow::flight::FlightMetadataReader":
CStatus ReadMetadata(shared_ptr[CBuffer]* out)
# Binding for arrow::flight::FlightMetadataWriter.
cdef cppclass CFlightMetadataWriter" arrow::flight::FlightMetadataWriter":
CStatus WriteMetadata(const CBuffer& message)
# Auth handshake channels. Readers fill a c_string out-parameter; senders
# take the token by reference.
cdef cppclass CServerAuthReader" arrow::flight::ServerAuthReader":
CStatus Read(c_string* token)
cdef cppclass CServerAuthSender" arrow::flight::ServerAuthSender":
CStatus Write(c_string& token)
cdef cppclass CClientAuthReader" arrow::flight::ClientAuthReader":
CStatus Read(c_string* token)
cdef cppclass CClientAuthSender" arrow::flight::ClientAuthSender":
CStatus Write(c_string& token)
# Opaque auth-handler base types; no members are declared here.
cdef cppclass CServerAuthHandler" arrow::flight::ServerAuthHandler":
pass
cdef cppclass CClientAuthHandler" arrow::flight::ClientAuthHandler":
pass
# Binding for arrow::flight::ServerCallContext: peer information, a
# cancellation flag, and middleware lookup by string key.
cdef cppclass CServerCallContext" arrow::flight::ServerCallContext":
c_string& peer_identity()
c_string& peer()
c_bool is_cancelled()
CServerMiddleware* GetMiddleware(const c_string& key)
# Binding for arrow::flight::TimeoutDuration, constructible from a double.
# NOTE(review): the unit of the double is not visible here (presumably
# seconds) -- confirm against the C++ header.
cdef cppclass CTimeoutDuration" arrow::flight::TimeoutDuration":
CTimeoutDuration(double)
# Binding for arrow::flight::FlightCallOptions: per-call timeout, IPC
# read/write options, extra headers as (key, value) pairs, and a stop token.
cdef cppclass CFlightCallOptions" arrow::flight::FlightCallOptions":
CFlightCallOptions()
CTimeoutDuration timeout
CIpcWriteOptions write_options
CIpcReadOptions read_options
vector[pair[c_string, c_string]] headers
CStopToken stop_token
# Binding for arrow::flight::CertKeyPair: certificate and key held as
# strings (field names indicate PEM text).
cdef cppclass CCertKeyPair" arrow::flight::CertKeyPair":
CCertKeyPair()
c_string pem_cert
c_string pem_key
# arrow::flight::FlightMethod is modelled as an opaque class that only
# supports equality comparison; its enumerators are exposed below as
# individual CFlightMethod constants.
cdef cppclass CFlightMethod" arrow::flight::FlightMethod":
bint operator==(CFlightMethod)
# One Cython name per C++ enumerator of arrow::flight::FlightMethod.
CFlightMethod CFlightMethodInvalid\
" arrow::flight::FlightMethod::Invalid"
CFlightMethod CFlightMethodHandshake\
" arrow::flight::FlightMethod::Handshake"
CFlightMethod CFlightMethodListFlights\
" arrow::flight::FlightMethod::ListFlights"
CFlightMethod CFlightMethodGetFlightInfo\
" arrow::flight::FlightMethod::GetFlightInfo"
CFlightMethod CFlightMethodGetSchema\
" arrow::flight::FlightMethod::GetSchema"
CFlightMethod CFlightMethodDoGet\
" arrow::flight::FlightMethod::DoGet"
CFlightMethod CFlightMethodDoPut\
" arrow::flight::FlightMethod::DoPut"
CFlightMethod CFlightMethodDoAction\
" arrow::flight::FlightMethod::DoAction"
CFlightMethod CFlightMethodListActions\
" arrow::flight::FlightMethod::ListActions"
CFlightMethod CFlightMethodDoExchange\
" arrow::flight::FlightMethod::DoExchange"
# Binding for arrow::flight::CallInfo; only the method field is declared.
cdef cppclass CCallInfo" arrow::flight::CallInfo":
CFlightMethod method
# This is really std::unordered_multimap, but Cython has no
# bindings for it, so treat it as an opaque class and bind the
# methods we need
# Only read-only iteration is declared: cbegin()/cend() plus a
# const_iterator that dereferences to a (key, value) string pair.
cdef cppclass CCallHeaders" arrow::flight::CallHeaders":
cppclass const_iterator:
pair[c_string, c_string] operator*()
const_iterator operator++()
bint operator==(const_iterator)
bint operator!=(const_iterator)
const_iterator cbegin()
const_iterator cend()
# Binding for arrow::flight::AddCallHeaders: appends one key/value header.
cdef cppclass CAddCallHeaders" arrow::flight::AddCallHeaders":
void AddHeader(const c_string& key, const c_string& value)
# Binding for arrow::flight::ServerMiddleware; only name() is declared.
cdef cppclass CServerMiddleware" arrow::flight::ServerMiddleware":
c_string name()
# Opaque middleware base types; no members are declared here.
cdef cppclass CServerMiddlewareFactory\
" arrow::flight::ServerMiddlewareFactory":
pass
cdef cppclass CClientMiddleware" arrow::flight::ClientMiddleware":
pass
cdef cppclass CClientMiddlewareFactory\
" arrow::flight::ClientMiddlewareFactory":
pass
# Binding for arrow::flight::FlightServerOptions, constructed from a
# CLocation. auth_handler is a unique_ptr; middleware is a list of
# (name, factory) pairs.
cdef cppclass CFlightServerOptions" arrow::flight::FlightServerOptions":
CFlightServerOptions(const CLocation& location)
CLocation location
unique_ptr[CServerAuthHandler] auth_handler
vector[CCertKeyPair] tls_certificates
c_bool verify_client
c_string root_certificates
vector[pair[c_string, shared_ptr[CServerMiddlewareFactory]]] middleware
# Binding for arrow::flight::FlightClientOptions. No constructor is declared;
# the static Defaults() returns an instance by value.
cdef cppclass CFlightClientOptions" arrow::flight::FlightClientOptions":
c_string tls_root_certs
c_string cert_chain
c_string private_key
c_string override_hostname
vector[shared_ptr[CClientMiddlewareFactory]] middleware
int64_t write_size_limit_bytes
# Each option value is a CIntStringVariant keyed by option name.
vector[pair[c_string, CIntStringVariant]] generic_options
c_bool disable_server_verification
@staticmethod
CFlightClientOptions Defaults()
# Result type of CFlightClient.DoPut: a stream writer plus a metadata reader.
cdef cppclass CDoPutResult" arrow::flight::FlightClient::DoPutResult":
unique_ptr[CFlightStreamWriter] writer
unique_ptr[CFlightMetadataReader] reader
# Result type of CFlightClient.DoExchange: a stream writer plus a stream
# reader.
cdef cppclass CDoExchangeResult" arrow::flight::FlightClient::DoExchangeResult":
unique_ptr[CFlightStreamWriter] writer
unique_ptr[CFlightStreamReader] reader
# Binding for arrow::flight::FlightClient. Instances come from the static
# Connect(); every RPC method takes CFlightCallOptions as its first argument
# and reports failure through CStatus / CResult.
cdef cppclass CFlightClient" arrow::flight::FlightClient":
@staticmethod
CResult[unique_ptr[CFlightClient]] Connect(const CLocation& location,
const CFlightClientOptions& options)
# auth_handler is passed as a unique_ptr by value.
CStatus Authenticate(CFlightCallOptions& options,
unique_ptr[CClientAuthHandler] auth_handler)
# Returns a pair of strings on success.
CResult[pair[c_string, c_string]] AuthenticateBasicToken(
CFlightCallOptions& options,
const c_string& username,
const c_string& password)
CResult[unique_ptr[CResultStream]] DoAction(CFlightCallOptions& options, CAction& action)
CResult[vector[CActionType]] ListActions(CFlightCallOptions& options)
CResult[unique_ptr[CFlightListing]] ListFlights(CFlightCallOptions& options, CCriteria criteria)
CResult[unique_ptr[CFlightInfo]] GetFlightInfo(CFlightCallOptions& options,
CFlightDescriptor& descriptor)
CResult[unique_ptr[CSchemaResult]] GetSchema(CFlightCallOptions& options,
CFlightDescriptor& descriptor)
CResult[unique_ptr[CFlightStreamReader]] DoGet(CFlightCallOptions& options, CTicket& ticket)
CResult[CDoPutResult] DoPut(CFlightCallOptions& options,
CFlightDescriptor& descriptor,
shared_ptr[CSchema]& schema)
CResult[CDoExchangeResult] DoExchange(CFlightCallOptions& options,
CFlightDescriptor& descriptor)
CStatus Close()
# arrow::flight::FlightStatusCode is modelled as an opaque class that only
# supports equality comparison; its enumerators are exposed below as
# individual CFlightStatusCode constants.
cdef cppclass CFlightStatusCode" arrow::flight::FlightStatusCode":
bint operator==(CFlightStatusCode)
# One Cython name per C++ enumerator of arrow::flight::FlightStatusCode.
CFlightStatusCode CFlightStatusInternal \
" arrow::flight::FlightStatusCode::Internal"
CFlightStatusCode CFlightStatusTimedOut \
" arrow::flight::FlightStatusCode::TimedOut"
CFlightStatusCode CFlightStatusCancelled \
" arrow::flight::FlightStatusCode::Cancelled"
CFlightStatusCode CFlightStatusUnauthenticated \
" arrow::flight::FlightStatusCode::Unauthenticated"
CFlightStatusCode CFlightStatusUnauthorized \
" arrow::flight::FlightStatusCode::Unauthorized"
CFlightStatusCode CFlightStatusUnavailable \
" arrow::flight::FlightStatusCode::Unavailable"
CFlightStatusCode CFlightStatusFailed \
" arrow::flight::FlightStatusCode::Failed"
# Binding for arrow::flight::FlightStatusDetail: status code plus an
# extra_info string. The static UnwrapStatus takes a CStatus and returns a
# shared_ptr to the detail.
# NOTE(review): presumably the returned pointer is null when the status
# carries no Flight detail -- confirm against the C++ header.
cdef cppclass FlightStatusDetail" arrow::flight::FlightStatusDetail":
CFlightStatusCode code()
c_string extra_info()
@staticmethod
shared_ptr[FlightStatusDetail] UnwrapStatus(const CStatus& status)
# Binding for arrow::flight::FlightWriteSizeStatusDetail: exposes the limit
# and the actual size as int64 values.
cdef cppclass FlightWriteSizeStatusDetail\
" arrow::flight::FlightWriteSizeStatusDetail":
int64_t limit()
int64_t actual()
@staticmethod
shared_ptr[FlightWriteSizeStatusDetail] UnwrapStatus(
const CStatus& status)
# Two overloads of arrow::flight::MakeFlightError: with and without the
# extra_info string.
cdef CStatus MakeFlightError" arrow::flight::MakeFlightError" \
(CFlightStatusCode code, const c_string& message)
cdef CStatus MakeFlightError" arrow::flight::MakeFlightError" \
(CFlightStatusCode code,
const c_string& message,
const c_string& extra_info)
# Callbacks for implementing Flight servers
# Use typedef to emulate syntax for std::function<void(..)>
# Every callback takes a Python object as its first argument and returns
# CStatus; results are delivered through the trailing pointer parameter
# where one is present.
ctypedef CStatus cb_list_flights(object, const CServerCallContext&,
const CCriteria*,
unique_ptr[CFlightListing]*)
ctypedef CStatus cb_get_flight_info(object, const CServerCallContext&,
const CFlightDescriptor&,
unique_ptr[CFlightInfo]*)
ctypedef CStatus cb_get_schema(object, const CServerCallContext&,
const CFlightDescriptor&,
unique_ptr[CSchemaResult]*)
# do_put / do_exchange receive their reader and writer as unique_ptr values
# rather than through out-parameters.
ctypedef CStatus cb_do_put(object, const CServerCallContext&,
unique_ptr[CFlightMessageReader],
unique_ptr[CFlightMetadataWriter])
ctypedef CStatus cb_do_get(object, const CServerCallContext&,
const CTicket&,
unique_ptr[CFlightDataStream]*)
ctypedef CStatus cb_do_exchange(object, const CServerCallContext&,
unique_ptr[CFlightMessageReader],
unique_ptr[CFlightMessageWriter])
ctypedef CStatus cb_do_action(object, const CServerCallContext&,
const CAction&,
unique_ptr[CResultStream]*)
ctypedef CStatus cb_list_actions(object, const CServerCallContext&,
vector[CActionType]*)
# Stream "next" callbacks.
ctypedef CStatus cb_result_next(object, unique_ptr[CFlightResult]*)
ctypedef CStatus cb_data_stream_next(object, CFlightPayload*)
# Authentication callbacks (server side, then client side).
ctypedef CStatus cb_server_authenticate(object, CServerAuthSender*,
CServerAuthReader*)
ctypedef CStatus cb_is_valid(object, const c_string&, c_string*)
ctypedef CStatus cb_client_authenticate(object, CClientAuthSender*,
CClientAuthReader*)
ctypedef CStatus cb_get_token(object, c_string*)
# Middleware callbacks.
ctypedef CStatus cb_middleware_sending_headers(object, CAddCallHeaders*)
ctypedef CStatus cb_middleware_call_completed(object, const CStatus&)
ctypedef CStatus cb_client_middleware_received_headers(
object, const CCallHeaders&)
ctypedef CStatus cb_server_middleware_start_call(
object,
const CCallInfo&,
const CCallHeaders&,
shared_ptr[CServerMiddleware]*)
ctypedef CStatus cb_client_middleware_start_call(
object,
const CCallInfo&,
unique_ptr[CClientMiddleware]*)
# Declarations from arrow/python/flight.h (namespace arrow::py::flight):
# C++ adapter classes that hold a Python object together with
# std::function callbacks of the cb_* types.
cdef extern from "arrow/python/flight.h" namespace "arrow::py::flight" nogil:
# Callback table for a Python-implemented Flight server; one
# std::function slot per RPC.
cdef cppclass PyFlightServerVtable:
PyFlightServerVtable()
function[cb_list_flights] list_flights
function[cb_get_flight_info] get_flight_info
function[cb_get_schema] get_schema
function[cb_do_put] do_put
function[cb_do_get] do_get
function[cb_do_exchange] do_exchange
function[cb_do_action] do_action
function[cb_list_actions] list_actions
# Callback table for a Python-implemented server auth handler.
cdef cppclass PyServerAuthHandlerVtable:
PyServerAuthHandlerVtable()
function[cb_server_authenticate] authenticate
function[cb_is_valid] is_valid
# Callback table for a Python-implemented client auth handler.
cdef cppclass PyClientAuthHandlerVtable:
PyClientAuthHandlerVtable()
function[cb_client_authenticate] authenticate
function[cb_get_token] get_token
# Server object constructed from a Python object and its vtable.
cdef cppclass PyFlightServer:
PyFlightServer(object server, PyFlightServerVtable vtable)
CStatus Init(CFlightServerOptions& options)
int port()
# Declared `except *`, so Cython checks for a pending Python exception
# after every call.
CStatus ServeWithSignals() except *
CStatus Shutdown()
CStatus Wait()
# Auth handlers wrapping a Python object and its vtable.
cdef cppclass PyServerAuthHandler\
" arrow::py::flight::PyServerAuthHandler"(CServerAuthHandler):
PyServerAuthHandler(object handler, PyServerAuthHandlerVtable vtable)
cdef cppclass PyClientAuthHandler\
" arrow::py::flight::PyClientAuthHandler"(CClientAuthHandler):
PyClientAuthHandler(object handler, PyClientAuthHandlerVtable vtable)
# Result stream built from a Python object and a cb_result_next callback.
cdef cppclass CPyFlightResultStream\
" arrow::py::flight::PyFlightResultStream"(CResultStream):
CPyFlightResultStream(object generator,
function[cb_result_next] callback)
# Data stream that pairs a Python object with an existing
# CFlightDataStream.
# NOTE(review): presumably data_source is held so the Python object
# outlives the stream -- confirm in arrow/python/flight.h.
cdef cppclass CPyFlightDataStream\
" arrow::py::flight::PyFlightDataStream"(CFlightDataStream):
CPyFlightDataStream(object data_source,
unique_ptr[CFlightDataStream] stream)
# Data stream built from a Python object, a schema, a
# cb_data_stream_next callback and IPC write options.
cdef cppclass CPyGeneratorFlightDataStream\
" arrow::py::flight::PyGeneratorFlightDataStream"\
(CFlightDataStream):
CPyGeneratorFlightDataStream(object generator,
shared_ptr[CSchema] schema,
function[cb_data_stream_next] callback,
const CIpcWriteOptions& options)
# Callback tables for Python-implemented middleware; bound to the nested
# C++ Vtable types.
cdef cppclass PyServerMiddlewareVtable\
" arrow::py::flight::PyServerMiddleware::Vtable":
PyServerMiddlewareVtable()
function[cb_middleware_sending_headers] sending_headers
function[cb_middleware_call_completed] call_completed
cdef cppclass PyClientMiddlewareVtable\
" arrow::py::flight::PyClientMiddleware::Vtable":
PyClientMiddlewareVtable()
function[cb_middleware_sending_headers] sending_headers
function[cb_client_middleware_received_headers] received_headers
function[cb_middleware_call_completed] call_completed
# Server middleware wrapper; py_object() returns an untyped pointer.
# NOTE(review): presumably the wrapped Python object -- confirm.
cdef cppclass CPyServerMiddleware\
" arrow::py::flight::PyServerMiddleware"(CServerMiddleware):
CPyServerMiddleware(object middleware, PyServerMiddlewareVtable vtable)
void* py_object()
cdef cppclass CPyServerMiddlewareFactory\
" arrow::py::flight::PyServerMiddlewareFactory"\
(CServerMiddlewareFactory):
CPyServerMiddlewareFactory(
object factory,
function[cb_server_middleware_start_call] start_call)
cdef cppclass CPyClientMiddleware\
" arrow::py::flight::PyClientMiddleware"(CClientMiddleware):
CPyClientMiddleware(object middleware, PyClientMiddlewareVtable vtable)
cdef cppclass CPyClientMiddlewareFactory\
" arrow::py::flight::PyClientMiddlewareFactory"\
(CClientMiddlewareFactory):
CPyClientMiddlewareFactory(
object factory,
function[cb_client_middleware_start_call] start_call)
# Helpers that build a CFlightInfo / CSchemaResult into the `out`
# unique_ptr and report failure through CStatus.
cdef CStatus CreateFlightInfo" arrow::py::flight::CreateFlightInfo"(
shared_ptr[CSchema] schema,
CFlightDescriptor& descriptor,
vector[CFlightEndpoint] endpoints,
int64_t total_records,
int64_t total_bytes,
unique_ptr[CFlightInfo]* out)
cdef CStatus CreateSchemaResult" arrow::py::flight::CreateSchemaResult"(
shared_ptr[CSchema] schema,
unique_ptr[CSchemaResult]* out)
# Binds the concrete instantiation arrow::util::Variant<int, std::string>
# under a single Cython name; constructible empty, from an int, or from a
# string.
cdef extern from "arrow/util/variant.h" namespace "arrow" nogil:
cdef cppclass CIntStringVariant" arrow::util::Variant<int, std::string>":
CIntStringVariant()
CIntStringVariant(int)
CIntStringVariant(c_string)

View File

@@ -0,0 +1,299 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_python cimport CTimePoint
cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
# Mirror of the C++ enum arrow::fs::FileType; each Cython name maps to the
# fully qualified C++ enumerator.
ctypedef enum CFileType "arrow::fs::FileType":
CFileType_NotFound "arrow::fs::FileType::NotFound"
CFileType_Unknown "arrow::fs::FileType::Unknown"
CFileType_File "arrow::fs::FileType::File"
CFileType_Directory "arrow::fs::FileType::Directory"
# Binding for arrow::fs::FileInfo. Copy and move construction/assignment are
# all declared; type, path, size and mtime have getter/setter pairs, while
# base_name and extension are read-only.
cdef cppclass CFileInfo "arrow::fs::FileInfo":
CFileInfo()
CFileInfo(CFileInfo&&)
CFileInfo& operator=(CFileInfo&&)
CFileInfo(const CFileInfo&)
CFileInfo& operator=(const CFileInfo&)
CFileType type()
void set_type(CFileType type)
c_string path()
void set_path(const c_string& path)
c_string base_name()
int64_t size()
void set_size(int64_t size)
c_string extension()
CTimePoint mtime()
void set_mtime(CTimePoint mtime)
# Binding for arrow::fs::FileSelector: a base directory plus two boolean
# flags (allow_not_found, recursive).
cdef cppclass CFileSelector "arrow::fs::FileSelector":
CFileSelector()
c_string base_dir
c_bool allow_not_found
c_bool recursive
# Binding for arrow::fs::FileLocator: a (filesystem, path) pair.
cdef cppclass CFileLocator "arrow::fs::FileLocator":
shared_ptr[CFileSystem] filesystem
c_string path
# Binding for the arrow::fs::FileSystem interface. Operations report failure
# through CStatus / CResult.
cdef cppclass CFileSystem "arrow::fs::FileSystem":
shared_ptr[CFileSystem] shared_from_this()
c_string type_name() const
CResult[c_string] NormalizePath(c_string path)
# GetFileInfo is overloaded: single path, list of paths, or selector.
CResult[CFileInfo] GetFileInfo(const c_string& path)
CResult[vector[CFileInfo]] GetFileInfo(
const vector[c_string]& paths)
CResult[vector[CFileInfo]] GetFileInfo(const CFileSelector& select)
CStatus CreateDir(const c_string& path, c_bool recursive)
CStatus DeleteDir(const c_string& path)
CStatus DeleteDirContents(const c_string& path, c_bool missing_dir_ok)
CStatus DeleteRootDirContents()
CStatus DeleteFile(const c_string& path)
CStatus DeleteFiles(const vector[c_string]& paths)
CStatus Move(const c_string& src, const c_string& dest)
CStatus CopyFile(const c_string& src, const c_string& dest)
CResult[shared_ptr[CInputStream]] OpenInputStream(
const c_string& path)
CResult[shared_ptr[CRandomAccessFile]] OpenInputFile(
const c_string& path)
# The output-stream openers additionally take key/value metadata.
CResult[shared_ptr[COutputStream]] OpenOutputStream(
const c_string& path, const shared_ptr[const CKeyValueMetadata]&)
CResult[shared_ptr[COutputStream]] OpenAppendStream(
const c_string& path, const shared_ptr[const CKeyValueMetadata]&)
# Equals is overloaded for a reference and for a shared_ptr.
c_bool Equals(const CFileSystem& other)
c_bool Equals(shared_ptr[CFileSystem] other)
# Factories bound to arrow::fs::FileSystemFromUri / FileSystemFromUriOrPath.
# Both return a filesystem and write a path string through out_path.
CResult[shared_ptr[CFileSystem]] CFileSystemFromUri \
"arrow::fs::FileSystemFromUri"(const c_string& uri, c_string* out_path)
CResult[shared_ptr[CFileSystem]] CFileSystemFromUriOrPath \
"arrow::fs::FileSystemFromUriOrPath"(const c_string& uri,
c_string* out_path)
# Binding for arrow::fs::FileSystemGlobalOptions: two TLS CA location
# strings (a file path and a directory path).
cdef cppclass CFileSystemGlobalOptions \
"arrow::fs::FileSystemGlobalOptions":
c_string tls_ca_file_path
c_string tls_ca_dir_path
# Bound to arrow::fs::Initialize; takes the global options above.
CStatus CFileSystemsInitialize "arrow::fs::Initialize" \
(const CFileSystemGlobalOptions& options)
# Binding for arrow::fs::LocalFileSystemOptions: a single use_mmap flag, a
# static Defaults() factory and an equality check.
cdef cppclass CLocalFileSystemOptions "arrow::fs::LocalFileSystemOptions":
c_bool use_mmap
@staticmethod
CLocalFileSystemOptions Defaults()
c_bool Equals(const CLocalFileSystemOptions& other)
# Binding for arrow::fs::LocalFileSystem, constructible with or without
# options.
cdef cppclass CLocalFileSystem "arrow::fs::LocalFileSystem"(CFileSystem):
CLocalFileSystem()
CLocalFileSystem(CLocalFileSystemOptions)
CLocalFileSystemOptions options()
# Binding for arrow::fs::SubTreeFileSystem: wraps a base filesystem under a
# base path; both are readable back through accessors.
cdef cppclass CSubTreeFileSystem \
"arrow::fs::SubTreeFileSystem"(CFileSystem):
CSubTreeFileSystem(const c_string& base_path,
shared_ptr[CFileSystem] base_fs)
c_string base_path()
shared_ptr[CFileSystem] base_fs()
# Mirror of the C++ enum arrow::fs::S3LogLevel.
ctypedef enum CS3LogLevel "arrow::fs::S3LogLevel":
CS3LogLevel_Off "arrow::fs::S3LogLevel::Off"
CS3LogLevel_Fatal "arrow::fs::S3LogLevel::Fatal"
CS3LogLevel_Error "arrow::fs::S3LogLevel::Error"
CS3LogLevel_Warn "arrow::fs::S3LogLevel::Warn"
CS3LogLevel_Info "arrow::fs::S3LogLevel::Info"
CS3LogLevel_Debug "arrow::fs::S3LogLevel::Debug"
CS3LogLevel_Trace "arrow::fs::S3LogLevel::Trace"
# Binding for arrow::fs::S3GlobalOptions; only log_level is declared.
cdef struct CS3GlobalOptions "arrow::fs::S3GlobalOptions":
CS3LogLevel log_level
# Binding for arrow::fs::S3ProxyOptions. FromUriString is a Cython-side
# alias for the C++ static method FromUri.
cdef cppclass CS3ProxyOptions "arrow::fs::S3ProxyOptions":
c_string scheme
c_string host
int port
c_string username
c_string password
c_bool Equals(const CS3ProxyOptions& other)
@staticmethod
CResult[CS3ProxyOptions] FromUriString "FromUri"(
const c_string& uri_string)
# Mirror of the C++ enum arrow::fs::S3CredentialsKind.
ctypedef enum CS3CredentialsKind "arrow::fs::S3CredentialsKind":
CS3CredentialsKind_Anonymous "arrow::fs::S3CredentialsKind::Anonymous"
CS3CredentialsKind_Default "arrow::fs::S3CredentialsKind::Default"
CS3CredentialsKind_Explicit "arrow::fs::S3CredentialsKind::Explicit"
CS3CredentialsKind_Role "arrow::fs::S3CredentialsKind::Role"
CS3CredentialsKind_WebIdentity \
"arrow::fs::S3CredentialsKind::WebIdentity"
# Binding for arrow::fs::S3Options. Instances are obtained from the static
# factories at the end (Defaults, Anonymous, FromAccessKey, FromAssumeRole);
# no constructor is declared.
cdef cppclass CS3Options "arrow::fs::S3Options":
c_string region
c_string endpoint_override
c_string scheme
c_bool background_writes
shared_ptr[const CKeyValueMetadata] default_metadata
c_string role_arn
c_string session_name
c_string external_id
# NOTE(review): the unit of load_frequency is not visible here --
# confirm against the C++ header.
int load_frequency
CS3ProxyOptions proxy_options
CS3CredentialsKind credentials_kind
void ConfigureDefaultCredentials()
void ConfigureAccessKey(const c_string& access_key,
const c_string& secret_key,
const c_string& session_token)
c_string GetAccessKey()
c_string GetSecretKey()
c_string GetSessionToken()
c_bool Equals(const CS3Options& other)
@staticmethod
CS3Options Defaults()
@staticmethod
CS3Options Anonymous()
@staticmethod
CS3Options FromAccessKey(const c_string& access_key,
const c_string& secret_key,
const c_string& session_token)
@staticmethod
CS3Options FromAssumeRole(const c_string& role_arn,
const c_string& session_name,
const c_string& external_id,
const int load_frequency)
# Binding for arrow::fs::S3FileSystem; created through the static Make().
cdef cppclass CS3FileSystem "arrow::fs::S3FileSystem"(CFileSystem):
@staticmethod
CResult[shared_ptr[CS3FileSystem]] Make(const CS3Options& options)
CS3Options options()
c_string region()
# S3 subsystem setup/teardown, bound to arrow::fs::InitializeS3 and
# arrow::fs::FinalizeS3.
cdef CStatus CInitializeS3 "arrow::fs::InitializeS3"(
const CS3GlobalOptions& options)
cdef CStatus CFinalizeS3 "arrow::fs::FinalizeS3"()
# Takes a bucket name and returns a string result (the region, per the
# function name).
cdef CResult[c_string] ResolveS3BucketRegion(const c_string& bucket)
# Binding for arrow::fs::HdfsOptions. FromUriString is a Cython-side alias
# for the C++ static method FromUri; the Configure* methods return void.
cdef cppclass CHdfsOptions "arrow::fs::HdfsOptions":
HdfsConnectionConfig connection_config
int32_t buffer_size
int16_t replication
int64_t default_block_size
@staticmethod
CResult[CHdfsOptions] FromUriString "FromUri"(
const c_string& uri_string)
void ConfigureEndPoint(c_string host, int port)
void ConfigureDriver(c_bool use_hdfs3)
void ConfigureReplication(int16_t replication)
void ConfigureUser(c_string user_name)
void ConfigureBufferSize(int32_t buffer_size)
void ConfigureBlockSize(int64_t default_block_size)
void ConfigureKerberosTicketCachePath(c_string path)
void ConfigureExtraConf(c_string key, c_string value)
# Binding for arrow::fs::HadoopFileSystem; created through the static Make().
cdef cppclass CHadoopFileSystem "arrow::fs::HadoopFileSystem"(CFileSystem):
@staticmethod
CResult[shared_ptr[CHadoopFileSystem]] Make(
const CHdfsOptions& options)
CHdfsOptions options()
# Binding for arrow::fs::internal::MockFileSystem, constructed from a
# CTimePoint.
cdef cppclass CMockFileSystem "arrow::fs::internal::MockFileSystem"(
CFileSystem):
CMockFileSystem(CTimePoint current_time)
# Two Cython names for the overloaded C++ arrow::fs::CopyFiles.
# This overload takes parallel vectors of source and destination locators.
CStatus CCopyFiles "arrow::fs::CopyFiles"(
const vector[CFileLocator]& sources,
const vector[CFileLocator]& destinations,
const CIOContext& io_context,
int64_t chunk_size, c_bool use_threads)
# This overload takes a source filesystem + selector and a destination
# filesystem + base directory.
CStatus CCopyFilesWithSelector "arrow::fs::CopyFiles"(
const shared_ptr[CFileSystem]& source_fs,
const CFileSelector& source_sel,
const shared_ptr[CFileSystem]& destination_fs,
const c_string& destination_base_dir,
const CIOContext& io_context,
int64_t chunk_size, c_bool use_threads)
# Callbacks for implementing Python filesystems
# Use typedef to emulate syntax for std::function<void(..)>
# Each callback takes a Python object first. All return void except
# CallbackEquals (c_bool); results are written through the trailing pointer
# parameter where one is present.
ctypedef void CallbackGetTypeName(object, c_string*)
ctypedef c_bool CallbackEquals(object, const CFileSystem&)
ctypedef void CallbackGetFileInfo(object, const c_string&, CFileInfo*)
ctypedef void CallbackGetFileInfoVector(object, const vector[c_string]&,
vector[CFileInfo]*)
ctypedef void CallbackGetFileInfoSelector(object, const CFileSelector&,
vector[CFileInfo]*)
ctypedef void CallbackCreateDir(object, const c_string&, c_bool)
ctypedef void CallbackDeleteDir(object, const c_string&)
ctypedef void CallbackDeleteDirContents(object, const c_string&, c_bool)
ctypedef void CallbackDeleteRootDirContents(object)
ctypedef void CallbackDeleteFile(object, const c_string&)
ctypedef void CallbackMove(object, const c_string&, const c_string&)
ctypedef void CallbackCopyFile(object, const c_string&, const c_string&)
ctypedef void CallbackOpenInputStream(object, const c_string&,
shared_ptr[CInputStream]*)
ctypedef void CallbackOpenInputFile(object, const c_string&,
shared_ptr[CRandomAccessFile]*)
ctypedef void CallbackOpenOutputStream(
object, const c_string&, const shared_ptr[const CKeyValueMetadata]&,
shared_ptr[COutputStream]*)
ctypedef void CallbackNormalizePath(object, const c_string&, c_string*)
# Declarations from arrow/python/filesystem.h (namespace arrow::py::fs):
# a filesystem whose operations are forwarded to std::function callbacks
# stored in a vtable.
cdef extern from "arrow/python/filesystem.h" namespace "arrow::py::fs" nogil:

    # Callback table for a Python-implemented filesystem; one std::function
    # slot per operation.
    cdef cppclass CPyFileSystemVtable "arrow::py::fs::PyFileSystemVtable":
        # Default constructor. Cython identifies a constructor by the
        # Cython-side class name (CPyFileSystemVtable), not the C++ name.
        CPyFileSystemVtable()
        function[CallbackGetTypeName] get_type_name
        function[CallbackEquals] equals
        function[CallbackGetFileInfo] get_file_info
        function[CallbackGetFileInfoVector] get_file_info_vector
        function[CallbackGetFileInfoSelector] get_file_info_selector
        function[CallbackCreateDir] create_dir
        function[CallbackDeleteDir] delete_dir
        function[CallbackDeleteDirContents] delete_dir_contents
        function[CallbackDeleteRootDirContents] delete_root_dir_contents
        function[CallbackDeleteFile] delete_file
        function[CallbackMove] move
        function[CallbackCopyFile] copy_file
        function[CallbackOpenInputStream] open_input_stream
        function[CallbackOpenInputFile] open_input_file
        function[CallbackOpenOutputStream] open_output_stream
        # Append shares the CallbackOpenOutputStream signature.
        function[CallbackOpenOutputStream] open_append_stream
        function[CallbackNormalizePath] normalize_path

    # Binding for arrow::py::fs::PyFileSystem: created via the static Make()
    # from a Python handler object and a vtable; handler() returns a
    # PyObject*.
    cdef cppclass CPyFileSystem "arrow::py::fs::PyFileSystem":
        @staticmethod
        shared_ptr[CPyFileSystem] Make(object handler,
                                       CPyFileSystemVtable vtable)

        PyObject* handler()

View File

@@ -0,0 +1,294 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
# Signature of the invalid-row callback: takes a Python object and a
# const CCSVInvalidRow&, returns a CInvalidRowResult.
ctypedef CInvalidRowResult PyInvalidRowCallback(object,
const CCSVInvalidRow&)
# MakeInvalidRowHandler takes a PyInvalidRowCallback std::function and a
# Python handler object and returns a std::function of CInvalidRowHandler.
cdef extern from "arrow/python/csv.h" namespace "arrow::py::csv":
function[CInvalidRowHandler] MakeInvalidRowHandler(
function[PyInvalidRowCallback], object handler)
# This extern block is not declared nogil.
cdef extern from "arrow/python/api.h" namespace "arrow::py":
# Requires GIL
CResult[shared_ptr[CDataType]] InferArrowType(
object obj, object mask, c_bool pandas_null_sentinels)
# Helpers from arrow::py::internal for month/day/nano intervals. This block
# is not declared nogil either; its functions return or build Python objects.
cdef extern from "arrow/python/api.h" namespace "arrow::py::internal":
object NewMonthDayNanoTupleType()
CResult[PyObject*] MonthDayNanoIntervalArrayToPyList(
const CMonthDayNanoIntervalArray& array)
CResult[PyObject*] MonthDayNanoIntervalScalarToPyObject(
const CMonthDayNanoIntervalScalar& scalar)
cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
# Returns the shared CDataType for a Type value.
shared_ptr[CDataType] GetPrimitiveType(Type type)
# Takes an npy_half and returns a Python object.
object PyHalf_FromHalf(npy_half value)
# Options struct consumed by ConvertPySequence below.
cdef cppclass PyConversionOptions:
PyConversionOptions()
shared_ptr[CDataType] type
int64_t size
CMemoryPool* pool
c_bool from_pandas
c_bool ignore_timezone
c_bool strict
# TODO Some functions below are not actually "nogil"
# Converts a Python object (with an accompanying mask object) into a
# chunked array according to the options.
CResult[shared_ptr[CChunkedArray]] ConvertPySequence(
object obj, object mask, const PyConversionOptions& options,
CMemoryPool* pool)
# Writes a data type for the given dtype object into `type`.
CStatus NumPyDtypeToArrow(object dtype, shared_ptr[CDataType]* type)
# NdarrayToArrow has two overloads; the second additionally takes cast
# options. Both write a chunked array to `out`.
# NOTE(review): `ao` / `mo` presumably stand for the array object and mask
# object -- confirm against the C++ header.
CStatus NdarrayToArrow(CMemoryPool* pool, object ao, object mo,
c_bool from_pandas,
const shared_ptr[CDataType]& type,
shared_ptr[CChunkedArray]* out)
CStatus NdarrayToArrow(CMemoryPool* pool, object ao, object mo,
c_bool from_pandas,
const shared_ptr[CDataType]& type,
const CCastOptions& cast_options,
shared_ptr[CChunkedArray]* out)
# Dense tensor conversions in both directions.
CStatus NdarrayToTensor(CMemoryPool* pool, object ao,
const vector[c_string]& dim_names,
shared_ptr[CTensor]* out)
CStatus TensorToNdarray(const shared_ptr[CTensor]& tensor, object base,
PyObject** out)
# Sparse tensor -> Python object conversions. The COO variant writes two
# outputs (data, coords); the CSR/CSC/CSF variants write three (data,
# indptr, indices).
CStatus SparseCOOTensorToNdarray(
const shared_ptr[CSparseCOOTensor]& sparse_tensor, object base,
PyObject** out_data, PyObject** out_coords)
CStatus SparseCSRMatrixToNdarray(
const shared_ptr[CSparseCSRMatrix]& sparse_tensor, object base,
PyObject** out_data, PyObject** out_indptr, PyObject** out_indices)
CStatus SparseCSCMatrixToNdarray(
const shared_ptr[CSparseCSCMatrix]& sparse_tensor, object base,
PyObject** out_data, PyObject** out_indptr, PyObject** out_indices)
CStatus SparseCSFTensorToNdarray(
const shared_ptr[CSparseCSFTensor]& sparse_tensor, object base,
PyObject** out_data, PyObject** out_indptr, PyObject** out_indices)
# Python objects -> sparse tensor conversions. Each takes the component
# objects plus shape and dimension names and writes the result to `out`;
# the CSF variant additionally takes an axis order.
CStatus NdarraysToSparseCOOTensor(CMemoryPool* pool, object data_ao,
object coords_ao,
const vector[int64_t]& shape,
const vector[c_string]& dim_names,
shared_ptr[CSparseCOOTensor]* out)
CStatus NdarraysToSparseCSRMatrix(CMemoryPool* pool, object data_ao,
object indptr_ao, object indices_ao,
const vector[int64_t]& shape,
const vector[c_string]& dim_names,
shared_ptr[CSparseCSRMatrix]* out)
CStatus NdarraysToSparseCSCMatrix(CMemoryPool* pool, object data_ao,
object indptr_ao, object indices_ao,
const vector[int64_t]& shape,
const vector[c_string]& dim_names,
shared_ptr[CSparseCSCMatrix]* out)
CStatus NdarraysToSparseCSFTensor(CMemoryPool* pool, object data_ao,
object indptr_ao, object indices_ao,
const vector[int64_t]& shape,
const vector[int64_t]& axis_order,
const vector[c_string]& dim_names,
shared_ptr[CSparseCSFTensor]* out)
# Dense tensor -> sparse tensor conversions, one per sparse format; each
# writes its result to `out`.
CStatus TensorToSparseCOOTensor(shared_ptr[CTensor],
shared_ptr[CSparseCOOTensor]* out)
CStatus TensorToSparseCSRMatrix(shared_ptr[CTensor],
shared_ptr[CSparseCSRMatrix]* out)
CStatus TensorToSparseCSCMatrix(shared_ptr[CTensor],
shared_ptr[CSparseCSCMatrix]* out)
CStatus TensorToSparseCSFTensor(shared_ptr[CTensor],
shared_ptr[CSparseCSFTensor]* out)
# Arrow -> pandas conversions driven by PandasOptions; the result is written
# to `out` as a PyObject*. The array variants also take a `py_ref` Python
# object; the table variant does not.
CStatus ConvertArrayToPandas(const PandasOptions& options,
shared_ptr[CArray] arr,
object py_ref, PyObject** out)
CStatus ConvertChunkedArrayToPandas(const PandasOptions& options,
shared_ptr[CChunkedArray] arr,
object py_ref, PyObject** out)
CStatus ConvertTableToPandas(const PandasOptions& options,
shared_ptr[CTable] table,
PyObject** out)
# Setter bound to arrow::py::set_default_memory_pool; takes the pool pointer.
void c_set_default_memory_pool \
    " arrow::py::set_default_memory_pool"(CMemoryPool* pool)
# Getter bound to arrow::py::get_memory_pool; returns a pool pointer.
# This is a separate declaration: no line continuation may follow the setter,
# otherwise both declarations collapse into one logical line.
CMemoryPool* c_get_memory_pool \
    " arrow::py::get_memory_pool"()
# CBuffer subclass created from a Python object through the static
# FromPyObject().
cdef cppclass PyBuffer(CBuffer):
@staticmethod
CResult[shared_ptr[CBuffer]] FromPyObject(object obj)
# CBuffer subclass created from a raw (data, size) pair plus a `base` Python
# object.
# NOTE(review): presumably `base` is kept referenced so the memory stays
# valid -- confirm against the C++ header.
cdef cppclass PyForeignBuffer(CBuffer):
@staticmethod
CStatus Make(const uint8_t* data, int64_t size, object base,
shared_ptr[CBuffer]* out)
# Arrow IO adapters constructed from a Python object `fo`.
cdef cppclass PyReadableFile(CRandomAccessFile):
PyReadableFile(object fo)
cdef cppclass PyOutputStream(COutputStream):
PyOutputStream(object fo)
# Options struct for the Convert*ToPandas functions: a memory pool, a set of
# boolean switches, and two sets of column names.
cdef cppclass PandasOptions:
CMemoryPool* pool
c_bool strings_to_categorical
c_bool zero_copy_only
c_bool integer_object_nulls
c_bool date_as_object
c_bool timestamp_as_object
c_bool use_threads
c_bool coerce_temporal_nanoseconds
c_bool ignore_timezone
c_bool deduplicate_objects
c_bool safe_cast
c_bool split_blocks
c_bool self_destruct
c_bool decode_dictionaries
unordered_set[c_string] categorical_columns
unordered_set[c_string] extension_columns
# Binding for arrow::py::SerializedPyObject: a record batch plus a list of
# tensors, with methods to write to an output stream or extract components.
cdef cppclass CSerializedPyObject" arrow::py::SerializedPyObject":
shared_ptr[CRecordBatch] batch
vector[shared_ptr[CTensor]] tensors
CStatus WriteTo(COutputStream* dst)
CStatus GetComponents(CMemoryPool* pool, PyObject** dst)
# Serialization entry points; each writes its result through the trailing
# out-parameter.
CStatus SerializeObject(object context, object sequence,
CSerializedPyObject* out)
CStatus DeserializeObject(object context,
const CSerializedPyObject& obj,
PyObject* base, PyObject** out)
CStatus ReadSerializedObject(CRandomAccessFile* src,
CSerializedPyObject* out)
# Integer counters, one per sparse tensor format, plus two total accessors.
# NOTE(review): the meaning of ndim_csf is not visible here -- confirm
# against the C++ header.
cdef cppclass SparseTensorCounts:
SparseTensorCounts()
int coo
int csr
int csc
int csf
int ndim_csf
int num_total_tensors() const
int num_total_buffers() const
# Builds a CSerializedPyObject from component counts and a Python `buffers`
# object, writing the result to `out`.
CStatus GetSerializedFromComponents(
int num_tensors,
const SparseTensorCounts& num_sparse_tensors,
int num_ndarrays,
int num_buffers,
object buffers,
CSerializedPyObject* out)
# Time-point and timezone helpers from arrow::py::internal.
cdef extern from "arrow/python/api.h" namespace "arrow::py::internal" nogil:
# Opaque handle for arrow::py::internal::TimePoint; no members declared.
cdef cppclass CTimePoint "arrow::py::internal::TimePoint":
pass
# Conversions between CTimePoint and Python datetimes, nanosecond integers
# (`_ns`) and second doubles (`_s`), as the names and types indicate.
CTimePoint PyDateTime_to_TimePoint(PyDateTime_DateTime* pydatetime)
int64_t TimePoint_to_ns(CTimePoint val)
CTimePoint TimePoint_from_s(double val)
CTimePoint TimePoint_from_ns(int64_t val)
# tzinfo object <-> string conversions.
CResult[c_string] TzinfoToString(PyObject* pytzinfo)
CResult[PyObject*] StringToTzinfo(c_string)
# Declared `except -1`: a -1 return tells Cython a Python exception is set.
cdef extern from "arrow/python/init.h":
int arrow_init_numpy() except -1
# Same `except -1` convention as above.
cdef extern from "arrow/python/pyarrow.h" namespace "arrow::py":
int import_pyarrow() except -1
# Helpers taking a CStatus: a boolean test (IsPyError) and a void
# RestorePyError.
cdef extern from "arrow/python/common.h" namespace "arrow::py":
c_bool IsPyError(const CStatus& status)
void RestorePyError(const CStatus& status)
# Boolean type predicates over a Python object.
cdef extern from "arrow/python/inference.h" namespace "arrow::py":
c_bool IsPyBool(object o)
c_bool IsPyInt(object o)
c_bool IsPyFloat(object o)
# Binding for arrow::py::PyRecordBatchReader. The static Make() takes a
# schema and a Python object and returns a shared CRecordBatchReader.
cdef extern from "arrow/python/ipc.h" namespace "arrow::py":
cdef cppclass CPyRecordBatchReader" arrow::py::PyRecordBatchReader" \
(CRecordBatchReader):
@staticmethod
CResult[shared_ptr[CRecordBatchReader]] Make(shared_ptr[CSchema],
object)
# Binding for arrow::py::PyExtensionType, an extension type associated with
# a Python object. Two static factories write the new type to `out`: one
# from a class object plus an extension name, one from an instance.
cdef extern from "arrow/python/extension_type.h" namespace "arrow::py":
cdef cppclass CPyExtensionType \
" arrow::py::PyExtensionType"(CExtensionType):
@staticmethod
CStatus FromClass(const shared_ptr[CDataType] storage_type,
const c_string extension_name, object typ,
shared_ptr[CExtensionType]* out)
@staticmethod
CStatus FromInstance(shared_ptr[CDataType] storage_type,
object inst, shared_ptr[CExtensionType]* out)
object GetInstance()
CStatus SetInstance(object)
# Returns a string (per its name, the Python extension type name).
c_string PyExtensionName()
# Registration takes a data type; unregistration takes a type-name string.
CStatus RegisterPyExtensionType(shared_ptr[CDataType])
CStatus UnregisterPyExtensionType(c_string type_name)
# Benchmark hook; declared `except *`, so Cython checks for a Python
# exception after the call.
cdef extern from "arrow/python/benchmark.h" namespace "arrow::py::benchmark":
void Benchmark_PandasObjectIsNull(object lst) except *
# GdbTestSession is the Cython name for arrow::gdb::TestSession.
cdef extern from "arrow/python/gdb.h" namespace "arrow::gdb" nogil:
void GdbTestSession "arrow::gdb::TestSession"()

View File

@@ -0,0 +1,286 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from libcpp.string cimport string as c_string
from libcpp.unordered_set cimport unordered_set as c_unordered_set
from libc.stdint cimport int64_t, int32_t, uint8_t, uintptr_t
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
# Declarations from "gandiva/node.h" in the C++ namespace gandiva; the
# block is nogil.
cdef extern from "gandiva/node.h" namespace "gandiva" nogil:
# Cython-side name for gandiva::Node, exposing a string rendering and
# the node's return data type.
cdef cppclass CNode" gandiva::Node":
c_string ToString()
shared_ptr[CDataType] return_type()
# Cython-side name for gandiva::Expression, exposing a string rendering,
# its root node and its result field.
cdef cppclass CGandivaExpression" gandiva::Expression":
c_string ToString()
shared_ptr[CNode] root()
shared_ptr[CField] result()
# Vector aliases mapped onto the gandiva::NodeVector and
# gandiva::ExpressionVector C++ names.
ctypedef vector[shared_ptr[CNode]] CNodeVector" gandiva::NodeVector"
ctypedef vector[shared_ptr[CGandivaExpression]] \
CExpressionVector" gandiva::ExpressionVector"
# Declarations from "gandiva/selection_vector.h" in the C++ namespace
# gandiva; the block is nogil.
cdef extern from "gandiva/selection_vector.h" namespace "gandiva" nogil:
# Cython-side name for gandiva::SelectionVector; only ToArray() is
# exposed.
cdef cppclass CSelectionVector" gandiva::SelectionVector":
shared_ptr[CArray] ToArray()
# Flat Cython names for the members of the C++ enum
# gandiva::SelectionVector::Mode. Some C names below continue across a
# line break inside the string literal, so keep those lines together.
enum CSelectionVector_Mode" gandiva::SelectionVector::Mode":
CSelectionVector_Mode_NONE" gandiva::SelectionVector::Mode::MODE_NONE"
CSelectionVector_Mode_UINT16" \
gandiva::SelectionVector::Mode::MODE_UINT16"
CSelectionVector_Mode_UINT32" \
gandiva::SelectionVector::Mode::MODE_UINT32"
CSelectionVector_Mode_UINT64" \
gandiva::SelectionVector::Mode::MODE_UINT64"
# Free-function aliases for the static factories MakeInt16 / MakeInt32 /
# MakeInt64. Each takes a slot count and a memory pool, writes the new
# vector through the "selection_vector" pointer and returns a CStatus.
cdef CStatus SelectionVector_MakeInt16\
"gandiva::SelectionVector::MakeInt16"(
int64_t max_slots, CMemoryPool* pool,
shared_ptr[CSelectionVector]* selection_vector)
cdef CStatus SelectionVector_MakeInt32\
"gandiva::SelectionVector::MakeInt32"(
int64_t max_slots, CMemoryPool* pool,
shared_ptr[CSelectionVector]* selection_vector)
cdef CStatus SelectionVector_MakeInt64\
"gandiva::SelectionVector::MakeInt64"(
int64_t max_slots, CMemoryPool* pool,
shared_ptr[CSelectionVector]* selection_vector)
cdef inline CSelectionVector_Mode _ensure_selection_mode(str name) except *:
    # Translate a selection-mode name into its CSelectionVector_Mode value.
    #
    # The lookup is case-insensitive: the name is upper-cased before being
    # compared against 'NONE', 'UINT16', 'UINT32' and 'UINT64'. Any other
    # name raises ValueError, which "except *" lets Cython propagate even
    # though the return type is a C enum.
    normalized = name.upper()
    if normalized == 'NONE':
        return CSelectionVector_Mode_NONE
    if normalized == 'UINT16':
        return CSelectionVector_Mode_UINT16
    if normalized == 'UINT32':
        return CSelectionVector_Mode_UINT32
    if normalized == 'UINT64':
        return CSelectionVector_Mode_UINT64
    # The message reports the caller's original spelling, not the
    # upper-cased form.
    raise ValueError('Invalid value for Selection Mode: {!r}'.format(name))
cdef inline str _selection_mode_name(CSelectionVector_Mode ctype):
    # Translate a CSelectionVector_Mode value into its name: one of 'NONE',
    # 'UINT16', 'UINT32' or 'UINT64'. A value matching none of the four
    # known enum members raises RuntimeError.
    if ctype == CSelectionVector_Mode_NONE:
        return 'NONE'
    if ctype == CSelectionVector_Mode_UINT16:
        return 'UINT16'
    if ctype == CSelectionVector_Mode_UINT32:
        return 'UINT32'
    if ctype == CSelectionVector_Mode_UINT64:
        return 'UINT64'
    raise RuntimeError('Unexpected CSelectionVector_Mode value')
# Declaration from "gandiva/condition.h" in the C++ namespace gandiva; the
# block is nogil.
cdef extern from "gandiva/condition.h" namespace "gandiva" nogil:
# Cython-side name for gandiva::Condition, exposing a string rendering,
# its root node and its result field.
cdef cppclass CCondition" gandiva::Condition":
c_string ToString()
shared_ptr[CNode] root()
shared_ptr[CField] result()
# Declaration from "gandiva/arrow.h" in the C++ namespace gandiva; the
# block is nogil.
cdef extern from "gandiva/arrow.h" namespace "gandiva" nogil:
# Vector-of-arrays alias mapped onto the gandiva::ArrayVector C++ name.
ctypedef vector[shared_ptr[CArray]] CArrayVector" gandiva::ArrayVector"
# Free-function aliases for members of gandiva::TreeExprBuilder, declared
# from "gandiva/tree_expr_builder.h" (namespace gandiva, nogil). Each
# Cython name on the left is bound to the quoted C++ name that follows it.
cdef extern from "gandiva/tree_expr_builder.h" namespace "gandiva" nogil:
# Literal builders. All eleven are bound to the same overloaded C++ name,
# MakeLiteral; the distinct Cython names pin the argument type (bool,
# unsigned and signed 8-64 bit integers, float, double) that selects the
# overload.
cdef shared_ptr[CNode] TreeExprBuilder_MakeBoolLiteral \
"gandiva::TreeExprBuilder::MakeLiteral"(c_bool value)
cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt8Literal \
"gandiva::TreeExprBuilder::MakeLiteral"(uint8_t value)
cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt16Literal \
"gandiva::TreeExprBuilder::MakeLiteral"(uint16_t value)
cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt32Literal \
"gandiva::TreeExprBuilder::MakeLiteral"(uint32_t value)
cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt64Literal \
"gandiva::TreeExprBuilder::MakeLiteral"(uint64_t value)
cdef shared_ptr[CNode] TreeExprBuilder_MakeInt8Literal \
"gandiva::TreeExprBuilder::MakeLiteral"(int8_t value)
cdef shared_ptr[CNode] TreeExprBuilder_MakeInt16Literal \
"gandiva::TreeExprBuilder::MakeLiteral"(int16_t value)
cdef shared_ptr[CNode] TreeExprBuilder_MakeInt32Literal \
"gandiva::TreeExprBuilder::MakeLiteral"(int32_t value)
cdef shared_ptr[CNode] TreeExprBuilder_MakeInt64Literal \
"gandiva::TreeExprBuilder::MakeLiteral"(int64_t value)
cdef shared_ptr[CNode] TreeExprBuilder_MakeFloatLiteral \
"gandiva::TreeExprBuilder::MakeLiteral"(float value)
cdef shared_ptr[CNode] TreeExprBuilder_MakeDoubleLiteral \
"gandiva::TreeExprBuilder::MakeLiteral"(double value)
# String and binary literals use dedicated C++ names; both take a
# const c_string&.
cdef shared_ptr[CNode] TreeExprBuilder_MakeStringLiteral \
"gandiva::TreeExprBuilder::MakeStringLiteral"(const c_string& value)
cdef shared_ptr[CNode] TreeExprBuilder_MakeBinaryLiteral \
"gandiva::TreeExprBuilder::MakeBinaryLiteral"(const c_string& value)
# Combines a root node and a result field into a CGandivaExpression.
cdef shared_ptr[CGandivaExpression] TreeExprBuilder_MakeExpression\
"gandiva::TreeExprBuilder::MakeExpression"(
shared_ptr[CNode] root_node, shared_ptr[CField] result_field)
# Node builders: a named function over child nodes with an explicit return
# type, a field reference, and an if/else node with an explicit return
# type.
cdef shared_ptr[CNode] TreeExprBuilder_MakeFunction \
"gandiva::TreeExprBuilder::MakeFunction"(
const c_string& name, const CNodeVector& children,
shared_ptr[CDataType] return_type)
cdef shared_ptr[CNode] TreeExprBuilder_MakeField \
"gandiva::TreeExprBuilder::MakeField"(shared_ptr[CField] field)
cdef shared_ptr[CNode] TreeExprBuilder_MakeIf \
"gandiva::TreeExprBuilder::MakeIf"(
shared_ptr[CNode] condition, shared_ptr[CNode] this_node,
shared_ptr[CNode] else_node, shared_ptr[CDataType] return_type)
# And / Or nodes over a vector of child nodes.
cdef shared_ptr[CNode] TreeExprBuilder_MakeAnd \
"gandiva::TreeExprBuilder::MakeAnd"(const CNodeVector& children)
cdef shared_ptr[CNode] TreeExprBuilder_MakeOr \
"gandiva::TreeExprBuilder::MakeOr"(const CNodeVector& children)
# Wraps a node into a CCondition.
cdef shared_ptr[CCondition] TreeExprBuilder_MakeCondition \
"gandiva::TreeExprBuilder::MakeCondition"(
shared_ptr[CNode] condition)
# "In" expression builders: each takes a node plus an unordered set of
# values. The Int32 / Time32 / Date32 variants take int32_t sets; the
# Int64 / Time64 / Date64 / TimeStamp variants take int64_t sets; the
# String / Binary variants take c_string sets.
cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionInt32 \
"gandiva::TreeExprBuilder::MakeInExpressionInt32"(
shared_ptr[CNode] node, const c_unordered_set[int32_t]& values)
cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionInt64 \
"gandiva::TreeExprBuilder::MakeInExpressionInt64"(
shared_ptr[CNode] node, const c_unordered_set[int64_t]& values)
cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionTime32 \
"gandiva::TreeExprBuilder::MakeInExpressionTime32"(
shared_ptr[CNode] node, const c_unordered_set[int32_t]& values)
cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionTime64 \
"gandiva::TreeExprBuilder::MakeInExpressionTime64"(
shared_ptr[CNode] node, const c_unordered_set[int64_t]& values)
cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionDate32 \
"gandiva::TreeExprBuilder::MakeInExpressionDate32"(
shared_ptr[CNode] node, const c_unordered_set[int32_t]& values)
cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionDate64 \
"gandiva::TreeExprBuilder::MakeInExpressionDate64"(
shared_ptr[CNode] node, const c_unordered_set[int64_t]& values)
cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionTimeStamp \
"gandiva::TreeExprBuilder::MakeInExpressionTimeStamp"(
shared_ptr[CNode] node, const c_unordered_set[int64_t]& values)
cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionString \
"gandiva::TreeExprBuilder::MakeInExpressionString"(
shared_ptr[CNode] node, const c_unordered_set[c_string]& values)
cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionBinary \
"gandiva::TreeExprBuilder::MakeInExpressionBinary"(
shared_ptr[CNode] node, const c_unordered_set[c_string]& values)
# Declarations from "gandiva/projector.h" in the C++ namespace gandiva;
# the block is nogil.
cdef extern from "gandiva/projector.h" namespace "gandiva" nogil:
# Cython-side name for gandiva::Projector.
cdef cppclass CProjector" gandiva::Projector":
# Two Evaluate overloads: both take a record batch, a memory pool and
# an output array-vector pointer; the second additionally takes a
# selection vector pointer. Both return a CStatus.
CStatus Evaluate(
const CRecordBatch& batch, CMemoryPool* pool,
const CArrayVector* output)
CStatus Evaluate(
const CRecordBatch& batch,
const CSelectionVector* selection,
CMemoryPool* pool,
const CArrayVector* output)
c_string DumpIR()
# Projector_Make is declared twice against the same C++ name
# (gandiva::Projector::Make), giving two overloads. Both take a schema and
# an expression vector and write the result through "projector"; the
# second additionally takes a selection mode and a configuration.
cdef CStatus Projector_Make \
"gandiva::Projector::Make"(
shared_ptr[CSchema] schema, const CExpressionVector& children,
shared_ptr[CProjector]* projector)
cdef CStatus Projector_Make \
"gandiva::Projector::Make"(
shared_ptr[CSchema] schema, const CExpressionVector& children,
CSelectionVector_Mode mode,
shared_ptr[CConfiguration] configuration,
shared_ptr[CProjector]* projector)
# Declarations from "gandiva/filter.h" in the C++ namespace gandiva; the
# block is nogil.
cdef extern from "gandiva/filter.h" namespace "gandiva" nogil:
# Cython-side name for gandiva::Filter.
cdef cppclass CFilter" gandiva::Filter":
# Takes a record batch and a shared_ptr to a selection vector named
# "out_selection"; returns a CStatus.
CStatus Evaluate(
const CRecordBatch& batch,
shared_ptr[CSelectionVector] out_selection)
c_string DumpIR()
# Free-function alias for the static gandiva::Filter::Make: takes a schema
# and a condition, writes the result through "filter" and returns a
# CStatus.
cdef CStatus Filter_Make \
"gandiva::Filter::Make"(
shared_ptr[CSchema] schema, shared_ptr[CCondition] condition,
shared_ptr[CFilter]* filter)
# Declaration from "gandiva/function_signature.h" in the C++ namespace
# gandiva; the block is nogil.
cdef extern from "gandiva/function_signature.h" namespace "gandiva" nogil:
# Cython-side name for gandiva::FunctionSignature.
cdef cppclass CFunctionSignature" gandiva::FunctionSignature":
# Constructor taking a base name, the parameter types and the return
# type.
CFunctionSignature(const c_string& base_name,
vector[shared_ptr[CDataType]] param_types,
shared_ptr[CDataType] ret_type)
# Const accessors for the three constructor inputs, plus a string
# rendering.
shared_ptr[CDataType] ret_type() const
const c_string& base_name() const
vector[shared_ptr[CDataType]] param_types() const
c_string ToString() const
# Declaration from "gandiva/expression_registry.h" in the C++ namespace
# gandiva; the block is nogil.
cdef extern from "gandiva/expression_registry.h" namespace "gandiva" nogil:
# Takes no arguments and returns a vector of shared_ptr to
# CFunctionSignature.
cdef vector[shared_ptr[CFunctionSignature]] \
GetRegisteredFunctionSignatures()
# Declarations from "gandiva/configuration.h" in the C++ namespace gandiva;
# the block is nogil.
cdef extern from "gandiva/configuration.h" namespace "gandiva" nogil:
# Opaque handle for gandiva::Configuration: no members are declared.
cdef cppclass CConfiguration" gandiva::Configuration":
pass
# Cython-side name for gandiva::ConfigurationBuilder; only the static
# DefaultConfiguration() factory is exposed, returning a shared_ptr to
# CConfiguration.
cdef cppclass CConfigurationBuilder \
" gandiva::ConfigurationBuilder":
@staticmethod
shared_ptr[CConfiguration] DefaultConfiguration()

View File

@@ -0,0 +1,25 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.common cimport *
# CStatus predicates from "plasma/common.h" in the C++ namespace plasma;
# the block is nogil. Each takes a CStatus and returns a C++ bool.
# NOTE(review): by their names they classify a status as "object exists",
# "object not found" or "store full" -- confirm against the header.
cdef extern from "plasma/common.h" namespace "plasma" nogil:
cdef c_bool IsPlasmaObjectExists(const CStatus& status)
cdef c_bool IsPlasmaObjectNotFound(const CStatus& status)
cdef c_bool IsPlasmaStoreFull(const CStatus& status)