mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-07-01 22:13:01 +00:00
first commit
This commit is contained in:
@ -0,0 +1,322 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(ARROW_HAVE_AVX2)
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include "arrow/compute/exec/partition_util.h"
|
||||
#include "arrow/compute/exec/util.h"
|
||||
#include "arrow/memory_pool.h"
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/status.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace compute {
|
||||
|
||||
// A set of pre-generated bit masks from a 64-bit word.
|
||||
//
|
||||
// It is used to map selected bits of hash to a bit mask that will be used in
|
||||
// a Bloom filter.
|
||||
//
|
||||
// These bit masks need to look random and need to have a similar fractions of
|
||||
// bits set in order for a Bloom filter to have a low false positives rate.
|
||||
//
|
||||
struct ARROW_EXPORT BloomFilterMasks {
|
||||
// Generate all masks as a single bit vector. Each bit offset in this bit
|
||||
// vector corresponds to a single mask.
|
||||
// In each consecutive kBitsPerMask bits, there must be between
|
||||
// kMinBitsSet and kMaxBitsSet bits set.
|
||||
//
|
||||
BloomFilterMasks();
|
||||
|
||||
inline uint64_t mask(int bit_offset) {
|
||||
#if ARROW_LITTLE_ENDIAN
|
||||
return (util::SafeLoadAs<uint64_t>(masks_ + bit_offset / 8) >> (bit_offset % 8)) &
|
||||
kFullMask;
|
||||
#else
|
||||
return (BYTESWAP(util::SafeLoadAs<uint64_t>(masks_ + bit_offset / 8)) >>
|
||||
(bit_offset % 8)) &
|
||||
kFullMask;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Masks are 57 bits long because then they can be accessed at an
|
||||
// arbitrary bit offset using a single unaligned 64-bit load instruction.
|
||||
//
|
||||
static constexpr int kBitsPerMask = 57;
|
||||
static constexpr uint64_t kFullMask = (1ULL << kBitsPerMask) - 1;
|
||||
|
||||
// Minimum and maximum number of bits set in each mask.
|
||||
// This constraint is enforced when generating the bit masks.
|
||||
// Values should be close to each other and chosen as to minimize a Bloom
|
||||
// filter false positives rate.
|
||||
//
|
||||
static constexpr int kMinBitsSet = 4;
|
||||
static constexpr int kMaxBitsSet = 5;
|
||||
|
||||
// Number of generated masks.
|
||||
// Having more masks to choose will improve false positives rate of Bloom
|
||||
// filter but will also use more memory, which may lead to more CPU cache
|
||||
// misses.
|
||||
// The chosen value results in using only a few cache-lines for mask lookups,
|
||||
// while providing a good variety of available bit masks.
|
||||
//
|
||||
static constexpr int kLogNumMasks = 10;
|
||||
static constexpr int kNumMasks = 1 << kLogNumMasks;
|
||||
|
||||
// Data of masks. Masks are stored in a single bit vector. Nth mask is
|
||||
// kBitsPerMask bits starting at bit offset N.
|
||||
//
|
||||
static constexpr int kTotalBytes = (kNumMasks + 64) / 8;
|
||||
uint8_t masks_[kTotalBytes];
|
||||
};
|
||||
|
||||
// A variant of a blocked Bloom filter implementation.
|
||||
// A Bloom filter is a data structure that provides approximate membership test
|
||||
// functionality based only on the hash of the key. Membership test may return
|
||||
// false positives but not false negatives. Approximation of the result allows
|
||||
// in general case (for arbitrary data types of keys) to save on both memory and
|
||||
// lookup cost compared to the accurate membership test.
|
||||
// The accurate test may sometimes still be cheaper for a specific data types
|
||||
// and inputs, e.g. integers from a small range.
|
||||
//
|
||||
// This blocked Bloom filter is optimized for use in hash joins, to achieve a
|
||||
// good balance between the size of the filter, the cost of its building and
|
||||
// querying and the rate of false positives.
|
||||
//
|
||||
class ARROW_EXPORT BlockedBloomFilter {
|
||||
friend class BloomFilterBuilder_SingleThreaded;
|
||||
friend class BloomFilterBuilder_Parallel;
|
||||
|
||||
public:
|
||||
BlockedBloomFilter() : log_num_blocks_(0), num_blocks_(0), blocks_(NULLPTR) {}
|
||||
|
||||
inline bool Find(uint64_t hash) const {
|
||||
uint64_t m = mask(hash);
|
||||
uint64_t b = blocks_[block_id(hash)];
|
||||
return (b & m) == m;
|
||||
}
|
||||
|
||||
// Uses SIMD if available for smaller Bloom filters.
|
||||
// Uses memory prefetching for larger Bloom filters.
|
||||
//
|
||||
void Find(int64_t hardware_flags, int64_t num_rows, const uint32_t* hashes,
|
||||
uint8_t* result_bit_vector, bool enable_prefetch = true) const;
|
||||
void Find(int64_t hardware_flags, int64_t num_rows, const uint64_t* hashes,
|
||||
uint8_t* result_bit_vector, bool enable_prefetch = true) const;
|
||||
|
||||
int log_num_blocks() const { return log_num_blocks_; }
|
||||
|
||||
int NumHashBitsUsed() const;
|
||||
|
||||
bool IsSameAs(const BlockedBloomFilter* other) const;
|
||||
|
||||
int64_t NumBitsSet() const;
|
||||
|
||||
// Folding of a block Bloom filter after the initial version
|
||||
// has been built.
|
||||
//
|
||||
// One of the parameters for creation of Bloom filter is the number
|
||||
// of bits allocated for it. The more bits allocated, the lower the
|
||||
// probability of false positives. A good heuristic is to aim for
|
||||
// half of the bits set in the constructed Bloom filter. This should
|
||||
// result in a good trade off between size (and following cost of
|
||||
// memory accesses) and false positives rate.
|
||||
//
|
||||
// There might have been many duplicate keys in the input provided
|
||||
// to Bloom filter builder. In that case the resulting bit vector
|
||||
// would be more sparse then originally intended. It is possible to
|
||||
// easily correct that and cut in half the size of Bloom filter
|
||||
// after it has already been constructed. The process to do that is
|
||||
// approximately equal to OR-ing bits from upper and lower half (the
|
||||
// way we address these bits when inserting or querying a hash makes
|
||||
// such folding in half possible).
|
||||
//
|
||||
// We will keep folding as long as the fraction of bits set is less
|
||||
// than 1/4. The resulting bit vector density should be in the [1/4,
|
||||
// 1/2) range.
|
||||
//
|
||||
void Fold();
|
||||
|
||||
private:
|
||||
Status CreateEmpty(int64_t num_rows_to_insert, MemoryPool* pool);
|
||||
|
||||
inline void Insert(uint64_t hash) {
|
||||
uint64_t m = mask(hash);
|
||||
uint64_t& b = blocks_[block_id(hash)];
|
||||
b |= m;
|
||||
}
|
||||
|
||||
void Insert(int64_t hardware_flags, int64_t num_rows, const uint32_t* hashes);
|
||||
void Insert(int64_t hardware_flags, int64_t num_rows, const uint64_t* hashes);
|
||||
|
||||
inline uint64_t mask(uint64_t hash) const {
|
||||
// The lowest bits of hash are used to pick mask index.
|
||||
//
|
||||
int mask_id = static_cast<int>(hash & (BloomFilterMasks::kNumMasks - 1));
|
||||
uint64_t result = masks_.mask(mask_id);
|
||||
|
||||
// The next set of hash bits is used to pick the amount of bit
|
||||
// rotation of the mask.
|
||||
//
|
||||
int rotation = (hash >> BloomFilterMasks::kLogNumMasks) & 63;
|
||||
result = ROTL64(result, rotation);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
inline int64_t block_id(uint64_t hash) const {
|
||||
// The next set of hash bits following the bits used to select a
|
||||
// mask is used to pick block id (index of 64-bit word in a bit
|
||||
// vector).
|
||||
//
|
||||
return (hash >> (BloomFilterMasks::kLogNumMasks + 6)) & (num_blocks_ - 1);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void InsertImp(int64_t num_rows, const T* hashes);
|
||||
|
||||
template <typename T>
|
||||
inline void FindImp(int64_t num_rows, const T* hashes, uint8_t* result_bit_vector,
|
||||
bool enable_prefetch) const;
|
||||
|
||||
void SingleFold(int num_folds);
|
||||
|
||||
#if defined(ARROW_HAVE_AVX2)
|
||||
inline __m256i mask_avx2(__m256i hash) const;
|
||||
inline __m256i block_id_avx2(__m256i hash) const;
|
||||
int64_t Insert_avx2(int64_t num_rows, const uint32_t* hashes);
|
||||
int64_t Insert_avx2(int64_t num_rows, const uint64_t* hashes);
|
||||
template <typename T>
|
||||
int64_t InsertImp_avx2(int64_t num_rows, const T* hashes);
|
||||
int64_t Find_avx2(int64_t num_rows, const uint32_t* hashes,
|
||||
uint8_t* result_bit_vector) const;
|
||||
int64_t Find_avx2(int64_t num_rows, const uint64_t* hashes,
|
||||
uint8_t* result_bit_vector) const;
|
||||
template <typename T>
|
||||
int64_t FindImp_avx2(int64_t num_rows, const T* hashes,
|
||||
uint8_t* result_bit_vector) const;
|
||||
#endif
|
||||
|
||||
bool UsePrefetch() const {
|
||||
return num_blocks_ * sizeof(uint64_t) > kPrefetchLimitBytes;
|
||||
}
|
||||
|
||||
static constexpr int64_t kPrefetchLimitBytes = 256 * 1024;
|
||||
|
||||
static BloomFilterMasks masks_;
|
||||
|
||||
// Total number of bits used by block Bloom filter must be a power
|
||||
// of 2.
|
||||
//
|
||||
int log_num_blocks_;
|
||||
int64_t num_blocks_;
|
||||
|
||||
// Buffer allocated to store an array of power of 2 64-bit blocks.
|
||||
//
|
||||
std::shared_ptr<Buffer> buf_;
|
||||
// Pointer to mutable data owned by Buffer
|
||||
//
|
||||
uint64_t* blocks_;
|
||||
};
|
||||
|
||||
// We have two separate implementations of building a Bloom filter, multi-threaded and
|
||||
// single-threaded.
|
||||
//
|
||||
// Single threaded version is useful in two ways:
|
||||
// a) It allows to verify parallel implementation in tests (the single threaded one is
|
||||
// simpler and can be used as the source of truth).
|
||||
// b) It is preferred for small and medium size Bloom filters, because it skips extra
|
||||
// synchronization related steps from parallel variant (partitioning and taking locks).
|
||||
//
|
||||
enum class ARROW_EXPORT BloomFilterBuildStrategy {
|
||||
SINGLE_THREADED = 0,
|
||||
PARALLEL = 1,
|
||||
};
|
||||
|
||||
class ARROW_EXPORT BloomFilterBuilder {
|
||||
public:
|
||||
virtual ~BloomFilterBuilder() = default;
|
||||
virtual Status Begin(size_t num_threads, int64_t hardware_flags, MemoryPool* pool,
|
||||
int64_t num_rows, int64_t num_batches,
|
||||
BlockedBloomFilter* build_target) = 0;
|
||||
virtual int64_t num_tasks() const { return 0; }
|
||||
virtual Status PushNextBatch(size_t thread_index, int num_rows,
|
||||
const uint32_t* hashes) = 0;
|
||||
virtual Status PushNextBatch(size_t thread_index, int num_rows,
|
||||
const uint64_t* hashes) = 0;
|
||||
virtual void CleanUp() {}
|
||||
static std::unique_ptr<BloomFilterBuilder> Make(BloomFilterBuildStrategy strategy);
|
||||
};
|
||||
|
||||
class BloomFilterBuilder_SingleThreaded : public BloomFilterBuilder {
|
||||
public:
|
||||
Status Begin(size_t num_threads, int64_t hardware_flags, MemoryPool* pool,
|
||||
int64_t num_rows, int64_t num_batches,
|
||||
BlockedBloomFilter* build_target) override;
|
||||
|
||||
Status PushNextBatch(size_t /*thread_index*/, int num_rows,
|
||||
const uint32_t* hashes) override;
|
||||
|
||||
Status PushNextBatch(size_t /*thread_index*/, int num_rows,
|
||||
const uint64_t* hashes) override;
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
void PushNextBatchImp(int num_rows, const T* hashes);
|
||||
|
||||
int64_t hardware_flags_;
|
||||
BlockedBloomFilter* build_target_;
|
||||
};
|
||||
|
||||
class BloomFilterBuilder_Parallel : public BloomFilterBuilder {
|
||||
public:
|
||||
Status Begin(size_t num_threads, int64_t hardware_flags, MemoryPool* pool,
|
||||
int64_t num_rows, int64_t num_batches,
|
||||
BlockedBloomFilter* build_target) override;
|
||||
|
||||
Status PushNextBatch(size_t thread_id, int num_rows, const uint32_t* hashes) override;
|
||||
|
||||
Status PushNextBatch(size_t thread_id, int num_rows, const uint64_t* hashes) override;
|
||||
|
||||
void CleanUp() override;
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
void PushNextBatchImp(size_t thread_id, int num_rows, const T* hashes);
|
||||
|
||||
int64_t hardware_flags_;
|
||||
BlockedBloomFilter* build_target_;
|
||||
int log_num_prtns_;
|
||||
struct ThreadLocalState {
|
||||
std::vector<uint32_t> partitioned_hashes_32;
|
||||
std::vector<uint64_t> partitioned_hashes_64;
|
||||
std::vector<uint16_t> partition_ranges;
|
||||
std::vector<int> unprocessed_partition_ids;
|
||||
};
|
||||
std::vector<ThreadLocalState> thread_local_states_;
|
||||
PartitionLocks prtn_locks_;
|
||||
};
|
||||
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
@ -0,0 +1,460 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/compute/exec.h"
|
||||
#include "arrow/compute/exec/util.h"
|
||||
#include "arrow/compute/type_fwd.h"
|
||||
#include "arrow/type_fwd.h"
|
||||
#include "arrow/util/async_util.h"
|
||||
#include "arrow/util/cancel.h"
|
||||
#include "arrow/util/key_value_metadata.h"
|
||||
#include "arrow/util/macros.h"
|
||||
#include "arrow/util/optional.h"
|
||||
#include "arrow/util/tracing.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
namespace compute {
|
||||
|
||||
/// \brief A collection of ExecNodes that are validated, started and stopped together
class ARROW_EXPORT ExecPlan : public std::enable_shared_from_this<ExecPlan> {
 public:
  using NodeVector = std::vector<ExecNode*>;

  virtual ~ExecPlan() = default;

  /// The ExecContext this plan was constructed with
  ExecContext* exec_context() const { return exec_context_; }

  /// Create an exec plan that has no nodes yet
  static Result<std::shared_ptr<ExecPlan>> Make(
      ExecContext* = default_exec_context(),
      std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);

  /// Hand `node` over to the plan; a raw ExecNode pointer is returned
  ExecNode* AddNode(std::unique_ptr<ExecNode> node);

  /// \brief Construct a Node from `args`, add it to the plan and return it
  ///
  /// The returned pointer keeps the concrete Node type.
  template <typename Node, typename... Args>
  Node* EmplaceNode(Args&&... args) {
    std::unique_ptr<Node> owned{new Node{std::forward<Args>(args)...}};
    // Remember the typed pointer before ownership moves into AddNode().
    Node* const typed = owned.get();
    AddNode(std::move(owned));
    return typed;
  }

  /// The initial inputs
  const NodeVector& sources() const;

  /// The final outputs
  const NodeVector& sinks() const;

  Status Validate();

  /// \brief Start producing on all nodes
  ///
  /// Nodes are started in reverse topological order, so that every node
  /// is started before all of its inputs.
  Status StartProducing();

  /// \brief Stop producing on all nodes
  ///
  /// Nodes are stopped in topological order, so that every node
  /// is stopped before all of its outputs.
  void StopProducing();

  /// \brief A future that is marked finished once all nodes have stopped producing
  Future<> finished();

  /// \brief Whether the plan has non-empty metadata
  bool HasMetadata() const;

  /// \brief The metadata attached to the plan
  std::shared_ptr<const KeyValueMetadata> metadata() const;

  std::string ToString() const;

 protected:
  ExecContext* exec_context_;
  // Only reachable from subclasses; plans are obtained through Make().
  explicit ExecPlan(ExecContext* exec_context) : exec_context_(exec_context) {}
};
|
||||
|
||||
class ARROW_EXPORT ExecNode {
|
||||
public:
|
||||
using NodeVector = std::vector<ExecNode*>;
|
||||
|
||||
virtual ~ExecNode() = default;
|
||||
|
||||
virtual const char* kind_name() const = 0;
|
||||
|
||||
// The number of inputs/outputs expected by this node
|
||||
int num_inputs() const { return static_cast<int>(inputs_.size()); }
|
||||
int num_outputs() const { return num_outputs_; }
|
||||
|
||||
/// This node's predecessors in the exec plan
|
||||
const NodeVector& inputs() const { return inputs_; }
|
||||
|
||||
/// \brief Labels identifying the function of each input.
|
||||
const std::vector<std::string>& input_labels() const { return input_labels_; }
|
||||
|
||||
/// This node's successors in the exec plan
|
||||
const NodeVector& outputs() const { return outputs_; }
|
||||
|
||||
/// The datatypes for batches produced by this node
|
||||
const std::shared_ptr<Schema>& output_schema() const { return output_schema_; }
|
||||
|
||||
/// This node's exec plan
|
||||
ExecPlan* plan() { return plan_; }
|
||||
|
||||
/// \brief An optional label, for display and debugging
|
||||
///
|
||||
/// There is no guarantee that this value is non-empty or unique.
|
||||
const std::string& label() const { return label_; }
|
||||
void SetLabel(std::string label) { label_ = std::move(label); }
|
||||
|
||||
Status Validate() const;
|
||||
|
||||
/// Upstream API:
|
||||
/// These functions are called by input nodes that want to inform this node
|
||||
/// about an updated condition (a new input batch, an error, an impeding
|
||||
/// end of stream).
|
||||
///
|
||||
/// Implementation rules:
|
||||
/// - these may be called anytime after StartProducing() has succeeded
|
||||
/// (and even during or after StopProducing())
|
||||
/// - these may be called concurrently
|
||||
/// - these are allowed to call back into PauseProducing(), ResumeProducing()
|
||||
/// and StopProducing()
|
||||
|
||||
/// Transfer input batch to ExecNode
|
||||
virtual void InputReceived(ExecNode* input, ExecBatch batch) = 0;
|
||||
|
||||
/// Signal error to ExecNode
|
||||
virtual void ErrorReceived(ExecNode* input, Status error) = 0;
|
||||
|
||||
/// Mark the inputs finished after the given number of batches.
|
||||
///
|
||||
/// This may be called before all inputs are received. This simply fixes
|
||||
/// the total number of incoming batches for an input, so that the ExecNode
|
||||
/// knows when it has received all input, regardless of order.
|
||||
virtual void InputFinished(ExecNode* input, int total_batches) = 0;
|
||||
|
||||
/// Lifecycle API:
|
||||
/// - start / stop to initiate and terminate production
|
||||
/// - pause / resume to apply backpressure
|
||||
///
|
||||
/// Implementation rules:
|
||||
/// - StartProducing() should not recurse into the inputs, as it is
|
||||
/// handled by ExecPlan::StartProducing()
|
||||
/// - PauseProducing(), ResumeProducing(), StopProducing() may be called
|
||||
/// concurrently (but only after StartProducing() has returned successfully)
|
||||
/// - PauseProducing(), ResumeProducing(), StopProducing() may be called
|
||||
/// by the downstream nodes' InputReceived(), ErrorReceived(), InputFinished()
|
||||
/// methods
|
||||
/// - StopProducing() should recurse into the inputs
|
||||
/// - StopProducing() must be idempotent
|
||||
|
||||
// XXX What happens if StartProducing() calls an output's InputReceived()
|
||||
// synchronously, and InputReceived() decides to call back into StopProducing()
|
||||
// (or PauseProducing()) because it received enough data?
|
||||
//
|
||||
// Right now, since synchronous calls happen in both directions (input to
|
||||
// output and then output to input), a node must be careful to be reentrant
|
||||
// against synchronous calls from its output, *and* also concurrent calls from
|
||||
// other threads. The most reliable solution is to update the internal state
|
||||
// first, and notify outputs only at the end.
|
||||
//
|
||||
// Alternate rules:
|
||||
// - StartProducing(), ResumeProducing() can call synchronously into
|
||||
// its ouputs' consuming methods (InputReceived() etc.)
|
||||
// - InputReceived(), ErrorReceived(), InputFinished() can call asynchronously
|
||||
// into its inputs' PauseProducing(), StopProducing()
|
||||
//
|
||||
// Alternate API:
|
||||
// - InputReceived(), ErrorReceived(), InputFinished() return a ProductionHint
|
||||
// enum: either None (default), PauseProducing, ResumeProducing, StopProducing
|
||||
// - A method allows passing a ProductionHint asynchronously from an output node
|
||||
// (replacing PauseProducing(), ResumeProducing(), StopProducing())
|
||||
|
||||
// Concurrent calls to PauseProducing and ResumeProducing can be hard to sequence
|
||||
// as they may travel at different speeds through the plan.
|
||||
//
|
||||
// For example, consider a resume that comes quickly after a pause. If the source
|
||||
// receives the resume before the pause the source may think the destination is full
|
||||
// and halt production which would lead to deadlock.
|
||||
//
|
||||
// To resolve this a counter is sent for all calls to pause/resume. Only the call with
|
||||
// the highest counter value is valid. So if a call to PauseProducing(5) comes after
|
||||
// a call to ResumeProducing(6) then the source should continue producing.
|
||||
//
|
||||
// If a node has multiple outputs it should emit a new counter value to its inputs
|
||||
// whenever any of its outputs changes which means the counters sent to inputs may be
|
||||
// larger than the counters received on its outputs.
|
||||
//
|
||||
// A node with multiple outputs will also need to ensure it is applying backpressure if
|
||||
// any of its outputs is asking to pause
|
||||
|
||||
/// \brief Start producing
|
||||
///
|
||||
/// This must only be called once. If this fails, then other lifecycle
|
||||
/// methods must not be called.
|
||||
///
|
||||
/// This is typically called automatically by ExecPlan::StartProducing().
|
||||
virtual Status StartProducing() = 0;
|
||||
|
||||
/// \brief Pause producing temporarily
|
||||
///
|
||||
/// \param output Pointer to the output that is full
|
||||
/// \param counter Counter used to sequence calls to pause/resume
|
||||
///
|
||||
/// This call is a hint that an output node is currently not willing
|
||||
/// to receive data.
|
||||
///
|
||||
/// This may be called any number of times after StartProducing() succeeds.
|
||||
/// However, the node is still free to produce data (which may be difficult
|
||||
/// to prevent anyway if data is produced using multiple threads).
|
||||
virtual void PauseProducing(ExecNode* output, int32_t counter) = 0;
|
||||
|
||||
/// \brief Resume producing after a temporary pause
|
||||
///
|
||||
/// \param output Pointer to the output that is now free
|
||||
/// \param counter Counter used to sequence calls to pause/resume
|
||||
///
|
||||
/// This call is a hint that an output node is willing to receive data again.
|
||||
///
|
||||
/// This may be called any number of times after StartProducing() succeeds.
|
||||
virtual void ResumeProducing(ExecNode* output, int32_t counter) = 0;
|
||||
|
||||
/// \brief Stop producing definitively to a single output
|
||||
///
|
||||
/// This call is a hint that an output node has completed and is not willing
|
||||
/// to receive any further data.
|
||||
virtual void StopProducing(ExecNode* output) = 0;
|
||||
|
||||
/// \brief Stop producing definitively to all outputs
|
||||
virtual void StopProducing() = 0;
|
||||
|
||||
/// \brief A future which will be marked finished when this node has stopped producing.
|
||||
virtual Future<> finished() = 0;
|
||||
|
||||
std::string ToString(int indent = 0) const;
|
||||
|
||||
protected:
|
||||
ExecNode(ExecPlan* plan, NodeVector inputs, std::vector<std::string> input_labels,
|
||||
std::shared_ptr<Schema> output_schema, int num_outputs);
|
||||
|
||||
// A helper method to send an error status to all outputs.
|
||||
// Returns true if the status was an error.
|
||||
bool ErrorIfNotOk(Status status);
|
||||
|
||||
/// Provide extra info to include in the string representation.
|
||||
virtual std::string ToStringExtra(int indent) const;
|
||||
|
||||
ExecPlan* plan_;
|
||||
std::string label_;
|
||||
|
||||
NodeVector inputs_;
|
||||
std::vector<std::string> input_labels_;
|
||||
|
||||
std::shared_ptr<Schema> output_schema_;
|
||||
int num_outputs_;
|
||||
NodeVector outputs_;
|
||||
|
||||
// Future to sync finished
|
||||
Future<> finished_ = Future<>::MakeFinished();
|
||||
|
||||
util::tracing::Span span_;
|
||||
};
|
||||
|
||||
/// \brief MapNode is an ExecNode type class which process a task like filter/project
|
||||
/// (See SubmitTask method) to each given ExecBatch object, which have one input, one
|
||||
/// output, and are pure functions on the input
|
||||
///
|
||||
/// A simple parallel runner is created with a "map_fn" which is just a function that
|
||||
/// takes a batch in and returns a batch. This simple parallel runner also needs an
|
||||
/// executor (use simple synchronous runner if there is no executor)
|
||||
|
||||
class MapNode : public ExecNode {
|
||||
public:
|
||||
MapNode(ExecPlan* plan, std::vector<ExecNode*> inputs,
|
||||
std::shared_ptr<Schema> output_schema, bool async_mode);
|
||||
|
||||
void ErrorReceived(ExecNode* input, Status error) override;
|
||||
|
||||
void InputFinished(ExecNode* input, int total_batches) override;
|
||||
|
||||
Status StartProducing() override;
|
||||
|
||||
void PauseProducing(ExecNode* output, int32_t counter) override;
|
||||
|
||||
void ResumeProducing(ExecNode* output, int32_t counter) override;
|
||||
|
||||
void StopProducing(ExecNode* output) override;
|
||||
|
||||
void StopProducing() override;
|
||||
|
||||
Future<> finished() override;
|
||||
|
||||
protected:
|
||||
void SubmitTask(std::function<Result<ExecBatch>(ExecBatch)> map_fn, ExecBatch batch);
|
||||
|
||||
void Finish(Status finish_st = Status::OK());
|
||||
|
||||
protected:
|
||||
// Counter for the number of batches received
|
||||
AtomicCounter input_counter_;
|
||||
|
||||
::arrow::internal::Executor* executor_;
|
||||
|
||||
// Variable used to cancel remaining tasks in the executor
|
||||
StopSource stop_source_;
|
||||
};
|
||||
|
||||
/// \brief An extensible registry for factories of ExecNodes
|
||||
class ARROW_EXPORT ExecFactoryRegistry {
|
||||
public:
|
||||
using Factory = std::function<Result<ExecNode*>(ExecPlan*, std::vector<ExecNode*>,
|
||||
const ExecNodeOptions&)>;
|
||||
|
||||
virtual ~ExecFactoryRegistry() = default;
|
||||
|
||||
/// \brief Get the named factory from this registry
|
||||
///
|
||||
/// will raise if factory_name is not found
|
||||
virtual Result<Factory> GetFactory(const std::string& factory_name) = 0;
|
||||
|
||||
/// \brief Add a factory to this registry with the provided name
|
||||
///
|
||||
/// will raise if factory_name is already in the registry
|
||||
virtual Status AddFactory(std::string factory_name, Factory factory) = 0;
|
||||
};
|
||||
|
||||
/// The default registry, which includes built-in factories.
|
||||
ARROW_EXPORT
|
||||
ExecFactoryRegistry* default_exec_factory_registry();
|
||||
|
||||
/// \brief Construct an ExecNode using the named factory
|
||||
inline Result<ExecNode*> MakeExecNode(
|
||||
const std::string& factory_name, ExecPlan* plan, std::vector<ExecNode*> inputs,
|
||||
const ExecNodeOptions& options,
|
||||
ExecFactoryRegistry* registry = default_exec_factory_registry()) {
|
||||
ARROW_ASSIGN_OR_RAISE(auto factory, registry->GetFactory(factory_name));
|
||||
return factory(plan, std::move(inputs), options);
|
||||
}
|
||||
|
||||
/// \brief Helper class for declaring sets of ExecNodes efficiently
|
||||
///
|
||||
/// A Declaration represents an unconstructed ExecNode (and potentially more since its
|
||||
/// inputs may also be Declarations). The node can be constructed and added to a plan
|
||||
/// with Declaration::AddToPlan, which will recursively construct any inputs as necessary.
|
||||
struct ARROW_EXPORT Declaration {
|
||||
using Input = util::Variant<ExecNode*, Declaration>;
|
||||
|
||||
Declaration(std::string factory_name, std::vector<Input> inputs,
|
||||
std::shared_ptr<ExecNodeOptions> options, std::string label)
|
||||
: factory_name{std::move(factory_name)},
|
||||
inputs{std::move(inputs)},
|
||||
options{std::move(options)},
|
||||
label{std::move(label)} {}
|
||||
|
||||
template <typename Options>
|
||||
Declaration(std::string factory_name, std::vector<Input> inputs, Options options,
|
||||
std::string label)
|
||||
: Declaration{std::move(factory_name), std::move(inputs),
|
||||
std::shared_ptr<ExecNodeOptions>(
|
||||
std::make_shared<Options>(std::move(options))),
|
||||
std::move(label)} {}
|
||||
|
||||
template <typename Options>
|
||||
Declaration(std::string factory_name, std::vector<Input> inputs, Options options)
|
||||
: Declaration{std::move(factory_name), std::move(inputs), std::move(options),
|
||||
/*label=*/""} {}
|
||||
|
||||
template <typename Options>
|
||||
Declaration(std::string factory_name, Options options)
|
||||
: Declaration{std::move(factory_name), {}, std::move(options), /*label=*/""} {}
|
||||
|
||||
template <typename Options>
|
||||
Declaration(std::string factory_name, Options options, std::string label)
|
||||
: Declaration{std::move(factory_name), {}, std::move(options), std::move(label)} {}
|
||||
|
||||
/// \brief Convenience factory for the common case of a simple sequence of nodes.
|
||||
///
|
||||
/// Each of decls will be appended to the inputs of the subsequent declaration,
|
||||
/// and the final modified declaration will be returned.
|
||||
///
|
||||
/// Without this convenience factory, constructing a sequence would require explicit,
|
||||
/// difficult-to-read nesting:
|
||||
///
|
||||
/// Declaration{"n3",
|
||||
/// {
|
||||
/// Declaration{"n2",
|
||||
/// {
|
||||
/// Declaration{"n1",
|
||||
/// {
|
||||
/// Declaration{"n0", N0Opts{}},
|
||||
/// },
|
||||
/// N1Opts{}},
|
||||
/// },
|
||||
/// N2Opts{}},
|
||||
/// },
|
||||
/// N3Opts{}};
|
||||
///
|
||||
/// An equivalent Declaration can be constructed more tersely using Sequence:
|
||||
///
|
||||
/// Declaration::Sequence({
|
||||
/// {"n0", N0Opts{}},
|
||||
/// {"n1", N1Opts{}},
|
||||
/// {"n2", N2Opts{}},
|
||||
/// {"n3", N3Opts{}},
|
||||
/// });
|
||||
static Declaration Sequence(std::vector<Declaration> decls);
|
||||
|
||||
Result<ExecNode*> AddToPlan(ExecPlan* plan, ExecFactoryRegistry* registry =
|
||||
default_exec_factory_registry()) const;
|
||||
|
||||
std::string factory_name;
|
||||
std::vector<Input> inputs;
|
||||
std::shared_ptr<ExecNodeOptions> options;
|
||||
std::string label;
|
||||
};
|
||||
|
||||
/// \brief Wrap an ExecBatch generator in a RecordBatchReader.
|
||||
///
|
||||
/// The RecordBatchReader does not impose any ordering on emitted batches.
|
||||
ARROW_EXPORT
|
||||
std::shared_ptr<RecordBatchReader> MakeGeneratorReader(
|
||||
std::shared_ptr<Schema>, std::function<Future<util::optional<ExecBatch>>()>,
|
||||
MemoryPool*);
|
||||
|
||||
// Default value of the max_q argument of MakeReaderGenerator.
constexpr int kDefaultBackgroundMaxQ{32};
// Default value of the q_restart argument of MakeReaderGenerator.
constexpr int kDefaultBackgroundQRestart{16};
|
||||
|
||||
/// \brief Make a generator of RecordBatchReaders
|
||||
///
|
||||
/// Useful as a source node for an Exec plan
|
||||
ARROW_EXPORT
|
||||
Result<std::function<Future<util::optional<ExecBatch>>()>> MakeReaderGenerator(
|
||||
std::shared_ptr<RecordBatchReader> reader, arrow::internal::Executor* io_executor,
|
||||
int max_q = kDefaultBackgroundMaxQ, int q_restart = kDefaultBackgroundQRestart);
|
||||
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
@ -0,0 +1,283 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
// This API is EXPERIMENTAL.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/compute/type_fwd.h"
|
||||
#include "arrow/datum.h"
|
||||
#include "arrow/type_fwd.h"
|
||||
#include "arrow/util/small_vector.h"
|
||||
#include "arrow/util/variant.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace compute {
|
||||
|
||||
/// \defgroup expression-core Expressions to describe transformations in execution plans
|
||||
///
|
||||
/// @{
|
||||
|
||||
/// An unbound expression which maps a single Datum to another Datum.
|
||||
/// An expression is one of
|
||||
/// - A literal Datum.
|
||||
/// - A reference to a single (potentially nested) field of the input Datum.
|
||||
/// - A call to a compute function, with arguments specified by other Expressions.
|
||||
class ARROW_EXPORT Expression {
|
||||
public:
|
||||
struct Call {
|
||||
std::string function_name;
|
||||
std::vector<Expression> arguments;
|
||||
std::shared_ptr<FunctionOptions> options;
|
||||
// Cached hash value
|
||||
size_t hash;
|
||||
|
||||
// post-Bind properties:
|
||||
std::shared_ptr<Function> function;
|
||||
const Kernel* kernel = NULLPTR;
|
||||
std::shared_ptr<KernelState> kernel_state;
|
||||
ValueDescr descr;
|
||||
|
||||
void ComputeHash();
|
||||
};
|
||||
|
||||
std::string ToString() const;
|
||||
bool Equals(const Expression& other) const;
|
||||
size_t hash() const;
|
||||
struct Hash {
|
||||
size_t operator()(const Expression& expr) const { return expr.hash(); }
|
||||
};
|
||||
|
||||
/// Bind this expression to the given input type, looking up Kernels and field types.
|
||||
/// Some expression simplification may be performed and implicit casts will be inserted.
|
||||
/// Any state necessary for execution will be initialized and returned.
|
||||
Result<Expression> Bind(const ValueDescr& in, ExecContext* = NULLPTR) const;
|
||||
Result<Expression> Bind(const Schema& in_schema, ExecContext* = NULLPTR) const;
|
||||
|
||||
// XXX someday
|
||||
// Clone all KernelState in this bound expression. If any function referenced by this
|
||||
// expression has mutable KernelState, it is not safe to execute or apply simplification
|
||||
// passes to it (or copies of it!) from multiple threads. Cloning state produces new
|
||||
// KernelStates where necessary to ensure that Expressions may be manipulated safely
|
||||
// on multiple threads.
|
||||
// Result<ExpressionState> CloneState() const;
|
||||
// Status SetState(ExpressionState);
|
||||
|
||||
/// Return true if all an expression's field references have explicit ValueDescr and all
|
||||
/// of its functions' kernels are looked up.
|
||||
bool IsBound() const;
|
||||
|
||||
/// Return true if this expression is composed only of Scalar literals, field
|
||||
/// references, and calls to ScalarFunctions.
|
||||
bool IsScalarExpression() const;
|
||||
|
||||
/// Return true if this expression is literal and entirely null.
|
||||
bool IsNullLiteral() const;
|
||||
|
||||
/// Return true if this expression could evaluate to true. Will return true for any
|
||||
/// unbound, non-boolean, or unsimplified Expressions
|
||||
bool IsSatisfiable() const;
|
||||
|
||||
// XXX someday
|
||||
// Result<PipelineGraph> GetPipelines();
|
||||
|
||||
/// Access a Call or return nullptr if this expression is not a call
|
||||
const Call* call() const;
|
||||
/// Access a Datum or return nullptr if this expression is not a literal
|
||||
const Datum* literal() const;
|
||||
/// Access a FieldRef or return nullptr if this expression is not a field_ref
|
||||
const FieldRef* field_ref() const;
|
||||
|
||||
/// The type and shape to which this expression will evaluate
|
||||
ValueDescr descr() const;
|
||||
const std::shared_ptr<DataType>& type() const;
|
||||
// XXX someday
|
||||
// NullGeneralization::type nullable() const;
|
||||
|
||||
struct Parameter {
|
||||
FieldRef ref;
|
||||
|
||||
// post-bind properties
|
||||
ValueDescr descr;
|
||||
::arrow::internal::SmallVector<int, 2> indices;
|
||||
};
|
||||
const Parameter* parameter() const;
|
||||
|
||||
Expression() = default;
|
||||
explicit Expression(Call call);
|
||||
explicit Expression(Datum literal);
|
||||
explicit Expression(Parameter parameter);
|
||||
|
||||
private:
|
||||
using Impl = util::Variant<Datum, Parameter, Call>;
|
||||
std::shared_ptr<Impl> impl_;
|
||||
|
||||
ARROW_EXPORT friend bool Identical(const Expression& l, const Expression& r);
|
||||
|
||||
ARROW_EXPORT friend void PrintTo(const Expression&, std::ostream*);
|
||||
};
|
||||
|
||||
/// Equality of two Expressions, as decided by Expression::Equals.
inline bool operator==(const Expression& l, const Expression& r) {
  const bool are_equal = l.Equals(r);
  return are_equal;
}
|
||||
/// Inequality of two Expressions: the negation of Expression::Equals.
inline bool operator!=(const Expression& l, const Expression& r) {
  const bool are_equal = l.Equals(r);
  return !are_equal;
}
|
||||
|
||||
// Factories
|
||||
|
||||
/// Make an Expression from a literal Datum.
ARROW_EXPORT Expression literal(Datum lit);
|
||||
|
||||
template <typename Arg>
|
||||
Expression literal(Arg&& arg) {
|
||||
return literal(Datum(std::forward<Arg>(arg)));
|
||||
}
|
||||
|
||||
/// Make an Expression from a field reference.
ARROW_EXPORT Expression field_ref(FieldRef ref);
|
||||
|
||||
/// Make an Expression from a function name, its argument Expressions and
/// (optionally) function options; options default to NULLPTR.
ARROW_EXPORT Expression call(std::string function,
                             std::vector<Expression> arguments,
                             std::shared_ptr<FunctionOptions> options = NULLPTR);
|
||||
|
||||
template <typename Options, typename = typename std::enable_if<
|
||||
std::is_base_of<FunctionOptions, Options>::value>::type>
|
||||
Expression call(std::string function, std::vector<Expression> arguments,
|
||||
Options options) {
|
||||
return call(std::move(function), std::move(arguments),
|
||||
std::make_shared<Options>(std::move(options)));
|
||||
}
|
||||
|
||||
/// Assemble a list of all fields referenced by an Expression at any depth.
|
||||
ARROW_EXPORT
|
||||
std::vector<FieldRef> FieldsInExpression(const Expression&);
|
||||
|
||||
/// Check if the expression references any fields.
|
||||
ARROW_EXPORT
|
||||
bool ExpressionHasFieldRefs(const Expression&);
|
||||
|
||||
struct ARROW_EXPORT KnownFieldValues;
|
||||
|
||||
/// Assemble a mapping from field references to known values. This derives known values
|
||||
/// from "equal" and "is_null" Expressions referencing a field and a literal.
|
||||
ARROW_EXPORT
|
||||
Result<KnownFieldValues> ExtractKnownFieldValues(
|
||||
const Expression& guaranteed_true_predicate);
|
||||
|
||||
/// @}
|
||||
|
||||
/// \defgroup expression-passes Functions for modification of Expressions
|
||||
///
|
||||
/// @{
|
||||
///
|
||||
/// These transform bound expressions. Some transforms utilize a guarantee, which is
|
||||
/// provided as an Expression which is guaranteed to evaluate to true. The
|
||||
/// guaranteed_true_predicate need not be bound, but canonicalization is currently
|
||||
/// deferred to producers of guarantees. For example in order to be recognized as a
|
||||
/// guarantee on a field value, an Expression must be a call to "equal" with field_ref LHS
|
||||
/// and literal RHS. Flipping the arguments, "is_in" with a one-long value_set, ... or
|
||||
/// other semantically identical Expressions will not be recognized.
|
||||
|
||||
/// Weak canonicalization which establishes guarantees for subsequent passes. Even
|
||||
/// equivalent Expressions may result in different canonicalized expressions.
|
||||
/// TODO this could be a strong canonicalization
|
||||
ARROW_EXPORT
|
||||
Result<Expression> Canonicalize(Expression, ExecContext* = NULLPTR);
|
||||
|
||||
/// Simplify Expressions based on literal arguments (for example, add(null, x) will always
|
||||
/// be null so replace the call with a null literal). Includes early evaluation of all
|
||||
/// calls whose arguments are entirely literal.
|
||||
ARROW_EXPORT
|
||||
Result<Expression> FoldConstants(Expression);
|
||||
|
||||
/// Simplify Expressions by replacing with known values of the fields which it references.
|
||||
ARROW_EXPORT
|
||||
Result<Expression> ReplaceFieldsWithKnownValues(const KnownFieldValues& known_values,
|
||||
Expression);
|
||||
|
||||
/// Simplify an expression by replacing subexpressions based on a guarantee:
|
||||
/// a boolean expression which is guaranteed to evaluate to `true`. For example, this is
|
||||
/// used to remove redundant function calls from a filter expression or to replace a
|
||||
/// reference to a constant-value field with a literal.
|
||||
ARROW_EXPORT
|
||||
Result<Expression> SimplifyWithGuarantee(Expression,
|
||||
const Expression& guaranteed_true_predicate);
|
||||
|
||||
/// @}
|
||||
|
||||
// Execution
|
||||
|
||||
/// Create an ExecBatch suitable for passing to ExecuteScalarExpression() from a
|
||||
/// RecordBatch which may have missing or incorrectly ordered columns.
|
||||
/// Missing fields will be replaced with null scalars.
|
||||
ARROW_EXPORT Result<ExecBatch> MakeExecBatch(const Schema& full_schema,
|
||||
const Datum& partial);
|
||||
|
||||
/// Execute a scalar expression against the provided state and input ExecBatch. This
|
||||
/// expression must be bound.
|
||||
ARROW_EXPORT
|
||||
Result<Datum> ExecuteScalarExpression(const Expression&, const ExecBatch& input,
|
||||
ExecContext* = NULLPTR);
|
||||
|
||||
/// Convenience function for invoking against a RecordBatch
|
||||
ARROW_EXPORT
|
||||
Result<Datum> ExecuteScalarExpression(const Expression&, const Schema& full_schema,
|
||||
const Datum& partial_input, ExecContext* = NULLPTR);
|
||||
|
||||
// Serialization
|
||||
|
||||
/// Serialize an Expression into a Buffer.
ARROW_EXPORT Result<std::shared_ptr<Buffer>> Serialize(const Expression&);

/// Reconstruct an Expression from a Buffer.
// NOTE(review): presumably the inverse of Serialize() -- confirm the format contract.
ARROW_EXPORT Result<Expression> Deserialize(std::shared_ptr<Buffer>);
|
||||
|
||||
/// \defgroup expression-convenience Functions for convenient expression creation
|
||||
///
|
||||
/// @{
|
||||
|
||||
// Convenience builders for frequently used Expressions.
// NOTE(review): each presumably wraps call() with the like-named compute function --
// confirm in the implementation file.

ARROW_EXPORT
Expression project(std::vector<Expression> values, std::vector<std::string> names);

// Comparisons
ARROW_EXPORT
Expression equal(Expression lhs, Expression rhs);
ARROW_EXPORT
Expression not_equal(Expression lhs, Expression rhs);
ARROW_EXPORT
Expression less(Expression lhs, Expression rhs);
ARROW_EXPORT
Expression less_equal(Expression lhs, Expression rhs);
ARROW_EXPORT
Expression greater(Expression lhs, Expression rhs);
ARROW_EXPORT
Expression greater_equal(Expression lhs, Expression rhs);

// Validity checks; nan_is_null defaults to false
ARROW_EXPORT
Expression is_null(Expression lhs, bool nan_is_null = false);
ARROW_EXPORT
Expression is_valid(Expression lhs);

// Boolean connectives; and_/or_ also accept a whole vector of operands
ARROW_EXPORT
Expression and_(Expression lhs, Expression rhs);
ARROW_EXPORT
Expression and_(const std::vector<Expression>&);
ARROW_EXPORT
Expression or_(Expression lhs, Expression rhs);
ARROW_EXPORT
Expression or_(const std::vector<Expression>&);
ARROW_EXPORT
Expression not_(Expression operand);
|
||||
|
||||
/// @}
|
||||
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
@ -0,0 +1,122 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/compute/exec/options.h"
|
||||
#include "arrow/compute/exec/schema_util.h"
|
||||
#include "arrow/compute/exec/task_util.h"
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/status.h"
|
||||
#include "arrow/type.h"
|
||||
#include "arrow/util/tracing_internal.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace compute {
|
||||
|
||||
class ARROW_EXPORT HashJoinSchema {
|
||||
public:
|
||||
Status Init(JoinType join_type, const Schema& left_schema,
|
||||
const std::vector<FieldRef>& left_keys, const Schema& right_schema,
|
||||
const std::vector<FieldRef>& right_keys, const Expression& filter,
|
||||
const std::string& left_field_name_prefix,
|
||||
const std::string& right_field_name_prefix);
|
||||
|
||||
Status Init(JoinType join_type, const Schema& left_schema,
|
||||
const std::vector<FieldRef>& left_keys,
|
||||
const std::vector<FieldRef>& left_output, const Schema& right_schema,
|
||||
const std::vector<FieldRef>& right_keys,
|
||||
const std::vector<FieldRef>& right_output, const Expression& filter,
|
||||
const std::string& left_field_name_prefix,
|
||||
const std::string& right_field_name_prefix);
|
||||
|
||||
static Status ValidateSchemas(JoinType join_type, const Schema& left_schema,
|
||||
const std::vector<FieldRef>& left_keys,
|
||||
const std::vector<FieldRef>& left_output,
|
||||
const Schema& right_schema,
|
||||
const std::vector<FieldRef>& right_keys,
|
||||
const std::vector<FieldRef>& right_output,
|
||||
const std::string& left_field_name_prefix,
|
||||
const std::string& right_field_name_prefix);
|
||||
|
||||
Result<Expression> BindFilter(Expression filter, const Schema& left_schema,
|
||||
const Schema& right_schema);
|
||||
std::shared_ptr<Schema> MakeOutputSchema(const std::string& left_field_name_suffix,
|
||||
const std::string& right_field_name_suffix);
|
||||
|
||||
bool LeftPayloadIsEmpty() { return PayloadIsEmpty(0); }
|
||||
|
||||
bool RightPayloadIsEmpty() { return PayloadIsEmpty(1); }
|
||||
|
||||
static int kMissingField() {
|
||||
return SchemaProjectionMaps<HashJoinProjection>::kMissingField;
|
||||
}
|
||||
|
||||
SchemaProjectionMaps<HashJoinProjection> proj_maps[2];
|
||||
|
||||
private:
|
||||
static bool IsTypeSupported(const DataType& type);
|
||||
|
||||
Status CollectFilterColumns(std::vector<FieldRef>& left_filter,
|
||||
std::vector<FieldRef>& right_filter,
|
||||
const Expression& filter, const Schema& left_schema,
|
||||
const Schema& right_schema);
|
||||
|
||||
Expression RewriteFilterToUseFilterSchema(int right_filter_offset,
|
||||
const SchemaProjectionMap& left_to_filter,
|
||||
const SchemaProjectionMap& right_to_filter,
|
||||
const Expression& filter);
|
||||
|
||||
bool PayloadIsEmpty(int side) {
|
||||
ARROW_DCHECK(side == 0 || side == 1);
|
||||
return proj_maps[side].num_cols(HashJoinProjection::PAYLOAD) == 0;
|
||||
}
|
||||
|
||||
static Result<std::vector<FieldRef>> ComputePayload(const Schema& schema,
|
||||
const std::vector<FieldRef>& output,
|
||||
const std::vector<FieldRef>& filter,
|
||||
const std::vector<FieldRef>& key);
|
||||
};
|
||||
|
||||
class HashJoinImpl {
|
||||
public:
|
||||
using OutputBatchCallback = std::function<void(ExecBatch)>;
|
||||
using FinishedCallback = std::function<void(int64_t)>;
|
||||
|
||||
virtual ~HashJoinImpl() = default;
|
||||
virtual Status Init(ExecContext* ctx, JoinType join_type, bool use_sync_execution,
|
||||
size_t num_threads, HashJoinSchema* schema_mgr,
|
||||
std::vector<JoinKeyCmp> key_cmp, Expression filter,
|
||||
OutputBatchCallback output_batch_callback,
|
||||
FinishedCallback finished_callback,
|
||||
TaskScheduler::ScheduleImpl schedule_task_callback) = 0;
|
||||
virtual Status InputReceived(size_t thread_index, int side, ExecBatch batch) = 0;
|
||||
virtual Status InputFinished(size_t thread_index, int side) = 0;
|
||||
virtual void Abort(TaskScheduler::AbortContinuationImpl pos_abort_callback) = 0;
|
||||
|
||||
static Result<std::unique_ptr<HashJoinImpl>> MakeBasic();
|
||||
|
||||
protected:
|
||||
util::tracing::Span span_;
|
||||
};
|
||||
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
@ -0,0 +1,315 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "arrow/compute/exec.h"
|
||||
#include "arrow/compute/exec/schema_util.h"
|
||||
#include "arrow/compute/kernels/row_encoder.h"
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/status.h"
|
||||
#include "arrow/type.h"
|
||||
|
||||
// This file contains hash join logic related to handling of dictionary encoded key
|
||||
// columns.
|
||||
//
|
||||
// A key column from probe side of the join can be matched against a key column from build
|
||||
// side of the join, as long as the underlying value types are equal. That means that:
|
||||
// - both scalars and arrays can be used and even mixed in the same column
|
||||
// - dictionary column can be matched against non-dictionary column if underlying value
|
||||
// types are equal
|
||||
// - dictionary column can be matched against dictionary column with a different index
|
||||
// type, and potentially using a different dictionary, if underlying value types are equal
|
||||
//
|
||||
// We currently require in hash join that for all dictionary encoded columns, the same
|
||||
// dictionary is used in all input exec batches.
|
||||
//
|
||||
// In order to allow matching columns with different dictionaries, different dictionary
|
||||
// index types, and dictionary key against non-dictionary key, internally comparisons will
|
||||
// be evaluated after remapping values on both sides of the join to a common
|
||||
// representation (which will be called "unified representation"). This common
|
||||
// representation is a column of int32() type (not a dictionary column). It represents an
|
||||
// index in the unified dictionary computed for the (only) dictionary present on build
|
||||
// side (an empty dictionary is still created for an empty build side). Null value is
|
||||
// always represented in this common representation as null int32 value, unified
|
||||
// dictionary will never contain a null value (so there is no ambiguity of representing
|
||||
// nulls as either index to a null entry in the dictionary or null index).
|
||||
//
|
||||
// Unified dictionary represents values present on build side. There may be values on
|
||||
// probe side that are not present in it. All such values, that are not null, are mapped
|
||||
// in the common representation to a special constant kMissingValueId.
|
||||
//
|
||||
|
||||
namespace arrow {
|
||||
namespace compute {
|
||||
|
||||
using internal::RowEncoder;
|
||||
|
||||
/// Helper class with operations that are stateless and common to processing of dictionary
|
||||
/// keys on both build and probe side.
|
||||
class HashJoinDictUtil {
|
||||
public:
|
||||
// Null values in unified representation are always represented as null that has
|
||||
// corresponding integer set to this constant
|
||||
static constexpr int32_t kNullId = 0;
|
||||
// Constant representing a value, that is not null, missing on the build side, in
|
||||
// unified representation.
|
||||
static constexpr int32_t kMissingValueId = -1;
|
||||
|
||||
// Check if data types of corresponding pair of key column on build and probe side are
|
||||
// compatible
|
||||
static bool KeyDataTypesValid(const std::shared_ptr<DataType>& probe_data_type,
|
||||
const std::shared_ptr<DataType>& build_data_type);
|
||||
|
||||
// Input must be dictionary array or dictionary scalar.
|
||||
// A precomputed and provided here lookup table in the form of int32() array will be
|
||||
// used to remap input indices to unified representation.
|
||||
//
|
||||
static Result<std::shared_ptr<ArrayData>> IndexRemapUsingLUT(
|
||||
ExecContext* ctx, const Datum& indices, int64_t batch_length,
|
||||
const std::shared_ptr<ArrayData>& map_array,
|
||||
const std::shared_ptr<DataType>& data_type);
|
||||
|
||||
// Return int32() array that contains indices of input dictionary array or scalar after
|
||||
// type casting.
|
||||
static Result<std::shared_ptr<ArrayData>> ConvertToInt32(
|
||||
const std::shared_ptr<DataType>& from_type, const Datum& input,
|
||||
int64_t batch_length, ExecContext* ctx);
|
||||
|
||||
// Return an array that contains elements of input int32() array after casting to a
|
||||
// given integer type. This is used for mapping unified representation stored in the
|
||||
// hash table on build side back to original input data type of hash join, when
|
||||
// outputting hash join results to parent exec node.
|
||||
//
|
||||
static Result<std::shared_ptr<ArrayData>> ConvertFromInt32(
|
||||
const std::shared_ptr<DataType>& to_type, const Datum& input, int64_t batch_length,
|
||||
ExecContext* ctx);
|
||||
|
||||
// Return dictionary referenced in either dictionary array or dictionary scalar
|
||||
static std::shared_ptr<Array> ExtractDictionary(const Datum& data);
|
||||
};
|
||||
|
||||
/// Implements processing of dictionary arrays/scalars in key columns on the build side of
|
||||
/// a hash join.
|
||||
/// Each instance of this class corresponds to a single column and stores and
|
||||
/// processes only the information related to that column.
|
||||
/// Const methods are thread-safe, non-const methods are not (the caller must make sure
|
||||
/// that only one thread at any time will access them).
|
||||
///
|
||||
class HashJoinDictBuild {
|
||||
public:
|
||||
// Returns true if the key column (described in input by its data type) requires any
|
||||
// pre- or post-processing related to handling dictionaries.
|
||||
//
|
||||
static bool KeyNeedsProcessing(const std::shared_ptr<DataType>& build_data_type) {
|
||||
return (build_data_type->id() == Type::DICTIONARY);
|
||||
}
|
||||
|
||||
// Data type of unified representation
|
||||
static std::shared_ptr<DataType> DataTypeAfterRemapping() { return int32(); }
|
||||
|
||||
// Should be called only once in hash join, before processing any build or probe
|
||||
// batches.
|
||||
//
|
||||
// Takes a pointer to the dictionary for a corresponding key column on the build side as
|
||||
// an input. If the build side is empty, it still needs to be called, but with
|
||||
// dictionary pointer set to null.
|
||||
//
|
||||
// Currently it is required that all input batches on build side share the same
|
||||
// dictionary. For each input batch during its pre-processing, dictionary will be
|
||||
// checked and error will be returned if it is different then the one provided in the
|
||||
// call to this method.
|
||||
//
|
||||
// Unifies the dictionary. The order of the values is still preserved.
|
||||
// Null and duplicate entries are removed. If the dictionary is already unified, its
|
||||
// copy will be produced and stored within this class.
|
||||
//
|
||||
// Prepares the mapping from ids within original dictionary to the ids in the resulting
|
||||
// dictionary. This is used later on to pre-process (map to unified representation) key
|
||||
// column on build side.
|
||||
//
|
||||
// Prepares the reverse mapping (in the form of hash table) from values to the ids in
|
||||
// the resulting dictionary. This will be used later on to pre-process (map to unified
|
||||
// representation) key column on probe side. Values on probe side that are not present
|
||||
// in the original dictionary will be mapped to a special constant kMissingValueId. The
|
||||
// exception is made for nulls, which get always mapped to nulls (both when null is
|
||||
// represented as a dictionary id pointing to a null and a null dictionary id).
|
||||
//
|
||||
Status Init(ExecContext* ctx, std::shared_ptr<Array> dictionary,
|
||||
std::shared_ptr<DataType> index_type, std::shared_ptr<DataType> value_type);
|
||||
|
||||
// Remap array or scalar values into unified representation (array of int32()).
|
||||
// Outputs kMissingValueId if input value is not found in the unified dictionary.
|
||||
// Outputs null for null input value (with corresponding data set to kNullId).
|
||||
//
|
||||
Result<std::shared_ptr<ArrayData>> RemapInputValues(ExecContext* ctx,
|
||||
const Datum& values,
|
||||
int64_t batch_length) const;
|
||||
|
||||
// Remap dictionary array or dictionary scalar on build side to unified representation.
|
||||
// Dictionary referenced in the input must match the dictionary that was
|
||||
// given during initialization.
|
||||
// The output is a dictionary array that references unified dictionary.
|
||||
//
|
||||
Result<std::shared_ptr<ArrayData>> RemapInput(
|
||||
ExecContext* ctx, const Datum& indices, int64_t batch_length,
|
||||
const std::shared_ptr<DataType>& data_type) const;
|
||||
|
||||
// Outputs dictionary array referencing unified dictionary, given an array with 32-bit
|
||||
// ids.
|
||||
// Used to post-process values looked up in a hash table on build side of the hash join
|
||||
// before outputting to the parent exec node.
|
||||
//
|
||||
Result<std::shared_ptr<ArrayData>> RemapOutput(const ArrayData& indices32Bit,
|
||||
ExecContext* ctx) const;
|
||||
|
||||
// Release shared pointers and memory
|
||||
void CleanUp();
|
||||
|
||||
private:
|
||||
// Data type of dictionary ids for the input dictionary on build side
|
||||
std::shared_ptr<DataType> index_type_;
|
||||
// Data type of values for the input dictionary on build side
|
||||
std::shared_ptr<DataType> value_type_;
|
||||
// Mapping from (encoded as string) values to the ids in unified dictionary
|
||||
std::unordered_map<std::string, int32_t> hash_table_;
|
||||
// Mapping from input dictionary ids to unified dictionary ids
|
||||
std::shared_ptr<ArrayData> remapped_ids_;
|
||||
// Input dictionary
|
||||
std::shared_ptr<Array> dictionary_;
|
||||
// Unified dictionary
|
||||
std::shared_ptr<ArrayData> unified_dictionary_;
|
||||
};
|
||||
|
||||
/// Implements processing of dictionary arrays/scalars in key columns on the probe side of
|
||||
/// a hash join.
|
||||
/// Each instance of this class corresponds to a single column and stores and
|
||||
/// processes only the information related to that column.
|
||||
/// It is not thread-safe - every participating thread should use its own instance of
|
||||
/// this class.
|
||||
///
|
||||
class HashJoinDictProbe {
|
||||
public:
|
||||
static bool KeyNeedsProcessing(const std::shared_ptr<DataType>& probe_data_type,
|
||||
const std::shared_ptr<DataType>& build_data_type);
|
||||
|
||||
// Data type of the result of remapping input key column.
|
||||
//
|
||||
// The result of remapping is what is used in hash join for matching keys on build and
|
||||
// probe side. The exact data types may be different, as described below, and therefore
|
||||
// a common representation is needed for simplifying comparisons of pairs of keys on
|
||||
// both sides.
|
||||
//
|
||||
// We support matching key that is of non-dictionary type with key that is of dictionary
|
||||
// type, as long as the underlying value types are equal. We support matching when both
|
||||
// keys are of dictionary type, regardless whether underlying dictionary index types are
|
||||
// the same or not.
|
||||
//
|
||||
static std::shared_ptr<DataType> DataTypeAfterRemapping(
|
||||
const std::shared_ptr<DataType>& build_data_type);
|
||||
|
||||
// Should only be called if KeyNeedsProcessing method returns true for a pair of
|
||||
// corresponding key columns from build and probe side.
|
||||
// Converts values in order to match the common representation for
|
||||
// both build and probe side used in hash table comparison.
|
||||
// Supports arrays and scalars as input.
|
||||
// Argument opt_build_side should be null if dictionary key on probe side is matched
|
||||
// with non-dictionary key on build side.
|
||||
//
|
||||
Result<std::shared_ptr<ArrayData>> RemapInput(
|
||||
const HashJoinDictBuild* opt_build_side, const Datum& data, int64_t batch_length,
|
||||
const std::shared_ptr<DataType>& probe_data_type,
|
||||
const std::shared_ptr<DataType>& build_data_type, ExecContext* ctx);
|
||||
|
||||
void CleanUp();
|
||||
|
||||
private:
|
||||
// May be null if probe side key is non-dictionary. Otherwise it is used to verify that
|
||||
// only a single dictionary is referenced in exec batch on probe side of hash join.
|
||||
std::shared_ptr<Array> dictionary_;
|
||||
// Mapping from dictionary on probe side of hash join (if it is used) to unified
|
||||
// representation.
|
||||
std::shared_ptr<ArrayData> remapped_ids_;
|
||||
// Encoder of key columns that uses unified representation instead of original data type
|
||||
// for key columns that need to use it (have dictionaries on either side of the join).
|
||||
internal::RowEncoder encoder_;
|
||||
};
|
||||
|
||||
// Encapsulates dictionary handling logic for build side of hash join.
|
||||
//
|
||||
class HashJoinDictBuildMulti {
|
||||
public:
|
||||
Status Init(const SchemaProjectionMaps<HashJoinProjection>& proj_map,
|
||||
const ExecBatch* opt_non_empty_batch, ExecContext* ctx);
|
||||
static void InitEncoder(const SchemaProjectionMaps<HashJoinProjection>& proj_map,
|
||||
RowEncoder* encoder, ExecContext* ctx);
|
||||
Status EncodeBatch(size_t thread_index,
|
||||
const SchemaProjectionMaps<HashJoinProjection>& proj_map,
|
||||
const ExecBatch& batch, RowEncoder* encoder, ExecContext* ctx) const;
|
||||
Status PostDecode(const SchemaProjectionMaps<HashJoinProjection>& proj_map,
|
||||
ExecBatch* decoded_key_batch, ExecContext* ctx);
|
||||
const HashJoinDictBuild& get_dict_build(int icol) const { return remap_imp_[icol]; }
|
||||
|
||||
private:
|
||||
std::vector<bool> needs_remap_;
|
||||
std::vector<HashJoinDictBuild> remap_imp_;
|
||||
};
|
||||
|
||||
// Encapsulates dictionary handling logic for probe side of hash join
|
||||
//
|
||||
class HashJoinDictProbeMulti {
|
||||
public:
|
||||
void Init(size_t num_threads);
|
||||
bool BatchRemapNeeded(size_t thread_index,
|
||||
const SchemaProjectionMaps<HashJoinProjection>& proj_map_probe,
|
||||
const SchemaProjectionMaps<HashJoinProjection>& proj_map_build,
|
||||
ExecContext* ctx);
|
||||
Status EncodeBatch(size_t thread_index,
|
||||
const SchemaProjectionMaps<HashJoinProjection>& proj_map_probe,
|
||||
const SchemaProjectionMaps<HashJoinProjection>& proj_map_build,
|
||||
const HashJoinDictBuildMulti& dict_build, const ExecBatch& batch,
|
||||
RowEncoder** out_encoder, ExecBatch* opt_out_key_batch,
|
||||
ExecContext* ctx);
|
||||
|
||||
private:
|
||||
void InitLocalStateIfNeeded(
|
||||
size_t thread_index, const SchemaProjectionMaps<HashJoinProjection>& proj_map_probe,
|
||||
const SchemaProjectionMaps<HashJoinProjection>& proj_map_build, ExecContext* ctx);
|
||||
static void InitEncoder(const SchemaProjectionMaps<HashJoinProjection>& proj_map_probe,
|
||||
const SchemaProjectionMaps<HashJoinProjection>& proj_map_build,
|
||||
RowEncoder* encoder, ExecContext* ctx);
|
||||
struct ThreadLocalState {
|
||||
bool is_initialized;
|
||||
// Whether any key column needs remapping (because of dictionaries used) before doing
|
||||
// join hash table lookups
|
||||
bool any_needs_remap;
|
||||
// Whether each key column needs remapping before doing join hash table lookups
|
||||
std::vector<bool> needs_remap;
|
||||
std::vector<HashJoinDictProbe> remap_imp;
|
||||
// Encoder of key columns that uses unified representation instead of original data
|
||||
// type for key columns that need to use it (have dictionaries on either side of the
|
||||
// join).
|
||||
RowEncoder post_remap_encoder;
|
||||
};
|
||||
std::vector<ThreadLocalState> local_states_;
|
||||
};
|
||||
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
@ -0,0 +1,70 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <flatbuffers/flatbuffers.h>
|
||||
|
||||
#include "arrow/compute/exec/exec_plan.h"
|
||||
#include "arrow/compute/exec/expression.h"
|
||||
#include "arrow/compute/exec/options.h"
|
||||
#include "arrow/datum.h"
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
#include "generated/Plan_generated.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
namespace flatbuf = org::apache::arrow::flatbuf;
|
||||
|
||||
namespace compute {
|
||||
|
||||
namespace ir = org::apache::arrow::computeir::flatbuf;
|
||||
|
||||
// Options for a source node that is identified by a catalog name.
//
// Bundles the name and schema of the source with a filter expression
// (defaults to literal(true)) and a list of projected field references
// (defaults to empty).
class ARROW_EXPORT CatalogSourceNodeOptions : public ExecNodeOptions {
 public:
  // All arguments are taken by value and moved into the matching members.
  CatalogSourceNodeOptions(std::string name, std::shared_ptr<Schema> schema,
                           Expression filter = literal(true),
                           std::vector<FieldRef> projection = {})
      : name(std::move(name)),
        schema(std::move(schema)),
        filter(std::move(filter)),
        projection(std::move(projection)) {}

  // Name of the source.
  std::string name;

  // Schema associated with the source.
  std::shared_ptr<Schema> schema;

  // Filter expression attached to the source.
  Expression filter;

  // Field references selected from the source.
  std::vector<FieldRef> projection;
};
|
||||
|
||||
// Overload set converting flatbuffer IR objects into their in-memory
// counterparts. Each overload returns a Result, so conversion can fail.

// Converts an IR literal to a Datum.
ARROW_EXPORT
Result<Datum> Convert(const ir::Literal& lit);

// Converts an IR expression to an Expression.
ARROW_EXPORT
Result<Expression> Convert(const ir::Expression& expr);

// Converts an IR relation to a Declaration.
ARROW_EXPORT
Result<Declaration> Convert(const ir::Relation& rel);
|
||||
|
||||
template <typename Ir>
|
||||
auto ConvertRoot(const Buffer& buf) -> decltype(Convert(std::declval<Ir>())) {
|
||||
return Convert(*flatbuffers::GetRoot<Ir>(buf.data()));
|
||||
}
|
||||
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
@ -0,0 +1,134 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "arrow/compute/exec/key_encode.h"
|
||||
#include "arrow/compute/exec/util.h"
|
||||
#include "arrow/memory_pool.h"
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/status.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace compute {
|
||||
|
||||
class KeyCompare {
|
||||
public:
|
||||
// Returns a single 16-bit selection vector of rows that failed comparison.
|
||||
// If there is input selection on the left, the resulting selection is a filtered image
|
||||
// of input selection.
|
||||
static void CompareColumnsToRows(
|
||||
uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null,
|
||||
const uint32_t* left_to_right_map, KeyEncoder::KeyEncoderContext* ctx,
|
||||
uint32_t* out_num_rows, uint16_t* out_sel_left_maybe_same,
|
||||
const std::vector<KeyColumnArray>& cols, const KeyEncoder::KeyRowArray& rows);
|
||||
|
||||
private:
|
||||
template <bool use_selection>
|
||||
static void NullUpdateColumnToRow(uint32_t id_col, uint32_t num_rows_to_compare,
|
||||
const uint16_t* sel_left_maybe_null,
|
||||
const uint32_t* left_to_right_map,
|
||||
KeyEncoder::KeyEncoderContext* ctx,
|
||||
const KeyColumnArray& col,
|
||||
const KeyEncoder::KeyRowArray& rows,
|
||||
uint8_t* match_bytevector);
|
||||
|
||||
template <bool use_selection, class COMPARE_FN>
|
||||
static void CompareBinaryColumnToRowHelper(
|
||||
uint32_t offset_within_row, uint32_t first_row_to_compare,
|
||||
uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null,
|
||||
const uint32_t* left_to_right_map, KeyEncoder::KeyEncoderContext* ctx,
|
||||
const KeyColumnArray& col, const KeyEncoder::KeyRowArray& rows,
|
||||
uint8_t* match_bytevector, COMPARE_FN compare_fn);
|
||||
|
||||
template <bool use_selection>
|
||||
static void CompareBinaryColumnToRow(
|
||||
uint32_t offset_within_row, uint32_t num_rows_to_compare,
|
||||
const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
|
||||
KeyEncoder::KeyEncoderContext* ctx, const KeyColumnArray& col,
|
||||
const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector);
|
||||
|
||||
template <bool use_selection, bool is_first_varbinary_col>
|
||||
static void CompareVarBinaryColumnToRow(
|
||||
uint32_t id_varlen_col, uint32_t num_rows_to_compare,
|
||||
const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
|
||||
KeyEncoder::KeyEncoderContext* ctx, const KeyColumnArray& col,
|
||||
const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector);
|
||||
|
||||
static void AndByteVectors(KeyEncoder::KeyEncoderContext* ctx, uint32_t num_elements,
|
||||
uint8_t* bytevector_A, const uint8_t* bytevector_B);
|
||||
|
||||
#if defined(ARROW_HAVE_AVX2)
|
||||
|
||||
template <bool use_selection>
|
||||
static uint32_t NullUpdateColumnToRowImp_avx2(
|
||||
uint32_t id_col, uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null,
|
||||
const uint32_t* left_to_right_map, KeyEncoder::KeyEncoderContext* ctx,
|
||||
const KeyColumnArray& col, const KeyEncoder::KeyRowArray& rows,
|
||||
uint8_t* match_bytevector);
|
||||
|
||||
template <bool use_selection, class COMPARE8_FN>
|
||||
static uint32_t CompareBinaryColumnToRowHelper_avx2(
|
||||
uint32_t offset_within_row, uint32_t num_rows_to_compare,
|
||||
const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
|
||||
KeyEncoder::KeyEncoderContext* ctx, const KeyColumnArray& col,
|
||||
const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector,
|
||||
COMPARE8_FN compare8_fn);
|
||||
|
||||
template <bool use_selection>
|
||||
static uint32_t CompareBinaryColumnToRowImp_avx2(
|
||||
uint32_t offset_within_row, uint32_t num_rows_to_compare,
|
||||
const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
|
||||
KeyEncoder::KeyEncoderContext* ctx, const KeyColumnArray& col,
|
||||
const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector);
|
||||
|
||||
template <bool use_selection, bool is_first_varbinary_col>
|
||||
static void CompareVarBinaryColumnToRowImp_avx2(
|
||||
uint32_t id_varlen_col, uint32_t num_rows_to_compare,
|
||||
const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
|
||||
KeyEncoder::KeyEncoderContext* ctx, const KeyColumnArray& col,
|
||||
const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector);
|
||||
|
||||
static uint32_t AndByteVectors_avx2(uint32_t num_elements, uint8_t* bytevector_A,
|
||||
const uint8_t* bytevector_B);
|
||||
|
||||
static uint32_t NullUpdateColumnToRow_avx2(
|
||||
bool use_selection, uint32_t id_col, uint32_t num_rows_to_compare,
|
||||
const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
|
||||
KeyEncoder::KeyEncoderContext* ctx, const KeyColumnArray& col,
|
||||
const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector);
|
||||
|
||||
static uint32_t CompareBinaryColumnToRow_avx2(
|
||||
bool use_selection, uint32_t offset_within_row, uint32_t num_rows_to_compare,
|
||||
const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map,
|
||||
KeyEncoder::KeyEncoderContext* ctx, const KeyColumnArray& col,
|
||||
const KeyEncoder::KeyRowArray& rows, uint8_t* match_bytevector);
|
||||
|
||||
static void CompareVarBinaryColumnToRow_avx2(
|
||||
bool use_selection, bool is_first_varbinary_col, uint32_t id_varlen_col,
|
||||
uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null,
|
||||
const uint32_t* left_to_right_map, KeyEncoder::KeyEncoderContext* ctx,
|
||||
const KeyColumnArray& col, const KeyEncoder::KeyRowArray& rows,
|
||||
uint8_t* match_bytevector);
|
||||
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
@ -0,0 +1,500 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/compute/exec/util.h"
|
||||
#include "arrow/compute/light_array.h"
|
||||
#include "arrow/memory_pool.h"
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/status.h"
|
||||
#include "arrow/util/bit_util.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace compute {
|
||||
|
||||
/// Converts between key representation as a collection of arrays for
|
||||
/// individual columns and another representation as a single array of rows
|
||||
/// combining data from all columns into one value.
|
||||
/// This conversion is reversible.
|
||||
/// Row-oriented storage is beneficial when there is a need for random access
|
||||
/// of individual rows and at the same time all included columns are likely to
|
||||
/// be accessed together, as in the case of hash table key.
|
||||
class KeyEncoder {
|
||||
public:
|
||||
struct KeyEncoderContext {
|
||||
bool has_avx2() const {
|
||||
return (hardware_flags & arrow::internal::CpuInfo::AVX2) > 0;
|
||||
}
|
||||
int64_t hardware_flags;
|
||||
util::TempVectorStack* stack;
|
||||
};
|
||||
|
||||
/// Description of a storage format for rows produced by encoder.
|
||||
struct KeyRowMetadata {
|
||||
/// Is row a varying-length binary, using offsets array to find a beginning of a row,
|
||||
/// or is it a fixed-length binary.
|
||||
bool is_fixed_length;
|
||||
|
||||
/// For a fixed-length binary row, common size of rows in bytes,
|
||||
/// rounded up to the multiple of alignment.
|
||||
///
|
||||
/// For a varying-length binary, size of all encoded fixed-length key columns,
|
||||
/// including lengths of varying-length columns, rounded up to the multiple of string
|
||||
/// alignment.
|
||||
uint32_t fixed_length;
|
||||
|
||||
/// Offset within a row to the array of 32-bit offsets within a row of
|
||||
/// ends of varbinary fields.
|
||||
/// Used only when the row is not fixed-length, zero for fixed-length row.
|
||||
/// There are N elements for N varbinary fields.
|
||||
/// Each element is the offset within a row of the first byte after
|
||||
/// the corresponding varbinary field bytes in that row.
|
||||
/// If varbinary fields begin at aligned addresses, than the end of the previous
|
||||
/// varbinary field needs to be rounded up according to the specified alignment
|
||||
/// to obtain the beginning of the next varbinary field.
|
||||
/// The first varbinary field starts at offset specified by fixed_length,
|
||||
/// which should already be aligned.
|
||||
uint32_t varbinary_end_array_offset;
|
||||
|
||||
/// Fixed number of bytes per row that are used to encode null masks.
|
||||
/// Null masks indicate for a single row which of its key columns are null.
|
||||
/// Nth bit in the sequence of bytes assigned to a row represents null
|
||||
/// information for Nth field according to the order in which they are encoded.
|
||||
int null_masks_bytes_per_row;
|
||||
|
||||
/// Power of 2. Every row will start at the offset aligned to that number of bytes.
|
||||
int row_alignment;
|
||||
|
||||
/// Power of 2. Must be no greater than row alignment.
|
||||
/// Every non-power-of-2 binary field and every varbinary field bytes
|
||||
/// will start aligned to that number of bytes.
|
||||
int string_alignment;
|
||||
|
||||
/// Metadata of encoded columns in their original order.
|
||||
std::vector<KeyColumnMetadata> column_metadatas;
|
||||
|
||||
/// Order in which fields are encoded.
|
||||
std::vector<uint32_t> column_order;
|
||||
|
||||
/// Offsets within a row to fields in their encoding order.
|
||||
std::vector<uint32_t> column_offsets;
|
||||
|
||||
/// Rounding up offset to the nearest multiple of alignment value.
|
||||
/// Alignment must be a power of 2.
|
||||
static inline uint32_t padding_for_alignment(uint32_t offset,
|
||||
int required_alignment) {
|
||||
ARROW_DCHECK(ARROW_POPCOUNT64(required_alignment) == 1);
|
||||
return static_cast<uint32_t>((-static_cast<int32_t>(offset)) &
|
||||
(required_alignment - 1));
|
||||
}
|
||||
|
||||
/// Rounding up offset to the beginning of next column,
|
||||
/// chosing required alignment based on the data type of that column.
|
||||
static inline uint32_t padding_for_alignment(uint32_t offset, int string_alignment,
|
||||
const KeyColumnMetadata& col_metadata) {
|
||||
if (!col_metadata.is_fixed_length ||
|
||||
ARROW_POPCOUNT64(col_metadata.fixed_length) <= 1) {
|
||||
return 0;
|
||||
} else {
|
||||
return padding_for_alignment(offset, string_alignment);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an array of offsets within a row of ends of varbinary fields.
|
||||
inline const uint32_t* varbinary_end_array(const uint8_t* row) const {
|
||||
ARROW_DCHECK(!is_fixed_length);
|
||||
return reinterpret_cast<const uint32_t*>(row + varbinary_end_array_offset);
|
||||
}
|
||||
inline uint32_t* varbinary_end_array(uint8_t* row) const {
|
||||
ARROW_DCHECK(!is_fixed_length);
|
||||
return reinterpret_cast<uint32_t*>(row + varbinary_end_array_offset);
|
||||
}
|
||||
|
||||
/// Returns the offset within the row and length of the first varbinary field.
|
||||
inline void first_varbinary_offset_and_length(const uint8_t* row, uint32_t* offset,
|
||||
uint32_t* length) const {
|
||||
ARROW_DCHECK(!is_fixed_length);
|
||||
*offset = fixed_length;
|
||||
*length = varbinary_end_array(row)[0] - fixed_length;
|
||||
}
|
||||
|
||||
/// Returns the offset within the row and length of the second and further varbinary
|
||||
/// fields.
|
||||
inline void nth_varbinary_offset_and_length(const uint8_t* row, int varbinary_id,
|
||||
uint32_t* out_offset,
|
||||
uint32_t* out_length) const {
|
||||
ARROW_DCHECK(!is_fixed_length);
|
||||
ARROW_DCHECK(varbinary_id > 0);
|
||||
const uint32_t* varbinary_end = varbinary_end_array(row);
|
||||
uint32_t offset = varbinary_end[varbinary_id - 1];
|
||||
offset += padding_for_alignment(offset, string_alignment);
|
||||
*out_offset = offset;
|
||||
*out_length = varbinary_end[varbinary_id] - offset;
|
||||
}
|
||||
|
||||
uint32_t encoded_field_order(uint32_t icol) const { return column_order[icol]; }
|
||||
|
||||
uint32_t encoded_field_offset(uint32_t icol) const { return column_offsets[icol]; }
|
||||
|
||||
uint32_t num_cols() const { return static_cast<uint32_t>(column_metadatas.size()); }
|
||||
|
||||
uint32_t num_varbinary_cols() const;
|
||||
|
||||
void FromColumnMetadataVector(const std::vector<KeyColumnMetadata>& cols,
|
||||
int in_row_alignment, int in_string_alignment);
|
||||
|
||||
bool is_compatible(const KeyRowMetadata& other) const;
|
||||
};
|
||||
|
||||
class KeyRowArray {
|
||||
public:
|
||||
KeyRowArray();
|
||||
Status Init(MemoryPool* pool, const KeyRowMetadata& metadata);
|
||||
void Clean();
|
||||
Status AppendEmpty(uint32_t num_rows_to_append, uint32_t num_extra_bytes_to_append);
|
||||
Status AppendSelectionFrom(const KeyRowArray& from, uint32_t num_rows_to_append,
|
||||
const uint16_t* source_row_ids);
|
||||
const KeyRowMetadata& metadata() const { return metadata_; }
|
||||
int64_t length() const { return num_rows_; }
|
||||
const uint8_t* data(int i) const {
|
||||
ARROW_DCHECK(i >= 0 && i <= max_buffers_);
|
||||
return buffers_[i];
|
||||
}
|
||||
uint8_t* mutable_data(int i) {
|
||||
ARROW_DCHECK(i >= 0 && i <= max_buffers_);
|
||||
return mutable_buffers_[i];
|
||||
}
|
||||
const uint32_t* offsets() const { return reinterpret_cast<const uint32_t*>(data(1)); }
|
||||
uint32_t* mutable_offsets() { return reinterpret_cast<uint32_t*>(mutable_data(1)); }
|
||||
const uint8_t* null_masks() const { return null_masks_->data(); }
|
||||
uint8_t* null_masks() { return null_masks_->mutable_data(); }
|
||||
|
||||
bool has_any_nulls(const KeyEncoderContext* ctx) const;
|
||||
|
||||
private:
|
||||
Status ResizeFixedLengthBuffers(int64_t num_extra_rows);
|
||||
Status ResizeOptionalVaryingLengthBuffer(int64_t num_extra_bytes);
|
||||
|
||||
int64_t size_null_masks(int64_t num_rows);
|
||||
int64_t size_offsets(int64_t num_rows);
|
||||
int64_t size_rows_fixed_length(int64_t num_rows);
|
||||
int64_t size_rows_varying_length(int64_t num_bytes);
|
||||
void update_buffer_pointers();
|
||||
|
||||
static constexpr int64_t padding_for_vectors = 64;
|
||||
MemoryPool* pool_;
|
||||
KeyRowMetadata metadata_;
|
||||
/// Buffers can only expand during lifetime and never shrink.
|
||||
std::unique_ptr<ResizableBuffer> null_masks_;
|
||||
std::unique_ptr<ResizableBuffer> offsets_;
|
||||
std::unique_ptr<ResizableBuffer> rows_;
|
||||
static constexpr int max_buffers_ = 3;
|
||||
const uint8_t* buffers_[max_buffers_];
|
||||
uint8_t* mutable_buffers_[max_buffers_];
|
||||
int64_t num_rows_;
|
||||
int64_t rows_capacity_;
|
||||
int64_t bytes_capacity_;
|
||||
|
||||
// Mutable to allow lazy evaluation
|
||||
mutable int64_t num_rows_for_has_any_nulls_;
|
||||
mutable bool has_any_nulls_;
|
||||
};
|
||||
|
||||
void Init(const std::vector<KeyColumnMetadata>& cols, KeyEncoderContext* ctx,
|
||||
int row_alignment, int string_alignment);
|
||||
|
||||
const KeyRowMetadata& row_metadata() { return row_metadata_; }
|
||||
|
||||
void PrepareEncodeSelected(int64_t start_row, int64_t num_rows,
|
||||
const std::vector<KeyColumnArray>& cols);
|
||||
Status EncodeSelected(KeyRowArray* rows, uint32_t num_selected,
|
||||
const uint16_t* selection);
|
||||
|
||||
/// Decode a window of row oriented data into a corresponding
|
||||
/// window of column oriented storage.
|
||||
/// The output buffers need to be correctly allocated and sized before
|
||||
/// calling each method.
|
||||
/// For that reason decoding is split into two functions.
|
||||
/// The output of the first one, that processes everything except for
|
||||
/// varying length buffers, can be used to find out required varying
|
||||
/// length buffers sizes.
|
||||
void DecodeFixedLengthBuffers(int64_t start_row_input, int64_t start_row_output,
|
||||
int64_t num_rows, const KeyRowArray& rows,
|
||||
std::vector<KeyColumnArray>* cols);
|
||||
|
||||
void DecodeVaryingLengthBuffers(int64_t start_row_input, int64_t start_row_output,
|
||||
int64_t num_rows, const KeyRowArray& rows,
|
||||
std::vector<KeyColumnArray>* cols);
|
||||
|
||||
const std::vector<KeyColumnArray>& GetBatchColumns() const { return batch_all_cols_; }
|
||||
|
||||
private:
|
||||
/// Prepare column array vectors.
|
||||
/// Output column arrays represent a range of input column arrays
|
||||
/// specified by starting row and number of rows.
|
||||
/// Three vectors are generated:
|
||||
/// - all columns
|
||||
/// - fixed-length columns only
|
||||
/// - varying-length columns only
|
||||
void PrepareKeyColumnArrays(int64_t start_row, int64_t num_rows,
|
||||
const std::vector<KeyColumnArray>& cols_in);
|
||||
|
||||
class TransformBoolean {
|
||||
public:
|
||||
static KeyColumnArray ArrayReplace(const KeyColumnArray& column,
|
||||
const KeyColumnArray& temp);
|
||||
static void PostDecode(const KeyColumnArray& input, KeyColumnArray* output,
|
||||
KeyEncoderContext* ctx);
|
||||
};
|
||||
|
||||
class EncoderInteger {
|
||||
public:
|
||||
static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
|
||||
const KeyRowArray& rows, KeyColumnArray* col,
|
||||
KeyEncoderContext* ctx, KeyColumnArray* temp);
|
||||
static bool UsesTransform(const KeyColumnArray& column);
|
||||
static KeyColumnArray ArrayReplace(const KeyColumnArray& column,
|
||||
const KeyColumnArray& temp);
|
||||
static void PostDecode(const KeyColumnArray& input, KeyColumnArray* output,
|
||||
KeyEncoderContext* ctx);
|
||||
|
||||
private:
|
||||
static bool IsBoolean(const KeyColumnMetadata& metadata);
|
||||
};
|
||||
|
||||
class EncoderBinary {
|
||||
public:
|
||||
static void EncodeSelected(uint32_t offset_within_row, KeyRowArray* rows,
|
||||
const KeyColumnArray& col, uint32_t num_selected,
|
||||
const uint16_t* selection);
|
||||
static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
|
||||
const KeyRowArray& rows, KeyColumnArray* col,
|
||||
KeyEncoderContext* ctx, KeyColumnArray* temp);
|
||||
static bool IsInteger(const KeyColumnMetadata& metadata);
|
||||
|
||||
private:
|
||||
template <class COPY_FN, class SET_NULL_FN>
|
||||
static void EncodeSelectedImp(uint32_t offset_within_row, KeyRowArray* rows,
|
||||
const KeyColumnArray& col, uint32_t num_selected,
|
||||
const uint16_t* selection, COPY_FN copy_fn,
|
||||
SET_NULL_FN set_null_fn);
|
||||
|
||||
template <bool is_row_fixed_length, class COPY_FN>
|
||||
static inline void DecodeHelper(uint32_t start_row, uint32_t num_rows,
|
||||
uint32_t offset_within_row,
|
||||
const KeyRowArray* rows_const,
|
||||
KeyRowArray* rows_mutable_maybe_null,
|
||||
const KeyColumnArray* col_const,
|
||||
KeyColumnArray* col_mutable_maybe_null,
|
||||
COPY_FN copy_fn);
|
||||
template <bool is_row_fixed_length>
|
||||
static void DecodeImp(uint32_t start_row, uint32_t num_rows,
|
||||
uint32_t offset_within_row, const KeyRowArray& rows,
|
||||
KeyColumnArray* col);
|
||||
#if defined(ARROW_HAVE_AVX2)
|
||||
static void DecodeHelper_avx2(bool is_row_fixed_length, uint32_t start_row,
|
||||
uint32_t num_rows, uint32_t offset_within_row,
|
||||
const KeyRowArray& rows, KeyColumnArray* col);
|
||||
template <bool is_row_fixed_length>
|
||||
static void DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
|
||||
uint32_t offset_within_row, const KeyRowArray& rows,
|
||||
KeyColumnArray* col);
|
||||
#endif
|
||||
};
|
||||
|
||||
class EncoderBinaryPair {
|
||||
public:
|
||||
static bool CanProcessPair(const KeyColumnMetadata& col1,
|
||||
const KeyColumnMetadata& col2) {
|
||||
return EncoderBinary::IsInteger(col1) && EncoderBinary::IsInteger(col2);
|
||||
}
|
||||
static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
|
||||
const KeyRowArray& rows, KeyColumnArray* col1,
|
||||
KeyColumnArray* col2, KeyEncoderContext* ctx,
|
||||
KeyColumnArray* temp1, KeyColumnArray* temp2);
|
||||
|
||||
private:
|
||||
template <bool is_row_fixed_length, typename col1_type, typename col2_type>
|
||||
static void DecodeImp(uint32_t num_rows_to_skip, uint32_t start_row,
|
||||
uint32_t num_rows, uint32_t offset_within_row,
|
||||
const KeyRowArray& rows, KeyColumnArray* col1,
|
||||
KeyColumnArray* col2);
|
||||
#if defined(ARROW_HAVE_AVX2)
|
||||
static uint32_t DecodeHelper_avx2(bool is_row_fixed_length, uint32_t col_width,
|
||||
uint32_t start_row, uint32_t num_rows,
|
||||
uint32_t offset_within_row, const KeyRowArray& rows,
|
||||
KeyColumnArray* col1, KeyColumnArray* col2);
|
||||
template <bool is_row_fixed_length, uint32_t col_width>
|
||||
static uint32_t DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
|
||||
uint32_t offset_within_row, const KeyRowArray& rows,
|
||||
KeyColumnArray* col1, KeyColumnArray* col2);
|
||||
#endif
|
||||
};
|
||||
|
||||
class EncoderOffsets {
|
||||
public:
|
||||
static void GetRowOffsetsSelected(KeyRowArray* rows,
|
||||
const std::vector<KeyColumnArray>& cols,
|
||||
uint32_t num_selected, const uint16_t* selection);
|
||||
static void EncodeSelected(KeyRowArray* rows, const std::vector<KeyColumnArray>& cols,
|
||||
uint32_t num_selected, const uint16_t* selection);
|
||||
|
||||
static void Decode(uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows,
|
||||
std::vector<KeyColumnArray>* varbinary_cols,
|
||||
const std::vector<uint32_t>& varbinary_cols_base_offset,
|
||||
KeyEncoderContext* ctx);
|
||||
|
||||
private:
|
||||
template <bool has_nulls, bool is_first_varbinary>
|
||||
static void EncodeSelectedImp(uint32_t ivarbinary, KeyRowArray* rows,
|
||||
const std::vector<KeyColumnArray>& cols,
|
||||
uint32_t num_selected, const uint16_t* selection);
|
||||
};
|
||||
|
||||
class EncoderVarBinary {
|
||||
public:
|
||||
static void EncodeSelected(uint32_t ivarbinary, KeyRowArray* rows,
|
||||
const KeyColumnArray& cols, uint32_t num_selected,
|
||||
const uint16_t* selection);
|
||||
|
||||
static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t varbinary_col_id,
|
||||
const KeyRowArray& rows, KeyColumnArray* col,
|
||||
KeyEncoderContext* ctx);
|
||||
|
||||
private:
|
||||
template <bool first_varbinary_col, class COPY_FN>
|
||||
static inline void DecodeHelper(uint32_t start_row, uint32_t num_rows,
|
||||
uint32_t varbinary_col_id,
|
||||
const KeyRowArray* rows_const,
|
||||
KeyRowArray* rows_mutable_maybe_null,
|
||||
const KeyColumnArray* col_const,
|
||||
KeyColumnArray* col_mutable_maybe_null,
|
||||
COPY_FN copy_fn);
|
||||
template <bool first_varbinary_col>
|
||||
static void DecodeImp(uint32_t start_row, uint32_t num_rows,
|
||||
uint32_t varbinary_col_id, const KeyRowArray& rows,
|
||||
KeyColumnArray* col);
|
||||
#if defined(ARROW_HAVE_AVX2)
|
||||
static void DecodeHelper_avx2(uint32_t start_row, uint32_t num_rows,
|
||||
uint32_t varbinary_col_id, const KeyRowArray& rows,
|
||||
KeyColumnArray* col);
|
||||
template <bool first_varbinary_col>
|
||||
static void DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
|
||||
uint32_t varbinary_col_id, const KeyRowArray& rows,
|
||||
KeyColumnArray* col);
|
||||
#endif
|
||||
};
|
||||
|
||||
class EncoderNulls {
|
||||
public:
|
||||
static void EncodeSelected(KeyRowArray* rows, const std::vector<KeyColumnArray>& cols,
|
||||
uint32_t num_selected, const uint16_t* selection);
|
||||
|
||||
static void Decode(uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows,
|
||||
std::vector<KeyColumnArray>* cols);
|
||||
};
|
||||
|
||||
KeyEncoderContext* ctx_;
|
||||
|
||||
// Data initialized once, based on data types of key columns
|
||||
KeyRowMetadata row_metadata_;
|
||||
|
||||
// Data initialized for each input batch.
|
||||
// All elements are ordered according to the order of encoded fields in a row.
|
||||
std::vector<KeyColumnArray> batch_all_cols_;
|
||||
std::vector<KeyColumnArray> batch_varbinary_cols_;
|
||||
std::vector<uint32_t> batch_varbinary_cols_base_offsets_;
|
||||
};
|
||||
|
||||
template <bool is_row_fixed_length, class COPY_FN>
|
||||
inline void KeyEncoder::EncoderBinary::DecodeHelper(
|
||||
uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
|
||||
const KeyRowArray* rows_const, KeyRowArray* rows_mutable_maybe_null,
|
||||
const KeyColumnArray* col_const, KeyColumnArray* col_mutable_maybe_null,
|
||||
COPY_FN copy_fn) {
|
||||
ARROW_DCHECK(col_const && col_const->metadata().is_fixed_length);
|
||||
uint32_t col_width = col_const->metadata().fixed_length;
|
||||
|
||||
if (is_row_fixed_length) {
|
||||
uint32_t row_width = rows_const->metadata().fixed_length;
|
||||
for (uint32_t i = 0; i < num_rows; ++i) {
|
||||
const uint8_t* src;
|
||||
uint8_t* dst;
|
||||
src = rows_const->data(1) + row_width * (start_row + i) + offset_within_row;
|
||||
dst = col_mutable_maybe_null->mutable_data(1) + col_width * i;
|
||||
copy_fn(dst, src, col_width);
|
||||
}
|
||||
} else {
|
||||
const uint32_t* row_offsets = rows_const->offsets();
|
||||
for (uint32_t i = 0; i < num_rows; ++i) {
|
||||
const uint8_t* src;
|
||||
uint8_t* dst;
|
||||
src = rows_const->data(2) + row_offsets[start_row + i] + offset_within_row;
|
||||
dst = col_mutable_maybe_null->mutable_data(1) + col_width * i;
|
||||
copy_fn(dst, src, col_width);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <bool first_varbinary_col, class COPY_FN>
|
||||
inline void KeyEncoder::EncoderVarBinary::DecodeHelper(
|
||||
uint32_t start_row, uint32_t num_rows, uint32_t varbinary_col_id,
|
||||
const KeyRowArray* rows_const, KeyRowArray* rows_mutable_maybe_null,
|
||||
const KeyColumnArray* col_const, KeyColumnArray* col_mutable_maybe_null,
|
||||
COPY_FN copy_fn) {
|
||||
// Column and rows need to be varying length
|
||||
ARROW_DCHECK(!rows_const->metadata().is_fixed_length &&
|
||||
!col_const->metadata().is_fixed_length);
|
||||
|
||||
const uint32_t* row_offsets_for_batch = rows_const->offsets() + start_row;
|
||||
const uint32_t* col_offsets = col_const->offsets();
|
||||
|
||||
uint32_t col_offset_next = col_offsets[0];
|
||||
for (uint32_t i = 0; i < num_rows; ++i) {
|
||||
uint32_t col_offset = col_offset_next;
|
||||
col_offset_next = col_offsets[i + 1];
|
||||
|
||||
uint32_t row_offset = row_offsets_for_batch[i];
|
||||
const uint8_t* row = rows_const->data(2) + row_offset;
|
||||
|
||||
uint32_t offset_within_row;
|
||||
uint32_t length;
|
||||
if (first_varbinary_col) {
|
||||
rows_const->metadata().first_varbinary_offset_and_length(row, &offset_within_row,
|
||||
&length);
|
||||
} else {
|
||||
rows_const->metadata().nth_varbinary_offset_and_length(row, varbinary_col_id,
|
||||
&offset_within_row, &length);
|
||||
}
|
||||
|
||||
row_offset += offset_within_row;
|
||||
|
||||
const uint8_t* src;
|
||||
uint8_t* dst;
|
||||
src = rows_const->data(2) + row_offset;
|
||||
dst = col_mutable_maybe_null->mutable_data(2) + col_offset;
|
||||
copy_fn(dst, src, length);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
@ -0,0 +1,213 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(ARROW_HAVE_AVX2)
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "arrow/compute/exec/key_encode.h"
|
||||
#include "arrow/compute/exec/util.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace compute {
|
||||
|
||||
// Forward declarations only needed for making test functions a friend of the classes in
|
||||
// this file.
|
||||
//
|
||||
enum class BloomFilterBuildStrategy;
|
||||
|
||||
// Implementations are based on xxh3 32-bit algorithm description from:
|
||||
// https://github.com/Cyan4973/xxHash/blob/dev/doc/xxhash_spec.md
|
||||
//
|
||||
class ARROW_EXPORT Hashing32 {
|
||||
friend class TestVectorHash;
|
||||
template <typename T>
|
||||
friend void TestBloomLargeHashHelper(int64_t, int64_t, const std::vector<uint64_t>&,
|
||||
int64_t, int, T*);
|
||||
friend void TestBloomSmall(BloomFilterBuildStrategy, int64_t, int, bool, bool);
|
||||
|
||||
public:
|
||||
static void HashMultiColumn(const std::vector<KeyColumnArray>& cols,
|
||||
KeyEncoder::KeyEncoderContext* ctx, uint32_t* out_hash);
|
||||
|
||||
private:
|
||||
static const uint32_t PRIME32_1 = 0x9E3779B1;
|
||||
static const uint32_t PRIME32_2 = 0x85EBCA77;
|
||||
static const uint32_t PRIME32_3 = 0xC2B2AE3D;
|
||||
static const uint32_t PRIME32_4 = 0x27D4EB2F;
|
||||
static const uint32_t PRIME32_5 = 0x165667B1;
|
||||
static const uint32_t kCombineConst = 0x9e3779b9UL;
|
||||
static const int64_t kStripeSize = 4 * sizeof(uint32_t);
|
||||
|
||||
static void HashFixed(int64_t hardware_flags, bool combine_hashes, uint32_t num_keys,
|
||||
uint64_t length_key, const uint8_t* keys, uint32_t* hashes,
|
||||
uint32_t* temp_hashes_for_combine);
|
||||
|
||||
static void HashVarLen(int64_t hardware_flags, bool combine_hashes, uint32_t num_rows,
|
||||
const uint32_t* offsets, const uint8_t* concatenated_keys,
|
||||
uint32_t* hashes, uint32_t* temp_hashes_for_combine);
|
||||
|
||||
static void HashVarLen(int64_t hardware_flags, bool combine_hashes, uint32_t num_rows,
|
||||
const uint64_t* offsets, const uint8_t* concatenated_keys,
|
||||
uint32_t* hashes, uint32_t* temp_hashes_for_combine);
|
||||
|
||||
static inline uint32_t Avalanche(uint32_t acc) {
|
||||
acc ^= (acc >> 15);
|
||||
acc *= PRIME32_2;
|
||||
acc ^= (acc >> 13);
|
||||
acc *= PRIME32_3;
|
||||
acc ^= (acc >> 16);
|
||||
return acc;
|
||||
}
|
||||
static inline uint32_t Round(uint32_t acc, uint32_t input);
|
||||
static inline uint32_t CombineAccumulators(uint32_t acc1, uint32_t acc2, uint32_t acc3,
|
||||
uint32_t acc4);
|
||||
static inline uint32_t CombineHashesImp(uint32_t previous_hash, uint32_t hash) {
|
||||
uint32_t next_hash = previous_hash ^ (hash + kCombineConst + (previous_hash << 6) +
|
||||
(previous_hash >> 2));
|
||||
return next_hash;
|
||||
}
|
||||
static inline void ProcessFullStripes(uint64_t num_stripes, const uint8_t* key,
|
||||
uint32_t* out_acc1, uint32_t* out_acc2,
|
||||
uint32_t* out_acc3, uint32_t* out_acc4);
|
||||
static inline void ProcessLastStripe(uint32_t mask1, uint32_t mask2, uint32_t mask3,
|
||||
uint32_t mask4, const uint8_t* last_stripe,
|
||||
uint32_t* acc1, uint32_t* acc2, uint32_t* acc3,
|
||||
uint32_t* acc4);
|
||||
static inline void StripeMask(int i, uint32_t* mask1, uint32_t* mask2, uint32_t* mask3,
|
||||
uint32_t* mask4);
|
||||
template <bool T_COMBINE_HASHES>
|
||||
static void HashFixedLenImp(uint32_t num_rows, uint64_t length, const uint8_t* keys,
|
||||
uint32_t* hashes);
|
||||
template <typename T, bool T_COMBINE_HASHES>
|
||||
static void HashVarLenImp(uint32_t num_rows, const T* offsets,
|
||||
const uint8_t* concatenated_keys, uint32_t* hashes);
|
||||
template <bool T_COMBINE_HASHES>
|
||||
static void HashBitImp(int64_t bit_offset, uint32_t num_keys, const uint8_t* keys,
|
||||
uint32_t* hashes);
|
||||
static void HashBit(bool combine_hashes, int64_t bit_offset, uint32_t num_keys,
|
||||
const uint8_t* keys, uint32_t* hashes);
|
||||
template <bool T_COMBINE_HASHES, typename T>
|
||||
static void HashIntImp(uint32_t num_keys, const T* keys, uint32_t* hashes);
|
||||
static void HashInt(bool combine_hashes, uint32_t num_keys, uint64_t length_key,
|
||||
const uint8_t* keys, uint32_t* hashes);
|
||||
|
||||
#if defined(ARROW_HAVE_AVX2)
|
||||
static inline __m256i Avalanche_avx2(__m256i hash);
|
||||
static inline __m256i CombineHashesImp_avx2(__m256i previous_hash, __m256i hash);
|
||||
template <bool T_COMBINE_HASHES>
|
||||
static void AvalancheAll_avx2(uint32_t num_rows, uint32_t* hashes,
|
||||
const uint32_t* hashes_temp_for_combine);
|
||||
static inline __m256i Round_avx2(__m256i acc, __m256i input);
|
||||
static inline uint64_t CombineAccumulators_avx2(__m256i acc);
|
||||
static inline __m256i StripeMask_avx2(int i, int j);
|
||||
template <bool two_equal_lengths>
|
||||
static inline __m256i ProcessStripes_avx2(int64_t num_stripes_A, int64_t num_stripes_B,
|
||||
__m256i mask_last_stripe, const uint8_t* keys,
|
||||
int64_t offset_A, int64_t offset_B);
|
||||
template <bool T_COMBINE_HASHES>
|
||||
static uint32_t HashFixedLenImp_avx2(uint32_t num_rows, uint64_t length,
|
||||
const uint8_t* keys, uint32_t* hashes,
|
||||
uint32_t* hashes_temp_for_combine);
|
||||
static uint32_t HashFixedLen_avx2(bool combine_hashes, uint32_t num_rows,
|
||||
uint64_t length, const uint8_t* keys,
|
||||
uint32_t* hashes, uint32_t* hashes_temp_for_combine);
|
||||
template <typename T, bool T_COMBINE_HASHES>
|
||||
static uint32_t HashVarLenImp_avx2(uint32_t num_rows, const T* offsets,
|
||||
const uint8_t* concatenated_keys, uint32_t* hashes,
|
||||
uint32_t* hashes_temp_for_combine);
|
||||
static uint32_t HashVarLen_avx2(bool combine_hashes, uint32_t num_rows,
|
||||
const uint32_t* offsets,
|
||||
const uint8_t* concatenated_keys, uint32_t* hashes,
|
||||
uint32_t* hashes_temp_for_combine);
|
||||
static uint32_t HashVarLen_avx2(bool combine_hashes, uint32_t num_rows,
|
||||
const uint64_t* offsets,
|
||||
const uint8_t* concatenated_keys, uint32_t* hashes,
|
||||
uint32_t* hashes_temp_for_combine);
|
||||
#endif
|
||||
};
|
||||
|
||||
class ARROW_EXPORT Hashing64 {
|
||||
friend class TestVectorHash;
|
||||
template <typename T>
|
||||
friend void TestBloomLargeHashHelper(int64_t, int64_t, const std::vector<uint64_t>&,
|
||||
int64_t, int, T*);
|
||||
friend void TestBloomSmall(BloomFilterBuildStrategy, int64_t, int, bool, bool);
|
||||
|
||||
public:
|
||||
static void HashMultiColumn(const std::vector<KeyColumnArray>& cols,
|
||||
KeyEncoder::KeyEncoderContext* ctx, uint64_t* hashes);
|
||||
|
||||
private:
|
||||
static const uint64_t PRIME64_1 = 0x9E3779B185EBCA87ULL;
|
||||
static const uint64_t PRIME64_2 = 0xC2B2AE3D27D4EB4FULL;
|
||||
static const uint64_t PRIME64_3 = 0x165667B19E3779F9ULL;
|
||||
static const uint64_t PRIME64_4 = 0x85EBCA77C2B2AE63ULL;
|
||||
static const uint64_t PRIME64_5 = 0x27D4EB2F165667C5ULL;
|
||||
static const uint32_t kCombineConst = 0x9e3779b9UL;
|
||||
static const int64_t kStripeSize = 4 * sizeof(uint64_t);
|
||||
|
||||
static void HashFixed(bool combine_hashes, uint32_t num_keys, uint64_t length_key,
|
||||
const uint8_t* keys, uint64_t* hashes);
|
||||
|
||||
static void HashVarLen(bool combine_hashes, uint32_t num_rows, const uint32_t* offsets,
|
||||
const uint8_t* concatenated_keys, uint64_t* hashes);
|
||||
|
||||
static void HashVarLen(bool combine_hashes, uint32_t num_rows, const uint64_t* offsets,
|
||||
const uint8_t* concatenated_keys, uint64_t* hashes);
|
||||
|
||||
static inline uint64_t Avalanche(uint64_t acc);
|
||||
static inline uint64_t Round(uint64_t acc, uint64_t input);
|
||||
static inline uint64_t CombineAccumulators(uint64_t acc1, uint64_t acc2, uint64_t acc3,
|
||||
uint64_t acc4);
|
||||
static inline uint64_t CombineHashesImp(uint64_t previous_hash, uint64_t hash) {
|
||||
uint64_t next_hash = previous_hash ^ (hash + kCombineConst + (previous_hash << 6) +
|
||||
(previous_hash >> 2));
|
||||
return next_hash;
|
||||
}
|
||||
static inline void ProcessFullStripes(uint64_t num_stripes, const uint8_t* key,
|
||||
uint64_t* out_acc1, uint64_t* out_acc2,
|
||||
uint64_t* out_acc3, uint64_t* out_acc4);
|
||||
static inline void ProcessLastStripe(uint64_t mask1, uint64_t mask2, uint64_t mask3,
|
||||
uint64_t mask4, const uint8_t* last_stripe,
|
||||
uint64_t* acc1, uint64_t* acc2, uint64_t* acc3,
|
||||
uint64_t* acc4);
|
||||
static inline void StripeMask(int i, uint64_t* mask1, uint64_t* mask2, uint64_t* mask3,
|
||||
uint64_t* mask4);
|
||||
template <bool T_COMBINE_HASHES>
|
||||
static void HashFixedLenImp(uint32_t num_rows, uint64_t length, const uint8_t* keys,
|
||||
uint64_t* hashes);
|
||||
template <typename T, bool T_COMBINE_HASHES>
|
||||
static void HashVarLenImp(uint32_t num_rows, const T* offsets,
|
||||
const uint8_t* concatenated_keys, uint64_t* hashes);
|
||||
template <bool T_COMBINE_HASHES>
|
||||
static void HashBitImp(int64_t bit_offset, uint32_t num_keys, const uint8_t* keys,
|
||||
uint64_t* hashes);
|
||||
static void HashBit(bool T_COMBINE_HASHES, int64_t bit_offset, uint32_t num_keys,
|
||||
const uint8_t* keys, uint64_t* hashes);
|
||||
template <bool T_COMBINE_HASHES, typename T>
|
||||
static void HashIntImp(uint32_t num_keys, const T* keys, uint64_t* hashes);
|
||||
static void HashInt(bool T_COMBINE_HASHES, uint32_t num_keys, uint64_t length_key,
|
||||
const uint8_t* keys, uint64_t* hashes);
|
||||
};
|
||||
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
@ -0,0 +1,208 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
|
||||
#include "arrow/compute/exec/util.h"
|
||||
#include "arrow/memory_pool.h"
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/status.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace compute {
|
||||
|
||||
class SwissTable {
|
||||
public:
|
||||
SwissTable() = default;
|
||||
~SwissTable() { cleanup(); }
|
||||
|
||||
using EqualImpl =
|
||||
std::function<void(int num_keys, const uint16_t* selection /* may be null */,
|
||||
const uint32_t* group_ids, uint32_t* out_num_keys_mismatch,
|
||||
uint16_t* out_selection_mismatch)>;
|
||||
using AppendImpl = std::function<Status(int num_keys, const uint16_t* selection)>;
|
||||
|
||||
Status init(int64_t hardware_flags, MemoryPool* pool, util::TempVectorStack* temp_stack,
|
||||
int log_minibatch, EqualImpl equal_impl, AppendImpl append_impl);
|
||||
|
||||
void cleanup();
|
||||
|
||||
void early_filter(const int num_keys, const uint32_t* hashes,
|
||||
uint8_t* out_match_bitvector, uint8_t* out_local_slots) const;
|
||||
|
||||
void find(const int num_keys, const uint32_t* hashes, uint8_t* inout_match_bitvector,
|
||||
const uint8_t* local_slots, uint32_t* out_group_ids) const;
|
||||
|
||||
Status map_new_keys(uint32_t num_ids, uint16_t* ids, const uint32_t* hashes,
|
||||
uint32_t* group_ids);
|
||||
|
||||
private:
|
||||
// Lookup helpers
|
||||
|
||||
/// \brief Scan bytes in block in reverse and stop as soon
|
||||
/// as a position of interest is found.
|
||||
///
|
||||
/// Positions of interest:
|
||||
/// a) slot with a matching stamp is encountered,
|
||||
/// b) first empty slot is encountered,
|
||||
/// c) we reach the end of the block.
|
||||
///
|
||||
/// Optionally an index of the first slot to start the search from can be specified.
|
||||
/// In this case slots before it will be ignored.
|
||||
///
|
||||
/// \param[in] block 8 byte block of hash table
|
||||
/// \param[in] stamp 7 bits of hash used as a stamp
|
||||
/// \param[in] start_slot Index of the first slot in the block to start search from. We
|
||||
/// assume that this index always points to a non-empty slot, equivalently
|
||||
/// that it comes before any empty slots. (Used only by one template
|
||||
/// variant.)
|
||||
/// \param[out] out_slot index corresponding to the discovered position of interest (8
|
||||
/// represents end of block).
|
||||
/// \param[out] out_match_found an integer flag (0 or 1) indicating if we reached an
|
||||
/// empty slot (0) or not (1). Therefore 1 can mean that either actual match was found
|
||||
/// (case a) above) or we reached the end of full block (case b) above).
|
||||
///
|
||||
template <bool use_start_slot>
|
||||
inline void search_block(uint64_t block, int stamp, int start_slot, int* out_slot,
|
||||
int* out_match_found) const;
|
||||
|
||||
/// \brief Extract group id for a given slot in a given block.
|
||||
///
|
||||
inline uint64_t extract_group_id(const uint8_t* block_ptr, int slot,
|
||||
uint64_t group_id_mask) const;
|
||||
void extract_group_ids(const int num_keys, const uint16_t* optional_selection,
|
||||
const uint32_t* hashes, const uint8_t* local_slots,
|
||||
uint32_t* out_group_ids) const;
|
||||
|
||||
template <typename T, bool use_selection>
|
||||
void extract_group_ids_imp(const int num_keys, const uint16_t* selection,
|
||||
const uint32_t* hashes, const uint8_t* local_slots,
|
||||
uint32_t* out_group_ids, int elements_offset,
|
||||
int element_mutltiplier) const;
|
||||
|
||||
inline uint64_t next_slot_to_visit(uint64_t block_index, int slot,
|
||||
int match_found) const;
|
||||
|
||||
inline uint64_t num_groups_for_resize() const;
|
||||
|
||||
inline uint64_t wrap_global_slot_id(uint64_t global_slot_id) const;
|
||||
|
||||
void init_slot_ids(const int num_keys, const uint16_t* selection,
|
||||
const uint32_t* hashes, const uint8_t* local_slots,
|
||||
const uint8_t* match_bitvector, uint32_t* out_slot_ids) const;
|
||||
|
||||
void init_slot_ids_for_new_keys(uint32_t num_ids, const uint16_t* ids,
|
||||
const uint32_t* hashes, uint32_t* slot_ids) const;
|
||||
|
||||
// Quickly filter out keys that have no matches based only on hash value and the
|
||||
// corresponding starting 64-bit block of slot status bytes. May return false positives.
|
||||
//
|
||||
void early_filter_imp(const int num_keys, const uint32_t* hashes,
|
||||
uint8_t* out_match_bitvector, uint8_t* out_local_slots) const;
|
||||
#if defined(ARROW_HAVE_AVX2)
|
||||
void early_filter_imp_avx2_x8(const int num_hashes, const uint32_t* hashes,
|
||||
uint8_t* out_match_bitvector,
|
||||
uint8_t* out_local_slots) const;
|
||||
void early_filter_imp_avx2_x32(const int num_hashes, const uint32_t* hashes,
|
||||
uint8_t* out_match_bitvector,
|
||||
uint8_t* out_local_slots) const;
|
||||
void extract_group_ids_avx2(const int num_keys, const uint32_t* hashes,
|
||||
const uint8_t* local_slots, uint32_t* out_group_ids,
|
||||
int byte_offset, int byte_multiplier, int byte_size) const;
|
||||
#endif
|
||||
|
||||
void run_comparisons(const int num_keys, const uint16_t* optional_selection_ids,
|
||||
const uint8_t* optional_selection_bitvector,
|
||||
const uint32_t* groupids, int* out_num_not_equal,
|
||||
uint16_t* out_not_equal_selection) const;
|
||||
|
||||
inline bool find_next_stamp_match(const uint32_t hash, const uint32_t in_slot_id,
|
||||
uint32_t* out_slot_id, uint32_t* out_group_id) const;
|
||||
|
||||
inline void insert_into_empty_slot(uint32_t slot_id, uint32_t hash, uint32_t group_id);
|
||||
|
||||
// Slow processing of input keys in the most generic case.
|
||||
// Handles inserting new keys.
|
||||
// Pre-existing keys will be handled correctly, although the intended use is for this
|
||||
// call to follow a call to find() method, which would only pass on new keys that were
|
||||
// not present in the hash table.
|
||||
//
|
||||
Status map_new_keys_helper(const uint32_t* hashes, uint32_t* inout_num_selected,
|
||||
uint16_t* inout_selection, bool* out_need_resize,
|
||||
uint32_t* out_group_ids, uint32_t* out_next_slot_ids);
|
||||
|
||||
// Resize small hash tables when 50% full (up to 8KB).
|
||||
// Resize large hash tables when 75% full.
|
||||
Status grow_double();
|
||||
|
||||
static int num_groupid_bits_from_log_blocks(int log_blocks) {
|
||||
int required_bits = log_blocks + 3;
|
||||
return required_bits <= 8 ? 8
|
||||
: required_bits <= 16 ? 16
|
||||
: required_bits <= 32 ? 32
|
||||
: 64;
|
||||
}
|
||||
|
||||
// Use 32-bit hash for now
|
||||
static constexpr int bits_hash_ = 32;
|
||||
|
||||
// Number of hash bits stored in slots in a block.
|
||||
// The highest bits of hash determine block id.
|
||||
// The next set of highest bits is a "stamp" stored in a slot in a block.
|
||||
static constexpr int bits_stamp_ = 7;
|
||||
|
||||
// Padding bytes added at the end of buffers for ease of SIMD access
|
||||
static constexpr int padding_ = 64;
|
||||
|
||||
int log_minibatch_;
|
||||
// Base 2 log of the number of blocks
|
||||
int log_blocks_ = 0;
|
||||
// Number of keys inserted into hash table
|
||||
uint32_t num_inserted_ = 0;
|
||||
|
||||
// Data for blocks.
|
||||
// Each block has 8 status bytes for 8 slots, followed by 8 bit packed group ids for
|
||||
// these slots. In 8B status word, the order of bytes is reversed. Group ids are in
|
||||
// normal order. There is 64B padding at the end.
|
||||
//
|
||||
// 0 byte - 7 bucket | 1. byte - 6 bucket | ...
|
||||
// ---------------------------------------------------
|
||||
// | Empty bit* | Empty bit |
|
||||
// ---------------------------------------------------
|
||||
// | 7-bit hash | 7-bit hash |
|
||||
// ---------------------------------------------------
|
||||
// * Empty bucket has value 0x80. Non-empty bucket has highest bit set to 0.
|
||||
//
|
||||
uint8_t* blocks_;
|
||||
|
||||
// Array of hashes of values inserted into slots.
|
||||
// Undefined if the corresponding slot is empty.
|
||||
// There is 64B padding at the end.
|
||||
uint32_t* hashes_;
|
||||
|
||||
int64_t hardware_flags_;
|
||||
MemoryPool* pool_;
|
||||
util::TempVectorStack* temp_stack_;
|
||||
|
||||
EqualImpl equal_impl_;
|
||||
AppendImpl append_impl_;
|
||||
};
|
||||
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
@ -0,0 +1,393 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/compute/api_aggregate.h"
|
||||
#include "arrow/compute/api_vector.h"
|
||||
#include "arrow/compute/exec.h"
|
||||
#include "arrow/compute/exec/expression.h"
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/util/async_generator.h"
|
||||
#include "arrow/util/async_util.h"
|
||||
#include "arrow/util/optional.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace compute {
|
||||
|
||||
// Shorthand for an AsyncGenerator whose items are util::optional<ExecBatch>.
using AsyncExecBatchGenerator = AsyncGenerator<util::optional<ExecBatch>>;
|
||||
|
||||
/// \addtogroup execnode-options
|
||||
/// @{
|
||||
/// \brief Base class of the node option types declared in this header
///
/// It declares nothing but a virtual destructor, so option objects can be
/// destroyed through a pointer to this base.
class ARROW_EXPORT ExecNodeOptions {
 public:
  virtual ~ExecNodeOptions() = default;
};
|
||||
|
||||
/// \brief Adapt an AsyncGenerator<ExecBatch> as a source node
|
||||
///
|
||||
/// plan->exec_context()->executor() will be used to parallelize pushing to
|
||||
/// outputs, if provided.
|
||||
class ARROW_EXPORT SourceNodeOptions : public ExecNodeOptions {
|
||||
public:
|
||||
SourceNodeOptions(std::shared_ptr<Schema> output_schema,
|
||||
std::function<Future<util::optional<ExecBatch>>()> generator)
|
||||
: output_schema(std::move(output_schema)), generator(std::move(generator)) {}
|
||||
|
||||
static Result<std::shared_ptr<SourceNodeOptions>> FromTable(const Table& table,
|
||||
arrow::internal::Executor*);
|
||||
|
||||
std::shared_ptr<Schema> output_schema;
|
||||
std::function<Future<util::optional<ExecBatch>>()> generator;
|
||||
};
|
||||
|
||||
/// \brief An extended Source node which accepts a table
|
||||
class ARROW_EXPORT TableSourceNodeOptions : public ExecNodeOptions {
|
||||
public:
|
||||
TableSourceNodeOptions(std::shared_ptr<Table> table, int64_t max_batch_size)
|
||||
: table(table), max_batch_size(max_batch_size) {}
|
||||
|
||||
// arrow table which acts as the data source
|
||||
std::shared_ptr<Table> table;
|
||||
// Size of batches to emit from this node
|
||||
// If the table is larger the node will emit multiple batches from the
|
||||
// the table to be processed in parallel.
|
||||
int64_t max_batch_size;
|
||||
};
|
||||
|
||||
/// \brief Make a node which excludes some rows from batches passed through it
|
||||
///
|
||||
/// filter_expression will be evaluated against each batch which is pushed to
|
||||
/// this node. Any rows for which filter_expression does not evaluate to `true` will be
|
||||
/// excluded in the batch emitted by this node.
|
||||
class ARROW_EXPORT FilterNodeOptions : public ExecNodeOptions {
|
||||
public:
|
||||
explicit FilterNodeOptions(Expression filter_expression, bool async_mode = true)
|
||||
: filter_expression(std::move(filter_expression)), async_mode(async_mode) {}
|
||||
|
||||
Expression filter_expression;
|
||||
bool async_mode;
|
||||
};
|
||||
|
||||
/// \brief Make a node which executes expressions on input batches, producing new batches.
|
||||
///
|
||||
/// Each expression will be evaluated against each batch which is pushed to
|
||||
/// this node to produce a corresponding output column.
|
||||
///
|
||||
/// If names are not provided, the string representations of exprs will be used.
|
||||
class ARROW_EXPORT ProjectNodeOptions : public ExecNodeOptions {
|
||||
public:
|
||||
explicit ProjectNodeOptions(std::vector<Expression> expressions,
|
||||
std::vector<std::string> names = {}, bool async_mode = true)
|
||||
: expressions(std::move(expressions)),
|
||||
names(std::move(names)),
|
||||
async_mode(async_mode) {}
|
||||
|
||||
std::vector<Expression> expressions;
|
||||
std::vector<std::string> names;
|
||||
bool async_mode;
|
||||
};
|
||||
|
||||
/// \brief Make a node which aggregates input batches, optionally grouped by keys.
|
||||
class ARROW_EXPORT AggregateNodeOptions : public ExecNodeOptions {
|
||||
public:
|
||||
AggregateNodeOptions(std::vector<internal::Aggregate> aggregates,
|
||||
std::vector<FieldRef> targets, std::vector<std::string> names,
|
||||
std::vector<FieldRef> keys = {})
|
||||
: aggregates(std::move(aggregates)),
|
||||
targets(std::move(targets)),
|
||||
names(std::move(names)),
|
||||
keys(std::move(keys)) {}
|
||||
|
||||
// aggregations which will be applied to the targetted fields
|
||||
std::vector<internal::Aggregate> aggregates;
|
||||
// fields to which aggregations will be applied
|
||||
std::vector<FieldRef> targets;
|
||||
// output field names for aggregations
|
||||
std::vector<std::string> names;
|
||||
// keys by which aggregations will be grouped
|
||||
std::vector<FieldRef> keys;
|
||||
};
|
||||
|
||||
// Default backpressure thresholds, expressed in bytes.
constexpr int32_t kDefaultBackpressureHighBytes = 1024 * 1024 * 1024;  // 1GiB
constexpr int32_t kDefaultBackpressureLowBytes = 256 * 1024 * 1024;    // 256MiB
|
||||
|
||||
class ARROW_EXPORT BackpressureMonitor {
|
||||
public:
|
||||
virtual ~BackpressureMonitor() = default;
|
||||
virtual uint64_t bytes_in_use() const = 0;
|
||||
virtual bool is_paused() const = 0;
|
||||
};
|
||||
|
||||
/// \brief Options to control backpressure behavior
|
||||
struct ARROW_EXPORT BackpressureOptions {
|
||||
/// \brief Create default options that perform no backpressure
|
||||
BackpressureOptions() : resume_if_below(0), pause_if_above(0) {}
|
||||
/// \brief Create options that will perform backpressure
|
||||
///
|
||||
/// \param resume_if_below The producer should resume producing if the backpressure
|
||||
/// queue has fewer than resume_if_below items.
|
||||
/// \param pause_if_above The producer should pause producing if the backpressure
|
||||
/// queue has more than pause_if_above items
|
||||
BackpressureOptions(uint32_t resume_if_below, uint32_t pause_if_above)
|
||||
: resume_if_below(resume_if_below), pause_if_above(pause_if_above) {}
|
||||
|
||||
static BackpressureOptions DefaultBackpressure() {
|
||||
return BackpressureOptions(kDefaultBackpressureLowBytes,
|
||||
kDefaultBackpressureHighBytes);
|
||||
}
|
||||
|
||||
bool should_apply_backpressure() const { return pause_if_above > 0; }
|
||||
|
||||
uint64_t resume_if_below;
|
||||
uint64_t pause_if_above;
|
||||
};
|
||||
|
||||
/// \brief Add a sink node which forwards to an AsyncGenerator<ExecBatch>
|
||||
///
|
||||
/// Emitted batches will not be ordered.
|
||||
class ARROW_EXPORT SinkNodeOptions : public ExecNodeOptions {
|
||||
public:
|
||||
explicit SinkNodeOptions(std::function<Future<util::optional<ExecBatch>>()>* generator,
|
||||
BackpressureOptions backpressure = {},
|
||||
BackpressureMonitor** backpressure_monitor = NULLPTR)
|
||||
: generator(generator),
|
||||
backpressure(std::move(backpressure)),
|
||||
backpressure_monitor(backpressure_monitor) {}
|
||||
|
||||
/// \brief A pointer to a generator of batches.
|
||||
///
|
||||
/// This will be set when the node is added to the plan and should be used to consume
|
||||
/// data from the plan. If this function is not called frequently enough then the sink
|
||||
/// node will start to accumulate data and may apply backpressure.
|
||||
std::function<Future<util::optional<ExecBatch>>()>* generator;
|
||||
/// \brief Options to control when to apply backpressure
|
||||
///
|
||||
/// This is optional, the default is to never apply backpressure. If the plan is not
|
||||
/// consumed quickly enough the system may eventually run out of memory.
|
||||
BackpressureOptions backpressure;
|
||||
/// \brief A pointer to a backpressure monitor
|
||||
///
|
||||
/// This will be set when the node is added to the plan. This can be used to inspect
|
||||
/// the amount of data currently queued in the sink node. This is an optional utility
|
||||
/// and backpressure can be applied even if this is not used.
|
||||
BackpressureMonitor** backpressure_monitor;
|
||||
};
|
||||
|
||||
/// \brief Control used by a SinkNodeConsumer to pause & resume
|
||||
///
|
||||
/// Callers should ensure that they do not call Pause and Resume simultaneously and they
|
||||
/// should sequence things so that a call to Pause() is always followed by an eventual
|
||||
/// call to Resume()
|
||||
class ARROW_EXPORT BackpressureControl {
|
||||
public:
|
||||
virtual ~BackpressureControl() = default;
|
||||
/// \brief Ask the input to pause
|
||||
///
|
||||
/// This is best effort, batches may continue to arrive
|
||||
/// Must eventually be followed by a call to Resume() or deadlock will occur
|
||||
virtual void Pause() = 0;
|
||||
/// \brief Ask the input to resume
|
||||
virtual void Resume() = 0;
|
||||
};
|
||||
|
||||
class ARROW_EXPORT SinkNodeConsumer {
|
||||
public:
|
||||
virtual ~SinkNodeConsumer() = default;
|
||||
/// \brief Prepare any consumer state
|
||||
///
|
||||
/// This will be run once the schema is finalized as the plan is starting and
|
||||
/// before any calls to Consume. A common use is to save off the schema so that
|
||||
/// batches can be interpreted.
|
||||
virtual Status Init(const std::shared_ptr<Schema>& schema,
|
||||
BackpressureControl* backpressure_control) = 0;
|
||||
/// \brief Consume a batch of data
|
||||
virtual Status Consume(ExecBatch batch) = 0;
|
||||
/// \brief Signal to the consumer that the last batch has been delivered
|
||||
///
|
||||
/// The returned future should only finish when all outstanding tasks have completed
|
||||
virtual Future<> Finish() = 0;
|
||||
};
|
||||
|
||||
/// \brief Add a sink node which consumes data within the exec plan run
|
||||
class ARROW_EXPORT ConsumingSinkNodeOptions : public ExecNodeOptions {
|
||||
public:
|
||||
explicit ConsumingSinkNodeOptions(std::shared_ptr<SinkNodeConsumer> consumer)
|
||||
: consumer(std::move(consumer)) {}
|
||||
|
||||
std::shared_ptr<SinkNodeConsumer> consumer;
|
||||
};
|
||||
|
||||
/// \brief Make a node which sorts rows passed through it
|
||||
///
|
||||
/// All batches pushed to this node will be accumulated, then sorted, by the given
|
||||
/// fields. Then sorted batches will be forwarded to the generator in sorted order.
|
||||
class ARROW_EXPORT OrderBySinkNodeOptions : public SinkNodeOptions {
|
||||
public:
|
||||
explicit OrderBySinkNodeOptions(
|
||||
SortOptions sort_options,
|
||||
std::function<Future<util::optional<ExecBatch>>()>* generator)
|
||||
: SinkNodeOptions(generator), sort_options(std::move(sort_options)) {}
|
||||
|
||||
SortOptions sort_options;
|
||||
};
|
||||
|
||||
/// @}
|
||||
|
||||
/// \brief Kind of join performed by a join node (see HashJoinNodeOptions::join_type)
enum class JoinType {
  // semi joins
  LEFT_SEMI,
  RIGHT_SEMI,
  // anti joins
  LEFT_ANTI,
  RIGHT_ANTI,
  // inner and outer joins
  INNER,
  LEFT_OUTER,
  RIGHT_OUTER,
  FULL_OUTER
};

/// \brief Return a string representation of the given join type
std::string ToString(JoinType t);
|
||||
|
||||
/// \brief Comparison applied to one pair of join key columns
///
/// Used by HashJoinNodeOptions::key_cmp, where it decides whether a null key is
/// considered equal to another null key.
/// NOTE(review): presumably IS is the variant under which null matches null —
/// confirm against the join implementation.
enum class JoinKeyCmp {
  EQ,
  IS
};
|
||||
|
||||
/// \addtogroup execnode-options
|
||||
/// @{
|
||||
|
||||
/// \brief Make a node which implements join operation using hash join strategy.
|
||||
class ARROW_EXPORT HashJoinNodeOptions : public ExecNodeOptions {
|
||||
public:
|
||||
static constexpr const char* default_output_suffix_for_left = "";
|
||||
static constexpr const char* default_output_suffix_for_right = "";
|
||||
HashJoinNodeOptions(
|
||||
JoinType in_join_type, std::vector<FieldRef> in_left_keys,
|
||||
std::vector<FieldRef> in_right_keys, Expression filter = literal(true),
|
||||
std::string output_suffix_for_left = default_output_suffix_for_left,
|
||||
std::string output_suffix_for_right = default_output_suffix_for_right)
|
||||
: join_type(in_join_type),
|
||||
left_keys(std::move(in_left_keys)),
|
||||
right_keys(std::move(in_right_keys)),
|
||||
output_all(true),
|
||||
output_suffix_for_left(std::move(output_suffix_for_left)),
|
||||
output_suffix_for_right(std::move(output_suffix_for_right)),
|
||||
filter(std::move(filter)) {
|
||||
this->key_cmp.resize(this->left_keys.size());
|
||||
for (size_t i = 0; i < this->left_keys.size(); ++i) {
|
||||
this->key_cmp[i] = JoinKeyCmp::EQ;
|
||||
}
|
||||
}
|
||||
HashJoinNodeOptions(
|
||||
JoinType join_type, std::vector<FieldRef> left_keys,
|
||||
std::vector<FieldRef> right_keys, std::vector<FieldRef> left_output,
|
||||
std::vector<FieldRef> right_output, Expression filter = literal(true),
|
||||
std::string output_suffix_for_left = default_output_suffix_for_left,
|
||||
std::string output_suffix_for_right = default_output_suffix_for_right)
|
||||
: join_type(join_type),
|
||||
left_keys(std::move(left_keys)),
|
||||
right_keys(std::move(right_keys)),
|
||||
output_all(false),
|
||||
left_output(std::move(left_output)),
|
||||
right_output(std::move(right_output)),
|
||||
output_suffix_for_left(std::move(output_suffix_for_left)),
|
||||
output_suffix_for_right(std::move(output_suffix_for_right)),
|
||||
filter(std::move(filter)) {
|
||||
this->key_cmp.resize(this->left_keys.size());
|
||||
for (size_t i = 0; i < this->left_keys.size(); ++i) {
|
||||
this->key_cmp[i] = JoinKeyCmp::EQ;
|
||||
}
|
||||
}
|
||||
HashJoinNodeOptions(
|
||||
JoinType join_type, std::vector<FieldRef> left_keys,
|
||||
std::vector<FieldRef> right_keys, std::vector<FieldRef> left_output,
|
||||
std::vector<FieldRef> right_output, std::vector<JoinKeyCmp> key_cmp,
|
||||
Expression filter = literal(true),
|
||||
std::string output_suffix_for_left = default_output_suffix_for_left,
|
||||
std::string output_suffix_for_right = default_output_suffix_for_right)
|
||||
: join_type(join_type),
|
||||
left_keys(std::move(left_keys)),
|
||||
right_keys(std::move(right_keys)),
|
||||
output_all(false),
|
||||
left_output(std::move(left_output)),
|
||||
right_output(std::move(right_output)),
|
||||
key_cmp(std::move(key_cmp)),
|
||||
output_suffix_for_left(std::move(output_suffix_for_left)),
|
||||
output_suffix_for_right(std::move(output_suffix_for_right)),
|
||||
filter(std::move(filter)) {}
|
||||
|
||||
// type of join (inner, left, semi...)
|
||||
JoinType join_type;
|
||||
// key fields from left input
|
||||
std::vector<FieldRef> left_keys;
|
||||
// key fields from right input
|
||||
std::vector<FieldRef> right_keys;
|
||||
// if set all valid fields from both left and right input will be output
|
||||
// (and field ref vectors for output fields will be ignored)
|
||||
bool output_all;
|
||||
// output fields passed from left input
|
||||
std::vector<FieldRef> left_output;
|
||||
// output fields passed from right input
|
||||
std::vector<FieldRef> right_output;
|
||||
// key comparison function (determines whether a null key is equal another null
|
||||
// key or not)
|
||||
std::vector<JoinKeyCmp> key_cmp;
|
||||
// suffix added to names of output fields coming from left input (used to distinguish,
|
||||
// if necessary, between fields of the same name in left and right input and can be left
|
||||
// empty if there are no name collisions)
|
||||
std::string output_suffix_for_left;
|
||||
// suffix added to names of output fields coming from right input
|
||||
std::string output_suffix_for_right;
|
||||
// residual filter which is applied to matching rows. Rows that do not match
|
||||
// the filter are not included. The filter is applied against the
|
||||
// concatenated input schema (left fields then right fields) and can reference
|
||||
// fields that are not included in the output.
|
||||
Expression filter;
|
||||
};
|
||||
|
||||
/// \brief Make a node which select top_k/bottom_k rows passed through it
|
||||
///
|
||||
/// All batches pushed to this node will be accumulated, then selected, by the given
|
||||
/// fields. Then sorted batches will be forwarded to the generator in sorted order.
|
||||
class ARROW_EXPORT SelectKSinkNodeOptions : public SinkNodeOptions {
|
||||
public:
|
||||
explicit SelectKSinkNodeOptions(
|
||||
SelectKOptions select_k_options,
|
||||
std::function<Future<util::optional<ExecBatch>>()>* generator)
|
||||
: SinkNodeOptions(generator), select_k_options(std::move(select_k_options)) {}
|
||||
|
||||
/// SelectK options
|
||||
SelectKOptions select_k_options;
|
||||
};
|
||||
/// @}
|
||||
|
||||
/// \brief Adapt a Table as a sink node
|
||||
///
|
||||
/// obtains the output of an execution plan to
|
||||
/// a table pointer.
|
||||
class ARROW_EXPORT TableSinkNodeOptions : public ExecNodeOptions {
|
||||
public:
|
||||
explicit TableSinkNodeOptions(std::shared_ptr<Table>* output_table)
|
||||
: output_table(output_table) {}
|
||||
|
||||
std::shared_ptr<Table>* output_table;
|
||||
};
|
||||
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
@ -0,0 +1,53 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/compute/exec/options.h"
|
||||
#include "arrow/record_batch.h"
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/status.h"
|
||||
#include "arrow/type.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace compute {
|
||||
|
||||
class OrderByImpl {
|
||||
public:
|
||||
virtual ~OrderByImpl() = default;
|
||||
|
||||
virtual void InputReceived(const std::shared_ptr<RecordBatch>& batch) = 0;
|
||||
|
||||
virtual Result<Datum> DoFinish() = 0;
|
||||
|
||||
virtual std::string ToString() const = 0;
|
||||
|
||||
static Result<std::unique_ptr<OrderByImpl>> MakeSort(
|
||||
ExecContext* ctx, const std::shared_ptr<Schema>& output_schema,
|
||||
const SortOptions& options);
|
||||
|
||||
static Result<std::unique_ptr<OrderByImpl>> MakeSelectK(
|
||||
ExecContext* ctx, const std::shared_ptr<Schema>& output_schema,
|
||||
const SelectKOptions& options);
|
||||
};
|
||||
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
@ -0,0 +1,136 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <random>
|
||||
#include "arrow/buffer.h"
|
||||
#include "arrow/compute/exec/util.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace compute {
|
||||
|
||||
class PartitionSort {
 public:
  /// \brief Bucket sort rows on partition ids in O(num_rows + num_prtns) time.
  ///
  /// Besides placing the rows, the exclusive cumulative sum of bucket sizes is
  /// written to prtn_ranges. It describes the ranges in the sorted output that
  /// hold the rows of each partition.
  ///
  /// prtn_ranges must point to at least num_prtns + 1 elements; their previous
  /// contents are overwritten. When this method returns, prtn_ranges[i] holds the
  /// total number of rows in partitions 0 through i - 1, so the rows of
  /// partition i occupy output positions [prtn_ranges[i], prtn_ranges[i + 1]).
  /// prtn_ranges[0] is 0 and prtn_ranges[num_prtns] is num_rows.
  ///
  /// prtn_id_impl must be a function that takes in a row id (int) and returns
  /// a partition id (int). The returned partition id must be between 0
  /// (inclusive) and num_prtns (exclusive). It is called twice for every row and
  /// must return the same id both times.
  ///
  /// output_pos_impl is a function that takes in a row id (int) and a position
  /// (int) in the bucket sorted output. The function should insert the row in
  /// the output. Rows of the same partition are assigned positions in increasing
  /// row id order.
  ///
  /// Both num_rows and num_prtns must be in the range [1, 1 << 15].
  ///
  /// For example:
  ///
  /// in_arr: [5, 7, 2, 3, 5, 4]
  /// num_prtns: 3
  /// prtn_id_impl: [&in_arr] (int row_id) { return in_arr[row_id] / 3; }
  /// output_pos_impl: [&] (int row_id, int pos) { out_arr[pos] = in_arr[row_id]; }
  ///
  /// After Execution
  /// out_arr: [2, 5, 3, 5, 4, 7]
  /// prtn_ranges: [0, 1, 5, 6]
  template <class INPUT_PRTN_ID_FN, class OUTPUT_POS_FN>
  static void Eval(int num_rows, int num_prtns, uint16_t* prtn_ranges,
                   INPUT_PRTN_ID_FN prtn_id_impl, OUTPUT_POS_FN output_pos_impl) {
    ARROW_DCHECK(num_rows > 0 && num_rows <= (1 << 15));
    ARROW_DCHECK(num_prtns >= 1 && num_prtns <= (1 << 15));

    // Reset all num_prtns + 1 counters. A plain loop keeps this header free of
    // a dependency on <cstring>, which it does not include.
    for (int i = 0; i <= num_prtns; ++i) {
      prtn_ranges[i] = 0;
    }

    // Pass 1: histogram. The size of partition p is counted in slot p + 1.
    for (int i = 0; i < num_rows; ++i) {
      int prtn_id = static_cast<int>(prtn_id_impl(i));
      // An out-of-range id would write outside of prtn_ranges.
      ARROW_DCHECK(prtn_id >= 0 && prtn_id < num_prtns);
      ++prtn_ranges[prtn_id + 1];
    }

    // Pass 2: exclusive prefix sum. Slot p + 1 now holds the first output
    // position of partition p.
    uint16_t sum = 0;
    for (int i = 0; i < num_prtns; ++i) {
      uint16_t sum_next = static_cast<uint16_t>(sum + prtn_ranges[i + 1]);
      prtn_ranges[i + 1] = sum;
      sum = sum_next;
    }

    // Pass 3: scatter. Each placed row advances the cursor of its partition, so
    // once all rows are placed slot p + 1 holds the end of partition p, which is
    // also the start of partition p + 1.
    for (int i = 0; i < num_rows; ++i) {
      int prtn_id = static_cast<int>(prtn_id_impl(i));
      int pos = prtn_ranges[prtn_id + 1]++;
      output_pos_impl(i, pos);
    }
  }
};
|
||||
|
||||
/// \brief A control for synchronizing threads on a partitionable workload
class PartitionLocks {
 public:
  PartitionLocks();
  ~PartitionLocks();

  /// \brief Initialize the control; has to be called before any other use
  ///
  /// \param num_prtns Number of partitions to synchronize
  void Init(int num_prtns);

  /// \brief Clean up the control; it must not be used after this call
  void CleanUp();

  /// \brief Acquire the lock of one partition out of a set of candidates
  ///
  /// \param num_prtns Length of prtns_to_try, must be <= num_prtns used in Init
  /// \param prtns_to_try An array of partitions that still have remaining work
  /// \param limit_retries If false, this method will spinwait forever until success
  /// \param max_retries Max times to attempt checking out work before returning false
  /// \param[out] locked_prtn_id The id of the partition locked
  /// \param[out] locked_prtn_id_pos The index of the partition locked in prtns_to_try
  /// \return True if a partition was locked, false if max_retries was attempted
  ///         without successfully acquiring a lock
  ///
  /// This method is thread safe
  bool AcquirePartitionLock(int num_prtns, const int* prtns_to_try, bool limit_retries,
                            int max_retries, int* locked_prtn_id,
                            int* locked_prtn_id_pos);

  /// \brief Release a partition so that other threads can work on it
  void ReleasePartitionLock(int prtn_id);

 private:
  std::atomic<bool>* lock_ptr(int prtn_id);
  int random_int(int num_values);

  // A single lock flag followed by kCacheLineBytes of padding, so that the
  // flags of neighbouring array elements are more than kCacheLineBytes apart.
  struct PartitionLock {
    static constexpr int kCacheLineBytes = 64;
    std::atomic<bool> lock;
    uint8_t padding[kCacheLineBytes];
  };

  int num_prtns_;
  std::unique_ptr<PartitionLock[]> locks_;

  // Random number generation state.
  // NOTE(review): presumably consumed by random_int() — confirm in the .cc file.
  std::seed_seq rand_seed_;
  std::mt19937 rand_engine_;
  std::uniform_int_distribution<uint64_t> rand_distribution_;
};
|
||||
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
@ -0,0 +1,215 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/compute/exec/key_encode.h" // for KeyColumnMetadata
|
||||
#include "arrow/type.h" // for DataType, FieldRef, Field and Schema
|
||||
#include "arrow/util/mutex.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
using internal::checked_cast;
|
||||
|
||||
namespace compute {
|
||||
|
||||
// Identifies each of the distinct row schemas that a join works with.
// The numeric values are spelled out explicitly.
//
enum class HashJoinProjection : int {
  INPUT = 0,
  KEY = 1,
  PAYLOAD = 2,
  FILTER = 3,
  OUTPUT = 4
};
|
||||
|
||||
/// Lightweight view that maps column indices of one projection (the source) to
/// column indices of another projection (the target) by going through a common
/// base schema.
///
/// The struct only stores raw pointers and does not own the arrays they point to.
struct SchemaProjectionMap {
  /// Marker for a field that has no counterpart
  static constexpr int kMissingField = -1;

  /// Number of columns in the source projection (valid arguments of get())
  int num_cols;
  /// For each source column, the index of the corresponding base column
  const int* source_to_base;
  /// For each base column, the index of the corresponding target column
  const int* base_to_target;

  /// Translate source column i into the matching target column index.
  ///
  /// i must be in [0, num_cols) and must have a base column; both conditions
  /// are verified with ARROW_DCHECK. The value read from base_to_target is
  /// returned without further checks.
  inline int get(int i) const {
    ARROW_DCHECK(i >= 0 && i < num_cols);
    const int base_index = source_to_base[i];
    ARROW_DCHECK(base_index != kMissingField);
    return base_to_target[base_index];
  }
};
|
||||
|
||||
/// Helper class for managing different projections of the same row schema.
|
||||
/// Used to efficiently map any field in one projection to a corresponding field in
|
||||
/// another projection.
|
||||
/// Materialized mappings are generated lazily at the time of the first access.
|
||||
/// Thread-safe apart from initialization.
|
||||
template <typename ProjectionIdEnum>
|
||||
class SchemaProjectionMaps {
|
||||
public:
|
||||
static constexpr int kMissingField = -1;
|
||||
|
||||
Status Init(ProjectionIdEnum full_schema_handle, const Schema& schema,
|
||||
const std::vector<ProjectionIdEnum>& projection_handles,
|
||||
const std::vector<const std::vector<FieldRef>*>& projections) {
|
||||
ARROW_DCHECK(projection_handles.size() == projections.size());
|
||||
ARROW_RETURN_NOT_OK(RegisterSchema(full_schema_handle, schema));
|
||||
for (size_t i = 0; i < projections.size(); ++i) {
|
||||
ARROW_RETURN_NOT_OK(
|
||||
RegisterProjectedSchema(projection_handles[i], *(projections[i]), schema));
|
||||
}
|
||||
RegisterEnd();
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
int num_cols(ProjectionIdEnum schema_handle) const {
|
||||
int id = schema_id(schema_handle);
|
||||
return static_cast<int>(schemas_[id].second.size());
|
||||
}
|
||||
|
||||
const std::string& field_name(ProjectionIdEnum schema_handle, int field_id) const {
|
||||
return field(schema_handle, field_id).field_name;
|
||||
}
|
||||
|
||||
const std::shared_ptr<DataType>& data_type(ProjectionIdEnum schema_handle,
|
||||
int field_id) const {
|
||||
return field(schema_handle, field_id).data_type;
|
||||
}
|
||||
|
||||
SchemaProjectionMap map(ProjectionIdEnum from, ProjectionIdEnum to) const {
|
||||
int id_from = schema_id(from);
|
||||
int id_to = schema_id(to);
|
||||
SchemaProjectionMap result;
|
||||
result.num_cols = num_cols(from);
|
||||
result.source_to_base = mappings_[id_from].data();
|
||||
result.base_to_target = inverse_mappings_[id_to].data();
|
||||
return result;
|
||||
}
|
||||
|
||||
protected:
|
||||
struct FieldInfo {
|
||||
int field_path;
|
||||
std::string field_name;
|
||||
std::shared_ptr<DataType> data_type;
|
||||
};
|
||||
|
||||
Status RegisterSchema(ProjectionIdEnum handle, const Schema& schema) {
|
||||
std::vector<FieldInfo> out_fields;
|
||||
const FieldVector& in_fields = schema.fields();
|
||||
out_fields.resize(in_fields.size());
|
||||
for (size_t i = 0; i < in_fields.size(); ++i) {
|
||||
const std::string& name = in_fields[i]->name();
|
||||
const std::shared_ptr<DataType>& type = in_fields[i]->type();
|
||||
out_fields[i].field_path = static_cast<int>(i);
|
||||
out_fields[i].field_name = name;
|
||||
out_fields[i].data_type = type;
|
||||
}
|
||||
schemas_.push_back(std::make_pair(handle, out_fields));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status RegisterProjectedSchema(ProjectionIdEnum handle,
|
||||
const std::vector<FieldRef>& selected_fields,
|
||||
const Schema& full_schema) {
|
||||
std::vector<FieldInfo> out_fields;
|
||||
const FieldVector& in_fields = full_schema.fields();
|
||||
out_fields.resize(selected_fields.size());
|
||||
for (size_t i = 0; i < selected_fields.size(); ++i) {
|
||||
// All fields must be found in schema without ambiguity
|
||||
ARROW_ASSIGN_OR_RAISE(auto match, selected_fields[i].FindOne(full_schema));
|
||||
const std::string& name = in_fields[match[0]]->name();
|
||||
const std::shared_ptr<DataType>& type = in_fields[match[0]]->type();
|
||||
out_fields[i].field_path = match[0];
|
||||
out_fields[i].field_name = name;
|
||||
out_fields[i].data_type = type;
|
||||
}
|
||||
schemas_.push_back(std::make_pair(handle, out_fields));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
void RegisterEnd() {
|
||||
size_t size = schemas_.size();
|
||||
mappings_.resize(size);
|
||||
inverse_mappings_.resize(size);
|
||||
int id_base = 0;
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
GenerateMapForProjection(static_cast<int>(i), id_base);
|
||||
}
|
||||
}
|
||||
|
||||
int schema_id(ProjectionIdEnum schema_handle) const {
|
||||
for (size_t i = 0; i < schemas_.size(); ++i) {
|
||||
if (schemas_[i].first == schema_handle) {
|
||||
return static_cast<int>(i);
|
||||
}
|
||||
}
|
||||
// We should never get here
|
||||
ARROW_DCHECK(false);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const FieldInfo& field(ProjectionIdEnum schema_handle, int field_id) const {
|
||||
int id = schema_id(schema_handle);
|
||||
const std::vector<FieldInfo>& field_infos = schemas_[id].second;
|
||||
return field_infos[field_id];
|
||||
}
|
||||
|
||||
void GenerateMapForProjection(int id_proj, int id_base) {
|
||||
int num_cols_proj = static_cast<int>(schemas_[id_proj].second.size());
|
||||
int num_cols_base = static_cast<int>(schemas_[id_base].second.size());
|
||||
|
||||
std::vector<int>& mapping = mappings_[id_proj];
|
||||
std::vector<int>& inverse_mapping = inverse_mappings_[id_proj];
|
||||
mapping.resize(num_cols_proj);
|
||||
inverse_mapping.resize(num_cols_base);
|
||||
|
||||
if (id_proj == id_base) {
|
||||
for (int i = 0; i < num_cols_base; ++i) {
|
||||
mapping[i] = inverse_mapping[i] = i;
|
||||
}
|
||||
} else {
|
||||
const std::vector<FieldInfo>& fields_proj = schemas_[id_proj].second;
|
||||
const std::vector<FieldInfo>& fields_base = schemas_[id_base].second;
|
||||
for (int i = 0; i < num_cols_base; ++i) {
|
||||
inverse_mapping[i] = SchemaProjectionMap::kMissingField;
|
||||
}
|
||||
for (int i = 0; i < num_cols_proj; ++i) {
|
||||
int field_id = SchemaProjectionMap::kMissingField;
|
||||
for (int j = 0; j < num_cols_base; ++j) {
|
||||
if (fields_proj[i].field_path == fields_base[j].field_path) {
|
||||
field_id = j;
|
||||
// If there are multiple matches for the same input field,
|
||||
// it will be mapped to the first match.
|
||||
break;
|
||||
}
|
||||
}
|
||||
ARROW_DCHECK(field_id != SchemaProjectionMap::kMissingField);
|
||||
mapping[i] = field_id;
|
||||
inverse_mapping[field_id] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// vector used as a mapping from ProjectionIdEnum to fields
|
||||
std::vector<std::pair<ProjectionIdEnum, std::vector<FieldInfo>>> schemas_;
|
||||
std::vector<std::vector<int>> mappings_;
|
||||
std::vector<std::vector<int>> inverse_mappings_;
|
||||
};
|
||||
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
@ -0,0 +1,100 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/status.h"
|
||||
#include "arrow/util/logging.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace compute {
|
||||
|
||||
// A std::atomic<T> with kCacheLineSize unused bytes on each side of it. The
// padding avoids cache line invalidation whenever the value is modified by a
// concurrent thread running on a different CPU core.
//
template <typename T>
class AtomicWithPadding {
 private:
  static constexpr int kCacheLineSize = 64;
  // Unused bytes placed in front of the value.
  uint8_t leading_padding_[kCacheLineSize];

 public:
  std::atomic<T> value;

 private:
  // Unused bytes placed behind the value.
  uint8_t trailing_padding_[kCacheLineSize];
};
|
||||
|
||||
// Used for asynchronous execution of operations that can be broken into
|
||||
// a fixed number of symmetric tasks that can be executed concurrently.
|
||||
//
|
||||
// Implements priorities between multiple such operations, called task groups.
|
||||
//
|
||||
// Allows to specify the maximum number of in-flight tasks at any moment.
|
||||
//
|
||||
// Also allows for executing next pending tasks immediately using a caller thread.
|
||||
//
|
||||
class TaskScheduler {
|
||||
public:
|
||||
using TaskImpl = std::function<Status(size_t, int64_t)>;
|
||||
using TaskGroupContinuationImpl = std::function<Status(size_t)>;
|
||||
using ScheduleImpl = std::function<Status(TaskGroupContinuationImpl)>;
|
||||
using AbortContinuationImpl = std::function<void()>;
|
||||
|
||||
virtual ~TaskScheduler() = default;
|
||||
|
||||
// Order in which task groups are registered represents priorities of their tasks
|
||||
// (the first group has the highest priority).
|
||||
//
|
||||
// Returns task group identifier that is used to request operations on the task group.
|
||||
virtual int RegisterTaskGroup(TaskImpl task_impl,
|
||||
TaskGroupContinuationImpl cont_impl) = 0;
|
||||
|
||||
virtual void RegisterEnd() = 0;
|
||||
|
||||
// total_num_tasks may be zero, in which case task group continuation will be executed
|
||||
// immediately
|
||||
virtual Status StartTaskGroup(size_t thread_id, int group_id,
|
||||
int64_t total_num_tasks) = 0;
|
||||
|
||||
// Execute given number of tasks immediately using caller thread
|
||||
virtual Status ExecuteMore(size_t thread_id, int num_tasks_to_execute,
|
||||
bool execute_all) = 0;
|
||||
|
||||
// Begin scheduling tasks using provided callback and
|
||||
// the limit on the number of in-flight tasks at any moment.
|
||||
//
|
||||
// Scheduling will continue as long as there are waiting tasks.
|
||||
//
|
||||
// It will automatically resume whenever new task group gets started.
|
||||
virtual Status StartScheduling(size_t thread_id, ScheduleImpl schedule_impl,
|
||||
int num_concurrent_tasks, bool use_sync_execution) = 0;
|
||||
|
||||
// Abort scheduling and execution.
|
||||
// Used in case of being notified about unrecoverable error for the entire query.
|
||||
virtual void Abort(AbortContinuationImpl impl) = 0;
|
||||
|
||||
static std::unique_ptr<TaskScheduler> Make();
|
||||
};
|
||||
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
@ -0,0 +1,132 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <arrow/testing/gtest_util.h>
|
||||
#include <arrow/util/vector.h>
|
||||
|
||||
#include <functional>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/compute/exec.h"
|
||||
#include "arrow/compute/exec/exec_plan.h"
|
||||
#include "arrow/testing/visibility.h"
|
||||
#include "arrow/util/async_generator.h"
|
||||
#include "arrow/util/pcg_random.h"
|
||||
#include "arrow/util/string_view.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace compute {
|
||||
|
||||
using StartProducingFunc = std::function<Status(ExecNode*)>;
|
||||
using StopProducingFunc = std::function<void(ExecNode*)>;
|
||||
|
||||
// Make a dummy node that has no execution behaviour
|
||||
ARROW_TESTING_EXPORT
|
||||
ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, std::vector<ExecNode*> inputs,
|
||||
int num_outputs, StartProducingFunc = {}, StopProducingFunc = {});
|
||||
|
||||
// Build an ExecBatch from a JSON string.
// NOTE(review): presumably `descrs` describes one value per column of the
// batch — confirm against the definition.
ARROW_TESTING_EXPORT
ExecBatch ExecBatchFromJSON(const std::vector<ValueDescr>& descrs,
                            util::string_view json);
|
||||
|
||||
struct BatchesWithSchema {
|
||||
std::vector<ExecBatch> batches;
|
||||
std::shared_ptr<Schema> schema;
|
||||
|
||||
AsyncGenerator<util::optional<ExecBatch>> gen(bool parallel, bool slow) const {
|
||||
auto opt_batches = ::arrow::internal::MapVector(
|
||||
[](ExecBatch batch) { return util::make_optional(std::move(batch)); }, batches);
|
||||
|
||||
AsyncGenerator<util::optional<ExecBatch>> gen;
|
||||
|
||||
if (parallel) {
|
||||
// emulate batches completing initial decode-after-scan on a cpu thread
|
||||
gen = MakeBackgroundGenerator(MakeVectorIterator(std::move(opt_batches)),
|
||||
::arrow::internal::GetCpuThreadPool())
|
||||
.ValueOrDie();
|
||||
|
||||
// ensure that callbacks are not executed immediately on a background thread
|
||||
gen =
|
||||
MakeTransferredGenerator(std::move(gen), ::arrow::internal::GetCpuThreadPool());
|
||||
} else {
|
||||
gen = MakeVectorGenerator(std::move(opt_batches));
|
||||
}
|
||||
|
||||
if (slow) {
|
||||
gen =
|
||||
MakeMappedGenerator(std::move(gen), [](const util::optional<ExecBatch>& batch) {
|
||||
SleepABit();
|
||||
return batch;
|
||||
});
|
||||
}
|
||||
|
||||
return gen;
|
||||
}
|
||||
};
|
||||
|
||||
// NOTE(review): presumably starts `plan` and gathers every batch that `gen`
// yields into the returned future — confirm against the definition.
ARROW_TESTING_EXPORT
Future<std::vector<ExecBatch>> StartAndCollect(
    ExecPlan* plan, AsyncGenerator<util::optional<ExecBatch>> gen);
|
||||
|
||||
// Factories for test inputs. The contents of the basic and nested batches are
// fixed by their definitions, which are not part of this header.
ARROW_TESTING_EXPORT
BatchesWithSchema MakeBasicBatches();

ARROW_TESTING_EXPORT
BatchesWithSchema MakeNestedBatches();

// Generate batches for the given schema.
// num_batches defaults to 10 and batch_size to 4.
ARROW_TESTING_EXPORT
BatchesWithSchema MakeRandomBatches(const std::shared_ptr<Schema>& schema,
                                    int num_batches = 10, int batch_size = 4);
|
||||
|
||||
// Return a sorted version of `tab`, or an error.
// NOTE(review): as the name says, presumably every field takes part in the sort
// key — confirm against the definition.
ARROW_TESTING_EXPORT
Result<std::shared_ptr<Table>> SortTableOnAllFields(const std::shared_ptr<Table>& tab);

// Assertion helper comparing an expected table (`exp`) with an actual one (`act`).
ARROW_TESTING_EXPORT
void AssertTablesEqual(const std::shared_ptr<Table>& exp,
                       const std::shared_ptr<Table>& act);

// Assertion helper comparing expected batches (`exp`) with actual ones (`act`),
// both interpreted with `schema`.
ARROW_TESTING_EXPORT
void AssertExecBatchesEqual(const std::shared_ptr<Schema>& schema,
                            const std::vector<ExecBatch>& exp,
                            const std::vector<ExecBatch>& act);
|
||||
|
||||
// Equality comparison for Declaration, for use in tests.
ARROW_TESTING_EXPORT
bool operator==(const Declaration&, const Declaration&);

// Write a representation of `decl` to `os`.
// NOTE(review): presumably the hook googletest uses to print a Declaration in
// failure messages — confirm.
ARROW_TESTING_EXPORT
void PrintTo(const Declaration& decl, std::ostream* os);
|
||||
|
||||
class Random64Bit {
|
||||
public:
|
||||
explicit Random64Bit(int32_t seed) : rng_(seed) {}
|
||||
uint64_t next() { return dist_(rng_); }
|
||||
template <typename T>
|
||||
inline T from_range(const T& min_val, const T& max_val) {
|
||||
return static_cast<T>(min_val + (next() % (max_val - min_val + 1)));
|
||||
}
|
||||
|
||||
private:
|
||||
random::pcg32_fast rng_;
|
||||
std::uniform_int_distribution<uint64_t> dist_;
|
||||
};
|
||||
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
@ -0,0 +1,64 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/compute/type_fwd.h"
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/status.h"
|
||||
#include "arrow/util/optional.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace compute {
|
||||
namespace internal {
|
||||
|
||||
// Abstract interface: every table accessor below is pure virtual, and instances are
// obtained through the static Make() factory.
class ARROW_EXPORT TpchGen {
|
||||
public:
|
||||
virtual ~TpchGen() = default;
|
||||
|
||||
/**
 * \brief Create a factory for nodes that generate TPC-H data
 *
 * Note: Individual tables will reference each other. It is important that you only
 * create a single TpchGen instance for each plan and then you can create nodes for each
 * table from that single TpchGen instance. Note: Every batch will be scheduled as a new
 * task using the ExecPlan's scheduler.
 *
 * \param plan the plan the generated nodes will be added to
 * \param scale_factor defaults to 1.0 (NOTE(review): presumably the TPC-H scale
 *        factor -- confirm against the definition)
 * \param batch_size defaults to 4096 (NOTE(review): presumably rows per generated
 *        batch -- confirm)
 * \param seed optional seed, unset by default (NOTE(review): what is used when it is
 *        unset is not visible in this header)
 * \return the new generator, or an error
 */
|
||||
static Result<std::unique_ptr<TpchGen>> Make(
|
||||
ExecPlan* plan, double scale_factor = 1.0, int64_t batch_size = 4096,
|
||||
util::optional<int64_t> seed = util::nullopt);
|
||||
|
||||
// The below methods will create and add an ExecNode to the plan that generates
|
||||
// data for the desired table. If columns is empty, all columns will be generated.
|
||||
// The methods return the added ExecNode, which should be used for inputs.
|
||||
virtual Result<ExecNode*> Supplier(std::vector<std::string> columns = {}) = 0;
|
||||
virtual Result<ExecNode*> Part(std::vector<std::string> columns = {}) = 0;
|
||||
virtual Result<ExecNode*> PartSupp(std::vector<std::string> columns = {}) = 0;
|
||||
virtual Result<ExecNode*> Customer(std::vector<std::string> columns = {}) = 0;
|
||||
virtual Result<ExecNode*> Orders(std::vector<std::string> columns = {}) = 0;
|
||||
virtual Result<ExecNode*> Lineitem(std::vector<std::string> columns = {}) = 0;
|
||||
virtual Result<ExecNode*> Nation(std::vector<std::string> columns = {}) = 0;
|
||||
virtual Result<ExecNode*> Region(std::vector<std::string> columns = {}) = 0;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
@ -0,0 +1,297 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/buffer.h"
|
||||
#include "arrow/compute/type_fwd.h"
|
||||
#include "arrow/memory_pool.h"
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/status.h"
|
||||
#include "arrow/util/bit_util.h"
|
||||
#include "arrow/util/cpu_info.h"
|
||||
#include "arrow/util/logging.h"
|
||||
#include "arrow/util/mutex.h"
|
||||
#include "arrow/util/optional.h"
|
||||
#include "arrow/util/thread_pool.h"
|
||||
|
||||
// Portable spellings of a few compiler intrinsics:
//   BYTESWAP(x)   reverse the byte order of a 64-bit value
//   ROTL(x, n)    rotate a 32-bit value left by n bits
//   ROTL64(x, n)  rotate a 64-bit value left by n bits
//   PREFETCH(ptr) hint that the memory at ptr is about to be read
#if defined(__clang__) || defined(__GNUC__)
#define BYTESWAP(x) __builtin_bswap64(x)
// The shift count is negated as a whole, so the macro argument has to be parenthesized
// before the minus sign: with a bare -n, ROTL(x, a + b) would expand to (-a + b).
#define ROTL(x, n) (((x) << (n)) | ((x) >> ((-(n)) & 31)))
#define ROTL64(x, n) (((x) << (n)) | ((x) >> ((-(n)) & 63)))
#define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
#elif defined(_MSC_VER)
#include <intrin.h>
#define BYTESWAP(x) _byteswap_uint64(x)
#define ROTL(x, n) _rotl((x), (n))
#define ROTL64(x, n) _rotl64((x), (n))
// _M_IX86 is the macro MSVC defines for 32-bit x86 targets; _M_I86 (kept for
// backward compatibility) is only defined by 16-bit compilers.
#if defined(_M_X64) || defined(_M_IX86) || defined(_M_I86)
#include <mmintrin.h>  // https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx
#define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
#else
#define PREFETCH(ptr) (void)(ptr) /* disabled */
#endif
#endif
|
||||
|
||||
namespace arrow {
|
||||
namespace util {
|
||||
|
||||
template <typename T>
|
||||
inline void CheckAlignment(const void* ptr) {
|
||||
ARROW_DCHECK(reinterpret_cast<uint64_t>(ptr) % sizeof(T) == 0);
|
||||
}
|
||||
|
||||
// Some platforms typedef int64_t as long int instead of long long int,
|
||||
// which breaks the _mm256_i64gather_epi64 and _mm256_i32gather_epi64 intrinsics
|
||||
// which need long long.
|
||||
// We use the cast to the type below in these intrinsics to make the code
|
||||
// compile in all cases.
|
||||
//
|
||||
// Note that the alias itself is const-qualified (const long long int).
using int64_for_gather_t = const long long int; // NOLINT runtime-int
|
||||
|
||||
// All MiniBatch... classes use TempVectorStack for vector allocations and can
|
||||
// only work with vectors up to 1024 elements.
|
||||
//
|
||||
// They should only be allocated on the stack to guarantee the right sequence
|
||||
// of allocation and deallocation of vectors from TempVectorStack.
|
||||
//
|
||||
class MiniBatch {
 public:
  /// Largest number of elements a mini-batch vector may hold (2^10 = 1024).
  static constexpr int kMiniBatchLength = 1 << 10;
};
|
||||
|
||||
/// Storage used to allocate temporary vectors of a batch size.
|
||||
/// Temporary vectors should resemble allocating temporary variables on the stack
|
||||
/// but in the context of vectorized processing where we need to store a vector of
|
||||
/// temporaries instead of a single value.
|
||||
class TempVectorStack {
|
||||
template <typename>
|
||||
friend class TempVectorHolder;
|
||||
|
||||
public:
|
||||
Status Init(MemoryPool* pool, int64_t size) {
|
||||
num_vectors_ = 0;
|
||||
top_ = 0;
|
||||
buffer_size_ = size;
|
||||
ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool));
|
||||
// Ensure later operations don't accidentally read uninitialized memory.
|
||||
std::memset(buffer->mutable_data(), 0xFF, size);
|
||||
buffer_ = std::move(buffer);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
private:
|
||||
int64_t PaddedAllocationSize(int64_t num_bytes) {
|
||||
// Round up allocation size to multiple of 8 bytes
|
||||
// to avoid returning temp vectors with unaligned address.
|
||||
//
|
||||
// Also add padding at the end to facilitate loads and stores
|
||||
// using SIMD when number of vector elements is not divisible
|
||||
// by the number of SIMD lanes.
|
||||
//
|
||||
return ::arrow::bit_util::RoundUp(num_bytes, sizeof(int64_t)) + kPadding;
|
||||
}
|
||||
void alloc(uint32_t num_bytes, uint8_t** data, int* id) {
|
||||
int64_t old_top = top_;
|
||||
top_ += PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t);
|
||||
// Stack overflow check
|
||||
ARROW_DCHECK(top_ <= buffer_size_);
|
||||
*data = buffer_->mutable_data() + old_top + sizeof(uint64_t);
|
||||
// We set 8 bytes before the beginning of the allocated range and
|
||||
// 8 bytes after the end to check for stack overflow (which would
|
||||
// result in those known bytes being corrupted).
|
||||
reinterpret_cast<uint64_t*>(buffer_->mutable_data() + old_top)[0] = kGuard1;
|
||||
reinterpret_cast<uint64_t*>(buffer_->mutable_data() + top_)[-1] = kGuard2;
|
||||
*id = num_vectors_++;
|
||||
}
|
||||
void release(int id, uint32_t num_bytes) {
|
||||
ARROW_DCHECK(num_vectors_ == id + 1);
|
||||
int64_t size = PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t);
|
||||
ARROW_DCHECK(reinterpret_cast<const uint64_t*>(buffer_->mutable_data() + top_)[-1] ==
|
||||
kGuard2);
|
||||
ARROW_DCHECK(top_ >= size);
|
||||
top_ -= size;
|
||||
ARROW_DCHECK(reinterpret_cast<const uint64_t*>(buffer_->mutable_data() + top_)[0] ==
|
||||
kGuard1);
|
||||
--num_vectors_;
|
||||
}
|
||||
static constexpr uint64_t kGuard1 = 0x3141592653589793ULL;
|
||||
static constexpr uint64_t kGuard2 = 0x0577215664901532ULL;
|
||||
static constexpr int64_t kPadding = 64;
|
||||
int num_vectors_;
|
||||
int64_t top_;
|
||||
std::unique_ptr<Buffer> buffer_;
|
||||
int64_t buffer_size_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class TempVectorHolder {
|
||||
friend class TempVectorStack;
|
||||
|
||||
public:
|
||||
~TempVectorHolder() { stack_->release(id_, num_elements_ * sizeof(T)); }
|
||||
T* mutable_data() { return reinterpret_cast<T*>(data_); }
|
||||
TempVectorHolder(TempVectorStack* stack, uint32_t num_elements) {
|
||||
stack_ = stack;
|
||||
num_elements_ = num_elements;
|
||||
stack_->alloc(num_elements * sizeof(T), &data_, &id_);
|
||||
}
|
||||
|
||||
private:
|
||||
TempVectorStack* stack_;
|
||||
uint8_t* data_;
|
||||
int id_;
|
||||
uint32_t num_elements_;
|
||||
};
|
||||
|
||||
// Collection of static helpers that convert between bit vectors, byte vectors and
// lists of 16-bit indexes. Only declarations are present in this header.
//
// The public entry points take `hardware_flags`; the *_avx2 members are declared only
// when ARROW_HAVE_AVX2 is defined.
// NOTE(review): hardware_flags presumably selects between the AVX2 and the portable
// implementation at run time -- confirm against the definitions.
class bit_util {
|
||||
public:
|
||||
// NOTE(review): by its name, writes to `indexes` the positions of the bits equal to
// `bit_to_search` and stores how many were written in `num_indexes` -- confirm.
static void bits_to_indexes(int bit_to_search, int64_t hardware_flags,
|
||||
const int num_bits, const uint8_t* bits, int* num_indexes,
|
||||
uint16_t* indexes, int bit_offset = 0);
|
||||
|
||||
// Variant of bits_to_indexes that additionally receives `input_indexes`.
// NOTE(review): presumably it keeps the input indexes whose bit matches
// `bit_to_search` -- confirm.
static void bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
|
||||
const int num_bits, const uint8_t* bits,
|
||||
const uint16_t* input_indexes, int* num_indexes,
|
||||
uint16_t* indexes, int bit_offset = 0);
|
||||
|
||||
// Input and output indexes may be pointing to the same data (in-place filtering).
|
||||
static void bits_split_indexes(int64_t hardware_flags, const int num_bits,
|
||||
const uint8_t* bits, int* num_indexes_bit0,
|
||||
uint16_t* indexes_bit0, uint16_t* indexes_bit1,
|
||||
int bit_offset = 0);
|
||||
|
||||
// Bit 1 is replaced with byte 0xFF.
|
||||
static void bits_to_bytes(int64_t hardware_flags, const int num_bits,
|
||||
const uint8_t* bits, uint8_t* bytes, int bit_offset = 0);
|
||||
|
||||
// Return highest bit of each byte.
|
||||
static void bytes_to_bits(int64_t hardware_flags, const int num_bits,
|
||||
const uint8_t* bytes, uint8_t* bits, int bit_offset = 0);
|
||||
|
||||
// NOTE(review): by its name, reports whether the `num_bytes` bytes at `bytes` are
// all zero -- confirm.
static bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
|
||||
uint32_t num_bytes);
|
||||
|
||||
private:
|
||||
// Helpers operating on a single 64-bit word.
inline static void bits_to_indexes_helper(uint64_t word, uint16_t base_index,
|
||||
int* num_indexes, uint16_t* indexes);
|
||||
inline static void bits_filter_indexes_helper(uint64_t word,
|
||||
const uint16_t* input_indexes,
|
||||
int* num_indexes, uint16_t* indexes);
|
||||
// Shared implementation, parameterized at compile time on the bit value searched for
// and on whether `input_indexes` is used.
template <int bit_to_search, bool filter_input_indexes>
|
||||
static void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits,
|
||||
const uint8_t* bits, const uint16_t* input_indexes,
|
||||
int* num_indexes, uint16_t* indexes,
|
||||
uint16_t base_index = 0);
|
||||
|
||||
// AVX2 variants; none of them takes hardware_flags.
#if defined(ARROW_HAVE_AVX2)
|
||||
static void bits_to_indexes_avx2(int bit_to_search, const int num_bits,
|
||||
const uint8_t* bits, int* num_indexes,
|
||||
uint16_t* indexes, uint16_t base_index = 0);
|
||||
static void bits_filter_indexes_avx2(int bit_to_search, const int num_bits,
|
||||
const uint8_t* bits, const uint16_t* input_indexes,
|
||||
int* num_indexes, uint16_t* indexes);
|
||||
template <int bit_to_search>
|
||||
static void bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
|
||||
int* num_indexes, uint16_t* indexes,
|
||||
uint16_t base_index = 0);
|
||||
template <int bit_to_search>
|
||||
static void bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
|
||||
const uint16_t* input_indexes,
|
||||
int* num_indexes, uint16_t* indexes);
|
||||
static void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, uint8_t* bytes);
|
||||
static void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, uint8_t* bits);
|
||||
static bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes);
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace util
|
||||
namespace compute {
|
||||
|
||||
// Declaration only. Returns a Status for the given plan, input list, expected input
// count and node kind name.
// NOTE(review): presumably checks that `inputs` has `expected_num_inputs` entries that
// belong to `plan`, using `kind_name` in the error text -- confirm against the
// definition.
ARROW_EXPORT
|
||||
Status ValidateExecNodeInputs(ExecPlan* plan, const std::vector<ExecNode*>& inputs,
|
||||
int expected_num_inputs, const char* kind_name);
|
||||
|
||||
// Declaration only. Produces a Table (or an error) from a schema and a list of
// ExecBatch values.
ARROW_EXPORT
|
||||
Result<std::shared_ptr<Table>> TableFromExecBatches(
|
||||
const std::shared_ptr<Schema>& schema, const std::vector<ExecBatch>& exec_batches);
|
||||
|
||||
class AtomicCounter {
|
||||
public:
|
||||
AtomicCounter() = default;
|
||||
|
||||
int count() const { return count_.load(); }
|
||||
|
||||
util::optional<int> total() const {
|
||||
int total = total_.load();
|
||||
if (total == -1) return {};
|
||||
return total;
|
||||
}
|
||||
|
||||
// return true if the counter is complete
|
||||
bool Increment() {
|
||||
DCHECK_NE(count_.load(), total_.load());
|
||||
int count = count_.fetch_add(1) + 1;
|
||||
if (count != total_.load()) return false;
|
||||
return DoneOnce();
|
||||
}
|
||||
|
||||
// return true if the counter is complete
|
||||
bool SetTotal(int total) {
|
||||
total_.store(total);
|
||||
if (count_.load() != total) return false;
|
||||
return DoneOnce();
|
||||
}
|
||||
|
||||
// return true if the counter has not already been completed
|
||||
bool Cancel() { return DoneOnce(); }
|
||||
|
||||
// return true if the counter has finished or been cancelled
|
||||
bool Completed() { return complete_.load(); }
|
||||
|
||||
private:
|
||||
// ensure there is only one true return from Increment(), SetTotal(), or Cancel()
|
||||
bool DoneOnce() {
|
||||
bool expected = false;
|
||||
return complete_.compare_exchange_strong(expected, true);
|
||||
}
|
||||
|
||||
std::atomic<int> count_{0}, total_{-1};
|
||||
std::atomic<bool> complete_{false};
|
||||
};
|
||||
|
||||
// Callable object that yields a size_t and keeps a map from std::thread::id to size_t
// (id_to_index_) next to a mutex. Member functions are only declared here.
// NOTE(review): presumably operator() returns a stable index for the calling thread
// and mutex_ guards id_to_index_ -- confirm against the definition.
class ThreadIndexer {
|
||||
public:
|
||||
size_t operator()();
|
||||
|
||||
// NOTE(review): presumably the upper bound on the indexes handed out -- confirm.
static size_t Capacity();
|
||||
|
||||
private:
|
||||
// NOTE(review): presumably validates `thread_index` (e.g. against Capacity()) and
// returns it -- confirm.
static size_t Check(size_t thread_index);
|
||||
|
||||
util::Mutex mutex_;
|
||||
std::unordered_map<std::thread::id, size_t> id_to_index_;
|
||||
};
|
||||
|
||||
} // namespace compute
|
||||
} // namespace arrow
|
Reference in New Issue
Block a user