mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-04-23 02:40:43 +00:00
105 lines
3.4 KiB
C++
105 lines
3.4 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#pragma once
|
|
|
|
#include <atomic>
|
|
#include <cstdint>
|
|
#include <vector>
|
|
|
|
#include "arrow/type_fwd.h"
|
|
#include "arrow/util/macros.h"
|
|
|
|
namespace arrow {
|
|
namespace internal {
|
|
|
|
// Physical position of one value inside a chunked sequence, as produced by
// ChunkResolver::Resolve().
struct ChunkLocation {
  // Index of the chunk holding the value
  int64_t chunk_index;
  // Index of the value relative to the start of that chunk
  int64_t index_in_chunk;
};
|
|
|
|
// An object that resolves an array chunk depending on a logical index
|
|
struct ChunkResolver {
|
|
explicit ChunkResolver(const ArrayVector& chunks);
|
|
|
|
explicit ChunkResolver(const std::vector<const Array*>& chunks);
|
|
|
|
explicit ChunkResolver(const RecordBatchVector& batches);
|
|
|
|
ChunkResolver(ChunkResolver&& other)
|
|
: offsets_(std::move(other.offsets_)), cached_chunk_(other.cached_chunk_.load()) {}
|
|
|
|
ChunkResolver& operator=(ChunkResolver&& other) {
|
|
offsets_ = std::move(other.offsets_);
|
|
cached_chunk_.store(other.cached_chunk_.load());
|
|
return *this;
|
|
}
|
|
|
|
/// \brief Return a ChunkLocation containing the chunk index and in-chunk value index of
|
|
/// the chunked array at logical index
|
|
inline ChunkLocation Resolve(const int64_t index) const {
|
|
// It is common for the algorithms below to make consecutive accesses at
|
|
// a relatively small distance from each other, hence often falling in
|
|
// the same chunk.
|
|
// This is trivial when merging (assuming each side of the merge uses
|
|
// its own resolver), but also in the inner recursive invocations of
|
|
// partitioning.
|
|
if (offsets_.size() <= 1) {
|
|
return {0, index};
|
|
}
|
|
const auto cached_chunk = cached_chunk_.load();
|
|
const bool cache_hit =
|
|
(index >= offsets_[cached_chunk] && index < offsets_[cached_chunk + 1]);
|
|
if (ARROW_PREDICT_TRUE(cache_hit)) {
|
|
return {cached_chunk, index - offsets_[cached_chunk]};
|
|
}
|
|
auto chunk_index = Bisect(index);
|
|
cached_chunk_.store(chunk_index);
|
|
return {chunk_index, index - offsets_[chunk_index]};
|
|
}
|
|
|
|
protected:
|
|
// Find the chunk index corresponding to a value index using binary search
|
|
inline int64_t Bisect(const int64_t index) const {
|
|
// Like std::upper_bound(), but hand-written as it can help the compiler.
|
|
// Search [lo, lo + n)
|
|
int64_t lo = 0;
|
|
auto n = static_cast<int64_t>(offsets_.size());
|
|
while (n > 1) {
|
|
const int64_t m = n >> 1;
|
|
const int64_t mid = lo + m;
|
|
if (static_cast<int64_t>(index) >= offsets_[mid]) {
|
|
lo = mid;
|
|
n -= m;
|
|
} else {
|
|
n = m;
|
|
}
|
|
}
|
|
return lo;
|
|
}
|
|
|
|
private:
|
|
// Collection of starting offsets used for binary search
|
|
std::vector<int64_t> offsets_;
|
|
|
|
// Tracks the most recently used chunk index to allow fast
|
|
// access for consecutive indices corresponding to the same chunk
|
|
mutable std::atomic<int64_t> cached_chunk_;
|
|
};
|
|
|
|
} // namespace internal
|
|
} // namespace arrow
|