mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-04-22 18:32:15 +00:00
238 lines
8.4 KiB
C++
238 lines
8.4 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#pragma once
|
|
|
|
#include <cstdint>
|
|
#include <memory>
|
|
#include <vector>
|
|
|
|
#include "arrow/array.h"
|
|
#include "arrow/array/builder_binary.h"
|
|
#include "arrow/array/builder_primitive.h"
|
|
#include "arrow/array/builder_time.h"
|
|
#include "arrow/buffer.h"
|
|
#include "arrow/testing/gtest_util.h"
|
|
#include "arrow/util/bit_util.h"
|
|
#include "arrow/visit_type_inline.h"
|
|
|
|
namespace arrow {
|
|
|
|
// ArrayFromVector: construct an Array from vectors of C values
|
|
|
|
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
|
|
void ArrayFromVector(const std::shared_ptr<DataType>& type,
|
|
const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values,
|
|
std::shared_ptr<Array>* out) {
|
|
auto type_id = TYPE::type_id;
|
|
ASSERT_EQ(type_id, type->id())
|
|
<< "template parameter and concrete DataType instance don't agree";
|
|
|
|
std::unique_ptr<ArrayBuilder> builder_ptr;
|
|
ASSERT_OK(MakeBuilder(default_memory_pool(), type, &builder_ptr));
|
|
// Get the concrete builder class to access its Append() specializations
|
|
auto& builder = dynamic_cast<typename TypeTraits<TYPE>::BuilderType&>(*builder_ptr);
|
|
|
|
for (size_t i = 0; i < values.size(); ++i) {
|
|
if (is_valid[i]) {
|
|
ASSERT_OK(builder.Append(values[i]));
|
|
} else {
|
|
ASSERT_OK(builder.AppendNull());
|
|
}
|
|
}
|
|
ASSERT_OK(builder.Finish(out));
|
|
}
|
|
|
|
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
|
|
void ArrayFromVector(const std::shared_ptr<DataType>& type,
|
|
const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) {
|
|
auto type_id = TYPE::type_id;
|
|
ASSERT_EQ(type_id, type->id())
|
|
<< "template parameter and concrete DataType instance don't agree";
|
|
|
|
std::unique_ptr<ArrayBuilder> builder_ptr;
|
|
ASSERT_OK(MakeBuilder(default_memory_pool(), type, &builder_ptr));
|
|
// Get the concrete builder class to access its Append() specializations
|
|
auto& builder = dynamic_cast<typename TypeTraits<TYPE>::BuilderType&>(*builder_ptr);
|
|
|
|
for (size_t i = 0; i < values.size(); ++i) {
|
|
ASSERT_OK(builder.Append(values[i]));
|
|
}
|
|
ASSERT_OK(builder.Finish(out));
|
|
}
|
|
|
|
// Overloads without a DataType argument, for parameterless types
|
|
|
|
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
|
|
void ArrayFromVector(const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values,
|
|
std::shared_ptr<Array>* out) {
|
|
auto type = TypeTraits<TYPE>::type_singleton();
|
|
ArrayFromVector<TYPE, C_TYPE>(type, is_valid, values, out);
|
|
}
|
|
|
|
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
|
|
void ArrayFromVector(const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) {
|
|
auto type = TypeTraits<TYPE>::type_singleton();
|
|
ArrayFromVector<TYPE, C_TYPE>(type, values, out);
|
|
}
|
|
|
|
// ChunkedArrayFromVector: construct a ChunkedArray from vectors of C values
|
|
|
|
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
|
|
void ChunkedArrayFromVector(const std::shared_ptr<DataType>& type,
|
|
const std::vector<std::vector<bool>>& is_valid,
|
|
const std::vector<std::vector<C_TYPE>>& values,
|
|
std::shared_ptr<ChunkedArray>* out) {
|
|
ArrayVector chunks;
|
|
ASSERT_EQ(is_valid.size(), values.size());
|
|
for (size_t i = 0; i < values.size(); ++i) {
|
|
std::shared_ptr<Array> array;
|
|
ArrayFromVector<TYPE, C_TYPE>(type, is_valid[i], values[i], &array);
|
|
chunks.push_back(array);
|
|
}
|
|
*out = std::make_shared<ChunkedArray>(chunks);
|
|
}
|
|
|
|
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
|
|
void ChunkedArrayFromVector(const std::shared_ptr<DataType>& type,
|
|
const std::vector<std::vector<C_TYPE>>& values,
|
|
std::shared_ptr<ChunkedArray>* out) {
|
|
ArrayVector chunks;
|
|
for (size_t i = 0; i < values.size(); ++i) {
|
|
std::shared_ptr<Array> array;
|
|
ArrayFromVector<TYPE, C_TYPE>(type, values[i], &array);
|
|
chunks.push_back(array);
|
|
}
|
|
*out = std::make_shared<ChunkedArray>(chunks);
|
|
}
|
|
|
|
// Overloads without a DataType argument, for parameterless types
|
|
|
|
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
|
|
void ChunkedArrayFromVector(const std::vector<std::vector<bool>>& is_valid,
|
|
const std::vector<std::vector<C_TYPE>>& values,
|
|
std::shared_ptr<ChunkedArray>* out) {
|
|
auto type = TypeTraits<TYPE>::type_singleton();
|
|
ChunkedArrayFromVector<TYPE, C_TYPE>(type, is_valid, values, out);
|
|
}
|
|
|
|
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
|
|
void ChunkedArrayFromVector(const std::vector<std::vector<C_TYPE>>& values,
|
|
std::shared_ptr<ChunkedArray>* out) {
|
|
auto type = TypeTraits<TYPE>::type_singleton();
|
|
ChunkedArrayFromVector<TYPE, C_TYPE>(type, values, out);
|
|
}
|
|
|
|
template <typename BuilderType>
|
|
void FinishAndCheckPadding(BuilderType* builder, std::shared_ptr<Array>* out) {
|
|
ASSERT_OK_AND_ASSIGN(*out, builder->Finish());
|
|
AssertZeroPadded(**out);
|
|
TestInitialized(**out);
|
|
}
|
|
|
|
template <class T, class Builder>
|
|
Status MakeArray(const std::vector<uint8_t>& valid_bytes, const std::vector<T>& values,
|
|
int64_t size, Builder* builder, std::shared_ptr<Array>* out) {
|
|
// Append the first 1000
|
|
for (int64_t i = 0; i < size; ++i) {
|
|
if (valid_bytes[i] > 0) {
|
|
RETURN_NOT_OK(builder->Append(values[i]));
|
|
} else {
|
|
RETURN_NOT_OK(builder->AppendNull());
|
|
}
|
|
}
|
|
return builder->Finish(out);
|
|
}
|
|
|
|
template <typename Fn>
|
|
struct VisitBuilderImpl {
|
|
template <typename T, typename BuilderType = typename TypeTraits<T>::BuilderType,
|
|
// need to let SFINAE drop this Visit when it would result in
|
|
// [](NullBuilder*){}(double_builder)
|
|
typename = decltype(std::declval<Fn>()(std::declval<BuilderType*>()))>
|
|
Status Visit(const T&) {
|
|
fn_(internal::checked_cast<BuilderType*>(builder_));
|
|
return Status::OK();
|
|
}
|
|
|
|
Status Visit(const DataType& t) {
|
|
return Status::NotImplemented("visiting builders of type ", t);
|
|
}
|
|
|
|
Status Visit() { return VisitTypeInline(*builder_->type(), this); }
|
|
|
|
ArrayBuilder* builder_;
|
|
Fn fn_;
|
|
};
|
|
|
|
template <typename Fn>
|
|
Status VisitBuilder(ArrayBuilder* builder, Fn&& fn) {
|
|
return VisitBuilderImpl<Fn>{builder, std::forward<Fn>(fn)}.Visit();
|
|
}
|
|
|
|
template <typename Fn>
|
|
Result<std::shared_ptr<Array>> ArrayFromBuilderVisitor(
|
|
const std::shared_ptr<DataType>& type, int64_t initial_capacity,
|
|
int64_t visitor_repetitions, Fn&& fn) {
|
|
std::unique_ptr<ArrayBuilder> builder;
|
|
RETURN_NOT_OK(MakeBuilder(default_memory_pool(), type, &builder));
|
|
|
|
if (initial_capacity != 0) {
|
|
RETURN_NOT_OK(builder->Resize(initial_capacity));
|
|
}
|
|
|
|
for (int64_t i = 0; i < visitor_repetitions; ++i) {
|
|
RETURN_NOT_OK(VisitBuilder(builder.get(), std::forward<Fn>(fn)));
|
|
}
|
|
|
|
std::shared_ptr<Array> out;
|
|
RETURN_NOT_OK(builder->Finish(&out));
|
|
return std::move(out);
|
|
}
|
|
|
|
template <typename Fn>
|
|
Result<std::shared_ptr<Array>> ArrayFromBuilderVisitor(
|
|
const std::shared_ptr<DataType>& type, int64_t length, Fn&& fn) {
|
|
return ArrayFromBuilderVisitor(type, length, length, std::forward<Fn>(fn));
|
|
}
|
|
|
|
template <typename T>
|
|
static inline Status GetBitmapFromVector(const std::vector<T>& is_valid,
|
|
std::shared_ptr<Buffer>* result) {
|
|
size_t length = is_valid.size();
|
|
|
|
ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateEmptyBitmap(length));
|
|
|
|
uint8_t* bitmap = buffer->mutable_data();
|
|
for (size_t i = 0; i < static_cast<size_t>(length); ++i) {
|
|
if (is_valid[i]) {
|
|
bit_util::SetBit(bitmap, i);
|
|
}
|
|
}
|
|
|
|
*result = buffer;
|
|
return Status::OK();
|
|
}
|
|
|
|
template <typename T>
|
|
inline void BitmapFromVector(const std::vector<T>& is_valid,
|
|
std::shared_ptr<Buffer>* out) {
|
|
ASSERT_OK(GetBitmapFromVector(is_valid, out));
|
|
}
|
|
|
|
} // namespace arrow
|