mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-07-04 23:32:37 +00:00
first commit
This commit is contained in:
@ -0,0 +1,269 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
// Array accessor classes for Binary, LargeBinary, String, LargeString,
|
||||
// FixedSizeBinary
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/array/array_base.h"
|
||||
#include "arrow/array/data.h"
|
||||
#include "arrow/buffer.h"
|
||||
#include "arrow/stl_iterator.h"
|
||||
#include "arrow/type.h"
|
||||
#include "arrow/util/checked_cast.h"
|
||||
#include "arrow/util/macros.h"
|
||||
#include "arrow/util/string_view.h" // IWYU pragma: export
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
/// \addtogroup binary-arrays
|
||||
///
|
||||
/// @{
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Binary and String
|
||||
|
||||
/// Base class for variable-sized binary arrays, regardless of offset size
|
||||
/// and logical interpretation.
|
||||
template <typename TYPE>
|
||||
class BaseBinaryArray : public FlatArray {
|
||||
public:
|
||||
using TypeClass = TYPE;
|
||||
using offset_type = typename TypeClass::offset_type;
|
||||
using IteratorType = stl::ArrayIterator<BaseBinaryArray<TYPE>>;
|
||||
|
||||
/// Return the pointer to the given elements bytes
|
||||
// XXX should GetValue(int64_t i) return a string_view?
|
||||
const uint8_t* GetValue(int64_t i, offset_type* out_length) const {
|
||||
// Account for base offset
|
||||
i += data_->offset;
|
||||
const offset_type pos = raw_value_offsets_[i];
|
||||
*out_length = raw_value_offsets_[i + 1] - pos;
|
||||
return raw_data_ + pos;
|
||||
}
|
||||
|
||||
/// \brief Get binary value as a string_view
|
||||
///
|
||||
/// \param i the value index
|
||||
/// \return the view over the selected value
|
||||
util::string_view GetView(int64_t i) const {
|
||||
// Account for base offset
|
||||
i += data_->offset;
|
||||
const offset_type pos = raw_value_offsets_[i];
|
||||
return util::string_view(reinterpret_cast<const char*>(raw_data_ + pos),
|
||||
raw_value_offsets_[i + 1] - pos);
|
||||
}
|
||||
|
||||
util::optional<util::string_view> operator[](int64_t i) const {
|
||||
return *IteratorType(*this, i);
|
||||
}
|
||||
|
||||
/// \brief Get binary value as a string_view
|
||||
/// Provided for consistency with other arrays.
|
||||
///
|
||||
/// \param i the value index
|
||||
/// \return the view over the selected value
|
||||
util::string_view Value(int64_t i) const { return GetView(i); }
|
||||
|
||||
/// \brief Get binary value as a std::string
|
||||
///
|
||||
/// \param i the value index
|
||||
/// \return the value copied into a std::string
|
||||
std::string GetString(int64_t i) const { return std::string(GetView(i)); }
|
||||
|
||||
/// Note that this buffer does not account for any slice offset
|
||||
std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[1]; }
|
||||
|
||||
/// Note that this buffer does not account for any slice offset
|
||||
std::shared_ptr<Buffer> value_data() const { return data_->buffers[2]; }
|
||||
|
||||
const offset_type* raw_value_offsets() const {
|
||||
return raw_value_offsets_ + data_->offset;
|
||||
}
|
||||
|
||||
const uint8_t* raw_data() const { return raw_data_; }
|
||||
|
||||
/// \brief Return the data buffer absolute offset of the data for the value
|
||||
/// at the passed index.
|
||||
///
|
||||
/// Does not perform boundschecking
|
||||
offset_type value_offset(int64_t i) const {
|
||||
return raw_value_offsets_[i + data_->offset];
|
||||
}
|
||||
|
||||
/// \brief Return the length of the data for the value at the passed index.
|
||||
///
|
||||
/// Does not perform boundschecking
|
||||
offset_type value_length(int64_t i) const {
|
||||
i += data_->offset;
|
||||
return raw_value_offsets_[i + 1] - raw_value_offsets_[i];
|
||||
}
|
||||
|
||||
/// \brief Return the total length of the memory in the data buffer
|
||||
/// referenced by this array. If the array has been sliced then this may be
|
||||
/// less than the size of the data buffer (data_->buffers[2]).
|
||||
offset_type total_values_length() const {
|
||||
if (data_->length > 0) {
|
||||
return raw_value_offsets_[data_->length + data_->offset] -
|
||||
raw_value_offsets_[data_->offset];
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
IteratorType begin() const { return IteratorType(*this); }
|
||||
|
||||
IteratorType end() const { return IteratorType(*this, length()); }
|
||||
|
||||
protected:
|
||||
// For subclasses
|
||||
BaseBinaryArray() = default;
|
||||
|
||||
// Protected method for constructors
|
||||
void SetData(const std::shared_ptr<ArrayData>& data) {
|
||||
this->Array::SetData(data);
|
||||
raw_value_offsets_ = data->GetValuesSafe<offset_type>(1, /*offset=*/0);
|
||||
raw_data_ = data->GetValuesSafe<uint8_t>(2, /*offset=*/0);
|
||||
}
|
||||
|
||||
const offset_type* raw_value_offsets_ = NULLPTR;
|
||||
const uint8_t* raw_data_ = NULLPTR;
|
||||
};
|
||||
|
||||
/// Concrete Array class for variable-size binary data
|
||||
class ARROW_EXPORT BinaryArray : public BaseBinaryArray<BinaryType> {
 public:
  /// \brief Construct from existing array data (defined out of line)
  explicit BinaryArray(const std::shared_ptr<ArrayData>& data);

  /// \brief Construct from individual buffers (defined out of line)
  ///
  /// \param length number of elements
  /// \param value_offsets buffer holding the element offsets
  /// \param data buffer holding the element bytes
  /// \param null_bitmap optional validity bitmap, NULLPTR by default
  /// \param null_count number of nulls, kUnknownNullCount by default
  /// \param offset slice offset, 0 by default
  BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
              const std::shared_ptr<Buffer>& data,
              const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
              int64_t null_count = kUnknownNullCount, int64_t offset = 0);

 protected:
  // For subclasses such as StringArray
  BinaryArray() : BaseBinaryArray() {}
};
|
||||
|
||||
/// Concrete Array class for variable-size string (utf-8) data
|
||||
class ARROW_EXPORT StringArray : public BinaryArray {
 public:
  // Redeclares the alias inherited from BinaryArray so that it names the
  // string type rather than the binary type.
  using TypeClass = StringType;

  /// \brief Construct from existing array data (defined out of line)
  explicit StringArray(const std::shared_ptr<ArrayData>& data);

  /// \brief Construct from individual buffers (defined out of line)
  ///
  /// \param length number of elements
  /// \param value_offsets buffer holding the element offsets
  /// \param data buffer holding the element bytes
  /// \param null_bitmap optional validity bitmap, NULLPTR by default
  /// \param null_count number of nulls, kUnknownNullCount by default
  /// \param offset slice offset, 0 by default
  StringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
              const std::shared_ptr<Buffer>& data,
              const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
              int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Validate that this array contains only valid UTF8 entries
  ///
  /// This check is also implied by ValidateFull()
  Status ValidateUTF8() const;
};
|
||||
|
||||
/// Concrete Array class for large variable-size binary data
|
||||
class ARROW_EXPORT LargeBinaryArray : public BaseBinaryArray<LargeBinaryType> {
 public:
  /// \brief Construct from existing array data (defined out of line)
  explicit LargeBinaryArray(const std::shared_ptr<ArrayData>& data);

  /// \brief Construct from individual buffers (defined out of line)
  ///
  /// \param length number of elements
  /// \param value_offsets buffer holding the element offsets
  /// \param data buffer holding the element bytes
  /// \param null_bitmap optional validity bitmap, NULLPTR by default
  /// \param null_count number of nulls, kUnknownNullCount by default
  /// \param offset slice offset, 0 by default
  LargeBinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
                   const std::shared_ptr<Buffer>& data,
                   const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
                   int64_t null_count = kUnknownNullCount, int64_t offset = 0);

 protected:
  // For subclasses such as LargeStringArray
  LargeBinaryArray() : BaseBinaryArray() {}
};
|
||||
|
||||
/// Concrete Array class for large variable-size string (utf-8) data
|
||||
class ARROW_EXPORT LargeStringArray : public LargeBinaryArray {
 public:
  // Redeclares the alias inherited from LargeBinaryArray so that it names the
  // large string type rather than the large binary type.
  using TypeClass = LargeStringType;

  /// \brief Construct from existing array data (defined out of line)
  explicit LargeStringArray(const std::shared_ptr<ArrayData>& data);

  /// \brief Construct from individual buffers (defined out of line)
  ///
  /// \param length number of elements
  /// \param value_offsets buffer holding the element offsets
  /// \param data buffer holding the element bytes
  /// \param null_bitmap optional validity bitmap, NULLPTR by default
  /// \param null_count number of nulls, kUnknownNullCount by default
  /// \param offset slice offset, 0 by default
  LargeStringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
                   const std::shared_ptr<Buffer>& data,
                   const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
                   int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Validate that this array contains only valid UTF8 entries
  ///
  /// This check is also implied by ValidateFull()
  Status ValidateUTF8() const;
};
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Fixed width binary
|
||||
|
||||
/// Concrete Array class for fixed-size binary data
|
||||
class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {
 public:
  using TypeClass = FixedSizeBinaryType;
  using IteratorType = stl::ArrayIterator<FixedSizeBinaryArray>;

  /// \brief Construct from existing array data (defined out of line)
  explicit FixedSizeBinaryArray(const std::shared_ptr<ArrayData>& data);

  /// \brief Construct from a type and individual buffers (defined out of line)
  ///
  /// \param type the array's data type
  /// \param length number of elements
  /// \param data buffer holding the element bytes
  /// \param null_bitmap optional validity bitmap, NULLPTR by default
  /// \param null_count number of nulls, kUnknownNullCount by default
  /// \param offset slice offset, 0 by default
  FixedSizeBinaryArray(const std::shared_ptr<DataType>& type, int64_t length,
                       const std::shared_ptr<Buffer>& data,
                       const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
                       int64_t null_count = kUnknownNullCount, int64_t offset = 0);

  /// \brief Pointer to the bytes of the element at index i (defined out of
  /// line)
  const uint8_t* GetValue(int64_t i) const;

  /// \brief Same as GetValue(); forwards to it
  const uint8_t* Value(int64_t i) const { return GetValue(i); }

  /// \brief Get the element at index i as a string_view
  ///
  /// \param i the value index
  /// \return a view of byte_width() bytes starting at GetValue(i)
  util::string_view GetView(int64_t i) const {
    const char* bytes = reinterpret_cast<const char*>(GetValue(i));
    return util::string_view(bytes, byte_width());
  }

  /// \brief Element access through the array iterator
  ///
  /// Builds an IteratorType positioned at i and returns what dereferencing
  /// it yields.
  // NOTE(review): presumably an empty optional is produced for null slots;
  // confirm against stl::ArrayIterator.
  util::optional<util::string_view> operator[](int64_t i) const {
    IteratorType it(*this, i);
    return *it;
  }

  /// \brief Get the element at index i copied into a std::string
  std::string GetString(int64_t i) const {
    const util::string_view view = GetView(i);
    return std::string(view);
  }

  /// \brief Width in bytes of every element, as cached by SetData()
  int32_t byte_width() const { return byte_width_; }

  /// \brief Pointer to the values, advanced by the slice offset
  /// (data_->offset elements of byte_width_ bytes each)
  const uint8_t* raw_values() const {
    const auto skipped_bytes = data_->offset * byte_width_;
    return raw_values_ + skipped_bytes;
  }

  /// \brief Iterator positioned at the first element
  IteratorType begin() const { return IteratorType(*this); }

  /// \brief Iterator positioned at index length()
  IteratorType end() const { return IteratorType(*this, length()); }

 protected:
  // Installs `data` on the PrimitiveArray base, then caches the byte width
  // taken from the array's type, which is cast to FixedSizeBinaryType.
  void SetData(const std::shared_ptr<ArrayData>& data) {
    PrimitiveArray::SetData(data);
    const auto& fixed_type =
        internal::checked_cast<const FixedSizeBinaryType&>(*type());
    byte_width_ = fixed_type.byte_width();
  }

  // Element width in bytes; assigned in SetData().
  int32_t byte_width_;
};
|
||||
|
||||
/// @}
|
||||
|
||||
} // namespace arrow
|
Reference in New Issue
Block a user