// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Object model for scalar (non-Array) values. Not intended for use with large
// amounts of data

#pragma once

#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "arrow/compare.h"
#include "arrow/extension_type.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_fwd.h"
#include "arrow/type_traits.h"
#include "arrow/util/compare.h"
#include "arrow/util/decimal.h"
#include "arrow/util/string_view.h"
#include "arrow/util/visibility.h"
#include "arrow/visit_type_inline.h"

namespace arrow {

class Array;

/// \brief Base class for scalar values
///
/// A Scalar represents a single value with a specific DataType.
/// Scalars are useful for passing single value inputs to compute functions,
/// or for representing individual array elements (with a non-trivial
/// wrapping cost, though).
struct ARROW_EXPORT Scalar : public util::EqualityComparable<Scalar> {
  virtual ~Scalar() = default;

  /// \brief Construct a scalar of the given type
  ///
  /// `is_valid` keeps its in-class default (false).
  explicit Scalar(std::shared_ptr<DataType> type) : type(std::move(type)) {}

  /// \brief The type of the scalar value
  std::shared_ptr<DataType> type;

  /// \brief Whether the value is valid (not null) or not
  bool is_valid = false;

  // Re-export the comparison helpers of the base class; they would otherwise
  // be hidden by the Equals() overload declared below.
  using util::EqualityComparable<Scalar>::operator==;
  using util::EqualityComparable<Scalar>::Equals;

  /// \brief Compare this scalar with another one, using the given options
  bool Equals(const Scalar& other,
              const EqualOptions& options = EqualOptions::Defaults()) const;

  /// \brief Approximate counterpart of Equals(), using the given options
  bool ApproxEquals(const Scalar& other,
                    const EqualOptions& options = EqualOptions::Defaults()) const;

  /// \brief Hash functor forwarding to Scalar::hash()
  ///
  /// Accepts a scalar either by reference or through a shared_ptr. The
  /// shared_ptr overload dereferences its argument unconditionally, so the
  /// pointer must not be null.
  struct ARROW_EXPORT Hash {
    size_t operator()(const Scalar& scalar) const { return scalar.hash(); }

    size_t operator()(const std::shared_ptr<Scalar>& scalar) const {
      return scalar->hash();
    }
  };

  /// \brief Compute a hash of this scalar
  size_t hash() const;

  /// \brief Return a string representation of this scalar
  std::string ToString() const;

  /// \brief Perform cheap validation checks
  ///
  /// This is O(k) where k is the number of descendents.
  ///
  /// \return Status
  Status Validate() const;

  /// \brief Perform extensive data validation checks
  ///
  /// This is potentially O(k*n) where k is the number of descendents and n
  /// is the length of descendents (if list scalars are involved).
  ///
  /// \return Status
  Status ValidateFull() const;

  /// \brief Build a scalar of the given type from its string representation
  static Result<std::shared_ptr<Scalar>> Parse(const std::shared_ptr<DataType>& type,
                                               util::string_view repr);

  // TODO(bkietz) add compute::CastOptions
  /// \brief Convert this scalar to the given type
  Result<std::shared_ptr<Scalar>> CastTo(std::shared_ptr<DataType> to) const;

  /// \brief Write the scalar to the given stream (signature used by test printers)
  ARROW_EXPORT friend void PrintTo(const Scalar& scalar, std::ostream* os);

  /// \brief Apply the ScalarVisitor::Visit() method specialized to the scalar type
  Status Accept(ScalarVisitor* visitor) const;

 protected:
  /// \brief Construct a scalar with an explicit validity flag (for subclasses)
  Scalar(std::shared_ptr<DataType> type, bool is_valid)
      : type(std::move(type)), is_valid(is_valid) {}
};

/// \defgroup concrete-scalar-classes Concrete Scalar subclasses
///
/// @{

/// \brief A scalar value for NullType.
Never valid struct ARROW_EXPORT NullScalar : public Scalar { public: using TypeClass = NullType; NullScalar() : Scalar{null(), false} {} }; /// @} namespace internal { struct ARROW_EXPORT PrimitiveScalarBase : public Scalar { using Scalar::Scalar; /// \brief Get a mutable pointer to the value of this scalar. May be null. virtual void* mutable_data() = 0; /// \brief Get an immutable view of the value of this scalar as bytes. virtual util::string_view view() const = 0; }; template struct ARROW_EXPORT PrimitiveScalar : public PrimitiveScalarBase { using PrimitiveScalarBase::PrimitiveScalarBase; using TypeClass = T; using ValueType = CType; // Non-null constructor. PrimitiveScalar(ValueType value, std::shared_ptr type) : PrimitiveScalarBase(std::move(type), true), value(value) {} explicit PrimitiveScalar(std::shared_ptr type) : PrimitiveScalarBase(std::move(type), false) {} ValueType value{}; void* mutable_data() override { return &value; } util::string_view view() const override { return util::string_view(reinterpret_cast(&value), sizeof(ValueType)); }; }; } // namespace internal /// \addtogroup concrete-scalar-classes Concrete Scalar subclasses /// /// @{ struct ARROW_EXPORT BooleanScalar : public internal::PrimitiveScalar { using Base = internal::PrimitiveScalar; using Base::Base; explicit BooleanScalar(bool value) : Base(value, boolean()) {} BooleanScalar() : Base(boolean()) {} }; template struct NumericScalar : public internal::PrimitiveScalar { using Base = typename internal::PrimitiveScalar; using Base::Base; using TypeClass = typename Base::TypeClass; using ValueType = typename Base::ValueType; explicit NumericScalar(ValueType value) : Base(value, TypeTraits::type_singleton()) {} NumericScalar() : Base(TypeTraits::type_singleton()) {} }; struct ARROW_EXPORT Int8Scalar : public NumericScalar { using NumericScalar::NumericScalar; }; struct ARROW_EXPORT Int16Scalar : public NumericScalar { using NumericScalar::NumericScalar; }; struct ARROW_EXPORT Int32Scalar : 
public NumericScalar { using NumericScalar::NumericScalar; }; struct ARROW_EXPORT Int64Scalar : public NumericScalar { using NumericScalar::NumericScalar; }; struct ARROW_EXPORT UInt8Scalar : public NumericScalar { using NumericScalar::NumericScalar; }; struct ARROW_EXPORT UInt16Scalar : public NumericScalar { using NumericScalar::NumericScalar; }; struct ARROW_EXPORT UInt32Scalar : public NumericScalar { using NumericScalar::NumericScalar; }; struct ARROW_EXPORT UInt64Scalar : public NumericScalar { using NumericScalar::NumericScalar; }; struct ARROW_EXPORT HalfFloatScalar : public NumericScalar { using NumericScalar::NumericScalar; }; struct ARROW_EXPORT FloatScalar : public NumericScalar { using NumericScalar::NumericScalar; }; struct ARROW_EXPORT DoubleScalar : public NumericScalar { using NumericScalar::NumericScalar; }; struct ARROW_EXPORT BaseBinaryScalar : public internal::PrimitiveScalarBase { using internal::PrimitiveScalarBase::PrimitiveScalarBase; using ValueType = std::shared_ptr; std::shared_ptr value; void* mutable_data() override { return value ? reinterpret_cast(value->mutable_data()) : NULLPTR; } util::string_view view() const override { return value ? 
util::string_view(*value) : util::string_view(); } protected: BaseBinaryScalar(std::shared_ptr value, std::shared_ptr type) : internal::PrimitiveScalarBase{std::move(type), true}, value(std::move(value)) {} }; struct ARROW_EXPORT BinaryScalar : public BaseBinaryScalar { using BaseBinaryScalar::BaseBinaryScalar; using TypeClass = BinaryType; BinaryScalar(std::shared_ptr value, std::shared_ptr type) : BaseBinaryScalar(std::move(value), std::move(type)) {} explicit BinaryScalar(std::shared_ptr value) : BinaryScalar(std::move(value), binary()) {} explicit BinaryScalar(std::string s); BinaryScalar() : BinaryScalar(binary()) {} }; struct ARROW_EXPORT StringScalar : public BinaryScalar { using BinaryScalar::BinaryScalar; using TypeClass = StringType; explicit StringScalar(std::shared_ptr value) : StringScalar(std::move(value), utf8()) {} explicit StringScalar(std::string s); StringScalar() : StringScalar(utf8()) {} }; struct ARROW_EXPORT LargeBinaryScalar : public BaseBinaryScalar { using BaseBinaryScalar::BaseBinaryScalar; using TypeClass = LargeBinaryType; LargeBinaryScalar(std::shared_ptr value, std::shared_ptr type) : BaseBinaryScalar(std::move(value), std::move(type)) {} explicit LargeBinaryScalar(std::shared_ptr value) : LargeBinaryScalar(std::move(value), large_binary()) {} explicit LargeBinaryScalar(std::string s); LargeBinaryScalar() : LargeBinaryScalar(large_binary()) {} }; struct ARROW_EXPORT LargeStringScalar : public LargeBinaryScalar { using LargeBinaryScalar::LargeBinaryScalar; using TypeClass = LargeStringType; explicit LargeStringScalar(std::shared_ptr value) : LargeStringScalar(std::move(value), large_utf8()) {} explicit LargeStringScalar(std::string s); LargeStringScalar() : LargeStringScalar(large_utf8()) {} }; struct ARROW_EXPORT FixedSizeBinaryScalar : public BinaryScalar { using TypeClass = FixedSizeBinaryType; FixedSizeBinaryScalar(std::shared_ptr value, std::shared_ptr type); explicit FixedSizeBinaryScalar(const std::shared_ptr& value); explicit 
FixedSizeBinaryScalar(std::string s); explicit FixedSizeBinaryScalar(std::shared_ptr type) : BinaryScalar(std::move(type)) {} }; template struct TemporalScalar : internal::PrimitiveScalar { using internal::PrimitiveScalar::PrimitiveScalar; using ValueType = typename TemporalScalar::ValueType; TemporalScalar(ValueType value, std::shared_ptr type) : internal::PrimitiveScalar(std::move(value), type) {} }; template struct DateScalar : public TemporalScalar { using TemporalScalar::TemporalScalar; using ValueType = typename TemporalScalar::ValueType; explicit DateScalar(ValueType value) : TemporalScalar(std::move(value), TypeTraits::type_singleton()) {} DateScalar() : TemporalScalar(TypeTraits::type_singleton()) {} }; struct ARROW_EXPORT Date32Scalar : public DateScalar { using DateScalar::DateScalar; }; struct ARROW_EXPORT Date64Scalar : public DateScalar { using DateScalar::DateScalar; }; template struct ARROW_EXPORT TimeScalar : public TemporalScalar { using TemporalScalar::TemporalScalar; TimeScalar(typename TemporalScalar::ValueType value, TimeUnit::type unit) : TimeScalar(std::move(value), std::make_shared(unit)) {} }; struct ARROW_EXPORT Time32Scalar : public TimeScalar { using TimeScalar::TimeScalar; }; struct ARROW_EXPORT Time64Scalar : public TimeScalar { using TimeScalar::TimeScalar; }; struct ARROW_EXPORT TimestampScalar : public TemporalScalar { using TemporalScalar::TemporalScalar; TimestampScalar(typename TemporalScalar::ValueType value, TimeUnit::type unit, std::string tz = "") : TimestampScalar(std::move(value), timestamp(unit, std::move(tz))) {} }; template struct IntervalScalar : public TemporalScalar { using TemporalScalar::TemporalScalar; using ValueType = typename TemporalScalar::ValueType; explicit IntervalScalar(ValueType value) : TemporalScalar(value, TypeTraits::type_singleton()) {} IntervalScalar() : TemporalScalar(TypeTraits::type_singleton()) {} }; struct ARROW_EXPORT MonthIntervalScalar : public IntervalScalar { using 
IntervalScalar::IntervalScalar; }; struct ARROW_EXPORT DayTimeIntervalScalar : public IntervalScalar { using IntervalScalar::IntervalScalar; }; struct ARROW_EXPORT MonthDayNanoIntervalScalar : public IntervalScalar { using IntervalScalar::IntervalScalar; }; struct ARROW_EXPORT DurationScalar : public TemporalScalar { using TemporalScalar::TemporalScalar; DurationScalar(typename TemporalScalar::ValueType value, TimeUnit::type unit) : DurationScalar(std::move(value), duration(unit)) {} }; template struct ARROW_EXPORT DecimalScalar : public internal::PrimitiveScalarBase { using internal::PrimitiveScalarBase::PrimitiveScalarBase; using TypeClass = TYPE_CLASS; using ValueType = VALUE_TYPE; DecimalScalar(ValueType value, std::shared_ptr type) : internal::PrimitiveScalarBase(std::move(type), true), value(value) {} void* mutable_data() override { return reinterpret_cast(value.mutable_native_endian_bytes()); } util::string_view view() const override { return util::string_view(reinterpret_cast(value.native_endian_bytes()), ValueType::kByteWidth); } ValueType value; }; struct ARROW_EXPORT Decimal128Scalar : public DecimalScalar { using DecimalScalar::DecimalScalar; }; struct ARROW_EXPORT Decimal256Scalar : public DecimalScalar { using DecimalScalar::DecimalScalar; }; struct ARROW_EXPORT BaseListScalar : public Scalar { using Scalar::Scalar; using ValueType = std::shared_ptr; BaseListScalar(std::shared_ptr value, std::shared_ptr type); std::shared_ptr value; }; struct ARROW_EXPORT ListScalar : public BaseListScalar { using TypeClass = ListType; using BaseListScalar::BaseListScalar; explicit ListScalar(std::shared_ptr value); }; struct ARROW_EXPORT LargeListScalar : public BaseListScalar { using TypeClass = LargeListType; using BaseListScalar::BaseListScalar; explicit LargeListScalar(std::shared_ptr value); }; struct ARROW_EXPORT MapScalar : public BaseListScalar { using TypeClass = MapType; using BaseListScalar::BaseListScalar; explicit MapScalar(std::shared_ptr value); }; 
struct ARROW_EXPORT FixedSizeListScalar : public BaseListScalar {
  using TypeClass = FixedSizeListType;
  using BaseListScalar::BaseListScalar;

  FixedSizeListScalar(std::shared_ptr<Array> value, std::shared_ptr<DataType> type);

  explicit FixedSizeListScalar(std::shared_ptr<Array> value);
};

/// \brief Scalar whose value is a vector of child scalars
struct ARROW_EXPORT StructScalar : public Scalar {
  using TypeClass = StructType;
  using ValueType = std::vector<std::shared_ptr<Scalar>>;

  ScalarVector value;

  /// \brief Look up a child scalar through a field reference
  Result<std::shared_ptr<Scalar>> field(FieldRef ref) const;

  // Non-null constructor.
  StructScalar(ValueType value, std::shared_ptr<DataType> type)
      : Scalar(std::move(type), true), value(std::move(value)) {}

  /// \brief Build a struct scalar from child scalars and their field names
  static Result<std::shared_ptr<StructScalar>> Make(ValueType value,
                                                    std::vector<std::string> field_names);

  // Type-only constructor: `is_valid` stays false and `value` stays empty.
  explicit StructScalar(std::shared_ptr<DataType> type) : Scalar(std::move(type)) {}
};

/// \brief Common base of the sparse and dense union scalars
struct ARROW_EXPORT UnionScalar : public Scalar {
  using Scalar::Scalar;
  using ValueType = std::shared_ptr<Scalar>;

  ValueType value;

  // Default-initialized so that a scalar built through the inherited
  // type-only constructor never exposes an indeterminate type code.
  int8_t type_code = 0;

  // Null constructor: records the type code but holds no value.
  UnionScalar(int8_t type_code, std::shared_ptr<DataType> type)
      : Scalar(std::move(type), false), type_code(type_code) {}

  // Non-null constructor.
  UnionScalar(ValueType value, int8_t type_code, std::shared_ptr<DataType> type)
      : Scalar(std::move(type), true), value(std::move(value)), type_code(type_code) {}
};

struct ARROW_EXPORT SparseUnionScalar : public UnionScalar {
  using UnionScalar::UnionScalar;
  using TypeClass = SparseUnionType;
};

struct ARROW_EXPORT DenseUnionScalar : public UnionScalar {
  using UnionScalar::UnionScalar;
  using TypeClass = DenseUnionType;
};

/// \brief A Scalar value for DictionaryType
///
/// `is_valid` denotes the validity of the `index`, regardless of
/// the corresponding value in the `dictionary`.
struct ARROW_EXPORT DictionaryScalar : public internal::PrimitiveScalarBase { using TypeClass = DictionaryType; struct ValueType { std::shared_ptr index; std::shared_ptr dictionary; } value; explicit DictionaryScalar(std::shared_ptr type); DictionaryScalar(ValueType value, std::shared_ptr type, bool is_valid = true) : internal::PrimitiveScalarBase(std::move(type), is_valid), value(std::move(value)) {} static std::shared_ptr Make(std::shared_ptr index, std::shared_ptr dict); Result> GetEncodedValue() const; void* mutable_data() override { return internal::checked_cast(*value.index) .mutable_data(); } util::string_view view() const override { return internal::checked_cast(*value.index) .view(); } }; /// \brief A Scalar value for ExtensionType /// /// The value is the underlying storage scalar. /// `is_valid` must only be true if `value` is non-null and `value->is_valid` is true struct ARROW_EXPORT ExtensionScalar : public Scalar { using Scalar::Scalar; using TypeClass = ExtensionType; using ValueType = std::shared_ptr; ExtensionScalar(std::shared_ptr storage, std::shared_ptr type) : Scalar(std::move(type), true), value(std::move(storage)) {} template ::value>> ExtensionScalar(Storage&& storage, std::shared_ptr type) : ExtensionScalar(std::make_shared(std::move(storage)), std::move(type)) {} std::shared_ptr value; }; /// @} namespace internal { inline Status CheckBufferLength(...) 
{ return Status::OK(); } ARROW_EXPORT Status CheckBufferLength(const FixedSizeBinaryType* t, const std::shared_ptr* b); } // namespace internal template struct MakeScalarImpl; /// \defgroup scalar-factories Scalar factory functions /// /// @{ /// \brief Scalar factory for null scalars ARROW_EXPORT std::shared_ptr MakeNullScalar(std::shared_ptr type); /// \brief Scalar factory for non-null scalars template Result> MakeScalar(std::shared_ptr type, Value&& value) { return MakeScalarImpl{type, std::forward(value), NULLPTR}.Finish(); } /// \brief Type-inferring scalar factory for non-null scalars /// /// Construct a Scalar instance with a DataType determined by the input C++ type. /// (for example Int8Scalar for a int8_t input). /// Only non-parametric primitive types and String are supported. template ::type>, typename ScalarType = typename Traits::ScalarType, typename Enable = decltype(ScalarType(std::declval(), Traits::type_singleton()))> std::shared_ptr MakeScalar(Value value) { return std::make_shared(std::move(value), Traits::type_singleton()); } inline std::shared_ptr MakeScalar(std::string value) { return std::make_shared(std::move(value)); } /// @} template struct MakeScalarImpl { template ::ScalarType, typename ValueType = typename ScalarType::ValueType, typename Enable = typename std::enable_if< std::is_constructible>::value && std::is_convertible::value>::type> Status Visit(const T& t) { ARROW_RETURN_NOT_OK(internal::CheckBufferLength(&t, &value_)); // `static_cast` makes a rvalue if ValueRef is `ValueType&&` out_ = std::make_shared( static_cast(static_cast(value_)), std::move(type_)); return Status::OK(); } Status Visit(const ExtensionType& t) { ARROW_ASSIGN_OR_RAISE(auto storage, MakeScalar(t.storage_type(), static_cast(value_))); out_ = std::make_shared(std::move(storage), type_); return Status::OK(); } // Enable constructing string/binary scalars (but not decimal, etc) from std::string template enable_if_t< std::is_same::type, std::string>::value && 
(is_base_binary_type::value || std::is_same::value), Status> Visit(const T& t) { using ScalarType = typename TypeTraits::ScalarType; out_ = std::make_shared(Buffer::FromString(std::move(value_)), std::move(type_)); return Status::OK(); } Status Visit(const DataType& t) { return Status::NotImplemented("constructing scalars of type ", t, " from unboxed values"); } Result> Finish() && { ARROW_RETURN_NOT_OK(VisitTypeInline(*type_, this)); return std::move(out_); } std::shared_ptr type_; ValueRef value_; std::shared_ptr out_; }; } // namespace arrow