first commit

This commit is contained in:
Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

View File

@ -0,0 +1,79 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <atomic>
#include <memory>
#include "arrow/testing/gtest_util.h"
#include "arrow/util/async_generator.h"
#include "arrow/util/future.h"
namespace arrow {
namespace util {
/// \brief Turn a vector into an asynchronous generator.
///
/// \param[in] v the items to wrap; ownership is transferred to the generator
/// \return the generator produced by MakeVectorGenerator for `v`
template <typename T>
AsyncGenerator<T> AsyncVectorIt(std::vector<T> v) {
  AsyncGenerator<T> generator = MakeVectorGenerator(std::move(v));
  return generator;
}
/// \brief Wrap `src` so that calls start failing at a given call index.
///
/// The returned generator counts how many times it has been invoked (the
/// counter is shared between copies of the generator). Calls numbered below
/// `failing_index` are delegated to `src`; every later call yields an already
/// finished future carrying Status::Invalid("XYZ").
///
/// \param[in] src the generator to delegate to
/// \param[in] failing_index zero-based index of the first failing call
template <typename T>
AsyncGenerator<T> FailAt(AsyncGenerator<T> src, int failing_index) {
  auto calls_so_far = std::make_shared<std::atomic<int>>(0);
  return [src, calls_so_far, failing_index]() -> Future<T> {
    const int call_number = calls_so_far->fetch_add(1);
    if (call_number < failing_index) {
      return src();
    }
    return Future<T>::MakeFinished(Status::Invalid("XYZ"));
  };
}
/// \brief Wrap `source` so each item is passed on only after SleepABitAsync()
/// has completed.
///
/// Every item is copied into the continuation and returned unchanged.
template <typename T>
AsyncGenerator<T> SlowdownABit(AsyncGenerator<T> source) {
  auto delay_then_emit = [](const T& res) {
    auto emit_copy = [res]() { return res; };
    return SleepABitAsync().Then(std::move(emit_copy));
  };
  return MakeMappedGenerator(std::move(source), std::move(delay_then_emit));
}
/// \brief Generator wrapper that counts how many times it has been invoked.
///
/// The wrapped source and the counter live in a shared State object, so copies
/// of a TrackingGenerator observe and update the same count.
template <typename T>
class TrackingGenerator {
 public:
  /// \param[in] source the generator every call is delegated to
  explicit TrackingGenerator(AsyncGenerator<T> source)
      : state_(std::make_shared<State>(std::move(source))) {}

  /// Increment the call counter, then delegate to the wrapped source.
  Future<T> operator()() {
    state_->num_read++;
    return state_->source();
  }

  /// Number of times operator() has been called so far.
  /// Const-qualified so the count can also be read through a const generator.
  int num_read() const { return state_->num_read.load(); }

 private:
  struct State {
    explicit State(AsyncGenerator<T> source) : source(std::move(source)), num_read(0) {}
    AsyncGenerator<T> source;
    std::atomic<int> num_read;
  };
  std::shared_ptr<State> state_;
};
} // namespace util
} // namespace arrow

View File

@ -0,0 +1,237 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <vector>
#include "arrow/array.h"
#include "arrow/array/builder_binary.h"
#include "arrow/array/builder_primitive.h"
#include "arrow/array/builder_time.h"
#include "arrow/buffer.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/util/bit_util.h"
#include "arrow/visit_type_inline.h"
namespace arrow {
// ArrayFromVector: construct an Array from vectors of C values
/// \brief Build an Array of `type` from `values`, using `is_valid` as a null mask.
///
/// Element i is appended as values[i] when is_valid[i] is true and as a null
/// otherwise. Problems are reported through gtest assertions.
///
/// \param[in] type the concrete DataType; its id must equal TYPE::type_id
/// \param[in] is_valid validity flags, at least as long as `values`
/// \param[in] values the C values to append
/// \param[out] out receives the finished array
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
void ArrayFromVector(const std::shared_ptr<DataType>& type,
                     const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values,
                     std::shared_ptr<Array>* out) {
  auto type_id = TYPE::type_id;
  ASSERT_EQ(type_id, type->id())
      << "template parameter and concrete DataType instance don't agree";
  // The loop below indexes is_valid by position in `values`; a shorter mask
  // would be read out of bounds.
  ASSERT_GE(is_valid.size(), values.size())
      << "validity vector is shorter than the values vector";
  std::unique_ptr<ArrayBuilder> builder_ptr;
  ASSERT_OK(MakeBuilder(default_memory_pool(), type, &builder_ptr));
  // Get the concrete builder class to access its Append() specializations
  auto& builder = dynamic_cast<typename TypeTraits<TYPE>::BuilderType&>(*builder_ptr);
  for (size_t i = 0; i < values.size(); ++i) {
    if (is_valid[i]) {
      ASSERT_OK(builder.Append(values[i]));
    } else {
      ASSERT_OK(builder.AppendNull());
    }
  }
  ASSERT_OK(builder.Finish(out));
}
// Build an Array of `type` from `values`, appending every element as a
// non-null value. Problems (type id mismatch, builder creation, append or
// finish failures) are reported through the gtest assertion macros.
//
// type:   concrete DataType; its id must equal TYPE::type_id
// values: the C values to append, in order
// out:    passed to builder.Finish() to receive the resulting array
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
void ArrayFromVector(const std::shared_ptr<DataType>& type,
const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) {
auto type_id = TYPE::type_id;
ASSERT_EQ(type_id, type->id())
<< "template parameter and concrete DataType instance don't agree";
std::unique_ptr<ArrayBuilder> builder_ptr;
ASSERT_OK(MakeBuilder(default_memory_pool(), type, &builder_ptr));
// Get the concrete builder class to access its Append() specializations
auto& builder = dynamic_cast<typename TypeTraits<TYPE>::BuilderType&>(*builder_ptr);
for (size_t i = 0; i < values.size(); ++i) {
ASSERT_OK(builder.Append(values[i]));
}
ASSERT_OK(builder.Finish(out));
}
// Overloads without a DataType argument, for parameterless types
/// Convenience overload taking a validity mask: the DataType is obtained from
/// TypeTraits<TYPE>::type_singleton() and the call is forwarded to the
/// overload that takes an explicit type.
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
void ArrayFromVector(const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values,
                     std::shared_ptr<Array>* out) {
  ArrayFromVector<TYPE, C_TYPE>(TypeTraits<TYPE>::type_singleton(), is_valid, values,
                                out);
}
/// Convenience overload without a validity mask: the DataType is obtained from
/// TypeTraits<TYPE>::type_singleton() and the call is forwarded to the
/// overload that takes an explicit type.
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
void ArrayFromVector(const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) {
  ArrayFromVector<TYPE, C_TYPE>(TypeTraits<TYPE>::type_singleton(), values, out);
}
// ChunkedArrayFromVector: construct a ChunkedArray from vectors of C values
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
void ChunkedArrayFromVector(const std::shared_ptr<DataType>& type,
const std::vector<std::vector<bool>>& is_valid,
const std::vector<std::vector<C_TYPE>>& values,
std::shared_ptr<ChunkedArray>* out) {
ArrayVector chunks;
ASSERT_EQ(is_valid.size(), values.size());
for (size_t i = 0; i < values.size(); ++i) {
std::shared_ptr<Array> array;
ArrayFromVector<TYPE, C_TYPE>(type, is_valid[i], values[i], &array);
chunks.push_back(array);
}
*out = std::make_shared<ChunkedArray>(chunks);
}
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
void ChunkedArrayFromVector(const std::shared_ptr<DataType>& type,
const std::vector<std::vector<C_TYPE>>& values,
std::shared_ptr<ChunkedArray>* out) {
ArrayVector chunks;
for (size_t i = 0; i < values.size(); ++i) {
std::shared_ptr<Array> array;
ArrayFromVector<TYPE, C_TYPE>(type, values[i], &array);
chunks.push_back(array);
}
*out = std::make_shared<ChunkedArray>(chunks);
}
// Overloads without a DataType argument, for parameterless types
/// Convenience overload taking validity masks: the DataType is obtained from
/// TypeTraits<TYPE>::type_singleton() and the call is forwarded to the
/// overload that takes an explicit type.
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
void ChunkedArrayFromVector(const std::vector<std::vector<bool>>& is_valid,
                            const std::vector<std::vector<C_TYPE>>& values,
                            std::shared_ptr<ChunkedArray>* out) {
  ChunkedArrayFromVector<TYPE, C_TYPE>(TypeTraits<TYPE>::type_singleton(), is_valid,
                                       values, out);
}
/// Convenience overload without validity masks: the DataType is obtained from
/// TypeTraits<TYPE>::type_singleton() and the call is forwarded to the
/// overload that takes an explicit type.
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
void ChunkedArrayFromVector(const std::vector<std::vector<C_TYPE>>& values,
                            std::shared_ptr<ChunkedArray>* out) {
  ChunkedArrayFromVector<TYPE, C_TYPE>(TypeTraits<TYPE>::type_singleton(), values, out);
}
// Finish `builder` into `*out`, then run the AssertZeroPadded and
// TestInitialized checks on the resulting array.
//
// builder: any builder whose Finish() can be used with ASSERT_OK_AND_ASSIGN
// out:     receives the finished array
template <typename BuilderType>
void FinishAndCheckPadding(BuilderType* builder, std::shared_ptr<Array>* out) {
ASSERT_OK_AND_ASSIGN(*out, builder->Finish());
AssertZeroPadded(**out);
TestInitialized(**out);
}
/// \brief Append the first `size` elements to `builder` and finish it.
///
/// Element i is appended as values[i] when valid_bytes[i] > 0 and as a null
/// otherwise.
///
/// \param[in] valid_bytes per-element validity flags (non-zero means valid)
/// \param[in] values the values to append for valid slots
/// \param[in] size the number of leading elements to append
/// \param[in,out] builder the builder to append to and finish
/// \param[out] out receives the finished array
/// \return Status::Invalid if `size` reaches past `valid_bytes`, or a valid
///         slot reaches past `values`; otherwise the first builder error, or
///         the result of builder->Finish(out)
template <class T, class Builder>
Status MakeArray(const std::vector<uint8_t>& valid_bytes, const std::vector<T>& values,
                 int64_t size, Builder* builder, std::shared_ptr<Array>* out) {
  const auto num_flags = static_cast<int64_t>(valid_bytes.size());
  const auto num_values = static_cast<int64_t>(values.size());
  // Every index below `size` reads a validity flag, so check that up front.
  if (size > num_flags) {
    return Status::Invalid("MakeArray: size ", size, " exceeds valid_bytes length ",
                           num_flags);
  }
  for (int64_t i = 0; i < size; ++i) {
    if (valid_bytes[i] > 0) {
      // Values are only read for valid slots, so only those need to exist.
      if (i >= num_values) {
        return Status::Invalid("MakeArray: valid slot ", i, " exceeds values length ",
                               num_values);
      }
      RETURN_NOT_OK(builder->Append(values[i]));
    } else {
      RETURN_NOT_OK(builder->AppendNull());
    }
  }
  return builder->Finish(out);
}
// Helper used by VisitBuilder: dispatches on the type of the builder held in
// builder_ (via VisitTypeInline) and calls fn_ with the builder cast to the
// matching concrete BuilderType.
template <typename Fn>
struct VisitBuilderImpl {
// Selected only for types T where calling Fn with a pointer to T's builder
// type is well-formed; calls fn_ with the cast builder and returns OK.
template <typename T, typename BuilderType = typename TypeTraits<T>::BuilderType,
// need to let SFINAE drop this Visit when it would result in
// [](NullBuilder*){}(double_builder)
typename = decltype(std::declval<Fn>()(std::declval<BuilderType*>()))>
Status Visit(const T&) {
fn_(internal::checked_cast<BuilderType*>(builder_));
return Status::OK();
}
// Fallback overload taking the base DataType: reports NotImplemented.
Status Visit(const DataType& t) {
return Status::NotImplemented("visiting builders of type ", t);
}
// Entry point: dispatch on the builder's own type.
Status Visit() { return VisitTypeInline(*builder_->type(), this); }
// The builder to visit; stored as a raw pointer.
ArrayBuilder* builder_;
// The callable to invoke; a reference when Fn is a reference type.
Fn fn_;
};
/// \brief Invoke `fn` with `builder` cast to its concrete builder type.
///
/// \param[in] builder the builder to visit
/// \param[in] fn callable accepting a pointer to the concrete builder type
/// \return the Status produced by VisitBuilderImpl::Visit()
template <typename Fn>
Status VisitBuilder(ArrayBuilder* builder, Fn&& fn) {
  VisitBuilderImpl<Fn> dispatcher{builder, std::forward<Fn>(fn)};
  return dispatcher.Visit();
}
/// \brief Build an Array by repeatedly applying `fn` to a freshly made builder.
///
/// \param[in] type the DataType to build
/// \param[in] initial_capacity if non-zero, the builder is resized to this
///            capacity before visiting
/// \param[in] visitor_repetitions how many times `fn` is applied
/// \param[in] fn callable accepting a pointer to the concrete builder type
/// \return the finished array, or the first error encountered
template <typename Fn>
Result<std::shared_ptr<Array>> ArrayFromBuilderVisitor(
    const std::shared_ptr<DataType>& type, int64_t initial_capacity,
    int64_t visitor_repetitions, Fn&& fn) {
  std::unique_ptr<ArrayBuilder> builder;
  RETURN_NOT_OK(MakeBuilder(default_memory_pool(), type, &builder));
  if (initial_capacity != 0) {
    RETURN_NOT_OK(builder->Resize(initial_capacity));
  }
  for (int64_t i = 0; i < visitor_repetitions; ++i) {
    // `fn` is applied once per repetition, so pass it as an lvalue. Forwarding
    // it here would move from an rvalue callable on the first iteration and
    // reuse the moved-from object on every later one.
    RETURN_NOT_OK(VisitBuilder(builder.get(), fn));
  }
  std::shared_ptr<Array> out;
  RETURN_NOT_OK(builder->Finish(&out));
  // Explicit move into the Result return value, as in the original code.
  return std::move(out);
}
/// Convenience overload: `length` is used both as the initial builder capacity
/// and as the number of times `fn` is applied.
template <typename Fn>
Result<std::shared_ptr<Array>> ArrayFromBuilderVisitor(
    const std::shared_ptr<DataType>& type, int64_t length, Fn&& fn) {
  const int64_t initial_capacity = length;
  const int64_t visitor_repetitions = length;
  return ArrayFromBuilderVisitor(type, initial_capacity, visitor_repetitions,
                                 std::forward<Fn>(fn));
}
/// \brief Build a bitmap buffer with one bit per element of `is_valid`.
///
/// The buffer comes from AllocateEmptyBitmap; bit i is set for every element
/// of `is_valid` that evaluates to true.
///
/// \param[in] is_valid per-element flags
/// \param[out] result receives the bitmap buffer
/// \return the allocation error if any, otherwise OK
template <typename T>
static inline Status GetBitmapFromVector(const std::vector<T>& is_valid,
                                         std::shared_ptr<Buffer>* result) {
  size_t length = is_valid.size();
  ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateEmptyBitmap(length));
  uint8_t* bitmap = buffer->mutable_data();
  for (size_t bit = 0; bit != length; ++bit) {
    if (!is_valid[bit]) {
      continue;
    }
    bit_util::SetBit(bitmap, bit);
  }
  *result = buffer;
  return Status::OK();
}
// Assertion-style wrapper around GetBitmapFromVector: the returned Status is
// checked with ASSERT_OK instead of being handed back to the caller.
//
// is_valid: per-element flags
// out:      receives the bitmap buffer
template <typename T>
inline void BitmapFromVector(const std::vector<T>& is_valid,
std::shared_ptr<Buffer>* out) {
ASSERT_OK(GetBitmapFromVector(is_valid, out));
}
} // namespace arrow

View File

@ -0,0 +1,55 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/util/thread_pool.h"
namespace arrow {
/// An executor which synchronously runs the task as part of the SpawnReal call.
///
/// `spawn_count` records how many tasks have been spawned so far.
class MockExecutor : public internal::Executor {
 public:
  /// Always reports a capacity of 0.
  int GetCapacity() override { return 0; }

  /// Count the spawn, run `task` immediately on the calling thread, return OK.
  /// The hints, stop token and stop callback are ignored.
  Status SpawnReal(internal::TaskHints /*hints*/, internal::FnOnce<void()> task,
                   StopToken, StopCallback&&) override {
    ++spawn_count;
    std::move(task)();
    return Status::OK();
  }

  int spawn_count = 0;
};
/// An executor which does not actually run the task. Can be used to simulate
/// situations where the executor schedules a task in a long queue and doesn't
/// get around to running it for a while.
///
/// Spawned tasks are stored in `captured_tasks` in spawn order.
class DelayedExecutor : public internal::Executor {
 public:
  /// Always reports a capacity of 0.
  int GetCapacity() override { return 0; }

  /// Store `task` without running it and return OK.
  /// The hints, stop token and stop callback are ignored.
  Status SpawnReal(internal::TaskHints /*hints*/, internal::FnOnce<void()> task,
                   StopToken, StopCallback&&) override {
    captured_tasks.emplace_back(std::move(task));
    return Status::OK();
  }

  std::vector<internal::FnOnce<void()>> captured_tasks;
};
} // namespace arrow

View File

@ -0,0 +1,183 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "arrow/extension_type.h"
#include "arrow/testing/visibility.h"
#include "arrow/util/macros.h"
namespace arrow {
// ExtensionArray subclass that adds no members of its own; it only inherits
// the ExtensionArray constructors.
// NOTE(review): presumably the array class produced by UuidType::MakeArray,
// which is defined outside this header - confirm.
class ARROW_TESTING_EXPORT UuidArray : public ExtensionArray {
public:
using ExtensionArray::ExtensionArray;
};
// Test extension type named "uuid" whose storage type is fixed_size_binary(16).
// ExtensionEquals, MakeArray and Deserialize are only declared here; their
// definitions are not part of this header.
class ARROW_TESTING_EXPORT UuidType : public ExtensionType {
public:
UuidType() : ExtensionType(fixed_size_binary(16)) {}
std::string extension_name() const override { return "uuid"; }
bool ExtensionEquals(const ExtensionType& other) const override;
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;
Result<std::shared_ptr<DataType>> Deserialize(
std::shared_ptr<DataType> storage_type,
const std::string& serialized) const override;
// The serialized form is the fixed string "uuid-serialized".
std::string Serialize() const override { return "uuid-serialized"; }
};
// ExtensionArray subclass that adds no members of its own; it only inherits
// the ExtensionArray constructors.
// NOTE(review): presumably the array class produced by SmallintType::MakeArray,
// which is defined outside this header - confirm.
class ARROW_TESTING_EXPORT SmallintArray : public ExtensionArray {
public:
using ExtensionArray::ExtensionArray;
};
// ExtensionArray subclass that adds no members of its own; it only inherits
// the ExtensionArray constructors.
// NOTE(review): presumably the array class produced by
// ListExtensionType::MakeArray, which is defined outside this header - confirm.
class ARROW_TESTING_EXPORT ListExtensionArray : public ExtensionArray {
public:
using ExtensionArray::ExtensionArray;
};
// Test extension type named "smallint" whose storage type is int16().
// ExtensionEquals, MakeArray and Deserialize are only declared here; their
// definitions are not part of this header.
class ARROW_TESTING_EXPORT SmallintType : public ExtensionType {
public:
SmallintType() : ExtensionType(int16()) {}
std::string extension_name() const override { return "smallint"; }
bool ExtensionEquals(const ExtensionType& other) const override;
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;
Result<std::shared_ptr<DataType>> Deserialize(
std::shared_ptr<DataType> storage_type,
const std::string& serialized) const override;
// The serialized form is the fixed string "smallint" (same as the name).
std::string Serialize() const override { return "smallint"; }
};
// Test extension type named "list-ext" whose storage type is list(int32()).
// ExtensionEquals, MakeArray and Deserialize are only declared here; their
// definitions are not part of this header.
class ARROW_TESTING_EXPORT ListExtensionType : public ExtensionType {
public:
ListExtensionType() : ExtensionType(list(int32())) {}
std::string extension_name() const override { return "list-ext"; }
bool ExtensionEquals(const ExtensionType& other) const override;
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;
Result<std::shared_ptr<DataType>> Deserialize(
std::shared_ptr<DataType> storage_type,
const std::string& serialized) const override;
// The serialized form is the fixed string "list-ext" (same as the name).
std::string Serialize() const override { return "list-ext"; }
};
// Test extension type named "dict-extension" whose storage type is
// dictionary(int8(), utf8()).
// ExtensionEquals, MakeArray and Deserialize are only declared here; their
// definitions are not part of this header.
class ARROW_TESTING_EXPORT DictExtensionType : public ExtensionType {
public:
DictExtensionType() : ExtensionType(dictionary(int8(), utf8())) {}
std::string extension_name() const override { return "dict-extension"; }
bool ExtensionEquals(const ExtensionType& other) const override;
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;
Result<std::shared_ptr<DataType>> Deserialize(
std::shared_ptr<DataType> storage_type,
const std::string& serialized) const override;
// The serialized form is the fixed string "dict-extension-serialized".
std::string Serialize() const override { return "dict-extension-serialized"; }
};
// ExtensionArray subclass that adds no members of its own; it only inherits
// the ExtensionArray constructors.
// NOTE(review): presumably the array class produced by
// Complex128Type::MakeArray, which is defined outside this header - confirm.
class ARROW_TESTING_EXPORT Complex128Array : public ExtensionArray {
public:
using ExtensionArray::ExtensionArray;
};
// Test extension type named "complex128". Its storage type is a struct with
// two non-nullable float64 fields, "real" and "imag".
// ExtensionEquals, MakeArray and Deserialize are only declared here; their
// definitions are not part of this header.
class ARROW_TESTING_EXPORT Complex128Type : public ExtensionType {
public:
Complex128Type()
: ExtensionType(struct_({::arrow::field("real", float64(), /*nullable=*/false),
::arrow::field("imag", float64(), /*nullable=*/false)})) {}
std::string extension_name() const override { return "complex128"; }
bool ExtensionEquals(const ExtensionType& other) const override;
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;
Result<std::shared_ptr<DataType>> Deserialize(
std::shared_ptr<DataType> storage_type,
const std::string& serialized) const override;
// The serialized form is the fixed string "complex128-serialized".
std::string Serialize() const override { return "complex128-serialized"; }
};
// Exported free functions; only the declarations are visible in this header.
//
// NOTE(review): the five functions returning std::shared_ptr<DataType>
// presumably create instances of the extension types declared in this header
// (UuidType, SmallintType, ListExtensionType, DictExtensionType,
// Complex128Type) - confirm against the definitions.
ARROW_TESTING_EXPORT
std::shared_ptr<DataType> uuid();
ARROW_TESTING_EXPORT
std::shared_ptr<DataType> smallint();
ARROW_TESTING_EXPORT
std::shared_ptr<DataType> list_extension_type();
ARROW_TESTING_EXPORT
std::shared_ptr<DataType> dict_extension_type();
ARROW_TESTING_EXPORT
std::shared_ptr<DataType> complex128();
// NOTE(review): the Example*() functions presumably return small sample arrays
// of the corresponding extension type; their contents are not visible here.
ARROW_TESTING_EXPORT
std::shared_ptr<Array> ExampleUuid();
ARROW_TESTING_EXPORT
std::shared_ptr<Array> ExampleSmallint();
ARROW_TESTING_EXPORT
std::shared_ptr<Array> ExampleDictExtension();
ARROW_TESTING_EXPORT
std::shared_ptr<Array> ExampleComplex128();
// Takes two arrays named `real` and `imag` and returns one array.
// NOTE(review): presumably combines them into a complex128 extension array;
// length/type requirements on the inputs are not visible here.
ARROW_TESTING_EXPORT
std::shared_ptr<Array> MakeComplex128(const std::shared_ptr<Array>& real,
const std::shared_ptr<Array>& imag);
// A RAII class that registers an extension type on construction
// and unregisters it on destruction.
//
// It can be built from a single type or from a vector of types. The class is
// declared movable (ARROW_DEFAULT_MOVE_AND_ASSIGN) and non-copyable
// (ARROW_DISALLOW_COPY_AND_ASSIGN). The constructors and destructor are
// defined outside this header.
class ARROW_TESTING_EXPORT ExtensionTypeGuard {
public:
explicit ExtensionTypeGuard(const std::shared_ptr<DataType>& type);
explicit ExtensionTypeGuard(const DataTypeVector& types);
~ExtensionTypeGuard();
ARROW_DEFAULT_MOVE_AND_ASSIGN(ExtensionTypeGuard);
protected:
ARROW_DISALLOW_COPY_AND_ASSIGN(ExtensionTypeGuard);
// NOTE(review): presumably the names of the types registered by this guard,
// kept so the destructor can unregister them - confirm in the definition.
std::vector<std::string> extension_names_;
};
} // namespace arrow

View File

@ -0,0 +1,142 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/testing/gtest_util.h"
#include "arrow/util/future.h"
// This macro should be used with futures that are expected to complete pretty
// quickly. arrow::kDefaultAssertFinishesWaitSeconds is the maximum wait here.
// Anything longer than that and it's a questionable unit test anyway.
//
// Fails the current test fatally when the wait is unsuccessful or the future
// is still unfinished afterwards. `fut` is expanded twice without being bound
// to a local, so pass a named future rather than an expression that creates one.
#define ASSERT_FINISHES_IMPL(fut) \
do { \
ASSERT_TRUE(fut.Wait(::arrow::kDefaultAssertFinishesWaitSeconds)); \
if (!fut.is_finished()) { \
FAIL() << "Future did not finish in a timely fashion"; \
} \
} while (false)
// Evaluates `expr` once (bound to the local `_fut`), waits up to
// arrow::kDefaultAssertFinishesWaitSeconds for it, and fails the current test
// fatally if the future does not finish in time or finishes with a non-OK
// status. The failure message includes the stringified `expr` and the status.
#define ASSERT_FINISHES_OK(expr) \
do { \
auto&& _fut = (expr); \
ASSERT_TRUE(_fut.Wait(::arrow::kDefaultAssertFinishesWaitSeconds)); \
if (!_fut.is_finished()) { \
FAIL() << "Future did not finish in a timely fashion"; \
} \
auto& _st = _fut.status(); \
if (!_st.ok()) { \
FAIL() << "'" ARROW_STRINGIFY(expr) "' failed with " << _st.ToString(); \
} \
} while (false)
// Evaluates `expr` once (bound to the local `_fut`), asserts that the future
// finishes within the default wait, then asserts via ASSERT_RAISES that its
// status has the code named by ENUM.
#define ASSERT_FINISHES_AND_RAISES(ENUM, expr) \
do { \
auto&& _fut = (expr); \
ASSERT_FINISHES_IMPL(_fut); \
ASSERT_RAISES(ENUM, _fut.status()); \
} while (false)
// Evaluates `expr` once, asserts that the resulting future finishes within the
// default wait, then checks via EXPECT_RAISES_WITH_MESSAGE_THAT that its status
// has the code named by ENUM and a message accepted by `matcher`.
//
// The local is named `_fut` (as in the sibling macros) rather than `fut`, so
// that calling the macro with a variable called `fut` does not expand to the
// self-referential `auto&& fut = (fut);`.
#define EXPECT_FINISHES_AND_RAISES_WITH_MESSAGE_THAT(ENUM, matcher, expr) \
  do { \
    auto&& _fut = (expr); \
    ASSERT_FINISHES_IMPL(_fut); \
    EXPECT_RAISES_WITH_MESSAGE_THAT(ENUM, matcher, _fut.status()); \
  } while (false)
// Implementation detail of ASSERT_FINISHES_OK_AND_ASSIGN. Stores `rexpr` in a
// local named `_future_name`, asserts that it finishes within the default
// wait, then assigns its result to `lhs` via ASSERT_OK_AND_ASSIGN.
// The expansion is a plain statement sequence (no do/while wrapper), so the
// future local and anything declared by `lhs` land in the enclosing scope.
#define ASSERT_FINISHES_OK_AND_ASSIGN_IMPL(lhs, rexpr, _future_name) \
auto _future_name = (rexpr); \
ASSERT_FINISHES_IMPL(_future_name); \
ASSERT_OK_AND_ASSIGN(lhs, _future_name.result());
// Asserts that the future `rexpr` finishes within the default wait and
// succeeds, then assigns its value to `lhs`. The name of the temporary future
// is derived from __COUNTER__ via ARROW_ASSIGN_OR_RAISE_NAME.
#define ASSERT_FINISHES_OK_AND_ASSIGN(lhs, rexpr) \
ASSERT_FINISHES_OK_AND_ASSIGN_IMPL(lhs, rexpr, \
ARROW_ASSIGN_OR_RAISE_NAME(_fut, __COUNTER__))
// Asserts that the future `expr` finishes within the default wait and
// succeeds, then asserts with ASSERT_EQ that its value equals `expected`.
#define ASSERT_FINISHES_OK_AND_EQ(expected, expr) \
do { \
ASSERT_FINISHES_OK_AND_ASSIGN(auto _actual, (expr)); \
ASSERT_EQ(expected, _actual); \
} while (0)
// Non-fatal counterpart of ASSERT_FINISHES_IMPL: records a test failure (via
// EXPECT_TRUE / ADD_FAILURE) when the wait is unsuccessful or the future is
// still unfinished, but lets the test continue.
// `fut` is expanded twice without being bound to a local, so pass a named
// future rather than an expression that creates one.
#define EXPECT_FINISHES_IMPL(fut) \
do { \
EXPECT_TRUE(fut.Wait(::arrow::kDefaultAssertFinishesWaitSeconds)); \
if (!fut.is_finished()) { \
ADD_FAILURE() << "Future did not finish in a timely fashion"; \
} \
} while (false)
// Implementation detail of EXPECT_FINISHES_OK_AND_ASSIGN. Stores `rexpr` in a
// local named `future_name`, expects it to finish within the default wait,
// passes its status to `handle_error`, then assigns its result to `lhs` via
// EXPECT_OK_AND_ASSIGN.
// The expansion is a plain statement sequence (no do/while wrapper), so the
// future local and anything declared by `lhs` land in the enclosing scope.
#define ON_FINISH_ASSIGN_OR_HANDLE_ERROR_IMPL(handle_error, future_name, lhs, rexpr) \
auto future_name = (rexpr); \
EXPECT_FINISHES_IMPL(future_name); \
handle_error(future_name.status()); \
EXPECT_OK_AND_ASSIGN(lhs, future_name.result());
// Expects (non-fatally) that the future produced by `expr` finishes within the
// default wait.
//
// `expr` is bound to a local first, because EXPECT_FINISHES_IMPL expands its
// argument twice. Without the binding, an expression that creates a future
// (e.g. a function call) would be evaluated twice and the two checks would
// look at different futures.
#define EXPECT_FINISHES(expr) \
  do { \
    auto&& _fut = (expr); \
    EXPECT_FINISHES_IMPL(_fut); \
  } while (0)
// Expects that the future `rexpr` finishes within the default wait, checks its
// status with ARROW_EXPECT_OK, then assigns its value to `lhs`. The name of the
// temporary future is derived from __COUNTER__ via ARROW_ASSIGN_OR_RAISE_NAME.
#define EXPECT_FINISHES_OK_AND_ASSIGN(lhs, rexpr) \
ON_FINISH_ASSIGN_OR_HANDLE_ERROR_IMPL( \
ARROW_EXPECT_OK, ARROW_ASSIGN_OR_RAISE_NAME(_fut, __COUNTER__), lhs, rexpr);
// Expects that the future `expr` finishes within the default wait and
// succeeds, then expects with EXPECT_EQ that its value equals `expected`.
#define EXPECT_FINISHES_OK_AND_EQ(expected, expr) \
do { \
EXPECT_FINISHES_OK_AND_ASSIGN(auto _actual, (expr)); \
EXPECT_EQ(expected, _actual); \
} while (0)
namespace arrow {
// Timeout handed to the future's Wait() by the *_FINISHES* macros in this
// header; as the name indicates, the value is a number of seconds.
constexpr double kDefaultAssertFinishesWaitSeconds = 64;
// Assert that the future is not finished *now*, i.e. IsFutureFinished() is
// false for its current state.
template <typename T>
void AssertNotFinished(const Future<T>& fut) {
ASSERT_FALSE(IsFutureFinished(fut.state()));
}
// Assert that the future is finished *now*, i.e. IsFutureFinished() is true
// for its current state. The outcome is not checked here.
template <typename T>
void AssertFinished(const Future<T>& fut) {
ASSERT_TRUE(IsFutureFinished(fut.state()));
}
// Assert the future is successful *now*: it must already be finished, its
// state must be FutureState::SUCCESS and its status must be OK. A future that
// is still pending is reported as a failure.
template <typename T>
void AssertSuccessful(const Future<T>& fut) {
  if (!IsFutureFinished(fut.state())) {
    FAIL() << "Expected future to be completed successfully but it was still pending";
  }
  ASSERT_EQ(fut.state(), FutureState::SUCCESS);
  ASSERT_OK(fut.status());
}
// Assert the future is failed *now*: it must already be finished, its state
// must be FutureState::FAILURE and its status must not be OK. A future that is
// still pending is reported as a failure.
template <typename T>
void AssertFailed(const Future<T>& fut) {
  if (!IsFutureFinished(fut.state())) {
    FAIL() << "Expected future to have failed but it was still pending";
  }
  ASSERT_EQ(fut.state(), FutureState::FAILURE);
  ASSERT_FALSE(fut.status().ok());
}
} // namespace arrow

View File

@ -0,0 +1,237 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "arrow/array/array_base.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/visibility.h"
#include "arrow/type_fwd.h"
namespace arrow {
/// \brief Factory functions producing arrays, record batches and readers
/// filled with a single repeated value.
class ARROW_TESTING_EXPORT ConstantArrayGenerator {
 public:
  /// \brief Generates a constant BooleanArray
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] value to repeat
  ///
  /// \return a generated Array
  static std::shared_ptr<Array> Boolean(int64_t size, bool value = false);

  /// \brief Generates a constant UInt8Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] value to repeat
  ///
  /// \return a generated Array
  static std::shared_ptr<Array> UInt8(int64_t size, uint8_t value = 0);

  /// \brief Generates a constant Int8Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] value to repeat
  ///
  /// \return a generated Array
  static std::shared_ptr<Array> Int8(int64_t size, int8_t value = 0);

  /// \brief Generates a constant UInt16Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] value to repeat
  ///
  /// \return a generated Array
  static std::shared_ptr<Array> UInt16(int64_t size, uint16_t value = 0);

  /// \brief Generates a constant Int16Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] value to repeat
  ///
  /// \return a generated Array
  static std::shared_ptr<Array> Int16(int64_t size, int16_t value = 0);

  /// \brief Generates a constant UInt32Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] value to repeat
  ///
  /// \return a generated Array
  static std::shared_ptr<Array> UInt32(int64_t size, uint32_t value = 0);

  /// \brief Generates a constant Int32Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] value to repeat
  ///
  /// \return a generated Array
  static std::shared_ptr<Array> Int32(int64_t size, int32_t value = 0);

  /// \brief Generates a constant UInt64Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] value to repeat
  ///
  /// \return a generated Array
  static std::shared_ptr<Array> UInt64(int64_t size, uint64_t value = 0);

  /// \brief Generates a constant Int64Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] value to repeat
  ///
  /// \return a generated Array
  static std::shared_ptr<Array> Int64(int64_t size, int64_t value = 0);

  /// \brief Generates a constant Float32Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] value to repeat
  ///
  /// \return a generated Array
  static std::shared_ptr<Array> Float32(int64_t size, float value = 0);

  /// \brief Generates a constant Float64Array
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] value to repeat
  ///
  /// \return a generated Array
  static std::shared_ptr<Array> Float64(int64_t size, double value = 0);

  /// \brief Generates a constant StringArray
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] value to repeat
  ///
  /// \return a generated Array
  static std::shared_ptr<Array> String(int64_t size, std::string value = "");

  /// \brief Generates a constant array for the Arrow type given as template
  /// parameter.
  ///
  /// Dispatches on ArrowType::type_id. Plain numeric and boolean types use the
  /// matching generator above. Date, time, timestamp and month-interval types
  /// are generated as Int32/Int64 arrays and re-viewed as the target type.
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] value to repeat
  ///
  /// \return a generated Array, or nullptr for a type id that is not handled
  template <typename ArrowType, typename CType = typename ArrowType::c_type>
  static std::shared_ptr<Array> Numeric(int64_t size, CType value = 0) {
    switch (ArrowType::type_id) {
      case Type::BOOL:
        return Boolean(size, static_cast<bool>(value));
      case Type::UINT8:
        return UInt8(size, static_cast<uint8_t>(value));
      case Type::INT8:
        return Int8(size, static_cast<int8_t>(value));
      case Type::UINT16:
        return UInt16(size, static_cast<uint16_t>(value));
      case Type::INT16:
        return Int16(size, static_cast<int16_t>(value));
      case Type::UINT32:
        return UInt32(size, static_cast<uint32_t>(value));
      case Type::INT32:
        return Int32(size, static_cast<int32_t>(value));
      case Type::UINT64:
        return UInt64(size, static_cast<uint64_t>(value));
      case Type::INT64:
        return Int64(size, static_cast<int64_t>(value));
      case Type::FLOAT:
        return Float32(size, static_cast<float>(value));
      case Type::DOUBLE:
        return Float64(size, static_cast<double>(value));
      // NOTE(review): INTERVAL_DAY_TIME shares the DATE32 branch and therefore
      // yields an array viewed as date32() - confirm this is intended.
      case Type::INTERVAL_DAY_TIME:
      case Type::DATE32: {
        // Cast to the signed type that Int32() actually takes.
        EXPECT_OK_AND_ASSIGN(auto viewed,
                             Int32(size, static_cast<int32_t>(value))->View(date32()));
        return viewed;
      }
      case Type::INTERVAL_MONTHS: {
        EXPECT_OK_AND_ASSIGN(auto viewed,
                             Int32(size, static_cast<int32_t>(value))
                                 ->View(std::make_shared<MonthIntervalType>()));
        return viewed;
      }
      case Type::TIME32: {
        EXPECT_OK_AND_ASSIGN(auto viewed,
                             Int32(size, static_cast<int32_t>(value))
                                 ->View(std::make_shared<Time32Type>(TimeUnit::SECOND)));
        return viewed;
      }
      case Type::TIME64: {
        // Cast to the signed type that Int64() actually takes.
        EXPECT_OK_AND_ASSIGN(auto viewed, Int64(size, static_cast<int64_t>(value))
                                              ->View(std::make_shared<Time64Type>()));
        return viewed;
      }
      case Type::DATE64: {
        EXPECT_OK_AND_ASSIGN(auto viewed,
                             Int64(size, static_cast<int64_t>(value))->View(date64()));
        return viewed;
      }
      case Type::TIMESTAMP: {
        EXPECT_OK_AND_ASSIGN(
            auto viewed, Int64(size, static_cast<int64_t>(value))
                             ->View(std::make_shared<TimestampType>(TimeUnit::SECOND)));
        return viewed;
      }
      default:
        return nullptr;
    }
  }

  /// \brief Generates a constant Array of zeroes
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] type the type of the Array
  ///
  /// \return a generated Array
  static std::shared_ptr<Array> Zeroes(int64_t size,
                                       const std::shared_ptr<DataType>& type);

  /// \brief Generates a RecordBatch of zeroes
  ///
  /// \param[in] size the size of the array to generate
  /// \param[in] schema to conform to
  ///
  /// This function is handy to return a RecordBatch of a desired shape.
  ///
  /// \return a generated RecordBatch
  static std::shared_ptr<RecordBatch> Zeroes(int64_t size,
                                             const std::shared_ptr<Schema>& schema);

  /// \brief Generates a RecordBatchReader by repeating a RecordBatch
  ///
  /// \param[in] n_batch the number of times it repeats batch
  /// \param[in] batch the RecordBatch to repeat
  ///
  /// \return a generated RecordBatchReader
  static std::shared_ptr<RecordBatchReader> Repeat(
      int64_t n_batch, const std::shared_ptr<RecordBatch> batch);

  /// \brief Generates a RecordBatchReader of zeroes batches
  ///
  /// \param[in] n_batch the number of RecordBatch
  /// \param[in] batch_size the size of each RecordBatch
  /// \param[in] schema to conform to
  ///
  /// \return a generated RecordBatchReader
  static std::shared_ptr<RecordBatchReader> Zeroes(int64_t n_batch, int64_t batch_size,
                                                   const std::shared_ptr<Schema>& schema);
};
// Takes a vector of scalars and returns an Array wrapped in a Result; only the
// declaration is visible in this header.
// NOTE(review): presumably element i of the array is the value of scalars[i],
// and the scalars must share one type - confirm against the definition.
ARROW_TESTING_EXPORT
Result<std::shared_ptr<Array>> ScalarVectorToArray(const ScalarVector& scalars);
} // namespace arrow

View File

@ -0,0 +1,33 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <gtest/gtest.h>
// GTest < 1.11
// Fallback for GoogleTest versions that do not provide this macro: define it
// to expand to nothing so code using it still compiles.
#ifndef GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST
#define GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(A)
#endif
// GTest < 1.10
// Fallback for GoogleTest versions that do not provide the *_TEST_SUITE*
// macros: map each one onto the corresponding *_TEST_CASE* macro.
// The absence of TYPED_TEST_SUITE alone decides whether all five aliases are
// defined.
#ifndef TYPED_TEST_SUITE
#define TYPED_TEST_SUITE TYPED_TEST_CASE
#define TYPED_TEST_SUITE_P TYPED_TEST_CASE_P
#define INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_CASE_P
#define REGISTER_TYPED_TEST_SUITE_P REGISTER_TYPED_TEST_CASE_P
#define INSTANTIATE_TYPED_TEST_SUITE_P INSTANTIATE_TYPED_TEST_CASE_P
#endif

View File

@ -0,0 +1,559 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
#include <gtest/gtest.h>
#include "arrow/compare.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/testing/gtest_compat.h"
#include "arrow/testing/visibility.h"
#include "arrow/type_fwd.h"
#include "arrow/type_traits.h"
#include "arrow/util/macros.h"
#include "arrow/util/optional.h"
#include "arrow/util/string_builder.h"
#include "arrow/util/string_view.h"
#include "arrow/util/type_fwd.h"
// NOTE: failing must be inline in the macros below, to get correct file / line number
// reporting on test failures.
// NOTE: using a for loop for this macro allows extra failure messages to be
// appended with operator<<
//
// ASSERT_RAISES(ENUM, expr): converts `expr` to a Status via GenericToStatus and
// reports a FAIL() unless `_st.Is<ENUM>()` holds. ENUM is token-pasted onto
// "Is", so it is the bare status name (the predicate Status::Is<ENUM>() must exist).
// The expansion ends without a semicolon so the caller can stream extra context.
#define ASSERT_RAISES(ENUM, expr) \
for (::arrow::Status _st = ::arrow::internal::GenericToStatus((expr)); \
!_st.Is##ENUM();) \
FAIL() << "Expected '" ARROW_STRINGIFY(expr) "' to fail with " ARROW_STRINGIFY( \
ENUM) ", but got " \
<< _st.ToString()
// ASSERT_RAISES_WITH_MESSAGE(ENUM, message, expr): evaluates `expr` once,
// converts it to a Status, reports a FAIL() unless `_st.Is<ENUM>()` holds, and
// then asserts that `message` equals the *full* `_st.ToString()` output
// (not just the message part of the status).
#define ASSERT_RAISES_WITH_MESSAGE(ENUM, message, expr) \
do { \
auto _res = (expr); \
::arrow::Status _st = ::arrow::internal::GenericToStatus(_res); \
if (!_st.Is##ENUM()) { \
FAIL() << "Expected '" ARROW_STRINGIFY(expr) "' to fail with " ARROW_STRINGIFY( \
ENUM) ", but got " \
<< _st.ToString(); \
} \
ASSERT_EQ((message), _st.ToString()); \
} while (false)
// EXPECT_RAISES_WITH_MESSAGE_THAT(ENUM, matcher, expr): EXPECT_* flavour of the
// check above. Evaluates `expr` once, expects `_st.Is<ENUM>()` to hold, and
// applies `matcher` (used with EXPECT_THAT) to the full `_st.ToString()` output.
// Both expectations are always evaluated, even when the first one fails.
#define EXPECT_RAISES_WITH_MESSAGE_THAT(ENUM, matcher, expr) \
do { \
auto _res = (expr); \
::arrow::Status _st = ::arrow::internal::GenericToStatus(_res); \
EXPECT_TRUE(_st.Is##ENUM()) << "Expected '" ARROW_STRINGIFY(expr) "' to fail with " \
<< ARROW_STRINGIFY(ENUM) ", but got " << _st.ToString(); \
EXPECT_THAT(_st.ToString(), (matcher)); \
} while (false)
// EXPECT_RAISES_WITH_CODE_AND_MESSAGE_THAT(code, matcher, expr): evaluates
// `expr` once, converts it to a Status, and expects
//   - the status code, rendered as a string, to equal that of `code`
//     (a StatusCode value rather than a bare name as in the ENUM-based macros);
//   - the full `_st.ToString()` output to satisfy `matcher`.
// Status is fully qualified, like in the sibling macros, so the macro also
// expands correctly outside of namespace arrow.
#define EXPECT_RAISES_WITH_CODE_AND_MESSAGE_THAT(code, matcher, expr) \
  do { \
    auto _res = (expr); \
    ::arrow::Status _st = ::arrow::internal::GenericToStatus(_res); \
    EXPECT_EQ(_st.CodeAsString(), ::arrow::Status::CodeAsString(code)); \
    EXPECT_THAT(_st.ToString(), (matcher)); \
  } while (false)
// ASSERT_OK(expr): converts `expr` to a Status and reports a FAIL() with the
// stringified expression and the status text when the status is not ok.
// Written as a for statement (no trailing semicolon) so callers can append
// extra context with operator<<.
#define ASSERT_OK(expr) \
for (::arrow::Status _st = ::arrow::internal::GenericToStatus((expr)); !_st.ok();) \
FAIL() << "'" ARROW_STRINGIFY(expr) "' failed with " << _st.ToString()
// ASSERT_OK_NO_THROW(expr): ASSERT_OK wrapped in GTest's ASSERT_NO_THROW.
#define ASSERT_OK_NO_THROW(expr) ASSERT_NO_THROW(ASSERT_OK(expr))
// ARROW_EXPECT_OK(expr): EXPECT_* flavour of ASSERT_OK. Evaluates `expr` once,
// converts it to a Status and records an EXPECT_TRUE failure (with the
// stringified expression and status text) when the status is not ok.
#define ARROW_EXPECT_OK(expr) \
do { \
auto _res = (expr); \
::arrow::Status _st = ::arrow::internal::GenericToStatus(_res); \
EXPECT_TRUE(_st.ok()) << "'" ARROW_STRINGIFY(expr) "' failed with " \
<< _st.ToString(); \
} while (false)
// ASSERT_NOT_OK(expr): converts `expr` to a Status and reports a FAIL() when
// the status *is* ok. Written as a for statement (no trailing semicolon) so
// callers can append extra context with operator<<.
#define ASSERT_NOT_OK(expr) \
  for (::arrow::Status _st = ::arrow::internal::GenericToStatus((expr)); _st.ok();) \
  FAIL() << "'" ARROW_STRINGIFY(expr) "' did not fail, got " << _st.ToString()
// ABORT_NOT_OK(expr): evaluates `expr` once, converts it to a Status and calls
// `_st.Abort()` when the status is not ok. Does not use any GTest macro.
// NOTE(review): unlike the other do/while macros in this file, the expansion
// ends with its own ';'. An invocation followed by ';' therefore yields an
// extra empty statement, which breaks `if (c) ABORT_NOT_OK(x); else ...`.
// Left as-is because existing callers may rely on omitting the semicolon.
#define ABORT_NOT_OK(expr) \
do { \
auto _res = (expr); \
::arrow::Status _st = ::arrow::internal::GenericToStatus(_res); \
if (ARROW_PREDICT_FALSE(!_st.ok())) { \
_st.Abort(); \
} \
} while (false);
// ASSIGN_OR_HANDLE_ERROR_IMPL(handle_error, status_name, lhs, rexpr):
// shared implementation of the *_AND_ASSIGN / ASSIGN_OR_* macros below.
// Expands to three statements in the *enclosing* scope (no do/while wrapper),
// so `lhs` may be a declaration such as `auto x`:
//   1. bind the result of `rexpr` to the temporary named `status_name`;
//   2. pass its status to `handle_error` (one of the status-checking macros);
//   3. assign the moved-out value to `lhs` via ValueOrDie().
// Step 3 runs whenever control reaches it, i.e. also after a handler that
// records a failure without leaving the function (e.g. ARROW_EXPECT_OK).
#define ASSIGN_OR_HANDLE_ERROR_IMPL(handle_error, status_name, lhs, rexpr) \
auto&& status_name = (rexpr); \
handle_error(status_name.status()); \
lhs = std::move(status_name).ValueOrDie();
// ASSERT_OK_AND_ASSIGN(lhs, rexpr): checks the status with ASSERT_OK.
// __COUNTER__ is used to generate a distinct temporary name per expansion.
#define ASSERT_OK_AND_ASSIGN(lhs, rexpr) \
ASSIGN_OR_HANDLE_ERROR_IMPL( \
ASSERT_OK, ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), lhs, rexpr);
// ASSIGN_OR_ABORT(lhs, rexpr): checks the status with ABORT_NOT_OK.
#define ASSIGN_OR_ABORT(lhs, rexpr) \
ASSIGN_OR_HANDLE_ERROR_IMPL(ABORT_NOT_OK, \
ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \
lhs, rexpr);
// EXPECT_OK_AND_ASSIGN(lhs, rexpr): checks the status with ARROW_EXPECT_OK.
#define EXPECT_OK_AND_ASSIGN(lhs, rexpr) \
ASSIGN_OR_HANDLE_ERROR_IMPL(ARROW_EXPECT_OK, \
ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \
lhs, rexpr);
// ASSERT_OK_AND_EQ(expected, expr): unwraps `expr` with ASSERT_OK_AND_ASSIGN
// into a block-local `_actual`, then asserts `expected == _actual` with ASSERT_EQ.
#define ASSERT_OK_AND_EQ(expected, expr) \
do { \
ASSERT_OK_AND_ASSIGN(auto _actual, (expr)); \
ASSERT_EQ(expected, _actual); \
} while (0)
// A generalized version of GTest's SCOPED_TRACE that takes arbitrary arguments.
// ARROW_SCOPED_TRACE("some variable = ", some_variable, ...)
// All arguments are forwarded to ::arrow::util::StringBuilder and the result is
// handed to SCOPED_TRACE.
#define ARROW_SCOPED_TRACE(...) SCOPED_TRACE(::arrow::util::StringBuilder(__VA_ARGS__))
namespace arrow {
// ----------------------------------------------------------------------
// Useful testing::Types declarations
// GTest printer hook for StatusCode: writes Status::CodeAsString(code) to `os`.
inline void PrintTo(StatusCode code, std::ostream* os) {
  std::ostream& out = *os;
  out << Status::CodeAsString(code);
}
// Type lists (::testing::Types) for use with typed test suites.
// Unsigned/signed integers of every width plus float and double.
using NumericArrowTypes =
::testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type, Int8Type, Int16Type,
Int32Type, Int64Type, FloatType, DoubleType>;
// Floating point types only.
using RealArrowTypes = ::testing::Types<FloatType, DoubleType>;
// Unsigned/signed integers of every width.
using IntegralArrowTypes = ::testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type,
Int8Type, Int16Type, Int32Type, Int64Type>;
// The integer types plus date, time, timestamp and month-interval types.
using PhysicalIntegralArrowTypes =
::testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type, Int8Type, Int16Type,
Int32Type, Int64Type, Date32Type, Date64Type, Time32Type, Time64Type,
TimestampType, MonthIntervalType>;
// Boolean plus the numeric types.
using PrimitiveArrowTypes =
::testing::Types<BooleanType, Int8Type, UInt8Type, Int16Type, UInt16Type, Int32Type,
UInt32Type, Int64Type, UInt64Type, FloatType, DoubleType>;
// Date, timestamp and time types.
using TemporalArrowTypes =
::testing::Types<Date32Type, Date64Type, TimestampType, Time32Type, Time64Type>;
// 128-bit and 256-bit decimals.
using DecimalArrowTypes = ::testing::Types<Decimal128Type, Decimal256Type>;
// Binary and string types, regular and "large" variants.
using BaseBinaryArrowTypes =
::testing::Types<BinaryType, LargeBinaryType, StringType, LargeStringType>;
using BinaryArrowTypes = ::testing::Types<BinaryType, LargeBinaryType>;
using StringArrowTypes = ::testing::Types<StringType, LargeStringType>;
// List and union variants.
using ListArrowTypes = ::testing::Types<ListType, LargeListType>;
using UnionArrowTypes = ::testing::Types<SparseUnionType, DenseUnionType>;
// Forward declarations of the types used by the assertion helpers below, so
// this header does not need their full definitions.
class Array;
class ChunkedArray;
class RecordBatch;
class Table;
struct Datum;
// Returns a vector of Type::type ids (defined in the testing library).
// NOTE(review): presumably every id in the Type::type enum — confirm against
// the implementation.
ARROW_TESTING_EXPORT
std::vector<Type::type> AllTypeIds();
// Macro spellings of the Assert*Equal helper functions; each forwards its two
// (parenthesized) arguments unchanged and relies on the helper's defaults for
// the remaining parameters.
#define ASSERT_ARRAYS_EQUAL(lhs, rhs) AssertArraysEqual((lhs), (rhs))
#define ASSERT_BATCHES_EQUAL(lhs, rhs) AssertBatchesEqual((lhs), (rhs))
#define ASSERT_BATCHES_APPROX_EQUAL(lhs, rhs) AssertBatchesApproxEqual((lhs), (rhs))
#define ASSERT_TABLES_EQUAL(lhs, rhs) AssertTablesEqual((lhs), (rhs))
// Default EqualOptions for testing: starts from default-constructed options,
// then applies nans_equal(true) and signed_zeros_equal(false).
static inline EqualOptions TestingEqualOptions() {
  const EqualOptions defaults{};
  const EqualOptions with_nans = defaults.nans_equal(true);
  return with_nans.signed_zeros_equal(false);
}
// Array and Scalar equality assertions (declarations only; implemented in the
// testing library). All take the expected value first and default to
// TestingEqualOptions().
// If verbose is true, then the arrays will be pretty printed
ARROW_TESTING_EXPORT void AssertArraysEqual(
const Array& expected, const Array& actual, bool verbose = false,
const EqualOptions& options = TestingEqualOptions());
ARROW_TESTING_EXPORT void AssertArraysApproxEqual(
const Array& expected, const Array& actual, bool verbose = false,
const EqualOptions& options = TestingEqualOptions());
// NOTE(review): the previous comment here read "Returns true when values are
// both null", but this function returns void. Presumably two null scalars are
// treated as equal — confirm against the implementation.
ARROW_TESTING_EXPORT void AssertScalarsEqual(
const Scalar& expected, const Scalar& actual, bool verbose = false,
const EqualOptions& options = TestingEqualOptions());
ARROW_TESTING_EXPORT void AssertScalarsApproxEqual(
const Scalar& expected, const Scalar& actual, bool verbose = false,
const EqualOptions& options = TestingEqualOptions());
// RecordBatch equality assertions. Metadata is only compared when
// check_metadata is passed as true (it defaults to false).
ARROW_TESTING_EXPORT void AssertBatchesEqual(const RecordBatch& expected,
const RecordBatch& actual,
bool check_metadata = false);
ARROW_TESTING_EXPORT void AssertBatchesApproxEqual(const RecordBatch& expected,
const RecordBatch& actual);
// ChunkedArray equality assertions.
ARROW_TESTING_EXPORT void AssertChunkedEqual(const ChunkedArray& expected,
const ChunkedArray& actual);
// NOTE: this overload takes the *actual* value first and the expected chunks
// second, the reverse of the other assertions in this file.
ARROW_TESTING_EXPORT void AssertChunkedEqual(const ChunkedArray& actual,
const ArrayVector& expected);
// Like ChunkedEqual, but permits different chunk layout
ARROW_TESTING_EXPORT void AssertChunkedEquivalent(const ChunkedArray& expected,
const ChunkedArray& actual);
ARROW_TESTING_EXPORT void AssertChunkedApproxEquivalent(
const ChunkedArray& expected, const ChunkedArray& actual,
const EqualOptions& options = TestingEqualOptions());
// Buffer assertions against a byte vector, a string, or another Buffer.
// The buffer under test is always the first argument.
ARROW_TESTING_EXPORT void AssertBufferEqual(const Buffer& buffer,
const std::vector<uint8_t>& expected);
ARROW_TESTING_EXPORT void AssertBufferEqual(const Buffer& buffer,
const std::string& expected);
ARROW_TESTING_EXPORT void AssertBufferEqual(const Buffer& buffer, const Buffer& expected);
// Equality assertions for DataType, Field and Schema. Each comes in a
// by-reference and a by-shared_ptr overload; metadata is only considered when
// check_metadata is passed as true (it defaults to false).
ARROW_TESTING_EXPORT void AssertTypeEqual(const DataType& lhs, const DataType& rhs,
bool check_metadata = false);
ARROW_TESTING_EXPORT void AssertTypeEqual(const std::shared_ptr<DataType>& lhs,
const std::shared_ptr<DataType>& rhs,
bool check_metadata = false);
ARROW_TESTING_EXPORT void AssertFieldEqual(const Field& lhs, const Field& rhs,
bool check_metadata = false);
ARROW_TESTING_EXPORT void AssertFieldEqual(const std::shared_ptr<Field>& lhs,
const std::shared_ptr<Field>& rhs,
bool check_metadata = false);
ARROW_TESTING_EXPORT void AssertSchemaEqual(const Schema& lhs, const Schema& rhs,
bool check_metadata = false);
ARROW_TESTING_EXPORT void AssertSchemaEqual(const std::shared_ptr<Schema>& lhs,
const std::shared_ptr<Schema>& rhs,
bool check_metadata = false);
// Inequality counterparts of the assertions above, with the same overload set.
ARROW_TESTING_EXPORT void AssertTypeNotEqual(const DataType& lhs, const DataType& rhs,
bool check_metadata = false);
ARROW_TESTING_EXPORT void AssertTypeNotEqual(const std::shared_ptr<DataType>& lhs,
const std::shared_ptr<DataType>& rhs,
bool check_metadata = false);
ARROW_TESTING_EXPORT void AssertFieldNotEqual(const Field& lhs, const Field& rhs,
bool check_metadata = false);
ARROW_TESTING_EXPORT void AssertFieldNotEqual(const std::shared_ptr<Field>& lhs,
const std::shared_ptr<Field>& rhs,
bool check_metadata = false);
ARROW_TESTING_EXPORT void AssertSchemaNotEqual(const Schema& lhs, const Schema& rhs,
bool check_metadata = false);
ARROW_TESTING_EXPORT void AssertSchemaNotEqual(const std::shared_ptr<Schema>& lhs,
const std::shared_ptr<Schema>& rhs,
bool check_metadata = false);
// Produces an optional textual diff between two chunked arrays, or an error
// Status. NOTE(review): presumably nullopt means "no difference" — confirm
// against the implementation.
ARROW_TESTING_EXPORT Result<util::optional<std::string>> PrintArrayDiff(
const ChunkedArray& expected, const ChunkedArray& actual);
// Table equality assertion. By default the chunk layout must match
// (same_chunk_layout = true) and tables are not flattened (flatten = false).
ARROW_TESTING_EXPORT void AssertTablesEqual(const Table& expected, const Table& actual,
bool same_chunk_layout = true,
bool flatten = false);
// Datum equality assertions; the approximate variant defaults to
// TestingEqualOptions().
ARROW_TESTING_EXPORT void AssertDatumsEqual(const Datum& expected, const Datum& actual,
bool verbose = false);
ARROW_TESTING_EXPORT void AssertDatumsApproxEqual(
const Datum& expected, const Datum& actual, bool verbose = false,
const EqualOptions& options = TestingEqualOptions());
// Asserts (ASSERT_EQ) that the values starting at `raw_data` equal
// `expected_values`, element by element and in order. Exactly
// expected_values.size() elements are read from `raw_data`; the caller must
// make sure that many are available. Stops at the first mismatch.
template <typename C_TYPE>
void AssertNumericDataEqual(const C_TYPE* raw_data,
                            const std::vector<C_TYPE>& expected_values) {
  const C_TYPE* cursor = raw_data;
  for (auto it = expected_values.begin(); it != expected_values.end(); ++it) {
    ASSERT_EQ(*it, *cursor);
    ++cursor;
  }
}
// RecordBatch comparison helpers. Unlike AssertBatchesEqual, metadata is
// compared by default here (compare_metadata = true).
ARROW_TESTING_EXPORT void CompareBatch(const RecordBatch& left, const RecordBatch& right,
bool compare_metadata = true);
ARROW_TESTING_EXPORT void ApproxCompareBatch(const RecordBatch& left,
const RecordBatch& right,
bool compare_metadata = true);
// Check if the padding of the buffers of the array is zero.
// Also cause valgrind warnings if the padding bytes are uninitialized.
ARROW_TESTING_EXPORT void AssertZeroPadded(const Array& array);
// Check if the valid buffer bytes are initialized
// and cause valgrind warnings otherwise.
ARROW_TESTING_EXPORT void TestInitialized(const ArrayData& array);
ARROW_TESTING_EXPORT void TestInitialized(const Array& array);
// Shorthands for use inside typed tests: import the fixture's T / Type
// member typedef under the same name in the current scope.
#define DECL_T() typedef typename TestFixture::T T;
#define DECL_TYPE() typedef typename TestFixture::Type Type;
// Helpers that build Arrow data structures from JSON text (declarations only).
// They return the constructed object directly rather than a Result.
// NOTE(review): presumably parse errors are reported as test failures — confirm
// against the implementation.
// ArrayFromJSON: construct an Array from a simple JSON representation
ARROW_TESTING_EXPORT
std::shared_ptr<Array> ArrayFromJSON(const std::shared_ptr<DataType>&,
util::string_view json);
// Dictionary array: indices and dictionary values are given as separate JSON texts.
ARROW_TESTING_EXPORT
std::shared_ptr<Array> DictArrayFromJSON(const std::shared_ptr<DataType>& type,
util::string_view indices_json,
util::string_view dictionary_json);
ARROW_TESTING_EXPORT
std::shared_ptr<RecordBatch> RecordBatchFromJSON(const std::shared_ptr<Schema>&,
util::string_view);
// Chunked array from a vector of JSON texts.
ARROW_TESTING_EXPORT
std::shared_ptr<ChunkedArray> ChunkedArrayFromJSON(const std::shared_ptr<DataType>&,
const std::vector<std::string>& json);
ARROW_TESTING_EXPORT
std::shared_ptr<Scalar> ScalarFromJSON(const std::shared_ptr<DataType>&,
util::string_view json);
// Dictionary scalar: index and dictionary values are given as separate JSON texts.
ARROW_TESTING_EXPORT
std::shared_ptr<Scalar> DictScalarFromJSON(const std::shared_ptr<DataType>&,
util::string_view index_json,
util::string_view dictionary_json);
// Table from a schema and a vector of JSON texts.
ARROW_TESTING_EXPORT
std::shared_ptr<Table> TableFromJSON(const std::shared_ptr<Schema>&,
const std::vector<std::string>& json);
// Given an array, return a new identical array except for one validity bit
// set to a new value.
// This is useful to force the underlying "value" of null entries to otherwise
// invalid data and check that errors don't get reported.
ARROW_TESTING_EXPORT
std::shared_ptr<Array> TweakValidityBit(const std::shared_ptr<Array>& array,
int64_t index, bool validity);
// Sleeps for the given number of seconds (fractional values allowed by the type).
ARROW_TESTING_EXPORT
void SleepFor(double seconds);
// Sleeps for a very small amount of time. The thread will be yielded
// at least once ensuring that context switches could happen. It is intended
// to be used for stress testing parallel code and shouldn't be assumed to do any
// reliable timing.
ARROW_TESTING_EXPORT
void SleepABit();
// Wait until predicate is true or timeout in seconds expires.
ARROW_TESTING_EXPORT
void BusyWait(double seconds, std::function<bool()> predicate);
// Future-returning counterpart of SleepFor.
ARROW_TESTING_EXPORT
Future<> SleepAsync(double seconds);
// \see SleepABit
ARROW_TESTING_EXPORT
Future<> SleepABitAsync();
// Drains `iterator` into a vector via Iterator::ToVector(). The status of the
// result is checked with EXPECT_OK_AND_ASSIGN before the value is unwrapped.
template <typename T>
std::vector<T> IteratorToVector(Iterator<T> iterator) {
  EXPECT_OK_AND_ASSIGN(auto collected, iterator.ToVector());
  return collected;
}
// Returns whether the named locale is available.
// NOTE(review): presumably "available on the local machine", as in the
// LocaleGuard comment below — confirm against the implementation.
ARROW_TESTING_EXPORT
bool LocaleExists(const char* locale);
// A RAII-style object that switches to a new locale, and switches back
// to the old locale when going out of scope. Doesn't do anything if the
// new locale doesn't exist on the local machine.
// ATTENTION: may crash with an assertion failure on Windows debug builds.
// See ARROW-6108, also https://gerrit.libreoffice.org/#/c/54110/
class ARROW_TESTING_EXPORT LocaleGuard {
public:
explicit LocaleGuard(const char* new_locale);
~LocaleGuard();
protected:
// Implementation details are hidden behind an opaque Impl (defined elsewhere).
class Impl;
std::unique_ptr<Impl> impl_;
};
// Guard object for an environment variable: constructed from a variable name
// and a value, with state to remember the previous value (old_value_) and
// whether the variable was set at all (was_set_).
// NOTE(review): presumably sets the variable on construction and restores the
// previous state on destruction — confirm against the implementation.
class ARROW_TESTING_EXPORT EnvVarGuard {
public:
EnvVarGuard(const std::string& name, const std::string& value);
~EnvVarGuard();
protected:
const std::string name_;
std::string old_value_;
bool was_set_;
};
// Forward declaration only; SignalHandler is defined elsewhere.
namespace internal {
class SignalHandler;
}
// Guard object for the handler of signal `signum`. It can be constructed
// either from a plain C callback or from an internal::SignalHandler.
// NOTE(review): presumably installs the handler on construction and restores
// the previous one on destruction — confirm against the implementation.
class ARROW_TESTING_EXPORT SignalHandlerGuard {
public:
// Signature of a plain signal callback: receives the signal number.
typedef void (*Callback)(int);
SignalHandlerGuard(int signum, Callback cb);
SignalHandlerGuard(int signum, const internal::SignalHandler& handler);
~SignalHandlerGuard();
protected:
// Implementation details are hidden behind an opaque Impl (defined elsewhere).
struct Impl;
std::unique_ptr<Impl> impl_;
};
// LARGE_MEMORY_TEST(name): wraps a test name. Unless ARROW_LARGE_MEMORY_TESTS
// is defined, the name gets GTest's DISABLED_ prefix so the test is skipped
// by default; otherwise the name is used unchanged.
#ifndef ARROW_LARGE_MEMORY_TESTS
#define LARGE_MEMORY_TEST(name) DISABLED_##name
#else
#define LARGE_MEMORY_TEST(name) name
#endif
// GTest printer hook for Status: writes st.ToString() to `os`.
inline void PrintTo(const Status& st, std::ostream* os) {
  std::ostream& out = *os;
  out << st.ToString();
}

// GTest printer hook for Result<T>: prints the held value with GTest's
// universal printer when the result is ok, and its status otherwise.
template <typename T>
void PrintTo(const Result<T>& result, std::ostream* os) {
  if (!result.ok()) {
    *os << result.status();
    return;
  }
  ::testing::internal::UniversalPrint(result.ValueOrDie(), os);
}
// A data type with only move constructors (no copy, no default).
//
// Owns a single heap-allocated int through `data`; an instance whose value was
// moved away (or destroyed) has `data == nullptr` and is called "empty" below.
// `moves` is set to the source's move count plus one on every move, and is
// reset to -1 when a held value is destroyed.
struct MoveOnlyDataType {
  explicit MoveOnlyDataType(int x) : data(new int(x)) {}

  MoveOnlyDataType(const MoveOnlyDataType& other) = delete;
  MoveOnlyDataType& operator=(const MoveOnlyDataType& other) = delete;

  MoveOnlyDataType(MoveOnlyDataType&& other) { MoveFrom(&other); }
  MoveOnlyDataType& operator=(MoveOnlyDataType&& other) {
    // Self move-assignment must keep the value: MoveFrom() destroys the
    // destination first, which would otherwise throw away our own data.
    if (this != &other) {
      MoveFrom(&other);
    }
    return *this;
  }

  // Replaces the held value (if any) with a fresh copy of `x`.
  // `moves` is left untouched.
  MoveOnlyDataType& operator=(int x) {
    // Allocate before releasing so `data` never dangles if `new` throws.
    int* fresh = new int(x);
    delete data;
    data = fresh;
    return *this;
  }

  ~MoveOnlyDataType() { Destroy(); }

  // Releases the held value, if any, and marks the instance with moves == -1.
  // Does nothing on an empty instance.
  void Destroy() {
    if (data != nullptr) {
      delete data;
      data = nullptr;
      moves = -1;
    }
  }

  // Takes over the value of `other`, leaving `other` empty.
  // `other` must not be `this`.
  void MoveFrom(MoveOnlyDataType* other) {
    Destroy();
    data = other->data;
    other->data = nullptr;
    moves = other->moves + 1;
  }

  // Returns the held value, or the sentinel -42 for an empty instance.
  int ToInt() const { return data == nullptr ? -42 : *data; }

  // Two instances are equal only if both hold a value and the values match;
  // an empty instance is not equal to anything, itself included.
  bool operator==(const MoveOnlyDataType& other) const {
    return data != nullptr && other.data != nullptr && *data == *other.data;
  }

  // Strict weak ordering: an empty instance sorts before every non-empty one,
  // and nothing (in particular no empty instance) sorts before an empty one.
  bool operator<(const MoveOnlyDataType& other) const {
    if (other.data == nullptr) {
      return false;
    }
    return data == nullptr || *data < *other.data;
  }

  bool operator==(int other) const { return data != nullptr && *data == other; }
  friend bool operator==(int left, const MoveOnlyDataType& right) {
    return right == left;
  }

  int* data = nullptr;
  int moves = 0;
};
// A task that blocks until unlocked. Useful for timing tests.
// The state lives in a shared Impl (defined elsewhere); this header only
// declares the interface.
class ARROW_TESTING_EXPORT GatingTask {
public:
// timeout_seconds defaults to 10.
explicit GatingTask(double timeout_seconds = 10);
/// \brief During destruction we wait for all pending tasks to finish
~GatingTask();
/// \brief Creates a new waiting task (presumably to spawn on a thread). It will return
/// invalid if the timeout arrived before the unlock. The task will not complete until
/// unlocked or timed out
///
/// Note: The GatingTask must outlive any Task instances
// NOTE(review): the returned callable has signature void(), so the "return
// invalid" wording above cannot refer to the callable's own return value —
// presumably the timeout is surfaced through Unlock(); confirm.
std::function<void()> Task();
/// \brief Creates a new waiting task as a future. The future will not complete
/// until unlocked.
Future<> AsyncTask();
/// \brief Waits until at least count tasks are running.
Status WaitForRunning(int count);
/// \brief Unlocks all waiting tasks. Returns an invalid status if any waiting task has
/// timed out
Status Unlock();
// Factory returning a shared_ptr-owned GatingTask; same default timeout as
// the constructor.
static std::shared_ptr<GatingTask> Make(double timeout_seconds = 10);
private:
class Impl;
std::shared_ptr<Impl> impl_;
};
} // namespace arrow
// GTest printer hooks for the nonstd string_view / optional types. They are
// declared in the types' own namespaces so that argument-dependent lookup
// can find them.
namespace nonstd {
namespace sv_lite {

// Without this hint, GTest will print string_views as a container of char
template <class Char, class Traits = std::char_traits<Char>>
void PrintTo(const basic_string_view<Char, Traits>& view, std::ostream* os) {
  std::ostream& out = *os;
  out << view;
}

}  // namespace sv_lite

namespace optional_lite {

// Prints an engaged optional as "{<value>}" (the value goes through GTest's
// universal printer) and a disengaged one as "nullopt".
template <typename T>
void PrintTo(const optional<T>& opt, std::ostream* os) {
  if (!opt.has_value()) {
    *os << "nullopt";
    return;
  }
  *os << "{";
  ::testing::internal::UniversalPrint(*opt, os);
  *os << "}";
}

// Prints the nullopt tag itself.
inline void PrintTo(const decltype(nullopt)&, std::ostream* os) {
  std::ostream& out = *os;
  out << "nullopt";
}

}  // namespace optional_lite
}  // namespace nonstd

View File

@ -0,0 +1,129 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Implement Arrow JSON serialization format for integration tests
#pragma once
#include <memory>
#include <string>
#include "arrow/status.h"
#include "arrow/testing/visibility.h"
namespace arrow {
// Forward declarations: the classes below only use these types through
// pointers, references and smart pointers, so full definitions are not needed.
class Buffer;
class MemoryPool;
class RecordBatch;
class Schema;
namespace io {
class ReadableFile;
} // namespace io
namespace testing {
/// \class IntegrationJsonWriter
/// \brief Write the JSON representation of an Arrow record batch file or stream
///
/// This is used for integration testing
///
/// Instances are created through Open(); the constructor is private.
class ARROW_TESTING_EXPORT IntegrationJsonWriter {
public:
~IntegrationJsonWriter();
/// \brief Create a new JSON writer that writes to memory
///
/// \param[in] schema the schema of record batches
/// \param[out] out the returned writer object
/// \return Status
static Status Open(const std::shared_ptr<Schema>& schema,
std::unique_ptr<IntegrationJsonWriter>* out);
/// \brief Append a record batch
///
/// \param[in] batch the record batch to append
/// \return Status
Status WriteRecordBatch(const RecordBatch& batch);
/// \brief Finish the JSON payload and return as a std::string
///
/// \param[out] result the JSON as a std::string
/// \return Status
Status Finish(std::string* result);
private:
explicit IntegrationJsonWriter(const std::shared_ptr<Schema>& schema);
// Hide RapidJSON details from public API
class Impl;
std::unique_ptr<Impl> impl_;
};
/// \class IntegrationJsonReader
/// \brief Read the JSON representation of an Arrow record batch file or stream
///
/// This is used for integration testing
///
/// Instances are created through one of the Open() overloads; the
/// constructor is private.
class ARROW_TESTING_EXPORT IntegrationJsonReader {
public:
~IntegrationJsonReader();
/// \brief Create a new JSON reader
///
/// \param[in] pool a MemoryPool to use for buffer allocations
/// \param[in] data a Buffer containing the JSON data
/// \param[out] reader the returned reader object
/// \return Status
static Status Open(MemoryPool* pool, const std::shared_ptr<Buffer>& data,
std::unique_ptr<IntegrationJsonReader>* reader);
/// \brief Create a new JSON reader that uses the default memory pool
///
/// \param[in] data a Buffer containing the JSON data
/// \param[out] reader the returned reader object
/// \return Status
static Status Open(const std::shared_ptr<Buffer>& data,
std::unique_ptr<IntegrationJsonReader>* reader);
/// \brief Create a new JSON reader from a file
///
/// \param[in] pool a MemoryPool to use for buffer allocations
/// \param[in] in_file a ReadableFile containing JSON data
/// \param[out] reader the returned reader object
/// \return Status
static Status Open(MemoryPool* pool, const std::shared_ptr<io::ReadableFile>& in_file,
std::unique_ptr<IntegrationJsonReader>* reader);
/// \brief Return the schema read from the JSON
std::shared_ptr<Schema> schema() const;
/// \brief Return the number of record batches
int num_record_batches() const;
/// \brief Read a particular record batch from the file
///
/// \param[in] i the record batch index, does not boundscheck
/// \param[out] batch the read record batch
/// \return Status
Status ReadRecordBatch(int i, std::shared_ptr<RecordBatch>* batch) const;
private:
IntegrationJsonReader(MemoryPool* pool, const std::shared_ptr<Buffer>& data);
// Hide RapidJSON details from public API
class Impl;
std::unique_ptr<Impl> impl_;
};
} // namespace testing
} // namespace arrow

View File

@ -0,0 +1,469 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <utility>
#include <gmock/gmock-matchers.h>
#include "arrow/datum.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/stl_iterator.h"
#include "arrow/testing/future_util.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/util/future.h"
#include "arrow/util/unreachable.h"
namespace arrow {
// Polymorphic matcher over a pair (tuple) of pointer-like objects: both
// elements are dereferenced and compared with `first.Equals(second)`.
// Converts implicitly to testing::Matcher<PtrPair> for any suitable PtrPair.
class PointeesEqualMatcher {
 public:
  template <typename PtrPair>
  operator testing::Matcher<PtrPair>() const {  // NOLINT runtime/explicit
    struct Impl : testing::MatcherInterface<const PtrPair&> {
      void DescribeTo(::std::ostream* os) const override {
        *os << "pointees are equal";
      }

      void DescribeNegationTo(::std::ostream* os) const override {
        *os << "pointees are not equal";
      }

      bool MatchAndExplain(const PtrPair& pair,
                           testing::MatchResultListener* listener) const override {
        const auto& lhs = *std::get<0>(pair);
        const auto& rhs = *std::get<1>(pair);
        const bool equal = lhs.Equals(rhs);
        const char* verdict = equal ? " are equal" : " are not equal";
        *listener << "whose pointees " << testing::PrintToString(lhs) << " and "
                  << testing::PrintToString(rhs) << verdict;
        return equal;
      }
    };

    return testing::Matcher<PtrPair>(new Impl());
  }
};
// Returns a matcher that checks that the two pointed-to values are Equals().
// Handy in combination with other googletest matchers.
inline PointeesEqualMatcher PointeesEqual() { return PointeesEqualMatcher{}; }
// Polymorphic matcher that succeeds when the argument equals at least one
// element of an array described as JSON.
//
// The array is built with ArrayFromJSON(type, array_json) each time the
// matcher is converted to a concrete testing::Matcher<arg_type>. The argument
// must be pointer-like to a Scalar: it is passed to Scalar::Equals() and
// printed through `arg->ToString()`.
class AnyOfJSONMatcher {
 public:
  AnyOfJSONMatcher(std::shared_ptr<DataType> type, std::string array_json)
      : type_(std::move(type)), array_json_(std::move(array_json)) {}

  template <typename arg_type>
  operator testing::Matcher<arg_type>() const {  // NOLINT runtime/explicit
    struct Impl : testing::MatcherInterface<const arg_type&> {
      Impl(std::shared_ptr<DataType> type, std::string array_json)
          : type_(std::move(type)), array_json_(std::move(array_json)) {
        array = ArrayFromJSON(type_, array_json_);
      }

      void DescribeTo(std::ostream* os) const override {
        *os << "matches at least one scalar from ";
        *os << array->ToString();
      }

      void DescribeNegationTo(::std::ostream* os) const override {
        *os << "matches no scalar from ";
        *os << array->ToString();
      }

      // Compares `arg` against every element of the array in order and stops
      // at the first match. A failing GetScalar() is reported through the
      // listener and counts as "no match".
      bool MatchAndExplain(
          const arg_type& arg,
          ::testing::MatchResultListener* result_listener) const override {
        for (int64_t i = 0; i < array->length(); ++i) {
          std::shared_ptr<Scalar> scalar;
          auto maybe_scalar = array->GetScalar(i);
          if (maybe_scalar.ok()) {
            scalar = maybe_scalar.ValueOrDie();
          } else {
            // Leading space keeps the status text and "at index" apart.
            *result_listener << "GetScalar() had status "
                             << maybe_scalar.status().ToString() << " at index " << i
                             << " in the input JSON Array";
            return false;
          }

          if (scalar->Equals(arg)) return true;
        }
        *result_listener << "Argument scalar: '" << arg->ToString()
                         << "' matches no scalar from " << array->ToString();
        return false;
      }

      const std::shared_ptr<DataType> type_;
      const std::string array_json_;
      std::shared_ptr<Array> array;
    };

    return testing::Matcher<arg_type>(new Impl(type_, array_json_));
  }

 private:
  const std::shared_ptr<DataType> type_;
  const std::string array_json_;
};
// Returns a matcher that matches a scalar equal to at least one element of
// the array described by `type` and `array_json`.
inline AnyOfJSONMatcher AnyOfJSON(std::shared_ptr<DataType> type,
                                  std::string array_json) {
  return AnyOfJSONMatcher(std::move(type), std::move(array_json));
}
// Polymorphic matcher for futures: waits up to `wait_seconds` for the future
// to finish and then applies `result_matcher` to the future's result. A
// future that does not finish in time does not match.
template <typename ResultMatcher>
class FutureMatcher {
 public:
  explicit FutureMatcher(ResultMatcher result_matcher, double wait_seconds)
      : result_matcher_(std::move(result_matcher)), wait_seconds_(wait_seconds) {}

  template <typename Fut,
            typename ValueType = typename std::decay<Fut>::type::ValueType>
  operator testing::Matcher<Fut>() const {  // NOLINT runtime/explicit
    using InnerMatcher = testing::Matcher<Result<ValueType>>;

    struct Impl : testing::MatcherInterface<const Fut&> {
      explicit Impl(const ResultMatcher& result_matcher, double wait_seconds)
          : result_matcher_(testing::MatcherCast<Result<ValueType>>(result_matcher)),
            wait_seconds_(wait_seconds) {}

      void DescribeTo(::std::ostream* os) const override {
        *os << "value ";
        result_matcher_.DescribeTo(os);
      }

      void DescribeNegationTo(::std::ostream* os) const override {
        *os << "value ";
        result_matcher_.DescribeNegationTo(os);
      }

      bool MatchAndExplain(const Fut& fut,
                           testing::MatchResultListener* listener) const override {
        const bool finished = fut.Wait(wait_seconds_);
        if (finished) {
          return result_matcher_.MatchAndExplain(fut.result(), listener);
        }
        *listener << "which didn't finish within " << wait_seconds_ << " seconds";
        return false;
      }

      const InnerMatcher result_matcher_;
      const double wait_seconds_;
    };

    return testing::Matcher<Fut>(new Impl(result_matcher_, wait_seconds_));
  }

 private:
  const ResultMatcher result_matcher_;
  const double wait_seconds_;
};
// Polymorphic matcher for Result-like objects: matches when the status is ok
// and the held value satisfies `value_matcher`. A result holding an error
// never matches; the error text is reported through the listener.
template <typename ValueMatcher>
class ResultMatcher {
 public:
  explicit ResultMatcher(ValueMatcher value_matcher)
      : value_matcher_(std::move(value_matcher)) {}

  template <typename Res,
            typename ValueType = typename std::decay<Res>::type::ValueType>
  operator testing::Matcher<Res>() const {  // NOLINT runtime/explicit
    struct Impl : testing::MatcherInterface<const Res&> {
      explicit Impl(const ValueMatcher& value_matcher)
          : value_matcher_(testing::MatcherCast<ValueType>(value_matcher)) {}

      void DescribeTo(::std::ostream* os) const override {
        *os << "value ";
        value_matcher_.DescribeTo(os);
      }

      void DescribeNegationTo(::std::ostream* os) const override {
        *os << "value ";
        value_matcher_.DescribeNegationTo(os);
      }

      bool MatchAndExplain(const Res& maybe_value,
                           testing::MatchResultListener* listener) const override {
        if (maybe_value.status().ok()) {
          const ValueType& held = maybe_value.ValueOrDie();
          // Collect the inner explanation separately so that it can be
          // appended after our own summary.
          testing::StringMatchResultListener inner_listener;
          const bool matched = value_matcher_.MatchAndExplain(held, &inner_listener);
          const char* verdict = matched ? " matches" : " doesn't match";
          *listener << "whose value " << testing::PrintToString(held) << verdict;
          testing::internal::PrintIfNotEmpty(inner_listener.str(), listener->stream());
          return matched;
        }

        *listener << "whose error "
                  << testing::PrintToString(maybe_value.status().ToString())
                  << " doesn't match";
        return false;
      }

      const testing::Matcher<ValueType> value_matcher_;
    };

    return testing::Matcher<Res>(new Impl(value_matcher_));
  }

 private:
  const ValueMatcher value_matcher_;
};
// Polymorphic matcher for anything convertible to a Status through
// internal::GenericToStatus: matches when the status code equals `code` and,
// if a message matcher was supplied, the status message satisfies it.
class ErrorMatcher {
 public:
  explicit ErrorMatcher(StatusCode code,
                        util::optional<testing::Matcher<std::string>> message_matcher)
      : code_(code), message_matcher_(std::move(message_matcher)) {}

  template <typename Res>
  operator testing::Matcher<Res>() const {  // NOLINT runtime/explicit
    struct Impl : testing::MatcherInterface<const Res&> {
      explicit Impl(StatusCode code,
                    util::optional<testing::Matcher<std::string>> message_matcher)
          : code_(code), message_matcher_(std::move(message_matcher)) {}

      void DescribeTo(::std::ostream* os) const override {
        *os << "raises StatusCode::" << Status::CodeAsString(code_);
        if (!message_matcher_) return;
        *os << " and message ";
        message_matcher_->DescribeTo(os);
      }

      void DescribeNegationTo(::std::ostream* os) const override {
        *os << "does not raise StatusCode::" << Status::CodeAsString(code_);
        if (!message_matcher_) return;
        *os << " or message ";
        message_matcher_->DescribeNegationTo(os);
      }

      bool MatchAndExplain(const Res& maybe_value,
                           testing::MatchResultListener* listener) const override {
        const Status& status = internal::GenericToStatus(maybe_value);
        testing::StringMatchResultListener message_listener;

        // The message is only inspected once the code already matches.
        bool matched = (status.code() == code_);
        if (matched && message_matcher_) {
          matched =
              message_matcher_->MatchAndExplain(status.message(), &message_listener);
        }

        const char* summary = nullptr;
        if (matched) {
          summary = "whose error matches";
        } else if (status.ok()) {
          summary = "whose non-error doesn't match";
        } else {
          summary = "whose error doesn't match";
        }
        *listener << summary;

        testing::internal::PrintIfNotEmpty(message_listener.str(), listener->stream());
        return matched;
      }

      const StatusCode code_;
      const util::optional<testing::Matcher<std::string>> message_matcher_;
    };

    return testing::Matcher<Res>(new Impl(code_, message_matcher_));
  }

 private:
  const StatusCode code_;
  const util::optional<testing::Matcher<std::string>> message_matcher_;
};
// Polymorphic matcher for anything convertible to a Status through
// internal::GenericToStatus: matches when that status is ok.
class OkMatcher {
 public:
  template <typename Res>
  operator testing::Matcher<Res>() const {  // NOLINT runtime/explicit
    struct Impl : testing::MatcherInterface<const Res&> {
      void DescribeTo(::std::ostream* os) const override { *os << "is ok"; }

      void DescribeNegationTo(::std::ostream* os) const override {
        *os << "is not ok";
      }

      bool MatchAndExplain(const Res& maybe_value,
                           testing::MatchResultListener* listener) const override {
        const Status& status = internal::GenericToStatus(maybe_value);
        const bool is_ok = status.ok();
        *listener << "whose ";
        if (is_ok) {
          *listener << "non-error matches";
        } else {
          *listener << "error doesn't match";
        }
        return is_ok;
      }
    };

    return testing::Matcher<Res>(new Impl());
  }
};
// Returns a matcher that waits on a Future (for up to `wait_seconds`, which
// defaults to kDefaultAssertFinishesWaitSeconds) and then applies
// `result_matcher` to the future's result.
template <typename ResultMatcher>
FutureMatcher<ResultMatcher> Finishes(
    const ResultMatcher& result_matcher,
    double wait_seconds = kDefaultAssertFinishesWaitSeconds) {
  using MatcherType = FutureMatcher<ResultMatcher>;
  return MatcherType(result_matcher, wait_seconds);
}
// Returns a matcher that matches a successful Result<T> whose value
// satisfies `value_matcher`.
template <typename ValueMatcher>
ResultMatcher<ValueMatcher> ResultWith(const ValueMatcher& value_matcher) {
  using MatcherType = ResultMatcher<ValueMatcher>;
  return MatcherType(value_matcher);
}
// Returns a matcher that matches an ok Status or Result<T>.
inline OkMatcher Ok() { return OkMatcher{}; }

// Returns a matcher that matches the StatusCode of a Status or Result<T>;
// the message is not inspected.
// Do not use Raises(StatusCode::OK) to match a non error code.
inline ErrorMatcher Raises(StatusCode code) {
  return ErrorMatcher{code, util::nullopt};
}

// Returns a matcher that matches the StatusCode and message of a Status or
// Result<T>. `message_matcher` is converted to a matcher over std::string.
template <typename MessageMatcher>
ErrorMatcher Raises(StatusCode code, const MessageMatcher& message_matcher) {
  testing::Matcher<std::string> as_string_matcher =
      testing::MatcherCast<std::string>(message_matcher);
  return ErrorMatcher(code, std::move(as_string_matcher));
}
// Polymorphic matcher that boxes its argument into a Datum and compares it
// with an expected Datum. The checks run in this order, each with its own
// explanation on failure: Datum kind, then DataType (or Schema when the datum
// has no type), then the values themselves.
class DataEqMatcher {
 public:
  // TODO(bkietz) support EqualOptions, ApproxEquals, etc
  // Probably it's better to use something like config-through-key_value_metadata
  // as with the random generators to decouple this from EqualOptions etc.
  explicit DataEqMatcher(Datum expected) : expected_(std::move(expected)) {}

  template <typename Data>
  operator testing::Matcher<Data>() const {  // NOLINT runtime/explicit
    struct Impl : testing::MatcherInterface<const Data&> {
      explicit Impl(Datum expected) : expected_(std::move(expected)) {}

      void DescribeTo(::std::ostream* os) const override {
        *os << "has data ";
        PrintTo(expected_, os);
      }

      void DescribeNegationTo(::std::ostream* os) const override {
        *os << "doesn't have data ";
        PrintTo(expected_, os);
      }

      bool MatchAndExplain(const Data& data,
                           testing::MatchResultListener* listener) const override {
        Datum actual(data);

        // 1. Both datums must be of the same kind.
        if (actual.kind() != expected_.kind()) {
          *listener << "whose Datum::kind " << actual.ToString() << " doesn't match "
                    << expected_.ToString();
          return false;
        }

        // 2. Compare the type; the schema is only consulted when the datum
        //    has no type.
        if (const auto& actual_type = actual.type()) {
          const bool same_type = !(*actual_type != *expected_.type());
          if (!same_type) {
            *listener << "whose DataType " << actual_type->ToString()
                      << " doesn't match " << expected_.type()->ToString();
            return false;
          }
        } else if (const auto& actual_schema = actual.schema()) {
          const bool same_schema = !(*actual_schema != *expected_.schema());
          if (!same_schema) {
            *listener << "whose Schema " << actual_schema->ToString()
                      << " doesn't match " << expected_.schema()->ToString();
            return false;
          }
        } else {
          Unreachable();
        }

        // 3. Compare the values.
        if (actual == expected_) {
          *listener << "whose value matches";
          return true;
        }

        // Only compute a diff when somebody will read it, and only for arrays.
        const bool want_diff = listener->IsInterested() && actual.kind() == Datum::ARRAY;
        if (want_diff) {
          *listener << "whose value differs from the expected value by "
                    << actual.make_array()->Diff(*expected_.make_array());
        } else {
          *listener << "whose value doesn't match";
        }
        return false;
      }

      Datum expected_;
    };

    return testing::Matcher<Data>(new Impl(expected_));
  }

 private:
  Datum expected_;
};
/// Constructs a datum against which arguments are matched
///
/// `dat` is perfectly forwarded into a Datum, which the returned matcher
/// takes ownership of.
template <typename Data>
DataEqMatcher DataEq(Data&& dat) {
  Datum expected(std::forward<Data>(dat));
  return DataEqMatcher(std::move(expected));
}
/// Constructs an array with ArrayFromJSON against which arguments are matched
///
/// \param[in] type the type handed to ArrayFromJSON
/// \param[in] json the JSON text handed to ArrayFromJSON
inline DataEqMatcher DataEqArray(const std::shared_ptr<DataType>& type,
                                 util::string_view json) {
  auto expected_array = ArrayFromJSON(type, json);
  return DataEq(std::move(expected_array));
}
/// Constructs an array from a vector of optionals against which arguments are matched
///
/// \param[in] type the concrete type instance used to construct the builder
/// \param[in] values the expected slots; an empty optional becomes a null slot
///
/// \return a matcher comparing its argument against the built array
template <typename T, typename ArrayType = typename TypeTraits<T>::ArrayType,
          typename BuilderType = typename TypeTraits<T>::BuilderType,
          typename ValueType =
              typename ::arrow::stl::detail::DefaultValueAccessor<ArrayType>::ValueType>
DataEqMatcher DataEqArray(T type, const std::vector<util::optional<ValueType>>& values) {
  // FIXME(bkietz) broken until DataType is move constructible
  BuilderType builder(std::make_shared<T>(std::move(type)), default_memory_pool());
  DCHECK_OK(builder.Reserve(static_cast<int64_t>(values.size())));

  // pseudo constexpr:
  // Reserve() above only accounts for the number of slots. For types that are
  // not fixed width the value bytes are not reserved, so those appends must go
  // through the checked Append(); fixed-width values can be appended unchecked.
  static const bool need_safe_append = !is_fixed_width(T::type_id);

  for (const auto& value : values) {
    if (value) {
      if (need_safe_append) {
        DCHECK_OK(builder.Append(*value));
      } else {
        builder.UnsafeAppend(*value);
      }
    } else {
      builder.UnsafeAppendNull();
    }
  }

  return DataEq(builder.Finish().ValueOrDie());
}
/// Constructs a scalar with ScalarFromJSON against which arguments are matched
///
/// \param[in] type the type handed to ScalarFromJSON
/// \param[in] json the JSON text handed to ScalarFromJSON
inline DataEqMatcher DataEqScalar(const std::shared_ptr<DataType>& type,
                                  util::string_view json) {
  auto expected_scalar = ScalarFromJSON(type, json);
  return DataEq(std::move(expected_scalar));
}
/// Constructs a scalar against which arguments are matched
///
/// \param[in] type the concrete type instance used to construct the scalar
/// \param[in] value the expected value; when it is empty the scalar is used
///            exactly as constructed from `type` (presumably a null scalar;
///            verify against the ScalarType constructor)
template <typename T, typename ScalarType = typename TypeTraits<T>::ScalarType,
          typename ValueType = typename ScalarType::ValueType>
DataEqMatcher DataEqScalar(T type, util::optional<ValueType> value) {
  ScalarType expected(std::make_shared<T>(std::move(type)));

  if (!value) {
    return DataEq(std::move(expected));
  }

  expected.value = std::move(*value);
  expected.is_valid = true;
  return DataEq(std::move(expected));
}
// HasType, HasSchema matchers
} // namespace arrow

View File

@ -0,0 +1,25 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Often-used headers, for precompiling.
// If updating this header, please make sure you check compilation speed
// before checking in. Adding headers which are not used extremely often
// may incur a slowdown, since it makes the precompiled header heavier to load.
#include "arrow/pch.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/util.h"

View File

@ -0,0 +1,504 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>
#include <memory>
#include <random>
#include <vector>
#include "arrow/testing/uniform_real.h"
#include "arrow/testing/visibility.h"
#include "arrow/type.h"
namespace arrow {
class Array;
namespace random {
using SeedType = int32_t;
constexpr SeedType kSeedMax = std::numeric_limits<SeedType>::max();
class ARROW_TESTING_EXPORT RandomArrayGenerator {
public:
explicit RandomArrayGenerator(SeedType seed)
: seed_distribution_(static_cast<SeedType>(1), kSeedMax), seed_rng_(seed) {}
/// \brief Generate a null bitmap
///
/// \param[in] size the size of the bitmap to generate
/// \param[in] null_probability the probability of a bit being zero
///
/// \return a generated Buffer
std::shared_ptr<Buffer> NullBitmap(int64_t size, double null_probability = 0);
/// \brief Generate a random BooleanArray
///
/// \param[in] size the size of the array to generate
/// \param[in] true_probability the probability of a value being 1 / bit-set
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array
std::shared_ptr<Array> Boolean(int64_t size, double true_probability,
double null_probability = 0);
/// \brief Generate a random UInt8Array
///
/// \param[in] size the size of the array to generate
/// \param[in] min the lower bound of the uniform distribution
/// \param[in] max the upper bound of the uniform distribution
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array
std::shared_ptr<Array> UInt8(int64_t size, uint8_t min, uint8_t max,
double null_probability = 0);
/// \brief Generate a random Int8Array
///
/// \param[in] size the size of the array to generate
/// \param[in] min the lower bound of the uniform distribution
/// \param[in] max the upper bound of the uniform distribution
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array
std::shared_ptr<Array> Int8(int64_t size, int8_t min, int8_t max,
double null_probability = 0);
/// \brief Generate a random UInt16Array
///
/// \param[in] size the size of the array to generate
/// \param[in] min the lower bound of the uniform distribution
/// \param[in] max the upper bound of the uniform distribution
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array
std::shared_ptr<Array> UInt16(int64_t size, uint16_t min, uint16_t max,
double null_probability = 0);
/// \brief Generate a random Int16Array
///
/// \param[in] size the size of the array to generate
/// \param[in] min the lower bound of the uniform distribution
/// \param[in] max the upper bound of the uniform distribution
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array
std::shared_ptr<Array> Int16(int64_t size, int16_t min, int16_t max,
double null_probability = 0);
/// \brief Generate a random UInt32Array
///
/// \param[in] size the size of the array to generate
/// \param[in] min the lower bound of the uniform distribution
/// \param[in] max the upper bound of the uniform distribution
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array
std::shared_ptr<Array> UInt32(int64_t size, uint32_t min, uint32_t max,
double null_probability = 0);
/// \brief Generate a random Int32Array
///
/// \param[in] size the size of the array to generate
/// \param[in] min the lower bound of the uniform distribution
/// \param[in] max the upper bound of the uniform distribution
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array
std::shared_ptr<Array> Int32(int64_t size, int32_t min, int32_t max,
double null_probability = 0);
/// \brief Generate a random UInt64Array
///
/// \param[in] size the size of the array to generate
/// \param[in] min the lower bound of the uniform distribution
/// \param[in] max the upper bound of the uniform distribution
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array
std::shared_ptr<Array> UInt64(int64_t size, uint64_t min, uint64_t max,
double null_probability = 0);
/// \brief Generate a random Int64Array
///
/// \param[in] size the size of the array to generate
/// \param[in] min the lower bound of the uniform distribution
/// \param[in] max the upper bound of the uniform distribution
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array
std::shared_ptr<Array> Int64(int64_t size, int64_t min, int64_t max,
double null_probability = 0);
/// \brief Generate a random HalfFloatArray
///
/// \param[in] size the size of the array to generate
/// \param[in] min the lower bound of the distribution
/// \param[in] max the upper bound of the distribution
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array
std::shared_ptr<Array> Float16(int64_t size, int16_t min, int16_t max,
double null_probability = 0);
/// \brief Generate a random FloatArray
///
/// \param[in] size the size of the array to generate
/// \param[in] min the lower bound of the uniform distribution
/// \param[in] max the upper bound of the uniform distribution
/// \param[in] null_probability the probability of a value being null
/// \param[in] nan_probability the probability of a value being NaN
///
/// \return a generated Array
std::shared_ptr<Array> Float32(int64_t size, float min, float max,
double null_probability = 0, double nan_probability = 0);
/// \brief Generate a random DoubleArray
///
/// \param[in] size the size of the array to generate
/// \param[in] min the lower bound of the uniform distribution
/// \param[in] max the upper bound of the uniform distribution
/// \param[in] null_probability the probability of a value being null
/// \param[in] nan_probability the probability of a value being NaN
///
/// \return a generated Array
std::shared_ptr<Array> Float64(int64_t size, double min, double max,
double null_probability = 0, double nan_probability = 0);
/// \brief Generate a random Date64Array
///
/// \param[in] size the size of the array to generate
/// \param[in] min the lower bound of the uniform distribution
/// \param[in] max the upper bound of the uniform distribution
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array
std::shared_ptr<Array> Date64(int64_t size, int64_t min, int64_t max,
double null_probability = 0);
/// \brief Generate a random numeric array, dispatching on ArrowType::type_id
///
/// `min` and `max` are cast to the bound type expected by the matching
/// type-specific generator (UInt8, Int8, ..., Float64, Date64) and forwarded
/// together with `size` and `null_probability`.
///
/// \param[in] size the size of the array to generate
/// \param[in] min the lower bound passed to the type-specific generator
/// \param[in] max the upper bound passed to the type-specific generator
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array, or nullptr when ArrowType::type_id is not one
/// of the type ids handled below
template <typename ArrowType, typename CType = typename ArrowType::c_type>
std::shared_ptr<Array> Numeric(int64_t size, CType min, CType max,
                               double null_probability = 0) {
  switch (ArrowType::type_id) {
    case Type::UINT8: {
      const auto lo = static_cast<uint8_t>(min);
      const auto hi = static_cast<uint8_t>(max);
      return UInt8(size, lo, hi, null_probability);
    }
    case Type::INT8: {
      const auto lo = static_cast<int8_t>(min);
      const auto hi = static_cast<int8_t>(max);
      return Int8(size, lo, hi, null_probability);
    }
    case Type::UINT16: {
      const auto lo = static_cast<uint16_t>(min);
      const auto hi = static_cast<uint16_t>(max);
      return UInt16(size, lo, hi, null_probability);
    }
    case Type::INT16: {
      const auto lo = static_cast<int16_t>(min);
      const auto hi = static_cast<int16_t>(max);
      return Int16(size, lo, hi, null_probability);
    }
    case Type::UINT32: {
      const auto lo = static_cast<uint32_t>(min);
      const auto hi = static_cast<uint32_t>(max);
      return UInt32(size, lo, hi, null_probability);
    }
    case Type::INT32: {
      const auto lo = static_cast<int32_t>(min);
      const auto hi = static_cast<int32_t>(max);
      return Int32(size, lo, hi, null_probability);
    }
    case Type::UINT64: {
      const auto lo = static_cast<uint64_t>(min);
      const auto hi = static_cast<uint64_t>(max);
      return UInt64(size, lo, hi, null_probability);
    }
    case Type::INT64: {
      const auto lo = static_cast<int64_t>(min);
      const auto hi = static_cast<int64_t>(max);
      return Int64(size, lo, hi, null_probability);
    }
    case Type::HALF_FLOAT: {
      // Float16 takes its bounds as int16_t values.
      const auto lo = static_cast<int16_t>(min);
      const auto hi = static_cast<int16_t>(max);
      return Float16(size, lo, hi, null_probability);
    }
    case Type::FLOAT: {
      const auto lo = static_cast<float>(min);
      const auto hi = static_cast<float>(max);
      return Float32(size, lo, hi, null_probability);
    }
    case Type::DOUBLE: {
      const auto lo = static_cast<double>(min);
      const auto hi = static_cast<double>(max);
      return Float64(size, lo, hi, null_probability);
    }
    case Type::DATE64: {
      const auto lo = static_cast<int64_t>(min);
      const auto hi = static_cast<int64_t>(max);
      return Date64(size, lo, hi, null_probability);
    }
    default:
      return nullptr;
  }
}
/// \brief Generate a random Decimal128Array
///
/// \param[in] type the type of the array to generate
/// (must be an instance of Decimal128Type)
/// \param[in] size the size of the array to generate
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array
std::shared_ptr<Array> Decimal128(std::shared_ptr<DataType> type, int64_t size,
double null_probability = 0);
/// \brief Generate a random Decimal256Array
///
/// \param[in] type the type of the array to generate
/// (must be an instance of Decimal256Type)
/// \param[in] size the size of the array to generate
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array
std::shared_ptr<Array> Decimal256(std::shared_ptr<DataType> type, int64_t size,
double null_probability = 0);
/// \brief Generate an array of offsets (for use in e.g. ListArray::FromArrays)
///
/// \param[in] size the size of the array to generate
/// \param[in] first_offset the first offset value (usually 0)
/// \param[in] last_offset the last offset value (usually the size of the child array)
/// \param[in] null_probability the probability of an offset being null
/// \param[in] force_empty_nulls if true, null offsets must have 0 "length"
///
/// \return a generated Array
std::shared_ptr<Array> Offsets(int64_t size, int32_t first_offset, int32_t last_offset,
double null_probability = 0,
bool force_empty_nulls = false);
std::shared_ptr<Array> LargeOffsets(int64_t size, int64_t first_offset,
int64_t last_offset, double null_probability = 0,
bool force_empty_nulls = false);
/// \brief Generate a random StringArray
///
/// \param[in] size the size of the array to generate
/// \param[in] min_length the lower bound of the string length
/// determined by the uniform distribution
/// \param[in] max_length the upper bound of the string length
/// determined by the uniform distribution
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array
std::shared_ptr<Array> String(int64_t size, int32_t min_length, int32_t max_length,
double null_probability = 0);
/// \brief Generate a random LargeStringArray
///
/// \param[in] size the size of the array to generate
/// \param[in] min_length the lower bound of the string length
/// determined by the uniform distribution
/// \param[in] max_length the upper bound of the string length
/// determined by the uniform distribution
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array
std::shared_ptr<Array> LargeString(int64_t size, int32_t min_length, int32_t max_length,
double null_probability = 0);
/// \brief Generate a random StringArray with repeated values
///
/// \param[in] size the size of the array to generate
/// \param[in] unique the number of unique string values used
/// to populate the array
/// \param[in] min_length the lower bound of the string length
/// determined by the uniform distribution
/// \param[in] max_length the upper bound of the string length
/// determined by the uniform distribution
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array
std::shared_ptr<Array> StringWithRepeats(int64_t size, int64_t unique,
int32_t min_length, int32_t max_length,
double null_probability = 0);
/// \brief Like StringWithRepeats but return BinaryArray
std::shared_ptr<Array> BinaryWithRepeats(int64_t size, int64_t unique,
int32_t min_length, int32_t max_length,
double null_probability = 0);
/// \brief Generate a random FixedSizeBinaryArray
///
/// \param[in] size the size of the array to generate
/// \param[in] byte_width the byte width of fixed-size binary items
/// \param[in] null_probability the probability of a value being null
///
/// \return a generated Array
std::shared_ptr<Array> FixedSizeBinary(int64_t size, int32_t byte_width,
double null_probability = 0);
/// \brief Generate a random ListArray
///
/// \param[in] values The underlying values array
/// \param[in] size The size of the generated list array
/// \param[in] null_probability the probability of a list value being null
/// \param[in] force_empty_nulls if true, null list entries must have 0 length
///
/// \return a generated Array
std::shared_ptr<Array> List(const Array& values, int64_t size,
double null_probability = 0,
bool force_empty_nulls = false);
/// \brief Generate a random MapArray
///
/// \param[in] keys The underlying keys array
/// \param[in] items The underlying items array
/// \param[in] size The size of the generated map array
/// \param[in] null_probability the probability of a map value being null
/// \param[in] force_empty_nulls if true, null map entries must have 0 length
///
/// \return a generated Array
std::shared_ptr<Array> Map(const std::shared_ptr<Array>& keys,
const std::shared_ptr<Array>& items, int64_t size,
double null_probability = 0, bool force_empty_nulls = false);
/// \brief Generate a random SparseUnionArray
///
/// The type ids are chosen randomly, according to a uniform distribution,
/// amongst the given child fields.
///
/// \param[in] fields Vector of Arrays containing the data for each union field
/// \param[in] size The size of the generated sparse union array
std::shared_ptr<Array> SparseUnion(const ArrayVector& fields, int64_t size);
/// \brief Generate a random DenseUnionArray
///
/// The type ids are chosen randomly, according to a uniform distribution,
/// amongst the given child fields. The offsets are incremented along
/// each child field.
///
/// \param[in] fields Vector of Arrays containing the data for each union field
/// \param[in] size The size of the generated dense union array
std::shared_ptr<Array> DenseUnion(const ArrayVector& fields, int64_t size);
/// \brief Generate a random Array of the specified type, size, and null_probability.
///
/// Generation parameters other than size and null_probability are determined based on
/// the type of Array to be generated.
/// If boolean the probabilities of true,false values are 0.25,0.75 respectively.
/// If numeric min,max will be the least and greatest representable values.
/// If string min_length,max_length will be 0,sqrt(size) respectively.
///
/// \param[in] type the type of Array to generate
/// \param[in] size the size of the Array to generate
/// \param[in] null_probability the probability of a slot being null
/// \return a generated Array
std::shared_ptr<Array> ArrayOf(std::shared_ptr<DataType> type, int64_t size,
double null_probability = 0);
/// \brief Generate an array with random data based on the given field. See BatchOf
/// for usage info.
std::shared_ptr<Array> ArrayOf(const Field& field, int64_t size);
/// \brief Generate a record batch with random data of the specified length.
///
/// Generation options are read from key-value metadata for each field, and may be
/// specified at any nesting level. For example, generation options for the child
/// values of a list array can be specified by constructing the list type with
/// list(field("item", int8(), options_metadata))
///
/// The following options are supported:
///
/// For all types except NullType:
/// - null_probability (double): range [0.0, 1.0] the probability of a null value.
/// The default value is 0.0 if the field is marked non-nullable, else it is 0.01
///
/// For all numeric types T:
/// - min (T::c_type): the minimum value to generate (inclusive), default
/// std::numeric_limits<T::c_type>::min()
/// - max (T::c_type): the maximum value to generate (inclusive), default
/// std::numeric_limits<T::c_type>::max()
/// Note this means that, for example, min/max are int16_t values for HalfFloatType.
///
/// For floating point types T for which is_physical_floating_type<T>:
/// - nan_probability (double): range [0.0, 1.0] the probability of a NaN value.
///
/// For BooleanType:
/// - true_probability (double): range [0.0, 1.0] the probability of a true.
///
/// For DictionaryType:
/// - values (int32_t): the size of the dictionary.
/// Other properties are passed to the generator for the dictionary indices. However,
/// min and max cannot be specified. Note it is not possible to otherwise customize
/// the generation of dictionary values.
///
/// For list, string, and binary types T, including their large variants:
/// - min_length (T::offset_type): the minimum length of the child to generate,
/// default 0
/// - max_length (T::offset_type): the maximum length of the child to generate,
/// default 1024
///
/// For string and binary types T (not including their large variants):
/// - unique (int32_t): if positive, this many distinct values will be generated
/// and all array values will be one of these values, default -1
///
/// For MapType:
/// - values (int32_t): the number of key-value pairs to generate, which will be
/// partitioned among the array values.
std::shared_ptr<arrow::RecordBatch> BatchOf(const FieldVector& fields, int64_t size);
/// \brief Draw the next seed from the internal engine
///
/// Every call advances the internal engine, which is why this member is not
/// const.
///
/// \return a value drawn from the seed distribution
SeedType seed() {
  const SeedType next_seed = seed_distribution_(seed_rng_);
  return next_seed;
}
private:
std::uniform_int_distribution<SeedType> seed_distribution_;
std::default_random_engine seed_rng_;
};
/// Generate a record batch with random data. See RandomArrayGenerator::BatchOf.
ARROW_TESTING_EXPORT
std::shared_ptr<arrow::RecordBatch> GenerateBatch(const FieldVector& fields, int64_t size,
SeedType seed);
/// Generate an array with random data. See RandomArrayGenerator::BatchOf.
ARROW_TESTING_EXPORT
std::shared_ptr<arrow::Array> GenerateArray(const Field& field, int64_t size,
SeedType seed);
} // namespace random
//
// Assorted functions
//
ARROW_TESTING_EXPORT
void rand_day_millis(int64_t N, std::vector<DayTimeIntervalType::DayMilliseconds>* out);
ARROW_TESTING_EXPORT
void rand_month_day_nanos(int64_t N,
std::vector<MonthDayNanoIntervalType::MonthDayNanos>* out);
/// \brief Fill `out` with N integers drawn uniformly from [lower, upper]
///
/// The engine is always seeded with 0, so two calls with the same arguments
/// produce the same sequence. `out` is resized to N and every element is
/// overwritten with a draw converted to U.
///
/// \param[in] N the number of values to generate
/// \param[in] lower the lower bound of the uniform distribution
/// \param[in] upper the upper bound of the uniform distribution
/// \param[out] out the vector receiving the generated values
template <typename T, typename U>
void randint(int64_t N, T lower, T upper, std::vector<U>* out) {
  const int fixed_seed = 0;
  std::default_random_engine engine(fixed_seed);
  std::uniform_int_distribution<T> dist(lower, upper);

  out->resize(N, static_cast<T>(0));
  for (auto&& slot : *out) {
    slot = static_cast<U>(dist(engine));
  }
}
template <typename T, typename U>
void random_real(int64_t n, uint32_t seed, T min_value, T max_value,
std::vector<U>* out) {
std::default_random_engine gen(seed);
::arrow::random::uniform_real_distribution<T> d(min_value, max_value);
out->resize(n, static_cast<T>(0));
std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
}
/// \brief Write n integers drawn uniformly from [min_value, max_value] to `out`
///
/// \param[in] n the number of values to generate
/// \param[in] seed the seed of the random engine
/// \param[in] min_value the lower bound of the uniform distribution
/// \param[in] max_value the upper bound of the uniform distribution
/// \param[out] out destination for n values of type U; may only be null when
///             n is 0 (checked with assert)
template <typename T, typename U>
void rand_uniform_int(int64_t n, uint32_t seed, T min_value, T max_value, U* out) {
  assert(out || (n == 0));
  std::default_random_engine engine(seed);
  std::uniform_int_distribution<T> dist(min_value, max_value);

  U* const stop = out + n;
  for (U* cursor = out; cursor != stop; ++cursor) {
    *cursor = static_cast<U>(dist(engine));
  }
}
} // namespace arrow

View File

@ -0,0 +1,84 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Random real generation is very slow on Arm if built with clang + libstdc++
// due to software emulated long double arithmetic.
// This file ports some random real libs from llvm libc++ library, which are
// free from long double calculation.
// It improves performance significantly on both Arm (~100x) and x86 (~8x) in
// generating random reals when built with clang + gnu libstdc++.
// Based on: https://github.com/llvm/llvm-project/tree/main/libcxx
#pragma once
#include <limits>
#include <arrow/util/bit_util.h>
namespace arrow {
namespace random {
namespace detail {
// std::generate_canonical, simplified
// https://en.cppreference.com/w/cpp/numeric/random/generate_canonical
//
// Draws k values from `rng`, combines them as digits in base
// (Rng::max() - Rng::min() + 1), and divides by that base raised to the k-th
// power. k is chosen from the number of mantissa digits of RealType and the
// number of bits a single draw provides.
template <typename RealType, typename Rng>
RealType generate_canonical(Rng& rng) {
  // Mantissa digits that have to be filled.
  const size_t b = std::numeric_limits<RealType>::digits;
  // Bits provided by one draw: floor(log2(range + 1)).
  const size_t log2R = 63 - ::arrow::bit_util::CountLeadingZeros(
                                static_cast<uint64_t>(Rng::max() - Rng::min()) + 1);
  // Number of draws: ceil(b / log2R), and at least one.
  const size_t k = b / log2R + (b % log2R != 0) + (b == 0);
  // The radix, i.e. the number of distinct values one draw can take.
  const RealType r = static_cast<RealType>(Rng::max() - Rng::min()) + 1;

  RealType scale = r;
  RealType accumulated = static_cast<RealType>(rng() - Rng::min());
  size_t draws = 1;
  while (draws < k) {
    accumulated += (rng() - Rng::min()) * scale;
    scale *= r;
    ++draws;
  }
  return accumulated / scale;
}
} // namespace detail
// std::uniform_real_distribution, simplified
// https://en.cppreference.com/w/cpp/numeric/random/uniform_real_distribution
//
// Scales the value produced by detail::generate_canonical by (b - a) and
// shifts it by a.
template <typename RealType = double>
struct uniform_real_distribution {
  const RealType a;
  const RealType b;

  explicit uniform_real_distribution(RealType a = 0, RealType b = 1) : a(a), b(b) {}

  template <typename Rng>
  RealType operator()(Rng& rng) {
    const RealType canonical = detail::generate_canonical<RealType>(rng);
    return (b - a) * canonical + a;
  }
};
// std::bernoulli_distribution, simplified
// https://en.cppreference.com/w/cpp/numeric/random/bernoulli_distribution
//
// Yields true when the value produced by detail::generate_canonical<double>
// is strictly less than p.
struct bernoulli_distribution {
  const double p;

  explicit bernoulli_distribution(double p = 0.5) : p(p) {}

  template <class Rng>
  bool operator()(Rng& rng) {
    const double canonical = detail::generate_canonical<double>(rng);
    return canonical < p;
  }
};
} // namespace random
} // namespace arrow

View File

@ -0,0 +1,134 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
#include "arrow/buffer.h"
#include "arrow/record_batch.h"
#include "arrow/status.h"
#include "arrow/testing/visibility.h"
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/optional.h"
namespace arrow {
// Copies the raw bytes of `values` into a buffer allocated from `pool` and
// zeroes whatever capacity the buffer has beyond those bytes.
//
// \param[in] values the elements whose bytes are copied
// \param[in] pool the memory pool the buffer is allocated from
// \param[out] result receives the newly allocated buffer
// \return Status::OK() on success, or the error raised by AllocateBuffer
template <typename T>
Status CopyBufferFromVector(const std::vector<T>& values, MemoryPool* pool,
                            std::shared_ptr<Buffer>* result) {
  // Compute the byte count at full width: narrowing the element count to
  // `int` first would truncate it for very large vectors.
  const int64_t nbytes = static_cast<int64_t>(values.size() * sizeof(T));

  ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBuffer(nbytes, pool));

  auto immutable_data = reinterpret_cast<const uint8_t*>(values.data());
  std::copy(immutable_data, immutable_data + nbytes, buffer->mutable_data());
  // Zero the bytes between the copied data and the buffer's capacity.
  memset(buffer->mutable_data() + nbytes, 0,
         static_cast<size_t>(buffer->capacity() - nbytes));

  *result = std::move(buffer);
  return Status::OK();
}
// Sets approximately pct_null of the first n bytes in null_bytes to zero
// and the rest to non-zero (true) values.
ARROW_TESTING_EXPORT void random_null_bytes(int64_t n, double pct_null,
uint8_t* null_bytes);
ARROW_TESTING_EXPORT void random_is_valid(int64_t n, double pct_null,
std::vector<bool>* is_valid,
int random_seed = 0);
ARROW_TESTING_EXPORT void random_bytes(int64_t n, uint32_t seed, uint8_t* out);
ARROW_TESTING_EXPORT std::string random_string(int64_t n, uint32_t seed);
ARROW_TESTING_EXPORT int32_t DecimalSize(int32_t precision);
ARROW_TESTING_EXPORT void random_ascii(int64_t n, uint32_t seed, uint8_t* out);
ARROW_TESTING_EXPORT int64_t CountNulls(const std::vector<uint8_t>& valid_bytes);
ARROW_TESTING_EXPORT Status MakeRandomByteBuffer(int64_t length, MemoryPool* pool,
std::shared_ptr<ResizableBuffer>* out,
uint32_t seed = 0);
ARROW_TESTING_EXPORT uint64_t random_seed();
#define DECL_T() typedef typename TestFixture::T T;
#define DECL_TYPE() typedef typename TestFixture::Type Type;
// ----------------------------------------------------------------------
// A RecordBatchReader for serving a sequence of in-memory record batches
//
// The schema and the batches are copied at construction. ReadNext() hands the
// batches out in order and yields nullptr once all of them have been served.
class BatchIterator : public RecordBatchReader {
 public:
  BatchIterator(const std::shared_ptr<Schema>& schema,
                const std::vector<std::shared_ptr<RecordBatch>>& batches)
      : schema_(schema), batches_(batches), position_(0) {}

  std::shared_ptr<Schema> schema() const override { return schema_; }

  // Stores the next batch in `*out`, or nullptr when no batches remain.
  // Always returns Status::OK().
  Status ReadNext(std::shared_ptr<RecordBatch>* out) override {
    const bool exhausted = position_ >= batches_.size();
    if (exhausted) {
      *out = nullptr;
      return Status::OK();
    }
    *out = batches_[position_];
    ++position_;
    return Status::OK();
  }

 private:
  std::shared_ptr<Schema> schema_;
  std::vector<std::shared_ptr<RecordBatch>> batches_;
  // Index of the next batch to serve.
  size_t position_;
};
static inline std::vector<std::shared_ptr<DataType> (*)(FieldVector, std::vector<int8_t>)>
UnionTypeFactories() {
return {sparse_union, dense_union};
}
// Return the value of the ARROW_TEST_DATA environment variable or return error
// Status
ARROW_TESTING_EXPORT Status GetTestResourceRoot(std::string*);
// Return the value of the ARROW_TIMEZONE_DATABASE environment variable
ARROW_TESTING_EXPORT util::optional<std::string> GetTestTimezoneDatabaseRoot();
// Set the Timezone database based on the ARROW_TIMEZONE_DATABASE env variable
// This is only relevant on Windows, since other OSs have compatible databases built-in
ARROW_TESTING_EXPORT Status InitTestTimezoneDatabase();
// Get a TCP port number to listen on. This is a different number every time,
// as reusing the same port across tests can produce spurious bind errors on
// Windows.
ARROW_TESTING_EXPORT int GetListenPort();
// Get an IPv4 "address:port" to listen on. The address will be a loopback
// address. Compared to GetListenPort(), this will minimize the risk of
// port conflicts.
ARROW_TESTING_EXPORT std::string GetListenAddress();
ARROW_TESTING_EXPORT
const std::vector<std::shared_ptr<DataType>>& all_dictionary_index_types();
} // namespace arrow

View File

@ -0,0 +1,48 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
// Defines ARROW_TESTING_EXPORT and ARROW_TESTING_NO_EXPORT, the symbol
// visibility annotations used on the testing library's declarations.
#if defined(_WIN32) || defined(__CYGWIN__)
#if defined(_MSC_VER)
// Save the MSVC warning state and disable warning 4251; the state is restored
// by the matching pop at the end of this header.
#pragma warning(push)
#pragma warning(disable : 4251)
#else
#pragma GCC diagnostic ignored "-Wattributes"
#endif
// Static builds need no decoration. Otherwise symbols are exported while the
// library itself is being built (ARROW_TESTING_EXPORTING) and imported by
// everything else.
#ifdef ARROW_TESTING_STATIC
#define ARROW_TESTING_EXPORT
#elif defined(ARROW_TESTING_EXPORTING)
#define ARROW_TESTING_EXPORT __declspec(dllexport)
#else
#define ARROW_TESTING_EXPORT __declspec(dllimport)
#endif
// There is no "hidden" annotation in this branch; the macro expands to nothing.
#define ARROW_TESTING_NO_EXPORT
#else // Not Windows
// Use visibility attributes, unless the macros were already defined.
#ifndef ARROW_TESTING_EXPORT
#define ARROW_TESTING_EXPORT __attribute__((visibility("default")))
#endif
#ifndef ARROW_TESTING_NO_EXPORT
#define ARROW_TESTING_NO_EXPORT __attribute__((visibility("hidden")))
#endif
#endif // Non-Windows
// Restore the warning state saved by the push above.
// NOTE(review): because the pop happens inside this header, the 4251
// suppression does not extend to code that includes it - confirm this is
// intended.
#if defined(_MSC_VER)
#pragma warning(pop)
#endif