mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-07-02 14:27:31 +00:00
first commit
This commit is contained in:
@ -0,0 +1,79 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
|
||||
#include "arrow/testing/gtest_util.h"
|
||||
#include "arrow/util/async_generator.h"
|
||||
#include "arrow/util/future.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace util {
|
||||
|
||||
template <typename T>
|
||||
AsyncGenerator<T> AsyncVectorIt(std::vector<T> v) {
|
||||
return MakeVectorGenerator(std::move(v));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
AsyncGenerator<T> FailAt(AsyncGenerator<T> src, int failing_index) {
|
||||
auto index = std::make_shared<std::atomic<int>>(0);
|
||||
return [src, index, failing_index]() {
|
||||
auto idx = index->fetch_add(1);
|
||||
if (idx >= failing_index) {
|
||||
return Future<T>::MakeFinished(Status::Invalid("XYZ"));
|
||||
}
|
||||
return src();
|
||||
};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
AsyncGenerator<T> SlowdownABit(AsyncGenerator<T> source) {
|
||||
return MakeMappedGenerator(std::move(source), [](const T& res) {
|
||||
return SleepABitAsync().Then([res]() { return res; });
|
||||
});
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
class TrackingGenerator {
|
||||
public:
|
||||
explicit TrackingGenerator(AsyncGenerator<T> source)
|
||||
: state_(std::make_shared<State>(std::move(source))) {}
|
||||
|
||||
Future<T> operator()() {
|
||||
state_->num_read++;
|
||||
return state_->source();
|
||||
}
|
||||
|
||||
int num_read() { return state_->num_read.load(); }
|
||||
|
||||
private:
|
||||
struct State {
|
||||
explicit State(AsyncGenerator<T> source) : source(std::move(source)), num_read(0) {}
|
||||
|
||||
AsyncGenerator<T> source;
|
||||
std::atomic<int> num_read;
|
||||
};
|
||||
|
||||
std::shared_ptr<State> state_;
|
||||
};
|
||||
|
||||
} // namespace util
|
||||
} // namespace arrow
|
237
.venv/Lib/site-packages/pyarrow/include/arrow/testing/builder.h
Normal file
237
.venv/Lib/site-packages/pyarrow/include/arrow/testing/builder.h
Normal file
@ -0,0 +1,237 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/array.h"
|
||||
#include "arrow/array/builder_binary.h"
|
||||
#include "arrow/array/builder_primitive.h"
|
||||
#include "arrow/array/builder_time.h"
|
||||
#include "arrow/buffer.h"
|
||||
#include "arrow/testing/gtest_util.h"
|
||||
#include "arrow/util/bit_util.h"
|
||||
#include "arrow/visit_type_inline.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
// ArrayFromVector: construct an Array from vectors of C values
|
||||
|
||||
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
|
||||
void ArrayFromVector(const std::shared_ptr<DataType>& type,
|
||||
const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values,
|
||||
std::shared_ptr<Array>* out) {
|
||||
auto type_id = TYPE::type_id;
|
||||
ASSERT_EQ(type_id, type->id())
|
||||
<< "template parameter and concrete DataType instance don't agree";
|
||||
|
||||
std::unique_ptr<ArrayBuilder> builder_ptr;
|
||||
ASSERT_OK(MakeBuilder(default_memory_pool(), type, &builder_ptr));
|
||||
// Get the concrete builder class to access its Append() specializations
|
||||
auto& builder = dynamic_cast<typename TypeTraits<TYPE>::BuilderType&>(*builder_ptr);
|
||||
|
||||
for (size_t i = 0; i < values.size(); ++i) {
|
||||
if (is_valid[i]) {
|
||||
ASSERT_OK(builder.Append(values[i]));
|
||||
} else {
|
||||
ASSERT_OK(builder.AppendNull());
|
||||
}
|
||||
}
|
||||
ASSERT_OK(builder.Finish(out));
|
||||
}
|
||||
|
||||
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
|
||||
void ArrayFromVector(const std::shared_ptr<DataType>& type,
|
||||
const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) {
|
||||
auto type_id = TYPE::type_id;
|
||||
ASSERT_EQ(type_id, type->id())
|
||||
<< "template parameter and concrete DataType instance don't agree";
|
||||
|
||||
std::unique_ptr<ArrayBuilder> builder_ptr;
|
||||
ASSERT_OK(MakeBuilder(default_memory_pool(), type, &builder_ptr));
|
||||
// Get the concrete builder class to access its Append() specializations
|
||||
auto& builder = dynamic_cast<typename TypeTraits<TYPE>::BuilderType&>(*builder_ptr);
|
||||
|
||||
for (size_t i = 0; i < values.size(); ++i) {
|
||||
ASSERT_OK(builder.Append(values[i]));
|
||||
}
|
||||
ASSERT_OK(builder.Finish(out));
|
||||
}
|
||||
|
||||
// Overloads without a DataType argument, for parameterless types
|
||||
|
||||
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
|
||||
void ArrayFromVector(const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values,
|
||||
std::shared_ptr<Array>* out) {
|
||||
auto type = TypeTraits<TYPE>::type_singleton();
|
||||
ArrayFromVector<TYPE, C_TYPE>(type, is_valid, values, out);
|
||||
}
|
||||
|
||||
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
|
||||
void ArrayFromVector(const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) {
|
||||
auto type = TypeTraits<TYPE>::type_singleton();
|
||||
ArrayFromVector<TYPE, C_TYPE>(type, values, out);
|
||||
}
|
||||
|
||||
// ChunkedArrayFromVector: construct a ChunkedArray from vectors of C values
|
||||
|
||||
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
|
||||
void ChunkedArrayFromVector(const std::shared_ptr<DataType>& type,
|
||||
const std::vector<std::vector<bool>>& is_valid,
|
||||
const std::vector<std::vector<C_TYPE>>& values,
|
||||
std::shared_ptr<ChunkedArray>* out) {
|
||||
ArrayVector chunks;
|
||||
ASSERT_EQ(is_valid.size(), values.size());
|
||||
for (size_t i = 0; i < values.size(); ++i) {
|
||||
std::shared_ptr<Array> array;
|
||||
ArrayFromVector<TYPE, C_TYPE>(type, is_valid[i], values[i], &array);
|
||||
chunks.push_back(array);
|
||||
}
|
||||
*out = std::make_shared<ChunkedArray>(chunks);
|
||||
}
|
||||
|
||||
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
|
||||
void ChunkedArrayFromVector(const std::shared_ptr<DataType>& type,
|
||||
const std::vector<std::vector<C_TYPE>>& values,
|
||||
std::shared_ptr<ChunkedArray>* out) {
|
||||
ArrayVector chunks;
|
||||
for (size_t i = 0; i < values.size(); ++i) {
|
||||
std::shared_ptr<Array> array;
|
||||
ArrayFromVector<TYPE, C_TYPE>(type, values[i], &array);
|
||||
chunks.push_back(array);
|
||||
}
|
||||
*out = std::make_shared<ChunkedArray>(chunks);
|
||||
}
|
||||
|
||||
// Overloads without a DataType argument, for parameterless types
|
||||
|
||||
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
|
||||
void ChunkedArrayFromVector(const std::vector<std::vector<bool>>& is_valid,
|
||||
const std::vector<std::vector<C_TYPE>>& values,
|
||||
std::shared_ptr<ChunkedArray>* out) {
|
||||
auto type = TypeTraits<TYPE>::type_singleton();
|
||||
ChunkedArrayFromVector<TYPE, C_TYPE>(type, is_valid, values, out);
|
||||
}
|
||||
|
||||
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
|
||||
void ChunkedArrayFromVector(const std::vector<std::vector<C_TYPE>>& values,
|
||||
std::shared_ptr<ChunkedArray>* out) {
|
||||
auto type = TypeTraits<TYPE>::type_singleton();
|
||||
ChunkedArrayFromVector<TYPE, C_TYPE>(type, values, out);
|
||||
}
|
||||
|
||||
template <typename BuilderType>
|
||||
void FinishAndCheckPadding(BuilderType* builder, std::shared_ptr<Array>* out) {
|
||||
ASSERT_OK_AND_ASSIGN(*out, builder->Finish());
|
||||
AssertZeroPadded(**out);
|
||||
TestInitialized(**out);
|
||||
}
|
||||
|
||||
template <class T, class Builder>
|
||||
Status MakeArray(const std::vector<uint8_t>& valid_bytes, const std::vector<T>& values,
|
||||
int64_t size, Builder* builder, std::shared_ptr<Array>* out) {
|
||||
// Append the first 1000
|
||||
for (int64_t i = 0; i < size; ++i) {
|
||||
if (valid_bytes[i] > 0) {
|
||||
RETURN_NOT_OK(builder->Append(values[i]));
|
||||
} else {
|
||||
RETURN_NOT_OK(builder->AppendNull());
|
||||
}
|
||||
}
|
||||
return builder->Finish(out);
|
||||
}
|
||||
|
||||
template <typename Fn>
|
||||
struct VisitBuilderImpl {
|
||||
template <typename T, typename BuilderType = typename TypeTraits<T>::BuilderType,
|
||||
// need to let SFINAE drop this Visit when it would result in
|
||||
// [](NullBuilder*){}(double_builder)
|
||||
typename = decltype(std::declval<Fn>()(std::declval<BuilderType*>()))>
|
||||
Status Visit(const T&) {
|
||||
fn_(internal::checked_cast<BuilderType*>(builder_));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status Visit(const DataType& t) {
|
||||
return Status::NotImplemented("visiting builders of type ", t);
|
||||
}
|
||||
|
||||
Status Visit() { return VisitTypeInline(*builder_->type(), this); }
|
||||
|
||||
ArrayBuilder* builder_;
|
||||
Fn fn_;
|
||||
};
|
||||
|
||||
template <typename Fn>
|
||||
Status VisitBuilder(ArrayBuilder* builder, Fn&& fn) {
|
||||
return VisitBuilderImpl<Fn>{builder, std::forward<Fn>(fn)}.Visit();
|
||||
}
|
||||
|
||||
template <typename Fn>
|
||||
Result<std::shared_ptr<Array>> ArrayFromBuilderVisitor(
|
||||
const std::shared_ptr<DataType>& type, int64_t initial_capacity,
|
||||
int64_t visitor_repetitions, Fn&& fn) {
|
||||
std::unique_ptr<ArrayBuilder> builder;
|
||||
RETURN_NOT_OK(MakeBuilder(default_memory_pool(), type, &builder));
|
||||
|
||||
if (initial_capacity != 0) {
|
||||
RETURN_NOT_OK(builder->Resize(initial_capacity));
|
||||
}
|
||||
|
||||
for (int64_t i = 0; i < visitor_repetitions; ++i) {
|
||||
RETURN_NOT_OK(VisitBuilder(builder.get(), std::forward<Fn>(fn)));
|
||||
}
|
||||
|
||||
std::shared_ptr<Array> out;
|
||||
RETURN_NOT_OK(builder->Finish(&out));
|
||||
return std::move(out);
|
||||
}
|
||||
|
||||
template <typename Fn>
|
||||
Result<std::shared_ptr<Array>> ArrayFromBuilderVisitor(
|
||||
const std::shared_ptr<DataType>& type, int64_t length, Fn&& fn) {
|
||||
return ArrayFromBuilderVisitor(type, length, length, std::forward<Fn>(fn));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static inline Status GetBitmapFromVector(const std::vector<T>& is_valid,
|
||||
std::shared_ptr<Buffer>* result) {
|
||||
size_t length = is_valid.size();
|
||||
|
||||
ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateEmptyBitmap(length));
|
||||
|
||||
uint8_t* bitmap = buffer->mutable_data();
|
||||
for (size_t i = 0; i < static_cast<size_t>(length); ++i) {
|
||||
if (is_valid[i]) {
|
||||
bit_util::SetBit(bitmap, i);
|
||||
}
|
||||
}
|
||||
|
||||
*result = buffer;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void BitmapFromVector(const std::vector<T>& is_valid,
|
||||
std::shared_ptr<Buffer>* out) {
|
||||
ASSERT_OK(GetBitmapFromVector(is_valid, out));
|
||||
}
|
||||
|
||||
} // namespace arrow
|
@ -0,0 +1,55 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "arrow/util/thread_pool.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
/// An executor which synchronously runs the task as part of the SpawnReal call.
|
||||
class MockExecutor : public internal::Executor {
|
||||
public:
|
||||
int GetCapacity() override { return 0; }
|
||||
|
||||
Status SpawnReal(internal::TaskHints hints, internal::FnOnce<void()> task, StopToken,
|
||||
StopCallback&&) override {
|
||||
spawn_count++;
|
||||
std::move(task)();
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
int spawn_count = 0;
|
||||
};
|
||||
|
||||
/// An executor which does not actually run the task. Can be used to simulate situations
|
||||
/// where the executor schedules a task in a long queue and doesn't get around to running
|
||||
/// it for a while
|
||||
class DelayedExecutor : public internal::Executor {
|
||||
public:
|
||||
int GetCapacity() override { return 0; }
|
||||
|
||||
Status SpawnReal(internal::TaskHints hints, internal::FnOnce<void()> task, StopToken,
|
||||
StopCallback&&) override {
|
||||
captured_tasks.push_back(std::move(task));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
std::vector<internal::FnOnce<void()>> captured_tasks;
|
||||
};
|
||||
|
||||
} // namespace arrow
|
@ -0,0 +1,183 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/extension_type.h"
|
||||
#include "arrow/testing/visibility.h"
|
||||
#include "arrow/util/macros.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
/// Array class for the "uuid" test extension type; adds nothing beyond the
/// constructors inherited from ExtensionArray.
class ARROW_TESTING_EXPORT UuidArray : public ExtensionArray {
 public:
  using ExtensionArray::ExtensionArray;
};
|
||||
|
||||
/// Test extension type named "uuid", stored as fixed_size_binary(16).
///
/// ExtensionEquals, MakeArray and Deserialize are only declared here.
class ARROW_TESTING_EXPORT UuidType : public ExtensionType {
 public:
  UuidType() : ExtensionType(fixed_size_binary(16)) {}

  /// Name under which the extension is identified.
  std::string extension_name() const override { return "uuid"; }

  /// Serialized representation is the fixed string below.
  std::string Serialize() const override { return "uuid-serialized"; }

  bool ExtensionEquals(const ExtensionType& other) const override;

  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;

  Result<std::shared_ptr<DataType>> Deserialize(
      std::shared_ptr<DataType> storage_type,
      const std::string& serialized) const override;
};
|
||||
|
||||
/// Array class for the "smallint" test extension type; adds nothing beyond the
/// constructors inherited from ExtensionArray.
class ARROW_TESTING_EXPORT SmallintArray : public ExtensionArray {
 public:
  using ExtensionArray::ExtensionArray;
};
|
||||
|
||||
/// Array class for the "list-ext" test extension type; adds nothing beyond the
/// constructors inherited from ExtensionArray.
class ARROW_TESTING_EXPORT ListExtensionArray : public ExtensionArray {
 public:
  using ExtensionArray::ExtensionArray;
};
|
||||
|
||||
/// Test extension type named "smallint", stored as int16.
///
/// ExtensionEquals, MakeArray and Deserialize are only declared here.
class ARROW_TESTING_EXPORT SmallintType : public ExtensionType {
 public:
  SmallintType() : ExtensionType(int16()) {}

  /// Name under which the extension is identified.
  std::string extension_name() const override { return "smallint"; }

  /// Serialized representation is the fixed string below.
  std::string Serialize() const override { return "smallint"; }

  bool ExtensionEquals(const ExtensionType& other) const override;

  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;

  Result<std::shared_ptr<DataType>> Deserialize(
      std::shared_ptr<DataType> storage_type,
      const std::string& serialized) const override;
};
|
||||
|
||||
/// Test extension type named "list-ext", stored as list(int32).
///
/// ExtensionEquals, MakeArray and Deserialize are only declared here.
class ARROW_TESTING_EXPORT ListExtensionType : public ExtensionType {
 public:
  ListExtensionType() : ExtensionType(list(int32())) {}

  /// Name under which the extension is identified.
  std::string extension_name() const override { return "list-ext"; }

  /// Serialized representation is the fixed string below.
  std::string Serialize() const override { return "list-ext"; }

  bool ExtensionEquals(const ExtensionType& other) const override;

  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;

  Result<std::shared_ptr<DataType>> Deserialize(
      std::shared_ptr<DataType> storage_type,
      const std::string& serialized) const override;
};
|
||||
|
||||
/// Test extension type named "dict-extension", stored as
/// dictionary(int8, utf8).
///
/// ExtensionEquals, MakeArray and Deserialize are only declared here.
class ARROW_TESTING_EXPORT DictExtensionType : public ExtensionType {
 public:
  DictExtensionType() : ExtensionType(dictionary(int8(), utf8())) {}

  /// Name under which the extension is identified.
  std::string extension_name() const override { return "dict-extension"; }

  /// Serialized representation is the fixed string below.
  std::string Serialize() const override { return "dict-extension-serialized"; }

  bool ExtensionEquals(const ExtensionType& other) const override;

  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;

  Result<std::shared_ptr<DataType>> Deserialize(
      std::shared_ptr<DataType> storage_type,
      const std::string& serialized) const override;
};
|
||||
|
||||
/// Array class for the "complex128" test extension type; adds nothing beyond
/// the constructors inherited from ExtensionArray.
class ARROW_TESTING_EXPORT Complex128Array : public ExtensionArray {
 public:
  using ExtensionArray::ExtensionArray;
};
|
||||
|
||||
/// Test extension type named "complex128", stored as a struct of two
/// non-nullable float64 fields called "real" and "imag".
///
/// ExtensionEquals, MakeArray and Deserialize are only declared here.
class ARROW_TESTING_EXPORT Complex128Type : public ExtensionType {
 public:
  Complex128Type()
      : ExtensionType(struct_({::arrow::field("real", float64(), /*nullable=*/false),
                               ::arrow::field("imag", float64(), /*nullable=*/false)})) {}

  /// Name under which the extension is identified.
  std::string extension_name() const override { return "complex128"; }

  /// Serialized representation is the fixed string below.
  std::string Serialize() const override { return "complex128-serialized"; }

  bool ExtensionEquals(const ExtensionType& other) const override;

  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;

  Result<std::shared_ptr<DataType>> Deserialize(
      std::shared_ptr<DataType> storage_type,
      const std::string& serialized) const override;
};
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<DataType> uuid();
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<DataType> smallint();
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<DataType> list_extension_type();
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<DataType> dict_extension_type();
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<DataType> complex128();
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<Array> ExampleUuid();
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<Array> ExampleSmallint();
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<Array> ExampleDictExtension();
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<Array> ExampleComplex128();
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<Array> MakeComplex128(const std::shared_ptr<Array>& real,
|
||||
const std::shared_ptr<Array>& imag);
|
||||
|
||||
// A RAII class that registers an extension type on construction
|
||||
// and unregisters it on destruction.
|
||||
class ARROW_TESTING_EXPORT ExtensionTypeGuard {
|
||||
public:
|
||||
explicit ExtensionTypeGuard(const std::shared_ptr<DataType>& type);
|
||||
explicit ExtensionTypeGuard(const DataTypeVector& types);
|
||||
~ExtensionTypeGuard();
|
||||
ARROW_DEFAULT_MOVE_AND_ASSIGN(ExtensionTypeGuard);
|
||||
|
||||
protected:
|
||||
ARROW_DISALLOW_COPY_AND_ASSIGN(ExtensionTypeGuard);
|
||||
|
||||
std::vector<std::string> extension_names_;
|
||||
};
|
||||
|
||||
} // namespace arrow
|
@ -0,0 +1,142 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "arrow/testing/gtest_util.h"
|
||||
#include "arrow/util/future.h"
|
||||
|
||||
// This macro should be called by futures that are expected to
|
||||
// complete pretty quickly. arrow::kDefaultAssertFinishesWaitSeconds is the
|
||||
// default max wait here. Anything longer than that and it's a questionable unit test
|
||||
// anyways.
|
||||
// `fut` is expanded more than once, so pass a named future rather than an
// expression with side effects. The parameter is parenthesized so that any
// future-valued expression binds correctly to the member calls.
#define ASSERT_FINISHES_IMPL(fut)                                        \
  do {                                                                   \
    ASSERT_TRUE((fut).Wait(::arrow::kDefaultAssertFinishesWaitSeconds)); \
    if (!(fut).is_finished()) {                                          \
      FAIL() << "Future did not finish in a timely fashion";             \
    }                                                                    \
  } while (false)
|
||||
|
||||
// Evaluates `expr` once, asserts that the resulting future finishes within the
// default wait, then fails fatally if its status is not OK.
#define ASSERT_FINISHES_OK(expr)                                              \
  do {                                                                        \
    auto&& _fut = (expr);                                                     \
    ASSERT_FINISHES_IMPL(_fut);                                               \
    auto& _st = _fut.status();                                                \
    if (!_st.ok()) {                                                          \
      FAIL() << "'" ARROW_STRINGIFY(expr) "' failed with " << _st.ToString(); \
    }                                                                         \
  } while (false)
|
||||
|
||||
// Evaluates `expr` once, asserts that the future finishes within the default
// wait, then checks its status with ASSERT_RAISES(ENUM, ...).
#define ASSERT_FINISHES_AND_RAISES(ENUM, expr) \
  do {                                         \
    auto&& _fut = (expr);                      \
    ASSERT_FINISHES_IMPL(_fut);                \
    ASSERT_RAISES(ENUM, _fut.status());        \
  } while (0)
|
||||
|
||||
// Evaluates `expr` once, asserts that the future finishes within the default
// wait, then checks its status with EXPECT_RAISES_WITH_MESSAGE_THAT.
// The local is named `_fut`, like the sibling macros, so that a caller passing
// a variable called `fut` does not end up with a self-initialized reference.
#define EXPECT_FINISHES_AND_RAISES_WITH_MESSAGE_THAT(ENUM, matcher, expr) \
  do {                                                                    \
    auto&& _fut = (expr);                                                 \
    ASSERT_FINISHES_IMPL(_fut);                                           \
    EXPECT_RAISES_WITH_MESSAGE_THAT(ENUM, matcher, _fut.status());        \
  } while (false)
|
||||
|
||||
// Helper for ASSERT_FINISHES_OK_AND_ASSIGN. Declares `_future_name` in the
// enclosing scope (no do/while wrapper, so `lhs` may itself be a declaration),
// asserts that it finishes, then assigns its result to `lhs`.
#define ASSERT_FINISHES_OK_AND_ASSIGN_IMPL(lhs, rexpr, _future_name) \
  auto _future_name = (rexpr);                                       \
  ASSERT_FINISHES_IMPL(_future_name);                                \
  ASSERT_OK_AND_ASSIGN(lhs, _future_name.result());
|
||||
|
||||
// Waits for the future produced by `rexpr` and assigns its result to `lhs`.
// __COUNTER__ gives the temporary future a distinct name on each expansion.
#define ASSERT_FINISHES_OK_AND_ASSIGN(lhs, rexpr) \
  ASSERT_FINISHES_OK_AND_ASSIGN_IMPL(             \
      lhs, rexpr, ARROW_ASSIGN_OR_RAISE_NAME(_fut, __COUNTER__))
|
||||
|
||||
// Waits for the future produced by `expr` and asserts that its result equals
// `expected`.
#define ASSERT_FINISHES_OK_AND_EQ(expected, expr)        \
  do {                                                   \
    ASSERT_FINISHES_OK_AND_ASSIGN(auto _actual, (expr)); \
    ASSERT_EQ(expected, _actual);                        \
  } while (false)
|
||||
|
||||
// Non-fatal counterpart of ASSERT_FINISHES_IMPL. `fut` is expanded more than
// once, so pass a named future rather than an expression with side effects.
// The parameter is parenthesized so that any future-valued expression binds
// correctly to the member calls.
#define EXPECT_FINISHES_IMPL(fut)                                        \
  do {                                                                   \
    EXPECT_TRUE((fut).Wait(::arrow::kDefaultAssertFinishesWaitSeconds)); \
    if (!(fut).is_finished()) {                                          \
      ADD_FAILURE() << "Future did not finish in a timely fashion";      \
    }                                                                    \
  } while (false)
|
||||
|
||||
// Helper for EXPECT_FINISHES_OK_AND_ASSIGN. Declares `future_name` in the
// enclosing scope, expects it to finish, passes its status to `handle_error`,
// then assigns its result to `lhs`.
#define ON_FINISH_ASSIGN_OR_HANDLE_ERROR_IMPL(handle_error, future_name, lhs, rexpr) \
  auto future_name = (rexpr);                                                        \
  EXPECT_FINISHES_IMPL(future_name);                                                 \
  handle_error(future_name.status());                                                \
  EXPECT_OK_AND_ASSIGN(lhs, future_name.result());
|
||||
|
||||
// Expects the future produced by `expr` to finish within the default wait.
// `expr` is bound once to a local first: EXPECT_FINISHES_IMPL expands its
// argument twice, which would otherwise evaluate a future-creating expression
// twice and wait on a different future than the one it checks.
#define EXPECT_FINISHES(expr)   \
  do {                          \
    auto&& _fut = (expr);       \
    EXPECT_FINISHES_IMPL(_fut); \
  } while (0)
|
||||
|
||||
// Non-fatal variant: waits for the future produced by `rexpr`, reports a
// non-OK status through ARROW_EXPECT_OK, and assigns the result to `lhs`.
// __COUNTER__ gives the temporary future a distinct name on each expansion.
#define EXPECT_FINISHES_OK_AND_ASSIGN(lhs, rexpr)                        \
  ON_FINISH_ASSIGN_OR_HANDLE_ERROR_IMPL(                                 \
      ARROW_EXPECT_OK, ARROW_ASSIGN_OR_RAISE_NAME(_fut, __COUNTER__), lhs, \
      rexpr);
|
||||
|
||||
// Non-fatal variant: waits for the future produced by `expr` and expects its
// result to equal `expected`.
#define EXPECT_FINISHES_OK_AND_EQ(expected, expr)        \
  do {                                                   \
    EXPECT_FINISHES_OK_AND_ASSIGN(auto _actual, (expr)); \
    EXPECT_EQ(expected, _actual);                        \
  } while (false)
|
||||
|
||||
namespace arrow {
|
||||
|
||||
/// Maximum time, in seconds, that the *_FINISHES* macros wait for a future.
constexpr double kDefaultAssertFinishesWaitSeconds = 64.0;
|
||||
|
||||
template <typename T>
|
||||
void AssertNotFinished(const Future<T>& fut) {
|
||||
ASSERT_FALSE(IsFutureFinished(fut.state()));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void AssertFinished(const Future<T>& fut) {
|
||||
ASSERT_TRUE(IsFutureFinished(fut.state()));
|
||||
}
|
||||
|
||||
// Assert the future is successful *now*
|
||||
template <typename T>
|
||||
void AssertSuccessful(const Future<T>& fut) {
|
||||
if (IsFutureFinished(fut.state())) {
|
||||
ASSERT_EQ(fut.state(), FutureState::SUCCESS);
|
||||
ASSERT_OK(fut.status());
|
||||
} else {
|
||||
FAIL() << "Expected future to be completed successfully but it was still pending";
|
||||
}
|
||||
}
|
||||
|
||||
// Assert the future is failed *now*
|
||||
template <typename T>
|
||||
void AssertFailed(const Future<T>& fut) {
|
||||
if (IsFutureFinished(fut.state())) {
|
||||
ASSERT_EQ(fut.state(), FutureState::FAILURE);
|
||||
ASSERT_FALSE(fut.status().ok());
|
||||
} else {
|
||||
FAIL() << "Expected future to have failed but it was still pending";
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace arrow
|
@ -0,0 +1,237 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/array/array_base.h"
|
||||
#include "arrow/testing/gtest_util.h"
|
||||
#include "arrow/testing/visibility.h"
|
||||
#include "arrow/type_fwd.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
class ARROW_TESTING_EXPORT ConstantArrayGenerator {
|
||||
public:
|
||||
/// \brief Generates a constant BooleanArray
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] value to repeat
|
||||
///
|
||||
/// \return a generated Array
|
||||
static std::shared_ptr<Array> Boolean(int64_t size, bool value = false);
|
||||
|
||||
/// \brief Generates a constant UInt8Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] value to repeat
|
||||
///
|
||||
/// \return a generated Array
|
||||
static std::shared_ptr<Array> UInt8(int64_t size, uint8_t value = 0);
|
||||
|
||||
/// \brief Generates a constant Int8Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] value to repeat
|
||||
///
|
||||
/// \return a generated Array
|
||||
static std::shared_ptr<Array> Int8(int64_t size, int8_t value = 0);
|
||||
|
||||
/// \brief Generates a constant UInt16Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] value to repeat
|
||||
///
|
||||
/// \return a generated Array
|
||||
static std::shared_ptr<Array> UInt16(int64_t size, uint16_t value = 0);
|
||||
|
||||
/// \brief Generates a constant Int16Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] value to repeat
|
||||
///
|
||||
/// \return a generated Array
|
||||
static std::shared_ptr<Array> Int16(int64_t size, int16_t value = 0);
|
||||
|
||||
/// \brief Generates a constant UInt32Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] value to repeat
|
||||
///
|
||||
/// \return a generated Array
|
||||
static std::shared_ptr<Array> UInt32(int64_t size, uint32_t value = 0);
|
||||
|
||||
/// \brief Generates a constant Int32Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] value to repeat
|
||||
///
|
||||
/// \return a generated Array
|
||||
static std::shared_ptr<Array> Int32(int64_t size, int32_t value = 0);
|
||||
|
||||
/// \brief Generates a constant UInt64Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] value to repeat
|
||||
///
|
||||
/// \return a generated Array
|
||||
static std::shared_ptr<Array> UInt64(int64_t size, uint64_t value = 0);
|
||||
|
||||
/// \brief Generates a constant Int64Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] value to repeat
|
||||
///
|
||||
/// \return a generated Array
|
||||
static std::shared_ptr<Array> Int64(int64_t size, int64_t value = 0);
|
||||
|
||||
/// \brief Generates a constant Float32Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] value to repeat
|
||||
///
|
||||
/// \return a generated Array
|
||||
static std::shared_ptr<Array> Float32(int64_t size, float value = 0);
|
||||
|
||||
/// \brief Generates a constant Float64Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] value to repeat
|
||||
///
|
||||
/// \return a generated Array
|
||||
static std::shared_ptr<Array> Float64(int64_t size, double value = 0);
|
||||
|
||||
/// \brief Generates a constant StringArray
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] value to repeat
|
||||
///
|
||||
/// \return a generated Array
|
||||
static std::shared_ptr<Array> String(int64_t size, std::string value = "");
|
||||
|
||||
template <typename ArrowType, typename CType = typename ArrowType::c_type>
|
||||
static std::shared_ptr<Array> Numeric(int64_t size, CType value = 0) {
|
||||
switch (ArrowType::type_id) {
|
||||
case Type::BOOL:
|
||||
return Boolean(size, static_cast<bool>(value));
|
||||
case Type::UINT8:
|
||||
return UInt8(size, static_cast<uint8_t>(value));
|
||||
case Type::INT8:
|
||||
return Int8(size, static_cast<int8_t>(value));
|
||||
case Type::UINT16:
|
||||
return UInt16(size, static_cast<uint16_t>(value));
|
||||
case Type::INT16:
|
||||
return Int16(size, static_cast<int16_t>(value));
|
||||
case Type::UINT32:
|
||||
return UInt32(size, static_cast<uint32_t>(value));
|
||||
case Type::INT32:
|
||||
return Int32(size, static_cast<int32_t>(value));
|
||||
case Type::UINT64:
|
||||
return UInt64(size, static_cast<uint64_t>(value));
|
||||
case Type::INT64:
|
||||
return Int64(size, static_cast<int64_t>(value));
|
||||
case Type::FLOAT:
|
||||
return Float32(size, static_cast<float>(value));
|
||||
case Type::DOUBLE:
|
||||
return Float64(size, static_cast<double>(value));
|
||||
case Type::INTERVAL_DAY_TIME:
|
||||
case Type::DATE32: {
|
||||
EXPECT_OK_AND_ASSIGN(auto viewed,
|
||||
Int32(size, static_cast<uint32_t>(value))->View(date32()));
|
||||
return viewed;
|
||||
}
|
||||
case Type::INTERVAL_MONTHS: {
|
||||
EXPECT_OK_AND_ASSIGN(auto viewed,
|
||||
Int32(size, static_cast<uint32_t>(value))
|
||||
->View(std::make_shared<MonthIntervalType>()));
|
||||
return viewed;
|
||||
}
|
||||
case Type::TIME32: {
|
||||
EXPECT_OK_AND_ASSIGN(auto viewed,
|
||||
Int32(size, static_cast<uint32_t>(value))
|
||||
->View(std::make_shared<Time32Type>(TimeUnit::SECOND)));
|
||||
return viewed;
|
||||
}
|
||||
case Type::TIME64: {
|
||||
EXPECT_OK_AND_ASSIGN(auto viewed, Int64(size, static_cast<uint64_t>(value))
|
||||
->View(std::make_shared<Time64Type>()));
|
||||
return viewed;
|
||||
}
|
||||
case Type::DATE64: {
|
||||
EXPECT_OK_AND_ASSIGN(auto viewed,
|
||||
Int64(size, static_cast<uint64_t>(value))->View(date64()));
|
||||
return viewed;
|
||||
}
|
||||
case Type::TIMESTAMP: {
|
||||
EXPECT_OK_AND_ASSIGN(
|
||||
auto viewed, Int64(size, static_cast<int64_t>(value))
|
||||
->View(std::make_shared<TimestampType>(TimeUnit::SECOND)));
|
||||
return viewed;
|
||||
}
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Generates a constant Array of zeroes
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] type the type of the Array
|
||||
///
|
||||
/// \return a generated Array
|
||||
static std::shared_ptr<Array> Zeroes(int64_t size,
|
||||
const std::shared_ptr<DataType>& type);
|
||||
|
||||
/// \brief Generates a RecordBatch of zeroes
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] schema to conform to
|
||||
///
|
||||
/// This function is handy to return a RecordBatch of a desired shape.
|
||||
///
|
||||
/// \return a generated RecordBatch
|
||||
static std::shared_ptr<RecordBatch> Zeroes(int64_t size,
|
||||
const std::shared_ptr<Schema>& schema);
|
||||
|
||||
/// \brief Generates a RecordBatchReader by repeating a RecordBatch
|
||||
///
|
||||
/// \param[in] n_batch the number of times it repeats batch
|
||||
/// \param[in] batch the RecordBatch to repeat
|
||||
///
|
||||
/// \return a generated RecordBatchReader
|
||||
static std::shared_ptr<RecordBatchReader> Repeat(
|
||||
int64_t n_batch, const std::shared_ptr<RecordBatch> batch);
|
||||
|
||||
/// \brief Generates a RecordBatchReader of zeroes batches
|
||||
///
|
||||
/// \param[in] n_batch the number of RecordBatch
|
||||
/// \param[in] batch_size the size of each RecordBatch
|
||||
/// \param[in] schema to conform to
|
||||
///
|
||||
/// \return a generated RecordBatchReader
|
||||
static std::shared_ptr<RecordBatchReader> Zeroes(int64_t n_batch, int64_t batch_size,
|
||||
const std::shared_ptr<Schema>& schema);
|
||||
};
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
Result<std::shared_ptr<Array>> ScalarVectorToArray(const ScalarVector& scalars);
|
||||
|
||||
} // namespace arrow
|
@ -0,0 +1,33 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
// GTest < 1.11
|
||||
#ifndef GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST
|
||||
#define GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(A)
|
||||
#endif
|
||||
// GTest < 1.10
|
||||
#ifndef TYPED_TEST_SUITE
|
||||
#define TYPED_TEST_SUITE TYPED_TEST_CASE
|
||||
#define TYPED_TEST_SUITE_P TYPED_TEST_CASE_P
|
||||
#define INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_CASE_P
|
||||
#define REGISTER_TYPED_TEST_SUITE_P REGISTER_TYPED_TEST_CASE_P
|
||||
#define INSTANTIATE_TYPED_TEST_SUITE_P INSTANTIATE_TYPED_TEST_CASE_P
|
||||
#endif
|
@ -0,0 +1,559 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "arrow/compare.h"
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/status.h"
|
||||
#include "arrow/testing/gtest_compat.h"
|
||||
#include "arrow/testing/visibility.h"
|
||||
#include "arrow/type_fwd.h"
|
||||
#include "arrow/type_traits.h"
|
||||
#include "arrow/util/macros.h"
|
||||
#include "arrow/util/optional.h"
|
||||
#include "arrow/util/string_builder.h"
|
||||
#include "arrow/util/string_view.h"
|
||||
#include "arrow/util/type_fwd.h"
|
||||
|
||||
// NOTE: failing must be inline in the macros below, to get correct file / line number
|
||||
// reporting on test failures.
|
||||
|
||||
// NOTE: using a for loop for this macro allows extra failure messages to be
|
||||
// appended with operator<<
|
||||
#define ASSERT_RAISES(ENUM, expr) \
|
||||
for (::arrow::Status _st = ::arrow::internal::GenericToStatus((expr)); \
|
||||
!_st.Is##ENUM();) \
|
||||
FAIL() << "Expected '" ARROW_STRINGIFY(expr) "' to fail with " ARROW_STRINGIFY( \
|
||||
ENUM) ", but got " \
|
||||
<< _st.ToString()
|
||||
|
||||
#define ASSERT_RAISES_WITH_MESSAGE(ENUM, message, expr) \
|
||||
do { \
|
||||
auto _res = (expr); \
|
||||
::arrow::Status _st = ::arrow::internal::GenericToStatus(_res); \
|
||||
if (!_st.Is##ENUM()) { \
|
||||
FAIL() << "Expected '" ARROW_STRINGIFY(expr) "' to fail with " ARROW_STRINGIFY( \
|
||||
ENUM) ", but got " \
|
||||
<< _st.ToString(); \
|
||||
} \
|
||||
ASSERT_EQ((message), _st.ToString()); \
|
||||
} while (false)
|
||||
|
||||
#define EXPECT_RAISES_WITH_MESSAGE_THAT(ENUM, matcher, expr) \
|
||||
do { \
|
||||
auto _res = (expr); \
|
||||
::arrow::Status _st = ::arrow::internal::GenericToStatus(_res); \
|
||||
EXPECT_TRUE(_st.Is##ENUM()) << "Expected '" ARROW_STRINGIFY(expr) "' to fail with " \
|
||||
<< ARROW_STRINGIFY(ENUM) ", but got " << _st.ToString(); \
|
||||
EXPECT_THAT(_st.ToString(), (matcher)); \
|
||||
} while (false)
|
||||
|
||||
#define EXPECT_RAISES_WITH_CODE_AND_MESSAGE_THAT(code, matcher, expr) \
|
||||
do { \
|
||||
auto _res = (expr); \
|
||||
::arrow::Status _st = ::arrow::internal::GenericToStatus(_res); \
|
||||
EXPECT_EQ(_st.CodeAsString(), Status::CodeAsString(code)); \
|
||||
EXPECT_THAT(_st.ToString(), (matcher)); \
|
||||
} while (false)
|
||||
|
||||
#define ASSERT_OK(expr) \
|
||||
for (::arrow::Status _st = ::arrow::internal::GenericToStatus((expr)); !_st.ok();) \
|
||||
FAIL() << "'" ARROW_STRINGIFY(expr) "' failed with " << _st.ToString()
|
||||
|
||||
#define ASSERT_OK_NO_THROW(expr) ASSERT_NO_THROW(ASSERT_OK(expr))
|
||||
|
||||
#define ARROW_EXPECT_OK(expr) \
|
||||
do { \
|
||||
auto _res = (expr); \
|
||||
::arrow::Status _st = ::arrow::internal::GenericToStatus(_res); \
|
||||
EXPECT_TRUE(_st.ok()) << "'" ARROW_STRINGIFY(expr) "' failed with " \
|
||||
<< _st.ToString(); \
|
||||
} while (false)
|
||||
|
||||
// Fails the current test when `expr` converts to an OK status.
// Written as a for loop, like ASSERT_OK, so that extra failure messages can be
// appended with operator<<.
#define ASSERT_NOT_OK(expr)                                                         \
  for (::arrow::Status _st = ::arrow::internal::GenericToStatus((expr)); _st.ok();) \
  FAIL() << "'" ARROW_STRINGIFY(expr) "' did not fail: " << _st.ToString()
|
||||
|
||||
#define ABORT_NOT_OK(expr) \
|
||||
do { \
|
||||
auto _res = (expr); \
|
||||
::arrow::Status _st = ::arrow::internal::GenericToStatus(_res); \
|
||||
if (ARROW_PREDICT_FALSE(!_st.ok())) { \
|
||||
_st.Abort(); \
|
||||
} \
|
||||
} while (false);
|
||||
|
||||
#define ASSIGN_OR_HANDLE_ERROR_IMPL(handle_error, status_name, lhs, rexpr) \
|
||||
auto&& status_name = (rexpr); \
|
||||
handle_error(status_name.status()); \
|
||||
lhs = std::move(status_name).ValueOrDie();
|
||||
|
||||
#define ASSERT_OK_AND_ASSIGN(lhs, rexpr) \
|
||||
ASSIGN_OR_HANDLE_ERROR_IMPL( \
|
||||
ASSERT_OK, ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), lhs, rexpr);
|
||||
|
||||
#define ASSIGN_OR_ABORT(lhs, rexpr) \
|
||||
ASSIGN_OR_HANDLE_ERROR_IMPL(ABORT_NOT_OK, \
|
||||
ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \
|
||||
lhs, rexpr);
|
||||
|
||||
#define EXPECT_OK_AND_ASSIGN(lhs, rexpr) \
|
||||
ASSIGN_OR_HANDLE_ERROR_IMPL(ARROW_EXPECT_OK, \
|
||||
ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \
|
||||
lhs, rexpr);
|
||||
|
||||
#define ASSERT_OK_AND_EQ(expected, expr) \
|
||||
do { \
|
||||
ASSERT_OK_AND_ASSIGN(auto _actual, (expr)); \
|
||||
ASSERT_EQ(expected, _actual); \
|
||||
} while (0)
|
||||
|
||||
// A generalized version of GTest's SCOPED_TRACE that takes arbitrary arguments.
|
||||
// ARROW_SCOPED_TRACE("some variable = ", some_variable, ...)
|
||||
|
||||
#define ARROW_SCOPED_TRACE(...) SCOPED_TRACE(::arrow::util::StringBuilder(__VA_ARGS__))
|
||||
|
||||
namespace arrow {
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Useful testing::Types declarations
|
||||
|
||||
inline void PrintTo(StatusCode code, std::ostream* os) {
|
||||
*os << Status::CodeAsString(code);
|
||||
}
|
||||
|
||||
using NumericArrowTypes =
|
||||
::testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type, Int8Type, Int16Type,
|
||||
Int32Type, Int64Type, FloatType, DoubleType>;
|
||||
|
||||
using RealArrowTypes = ::testing::Types<FloatType, DoubleType>;
|
||||
|
||||
using IntegralArrowTypes = ::testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type,
|
||||
Int8Type, Int16Type, Int32Type, Int64Type>;
|
||||
|
||||
using PhysicalIntegralArrowTypes =
|
||||
::testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type, Int8Type, Int16Type,
|
||||
Int32Type, Int64Type, Date32Type, Date64Type, Time32Type, Time64Type,
|
||||
TimestampType, MonthIntervalType>;
|
||||
|
||||
using PrimitiveArrowTypes =
|
||||
::testing::Types<BooleanType, Int8Type, UInt8Type, Int16Type, UInt16Type, Int32Type,
|
||||
UInt32Type, Int64Type, UInt64Type, FloatType, DoubleType>;
|
||||
|
||||
using TemporalArrowTypes =
|
||||
::testing::Types<Date32Type, Date64Type, TimestampType, Time32Type, Time64Type>;
|
||||
|
||||
using DecimalArrowTypes = ::testing::Types<Decimal128Type, Decimal256Type>;
|
||||
|
||||
using BaseBinaryArrowTypes =
|
||||
::testing::Types<BinaryType, LargeBinaryType, StringType, LargeStringType>;
|
||||
|
||||
using BinaryArrowTypes = ::testing::Types<BinaryType, LargeBinaryType>;
|
||||
|
||||
using StringArrowTypes = ::testing::Types<StringType, LargeStringType>;
|
||||
|
||||
using ListArrowTypes = ::testing::Types<ListType, LargeListType>;
|
||||
|
||||
using UnionArrowTypes = ::testing::Types<SparseUnionType, DenseUnionType>;
|
||||
|
||||
class Array;
|
||||
class ChunkedArray;
|
||||
class RecordBatch;
|
||||
class Table;
|
||||
struct Datum;
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::vector<Type::type> AllTypeIds();
|
||||
|
||||
#define ASSERT_ARRAYS_EQUAL(lhs, rhs) AssertArraysEqual((lhs), (rhs))
|
||||
#define ASSERT_BATCHES_EQUAL(lhs, rhs) AssertBatchesEqual((lhs), (rhs))
|
||||
#define ASSERT_BATCHES_APPROX_EQUAL(lhs, rhs) AssertBatchesApproxEqual((lhs), (rhs))
|
||||
#define ASSERT_TABLES_EQUAL(lhs, rhs) AssertTablesEqual((lhs), (rhs))
|
||||
|
||||
// Default EqualOptions for testing
|
||||
static inline EqualOptions TestingEqualOptions() {
|
||||
return EqualOptions{}.nans_equal(true).signed_zeros_equal(false);
|
||||
}
|
||||
|
||||
// If verbose is true, then the arrays will be pretty printed
|
||||
ARROW_TESTING_EXPORT void AssertArraysEqual(
|
||||
const Array& expected, const Array& actual, bool verbose = false,
|
||||
const EqualOptions& options = TestingEqualOptions());
|
||||
ARROW_TESTING_EXPORT void AssertArraysApproxEqual(
|
||||
const Array& expected, const Array& actual, bool verbose = false,
|
||||
const EqualOptions& options = TestingEqualOptions());
|
||||
// Returns true when values are both null
|
||||
ARROW_TESTING_EXPORT void AssertScalarsEqual(
|
||||
const Scalar& expected, const Scalar& actual, bool verbose = false,
|
||||
const EqualOptions& options = TestingEqualOptions());
|
||||
ARROW_TESTING_EXPORT void AssertScalarsApproxEqual(
|
||||
const Scalar& expected, const Scalar& actual, bool verbose = false,
|
||||
const EqualOptions& options = TestingEqualOptions());
|
||||
ARROW_TESTING_EXPORT void AssertBatchesEqual(const RecordBatch& expected,
|
||||
const RecordBatch& actual,
|
||||
bool check_metadata = false);
|
||||
ARROW_TESTING_EXPORT void AssertBatchesApproxEqual(const RecordBatch& expected,
|
||||
const RecordBatch& actual);
|
||||
ARROW_TESTING_EXPORT void AssertChunkedEqual(const ChunkedArray& expected,
|
||||
const ChunkedArray& actual);
|
||||
ARROW_TESTING_EXPORT void AssertChunkedEqual(const ChunkedArray& actual,
|
||||
const ArrayVector& expected);
|
||||
// Like ChunkedEqual, but permits different chunk layout
|
||||
ARROW_TESTING_EXPORT void AssertChunkedEquivalent(const ChunkedArray& expected,
|
||||
const ChunkedArray& actual);
|
||||
ARROW_TESTING_EXPORT void AssertChunkedApproxEquivalent(
|
||||
const ChunkedArray& expected, const ChunkedArray& actual,
|
||||
const EqualOptions& options = TestingEqualOptions());
|
||||
ARROW_TESTING_EXPORT void AssertBufferEqual(const Buffer& buffer,
|
||||
const std::vector<uint8_t>& expected);
|
||||
ARROW_TESTING_EXPORT void AssertBufferEqual(const Buffer& buffer,
|
||||
const std::string& expected);
|
||||
ARROW_TESTING_EXPORT void AssertBufferEqual(const Buffer& buffer, const Buffer& expected);
|
||||
|
||||
ARROW_TESTING_EXPORT void AssertTypeEqual(const DataType& lhs, const DataType& rhs,
|
||||
bool check_metadata = false);
|
||||
ARROW_TESTING_EXPORT void AssertTypeEqual(const std::shared_ptr<DataType>& lhs,
|
||||
const std::shared_ptr<DataType>& rhs,
|
||||
bool check_metadata = false);
|
||||
ARROW_TESTING_EXPORT void AssertFieldEqual(const Field& lhs, const Field& rhs,
|
||||
bool check_metadata = false);
|
||||
ARROW_TESTING_EXPORT void AssertFieldEqual(const std::shared_ptr<Field>& lhs,
|
||||
const std::shared_ptr<Field>& rhs,
|
||||
bool check_metadata = false);
|
||||
ARROW_TESTING_EXPORT void AssertSchemaEqual(const Schema& lhs, const Schema& rhs,
|
||||
bool check_metadata = false);
|
||||
ARROW_TESTING_EXPORT void AssertSchemaEqual(const std::shared_ptr<Schema>& lhs,
|
||||
const std::shared_ptr<Schema>& rhs,
|
||||
bool check_metadata = false);
|
||||
|
||||
ARROW_TESTING_EXPORT void AssertTypeNotEqual(const DataType& lhs, const DataType& rhs,
|
||||
bool check_metadata = false);
|
||||
ARROW_TESTING_EXPORT void AssertTypeNotEqual(const std::shared_ptr<DataType>& lhs,
|
||||
const std::shared_ptr<DataType>& rhs,
|
||||
bool check_metadata = false);
|
||||
ARROW_TESTING_EXPORT void AssertFieldNotEqual(const Field& lhs, const Field& rhs,
|
||||
bool check_metadata = false);
|
||||
ARROW_TESTING_EXPORT void AssertFieldNotEqual(const std::shared_ptr<Field>& lhs,
|
||||
const std::shared_ptr<Field>& rhs,
|
||||
bool check_metadata = false);
|
||||
ARROW_TESTING_EXPORT void AssertSchemaNotEqual(const Schema& lhs, const Schema& rhs,
|
||||
bool check_metadata = false);
|
||||
ARROW_TESTING_EXPORT void AssertSchemaNotEqual(const std::shared_ptr<Schema>& lhs,
|
||||
const std::shared_ptr<Schema>& rhs,
|
||||
bool check_metadata = false);
|
||||
|
||||
ARROW_TESTING_EXPORT Result<util::optional<std::string>> PrintArrayDiff(
|
||||
const ChunkedArray& expected, const ChunkedArray& actual);
|
||||
|
||||
ARROW_TESTING_EXPORT void AssertTablesEqual(const Table& expected, const Table& actual,
|
||||
bool same_chunk_layout = true,
|
||||
bool flatten = false);
|
||||
|
||||
ARROW_TESTING_EXPORT void AssertDatumsEqual(const Datum& expected, const Datum& actual,
|
||||
bool verbose = false);
|
||||
ARROW_TESTING_EXPORT void AssertDatumsApproxEqual(
|
||||
const Datum& expected, const Datum& actual, bool verbose = false,
|
||||
const EqualOptions& options = TestingEqualOptions());
|
||||
|
||||
// Compares the values starting at raw_data against expected_values, element by
// element and in order, using ASSERT_EQ on each pair.
// raw_data is read once per element of expected_values.
template <typename C_TYPE>
void AssertNumericDataEqual(const C_TYPE* raw_data,
                            const std::vector<C_TYPE>& expected_values) {
  const auto last = expected_values.end();
  for (auto it = expected_values.begin(); it != last; ++it, ++raw_data) {
    auto expected = *it;
    ASSERT_EQ(expected, *raw_data);
  }
}
|
||||
|
||||
ARROW_TESTING_EXPORT void CompareBatch(const RecordBatch& left, const RecordBatch& right,
|
||||
bool compare_metadata = true);
|
||||
|
||||
ARROW_TESTING_EXPORT void ApproxCompareBatch(const RecordBatch& left,
|
||||
const RecordBatch& right,
|
||||
bool compare_metadata = true);
|
||||
|
||||
// Check if the padding of the buffers of the array is zero.
|
||||
// Also cause valgrind warnings if the padding bytes are uninitialized.
|
||||
ARROW_TESTING_EXPORT void AssertZeroPadded(const Array& array);
|
||||
|
||||
// Check if the valid buffer bytes are initialized
|
||||
// and cause valgrind warnings otherwise.
|
||||
ARROW_TESTING_EXPORT void TestInitialized(const ArrayData& array);
|
||||
ARROW_TESTING_EXPORT void TestInitialized(const Array& array);
|
||||
|
||||
#define DECL_T() typedef typename TestFixture::T T;
|
||||
|
||||
#define DECL_TYPE() typedef typename TestFixture::Type Type;
|
||||
|
||||
// ArrayFromJSON: construct an Array from a simple JSON representation
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<Array> ArrayFromJSON(const std::shared_ptr<DataType>&,
|
||||
util::string_view json);
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<Array> DictArrayFromJSON(const std::shared_ptr<DataType>& type,
|
||||
util::string_view indices_json,
|
||||
util::string_view dictionary_json);
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<RecordBatch> RecordBatchFromJSON(const std::shared_ptr<Schema>&,
|
||||
util::string_view);
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<ChunkedArray> ChunkedArrayFromJSON(const std::shared_ptr<DataType>&,
|
||||
const std::vector<std::string>& json);
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<Scalar> ScalarFromJSON(const std::shared_ptr<DataType>&,
|
||||
util::string_view json);
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<Scalar> DictScalarFromJSON(const std::shared_ptr<DataType>&,
|
||||
util::string_view index_json,
|
||||
util::string_view dictionary_json);
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<Table> TableFromJSON(const std::shared_ptr<Schema>&,
|
||||
const std::vector<std::string>& json);
|
||||
|
||||
// Given an array, return a new identical array except for one validity bit
|
||||
// set to a new value.
|
||||
// This is useful to force the underlying "value" of null entries to otherwise
|
||||
// invalid data and check that errors don't get reported.
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<Array> TweakValidityBit(const std::shared_ptr<Array>& array,
|
||||
int64_t index, bool validity);
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
void SleepFor(double seconds);
|
||||
|
||||
// Sleeps for a very small amount of time. The thread will be yielded
|
||||
// at least once ensuring that context switches could happen. It is intended
|
||||
// to be used for stress testing parallel code and shouldn't be assumed to do any
|
||||
// reliable timing.
|
||||
ARROW_TESTING_EXPORT
|
||||
void SleepABit();
|
||||
|
||||
// Wait until predicate is true or timeout in seconds expires.
|
||||
ARROW_TESTING_EXPORT
|
||||
void BusyWait(double seconds, std::function<bool()> predicate);
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
Future<> SleepAsync(double seconds);
|
||||
|
||||
// \see SleepABit
|
||||
ARROW_TESTING_EXPORT
|
||||
Future<> SleepABitAsync();
|
||||
|
||||
template <typename T>
|
||||
std::vector<T> IteratorToVector(Iterator<T> iterator) {
|
||||
EXPECT_OK_AND_ASSIGN(auto out, iterator.ToVector());
|
||||
return out;
|
||||
}
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
bool LocaleExists(const char* locale);
|
||||
|
||||
// A RAII-style object that switches to a new locale, and switches back
|
||||
// to the old locale when going out of scope. Doesn't do anything if the
|
||||
// new locale doesn't exist on the local machine.
|
||||
// ATTENTION: may crash with an assertion failure on Windows debug builds.
|
||||
// See ARROW-6108, also https://gerrit.libreoffice.org/#/c/54110/
|
||||
class ARROW_TESTING_EXPORT LocaleGuard {
|
||||
public:
|
||||
explicit LocaleGuard(const char* new_locale);
|
||||
~LocaleGuard();
|
||||
|
||||
protected:
|
||||
class Impl;
|
||||
std::unique_ptr<Impl> impl_;
|
||||
};
|
||||
|
||||
class ARROW_TESTING_EXPORT EnvVarGuard {
|
||||
public:
|
||||
EnvVarGuard(const std::string& name, const std::string& value);
|
||||
~EnvVarGuard();
|
||||
|
||||
protected:
|
||||
const std::string name_;
|
||||
std::string old_value_;
|
||||
bool was_set_;
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
class SignalHandler;
|
||||
}
|
||||
|
||||
class ARROW_TESTING_EXPORT SignalHandlerGuard {
|
||||
public:
|
||||
typedef void (*Callback)(int);
|
||||
|
||||
SignalHandlerGuard(int signum, Callback cb);
|
||||
SignalHandlerGuard(int signum, const internal::SignalHandler& handler);
|
||||
~SignalHandlerGuard();
|
||||
|
||||
protected:
|
||||
struct Impl;
|
||||
std::unique_ptr<Impl> impl_;
|
||||
};
|
||||
|
||||
#ifndef ARROW_LARGE_MEMORY_TESTS
|
||||
#define LARGE_MEMORY_TEST(name) DISABLED_##name
|
||||
#else
|
||||
#define LARGE_MEMORY_TEST(name) name
|
||||
#endif
|
||||
|
||||
inline void PrintTo(const Status& st, std::ostream* os) { *os << st.ToString(); }
|
||||
|
||||
template <typename T>
|
||||
void PrintTo(const Result<T>& result, std::ostream* os) {
|
||||
if (result.ok()) {
|
||||
::testing::internal::UniversalPrint(result.ValueOrDie(), os);
|
||||
} else {
|
||||
*os << result.status();
|
||||
}
|
||||
}
|
||||
|
||||
// A data type with only move constructors (no copy, no default).
//
// Owns a heap-allocated int through `data`; an object that has been moved from
// or destroyed holds a null `data`. `moves` is set to the source's count plus
// one on each move, and to -1 when a held value is destroyed.
struct MoveOnlyDataType {
  explicit MoveOnlyDataType(int x) : data(new int(x)) {}

  MoveOnlyDataType(const MoveOnlyDataType& other) = delete;
  MoveOnlyDataType& operator=(const MoveOnlyDataType& other) = delete;

  // Moves never throw: they only exchange pointers and delete an int.
  MoveOnlyDataType(MoveOnlyDataType&& other) noexcept { MoveFrom(&other); }
  MoveOnlyDataType& operator=(MoveOnlyDataType&& other) noexcept {
    MoveFrom(&other);
    return *this;
  }

  // Replaces the held value with a freshly allocated copy of x.
  MoveOnlyDataType& operator=(int x) {
    delete data;  // deleting a null pointer is a no-op
    data = new int(x);
    return *this;
  }

  ~MoveOnlyDataType() { Destroy(); }

  // Releases the held value, if any, and marks the object with moves == -1.
  void Destroy() {
    if (data != nullptr) {
      delete data;
      data = nullptr;
      moves = -1;
    }
  }

  // Takes ownership of other's value, leaving other empty.
  void MoveFrom(MoveOnlyDataType* other) noexcept {
    // Self-move must not destroy the value we are about to "take".
    if (other == this) {
      return;
    }
    Destroy();
    data = other->data;
    other->data = nullptr;
    moves = other->moves + 1;
  }

  // Returns the held value, or -42 when the object is empty.
  int ToInt() const { return data == nullptr ? -42 : *data; }

  // Empty objects never compare equal, not even to each other.
  bool operator==(const MoveOnlyDataType& other) const {
    return data != nullptr && other.data != nullptr && *data == *other.data;
  }
  // Strict weak ordering: an empty object sorts before every non-empty one and
  // is not less than another empty object (so `x < x` is always false).
  bool operator<(const MoveOnlyDataType& other) const {
    if (data == nullptr) {
      return other.data != nullptr;
    }
    return other.data != nullptr && *data < *other.data;
  }

  bool operator==(int other) const { return data != nullptr && *data == other; }
  friend bool operator==(int left, const MoveOnlyDataType& right) {
    return right == left;
  }

  int* data = nullptr;
  int moves = 0;
};
|
||||
|
||||
// A task that blocks until unlocked. Useful for timing tests.
|
||||
class ARROW_TESTING_EXPORT GatingTask {
|
||||
public:
|
||||
explicit GatingTask(double timeout_seconds = 10);
|
||||
/// \brief During destruction we wait for all pending tasks to finish
|
||||
~GatingTask();
|
||||
|
||||
/// \brief Creates a new waiting task (presumably to spawn on a thread). It will return
|
||||
/// invalid if the timeout arrived before the unlock. The task will not complete until
|
||||
/// unlocked or timed out
|
||||
///
|
||||
/// Note: The GatingTask must outlive any Task instances
|
||||
std::function<void()> Task();
|
||||
/// \brief Creates a new waiting task as a future. The future will not complete
|
||||
/// until unlocked.
|
||||
Future<> AsyncTask();
|
||||
/// \brief Waits until at least count tasks are running.
|
||||
Status WaitForRunning(int count);
|
||||
/// \brief Unlocks all waiting tasks. Returns an invalid status if any waiting task has
|
||||
/// timed out
|
||||
Status Unlock();
|
||||
|
||||
static std::shared_ptr<GatingTask> Make(double timeout_seconds = 10);
|
||||
|
||||
private:
|
||||
class Impl;
|
||||
std::shared_ptr<Impl> impl_;
|
||||
};
|
||||
|
||||
} // namespace arrow
|
||||
|
||||
namespace nonstd {
|
||||
namespace sv_lite {
|
||||
|
||||
// Without this hint, GTest will print string_views as a container of char
|
||||
template <class Char, class Traits = std::char_traits<Char>>
|
||||
void PrintTo(const basic_string_view<Char, Traits>& view, std::ostream* os) {
|
||||
*os << view;
|
||||
}
|
||||
|
||||
} // namespace sv_lite
|
||||
|
||||
namespace optional_lite {
|
||||
|
||||
template <typename T>
|
||||
void PrintTo(const optional<T>& opt, std::ostream* os) {
|
||||
if (opt.has_value()) {
|
||||
*os << "{";
|
||||
::testing::internal::UniversalPrint(*opt, os);
|
||||
*os << "}";
|
||||
} else {
|
||||
*os << "nullopt";
|
||||
}
|
||||
}
|
||||
|
||||
inline void PrintTo(const decltype(nullopt)&, std::ostream* os) { *os << "nullopt"; }
|
||||
|
||||
} // namespace optional_lite
|
||||
} // namespace nonstd
|
@ -0,0 +1,129 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
// Implement Arrow JSON serialization format for integration tests
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "arrow/status.h"
|
||||
#include "arrow/testing/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
class Buffer;
|
||||
class MemoryPool;
|
||||
class RecordBatch;
|
||||
class Schema;
|
||||
|
||||
namespace io {
|
||||
class ReadableFile;
|
||||
} // namespace io
|
||||
|
||||
namespace testing {
|
||||
|
||||
/// \class IntegrationJsonWriter
|
||||
/// \brief Write the JSON representation of an Arrow record batch file or stream
|
||||
///
|
||||
/// This is used for integration testing
|
||||
class ARROW_TESTING_EXPORT IntegrationJsonWriter {
|
||||
public:
|
||||
~IntegrationJsonWriter();
|
||||
|
||||
/// \brief Create a new JSON writer that writes to memory
|
||||
///
|
||||
/// \param[in] schema the schema of record batches
|
||||
/// \param[out] out the returned writer object
|
||||
/// \return Status
|
||||
static Status Open(const std::shared_ptr<Schema>& schema,
|
||||
std::unique_ptr<IntegrationJsonWriter>* out);
|
||||
|
||||
/// \brief Append a record batch
|
||||
Status WriteRecordBatch(const RecordBatch& batch);
|
||||
|
||||
/// \brief Finish the JSON payload and return as a std::string
|
||||
///
|
||||
/// \param[out] result the JSON as a std::string
|
||||
/// \return Status
|
||||
Status Finish(std::string* result);
|
||||
|
||||
private:
|
||||
explicit IntegrationJsonWriter(const std::shared_ptr<Schema>& schema);
|
||||
|
||||
// Hide RapidJSON details from public API
|
||||
class Impl;
|
||||
std::unique_ptr<Impl> impl_;
|
||||
};
|
||||
|
||||
/// \class IntegrationJsonReader
|
||||
/// \brief Read the JSON representation of an Arrow record batch file or stream
|
||||
///
|
||||
/// This is used for integration testing
|
||||
class ARROW_TESTING_EXPORT IntegrationJsonReader {
|
||||
public:
|
||||
~IntegrationJsonReader();
|
||||
|
||||
/// \brief Create a new JSON reader
|
||||
///
|
||||
/// \param[in] pool a MemoryPool to use for buffer allocations
|
||||
/// \param[in] data a Buffer containing the JSON data
|
||||
/// \param[out] reader the returned reader object
|
||||
/// \return Status
|
||||
static Status Open(MemoryPool* pool, const std::shared_ptr<Buffer>& data,
|
||||
std::unique_ptr<IntegrationJsonReader>* reader);
|
||||
|
||||
/// \brief Create a new JSON reader that uses the default memory pool
|
||||
///
|
||||
/// \param[in] data a Buffer containing the JSON data
|
||||
/// \param[out] reader the returned reader object
|
||||
/// \return Status
|
||||
static Status Open(const std::shared_ptr<Buffer>& data,
|
||||
std::unique_ptr<IntegrationJsonReader>* reader);
|
||||
|
||||
/// \brief Create a new JSON reader from a file
|
||||
///
|
||||
/// \param[in] pool a MemoryPool to use for buffer allocations
|
||||
/// \param[in] in_file a ReadableFile containing JSON data
|
||||
/// \param[out] reader the returned reader object
|
||||
/// \return Status
|
||||
static Status Open(MemoryPool* pool, const std::shared_ptr<io::ReadableFile>& in_file,
|
||||
std::unique_ptr<IntegrationJsonReader>* reader);
|
||||
|
||||
/// \brief Return the schema read from the JSON
|
||||
std::shared_ptr<Schema> schema() const;
|
||||
|
||||
/// \brief Return the number of record batches
|
||||
int num_record_batches() const;
|
||||
|
||||
/// \brief Read a particular record batch from the file
|
||||
///
|
||||
/// \param[in] i the record batch index, does not boundscheck
|
||||
/// \param[out] batch the read record batch
|
||||
Status ReadRecordBatch(int i, std::shared_ptr<RecordBatch>* batch) const;
|
||||
|
||||
private:
|
||||
IntegrationJsonReader(MemoryPool* pool, const std::shared_ptr<Buffer>& data);
|
||||
|
||||
// Hide RapidJSON details from public API
|
||||
class Impl;
|
||||
std::unique_ptr<Impl> impl_;
|
||||
};
|
||||
|
||||
} // namespace testing
|
||||
} // namespace arrow
|
469
.venv/Lib/site-packages/pyarrow/include/arrow/testing/matchers.h
Normal file
469
.venv/Lib/site-packages/pyarrow/include/arrow/testing/matchers.h
Normal file
@ -0,0 +1,469 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include <gmock/gmock-matchers.h>
|
||||
|
||||
#include "arrow/datum.h"
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/status.h"
|
||||
#include "arrow/stl_iterator.h"
|
||||
#include "arrow/testing/future_util.h"
|
||||
#include "arrow/testing/gtest_util.h"
|
||||
#include "arrow/util/future.h"
|
||||
#include "arrow/util/unreachable.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
class PointeesEqualMatcher {
|
||||
public:
|
||||
template <typename PtrPair>
|
||||
operator testing::Matcher<PtrPair>() const { // NOLINT runtime/explicit
|
||||
struct Impl : testing::MatcherInterface<const PtrPair&> {
|
||||
void DescribeTo(::std::ostream* os) const override { *os << "pointees are equal"; }
|
||||
|
||||
void DescribeNegationTo(::std::ostream* os) const override {
|
||||
*os << "pointees are not equal";
|
||||
}
|
||||
|
||||
bool MatchAndExplain(const PtrPair& pair,
|
||||
testing::MatchResultListener* listener) const override {
|
||||
const auto& first = *std::get<0>(pair);
|
||||
const auto& second = *std::get<1>(pair);
|
||||
const bool match = first.Equals(second);
|
||||
*listener << "whose pointees " << testing::PrintToString(first) << " and "
|
||||
<< testing::PrintToString(second)
|
||||
<< (match ? " are equal" : " are not equal");
|
||||
return match;
|
||||
}
|
||||
};
|
||||
|
||||
return testing::Matcher<PtrPair>(new Impl());
|
||||
}
|
||||
};
|
||||
|
||||
// A matcher that checks that the values pointed to are Equals().
|
||||
// Useful in conjunction with other googletest matchers.
|
||||
inline PointeesEqualMatcher PointeesEqual() { return {}; }
|
||||
|
||||
class AnyOfJSONMatcher {
|
||||
public:
|
||||
AnyOfJSONMatcher(std::shared_ptr<DataType> type, std::string array_json)
|
||||
: type_(std::move(type)), array_json_(std::move(array_json)) {}
|
||||
|
||||
template <typename arg_type>
|
||||
operator testing::Matcher<arg_type>() const { // NOLINT runtime/explicit
|
||||
struct Impl : testing::MatcherInterface<const arg_type&> {
|
||||
Impl(std::shared_ptr<DataType> type, std::string array_json)
|
||||
: type_(std::move(type)), array_json_(std::move(array_json)) {
|
||||
array = ArrayFromJSON(type_, array_json_);
|
||||
}
|
||||
void DescribeTo(std::ostream* os) const override {
|
||||
*os << "matches at least one scalar from ";
|
||||
*os << array->ToString();
|
||||
}
|
||||
void DescribeNegationTo(::std::ostream* os) const override {
|
||||
*os << "matches no scalar from ";
|
||||
*os << array->ToString();
|
||||
}
|
||||
bool MatchAndExplain(
|
||||
const arg_type& arg,
|
||||
::testing::MatchResultListener* result_listener) const override {
|
||||
for (int64_t i = 0; i < array->length(); ++i) {
|
||||
std::shared_ptr<Scalar> scalar;
|
||||
auto maybe_scalar = array->GetScalar(i);
|
||||
if (maybe_scalar.ok()) {
|
||||
scalar = maybe_scalar.ValueOrDie();
|
||||
} else {
|
||||
*result_listener << "GetScalar() had status "
|
||||
<< maybe_scalar.status().ToString() << "at index " << i
|
||||
<< " in the input JSON Array";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (scalar->Equals(arg)) return true;
|
||||
}
|
||||
*result_listener << "Argument scalar: '" << arg->ToString()
|
||||
<< "' matches no scalar from " << array->ToString();
|
||||
return false;
|
||||
}
|
||||
const std::shared_ptr<DataType> type_;
|
||||
const std::string array_json_;
|
||||
std::shared_ptr<Array> array;
|
||||
};
|
||||
|
||||
return testing::Matcher<arg_type>(new Impl(type_, array_json_));
|
||||
}
|
||||
|
||||
private:
|
||||
const std::shared_ptr<DataType> type_;
|
||||
const std::string array_json_;
|
||||
};
|
||||
|
||||
// Factory for AnyOfJSONMatcher: matches an argument equal to any element of
// the array described by `type` and `array_json`.
inline AnyOfJSONMatcher AnyOfJSON(std::shared_ptr<DataType> type,
                                  std::string array_json) {
  return AnyOfJSONMatcher(std::move(type), std::move(array_json));
}
|
||||
|
||||
template <typename ResultMatcher>
|
||||
class FutureMatcher {
|
||||
public:
|
||||
explicit FutureMatcher(ResultMatcher result_matcher, double wait_seconds)
|
||||
: result_matcher_(std::move(result_matcher)), wait_seconds_(wait_seconds) {}
|
||||
|
||||
template <typename Fut,
|
||||
typename ValueType = typename std::decay<Fut>::type::ValueType>
|
||||
operator testing::Matcher<Fut>() const { // NOLINT runtime/explicit
|
||||
struct Impl : testing::MatcherInterface<const Fut&> {
|
||||
explicit Impl(const ResultMatcher& result_matcher, double wait_seconds)
|
||||
: result_matcher_(testing::MatcherCast<Result<ValueType>>(result_matcher)),
|
||||
wait_seconds_(wait_seconds) {}
|
||||
|
||||
void DescribeTo(::std::ostream* os) const override {
|
||||
*os << "value ";
|
||||
result_matcher_.DescribeTo(os);
|
||||
}
|
||||
|
||||
void DescribeNegationTo(::std::ostream* os) const override {
|
||||
*os << "value ";
|
||||
result_matcher_.DescribeNegationTo(os);
|
||||
}
|
||||
|
||||
bool MatchAndExplain(const Fut& fut,
|
||||
testing::MatchResultListener* listener) const override {
|
||||
if (!fut.Wait(wait_seconds_)) {
|
||||
*listener << "which didn't finish within " << wait_seconds_ << " seconds";
|
||||
return false;
|
||||
}
|
||||
return result_matcher_.MatchAndExplain(fut.result(), listener);
|
||||
}
|
||||
|
||||
const testing::Matcher<Result<ValueType>> result_matcher_;
|
||||
const double wait_seconds_;
|
||||
};
|
||||
|
||||
return testing::Matcher<Fut>(new Impl(result_matcher_, wait_seconds_));
|
||||
}
|
||||
|
||||
private:
|
||||
const ResultMatcher result_matcher_;
|
||||
const double wait_seconds_;
|
||||
};
|
||||
|
||||
template <typename ValueMatcher>
|
||||
class ResultMatcher {
|
||||
public:
|
||||
explicit ResultMatcher(ValueMatcher value_matcher)
|
||||
: value_matcher_(std::move(value_matcher)) {}
|
||||
|
||||
template <typename Res,
|
||||
typename ValueType = typename std::decay<Res>::type::ValueType>
|
||||
operator testing::Matcher<Res>() const { // NOLINT runtime/explicit
|
||||
struct Impl : testing::MatcherInterface<const Res&> {
|
||||
explicit Impl(const ValueMatcher& value_matcher)
|
||||
: value_matcher_(testing::MatcherCast<ValueType>(value_matcher)) {}
|
||||
|
||||
void DescribeTo(::std::ostream* os) const override {
|
||||
*os << "value ";
|
||||
value_matcher_.DescribeTo(os);
|
||||
}
|
||||
|
||||
void DescribeNegationTo(::std::ostream* os) const override {
|
||||
*os << "value ";
|
||||
value_matcher_.DescribeNegationTo(os);
|
||||
}
|
||||
|
||||
bool MatchAndExplain(const Res& maybe_value,
|
||||
testing::MatchResultListener* listener) const override {
|
||||
if (!maybe_value.status().ok()) {
|
||||
*listener << "whose error "
|
||||
<< testing::PrintToString(maybe_value.status().ToString())
|
||||
<< " doesn't match";
|
||||
return false;
|
||||
}
|
||||
const ValueType& value = maybe_value.ValueOrDie();
|
||||
testing::StringMatchResultListener value_listener;
|
||||
const bool match = value_matcher_.MatchAndExplain(value, &value_listener);
|
||||
*listener << "whose value " << testing::PrintToString(value)
|
||||
<< (match ? " matches" : " doesn't match");
|
||||
testing::internal::PrintIfNotEmpty(value_listener.str(), listener->stream());
|
||||
return match;
|
||||
}
|
||||
|
||||
const testing::Matcher<ValueType> value_matcher_;
|
||||
};
|
||||
|
||||
return testing::Matcher<Res>(new Impl(value_matcher_));
|
||||
}
|
||||
|
||||
private:
|
||||
const ValueMatcher value_matcher_;
|
||||
};
|
||||
|
||||
class ErrorMatcher {
|
||||
public:
|
||||
explicit ErrorMatcher(StatusCode code,
|
||||
util::optional<testing::Matcher<std::string>> message_matcher)
|
||||
: code_(code), message_matcher_(std::move(message_matcher)) {}
|
||||
|
||||
template <typename Res>
|
||||
operator testing::Matcher<Res>() const { // NOLINT runtime/explicit
|
||||
struct Impl : testing::MatcherInterface<const Res&> {
|
||||
explicit Impl(StatusCode code,
|
||||
util::optional<testing::Matcher<std::string>> message_matcher)
|
||||
: code_(code), message_matcher_(std::move(message_matcher)) {}
|
||||
|
||||
void DescribeTo(::std::ostream* os) const override {
|
||||
*os << "raises StatusCode::" << Status::CodeAsString(code_);
|
||||
if (message_matcher_) {
|
||||
*os << " and message ";
|
||||
message_matcher_->DescribeTo(os);
|
||||
}
|
||||
}
|
||||
|
||||
void DescribeNegationTo(::std::ostream* os) const override {
|
||||
*os << "does not raise StatusCode::" << Status::CodeAsString(code_);
|
||||
if (message_matcher_) {
|
||||
*os << " or message ";
|
||||
message_matcher_->DescribeNegationTo(os);
|
||||
}
|
||||
}
|
||||
|
||||
bool MatchAndExplain(const Res& maybe_value,
|
||||
testing::MatchResultListener* listener) const override {
|
||||
const Status& status = internal::GenericToStatus(maybe_value);
|
||||
testing::StringMatchResultListener value_listener;
|
||||
|
||||
bool match = status.code() == code_;
|
||||
if (message_matcher_) {
|
||||
match = match &&
|
||||
message_matcher_->MatchAndExplain(status.message(), &value_listener);
|
||||
}
|
||||
|
||||
if (match) {
|
||||
*listener << "whose error matches";
|
||||
} else if (status.ok()) {
|
||||
*listener << "whose non-error doesn't match";
|
||||
} else {
|
||||
*listener << "whose error doesn't match";
|
||||
}
|
||||
|
||||
testing::internal::PrintIfNotEmpty(value_listener.str(), listener->stream());
|
||||
return match;
|
||||
}
|
||||
|
||||
const StatusCode code_;
|
||||
const util::optional<testing::Matcher<std::string>> message_matcher_;
|
||||
};
|
||||
|
||||
return testing::Matcher<Res>(new Impl(code_, message_matcher_));
|
||||
}
|
||||
|
||||
private:
|
||||
const StatusCode code_;
|
||||
const util::optional<testing::Matcher<std::string>> message_matcher_;
|
||||
};
|
||||
|
||||
class OkMatcher {
|
||||
public:
|
||||
template <typename Res>
|
||||
operator testing::Matcher<Res>() const { // NOLINT runtime/explicit
|
||||
struct Impl : testing::MatcherInterface<const Res&> {
|
||||
void DescribeTo(::std::ostream* os) const override { *os << "is ok"; }
|
||||
|
||||
void DescribeNegationTo(::std::ostream* os) const override { *os << "is not ok"; }
|
||||
|
||||
bool MatchAndExplain(const Res& maybe_value,
|
||||
testing::MatchResultListener* listener) const override {
|
||||
const Status& status = internal::GenericToStatus(maybe_value);
|
||||
|
||||
const bool match = status.ok();
|
||||
*listener << "whose " << (match ? "non-error matches" : "error doesn't match");
|
||||
return match;
|
||||
}
|
||||
};
|
||||
|
||||
return testing::Matcher<Res>(new Impl());
|
||||
}
|
||||
};
|
||||
|
||||
// Returns a matcher that waits on a Future (by default for 16 seconds)
|
||||
// then applies a matcher to the result.
|
||||
template <typename ResultMatcher>
|
||||
FutureMatcher<ResultMatcher> Finishes(
|
||||
const ResultMatcher& result_matcher,
|
||||
double wait_seconds = kDefaultAssertFinishesWaitSeconds) {
|
||||
return FutureMatcher<ResultMatcher>(result_matcher, wait_seconds);
|
||||
}
|
||||
|
||||
// Returns a matcher that matches the value of a successful Result<T>.
|
||||
template <typename ValueMatcher>
|
||||
ResultMatcher<ValueMatcher> ResultWith(const ValueMatcher& value_matcher) {
|
||||
return ResultMatcher<ValueMatcher>(value_matcher);
|
||||
}
|
||||
|
||||
// Returns a matcher that matches an ok Status or Result<T>.
|
||||
inline OkMatcher Ok() { return {}; }
|
||||
|
||||
// Returns a matcher that matches the StatusCode of a Status or Result<T>.
|
||||
// Do not use Raises(StatusCode::OK) to match a non error code.
|
||||
inline ErrorMatcher Raises(StatusCode code) { return ErrorMatcher(code, util::nullopt); }
|
||||
|
||||
// Returns a matcher that matches the StatusCode and message of a Status or Result<T>.
|
||||
template <typename MessageMatcher>
|
||||
ErrorMatcher Raises(StatusCode code, const MessageMatcher& message_matcher) {
|
||||
return ErrorMatcher(code, testing::MatcherCast<std::string>(message_matcher));
|
||||
}
|
||||
|
||||
class DataEqMatcher {
|
||||
public:
|
||||
// TODO(bkietz) support EqualOptions, ApproxEquals, etc
|
||||
// Probably it's better to use something like config-through-key_value_metadata
|
||||
// as with the random generators to decouple this from EqualOptions etc.
|
||||
explicit DataEqMatcher(Datum expected) : expected_(std::move(expected)) {}
|
||||
|
||||
template <typename Data>
|
||||
operator testing::Matcher<Data>() const { // NOLINT runtime/explicit
|
||||
struct Impl : testing::MatcherInterface<const Data&> {
|
||||
explicit Impl(Datum expected) : expected_(std::move(expected)) {}
|
||||
|
||||
void DescribeTo(::std::ostream* os) const override {
|
||||
*os << "has data ";
|
||||
PrintTo(expected_, os);
|
||||
}
|
||||
|
||||
void DescribeNegationTo(::std::ostream* os) const override {
|
||||
*os << "doesn't have data ";
|
||||
PrintTo(expected_, os);
|
||||
}
|
||||
|
||||
bool MatchAndExplain(const Data& data,
|
||||
testing::MatchResultListener* listener) const override {
|
||||
Datum boxed(data);
|
||||
|
||||
if (boxed.kind() != expected_.kind()) {
|
||||
*listener << "whose Datum::kind " << boxed.ToString() << " doesn't match "
|
||||
<< expected_.ToString();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (const auto& boxed_type = boxed.type()) {
|
||||
if (*boxed_type != *expected_.type()) {
|
||||
*listener << "whose DataType " << boxed_type->ToString() << " doesn't match "
|
||||
<< expected_.type()->ToString();
|
||||
return false;
|
||||
}
|
||||
} else if (const auto& boxed_schema = boxed.schema()) {
|
||||
if (*boxed_schema != *expected_.schema()) {
|
||||
*listener << "whose Schema " << boxed_schema->ToString() << " doesn't match "
|
||||
<< expected_.schema()->ToString();
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
Unreachable();
|
||||
}
|
||||
|
||||
if (boxed == expected_) {
|
||||
*listener << "whose value matches";
|
||||
return true;
|
||||
}
|
||||
|
||||
if (listener->IsInterested() && boxed.kind() == Datum::ARRAY) {
|
||||
*listener << "whose value differs from the expected value by "
|
||||
<< boxed.make_array()->Diff(*expected_.make_array());
|
||||
} else {
|
||||
*listener << "whose value doesn't match";
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
Datum expected_;
|
||||
};
|
||||
|
||||
return testing::Matcher<Data>(new Impl(expected_));
|
||||
}
|
||||
|
||||
private:
|
||||
Datum expected_;
|
||||
};
|
||||
|
||||
/// Constructs a datum against which arguments are matched
|
||||
template <typename Data>
|
||||
DataEqMatcher DataEq(Data&& dat) {
|
||||
return DataEqMatcher(Datum(std::forward<Data>(dat)));
|
||||
}
|
||||
|
||||
/// Constructs an array with ArrayFromJSON against which arguments are matched
|
||||
inline DataEqMatcher DataEqArray(const std::shared_ptr<DataType>& type,
|
||||
util::string_view json) {
|
||||
return DataEq(ArrayFromJSON(type, json));
|
||||
}
|
||||
|
||||
/// Constructs an array from a vector of optionals against which arguments are matched
|
||||
template <typename T, typename ArrayType = typename TypeTraits<T>::ArrayType,
|
||||
typename BuilderType = typename TypeTraits<T>::BuilderType,
|
||||
typename ValueType =
|
||||
typename ::arrow::stl::detail::DefaultValueAccessor<ArrayType>::ValueType>
|
||||
DataEqMatcher DataEqArray(T type, const std::vector<util::optional<ValueType>>& values) {
|
||||
// FIXME(bkietz) broken until DataType is move constructible
|
||||
BuilderType builder(std::make_shared<T>(std::move(type)), default_memory_pool());
|
||||
DCHECK_OK(builder.Reserve(static_cast<int64_t>(values.size())));
|
||||
|
||||
// pseudo constexpr:
|
||||
static const bool need_safe_append = !is_fixed_width(T::type_id);
|
||||
|
||||
for (auto value : values) {
|
||||
if (value) {
|
||||
if (need_safe_append) {
|
||||
builder.UnsafeAppend(*value);
|
||||
} else {
|
||||
DCHECK_OK(builder.Append(*value));
|
||||
}
|
||||
} else {
|
||||
builder.UnsafeAppendNull();
|
||||
}
|
||||
}
|
||||
|
||||
return DataEq(builder.Finish().ValueOrDie());
|
||||
}
|
||||
|
||||
/// Constructs a scalar with ScalarFromJSON against which arguments are matched
|
||||
inline DataEqMatcher DataEqScalar(const std::shared_ptr<DataType>& type,
|
||||
util::string_view json) {
|
||||
return DataEq(ScalarFromJSON(type, json));
|
||||
}
|
||||
|
||||
/// Constructs a scalar against which arguments are matched
|
||||
template <typename T, typename ScalarType = typename TypeTraits<T>::ScalarType,
|
||||
typename ValueType = typename ScalarType::ValueType>
|
||||
DataEqMatcher DataEqScalar(T type, util::optional<ValueType> value) {
|
||||
ScalarType expected(std::make_shared<T>(std::move(type)));
|
||||
|
||||
if (value) {
|
||||
expected.is_valid = true;
|
||||
expected.value = std::move(*value);
|
||||
}
|
||||
|
||||
return DataEq(std::move(expected));
|
||||
}
|
||||
|
||||
// HasType, HasSchema matchers
|
||||
|
||||
} // namespace arrow
|
25
.venv/Lib/site-packages/pyarrow/include/arrow/testing/pch.h
Normal file
25
.venv/Lib/site-packages/pyarrow/include/arrow/testing/pch.h
Normal file
@ -0,0 +1,25 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
// Often-used headers, for precompiling.
|
||||
// If updating this header, please make sure you check compilation speed
|
||||
// before checking in. Adding headers which are not used extremely often
|
||||
// may incur a slowdown, since it makes the precompiled header heavier to load.
|
||||
|
||||
#include "arrow/pch.h"
|
||||
#include "arrow/testing/gtest_util.h"
|
||||
#include "arrow/testing/util.h"
|
504
.venv/Lib/site-packages/pyarrow/include/arrow/testing/random.h
Normal file
504
.venv/Lib/site-packages/pyarrow/include/arrow/testing/random.h
Normal file
@ -0,0 +1,504 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/testing/uniform_real.h"
|
||||
#include "arrow/testing/visibility.h"
|
||||
#include "arrow/type.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
class Array;
|
||||
|
||||
namespace random {
|
||||
|
||||
using SeedType = int32_t;
|
||||
constexpr SeedType kSeedMax = std::numeric_limits<SeedType>::max();
|
||||
|
||||
class ARROW_TESTING_EXPORT RandomArrayGenerator {
|
||||
public:
|
||||
explicit RandomArrayGenerator(SeedType seed)
|
||||
: seed_distribution_(static_cast<SeedType>(1), kSeedMax), seed_rng_(seed) {}
|
||||
|
||||
/// \brief Generate a null bitmap
|
||||
///
|
||||
/// \param[in] size the size of the bitmap to generate
|
||||
/// \param[in] null_probability the probability of a bit being zero
|
||||
///
|
||||
/// \return a generated Buffer
|
||||
std::shared_ptr<Buffer> NullBitmap(int64_t size, double null_probability = 0);
|
||||
|
||||
/// \brief Generate a random BooleanArray
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] true_probability the probability of a value being 1 / bit-set
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> Boolean(int64_t size, double true_probability,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Generate a random UInt8Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] min the lower bound of the uniform distribution
|
||||
/// \param[in] max the upper bound of the uniform distribution
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> UInt8(int64_t size, uint8_t min, uint8_t max,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Generate a random Int8Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] min the lower bound of the uniform distribution
|
||||
/// \param[in] max the upper bound of the uniform distribution
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> Int8(int64_t size, int8_t min, int8_t max,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Generate a random UInt16Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] min the lower bound of the uniform distribution
|
||||
/// \param[in] max the upper bound of the uniform distribution
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> UInt16(int64_t size, uint16_t min, uint16_t max,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Generate a random Int16Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] min the lower bound of the uniform distribution
|
||||
/// \param[in] max the upper bound of the uniform distribution
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> Int16(int64_t size, int16_t min, int16_t max,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Generate a random UInt32Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] min the lower bound of the uniform distribution
|
||||
/// \param[in] max the upper bound of the uniform distribution
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> UInt32(int64_t size, uint32_t min, uint32_t max,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Generate a random Int32Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] min the lower bound of the uniform distribution
|
||||
/// \param[in] max the upper bound of the uniform distribution
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> Int32(int64_t size, int32_t min, int32_t max,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Generate a random UInt64Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] min the lower bound of the uniform distribution
|
||||
/// \param[in] max the upper bound of the uniform distribution
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> UInt64(int64_t size, uint64_t min, uint64_t max,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Generate a random Int64Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] min the lower bound of the uniform distribution
|
||||
/// \param[in] max the upper bound of the uniform distribution
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> Int64(int64_t size, int64_t min, int64_t max,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Generate a random HalfFloatArray
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] min the lower bound of the distribution
|
||||
/// \param[in] max the upper bound of the distribution
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> Float16(int64_t size, int16_t min, int16_t max,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Generate a random FloatArray
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] min the lower bound of the uniform distribution
|
||||
/// \param[in] max the upper bound of the uniform distribution
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
/// \param[in] nan_probability the probability of a value being NaN
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> Float32(int64_t size, float min, float max,
|
||||
double null_probability = 0, double nan_probability = 0);
|
||||
|
||||
/// \brief Generate a random DoubleArray
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] min the lower bound of the uniform distribution
|
||||
/// \param[in] max the upper bound of the uniform distribution
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
/// \param[in] nan_probability the probability of a value being NaN
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> Float64(int64_t size, double min, double max,
|
||||
double null_probability = 0, double nan_probability = 0);
|
||||
|
||||
/// \brief Generate a random Date64Array
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] min the lower bound of the uniform distribution
|
||||
/// \param[in] max the upper bound of the uniform distribution
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> Date64(int64_t size, int64_t min, int64_t max,
|
||||
double null_probability = 0);
|
||||
|
||||
template <typename ArrowType, typename CType = typename ArrowType::c_type>
|
||||
std::shared_ptr<Array> Numeric(int64_t size, CType min, CType max,
|
||||
double null_probability = 0) {
|
||||
switch (ArrowType::type_id) {
|
||||
case Type::UINT8:
|
||||
return UInt8(size, static_cast<uint8_t>(min), static_cast<uint8_t>(max),
|
||||
null_probability);
|
||||
case Type::INT8:
|
||||
return Int8(size, static_cast<int8_t>(min), static_cast<int8_t>(max),
|
||||
null_probability);
|
||||
case Type::UINT16:
|
||||
return UInt16(size, static_cast<uint16_t>(min), static_cast<uint16_t>(max),
|
||||
null_probability);
|
||||
case Type::INT16:
|
||||
return Int16(size, static_cast<int16_t>(min), static_cast<int16_t>(max),
|
||||
null_probability);
|
||||
case Type::UINT32:
|
||||
return UInt32(size, static_cast<uint32_t>(min), static_cast<uint32_t>(max),
|
||||
null_probability);
|
||||
case Type::INT32:
|
||||
return Int32(size, static_cast<int32_t>(min), static_cast<int32_t>(max),
|
||||
null_probability);
|
||||
case Type::UINT64:
|
||||
return UInt64(size, static_cast<uint64_t>(min), static_cast<uint64_t>(max),
|
||||
null_probability);
|
||||
case Type::INT64:
|
||||
return Int64(size, static_cast<int64_t>(min), static_cast<int64_t>(max),
|
||||
null_probability);
|
||||
case Type::HALF_FLOAT:
|
||||
return Float16(size, static_cast<int16_t>(min), static_cast<int16_t>(max),
|
||||
null_probability);
|
||||
case Type::FLOAT:
|
||||
return Float32(size, static_cast<float>(min), static_cast<float>(max),
|
||||
null_probability);
|
||||
case Type::DOUBLE:
|
||||
return Float64(size, static_cast<double>(min), static_cast<double>(max),
|
||||
null_probability);
|
||||
case Type::DATE64:
|
||||
return Date64(size, static_cast<int64_t>(min), static_cast<int64_t>(max),
|
||||
null_probability);
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Generate a random Decimal128Array
|
||||
///
|
||||
/// \param[in] type the type of the array to generate
|
||||
/// (must be an instance of Decimal128Type)
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> Decimal128(std::shared_ptr<DataType> type, int64_t size,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Generate a random Decimal256Array
|
||||
///
|
||||
/// \param[in] type the type of the array to generate
|
||||
/// (must be an instance of Decimal256Type)
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> Decimal256(std::shared_ptr<DataType> type, int64_t size,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Generate an array of offsets (for use in e.g. ListArray::FromArrays)
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] first_offset the first offset value (usually 0)
|
||||
/// \param[in] last_offset the last offset value (usually the size of the child array)
|
||||
/// \param[in] null_probability the probability of an offset being null
|
||||
/// \param[in] force_empty_nulls if true, null offsets must have 0 "length"
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> Offsets(int64_t size, int32_t first_offset, int32_t last_offset,
|
||||
double null_probability = 0,
|
||||
bool force_empty_nulls = false);
|
||||
|
||||
std::shared_ptr<Array> LargeOffsets(int64_t size, int64_t first_offset,
|
||||
int64_t last_offset, double null_probability = 0,
|
||||
bool force_empty_nulls = false);
|
||||
|
||||
/// \brief Generate a random StringArray
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] min_length the lower bound of the string length
|
||||
/// determined by the uniform distribution
|
||||
/// \param[in] max_length the upper bound of the string length
|
||||
/// determined by the uniform distribution
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> String(int64_t size, int32_t min_length, int32_t max_length,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Generate a random LargeStringArray
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] min_length the lower bound of the string length
|
||||
/// determined by the uniform distribution
|
||||
/// \param[in] max_length the upper bound of the string length
|
||||
/// determined by the uniform distribution
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> LargeString(int64_t size, int32_t min_length, int32_t max_length,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Generate a random StringArray with repeated values
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] unique the number of unique string values used
|
||||
/// to populate the array
|
||||
/// \param[in] min_length the lower bound of the string length
|
||||
/// determined by the uniform distribution
|
||||
/// \param[in] max_length the upper bound of the string length
|
||||
/// determined by the uniform distribution
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> StringWithRepeats(int64_t size, int64_t unique,
|
||||
int32_t min_length, int32_t max_length,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Like StringWithRepeats but return BinaryArray
|
||||
std::shared_ptr<Array> BinaryWithRepeats(int64_t size, int64_t unique,
|
||||
int32_t min_length, int32_t max_length,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Generate a random FixedSizeBinaryArray
|
||||
///
|
||||
/// \param[in] size the size of the array to generate
|
||||
/// \param[in] byte_width the byte width of fixed-size binary items
|
||||
/// \param[in] null_probability the probability of a value being null
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> FixedSizeBinary(int64_t size, int32_t byte_width,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Generate a random ListArray
|
||||
///
|
||||
/// \param[in] values The underlying values array
|
||||
/// \param[in] size The size of the generated list array
|
||||
/// \param[in] null_probability the probability of a list value being null
|
||||
/// \param[in] force_empty_nulls if true, null list entries must have 0 length
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> List(const Array& values, int64_t size,
|
||||
double null_probability = 0,
|
||||
bool force_empty_nulls = false);
|
||||
|
||||
/// \brief Generate a random MapArray
|
||||
///
|
||||
/// \param[in] keys The underlying keys array
|
||||
/// \param[in] items The underlying items array
|
||||
/// \param[in] size The size of the generated map array
|
||||
/// \param[in] null_probability the probability of a map value being null
|
||||
/// \param[in] force_empty_nulls if true, null map entries must have 0 length
|
||||
///
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> Map(const std::shared_ptr<Array>& keys,
|
||||
const std::shared_ptr<Array>& items, int64_t size,
|
||||
double null_probability = 0, bool force_empty_nulls = false);
|
||||
|
||||
/// \brief Generate a random SparseUnionArray
|
||||
///
|
||||
/// The type ids are chosen randomly, according to a uniform distribution,
|
||||
/// amongst the given child fields.
|
||||
///
|
||||
/// \param[in] fields Vector of Arrays containing the data for each union field
|
||||
/// \param[in] size The size of the generated sparse union array
|
||||
std::shared_ptr<Array> SparseUnion(const ArrayVector& fields, int64_t size);
|
||||
|
||||
/// \brief Generate a random DenseUnionArray
|
||||
///
|
||||
/// The type ids are chosen randomly, according to a uniform distribution,
|
||||
/// amongst the given child fields. The offsets are incremented along
|
||||
/// each child field.
|
||||
///
|
||||
/// \param[in] fields Vector of Arrays containing the data for each union field
|
||||
/// \param[in] size The size of the generated dense union array
|
||||
std::shared_ptr<Array> DenseUnion(const ArrayVector& fields, int64_t size);
|
||||
|
||||
/// \brief Generate a random Array of the specified type, size, and null_probability.
|
||||
///
|
||||
/// Generation parameters other than size and null_probability are determined based on
|
||||
/// the type of Array to be generated.
|
||||
/// If boolean the probabilities of true,false values are 0.25,0.75 respectively.
|
||||
/// If numeric min,max will be the least and greatest representable values.
|
||||
/// If string min_length,max_length will be 0,sqrt(size) respectively.
|
||||
///
|
||||
/// \param[in] type the type of Array to generate
|
||||
/// \param[in] size the size of the Array to generate
|
||||
/// \param[in] null_probability the probability of a slot being null
|
||||
/// \return a generated Array
|
||||
std::shared_ptr<Array> ArrayOf(std::shared_ptr<DataType> type, int64_t size,
|
||||
double null_probability = 0);
|
||||
|
||||
/// \brief Generate an array with random data based on the given field. See BatchOf
|
||||
/// for usage info.
|
||||
std::shared_ptr<Array> ArrayOf(const Field& field, int64_t size);
|
||||
|
||||
/// \brief Generate a record batch with random data of the specified length.
|
||||
///
|
||||
/// Generation options are read from key-value metadata for each field, and may be
|
||||
/// specified at any nesting level. For example, generation options for the child
|
||||
/// values of a list array can be specified by constructing the list type with
|
||||
/// list(field("item", int8(), options_metadata))
|
||||
///
|
||||
/// The following options are supported:
|
||||
///
|
||||
/// For all types except NullType:
|
||||
/// - null_probability (double): range [0.0, 1.0] the probability of a null value.
|
||||
/// The default value is 0.0 if the field is marked non-nullable, else it is 0.01
|
||||
///
|
||||
/// For all numeric types T:
|
||||
/// - min (T::c_type): the minimum value to generate (inclusive), default
|
||||
/// std::numeric_limits<T::c_type>::min()
|
||||
/// - max (T::c_type): the maximum value to generate (inclusive), default
|
||||
/// std::numeric_limits<T::c_type>::max()
|
||||
/// Note this means that, for example, min/max are int16_t values for HalfFloatType.
|
||||
///
|
||||
/// For floating point types T for which is_physical_floating_type<T>:
|
||||
/// - nan_probability (double): range [0.0, 1.0] the probability of a NaN value.
|
||||
///
|
||||
/// For BooleanType:
|
||||
/// - true_probability (double): range [0.0, 1.0] the probability of a true.
|
||||
///
|
||||
/// For DictionaryType:
|
||||
/// - values (int32_t): the size of the dictionary.
|
||||
/// Other properties are passed to the generator for the dictionary indices. However,
|
||||
/// min and max cannot be specified. Note it is not possible to otherwise customize
|
||||
/// the generation of dictionary values.
|
||||
///
|
||||
/// For list, string, and binary types T, including their large variants:
|
||||
/// - min_length (T::offset_type): the minimum length of the child to generate,
|
||||
/// default 0
|
||||
/// - max_length (T::offset_type): the maximum length of the child to generate,
|
||||
/// default 1024
|
||||
///
|
||||
/// For string and binary types T (not including their large variants):
|
||||
/// - unique (int32_t): if positive, this many distinct values will be generated
|
||||
/// and all array values will be one of these values, default -1
|
||||
///
|
||||
/// For MapType:
|
||||
/// - values (int32_t): the number of key-value pairs to generate, which will be
|
||||
/// partitioned among the array values.
|
||||
std::shared_ptr<arrow::RecordBatch> BatchOf(const FieldVector& fields, int64_t size);
|
||||
|
||||
/// \brief Draw the next seed value.
///
/// Each call pulls one value from seed_distribution_ using seed_rng_, so the
/// internal engine state advances on every call.
///
/// \return the freshly drawn seed
SeedType seed() {
  const SeedType next_seed = seed_distribution_(seed_rng_);
  return next_seed;
}
|
||||
|
||||
private:
|
||||
std::uniform_int_distribution<SeedType> seed_distribution_;
|
||||
std::default_random_engine seed_rng_;
|
||||
};
|
||||
|
||||
/// Generate a record batch with random data. See RandomArrayGenerator::BatchOf.
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<arrow::RecordBatch> GenerateBatch(const FieldVector& fields, int64_t size,
|
||||
SeedType seed);
|
||||
|
||||
/// Generate an array with random data. See RandomArrayGenerator::BatchOf.
|
||||
ARROW_TESTING_EXPORT
|
||||
std::shared_ptr<arrow::Array> GenerateArray(const Field& field, int64_t size,
|
||||
SeedType seed);
|
||||
|
||||
} // namespace random
|
||||
|
||||
//
|
||||
// Assorted functions
|
||||
//
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
void rand_day_millis(int64_t N, std::vector<DayTimeIntervalType::DayMilliseconds>* out);
|
||||
ARROW_TESTING_EXPORT
|
||||
void rand_month_day_nanos(int64_t N,
|
||||
std::vector<MonthDayNanoIntervalType::MonthDayNanos>* out);
|
||||
|
||||
/// \brief Fill a vector with N pseudo-random integers drawn from [lower, upper].
///
/// The engine is always seeded with the same constant (0), so two calls with
/// the same arguments produce the same sequence.
///
/// \param[in] N the number of values to generate; `out` is resized to this length
/// \param[in] lower the lower bound handed to the uniform distribution
/// \param[in] upper the upper bound handed to the uniform distribution
/// \param[out] out the destination vector; every element is overwritten
template <typename T, typename U>
void randint(int64_t N, T lower, T upper, std::vector<U>* out) {
  // Fixed seed: the output only depends on the arguments.
  const int kSeed = 0;
  std::default_random_engine engine(kSeed);
  std::uniform_int_distribution<T> dist(lower, upper);

  // Size the vector first; the fill value is irrelevant because every slot
  // is overwritten below.
  const T zero = static_cast<T>(0);
  out->resize(N, zero);

  for (auto it = out->begin(); it != out->end(); ++it) {
    *it = static_cast<U>(dist(engine));
  }
}
|
||||
|
||||
template <typename T, typename U>
|
||||
void random_real(int64_t n, uint32_t seed, T min_value, T max_value,
|
||||
std::vector<U>* out) {
|
||||
std::default_random_engine gen(seed);
|
||||
::arrow::random::uniform_real_distribution<T> d(min_value, max_value);
|
||||
out->resize(n, static_cast<T>(0));
|
||||
std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
|
||||
}
|
||||
|
||||
/// \brief Write n pseudo-random integers drawn from [min_value, max_value]
/// into a caller-provided array.
///
/// \param[in] n the number of values to write
/// \param[in] seed the seed for the random engine
/// \param[in] min_value the lower bound handed to the uniform distribution
/// \param[in] max_value the upper bound handed to the uniform distribution
/// \param[out] out the destination array, holding at least n elements; may be
///             null only when n is 0 (checked with assert)
template <typename T, typename U>
void rand_uniform_int(int64_t n, uint32_t seed, T min_value, T max_value, U* out) {
  assert((n == 0) || out);

  std::default_random_engine engine(seed);
  std::uniform_int_distribution<T> dist(min_value, max_value);

  U* const end = out + n;
  for (U* cursor = out; cursor != end; ++cursor) {
    *cursor = static_cast<U>(dist(engine));
  }
}
|
||||
|
||||
} // namespace arrow
|
@ -0,0 +1,84 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
// Random real generation is very slow on Arm if built with clang + libstdc++
|
||||
// due to software emulated long double arithmetic.
|
||||
// This file ports some random real libs from llvm libc++ library, which are
|
||||
// free from long double calculation.
|
||||
// It improves performance significantly on both Arm (~100x) and x86 (~8x) in
|
||||
// generating random reals when built with clang + gnu libstdc++.
|
||||
// Based on: https://github.com/llvm/llvm-project/tree/main/libcxx
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include <arrow/util/bit_util.h>
|
||||
|
||||
namespace arrow {
|
||||
namespace random {
|
||||
|
||||
namespace detail {
|
||||
|
||||
// std::generate_canonical, simplified
|
||||
// https://en.cppreference.com/w/cpp/numeric/random/generate_canonical
|
||||
template <typename RealType, typename Rng>
|
||||
RealType generate_canonical(Rng& rng) {
|
||||
const size_t b = std::numeric_limits<RealType>::digits;
|
||||
const size_t log2R = 63 - ::arrow::bit_util::CountLeadingZeros(
|
||||
static_cast<uint64_t>(Rng::max() - Rng::min()) + 1);
|
||||
const size_t k = b / log2R + (b % log2R != 0) + (b == 0);
|
||||
const RealType r = static_cast<RealType>(Rng::max() - Rng::min()) + 1;
|
||||
RealType base = r;
|
||||
RealType sp = static_cast<RealType>(rng() - Rng::min());
|
||||
for (size_t i = 1; i < k; ++i, base *= r) {
|
||||
sp += (rng() - Rng::min()) * base;
|
||||
}
|
||||
return sp / base;
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
// std::uniform_real_distribution, simplified
|
||||
// https://en.cppreference.com/w/cpp/numeric/random/uniform_real_distribution
|
||||
template <typename RealType = double>
|
||||
struct uniform_real_distribution {
|
||||
const RealType a, b;
|
||||
|
||||
explicit uniform_real_distribution(RealType a = 0, RealType b = 1) : a(a), b(b) {}
|
||||
|
||||
template <typename Rng>
|
||||
RealType operator()(Rng& rng) {
|
||||
return (b - a) * detail::generate_canonical<RealType>(rng) + a;
|
||||
}
|
||||
};
|
||||
|
||||
// std::bernoulli_distribution, simplified
|
||||
// https://en.cppreference.com/w/cpp/numeric/random/bernoulli_distribution
|
||||
struct bernoulli_distribution {
|
||||
const double p;
|
||||
|
||||
explicit bernoulli_distribution(double p = 0.5) : p(p) {}
|
||||
|
||||
template <class Rng>
|
||||
bool operator()(Rng& rng) {
|
||||
return detail::generate_canonical<double>(rng) < p;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace random
|
||||
} // namespace arrow
|
134
.venv/Lib/site-packages/pyarrow/include/arrow/testing/util.h
Normal file
134
.venv/Lib/site-packages/pyarrow/include/arrow/testing/util.h
Normal file
@ -0,0 +1,134 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/buffer.h"
|
||||
#include "arrow/record_batch.h"
|
||||
#include "arrow/status.h"
|
||||
#include "arrow/testing/visibility.h"
|
||||
#include "arrow/type_fwd.h"
|
||||
#include "arrow/util/macros.h"
|
||||
#include "arrow/util/optional.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
template <typename T>
|
||||
Status CopyBufferFromVector(const std::vector<T>& values, MemoryPool* pool,
|
||||
std::shared_ptr<Buffer>* result) {
|
||||
int64_t nbytes = static_cast<int>(values.size()) * sizeof(T);
|
||||
|
||||
ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBuffer(nbytes, pool));
|
||||
auto immutable_data = reinterpret_cast<const uint8_t*>(values.data());
|
||||
std::copy(immutable_data, immutable_data + nbytes, buffer->mutable_data());
|
||||
memset(buffer->mutable_data() + nbytes, 0,
|
||||
static_cast<size_t>(buffer->capacity() - nbytes));
|
||||
|
||||
*result = std::move(buffer);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Sets approximately pct_null of the first n bytes in null_bytes to zero
|
||||
// and the rest to non-zero (true) values.
|
||||
ARROW_TESTING_EXPORT void random_null_bytes(int64_t n, double pct_null,
|
||||
uint8_t* null_bytes);
|
||||
ARROW_TESTING_EXPORT void random_is_valid(int64_t n, double pct_null,
|
||||
std::vector<bool>* is_valid,
|
||||
int random_seed = 0);
|
||||
ARROW_TESTING_EXPORT void random_bytes(int64_t n, uint32_t seed, uint8_t* out);
|
||||
ARROW_TESTING_EXPORT std::string random_string(int64_t n, uint32_t seed);
|
||||
ARROW_TESTING_EXPORT int32_t DecimalSize(int32_t precision);
|
||||
ARROW_TESTING_EXPORT void random_ascii(int64_t n, uint32_t seed, uint8_t* out);
|
||||
ARROW_TESTING_EXPORT int64_t CountNulls(const std::vector<uint8_t>& valid_bytes);
|
||||
|
||||
ARROW_TESTING_EXPORT Status MakeRandomByteBuffer(int64_t length, MemoryPool* pool,
|
||||
std::shared_ptr<ResizableBuffer>* out,
|
||||
uint32_t seed = 0);
|
||||
|
||||
ARROW_TESTING_EXPORT uint64_t random_seed();
|
||||
|
||||
#define DECL_T() typedef typename TestFixture::T T;
|
||||
|
||||
#define DECL_TYPE() typedef typename TestFixture::Type Type;
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// A RecordBatchReader for serving a sequence of in-memory record batches
|
||||
|
||||
class BatchIterator : public RecordBatchReader {
|
||||
public:
|
||||
BatchIterator(const std::shared_ptr<Schema>& schema,
|
||||
const std::vector<std::shared_ptr<RecordBatch>>& batches)
|
||||
: schema_(schema), batches_(batches), position_(0) {}
|
||||
|
||||
std::shared_ptr<Schema> schema() const override { return schema_; }
|
||||
|
||||
Status ReadNext(std::shared_ptr<RecordBatch>* out) override {
|
||||
if (position_ >= batches_.size()) {
|
||||
*out = nullptr;
|
||||
} else {
|
||||
*out = batches_[position_++];
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<Schema> schema_;
|
||||
std::vector<std::shared_ptr<RecordBatch>> batches_;
|
||||
size_t position_;
|
||||
};
|
||||
|
||||
static inline std::vector<std::shared_ptr<DataType> (*)(FieldVector, std::vector<int8_t>)>
|
||||
UnionTypeFactories() {
|
||||
return {sparse_union, dense_union};
|
||||
}
|
||||
|
||||
// Return the value of the ARROW_TEST_DATA environment variable or return error
|
||||
// Status
|
||||
ARROW_TESTING_EXPORT Status GetTestResourceRoot(std::string*);
|
||||
|
||||
// Return the value of the ARROW_TIMEZONE_DATABASE environment variable
|
||||
ARROW_TESTING_EXPORT util::optional<std::string> GetTestTimezoneDatabaseRoot();
|
||||
|
||||
// Set the Timezone database based on the ARROW_TIMEZONE_DATABASE env variable
|
||||
// This is only relevant on Windows, since other OSs have compatible databases built-in
|
||||
ARROW_TESTING_EXPORT Status InitTestTimezoneDatabase();
|
||||
|
||||
// Get a TCP port number to listen on. This is a different number every time,
|
||||
// as reusing the same port across tests can produce spurious bind errors on
|
||||
// Windows.
|
||||
ARROW_TESTING_EXPORT int GetListenPort();
|
||||
|
||||
// Get an IPv4 "address:port" to listen on. The address will be a loopback
|
||||
// address. Compared to GetListenPort(), this will minimize the risk of
|
||||
// port conflicts.
|
||||
ARROW_TESTING_EXPORT std::string GetListenAddress();
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
const std::vector<std::shared_ptr<DataType>>& all_dictionary_index_types();
|
||||
|
||||
} // namespace arrow
|
@ -0,0 +1,48 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4251)
|
||||
#else
|
||||
#pragma GCC diagnostic ignored "-Wattributes"
|
||||
#endif
|
||||
|
||||
#ifdef ARROW_TESTING_STATIC
|
||||
#define ARROW_TESTING_EXPORT
|
||||
#elif defined(ARROW_TESTING_EXPORTING)
|
||||
#define ARROW_TESTING_EXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define ARROW_TESTING_EXPORT __declspec(dllimport)
|
||||
#endif
|
||||
|
||||
#define ARROW_TESTING_NO_EXPORT
|
||||
#else // Not Windows
|
||||
#ifndef ARROW_TESTING_EXPORT
|
||||
#define ARROW_TESTING_EXPORT __attribute__((visibility("default")))
|
||||
#endif
|
||||
#ifndef ARROW_TESTING_NO_EXPORT
|
||||
#define ARROW_TESTING_NO_EXPORT __attribute__((visibility("hidden")))
|
||||
#endif
|
||||
#endif // Non-Windows
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(pop)
|
||||
#endif
|
Reference in New Issue
Block a user