mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-04-23 02:40:43 +00:00
348 lines
12 KiB
C++
348 lines
12 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
|
|
|
|
#pragma once
|
|
|
|
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "arrow/io/type_fwd.h"
#include "arrow/type_fwd.h"
#include "arrow/util/cancel.h"
#include "arrow/util/macros.h"
#include "arrow/util/string_view.h"
#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"
|
|
|
|
namespace arrow {
|
|
namespace io {
|
|
|
|
/// \brief A contiguous byte range, described by a start offset and a length.
///
/// This is a plain aggregate: it can be brace-initialized as
/// `ReadRange{offset, length}`.
struct ReadRange {
  /// Start of the range.
  int64_t offset;
  /// Number of bytes covered by the range.
  int64_t length;

  /// Two ranges are equal when both their offset and their length match.
  friend bool operator==(const ReadRange& left, const ReadRange& right) {
    return (left.offset == right.offset && left.length == right.length);
  }
  friend bool operator!=(const ReadRange& left, const ReadRange& right) {
    return !(left == right);
  }

  /// \brief Return true if `other` lies entirely within this range.
  ///
  /// `other` is contained when it starts at or after this range's offset and
  /// ends (offset + length) at or before this range's end.
  bool Contains(const ReadRange& other) const {
    return (offset <= other.offset && offset + length >= other.offset + other.length);
  }
};
|
|
|
|
/// EXPERIMENTAL: options provider for IO tasks
|
|
///
|
|
/// Includes an Executor (which will be used to execute asynchronous reads),
|
|
/// a MemoryPool (which will be used to allocate buffers when zero copy reads
|
|
/// are not possible), and an external id (in case the executor receives tasks from
|
|
/// multiple sources and must distinguish tasks associated with this IOContext).
|
|
struct ARROW_EXPORT IOContext {
|
|
// No specified executor: will use a global IO thread pool
|
|
IOContext() : IOContext(default_memory_pool(), StopToken::Unstoppable()) {}
|
|
|
|
explicit IOContext(StopToken stop_token)
|
|
: IOContext(default_memory_pool(), std::move(stop_token)) {}
|
|
|
|
explicit IOContext(MemoryPool* pool, StopToken stop_token = StopToken::Unstoppable());
|
|
|
|
explicit IOContext(MemoryPool* pool, ::arrow::internal::Executor* executor,
|
|
StopToken stop_token = StopToken::Unstoppable(),
|
|
int64_t external_id = -1)
|
|
: pool_(pool),
|
|
executor_(executor),
|
|
external_id_(external_id),
|
|
stop_token_(std::move(stop_token)) {}
|
|
|
|
explicit IOContext(::arrow::internal::Executor* executor,
|
|
StopToken stop_token = StopToken::Unstoppable(),
|
|
int64_t external_id = -1)
|
|
: pool_(default_memory_pool()),
|
|
executor_(executor),
|
|
external_id_(external_id),
|
|
stop_token_(std::move(stop_token)) {}
|
|
|
|
MemoryPool* pool() const { return pool_; }
|
|
|
|
::arrow::internal::Executor* executor() const { return executor_; }
|
|
|
|
// An application-specific ID, forwarded to executor task submissions
|
|
int64_t external_id() const { return external_id_; }
|
|
|
|
StopToken stop_token() const { return stop_token_; }
|
|
|
|
private:
|
|
MemoryPool* pool_;
|
|
::arrow::internal::Executor* executor_;
|
|
int64_t external_id_;
|
|
StopToken stop_token_;
|
|
};
|
|
|
|
/// Deprecated alias of IOContext, kept for backward compatibility.
///
/// Inherits all IOContext constructors and adds nothing of its own.
struct ARROW_DEPRECATED("renamed to IOContext in 4.0.0") AsyncContext : public IOContext {
  using IOContext::IOContext;
};
|
|
|
|
class ARROW_EXPORT FileInterface {
|
|
public:
|
|
virtual ~FileInterface() = 0;
|
|
|
|
/// \brief Close the stream cleanly
|
|
///
|
|
/// For writable streams, this will attempt to flush any pending data
|
|
/// before releasing the underlying resource.
|
|
///
|
|
/// After Close() is called, closed() returns true and the stream is not
|
|
/// available for further operations.
|
|
virtual Status Close() = 0;
|
|
|
|
/// \brief Close the stream asynchronously
|
|
///
|
|
/// By default, this will just submit the synchronous Close() to the
|
|
/// default I/O thread pool. Subclasses may implement this in a more
|
|
/// efficient manner.
|
|
virtual Future<> CloseAsync();
|
|
|
|
/// \brief Close the stream abruptly
|
|
///
|
|
/// This method does not guarantee that any pending data is flushed.
|
|
/// It merely releases any underlying resource used by the stream for
|
|
/// its operation.
|
|
///
|
|
/// After Abort() is called, closed() returns true and the stream is not
|
|
/// available for further operations.
|
|
virtual Status Abort();
|
|
|
|
/// \brief Return the position in this stream
|
|
virtual Result<int64_t> Tell() const = 0;
|
|
|
|
/// \brief Return whether the stream is closed
|
|
virtual bool closed() const = 0;
|
|
|
|
FileMode::type mode() const { return mode_; }
|
|
|
|
protected:
|
|
FileInterface() : mode_(FileMode::READ) {}
|
|
FileMode::type mode_;
|
|
void set_mode(FileMode::type mode) { mode_ = mode; }
|
|
|
|
private:
|
|
ARROW_DISALLOW_COPY_AND_ASSIGN(FileInterface);
|
|
};
|
|
|
|
class ARROW_EXPORT Seekable {
|
|
public:
|
|
virtual ~Seekable() = default;
|
|
virtual Status Seek(int64_t position) = 0;
|
|
};
|
|
|
|
class ARROW_EXPORT Writable {
|
|
public:
|
|
virtual ~Writable() = default;
|
|
|
|
/// \brief Write the given data to the stream
|
|
///
|
|
/// This method always processes the bytes in full. Depending on the
|
|
/// semantics of the stream, the data may be written out immediately,
|
|
/// held in a buffer, or written asynchronously. In the case where
|
|
/// the stream buffers the data, it will be copied. To avoid potentially
|
|
/// large copies, use the Write variant that takes an owned Buffer.
|
|
virtual Status Write(const void* data, int64_t nbytes) = 0;
|
|
|
|
/// \brief Write the given data to the stream
|
|
///
|
|
/// Since the Buffer owns its memory, this method can avoid a copy if
|
|
/// buffering is required. See Write(const void*, int64_t) for details.
|
|
virtual Status Write(const std::shared_ptr<Buffer>& data);
|
|
|
|
/// \brief Flush buffered bytes, if any
|
|
virtual Status Flush();
|
|
|
|
Status Write(util::string_view data);
|
|
};
|
|
|
|
class ARROW_EXPORT Readable {
|
|
public:
|
|
virtual ~Readable() = default;
|
|
|
|
/// \brief Read data from current file position.
|
|
///
|
|
/// Read at most `nbytes` from the current file position into `out`.
|
|
/// The number of bytes read is returned.
|
|
virtual Result<int64_t> Read(int64_t nbytes, void* out) = 0;
|
|
|
|
/// \brief Read data from current file position.
|
|
///
|
|
/// Read at most `nbytes` from the current file position. Less bytes may
|
|
/// be read if EOF is reached. This method updates the current file position.
|
|
///
|
|
/// In some cases (e.g. a memory-mapped file), this method may avoid a
|
|
/// memory copy.
|
|
virtual Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) = 0;
|
|
|
|
/// EXPERIMENTAL: The IOContext associated with this file.
|
|
///
|
|
/// By default, this is the same as default_io_context(), but it may be
|
|
/// overriden by subclasses.
|
|
virtual const IOContext& io_context() const;
|
|
};
|
|
|
|
/// \brief A writable stream: combines FileInterface and Writable.
///
/// FileInterface is inherited virtually. The constructor is protected, so
/// only subclasses can be instantiated.
class ARROW_EXPORT OutputStream : virtual public FileInterface, public Writable {
 protected:
  OutputStream() = default;
};
|
|
|
|
/// \brief A readable stream: combines FileInterface and Readable.
///
/// Both bases are inherited virtually. The class also derives from
/// std::enable_shared_from_this<InputStream>. The constructor is protected,
/// so only subclasses can be instantiated.
class ARROW_EXPORT InputStream : virtual public FileInterface,
                                 virtual public Readable,
                                 public std::enable_shared_from_this<InputStream> {
 public:
  /// \brief Advance or skip stream indicated number of bytes
  /// \param[in] nbytes the number to move forward
  /// \return Status
  Status Advance(int64_t nbytes);

  /// \brief Return zero-copy string_view to upcoming bytes.
  ///
  /// Do not modify the stream position. The view becomes invalid after
  /// any operation on the stream. May trigger buffering if the requested
  /// size is larger than the number of buffered bytes.
  ///
  /// May return NotImplemented on streams that don't support it.
  ///
  /// \param[in] nbytes the maximum number of bytes to see
  virtual Result<util::string_view> Peek(int64_t nbytes);

  /// \brief Return true if InputStream is capable of zero copy Buffer reads
  ///
  /// Zero copy reads imply the use of Buffer-returning Read() overloads.
  virtual bool supports_zero_copy() const;

  /// \brief Read and return stream metadata
  ///
  /// If the stream implementation doesn't support metadata, empty metadata
  /// is returned. Note that it is allowed to return a null pointer rather
  /// than an allocated empty metadata.
  virtual Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata();

  /// \brief Read stream metadata asynchronously
  virtual Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync(
      const IOContext& io_context);

  /// \brief Read stream metadata asynchronously, without an explicit IOContext
  ///
  /// Non-virtual overload, defined out of line.
  /// NOTE(review): presumably uses the stream's own io_context(); confirm.
  Future<std::shared_ptr<const KeyValueMetadata>> ReadMetadataAsync();

 protected:
  InputStream() = default;
};
|
|
|
|
/// \brief An InputStream that also supports seeking and positional reads.
class ARROW_EXPORT RandomAccessFile : public InputStream, public Seekable {
 public:
  /// Necessary because we hold a std::unique_ptr
  ~RandomAccessFile() override;

  /// \brief Create an isolated InputStream that reads a segment of a
  /// RandomAccessFile. Multiple such streams can be created and used
  /// independently without interference
  /// \param[in] file a file instance
  /// \param[in] file_offset the starting position in the file
  /// \param[in] nbytes the extent of bytes to read. The file should have
  /// sufficient bytes available
  static std::shared_ptr<InputStream> GetStream(std::shared_ptr<RandomAccessFile> file,
                                                int64_t file_offset, int64_t nbytes);

  /// \brief Return the total file size in bytes.
  ///
  /// This method does not read or move the current file position, so is safe
  /// to call concurrently with e.g. ReadAt().
  virtual Result<int64_t> GetSize() = 0;

  /// \brief Read data from given file position.
  ///
  /// At most `nbytes` bytes are read. The number of bytes read is returned
  /// (it can be less than `nbytes` if EOF is reached).
  ///
  /// This method can be safely called from multiple threads concurrently.
  /// It is unspecified whether this method updates the file position or not.
  ///
  /// The default RandomAccessFile-provided implementation uses Seek() and Read(),
  /// but subclasses may override it with a more efficient implementation
  /// that doesn't depend on implicit file positioning.
  ///
  /// \param[in] position Where to read bytes from
  /// \param[in] nbytes The number of bytes to read
  /// \param[out] out The buffer to read bytes into
  /// \return The number of bytes read, or an error
  virtual Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out);

  /// \brief Read data from given file position.
  ///
  /// At most `nbytes` bytes are read, but it can be less if EOF is reached.
  ///
  /// \param[in] position Where to read bytes from
  /// \param[in] nbytes The number of bytes to read
  /// \return A buffer containing the bytes read, or an error
  virtual Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes);

  /// EXPERIMENTAL: Read data asynchronously.
  virtual Future<std::shared_ptr<Buffer>> ReadAsync(const IOContext&, int64_t position,
                                                    int64_t nbytes);

  /// EXPERIMENTAL: Read data asynchronously, using the file's IOContext.
  Future<std::shared_ptr<Buffer>> ReadAsync(int64_t position, int64_t nbytes);

  /// EXPERIMENTAL: Inform that the given ranges may be read soon.
  ///
  /// Some implementations might arrange to prefetch some of the data.
  /// However, no guarantee is made and the default implementation does nothing.
  /// For robust prefetching, use ReadAt() or ReadAsync().
  virtual Status WillNeed(const std::vector<ReadRange>& ranges);

 protected:
  RandomAccessFile();

 private:
  // Opaque implementation detail, defined out of line. Holding it through a
  // std::unique_ptr is why the destructor is declared above.
  struct ARROW_NO_EXPORT Impl;
  std::unique_ptr<Impl> interface_impl_;
};
|
|
|
|
/// \brief An OutputStream that also supports seeking and positional writes.
class ARROW_EXPORT WritableFile : public OutputStream, public Seekable {
 public:
  /// \brief Write data at the given file position
  ///
  /// \param[in] position Where to write the bytes
  /// \param[in] data The bytes to write
  /// \param[in] nbytes The number of bytes to write
  virtual Status WriteAt(int64_t position, const void* data, int64_t nbytes) = 0;

 protected:
  WritableFile() = default;
};
|
|
|
|
/// \brief A file that is both a RandomAccessFile and a WritableFile.
class ARROW_EXPORT ReadWriteFileInterface : public RandomAccessFile, public WritableFile {
 protected:
  // set_mode() is reachable through both bases, so the call is qualified.
  // FileInterface is a virtual base, so there is a single mode_ to update.
  ReadWriteFileInterface() { RandomAccessFile::set_mode(FileMode::READWRITE); }
};
|
|
|
|
/// \brief Return an iterator on an input stream
|
|
///
|
|
/// The iterator yields a fixed-size block on each Next() call, except the
|
|
/// last block in the stream which may be smaller.
|
|
/// Once the end of stream is reached, Next() returns nullptr
|
|
/// (unlike InputStream::Read() which returns an empty buffer).
|
|
ARROW_EXPORT
|
|
Result<Iterator<std::shared_ptr<Buffer>>> MakeInputStreamIterator(
|
|
std::shared_ptr<InputStream> stream, int64_t block_size);
|
|
|
|
} // namespace io
|
|
} // namespace arrow
|