mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-07-01 22:13:01 +00:00
first commit
This commit is contained in:
27
.venv/Lib/site-packages/pyarrow/include/arrow/csv/api.h
Normal file
27
.venv/Lib/site-packages/pyarrow/include/arrow/csv/api.h
Normal file
@ -0,0 +1,27 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "arrow/csv/options.h"
|
||||
#include "arrow/csv/reader.h"
|
||||
|
||||
// The writer depends on compute module for casting.
|
||||
#include "arrow/util/config.h" // for ARROW_COMPUTE definition
|
||||
#ifdef ARROW_COMPUTE
|
||||
#include "arrow/csv/writer.h"
|
||||
#endif
|
36
.venv/Lib/site-packages/pyarrow/include/arrow/csv/chunker.h
Normal file
36
.venv/Lib/site-packages/pyarrow/include/arrow/csv/chunker.h
Normal file
@ -0,0 +1,36 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
#include "arrow/csv/options.h"
|
||||
#include "arrow/status.h"
|
||||
#include "arrow/util/delimiting.h"
|
||||
#include "arrow/util/macros.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace csv {
|
||||
|
||||
/// Create a Chunker configured from the given CSV parse options.
///
/// NOTE(review): `Chunker` is not declared in this header; presumably it comes
/// from arrow/util/delimiting.h — confirm.
ARROW_EXPORT std::unique_ptr<Chunker> MakeChunker(const ParseOptions& options);
|
||||
|
||||
} // namespace csv
|
||||
} // namespace arrow
|
@ -0,0 +1,78 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/type_fwd.h"
|
||||
#include "arrow/util/type_fwd.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace csv {
|
||||
|
||||
class BlockParser;
|
||||
struct ConvertOptions;
|
||||
|
||||
class ARROW_EXPORT ColumnBuilder {
|
||||
public:
|
||||
virtual ~ColumnBuilder() = default;
|
||||
|
||||
/// Spawn a task that will try to convert and append the given CSV block.
|
||||
/// All calls to Append() should happen on the same thread, otherwise
|
||||
/// call Insert() instead.
|
||||
virtual void Append(const std::shared_ptr<BlockParser>& parser) = 0;
|
||||
|
||||
/// Spawn a task that will try to convert and insert the given CSV block
|
||||
virtual void Insert(int64_t block_index,
|
||||
const std::shared_ptr<BlockParser>& parser) = 0;
|
||||
|
||||
/// Return the final chunked array. The TaskGroup _must_ have finished!
|
||||
virtual Result<std::shared_ptr<ChunkedArray>> Finish() = 0;
|
||||
|
||||
std::shared_ptr<arrow::internal::TaskGroup> task_group() { return task_group_; }
|
||||
|
||||
/// Construct a strictly-typed ColumnBuilder.
|
||||
static Result<std::shared_ptr<ColumnBuilder>> Make(
|
||||
MemoryPool* pool, const std::shared_ptr<DataType>& type, int32_t col_index,
|
||||
const ConvertOptions& options,
|
||||
const std::shared_ptr<arrow::internal::TaskGroup>& task_group);
|
||||
|
||||
/// Construct a type-inferring ColumnBuilder.
|
||||
static Result<std::shared_ptr<ColumnBuilder>> Make(
|
||||
MemoryPool* pool, int32_t col_index, const ConvertOptions& options,
|
||||
const std::shared_ptr<arrow::internal::TaskGroup>& task_group);
|
||||
|
||||
/// Construct a ColumnBuilder for a column of nulls
|
||||
/// (i.e. not present in the CSV file).
|
||||
static Result<std::shared_ptr<ColumnBuilder>> MakeNull(
|
||||
MemoryPool* pool, const std::shared_ptr<DataType>& type,
|
||||
const std::shared_ptr<arrow::internal::TaskGroup>& task_group);
|
||||
|
||||
protected:
|
||||
explicit ColumnBuilder(std::shared_ptr<arrow::internal::TaskGroup> task_group)
|
||||
: task_group_(std::move(task_group)) {}
|
||||
|
||||
std::shared_ptr<arrow::internal::TaskGroup> task_group_;
|
||||
};
|
||||
|
||||
} // namespace csv
|
||||
} // namespace arrow
|
@ -0,0 +1,64 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/type_fwd.h"
|
||||
#include "arrow/util/type_fwd.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace csv {
|
||||
|
||||
class BlockParser;
|
||||
struct ConvertOptions;
|
||||
|
||||
class ARROW_EXPORT ColumnDecoder {
|
||||
public:
|
||||
virtual ~ColumnDecoder() = default;
|
||||
|
||||
/// Spawn a task that will try to convert and insert the given CSV block
|
||||
virtual Future<std::shared_ptr<Array>> Decode(
|
||||
const std::shared_ptr<BlockParser>& parser) = 0;
|
||||
|
||||
/// Construct a strictly-typed ColumnDecoder.
|
||||
static Result<std::shared_ptr<ColumnDecoder>> Make(MemoryPool* pool,
|
||||
std::shared_ptr<DataType> type,
|
||||
int32_t col_index,
|
||||
const ConvertOptions& options);
|
||||
|
||||
/// Construct a type-inferring ColumnDecoder.
|
||||
/// Inference will run only on the first block, the type will be frozen afterwards.
|
||||
static Result<std::shared_ptr<ColumnDecoder>> Make(MemoryPool* pool, int32_t col_index,
|
||||
const ConvertOptions& options);
|
||||
|
||||
/// Construct a ColumnDecoder for a column of nulls
|
||||
/// (i.e. not present in the CSV file).
|
||||
static Result<std::shared_ptr<ColumnDecoder>> MakeNull(MemoryPool* pool,
|
||||
std::shared_ptr<DataType> type);
|
||||
|
||||
protected:
|
||||
ColumnDecoder() = default;
|
||||
};
|
||||
|
||||
} // namespace csv
|
||||
} // namespace arrow
|
@ -0,0 +1,82 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
#include "arrow/csv/options.h"
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/type_fwd.h"
|
||||
#include "arrow/util/macros.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace csv {
|
||||
|
||||
class BlockParser;
|
||||
|
||||
class ARROW_EXPORT Converter {
|
||||
public:
|
||||
Converter(const std::shared_ptr<DataType>& type, const ConvertOptions& options,
|
||||
MemoryPool* pool);
|
||||
virtual ~Converter() = default;
|
||||
|
||||
virtual Result<std::shared_ptr<Array>> Convert(const BlockParser& parser,
|
||||
int32_t col_index) = 0;
|
||||
|
||||
std::shared_ptr<DataType> type() const { return type_; }
|
||||
|
||||
// Create a Converter for the given data type
|
||||
static Result<std::shared_ptr<Converter>> Make(
|
||||
const std::shared_ptr<DataType>& type, const ConvertOptions& options,
|
||||
MemoryPool* pool = default_memory_pool());
|
||||
|
||||
protected:
|
||||
ARROW_DISALLOW_COPY_AND_ASSIGN(Converter);
|
||||
|
||||
virtual Status Initialize() = 0;
|
||||
|
||||
// CAUTION: ConvertOptions can grow large (if it customizes hundreds or
|
||||
// thousands of columns), so avoid copying it in each Converter.
|
||||
const ConvertOptions& options_;
|
||||
MemoryPool* pool_;
|
||||
std::shared_ptr<DataType> type_;
|
||||
};
|
||||
|
||||
/// Converter specialization for dictionary-encoded output, parameterized by
/// the dictionary value type.
class ARROW_EXPORT DictionaryConverter : public Converter {
 public:
  DictionaryConverter(const std::shared_ptr<DataType>& value_type,
                      const ConvertOptions& options, MemoryPool* pool);

  // Set the maximum dictionary length: once the dictionary grows above
  // `max_length`, conversion fails with Status::IndexError.
  virtual void SetMaxCardinality(int32_t max_length) = 0;

  // Create a Converter for the given dictionary value type.
  // The dictionary index type is always Int32.
  // `pool` defaults to default_memory_pool().
  static Result<std::shared_ptr<DictionaryConverter>> Make(
      const std::shared_ptr<DataType>& value_type, const ConvertOptions& options,
      MemoryPool* pool = default_memory_pool());

 protected:
  std::shared_ptr<DataType> value_type_;
};
|
||||
|
||||
} // namespace csv
|
||||
} // namespace arrow
|
@ -0,0 +1,56 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
|
||||
#include "arrow/util/string_view.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace csv {
|
||||
|
||||
/// \brief Description of an invalid row
|
||||
struct InvalidRow {
|
||||
/// \brief Number of columns expected in the row
|
||||
int32_t expected_columns;
|
||||
/// \brief Actual number of columns found in the row
|
||||
int32_t actual_columns;
|
||||
/// \brief The physical row number if known or -1
|
||||
///
|
||||
/// This number is one-based and also accounts for non-data rows (such as
|
||||
/// CSV header rows).
|
||||
int64_t number;
|
||||
/// \brief View of the entire row. Memory will be freed after callback returns
|
||||
const util::string_view text;
|
||||
};
|
||||
|
||||
/// \brief Decision returned by an InvalidRowHandler
enum class InvalidRowResult {
  /// Generate an error describing this row
  Error,
  /// Skip over this row
  Skip
};
|
||||
|
||||
/// \brief callback for handling a row with an invalid number of columns while parsing
|
||||
/// \return result indicating if an error should be returned from the parser or the row is
|
||||
/// skipped
|
||||
using InvalidRowHandler = std::function<InvalidRowResult(const InvalidRow&)>;
|
||||
|
||||
} // namespace csv
|
||||
} // namespace arrow
|
220
.venv/Lib/site-packages/pyarrow/include/arrow/csv/options.h
Normal file
220
.venv/Lib/site-packages/pyarrow/include/arrow/csv/options.h
Normal file
@ -0,0 +1,220 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/csv/invalid_row.h"
|
||||
#include "arrow/csv/type_fwd.h"
|
||||
#include "arrow/io/interfaces.h"
|
||||
#include "arrow/status.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
class DataType;
|
||||
class TimestampParser;
|
||||
|
||||
namespace csv {
|
||||
|
||||
// Default value of ParseOptions::escape_char (a backslash). It is spelled as a
// named constant only to work around
// https://github.com/michaeljones/breathe/issues/453
constexpr char kDefaultEscapeChar{'\\'};
|
||||
|
||||
struct ARROW_EXPORT ParseOptions {
|
||||
// Parsing options
|
||||
|
||||
/// Field delimiter
|
||||
char delimiter = ',';
|
||||
/// Whether quoting is used
|
||||
bool quoting = true;
|
||||
/// Quoting character (if `quoting` is true)
|
||||
char quote_char = '"';
|
||||
/// Whether a quote inside a value is double-quoted
|
||||
bool double_quote = true;
|
||||
/// Whether escaping is used
|
||||
bool escaping = false;
|
||||
/// Escaping character (if `escaping` is true)
|
||||
char escape_char = kDefaultEscapeChar;
|
||||
/// Whether values are allowed to contain CR (0x0d) and LF (0x0a) characters
|
||||
bool newlines_in_values = false;
|
||||
/// Whether empty lines are ignored. If false, an empty line represents
|
||||
/// a single empty value (assuming a one-column CSV file).
|
||||
bool ignore_empty_lines = true;
|
||||
/// A handler function for rows which do not have the correct number of columns
|
||||
InvalidRowHandler invalid_row_handler;
|
||||
|
||||
/// Create parsing options with default values
|
||||
static ParseOptions Defaults();
|
||||
|
||||
/// \brief Test that all set options are valid
|
||||
Status Validate() const;
|
||||
};
|
||||
|
||||
struct ARROW_EXPORT ConvertOptions {
|
||||
// Conversion options
|
||||
|
||||
/// Whether to check UTF8 validity of string columns
|
||||
bool check_utf8 = true;
|
||||
/// Optional per-column types (disabling type inference on those columns)
|
||||
std::unordered_map<std::string, std::shared_ptr<DataType>> column_types;
|
||||
/// Recognized spellings for null values
|
||||
std::vector<std::string> null_values;
|
||||
/// Recognized spellings for boolean true values
|
||||
std::vector<std::string> true_values;
|
||||
/// Recognized spellings for boolean false values
|
||||
std::vector<std::string> false_values;
|
||||
|
||||
/// Whether string / binary columns can have null values.
|
||||
///
|
||||
/// If true, then strings in "null_values" are considered null for string columns.
|
||||
/// If false, then all strings are valid string values.
|
||||
bool strings_can_be_null = false;
|
||||
|
||||
/// Whether quoted values can be null.
|
||||
///
|
||||
/// If true, then strings in "null_values" are also considered null when they
|
||||
/// appear quoted in the CSV file. Otherwise, quoted values are never considered null.
|
||||
bool quoted_strings_can_be_null = true;
|
||||
|
||||
/// Whether to try to automatically dict-encode string / binary data.
|
||||
/// If true, then when type inference detects a string or binary column,
|
||||
/// it is dict-encoded up to `auto_dict_max_cardinality` distinct values
|
||||
/// (per chunk), after which it switches to regular encoding.
|
||||
///
|
||||
/// This setting is ignored for non-inferred columns (those in `column_types`).
|
||||
bool auto_dict_encode = false;
|
||||
int32_t auto_dict_max_cardinality = 50;
|
||||
|
||||
/// Decimal point character for floating-point and decimal data
|
||||
char decimal_point = '.';
|
||||
|
||||
// XXX Should we have a separate FilterOptions?
|
||||
|
||||
/// If non-empty, indicates the names of columns from the CSV file that should
|
||||
/// be actually read and converted (in the vector's order).
|
||||
/// Columns not in this vector will be ignored.
|
||||
std::vector<std::string> include_columns;
|
||||
/// If false, columns in `include_columns` but not in the CSV file will error out.
|
||||
/// If true, columns in `include_columns` but not in the CSV file will produce
|
||||
/// a column of nulls (whose type is selected using `column_types`,
|
||||
/// or null by default)
|
||||
/// This option is ignored if `include_columns` is empty.
|
||||
bool include_missing_columns = false;
|
||||
|
||||
/// User-defined timestamp parsers, using the virtual parser interface in
|
||||
/// arrow/util/value_parsing.h. More than one parser can be specified, and
|
||||
/// the CSV conversion logic will try parsing values starting from the
|
||||
/// beginning of this vector. If no parsers are specified, we use the default
|
||||
/// built-in ISO-8601 parser.
|
||||
std::vector<std::shared_ptr<TimestampParser>> timestamp_parsers;
|
||||
|
||||
/// Create conversion options with default values, including conventional
|
||||
/// values for `null_values`, `true_values` and `false_values`
|
||||
static ConvertOptions Defaults();
|
||||
|
||||
/// \brief Test that all set options are valid
|
||||
Status Validate() const;
|
||||
};
|
||||
|
||||
struct ARROW_EXPORT ReadOptions {
|
||||
// Reader options
|
||||
|
||||
/// Whether to use the global CPU thread pool
|
||||
bool use_threads = true;
|
||||
|
||||
/// \brief Block size we request from the IO layer.
|
||||
///
|
||||
/// This will determine multi-threading granularity as well as
|
||||
/// the size of individual record batches.
|
||||
/// Minimum valid value for block size is 1
|
||||
int32_t block_size = 1 << 20; // 1 MB
|
||||
|
||||
/// Number of header rows to skip (not including the row of column names, if any)
|
||||
int32_t skip_rows = 0;
|
||||
|
||||
/// Number of rows to skip after the column names are read, if any
|
||||
int32_t skip_rows_after_names = 0;
|
||||
|
||||
/// Column names for the target table.
|
||||
/// If empty, fall back on autogenerate_column_names.
|
||||
std::vector<std::string> column_names;
|
||||
|
||||
/// Whether to autogenerate column names if `column_names` is empty.
|
||||
/// If true, column names will be of the form "f0", "f1"...
|
||||
/// If false, column names will be read from the first CSV row after `skip_rows`.
|
||||
bool autogenerate_column_names = false;
|
||||
|
||||
/// Create read options with default values
|
||||
static ReadOptions Defaults();
|
||||
|
||||
/// \brief Test that all set options are valid
|
||||
Status Validate() const;
|
||||
};
|
||||
|
||||
/// \brief Quoting style for CSV writing
|
||||
enum class ARROW_EXPORT QuotingStyle {
|
||||
/// Only enclose values in quotes which need them, because their CSV rendering can
|
||||
/// contain quotes itself (e.g. strings or binary values)
|
||||
Needed,
|
||||
/// Enclose all valid values in quotes. Nulls are not quoted. May cause readers to
|
||||
/// interpret all values as strings if schema is inferred.
|
||||
AllValid,
|
||||
/// Do not enclose any values in quotes. Prevents values from containing quotes ("),
|
||||
/// cell delimiters (,) or line endings (\\r, \\n), (following RFC4180). If values
|
||||
/// contain these characters, an error is caused when attempting to write.
|
||||
None
|
||||
};
|
||||
|
||||
struct ARROW_EXPORT WriteOptions {
|
||||
/// Whether to write an initial header line with column names
|
||||
bool include_header = true;
|
||||
|
||||
/// \brief Maximum number of rows processed at a time
|
||||
///
|
||||
/// The CSV writer converts and writes data in batches of N rows.
|
||||
/// This number can impact performance.
|
||||
int32_t batch_size = 1024;
|
||||
|
||||
/// Field delimiter
|
||||
char delimiter = ',';
|
||||
|
||||
/// \brief The string to write for null values. Quotes are not allowed in this string.
|
||||
std::string null_string;
|
||||
|
||||
/// \brief IO context for writing.
|
||||
io::IOContext io_context;
|
||||
|
||||
/// \brief The end of line character to use for ending rows
|
||||
std::string eol = "\n";
|
||||
|
||||
/// \brief Quoting style
|
||||
QuotingStyle quoting_style = QuotingStyle::Needed;
|
||||
|
||||
/// Create write options with default values
|
||||
static WriteOptions Defaults();
|
||||
|
||||
/// \brief Test that all set options are valid
|
||||
Status Validate() const;
|
||||
};
|
||||
|
||||
} // namespace csv
|
||||
} // namespace arrow
|
227
.venv/Lib/site-packages/pyarrow/include/arrow/csv/parser.h
Normal file
227
.venv/Lib/site-packages/pyarrow/include/arrow/csv/parser.h
Normal file
@ -0,0 +1,227 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/buffer.h"
|
||||
#include "arrow/csv/options.h"
|
||||
#include "arrow/csv/type_fwd.h"
|
||||
#include "arrow/status.h"
|
||||
#include "arrow/util/macros.h"
|
||||
#include "arrow/util/string_view.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
|
||||
class MemoryPool;
|
||||
|
||||
namespace csv {
|
||||
|
||||
/// Skip at most num_rows from the given input. The input pointer is updated
|
||||
/// and the number of actually skipped rows is returns (may be less than
|
||||
/// requested if the input is too short).
|
||||
ARROW_EXPORT
|
||||
int32_t SkipRows(const uint8_t* data, uint32_t size, int32_t num_rows,
|
||||
const uint8_t** out_data);
|
||||
|
||||
class BlockParserImpl;
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Descriptor of one parsed value, made of two bit-fields (31 + 1 bits).
struct ParsedValueDesc {
  /// 31-bit offset; DataBatch uses it as an offset into the parsed data
  uint32_t offset : 31;
  /// 1-bit "quoted" flag
  bool quoted : 1;
};
|
||||
|
||||
class ARROW_EXPORT DataBatch {
|
||||
public:
|
||||
explicit DataBatch(int32_t num_cols) : num_cols_(num_cols) {}
|
||||
|
||||
/// \brief Return the number of parsed rows (not skipped)
|
||||
int32_t num_rows() const { return num_rows_; }
|
||||
/// \brief Return the number of parsed columns
|
||||
int32_t num_cols() const { return num_cols_; }
|
||||
/// \brief Return the total size in bytes of parsed data
|
||||
uint32_t num_bytes() const { return parsed_size_; }
|
||||
/// \brief Return the number of skipped rows
|
||||
int32_t num_skipped_rows() const { return static_cast<int32_t>(skipped_rows_.size()); }
|
||||
|
||||
template <typename Visitor>
|
||||
Status VisitColumn(int32_t col_index, int64_t first_row, Visitor&& visit) const {
|
||||
using detail::ParsedValueDesc;
|
||||
|
||||
int32_t batch_row = 0;
|
||||
for (size_t buf_index = 0; buf_index < values_buffers_.size(); ++buf_index) {
|
||||
const auto& values_buffer = values_buffers_[buf_index];
|
||||
const auto values = reinterpret_cast<const ParsedValueDesc*>(values_buffer->data());
|
||||
const auto max_pos =
|
||||
static_cast<int32_t>(values_buffer->size() / sizeof(ParsedValueDesc)) - 1;
|
||||
for (int32_t pos = col_index; pos < max_pos; pos += num_cols_, ++batch_row) {
|
||||
auto start = values[pos].offset;
|
||||
auto stop = values[pos + 1].offset;
|
||||
auto quoted = values[pos + 1].quoted;
|
||||
Status status = visit(parsed_ + start, stop - start, quoted);
|
||||
if (ARROW_PREDICT_FALSE(!status.ok())) {
|
||||
return DecorateWithRowNumber(std::move(status), first_row, batch_row);
|
||||
}
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
template <typename Visitor>
|
||||
Status VisitLastRow(Visitor&& visit) const {
|
||||
using detail::ParsedValueDesc;
|
||||
|
||||
const auto& values_buffer = values_buffers_.back();
|
||||
const auto values = reinterpret_cast<const ParsedValueDesc*>(values_buffer->data());
|
||||
const auto start_pos =
|
||||
static_cast<int32_t>(values_buffer->size() / sizeof(ParsedValueDesc)) -
|
||||
num_cols_ - 1;
|
||||
for (int32_t col_index = 0; col_index < num_cols_; ++col_index) {
|
||||
auto start = values[start_pos + col_index].offset;
|
||||
auto stop = values[start_pos + col_index + 1].offset;
|
||||
auto quoted = values[start_pos + col_index + 1].quoted;
|
||||
ARROW_RETURN_NOT_OK(visit(parsed_ + start, stop - start, quoted));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
protected:
|
||||
Status DecorateWithRowNumber(Status&& status, int64_t first_row,
|
||||
int32_t batch_row) const {
|
||||
if (first_row >= 0) {
|
||||
// `skipped_rows_` is in ascending order by construction, so use bisection
|
||||
// to find out how many rows were skipped before `batch_row`.
|
||||
const auto skips_before =
|
||||
std::upper_bound(skipped_rows_.begin(), skipped_rows_.end(), batch_row) -
|
||||
skipped_rows_.begin();
|
||||
status = status.WithMessage("Row #", batch_row + skips_before + first_row, ": ",
|
||||
status.message());
|
||||
}
|
||||
// Use return_if so that when extra context is enabled it will be added
|
||||
ARROW_RETURN_IF_(true, std::move(status), ARROW_STRINGIFY(status));
|
||||
}
|
||||
|
||||
// The number of rows in this batch (not including any skipped ones)
|
||||
int32_t num_rows_ = 0;
|
||||
// The number of columns
|
||||
int32_t num_cols_ = 0;
|
||||
|
||||
// XXX should we ensure the parsed buffer is padded with 8 or 16 excess zero bytes?
|
||||
// It may help with null parsing...
|
||||
std::vector<std::shared_ptr<Buffer>> values_buffers_;
|
||||
std::shared_ptr<Buffer> parsed_buffer_;
|
||||
const uint8_t* parsed_ = NULLPTR;
|
||||
int32_t parsed_size_ = 0;
|
||||
|
||||
// Record the current num_rows_ each time a row is skipped
|
||||
std::vector<int32_t> skipped_rows_;
|
||||
|
||||
friend class ::arrow::csv::BlockParserImpl;
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
|
||||
// Default value of BlockParser's `max_num_rows` constructor argument.
constexpr int32_t kMaxParserNumRows{100000};
|
||||
|
||||
/// \class BlockParser
|
||||
/// \brief A reusable block-based parser for CSV data
|
||||
///
|
||||
/// The parser takes a block of CSV data and delimits rows and fields,
|
||||
/// unquoting and unescaping them on the fly. Parsed data is own by the
|
||||
/// parser, so the original buffer can be discarded after Parse() returns.
|
||||
///
|
||||
/// If the block is truncated (i.e. not all data can be parsed), it is up
|
||||
/// to the caller to arrange the next block to start with the trailing data.
|
||||
/// Also, if the previous block ends with CR (0x0d) and a new block starts
|
||||
/// with LF (0x0a), the parser will consider the leading newline as an empty
|
||||
/// line; the caller should therefore strip it.
|
||||
class ARROW_EXPORT BlockParser {
|
||||
public:
|
||||
/// \brief Construct a parser without an explicit MemoryPool
///
/// \param options the CSV parsing options
/// \param num_cols number of columns (defaults to -1)
/// \param first_row row number of the first row (defaults to -1)
/// \param max_num_rows upper bound on the number of rows ingested by Parse()
///
/// NOTE(review): the exact meaning of the -1 defaults is not visible in this
/// header (first_row_num() documents -1 as "unsupported"); confirm against
/// the implementation.
explicit BlockParser(ParseOptions options, int32_t num_cols = -1,
|
||||
int64_t first_row = -1, int32_t max_num_rows = kMaxParserNumRows);
|
||||
/// \brief Construct a parser with an explicit MemoryPool
///
/// Same parameters as the overload above, preceded by the pool.
explicit BlockParser(MemoryPool* pool, ParseOptions options, int32_t num_cols = -1,
|
||||
int64_t first_row = -1, int32_t max_num_rows = kMaxParserNumRows);
|
||||
// Declared here and defined out of line.
~BlockParser();
|
||||
|
||||
/// \brief Parse a block of data
|
||||
///
|
||||
/// Parse a block of CSV data, ingesting up to max_num_rows rows.
|
||||
/// The number of bytes actually parsed is returned in out_size.
|
||||
Status Parse(util::string_view data, uint32_t* out_size);
|
||||
|
||||
/// \brief Parse sequential blocks of data
|
||||
///
|
||||
/// Only the last block is allowed to be truncated.
|
||||
Status Parse(const std::vector<util::string_view>& data, uint32_t* out_size);
|
||||
|
||||
/// \brief Parse the final block of data
|
||||
///
|
||||
/// Like Parse(), but called with the final block in a file.
|
||||
/// The last row may lack a trailing line separator.
|
||||
Status ParseFinal(util::string_view data, uint32_t* out_size);
|
||||
|
||||
/// \brief Parse the final sequential blocks of data
|
||||
///
|
||||
/// Only the last block is allowed to be truncated.
|
||||
Status ParseFinal(const std::vector<util::string_view>& data, uint32_t* out_size);
|
||||
|
||||
/// \brief Return the number of parsed rows
|
||||
int32_t num_rows() const { return parsed_batch().num_rows(); }
|
||||
/// \brief Return the number of parsed columns
|
||||
int32_t num_cols() const { return parsed_batch().num_cols(); }
|
||||
/// \brief Return the total size in bytes of parsed data
|
||||
uint32_t num_bytes() const { return parsed_batch().num_bytes(); }
|
||||
|
||||
/// \brief Return the total number of rows including rows which were skipped
///
/// This is the sum of the parsed batch's row count and skipped-row count.
|
||||
int32_t total_num_rows() const {
|
||||
return parsed_batch().num_rows() + parsed_batch().num_skipped_rows();
|
||||
}
|
||||
|
||||
/// \brief Return the row number of the first row in the block or -1 if unsupported
|
||||
int64_t first_row_num() const;
|
||||
|
||||
/// \brief Visit parsed values in a column
|
||||
///
|
||||
/// The signature of the visitor is
|
||||
/// Status(const uint8_t* data, uint32_t size, bool quoted)
///
/// The call is forwarded to the parsed batch together with first_row_num().
|
||||
template <typename Visitor>
|
||||
Status VisitColumn(int32_t col_index, Visitor&& visit) const {
|
||||
return parsed_batch().VisitColumn(col_index, first_row_num(),
|
||||
std::forward<Visitor>(visit));
|
||||
}
|
||||
|
||||
/// \brief Visit parsed values in the last row
///
/// The visitor is forwarded unchanged to the parsed batch's VisitLastRow().
/// NOTE(review): the expected visitor signature is not shown in this header;
/// presumably it matches the one documented for VisitColumn() - confirm.
template <typename Visitor>
|
||||
Status VisitLastRow(Visitor&& visit) const {
|
||||
return parsed_batch().VisitLastRow(std::forward<Visitor>(visit));
|
||||
}
|
||||
|
||||
protected:
|
||||
// Uniquely-owned implementation object; BlockParserImpl is declared elsewhere.
std::unique_ptr<BlockParserImpl> impl_;
|
||||
|
||||
// Accessor for the batch of parsed data; the inline accessors and visitors
// above all delegate to it.
const detail::DataBatch& parsed_batch() const;
|
||||
};
|
||||
|
||||
} // namespace csv
|
||||
} // namespace arrow
|
125
.venv/Lib/site-packages/pyarrow/include/arrow/csv/reader.h
Normal file
125
.venv/Lib/site-packages/pyarrow/include/arrow/csv/reader.h
Normal file
@ -0,0 +1,125 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "arrow/csv/options.h" // IWYU pragma: keep
|
||||
#include "arrow/io/interfaces.h"
|
||||
#include "arrow/record_batch.h"
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/type.h"
|
||||
#include "arrow/type_fwd.h"
|
||||
#include "arrow/util/future.h"
|
||||
#include "arrow/util/thread_pool.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace io {
|
||||
class InputStream;
|
||||
} // namespace io
|
||||
|
||||
namespace csv {
|
||||
|
||||
/// A class that reads an entire CSV file into an Arrow Table
|
||||
class ARROW_EXPORT TableReader {
|
||||
public:
|
||||
virtual ~TableReader() = default;
|
||||
|
||||
/// Read the entire CSV file and convert it to an Arrow Table
|
||||
virtual Result<std::shared_ptr<Table>> Read() = 0;
|
||||
/// Asynchronous variant of Read(): the resulting Arrow Table is delivered
/// through the returned Future
|
||||
virtual Future<std::shared_ptr<Table>> ReadAsync() = 0;
|
||||
|
||||
/// Create a TableReader instance
///
/// \param io_context the IOContext to use (per the deprecation note on the
///        overload below, it already holds a memory pool)
/// \param input the stream to read CSV data from
///
/// The three trailing arguments are the read, parse and convert options.
|
||||
static Result<std::shared_ptr<TableReader>> Make(io::IOContext io_context,
|
||||
std::shared_ptr<io::InputStream> input,
|
||||
const ReadOptions&,
|
||||
const ParseOptions&,
|
||||
const ConvertOptions&);
|
||||
|
||||
/// Deprecated overload of Make() that takes an explicit MemoryPool in
/// addition to the IOContext; prefer the overload above.
ARROW_DEPRECATED(
|
||||
"Deprecated in 4.0.0. "
|
||||
"Use MemoryPool-less variant (the IOContext holds a pool already)")
|
||||
static Result<std::shared_ptr<TableReader>> Make(
|
||||
MemoryPool* pool, io::IOContext io_context, std::shared_ptr<io::InputStream> input,
|
||||
const ReadOptions&, const ParseOptions&, const ConvertOptions&);
|
||||
};
|
||||
|
||||
/// \brief A class that reads a CSV file incrementally
|
||||
///
|
||||
/// Caveats:
|
||||
/// - For now, this is always single-threaded (regardless of `ReadOptions::use_threads`).
|
||||
/// - Type inference is done on the first block and types are frozen afterwards;
|
||||
/// to make sure the right data types are inferred, either set
|
||||
/// `ReadOptions::block_size` to a large enough value, or use
|
||||
/// `ConvertOptions::column_types` to set the desired data types explicitly.
|
||||
class ARROW_EXPORT StreamingReader : public RecordBatchReader {
|
||||
public:
|
||||
virtual ~StreamingReader() = default;
|
||||
|
||||
/// \brief Read the next record batch asynchronously
///
/// The batch is delivered through the returned Future.
/// NOTE(review): how end-of-stream is signalled is not visible in this
/// header; confirm against the implementation.
virtual Future<std::shared_ptr<RecordBatch>> ReadNextAsync() = 0;
|
||||
|
||||
/// \brief Return the number of bytes which have been read and processed
|
||||
///
|
||||
/// The returned number includes CSV bytes which the StreamingReader has
|
||||
/// finished processing, but not bytes for which some processing (e.g.
|
||||
/// CSV parsing or conversion to Arrow layout) is still ongoing.
|
||||
///
|
||||
/// Furthermore, the following rules apply:
|
||||
/// - bytes skipped by `ReadOptions.skip_rows` are counted as being read before
|
||||
/// any records are returned.
|
||||
/// - bytes read while parsing the header are counted as being read before any
|
||||
/// records are returned.
|
||||
/// - bytes skipped by `ReadOptions.skip_rows_after_names` are counted after the
|
||||
/// first batch is returned.
|
||||
virtual int64_t bytes_read() const = 0;
|
||||
|
||||
/// Create a StreamingReader instance
|
||||
///
|
||||
/// This involves some I/O, as the first batch must be loaded during the creation
|
||||
/// process, so the reader is returned as a future.
|
||||
///
|
||||
/// Currently, the StreamingReader is not async-reentrant and does not do any fan-out
|
||||
/// parsing (see ARROW-11889)
|
||||
static Future<std::shared_ptr<StreamingReader>> MakeAsync(
|
||||
io::IOContext io_context, std::shared_ptr<io::InputStream> input,
|
||||
arrow::internal::Executor* cpu_executor, const ReadOptions&, const ParseOptions&,
|
||||
const ConvertOptions&);
|
||||
|
||||
/// Create a StreamingReader instance
///
/// Counterpart of MakeAsync() that takes no CPU executor and returns the
/// reader in a Result instead of a Future.
static Result<std::shared_ptr<StreamingReader>> Make(
|
||||
io::IOContext io_context, std::shared_ptr<io::InputStream> input,
|
||||
const ReadOptions&, const ParseOptions&, const ConvertOptions&);
|
||||
|
||||
/// Deprecated overload of Make() that takes a MemoryPool instead of an
/// IOContext; prefer the IOContext-based overload above.
ARROW_DEPRECATED("Deprecated in 4.0.0. Use IOContext-based overload")
|
||||
static Result<std::shared_ptr<StreamingReader>> Make(
|
||||
MemoryPool* pool, std::shared_ptr<io::InputStream> input,
|
||||
const ReadOptions& read_options, const ParseOptions& parse_options,
|
||||
const ConvertOptions& convert_options);
|
||||
};
|
||||
|
||||
/// \brief Count the logical rows of data in a CSV file (i.e. the
|
||||
/// number of rows you would get if you read the file into a table).
///
/// The count is delivered through the returned Future.
///
/// \param io_context the IOContext to use
/// \param input the stream to read CSV data from
/// \param cpu_executor executor passed in for CPU work (NOTE(review): which
///        stages run on it is not visible in this header)
///
/// The two trailing arguments are the read and parse options; no
/// ConvertOptions are taken.
|
||||
ARROW_EXPORT
|
||||
Future<int64_t> CountRowsAsync(io::IOContext io_context,
|
||||
std::shared_ptr<io::InputStream> input,
|
||||
arrow::internal::Executor* cpu_executor,
|
||||
const ReadOptions&, const ParseOptions&);
|
||||
|
||||
} // namespace csv
|
||||
} // namespace arrow
|
@ -0,0 +1,55 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "arrow/csv/parser.h"
|
||||
#include "arrow/testing/visibility.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace csv {
|
||||
|
||||
// Make a single CSV data string from a vector of lines
// NOTE(review): presumably the lines are concatenated as given; whether line
// separators are added is not visible in this header - confirm in the definition.
ARROW_TESTING_EXPORT
|
||||
std::string MakeCSVData(std::vector<std::string> lines);
|
||||
|
||||
// Make a BlockParser from a vector of lines representing a CSV file
// The parser is handed back through the `out` parameter. The three overloads
// differ only in how much is specified explicitly: parse options and column
// count, parse options only, or neither.
|
||||
ARROW_TESTING_EXPORT
|
||||
void MakeCSVParser(std::vector<std::string> lines, ParseOptions options, int32_t num_cols,
|
||||
std::shared_ptr<BlockParser>* out);
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
void MakeCSVParser(std::vector<std::string> lines, ParseOptions options,
|
||||
std::shared_ptr<BlockParser>* out);
|
||||
|
||||
ARROW_TESTING_EXPORT
|
||||
void MakeCSVParser(std::vector<std::string> lines, std::shared_ptr<BlockParser>* out);
|
||||
|
||||
// Make a BlockParser from a vector of strings representing a single CSV column
// The parser is handed back through the `out` parameter.
|
||||
ARROW_TESTING_EXPORT
|
||||
void MakeColumnParser(std::vector<std::string> items, std::shared_ptr<BlockParser>* out);
|
||||
|
||||
// Make a Buffer holding sample CSV data with `num_rows` rows
// `is_valid` is an optional per-row predicate (empty by default).
// NOTE(review): the generated columns, and what a row for which `is_valid`
// returns false looks like, are not visible in this header - confirm in the
// definition.
ARROW_TESTING_EXPORT
|
||||
Result<std::shared_ptr<Buffer>> MakeSampleCsvBuffer(
|
||||
size_t num_rows, std::function<bool(size_t row_num)> is_valid = {});
|
||||
|
||||
} // namespace csv
|
||||
} // namespace arrow
|
28
.venv/Lib/site-packages/pyarrow/include/arrow/csv/type_fwd.h
Normal file
28
.venv/Lib/site-packages/pyarrow/include/arrow/csv/type_fwd.h
Normal file
@ -0,0 +1,28 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
namespace arrow {
|
||||
namespace csv {
|
||||
|
||||
// Forward declarations of the CSV table reader class and the CSV option
// structs, for code that only needs the names and not the full definitions.
class TableReader;
|
||||
struct ConvertOptions;
|
||||
struct ReadOptions;
|
||||
struct ParseOptions;
|
||||
struct WriteOptions;
|
||||
|
||||
} // namespace csv
|
||||
} // namespace arrow
|
89
.venv/Lib/site-packages/pyarrow/include/arrow/csv/writer.h
Normal file
89
.venv/Lib/site-packages/pyarrow/include/arrow/csv/writer.h
Normal file
@ -0,0 +1,89 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "arrow/csv/options.h"
|
||||
#include "arrow/io/interfaces.h"
|
||||
#include "arrow/ipc/type_fwd.h"
|
||||
#include "arrow/record_batch.h"
|
||||
#include "arrow/table.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace csv {
|
||||
|
||||
// Functionality for converting Arrow data to comma-separated values (CSV) text.
|
||||
// This library supports all primitive types that can be cast to a StringArray.
|
||||
// It applies the following formatting rules:
|
||||
// - For non-binary types no quotes surround values. Nulls are represented as the empty
|
||||
// string.
|
||||
// - For binary types all non-null data is quoted (and quotes within data are escaped
|
||||
// with an additional quote).
|
||||
// Null values are empty and unquoted.
|
||||
|
||||
/// \defgroup csv-write-functions High-level functions for writing CSV files
|
||||
/// @{
|
||||
|
||||
/// \brief Convert table to CSV and write the result to output.
|
||||
/// Experimental
///
/// \param[in] table the table to convert
/// \param[in] options options for serialization
/// \param[in] output output stream to write to
/// \return Status
|
||||
ARROW_EXPORT Status WriteCSV(const Table& table, const WriteOptions& options,
|
||||
arrow::io::OutputStream* output);
|
||||
/// \brief Convert batch to CSV and write the result to output.
|
||||
/// Experimental
///
/// \param[in] batch the record batch to convert
/// \param[in] options options for serialization
/// \param[in] output output stream to write to
/// \return Status
|
||||
ARROW_EXPORT Status WriteCSV(const RecordBatch& batch, const WriteOptions& options,
|
||||
arrow::io::OutputStream* output);
|
||||
/// \brief Convert batches read through a RecordBatchReader
|
||||
/// to CSV and write the results to output.
|
||||
/// Experimental
///
/// \param[in] reader the reader supplying the record batches to convert
/// \param[in] options options for serialization
/// \param[in] output output stream to write to
/// \return Status
|
||||
ARROW_EXPORT Status WriteCSV(const std::shared_ptr<RecordBatchReader>& reader,
|
||||
const WriteOptions& options,
|
||||
arrow::io::OutputStream* output);
|
||||
|
||||
/// @}
|
||||
|
||||
/// \defgroup csv-writer-factories Functions for creating an incremental CSV writer
|
||||
/// @{
|
||||
|
||||
/// \brief Create a new CSV writer. User is responsible for closing the
|
||||
/// actual OutputStream.
|
||||
///
|
||||
/// \param[in] sink output stream to write to
|
||||
/// \param[in] schema the schema of the record batches to be written
|
||||
/// \param[in] options options for serialization (defaults to WriteOptions::Defaults())
|
||||
/// \return Result<std::shared_ptr<ipc::RecordBatchWriter>>
|
||||
ARROW_EXPORT
|
||||
Result<std::shared_ptr<ipc::RecordBatchWriter>> MakeCSVWriter(
|
||||
std::shared_ptr<io::OutputStream> sink, const std::shared_ptr<Schema>& schema,
|
||||
const WriteOptions& options = WriteOptions::Defaults());
|
||||
|
||||
/// \brief Create a new CSV writer.
|
||||
///
|
||||
/// \param[in] sink output stream to write to (does not take ownership)
|
||||
/// \param[in] schema the schema of the record batches to be written
|
||||
/// \param[in] options options for serialization (defaults to WriteOptions::Defaults())
|
||||
/// \return Result<std::shared_ptr<ipc::RecordBatchWriter>>
|
||||
ARROW_EXPORT
|
||||
Result<std::shared_ptr<ipc::RecordBatchWriter>> MakeCSVWriter(
|
||||
io::OutputStream* sink, const std::shared_ptr<Schema>& schema,
|
||||
const WriteOptions& options = WriteOptions::Defaults());
|
||||
|
||||
/// @}
|
||||
|
||||
} // namespace csv
|
||||
} // namespace arrow
|
Reference in New Issue
Block a user