// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include #include #include "arrow/csv/options.h" #include "arrow/dataset/dataset.h" #include "arrow/dataset/file_base.h" #include "arrow/dataset/type_fwd.h" #include "arrow/dataset/visibility.h" #include "arrow/ipc/type_fwd.h" #include "arrow/status.h" #include "arrow/util/compression.h" namespace arrow { namespace dataset { constexpr char kCsvTypeName[] = "csv"; /// \addtogroup dataset-file-formats /// /// @{ /// \brief A FileFormat implementation that reads from and writes to Csv files class ARROW_DS_EXPORT CsvFileFormat : public FileFormat { public: /// Options affecting the parsing of CSV files csv::ParseOptions parse_options = csv::ParseOptions::Defaults(); std::string type_name() const override { return kCsvTypeName; } bool Equals(const FileFormat& other) const override; Result IsSupported(const FileSource& source) const override; /// \brief Return the schema of the file if possible. Result> Inspect(const FileSource& source) const override; Result ScanBatchesAsync( const std::shared_ptr& scan_options, const std::shared_ptr& file) const override; Future> CountRows( const std::shared_ptr& file, compute::Expression predicate, const std::shared_ptr& options) override; Result> MakeWriter( std::shared_ptr destination, std::shared_ptr schema, std::shared_ptr options, fs::FileLocator destination_locator) const override; std::shared_ptr DefaultWriteOptions() override; }; /// \brief Per-scan options for CSV fragments struct ARROW_DS_EXPORT CsvFragmentScanOptions : public FragmentScanOptions { std::string type_name() const override { return kCsvTypeName; } /// CSV conversion options csv::ConvertOptions convert_options = csv::ConvertOptions::Defaults(); /// CSV reading options /// /// Note that use_threads is always ignored. csv::ReadOptions read_options = csv::ReadOptions::Defaults(); }; class ARROW_DS_EXPORT CsvFileWriteOptions : public FileWriteOptions { public: /// Options passed to csv::MakeCSVWriter. std::shared_ptr write_options; protected: using FileWriteOptions::FileWriteOptions; friend class CsvFileFormat; }; class ARROW_DS_EXPORT CsvFileWriter : public FileWriter { public: Status Write(const std::shared_ptr& batch) override; private: CsvFileWriter(std::shared_ptr destination, std::shared_ptr writer, std::shared_ptr schema, std::shared_ptr options, fs::FileLocator destination_locator); Future<> FinishInternal() override; std::shared_ptr destination_; std::shared_ptr batch_writer_; friend class CsvFileFormat; }; /// @} } // namespace dataset } // namespace arrow