mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-07-03 06:31:28 +00:00
first commit
This commit is contained in:
197
.venv/Lib/site-packages/pyarrow/include/arrow/filesystem/gcsfs.h
Normal file
197
.venv/Lib/site-packages/pyarrow/include/arrow/filesystem/gcsfs.h
Normal file
@ -0,0 +1,197 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <chrono>
#include <memory>
#include <string>
#include <vector>

#include "arrow/filesystem/filesystem.h"
#include "arrow/util/uri.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace fs {
|
||||
|
||||
// Forward declaration only: this header uses GcsCredentials solely through
// std::shared_ptr and const references, so the complete type is not needed here.
struct GcsCredentials;
|
||||
|
||||
/// Options for the GcsFileSystem implementation.
|
||||
struct ARROW_EXPORT GcsOptions {
|
||||
std::shared_ptr<GcsCredentials> credentials;
|
||||
|
||||
std::string endpoint_override;
|
||||
std::string scheme;
|
||||
/// \brief Location to use for creating buckets.
|
||||
std::string default_bucket_location;
|
||||
|
||||
/// \brief Default metadata for OpenOutputStream.
|
||||
///
|
||||
/// This will be ignored if non-empty metadata is passed to OpenOutputStream.
|
||||
std::shared_ptr<const KeyValueMetadata> default_metadata;
|
||||
|
||||
bool Equals(const GcsOptions& other) const;
|
||||
|
||||
/// \brief Initialize with Google Default Credentials
|
||||
///
|
||||
/// Create options configured to use [Application Default Credentials][aip/4110]. The
|
||||
/// details of this mechanism are too involved to describe here, but suffice is to say
|
||||
/// that applications can override any defaults using an environment variable
|
||||
/// (`GOOGLE_APPLICATION_CREDENTIALS`), and that the defaults work with most Google
|
||||
/// Cloud Platform deployment environments (GCE, GKE, Cloud Run, etc.), and that have
|
||||
/// the same behavior as the `gcloud` CLI tool on your workstation.
|
||||
///
|
||||
/// \see https://cloud.google.com/docs/authentication
|
||||
///
|
||||
/// [aip/4110]: https://google.aip.dev/auth/4110
|
||||
static GcsOptions Defaults();
|
||||
|
||||
/// \brief Initialize with anonymous credentials
|
||||
static GcsOptions Anonymous();
|
||||
|
||||
/// \brief Initialize with access token
|
||||
///
|
||||
/// These credentials are useful when using an out-of-band mechanism to fetch access
|
||||
/// tokens. Note that access tokens are time limited, you will need to manually refresh
|
||||
/// the tokens created by the out-of-band mechanism.
|
||||
static GcsOptions FromAccessToken(const std::string& access_token,
|
||||
std::chrono::system_clock::time_point expiration);
|
||||
|
||||
/// \brief Initialize with service account impersonation
|
||||
///
|
||||
/// Service account impersonation allows one principal (a user or service account) to
|
||||
/// impersonate a service account. It requires that the calling principal has the
|
||||
/// necessary permissions *on* the service account.
|
||||
static GcsOptions FromImpersonatedServiceAccount(
|
||||
const GcsCredentials& base_credentials, const std::string& target_service_account);
|
||||
|
||||
/// Creates service account credentials from a JSON object in string form.
|
||||
///
|
||||
/// The @p json_object is expected to be in the format described by [aip/4112]. Such an
|
||||
/// object contains the identity of a service account, as well as a private key that can
|
||||
/// be used to sign tokens, showing the caller was holding the private key.
|
||||
///
|
||||
/// In GCP one can create several "keys" for each service account, and these keys are
|
||||
/// downloaded as a JSON "key file". The contents of such a file are in the format
|
||||
/// required by this function. Remember that key files and their contents should be
|
||||
/// treated as any other secret with security implications, think of them as passwords
|
||||
/// (because they are!), don't store them or output them where unauthorized persons may
|
||||
/// read them.
|
||||
///
|
||||
/// Most applications should probably use default credentials, maybe pointing them to a
|
||||
/// file with these contents. Using this function may be useful when the json object is
|
||||
/// obtained from a Cloud Secret Manager or a similar service.
|
||||
///
|
||||
/// [aip/4112]: https://google.aip.dev/auth/4112
|
||||
static GcsOptions FromServiceAccountCredentials(const std::string& json_object);
|
||||
|
||||
/// Initialize from URIs such as "gs://bucket/object".
|
||||
static Result<GcsOptions> FromUri(const arrow::internal::Uri& uri,
|
||||
std::string* out_path);
|
||||
static Result<GcsOptions> FromUri(const std::string& uri, std::string* out_path);
|
||||
};
|
||||
|
||||
/// \brief GCS-backed FileSystem implementation.
|
||||
///
|
||||
/// GCS (Google Cloud Storage - https://cloud.google.com/storage) is a scalable object
|
||||
/// storage system for any amount of data. The main abstractions in GCS are buckets and
|
||||
/// objects. A bucket is a namespace for objects, buckets can store any number of objects,
|
||||
/// tens of millions and even billions is not uncommon. Each object contains a single
|
||||
/// blob of data, up to 5TiB in size. Buckets are typically configured to keep a single
|
||||
/// version of each object, but versioning can be enabled. Versioning is important because
|
||||
/// objects are immutable, once created one cannot append data to the object or modify the
|
||||
/// object data in any way.
|
||||
///
|
||||
/// GCS buckets are in a global namespace, if a Google Cloud customer creates a bucket
|
||||
/// named `foo` no other customer can create a bucket with the same name. Note that a
|
||||
/// principal (a user or service account) may only list the buckets they are entitled to,
|
||||
/// and then only within a project. It is not possible to list "all" the buckets.
|
||||
///
|
||||
/// Within each bucket objects are in flat namespace. GCS does not have folders or
|
||||
/// directories. However, following some conventions it is possible to emulate
|
||||
/// directories. To this end, this class:
|
||||
///
|
||||
/// - All buckets are treated as directories at the "root"
|
||||
/// - Creating a root directory results in a new bucket being created, this may be slower
|
||||
/// than most GCS operations.
|
||||
/// - The class creates marker objects for a directory, using a metadata attribute to
|
||||
/// annotate the file.
|
||||
/// - GCS can list all the objects with a given prefix, this is used to emulate listing
|
||||
/// of directories.
|
||||
/// - In object lists GCS can summarize all the objects with a common prefix as a single
|
||||
/// entry, this is used to emulate non-recursive lists. Note that GCS list time is
|
||||
/// proportional to the number of objects in the prefix. Listing recursively takes
|
||||
/// almost the same time as non-recursive lists.
|
||||
///
|
||||
class ARROW_EXPORT GcsFileSystem : public FileSystem {
|
||||
public:
|
||||
~GcsFileSystem() override = default;
|
||||
|
||||
std::string type_name() const override;
|
||||
|
||||
bool Equals(const FileSystem& other) const override;
|
||||
|
||||
Result<FileInfo> GetFileInfo(const std::string& path) override;
|
||||
Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;
|
||||
|
||||
Status CreateDir(const std::string& path, bool recursive) override;
|
||||
|
||||
Status DeleteDir(const std::string& path) override;
|
||||
|
||||
Status DeleteDirContents(const std::string& path, bool missing_dir_ok = false) override;
|
||||
|
||||
/// This is not implemented in GcsFileSystem, as it would be too dangerous.
|
||||
Status DeleteRootDirContents() override;
|
||||
|
||||
Status DeleteFile(const std::string& path) override;
|
||||
|
||||
Status Move(const std::string& src, const std::string& dest) override;
|
||||
|
||||
Status CopyFile(const std::string& src, const std::string& dest) override;
|
||||
|
||||
Result<std::shared_ptr<io::InputStream>> OpenInputStream(
|
||||
const std::string& path) override;
|
||||
Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info) override;
|
||||
|
||||
Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
|
||||
const std::string& path) override;
|
||||
Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
|
||||
const FileInfo& info) override;
|
||||
|
||||
Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
|
||||
const std::string& path,
|
||||
const std::shared_ptr<const KeyValueMetadata>& metadata) override;
|
||||
|
||||
ARROW_DEPRECATED(
|
||||
"Deprecated. "
|
||||
"OpenAppendStream is unsupported on the GCS FileSystem.")
|
||||
Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
|
||||
const std::string& path,
|
||||
const std::shared_ptr<const KeyValueMetadata>& metadata) override;
|
||||
|
||||
/// Create a GcsFileSystem instance from the given options.
|
||||
static std::shared_ptr<GcsFileSystem> Make(
|
||||
const GcsOptions& options, const io::IOContext& = io::default_io_context());
|
||||
|
||||
private:
|
||||
explicit GcsFileSystem(const GcsOptions& options, const io::IOContext& io_context);
|
||||
|
||||
class Impl;
|
||||
std::shared_ptr<Impl> impl_;
|
||||
};
|
||||
|
||||
} // namespace fs
|
||||
} // namespace arrow
|
Reference in New Issue
Block a user