mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-04-22 18:32:15 +00:00
511 lines
19 KiB
C++
511 lines
19 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#pragma once
|
|
|
|
#include <map>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <utility>
|
|
|
|
#include "parquet/exception.h"
|
|
#include "parquet/schema.h"
|
|
#include "parquet/types.h"
|
|
|
|
namespace parquet {
|
|
|
|
// Defaults picked up by the file-level builders declared in this header.

/// Cipher that FileEncryptionProperties::Builder starts with.
static constexpr ParquetCipher::type kDefaultEncryptionAlgorithm =
    ParquetCipher::AES_GCM_V1;
/// Footer mode that FileEncryptionProperties::Builder starts with.
static constexpr bool kDefaultEncryptedFooter = true;
/// Initial plaintext-footer integrity check setting of
/// FileDecryptionProperties::Builder.
static constexpr bool kDefaultCheckSignature = true;
/// Initial "plaintext files allowed" setting of FileDecryptionProperties::Builder.
static constexpr bool kDefaultAllowPlaintextFiles = false;

// Size constants. They are not referenced elsewhere in this header.

/// NOTE(review): presumably the maximal accepted length of AAD metadata -- confirm
/// against the implementation file.
static constexpr int32_t kMaximalAadMetadataLength = 256;
/// NOTE(review): presumably the length of the per-file unique part of the AAD --
/// confirm against the implementation file.
static constexpr int32_t kAadFileUniqueLength = 8;
|
|
|
|
// The per-column property classes are defined further down; forward declarations are
// enough for the map aliases below.
class ColumnDecryptionProperties;
class ColumnEncryptionProperties;

/// Per-column decryption properties, looked up by a column path string.
using ColumnPathToDecryptionPropertiesMap =
    std::map<std::string, std::shared_ptr<ColumnDecryptionProperties>>;

/// Per-column encryption properties, looked up by a column path string.
using ColumnPathToEncryptionPropertiesMap =
    std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>>;
|
|
|
|
class PARQUET_EXPORT DecryptionKeyRetriever {
|
|
public:
|
|
virtual std::string GetKey(const std::string& key_metadata) = 0;
|
|
virtual ~DecryptionKeyRetriever() {}
|
|
};
|
|
|
|
/// Simple integer key retriever
|
|
class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever {
|
|
public:
|
|
void PutKey(uint32_t key_id, const std::string& key);
|
|
std::string GetKey(const std::string& key_metadata) override;
|
|
|
|
private:
|
|
std::map<uint32_t, std::string> key_map_;
|
|
};
|
|
|
|
// Simple string key retriever
|
|
class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever {
|
|
public:
|
|
void PutKey(const std::string& key_id, const std::string& key);
|
|
std::string GetKey(const std::string& key_metadata) override;
|
|
|
|
private:
|
|
std::map<std::string, std::string> key_map_;
|
|
};
|
|
|
|
/// ParquetException subtype for hidden columns. The constructor argument is handed
/// to the ParquetException constructor as a C string.
class PARQUET_EXPORT HiddenColumnException : public ParquetException {
 public:
  explicit HiddenColumnException(const std::string& columnPath)
      : ParquetException(columnPath.c_str()) {}
};
|
|
|
|
/// ParquetException subtype for denied key access. The constructor argument is
/// handed to the ParquetException constructor as a C string.
class PARQUET_EXPORT KeyAccessDeniedException : public ParquetException {
 public:
  explicit KeyAccessDeniedException(const std::string& columnPath)
      : ParquetException(columnPath.c_str()) {}
};
|
|
|
|
inline const uint8_t* str2bytes(const std::string& str) {
|
|
if (str.empty()) return NULLPTR;
|
|
|
|
char* cbytes = const_cast<char*>(str.c_str());
|
|
return reinterpret_cast<const uint8_t*>(cbytes);
|
|
}
|
|
|
|
class PARQUET_EXPORT ColumnEncryptionProperties {
|
|
public:
|
|
class PARQUET_EXPORT Builder {
|
|
public:
|
|
/// Convenience builder for encrypted columns.
|
|
explicit Builder(const std::string& name) : Builder(name, true) {}
|
|
|
|
/// Convenience builder for encrypted columns.
|
|
explicit Builder(const std::shared_ptr<schema::ColumnPath>& path)
|
|
: Builder(path->ToDotString(), true) {}
|
|
|
|
/// Set a column-specific key.
|
|
/// If key is not set on an encrypted column, the column will
|
|
/// be encrypted with the footer key.
|
|
/// keyBytes Key length must be either 16, 24 or 32 bytes.
|
|
/// The key is cloned, and will be wiped out (array values set to 0) upon completion
|
|
/// of file writing.
|
|
/// Caller is responsible for wiping out the input key array.
|
|
Builder* key(std::string column_key);
|
|
|
|
/// Set a key retrieval metadata.
|
|
/// use either key_metadata() or key_id(), not both
|
|
Builder* key_metadata(const std::string& key_metadata);
|
|
|
|
/// A convenience function to set key metadata using a string id.
|
|
/// Set a key retrieval metadata (converted from String).
|
|
/// use either key_metadata() or key_id(), not both
|
|
/// key_id will be converted to metadata (UTF-8 array).
|
|
Builder* key_id(const std::string& key_id);
|
|
|
|
std::shared_ptr<ColumnEncryptionProperties> build() {
|
|
return std::shared_ptr<ColumnEncryptionProperties>(
|
|
new ColumnEncryptionProperties(encrypted_, column_path_, key_, key_metadata_));
|
|
}
|
|
|
|
private:
|
|
const std::string column_path_;
|
|
bool encrypted_;
|
|
std::string key_;
|
|
std::string key_metadata_;
|
|
|
|
Builder(const std::string path, bool encrypted)
|
|
: column_path_(path), encrypted_(encrypted) {}
|
|
};
|
|
|
|
std::string column_path() const { return column_path_; }
|
|
bool is_encrypted() const { return encrypted_; }
|
|
bool is_encrypted_with_footer_key() const { return encrypted_with_footer_key_; }
|
|
std::string key() const { return key_; }
|
|
std::string key_metadata() const { return key_metadata_; }
|
|
|
|
/// Upon completion of file writing, the encryption key
|
|
/// will be wiped out.
|
|
void WipeOutEncryptionKey() { key_.clear(); }
|
|
|
|
bool is_utilized() {
|
|
if (key_.empty())
|
|
return false; // can re-use column properties without encryption keys
|
|
return utilized_;
|
|
}
|
|
|
|
/// ColumnEncryptionProperties object can be used for writing one file only.
|
|
/// Mark ColumnEncryptionProperties as utilized once it is used in
|
|
/// FileEncryptionProperties as the encryption key will be wiped out upon
|
|
/// completion of file writing.
|
|
void set_utilized() { utilized_ = true; }
|
|
|
|
std::shared_ptr<ColumnEncryptionProperties> DeepClone() {
|
|
std::string key_copy = key_;
|
|
return std::shared_ptr<ColumnEncryptionProperties>(new ColumnEncryptionProperties(
|
|
encrypted_, column_path_, key_copy, key_metadata_));
|
|
}
|
|
|
|
ColumnEncryptionProperties() = default;
|
|
ColumnEncryptionProperties(const ColumnEncryptionProperties& other) = default;
|
|
ColumnEncryptionProperties(ColumnEncryptionProperties&& other) = default;
|
|
|
|
private:
|
|
const std::string column_path_;
|
|
bool encrypted_;
|
|
bool encrypted_with_footer_key_;
|
|
std::string key_;
|
|
std::string key_metadata_;
|
|
bool utilized_;
|
|
explicit ColumnEncryptionProperties(bool encrypted, const std::string& column_path,
|
|
const std::string& key,
|
|
const std::string& key_metadata);
|
|
};
|
|
|
|
class PARQUET_EXPORT ColumnDecryptionProperties {
|
|
public:
|
|
class PARQUET_EXPORT Builder {
|
|
public:
|
|
explicit Builder(const std::string& name) : column_path_(name) {}
|
|
|
|
explicit Builder(const std::shared_ptr<schema::ColumnPath>& path)
|
|
: Builder(path->ToDotString()) {}
|
|
|
|
/// Set an explicit column key. If applied on a file that contains
|
|
/// key metadata for this column the metadata will be ignored,
|
|
/// the column will be decrypted with this key.
|
|
/// key length must be either 16, 24 or 32 bytes.
|
|
Builder* key(const std::string& key);
|
|
|
|
std::shared_ptr<ColumnDecryptionProperties> build();
|
|
|
|
private:
|
|
const std::string column_path_;
|
|
std::string key_;
|
|
};
|
|
|
|
ColumnDecryptionProperties() = default;
|
|
ColumnDecryptionProperties(const ColumnDecryptionProperties& other) = default;
|
|
ColumnDecryptionProperties(ColumnDecryptionProperties&& other) = default;
|
|
|
|
std::string column_path() const { return column_path_; }
|
|
std::string key() const { return key_; }
|
|
bool is_utilized() { return utilized_; }
|
|
|
|
/// ColumnDecryptionProperties object can be used for reading one file only.
|
|
/// Mark ColumnDecryptionProperties as utilized once it is used in
|
|
/// FileDecryptionProperties as the encryption key will be wiped out upon
|
|
/// completion of file reading.
|
|
void set_utilized() { utilized_ = true; }
|
|
|
|
/// Upon completion of file reading, the encryption key
|
|
/// will be wiped out.
|
|
void WipeOutDecryptionKey();
|
|
|
|
std::shared_ptr<ColumnDecryptionProperties> DeepClone();
|
|
|
|
private:
|
|
const std::string column_path_;
|
|
std::string key_;
|
|
bool utilized_;
|
|
|
|
/// This class is only required for setting explicit column decryption keys -
|
|
/// to override key retriever (or to provide keys when key metadata and/or
|
|
/// key retriever are not available)
|
|
explicit ColumnDecryptionProperties(const std::string& column_path,
|
|
const std::string& key);
|
|
};
|
|
|
|
class PARQUET_EXPORT AADPrefixVerifier {
|
|
public:
|
|
/// Verifies identity (AAD Prefix) of individual file,
|
|
/// or of file collection in a data set.
|
|
/// Throws exception if an AAD prefix is wrong.
|
|
/// In a data set, AAD Prefixes should be collected,
|
|
/// and then checked for missing files.
|
|
virtual void Verify(const std::string& aad_prefix) = 0;
|
|
virtual ~AADPrefixVerifier() {}
|
|
};
|
|
|
|
class PARQUET_EXPORT FileDecryptionProperties {
|
|
public:
|
|
class PARQUET_EXPORT Builder {
|
|
public:
|
|
Builder() {
|
|
check_plaintext_footer_integrity_ = kDefaultCheckSignature;
|
|
plaintext_files_allowed_ = kDefaultAllowPlaintextFiles;
|
|
}
|
|
|
|
/// Set an explicit footer key. If applied on a file that contains
|
|
/// footer key metadata the metadata will be ignored, the footer
|
|
/// will be decrypted/verified with this key.
|
|
/// If explicit key is not set, footer key will be fetched from
|
|
/// key retriever.
|
|
/// With explicit keys or AAD prefix, new encryption properties object must be
|
|
/// created for each encrypted file.
|
|
/// Explicit encryption keys (footer and column) are cloned.
|
|
/// Upon completion of file reading, the cloned encryption keys in the properties
|
|
/// will be wiped out (array values set to 0).
|
|
/// Caller is responsible for wiping out the input key array.
|
|
/// param footerKey Key length must be either 16, 24 or 32 bytes.
|
|
Builder* footer_key(const std::string footer_key);
|
|
|
|
/// Set explicit column keys (decryption properties).
|
|
/// Its also possible to set a key retriever on this property object.
|
|
/// Upon file decryption, availability of explicit keys is checked before
|
|
/// invocation of the retriever callback.
|
|
/// If an explicit key is available for a footer or a column,
|
|
/// its key metadata will be ignored.
|
|
Builder* column_keys(
|
|
const ColumnPathToDecryptionPropertiesMap& column_decryption_properties);
|
|
|
|
/// Set a key retriever callback. Its also possible to
|
|
/// set explicit footer or column keys on this file property object.
|
|
/// Upon file decryption, availability of explicit keys is checked before
|
|
/// invocation of the retriever callback.
|
|
/// If an explicit key is available for a footer or a column,
|
|
/// its key metadata will be ignored.
|
|
Builder* key_retriever(const std::shared_ptr<DecryptionKeyRetriever>& key_retriever);
|
|
|
|
/// Skip integrity verification of plaintext footers.
|
|
/// If not called, integrity of plaintext footers will be checked in runtime,
|
|
/// and an exception will be thrown in the following situations:
|
|
/// - footer signing key is not available
|
|
/// (not passed, or not found by key retriever)
|
|
/// - footer content and signature don't match
|
|
Builder* disable_footer_signature_verification() {
|
|
check_plaintext_footer_integrity_ = false;
|
|
return this;
|
|
}
|
|
|
|
/// Explicitly supply the file AAD prefix.
|
|
/// A must when a prefix is used for file encryption, but not stored in file.
|
|
/// If AAD prefix is stored in file, it will be compared to the explicitly
|
|
/// supplied value and an exception will be thrown if they differ.
|
|
Builder* aad_prefix(const std::string& aad_prefix);
|
|
|
|
/// Set callback for verification of AAD Prefixes stored in file.
|
|
Builder* aad_prefix_verifier(std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier);
|
|
|
|
/// By default, reading plaintext (unencrypted) files is not
|
|
/// allowed when using a decryptor
|
|
/// - in order to detect files that were not encrypted by mistake.
|
|
/// However, the default behavior can be overridden by calling this method.
|
|
/// The caller should use then a different method to ensure encryption
|
|
/// of files with sensitive data.
|
|
Builder* plaintext_files_allowed() {
|
|
plaintext_files_allowed_ = true;
|
|
return this;
|
|
}
|
|
|
|
std::shared_ptr<FileDecryptionProperties> build() {
|
|
return std::shared_ptr<FileDecryptionProperties>(new FileDecryptionProperties(
|
|
footer_key_, key_retriever_, check_plaintext_footer_integrity_, aad_prefix_,
|
|
aad_prefix_verifier_, column_decryption_properties_, plaintext_files_allowed_));
|
|
}
|
|
|
|
private:
|
|
std::string footer_key_;
|
|
std::string aad_prefix_;
|
|
std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
|
|
ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
|
|
|
|
std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
|
|
bool check_plaintext_footer_integrity_;
|
|
bool plaintext_files_allowed_;
|
|
};
|
|
|
|
std::string column_key(const std::string& column_path) const;
|
|
|
|
std::string footer_key() const { return footer_key_; }
|
|
|
|
std::string aad_prefix() const { return aad_prefix_; }
|
|
|
|
const std::shared_ptr<DecryptionKeyRetriever>& key_retriever() const {
|
|
return key_retriever_;
|
|
}
|
|
|
|
bool check_plaintext_footer_integrity() const {
|
|
return check_plaintext_footer_integrity_;
|
|
}
|
|
|
|
bool plaintext_files_allowed() const { return plaintext_files_allowed_; }
|
|
|
|
const std::shared_ptr<AADPrefixVerifier>& aad_prefix_verifier() const {
|
|
return aad_prefix_verifier_;
|
|
}
|
|
|
|
/// Upon completion of file reading, the encryption keys in the properties
|
|
/// will be wiped out (array values set to 0).
|
|
void WipeOutDecryptionKeys();
|
|
|
|
bool is_utilized();
|
|
|
|
/// FileDecryptionProperties object can be used for reading one file only.
|
|
/// Mark FileDecryptionProperties as utilized once it is used to read a file as the
|
|
/// encryption keys will be wiped out upon completion of file reading.
|
|
void set_utilized() { utilized_ = true; }
|
|
|
|
/// FileDecryptionProperties object can be used for reading one file only.
|
|
/// (unless this object keeps the keyRetrieval callback only, and no explicit
|
|
/// keys or aadPrefix).
|
|
/// At the end, keys are wiped out in the memory.
|
|
/// This method allows to clone identical properties for another file,
|
|
/// with an option to update the aadPrefix (if newAadPrefix is null,
|
|
/// aadPrefix will be cloned too)
|
|
std::shared_ptr<FileDecryptionProperties> DeepClone(std::string new_aad_prefix = "");
|
|
|
|
private:
|
|
std::string footer_key_;
|
|
std::string aad_prefix_;
|
|
std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
|
|
|
|
const std::string empty_string_ = "";
|
|
ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
|
|
|
|
std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
|
|
bool check_plaintext_footer_integrity_;
|
|
bool plaintext_files_allowed_;
|
|
bool utilized_;
|
|
|
|
FileDecryptionProperties(
|
|
const std::string& footer_key,
|
|
std::shared_ptr<DecryptionKeyRetriever> key_retriever,
|
|
bool check_plaintext_footer_integrity, const std::string& aad_prefix,
|
|
std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier,
|
|
const ColumnPathToDecryptionPropertiesMap& column_decryption_properties,
|
|
bool plaintext_files_allowed);
|
|
};
|
|
|
|
class PARQUET_EXPORT FileEncryptionProperties {
|
|
public:
|
|
class PARQUET_EXPORT Builder {
|
|
public:
|
|
explicit Builder(const std::string& footer_key)
|
|
: parquet_cipher_(kDefaultEncryptionAlgorithm),
|
|
encrypted_footer_(kDefaultEncryptedFooter) {
|
|
footer_key_ = footer_key;
|
|
store_aad_prefix_in_file_ = false;
|
|
}
|
|
|
|
/// Create files with plaintext footer.
|
|
/// If not called, the files will be created with encrypted footer (default).
|
|
Builder* set_plaintext_footer() {
|
|
encrypted_footer_ = false;
|
|
return this;
|
|
}
|
|
|
|
/// Set encryption algorithm.
|
|
/// If not called, files will be encrypted with AES_GCM_V1 (default).
|
|
Builder* algorithm(ParquetCipher::type parquet_cipher) {
|
|
parquet_cipher_ = parquet_cipher;
|
|
return this;
|
|
}
|
|
|
|
/// Set a key retrieval metadata (converted from String).
|
|
/// use either footer_key_metadata or footer_key_id, not both.
|
|
Builder* footer_key_id(const std::string& key_id);
|
|
|
|
/// Set a key retrieval metadata.
|
|
/// use either footer_key_metadata or footer_key_id, not both.
|
|
Builder* footer_key_metadata(const std::string& footer_key_metadata);
|
|
|
|
/// Set the file AAD Prefix.
|
|
Builder* aad_prefix(const std::string& aad_prefix);
|
|
|
|
/// Skip storing AAD Prefix in file.
|
|
/// If not called, and if AAD Prefix is set, it will be stored.
|
|
Builder* disable_aad_prefix_storage();
|
|
|
|
/// Set the list of encrypted columns and their properties (keys etc).
|
|
/// If not called, all columns will be encrypted with the footer key.
|
|
/// If called, the file columns not in the list will be left unencrypted.
|
|
Builder* encrypted_columns(
|
|
const ColumnPathToEncryptionPropertiesMap& encrypted_columns);
|
|
|
|
std::shared_ptr<FileEncryptionProperties> build() {
|
|
return std::shared_ptr<FileEncryptionProperties>(new FileEncryptionProperties(
|
|
parquet_cipher_, footer_key_, footer_key_metadata_, encrypted_footer_,
|
|
aad_prefix_, store_aad_prefix_in_file_, encrypted_columns_));
|
|
}
|
|
|
|
private:
|
|
ParquetCipher::type parquet_cipher_;
|
|
bool encrypted_footer_;
|
|
std::string footer_key_;
|
|
std::string footer_key_metadata_;
|
|
|
|
std::string aad_prefix_;
|
|
bool store_aad_prefix_in_file_;
|
|
ColumnPathToEncryptionPropertiesMap encrypted_columns_;
|
|
};
|
|
bool encrypted_footer() const { return encrypted_footer_; }
|
|
|
|
EncryptionAlgorithm algorithm() const { return algorithm_; }
|
|
|
|
std::string footer_key() const { return footer_key_; }
|
|
|
|
std::string footer_key_metadata() const { return footer_key_metadata_; }
|
|
|
|
std::string file_aad() const { return file_aad_; }
|
|
|
|
std::shared_ptr<ColumnEncryptionProperties> column_encryption_properties(
|
|
const std::string& column_path);
|
|
|
|
bool is_utilized() const { return utilized_; }
|
|
|
|
/// FileEncryptionProperties object can be used for writing one file only.
|
|
/// Mark FileEncryptionProperties as utilized once it is used to write a file as the
|
|
/// encryption keys will be wiped out upon completion of file writing.
|
|
void set_utilized() { utilized_ = true; }
|
|
|
|
/// Upon completion of file writing, the encryption keys
|
|
/// will be wiped out (array values set to 0).
|
|
void WipeOutEncryptionKeys();
|
|
|
|
/// FileEncryptionProperties object can be used for writing one file only.
|
|
/// (at the end, keys are wiped out in the memory).
|
|
/// This method allows to clone identical properties for another file,
|
|
/// with an option to update the aadPrefix (if newAadPrefix is null,
|
|
/// aadPrefix will be cloned too)
|
|
std::shared_ptr<FileEncryptionProperties> DeepClone(std::string new_aad_prefix = "");
|
|
|
|
ColumnPathToEncryptionPropertiesMap encrypted_columns() const {
|
|
return encrypted_columns_;
|
|
}
|
|
|
|
private:
|
|
EncryptionAlgorithm algorithm_;
|
|
std::string footer_key_;
|
|
std::string footer_key_metadata_;
|
|
bool encrypted_footer_;
|
|
std::string file_aad_;
|
|
std::string aad_prefix_;
|
|
bool utilized_;
|
|
bool store_aad_prefix_in_file_;
|
|
ColumnPathToEncryptionPropertiesMap encrypted_columns_;
|
|
|
|
FileEncryptionProperties(ParquetCipher::type cipher, const std::string& footer_key,
|
|
const std::string& footer_key_metadata, bool encrypted_footer,
|
|
const std::string& aad_prefix, bool store_aad_prefix_in_file,
|
|
const ColumnPathToEncryptionPropertiesMap& encrypted_columns);
|
|
};
|
|
|
|
} // namespace parquet
|