mirror of
https://github.com/aykhans/AzSuicideDataVisualization.git
synced 2025-04-22 18:32:15 +00:00
99 lines
3.5 KiB
C++
99 lines
3.5 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#pragma once
|
|
|
|
#include <cassert>
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
|
|
#include "arrow/util/bit_run_reader.h"
|
|
|
|
namespace arrow {
|
|
namespace util {
|
|
namespace internal {
|
|
|
|
/// \brief Compress the buffer to spaced, excluding the null entries.
|
|
///
|
|
/// \param[in] src the source buffer
|
|
/// \param[in] num_values the size of source buffer
|
|
/// \param[in] valid_bits bitmap data indicating position of valid slots
|
|
/// \param[in] valid_bits_offset offset into valid_bits
|
|
/// \param[out] output the output buffer spaced
|
|
/// \return The size of spaced buffer.
|
|
template <typename T>
|
|
inline int SpacedCompress(const T* src, int num_values, const uint8_t* valid_bits,
|
|
int64_t valid_bits_offset, T* output) {
|
|
int num_valid_values = 0;
|
|
|
|
arrow::internal::SetBitRunReader reader(valid_bits, valid_bits_offset, num_values);
|
|
while (true) {
|
|
const auto run = reader.NextRun();
|
|
if (run.length == 0) {
|
|
break;
|
|
}
|
|
std::memcpy(output + num_valid_values, src + run.position, run.length * sizeof(T));
|
|
num_valid_values += static_cast<int32_t>(run.length);
|
|
}
|
|
|
|
return num_valid_values;
|
|
}
|
|
|
|
/// \brief Relocate values in buffer into positions of non-null values as indicated by
|
|
/// a validity bitmap.
|
|
///
|
|
/// \param[in, out] buffer the in-place buffer
|
|
/// \param[in] num_values total size of buffer including null slots
|
|
/// \param[in] null_count number of null slots
|
|
/// \param[in] valid_bits bitmap data indicating position of valid slots
|
|
/// \param[in] valid_bits_offset offset into valid_bits
|
|
/// \return The number of values expanded, including nulls.
|
|
template <typename T>
|
|
inline int SpacedExpand(T* buffer, int num_values, int null_count,
|
|
const uint8_t* valid_bits, int64_t valid_bits_offset) {
|
|
// Point to end as we add the spacing from the back.
|
|
int idx_decode = num_values - null_count;
|
|
|
|
// Depending on the number of nulls, some of the value slots in buffer may
|
|
// be uninitialized, and this will cause valgrind warnings / potentially UB
|
|
std::memset(static_cast<void*>(buffer + idx_decode), 0, null_count * sizeof(T));
|
|
if (idx_decode == 0) {
|
|
// All nulls, nothing more to do
|
|
return num_values;
|
|
}
|
|
|
|
arrow::internal::ReverseSetBitRunReader reader(valid_bits, valid_bits_offset,
|
|
num_values);
|
|
while (true) {
|
|
const auto run = reader.NextRun();
|
|
if (run.length == 0) {
|
|
break;
|
|
}
|
|
idx_decode -= static_cast<int32_t>(run.length);
|
|
assert(idx_decode >= 0);
|
|
std::memmove(buffer + run.position, buffer + idx_decode, run.length * sizeof(T));
|
|
}
|
|
|
|
// Otherwise caller gave an incorrect null_count
|
|
assert(idx_decode == 0);
|
|
return num_values;
|
|
}
|
|
|
|
} // namespace internal
|
|
} // namespace util
|
|
} // namespace arrow
|