// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include #include #include #include "arrow/util/bit_run_reader.h" namespace arrow { namespace util { namespace internal { /// \brief Compress the buffer to spaced, excluding the null entries. /// /// \param[in] src the source buffer /// \param[in] num_values the size of source buffer /// \param[in] valid_bits bitmap data indicating position of valid slots /// \param[in] valid_bits_offset offset into valid_bits /// \param[out] output the output buffer spaced /// \return The size of spaced buffer. template inline int SpacedCompress(const T* src, int num_values, const uint8_t* valid_bits, int64_t valid_bits_offset, T* output) { int num_valid_values = 0; arrow::internal::SetBitRunReader reader(valid_bits, valid_bits_offset, num_values); while (true) { const auto run = reader.NextRun(); if (run.length == 0) { break; } std::memcpy(output + num_valid_values, src + run.position, run.length * sizeof(T)); num_valid_values += static_cast(run.length); } return num_valid_values; } /// \brief Relocate values in buffer into positions of non-null values as indicated by /// a validity bitmap. /// /// \param[in, out] buffer the in-place buffer /// \param[in] num_values total size of buffer including null slots /// \param[in] null_count number of null slots /// \param[in] valid_bits bitmap data indicating position of valid slots /// \param[in] valid_bits_offset offset into valid_bits /// \return The number of values expanded, including nulls. template inline int SpacedExpand(T* buffer, int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset) { // Point to end as we add the spacing from the back. int idx_decode = num_values - null_count; // Depending on the number of nulls, some of the value slots in buffer may // be uninitialized, and this will cause valgrind warnings / potentially UB std::memset(static_cast(buffer + idx_decode), 0, null_count * sizeof(T)); if (idx_decode == 0) { // All nulls, nothing more to do return num_values; } arrow::internal::ReverseSetBitRunReader reader(valid_bits, valid_bits_offset, num_values); while (true) { const auto run = reader.NextRun(); if (run.length == 0) { break; } idx_decode -= static_cast(run.length); assert(idx_decode >= 0); std::memmove(buffer + run.position, buffer + idx_decode, run.length * sizeof(T)); } // Otherwise caller gave an incorrect null_count assert(idx_decode == 0); return num_values; } } // namespace internal } // namespace util } // namespace arrow