diff --git a/include/sdsl/cereal.hpp b/include/sdsl/cereal.hpp index 4f5c9f37..b3b95157 100644 --- a/include/sdsl/cereal.hpp +++ b/include/sdsl/cereal.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #endif diff --git a/include/sdsl/csa_sampling_strategy.hpp b/include/sdsl/csa_sampling_strategy.hpp index b78c5ba5..c0d43fc9 100644 --- a/include/sdsl/csa_sampling_strategy.hpp +++ b/include/sdsl/csa_sampling_strategy.hpp @@ -1007,7 +1007,7 @@ class _fuzzy_isa_sampling_support { structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); size_type written_bytes = 0; - written_bytes += m_select_marked_sa.serialize(out, v, "select_marked_sa"); + written_bytes += m_select_marked_sa.serialize(out, child, "select_marked_sa"); structure_tree::add_size(child, written_bytes); return written_bytes; } diff --git a/include/sdsl/int_vector.hpp b/include/sdsl/int_vector.hpp index 878ad165..11edc9d8 100644 --- a/include/sdsl/int_vector.hpp +++ b/include/sdsl/int_vector.hpp @@ -1301,7 +1301,7 @@ class int_vector_const_iterator : public int_vector_iterator_base m_len = it.m_len; } - int_vector_const_iterator & operator=(const int_vector_const_iterator&) = default; + int_vector_const_iterator & operator=(const int_vector_const_iterator &) = default; const_reference operator*() const { diff --git a/include/sdsl/rank_support_int.hpp b/include/sdsl/rank_support_int.hpp index 18e43b31..4a4dab9e 100644 --- a/include/sdsl/rank_support_int.hpp +++ b/include/sdsl/rank_support_int.hpp @@ -1,11 +1,11 @@ // Copyright (c) 2016, the SDSL Project Authors. All rights reserved. // Please see the AUTHORS file for details. Use of this source code is governed // by a BSD license that can be found in the LICENSE file. -/*! \file rank_support_int.hpp - \brief rank_support_int.hpp contains classes that support a sdsl::int_vector with constant time rank information. - Rank is defined as the number of occurrences of a value up to a given position. 
- \author Christopher Pockrandt -*/ +/*!\file rank_support_int.hpp + * \brief rank_support_int.hpp contains classes that support a sdsl::int_vector with constant time rank information. + * Rank is defined as the number of occurrences of a value up to a given position. + * \author Christopher Pockrandt + */ #ifndef INCLUDED_SDSL_RANK_SUPPORT_INT #define INCLUDED_SDSL_RANK_SUPPORT_INT @@ -13,19 +13,18 @@ * This group contains data structures which support an sdsl::int_vector with the rank method. */ -#include "int_vector.hpp" -#include "uint128_t.hpp" +#include +#include // TODO: benchmark the use of compiler hints for branch prediction -#define likely(x) __builtin_expect((x),1) -#define unlikely(x) __builtin_expect((x),0) +#define likely(x) __builtin_expect((x), 1) +#define unlikely(x) __builtin_expect((x), 0) //! Namespace for the succinct data structure library. -namespace sdsl { +namespace sdsl +{ //! The base class of classes supporting rank_queries for a sdsl::int_vector in constant time. -/*! -*/ constexpr size_t floor_log2(size_t const n) { @@ -38,16 +37,16 @@ constexpr size_t ceil_log2(size_t const n) } template -class rank_support_int { +class rank_support_int +{ -public: + public: typedef typename int_vector<>::size_type size_type; typedef typename int_vector<>::value_type value_type; static_assert(alphabet_size > 2, "Rank support is only implemented on int_vectors with an alphabet size of > 2."); -protected: - + protected: // Constructs a bit mask with the pattern w of a given length. // It is concatenated until the length of the bitmask reaches max_length. 
template @@ -63,42 +62,40 @@ class rank_support_int { for (value_type v = 0; v < alphabet_size; ++v) { masks[v] = v; - for (uint8_t i = sigma_bits * 2; i < 64; i <<= 1) - masks[v] |= masks[v] << i; + for (uint8_t i = sigma_bits * 2; i < 64; i <<= 1) masks[v] |= masks[v] << i; } uint64_t tmp_carry = masks[1]; - for (value_type v = 0; v < alphabet_size; ++v) - masks[v] |= tmp_carry << sigma_bits; + for (value_type v = 0; v < alphabet_size; ++v) masks[v] |= tmp_carry << sigma_bits; return masks; } -protected: - static constexpr uint8_t sigma{alphabet_size}; - static constexpr uint8_t sigma_bits{ceil_log2(alphabet_size)}; - static constexpr uint8_t bits_per_word{(64 / sigma_bits) * sigma_bits}; - static constexpr uint64_t even_mask{bm_rec(bits::lo_set[sigma_bits], sigma_bits * 2, 64)}; - static constexpr uint64_t carry_select_mask{bm_rec(1ULL << sigma_bits, sigma_bits * 2, 64)}; + protected: + static constexpr uint8_t sigma{ alphabet_size }; + static constexpr uint8_t sigma_bits{ ceil_log2(alphabet_size) }; + static constexpr uint8_t bits_per_word{ (64 / sigma_bits) * sigma_bits }; + static constexpr uint64_t even_mask{ bm_rec(bits::lo_set[sigma_bits], sigma_bits * 2, 64) }; + static constexpr uint64_t carry_select_mask{ bm_rec(1ULL << sigma_bits, sigma_bits * 2, 64) }; static const std::array masks; - const int_vector<>* m_v; //!< Pointer to the rank supported bit_vector + const int_vector<> * m_v; //!< Pointer to the rank supported bit_vector -public: + public: //! Constructor /*! \param v The supported int_vector. */ - rank_support_int(const int_vector<>* v = nullptr) + rank_support_int(const int_vector<> * v = nullptr) { // Check that the actual width of the vector has same size as sigma_bits. assert((v != nullptr) ? sigma_bits == v->width() : true); m_v = v; } //! 
Copy constructor - rank_support_int(const rank_support_int&) = default; - rank_support_int(rank_support_int&&) = default; - rank_support_int& operator=(const rank_support_int&) = default; - rank_support_int& operator=(rank_support_int&&) = default; + rank_support_int(const rank_support_int &) = default; + rank_support_int(rank_support_int &&) = default; + rank_support_int & operator=(const rank_support_int &) = default; + rank_support_int & operator=(rank_support_int &&) = default; //! Destructor virtual ~rank_support_int() {} @@ -117,31 +114,30 @@ class rank_support_int { //! Answers rank queries for the supported int_vector. /*! \param i Argument for the length of the prefix v[0..i-1]. * \param v Argument which value (including smaller values) to count. - * \returns Number of occurrences of elements smaller or equal to v in the prefix [0..i-1] of the supported int_vector. - * \note Method init has to be called before the first call of rank. - * \sa init + * \returns Number of occurrences of elements smaller or equal to v in the prefix [0..i-1] of the supported + * int_vector. \note Method init has to be called before the first call of rank. \sa init */ virtual size_type prefix_rank(const size_type i, const value_type v) const = 0; //! Serializes rank_support_int. /*! \param out Out-Stream to serialize the data to. */ - virtual size_type serialize(std::ostream& out, structure_tree_node* v, const std::string name) const = 0; + virtual size_type serialize(std::ostream & out, structure_tree_node * v, const std::string name) const = 0; //! Loads the rank_support_int. /*! \param in In-Stream to load the rank_support_int data from. * \param v The supported int_vector. */ - virtual void load(std::istream& in, const int_vector<>* v = nullptr) = 0; + virtual void load(std::istream & in, const int_vector<> * v = nullptr) = 0; //! Sets the supported int_vector to the given pointer. /*! \param v The new int_vector to support. 
* \note Method init has to be called before the next call of rank or prefix_rank. * \sa init, rank, prefix_rank */ - virtual void set_vector(const int_vector<>* v = nullptr) = 0; + virtual void set_vector(const int_vector<> * v = nullptr) = 0; -protected: + protected: // Mask the set prefix positions. static constexpr uint64_t mask_prefix(value_type const v, uint64_t const w_even, uint64_t const w_odd) noexcept { @@ -156,7 +152,7 @@ class rank_support_int { // Count how often value v or smaller occurs in the word w. static constexpr uint64_t set_positions_prefix(const uint64_t w, const value_type v) noexcept { - uint64_t const w_even = even_mask & w; // retrieve even positions + uint64_t const w_even = even_mask & w; // retrieve even positions uint64_t const w_odd = even_mask & (w >> sigma_bits); // retrieve odd positions return mask_prefix(v, w_even, w_odd); } @@ -167,7 +163,7 @@ class rank_support_int { { assert(v > 0); // optimiyed version of set_positions(w, v) - set_positions(w, v - 1) - uint64_t const w_even = even_mask & w; // retrieve even positions + uint64_t const w_even = even_mask & w; // retrieve even positions uint64_t const w_odd = even_mask & (w >> sigma_bits); // retrieve odd positions uint64_t res = ((masks[v] - w_even) & ~(masks[v - 1] - w_even)) & carry_select_mask; res |= (((masks[v] - w_odd) & ~(masks[v - 1] - w_odd)) & carry_select_mask) << 1; @@ -175,17 +171,17 @@ class rank_support_int { } // Counts the occurrences of elements smaller or equal to v in the word starting at data up to position idx. - template + template static constexpr std::array word_prefix_rank(const uint64_t word, const size_type bit_pos, - const value_t ...values) noexcept + const value_t... 
values) noexcept { uint64_t const mask = bits::lo_set[(bit_pos % bits_per_word) + 1]; - uint64_t const w_even = even_mask & word; // retrieve even positions + uint64_t const w_even = even_mask & word; // retrieve even positions uint64_t const w_odd = even_mask & (word >> sigma_bits); // retrieve odd positions - return {(bits::cnt(mask_prefix(values, w_even, w_odd) & mask))...}; + return { (bits::cnt(mask_prefix(values, w_even, w_odd) & mask))... }; } // Counts the occurrences of elements smaller or equal to v in the word starting at data up to position idx. @@ -211,7 +207,7 @@ class rank_support_int { } // Returns the word a the given word position. - static constexpr uint64_t extract_word(const uint64_t* data, const size_type word_position) noexcept + static constexpr uint64_t extract_word(const uint64_t * data, const size_type word_position) noexcept { return *(data + word_position); } @@ -222,7 +218,7 @@ const std::array rank_support_int::masks } // end namespace sdsl -#include "rank_support_int_v.hpp" #include "rank_support_int_scan.hpp" +#include "rank_support_int_v.hpp" #endif // end file diff --git a/include/sdsl/rank_support_int_scan.hpp b/include/sdsl/rank_support_int_scan.hpp index 113dc8d9..57c34925 100644 --- a/include/sdsl/rank_support_int_scan.hpp +++ b/include/sdsl/rank_support_int_scan.hpp @@ -1,17 +1,19 @@ // Copyright (c) 2016, the SDSL Project Authors. All rights reserved. // Please see the AUTHORS file for details. Use of this source code is governed // by a BSD license that can be found in the LICENSE file. -/*! \file rank_support_int_scan.hpp - \brief rank_support_int_scan.hpp contains rank_support_int_scan that support a sdsl::int_vector with linear time rank information. - \author Christopher Pockrandt -*/ +/*!\file rank_support_int_scan.hpp + * \brief rank_support_int_scan.hpp contains rank_support_int_scan that support a sdsl::int_vector with linear time + * rank information. 
+ * \author Christopher Pockrandt + */ #ifndef INCLUDED_SDSL_RANK_SUPPORT_INT_SCAN #define INCLUDED_SDSL_RANK_SUPPORT_INT_SCAN -#include "rank_support_int.hpp" +#include //! Namespace for the succinct data structure library. -namespace sdsl { +namespace sdsl +{ //! A class supporting rank queries in linear time. /*! \par Space complexity @@ -24,30 +26,37 @@ namespace sdsl { */ template -class rank_support_int_scan : public rank_support_int { -private: +class rank_support_int_scan : public rank_support_int +{ + private: using base_t = rank_support_int; -public: + + public: typedef int_vector<> int_vector_type; typedef typename rank_support_int::size_type size_type; typedef typename rank_support_int::value_type value_type; -public: - explicit rank_support_int_scan(const int_vector<>* v = nullptr) : rank_support_int(v){}; - rank_support_int_scan(const rank_support_int_scan& rs) = default; - rank_support_int_scan(rank_support_int_scan&& rs) = default; - rank_support_int_scan& operator=(const rank_support_int_scan& rs) = default; - rank_support_int_scan& operator=(rank_support_int_scan&& rs) = default; + public: + explicit rank_support_int_scan(const int_vector<> * v = nullptr) + : rank_support_int(v){}; + rank_support_int_scan(const rank_support_int_scan & rs) = default; + rank_support_int_scan(rank_support_int_scan && rs) = default; + rank_support_int_scan & operator=(const rank_support_int_scan & rs) = default; + rank_support_int_scan & operator=(rank_support_int_scan && rs) = default; size_type rank(size_type idx, const value_type v) const; size_type operator()(size_type idx, const value_type v) const { return rank(idx, v); }; size_type prefix_rank(size_type idx, const value_type v) const; size_type size() const { return this->m_v->size(); }; - size_type serialize(std::ostream& out, structure_tree_node* v = nullptr, const std::string name = "") const + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, const std::string name = "") const { 
return serialize_empty_object(out, v, name, this); } - void load(std::istream&, const int_vector<>* v = nullptr) { this->m_v = v; this->init(v); } - void set_vector(const int_vector<>* v = nullptr) { this->m_v = v; } + void load(std::istream &, const int_vector<> * v = nullptr) + { + this->m_v = v; + this->init(v); + } + void set_vector(const int_vector<> * v = nullptr) { this->m_v = v; } }; //! Counts the occurrences of v in the prefix [0..idx-1] @@ -56,21 +65,22 @@ class rank_support_int_scan : public rank_support_int { * \sa prefix_rank */ template -inline typename rank_support_int_scan::size_type -rank_support_int_scan::rank(const size_type idx, const value_type v) const +inline typename rank_support_int_scan::size_type rank_support_int_scan::rank( + const size_type idx, + const value_type v) const { assert(v < this->t_v); assert(this->m_v != nullptr); assert(idx <= this->m_v->size()); - if (unlikely(v == 0)) - return prefix_rank(idx, v); + if (unlikely(v == 0)) return prefix_rank(idx, v); - const uint64_t* p = this->m_v->data(); + const uint64_t * p = this->m_v->data(); size_type i = 0; size_type result = 0; size_type word_pos = (idx * this->t_b) >> 6; - while (i < word_pos) { + while (i < word_pos) + { result += base_t::full_word_rank(base_t::extract_word(p, i), v); ++i; } @@ -83,22 +93,23 @@ rank_support_int_scan::rank(const size_type idx, const value_type * \sa rank */ template -inline typename rank_support_int_scan::size_type -rank_support_int_scan::prefix_rank(const size_type idx, const value_type v) const +inline typename rank_support_int_scan::size_type rank_support_int_scan::prefix_rank( + const size_type idx, + const value_type v) const { assert(v < this->t_v); assert(this->m_v != nullptr); assert(idx <= this->m_v->size()); - if (unlikely(v == this->t_v - 1)) - return idx; + if (unlikely(v == this->t_v - 1)) return idx; - const uint64_t* p = this->m_v->data(); - size_type word_pos = (idx * this->sigma_bits) >> 6; - size_type i = 0; - size_type result 
= 0; + const uint64_t * p = this->m_v->data(); + size_type word_pos = (idx * this->sigma_bits) >> 6; + size_type i = 0; + size_type result = 0; - while (i < word_pos) { + while (i < word_pos) + { result += base_t::full_word_prefix_rank(base_t::extract_word(p, i), v); ++i; } @@ -106,6 +117,6 @@ rank_support_int_scan::prefix_rank(const size_type idx, const val return result + base_t::word_prefix_rank(base_t::extract_word(p, idx), idx * this->sigma_bits, v)[0]; } -} // end namespace sds +} // namespace sdsl #endif // end file diff --git a/include/sdsl/rank_support_int_v.hpp b/include/sdsl/rank_support_int_v.hpp index f6102812..89c09130 100644 --- a/include/sdsl/rank_support_int_v.hpp +++ b/include/sdsl/rank_support_int_v.hpp @@ -1,21 +1,23 @@ // Copyright (c) 2016, the SDSL Project Authors. All rights reserved. // Please see the AUTHORS file for details. Use of this source code is governed // by a BSD license that can be found in the LICENSE file. -/*! \file rank_support_int_v.hpp - * \brief rank_support_int_v.hpp contains rank_support_int_v. - * \author Christopher Pockrandt - * \author René Rahn +/*!\file rank_support_int_v.hpp + * \brief rank_support_int_v.hpp contains rank_support_int_v. + * \author Christopher Pockrandt + * \author René Rahn */ #ifndef INCLUDED_SDSL_RANK_SUPPORT_INT_V #define INCLUDED_SDSL_RANK_SUPPORT_INT_V #include -#include "io.hpp" -#include "rank_support_int.hpp" +#include +#include -namespace sdsl{ -namespace detail { +namespace sdsl +{ +namespace detail +{ /*!\brief A bit compressed * \tparam value_t The represented value_type. @@ -29,7 +31,7 @@ namespace detail { template class bit_compressed_word { -private: + private: static_assert(bits_per_value <= 64, "The maximum bit size is 64 for a value."); //!\brief The maximal number of values that can be stored in one word. @@ -40,7 +42,7 @@ class bit_compressed_word //!\brief The data holder that stores the compressed values. 
uint64_t word{}; -public: + public: //!\brief The size type needed for serialisation. using size_type = size_t; @@ -89,35 +91,29 @@ class bit_compressed_word template constexpr void assign(it_t it, it_t end) noexcept { - assert(std::distance(it, end) <= max_size); + assert(static_cast(std::distance(it, end)) <= max_size); for (size_t index = 0; it != end; ++it, ++index) { uint64_t offset = index * bits_per_value; - word = (word & ~(bit_mask << offset)) | uint64_t{*it} << offset; + word = (word & ~(bit_mask << offset)) | uint64_t{ *it } << offset; } } //!\brief Implicitly converts to the word type. - constexpr operator uint64_t() const noexcept - { - return word; - } + constexpr operator uint64_t() const noexcept { return word; } //!\brief Saves to the stream. - size_type serialize(std::ostream& out, structure_tree_node* v = nullptr, const std::string name = "") const + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, const std::string name = "") const { - structure_tree_node* child = structure_tree::add_child(v, name, sdsl::util::class_name(*this)); - size_type written_bytes = sdsl::serialize(word, out, v, "compressed_word"); + structure_tree_node * child = structure_tree::add_child(v, name, sdsl::util::class_name(*this)); + size_type written_bytes = sdsl::serialize(word, out, child, "compressed_word"); structure_tree::add_size(child, written_bytes); return written_bytes; } //!\brief Loads from the stream. - void load(std::istream& in) - { - sdsl::load(word, in); - } + void load(std::istream & in) { sdsl::load(word, in); } //!\brief Saves to the archive. template @@ -137,7 +133,8 @@ class bit_compressed_word } // namespace sdsl //! Namespace for the succinct data structure library. -namespace sdsl { +namespace sdsl +{ //! A rank structure proposed by Christopher Pockrandt /*! @@ -145,35 +142,36 @@ namespace sdsl { * It supports constant time rank and prefix_rank queries on int vectors. 
* * \tparam alphabet_size Size of the alphabet represented in the int_vector, i.e., largest value + 1. - * \tparam words_per_block Words per block (equivalent to the number of popcount operations in the worst-case per rank query). - * \tparam blocks_per_superblock Blocks per superblock. + * \tparam words_per_block Words per block (equivalent to the number of popcount operations in the worst-case per + * rank query). \tparam blocks_per_superblock Blocks per superblock. * * \par Reference * Christopher Pockrandt: - * EPR-Dictionaries: A practical and fast data structure for constant time searches in unidirectional and bidirectional FM-indices. - * WEA 2008: 154-168 + * EPR-Dictionaries: A practical and fast data structure for constant time searches in unidirectional and + * bidirectional FM-indices. WEA 2008: 154-168 * * @ingroup rank_support_group */ template -class rank_support_int_v : public rank_support_int { -private: +class rank_support_int_v : public rank_support_int +{ + private: //!\brief The type of the base class. using base_t = rank_support_int; // Import sigma specific constants from base class. + using base_t::bits_per_word; using base_t::sigma; using base_t::sigma_bits; - using base_t::bits_per_word; //!\brief How many values can be stored in one word. - static constexpr uint64_t values_per_word{64ULL / sigma_bits}; + static constexpr uint64_t values_per_word{ 64ULL / sigma_bits }; //!\brief How many values can be stored in one block. - static constexpr uint64_t values_per_block{words_per_block * values_per_word}; + static constexpr uint64_t values_per_block{ words_per_block * values_per_word }; //!\brief How many values can be stored in one superblock. - static constexpr uint64_t values_per_superblock{blocks_per_superblock * values_per_block}; + static constexpr uint64_t values_per_superblock{ blocks_per_superblock * values_per_block }; //!\brief How many words can be stored in one superblock. 
- static constexpr uint64_t words_per_superblock{words_per_block * blocks_per_superblock}; + static constexpr uint64_t words_per_superblock{ words_per_block * blocks_per_superblock }; //!\brief The effective alphabet size needed to compute the prefix ranks. static constexpr uint64_t effective_alphabet_size = alphabet_size - 1; @@ -184,7 +182,7 @@ class rank_support_int_v : public rank_support_int { //!\brief The size of the original text. typename base_t::size_type text_size{}; -public: + public: //!\brief The size type. using typename base_t::size_type; //!\brief The value type. @@ -199,12 +197,12 @@ class rank_support_int_v : public rank_support_int { * prefix rank for a given symbol and prefix length. Accordingly, the pointer to the text of the base class will * always be a nullptr. */ - explicit rank_support_int_v(const int_vector<> * text_ptr = nullptr) : rank_support_int(nullptr) + explicit rank_support_int_v(const int_vector<> * text_ptr = nullptr) + : rank_support_int(nullptr) { static_assert(blocks_per_superblock > 1, "There must be at least two blocks per superblock!"); - if (text_ptr == nullptr || text_ptr->empty()) - return; + if (text_ptr == nullptr || text_ptr->empty()) return; text_size = text_ptr->size(); @@ -221,7 +219,7 @@ class rank_support_int_v : public rank_support_int { // Iterate over the superblock entries and initialise them. auto text_slice_it = text_ptr->begin(); - uint64_t word_id = 0; // We basically iterate over all words of the underlying text. + uint64_t word_id = 0; // We basically iterate over all words of the underlying text. for (auto entry_it = superblocks.begin(); entry_it != superblocks.end(); ++entry_it) { // First initialise the superblock text. @@ -232,17 +230,17 @@ class rank_support_int_v : public rank_support_int { std::min(std::distance(text_slice_it, text_ptr->end()), values_per_word)); compressed_word.assign(text_slice_it, text_slice_end); // Assign text slice to compressed word. 
- text_slice_it = text_slice_end; // Set to next text slice begin. + text_slice_it = text_slice_end; // Set to next text slice begin. } // Second initialise the superblock counts. // The rank values are stored for every symbol of the alphabet in consecutive order. // The last symbol can be ignored since it's prefix sum will always be same as the prefix length. - auto superblock_it = entry_it->superblocks.begin(); // Store the begin of the super block in the node. + auto superblock_it = entry_it->superblocks.begin(); // Store the begin of the super block in the node. for (size_t letter_rank = 0; letter_rank < effective_alphabet_size; ++letter_rank, ++superblock_it) { buf_superblocks[letter_rank] += buf_blocks[letter_rank]; // Update sum with previous superblock - *superblock_it = buf_superblocks[letter_rank]; // Store the counts. + *superblock_it = buf_superblocks[letter_rank]; // Store the counts. buf_blocks[letter_rank] = 0; // Reset the block counts for the next superblock. } @@ -253,8 +251,7 @@ class rank_support_int_v : public rank_support_int { // next block at offset `i * effective_alphabet_size`, where `i` is the current block id. // TODO: Make the implementation safe for multiple words per block auto text_it = entry_it->superblock_text.begin(); - for (auto block_it = entry_it->blocks.begin(); - word_id < word_count && block_it != entry_it->blocks.end(); + for (auto block_it = entry_it->blocks.begin(); word_id < word_count && block_it != entry_it->blocks.end(); ++word_id, ++text_it) { // Get the prefix ranks for the current word for each letter and store them in the respective block @@ -277,13 +274,13 @@ class rank_support_int_v : public rank_support_int { } //!\brief Defaulted copy constructor. - rank_support_int_v(const rank_support_int_v&) = default; + rank_support_int_v(const rank_support_int_v &) = default; //!\brief Defaulted move constructor. 
- rank_support_int_v(rank_support_int_v&&) = default; + rank_support_int_v(rank_support_int_v &&) = default; //!\brief Defaulted copy assignment. - rank_support_int_v& operator=(const rank_support_int_v&) = default; + rank_support_int_v & operator=(const rank_support_int_v &) = default; //!\brief Defaulted move assignment. - rank_support_int_v& operator=(rank_support_int_v&&) = default; + rank_support_int_v & operator=(rank_support_int_v &&) = default; //!\brief Defaulted destructor. ~rank_support_int_v() = default; @@ -297,8 +294,8 @@ class rank_support_int_v : public rank_support_int { { switch (v) { - case 0 : return prefix_rank_impl(position, v); - case sigma - 1 : return position - prefix_rank_impl(position, v - 1); + case 0: return prefix_rank_impl(position, v); + case sigma - 1: return position - prefix_rank_impl(position, v - 1); default: return prefix_rank_impl(position, v); } } @@ -317,8 +314,7 @@ class rank_support_int_v : public rank_support_int { assert(position <= text_size); assert(v <= sigma); - if (unlikely(v == sigma - 1)) - return position; + if (unlikely(v == sigma - 1)) return position; return prefix_rank_impl(position, v); // TODO: Enable me! @@ -350,27 +346,41 @@ class rank_support_int_v : public rank_support_int { } //!\brief Saves to the stream. 
- size_type serialize(std::ostream& out, structure_tree_node* v = nullptr, const std::string name = "") const + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, const std::string name = "") const { - structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); - size_type written_bytes = sdsl::serialize(superblocks, out, v, "superblocks_vector"); + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = sdsl::serialize(superblocks, out, child, "superblocks_vector"); + written_bytes += write_member(text_size, out, child, "text_size"); structure_tree::add_size(child, written_bytes); return written_bytes; } //!\brief Loads from the stream. - void load(std::istream& in, const int_vector<>* /*v*/) + void load(std::istream & in, const int_vector<> * /*v*/) { this->m_v = nullptr; sdsl::load(superblocks, in); + read_member(text_size, in); + } + + //! Equality operator. + friend bool operator==(rank_support_int_v const & lhs, rank_support_int_v const & rhs) noexcept + { + return (lhs.superblocks == rhs.superblocks) && (lhs.text_size == rhs.text_size); + } + + //! Inequality operator. + friend bool operator!=(rank_support_int_v const & lhs, rank_support_int_v const & rhs) noexcept + { + return !(lhs == rhs); } //!\brief Saves to the archive. template void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const { - (void) ar; ar(CEREAL_NVP(superblocks)); + ar(CEREAL_NVP(text_size)); } //!\brief Loads from the archive. @@ -378,15 +388,16 @@ class rank_support_int_v : public rank_support_int { void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) { ar(CEREAL_NVP(superblocks)); + ar(CEREAL_NVP(text_size)); } //!\brief Does nothing for the rank_support_int structure. - void set_vector(const int_vector<>* /*other_text*/ ){} // TODO: Check where this interface is needed, since it is dangerous? 
- // I would be able to reset the text without recomputing the rank support structure which is in general a - // bad design. - -private: + void set_vector(const int_vector<> * /*other_text*/) { + } // TODO: Check where this interface is needed, since it is dangerous? + // I would be able to reset the text without recomputing the rank support structure which is in general a + // bad design. + private: /*!\brief Determines the superblock position covering the given text position. * \param[in] position The given text position. * \returns The position of the superblock that covers the given text position. @@ -406,6 +417,9 @@ class rank_support_int_v : public rank_support_int { { assert(position <= text_size); + if (unlikely(text_size == 0)) // TODO: Maybe there could be some logic in the constructor for this case? + return 0; + superblock_entry const & entry = superblocks[to_superblock_position(position)]; return entry.template superblock_rank(v) + entry.template block_rank(position, v) + @@ -447,9 +461,7 @@ struct rank_support_int_v //!\brief The smallest integer type needed to store the block ranks. using block_value_type = std::conditional_t>; + std::conditional_t>; //!\brief The array storing the super block values. std::array superblocks; @@ -519,44 +531,51 @@ struct rank_support_int_v } //!\brief Saves to the stream. 
- size_type serialize(std::ostream& out, structure_tree_node* v = nullptr, const std::string name = "") const + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, const std::string name = "") const { - structure_tree_node* child = structure_tree::add_child(v, name, sdsl::util::class_name(*this)); + structure_tree_node * child = structure_tree::add_child(v, name, sdsl::util::class_name(*this)); size_type written_bytes = 0; - written_bytes += sdsl::serialize(superblocks.size(), out, v, "prefix_superblock_counts"); - for (const auto & x : superblocks) - written_bytes += sdsl::serialize(x, out, child, "[]"); + written_bytes += sdsl::serialize(superblocks.size(), out, child, "prefix_superblock_counts"); + for (const auto & x : superblocks) written_bytes += sdsl::serialize(x, out, child, "[]"); - written_bytes += sdsl::serialize(blocks.size(), out, v, "prefix_block_counts"); - for (const auto & x : blocks) - written_bytes += sdsl::serialize(x, out, child, "[]"); + written_bytes += sdsl::serialize(blocks.size(), out, child, "prefix_block_counts"); + for (const auto & x : blocks) written_bytes += sdsl::serialize(x, out, child, "[]"); - written_bytes += sdsl::serialize(superblock_text.size(), out, v, "superblock_text"); - for (const auto & x : superblock_text) - written_bytes += sdsl::serialize(x, out, child, "[]"); + written_bytes += sdsl::serialize(superblock_text.size(), out, child, "superblock_text"); + for (const auto & x : superblock_text) written_bytes += sdsl::serialize(x, out, child, "[]"); structure_tree::add_size(child, written_bytes); return written_bytes; } //!\brief Loads from the stream. 
- void load(std::istream& in) + void load(std::istream & in) { size_type array_size; sdsl::load(array_size, in); assert(array_size == superblocks.size()); - for (size_type idx = 0; idx < array_size; ++idx) - sdsl::load(superblocks[idx], in); + for (size_type idx = 0; idx < array_size; ++idx) sdsl::load(superblocks[idx], in); sdsl::load(array_size, in); assert(array_size == blocks.size()); - for (size_type idx = 0; idx < array_size; ++idx) - sdsl::load(blocks[idx], in); + for (size_type idx = 0; idx < array_size; ++idx) sdsl::load(blocks[idx], in); sdsl::load(array_size, in); assert(array_size == superblock_text.size()); - for (size_type idx = 0; idx < array_size; ++idx) - sdsl::load(superblock_text[idx], in); + for (size_type idx = 0; idx < array_size; ++idx) sdsl::load(superblock_text[idx], in); + } + + //! Equality operator. + friend bool operator==(superblock_entry const & lhs, superblock_entry const & rhs) noexcept + { + return (lhs.superblocks == rhs.superblocks) && (lhs.blocks == rhs.blocks) && + (lhs.superblock_text == rhs.superblock_text); + } + + //! Inequality operator. + friend bool operator!=(superblock_entry const & lhs, superblock_entry const & rhs) noexcept + { + return !(lhs == rhs); } //!\brief Saves to the archive. @@ -577,7 +596,7 @@ struct rank_support_int_v ar(CEREAL_NVP(superblock_text)); } -private: + private: //!\brief Maps the given position to the block position inside of the superblock. static constexpr size_type block_position_in_superblock(size_t const position) noexcept { // if constexpr (blocks_per_superblock power of 2) @@ -604,8 +623,8 @@ struct rank_support_int_v //!\brief Computes the in-block rank for the delta prefix. 
template - static constexpr auto word_prefix_rank(const uint64_t word, const uint64_t bit_pos, const value_type v) - -> typename std::enable_if::type + static constexpr auto word_prefix_rank(const uint64_t word, const uint64_t bit_pos, const value_type v) -> + typename std::enable_if::type { auto && prefix_rank = base_t::word_prefix_rank(word, bit_pos, v - 1, v); return prefix_rank[1] - prefix_rank[0]; @@ -613,8 +632,8 @@ struct rank_support_int_v //!\brief Computes the in-block rank for the non-delta prefix. template - static constexpr auto word_prefix_rank(const uint64_t word, const uint64_t bit_pos, const value_type v) - -> typename std::enable_if::type + static constexpr auto word_prefix_rank(const uint64_t word, const uint64_t bit_pos, const value_type v) -> + typename std::enable_if::type { return base_t::word_prefix_rank(word, bit_pos, v)[0]; } diff --git a/include/sdsl/wt_epr.hpp b/include/sdsl/wt_epr.hpp index 10f4c585..97ff6f79 100644 --- a/include/sdsl/wt_epr.hpp +++ b/include/sdsl/wt_epr.hpp @@ -1,24 +1,26 @@ // Copyright (c) 2016, the SDSL Project Authors. All rights reserved. // Please see the AUTHORS file for details. Use of this source code is governed // by a BSD license that can be found in the LICENSE file. -/*! \file wt_epr.hpp - \brief wt_epr.hpp contains a class for the EPR dictionary of byte sequences. - The EPR-dictionary can be interpreted as a specialized wavelet tree of height 0. - \author Christopher Pockrandt -*/ +/*!\file wt_epr.hpp + * \brief wt_epr.hpp contains a class for the EPR dictionary of byte sequences. + * The EPR-dictionary can be interpreted as a specialized wavelet tree of height 0. + * \author Christopher Pockrandt + */ #ifndef INCLUDED_SDSL_WT_EPR #define INCLUDED_SDSL_WT_EPR -#include "int_vector.hpp" -#include "rank_support_int.hpp" -#include "wt_helper.hpp" -#include -#include #include #include +#include +#include + +#include +#include +#include //! Namespace for the succinct data structure library. 
-namespace sdsl { +namespace sdsl +{ //! An EPR-dictionary based wavelet. /*! @@ -29,46 +31,46 @@ namespace sdsl { * * @ingroup wt */ -template , - class t_tree_strat = byte_tree<> - > -class wt_epr { -public: +template , class t_tree_strat = byte_tree<>> +class wt_epr +{ + public: typedef typename t_tree_strat::template type tree_strat_type; typedef int_vector<>::size_type size_type; typedef int_vector<>::value_type value_type; - // typedef random_access_const_iterator const_iterator; - // typedef const_iterator iterator; + typedef random_access_const_iterator const_iterator; + typedef const_iterator iterator; + typedef typename int_vector<>::difference_type difference_type; typedef wt_tag index_category; typedef byte_alphabet_tag /*typename tree_strat_type::alphabet_category*/ alphabet_category; - enum { lex_ordered = true }; + enum + { + lex_ordered = true + }; -private: + private: //!\brief Check if underlying rank support structure stores the text implicitly. static constexpr bool has_inblock_text = std::is_same>::value; - size_type m_size = 0; // original text size - size_type m_sigma = 0; // alphabet size - int_vector<> m_bv; // bit vector to store the wavelet tree - rank_type m_bv_rank; // rank support for the wavelet tree bit vector + size_type m_size = 0; // original text size + size_type m_sigma = 0; // alphabet size + int_vector<> m_bv; // bit vector to store the wavelet tree + rank_type m_bv_rank; // rank support for the wavelet tree bit vector // Overload for the special epr rank structure. template - auto construct_init_rank_select(int_vector<> intermediate_bitvector) - -> std::enable_if_t + auto construct_init_rank_select(int_vector<> intermediate_bitvector) -> std::enable_if_t { // The text is stored inside of the rank structure so we do not store it here. - m_bv_rank = rank_type{&intermediate_bitvector}; // Create the rank support structure. + m_bv_rank = rank_type{ &intermediate_bitvector }; // Create the rank support structure. 
} // Overload for the other rank support structures. template - auto construct_init_rank_select(int_vector<> intermediate_bitvector) - -> std::enable_if_t + auto construct_init_rank_select(int_vector<> intermediate_bitvector) -> std::enable_if_t { m_bv = std::move(intermediate_bitvector); - m_bv_rank = rank_type{&m_bv}; // Create the rank support structure. + m_bv_rank = rank_type{ &m_bv }; // Create the rank support structure. } // Extract the text value from the given position. @@ -88,9 +90,9 @@ class wt_epr { return m_bv[position]; } -public: - const size_type& sigma = m_sigma; - const int_vector<>& bv = m_bv; + public: + const size_type & sigma = m_sigma; + const int_vector<> & bv = m_bv; // Default constructor wt_epr() = default; @@ -103,7 +105,8 @@ class wt_epr { * \f$ \Order{n\log|\Sigma|}\f$, where \f$n=size\f$ */ template - wt_epr(t_it begin, t_it end) : m_size(std::distance(begin, end)) + wt_epr(t_it begin, t_it end) + : m_size(std::distance(begin, end)) { if (0 == m_size) return; // O(n + |\Sigma|\log|\Sigma|) algorithm for calculating node sizes @@ -118,8 +121,8 @@ class wt_epr { // The text cannot have an alphabet larger than the required alphabet_size. if (m_sigma > alphabet_size) - throw std::domain_error{"The given text uses an alphabet that is larger than the explicitly given " - "alphabet size."}; + throw std::domain_error{ "The given text uses an alphabet that is larger than the explicitly given " + "alphabet size." }; // 4. Generate wavelet tree bit sequence m_bv int_vector<> intermediate_bitvector{}; @@ -133,44 +136,47 @@ class wt_epr { } template - wt_epr(t_it begin, t_it end, std::string) : wt_epr(begin, end) + wt_epr(t_it begin, t_it end, std::string) + : wt_epr(begin, end) {} //! 
Copy constructor - wt_epr(const wt_epr& wt) - : m_size(wt.m_size) - , m_sigma(wt.m_sigma) - , m_bv(wt.m_bv) - , m_bv_rank(wt.m_bv_rank) + wt_epr(const wt_epr & wt) + : m_size(wt.m_size) + , m_sigma(wt.m_sigma) + , m_bv(wt.m_bv) + , m_bv_rank(wt.m_bv_rank) { m_bv_rank.set_vector(&m_bv); } - wt_epr(wt_epr&& wt) - : m_size(wt.m_size) - , m_sigma(wt.m_sigma) - , m_bv(std::move(wt.m_bv)) - , m_bv_rank(std::move(wt.m_bv_rank)) + wt_epr(wt_epr && wt) + : m_size(wt.m_size) + , m_sigma(wt.m_sigma) + , m_bv(std::move(wt.m_bv)) + , m_bv_rank(std::move(wt.m_bv_rank)) { m_bv_rank.set_vector(&m_bv); } //! Assignment operator - wt_epr& operator=(const wt_epr& wt) + wt_epr & operator=(const wt_epr & wt) { - if (this != &wt) { - wt_epr tmp(wt); // re-use copy-constructor + if (this != &wt) + { + wt_epr tmp(wt); // re-use copy-constructor *this = std::move(tmp); // re-use move-assignment } return *this; } //! Move assignment operator - wt_epr& operator=(wt_epr&& wt) + wt_epr & operator=(wt_epr && wt) { - if (this != &wt) { + if (this != &wt) + { m_size = wt.m_size; - m_sigma = wt.m_sigma; + m_sigma = wt.m_sigma; m_bv = std::move(wt.m_bv); m_bv_rank = std::move(wt.m_bv_rank); m_bv_rank.set_vector(&m_bv); @@ -293,12 +299,12 @@ class wt_epr { size_type greater = j - i - m_bv_rank.prefix_rank(j, c) + prefix_i_c; if (c > 0) { - prefix_i_c_1 = m_bv_rank.prefix_rank(i, c-1); - smaller = m_bv_rank.prefix_rank(j, c-1) - prefix_i_c_1; + prefix_i_c_1 = m_bv_rank.prefix_rank(i, c - 1); + smaller = m_bv_rank.prefix_rank(j, c - 1) - prefix_i_c_1; } size_type rank = prefix_i_c - prefix_i_c_1; - return t_ret_type{rank, smaller, greater}; + return t_ret_type{ rank, smaller, greater }; } //! How many symbols are lexicographic smaller than c in [0..i-1]. @@ -319,22 +325,20 @@ class wt_epr { assert(i <= size()); // TODO: write a function returning a pair for (i, c) and (i, c-1) and benchmark! 
size_type prefix_count_smaller = 0; - if (c > 0) - prefix_count_smaller = m_bv_rank.prefix_rank(i, c - 1); - return t_ret_type{m_bv_rank.prefix_rank(i, c) - prefix_count_smaller, prefix_count_smaller}; + if (c > 0) prefix_count_smaller = m_bv_rank.prefix_rank(i, c - 1); + return t_ret_type{ m_bv_rank.prefix_rank(i, c) - prefix_count_smaller, prefix_count_smaller }; } - //! Returns a const_iterator to the first element. - // const_iterator begin() const { return const_iterator(this, 0); } - // - // //! Returns a const_iterator to the element after the last element. - // const_iterator end() const { return const_iterator(this, size()); } + //! Returns a const_iterator to the first element. + const_iterator begin() const { return const_iterator(this, 0); } + + //! Returns a const_iterator to the element after the last element. + const_iterator end() const { return const_iterator(this, size()); } //! Serializes the data structure into the given ostream - size_type - serialize(std::ostream& out, structure_tree_node* v = nullptr, std::string name = "") const + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const { - structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); size_type written_bytes = 0; written_bytes += write_member(m_size, out, child, "size"); written_bytes += write_member(m_sigma, out, child, "sigma"); @@ -345,7 +349,7 @@ class wt_epr { } //! Loads the data structure from the given istream. - void load(std::istream& in) + void load(std::istream & in) { read_member(m_size, in); read_member(m_sigma, in); @@ -353,6 +357,16 @@ class wt_epr { m_bv_rank.load(in, &m_bv); } + //! Equality operator.
+ friend bool operator==(wt_epr const & lhs, wt_epr const & rhs) noexcept + { + return (lhs.m_size == rhs.m_size) && (lhs.m_sigma == rhs.m_sigma) && (lhs.m_bv == rhs.m_bv) && + (lhs.m_bv_rank == rhs.m_bv_rank); + } + + //! Inequality operator. + friend bool operator!=(wt_epr const & lhs, wt_epr const & rhs) noexcept { return !(lhs == rhs); } + template void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const { @@ -371,8 +385,7 @@ class wt_epr { ar(CEREAL_NVP(m_bv_rank)); m_bv_rank.set_vector(&m_bv); } - }; -} +} // namespace sdsl #endif diff --git a/test/wt_byte_epr_test.cpp b/test/wt_byte_epr_test.cpp index e318d7bf..f9d49ed6 100644 --- a/test/wt_byte_epr_test.cpp +++ b/test/wt_byte_epr_test.cpp @@ -1,10 +1,13 @@ -#include "sdsl/wavelet_trees.hpp" -#include "common.hpp" -#include "gtest/gtest.h" -#include -#include #include // for std::min #include +#include +#include + +#include + +#include "common.hpp" + +#include namespace { @@ -16,31 +19,45 @@ typedef int_vector<>::size_type size_type; string temp_file; string temp_dir; -int_vector<8> text; -template -class wt_byte_epr_test : public ::testing::Test { }; +template +class wt_byte_epr_test : public ::testing::Test +{ + protected: + // Needs to be a member instead of a global since the static sdsl::memory_manager might call its destructor + // before the vector. + static int_vector<8> text; +}; + +template +int_vector<8> wt_byte_epr_test::text{ []() { + int_vector<8> result; + result.resize(std::rand() % 10000); + for (uint32_t i = 0; i < result.size(); ++i) + { + result[i] = (std::rand() % 3) + 1; // no 0s allowed. produces 1, 2 or 3. 
+ } + return result; +}() }; using testing::Types; -typedef Types< - wt_epr<4> -> Implementations; +typedef Types> Implementations; -TYPED_TEST_CASE(wt_byte_epr_test, Implementations); +TYPED_TEST_SUITE(wt_byte_epr_test, Implementations, ); TYPED_TEST(wt_byte_epr_test, create_and_store) { static_assert(sdsl::util::is_regular::value, "Type is not regular"); - text.resize(std::rand() % 10000); + // text.resize(std::rand() % 10000); - for (uint32_t i = 0; i < text.size(); ++i) - { - text[i] = (std::rand() % 3) + 1; // no 0s allowed. produces 1, 2 or 3. - } + // for (uint32_t i = 0; i < text.size(); ++i) + // { + // text[i] = (std::rand() % 3) + 1; // no 0s allowed. produces 1, 2 or 3. + // } - TypeParam wt(text.begin(), text.end()); + TypeParam wt(this->text.begin(), this->text.end()); ASSERT_TRUE(store_to_file(wt, temp_file)); } @@ -50,25 +67,25 @@ TYPED_TEST(wt_byte_epr_test, sigma) { TypeParam wt; ASSERT_TRUE(load_from_file(wt, temp_file)); - ASSERT_EQ(text.size(), wt.size()); + ASSERT_EQ(this->text.size(), wt.size()); bit_vector occur(256, 0); uint16_t sigma = 0; - for (size_type j=0; jtext.size(); ++j) + { + if (!occur[(unsigned char)this->text[j]]) + { + occur[(unsigned char)this->text[j]] = 1; ++sigma; } } ASSERT_EQ(sigma, wt.sigma); } -template -void compare_wt(const int_vector<8>& text, const t_wt& wt) +template +void compare_wt(const int_vector<8> & text, const t_wt & wt) { ASSERT_EQ(text.size(), wt.size()); - for (size_type j=0; j (wt); - do_serialisation(wt); - do_serialisation (wt); - do_serialisation (wt); - } + if (temp_dir != "@/") + { + TypeParam wt; + ASSERT_TRUE(load_from_file(wt, temp_file)); + + do_serialisation(wt); + do_serialisation(wt); + do_serialisation(wt); + do_serialisation(wt); + } } #endif // SDSL_HAS_CEREAL @@ -168,12 +184,13 @@ TYPED_TEST(wt_byte_epr_test, delete_) sdsl::remove(temp_file); } -} // namespace +} // namespace -int main(int argc, char** argv) +int main(int argc, char ** argv) { ::testing::InitGoogleTest(&argc, argv); - if 
(argc < 2) { + if (argc < 2) + { // LCOV_EXCL_START std::cout << "Usage: " << argv[0] << " tmp_dir" << std::endl; return 1; @@ -182,7 +199,7 @@ int main(int argc, char** argv) temp_dir = argv[1]; temp_file = temp_dir + "/wt_epr"; - auto const seed{time(NULL)}; + auto const seed{ time(NULL) }; srand(seed); return RUN_ALL_TESTS();