SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
rank_support_int.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
9#ifndef INCLUDED_SDSL_RANK_SUPPORT_INT
10#define INCLUDED_SDSL_RANK_SUPPORT_INT
11
15
16#include <array>
17#include <assert.h>
18#include <iosfwd>
19#include <stddef.h>
20#include <stdint.h>
21#include <string>
22
23#include <sdsl/bits.hpp>
24#include <sdsl/int_vector.hpp>
25
26namespace sdsl
27{
29} // namespace sdsl
30
31// TODO: benchmark the use of compiler hints for branch prediction
32#define likely(x) __builtin_expect((x), 1)
33#define unlikely(x) __builtin_expect((x), 0)
34
35namespace sdsl
36{
37
38constexpr size_t floor_log2(size_t const n)
39{
40 return (n == 1) ? 0 : 1 + floor_log2(n >> 1);
41}
42
43constexpr size_t ceil_log2(size_t const n)
44{
45 return (n == 1) ? 0 : floor_log2(n - 1) + 1;
46}
47
49template <uint8_t alphabet_size>
51{
52
53public:
56
57 static_assert(alphabet_size > 2, "Rank support is only implemented on int_vectors with an alphabet size of > 2.");
58
59protected:
63 template <typename uintX_t>
64 static constexpr uintX_t bm_rec(const uintX_t w, const uint8_t length, const uint8_t max_length)
65 {
66 return (length >= max_length) ? w : bm_rec(w | (w << length), length << 1, max_length);
67 }
68
69 static std::array<uint64_t, alphabet_size> generate_mask_array()
70 {
71 std::array<uint64_t, alphabet_size> masks{};
72
73 for (value_type v = 0; v < alphabet_size; ++v)
74 {
75 masks[v] = v;
76 for (uint8_t i = sigma_bits * 2; i < 64; i <<= 1)
77 masks[v] |= masks[v] << i;
78 }
79
80 uint64_t tmp_carry = masks[1];
81 for (value_type v = 0; v < alphabet_size; ++v)
82 masks[v] |= tmp_carry << sigma_bits;
83
84 return masks;
85 }
86
87protected:
88 static constexpr uint8_t sigma{alphabet_size};
89 static constexpr uint8_t sigma_bits{ceil_log2(alphabet_size)};
90 static constexpr uint8_t bits_per_word{(64 / sigma_bits) * sigma_bits};
91 static constexpr uint64_t even_mask{bm_rec<uint64_t>(bits::lo_set[sigma_bits], sigma_bits * 2, 64)};
92 static constexpr uint64_t carry_select_mask{bm_rec<uint64_t>(1ULL << sigma_bits, sigma_bits * 2, 64)};
93 static const std::array<uint64_t, alphabet_size> masks;
94
95 int_vector<> const * m_v;
96
97public:
101 rank_support_int(int_vector<> const * v = nullptr)
102 { // Check that the actual width of the vector has same size as sigma_bits.
103 assert((v != nullptr) ? sigma_bits == v->width() : true);
104 m_v = v;
105 }
106
114 {}
115
123 virtual size_type rank(const size_type i, const value_type v) const = 0;
124
126 virtual size_type operator()(const size_type idx, const value_type v) const = 0;
127
137 virtual size_type prefix_rank(const size_type i, const value_type v) const = 0;
138
142 virtual size_type serialize(std::ostream & out, structure_tree_node * v, const std::string name) const = 0;
143
148 virtual void load(std::istream & in, int_vector<> const * v = nullptr) = 0;
149
155 virtual void set_vector(int_vector<> const * v = nullptr) = 0;
156
157protected:
159 static constexpr uint64_t mask_prefix(value_type const v, uint64_t const w_even, uint64_t const w_odd) noexcept
160 {
161 // every bit that will be set corresponds to an element <= v
162 // because the preset bit to the left in the precomputed bitmask is not eliminated by the carry bit during the
163 // subtraction.
164 // since alphabet_size is > 2 and an element uses at least 2 bits, we can shift the odd positions by one to the
165 // left and it is guaranteed that when adding both with OR that no bits that are set will overlap.
166 return ((masks[v] - w_even) & carry_select_mask) | (((masks[v] - w_odd) & carry_select_mask) << 1);
167 }
168
170 static constexpr uint64_t set_positions_prefix(const uint64_t w, const value_type v) noexcept
171 {
172 uint64_t const w_even = even_mask & w; // retrieve even positions
173 uint64_t const w_odd = even_mask & (w >> sigma_bits); // retrieve odd positions
174 return mask_prefix(v, w_even, w_odd);
175 }
176
180 static constexpr uint64_t set_positions(const uint64_t w, const value_type v) noexcept
181 {
182 assert(v > 0);
183 // optimiyed version of set_positions(w, v) - set_positions(w, v - 1)
184 uint64_t const w_even = even_mask & w; // retrieve even positions
185 uint64_t const w_odd = even_mask & (w >> sigma_bits); // retrieve odd positions
186 uint64_t res = ((masks[v] - w_even) & ~(masks[v - 1] - w_even)) & carry_select_mask;
187 res |= (((masks[v] - w_odd) & ~(masks[v - 1] - w_odd)) & carry_select_mask) << 1;
188 return res;
189 }
190
192 template <typename... value_t>
193 static constexpr std::array<uint64_t, sizeof...(value_t)>
194 word_prefix_rank(const uint64_t word, const size_type bit_pos, const value_t... values) noexcept
195 {
196 uint64_t const mask = bits::lo_set[(bit_pos % bits_per_word) + 1];
197
198 uint64_t const w_even = even_mask & word; // retrieve even positions
199 uint64_t const w_odd = even_mask & (word >> sigma_bits); // retrieve odd positions
200
201 return {(bits::cnt(mask_prefix(values, w_even, w_odd) & mask))...};
202 }
203
207 static constexpr uint32_t word_rank(const uint64_t word, const size_type bit_pos, const value_type v) noexcept
208 {
209 return bits::cnt(set_positions(word, v) & bits::lo_set[(bit_pos & 0x3F) + 1]);
210 }
211
213 static constexpr uint32_t full_word_prefix_rank(const uint64_t word, const value_type v) noexcept
214 {
215 return bits::cnt(set_positions_prefix(word, v));
216 }
217
221 static constexpr uint32_t full_word_rank(const uint64_t word, const value_type v) noexcept
222 {
223
224 return bits::cnt(set_positions(word, v));
225 }
226
228 static constexpr uint64_t extract_word(uint64_t const * data, const size_type word_position) noexcept
229 {
230 return *(data + word_position);
231 }
232};
233
234template <uint8_t alphabet_size>
235const std::array<uint64_t, alphabet_size> rank_support_int<alphabet_size>::masks = generate_mask_array();
236
237} // end namespace sdsl
238
239#endif // end file
bits.hpp contains the sdsl::bits class.
int_vector_size_type size_type
int_vector_trait< t_width >::value_type value_type
virtual void load(std::istream &in, int_vector<> const *v=nullptr)=0
Loads the rank_support_int.
virtual void set_vector(int_vector<> const *v=nullptr)=0
Sets the supported int_vector to the given pointer.
static constexpr uint64_t carry_select_mask
static constexpr uint64_t set_positions(const uint64_t w, const value_type v) noexcept
Count how often value v occurs in the word w.
int_vector const * m_v
static constexpr uint64_t set_positions_prefix(const uint64_t w, const value_type v) noexcept
Count how often value v or smaller occurs in the word w.
static constexpr uintX_t bm_rec(const uintX_t w, const uint8_t length, const uint8_t max_length)
Constructs a bit mask with the pattern w of a given length.
static std::array< uint64_t, alphabet_size > generate_mask_array()
virtual size_type prefix_rank(const size_type i, const value_type v) const =0
Answers rank queries for the supported int_vector.
rank_support_int & operator=(rank_support_int &&)=default
rank_support_int(rank_support_int &&)=default
virtual size_type rank(const size_type i, const value_type v) const =0
Answers rank queries for the supported int_vector.
static constexpr uint8_t sigma_bits
rank_support_int(int_vector<> const *v=nullptr)
Constructor.
static constexpr std::array< uint64_t, sizeof...(value_t)> word_prefix_rank(const uint64_t word, const size_type bit_pos, const value_t... values) noexcept
Counts the occurrences of elements smaller or equal to v in the word starting at data up to position ...
static constexpr uint8_t sigma
static constexpr uint8_t bits_per_word
int_vector ::value_type value_type
rank_support_int(rank_support_int const &)=default
Copy constructor.
static constexpr uint64_t mask_prefix(value_type const v, uint64_t const w_even, uint64_t const w_odd) noexcept
Mask the set prefix positions.
static constexpr uint32_t full_word_prefix_rank(const uint64_t word, const value_type v) noexcept
Counts the occurrences of v in the word starting at data up to position idx.
static const std::array< uint64_t, alphabet_size > masks
static constexpr uint64_t even_mask
static constexpr uint32_t word_rank(const uint64_t word, const size_type bit_pos, const value_type v) noexcept
Counts the occurrences of elements smaller or equal to v in the word starting at data up to position ...
int_vector ::size_type size_type
static constexpr uint32_t full_word_rank(const uint64_t word, const value_type v) noexcept
Counts the occurrences of v in the word starting at data up to position idx.
rank_support_int & operator=(rank_support_int const &)=default
static constexpr uint64_t extract_word(uint64_t const *data, const size_type word_position) noexcept
Returns the word a the given word position.
virtual size_type operator()(const size_type idx, const value_type v) const =0
Alias for rank(idx, v)
virtual size_type serialize(std::ostream &out, structure_tree_node *v, const std::string name) const =0
Serializes rank_support_int.
virtual ~rank_support_int()
Destructor.
int_vector.hpp contains the sdsl::int_vector class.
Namespace for the succinct data structure library.
constexpr size_t ceil_log2(size_t const n)
constexpr size_t floor_log2(size_t const n)
excess< T >::impl excess< T >::data
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition bits.hpp:194
static constexpr uint64_t cnt(uint64_t x)
Counts the number of set bits in x.
Definition bits.hpp:486
typename base_t::size_type size_type