9 #ifndef INCLUDED_SDSL_WM_INT
10 #define INCLUDED_SDSL_WM_INT
48 class t_rank =
typename t_bitvector::rank_1_type,
49 class t_select =
typename t_bitvector::select_1_type,
50 class t_select_zero =
typename t_bitvector::select_0_type>
104 template <
typename t_it>
112 for (
auto it =
begin; it !=
end; ++it)
115 if (value > max_elem) max_elem = value;
118 std::string tree_out_buf_file_name =
tmp_file(tmp_dir,
"_m_tree");
124 std::string zero_buf_file_name =
tmp_file(tmp_dir,
"_zero_buf");
125 osfstream tree_out_buf(tree_out_buf_file_name,
126 std::ios::binary | std::ios::trunc | std::ios::out);
131 uint64_t tree_word = 0;
138 const uint64_t mask = 1ULL << width;
142 for (
size_t i = 0; i <
m_size; ++i)
147 tree_word |= (1ULL << (tree_pos & 0x3FULL));
155 if ((tree_pos & 0x3FULL) == 0)
157 tree_out_buf.write((
char *)&tree_word,
sizeof(tree_word));
162 for (
size_t i = zeros; i <
m_size; ++i) { rac[i] = zero_buf[i - zeros]; }
164 if ((tree_pos & 0x3FULL) != 0)
166 tree_out_buf.write((
char *)&tree_word,
sizeof(tree_word));
169 tree_out_buf.
close();
245 m_tree = std::move(wt.m_tree);
286 auto rank_zeros = (i - k *
m_size) - rank_ones;
287 i = (k + 1) *
m_size + rank_zeros;
364 return std::make_pair(i, c);
378 assert(1 <= i and i <=
rank(
size(), c));
382 m_path_off[0] = m_path_rank_off[0] = 0;
401 m_path_off[k + 1] = b;
402 m_path_rank_off[k] = rank_b;
407 b = m_path_off[k - 1];
408 size_type rank_b = m_path_rank_off[k - 1];
435 bool report =
true)
const
446 _range_search_2d(
root(), { { lb, rb } }, vlb, vrb, 0, is, rank_off, point_vec, report, cnt_answers);
448 return make_pair(cnt_answers, point_vec);
456 std::vector<size_type> & is,
457 std::vector<size_type> & rank_off,
463 if (get<0>(r) > get<1>(r))
return;
483 point_vec.emplace_back(is[0] + i - 1, v.
sym);
498 if (!
sdsl::empty(get<0>(c_r)) and vlb < mid and mid)
503 std::min(vrb, mid - 1),
539 written_bytes +=
m_tree.serialize(out, child,
"tree");
540 written_bytes +=
m_tree_rank.serialize(out, child,
"tree_rank");
541 written_bytes +=
m_tree_select1.serialize(out, child,
"tree_select_1");
542 written_bytes +=
m_tree_select0.serialize(out, child,
"tree_select_0");
547 return written_bytes;
565 template <
typename archive_t>
580 template <
typename archive_t>
668 auto rs =
expand(vv, { 0, i });
669 bool bit = *(
begin(vv) + i);
670 i = std::get<1>(rs[bit]);
695 return expand(std::move(v_right));
711 v_left.
size = v.size - ones;
712 v_left.
level = v.level + 1;
713 v_left.
sym = v.sym << 1;
717 v.level = v.level + 1;
718 v.sym = (v.sym << 1) | 1;
720 return { { std::move(v_left), v } };
735 auto ranges_copy = ranges;
736 return expand(v, std::move(ranges_copy));
754 for (
auto & r : ranges)
758 auto left_size = (r[1] - r[0] + 1) - right_size;
760 auto right_sp = sp_rank - v_sp_rank;
761 auto left_sp = r[0] - right_sp;
763 r = { { left_sp, left_sp + left_size - 1 } };
764 res[i++] = { { right_sp, right_sp + right_size - 1 } };
766 return { { ranges, std::move(res) } };
784 auto left_size = (r[1] - r[0] + 1) - right_size;
786 auto right_sp = sp_rank - v_sp_rank;
787 auto left_sp = r[0] - right_sp;
789 return { { { { left_sp, left_sp + left_size - 1 } }, { { right_sp, right_sp + right_size - 1 } } } };
797 auto begin(
const node_type & v)
const -> decltype(
m_tree.begin() + v.offset) {
return m_tree.begin() + v.offset; }
800 auto end(
const node_type & v)
const -> decltype(
m_tree.begin() + v.offset + v.size)
802 return m_tree.begin() + v.offset + v.size;
void push_back(const uint64_t value)
Appends the given element value to the end of the int_vector_buffer.
A generic vector class for integers of width .
iterator end() noexcept
Iterator that points to the element after the last element of int_vector.
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
static uint64_t write_header(uint64_t size, uint8_t int_width, std::ostream &out)
Write the size and int_width of a int_vector.
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
void close()
Close the stream.
Generic iterator for a random access container.
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
A wavelet tree class for integer sequences.
const_iterator end() const
Returns a const_iterator to the element after the last element.
t_bitvector::difference_type difference_type
wm_int(const wm_int &wt)
Copy constructor.
std::pair< size_type, point_vec_type > r2d_res_type
void load(std::istream &in)
Loads the data structure from the given istream.
std::array< range_vec_type, 2 > expand(const node_type &v, range_vec_type &&ranges) const
Returns for each range its left and right child ranges.
value_type operator[](size_type i) const
Recovers the i-th symbol of the original vector.
void _range_search_2d(node_type v, range_type r, value_type vlb, value_type vrb, size_type ilb, std::vector< size_type > &is, std::vector< size_type > &rank_off, point_vec_type &point_vec, bool report, size_type &cnt_answers) const
int_alphabet_tag alphabet_category
const size_type & sigma
Effective alphabet size of the wavelet tree.
t_bitvector bit_vector_type
value_type sym(const node_type &v) const
Symbol of leaf node v.
std::array< range_vec_type, 2 > expand(const node_type &v, const range_vec_type &ranges) const
Returns for each range its left and right child ranges.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Load via cereal.
auto seq(const node_type &v) const -> random_access_container< std::function< value_type(size_type)>>
Random access container to sequence of node v.
std::array< node_type, 2 > expand(const node_type &v) const
Returns the two child nodes of an inner node.
size_type size() const
Returns the size of the original vector.
int_vector< 64 > m_rank_level
bool operator==(wm_int const &other) const noexcept
Equality operator.
std::pair< size_type, std::vector< std::pair< value_type, size_type > > > range_search_2d(size_type lb, size_type rb, value_type vlb, value_type vrb, bool report=true) const
range_search_2d searches points in the index interval [lb..rb] and value interval [vlb....
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Serialise (save) via cereal.
wm_int & operator=(const wm_int &wt)
Assignment operator.
const_iterator begin() const
Returns a const_iterator to the first element.
bool empty() const
Returns whether the wavelet tree contains no data.
wm_int()=default
Default constructor.
int_vector< 64 > m_zero_cnt
wm_int & operator=(wm_int &&wt)
Assignment move operator.
int_vector ::value_type value_type
auto bit_vec(const node_type &v) const -> node_bv_container< t_bitvector >
Random access container to bitvector of node v.
std::pair< size_type, value_type > inverse_select(size_type i) const
Calculates how many occurrences of symbol wt[i] are in the prefix [0..i-1] of the original sequence.
std::vector< point_type > point_vec_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
std::pair< value_type, size_type > point_type
random_access_const_iterator< wm_int > const_iterator
bool operator!=(wm_int const &other) const noexcept
Inequality operator.
std::array< range_type, 2 > expand(const node_type &v, const range_type &r) const
Returns for a range its left and right child ranges.
t_select_zero select_0_type
node_type root() const
Return the root node.
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1] of the supported vector.
const bit_vector_type & tree
A concatenation of all bit vectors of the wavelet tree.
select_1_type m_tree_select1
size_type select(size_type i, value_type c) const
Calculates the i-th occurrence of the symbol c in the supported vector.
select_0_type m_tree_select0
std::array< node_type, 2 > expand(node_type &&v) const
Returns the two child nodes of an inner node.
bool is_leaf(const node_type &v) const
Checks if the node is a leaf node.
const uint32_t & max_level
Maximal level of the wavelet tree.
std::pair< uint64_t, uint64_t > path(value_type c) const
return the path to the leaf for a given symbol
int_vector ::size_type size_type
bool empty(const node_type &v) const
Indicates if node v is empty.
wm_int(t_it begin, t_it end, std::string tmp_dir=ram_file_name(""))
Construct the wavelet tree from a sequence defined by two interators.
auto size(const node_type &v) const -> decltype(v.size)
Return the size of node v.
wm_int(wm_int &&wt)
Move copy constructor.
int_vector.hpp contains the sdsl::int_vector class.
Namespace for the succinct data structure library.
std::string ram_file_name(const std::string &file)
Returns the corresponding RAM-file name for file.
bool empty(const range_type &r)
Empty range check.
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
void read_member(T &t, std::istream &in)
std::string tmp_file(const cache_config &config, std::string name_part="")
Returns a name for a temporary file. I.e. the name was not used before.
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
std::array< int_vector<>::size_type, 2 > range_type
bool load_from_file(T &v, const std::string &file)
Load sdsl-object v from a file.
std::vector< range_type > range_vec_type
int remove(const std::string &)
Remove a file.
int_vector ::size_type size(const range_type &r)
Size of a range.
rank_support_v.hpp contains rank_support_v.
Contains declarations and definitions of data structure concepts.
select_support_mcl.hpp contains classes that support a sdsl::bit_vector with constant time select inf...
static SDSL_CONSTEXPR uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Represents a node in the wavelet tree.
node_type & operator=(node_type &&)=default
node_type(const node_type &)=default
bool operator==(const node_type &v) const
bool operator>(const node_type &v) const
node_type(node_type &&)=default
node_type(size_type o=0, size_type sz=0, size_type l=0, value_type sy=0)
node_type & operator=(const node_type &)=default
bool operator<(const node_type &v) const
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.