SDSL  3.0.0
Succinct Data Structure Library
divsufsort.hpp
Go to the documentation of this file.
1 /*
2  * libdivsufsort
3  * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person
6  * obtaining a copy of this software and associated documentation
7  * files (the "Software"), to deal in the Software without
8  * restriction, including without limitation the rights to use,
9  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following
12  * conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  * OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #ifndef INCLUDED_SDSL_DIVSUFSORT
28 #define INCLUDED_SDSL_DIVSUFSORT
29 
30 #include <algorithm>
31 #include <assert.h>
32 #include <inttypes.h>
33 #include <stdio.h>
34 
35 #ifdef _OPENMP
36 #include <omp.h>
37 #endif
38 
39 namespace sdsl
40 {
41 
42 #if !defined(UINT8_MAX)
43 #define UINT8_MAX (255)
44 #endif
45 
46 #define ALPHABET_SIZE (256)
47 #define BUCKET_A_SIZE (ALPHABET_SIZE)
48 #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
49 #define SS_INSERTIONSORT_THRESHOLD (8)
50 #define SS_BLOCKSIZE (1024)
51 #define SS_MISORT_STACKSIZE (16)
52 #define TR_INSERTIONSORT_THRESHOLD (8)
53 
54 template <typename saidx_t>
56 
57 template <>
58 struct libdivsufsort_config<int32_t>
59 {
60  static constexpr uint64_t TR_STACKSIZE = 64;
61  static constexpr uint64_t SS_SMERGE_STACKSIZE = 32;
62 };
63 
64 template <>
65 struct libdivsufsort_config<int64_t>
66 {
67  static constexpr uint64_t TR_STACKSIZE = 96;
68  static constexpr uint64_t SS_SMERGE_STACKSIZE = 64;
69 };
70 
71 #define BUCKET_A(_c0) bucket_A[(_c0)]
72 #if ALPHABET_SIZE == 256
73 #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
74 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
75 #else
76 #define BUCKET_B(_c0, _c1) (bucket_B[(_c1)*ALPHABET_SIZE + (_c0)])
77 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0)*ALPHABET_SIZE + (_c1)])
78 #endif
79 
80 #define STACK_PUSH(_a, _b, _c, _d) \
81  do { \
82  stack[ssize].a = (_a), stack[ssize].b = (_b), stack[ssize].c = (_c), stack[ssize++].d = (_d); \
83  } while (0)
84 #define STACK_PUSH5(_a, _b, _c, _d, _e) \
85  do { \
86  stack[ssize].a = (_a), stack[ssize].b = (_b), stack[ssize].c = (_c), stack[ssize].d = (_d), \
87  stack[ssize++].e = (_e); \
88  } while (0)
89 #define STACK_POP(_a, _b, _c, _d) \
90  do { \
91  assert(0 <= ssize); \
92  if (ssize == 0) { return; } \
93  (_a) = stack[--ssize].a, (_b) = stack[ssize].b, (_c) = stack[ssize].c, (_d) = stack[ssize].d; \
94  } while (0)
95 #define STACK_POP5(_a, _b, _c, _d, _e) \
96  do { \
97  assert(0 <= ssize); \
98  if (ssize == 0) { return; } \
99  (_a) = stack[--ssize].a, (_b) = stack[ssize].b, (_c) = stack[ssize].c, (_d) = stack[ssize].d, \
100  (_e) = stack[ssize].e; \
101  } while (0)
102 
103 static const int32_t lg_table[256] = {
104  -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5,
105  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
106  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
107  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
108  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
109  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
110  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
111 };
112 
113 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
114 
115 inline int32_t ss_ilg(int32_t n)
116 {
117 #if SS_BLOCKSIZE == 0
118  return (n & 0xffff0000) ? ((n & 0xff000000) ? 24 + lg_table[(n >> 24) & 0xff] : 16 + lg_table[(n >> 16) & 0xff])
119  : ((n & 0x0000ff00) ? 8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff]);
120 #elif SS_BLOCKSIZE < 256
121  return lg_table[n];
122 #else
123  return (n & 0xff00) ? 8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff];
124 #endif
125 }
126 
127 inline int32_t ss_ilg(int64_t n)
128 {
129 #if SS_BLOCKSIZE == 0
130  return (n >> 32) ? ((n >> 48) ? ((n >> 56) ? 56 + lg_table[(n >> 56) & 0xff] : 48 + lg_table[(n >> 48) & 0xff])
131  : ((n >> 40) ? 40 + lg_table[(n >> 40) & 0xff] : 32 + lg_table[(n >> 32) & 0xff]))
132  : ((n & 0xffff0000) ? ((n & 0xff000000) ? 24 + lg_table[(n >> 24) & 0xff]
133  : 16 + lg_table[(n >> 16) & 0xff])
134  : ((n & 0x0000ff00) ? 8 + lg_table[(n >> 8) & 0xff]
135  : 0 + lg_table[(n >> 0) & 0xff]));
136 #elif SS_BLOCKSIZE < 256
137  return lg_table[n];
138 #else
139  return (n & 0xff00) ? 8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff];
140 #endif
141 }
142 
143 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
144 
145 #if SS_BLOCKSIZE != 0
146 
147 static const int32_t sqq_table[256] = {
148  0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, 64, 65, 67, 69, 71, 73,
149  75, 76, 78, 80, 81, 83, 84, 86, 87, 89, 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104,
150  106, 107, 108, 109, 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 128, 128,
151  129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149,
152  150, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167,
153  167, 168, 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, 181, 181, 182, 183,
154  183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, 192, 192, 193, 193, 194, 195, 195, 196, 197, 197,
155  198, 199, 199, 200, 201, 201, 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
156  212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, 221, 222, 222, 223, 224, 224,
157  225, 225, 226, 226, 227, 227, 228, 229, 229, 230, 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236,
158  237, 237, 238, 238, 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 248,
159  248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
160 };
161 
162 template <typename saidx_t>
163 inline saidx_t ss_isqrt(saidx_t x)
164 {
165  saidx_t y, e;
166 
167  if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
168  e = (x & 0xffff0000) ? ((x & 0xff000000) ? 24 + lg_table[(x >> 24) & 0xff] : 16 + lg_table[(x >> 16) & 0xff])
169  : ((x & 0x0000ff00) ? 8 + lg_table[(x >> 8) & 0xff] : 0 + lg_table[(x >> 0) & 0xff]);
170 
171  if (e >= 16)
172  {
173  y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
174  if (e >= 24) { y = (y + 1 + x / y) >> 1; }
175  y = (y + 1 + x / y) >> 1;
176  }
177  else if (e >= 8)
178  {
179  y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
180  }
181  else
182  {
183  return sqq_table[x] >> 4;
184  }
185 
186  return (x < (y * y)) ? y - 1 : y;
187 }
188 
189 #endif /* SS_BLOCKSIZE != 0 */
190 
191 /* Compares two suffixes. */
192 template <typename saidx_t>
193 inline int32_t ss_compare(const uint8_t * T, const saidx_t * p1, const saidx_t * p2, saidx_t depth)
194 {
195  const uint8_t *U1, *U2, *U1n, *U2n;
196 
197  for (U1 = T + depth + *p1, U2 = T + depth + *p2, U1n = T + *(p1 + 1) + 2, U2n = T + *(p2 + 1) + 2;
198  (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
199  ++U1, ++U2)
200  {}
201 
202  return U1 < U1n ? (U2 < U2n ? *U1 - *U2 : 1) : (U2 < U2n ? -1 : 0);
203 }
204 
205 #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
206 
207 /* Insertionsort for small size groups */
208 template <typename saidx_t>
209 inline void ss_insertionsort(const uint8_t * T, const saidx_t * PA, saidx_t * first, saidx_t * last, saidx_t depth)
210 {
211  saidx_t *i, *j;
212  saidx_t t;
213  int32_t r;
214 
215  for (i = last - 2; first <= i; --i)
216  {
217  for (t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));)
218  {
219  do {
220  *(j - 1) = *j;
221  } while ((++j < last) && (*j < 0));
222  if (last <= j) { break; }
223  }
224  if (r == 0) { *j = ~*j; }
225  *(j - 1) = t;
226  }
227 }
228 
229 #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
230 
231 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
232 
233 template <typename saidx_t>
234 inline void ss_fixdown(const uint8_t * Td, const saidx_t * PA, saidx_t * SA, saidx_t i, saidx_t size)
235 {
236  saidx_t j, k;
237  saidx_t v;
238  int32_t c, d, e;
239 
240  for (v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k)
241  {
242  d = Td[PA[SA[k = j++]]];
243  if (d < (e = Td[PA[SA[j]]]))
244  {
245  k = j;
246  d = e;
247  }
248  if (d <= c) { break; }
249  }
250  SA[i] = v;
251 }
252 
253 /* Simple top-down heapsort. */
254 template <typename saidx_t>
255 inline void ss_heapsort(const uint8_t * Td, const saidx_t * PA, saidx_t * SA, saidx_t size)
256 {
257  saidx_t i, m;
258  saidx_t t;
259 
260  m = size;
261  if ((size % 2) == 0)
262  {
263  m--;
264  if (Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { std::swap(SA[m], SA[m / 2]); }
265  }
266 
267  for (i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
268  if ((size % 2) == 0)
269  {
270  std::swap(SA[0], SA[m]);
271  ss_fixdown(Td, PA, SA, (saidx_t)0, m);
272  }
273  for (i = m - 1; 0 < i; --i)
274  {
275  t = SA[0], SA[0] = SA[i];
276  ss_fixdown(Td, PA, SA, (saidx_t)0, i);
277  SA[i] = t;
278  }
279 }
280 
281 /* Returns the median of three elements. */
282 template <typename saidx_t>
283 inline saidx_t * ss_median3(const uint8_t * Td, const saidx_t * PA, saidx_t * v1, saidx_t * v2, saidx_t * v3)
284 {
285  if (Td[PA[*v1]] > Td[PA[*v2]]) { std::swap(v1, v2); }
286  if (Td[PA[*v2]] > Td[PA[*v3]])
287  {
288  if (Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
289  else
290  {
291  return v3;
292  }
293  }
294  return v2;
295 }
296 
297 /* Returns the median of five elements. */
298 template <typename saidx_t>
299 inline saidx_t *
300 ss_median5(const uint8_t * Td, const saidx_t * PA, saidx_t * v1, saidx_t * v2, saidx_t * v3, saidx_t * v4, saidx_t * v5)
301 {
302  if (Td[PA[*v2]] > Td[PA[*v3]]) { std::swap(v2, v3); }
303  if (Td[PA[*v4]] > Td[PA[*v5]]) { std::swap(v4, v5); }
304  if (Td[PA[*v2]] > Td[PA[*v4]])
305  {
306  std::swap(v2, v4);
307  std::swap(v3, v5);
308  }
309  if (Td[PA[*v1]] > Td[PA[*v3]]) { std::swap(v1, v3); }
310  if (Td[PA[*v1]] > Td[PA[*v4]])
311  {
312  std::swap(v1, v4);
313  std::swap(v3, v5);
314  }
315  if (Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
316  return v3;
317 }
318 
319 /* Returns the pivot element. */
320 template <typename saidx_t>
321 inline saidx_t * ss_pivot(const uint8_t * Td, const saidx_t * PA, saidx_t * first, saidx_t * last)
322 {
323  saidx_t * middle;
324  saidx_t t;
325 
326  t = last - first;
327  middle = first + t / 2;
328 
329  if (t <= 512)
330  {
331  if (t <= 32) { return ss_median3(Td, PA, first, middle, last - 1); }
332  else
333  {
334  t >>= 2;
335  return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
336  }
337  }
338  t >>= 3;
339  first = ss_median3(Td, PA, first, first + t, first + (t << 1));
340  middle = ss_median3(Td, PA, middle - t, middle, middle + t);
341  last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
342  return ss_median3(Td, PA, first, middle, last);
343 }
344 
345 /* Binary partition for substrings. */
346 template <typename saidx_t>
347 inline saidx_t * ss_partition(const saidx_t * PA, saidx_t * first, saidx_t * last, saidx_t depth)
348 {
349  saidx_t *a, *b;
350  saidx_t t;
351  for (a = first - 1, b = last;;)
352  {
353  for (; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
354  for (; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) {}
355  if (b <= a) { break; }
356  t = ~*b;
357  *b = *a;
358  *a = t;
359  }
360  if (first < a) { *first = ~*first; }
361  return a;
362 }
363 
364 /* Multikey introsort for medium size groups. */
365 template <typename saidx_t>
366 inline void ss_mintrosort(const uint8_t * T, const saidx_t * PA, saidx_t * first, saidx_t * last, saidx_t depth)
367 {
368  struct
369  {
370  saidx_t *a, *b, c;
371  int32_t d;
372  } stack[SS_MISORT_STACKSIZE];
373  const uint8_t * Td;
374  saidx_t *a, *b, *c, *d, *e, *f;
375  saidx_t s, t;
376  int32_t ssize;
377  int32_t limit;
378  int32_t v, x = 0;
379 
380  for (ssize = 0, limit = ss_ilg((saidx_t)(last - first));;)
381  {
382 
383  if ((last - first) <= SS_INSERTIONSORT_THRESHOLD)
384  {
385 #if 1 < SS_INSERTIONSORT_THRESHOLD
386  if (1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
387 #endif
388  STACK_POP(first, last, depth, limit);
389  continue;
390  }
391 
392  Td = T + depth;
393  if (limit-- == 0) { ss_heapsort(Td, PA, first, (saidx_t)(last - first)); }
394  if (limit < 0)
395  {
396  for (a = first + 1, v = Td[PA[*first]]; a < last; ++a)
397  {
398  if ((x = Td[PA[*a]]) != v)
399  {
400  if (1 < (a - first)) { break; }
401  v = x;
402  first = a;
403  }
404  }
405  if (Td[PA[*first] - 1] < v) { first = ss_partition(PA, first, a, depth); }
406  if ((a - first) <= (last - a))
407  {
408  if (1 < (a - first))
409  {
410  STACK_PUSH(a, last, depth, -1);
411  last = a, depth += 1, limit = ss_ilg((saidx_t)(a - first));
412  }
413  else
414  {
415  first = a, limit = -1;
416  }
417  }
418  else
419  {
420  if (1 < (last - a))
421  {
422  STACK_PUSH(first, a, depth + 1, ss_ilg((saidx_t)(a - first)));
423  first = a, limit = -1;
424  }
425  else
426  {
427  last = a, depth += 1, limit = ss_ilg((saidx_t)(a - first));
428  }
429  }
430  continue;
431  }
432 
433  /* choose pivot */
434  a = ss_pivot(Td, PA, first, last);
435  v = Td[PA[*a]];
436  std::swap(*first, *a);
437 
438  /* partition */
439  for (b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) {}
440  if (((a = b) < last) && (x < v))
441  {
442  for (; (++b < last) && ((x = Td[PA[*b]]) <= v);)
443  {
444  if (x == v)
445  {
446  std::swap(*b, *a);
447  ++a;
448  }
449  }
450  }
451  for (c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) {}
452  if ((b < (d = c)) && (x > v))
453  {
454  for (; (b < --c) && ((x = Td[PA[*c]]) >= v);)
455  {
456  if (x == v)
457  {
458  std::swap(*c, *d);
459  --d;
460  }
461  }
462  }
463  for (; b < c;)
464  {
465  std::swap(*b, *c);
466  for (; (++b < c) && ((x = Td[PA[*b]]) <= v);)
467  {
468  if (x == v)
469  {
470  std::swap(*b, *a);
471  ++a;
472  }
473  }
474  for (; (b < --c) && ((x = Td[PA[*c]]) >= v);)
475  {
476  if (x == v)
477  {
478  std::swap(*c, *d);
479  --d;
480  }
481  }
482  }
483 
484  if (a <= d)
485  {
486  c = b - 1;
487 
488  if ((s = a - first) > (t = b - a)) { s = t; }
489  for (e = first, f = b - s; 0 < s; --s, ++e, ++f) { std::swap(*e, *f); }
490  if ((s = d - c) > (t = last - d - 1)) { s = t; }
491  for (e = b, f = last - s; 0 < s; --s, ++e, ++f) { std::swap(*e, *f); }
492 
493  a = first + (b - a), c = last - (d - c);
494  b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
495 
496  if ((a - first) <= (last - c))
497  {
498  if ((last - c) <= (c - b))
499  {
500  STACK_PUSH(b, c, depth + 1, ss_ilg((saidx_t)(c - b)));
501  STACK_PUSH(c, last, depth, limit);
502  last = a;
503  }
504  else if ((a - first) <= (c - b))
505  {
506  STACK_PUSH(c, last, depth, limit);
507  STACK_PUSH(b, c, depth + 1, ss_ilg((saidx_t)(c - b)));
508  last = a;
509  }
510  else
511  {
512  STACK_PUSH(c, last, depth, limit);
513  STACK_PUSH(first, a, depth, limit);
514  first = b, last = c, depth += 1, limit = ss_ilg((saidx_t)(c - b));
515  }
516  }
517  else
518  {
519  if ((a - first) <= (c - b))
520  {
521  STACK_PUSH(b, c, depth + 1, ss_ilg((saidx_t)(c - b)));
522  STACK_PUSH(first, a, depth, limit);
523  first = c;
524  }
525  else if ((last - c) <= (c - b))
526  {
527  STACK_PUSH(first, a, depth, limit);
528  STACK_PUSH(b, c, depth + 1, ss_ilg((saidx_t)(c - b)));
529  first = c;
530  }
531  else
532  {
533  STACK_PUSH(first, a, depth, limit);
534  STACK_PUSH(c, last, depth, limit);
535  first = b, last = c, depth += 1, limit = ss_ilg((saidx_t)(c - b));
536  }
537  }
538  }
539  else
540  {
541  limit += 1;
542  if (Td[PA[*first] - 1] < v)
543  {
544  first = ss_partition(PA, first, last, depth);
545  limit = ss_ilg((saidx_t)(last - first));
546  }
547  depth += 1;
548  }
549  }
550 }
551 
552 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
553 
554 #if SS_BLOCKSIZE != 0
555 
556 template <typename saidx_t>
557 inline void ss_blockswap(saidx_t * a, saidx_t * b, saidx_t n)
558 {
559  saidx_t t;
560  for (; 0 < n; --n, ++a, ++b) { t = *a, *a = *b, *b = t; }
561 }
562 
563 template <typename saidx_t>
564 inline void ss_rotate(saidx_t * first, saidx_t * middle, saidx_t * last)
565 {
566  saidx_t *a, *b, t;
567  saidx_t l, r;
568  l = middle - first, r = last - middle;
569  for (; (0 < l) && (0 < r);)
570  {
571  if (l == r)
572  {
573  ss_blockswap(first, middle, l);
574  break;
575  }
576  if (l < r)
577  {
578  a = last - 1, b = middle - 1;
579  t = *a;
580  do {
581  *a-- = *b, *b-- = *a;
582  if (b < first)
583  {
584  *a = t;
585  last = a;
586  if ((r -= l + 1) <= l) { break; }
587  a -= 1, b = middle - 1;
588  t = *a;
589  }
590  } while (1);
591  }
592  else
593  {
594  a = first, b = middle;
595  t = *a;
596  do {
597  *a++ = *b, *b++ = *a;
598  if (last <= b)
599  {
600  *a = t;
601  first = a + 1;
602  if ((l -= r + 1) <= r) { break; }
603  a += 1, b = middle;
604  t = *a;
605  }
606  } while (1);
607  }
608  }
609 }
610 
611 template <typename saidx_t>
612 inline void ss_inplacemerge(const uint8_t * T,
613  const saidx_t * PA,
614  saidx_t * first,
615  saidx_t * middle,
616  saidx_t * last,
617  saidx_t depth)
618 {
619  const saidx_t * p;
620  saidx_t *a, *b;
621  saidx_t len, half;
622  int32_t q, r;
623  int32_t x;
624 
625  for (;;)
626  {
627  if (*(last - 1) < 0)
628  {
629  x = 1;
630  p = PA + ~*(last - 1);
631  }
632  else
633  {
634  x = 0;
635  p = PA + *(last - 1);
636  }
637  for (a = first, len = middle - first, half = len >> 1, r = -1; 0 < len; len = half, half >>= 1)
638  {
639  b = a + half;
640  q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
641  if (q < 0)
642  {
643  a = b + 1;
644  half -= (len & 1) ^ 1;
645  }
646  else
647  {
648  r = q;
649  }
650  }
651  if (a < middle)
652  {
653  if (r == 0) { *a = ~*a; }
654  ss_rotate(a, middle, last);
655  last -= middle - a;
656  middle = a;
657  if (first == middle) { break; }
658  }
659  --last;
660  if (x != 0)
661  {
662  while (*--last < 0) {}
663  }
664  if (middle == last) { break; }
665  }
666 }
667 
668 /* Merge-forward with internal buffer. */
669 template <typename saidx_t>
670 inline void ss_mergeforward(const uint8_t * T,
671  const saidx_t * PA,
672  saidx_t * first,
673  saidx_t * middle,
674  saidx_t * last,
675  saidx_t * buf,
676  saidx_t depth)
677 {
678  saidx_t *a, *b, *c, *bufend;
679  saidx_t t;
680  int32_t r;
681 
682  bufend = buf + (middle - first) - 1;
683  ss_blockswap(buf, first, (saidx_t)(middle - first));
684 
685  for (t = *(a = first), b = buf, c = middle;;)
686  {
687  r = ss_compare(T, PA + *b, PA + *c, depth);
688  if (r < 0)
689  {
690  do {
691  *a++ = *b;
692  if (bufend <= b)
693  {
694  *bufend = t;
695  return;
696  }
697  *b++ = *a;
698  } while (*b < 0);
699  }
700  else if (r > 0)
701  {
702  do {
703  *a++ = *c, *c++ = *a;
704  if (last <= c)
705  {
706  while (b < bufend) { *a++ = *b, *b++ = *a; }
707  *a = *b, *b = t;
708  return;
709  }
710  } while (*c < 0);
711  }
712  else
713  {
714  *c = ~*c;
715  do {
716  *a++ = *b;
717  if (bufend <= b)
718  {
719  *bufend = t;
720  return;
721  }
722  *b++ = *a;
723  } while (*b < 0);
724 
725  do {
726  *a++ = *c, *c++ = *a;
727  if (last <= c)
728  {
729  while (b < bufend) { *a++ = *b, *b++ = *a; }
730  *a = *b, *b = t;
731  return;
732  }
733  } while (*c < 0);
734  }
735  }
736 }
737 
738 /* Merge-backward with internal buffer. */
739 template <typename saidx_t>
740 inline void ss_mergebackward(const uint8_t * T,
741  const saidx_t * PA,
742  saidx_t * first,
743  saidx_t * middle,
744  saidx_t * last,
745  saidx_t * buf,
746  saidx_t depth)
747 {
748  const saidx_t *p1, *p2;
749  saidx_t *a, *b, *c, *bufend;
750  saidx_t t;
751  int32_t r;
752  int32_t x;
753 
754  bufend = buf + (last - middle) - 1;
755  ss_blockswap(buf, middle, (saidx_t)(last - middle));
756 
757  x = 0;
758  if (*bufend < 0)
759  {
760  p1 = PA + ~*bufend;
761  x |= 1;
762  }
763  else
764  {
765  p1 = PA + *bufend;
766  }
767  if (*(middle - 1) < 0)
768  {
769  p2 = PA + ~*(middle - 1);
770  x |= 2;
771  }
772  else
773  {
774  p2 = PA + *(middle - 1);
775  }
776  for (t = *(a = last - 1), b = bufend, c = middle - 1;;)
777  {
778  r = ss_compare(T, p1, p2, depth);
779  if (0 < r)
780  {
781  if (x & 1)
782  {
783  do {
784  *a-- = *b, *b-- = *a;
785  } while (*b < 0);
786  x ^= 1;
787  }
788  *a-- = *b;
789  if (b <= buf)
790  {
791  *buf = t;
792  break;
793  }
794  *b-- = *a;
795  if (*b < 0)
796  {
797  p1 = PA + ~*b;
798  x |= 1;
799  }
800  else
801  {
802  p1 = PA + *b;
803  }
804  }
805  else if (r < 0)
806  {
807  if (x & 2)
808  {
809  do {
810  *a-- = *c, *c-- = *a;
811  } while (*c < 0);
812  x ^= 2;
813  }
814  *a-- = *c, *c-- = *a;
815  if (c < first)
816  {
817  while (buf < b) { *a-- = *b, *b-- = *a; }
818  *a = *b, *b = t;
819  break;
820  }
821  if (*c < 0)
822  {
823  p2 = PA + ~*c;
824  x |= 2;
825  }
826  else
827  {
828  p2 = PA + *c;
829  }
830  }
831  else
832  {
833  if (x & 1)
834  {
835  do {
836  *a-- = *b, *b-- = *a;
837  } while (*b < 0);
838  x ^= 1;
839  }
840  *a-- = ~*b;
841  if (b <= buf)
842  {
843  *buf = t;
844  break;
845  }
846  *b-- = *a;
847  if (x & 2)
848  {
849  do {
850  *a-- = *c, *c-- = *a;
851  } while (*c < 0);
852  x ^= 2;
853  }
854  *a-- = *c, *c-- = *a;
855  if (c < first)
856  {
857  while (buf < b) { *a-- = *b, *b-- = *a; }
858  *a = *b, *b = t;
859  break;
860  }
861  if (*b < 0)
862  {
863  p1 = PA + ~*b;
864  x |= 1;
865  }
866  else
867  {
868  p1 = PA + *b;
869  }
870  if (*c < 0)
871  {
872  p2 = PA + ~*c;
873  x |= 2;
874  }
875  else
876  {
877  p2 = PA + *c;
878  }
879  }
880  }
881 }
882 
883 /* D&C based merge. */
884 template <typename saidx_t>
885 inline void ss_swapmerge(const uint8_t * T,
886  const saidx_t * PA,
887  saidx_t * first,
888  saidx_t * middle,
889  saidx_t * last,
890  saidx_t * buf,
891  saidx_t bufsize,
892  saidx_t depth)
893 {
894 #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
895 #define MERGE_CHECK(a, b, c) \
896  do { \
897  if (((c)&1) || (((c)&2) && (ss_compare(T, PA + GETIDX(*((a)-1)), PA + *(a), depth) == 0))) { *(a) = ~*(a); } \
898  if (((c)&4) && ((ss_compare(T, PA + GETIDX(*((b)-1)), PA + *(b), depth) == 0))) { *(b) = ~*(b); } \
899  } while (0)
900  struct
901  {
902  saidx_t *a, *b, *c;
903  int32_t d;
905  saidx_t *l, *r, *lm, *rm;
906  saidx_t m, len, half;
907  int32_t ssize;
908  int32_t check, next;
909 
910  for (check = 0, ssize = 0;;)
911  {
912  if ((last - middle) <= bufsize)
913  {
914  if ((first < middle) && (middle < last)) { ss_mergebackward(T, PA, first, middle, last, buf, depth); }
915  MERGE_CHECK(first, last, check);
916  STACK_POP(first, middle, last, check);
917  continue;
918  }
919 
920  if ((middle - first) <= bufsize)
921  {
922  if (first < middle) { ss_mergeforward(T, PA, first, middle, last, buf, depth); }
923  MERGE_CHECK(first, last, check);
924  STACK_POP(first, middle, last, check);
925  continue;
926  }
927 
928  for (m = 0, len = std::min(middle - first, last - middle), half = len >> 1; 0 < len; len = half, half >>= 1)
929  {
930  if (ss_compare(T, PA + GETIDX(*(middle + m + half)), PA + GETIDX(*(middle - m - half - 1)), depth) < 0)
931  {
932  m += half + 1;
933  half -= (len & 1) ^ 1;
934  }
935  }
936 
937  if (0 < m)
938  {
939  lm = middle - m, rm = middle + m;
940  ss_blockswap(lm, middle, m);
941  l = r = middle, next = 0;
942  if (rm < last)
943  {
944  if (*rm < 0)
945  {
946  *rm = ~*rm;
947  if (first < lm)
948  {
949  for (; *--l < 0;) {}
950  next |= 4;
951  }
952  next |= 1;
953  }
954  else if (first < lm)
955  {
956  for (; *r < 0; ++r) {}
957  next |= 2;
958  }
959  }
960 
961  if ((l - first) <= (last - r))
962  {
963  STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
964  middle = lm, last = l, check = (check & 3) | (next & 4);
965  }
966  else
967  {
968  if ((next & 2) && (r == middle)) { next ^= 6; }
969  STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
970  first = r, middle = rm, check = (next & 3) | (check & 4);
971  }
972  }
973  else
974  {
975  if (ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { *middle = ~*middle; }
976  MERGE_CHECK(first, last, check);
977  STACK_POP(first, middle, last, check);
978  }
979  }
980 }
981 
982 #endif /* SS_BLOCKSIZE != 0 */
983 
984 /*- Function -*/
985 
986 /* Substring sort */
987 template <typename saidx_t>
988 void sssort(const uint8_t * T,
989  const saidx_t * PA,
990  saidx_t * first,
991  saidx_t * last,
992  saidx_t * buf,
993  saidx_t bufsize,
994  saidx_t depth,
995  saidx_t n,
996  int32_t lastsuffix)
997 {
998  saidx_t * a;
999 #if SS_BLOCKSIZE != 0
1000  saidx_t *b, *middle, *curbuf;
1001  saidx_t j, k, curbufsize, limit;
1002 #endif
1003  saidx_t i;
1004 
1005  if (lastsuffix != 0) { ++first; }
1006 
1007 #if SS_BLOCKSIZE == 0
1008  ss_mintrosort(T, PA, first, last, depth);
1009 #else
1010  if ((bufsize < SS_BLOCKSIZE) && (bufsize < (last - first)) && (bufsize < (limit = ss_isqrt(last - first))))
1011  {
1012  if (SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
1013  buf = middle = last - limit, bufsize = limit;
1014  }
1015  else
1016  {
1017  middle = last, limit = 0;
1018  }
1019  for (a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i)
1020  {
1021 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
1022  ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
1023 #elif 1 < SS_BLOCKSIZE
1024  ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
1025 #endif
1026  curbufsize = last - (a + SS_BLOCKSIZE);
1027  curbuf = a + SS_BLOCKSIZE;
1028  if (curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
1029  for (b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1)
1030  {
1031  ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
1032  }
1033  }
1034 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
1035  ss_mintrosort(T, PA, a, middle, depth);
1036 #elif 1 < SS_BLOCKSIZE
1037  ss_insertionsort(T, PA, a, middle, depth);
1038 #endif
1039  for (k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1)
1040  {
1041  if (i & 1)
1042  {
1043  ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
1044  a -= k;
1045  }
1046  }
1047  if (limit != 0)
1048  {
1049 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
1050  ss_mintrosort(T, PA, middle, last, depth);
1051 #elif 1 < SS_BLOCKSIZE
1052  ss_insertionsort(T, PA, middle, last, depth);
1053 #endif
1054  ss_inplacemerge(T, PA, first, middle, last, depth);
1055  }
1056 #endif
1057 
1058  if (lastsuffix != 0)
1059  {
1060  /* Insert last type B* suffix. */
1061  saidx_t PAi[2];
1062  PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
1063  for (a = first, i = *(first - 1); (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
1064  ++a)
1065  {
1066  *(a - 1) = *a;
1067  }
1068  *(a - 1) = i;
1069  }
1070 }
1071 
1072 inline int32_t tr_ilg(int32_t n)
1073 {
1074  return (n & 0xffff0000) ? ((n & 0xff000000) ? 24 + lg_table[(n >> 24) & 0xff] : 16 + lg_table[(n >> 16) & 0xff])
1075  : ((n & 0x0000ff00) ? 8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff]);
1076 }
1077 
1078 inline int32_t tr_ilg(int64_t n)
1079 {
1080  return (n >> 32) ? ((n >> 48) ? ((n >> 56) ? 56 + lg_table[(n >> 56) & 0xff] : 48 + lg_table[(n >> 48) & 0xff])
1081  : ((n >> 40) ? 40 + lg_table[(n >> 40) & 0xff] : 32 + lg_table[(n >> 32) & 0xff]))
1082  : ((n & 0xffff0000) ? ((n & 0xff000000) ? 24 + lg_table[(n >> 24) & 0xff]
1083  : 16 + lg_table[(n >> 16) & 0xff])
1084  : ((n & 0x0000ff00) ? 8 + lg_table[(n >> 8) & 0xff]
1085  : 0 + lg_table[(n >> 0) & 0xff]));
1086 }
1087 
1088 /* Simple insertionsort for small size groups. */
1089 template <typename saidx_t>
1090 inline void tr_insertionsort(const saidx_t * ISAd, saidx_t * first, saidx_t * last)
1091 {
1092  saidx_t *a, *b;
1093  saidx_t t, r;
1094 
1095  for (a = first + 1; a < last; ++a)
1096  {
1097  for (t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);)
1098  {
1099  do {
1100  *(b + 1) = *b;
1101  } while ((first <= --b) && (*b < 0));
1102  if (b < first) { break; }
1103  }
1104  if (r == 0) { *b = ~*b; }
1105  *(b + 1) = t;
1106  }
1107 }
1108 
1109 template <typename saidx_t>
1110 inline void tr_fixdown(const saidx_t * ISAd, saidx_t * SA, saidx_t i, saidx_t size)
1111 {
1112  saidx_t j, k;
1113  saidx_t v;
1114  saidx_t c, d, e;
1115 
1116  for (v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k)
1117  {
1118  d = ISAd[SA[k = j++]];
1119  if (d < (e = ISAd[SA[j]]))
1120  {
1121  k = j;
1122  d = e;
1123  }
1124  if (d <= c) { break; }
1125  }
1126  SA[i] = v;
1127 }
1128 
1129 /* Simple top-down heapsort. */
1130 template <typename saidx_t>
1131 inline void tr_heapsort(const saidx_t * ISAd, saidx_t * SA, saidx_t size)
1132 {
1133  saidx_t i, m;
1134  saidx_t t;
1135 
1136  m = size;
1137  if ((size % 2) == 0)
1138  {
1139  m--;
1140  if (ISAd[SA[m / 2]] < ISAd[SA[m]]) { std::swap(SA[m], SA[m / 2]); }
1141  }
1142 
1143  for (i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
1144  if ((size % 2) == 0)
1145  {
1146  std::swap(SA[0], SA[m]);
1147  tr_fixdown(ISAd, SA, (saidx_t)0, m);
1148  }
1149  for (i = m - 1; 0 < i; --i)
1150  {
1151  t = SA[0], SA[0] = SA[i];
1152  tr_fixdown(ISAd, SA, (saidx_t)0, i);
1153  SA[i] = t;
1154  }
1155 }
1156 
1157 /* Returns the median of three elements. */
1158 template <typename saidx_t>
1159 inline saidx_t * tr_median3(const saidx_t * ISAd, saidx_t * v1, saidx_t * v2, saidx_t * v3)
1160 {
1161  if (ISAd[*v1] > ISAd[*v2]) { std::swap(v1, v2); }
1162  if (ISAd[*v2] > ISAd[*v3])
1163  {
1164  if (ISAd[*v1] > ISAd[*v3]) { return v1; }
1165  else
1166  {
1167  return v3;
1168  }
1169  }
1170  return v2;
1171 }
1172 
1173 /* Returns the median of five elements. */
1174 template <typename saidx_t>
1175 inline saidx_t * tr_median5(const saidx_t * ISAd, saidx_t * v1, saidx_t * v2, saidx_t * v3, saidx_t * v4, saidx_t * v5)
1176 {
1177  if (ISAd[*v2] > ISAd[*v3]) { std::swap(v2, v3); }
1178  if (ISAd[*v4] > ISAd[*v5]) { std::swap(v4, v5); }
1179  if (ISAd[*v2] > ISAd[*v4])
1180  {
1181  std::swap(v2, v4);
1182  std::swap(v3, v5);
1183  }
1184  if (ISAd[*v1] > ISAd[*v3]) { std::swap(v1, v3); }
1185  if (ISAd[*v1] > ISAd[*v4])
1186  {
1187  std::swap(v1, v4);
1188  std::swap(v3, v5);
1189  }
1190  if (ISAd[*v3] > ISAd[*v4]) { return v4; }
1191  return v3;
1192 }
1193 
1194 /* Returns the pivot element. */
1195 template <typename saidx_t>
1196 inline saidx_t * tr_pivot(const saidx_t * ISAd, saidx_t * first, saidx_t * last)
1197 {
1198  saidx_t * middle;
1199  saidx_t t;
1200 
1201  t = last - first;
1202  middle = first + t / 2;
1203 
1204  if (t <= 512)
1205  {
1206  if (t <= 32) { return tr_median3(ISAd, first, middle, last - 1); }
1207  else
1208  {
1209  t >>= 2;
1210  return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
1211  }
1212  }
1213  t >>= 3;
1214  first = tr_median3(ISAd, first, first + t, first + (t << 1));
1215  middle = tr_median3(ISAd, middle - t, middle, middle + t);
1216  last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
1217  return tr_median3(ISAd, first, middle, last);
1218 }
1219 
1220 template <typename saidx_t>
1222 {
1223  saidx_t chance;
1224  saidx_t remain;
1225  saidx_t incval;
1226  saidx_t count;
1227 };
1228 
1229 template <typename saidx_t>
1230 using trbudget_t = struct _trbudget_t<saidx_t>;
1231 // typedef struct _trbudget_t trbudget_t;
1232 
1233 template <typename saidx_t>
1234 inline void trbudget_init(trbudget_t<saidx_t> * budget, saidx_t chance, saidx_t incval)
1235 {
1236  budget->chance = chance;
1237  budget->remain = budget->incval = incval;
1238 }
1239 
1240 template <typename saidx_t>
1241 inline int32_t trbudget_check(trbudget_t<saidx_t> * budget, saidx_t size)
1242 {
1243  if (size <= budget->remain)
1244  {
1245  budget->remain -= size;
1246  return 1;
1247  }
1248  if (budget->chance == 0)
1249  {
1250  budget->count += size;
1251  return 0;
1252  }
1253  budget->remain += budget->incval - size;
1254  budget->chance -= 1;
1255  return 1;
1256 }
1257 
1258 template <typename saidx_t>
1259 inline void tr_partition(const saidx_t * ISAd,
1260  saidx_t * first,
1261  saidx_t * middle,
1262  saidx_t * last,
1263  saidx_t ** pa,
1264  saidx_t ** pb,
1265  saidx_t v)
1266 {
1267  saidx_t *a, *b, *c, *d, *e, *f;
1268  saidx_t t, s;
1269  saidx_t x = 0;
1270 
1271  for (b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) {}
1272  if (((a = b) < last) && (x < v))
1273  {
1274  for (; (++b < last) && ((x = ISAd[*b]) <= v);)
1275  {
1276  if (x == v)
1277  {
1278  std::swap(*b, *a);
1279  ++a;
1280  }
1281  }
1282  }
1283  for (c = last; (b < --c) && ((x = ISAd[*c]) == v);) {}
1284  if ((b < (d = c)) && (x > v))
1285  {
1286  for (; (b < --c) && ((x = ISAd[*c]) >= v);)
1287  {
1288  if (x == v)
1289  {
1290  std::swap(*c, *d);
1291  --d;
1292  }
1293  }
1294  }
1295  for (; b < c;)
1296  {
1297  std::swap(*b, *c);
1298  for (; (++b < c) && ((x = ISAd[*b]) <= v);)
1299  {
1300  if (x == v)
1301  {
1302  std::swap(*b, *a);
1303  ++a;
1304  }
1305  }
1306  for (; (b < --c) && ((x = ISAd[*c]) >= v);)
1307  {
1308  if (x == v)
1309  {
1310  std::swap(*c, *d);
1311  --d;
1312  }
1313  }
1314  }
1315 
1316  if (a <= d)
1317  {
1318  c = b - 1;
1319  if ((s = a - first) > (t = b - a)) { s = t; }
1320  for (e = first, f = b - s; 0 < s; --s, ++e, ++f) { std::swap(*e, *f); }
1321  if ((s = d - c) > (t = last - d - 1)) { s = t; }
1322  for (e = b, f = last - s; 0 < s; --s, ++e, ++f) { std::swap(*e, *f); }
1323  first += (b - a), last -= (d - c);
1324  }
1325  *pa = first, *pb = last;
1326 }
1327 
1328 template <typename saidx_t>
1329 inline void
1330 tr_copy(saidx_t * ISA, const saidx_t * SA, saidx_t * first, saidx_t * a, saidx_t * b, saidx_t * last, saidx_t depth)
1331 {
1332  /* sort suffixes of middle partition
1333  by using sorted order of suffixes of left and right partition. */
1334  saidx_t *c, *d, *e;
1335  saidx_t s, v;
1336 
1337  v = b - SA - 1;
1338  for (c = first, d = a - 1; c <= d; ++c)
1339  {
1340  if ((0 <= (s = *c - depth)) && (ISA[s] == v))
1341  {
1342  *++d = s;
1343  ISA[s] = d - SA;
1344  }
1345  }
1346  for (c = last - 1, e = d + 1, d = b; e < d; --c)
1347  {
1348  if ((0 <= (s = *c - depth)) && (ISA[s] == v))
1349  {
1350  *--d = s;
1351  ISA[s] = d - SA;
1352  }
1353  }
1354 }
1355 
1356 template <typename saidx_t>
1357 inline void tr_partialcopy(saidx_t * ISA,
1358  const saidx_t * SA,
1359  saidx_t * first,
1360  saidx_t * a,
1361  saidx_t * b,
1362  saidx_t * last,
1363  saidx_t depth)
1364 {
1365  saidx_t *c, *d, *e;
1366  saidx_t s, v;
1367  saidx_t rank, lastrank, newrank = -1;
1368 
1369  v = b - SA - 1;
1370  lastrank = -1;
1371  for (c = first, d = a - 1; c <= d; ++c)
1372  {
1373  if ((0 <= (s = *c - depth)) && (ISA[s] == v))
1374  {
1375  *++d = s;
1376  rank = ISA[s + depth];
1377  if (lastrank != rank)
1378  {
1379  lastrank = rank;
1380  newrank = d - SA;
1381  }
1382  ISA[s] = newrank;
1383  }
1384  }
1385 
1386  lastrank = -1;
1387  for (e = d; first <= e; --e)
1388  {
1389  rank = ISA[*e];
1390  if (lastrank != rank)
1391  {
1392  lastrank = rank;
1393  newrank = e - SA;
1394  }
1395  if (newrank != rank) { ISA[*e] = newrank; }
1396  }
1397 
1398  lastrank = -1;
1399  for (c = last - 1, e = d + 1, d = b; e < d; --c)
1400  {
1401  if ((0 <= (s = *c - depth)) && (ISA[s] == v))
1402  {
1403  *--d = s;
1404  rank = ISA[s + depth];
1405  if (lastrank != rank)
1406  {
1407  lastrank = rank;
1408  newrank = d - SA;
1409  }
1410  ISA[s] = newrank;
1411  }
1412  }
1413 }
1414 
1415 template <typename saidx_t>
1416 inline void tr_introsort(saidx_t * ISA,
1417  const saidx_t * ISAd,
1418  saidx_t * SA,
1419  saidx_t * first,
1420  saidx_t * last,
1421  trbudget_t<saidx_t> * budget)
1422 {
1423  struct
1424  {
1425  const saidx_t * a;
1426  saidx_t *b, *c;
1427  int32_t d, e;
1429  saidx_t *a, *b, *c;
1430  saidx_t v, x = 0;
1431  saidx_t incr = ISAd - ISA;
1432  int32_t limit, next;
1433  int32_t ssize, trlink = -1;
1434 
1435  for (ssize = 0, limit = tr_ilg((saidx_t)(last - first));;)
1436  {
1437 
1438  if (limit < 0)
1439  {
1440  if (limit == -1)
1441  {
1442  /* tandem repeat partition */
1443  tr_partition(ISAd - incr, first, first, last, &a, &b, (saidx_t)(last - SA - 1));
1444 
1445  /* update ranks */
1446  if (a < last)
1447  {
1448  for (c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1449  }
1450  if (b < last)
1451  {
1452  for (c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
1453  }
1454 
1455  /* push */
1456  if (1 < (b - a))
1457  {
1458  STACK_PUSH5(NULL, a, b, 0, 0);
1459  STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
1460  trlink = ssize - 2;
1461  }
1462  if ((a - first) <= (last - b))
1463  {
1464  if (1 < (a - first))
1465  {
1466  STACK_PUSH5(ISAd, b, last, tr_ilg((saidx_t)(last - b)), trlink);
1467  last = a, limit = tr_ilg((saidx_t)(a - first));
1468  }
1469  else if (1 < (last - b))
1470  {
1471  first = b, limit = tr_ilg((saidx_t)(last - b));
1472  }
1473  else
1474  {
1475  STACK_POP5(ISAd, first, last, limit, trlink);
1476  }
1477  }
1478  else
1479  {
1480  if (1 < (last - b))
1481  {
1482  STACK_PUSH5(ISAd, first, a, tr_ilg((saidx_t)(a - first)), trlink);
1483  first = b, limit = tr_ilg((saidx_t)(last - b));
1484  }
1485  else if (1 < (a - first))
1486  {
1487  last = a, limit = tr_ilg((saidx_t)(a - first));
1488  }
1489  else
1490  {
1491  STACK_POP5(ISAd, first, last, limit, trlink);
1492  }
1493  }
1494  }
1495  else if (limit == -2)
1496  {
1497  /* tandem repeat copy */
1498  a = stack[--ssize].b, b = stack[ssize].c;
1499  if (stack[ssize].d == 0) { tr_copy(ISA, SA, first, a, b, last, (saidx_t)(ISAd - ISA)); }
1500  else
1501  {
1502  if (0 <= trlink) { stack[trlink].d = -1; }
1503  tr_partialcopy(ISA, SA, first, a, b, last, (saidx_t)(ISAd - ISA));
1504  }
1505  STACK_POP5(ISAd, first, last, limit, trlink);
1506  }
1507  else
1508  {
1509  /* sorted partition */
1510  if (0 <= *first)
1511  {
1512  a = first;
1513  do {
1514  ISA[*a] = a - SA;
1515  } while ((++a < last) && (0 <= *a));
1516  first = a;
1517  }
1518  if (first < last)
1519  {
1520  a = first;
1521  do {
1522  *a = ~*a;
1523  } while (*++a < 0);
1524  next = (ISA[*a] != ISAd[*a]) ? tr_ilg((saidx_t)(a - first + 1)) : -1;
1525  if (++a < last)
1526  {
1527  for (b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; }
1528  }
1529 
1530  /* push */
1531  if (trbudget_check(budget, (saidx_t)(a - first)))
1532  {
1533  if ((a - first) <= (last - a))
1534  {
1535  STACK_PUSH5(ISAd, a, last, -3, trlink);
1536  ISAd += incr, last = a, limit = next;
1537  }
1538  else
1539  {
1540  if (1 < (last - a))
1541  {
1542  STACK_PUSH5(ISAd + incr, first, a, next, trlink);
1543  first = a, limit = -3;
1544  }
1545  else
1546  {
1547  ISAd += incr, last = a, limit = next;
1548  }
1549  }
1550  }
1551  else
1552  {
1553  if (0 <= trlink) { stack[trlink].d = -1; }
1554  if (1 < (last - a)) { first = a, limit = -3; }
1555  else
1556  {
1557  STACK_POP5(ISAd, first, last, limit, trlink);
1558  }
1559  }
1560  }
1561  else
1562  {
1563  STACK_POP5(ISAd, first, last, limit, trlink);
1564  }
1565  }
1566  continue;
1567  }
1568 
1569  if ((last - first) <= TR_INSERTIONSORT_THRESHOLD)
1570  {
1571  tr_insertionsort(ISAd, first, last);
1572  limit = -3;
1573  continue;
1574  }
1575 
1576  if (limit-- == 0)
1577  {
1578  tr_heapsort(ISAd, first, (saidx_t)(last - first));
1579  for (a = last - 1; first < a; a = b)
1580  {
1581  for (x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
1582  }
1583  limit = -3;
1584  continue;
1585  }
1586 
1587  /* choose pivot */
1588  a = tr_pivot(ISAd, first, last);
1589  std::swap(*first, *a);
1590  v = ISAd[*first];
1591 
1592  /* partition */
1593  tr_partition(ISAd, first, first + 1, last, &a, &b, v);
1594  if ((last - first) != (b - a))
1595  {
1596  next = (ISA[*a] != v) ? tr_ilg((saidx_t)(b - a)) : -1;
1597 
1598  /* update ranks */
1599  for (c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1600  if (b < last)
1601  {
1602  for (c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
1603  }
1604 
1605  /* push */
1606  if ((1 < (b - a)) && (trbudget_check(budget, (saidx_t)(b - a))))
1607  {
1608  if ((a - first) <= (last - b))
1609  {
1610  if ((last - b) <= (b - a))
1611  {
1612  if (1 < (a - first))
1613  {
1614  STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1615  STACK_PUSH5(ISAd, b, last, limit, trlink);
1616  last = a;
1617  }
1618  else if (1 < (last - b))
1619  {
1620  STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1621  first = b;
1622  }
1623  else
1624  {
1625  ISAd += incr, first = a, last = b, limit = next;
1626  }
1627  }
1628  else if ((a - first) <= (b - a))
1629  {
1630  if (1 < (a - first))
1631  {
1632  STACK_PUSH5(ISAd, b, last, limit, trlink);
1633  STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1634  last = a;
1635  }
1636  else
1637  {
1638  STACK_PUSH5(ISAd, b, last, limit, trlink);
1639  ISAd += incr, first = a, last = b, limit = next;
1640  }
1641  }
1642  else
1643  {
1644  STACK_PUSH5(ISAd, b, last, limit, trlink);
1645  STACK_PUSH5(ISAd, first, a, limit, trlink);
1646  ISAd += incr, first = a, last = b, limit = next;
1647  }
1648  }
1649  else
1650  {
1651  if ((a - first) <= (b - a))
1652  {
1653  if (1 < (last - b))
1654  {
1655  STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1656  STACK_PUSH5(ISAd, first, a, limit, trlink);
1657  first = b;
1658  }
1659  else if (1 < (a - first))
1660  {
1661  STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1662  last = a;
1663  }
1664  else
1665  {
1666  ISAd += incr, first = a, last = b, limit = next;
1667  }
1668  }
1669  else if ((last - b) <= (b - a))
1670  {
1671  if (1 < (last - b))
1672  {
1673  STACK_PUSH5(ISAd, first, a, limit, trlink);
1674  STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1675  first = b;
1676  }
1677  else
1678  {
1679  STACK_PUSH5(ISAd, first, a, limit, trlink);
1680  ISAd += incr, first = a, last = b, limit = next;
1681  }
1682  }
1683  else
1684  {
1685  STACK_PUSH5(ISAd, first, a, limit, trlink);
1686  STACK_PUSH5(ISAd, b, last, limit, trlink);
1687  ISAd += incr, first = a, last = b, limit = next;
1688  }
1689  }
1690  }
1691  else
1692  {
1693  if ((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
1694  if ((a - first) <= (last - b))
1695  {
1696  if (1 < (a - first))
1697  {
1698  STACK_PUSH5(ISAd, b, last, limit, trlink);
1699  last = a;
1700  }
1701  else if (1 < (last - b))
1702  {
1703  first = b;
1704  }
1705  else
1706  {
1707  STACK_POP5(ISAd, first, last, limit, trlink);
1708  }
1709  }
1710  else
1711  {
1712  if (1 < (last - b))
1713  {
1714  STACK_PUSH5(ISAd, first, a, limit, trlink);
1715  first = b;
1716  }
1717  else if (1 < (a - first))
1718  {
1719  last = a;
1720  }
1721  else
1722  {
1723  STACK_POP5(ISAd, first, last, limit, trlink);
1724  }
1725  }
1726  }
1727  }
1728  else
1729  {
1730  if (trbudget_check(budget, (saidx_t)(last - first)))
1731  {
1732  limit = tr_ilg((saidx_t)(last - first)), ISAd += incr;
1733  }
1734  else
1735  {
1736  if (0 <= trlink) { stack[trlink].d = -1; }
1737  STACK_POP5(ISAd, first, last, limit, trlink);
1738  }
1739  }
1740  }
1741 }
1742 
1743 /*- Function -*/
1744 
1745 /* Tandem repeat sort */
1746 template <typename saidx_t>
1747 inline void trsort(saidx_t * ISA, saidx_t * SA, saidx_t n, saidx_t depth)
1748 {
1749  saidx_t * ISAd;
1750  saidx_t *first, *last;
1751  trbudget_t<saidx_t> budget;
1752  saidx_t t, skip, unsorted;
1753 
1754  trbudget_init(&budget, (saidx_t)(tr_ilg(n) * 2 / 3), n);
1755  /* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
1756  for (ISAd = ISA + depth; - n < *SA; ISAd += ISAd - ISA)
1757  {
1758  first = SA;
1759  skip = 0;
1760  unsorted = 0;
1761  do {
1762  if ((t = *first) < 0)
1763  {
1764  first -= t;
1765  skip += t;
1766  }
1767  else
1768  {
1769  if (skip != 0)
1770  {
1771  *(first + skip) = skip;
1772  skip = 0;
1773  }
1774  last = SA + ISA[t] + 1;
1775  if (1 < (last - first))
1776  {
1777  budget.count = 0;
1778  tr_introsort(ISA, ISAd, SA, first, last, &budget);
1779  if (budget.count != 0) { unsorted += budget.count; }
1780  else
1781  {
1782  skip = first - last;
1783  }
1784  }
1785  else if ((last - first) == 1)
1786  {
1787  skip = -1;
1788  }
1789  first = last;
1790  }
1791  } while (first < (SA + n));
1792  if (skip != 0) { *(first + skip) = skip; }
1793  if (unsorted == 0) { break; }
1794  }
1795 }
1796 
1797 /* Sorts suffixes of type B*. */
1798 template <typename saidx_t>
1799 inline saidx_t sort_typeBstar(const uint8_t * T, saidx_t * SA, saidx_t * bucket_A, saidx_t * bucket_B, saidx_t n)
1800 {
1801  saidx_t *PAb, *ISAb, *buf;
1802 #ifdef _OPENMP
1803  saidx_t * curbuf;
1804  saidx_t l;
1805 #endif
1806  saidx_t i, j, k, t, m, bufsize;
1807  int32_t c0, c1;
1808 #ifdef _OPENMP
1809  int32_t d0, d1;
1810  int tmp;
1811 #endif
1812 
1813  /* Initialize bucket arrays. */
1814  for (i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
1815  for (i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
1816 
1817  /* Count the number of occurrences of the first one or two characters of each
1818  * type A, B and B* suffix. Moreover, store the beginning position of all
1819  type B* suffixes into the array SA. */
1820  for (i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;)
1821  {
1822  /* type A suffix. */
1823  do {
1824  ++BUCKET_A(c1 = c0);
1825  } while ((0 <= --i) && ((c0 = T[i]) >= c1));
1826  if (0 <= i)
1827  {
1828  /* type B* suffix. */
1829  ++BUCKET_BSTAR(c0, c1);
1830  SA[--m] = i;
1831  /* type B suffix. */
1832  for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { ++BUCKET_B(c0, c1); }
1833  }
1834  }
1835  m = n - m;
1836  /*
1837  * note:
1838  * A type B* suffix is lexicographically smaller than a type B suffix that
1839  * begins with the same first two characters.
1840  */
1841 
1842  /* Calculate the index of start/end point of each bucket. */
1843  for (c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0)
1844  {
1845  t = i + BUCKET_A(c0);
1846  BUCKET_A(c0) = i + j; /* start point */
1847  i = t + BUCKET_B(c0, c0);
1848  for (c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1)
1849  {
1850  j += BUCKET_BSTAR(c0, c1);
1851  BUCKET_BSTAR(c0, c1) = j; /* end point */
1852  i += BUCKET_B(c0, c1);
1853  }
1854  }
1855 
1856  if (0 < m)
1857  {
1858  /* Sort the type B* suffixes by their first two characters. */
1859  PAb = SA + n - m;
1860  ISAb = SA + m;
1861  for (i = m - 2; 0 <= i; --i)
1862  {
1863  t = PAb[i], c0 = T[t], c1 = T[t + 1];
1864  SA[--BUCKET_BSTAR(c0, c1)] = i;
1865  }
1866  t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
1867  SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
1868 
1869  /* Sort the type B* substrings using sssort. */
1870 #ifdef _OPENMP
1871  tmp = omp_get_max_threads();
1872  buf = SA + m, bufsize = (n - (2 * m)) / tmp;
1873  c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
1874 #pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp)
1875  {
1876  tmp = omp_get_thread_num();
1877  curbuf = buf + tmp * bufsize;
1878  k = 0;
1879  for (;;)
1880  {
1881 #pragma omp critical(sssort_lock)
1882  {
1883  if (0 < (l = j))
1884  {
1885  d0 = c0, d1 = c1;
1886  do {
1887  k = BUCKET_BSTAR(d0, d1);
1888  if (--d1 <= d0)
1889  {
1890  d1 = ALPHABET_SIZE - 1;
1891  if (--d0 < 0) { break; }
1892  }
1893  } while (((l - k) <= 1) && (0 < (l = k)));
1894  c0 = d0, c1 = d1, j = k;
1895  }
1896  }
1897  if (l == 0) { break; }
1898  sssort(T, PAb, SA + k, SA + l, curbuf, bufsize, (saidx_t)2, n, *(SA + k) == (m - 1));
1899  }
1900  }
1901 #else
1902  buf = SA + m, bufsize = n - (2 * m);
1903  for (c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0)
1904  {
1905  for (c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1)
1906  {
1907  i = BUCKET_BSTAR(c0, c1);
1908  if (1 < (j - i)) { sssort(T, PAb, SA + i, SA + j, buf, bufsize, (saidx_t)2, n, *(SA + i) == (m - 1)); }
1909  }
1910  }
1911 #endif
1912 
1913  /* Compute ranks of type B* substrings. */
1914  for (i = m - 1; 0 <= i; --i)
1915  {
1916  if (0 <= SA[i])
1917  {
1918  j = i;
1919  do {
1920  ISAb[SA[i]] = i;
1921  } while ((0 <= --i) && (0 <= SA[i]));
1922  SA[i + 1] = i - j;
1923  if (i <= 0) { break; }
1924  }
1925  j = i;
1926  do {
1927  ISAb[SA[i] = ~SA[i]] = j;
1928  } while (SA[--i] < 0);
1929  ISAb[SA[i]] = j;
1930  }
1931 
1932  /* Construct the inverse suffix array of type B* suffixes using trsort. */
1933  trsort(ISAb, SA, m, (saidx_t)1);
1934 
1935  /* Set the sorted order of tyoe B* suffixes. */
1936  for (i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;)
1937  {
1938  for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) {}
1939  if (0 <= i)
1940  {
1941  t = i;
1942  for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {}
1943  SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
1944  }
1945  }
1946 
1947  /* Calculate the index of start/end point of each bucket. */
1948  BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
1949  for (c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0)
1950  {
1951  i = BUCKET_A(c0 + 1) - 1;
1952  for (c1 = ALPHABET_SIZE - 1; c0 < c1; --c1)
1953  {
1954  t = i - BUCKET_B(c0, c1);
1955  BUCKET_B(c0, c1) = i; /* end point */
1956 
1957  /* Move all type B* suffixes to the correct position. */
1958  for (i = t, j = BUCKET_BSTAR(c0, c1); j <= k; --i, --k) { SA[i] = SA[k]; }
1959  }
1960  BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
1961  BUCKET_B(c0, c0) = i; /* end point */
1962  }
1963  }
1964 
1965  return m;
1966 }
1967 
1968 /* Constructs the suffix array by using the sorted order of type B* suffixes. */
1969 template <typename saidx_t>
1970 inline void construct_SA(const uint8_t * T, saidx_t * SA, saidx_t * bucket_A, saidx_t * bucket_B, saidx_t n, saidx_t m)
1971 {
1972  saidx_t *i, *j, *k;
1973  saidx_t s;
1974  int32_t c0, c1, c2;
1975 
1976  if (0 < m)
1977  {
1978  /* Construct the sorted order of type B suffixes by using
1979  the sorted order of type B* suffixes. */
1980  for (c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1)
1981  {
1982  /* Scan the suffix array from right to left. */
1983  for (i = SA + BUCKET_BSTAR(c1, c1 + 1), j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; i <= j; --j)
1984  {
1985  if (0 < (s = *j))
1986  {
1987  assert(T[s] == c1);
1988  assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1989  assert(T[s - 1] <= T[s]);
1990  *j = ~s;
1991  c0 = T[--s];
1992  if ((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1993  if (c0 != c2)
1994  {
1995  if (0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1996  k = SA + BUCKET_B(c2 = c0, c1);
1997  }
1998  assert(k < j);
1999  *k-- = s;
2000  }
2001  else
2002  {
2003  assert(((s == 0) && (T[s] == c1)) || (s < 0));
2004  *j = ~s;
2005  }
2006  }
2007  }
2008  }
2009 
2010  /* Construct the suffix array by using
2011  the sorted order of type B suffixes. */
2012  k = SA + BUCKET_A(c2 = T[n - 1]);
2013  *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
2014  /* Scan the suffix array from left to right. */
2015  for (i = SA, j = SA + n; i < j; ++i)
2016  {
2017  if (0 < (s = *i))
2018  {
2019  assert(T[s - 1] >= T[s]);
2020  c0 = T[--s];
2021  if ((s == 0) || (T[s - 1] < c0)) { s = ~s; }
2022  if (c0 != c2)
2023  {
2024  BUCKET_A(c2) = k - SA;
2025  k = SA + BUCKET_A(c2 = c0);
2026  }
2027  assert(i < k);
2028  *k++ = s;
2029  }
2030  else
2031  {
2032  assert(s < 0);
2033  *i = ~s;
2034  }
2035  }
2036 }
2037 
2038 /* Constructs the burrows-wheeler transformed string directly
2039  by using the sorted order of type B* suffixes. */
2040 template <typename saidx_t>
2041 inline saidx_t construct_BWT(const uint8_t * T,
2042  saidx_t * SA,
2043  saidx_t * bucket_A,
2044  saidx_t * bucket_B,
2045  saidx_t n,
2046  saidx_t m)
2047 {
2048  saidx_t *i, *j, *k, *orig;
2049  saidx_t s;
2050  int32_t c0, c1, c2;
2051 
2052  if (0 < m)
2053  {
2054  /* Construct the sorted order of type B suffixes by using
2055  the sorted order of type B* suffixes. */
2056  for (c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1)
2057  {
2058  /* Scan the suffix array from right to left. */
2059  for (i = SA + BUCKET_BSTAR(c1, c1 + 1), j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; i <= j; --j)
2060  {
2061  if (0 < (s = *j))
2062  {
2063  assert(T[s] == c1);
2064  assert(((s + 1) < n) && (T[s] <= T[s + 1]));
2065  assert(T[s - 1] <= T[s]);
2066  c0 = T[--s];
2067  *j = ~((saidx_t)c0);
2068  if ((0 < s) && (T[s - 1] > c0)) { s = ~s; }
2069  if (c0 != c2)
2070  {
2071  if (0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
2072  k = SA + BUCKET_B(c2 = c0, c1);
2073  }
2074  assert(k < j);
2075  *k-- = s;
2076  }
2077  else if (s != 0)
2078  {
2079  *j = ~s;
2080 #ifndef NDEBUG
2081  }
2082  else
2083  {
2084  assert(T[s] == c1);
2085 #endif
2086  }
2087  }
2088  }
2089  }
2090 
2091  /* Construct the BWTed string by using
2092  the sorted order of type B suffixes. */
2093  k = SA + BUCKET_A(c2 = T[n - 1]);
2094  *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1);
2095  /* Scan the suffix array from left to right. */
2096  for (i = SA, j = SA + n, orig = SA; i < j; ++i)
2097  {
2098  if (0 < (s = *i))
2099  {
2100  assert(T[s - 1] >= T[s]);
2101  c0 = T[--s];
2102  *i = c0;
2103  if ((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); }
2104  if (c0 != c2)
2105  {
2106  BUCKET_A(c2) = k - SA;
2107  k = SA + BUCKET_A(c2 = c0);
2108  }
2109  assert(i < k);
2110  *k++ = s;
2111  }
2112  else if (s != 0)
2113  {
2114  *i = ~s;
2115  }
2116  else
2117  {
2118  orig = i;
2119  }
2120  }
2121 
2122  return orig - SA;
2123 }
2124 
2125 /*- Function -*/
2126 
2127 template <typename saidx_t>
2128 int32_t divsufsort(const uint8_t * T, saidx_t * SA, saidx_t n)
2129 {
2130  saidx_t *bucket_A, *bucket_B;
2131  saidx_t m;
2132  int32_t err = 0;
2133 
2134  /* Check arguments. */
2135  if ((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
2136  else if (n == 0)
2137  {
2138  return 0;
2139  }
2140  else if (n == 1)
2141  {
2142  SA[0] = 0;
2143  return 0;
2144  }
2145  else if (n == 2)
2146  {
2147  m = (T[0] < T[1]);
2148  SA[m ^ 1] = 0, SA[m] = 1;
2149  return 0;
2150  }
2151 
2152  bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
2153  bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
2154 
2155  /* Suffixsort. */
2156  if ((bucket_A != NULL) && (bucket_B != NULL))
2157  {
2158  m = sort_typeBstar(T, SA, bucket_A, bucket_B, n);
2159  construct_SA(T, SA, bucket_A, bucket_B, n, m);
2160  }
2161  else
2162  {
2163  err = -2;
2164  }
2165 
2166  free(bucket_B);
2167  free(bucket_A);
2168 
2169  return err;
2170 }
2171 
2172 // template <typename saidx_t>
2173 // saidx_t
2174 // divbwt(const uint8_t *T, uint8_t *U, saidx_t *A, saidx_t n) {
2175 // saidx_t *B;
2176 // saidx_t *bucket_A, *bucket_B;
2177 // saidx_t m, pidx, i;
2178 //
2179 // /* Check arguments. */
2180 // if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
2181 // else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
2182 //
2183 // if((B = A) == NULL) { B = (saidx_t *)malloc((size_t)(n + 1) * sizeof(saidx_t)); }
2184 // bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
2185 // bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
2186 //
2187 // /* Burrows-Wheeler Transform. */
2188 // if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
2189 // m = sort_typeBstar(T, B, bucket_A, bucket_B, n);
2190 // pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
2191 //
2192 // /* Copy to output string. */
2193 // U[0] = T[n - 1];
2194 // for(i = 0; i < pidx; ++i) { U[i + 1] = (uint8_t)B[i]; }
2195 // for(i += 1; i < n; ++i) { U[i] = (uint8_t)B[i]; }
2196 // pidx += 1;
2197 // } else {
2198 // pidx = -2;
2199 // }
2200 //
2201 // free(bucket_B);
2202 // free(bucket_A);
2203 // if(A == NULL) { free(B); }
2204 //
2205 // return pidx;
2206 // }
2207 
2208 inline int32_t divsufsort64(const uint8_t * T, int64_t * SA, int64_t n)
2209 {
2210  return divsufsort(T, SA, n);
2211 }
2212 
2213 template <typename saidx_t>
2214 inline int _compare(const uint8_t * T, saidx_t Tsize, const uint8_t * P, saidx_t Psize, saidx_t suf, saidx_t * match)
2215 {
2216  saidx_t i, j;
2217  int32_t r;
2218  for (i = suf + *match, j = *match, r = 0; (i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) {}
2219  *match = j;
2220  return (r == 0) ? -(j != Psize) : r;
2221 }
2222 
2223 /* Burrows-Wheeler transform. */
2224 // TODO: use?
2225 // template <typename saidx_t>
2226 // int32_t
2227 // bw_transform(const uint8_t *T, uint8_t *U, saidx_t *SA,
2228 // saidx_t n, saidx_t *idx) {
2229 // saidx_t *A, i, j, p, t;
2230 // int32_t c;
2231 //
2232 // /* Check arguments. */
2233 // if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; }
2234 // if(n <= 1) {
2235 // if(n == 1) { U[0] = T[0]; }
2236 // *idx = n;
2237 // return 0;
2238 // }
2239 //
2240 // if((A = SA) == NULL) {
2241 // i = divbwt(T, U, NULL, n);
2242 // if(0 <= i) { *idx = i; i = 0; }
2243 // return (int32_t)i;
2244 // }
2245 //
2246 // /* BW transform. */
2247 // if(T == U) {
2248 // t = n;
2249 // for(i = 0, j = 0; i < n; ++i) {
2250 // p = t - 1;
2251 // t = A[i];
2252 // if(0 <= p) {
2253 // c = T[j];
2254 // U[j] = (j <= p) ? T[p] : (uint8_t)A[p];
2255 // A[j] = c;
2256 // j++;
2257 // } else {
2258 // *idx = i;
2259 // }
2260 // }
2261 // p = t - 1;
2262 // if(0 <= p) {
2263 // c = T[j];
2264 // U[j] = (j <= p) ? T[p] : (uint8_t)A[p];
2265 // A[j] = c;
2266 // } else {
2267 // *idx = i;
2268 // }
2269 // } else {
2270 // U[0] = T[n - 1];
2271 // for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; }
2272 // *idx = i + 1;
2273 // for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; }
2274 // }
2275 //
2276 // if(SA == NULL) {
2277 // /* Deallocate memory. */
2278 // free(A);
2279 // }
2280 //
2281 // return 0;
2282 // }
2283 
2284 // TODO: use?
2285 /* Inverse Burrows-Wheeler transform. */
2286 // template <typename saidx_t>
2287 // int32_t
2288 // inverse_bw_transform(const uint8_t *T, uint8_t *U, saidx_t *A,
2289 // saidx_t n, saidx_t idx) {
2290 // saidx_t C[ALPHABET_SIZE];
2291 // uint8_t D[ALPHABET_SIZE];
2292 // saidx_t *B;
2293 // saidx_t i, p;
2294 // int32_t c, d;
2295 //
2296 // /* Check arguments. */
2297 // if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) ||
2298 // (n < idx) || ((0 < n) && (idx == 0))) {
2299 // return -1;
2300 // }
2301 // if(n <= 1) { return 0; }
2302 //
2303 // if((B = A) == NULL) {
2304 // /* Allocate n*sizeof(saidx_t) bytes of memory. */
2305 // if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; }
2306 // }
2307 //
2308 // /* Inverse BW transform. */
2309 // for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; }
2310 // for(i = 0; i < n; ++i) { ++C[T[i]]; }
2311 // for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) {
2312 // p = C[c];
2313 // if(0 < p) {
2314 // C[c] = i;
2315 // D[d++] = (uint8_t)c;
2316 // i += p;
2317 // }
2318 // }
2319 // for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; }
2320 // for( ; i < n; ++i) { B[C[T[i]]++] = i + 1; }
2321 // for(c = 0; c < d; ++c) { C[c] = C[D[c]]; }
2322 // for(i = 0, p = idx; i < n; ++i) {
2323 // U[i] = D[binarysearch_lower(C, d, p)];
2324 // p = B[p - 1];
2325 // }
2326 //
2327 // if(A == NULL) {
2328 // /* Deallocate memory. */
2329 // free(B);
2330 // }
2331 //
2332 // return 0;
2333 // }
2334 
2335 } // namespace sdsl
2336 
2337 #endif
#define STACK_POP5(_a, _b, _c, _d, _e)
Definition: divsufsort.hpp:95
#define SS_BLOCKSIZE
Definition: divsufsort.hpp:50
#define STACK_PUSH5(_a, _b, _c, _d, _e)
Definition: divsufsort.hpp:84
#define TR_INSERTIONSORT_THRESHOLD
Definition: divsufsort.hpp:52
#define GETIDX(a)
#define BUCKET_BSTAR(_c0, _c1)
Definition: divsufsort.hpp:74
#define SS_INSERTIONSORT_THRESHOLD
Definition: divsufsort.hpp:49
#define BUCKET_A_SIZE
Definition: divsufsort.hpp:47
#define ALPHABET_SIZE
Definition: divsufsort.hpp:46
#define BUCKET_B_SIZE
Definition: divsufsort.hpp:48
#define SS_MISORT_STACKSIZE
Definition: divsufsort.hpp:51
#define STACK_POP(_a, _b, _c, _d)
Definition: divsufsort.hpp:89
#define BUCKET_A(_c0)
Definition: divsufsort.hpp:71
#define BUCKET_B(_c0, _c1)
Definition: divsufsort.hpp:73
#define STACK_PUSH(_a, _b, _c, _d)
Definition: divsufsort.hpp:80
#define MERGE_CHECK(a, b, c)
Namespace for the succinct data structure library.
void trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth)
saidx_t * tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3)
void ss_mergeforward(const uint8_t *T, const saidx_t *PA, saidx_t *first, saidx_t *middle, saidx_t *last, saidx_t *buf, saidx_t depth)
Definition: divsufsort.hpp:670
void tr_introsort(saidx_t *ISA, const saidx_t *ISAd, saidx_t *SA, saidx_t *first, saidx_t *last, trbudget_t< saidx_t > *budget)
saidx_t * tr_median5(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5)
void ss_mergebackward(const uint8_t *T, const saidx_t *PA, saidx_t *first, saidx_t *middle, saidx_t *last, saidx_t *buf, saidx_t depth)
Definition: divsufsort.hpp:740
void ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last)
Definition: divsufsort.hpp:564
int32_t trbudget_check(trbudget_t< saidx_t > *budget, saidx_t size)
void ss_insertionsort(const uint8_t *T, const saidx_t *PA, saidx_t *first, saidx_t *last, saidx_t depth)
Definition: divsufsort.hpp:209
saidx_t sort_typeBstar(const uint8_t *T, saidx_t *SA, saidx_t *bucket_A, saidx_t *bucket_B, saidx_t n)
int32_t divsufsort(const uint8_t *T, saidx_t *SA, saidx_t n)
void ss_fixdown(const uint8_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t i, saidx_t size)
Definition: divsufsort.hpp:234
void swap(int_vector_reference< t_int_vector > x, int_vector_reference< t_int_vector > y) noexcept
Definition: int_vector.hpp:970
saidx_t * tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last)
void ss_inplacemerge(const uint8_t *T, const saidx_t *PA, saidx_t *first, saidx_t *middle, saidx_t *last, saidx_t depth)
Definition: divsufsort.hpp:612
saidx_t * ss_pivot(const uint8_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last)
Definition: divsufsort.hpp:321
saidx_t * ss_partition(const saidx_t *PA, saidx_t *first, saidx_t *last, saidx_t depth)
Definition: divsufsort.hpp:347
void trbudget_init(trbudget_t< saidx_t > *budget, saidx_t chance, saidx_t incval)
void tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size)
int _compare(const uint8_t *T, saidx_t Tsize, const uint8_t *P, saidx_t Psize, saidx_t suf, saidx_t *match)
void tr_copy(saidx_t *ISA, const saidx_t *SA, saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last, saidx_t depth)
int32_t ss_ilg(int32_t n)
Definition: divsufsort.hpp:115
void construct_SA(const uint8_t *T, saidx_t *SA, saidx_t *bucket_A, saidx_t *bucket_B, saidx_t n, saidx_t m)
int32_t ss_compare(const uint8_t *T, const saidx_t *p1, const saidx_t *p2, saidx_t depth)
Definition: divsufsort.hpp:193
saidx_t ss_isqrt(saidx_t x)
Definition: divsufsort.hpp:163
void ss_mintrosort(const uint8_t *T, const saidx_t *PA, saidx_t *first, saidx_t *last, saidx_t depth)
Definition: divsufsort.hpp:366
void sssort(const uint8_t *T, const saidx_t *PA, saidx_t *first, saidx_t *last, saidx_t *buf, saidx_t bufsize, saidx_t depth, saidx_t n, int32_t lastsuffix)
Definition: divsufsort.hpp:988
void ss_heapsort(const uint8_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size)
Definition: divsufsort.hpp:255
void tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last)
void ss_swapmerge(const uint8_t *T, const saidx_t *PA, saidx_t *first, saidx_t *middle, saidx_t *last, saidx_t *buf, saidx_t bufsize, saidx_t depth)
Definition: divsufsort.hpp:885
void tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size)
int32_t divsufsort64(const uint8_t *T, int64_t *SA, int64_t n)
int_vector ::size_type size(const range_type &r)
Size of a range.
Definition: wt_helper.hpp:787
void tr_partition(const saidx_t *ISAd, saidx_t *first, saidx_t *middle, saidx_t *last, saidx_t **pa, saidx_t **pb, saidx_t v)
saidx_t * ss_median3(const uint8_t *Td, const saidx_t *PA, saidx_t *v1, saidx_t *v2, saidx_t *v3)
Definition: divsufsort.hpp:283
void tr_partialcopy(saidx_t *ISA, const saidx_t *SA, saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last, saidx_t depth)
void ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n)
Definition: divsufsort.hpp:557
int32_t tr_ilg(int32_t n)
saidx_t * ss_median5(const uint8_t *Td, const saidx_t *PA, saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5)
Definition: divsufsort.hpp:300
saidx_t construct_BWT(const uint8_t *T, saidx_t *SA, saidx_t *bucket_A, saidx_t *bucket_B, saidx_t n, saidx_t m)