|
11 | 11 | #include <inttypes.h> |
12 | 12 |
|
13 | 13 | #define LOG_BITSET 0 |
| 14 | +#define LOG_BSEARCH 0 |
14 | 15 |
|
15 | 16 | #include "libfsm/internal.h" /* XXX: for allocating struct fsm_edge, and the edges array */ |
16 | 17 |
|
@@ -184,6 +185,100 @@ edge_set_advise_growth(struct edge_set **pset, const struct fsm_alloc *alloc, |
184 | 185 | return 1; |
185 | 186 | } |
186 | 187 |
|
/* Result of find_state_position(): says how the position it reports
 * should be interpreted by the caller. */
enum fsp_res {
	/* The state was not found; the reported position is where a new
	 * entry for it would be inserted (possibly one past the end). */
	FSP_FOUND_INSERT_POSITION,

	/* An entry for the state already exists at the reported position. */
	FSP_FOUND_VALUE_PRESENT,
};
| 192 | + |
| 193 | +/* Use binary search to find the first position N where set->groups[N].to >= state, |
| 194 | + * which includes the position immediately following the last entry. Return an enum |
| 195 | + * which indicates whether state is already present. */ |
| 196 | +static enum fsp_res |
| 197 | +find_state_position(const struct edge_set *set, fsm_state_t state, size_t *dst) |
| 198 | +{ |
| 199 | + size_t lo = 0, hi = set->count; |
| 200 | + if (LOG_BSEARCH) { |
| 201 | + fprintf(stderr, "%s: looking for %d in %p (count %zu)\n", |
| 202 | + __func__, state, (void *)set, set->count); |
| 203 | + } |
| 204 | + |
| 205 | +#if EXPENSIVE_CHECKS |
| 206 | + /* invariant: input is unique and sorted */ |
| 207 | + for (size_t i = 1; i < set->count; i++) { |
| 208 | + assert(set->groups[i - 1].to < set->groups[i].to); |
| 209 | + } |
| 210 | +#endif |
| 211 | + |
| 212 | + if (set->count == 0) { |
| 213 | + if (LOG_BSEARCH) { |
| 214 | + fprintf(stderr, "%s: empty, returning 0\n", __func__); |
| 215 | + } |
| 216 | + *dst = 0; |
| 217 | + return FSP_FOUND_INSERT_POSITION; |
| 218 | + } else { |
| 219 | + if (LOG_BSEARCH) { |
| 220 | + fprintf(stderr, "%s: fast path: looking for %d, set->groups[last].to %d\n", |
| 221 | + __func__, state, set->groups[hi - 1].to); |
| 222 | + } |
| 223 | + |
| 224 | + /* Check the last entry so we can append in constant time. */ |
| 225 | + const fsm_state_t last = set->groups[hi - 1].to; |
| 226 | + if (state > last) { |
| 227 | + *dst = hi; |
| 228 | + return FSP_FOUND_INSERT_POSITION; |
| 229 | + } else if (state == last) { |
| 230 | + *dst = hi - 1; |
| 231 | + return FSP_FOUND_VALUE_PRESENT; |
| 232 | + } |
| 233 | + } |
| 234 | + |
| 235 | + size_t mid; |
| 236 | + while (lo < hi) { /* lo <= mid < hi */ |
| 237 | + mid = lo + (hi - lo)/2; /* avoid overflow */ |
| 238 | + const struct edge_group *eg = &set->groups[mid]; |
| 239 | + const fsm_state_t cur = eg->to; |
| 240 | + if (LOG_BSEARCH) { |
| 241 | + fprintf(stderr, "%s: lo %zu, hi %zu, mid %zu, cur %d, looking for %d\n", |
| 242 | + __func__, lo, hi, mid, cur, state); |
| 243 | + } |
| 244 | + |
| 245 | + if (state == cur) { |
| 246 | + *dst = mid; |
| 247 | + return FSP_FOUND_VALUE_PRESENT; |
| 248 | + } else if (state > cur) { |
| 249 | + lo = mid + 1; |
| 250 | + if (LOG_BSEARCH) { |
| 251 | + fprintf(stderr, "%s: new lo %zd\n", __func__, lo); |
| 252 | + } |
| 253 | + |
| 254 | + /* Update mid if we're about to halt, because we're looking |
| 255 | + * for the first position >= state, not the last position <=. */ |
| 256 | + if (lo == hi) { |
| 257 | + mid = lo; |
| 258 | + if (LOG_BSEARCH) { |
| 259 | + fprintf(stderr, "%s: special case, updating mid to %zd\n", __func__, mid); |
| 260 | + } |
| 261 | + } |
| 262 | + } else if (state < cur) { |
| 263 | + hi = mid; |
| 264 | + if (LOG_BSEARCH) { |
| 265 | + fprintf(stderr, "%s: new hi %zd\n", __func__, hi); |
| 266 | + } |
| 267 | + } |
| 268 | + } |
| 269 | + |
| 270 | + if (LOG_BSEARCH) { |
| 271 | + fprintf(stderr, "%s: halting at %zd (looking for %d, cur %d)\n", |
| 272 | + __func__, mid, state, set->groups[mid].to); |
| 273 | + } |
| 274 | + |
| 275 | + /* dst is now the first position > state (== case is handled above), |
| 276 | + * which may be one past the end of the array. */ |
| 277 | + assert(mid == set->count || set->groups[mid].to > state); |
| 278 | + *dst = mid; |
| 279 | + return FSP_FOUND_INSERT_POSITION; |
| 280 | +} |
| 281 | + |
187 | 282 | int |
188 | 283 | edge_set_add_bulk(struct edge_set **pset, const struct fsm_alloc *alloc, |
189 | 284 | uint64_t symbols[256/64], fsm_state_t state) |
@@ -223,30 +318,24 @@ edge_set_add_bulk(struct edge_set **pset, const struct fsm_alloc *alloc, |
223 | 318 | assert(set->count <= set->ceil); |
224 | 319 |
|
225 | 320 | #if LOG_BITSET |
226 | | - fprintf(stderr, " -- edge_set_add: symbols [0x%lx, 0x%lx, 0x%lx, 0x%lx] -> state %d on %p\n", |
227 | | - symbols[0], symbols[1], symbols[2], symbols[3], |
228 | | - state, (void *)set); |
| 321 | + fprintf(stderr, " -- edge_set_add: symbols [0x%lx, 0x%lx, 0x%lx, 0x%lx] -> state %d on %p\n", |
| 322 | + symbols[0], symbols[1], symbols[2], symbols[3], |
| 323 | + state, (void *)set); |
229 | 324 | #endif |
230 | 325 |
|
231 | | - /* Linear search for a group with the same destination |
232 | | - * state, or the position where that group would go. */ |
233 | | - for (i = 0; i < set->count; i++) { |
| 326 | + switch (find_state_position(set, state, &i)) { |
| 327 | + case FSP_FOUND_VALUE_PRESENT: |
| 328 | + assert(i < set->count); |
234 | 329 | eg = &set->groups[i]; |
235 | | - |
236 | | - if (eg->to == state) { |
237 | | - /* This API does not indicate whether that |
238 | | - * symbol -> to edge was already present. */ |
239 | | - size_t i; |
240 | | - for (i = 0; i < 256/64; i++) { |
241 | | - eg->symbols[i] |= symbols[i]; |
242 | | - } |
243 | | - dump_edge_set(set); |
244 | | - return 1; |
245 | | - } else if (eg->to > state) { |
246 | | - break; /* will shift down and insert below */ |
247 | | - } else { |
248 | | - continue; |
| 330 | + for (i = 0; i < 256/64; i++) { |
| 331 | + eg->symbols[i] |= symbols[i]; |
249 | 332 | } |
| 333 | + dump_edge_set(set); |
| 334 | + return 1; |
| 335 | + |
| 336 | + break; |
| 337 | + case FSP_FOUND_INSERT_POSITION: |
| 338 | + break; /* continue below */ |
250 | 339 | } |
251 | 340 |
|
252 | 341 | /* insert/append at i */ |
|
0 commit comments