Skip to content

Commit 5ece637

Browse files
committed
Merge branch 'main' into sv/integrate-combinable-DFA-capture-resolution
There were merge conflicts, which had to do with the return type of idmap_iter's callback -- on `main` it was `void`, on the branch it was `int` and indicated whether iterations should continue or halt. I picked the `int` form, and updated `idmap_iter` so that it was actually checked, because it probably doesn't make sense to continue iterating when earlier steps have errored out.
2 parents 722260a + 1ca3726 commit 5ece637

7 files changed

Lines changed: 223 additions & 56 deletions

File tree

.github/workflows/ci.yml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,8 +409,15 @@ jobs:
409409
path: ${{ env.build }}
410410
key: build-${{ matrix.make }}-${{ matrix.os }}-${{ matrix.cc }}-${{ matrix.debug }}-${{ matrix.san }}-${{ github.sha }}
411411

412-
# note we do the fuzzing unconditionally; each run adds to the corpus
412+
# note we do the fuzzing unconditionally; each run adds to the corpus.
413+
#
414+
# We only run fuzzing for PRs in the base repo, this prevents attempting
415+
# to purge the seed cache from a PR syncing a forked repo, which fails
416+
# due to a permissions error (I'm unsure why, I think PRs from clones can't
417+
# purge a cache in CI presumably for security/DoS reasons). PRs from clones
418+
# still run fuzzing, just from empty, and do not save their seeds.
413419
- name: Restore seeds (mode ${{ matrix.mode }})
420+
if: github.repository == 'katef/libfsm'
414421
uses: actions/cache/restore@v3
415422
id: cache-seeds
416423
with:

include/adt/idmap.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ idmap_get(const struct idmap *m, fsm_state_t state_id,
4040
size_t buf_size, unsigned *buf, size_t *written);
4141

4242
/* Iterator callback.
43-
* The return value indicates whether iteration should continue. */
43+
* Return status indicates whether to continue. */
4444
typedef int
4545
idmap_iter_fun(fsm_state_t state_id, unsigned value, void *opaque);
4646

src/adt/edgeset.c

Lines changed: 109 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <inttypes.h>
1212

1313
#define LOG_BITSET 0
14+
#define LOG_BSEARCH 0
1415

1516
#include "libfsm/internal.h" /* XXX: for allocating struct fsm_edge, and the edges array */
1617

@@ -184,6 +185,100 @@ edge_set_advise_growth(struct edge_set **pset, const struct fsm_alloc *alloc,
184185
return 1;
185186
}
186187

188+
enum fsp_res {
189+
FSP_FOUND_INSERT_POSITION,
190+
FSP_FOUND_VALUE_PRESENT,
191+
};
192+
193+
/* Use binary search to find the first position N where set->groups[N].to >= state,
194+
* which includes the position immediately following the last entry. Return an enum
195+
* which indicates whether state is already present. */
196+
static enum fsp_res
197+
find_state_position(const struct edge_set *set, fsm_state_t state, size_t *dst)
198+
{
199+
size_t lo = 0, hi = set->count;
200+
if (LOG_BSEARCH) {
201+
fprintf(stderr, "%s: looking for %d in %p (count %zu)\n",
202+
__func__, state, (void *)set, set->count);
203+
}
204+
205+
#if EXPENSIVE_CHECKS
206+
/* invariant: input is unique and sorted */
207+
for (size_t i = 1; i < set->count; i++) {
208+
assert(set->groups[i - 1].to < set->groups[i].to);
209+
}
210+
#endif
211+
212+
if (set->count == 0) {
213+
if (LOG_BSEARCH) {
214+
fprintf(stderr, "%s: empty, returning 0\n", __func__);
215+
}
216+
*dst = 0;
217+
return FSP_FOUND_INSERT_POSITION;
218+
} else {
219+
if (LOG_BSEARCH) {
220+
fprintf(stderr, "%s: fast path: looking for %d, set->groups[last].to %d\n",
221+
__func__, state, set->groups[hi - 1].to);
222+
}
223+
224+
/* Check the last entry so we can append in constant time. */
225+
const fsm_state_t last = set->groups[hi - 1].to;
226+
if (state > last) {
227+
*dst = hi;
228+
return FSP_FOUND_INSERT_POSITION;
229+
} else if (state == last) {
230+
*dst = hi - 1;
231+
return FSP_FOUND_VALUE_PRESENT;
232+
}
233+
}
234+
235+
size_t mid;
236+
while (lo < hi) { /* lo <= mid < hi */
237+
mid = lo + (hi - lo)/2; /* avoid overflow */
238+
const struct edge_group *eg = &set->groups[mid];
239+
const fsm_state_t cur = eg->to;
240+
if (LOG_BSEARCH) {
241+
fprintf(stderr, "%s: lo %zu, hi %zu, mid %zu, cur %d, looking for %d\n",
242+
__func__, lo, hi, mid, cur, state);
243+
}
244+
245+
if (state == cur) {
246+
*dst = mid;
247+
return FSP_FOUND_VALUE_PRESENT;
248+
} else if (state > cur) {
249+
lo = mid + 1;
250+
if (LOG_BSEARCH) {
251+
fprintf(stderr, "%s: new lo %zd\n", __func__, lo);
252+
}
253+
254+
/* Update mid if we're about to halt, because we're looking
255+
* for the first position >= state, not the last position <=. */
256+
if (lo == hi) {
257+
mid = lo;
258+
if (LOG_BSEARCH) {
259+
fprintf(stderr, "%s: special case, updating mid to %zd\n", __func__, mid);
260+
}
261+
}
262+
} else if (state < cur) {
263+
hi = mid;
264+
if (LOG_BSEARCH) {
265+
fprintf(stderr, "%s: new hi %zd\n", __func__, hi);
266+
}
267+
}
268+
}
269+
270+
if (LOG_BSEARCH) {
271+
fprintf(stderr, "%s: halting at %zd (looking for %d, cur %d)\n",
272+
__func__, mid, state, set->groups[mid].to);
273+
}
274+
275+
/* dst is now the first position > state (== case is handled above),
276+
* which may be one past the end of the array. */
277+
assert(mid == set->count || set->groups[mid].to > state);
278+
*dst = mid;
279+
return FSP_FOUND_INSERT_POSITION;
280+
}
281+
187282
int
188283
edge_set_add_bulk(struct edge_set **pset, const struct fsm_alloc *alloc,
189284
uint64_t symbols[256/64], fsm_state_t state)
@@ -223,30 +318,24 @@ edge_set_add_bulk(struct edge_set **pset, const struct fsm_alloc *alloc,
223318
assert(set->count <= set->ceil);
224319

225320
#if LOG_BITSET
226-
fprintf(stderr, " -- edge_set_add: symbols [0x%lx, 0x%lx, 0x%lx, 0x%lx] -> state %d on %p\n",
227-
symbols[0], symbols[1], symbols[2], symbols[3],
228-
state, (void *)set);
321+
fprintf(stderr, " -- edge_set_add: symbols [0x%lx, 0x%lx, 0x%lx, 0x%lx] -> state %d on %p\n",
322+
symbols[0], symbols[1], symbols[2], symbols[3],
323+
state, (void *)set);
229324
#endif
230325

231-
/* Linear search for a group with the same destination
232-
* state, or the position where that group would go. */
233-
for (i = 0; i < set->count; i++) {
326+
switch (find_state_position(set, state, &i)) {
327+
case FSP_FOUND_VALUE_PRESENT:
328+
assert(i < set->count);
234329
eg = &set->groups[i];
235-
236-
if (eg->to == state) {
237-
/* This API does not indicate whether that
238-
* symbol -> to edge was already present. */
239-
size_t i;
240-
for (i = 0; i < 256/64; i++) {
241-
eg->symbols[i] |= symbols[i];
242-
}
243-
dump_edge_set(set);
244-
return 1;
245-
} else if (eg->to > state) {
246-
break; /* will shift down and insert below */
247-
} else {
248-
continue;
330+
for (i = 0; i < 256/64; i++) {
331+
eg->symbols[i] |= symbols[i];
249332
}
333+
dump_edge_set(set);
334+
return 1;
335+
336+
break;
337+
case FSP_FOUND_INSERT_POSITION:
338+
break; /* continue below */
250339
}
251340

252341
/* insert/append at i */

src/adt/idmap.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,9 @@ idmap_iter(const struct idmap *m,
334334
for (uint64_t b_i = 0; b_i < 64; b_i++) {
335335
if (w & ((uint64_t)1 << b_i)) {
336336
const unsigned v = 64*w_i + b_i;
337-
cb(b->state, v, opaque);
337+
if (!cb(b->state, v, opaque)) {
338+
return;
339+
}
338340
}
339341
}
340342
}
@@ -371,7 +373,9 @@ idmap_iter_for_state(const struct idmap *m, fsm_state_t state_id,
371373

372374
if (w & ((uint64_t)1 << b_i)) {
373375
const unsigned v = 64*w_i + b_i;
374-
cb(b->state, v, opaque);
376+
if (!cb(b->state, v, opaque)) {
377+
return;
378+
}
375379
block_count++;
376380
}
377381
b_i++;

src/adt/stateset.c

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@
4949
struct state_set {
5050
const struct fsm_alloc *alloc;
5151
fsm_state_t *a;
52-
size_t i;
53-
size_t n;
52+
size_t i; /* used */
53+
size_t n; /* ceil */
5454
};
5555

5656
int
@@ -143,7 +143,8 @@ state_set_cmp(const struct state_set *a, const struct state_set *b)
143143
}
144144

145145
/*
146-
* Return where an item would be, if it were inserted
146+
* Return where an item would be, if it were inserted.
147+
* When insertion would append this returns one past the array.
147148
*/
148149
static size_t
149150
state_set_search(const struct state_set *set, fsm_state_t state)
@@ -155,6 +156,11 @@ state_set_search(const struct state_set *set, fsm_state_t state)
155156
assert(!IS_SINGLETON(set));
156157
assert(set->a != NULL);
157158

159+
/* fast path: append case */
160+
if (set->i > 0 && state > set->a[set->i - 1]) {
161+
return set->i;
162+
}
163+
158164
start = mid = 0;
159165
end = set->i;
160166

@@ -166,6 +172,12 @@ state_set_search(const struct state_set *set, fsm_state_t state)
166172
end = mid;
167173
} else if (r > 0) {
168174
start = mid + 1;
175+
/* update mid if we're about to halt, because
176+
* we're looking for the first position >= state,
177+
* not the last position <= */
178+
if (start == end) {
179+
mid = start;
180+
}
169181
} else {
170182
return mid;
171183
}
@@ -247,7 +259,7 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc,
247259
*/
248260
if (!state_set_empty(set)) {
249261
i = state_set_search(set, state);
250-
if (set->a[i] == state) {
262+
if (i < set->i && set->a[i] == state) {
251263
return 1;
252264
}
253265
}
@@ -266,11 +278,7 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc,
266278
set->n *= 2;
267279
}
268280

269-
if (state_set_cmpval(state, set->a[i]) > 0) {
270-
i++;
271-
}
272-
273-
if (i <= set->i) {
281+
if (i < set->i) {
274282
memmove(&set->a[i + 1], &set->a[i], (set->i - i) * (sizeof *set->a));
275283
}
276284

@@ -281,9 +289,9 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc,
281289
set->i = 1;
282290
}
283291

284-
#if EXPENSIVE_CHECKS
292+
/* This assert can be pretty expensive in -O0 but in -O3 it has very
293+
* little impact on the overall runtime. */
285294
assert(state_set_contains(set, state));
286-
#endif
287295

288296
return 1;
289297
}
@@ -477,7 +485,7 @@ state_set_remove(struct state_set **setp, fsm_state_t state)
477485
}
478486

479487
i = state_set_search(set, state);
480-
if (set->a[i] == state) {
488+
if (i < set->i && set->a[i] == state) {
481489
if (i < set->i) {
482490
memmove(&set->a[i], &set->a[i + 1], (set->i - i - 1) * (sizeof *set->a));
483491
}
@@ -533,7 +541,7 @@ state_set_contains(const struct state_set *set, fsm_state_t state)
533541
}
534542

535543
i = state_set_search(set, state);
536-
if (set->a[i] == state) {
544+
if (i < set->i && set->a[i] == state) {
537545
return 1;
538546
}
539547

0 commit comments

Comments
 (0)